-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathfetchData.py
More file actions
56 lines (37 loc) · 1.11 KB
/
fetchData.py
File metadata and controls
56 lines (37 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
from tqdm import tqdm
import requests
import threading
def save_file(url, name, timeout=30):
    """Download ``url`` and write the response body to ``./AdditionalData/{name}.xml``.

    Failures are reported on stdout and otherwise ignored, so that a caller
    looping over many URLs keeps going after a single bad download.

    Args:
        url: Full URL to request.
        name: File stem; the body is written to ``./AdditionalData/{name}.xml``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            a timeout a stalled connection would block the calling thread
            indefinitely.

    Returns:
        None. Nothing is written when the request fails or the server answers
        with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Reject 4xx/5xx answers so an error page is never saved as if it
        # were the requested XML document.
        response.raise_for_status()
        with open(f'./AdditionalData/{name}.xml', 'wb') as file:
            file.write(response.content)
    except Exception as e:
        # Deliberately broad: this is a best-effort download and any failure
        # (network, HTTP status, filesystem) must not abort the caller's loop.
        print(f"Failed to fetch {url}: {e}")
def fetch(urls, names):
    """Download one dblp XML document per entry and print coarse progress.

    Each entry of ``urls`` is appended to ``https://dblp.org/pid/`` and
    suffixed with ``.xml`` to form the request URL; the matching entry of
    ``names`` is passed to ``save_file`` as the output file stem.

    Progress is printed as a percentage (10, 20, ... 100) each time another
    tenth of ``urls`` has been processed.

    Args:
        urls: Sequence of path fragments appended to the dblp base URL.
        names: Sequence of output file stems, paired with ``urls`` by
            position. Pairing stops at the shorter of the two sequences.

    Returns:
        None.
    """
    base_url = "https://dblp.org/pid/"
    total = len(urls)
    last_decile = 0
    for done, (pid, name) in enumerate(zip(urls, names), start=1):
        save_file(base_url + pid + ".xml", name)
        # Integer arithmetic: number of completed tenths after `done` items.
        # (The previous int(i / len(urls)) was always 0, so nothing printed.)
        decile = done * 10 // total
        if decile != last_decile:
            last_decile = decile
            print(decile * 10)
if __name__ == "__main__":
    # Script entry point: read Additional.csv, split its rows into chunks of
    # `k`, and download each chunk on its own thread via fetch().
    import os

    df = pd.read_csv("Additional.csv")
    print("Number of People : ", len(df))
    pids = df.Pid.to_list()
    names = df.Name.to_list()

    # save_file() writes into ./AdditionalData; create it up front so the
    # downloads do not all fail on a fresh checkout.
    os.makedirs("./AdditionalData", exist_ok=True)

    k = 200  # rows handled per worker thread
    # Stepping by k handles every size uniformly: fewer than k rows yields a
    # single chunk (previously a NameError on the unbound loop index), and an
    # exact multiple of k no longer produces an empty trailing chunk.
    chunks = [
        (pids[start:start + k], names[start:start + k])
        for start in range(0, len(names), k)
    ]

    t = [threading.Thread(target=fetch, args=chunk) for chunk in chunks]
    for worker in t:
        worker.start()
    for worker in t:
        worker.join()
    print("Done Fetching")