-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscrawlerSelenium.py
More file actions
82 lines (67 loc) · 2.67 KB
/
scrawlerSelenium.py
File metadata and controls
82 lines (67 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from WebScrapers import *
from WebScrapers import snipd
from globals import fileName
import sys
# everything start here
def webscrape(targetNumWeek=1):
    """Run every currently-enabled scraper over the last *targetNumWeek* weeks.

    The shared output file (``fileName``, imported from ``globals``) is
    truncated first so each run starts from an empty file.  The long
    commented-out list below is a toggle registry: uncomment a scraper
    line to include it in the run.

    :param targetNumWeek: how many most-recent weeks each scraper should
        cover; defaults to 1.
    """
    # Reset the output file.  Opening in 'w' mode truncates it; the
    # original `file.flush()` here was a no-op on an already-empty file.
    with open(fileName, 'w'):
        pass
    print(f'scraping weeks: {targetNumWeek}')
    # --- scraper toggle registry: uncomment to enable -------------------
    # open_ai.scrapeOpenAI(targetNumWeek)
    # google_blog_ai.scrapeGoogleBlogAI(targetNumWeek)
    # developer_archives.scrapeDevelopersArchives(targetNumWeek)
    # alchemy_blog.scrapeAlchemyBlog(targetNumWeek)
    # decrypt.scrapeDecrypt(targetNumWeek)
    # cointelegraph.scrapeCointelegraph(targetNumWeek)
    # coin_desk.scrapeCoinDesk(targetNumWeek)
    # hak_research.scrapeHakResearch1(targetNumWeek)
    # ibm.scrapeIBM(targetNumWeek)
    # vng.scrapeVNG(targetNumWeek)
    # hugging_face.scrapeHuggingFace(targetNumWeek)
    # zkblab.scrapeZkblab(targetNumWeek)
    # google_lab.scrapeGoogleLab(targetNumWeek)
    # apple.scrapeApple(targetNumWeek)
    # forte_lab.scrapeForteLab(targetNumWeek)
    # ali_abdaal.scrapeAliAbdaal(targetNumWeek)
    # gfi.scrapeGfi(targetNumWeek)
    # bankless.scrapeBankless(targetNumWeek)
    # coin98.scrapeCoin98(targetNumWeek)
    # hak_research.scrapeHakResearch(targetNumWeek)
    # hak_research.scrapeHakResearch1(targetNumWeek)
    # webflow.scrapeWebflow(targetNumWeek)
    # hackerrank.scrapeHackerrank(targetNumWeek)
    # atlassian.scrapeAtlassian(targetNumWeek)
    # cognizant.scrapeCognizant(targetNumWeek)
    # yc.scrapeYC(targetNumWeek)
    # accenture.scrapeAccenture(targetNumWeek)
    # mygreatlearning.scrapeMygreatlearning(targetNumWeek)
    # kdnugget.scrapeKdnugget(targetNumWeek)
    # analytic_vidhya.scrapeAnalytic_Vidhya(targetNumWeek)
    # hubspot.scrapeHubspot(targetNumWeek)
    # Feb/2024
    # academy_binance.scrapeAcademyBinance(targetNumWeek)
    # binance.scrapeBinance(targetNumWeek)
    # chain_link.scrapeChainlink(targetNumWeek)
    # vitalik.scrapeVitalik(targetNumWeek)
    # theBlock.scrapeLatest(targetNumWeek)
    # decrypt.scrapeNewsExplorer(targetNumWeek)
    # nextrope.scrapeArticles(targetNumWeek)
    # theBlock.startScrapeReport(targetNumWeek)
    # applePodcast.startScrape(targetNumWeek)
    # decrypt2.startScrape(targetNumWeek)
    # coin68.startScrape(targetNumWeek)
    # blockWork.startScrape(targetNumWeek)
    # okx.startScrape(targetNumWeek)
    # coinbase.scrapeArticles(targetNumWeek)
    # coinbase1.startScrape(targetNumWeek) #web browser on
    # Mar/2024
    # snipd.startScrape(targetNumWeek)
    # Currently the only active scraper.  `hak_research` is presumably
    # provided by the `from WebScrapers import *` at the top of the file
    # — TODO confirm, since the wildcard import hides the actual source.
    hak_research.startScrape(targetNumWeek)
    print('** Done **')
# Script entry point: optional first CLI argument selects how many weeks
# to scrape (defaults to 1 when no argument is given).
if __name__ == '__main__':
    week_count = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    webscrape(week_count)