forked from yellekelyk/scrape-spec
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapeSpec.py
More file actions
40 lines (30 loc) · 897 Bytes
/
scrapeSpec.py
File metadata and controls
40 lines (30 loc) · 897 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import BeautifulSoup
import Spec2006Data
import Spec2000Data
import Spec1995Data
import SpecDataElem
import urllib
import sys
import utils
import pdb
def _load_same_named_attr(module_name):
    """Import *module_name* and return its attribute named like the module.

    The lookup goes through the module's ``__dict__`` with ``dict.get``, so
    a module that lacks a same-named attribute yields ``None`` instead of
    raising.
    """
    module = __import__(module_name)
    return vars(module).get(module.__name__)


def main():
    """Fetch a page, parse it, and dump every extracted table to a CSV file.

    Command line: ``scrapeSpec url SpecData SpecDataElem``

    * ``sys.argv[1]`` -- URL to download.
    * ``sys.argv[2]`` -- name of a module whose same-named attribute is
      called as ``attr(soup, elem=elem)`` to build the data object.
    * ``sys.argv[3]`` -- name of a module whose same-named attribute is
      handed to the data object as ``elem``.

    For each name returned by ``data.getNames()`` a file
    ``./test.<name>.csv`` is written (spaces in the name become ``_`` and
    colons are dropped) containing ``data.getTable(name).toString()``.

    Prints the usage line and exits with status 1 when fewer than three
    arguments are supplied.
    """
    if len(sys.argv) < 4:
        print("Usage: scrapeSpec url SpecData SpecDataElem\n")
        # A usage error is a failure; report it with a non-zero status so
        # calling scripts can detect it.
        sys.exit(1)

    # Python 2 urllib responses are not context managers, so close the
    # connection explicitly even when read() fails.
    response = urllib.urlopen(sys.argv[1])
    try:
        html = response.read()
    finally:
        response.close()
    # Drop every character rejected by utils.onlyascii before parsing.
    html = filter(utils.onlyascii, html)

    # Offline debugging alternative: parse a saved page instead of fetching.
    #f = open("./test.cint2000.html")
    #html = f.read()
    #f.close()

    soup = BeautifulSoup.BeautifulSoup(html)

    # Same import order as before: element module first, then data module.
    elem = _load_same_named_attr(sys.argv[3])
    data = _load_same_named_attr(sys.argv[2])(soup, elem=elem)

    for name in data.getNames():
        path = str("./test." + name.replace(" ", "_").replace(":", "") + ".csv")
        # "with" guarantees the file is closed even if building or writing
        # the table raises.
        with open(path, 'w') as out:
            out.write(data.getTable(name).toString())
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()