-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy patheoddata.py
More file actions
479 lines (368 loc) · 15.4 KB
/
eoddata.py
File metadata and controls
479 lines (368 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
import os
import sys
import datetime
import sqlite3
import optparse
import csv
import glob
import ConfigParser
import tempfile
import fnmatch
import zipfile
import shutil
import ftplib
options = None
args = None
config = None
def extractSymbol(symbol, exchange):
"""
Extracts all the data for a given EXCHANGE/SYMBOL combination to the
standard output. The exchange parameter is needed to decide which table
to query.
"""
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, "Table")
con = sqlite3.connect(dbPath)
cursor = con.cursor()
cursor.execute('select timestamp, open, high, low, close, volume from ' + tableName + ' where symbol = \'' + symbol + '\' order by timestamp')
for row in cursor:
print( str(row[0]) + ',' + str(row[1]) + ',' + str(row[2]) + ',' + str(row[3]) + ',' + str(row[4]) + ',' + str(row[5]))
con.commit()
cursor.close()
def extractExchangeSymbols(exchange):
"""
Extracts all "interesting" symbols for an exchange. The interesting symbols
and the corresponding output files are read from the .ini file, more
specifically from the corresponding [EXCHANGE Symbols] section.
For example, if exchange == "COMEX" and the .ini file contains:
[COMEX]
Dir: comex
[COMEX Symbols]
GC: gc.csv
SI: si.csv
Then the function will extract the data for GC and SI symbols, storing the
output to comex/gc.csv and comex/si.csv files, respectively. As you can see
the output files are relative to the directory defined for the exchange.
"""
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, "Table")
dir = config.get(exchange, "Dir")
symbols = config.items(exchange + " Symbols")
con = sqlite3.connect(dbPath)
cursor = con.cursor()
for ss in symbols:
ff = open(os.path.join(dir, ss[1]), 'w')
cursor.execute('select timestamp, open, high, low, close, volume from ' + tableName + ' where symbol = \'' + ss[0] + '\' order by timestamp')
for row in cursor:
ff.write( str(row[0]) + ',' + str(row[1]) + ',' + str(row[2]) + ',' + str(row[3]) + ',' + str(row[4]) + ',' + str(row[5]) + '\n')
ff.close()
con.commit()
cursor.close()
def addExchangeFiles(exchange, filePaths):
"""
Add a list of files for a particular exchange into the database. The input
files are supposed to be in csv format.
"""
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, 'Table')
try:
removeSuffix = config.get(exchange, 'RemoveSuffix')
except ConfigParser.NoOptionError:
removeSuffix = ''
con = sqlite3.connect(dbPath)
cursor = con.cursor()
cursor.execute('create table if not exists ' + tableName + ' (symbol text, timestamp text, open float, high float, low float, close float, volume integer, constraint indexes_id primary key (symbol, timestamp))')
con.commit()
if options.replace:
cmd = 'insert or replace into ' + tableName + ' '
else:
cmd = 'insert or ignore into ' + tableName + ' '
for filePath in filePaths:
if options.verbose:
print 'Processing ' + filePath
reader = csv.reader(open(filePath), delimiter=',')
if options.header:
reader.next()
readDataLine = False
for row in reader:
if removeSuffix and row[0].endswith(removeSuffix):
symbol = row[0][:-len(removeSuffix)]
else:
symbol = row[0]
format = '%d-%b-%Y'
try:
date = datetime.datetime.strptime(row[1], format).date()
except ValueError:
format = '%Y%m%d'
# If we have already seen a line with data - break the program if the second
# parsing fails, otherwise assume we are cycling through header lines
if readDataLine:
date = datetime.datetime.strptime(row[1], format).date()
else:
try:
date = datetime.datetime.strptime(row[1], format).date()
except ValueError:
continue
readDataLine = True
cursor.execute(cmd + 'values(?, ?, ?, ?, ?, ?, ?)', (symbol, date, row[2], row[3], row[4], row[5], row[6]))
con.commit()
cursor.close()
def addExchangeZipFiles(exchange, filePaths):
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, "Table")
tempDir = tempfile.mkdtemp()
for filePath in filePaths:
if zipfile.is_zipfile(filePath):
zipFile = zipfile.ZipFile(filePath)
if options.verbose:
print "Unzipping " + filePath
zipFile.extractall(tempDir)
zipFile.close()
else:
if options.verbose:
print "\"" + filePath + "\" is not a zip file\n"
if options.verbose:
print "Sorting the file list"
dataFiles = [os.path.join(tempDir, oo) for oo in os.listdir(tempDir)]
dataFiles.sort()
addExchangeFiles(exchange, dataFiles)
if os.path.exists(tempDir):
shutil.rmtree(tempDir)
def buildExchange(exchange, filePaths):
"""
Re-creates the table for the specified exchange. The input files are
expected to be zip files, each of which contains multiple csv files
with historic data.
These files are available for download on www.eoddata.com once you
purchase "Historical Data" for a particular exchange.
"""
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, "Table")
con = sqlite3.connect(dbPath)
cursor = con.cursor()
if options.verbose:
print "Removing table " + tableName
cursor.execute('drop table if exists ' + tableName)
con.commit()
cursor.close()
tempDir = tempfile.mkdtemp()
for filePath in filePaths:
if zipfile.is_zipfile(filePath):
zipFile = zipfile.ZipFile(filePath)
if options.verbose:
print "Unzipping " + filePath
zipFile.extractall(tempDir)
zipFile.close()
else:
if options.verbose:
print "\"" + filePath + "\" is not a zip file\n"
if options.verbose:
print "Sorting the file list"
dataFiles = [os.path.join(tempDir, oo) for oo in os.listdir(tempDir)]
dataFiles.sort()
addExchangeFiles(exchange, dataFiles)
if os.path.exists(tempDir):
shutil.rmtree(tempDir)
def buildAllExchanges():
"""
Re-builds all the exchanges defined in the ini file. For each exchange
the function calls buildExchange with the proper settings from the ini
file.
"""
global config, options
for item in config.items("Exchanges"):
exchange = item[0]
dir = config.get(exchange, "Dir")
filePaths = []
for filePath in os.listdir(dir):
fullPath = os.path.join(dir, filePath)
if zipfile.is_zipfile(fullPath):
filePaths.append(fullPath)
buildExchange(exchange, filePaths)
def extractAllSymbols():
"""
Extracts all interesting symbols defined in the ini file.
"""
global config, options
for item in config.items("Exchanges"):
extractExchangeSymbols(item[0])
def downloadExchangeFiles(user, password, remotePath, localPath, since):
if os.path.exists(localPath):
shutil.rmtree(localPath)
os.mkdir(localPath)
ftp = ftplib.FTP("ftp.eoddata.com")
ftp.login(user, password)
if remotePath:
ftp.cwd(remotePath)
nlst = ftp.nlst()
fileStarts = [config.get(oo[0], 'FileStart') for oo in config.items("Exchanges")]
for file in nlst:
for fileStart in fileStarts:
if file.startswith(fileStart):
if since:
fileStartLen = len(fileStart)
dateStr = file[fileStartLen:(fileStartLen+8)]
if dateStr > since:
processFile = True
else:
processFile = False
else:
processFile = True
if processFile:
if options.verbose:
print "Downloading " + file + " to " + localPath
outfile = open(os.path.join(localPath, file), 'w')
ftp.retrbinary("RETR " + file, outfile.write)
outfile.close()
break
ftp.close()
def downloadDaily(user, password):
"""
Downloads the daily csv files from ftp.eoddata.com and stores them locally.
The daily files are retrieved from the root directory of the server and
are filtered by the exchange name. For example, if the NYMEX exchange is
in the ini, then all NYMEX_* files are downloaded from the ftp and stored
in the directory defined in the ini file.
"""
global config, options
dailyPath = config.get('General', 'DailyDir')
downloadExchangeFiles(user, password, '', dailyPath, '')
def downloadHistory(user, password, since):
global config, options
historyPath = config.get('General', 'HistoryDir')
downloadExchangeFiles(user, password, 'History', historyPath, since)
def addDaily():
global config, options
dailyPath = config.get('General', 'DailyDir')
exchanges = [oo[0] for oo in config.items("Exchanges")]
allFiles = os.listdir(dailyPath)
for exchange in exchanges:
filePaths = []
fileStart = config.get(exchange, 'FileStart')
for file in allFiles:
if file.startswith(fileStart):
filePaths.append(os.path.join(dailyPath, file))
addExchangeFiles(exchange, filePaths)
def addHistory():
global config, options
historyPath = config.get('General', 'HistoryDir')
exchanges = [oo[0] for oo in config.items("Exchanges")]
allFiles = os.listdir(historyPath)
for exchange in exchanges:
filePaths = []
fileStart = config.get(exchange, 'FileStart')
for file in allFiles:
if file.startswith(fileStart):
filePaths.append(os.path.join(historyPath, file))
addExchangeFiles(exchange, filePaths)
def verifyNames(exchange, symbolsFile):
global config, options
dbPath = config.get('General', 'DbPath')
tableName = config.get(exchange, "Table")
symbols = set()
with open(symbolsFile, 'r') as ff:
while True:
line = ff.readline()
if not line:
break
parts = line.split(None, 1)
if len(parts) > 0:
symbols.add(parts[0].upper())
con = sqlite3.connect(dbPath)
cursor = con.cursor()
cursor.execute('select distinct(symbol) from ' + tableName)
for row in cursor:
symbol = row[0]
if symbol.upper() not in symbols:
print symbol.upper(), ' not in the symbol list'
con.commit()
cursor.close()
def main(argv=None):
if argv is None:
argv = sys.argv
usage = "usage: %prog [options] arguments"
parser = optparse.OptionParser(usage)
parser.add_option("--add", action="store_true", dest="add", help="Add files")
parser.add_option("--add-zip", action="store_true", dest="add_zip", help="Add zip files")
parser.add_option("--extract", dest="extract", help="Extract SYMBOL", metavar="SYMBOL")
parser.add_option("--replace", action="store_true", dest="replace", help="Replace on conflict")
parser.add_option("--verbose", action="store_true", dest="verbose", help="Verbose")
parser.add_option("--cfg", dest="cfg", help="Config file path", default="eoddata.ini", metavar="CFGPATH")
parser.add_option("--exchange", dest="exchange", help="Exchange to work on", metavar="EXCHANGE")
parser.add_option("--extract-symbols", action="store_true", dest="extract_symbols", help="Extract all configured symbols")
parser.add_option("--header", action="store_true", dest="header", help="skip header")
parser.add_option("--user", dest="user", help="User name for the ftp connections", metavar="USER")
parser.add_option("--password", dest="password", help="Password for the ftp connections", metavar="PASSWORD")
parser.add_option("--download-daily", action="store_true", dest="download_daily", help="Download daily data")
parser.add_option("--add-daily", action="store_true", dest="add_daily", help="Add the daily data to the database")
parser.add_option("--download-history", action="store_true", dest="download_history", help="Download the short term history data")
parser.add_option("--since", dest="since", help="Don't consider dates earlier than this")
parser.add_option("--add-history", action="store_true", dest="add_history", help="Add the short term history data to the database")
parser.add_option("--build-exchange", dest="build_exchange", help="Build an exchange from scratch (from zip files)", metavar="EXCHANGE")
parser.add_option("--build-exchanges", action="store_true", dest="build_exchanges", help="Build all exchanges from scratch")
parser.add_option("--verify-names", dest="verify_names", help="Checks whether all symbols for an exchange are as in the list", metavar="FILE")
global options, args, config
(options, args) = parser.parse_args()
config = ConfigParser.ConfigParser()
config.optionxform = str # Makes the parser case sensitive
config.read(options.cfg)
if options.build_exchanges:
buildAllExchanges()
elif options.extract_symbols:
extractAllSymbols()
elif options.add_daily:
addDaily()
elif options.download_daily:
password = options.password
user = options.user
if not password or not user:
print "Invalid user/password combination"
else:
downloadDaily(user, password)
elif options.download_history:
password = options.password
user = options.user
if not password or not user:
print "Invalid user/password combination"
else:
downloadHistory(user, password, options.since)
elif options.add_history:
addHistory()
elif options.build_exchange:
filePaths = []
for filePattern in args:
for filePath in glob.glob(filePattern):
filePaths.append(filePath)
buildExchange(options.build_exchange, filePaths)
elif options.add:
if not options.exchange:
print "--add requires --exchange"
else:
filePaths = []
for filePattern in args:
for filePath in glob.glob(filePattern):
filePaths.append(filePath)
addExchangeFiles(options.exchange, filePaths)
elif options.add_zip:
filePaths = []
for filePattern in args:
for filePath in glob.glob(filePattern):
filePaths.append(filePath)
addExchangeZipFiles(options.exchange, filePaths)
elif options.extract:
if not options.exchange:
print "--extract requires --exchange"
else:
extractSymbol(options.extract, options.exchange)
elif options.verify_names:
if not options.exchange:
print "--verify-names requires --exchange"
else:
verifyNames(options.exchange, options.verify_names)
if __name__ == "__main__":
sys.exit(main())