-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreading_excel_files.py
More file actions
81 lines (62 loc) · 2.4 KB
/
reading_excel_files.py
File metadata and controls
81 lines (62 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples
Please see the test function for the expected return format
"""
import xlrd
from zipfile import ZipFile
datafile = "2013_ERCOT_Hourly_Load_Data.xls"
def open_zip(datafile):
with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
myzip.extractall()
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
### example on how you can get the data
#sheet_data = [[sheet.cell_value(r, col) for col in range(sheet.ncols)] for r in range(sheet.nrows)]
### other useful methods:
# print "\nROWS, COLUMNS, and CELLS:"
# print "Number of rows in the sheet:",
# print sheet.nrows
# print "Type of data in cell (row 3, col 2):",
# print sheet.cell_type(3, 2)
# print "Value in cell (row 3, col 2):",
# print sheet.cell_value(3, 2)
# print "Get a slice of values in column 3, from rows 1-3:"
# print sheet.col_values(3, start_rowx=1, end_rowx=4)
# print "\nDATES:"
# print "Type of data in cell (row 1, col 0):",
# print sheet.cell_type(1, 0)
# exceltime = sheet.cell_value(1, 0)
# print "Time in Excel format:",
# print exceltime
# print "Convert time to a Python datetime tuple, from the Excel float:",
# print xlrd.xldate_as_tuple(exceltime, 0)
data = {
'maxtime': (0, 0, 0, 0, 0, 0),
'maxvalue': 0,
'mintime': (0, 0, 0, 0, 0, 0),
'minvalue': 0,
'avgcoast': 0
}
coasts = sheet.col_values(1, start_rowx=1)
data['maxvalue'] = max(coasts)
data['minvalue'] = min(coasts)
data['avgcoast'] = sum(coasts)/len(coasts)
max_index = coasts.index(data['maxvalue'])
min_index = coasts.index(data['minvalue'])
max_time = sheet.cell_value(max_index+1, 0)
min_time = sheet.cell_value(min_index+1, 0)
data['maxtime'] = xlrd.xldate_as_tuple(max_time, 0)
data['mintime'] = xlrd.xldate_as_tuple(min_time, 0)
return data
def test():
open_zip(datafile)
data = parse_file(datafile)
# assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
# assert round(data['maxvalue'], 10) == round(18779.02551, 10)
test()