-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreadFile.py
More file actions
43 lines (32 loc) · 1.42 KB
/
readFile.py
File metadata and controls
43 lines (32 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import xlwings as xw
##Reads the specified file and sheet ,filtering the "unneeded data"
#Params :
# fName : String specifying the file's name
def readFile(fName : str) -> pd.DataFrame:
#dataset = pd.read_excel( fName ,skiprows=lambda x: x in [0, 1], usecols=lambda x: 'Unnamed' not in x,sheet_name = sName)
data = removeBorders(fName)
cols_to_use = ['Month','Group','AM','Client','Solution Portfolio',
'TOTAL Amount in Php', 'GP', "% GP"]
#Filters read excel file to a dataframe to only include wanted columns
data = data[cols_to_use]
#data.rename(columns={'TOTAL Amount in Php' : 'Revenue'}, inplace = True)
data['TOTAL Amount in Php'] = data['TOTAL Amount in Php'] / 1_000_000
data['GP'] = data['GP'] / 1_000_000
#strip spaces
df_obj = data.select_dtypes(['object'])
data[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
return data
##Removes variable amount of filler before header column and row
def removeBorders(fName : str):
wb = xw.Book(fName)
sheet = wb.sheets[0]
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value
##check which row has "Month"
# idx = df.loc[(df == 'Month').any(axis=1)].index.values
# if (not idx): idx = 0
# else: idx = idx[0] + 1
# #filter out rows above idx
# df = pd.read_excel(fName,header= idx)
idx = 0
return df