CSSWENG_PROJECT/readFile.py at main · WZT006/CSSWENG_PROJECT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import xlwings as xw


##Reads the specified file and sheet ,filtering the "unneeded data"
#Params :
# fName : String specifying the file's name
def readFile(fName : str) -> pd.DataFrame:

    #dataset = pd.read_excel( fName ,skiprows=lambda x: x in [0, 1], usecols=lambda x: 'Unnamed' not in x,sheet_name = sName)

    data = removeBorders(fName)


    cols_to_use = ['Month','Group','AM','Client','Solution Portfolio',
                   'TOTAL Amount in Php', 'GP', "% GP"]
    #Filters read excel file to a dataframe to only include wanted columns
    data = data[cols_to_use]
    #data.rename(columns={'TOTAL Amount in Php' : 'Revenue'}, inplace = True)
    data['TOTAL Amount in Php'] = data['TOTAL Amount in Php'] / 1_000_000
    data['GP'] = data['GP'] / 1_000_000

    #strip spaces
    df_obj = data.select_dtypes(['object'])
    data[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
    return data


##Removes variable amount of filler before header column and row
def removeBorders(fName : str):
    wb = xw.Book(fName)
    sheet = wb.sheets[0]

    df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value
    ##check which row has "Month"
    # idx = df.loc[(df == 'Month').any(axis=1)].index.values
    # if (not idx): idx = 0
    # else: idx = idx[0] + 1

    # #filter out rows above idx
    # df = pd.read_excel(fName,header= idx)
    idx = 0
    return df