-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_excel_file.py
59 lines (45 loc) · 1.93 KB
/
read_excel_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
from configparser import ConfigParser
#Loading config file
config=ConfigParser()
config.read('config.ini')
#Excel config
path=config['excel']['path']
sheet=config['excel']['sheetname']
table_start_row=config.getint('excel','table_start_row')
table1_columns = list(map(int, config.get('excel', 'table1_columns').split(',')))
#table2_columns = list(map(int, config.get('excel', 'table2_columns').split(',')))
def clean_column_name(column_name):
parts = column_name.split(' ',)
capitalized_parts = [part[0].upper()+part[1:] for part in parts]
return ''.join(capitalized_parts)
def read_tables_from_excel():
#read excel file and extract table
excel_full_df=pd.read_excel(path,sheet_name=sheet,header=None)
table_end_row=len(excel_full_df)-1 #determing end row by decrementing two from the dataframelength due to trend in data in excel
#extracting table1
table1=excel_full_df.iloc[table_start_row:table_end_row,table1_columns]
header_row = table1.iloc[0]
table1.columns = header_row
table1.columns = [clean_column_name(col) for col in table1.columns]
table1 = table1[1:]
return(table1)
#table1['FileSize(GB)']=table1['FileSize(GB)'].astype(float)
'''
#extracting table1 i.e. EU data
table1=excel_full_df.iloc[table_start_row:table_end_row,table1_columns]
header_row = table1.iloc[0]
table1.columns = header_row
table1.columns = [clean_column_name(col) for col in table1.columns]
table1 = table1[1:]
table1['FileSize(GB)']=table1['FileSize(GB)'].astype(float)
#extracting table2 i.e. NA data
table2=excel_full_df.iloc[table_start_row:table_end_row,table2_columns]
header_row = table2.iloc[0]
table2.columns = header_row
table2.columns = [clean_column_name(col) for col in table2.columns]
table2 = table2[1:]
table1['FileSize(GB)']=table1['FileSize(GB)'].astype(float)
return(table1,table2)
'''
read_tables_from_excel()