-
Notifications
You must be signed in to change notification settings - Fork 0
/
DATA_GAP_ANALYSIS_OUPUT_READER_ReadAllParams.py
140 lines (116 loc) · 5.05 KB
/
DATA_GAP_ANALYSIS_OUPUT_READER_ReadAllParams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 29 14:05:16 2020
@author: bcubrich
"""
#%%
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 14 13:21:10 2018
@author: bcubrich
"""
import pandas as pd
import numpy as np
import seaborn as sns
from tkinter import Tk
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
import matplotlib.pyplot as plt
import os
#import xlrd
#import wx
#The following function is just used to get paths from a dialog.
#I usually just run it once to get the path, and then leave this
#function so that I can get other paths if needed
def get_dat():
    """Ask the user to pick a folder and return what the dialog returns.

    A throwaway Tk root window is created only to host the dialog. It is
    hidden, forced to the foreground, and destroyed again once the dialog
    has closed so repeated calls do not pile up Tk instances.

    Returns:
        The value returned by ``askdirectory()`` (the chosen folder path).
    """
    root = Tk()
    try:
        root.withdraw()                     # hide the empty root window
        root.focus_force()
        root.attributes("-topmost", True)   # makes the dialog appear on top
        filename = askdirectory()           # folder chooser, not a file chooser
    finally:
        # Release the root window even if the dialog raises.
        root.destroy()
    return filename
#def onButton2(event):
#
#app = wx.App()
#
#frame = wx.Frame(None, -1, 'win.py')
#frame.SetDimensions(0,0,200,50)
#
## Create open file dialog
#openFileDialog = wx.DirDialog(frame, "Choose folder to save output to", "",
# wx.DD_DEFAULT_STYLE | wx.DD_DIR_MUST_EXIST)
#
#openFileDialog.ShowModal()
#print(openFileDialog.GetPath())
#
## outfile_path is the string with the path name saved as a variable
#outfile_path = openFileDialog.GetPath()+'\\'
#openFileDialog.Destroy()
#
#del app
# Workbook describing the parameters (hard-coded location).
sites = r'U:/PLAN/BCUBRICH/Python/Parameter Reader/PARAMETERS.xls'

# Each of these columns is passed through str() while the sheet is loaded.
_text_columns = ('SITE NAME', 'State Code', 'County Code', 'Site Code',
                 'Paramter', 'Analyt', 'Method', 'Unit')
sites_df = pd.read_excel(
    sites, converters={name: str for name in _text_columns})  # load data

# Trim any '(' / ')' characters from both ends of every 'Analyt' value.
sites_df['Analyt'] = sites_df['Analyt'].str.strip('()')

# Folder picked by the user through the dialog.
directory = get_dat()

# Pipe-separated list of column names: thirteen fixed fields followed by a
# Monitor/Assessment concentration pair for each of levels 1 through 10.
_fixed_fields = [
    'Transaction Type',
    'Action Indicator',
    'Assessment Type',
    'Performing Agency',
    'State Code / Tribal Indicator',
    'County Code / Tribal Code',
    'Site Number',
    'Parameter Code',
    'POC',
    'Assessment Date',
    'Assessment Number',
    'Monitor Method Code',
    'Reported Unit',
]
_level_fields = [
    'Level {} {} Concentration'.format(level, role)
    for level in range(1, 11)
    for role in ('Monitor', 'Assessment')
]
columns_raw = '|'.join(_fixed_fields + _level_fields)

# Show what is in the chosen folder.
print(os.listdir(directory))
#%%
columns = columns_raw.split('|')
output_df = pd.DataFrame(columns=columns)
count = 0

# A file is left out when its fourth line contains either of these strings.
_excluded_markers = ('11/30/2019 22.0 - 1/1/2020',
                     '11/29/2019 0.0 - 12/31/2019')

# Gather the text of every .xls/.csv file in the chosen folder, then join
# once at the end instead of growing a string inside the loop.
_kept_texts = []
for filename in os.listdir(directory):
    if not filename.endswith((".xls", ".csv")):
        continue
    with open(directory+'/'+filename) as f:
        text = f.read()
    # Only the fourth line is inspected. A file with fewer than four lines
    # has nothing to inspect, so it is kept rather than raising IndexError.
    _head = text.split('\n', 4)
    _fourth_line = _head[3] if len(_head) > 3 else ''
    if not any(marker in _fourth_line for marker in _excluded_markers):
        _kept_texts.append(text)
text_all = ''.join(_kept_texts)
#%%
# Build the plain-text summary from the combined report text.
# NOTE(review): the nesting below is reconstructed from statement order
# (the `else` is paired with the "'/' in line" test) - confirm against the
# original indentation.

# Any line containing one of these substrings is ignored entirely.
_ignored_markers = ('1/1/2020', '11/29/2019 0.0 - 12/31/2019', '1st', 'Last')
# Substrings that earn the "Continuous PM25" tag.
_pm25_codes = ('88101-3', '88101-4', '88101-5')
_site_rule = '---------------------------------------------'

_pieces = []
old_site = ''
for line in text_all.split('\n'):
    if any(marker in line for marker in _ignored_markers):
        continue

    # Lines with a slash are copied through unchanged.
    if '/' in line:
        _pieces.append(line + '\n')
        continue

    # Remaining lines: the first 11 characters are compared with the last
    # value seen; a change starts a new banner.
    site = line[0:11]
    if site != old_site:
        old_site = site
        _pieces.append('\n\n' + _site_rule + '\n'
                       + old_site + '\n'
                       + _site_rule + '\n')

    # Sub-heading made from characters 12-19 of the line, tagged once per
    # matching PM2.5 code.
    _pieces.append('--------------------------------\n')
    _pieces.append(line[12:20])
    for _code in _pm25_codes:
        if _code in line:
            _pieces.append(' ****Continuous PM25*****')
    _pieces.append('\n')

final_missing_data = ''.join(_pieces)
#%%
# Save the summary into the chosen folder. The context manager closes the
# file even if the write raises; n is the value returned by write().
with open(directory+'/'+"summary_of_year_end_review_with_solids.txt", "wt") as text_file:
    n = text_file.write(final_missing_data)