In [1]:
import pandas as pd
import os
import numpy as np
import geopandas as gpd

# Getting water levels

## Creating functions

### V1. Mixed raw data and referenced to NN

In [230]:
#function to get excel data from inowas, convert to long data and generate outputs

#It works only for G wells
def DatabaseG (input_name, sheetname, output_name, code_explanation):
    """Reshape a wide G-well Excel sheet into a long table and export it.

    The first four rows of the sheet are discarded, the first and third
    columns are relabelled 'Date' and 'Time', and the 'MST' column is dropped.
    Every remaining column is melted into one row per reading. The well ID is
    the part of the column name before the first underscore. Column names
    longer than five characters are labelled 'Temperature', the rest 'GWL',
    and the labels are then replaced by integer codes.

    Parameters
    ----------
    input_name : path of the Excel workbook.
    sheetname : name of the sheet to read.
    output_name : path of the CSV file to write (index omitted).
    code_explanation : path of the text file that receives the
        code -> label legend of the 'Variable' column.

    Returns
    -------
    DataFrame with the columns 'WellID', 'Time', 'Variable', 'Value'.
    """
    raw_sheet = pd.read_excel(input_name, sheet_name = sheetname)

    # Relabel the first and third columns; all other labels stay as read.
    header = list(raw_sheet.columns)
    header[0], header[2] = 'Date', 'Time'

    wide = raw_sheet.iloc[4:,:]
    wide.columns = header
    wide = wide.drop('MST', axis=1)

    # Wide to long: one row per (Date, Time, original column name).
    long_df = wide.melt(id_vars = ['Date', 'Time'], value_vars = wide.columns[2:])

    # Well ID = text before the first underscore of the original column name.
    name_parts = long_df.variable.str.split('_', expand = True)
    long_df['WellID'] = name_parts[0]

    # Long column names (> 5 characters) are treated as temperature series.
    has_long_name = long_df['variable'].str.len() > 5
    long_df['variable'] = np.where(has_long_name, 'Temperature', 'GWL')

    # Replace the text labels by integer codes; keep the labels for the legend.
    codes, categories = pd.factorize(long_df['variable'])
    long_df['variable'] = codes

    long_df = long_df.rename(columns = {'variable': 'Variable', 'value' : 'Value'})
    long_df = long_df[['WellID', 'Time', 'Variable', 'Value']]

    long_df.to_csv(output_name, index = False)

    # Legend file: header line followed by one "code:label" line per category.
    legend = ['Code of Variable below:\n']
    legend.extend(str(code) + ':' + str(label) + '\n'
                  for code, label in enumerate(categories))
    with open(code_explanation, '+w') as f:
        f.writelines(legend)

    return long_df

def DatabaseGWM (input_name, sheetname, output_name):
    """Reshape a wide Excel sheet of well readings into a long table.

    The first five rows of the sheet are discarded and the first column is
    relabelled 'Time'. Every other column is melted, and its name becomes the
    'WellID' of the resulting rows.

    Parameters
    ----------
    input_name : path of the Excel workbook.
    sheetname : name of the sheet to read.
    output_name : path of the CSV file to write (index omitted).

    Returns
    -------
    DataFrame with the columns 'WellID', 'Time', 'Value'.
    """
    raw_sheet = pd.read_excel(input_name, sheet_name = sheetname)

    header = list(raw_sheet.columns)
    header[0] = 'Time'

    wide = raw_sheet.iloc[5:,:]
    wide.columns = header

    # Wide to long: each non-time column name ends up in 'WellID'.
    long_df = wide.melt(id_vars = ['Time'], value_vars = wide.columns[1:])
    long_df = long_df.rename(columns = {'variable': 'WellID', 'value': 'Value'})
    long_df = long_df[['WellID', 'Time', 'Value']]

    long_df.to_csv(output_name, index = False)
    return long_df

### V2. Referenced to NN only - 2022.11.21

In [3]:
# Relative path: it is resolved against the current working directory, so this
# cell fails if it is run a second time after the directory has already changed.
os.chdir('Data/Groundwater levels')

In [393]:
def DatabaseGWM2019 (gw_fn,sheetname, output_name):
    """Build the long groundwater-level table from the 2019 workbook layout.

    The sheet holds two groups of well columns, each with its own timestamp
    column: the columns up to and including 'G23' use 'Time' (third column of
    the sheet), and the columns after 'G23' start with the timestamp read as
    'Unnamed: 14'. That second timestamp is floored to the hour. Both groups
    are melted and stacked.

    Well names are normalised: 'GWM' becomes 'GWM0', 'G1' becomes 'G01' and
    'G4' becomes 'G04'.

    Parameters
    ----------
    gw_fn : path of the Excel workbook.
    sheetname : name of the sheet to read.
    output_name : path of the CSV file to write (index omitted).

    Returns
    -------
    DataFrame with the columns 'Time', 'Variable', 'WellName', 'Value',
    sorted by those columns. 'Variable' is always 0.

    Raises
    ------
    ValueError : if 'Unnamed: 14', 'G1', 'G4' or 'G23' is not among the columns.
    KeyError : if the sheet has no 'MST' column.
    """
    gwlG_df = pd.read_excel(gw_fn, sheet_name = sheetname)

    cols_list = list(gwlG_df.columns)
    cols_list[0] = 'Date'
    cols_list[2] = 'Time'
    cols_list[cols_list.index('Unnamed: 14')] = 'DateTime2'

    # Drop the first four rows; copy so relabelling never touches the raw frame.
    gwl_df = gwlG_df.iloc[4:,:].copy()
    gwl_df.columns = cols_list
    gwl_df = gwl_df.drop(['MST', 'Date'],axis=1)
    if 'Unnamed: 20' in gwl_df.columns:
        gwl_df = gwl_df.drop(['Unnamed: 20'],axis=1)

    # Normalise the well names.
    cols_list = [WellName.replace('GWM',"GWM0") for WellName in gwl_df.columns]
    cols_list[cols_list.index('G1')] = 'G01'
    cols_list[cols_list.index('G4')] = 'G04'
    gwl_df.columns = cols_list

    gwl_df['Time'] = pd.to_datetime(gwl_df['Time'])
    # Lowercase 'h': the uppercase hourly alias 'H' is deprecated since pandas 2.2.
    gwl_df['DateTime2'] = pd.to_datetime(gwl_df['DateTime2']).dt.floor('h')

    # Split after 'G23'; the second group starts with its own timestamp column.
    split_at = cols_list.index('G23') + 1
    gwl1_df = gwl_df.iloc[:, :split_at]
    gwl2_df = gwl_df.iloc[:, split_at:].rename(columns = {'DateTime2' : 'Time'})

    gwl1_df_ = pd.melt(gwl1_df, id_vars = ['Time'], value_vars = gwl1_df.columns[1:],
                       var_name = 'WellName', value_name = 'Value')
    gwl2_df_ = pd.melt(gwl2_df, id_vars = ['Time'], value_vars = gwl2_df.columns[1:],
                       var_name = 'WellName', value_name = 'Value')

    gwl_df = pd.concat([gwl1_df_, gwl2_df_])
    gwl_df['Variable'] = 0
    sortedcols = ['Time', 'Variable', 'WellName' , 'Value']
    gwl_df = gwl_df[sortedcols].sort_values(by = sortedcols).reset_index(drop = True)

    gwl_df.to_csv(output_name, index = False)
    return gwl_df

In [392]:
def DatabaseGWM2015 (gw_fn,sheetname, output_name):
    """Build the long groundwater-level table from the 2015 workbook layout.

    The first four rows of the sheet are discarded, the first and third
    columns are relabelled 'Date' and 'Time', and 'MST', 'Date', an optional
    'Unnamed: 20' and the last two remaining columns are dropped. Rows whose
    'Time' text starts with '00' are removed before 'Time' is parsed as a
    datetime.

    Well names are normalised: 'GWM' becomes 'GWM0', 'G1' becomes 'G01' and
    'G4' becomes 'G04'.

    Parameters
    ----------
    gw_fn : path of the Excel workbook.
    sheetname : name of the sheet to read.
    output_name : path of the CSV file to write (index omitted).

    Returns
    -------
    DataFrame with the columns 'Time', 'Variable', 'WellName', 'Value',
    sorted by those columns. 'Variable' is always 0.

    Raises
    ------
    ValueError : if 'G1' or 'G4' is not among the remaining columns.
    KeyError : if the sheet has no 'MST' column.
    """
    # Read the workbook here. The body previously used gwlG_df without defining
    # it, so it depended on a global left behind by another cell.
    gwlG_df = pd.read_excel(gw_fn, sheet_name = sheetname)

    gwl_df = gwlG_df.iloc[4:,:].reset_index (drop = True) #removing the first rows
    cols_list = list(gwlG_df.columns)
    cols_list[0] = 'Date'
    cols_list[2] = 'Time'
    gwl_df.columns = cols_list
    gwl_df = gwl_df.drop(['MST', 'Date'],axis=1)
    if 'Unnamed: 20' in gwl_df.columns:
        gwl_df = gwl_df.drop(['Unnamed: 20'],axis=1)

    # Discard the last two columns; copy so the assignments below act on an
    # independent frame rather than on a slice.
    gwl_df = gwl_df.iloc[:,:-2].copy()

    # Normalise the well names.
    cols_list = [WellName.replace('GWM',"GWM0") for WellName in gwl_df.columns]
    cols_list[cols_list.index('G1')] = 'G01'
    cols_list[cols_list.index('G4')] = 'G04'

    # Remove rows whose time text starts with '00', then parse the timestamps.
    gwl_df['Time'] = gwl_df['Time'].astype('str')
    gwl_df = gwl_df.drop(gwl_df[gwl_df['Time'].str.startswith('00')].index)
    gwl_df['Time'] = pd.to_datetime(gwl_df['Time'])

    gwl_df.columns = cols_list
    gwl_df = pd.melt(gwl_df, id_vars = ['Time'], value_vars = gwl_df.columns[1:],
                     var_name = 'WellName', value_name = 'Value')

    gwl_df['Variable'] = 0
    sortedcols = ['Time', 'Variable', 'WellName' , 'Value']
    gwl_df = gwl_df[sortedcols].sort_values(by = sortedcols).reset_index(drop = True)

    gwl_df.to_csv(output_name, index = False)
    return gwl_df

In [342]:
#It works only for G wells
def DatabaseTempG (input_name, sheetname, output_name):
    """Build the long temperature table for the G wells and export it.

    The first four rows of the sheet are discarded, the first and third
    columns are relabelled 'Date' and 'Time', and 'Date' and 'MST' are
    dropped. Only columns whose name is longer than three characters are kept
    (this keeps 'Time'). Each kept name is cut at its first underscore, and
    'G1' / 'G4' are renamed 'G01' / 'G04'.

    Parameters
    ----------
    input_name : path of the Excel workbook.
    sheetname : name of the sheet to read.
    output_name : path of the CSV file to write (index omitted).

    Returns
    -------
    DataFrame with the columns 'Time', 'Variable', 'WellName', 'Value',
    sorted by those columns. 'Variable' is always 1.

    Raises
    ------
    ValueError : if 'G1' or 'G4' is not among the shortened column names.
    KeyError : if the sheet has no 'MST' column.
    """
    gwlG_df = pd.read_excel(input_name, sheet_name = sheetname)
    gw_df2 = gwlG_df.iloc[4:,:].copy()

    cols_list = list(gw_df2.columns)
    cols_list[0] = 'Date'
    cols_list[2] = 'Time'
    gw_df2.columns = cols_list
    gw_df2 = gw_df2.drop(['Date', 'MST'], axis=1)

    # Keep 'Time' plus every column whose name is longer than three characters.
    gw_df2 = gw_df2[[col for col in gw_df2 if len (col) >3]]

    # Shorten each name to the text before its first underscore.
    cols_list = [col.split("_")[0] for col in gw_df2.columns]
    cols_list[cols_list.index('G1')] = 'G01'
    cols_list[cols_list.index('G4')] = 'G04'
    gw_df2.columns = cols_list

    gw_df2 = pd.melt (gw_df2, id_vars = ['Time'], value_vars = gw_df2.columns[1:],
                      var_name = 'WellName', value_name = 'Value')

    gw_df2['Variable'] = 1

    sortedcols = ['Time', 'Variable', 'WellName' , 'Value']
    gw_df2 = gw_df2[sortedcols].sort_values(by = sortedcols).reset_index(drop = True)

    # output_name used to be ignored; write the CSV like the sibling functions do.
    gw_df2.to_csv(output_name, index = False)
    return gw_df2

## Databases

In [None]:
# Absolute, machine-specific path: adjust it before running on another computer.
os.chdir('D:/Repos/PirnaCaseStudy/Data/Groundwater levels')

In [343]:
# Temperature readings of the G wells from the 2019 workbook.
Temp_2019_df = DatabaseTempG(
    input_name = 'GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx',
    sheetname = 'G-Messstellen',
    output_name = 'Temp_2019.csv',
)

In [344]:
# Temperature readings of the G wells from the 2015 workbook.
Temp_2015_df = DatabaseTempG(
    input_name = 'GW Pirna_ab300115bis300117_hourly_1Zoll.xlsx',
    sheetname = 'GW table_from 300115',
    output_name = 'Temp_2015.csv',
)

In [394]:
# Groundwater levels from the 2019 workbook.
GWL_2019_df = DatabaseGWM2019(
    gw_fn = 'GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx',
    sheetname = 'referenced to NN',
    output_name = 'GWL_2019.csv',
)

In [395]:
# Groundwater levels from the 2015 workbook.
GWL_2015_df = DatabaseGWM2015(
    gw_fn = 'GW Pirna_ab300115bis300117_hourly_1Zoll.xlsx',
    sheetname = 'referenced to NN_from300115',
    output_name = 'GWL_2015.csv',
)

### Concatenate databases

In [379]:
# Well metadata table; preview its first five rows.
wells_df = pd.read_csv(filepath_or_buffer = 'Wells.csv')
wells_df.head(n = 5)

Unnamed: 0,ID,Name,DrillID,CaseHeight,Diameter,FilterTop,FilterBase,Depth
0,0,G01,0,119.49,1,1.0,13.0,14.0
1,1,G04,3,119.19,1,1.0,13.0,14.0
2,2,G04neu,4,120.09,1,8.0,14.0,15.1
3,3,G10,6,119.29,1,1.0,13.0,14.0
4,4,G11,7,116.95,1,1.0,13.0,14.0


In [403]:
sortedcols = ['Time', 'Variable', 'WellName', 'Value']

# Stack the temperature and groundwater-level tables into one long table.
database_df = pd.concat ([Temp_2015_df, Temp_2019_df,GWL_2019_df, GWL_2015_df])

# Swap each well name for the numeric ID listed in wells_df. merge() defaults
# to an inner join, so rows whose WellName has no match in wells_df are dropped.
database_df = (
    database_df
    .merge(wells_df[['ID', 'Name']], left_on = 'WellName', right_on = 'Name')
    .reset_index(drop = True)
    .drop(['WellName', 'Name'], axis = 1)
    .rename(columns = {'ID': 'WellID'})
)

# index must be the boolean False: the string 'False' is truthy, so the row
# index was being written to the file as an extra unnamed column.
database_df.to_csv('DiversMeasurements.csv', index = False)

In [408]:
# Print every distinct well name in the 2015 groundwater-level table.
for well_name in GWL_2015_df['WellName'].unique():
    print(well_name)

G01
G04
G10
G11
G12
G13
G15
G17
G19
G21
G23


## Functions to convert readings to hydraulic heads

Here I create two lists of wells, one per diver type, to index the dataframe and calculate the heads. This is necessary because each diver type requires a different procedure to calculate the head. I ported the procedure from the Excel file to this Python script.

In [397]:
# Absolute, machine-specific path: adjust it before running on another computer.
os.chdir('D:/Repos/PirnaCaseStudy/Data/Databases')

Reading the case elevation of each well and merging it onto the measurements database.

In [481]:
WellID_df = pd.read_csv('WellID_edited.csv', encoding = 'utf-8')
# Keep only the first 33 rows (the rows below hold additional information for
# computing coordinates later). Copy so the column assignment below acts on an
# independent frame instead of a slice of the frame that was read.
WellID_df = WellID_df.iloc[:33,:].copy()
WellID_df['CaseHeight'] = pd.array(WellID_df['CaseHeight'], dtype = pd.Float32Dtype())

# Attach each well's case height to its measurements. pd.merge defaults to an
# inner join, so only WellIDs present in both tables are kept.
df = pd.merge(WellID_df[['WellID', "CaseHeight"]] , database_df, on = 'WellID')
df.head()

Unnamed: 0,WellID,CaseHeight,Value,Variable,Date,Hour
0,G01,119.489998,,0,2015-01-30,12
1,G01,119.489998,,1,2015-01-30,12
2,G01,119.489998,,0,2015-01-30,13
3,G01,119.489998,,1,2015-01-30,13
4,G01,119.489998,,0,2015-01-30,14


#### Important

**The apply functions below will always be used to convert the diver data to hydraulic heads before filling the database.**

In [482]:
#defining lists of wells that will guide the type of apply function applied
Solinst_list = [well for well in database_df.WellID.unique() if 'W' not in well]
UIT_list = [well for well in database_df.WellID.unique() if 'W' in well]

#apply function with lambda to convert readings of Solinst to heads using conditionals
head1 = df.apply(lambda x: x['CaseHeight'] - x['Value'] # Use value as input
                     if x['WellID'] in Solinst_list
                         and x['Variable'] == 0
                     else x['Value'], axis = 1)
df['Head'] = head1 #create a new column

#apply function with lambda to convert readings of UIT to heads using conditionals
DiverDepth = 12.3
# elevation - DiverDepth + water column
head2 = df.apply(lambda x: x['CaseHeight'] - DiverDepth + x['head']  #use new head as input now. Not value anymore
                     if x['WellID'] in UIT_list 
                         and x['Variable'] == 0
                     else x['Value'], axis = 1)

df['Head'] = head2

database_df = df [['WellID', 'Date', 'Hour', 'Variable', 'Head']] #finally update database


database_df.head()

Unnamed: 0,WellID,Date,Hour,Variable,head
0,G01,2015-01-30,12,0,
1,G01,2015-01-30,12,1,
2,G01,2015-01-30,13,0,
3,G01,2015-01-30,13,1,
4,G01,2015-01-30,14,0,


In [484]:
# Preview the first five rows of the converted database.
database_df.head(n = 5)

Unnamed: 0,WellID,Date,Hour,Variable,Head
0,G01,2015-01-30,12,0,
1,G01,2015-01-30,12,1,
2,G01,2015-01-30,13,0,
3,G01,2015-01-30,13,1,
4,G01,2015-01-30,14,0,


## Comparing results with excel

It worked!!

* **Problem:** the coordinates in the Koordinates+Elevation file differ from the coordinates in the Excel file. I used the coordinates from Koordinates+Elevation for this calculation.

In [487]:
# Spot check against the Excel workbook: rows of well GWM2 on 2022-01-17.
# (A stray trailing character after .tail(...) made this cell a syntax error.)
database_df.loc [(database_df['WellID'] == 'GWM2') &
                 (database_df['Date'] == pd.to_datetime('2022-01-17').date()) ].tail(n=30)

Unnamed: 0,WellID,Date,Hour,Variable,Head
945695,GWM2,2022-01-17,0,0,110.310003
945696,GWM2,2022-01-17,1,0,110.300003
945697,GWM2,2022-01-17,2,0,110.300003
945698,GWM2,2022-01-17,3,0,110.300003
945699,GWM2,2022-01-17,4,0,110.300003
945700,GWM2,2022-01-17,5,0,110.300003
945701,GWM2,2022-01-17,6,0,110.290003
945702,GWM2,2022-01-17,7,0,110.290003
945703,GWM2,2022-01-17,8,0,110.290003
945704,GWM2,2022-01-17,9,0,110.290003


In [488]:
# Export the converted database without the row index.
database_df.to_csv(path_or_buf = 'GWL.csv', index = False)