# Organizing well levels

In [1]:
import pandas as pd
import os
import numpy as np
import geopandas as gpd

## Organizing coordinates

### Renaming columns

In [2]:
os.chdir('data')

In [3]:
os.listdir()
coords_fn = 'Koordinates+Elevation.xlsx'
coords_df = pd.read_excel(coords_fn)
coords_df = coords_df.iloc[:,:-1]

col_names_list = ['WellID', 'CaseHeight', 'E', 'N', 'Type']
names_dict = {}
for i in range(len(col_names_list)):
    names_dict [coords_df.iloc[:,i].name] = col_names_list[i]

coords_df = coords_df.rename(columns = names_dict)
coords_df.head()

Unnamed: 0,WellID,CaseHeight,E,N,Type
0,G1,119.49,5424455.69,5648391.1,
1,G2,118.83,5424474.0,5648390.0,no existing well
2,G3,118.73,5424491.0,5648392.0,no existing well
3,G4,119.19,5424509.75,5648392.55,
4,G4 neu,120.09,,,


### Droping NA and assuming well type

In [4]:
coords_df.Type.loc[coords_df.Type != 'no existing well'] = 'Well'
coords_df = coords_df.dropna()
coords_df.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coords_df.Type.loc[coords_df.Type != 'no existing well'] = 'Well'


Unnamed: 0,WellID,CaseHeight,E,N,Type
0,G1,119.49,5424455.69,5648391.1,Well
1,G2,118.83,5424474.0,5648390.0,no existing well
2,G3,118.73,5424491.0,5648392.0,no existing well
3,G4,119.19,5424509.75,5648392.55,Well
5,G5,118.933,5424457.0,5648402.0,no existing well


## Getting water levels

In [44]:
#function to get excel data from inowas, convert to long data and generate outputs

#It works only for G wells
def DatabaseG (input_name, sheetname, output_name, code_explanation):
    gwl_fn = input_name
    gwlG_df = pd.read_excel(gwl_fn, sheet_name = sheetname)
    cols_list = list(gwlG_df.columns)
    gwlG_df2 = gwlG_df.iloc[4:,:]
    cols_list[0] = 'Date'
    cols_list[2] = 'Time'
    gwlG_df2.columns = cols_list
    gwlG_df2 = gwlG_df2.drop('MST', axis=1)

    #wide to long
    gwlG_df3= pd.melt(gwlG_df2, id_vars = ['Date', 'Time'], value_vars = gwlG_df2.columns[2:])

    #creating a column with well ID
    gwlG_df3['WellID'] = gwlG_df3.variable.str.split('_')[0]

    #Editting variable column
    #syntax: df[“column_name”] = np.where(df[“column_name”]==”some_value”, value_if_true, value_if_false)
    gwlG_df3['variable'] = np.where(gwlG_df3['variable'].str.len() > 5, 'Temperature', 'GWL')

    #transforming strings in factors
    labels, uniques = pd.factorize(gwlG_df3['variable'])
    gwlG_df3['variable'] = labels

    #renaming and reorganizing columns
    gwlG_df3 = gwlG_df3.rename(columns = {
        'variable': 'Variable',
        'value' : 'Value'
    })
    gwlG_df3 = gwlG_df3[['WellID', 'Time', 'Variable', 'Value']]

    #exporting csv
    gwlG_df3.to_csv(output_name, index = False)
    
    #save explanation of variable
    with open(code_explanation, '+w') as f:
        for i, j in enumerate(uniques):
            f.write('Code of Variable below:\n')
            f.write(str(i) + ':' + str(j))
        
    return (gwlG_df3)

def DatabaseGWM (input_name, sheetname, output_name):
    gwl_fn = input_name
    gwlG_df = pd.read_excel(gwl_fn, sheet_name = sheetname)
    cols_list = list(gwlG_df.columns)
    gwlG_df2 = gwlG_df.iloc[5:,:]
    cols_list[0] = 'Time'
    gwlG_df2.columns = cols_list

    #wide to long
    gwlG_df3= pd.melt(gwlG_df2, id_vars = ['Time'], value_vars = gwlG_df2.columns[1:])

    # renaming and reorganizing columns
    gwlG_df3 = gwlG_df3.rename(columns = {
        'variable': 'WellID',
        'value': 'Value'
    })
    gwlG_df3 = gwlG_df3[['WellID', 'Time', 'Value']]

    #exporting csv
    gwlG_df3.to_csv(output_name, index = False)
    return(gwlG_df3)

In [13]:
os.listdir()

['.ipynb_checkpoints',
 'GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx',
 'GW Pirna_ab300115bis300117_hourly_1Zoll.xlsx',
 'GWL_2019',
 'GWL_2019.csv',
 'readme.txt',
 'readme1.txt',
 '~$GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx']

In [32]:
GWLG_2019_df = DatabaseG('GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx', 
                         'G-Messstellen',
                         'GWL_2019.csv',
                         'readme1.txt')

In [45]:
GWLG_2015_df = DatabaseG('GW Pirna_ab300115bis300117_hourly_1Zoll.xlsx', 
                         'GW table_from 300115',
                         'GWL_2015.csv',
                         'readme2.txt')

GWLG_2015_df.WellID.unique()

ValueError: Length of values (1) does not match length of index (385836)

In [34]:
GWLGWM_2019_df = DatabaseGWM('GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx', 
                         'GWM-Messstellen',
                         'GWL_2019b.csv')

In [26]:
# GWL_2019_df = DatabaseGWM('GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx', 
#                          'GWM-Messstellen',
#                          'GWL_2019b.csv')

Unnamed: 0,WellID,Time,Value
0,GWM-2,2019-04-09 01:00:00.950,
1,GWM-2,2019-04-09 02:00:00.955,
2,GWM-2,2019-04-09 03:00:00.960,
3,GWM-2,2019-04-09 04:00:00.965,
4,GWM-2,2019-04-09 05:00:00.965,


array(['G1', 'G1_', 'G4', 'G4_', 'G10', 'G11', 'G12', 'G13', 'G15', 'G17',
       'G19', 'G21', 'G23'], dtype=object)

In [41]:
GWLG_2015_df.loc[GWLG_2019_df.Variable == 0 & GWLG_2019_df.WellID == 'G10' ]

TypeError: Cannot perform 'rand_' with a dtyped [object] array and scalar of type [bool]

In [None]:
input_name = 'GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx', 
# 'GWM-Messstellen',
#                          'GWL_2019b.csv')

gwl_fn = input_name
gwlG_df = pd.read_Qexcel(gwl_fn, sheet_name = sheetname)
cols_list = list(gwlG_df.columns)
gwlG_df2 = gwlG_df.iloc[5:,:]
cols_list[0] = 'Time'
gwlG_df2.columns = cols_list

#wide to long
gwlG_df3= pd.melt(gwlG_df2, id_vars = ['Time'], value_vars = gwlG_df2.columns[1:])

# renaming and reorganizing columns
gwlG_df3 = gwlG_df3.rename(columns = {
'variable': 'WellID',
'value': 'Value'
})
gwlG_df3 = gwlG_df3[['WellID', 'Time', 'Value']]

#exporting csv
gwlG_df3.to_csv(output_name, index = False)
return(gwlG_df3)