# Organizing well levels

In [15]:
import pandas as pd
import os
import numpy as np
import geopandas as gpd

## Organizing coordinates

### Renaming columns

In [18]:
os.chdir('data')

In [59]:
os.listdir()
coords_fn = 'Koordinates+Elevation.xlsx'
coords_df = pd.read_excel(coords_fn)
coords_df = coords_df.iloc[:,:-1]

col_names_list = ['WellID', 'CaseHeight', 'E', 'N', 'Type']
names_dict = {}
for i in range(len(col_names_list)):
    names_dict [coords_df.iloc[:,i].name] = col_names_list[i]

coords_df = coords_df.rename(columns = names_dict)
coords_df.head()

Unnamed: 0,WellID,CaseHeight,E,N,Type
0,G1,119.49,5424455.69,5648391.1,
1,G2,118.83,5424474.0,5648390.0,no existing well
2,G3,118.73,5424491.0,5648392.0,no existing well
3,G4,119.19,5424509.75,5648392.55,
4,G4 neu,120.09,,,


### Droping NA and assuming well type

In [67]:
coords_df.Type.loc[coords_df.Type != 'no existing well'] = 'Well'
coords_df = coords_df.dropna()
coords_df.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coords_df.Type.loc[coords_df.Type != 'no existing well'] = 'Well'


Unnamed: 0,WellID,CaseHeight,E,N,Type
0,G1,119.49,5424455.69,5648391.1,Well
1,G2,118.83,5424474.0,5648390.0,no existing well
2,G3,118.73,5424491.0,5648392.0,no existing well
3,G4,119.19,5424509.75,5648392.55,Well
5,G5,118.933,5424457.0,5648402.0,no existing well


## Getting water levels

In [72]:
os.chdir('Groundwater levels')
os.listdir()

['GW Pirna_ab2019-Hourly_1Zoll+5Zoll.xlsx',
 'GW Pirna_ab300115bis300117_hourly_1Zoll.xlsx']

In [83]:
gwl_fn = os.listdir()[0]
gwlG_df = pd.read_excel(gwl_fn, sheet_name = 'G-Messstellen')
gwlGWM_df = pd.read_excel(gwl_fn, sheet_name = 'GWM-Messstellen')

In [79]:
gwlG_df.head()

Unnamed: 0.1,Unnamed: 0,MST,Unnamed: 2,G1,G1_GW temp,G4,G4_GW temp,G10,G10_GW temp,G11,...,G15,G15_GW temp,G17,G17_GW temp,G19,G19_GW temp,G21,G21_GW temp,G23,G23_GW temp
0,NaT,Easting [m],NaT,5424455.688,,5424511.0,,5424501.985,,5424540.557,...,5424561.758,,5424523.812,,5424490.301,,5424453.021,,5424440.135,
1,NaT,Northing [m],NaT,5648391.095,,5648391.0,,5648417.502,,5648379.253,...,5648363.805,,5648354.494,,5648346.331,,5648335.981,,5648430.76,
2,NaT,Top level (m.a.s.l.),NaT,119.478,,119.18,,119.277,,116.939,...,116.076,,115.709,,115.527,,115.564,,119.06,
3,NaT,Top level (m.b.g.s.),NaT,0.0,,0.0,,0.0,,0.0,...,0.0,,0.0,,0.0,,0.0,,0.0,
4,2019-04-09,03:30:00,2019-04-09 03:30:00,9.089,12.8,,,8.865,13.0,,...,5.698,12.1,5.338,12.0,5.161,12.2,,,8.647,12.9


In [87]:
cols_S

Index(['Unnamed: 0', 'MST', 'Unnamed: 2', 'G1', 'G1_GW temp', 'G4',
       'G4_GW temp', 'G10', 'G10_GW temp', 'G11', 'G11_GW temp', 'G12',
       'G12_GW temp', 'G13', 'G13_GW temp', 'G15', 'G15_GW temp', 'G17',
       'G17_GW temp', 'G19', 'G19_GW temp', 'G21', 'G21_GW temp', 'G23',
       'G23_GW temp'],
      dtype='object')

In [151]:
#renaming it
cols_list = list(gwlG_df.columns)
gwlG_df2 = gwlG_df.iloc[4:,:]
cols_list[0] = 'Date'
cols_list[2] = 'Time'
gwlG_df2.columns = cols_list
gwlG_df2 = gwlG_df2.drop('MST', axis=1)

#wide to long
gwlG_df3= pd.melt(gwlG_df2, id_vars = ['Date', 'Time'], value_vars = gwlG_df2.columns[2:])

#creating a column with well ID
gwlG_df3['WellID'] = gwlG_df3.variable.str[:3]

#Editting variable column
#syntax: df[“column_name”] = np.where(df[“column_name”]==”some_value”, value_if_true, value_if_false)
gwlG_df3['variable'] = np.where(gwlG_df3['variable'].str.len() > 5, 'Temperature', 'GWL')

#transforming strings in factors
labels, uniques = pd.factorize(gwlG_df3['variable'])
gwlG_df3['variable'] = labels

#renaming and reorganizing columns
gwlG_df3 = gwlG_df3.rename(columns = {
    'variable': 'Variable',
    'value' : 'Value'
})
gwlG_df3 = gwlG_df3[['WellID', 'Time', 'Variable', 'Value']]

#exporting csv
gwlG_df3.to_csv('GWL_2019.csv', index = False)
gwlG_df3.head()

Unnamed: 0,WellID,Time,Variable,Value
0,G1,2019-04-09 03:30:00,0,9.089
1,G1,2019-04-09 04:30:00,0,9.092
2,G1,2019-04-09 05:30:00,0,9.094
3,G1,2019-04-09 06:30:00,0,9.097
4,G1,2019-04-09 07:30:00,0,9.1


In [165]:
#save explanation of variable
with open('readme.txt', '+w') as f:
    for i, j in enumerate(uniques):
        f.write('Code of Variable below:\n')
        f.write(str(i) + ':' + str(j))
    

In [132]:

dir(pd.factorize(gwlG_df3['variable']))

['__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'count',
 'index']