In [1]:
import numpy as np
import pandas as pd

In [2]:
# Input files, given as paths relative to the working directory.

# Hydro-portal CSV exports. The list order matters: the files are loaded and
# concatenated column-wise in exactly this order further down.
hydroportal_files_paths = [
    'Discharge_3_hour_mean_inflow.csv',
    'Lake_Height.csv',
    'PercentFull_Active_Lake_Storage.csv',
    'Snow_Volume_Opuha_Catchment.csv',
    'Turbidity_Buoy.csv',
    'Turbidity_Platform.csv',
    'Water_Temp_Buoy.csv',
    'Water_Temp_Platform.csv',
]

# Single Cliflo CSV export (despite the plural name, this is one path string).
cliflo_files = 'Cliflo_39255_Fairlie_Env_Data.csv'

In [15]:
# Read every hydro-portal CSV into its own datetime-indexed frame, then
# combine them side by side.

def _read_hydroportal_csv(path):
    """Read one hydro-portal CSV and return it indexed by parsed timestamps.

    The first line of the file is skipped and the first column becomes the
    index, which is then converted to datetimes.
    """
    frame = pd.read_csv(path, skiprows=1, index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame


dataframe_list = [_read_hydroportal_csv(path) for path in hydroportal_files_paths]

# Outer join on the index: every timestamp seen in any file is kept, and a
# column is NaN wherever its file has no reading for that timestamp.
merged_hydro_portal_data = pd.concat(dataframe_list, axis=1, join='outer')

merged_hydro_portal_data.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 782231 entries, 1981-04-02 00:00:00 to 2024-06-10 12:30:00
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   Value (m^3/s)  28349 non-null   float64
 1   Value (m)      699906 non-null  float64
 2   Value (%)      691883 non-null  float64
 3   Value (Mm^3)   15766 non-null   float64
 4   Value (NTU)    217615 non-null  float64
 5   Value (NTU)    213917 non-null  float64
 6   Value (°C)     197546 non-null  float64
 7   Value (°C)     214817 non-null  float64
dtypes: float64(8)
memory usage: 53.7 MB


In [17]:
# Give every hydro-portal column a descriptive, unique name.
#
# The raw headers are NOT unique: 'Value (NTU)' and 'Value (°C)' each occur
# twice. A {old: new} mapping built from them keeps only the last entry per
# duplicate, so rename() would label both turbidity columns as *Platform* and
# both temperature columns as *Platform*. The names are therefore assigned by
# position instead; their order follows hydroportal_files_paths, which is the
# order the frames were concatenated in.

new_column_names = [
    'Discharge_(m^3/s)',
    'Lake_Height_(m)',
    'PercentFull_Active_Lake_Storage_(%)',
    'Snow_Volume_Opuha_Catchment_(Mm^3)',  # source header unit is Mm^3, not mm
    'Turbidity_Buoy_(NTU)',
    'Turbidity_Platform_(NTU)',
    'Water_Temp_Buoy_(degC)',
    'Water_Temp_Platform_(degC)']

# Fail with a clear message if the list of input files and the list of names
# ever drift apart.
if len(new_column_names) != merged_hydro_portal_data.shape[1]:
    raise ValueError(
        f"Expected {len(new_column_names)} hydro-portal columns, "
        f"found {merged_hydro_portal_data.shape[1]}")

# Positional assignment; safe to re-run because it does not depend on the
# current column labels.
merged_hydro_portal_data.columns = new_column_names

In [40]:
# Read the Cliflo export, skipping the 10 lines that precede the table
# (presumably export metadata; verify against the raw file).
cliflo_raw = pd.read_csv(cliflo_files, skiprows=10)

# The last 6 rows are discarded (presumably a non-data footer; verify against
# the raw file).
cliflo_trimmed = cliflo_raw.iloc[:-6]

# The first column is not needed downstream, so drop it by label.
cliflo_data = cliflo_trimmed.drop(columns=cliflo_trimmed.columns[0])

In [41]:
# Parse the 'Day(Local_Date)' strings (layout YYYYMMDD:HHMM) into datetimes,
# promote that column to the index, and name the index
# 'Timestamp (UTC+12:00)'.
cliflo_data = (
    cliflo_data
    .assign(**{
        'Day(Local_Date)': pd.to_datetime(cliflo_data['Day(Local_Date)'],
                                          format='%Y%m%d:%H%M'),
    })
    .set_index('Day(Local_Date)')
    .rename_axis('Timestamp (UTC+12:00)')
)



In [43]:
# Combine the hydro-portal and Cliflo frames column-wise. The outer join keeps
# the union of both indexes, leaving NaN where a source has no row for a
# timestamp.
ground_frames = [merged_hydro_portal_data, cliflo_data]
Final_Ground_Data = pd.concat(ground_frames, axis=1, join='outer')

In [47]:
# Sanity check: (number of rows, number of columns) of the merged frame.
Final_Ground_Data.shape

(782231, 47)

In [48]:
# Write the merged frame to 'Final_Ground_Data.csv' (relative path); with the
# default to_csv settings the index is written as the first column.
Final_Ground_Data.to_csv('Final_Ground_Data.csv')