# Notebook for changing data format to deepTSF long multiseries file

# In [35]:
import pandas as pd

def timescale_to_deepTSF_long_multi_zeros(csv, output):
    """
    Convert a csv of Timescale EPU data to the deepTSF long format.

    The 'point_type' column and then the first remaining column are dropped.
    Exactly three columns must be left; they are renamed by position to
    'Datetime', 'Value' and 'ID', and a constant 'Timeseries ID' column
    holding zero (0) is appended for every row.

    Values whose Datetime is on or after 2017-05-31 00:00:00 are divided by
    1000 (presumably a unit change in the source data - TODO confirm).
    Rows are then de-duplicated on (ID, Datetime), keeping the first
    occurrence, sorted by ID and Datetime, and written to ``output``.

    params
    ------
    csv: str
        path of csv file; must contain 'timestamp' and 'point_type' columns

    output: str
        path of output csv file (user defined). The DataFrame index is
        written as the first (unnamed) column.

    returns
    -------
    None
    """
    df = pd.read_csv(csv, parse_dates=['timestamp'])

    df = df.drop(columns='point_type')
    # Drop whatever column is first after 'point_type' is removed.
    df = df.drop(columns=df.columns[0])
    df.insert(3, 'Timeseries ID', 0)
    df.columns = ['Datetime', 'Value', 'ID', 'Timeseries ID']

    # Rescale Data
    # Vectorized instead of a row-by-row iterrows()/df.at loop: one pass in
    # native code, and the result is a float column regardless of the dtype
    # 'Value' was read with (no per-cell float-into-int assignment).
    datetime_threshold = pd.to_datetime('2017-05-31 00:00:00')
    needs_rescale = df['Datetime'] >= datetime_threshold
    df['Value'] = (df['Value'] / 1000).where(needs_rescale, df['Value'])

    # Drop Duplicates
    df = df.drop_duplicates(subset=['ID', 'Datetime']).reset_index(drop=True)

    # Sort by ID, then Datetime
    df = df.sort_values(by=['ID', 'Datetime']).reset_index(drop=True)

    df.to_csv(output)


# Convert the 6H and 1H csv exports to their "_long" counterparts.
for source_csv, target_csv in (
    ('../TIMESCALE_TO_CSV/6H.csv', '../TIMESCALE_TO_CSV/6H_long.csv'),
    ('../TIMESCALE_TO_CSV/1H.csv', '../TIMESCALE_TO_CSV/1H_long.csv'),
):
    timescale_to_deepTSF_long_multi_zeros(source_csv, target_csv)

# In [1]:
import pandas as pd

def csv_concat(csv1, csv2, output):
    """
    Concatenate two long-format csv files and number their time series.

    Both inputs are read with 'Datetime' parsed as dates and their
    'Unnamed: 0' column removed. The rows of csv1 are followed by the rows
    of csv2; every distinct value of 'ID' then receives an integer
    'Timeseries ID' (0, 1, 2, ...) in order of first appearance in that
    concatenation. The result is sorted by ID and Datetime and written to
    ``output`` together with its index.

    params
    ------
    csv1: str
        path of the first csv file

    csv2: str
        path of the second csv file

    output: str
        path of output csv file (user defined)

    returns
    -------
    None
    """
    frames = []
    for path in (csv1, csv2):
        frame = pd.read_csv(path, parse_dates=['Datetime'])
        frames.append(frame.drop(columns='Unnamed: 0'))

    combined = pd.concat(frames, ignore_index=True)

    # One integer per distinct ID, numbered by first appearance.
    distinct_ids = combined['ID'].unique()
    series_number = dict(zip(distinct_ids, range(len(distinct_ids))))
    combined['Timeseries ID'] = combined['ID'].map(series_number)

    combined = combined.sort_values(by=['ID', 'Datetime'], ascending=True)
    combined = combined.reset_index(drop=True)

    combined.to_csv(output)

# Combine the 1H_to_6H and 6H long files into a single all_data_6H long file.
csv_concat(
    csv1='../TIMESCALE_TO_CSV/1H_to_6H_long.csv',
    csv2='../TIMESCALE_TO_CSV/6H_long.csv',
    output='../TIMESCALE_TO_CSV/all_data_6H_long.csv',
)