In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
def append_csv_by_key_columns(original_csv, new_csv, key_columns, output_csv):
    """
    Reads two CSVs into pandas DataFrames and appends the values from the key columns of the new CSV 
    directly after the values of the original CSV.

    :param original_csv: Path to the original CSV file.
    :param new_csv: Path to the new CSV file.
    :param key_columns: List of columns that are used for appending.
    :param output_csv: Path where the appended CSV will be saved.
    """
    # Read the original and new CSV files
    original_df = pd.read_csv(original_csv)
    new_df = pd.read_csv(new_csv)

    # Validate key columns
    if not all(col in original_df.columns for col in key_columns):
        raise ValueError("One or more key columns are not present in the original CSV file.")
    if not all(col in new_df.columns for col in key_columns):
        raise ValueError("One or more key columns are not present in the new CSV file.")

    new_df_filtered = new_df[key_columns]
    # Filter the new DataFrame to keep only the key columns
    new_df_filtered = new_df[key_columns]

    # Append the new DataFrame to the original DataFrame
    appended_df = original_df.append(new_df_filtered, ignore_index=True)

    
    # Find the index where the epoch resets (starts again from 0)
    reset_index = appended_df[appended_df['epoch'] == min(original_df["epoch"].values)].index[1]  # Assumes there are at least two runs

    # Get the last epoch number of the first run
    last_epoch_first_run = appended_df.loc[reset_index - 1, 'epoch']

    # Increment the epochs of the second run
    appended_df.loc[reset_index:, 'epoch'] += last_epoch_first_run

    # Save the appended data to the output CSV
    appended_df.to_csv(output_csv, index=False)

    print(f"Data appended and saved to {output_csv}")

In [3]:
all_sessions = [
        "758519303","759189643","759660390","759666166","759872185",
        "760269100","761730740","762415169","763646681","761624763", 
        "761944562","762250376","760260459","760659782","761269197", 
        "763949859","764897534","765427689","766755831","767254594",
        "768807532","764704289","765193831","766502238","777496949", 
        "778374308","779152062","777914830","778864809","779650018",
        "826187862","826773996","827833392","826338612","826819032", 
        "828816509","829283315","823453391","824434038","825180479", 
        "826659257","827300090","828475005","829520904","832883243", 
        "833704570","834403597","836968429","837360280","838633305" 
        ]
dend_sessions = ['759666166', '759872185', '760269100', '761730740', 
                 '762415169', '763646681', '763949859', '764897534', 
                 '765427689', '766755831', '767254594', '768807532', 
                 '764704289', '765193831', '766502238', '777914830', 
                 '778864809', '779650018', '826187862', '826773996', 
                 '827833392', '826338612', '826819032', '828816509', 
                 '829283315', '823453391', '824434038', '825180479']

soma_sessions = ['758519303', '759189643', '759660390', '761624763', 
                 '761944562', '762250376', '760260459', '760659782', 
                 '761269197', '777496949', '778374308', '779152062', 
                 '826659257', '827300090', '828475005', '829520904', 
                 '832883243', '833704570', '834403597', '836968429', 
                 '837360280', '838633305']


In [7]:
#Read two csvs into pandas df where information needs to be stitched
original_csv = "/home/mila/x/xuejing.pan/POYO/results/cross_sess/loss/roi_embed_loss.csv"
new_csv = "/home/mila/x/xuejing.pan/POYO/results/cross_sess/loss/roi_embed_loss_cont.csv"
output_csv = "/home/mila/x/xuejing.pan/POYO/results/cross_sess/loss/roi_embed_loss_combined.csv"


In [5]:
def get_cols(all_sessions):
        key_columns = ["epoch"]
        val_col_names = []

        for sess in all_sessions:
                curr_name = "val/session_{}_accuracy_gabor_orientation".format(sess)
                key_columns.append(curr_name)

        return key_columns

In [6]:
key_columns = get_cols(all_sessions)
append_csv_by_key_columns(original_csv,new_csv,key_columns,output_csv)

Data appended and saved to /home/mila/x/xuejing.pan/POYO/results/cross_sess/val/roi_embed_vals_combined.csv


  appended_df = original_df.append(new_df_filtered, ignore_index=True)


In [8]:
#For train
key_columns = ["epoch", "train_loss"]
append_csv_by_key_columns(original_csv,new_csv,key_columns,output_csv)

Data appended and saved to /home/mila/x/xuejing.pan/POYO/results/cross_sess/loss/roi_embed_loss_combined.csv


  appended_df = original_df.append(new_df_filtered, ignore_index=True)
