In [1]:
# Script to process the intermediate legacy files and collect the R-squared values and the a/b coefficients
import os
import glob
import pandas as pd

# Intermediate files (from the heatmap folder) were moved to the Z: drive so that
# the few files whose names had no gain could be renamed.
# Raw string: a backslash is already literal here, so it must not be doubled.
intermediate_folder = r"Z:\Arc_Intermediate_Files"

In [2]:
def extract_date_gain(path):
    """Pull the date and gain tokens out of an intermediate-file name.

    The file name (directory and extension stripped) is split on
    underscores; the second and third pieces are returned as the date and
    the gain setting, e.g. ``Arc_010713_gn0_r2.csv`` -> ``["010713", "gn0"]``.

    Parameters
    ----------
    path : str
        Path to an intermediate file.

    Returns
    -------
    list
        ``[date, gain]``, both as strings.

    Raises
    ------
    IndexError
        If the file name has fewer than three underscore-separated pieces.
    """
    # Drop the directory and the extension in one go, keeping only the stem.
    stem, _extension = os.path.splitext(os.path.basename(path))
    tokens = stem.split("_")
    return [tokens[1], tokens[2]]

In [3]:
#r2_csv = ex_r2
#r2_pd = pd.read_csv(r2_csv)
#r2_value = r2["x"][0]
#print(r2_value)

def extract_rsquare(rsquare_file):
    """Read the R-squared value from an r2 csv file.

    Parameters
    ----------
    rsquare_file : str or file-like
        Anything ``pd.read_csv`` accepts; the csv must have an ``x`` column.

    Returns
    -------
    The entry of the ``x`` column at index label 0.
    """
    x_column = pd.read_csv(rsquare_file)["x"]
    return x_column[0]

def extract_coeffs(coeff_file):
    """Read the a and b coefficients from a coeffs csv file.

    Parameters
    ----------
    coeff_file : str or file-like
        Anything ``pd.read_csv`` accepts; the csv must have an ``x`` column
        with at least two rows.

    Returns
    -------
    list
        ``[a_coeff, b_coeff]``: the ``x`` entries at index labels 0 and 1.
    """
    x_column = pd.read_csv(coeff_file)["x"]
    return [x_column[0], x_column[1]]


In [4]:
# Get a list of all R-squared csv files in intermediate_folder.
# glob makes no promise about the order of its results, so sort them to get
# the same row order on every run and every file system.
r2_suffix = "r2.csv"
all_r2_csvs = sorted(glob.glob(os.path.join(intermediate_folder, "*" + r2_suffix)))

# One [date, gain, r2_value, a_coeff, b_coeff] row per R-squared file.
list_results = []

for path in all_r2_csvs:
    dategain = extract_date_gain(path)
    date, gain = dategain

    r2_value = extract_rsquare(path)

    # Build the path to the matching coeff file by swapping only the trailing
    # "r2.csv" for "coeffs.csv". A plain path.replace("r2", "coeffs") would
    # also rewrite any other "r2" occurring in the folder or file name.
    coeff_path = path[:-len(r2_suffix)] + "coeffs.csv"

    coeffs = extract_coeffs(coeff_path)
    a_coeff, b_coeff = coeffs

    print("Date: {}  Gain Setting: {}  R2: {}, A: {}, B: {}".format(date, gain, r2_value, a_coeff, b_coeff))

    list_results.append([date, gain, r2_value, a_coeff, b_coeff])

Date: 010713  Gain Setting: gn0  R2: 0.999913452106, A: -0.700480071906, B: 0.911512370843
Date: 010813  Gain Setting: gn0  R2: 0.712038046822, A: 0.353666429071, B: 0.190055423761
Date: 011013  Gain Setting: gn0  R2: 0.175292257503, A: 2.31933396825, B: -1.0721551944
Date: 011314  Gain Setting: gn10  R2: 0.987127460968, A: -1.77156121034, B: 1.01214030496
Date: 011314  Gain Setting: gn1  R2: 0.881928349698, A: -2.19331177439, B: 1.77889434406
Date: 011714  Gain Setting: gn10  R2: 0.920366244266, A: 3.36516051977, B: 1.27952205282
Date: 011714  Gain Setting: gn1  R2: 0.853855159012, A: 1.05690933222, B: 1.32446472745
Date: 020513  Gain Setting: gn0  R2: 0.778968833036, A: -0.473173257346, B: 1.04349079808
Date: 020713  Gain Setting: gn0  R2: 0.00105487054216, A: 1.91086040612, B: 0.13669383583
Date: 021716  Gain Setting: gn100  R2: 0, A: 1.099160486, B: nan
Date: 021716  Gain Setting: gn10  R2: 0.505658622463, A: -2.72449087627, B: 1.00779712313
Date: 022414  Gain Setting: gn10  R2: 0.

In [5]:
# Dump every collected [date, gain, r2_value, a_coeff, b_coeff] row to check the loop results.
print(list_results)

[['010713', 'gn0', 0.99991345210583493, -0.70048007190619899, 0.91151237084268588], ['010813', 'gn0', 0.712038046822288, 0.35366642907134099, 0.19005542376068105], ['011013', 'gn0', 0.17529225750318098, 2.3193339682549601, -1.07215519440455], ['011314', 'gn10', 0.98712746096777504, -1.7715612103444101, 1.01214030496408], ['011314', 'gn1', 0.88192834969768708, -2.19331177438904, 1.7788943440609], ['011714', 'gn10', 0.920366244266279, 3.3651605197736298, 1.2795220528165201], ['011714', 'gn1', 0.85385515901185305, 1.0569093322157701, 1.3244647274487999], ['020513', 'gn0', 0.77896883303571907, -0.47317325734557897, 1.0434907980823001], ['020713', 'gn0', 0.00105487054215861, 1.9108604061237202, 0.136693835829648], ['021716', 'gn100', 0, 1.0991604859999999, nan], ['021716', 'gn10', 0.50565862246311799, -2.7244908762666999, 1.0077971231349698], ['022414', 'gn10', 0.90136731706070405, 2.3873894655799104, 0.64014833263535109], ['022414', 'gn1', 0.95837962747373084, 1.4475523637499699, 0.7557552

In [6]:
# Turn the collected rows into a data frame, one named column per field.
result_columns = ["Date", "Gain", "Rsquared", "A_coeff", "B_coeff"]
df = pd.DataFrame(data=list_results, columns=result_columns)

# Preview the first rows.
print(df.head())

     Date  Gain  Rsquared   A_coeff   B_coeff
0  010713   gn0  0.999913 -0.700480  0.911512
1  010813   gn0  0.712038  0.353666  0.190055
2  011013   gn0  0.175292  2.319334 -1.072155
3  011314  gn10  0.987127 -1.771561  1.012140
4  011314   gn1  0.881928 -2.193312  1.778894


In [7]:
# Convert the Date strings to datetimes.
# The file names encode dates as MMDDYY (e.g. "010713" is 2013-01-07), so pass
# the format explicitly instead of relying on pandas guessing it from
# ambiguous six-digit strings.
df['Date'] = pd.to_datetime(df['Date'], format="%m%d%y")

print(df.head())

        Date  Gain  Rsquared   A_coeff   B_coeff
0 2013-01-07   gn0  0.999913 -0.700480  0.911512
1 2013-01-08   gn0  0.712038  0.353666  0.190055
2 2013-01-10   gn0  0.175292  2.319334 -1.072155
3 2014-01-13  gn10  0.987127 -1.771561  1.012140
4 2014-01-13   gn1  0.881928 -2.193312  1.778894


In [8]:
# Save the results table to csv (relative path, so it lands in the current
# working directory); index=False leaves the row index out of the file.
df.to_csv("legacy_coeffs_rsquared.csv", index=False)