In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Read 'influx-data.csv' into the DataFrame that the following cells filter and summarise
data = pd.read_csv(filepath_or_buffer='influx-data.csv')

In [3]:
# Keep only the rows whose 'C-state' column equals the state under study
cstate = 'C7'
treatment = data.loc[data['C-state'].eq(cstate)]

In [None]:
# Collect the distinct utid values of the selected C-state and list them one per line
unique = pd.unique(treatment['utid'])
for utid in unique:
    print(utid)

In [5]:
# Load functions
def plotter(df, plotter, ylim=None): # data frame, plotter (eg. ax1), optional y-axis limits
    """Plot ``df['value']`` against ``df['time']`` on one axes and annotate mean and median.

    Parameters
    ----------
    df : DataFrame-like
        Must provide the columns 'time' and 'value'.
    plotter : Axes-like
        Target axes (e.g. ``ax1``); needs ``plot``, ``text``, ``set_ylim`` and
        ``transAxes``. The parameter name shadows this function inside the body only.
    ylim : optional
        Limits handed to ``plotter.set_ylim``. When omitted, the notebook-level
        variable ``ylim`` is used if it exists, so existing two-argument calls keep
        working. When neither is available the y-axis range is left untouched
        instead of raising ``NameError``.
    """
    values = df['value']
    plotter.plot(df['time'], values)
    # transform=transAxes places both annotations in axes-relative coordinates,
    # so they stay centred regardless of the data range.
    plotter.text(0.5, 0.5, f"Mean: {values.mean()}", fontsize=12, ha='center', transform=plotter.transAxes)
    plotter.text(0.5, 0.4, f"Median: {values.median()}", fontsize=12, ha='center', transform=plotter.transAxes)
    if ylim is None:
        # Backward compatibility: fall back to the global defined in the plotting cell.
        ylim = globals().get('ylim')
    if ylim is not None:
        plotter.set_ylim(ylim)

def make_list(df): # data frame
    """Pair each entry of ``df['time']`` with the matching entry of ``df['value']``.

    Returns a list of ``(time, value)`` tuples in column order.
    """
    times = df['time']
    values = df['value']
    return [(stamp, reading) for stamp, reading in zip(times, values)]

def integration(records):
    """Return the time-weighted mean of sampled data using the trapezoidal rule.

    Parameters
    ----------
    records : sequence of (time, value) records
        Samples in the order they are to be integrated. Each record is indexed as
        ``record[0]`` (time) and ``record[1]`` (value), so lists and tuples both
        work. The input is never modified.

    Returns
    -------
    The trapezoidal area under the samples divided by the time elapsed between the
    first and the last sample.

    * No samples: ``nan`` (previously an ``IndexError``).
    * Samples spanning no time (a single sample, or equal first and last
      timestamps): the plain average of the values, since there is no interval to
      weight by (previously 0 or a ``ZeroDivisionError``).
    """
    points = list(records)
    if not points:
        return float('nan')

    duration = points[-1][0] - points[0][0]
    if not duration:
        return sum(point[1] for point in points) / len(points)

    area = 0
    for start, end in zip(points, points[1:]):
        v_i = start[1]
        v_f = end[1]
        # Only time differences matter, so no shifting of timestamps is required.
        elapsed = end[0] - start[0]
        # Rectangle under the first value plus the triangle up to the second value,
        # normalised per term by the total duration.
        area += (v_i * elapsed + (v_f - v_i) * elapsed / 2) / duration
    return area


In [384]:
# Create the results file with just its header row.
# Run once only: mode 'w' truncates any rows already written to 'influxdb.csv'.
with open('influxdb.csv', mode='w') as header_file:
    header_file.write('c-state,hrs,mean,med,25%,filtered,cpu\n')

# Data Visualization For Loop

In [None]:
limit = 33 # set a cutoff for filtered data
ylim = (20, 50) # adjust y-axis range; read by plotter() as a notebook-level variable
# One figure per utid: full series, series restricted to value < limit, histogram.
for utid in unique:
    unit = treatment[treatment['utid'] == utid]
    unit_mod = unit[unit['value'] < limit]
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(25, 3))
    plotter(unit, ax1)
    plotter(unit_mod, ax2)
    ax3.hist(unit['value'], bins=10)

    # Label every panel so a figure can be matched to its utid when skimming the output.
    ax1.set_title(f"utid {utid}: all samples")
    ax2.set_title(f"utid {utid}: value < {limit}")
    ax3.set_title(f"utid {utid}: value histogram")
    for series_ax in (ax1, ax2):
        series_ax.set_xlabel('time')
        series_ax.set_ylabel('value')
    ax3.set_xlabel('value')
    ax3.set_ylabel('count')

    # Render now and release the figure; otherwise every figure stays open until the
    # cell finishes, which costs memory and triggers matplotlib's open-figure warning.
    plt.show()
    plt.close(fig)

In [7]:
# break apart plotting and csv writing for faster csv writing
# NOTE: `limit` is defined in the plotting cell; that cell's first line must have run.
# The output file is opened once for the whole loop instead of once per row.
with open('influxdb.csv','a') as f:
    for i in unique:
        unit = treatment[treatment['utid'] == i]
        unit_mod = unit[unit['value']<limit]
        # integration() is given lists of [time, value] lists built from make_list()
        orig_record = list(map(list, make_list(unit)))
        mod_record = list(map(list, make_list(unit_mod)))
        # define variables
        hrs = unit['hrs'].mean().astype(int)
        mean = integration(orig_record)
        med = unit['value'].median()
        q25 = unit['value'].quantile(0.25)
        # No sample below `limit` leaves nothing to integrate: record nan instead of crashing.
        filtered = integration(mod_record) if mod_record else float('nan')
        cpu = unit['cpu'].mean().round(2)
        f.write(f"{cstate},{hrs},{mean},{med},{q25},{filtered},{cpu}\n")