# AERO 356-03 Lab 2 Data Processing

This notebook was created to preprocess lab data for data extractor consumption. All files will need to have column names:

Solar panel data will be formatted:

- current_pre: current before arcing (Amps)
- voltage_pre: voltage before arcing (Volts)
- current_post: current after arcing (Amps)
- voltage_post: voltage after arcing (Volts)

Arcing data will be formatted:

- pressure: pressure of vacuum chamber (Torr)
- voltage: recorded voltage when arcing occured (kV)
- current: recorded current when arcing occured (mA)

In [1]:
# external imports
import pandas as pd
import os

# create file path
base_dir = '../data/'
dirty_dir = os.path.join(base_dir, 'original')

# show data in dirty data directory
file_names = os.listdir(dirty_dir)
file_names

['Al.csv',
 'C.csv',
 'ABCFO_pre.csv',
 'CBCFO.csv',
 'ABCFI_pre.csv',
 'SS.csv',
 'CBCFI_BeforeTest.txt',
 'CBCFI_AfterTest.txt',
 'ABCFO_post.csv',
 'arcing_group4.csv',
 'ABCFI_post.csv']

### Create new directory for clean data

In [2]:
# create directory for panel data and arcing data
panelData = os.path.join(base_dir, 'panelData')
# os.mkdir(panelData)

arcingData = os.path.join(base_dir, 'arcingData')
# os.mkdir(arcingData)

## Process data from group 4

CBCFO: Cathode Biased - cell facing out

SS2D: Stainless Steel 2 D

In [3]:
# create file paths
group4_panel_csv = os.path.join(dirty_dir, 'CBCFO.csv')
group4_arcing_csv = os.path.join(dirty_dir, 'arcing_group4.csv')

# load in data
group4_panel = pd.read_csv(group4_panel_csv)
group4_arcing = pd.read_csv(group4_arcing_csv)

print(group4_panel.head())
print('-' * 60)
print(group4_arcing.head())

   Voltage [V]  Current [A]  Unnamed: 2  Voltage [V].1  Current [A].1
0        0.520         0.01         NaN          0.525           0.01
1        0.518         0.06         NaN          0.518           0.06
2        0.502         0.11         NaN          0.510           0.11
3        0.487         0.16         NaN          0.496           0.16
4        0.477         0.21         NaN          0.488           0.21
------------------------------------------------------------
   Pressure Goal [Torr]  Recorded Pressure [Torr]  Voltage of Arc [kV]  \
0                  3.00                      2.80                0.880   
1                  2.00                      2.00                0.839   
2                  1.20                      1.20                0.788   
3                  0.60                      0.61                0.663   
4                  0.14                      0.14                0.960   

   Current [mA]  
0           120  
1             5  
2             1  
3 

In [4]:
# clean panel data and export
group4_panel.dropna(axis=1, inplace=True)

# split data
group4_pre = group4_panel.iloc[:,[0,1]].copy()
group4_post = group4_panel.iloc[:,[2,3]].copy()

# rename columns
group4_pre.columns = ['voltage', 'current']
group4_post.columns = ['voltage', 'current']

#create path for saving
CBCFO_pre = os.path.join(panelData, "CBCFO_pre.csv")
CBCFO_post = os.path.join(panelData, "CBCFO_post.csv")

group4_pre.to_csv(CBCFO_pre, index=False)
group4_post.to_csv(CBCFO_post, index=False)

In [5]:
# clean arcing data and export
group4_arcing.drop(group4_arcing.columns[0], axis=1, inplace = True)
group4_arcing.columns = ['pressure', 'voltage', 'current']

# create path for saving
SS2D = os.path.join(arcingData, 'SS2D.csv')
group4_arcing.to_csv(SS2D, index = False)

## Process data from group 2

In [6]:
# create file paths
ABFO_pre = os.path.join(dirty_dir, 'ABCFO_pre.csv')
ABFO_post = os.path.join(dirty_dir, 'ABCFO_post.csv')
C_arcing = os.path.join(dirty_dir, 'C.csv')

# load in data
ABFO_pre = pd.read_csv(ABFO_pre)
ABFO_post = pd.read_csv(ABFO_post)
C_arcing = pd.read_csv(C_arcing)

print(ABFO_pre.head())
print('-'*60)
print(ABFO_post.head())
print('-'*60)
print(C_arcing.head())

   Current (A)  Voltage (V)  Power (W)
0         0.01        0.526    0.00526
1         0.02        0.523    0.01046
2         0.03        0.517    0.01551
3         0.04        0.518    0.02072
4         0.06        0.516    0.03096
------------------------------------------------------------
   Current (A)  Voltage (V)  Power (W)
0         0.01        0.527    0.00527
1         0.02        0.526    0.01052
2         0.03        0.526    0.01578
3         0.04        0.526    0.02104
4         0.06        0.528    0.03168
------------------------------------------------------------
   Torr-Inch  Voltage (kV)  Initial Torr  Actual Torr  Current (mA)
0       3.00         0.986          5.90         6.00           0.0
1       2.00         0.760          4.00         4.00         120.0
2       1.20         0.660          2.37         2.40          42.0
3       0.60         0.584          1.19         1.20          34.0
4       0.14         0.623          0.28         0.31          12.2


In [7]:
# clean data and assign appropriate columns

# pull relevant columns
ABFO_pre = ABFO_pre.iloc[:, [0,1]]
ABFO_post = ABFO_post.iloc[:, [0,1]]

# assign new names
col_names = ['current', 'voltage']
ABFO_pre.columns = col_names
ABFO_post.columns = col_names

# pull relevant columns
C_arcing_cut = C_arcing.iloc[:,[1,2,4]]

# assign new names
C_arcing_cut.columns = ['volage', 'pressure', 'current']
C_arcing_cut.head()

Unnamed: 0,volage,pressure,current
0,0.986,5.9,0.0
1,0.76,4.0,120.0
2,0.66,2.37,42.0
3,0.584,1.19,34.0
4,0.623,0.28,12.2


In [8]:
# create file paths
ABFO_pre_path = os.path.join(panelData, 'ABCFO_pre.csv')
ABFO_post_path = os.path.join(panelData, 'ABCFO_post.csv')
C_arcing_path = os.path.join(arcingData, 'C.csv')

# export data
ABFO_pre.to_csv(ABFO_pre_path, index=False)
ABFO_post.to_csv(ABFO_post_path, index=False)
C_arcing_cut.to_csv(C_arcing_path, index=False)

## Process data from group 3

In [9]:
# create file paths
ABCFI_pre_path = os.path.join(dirty_dir, "ABCFI_pre.csv")
ABCFI_post_path = os.path.join(dirty_dir, "ABCFI_post.csv")
SS_path = os.path.join(dirty_dir, "SS.csv")

# load in data
ABCFI_pre = pd.read_csv(ABCFI_pre_path)
ABCFI_post = pd.read_csv(ABCFI_post_path)
SS = pd.read_csv(SS_path)

# display data
print(ABCFI_pre.head())
print('-'*60)
print(ABCFI_post.head())
print('-'*60)
print(SS.head())

   Commanded Current  Cell Voltage (V)
0               0.00             0.520
1               0.02             0.528
2               0.03             0.532
3               0.05             0.515
4               0.10             0.505
------------------------------------------------------------
   Commanded Current  Cell Voltage (V)  Unnamed: 2  Unnamed: 3
0               0.00             0.518         NaN         NaN
1               0.02             0.522         NaN         NaN
2               0.03             0.519         NaN         NaN
3               0.05             0.518         NaN         NaN
4               0.10             0.502         NaN         NaN
------------------------------------------------------------
   Target (Torr-Inch)  Pressure (Torr)  Voltage (V)  Press-Gap (Torr-Inch)
0                3.00             4.00        885.0                 3540.0
1                2.00             2.40        746.0                 2124.0
2                1.20             1.10   

In [10]:
# clean data

# pull relevant columns and rename
ABCFI_pre.columns = col_names
ABCFI_post = ABCFI_post.iloc[:, [0,1]]
ABCFI_post.columns = col_names

SS = SS.iloc[:,[1,2]]
SS.columns = ['pressure', 'voltage']

# add target current column (no individual current specified)
SS['current'] = [100 for _ in range(len(SS))]

# turn V into kV
SS.voltage /= 1000

In [11]:
# creat file paths
ABCFI_pre_path = os.path.join(panelData, "ABCFI_pre.csv")
ABCFI_post_path = os.path.join(panelData, "ABCFI_post.csv")
SS_path = os.path.join(arcingData, "SS.csv")

# export data
ABCFI_pre.to_csv(ABCFI_pre_path, index=False)
ABCFI_post.to_csv(ABCFI_post_path, index=False)
SS.to_csv(SS_path, index=False)

## Process data from group 1

In [12]:
# creat file paths
CBCFI_pre_path = os.path.join(dirty_dir, "CBCFI_BeforeTest.txt")
CBCFI_post_path = os.path.join(dirty_dir, "CBCFI_AfterTest.txt")
Al_path = os.path.join(dirty_dir, "Al.csv")

# load in and displacy data
CBCFI_pre = pd.read_csv(CBCFI_pre_path, sep='\t')
CBCFI_post = pd.read_csv(CBCFI_post_path, sep='\t')
Al = pd.read_csv(Al_path)

# display data
print(CBCFI_pre.head())
print('-'*60)
print(CBCFI_post.head())
print('-'*60)
print(Al.head())

   Current[A]  Voltage[mV]
0        0.01          496
1        0.05          495
2        0.10          476
3        0.15          455
4        0.20          454
------------------------------------------------------------
   Current[A]  Voltage[mV]
0        0.01          508
1        0.05          503
2        0.10          487
3        0.15          477
4        0.20          469
------------------------------------------------------------
   pressure  voltage  current
0      3.00      6.0        0
1      2.10      4.2      697
2      1.50      3.0      464
3      1.25      2.4      470
4      0.80      1.6      675


In [13]:
# clean data and assign new column names (Al data is okay)
CBCFI_pre.columns = col_names
CBCFI_post.columns = col_names

# turn mV in V
CBCFI_pre.voltage /= 1000
CBCFI_post.voltage /= 1000

In [14]:
# create paths for export
CBCFI_pre_path = os.path.join(panelData, "CBCFI_pre.csv")
CBCFI_post_path = os.path.join(panelData, "CBCFI_post.csv")
Al_path = os.path.join(arcingData, "Al.csv")

# export data
CBCFI_pre.to_csv(CBCFI_pre_path, index=False)
CBCFI_post.to_csv(CBCFI_post_path, index=False)
Al.to_csv(Al_path, index=False)