# Exercise: Have a look at the electrophysiology data using Pandas

In [37]:
import pandas as pd

# Raise pandas' display limits so long/wide tables are shown without truncation:
# up to 1000 rows and 100 columns.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 100

# Load electrophysiology data

In [10]:
# Read the collected recordings; the first CSV column is used as the row index.
df = pd.read_csv(
    '../../data/QC_passed_2024-07-04_collected.csv',
    index_col=0,
)

## 1. How many rows/columns does the data set have?

In [11]:
# Tuple of (number of rows, number of columns)
df.shape

(827, 35)

## 2. Display the first 5 rows of the DataFrame

In [38]:
# Preview the first 5 rows (5 is also the default for head())
df.head(5)

Unnamed: 0,area,tissue_source,OP,patcher,patient_age,filename,slice,cell_ch,cell_ID,day,treatment,hrs_incubation,repatch,hrs_after_OP,Rs,Rin,resting_potential,max_spikes,Rheobase,AP_heigth,TH,max_depol,max_repol,membra_time_constant_tau,capacitance,comments,rheo_ramp,AP_halfwidth,Rheobse_ramp,Unnamed: 27,rheos_ramp,comment,Unnamed: 33,high K concentration,RMP_from_char
0,temporal,Bielefeld,OP230420,Verji,13,23420003.abf,S1,1,23420S1c1,D1,TTX,0.0,no,10.416389,6.675643,39.025301,-74.285889,24,200.0,80.749512,-35.27832,336.181641,-60.791016,19.4,510.601767,0,753.380113,1.151009,,,,,,8 mM,-61.828554
1,temporal,Bielefeld,OP230420,Verji,13,23420003.abf,S1,3,23420S1c3,D1,TTX,0.0,no,10.416389,7.867174,48.728367,-69.573975,26,300.0,78.448486,-32.043457,350.097656,-67.138672,17.3,393.397918,1,585.102837,1.006321,,,,,,8 mM,-60.460298
2,temporal,Bielefeld,OP230420,Verji,13,23420003.abf,S1,6,23420S1c6,D1,TTX,0.0,no,10.416389,8.820134,35.971082,-54.956055,22,300.0,76.660156,-29.827881,270.629883,-52.246094,14.85,426.098774,3,173.915797,1.266335,,,,,,8 mM,-59.615979
3,temporal,Bielefeld,OP230420,Verji,13,23420003.abf,S1,7,23420S1c7,D1,TTX,0.0,yes,10.416389,7.269195,39.186101,-69.268799,24,300.0,75.030518,-29.699707,242.553711,-71.411133,17.15,478.273362,4,598.079936,0.994396,,,,,,8 mM,-61.173839
4,temporal,Bielefeld,OP230420,Verji,13,23420003.abf,S1,8,23420S1c8,D1,TTX,0.0,yes,10.416389,6.0004,31.599917,-70.550537,22,350.0,81.011963,-33.068848,309.448242,-61.401367,16.65,575.513924,5,786.927898,1.18283,,,,,,8 mM,-60.95635


## 3. Display the names and dtypes of all the columns

In [13]:
# One entry per column: the column name and the dtype pandas assigned to it
df.dtypes

area                         object
tissue_source                object
OP                           object
patcher                      object
patient_age                   int64
filename                     object
slice                        object
cell_ch                       int64
cell_ID                      object
day                          object
treatment                    object
hrs_incubation              float64
repatch                      object
hrs_after_OP                float64
Rs                          float64
Rin                         float64
resting_potential           float64
max_spikes                    int64
Rheobase                    float64
AP_heigth                   float64
TH                          float64
max_depol                   float64
max_repol                   float64
membra_time_constant_tau    float64
capacitance                 float64
comments                     object
rheo_ramp                   float64
AP_halfwidth                

## 4. Display the unique values of the `high K concentration` and of the `treatment` columns

In [14]:
# Distinct values that occur in the `high K concentration` column
df['high K concentration'].unique()

array(['8 mM', '15 mM'], dtype=object)

In [25]:
# Distinct values that occur in the `treatment` column
df['treatment'].unique()

array(['TTX', 'high K', 'Ctrl', 'wash in high K'], dtype=object)

## 5. Display the main statistics of the `max_spikes` column

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) of `max_spikes`
df['max_spikes'].describe()

count     827.000000
mean       27.920193
std        57.997378
min         0.000000
25%        19.000000
50%        26.000000
75%        33.000000
max      1664.000000
Name: max_spikes, dtype: float64

## 6. Show all the rows where the max number of spikes is larger than 50

In [26]:
# Keep only the rows whose `max_spikes` value exceeds 50
df.query('max_spikes > 50')

Unnamed: 0,area,tissue_source,OP,patcher,patient_age,filename,slice,cell_ch,cell_ID,day,treatment,hrs_incubation,repatch,hrs_after_OP,Rs,Rin,resting_potential,max_spikes,Rheobase,AP_heigth,TH,max_depol,max_repol,membra_time_constant_tau,capacitance,comments,rheo_ramp,AP_halfwidth,Rheobse_ramp,Unnamed: 27,rheos_ramp,comment,Unnamed: 33,high K concentration,RMP_from_char
131,temporal,Bielefeld,OP231130,Verji,39,23n30003.abf,S1,1,23n30S1c1,D1,Ctrl,0.0,yes,11.992778,15.627081,58.666581,-78.060913,55,300.0,88.775635,-41.577148,352.294922,-103.515625,13.8,329.350619,0,,0.811086,351.581719,,,,,8 mM,-71.584465
138,temporal,Bielefeld,OP231130,Verji,39,23n30037.abf,S1_D2,2,23n30S1_D2c2,D2,wash in high K,21.0,no,32.699444,7.426442,65.804793,-67.544556,61,250.0,77.716064,-36.505127,246.459961,-74.34082,12.5,200.293398,8; exclude; Rs_end > 30,,0.950158,322.090736,,,,,8 mM,-59.579331
401,temporal,Hamburg,OP230808,Verji,14,23808003.abf,S1,6,23808S1c6,D1,TTX,0.0,no,8.163333,10.394754,106.082649,-83.300781,1664,-300.0,71.081543,-15.057373,237.426758,-69.702148,13.05,98.940861,5,,1.025354,,,,,,8 mM,-32.684415
483,temporal,Bielefeld,OP230209,Verji,63,23209012.abf,S2,5,23209S2c5,D1,high K,0.0,no,-2.874722,8.52573,81.231493,-69.049072,53,100.0,82.275391,-34.912109,365.478516,-98.266602,13.05,141.831642,8,192.024601,0.713136,,,,,,8 mM,-58.246034
579,temporal,Mitte,OP230810,Verji,63,23810004.abf,S1,5,23810S1c5,D1,high K,0.0,yes,5.660833,25.468412,79.043216,-65.155029,55,100.0,85.491943,-43.07251,389.770508,-80.078125,11.45,241.592788,3,224.5875,0.966593,,,,,,8 mM,-60.860893
581,temporal,Mitte,OP230810,Verji,63,23810004.abf,S1,7,23810S1c7,D1,high K,0.0,yes,5.660833,26.75653,74.709503,-64.855957,53,150.0,90.942383,-42.932129,423.950195,-83.007812,9.1,239.316854,5,307.5403,0.965371,,,,,,8 mM,-61.513494
582,temporal,Mitte,OP230810,Verji,63,23810004.abf,S1,8,23810S1c8,D1,high K,0.0,no,5.660833,18.023665,63.532613,-61.413574,55,200.0,84.509277,-39.605713,339.84375,-77.392578,7.1,146.691551,6,199.5067,1.043352,,,,,,8 mM,-62.291177
795,temporal,Virchow,OP230314,Verji,12,23314003.abf,S1,8,23314S1c8,D1,high K,0.0,no,5.940833,22.054204,97.59613,-67.358398,52,100.0,79.431152,-41.333008,325.073242,-111.572266,13.3,186.40479,5,,0.776089,,,201.505075,,,8 mM,-61.035575
889,temporal,Hamburg,OP240117,Verji,55,24118004.abf,S2_D2,4,24117S2_D2c4,D1,Ctrl,20.0,no,26.4242,17.188385,139.095453,-76.916504,61,100.0,78.436279,-40.686035,316.040039,-95.092773,19.8,201.367598,3,,0.740537,295.539851,,,,,8 mM,-59.561161
890,temporal,Hamburg,OP240117,Verji,55,24118004.abf,S2_D2,5,24117S2_D2c5,D1,Ctrl,20.0,no,26.4242,27.929918,140.091217,-70.422363,56,100.0,82.684326,-44.421387,325.561523,-96.923828,18.85,226.172391,4,,0.769121,207.0069,,,,,8 mM,-60.495223


## 7. Do the same as in part 5, separately for the rows where `high K concentration` is `8 mM` and `15 mM`

Is the distribution any different?

In [31]:
# Restrict to the 8 mM rows first, then summarise their `max_spikes` values
df[df['high K concentration'].eq('8 mM')]['max_spikes'].describe()

count     474.000000
mean       30.955696
std        75.960740
min         1.000000
25%        21.000000
50%        27.000000
75%        34.000000
max      1664.000000
Name: max_spikes, dtype: float64

In [30]:
# Restrict to the 15 mM rows first, then summarise their `max_spikes` values
df[df['high K concentration'].eq('15 mM')]['max_spikes'].describe()

count    353.000000
mean      23.844193
std       10.519791
min        0.000000
25%       18.000000
50%       24.000000
75%       31.000000
max       48.000000
Name: max_spikes, dtype: float64

## 8. Display the statistics of `max_spikes` when `high K concentration` is `8 mM`, and the maximum number of spikes is <= 100

Does that change your conclusion?

In [32]:
# 8 mM rows only, additionally dropping any row with more than 100 spikes,
# then summarise the remaining `max_spikes` values
df.loc[
    df['high K concentration'].eq('8 mM') & df['max_spikes'].le(100),
    'max_spikes',
].describe()

count    473.000000
mean      27.503171
std       10.965493
min        1.000000
25%       21.000000
50%       27.000000
75%       34.000000
max       61.000000
Name: max_spikes, dtype: float64

## 9. Add a new column with an "anonymized" patcher name

Add a new column, `patcher_id`, composed of the first and last letters of the patcher's name, both in uppercase (e.g. `Verji` becomes `VI`).

Display all the unique `patcher_id` values

In [49]:
# Upper-case the names once so both extracted letters come out as capitals
patcher_upper = df['patcher'].str.upper()
# ID = first character followed by last character of each upper-cased name
df['patcher_id'] = patcher_upper.str.get(0) + patcher_upper.str.get(-1)

In [50]:
# Distinct values in the newly added `patcher_id` column
df['patcher_id'].unique()

array(['VI', 'RE'], dtype=object)