# Series and DataFrames

In [1]:
import numpy as np
import pandas as pd
import os 
# Current working directory of the kernel process; the CSV read/write cells
# later in the notebook build their file paths from it.
cwd = os.getcwd()

## Pandas Series

In [4]:
# Fix the seed so the random values (and the outputs below) are reproducible.
np.random.seed(0)
N = 12
data = np.random.rand(N)  # N uniform samples from [0, 1)
print(data)

# Build one label per value: 'Sub00', 'Sub01', ..., 'Sub11'.
# zfill(2) left-pads the number with zeros to at least two digits, which
# replaces the manual `if n < 10` branch.
prefix = 'Sub'
index = [prefix + str(n).zfill(2) for n in range(N)]
print(index)

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492]
['Sub00', 'Sub01', 'Sub02', 'Sub03', 'Sub04', 'Sub05', 'Sub06', 'Sub07', 'Sub08', 'Sub09', 'Sub10', 'Sub11']


In [5]:
# Wrap the random values in a labelled Series: `data` supplies the values and
# `index` supplies one label per value.
s = pd.Series(data,index = index) # dtype = 'str' will make dtype:object instead of float64
print(s)

Sub00    0.548814
Sub01    0.715189
Sub02    0.602763
Sub03    0.544883
Sub04    0.423655
Sub05    0.645894
Sub06    0.437587
Sub07    0.891773
Sub08    0.963663
Sub09    0.383442
Sub10    0.791725
Sub11    0.528895
dtype: float64


In [20]:
# Three ways to read the first element of the Series.
print(s.Sub00)      # attribute-style access by label
print(s['Sub00'])   # dictionary-style access by label
# Positional access must go through .iloc: plain s[0] on a string-labelled
# Series relies on a deprecated fallback that treats the integer as a position.
print(s.iloc[0])

0.5488135039273248
0.5488135039273248
0.5488135039273248


In [10]:
# Membership test: `in` on a Series checks the index labels, not the values.
print('Sub11' in s)
print('Sub111' in s)

True
False


In [18]:
# The labels and the values can be retrieved separately:
# `.index` holds the labels, `.values` the underlying NumPy array.
print(s.index)
print(s.values)

Index(['Sub00', 'Sub01', 'Sub02', 'Sub03', 'Sub04', 'Sub05', 'Sub06', 'Sub07',
       'Sub08', 'Sub09', 'Sub10', 'Sub11'],
      dtype='object')
[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492]


### By default a Series can be a view of its source array: changing the source (`data`) also changes the Series (`s`)

In [21]:
# Small integer example: values 0..N-1 labelled 'd1'..'d4'.
N = 4
index = ['d' + str(k + 1) for k in range(N)]
data = np.arange(N)
# No copy is requested here, so the Series is built directly from `data`.
s = pd.Series(data=data, index=index)
s

d1    0
d2    1
d3    2
d4    3
dtype: int64

In [22]:
# Modify the source NumPy array (not the Series) and re-display `s`;
# the output shows the change appearing in the Series as well.
data[0]=1000
s

d1    1000
d2       1
d3       2
d4       3
dtype: int64

In [23]:
# Overwrite the first element by position. Use .iloc for positional
# assignment: plain s[0] = ... on a string-labelled Series relies on a
# deprecated fallback and may instead be interpreted as the label 0.
s.iloc[0] = 99
s

d1    99
d2     1
d3     2
d4     3
dtype: int64

### To avoid this effect, pass `copy=True`, so that changing the source (`data`) no longer affects the Series (`s`)

In [28]:
# Rebuild the same four-element example, this time asking pandas to copy
# the input array.
N = 4
index = ['d' + str(k + 1) for k in range(N)]
data = np.arange(N)
s = pd.Series(data=data, index=index, copy=True)

# Mutating the source array afterwards leaves the Series unchanged (see output).
data[0] = 10000
s

d1    0
d2    1
d3    2
d4    3
dtype: int64

In [31]:
# NumPy functions can be applied directly to a Series.
# Alternative to try: np.cumprod(s)
c = np.cumsum(s)
c
# As the output shows, the result is still a Series carrying the index labels
# (d1..d4). Applying the function to the raw array (s.values) would return a
# plain, unlabelled NumPy array instead.

d1    0
d2    1
d3    3
d4    6
dtype: int64

In [32]:
# Arithmetic mean of the Series values (a single scalar).
s.mean()
# Alternative to try: s.diff()

1.5

In [35]:
N = 4

# Two Series holding the same values 0..N-1, but with the labels in a
# different order.
index0 = ['s1', 's2', 's3', 's4']
index1 = ['s4', 's1', 's3', 's2']
data0 = np.arange(N)
data1 = np.arange(N)
s0 = pd.Series(data0, index=index0)
s1 = pd.Series(data1, index=index1)

# Addition aligns the operands on their labels, not on their position.
total = s0 + s1
print(total)

s1    1
s2    4
s3    4
s4    3
dtype: int64


## DataFrames

In [38]:
from pandas import DataFrame, read_csv

In [41]:
# Fixed seed so the simulated responses are reproducible.
np.random.seed(0)

# Row labels, one per simulated unit.
nl = ['n1', 'n2', 'n3', 'n4']

# Responses are integers drawn from [min_resp, max_resp).
min_resp, max_resp = 0, 90

# Two independent draws, one value per label in `nl`.
n_units = len(nl)
r1 = np.random.randint(low=min_resp, high=max_resp, size=n_units)
r2 = np.random.randint(low=min_resp, high=max_resp, size=n_units)

In [42]:
# Inspect the two response arrays (displayed together as a tuple).
r1,r2

(array([44, 47, 64, 67]), array([67,  9, 83, 21]))

In [45]:
# Pair up corresponding elements of r1 and r2 into (r1[i], r2[i]) tuples;
# list() materialises the zip iterator so it can be printed and reused.
nd = list(zip(r1,r2))
print(nd)

[(44, 67), (47, 9), (64, 83), (67, 21)]


In [46]:
# Build the DataFrame: each tuple in `nd` becomes one row, `nl` provides the
# row labels and `columns` names the two fields.
df = pd.DataFrame(data = nd, index = nl, columns=['resp1','resp2'])
display(df)

Unnamed: 0,resp1,resp2
n1,44,67
n2,47,9
n3,64,83
n4,67,21


In [48]:
# head() returns the first rows of the frame (5 by default; this frame has
# only 4 rows, so all of them are shown). tail() is the counterpart for the
# last rows.
df.head()

Unnamed: 0,resp1,resp2
n1,44,67
n2,47,9
n3,64,83
n4,67,21


In [49]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,resp1,resp2
count,4.0,4.0
mean,55.5,45.0
std,11.676187,35.590261
min,44.0,9.0
25%,46.25,18.0
50%,55.5,44.0
75%,64.75,71.0
max,67.0,83.0


In [61]:
# Same summary with custom percentiles: np.linspace(0,1,11) requests every
# 10% step from 0% to 100%.
df.describe(percentiles = np.linspace(0,1,11))

Unnamed: 0,resp1,resp2
count,4.0,4.0
mean,55.5,45.0
std,11.676187,35.590261
min,44.0,9.0
0%,44.0,9.0
10%,44.9,12.6
20%,45.8,16.2
30%,46.7,19.8
40%,50.4,30.2
50%,55.5,44.0


In [62]:
# Selecting a single column by name returns it as a Series
# (note the Name/dtype footer in the output).
df['resp2']

n1    67
n2     9
n3    83
n4    21
Name: resp2, dtype: int64

In [66]:
# Add a new column computed element-wise from two existing columns.
# Note: this modifies `df` in place.
df['resp3'] = df.resp1 * df.resp2
display(df)

Unnamed: 0,resp1,resp2,resp3
n1,44,67,2948
n2,47,9,423
n3,64,83,5312
n4,67,21,1407


In [67]:
# Remove the derived column again; `del` also modifies `df` in place.
del df['resp3']
display(df)

Unnamed: 0,resp1,resp2
n1,44,67
n2,47,9
n3,64,83
n4,67,21


In [71]:
# .loc selects by label. Label slices include both endpoints, so
# 'n1':'n2' returns rows n1 and n2.
df.loc['n1':'n2']

Unnamed: 0,resp1,resp2
n1,44,67
n2,47,9


In [72]:
# .iloc selects by integer position (0-based); a single position returns
# that row as a Series whose index is the column names.
df.iloc[0]

resp1    44
resp2    67
Name: n1, dtype: int64

In [73]:
# Column selection by name; the row labels are kept as the Series index.
df['resp1']

n1    44
n2    47
n3    64
n4    67
Name: resp1, dtype: int64

In [76]:
# Mean along an axis.
df.mean(axis=0) # axis=0: average down the rows, giving one mean per column

resp1    55.5
resp2    45.0
dtype: float64

In [102]:
# Load the spike-rate table from the working directory. os.path.join builds
# the path with the correct separator for the current platform.
# index_col=0 uses the first CSV column as row labels; header=0 takes the
# column names from the first line. Note: this rebinds `df`.
df = pd.read_csv(os.path.join(cwd, 'spike_rates.csv'), index_col=0, header=0)
df

Unnamed: 0,resp1,resp2
Nrn0,44,9
Nrn1,47,83
Nrn2,64,21
Nrn3,67,36
Nrn4,67,87


In [109]:
# Write the frame to test.csv in the working directory, keeping both the row
# labels (index=True) and the column names (header=True). os.path.join builds
# the path with the correct separator for the current platform.
df.to_csv(os.path.join(cwd, 'test.csv'), index=True, header=True)

In [110]:
# Read the file back: the first column becomes the row labels and the first
# line the column names. os.path.join builds the path with the correct
# separator for the current platform.
df = pd.read_csv(os.path.join(cwd, 'test.csv'), index_col=0, header=0)

In [111]:
# Display the frame reloaded from test.csv; the output matches the table
# that was loaded from spike_rates.csv above.
df

Unnamed: 0,resp1,resp2
Nrn0,44,9
Nrn1,47,83
Nrn2,64,21
Nrn3,67,36
Nrn4,67,87


In [None]:
# df.style.apply(some_functions)