### Pandas basics

In [2]:
import pandas as pd
import numpy as np

In [2]:
# Wrap five random draws in a Series; with no index supplied the labels are 0..4.
pd.Series(np.random.randn(5))

0    0.667176
1    0.029158
2   -0.877311
3   -0.001431
4    0.074574
dtype: float64

In [7]:
# Build a labelled Series straight from a mapping: the keys become the index.
d = pd.Series({"a": 1, "b": 2, "c": 3})
print(d)

a    1
b    2
c    3
dtype: int64


In [8]:
# Element dtype of the Series `d` (int64 in the recorded output).
d.dtype

dtype('int64')

In [11]:
# Column-oriented construction: every mapping entry is a Series that becomes one column.
d = dict(
    one=pd.Series([1, 2, 3, 4, 5]),
    two=pd.Series([5, 6, 7, 8, 0]),
)
df = pd.DataFrame(d)
print(df)

   one  two
0    1    5
1    2    6
2    3    7
3    4    8
4    5    0


In [13]:
# Same column-oriented construction, this time with plain lists as the column values.
d = {"one": list(range(1, 5)), "two": list(range(5, 9))}
df = pd.DataFrame(d)
print(df)

   one  two
0    1    5
1    2    6
2    3    7
3    4    8


In [14]:
# Column labels of the frame, returned as an Index object.
df.columns

Index(['one', 'two'], dtype='object')

In [17]:
# Row-oriented construction: a list of dicts, one dict per row.
data2 = pd.DataFrame([{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}])

In [18]:
# A list of labels inside the brackets selects several columns and yields a DataFrame.
data2[["a","b"]]

Unnamed: 0,a,b
0,1,2
1,5,10


In [19]:
# Promote a named Series to a one-column DataFrame; the Series name becomes the column label.
ser = pd.Series(range(3), name="ser").to_frame()
ser

Unnamed: 0,ser
0,0
1,1
2,2


In [21]:
long_series = pd.Series(np.random.randn(1000))
# Preview both ends of the Series instead of printing all 1000 values.
print(long_series.head(), long_series.tail(), sep="\n")

0    0.162123
1   -1.964647
2   -0.343654
3   -0.274528
4   -0.965258
dtype: float64
995    1.649149
996    1.245903
997    0.480838
998    0.211389
999    0.391535
dtype: float64


In [22]:
# Columns of unequal length: "two" has four values, the others three, so the
# shorter columns show NaN in the last row of the printed frame.
df = pd.DataFrame(
    {
        label: pd.Series(np.random.randn(length))
        for label, length in (("one", 3), ("two", 4), ("three", 3))
    }
)
print(df)

        one       two     three
0  0.127862  0.900103  1.027582
1  1.358931  0.567895 -0.047917
2  0.801439  0.444854 -0.098605
3       NaN -1.858040       NaN


In [23]:
# Positional row lookup: row 1 comes back as a Series labelled by column name.
df.iloc[1]

one      1.358931
two      0.567895
three   -0.047917
Name: 1, dtype: float64

In [24]:
# Single-label bracket access returns that column as a Series.
df["two"]

0    0.900103
1    0.567895
2    0.444854
3   -1.858040
Name: two, dtype: float64

In [26]:
# Subtract column "two" from every column, matching on the row index (axis 0).
col = df["two"]
df.sub(col, axis=0)

Unnamed: 0,one,two,three
0,-0.772242,0.0,0.127478
1,0.791036,0.0,-0.615812
2,0.356585,0.0,-0.54346
3,,0.0,


In [27]:
# Column-wise check: True only for a column in which every comparison `> 0` holds.
(df>0).all()

one      False
two      False
three    False
dtype: bool

In [28]:
# Emptiness flag of the frame (False in the recorded output, since df holds data).
df.empty

False

In [30]:
# Mean across the columns of each row; the axis is spelled out as a keyword.
df.mean(axis=1)

0    0.685182
1    0.626303
2    0.382563
3   -1.858040
dtype: float64

In [31]:
# Per-column totals down the rows, ignoring NaN entries.
df.sum(axis=0, skipna=True)

one      2.288232
two      0.054813
three    0.881060
dtype: float64

In [33]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,one,two,three
count,3.0,4.0,3.0
mean,0.762744,0.013703,0.293687
std,0.616446,1.262557,0.636077
min,0.127862,-1.85804,-0.098605
25%,0.46465,-0.130869,-0.073261
50%,0.801439,0.506375,-0.047917
75%,1.080185,0.650947,0.489832
max,1.358931,0.900103,1.027582


In [34]:
# Fifty random integers drawn from 0..9 (the upper bound 10 is exclusive).
data = np.random.randint(low=0, high=10, size=50)

In [35]:
# Wrap the random integers in a Series and preview its first rows.
s=pd.Series(data)
s.head()

0    8
1    5
2    0
3    5
4    3
dtype: int32

In [37]:
# Frequency of each distinct value, ordered by the value itself rather than by its count.
s.value_counts().sort_index()

0    4
1    3
2    3
3    7
4    4
5    8
6    4
7    2
8    6
9    9
Name: count, dtype: int64

In [38]:
# 3 and 7 each occur three times, so mode() reports both of them.
s5 = pd.Series([1, 1] + [3, 3, 3] + [5, 5] + [7, 7, 7])
s5.mode()

0    3
1    7
dtype: int64

In [40]:
# Apply np.cumsum to each column, giving running totals down the rows.
df.apply(np.cumsum)

Unnamed: 0,one,two,three
0,0.127862,0.900103,1.027582
1,1.486793,1.467999,0.979665
2,2.288232,1.912853,0.88106
3,,0.054813,


In [41]:
# A string names the method to apply; the result matches df.mean().
df.apply("mean")

one      0.762744
two      0.013703
three    0.293687
dtype: float64

In [42]:
# Column means (default axis); same numbers as df.apply("mean").
df.mean()

one      0.762744
two      0.013703
three    0.293687
dtype: float64

In [43]:
# Same string dispatch, but computed across each row (axis=1).
df.apply("mean", axis=1)

0    0.685182
1    0.626303
2    0.382563
3   -1.858040
dtype: float64

In [3]:
# Integer values 0..4 labelled with the letters a..e.
ser = pd.Series(range(5), index=["a", "b", "c", "d", "e"])
print(ser)

a    0
b    1
c    2
d    3
e    4
dtype: int64


In [4]:
# Label-based selection with a list of index labels.
ser.loc[["a", 'b']]

a    0
b    1
dtype: int64

In [5]:
# 5x5 grid of 0..24 whose rows and columns are both labelled a..e.
df = pd.DataFrame(
    np.arange(25).reshape(5, -1),
    index=[*"abcde"],
    columns=[*"abcde"],
)
print(df)

    a   b   c   d   e
a   0   1   2   3   4
b   5   6   7   8   9
c  10  11  12  13  14
d  15  16  17  18  19
e  20  21  22  23  24


In [6]:
# Select rows a, b and c by label; every column is kept.
df.loc[["a","b","c"]]

Unnamed: 0,a,b,c,d,e
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14


In [7]:
# Row labels first, column labels second: a 3x2 sub-frame.
df.loc[["a","b","c"],["b","d"]]

Unnamed: 0,b,d
a,1,3
b,6,8
c,11,13


In [8]:
# Eight consecutive days starting 2000-01-01 index a random 8x4 frame with columns A..D.
dates = pd.date_range(start="1/1/2000", periods=8)
df = pd.DataFrame(
    np.random.randn(len(dates), 4),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2000-01-01,-0.369812,1.112446,0.077792,0.895914
2000-01-02,-1.038972,1.357381,-0.160671,0.098188
2000-01-03,0.037776,-1.982405,0.332962,-0.699513
2000-01-04,0.722556,1.58155,0.637607,0.048642
2000-01-05,-0.381097,1.019708,0.666692,2.136263
2000-01-06,0.452302,-0.412611,-0.224727,0.739594
2000-01-07,-0.925517,-0.436363,-0.437983,-0.275917
2000-01-08,-0.862351,1.609377,-1.65175,1.809911


In [9]:
# One column as a Series; it keeps the daily date index (Freq: D in the output).
df['A']

2000-01-01   -0.369812
2000-01-02   -1.038972
2000-01-03    0.037776
2000-01-04    0.722556
2000-01-05   -0.381097
2000-01-06    0.452302
2000-01-07   -0.925517
2000-01-08   -0.862351
Freq: D, Name: A, dtype: float64

In [10]:
# Swap the contents of columns A and B via plain bracket assignment
# (the next display shows A holding the former B values and vice versa).
# Note: re-running this cell swaps them back, so it is not idempotent.
df[["B","A"]]=df[["A","B"]]

In [11]:
# Display the frame after the bracket-assignment swap.
df

Unnamed: 0,A,B,C,D
2000-01-01,1.112446,-0.369812,0.077792,0.895914
2000-01-02,1.357381,-1.038972,-0.160671,0.098188
2000-01-03,-1.982405,0.037776,0.332962,-0.699513
2000-01-04,1.58155,0.722556,0.637607,0.048642
2000-01-05,1.019708,-0.381097,0.666692,2.136263
2000-01-06,-0.412611,0.452302,-0.224727,0.739594
2000-01-07,-0.436363,-0.925517,-0.437983,-0.275917
2000-01-08,1.609377,-0.862351,-1.65175,1.809911


In [12]:
# Pull out just A and B to inspect the swapped values.
df[["A","B"]]

Unnamed: 0,A,B
2000-01-01,1.112446,-0.369812
2000-01-02,1.357381,-1.038972
2000-01-03,-1.982405,0.037776
2000-01-04,1.58155,0.722556
2000-01-05,1.019708,-0.381097
2000-01-06,-0.412611,0.452302
2000-01-07,-0.436363,-0.925517
2000-01-08,1.609377,-0.862351


In [13]:
# Attempted swap through .loc with a DataFrame on the right-hand side.
# The recorded output is identical to the previous cell, i.e. nothing is swapped:
# per the pandas indexing guide, .loc aligns the right-hand side on its column
# labels before assigning, so A is written back to A and B to B.
df.loc[:,["B","A"]]=df[["A","B"]]
df[["A","B"]]

Unnamed: 0,A,B
2000-01-01,1.112446,-0.369812
2000-01-02,1.357381,-1.038972
2000-01-03,-1.982405,0.037776
2000-01-04,1.58155,0.722556
2000-01-05,1.019708,-0.381097
2000-01-06,-0.412611,0.452302
2000-01-07,-0.436363,-0.925517
2000-01-08,1.609377,-0.862351


In [14]:
# Converting the right-hand side to a NumPy array drops its labels, so the
# values are assigned by position and the swap takes effect (see output).
df.loc[:,["B","A"]]=df[["A","B"]].to_numpy()
df[["A","B"]]

Unnamed: 0,A,B
2000-01-01,-0.369812,1.112446
2000-01-02,-1.038972,1.357381
2000-01-03,0.037776,-1.982405
2000-01-04,0.722556,1.58155
2000-01-05,-0.381097,1.019708
2000-01-06,0.452302,-0.412611
2000-01-07,-0.925517,-0.436363
2000-01-08,-0.862351,1.609377


In [17]:
# Positional column selection: all rows, columns 0 and 1 (A and B in this frame).
df.iloc[:, [0,1]]

Unnamed: 0,A,B
2000-01-01,-0.369812,1.112446
2000-01-02,-1.038972,1.357381
2000-01-03,0.037776,-1.982405
2000-01-04,0.722556,1.58155
2000-01-05,-0.381097,1.019708
2000-01-06,0.452302,-0.412611
2000-01-07,-0.925517,-0.436363
2000-01-08,-0.862351,1.609377


In [18]:
# Positional assignment into columns 1 and 0 (in that order); the recorded
# output shows A and B exchanged compared with the previous cell.
df.iloc[:,[1,0]]=df[["A","B"]]
df[["A", "B"]]

Unnamed: 0,A,B
2000-01-01,1.112446,-0.369812
2000-01-02,1.357381,-1.038972
2000-01-03,-1.982405,0.037776
2000-01-04,1.58155,0.722556
2000-01-05,1.019708,-0.381097
2000-01-06,-0.412611,0.452302
2000-01-07,-0.436363,-0.925517
2000-01-08,1.609377,-0.862351


In [19]:
# Take a copy of df to experiment on.
dfa=df.copy()

In [20]:
# Attribute-style access to column A; the output is the same Series dfa["A"] would give.
dfa.A

2000-01-01    1.112446
2000-01-02    1.357381
2000-01-03   -1.982405
2000-01-04    1.581550
2000-01-05    1.019708
2000-01-06   -0.412611
2000-01-07   -0.436363
2000-01-08    1.609377
Freq: D, Name: A, dtype: float64

In [22]:
# Small two-column integer frame used to demonstrate row assignment.
x = pd.DataFrame(dict(x=[1, 2, 3], y=[3, 4, 5]))

In [23]:
# Overwrite the row at position 1 from a dict; in the recorded output the
# values land under the matching column labels (x -> 9, y -> 99).
x.iloc[1]={'x':9, 'y':99}

In [24]:
# Display the frame to confirm that row 1 was replaced.
x

Unnamed: 0,x,y
0,1,3
1,9,99
2,3,5


In [25]:
# Random 5x4 frame with a default integer index and columns A..D.
df1 = pd.DataFrame(np.random.randn(5, 4), columns=["A", "B", "C", "D"])
df1

Unnamed: 0,A,B,C,D
0,0.724374,2.481409,-0.908904,-0.177923
1,-0.817959,-0.516817,0.56702,0.915322
2,0.189909,0.484638,-1.724778,-0.158015
3,2.560256,1.512477,-0.317522,0.374679
4,-1.172405,1.629811,-0.320174,0.663295


In [26]:
# .loc slices by label and includes both endpoints: rows 2 and 3 are returned.
df1.loc[2:3]

Unnamed: 0,A,B,C,D
2,0.189909,0.484638,-1.724778,-0.158015
3,2.560256,1.512477,-0.317522,0.374679


In [27]:
# .iloc slices by position and excludes the stop: only row 2 is returned.
df1.iloc[2:3]

Unnamed: 0,A,B,C,D
2,0.189909,0.484638,-1.724778,-0.158015


In [28]:
# Rebuild df1 as a fresh random 6x4 frame (columns A..D) for the selection examples.
df1 = pd.DataFrame(
    np.random.randn(6, 4),
    columns=[*"ABCD"],
)

In [30]:
# Every row of column A, selected by label.
df1.loc[:,'A']

0   -0.935072
1    0.068191
2    0.800036
3   -0.398361
4   -0.039319
5    0.978752
Name: A, dtype: float64

In [31]:
# The same column selected by position (0); the output matches the .loc version.
df1.iloc[:,0]

0   -0.935072
1    0.068191
2    0.800036
3   -0.398361
4   -0.039319
5    0.978752
Name: A, dtype: float64

In [32]:
# Callable row indexer: the lambda produces a boolean mask, keeping rows where A > 0.
# The lambda's parameter is named `df` but is local to the lambda.
df1.loc[lambda df:df["A"]>0,:]

Unnamed: 0,A,B,C,D
1,0.068191,1.23315,-1.383937,-0.551224
2,0.800036,1.89147,-0.198137,-1.291126
5,0.978752,0.192075,-0.671211,1.535948


In [39]:
# A callable may also return positions; here it picks columns 0 and 1 (A and B).
df1.iloc[:,lambda df:[0,1]]

Unnamed: 0,A,B
0,-0.935072,1.792251
1,0.068191,1.23315
2,0.800036,1.89147
3,-0.398361,0.382846
4,-0.039319,-0.518009
5,0.978752,0.192075


In [40]:
# Callable indexer on a Series: keep only the positive entries of column A.
df1['A'].loc[lambda s:s>0]

1    0.068191
2    0.800036
5    0.978752
Name: A, dtype: float64

In [41]:
# Six random draws arranged as three rows of two, labelled A and B.
dfi = pd.DataFrame(
    np.random.randn(6).reshape(-1, 2),
    columns=list("AB"),
)
dfi

Unnamed: 0,A,B
0,1.201326,0.157173
1,-0.975199,-0.498893
2,-0.345524,-0.643859


In [42]:
# Assigning through .loc to a label that does not exist yet adds column C,
# filled with the values of A (see the next display).
dfi.loc[:,'C']=dfi.loc[:,"A"]

In [43]:
# Display the frame to confirm the new column C.
dfi

Unnamed: 0,A,B,C
0,1.201326,0.157173,1.201326
1,-0.975199,-0.498893,-0.975199
2,-0.345524,-0.643859,-0.345524


In [44]:
# Plain bracket assignment also creates a new column: D mirrors B.
dfi['D']=dfi['B']
dfi

Unnamed: 0,A,B,C,D
0,1.201326,0.157173,1.201326,0.157173
1,-0.975199,-0.498893,-0.975199,-0.498893
2,-0.345524,-0.643859,-0.345524,-0.643859


In [45]:
# Boolean mask inside brackets filters rows: keep those where A > 0.
df1[df1['A']>0]

Unnamed: 0,A,B,C,D
1,0.068191,1.23315,-1.383937,-0.551224
2,0.800036,1.89147,-0.198137,-1.291126
5,0.978752,0.192075,-0.671211,1.535948


In [46]:
# where() keeps the values that satisfy the condition (negative here) and shows
# NaN everywhere else. `df` is the date-indexed frame at this point.
df.where(df<0)

Unnamed: 0,A,B,C,D
2000-01-01,,-0.369812,,
2000-01-02,,-1.038972,-0.160671,
2000-01-03,-1.982405,,,-0.699513
2000-01-04,,,,
2000-01-05,,-0.381097,,
2000-01-06,-0.412611,,-0.224727,
2000-01-07,-0.436363,-0.925517,-0.437983,-0.275917
2000-01-08,,-0.862351,-1.65175,


In [47]:
# Second argument supplies the replacement: values that are not negative are
# swapped for their negation, so every entry in the result is negative.
df.where(df<0, -df)

Unnamed: 0,A,B,C,D
2000-01-01,-1.112446,-0.369812,-0.077792,-0.895914
2000-01-02,-1.357381,-1.038972,-0.160671,-0.098188
2000-01-03,-1.982405,-0.037776,-0.332962,-0.699513
2000-01-04,-1.58155,-0.722556,-0.637607,-0.048642
2000-01-05,-1.019708,-0.381097,-0.666692,-2.136263
2000-01-06,-0.412611,-0.452302,-0.224727,-0.739594
2000-01-07,-0.436363,-0.925517,-0.437983,-0.275917
2000-01-08,-1.609377,-0.862351,-1.65175,-1.809911


In [48]:
# Take a copy of df to experiment on.
df2=df.copy()

In [50]:
# The mask is built from rows at positions 1..3 only; positive values in those
# rows are overwritten with 3, while all other rows stay unchanged (see output).
df2[df2[1:4]>0]=3
df2

Unnamed: 0,A,B,C,D
2000-01-01,1.112446,-0.369812,0.077792,0.895914
2000-01-02,3.0,-1.038972,-0.160671,3.0
2000-01-03,-1.982405,3.0,3.0,-0.699513
2000-01-04,3.0,3.0,3.0,3.0
2000-01-05,1.019708,-0.381097,0.666692,2.136263
2000-01-06,-0.412611,0.452302,-0.224727,0.739594
2000-01-07,-0.436363,-0.925517,-0.437983,-0.275917
2000-01-08,1.609377,-0.862351,-1.65175,1.809911


In [None]:
df=pd.DataFrame({'a':list