In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

Creating a Series by passing a list of values, letting pandas create a default integer index

In [3]:
# Build a Series from a plain Python list. No index is supplied, so pandas
# assigns the default integer index; the NaN entry makes the dtype float64.
seri = pd.Series(data=[1, 2, 3, np.nan, 45])
seri

0     1.0
1     2.0
2     3.0
3     NaN
4    45.0
dtype: float64

Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns:

In [11]:
# Ten consecutive calendar days starting on 2021-01-01; used below as the
# row index of the demo DataFrame. freq="D" is the default, spelled out here.
dates = pd.date_range(start="20210101", periods=10, freq="D")
dates

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# 10x4 frame of standard-normal samples, indexed by `dates`, columns a-d.
# A seeded generator is used so that "Restart & Run All" rebuilds exactly the
# same frame every time; outputs saved from an earlier unseeded run will
# differ until the notebook is re-executed.
rng = np.random.default_rng(42)
df = pd.DataFrame(
    rng.standard_normal((10, 4)),
    index=dates,
    columns=["a", "b", "c", "d"],  # list("abcd") is an equivalent shorthand
)
df

Unnamed: 0,a,b,c,d
2021-01-01,-0.248532,-0.633809,0.365227,-2.241089
2021-01-02,-1.047581,1.161637,-0.803006,-2.093864
2021-01-03,-1.151396,0.575674,-0.723401,-0.487148
2021-01-04,-0.628977,1.176879,-0.412652,2.204962
2021-01-05,1.115872,1.940716,0.584026,0.259215
2021-01-06,0.944919,-1.035663,-0.510705,-1.078126
2021-01-07,-0.581449,1.246238,-0.510814,-1.517736
2021-01-08,1.77243,-0.427322,0.726918,-0.831456
2021-01-09,0.601997,-0.600356,0.700874,-0.725331
2021-01-10,1.728482,0.321951,1.031182,1.13289


Creating a DataFrame by passing a dict of objects that can be converted to a Series-like structure:

In [24]:
# Each dict value becomes one column. Scalars ("a", "b", "f") are broadcast
# to the length set by the array-like values, and every column keeps its own
# dtype (int, datetime, float, int32, category, string/object).
df2 = pd.DataFrame(
    {
        "a": 12,
        "b": pd.Timestamp("20201201"),
        "c": pd.Series(1, index=[0, 1, 2, 3], dtype="float64"),
        "d": np.full(4, 3, dtype="int32"),
        "e": pd.Categorical(["test", "train", "test", "train"]),
        "f": "foo",
    }
)
df2

Unnamed: 0,a,b,c,d,e,f
0,12,2020-12-01,1.0,3,test,foo
1,12,2020-12-01,1.0,3,train,foo
2,12,2020-12-01,1.0,3,test,foo
3,12,2020-12-01,1.0,3,train,foo


In [25]:
# Per-column dtypes: each column of df2 keeps its own type.
df2.dtypes

a             int64
b    datetime64[ns]
c           float64
d             int32
e          category
f            object
dtype: object

In [28]:
# Attribute-style access to column "b"; the result is a Series named "b".
df2.b

0   2020-12-01
1   2020-12-01
2   2020-12-01
3   2020-12-01
Name: b, dtype: datetime64[ns]

In [31]:
# Row labels of df: the DatetimeIndex that was passed as `index=dates`.
df.index

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10'],
              dtype='datetime64[ns]', freq='D')

In [32]:
# Column labels of df.
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

In [33]:
# Underlying data as a NumPy array (row and column labels are dropped).
# DataFrame.to_numpy() is the accessor pandas recommends over `.values`.
df.to_numpy()

array([[-0.24853179, -0.63380923,  0.36522685, -2.24108911],
       [-1.04758114,  1.16163672, -0.80300609, -2.0938644 ],
       [-1.15139608,  0.57567368, -0.72340056, -0.4871477 ],
       [-0.62897748,  1.17687869, -0.41265173,  2.20496219],
       [ 1.11587208,  1.94071591,  0.58402557,  0.25921499],
       [ 0.94491853, -1.03566334, -0.51070505, -1.07812627],
       [-0.58144915,  1.24623828, -0.51081397, -1.51773565],
       [ 1.77242963, -0.42732231,  0.72691761, -0.8314562 ],
       [ 0.60199719, -0.60035598,  0.70087449, -0.72533091],
       [ 1.72848153,  0.32195097,  1.03118161,  1.1328903 ]])

In [34]:
# Quick statistical summary per column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,a,b,c,d
count,10.0,10.0,10.0,10.0
mean,0.250576,0.372594,0.044765,-0.537768
std,1.116558,1.006538,0.699013,1.400314
min,-1.151396,-1.035663,-0.803006,-2.241089
25%,-0.617095,-0.557098,-0.510787,-1.407833
50%,0.176733,0.448812,-0.023712,-0.778394
75%,1.073134,1.173068,0.671662,0.072624
max,1.77243,1.940716,1.031182,2.204962


In [35]:
# Transpose: the columns a-d become rows and the dates become column labels.
df.T

Unnamed: 0,2021-01-01 00:00:00,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,2021-01-09 00:00:00,2021-01-10 00:00:00
a,-0.248532,-1.047581,-1.151396,-0.628977,1.115872,0.944919,-0.581449,1.77243,0.601997,1.728482
b,-0.633809,1.161637,0.575674,1.176879,1.940716,-1.035663,1.246238,-0.427322,-0.600356,0.321951
c,0.365227,-0.803006,-0.723401,-0.412652,0.584026,-0.510705,-0.510814,0.726918,0.700874,1.031182
d,-2.241089,-2.093864,-0.487148,2.204962,0.259215,-1.078126,-1.517736,-0.831456,-0.725331,1.13289


In [36]:
# Rows reordered by the values in column "c", smallest first.
# This displays a sorted copy; df itself is not reassigned here.
df.sort_values(by="c")

Unnamed: 0,a,b,c,d
2021-01-02,-1.047581,1.161637,-0.803006,-2.093864
2021-01-03,-1.151396,0.575674,-0.723401,-0.487148
2021-01-07,-0.581449,1.246238,-0.510814,-1.517736
2021-01-06,0.944919,-1.035663,-0.510705,-1.078126
2021-01-04,-0.628977,1.176879,-0.412652,2.204962
2021-01-01,-0.248532,-0.633809,0.365227,-2.241089
2021-01-05,1.115872,1.940716,0.584026,0.259215
2021-01-09,0.601997,-0.600356,0.700874,-0.725331
2021-01-08,1.77243,-0.427322,0.726918,-0.831456
2021-01-10,1.728482,0.321951,1.031182,1.13289


In [37]:
# Select a single column as a Series using attribute access.
df.a

2021-01-01   -0.248532
2021-01-02   -1.047581
2021-01-03   -1.151396
2021-01-04   -0.628977
2021-01-05    1.115872
2021-01-06    0.944919
2021-01-07   -0.581449
2021-01-08    1.772430
2021-01-09    0.601997
2021-01-10    1.728482
Freq: D, Name: a, dtype: float64

In [38]:
# Same selection with bracket notation; yields the same Series as df.a.
df["a"]

2021-01-01   -0.248532
2021-01-02   -1.047581
2021-01-03   -1.151396
2021-01-04   -0.628977
2021-01-05    1.115872
2021-01-06    0.944919
2021-01-07   -0.581449
2021-01-08    1.772430
2021-01-09    0.601997
2021-01-10    1.728482
Freq: D, Name: a, dtype: float64

In [39]:
# Integer slicing with [] selects rows by position: the first four rows
# (the end position is excluded).
df[0:4]

Unnamed: 0,a,b,c,d
2021-01-01,-0.248532,-0.633809,0.365227,-2.241089
2021-01-02,-1.047581,1.161637,-0.803006,-2.093864
2021-01-03,-1.151396,0.575674,-0.723401,-0.487148
2021-01-04,-0.628977,1.176879,-0.412652,2.204962


In [40]:
# Slicing with date strings selects rows by label; unlike positional
# slicing, both endpoints (01-02 and 01-07) are included.
df["20210102":"20210107"]

Unnamed: 0,a,b,c,d
2021-01-02,-1.047581,1.161637,-0.803006,-2.093864
2021-01-03,-1.151396,0.575674,-0.723401,-0.487148
2021-01-04,-0.628977,1.176879,-0.412652,2.204962
2021-01-05,1.115872,1.940716,0.584026,0.259215
2021-01-06,0.944919,-1.035663,-0.510705,-1.078126
2021-01-07,-0.581449,1.246238,-0.510814,-1.517736


In [41]:
# Label-based selection of one row; the result is a Series indexed by
# column name.
df.loc[dates[0]]

a   -0.248532
b   -0.633809
c    0.365227
d   -2.241089
Name: 2021-01-01 00:00:00, dtype: float64

In [42]:
# All rows (:), restricted to columns "b" and "c".
df.loc[:,["b","c"]]

Unnamed: 0,b,c
2021-01-01,-0.633809,0.365227
2021-01-02,1.161637,-0.803006
2021-01-03,0.575674,-0.723401
2021-01-04,1.176879,-0.412652
2021-01-05,1.940716,0.584026
2021-01-06,-1.035663,-0.510705
2021-01-07,1.246238,-0.510814
2021-01-08,-0.427322,0.726918
2021-01-09,-0.600356,0.700874
2021-01-10,0.321951,1.031182


In [43]:
# Label slice on the rows (both endpoints included) combined with a list
# of column labels.
df.loc["20210103":"20210106",["a","b"]]

Unnamed: 0,a,b
2021-01-03,-1.151396,0.575674
2021-01-04,-0.628977,1.176879
2021-01-05,1.115872,1.940716
2021-01-06,0.944919,-1.035663


In [44]:
# A single row label plus a column list reduces the result to a Series
# holding just those columns for that date.
df.loc["20210103", ["c","d"]]

c   -0.723401
d   -0.487148
Name: 2021-01-03 00:00:00, dtype: float64

In [45]:
# One row label and one column label return a single scalar value.
df.loc[dates[1],"a"]

-1.0475811442402923

In [47]:
# .at accesses a single scalar by row/column label.
df.at[dates[1],"a"]  # same result as df.loc[dates[1], "a"]

-1.0475811442402923