In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed NumPy's legacy global generator before the first random draw so that
# Restart Kernel -> Run All yields the same numbers every time. Every later
# cell that calls np.random.randn draws from this same seeded stream.
# Outputs recorded before the seed was introduced will differ; re-run the
# notebook to refresh them.
np.random.seed(0)

# Eight consecutive daily timestamps starting 2000-01-01 (row labels for df).
index = pd.date_range('1/1/2000', periods=8)
# Five random values labelled 'a'..'e'.
s = pd.Series(np.random.randn(5), index=list('abcde'))
# 8x3 random frame: dates as rows, columns 'A', 'B', 'C'.
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=list('ABC'))

In [3]:
# 1000 standard-normal draws on a default integer index; used below to show
# head()/tail() on a series too long to display in full.
long_series = pd.Series(np.random.standard_normal(1000))

In [4]:
long_series.head()

0   -0.792948
1    0.332755
2    1.215749
3   -0.773596
4   -1.553215
dtype: float64

In [5]:
long_series.tail(3)

997    0.418473
998   -1.049084
999   -0.595619
dtype: float64

In [6]:
# Show only the first two rows of the frame.
df.head(2)

Unnamed: 0,A,B,C
2000-01-01,0.75856,-0.873196,0.641238
2000-01-02,-1.617565,0.989961,-0.634085


In [7]:
# Lower-case every column label ('A', 'B', 'C' -> 'a', 'b', 'c') by assigning
# a new set of labels; the data itself is untouched.
df.columns = df.columns.str.lower()
df

Unnamed: 0,a,b,c
2000-01-01,0.75856,-0.873196,0.641238
2000-01-02,-1.617565,0.989961,-0.634085
2000-01-03,0.775357,-0.706251,0.093086
2000-01-04,-0.17536,1.000553,-0.618728
2000-01-05,-0.563869,-1.152684,-0.497674
2000-01-06,-0.008242,-0.039551,-1.216492
2000-01-07,0.70325,1.907884,-0.564622
2000-01-08,-1.324701,1.734838,-1.849883


In [8]:
s.array

<NumpyExtensionArray>
[-1.0924504529041525, -0.4393609793059458, -1.2518543308914079,
  0.9507256932847403,  0.1406321619045329]
Length: 5, dtype: float64

In [9]:
s.index.array

<NumpyExtensionArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

In [10]:
s.index.array[1]

'b'

In [11]:
s.to_numpy()

array([-1.09245045, -0.43936098, -1.25185433,  0.95072569,  0.14063216])

In [12]:
np.asarray(s)

array([-1.09245045, -0.43936098, -1.25185433,  0.95072569,  0.14063216])

In [13]:
# Both conversions expose the Series' existing buffer instead of copying it.
# Check that with np.shares_memory rather than `is`: object identity is an
# implementation detail, and it is False whenever pandas hands out a fresh
# (read-only) view per call, as it does with Copy-on-Write enabled.
np.shares_memory(s.to_numpy(), np.asarray(s))

True

In [14]:
# Two consecutive days localized to the CET timezone.
cet_days = pd.date_range(start='2000', periods=2, tz='CET')
ser = pd.Series(cet_days)
# dtype=object keeps each element as a timezone-aware Timestamp.
ser.to_numpy(dtype=object)

array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
       Timestamp('2000-01-02 00:00:00+0100', tz='CET')], dtype=object)

In [15]:
# Asking for plain datetime64[ns] drops the timezone: the CET midnights shown
# in the previous cell (+01:00) come back as naive values at 23:00 on the
# preceding day, i.e. the same instants expressed in UTC.
ser.to_numpy(dtype='datetime64[ns]')

array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'],
      dtype='datetime64[ns]')

In [16]:
df.to_numpy()

array([[ 0.75855987, -0.87319575,  0.64123794],
       [-1.61756461,  0.98996059, -0.6340854 ],
       [ 0.77535725, -0.706251  ,  0.09308607],
       [-0.17535978,  1.00055286, -0.61872774],
       [-0.56386857, -1.15268367, -0.49767413],
       [-0.00824161, -0.03955067, -1.21649242],
       [ 0.70325047,  1.90788404, -0.56462181],
       [-1.32470091,  1.73483775, -1.84988278]])

In [17]:
# Three random columns whose indexes only partially overlap, so the aligned
# frame has NaN where a column has no value for a row label:
#   one   -> rows a, b, c
#   two   -> rows a, b, c, d
#   three -> rows b, c, d
labels = list("abcd")
df = pd.DataFrame(
    dict(
        one=pd.Series(np.random.randn(3), index=labels[:3]),
        two=pd.Series(np.random.randn(4), index=labels),
        three=pd.Series(np.random.randn(3), index=labels[1:]),
    )
)
df

Unnamed: 0,one,two,three
a,0.121243,0.397873,
b,0.498935,-1.146834,-2.98784
c,1.002301,0.249228,-0.045676
d,,-0.844664,0.296286


In [18]:
row = df.iloc[1]
row

one      0.498935
two     -1.146834
three   -2.987840
Name: b, dtype: float64

In [19]:
col = df['two']
col

a    0.397873
b   -1.146834
c    0.249228
d   -0.844664
Name: two, dtype: float64

In [20]:
df.sub(row, axis='columns')

Unnamed: 0,one,two,three
a,-0.377691,1.544707,
b,0.0,0.0,0.0
c,0.503366,1.396063,2.942164
d,,0.302171,3.284126


In [21]:
df.sub(col, axis='index')

Unnamed: 0,one,two,three
a,-0.276629,0.0,
b,1.645769,0.0,-1.841006
c,0.753072,0.0,-0.294904
d,,0.0,1.140949


In [22]:
# Counter-example: `col` is labelled by the row labels (a-d), so aligning it
# against the column labels (one/two/three) finds nothing in common. The
# result has the union of both label sets as columns and is entirely NaN.
# Subtracting a column needs axis='index', as in the previous cell.
# NOTE(review): presumably kept on purpose to show the pitfall — confirm.
df.sub(col, axis='columns')

Unnamed: 0,a,b,c,d,one,three,two
a,,,,,,,
b,,,,,,,
c,,,,,,,
d,,,,,,,


In [23]:
dfmi = df.copy()

In [24]:
# Swap the flat a-d index for a two-level (first, second) index. Note that the
# inner label 'a' appears under both outer groups 1 and 2.
first_level = [1, 1, 2, 2]
second_level = ['a', 'b', 'c', 'a']
dfmi.index = pd.MultiIndex.from_tuples(
    list(zip(first_level, second_level)),
    names=['first', 'second'],
)

dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,0.121243,0.397873,
1,b,0.498935,-1.146834,-2.98784
2,c,1.002301,0.249228,-0.045676
2,a,,-0.844664,0.296286


In [25]:
dfmi.sub(col, axis='index', level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-0.276629,0.0,
1,b,1.645769,0.0,-1.841006
2,c,0.753072,0.0,-0.294904
2,a,,-1.242537,-0.101587


In [26]:
dfmi.sub(col, axis=0, level='second')

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-0.276629,0.0,
1,b,1.645769,0.0,-1.841006
2,c,0.753072,0.0,-0.294904
2,a,,-1.242537,-0.101587


In [27]:
s = pd.Series(np.arange(10))
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [28]:
div, rem = divmod(s, 3)

In [29]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    2
7    2
8    2
9    3
dtype: int64

In [30]:
rem

0    0
1    1
2    2
3    0
4    1
5    2
6    0
7    1
8    2
9    0
dtype: int64

In [31]:
type(div)

pandas.core.series.Series

In [32]:
idx = pd.Index(np.arange(10))
idx

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [33]:
div, rem = divmod(idx, 3)

In [34]:
div

Index([0, 0, 0, 1, 1, 1, 2, 2, 2, 3], dtype='int64')

In [35]:
rem

Index([0, 1, 2, 0, 1, 2, 0, 1, 2, 0], dtype='int64')

In [36]:
# Element-wise divmod: position i of `s` is divided by position i of the
# divisor list, which is 2, 2, 3, 3, 4, 4, 5, 5, 6, 6.
divisors = [d for d in range(2, 7) for _ in range(2)]
div, rem = divmod(s, divisors)

In [37]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
8    1
9    1
dtype: int64

In [38]:
rem

0    0
1    1
2    2
3    0
4    0
5    1
6    1
7    2
8    2
9    3
dtype: int64

In [39]:
df2 = df.copy()
df2

Unnamed: 0,one,two,three
a,0.121243,0.397873,
b,0.498935,-1.146834,-2.98784
c,1.002301,0.249228,-0.045676
d,,-0.844664,0.296286


In [40]:
df2.loc['a', 'three'] = 1.0

In [41]:
df

Unnamed: 0,one,two,three
a,0.121243,0.397873,
b,0.498935,-1.146834,-2.98784
c,1.002301,0.249228,-0.045676
d,,-0.844664,0.296286


In [42]:
df2

Unnamed: 0,one,two,three
a,0.121243,0.397873,1.0
b,0.498935,-1.146834,-2.98784
c,1.002301,0.249228,-0.045676
d,,-0.844664,0.296286


In [43]:
df + df2

Unnamed: 0,one,two,three
a,0.242487,0.795745,
b,0.997869,-2.293669,-5.97568
c,2.004602,0.498457,-0.091351
d,,-1.689328,0.592571


In [44]:
# fill_value=0 stands in for a value missing on only ONE side: (a, three) is
# NaN in df but 1.0 in df2, so the sum is 1.0 instead of NaN. Where both
# frames are missing, as at (d, one), the result stays NaN.
df.add(df2, fill_value=0)

Unnamed: 0,one,two,three
a,0.242487,0.795745,1.0
b,0.997869,-2.293669,-5.97568
c,2.004602,0.498457,-0.091351
d,,-1.689328,0.592571


In [45]:
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [46]:
(df > 0).any()

one      True
two      True
three    True
dtype: bool

In [47]:
(df > 0).any().any()

True

In [48]:
df.empty

False

In [49]:
pd.DataFrame(columns=list('ABC')).empty

True

In [50]:
# Element-wise comparison: positions that are NaN in df, (a, three) and
# (d, one), come out False because NaN never compares equal to NaN, even
# though both sides were computed from the same data.
df + df == df * 2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [51]:
(df + df == df * 2).all()

one      False
two       True
three    False
dtype: bool

In [52]:
# Unlike the element-wise `==` above, equals() treats NaNs in matching
# positions as equal, so the two frames are reported as identical.
(df + df).equals(df * 2)

True

In [53]:
# Two frames with overlapping integer indexes and gaps in different places,
# used below to show how missing values in one are patched from the other.
# df1 has rows 0-4; df2 has rows 0-5.
nan = np.nan
df1 = pd.DataFrame(
    dict(
        A=[1.0, nan, 3.0, 5.0, nan],
        B=[nan, 2.0, 3.0, nan, 6.0],
    )
)
df2 = pd.DataFrame(
    dict(
        A=[5.0, 2.0, 4.0, nan, 3.0, 7.0],
        B=[nan, nan, 3.0, 4.0, 6.0, 8.0],
    )
)

In [54]:
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [55]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [56]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [57]:
def combiner(x, y):
    """Return ``x`` with its missing entries taken from ``y`` instead.

    Works element-wise: wherever ``x`` holds a non-missing value that value is
    kept; everywhere else the value at the same position in ``y`` is used
    (which may itself be missing). The result is whatever ``np.where``
    produces, i.e. a NumPy array rather than a pandas object.
    """
    x_present = pd.notna(x)
    return np.where(x_present, x, y)

In [58]:
df1.combine(df2, combiner)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [59]:
df

Unnamed: 0,one,two,three
a,0.121243,0.397873,
b,0.498935,-1.146834,-2.98784
c,1.002301,0.249228,-0.045676
d,,-0.844664,0.296286


In [60]:
# Mean of each column (reduce down the rows); NaNs are skipped.
df.mean(axis='index')

one      0.540826
two     -0.336099
three   -0.912410
dtype: float64

In [61]:
# Mean of each row (reduce across the columns); NaNs are skipped.
df.mean(axis='columns')

a    0.259558
b   -1.211913
c    0.401951
d   -0.274189
dtype: float64

In [62]:
df.sum(axis=0, skipna=False)

one           NaN
two     -1.344397
three         NaN
dtype: float64

In [63]:
df.sum(axis=0, skipna=True)

one      1.622479
two     -1.344397
three   -2.737230
dtype: float64

In [64]:
# Standardize each column: subtract its mean, then divide by its standard
# deviation. Both per-column Series align with df on the column labels.
ts_stand = df.sub(df.mean()).div(df.std())

In [65]:
ts_stand.std()

one      1.0
two      1.0
three    1.0
dtype: float64

In [66]:
# Standardize each row: the per-row statistics are indexed by row label, so
# they must be aligned along the index rather than the columns.
row_mean = df.mean(axis=1)
row_std = df.std(axis=1)
xs_stand = df.sub(row_mean, axis='index').div(row_std, axis='index')

In [67]:
xs_stand.std(1)

a    1.0
b    1.0
c    1.0
d    1.0
dtype: float64

In [68]:
df.cumsum()

Unnamed: 0,one,two,three
a,0.121243,0.397873,
b,0.620178,-0.748962,-2.98784
c,1.622479,-0.499733,-3.033516
d,,-1.344397,-2.73723
