## Object creation

In [1]:
import pandas as pd
import numpy as np

pandas.core.series.Series : One-dimensional ndarray with axis labels (including time series).

pandas.core.frame.DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data.

pandas.core.indexes.datetimes.DatetimeIndex : Immutable index of datetime64 values. `pd.date_range()` returns a fixed-frequency DatetimeIndex.

In [2]:
# Seed the legacy global NumPy RNG so the random values drawn here (and by
# later cells that call np.random) are identical on every Restart & Run All.
np.random.seed(0)

# One example of each core pandas object; print the concrete class of each.
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])
print(type(s))
df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"])
print(type(df))
index = pd.date_range("1/1/2000", periods=8, freq='D')
print(type(index))

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [3]:
s

a    1.176446
b   -0.957168
c    1.048738
d    0.089576
e    0.596707
dtype: float64

In [4]:
df

Unnamed: 0,A,B,C
0,1.26464,-0.624526,-0.43277
1,-0.533275,-0.671251,1.422024
2,0.750324,0.23806,-0.696794
3,0.443728,-0.526435,0.303939
4,-0.613461,-0.948812,-0.207974
5,-1.238519,0.87838,0.456624
6,1.131209,-2.211309,1.630645
7,0.024361,-0.304483,-1.512404


In [5]:
index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

## Head and tail

To view a small sample of a Series or DataFrame object, use the head() and tail() methods. The default number of elements to display is five, but you may pass a custom number.

In [6]:
# Draw 1000 standard-normal samples and wrap them in a Series with the
# default integer index (0..999); head() then shows the first five.
long_series = pd.Series(np.random.standard_normal(1000))
long_series.head()

0    1.370738
1    1.750484
2    0.771888
3    0.556423
4    0.196744
dtype: float64

In [7]:
long_series.tail(3)

997   -0.614576
998   -0.085807
999    0.295430
dtype: float64

## Datatypes in a dataframe

In [8]:
df.dtypes

A    float64
B    float64
C    float64
dtype: object

In [9]:
# Build a frame whose columns each have a different dtype. Scalars ("A", "B",
# "F") are broadcast to the four rows set by the array-like columns.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.array([3, 3, 3, 3], dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [10]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### PandasArray from a pandas series

In [11]:
# Series.array returns the array object backing the Series; for this float64
# Series the recorded output shows a PandasArray.
# NOTE(review): pandas 2.1+ renamed PandasArray to NumpyExtensionArray, so the
# repr and type name will differ on newer versions - confirm the target version.
print(s.array)
type(s.array)

<PandasArray>
[ 1.1764456247726587, -0.9571677971553935,  1.0487375734617637,
 0.08957579106346647,  0.5967071170816219]
Length: 5, dtype: float64


pandas.core.arrays.numpy_.PandasArray

In [12]:
s.index.array

<PandasArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

### Numpy Array from a pandas series

In [13]:
# dtype=object asks to_numpy() for an object-dtype ndarray, so each element is
# printed at full precision rather than with NumPy's shortened float repr.
print(s.to_numpy(dtype=object))
type(s.to_numpy(dtype=object))

[1.1764456247726587 -0.9571677971553935 1.0487375734617637
 0.08957579106346647 0.5967071170816219]


numpy.ndarray

In [14]:
# np.asarray(s) converts the Series values to a plain NumPy ndarray; the
# index labels 'a'..'e' are not part of the result.
print(np.asarray(s))
type(np.asarray(s))


[ 1.17644562 -0.9571678   1.04873757  0.08957579  0.59670712]


numpy.ndarray

In [15]:
s.to_numpy(dtype=object)

array([1.1764456247726587, -0.9571677971553935, 1.0487375734617637,
       0.08957579106346647, 0.5967071170816219], dtype=object)

### Numpy Array from a pandas dataframe

In [16]:
# df2 mixes floats, timestamps, ints, categoricals and strings, so the
# resulting array falls back to dtype=object (see the output below).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [17]:
# Every column of df is float64, so the result is a plain 2-D float array.
df.to_numpy()

array([[ 1.26464019, -0.62452599, -0.43276983],
       [-0.53327497, -0.6712514 ,  1.42202379],
       [ 0.75032426,  0.23806033, -0.69679398],
       [ 0.44372759, -0.52643522,  0.30393925],
       [-0.61346082, -0.94881187, -0.20797361],
       [-1.23851869,  0.87838006,  0.45662425],
       [ 1.1312087 , -2.21130938,  1.63064525],
       [ 0.02436081, -0.30448306, -1.5124041 ]])

### add(), sub(), mul(), div() methods

In [18]:
# Small integer frame, indexed by shape name, used by the arithmetic examples.
df3 = pd.DataFrame(
    data={"angles": [0, 3, 4, 5], "degrees": [360, 180, 360, 540]},
    index=["circle", "triangle", "rectangle", "pentagon"],
)
df3

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360
pentagon,5,540


Add a scalar using the `+` operator; the `add()` method is the equivalent method form and returns the same result for the same scalar.

In [19]:
# Operator form: the scalar 1 is added to every cell of df3.
df3 + 1

Unnamed: 0,angles,degrees
circle,1,361
triangle,4,181
rectangle,5,361
pentagon,6,541


In [20]:
# Method form of df3 + 5: the scalar 5 is added to every value.
df3.add(5)

Unnamed: 0,angles,degrees
circle,5,365
triangle,8,185
rectangle,9,365
pentagon,10,545


In [21]:
# df3.iloc[1] is the 'triangle' row (angles=3, degrees=180) as a Series.
# With axis='columns' it is matched on column labels, so 3 is added to every
# 'angles' value and 180 to every 'degrees' value.
df3.add(df3.iloc[1], axis='columns')

Unnamed: 0,angles,degrees
circle,3,540
triangle,6,360
rectangle,7,540
pentagon,8,720


Subtract a list (or Series) along an axis, first with the `-` operator and then with the `sub()` method.

In [22]:
# The list has one entry per column: 1 is subtracted from 'angles' and 180
# from 'degrees' in every row.
df3 - [1, 180]

Unnamed: 0,angles,degrees
circle,-1,180
triangle,2,0
rectangle,3,180
pentagon,4,360


In [23]:
# axis=0 matches the list to rows instead: one entry per row, subtracted from
# both columns of that row.
df3.sub([1, 150, 5, 10], axis=0)

Unnamed: 0,angles,degrees
circle,-1,359
triangle,-147,30
rectangle,-1,355
pentagon,-5,530


Multiply by a dictionary along an axis: keys are matched to column labels by default, or to row labels with `axis='index'`.

In [24]:
# Dict keys are matched to column labels: 'angles' is multiplied by 0 and
# 'degrees' by 2.
df3.mul({'angles': 0, 'degrees': 2})

Unnamed: 0,angles,degrees
circle,0,720
triangle,0,360
rectangle,0,720
pentagon,0,1080


In [25]:
# With axis='index' the dict keys are matched to row labels, giving one
# multiplier per row that applies to both columns.
df3.mul({'circle': 0, 'triangle': 2, 'rectangle': 3, 'pentagon': 4}, axis='index')

Unnamed: 0,angles,degrees
circle,0,0
triangle,6,360
rectangle,12,1080
pentagon,20,2160


Multiply by a DataFrame of a different shape using the `*` operator, then with `mul()` and a `fill_value`.

In [26]:
# Same row labels as df3 but only the 'angles' column, so the two frames
# differ in shape.
other_df = pd.DataFrame(
    data={"angles": [0, 3, 4, 5]},
    index=["circle", "triangle", "rectangle", "pentagon"],
)
other_df

Unnamed: 0,angles
circle,0
triangle,3
rectangle,4
pentagon,5


In [27]:
# other_df has no 'degrees' column, so that column has nothing to multiply
# with and comes back empty (missing/NaN) in the result.
df3 * other_df

Unnamed: 0,angles,degrees
circle,0,
triangle,9,
rectangle,16,
pentagon,25,


In [28]:
# fill_value=0 stands in for the 'degrees' values missing from other_df
# before multiplying, so that column becomes 0.0 instead of missing.
df3.mul(other_df, fill_value=0)

Unnamed: 0,angles,degrees
circle,0,0.0
triangle,9,0.0
rectangle,16,0.0
pentagon,25,0.0


Divide by a constant with `div()`, then use the reverse version `rdiv()`, which divides the constant by the DataFrame instead.

In [29]:
# Method form of df3 / 10; the integer columns come back as floats.
df3.div(10)

Unnamed: 0,angles,degrees
circle,0.0,36.0
triangle,0.3,18.0
rectangle,0.4,36.0
pentagon,0.5,54.0


In [30]:
# Reverse version: rdiv(10) computes 10 / df3 rather than df3 / 10.
# circle's 'angles' value is 0, which is why that cell is inf in the output.
df3.rdiv(10)

Unnamed: 0,angles,degrees
circle,inf,0.027778
triangle,3.333333,0.055556
rectangle,2.5,0.027778
pentagon,2.0,0.018519


In [31]:
# Two small frames with identical labels, combined column by column below.
# NOTE: this rebinds `df2`, which earlier in the notebook held the mixed-dtype
# frame; re-running those earlier cells after this one will see this frame.
df1 = pd.DataFrame({'A': [1, 9], 'B': [4, 4]})
df2 = pd.DataFrame({'A': [4, 4], 'B': [5, 5]})
print("df1")
print(df1)
print("df2")
print(df2)


def take_smaller(s1, s2):
    """Return whichever of the two columns has the smaller sum.

    Parameters
    ----------
    s1, s2 : pd.Series
        The same-named column taken from the calling frame and from the
        other frame, as passed in by ``DataFrame.combine``.

    Returns
    -------
    pd.Series
        ``s1`` if its sum is strictly smaller than that of ``s2``,
        otherwise ``s2`` (so ties go to ``s2``).
    """
    return s1 if s1.sum() < s2.sum() else s2


# combine() calls take_smaller once per column: 'A' comes from df2 (8 < 10)
# and 'B' from df1 (8 < 10).
df1.combine(df2, take_smaller)

df1
   A  B
0  1  4
1  9  4
df2
   A  B
0  4  5
1  4  5


Unnamed: 0,A,B
0,4,4
1,4,4
