In [71]:
import pandas as pd
import numpy as np

## `DataFrame`

#### Create `DataFrame`

In [72]:
# Column-oriented sample data: each key becomes a DataFrame column and each
# value supplies that column's five entries, one per forum user.
forum_users = {
    "User ID": np.arange(1, 6),  # NumPy integer array holding 1..5
    "Username": ["bill", "john", "elly", "fred", "any"],
    "Age": [18, 35, 25, 38, None],  # None marks the last user's missing age
    "Joined date": pd.to_datetime(
        ["2032-01-01", "2032-02-15", "2032-04-26", "2032-06-21", "2032-09-15"]
    ),
    "Total posts": [150, 230, 80, 420, 310],
    "Reputation": [500, 720, 200, 940, 500],
}

# Build the frame from the dict; no index is passed, so rows get a default index.
df = pd.DataFrame(data=forum_users)
df

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation
0,1,bill,18.0,2032-01-01,150,500
1,2,john,35.0,2032-02-15,230,720
2,3,elly,25.0,2032-04-26,80,200
3,4,fred,38.0,2032-06-21,420,940
4,5,any,,2032-09-15,310,500


#### DataFrame `shape`

In [73]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape

(5, 6)

In [74]:
# Confirm the class of the object built above
type(df)

pandas.core.frame.DataFrame

#### DataFrame `columns`

In [75]:
# Column labels, returned as a pandas Index
df.columns

Index(['User ID', 'Username', 'Age', 'Joined date', 'Total posts',
       'Reputation'],
      dtype='object')

In [76]:
# The column labels are held in an Index object, not a plain list
type(df.columns)

pandas.core.indexes.base.Index

In [77]:
# Convert the column Index into a plain Python list of labels
df.columns.tolist()

['User ID', 'Username', 'Age', 'Joined date', 'Total posts', 'Reputation']

#### DataFrame `index`

In [78]:
# Row labels; here the default RangeIndex running from 0 up to (not including) 5
df.index

RangeIndex(start=0, stop=5, step=1)

In [79]:
# Row labels materialised as a plain Python list
df.index.tolist()

[0, 1, 2, 3, 4]

In [80]:
# dtype of every column; Age, the only column with a missing value, shows as float64
df.dtypes

User ID                 int32
Username               object
Age                   float64
Joined date    datetime64[ns]
Total posts             int64
Reputation              int64
dtype: object

#### DataFrame `values`

In [81]:
# All cell values as one 2-D NumPy array; the mixed column types give dtype=object
df.values

array([[1, 'bill', 18.0, Timestamp('2032-01-01 00:00:00'), 150, 500],
       [2, 'john', 35.0, Timestamp('2032-02-15 00:00:00'), 230, 720],
       [3, 'elly', 25.0, Timestamp('2032-04-26 00:00:00'), 80, 200],
       [4, 'fred', 38.0, Timestamp('2032-06-21 00:00:00'), 420, 940],
       [5, 'any', nan, Timestamp('2032-09-15 00:00:00'), 310, 500]],
      dtype=object)

In [82]:
# The values are exposed as a NumPy ndarray
type(df.values)

numpy.ndarray

#### DataFrame `head` and `tail`

In [83]:
df.head(3)  # first 3 rows; with no argument head() returns 5

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation
0,1,bill,18.0,2032-01-01,150,500
1,2,john,35.0,2032-02-15,230,720
2,3,elly,25.0,2032-04-26,80,200


In [84]:
df.tail(2)  # last 2 rows; with no argument tail() returns 5

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation
3,4,fred,38.0,2032-06-21,420,940
4,5,any,,2032-09-15,310,500


#### DataFrame `describe`

In [85]:
# Summary statistics (count, mean, min, quartiles, max, std) for the numeric
# and datetime columns; the text column Username is left out
df.describe()

Unnamed: 0,User ID,Age,Joined date,Total posts,Reputation
count,5.0,4.0,5,5.0,5.0
mean,3.0,29.0,2032-04-28 04:48:00,238.0,572.0
min,1.0,18.0,2032-01-01 00:00:00,80.0,200.0
25%,2.0,23.25,2032-02-15 00:00:00,150.0,500.0
50%,3.0,30.0,2032-04-26 00:00:00,230.0,500.0
75%,4.0,35.75,2032-06-21 00:00:00,310.0,720.0
max,5.0,38.0,2032-09-15 00:00:00,420.0,940.0
std,1.581139,9.201449,,133.304163,276.622486


In [86]:
# Same summary with the float values rounded to 2 decimal places
df.describe().round(2)

Unnamed: 0,User ID,Age,Joined date,Total posts,Reputation
count,5.0,4.0,5,5.0,5.0
mean,3.0,29.0,2032-04-28 04:48:00,238.0,572.0
min,1.0,18.0,2032-01-01 00:00:00,80.0,200.0
25%,2.0,23.25,2032-02-15 00:00:00,150.0,500.0
50%,3.0,30.0,2032-04-26 00:00:00,230.0,500.0
75%,4.0,35.75,2032-06-21 00:00:00,310.0,720.0
max,5.0,38.0,2032-09-15 00:00:00,420.0,940.0
std,1.58,9.2,,133.3,276.62


#### DataFrame `select_dtypes`

In [87]:
# NOTE(review): no parentheses here, so the cell displays the bound method
# object itself instead of calling it; the actual calls with `include=` follow below.
df.select_dtypes

<bound method DataFrame.select_dtypes of    User ID Username   Age Joined date  Total posts  Reputation
0        1     bill  18.0  2032-01-01          150         500
1        2     john  35.0  2032-02-15          230         720
2        3     elly  25.0  2032-04-26           80         200
3        4     fred  38.0  2032-06-21          420         940
4        5      any   NaN  2032-09-15          310         500>

In [88]:
# Keep only the object-dtype columns; here that is just Username
df.select_dtypes(include='object')

Unnamed: 0,Username
0,bill
1,john
2,elly
3,fred
4,any


In [89]:
# Labels of the object-dtype columns
df.select_dtypes(include='object').columns

Index(['Username'], dtype='object')

In [90]:
# Only columns whose dtype is exactly int64 match; 'User ID' is int32 in this
# run (see the dtypes output), so it is not selected
df.select_dtypes(include='int64')

Unnamed: 0,Total posts,Reputation
0,150,500
1,230,720
2,80,200
3,420,940
4,310,500


#### DataFrame `isna`

In [91]:
df.isna()  # Boolean frame of the same shape: True marks a missing value (only Age in row 4 here)

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,True,False,False,False


In [92]:
# Number of missing values per column (each True counts as 1 when summed)
df.isna().sum()

User ID        0
Username       0
Age            1
Joined date    0
Total posts    0
Reputation     0
dtype: int64

In [93]:
# The per-column counts come back as a Series
type(df.isna().sum())

pandas.core.series.Series

## `Series`
Every column of a `DataFrame` is a `Series`.

In [97]:
# Selecting a single column by its label yields a Series
username_series = df['Username']
username_series

0    bill
1    john
2    elly
3    fred
4     any
Name: Username, dtype: object

In [98]:
# Confirm that the selected column is a Series
type(username_series)

pandas.core.series.Series

#### `values`

In [99]:
# Underlying data of the Series as a 1-D NumPy array
username_series.values

array(['bill', 'john', 'elly', 'fred', 'any'], dtype=object)

In [100]:
# As with the DataFrame, the values are a NumPy ndarray
type(username_series.values)

numpy.ndarray

#### `index`

In [101]:
# Row labels of the Series; it shows the same RangeIndex as df.index
username_series.index

RangeIndex(start=0, stop=5, step=1)

#### `value_counts`

In [103]:
# Occurrences of each distinct value; every username appears exactly once
df['Username'].value_counts()

Username
bill    1
john    1
elly    1
fred    1
any     1
Name: count, dtype: int64

In [105]:
# ascending=True lists the least frequent values first; 500 (seen twice) comes last
df['Reputation'].value_counts(ascending=True)

Reputation
720    1
200    1
940    1
500    2
Name: count, dtype: int64

#### `unique`

In [106]:
# Distinct values as a NumPy array, listed in order of first appearance
df['Reputation'].unique()

array([500, 720, 200, 940], dtype=int64)

#### `sort_values`

In [108]:
# Sort the usernames in descending (reverse alphabetical) order; each value
# keeps its original row label
df['Username'].sort_values(ascending=False)

1    john
3    fred
2    elly
0    bill
4     any
Name: Username, dtype: object