In [1]:
# Import pandas and NumPy
import pandas as pd
import numpy as np

In [2]:
# Build a Series from a plain Python list.
# The NaN entry makes pandas store every value as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# Display the Series: index labels, values, and dtype
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Build a DatetimeIndex of six consecutive days starting 2013-01-01;
# it is used as the row index of the DataFrame created next.
dates = pd.date_range(start="20130101", periods=6)

In [5]:
# Display the DatetimeIndex (values, dtype, and frequency)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the 6x4 standard-normal values from a seeded generator so that df, and
# every output derived from it, is identical on each Restart & Run All.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))

In [7]:
# Display the DataFrame: six date-indexed rows, columns A-D
df

Unnamed: 0,A,B,C,D
2013-01-01,0.171397,0.637813,1.010527,-0.659848
2013-01-02,0.400307,1.041624,-1.299575,-1.248932
2013-01-03,-0.051891,1.11252,0.355347,0.299261
2013-01-04,0.507409,-0.282377,0.031663,2.452497
2013-01-05,0.13902,0.964411,-1.651988,0.264638
2013-01-06,0.17567,0.113377,0.611708,-0.105987


In [8]:
# Build a DataFrame from a dict of column name -> values.
# Scalar entries (A, B, F) are repeated to match the length of the
# array-like columns (C, D, E).
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        # An explicit list of labels (rather than a bare range) gives the
        # Series, and hence the frame, a plain integer Index.
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [10]:
# Display the dict-built DataFrame
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
# Inspect the dtype of each column (each column of df2 has its own dtype)
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [12]:
# Viewing data: head/tail, columns, index, NumPy conversion, and summary statistics

In [14]:
# First two rows of df
df.head(n=2)

Unnamed: 0,A,B,C,D
2013-01-01,0.171397,0.637813,1.010527,-0.659848
2013-01-02,0.400307,1.041624,-1.299575,-1.248932


In [15]:
# Last three rows of df2
df2.tail(n=3)

Unnamed: 0,A,B,C,D,E,F
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [16]:
# View the column labels of df
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [17]:
# View the row index of df2
df2.index

Index([0, 1, 2, 3], dtype='int64')

In [19]:
# Confirm that df is a pandas DataFrame before converting it to NumPy

type(df)

pandas.core.frame.DataFrame

In [20]:
# Convert the DataFrame's values to a NumPy array (index and column labels are not included)
df.to_numpy()

array([[ 0.17139683,  0.63781273,  1.01052699, -0.65984771],
       [ 0.40030666,  1.04162374, -1.29957507, -1.24893233],
       [-0.05189142,  1.11252027,  0.35534653,  0.2992613 ],
       [ 0.50740874, -0.28237736,  0.03166276,  2.45249656],
       [ 0.13902008,  0.96441076, -1.65198801,  0.26463775],
       [ 0.17566978,  0.11337721,  0.61170753, -0.10598672]])

In [21]:
# Quick statistical summary of the data.
# Only the numeric and datetime columns (A-D) are summarized here; the
# categorical column E and the object column F are left out.

df2.describe()

Unnamed: 0,A,B,C,D
count,4.0,4,4.0,4.0
mean,1.0,2013-01-02 00:00:00,1.0,3.0
min,1.0,2013-01-02 00:00:00,1.0,3.0
25%,1.0,2013-01-02 00:00:00,1.0,3.0
50%,1.0,2013-01-02 00:00:00,1.0,3.0
75%,1.0,2013-01-02 00:00:00,1.0,3.0
max,1.0,2013-01-02 00:00:00,1.0,3.0
std,0.0,,0.0,0.0


In [24]:
# Transpose df2: column labels become the row index and vice versa
df2.T

Unnamed: 0,0,1,2,3
A,1.0,1.0,1.0,1.0
B,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00
C,1.0,1.0,1.0,1.0
D,3,3,3,3
E,test,train,test,train
F,foo,foo,foo,foo


In [25]:
# Sort the columns by label in descending order (D, C, B, A); rows are unchanged
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.659848,1.010527,0.637813,0.171397
2013-01-02,-1.248932,-1.299575,1.041624,0.400307
2013-01-03,0.299261,0.355347,1.11252,-0.051891
2013-01-04,2.452497,0.031663,-0.282377,0.507409
2013-01-05,0.264638,-1.651988,0.964411,0.13902
2013-01-06,-0.105987,0.611708,0.113377,0.17567


In [28]:
# Sort rows by the values in column B.
# Every B value in df2 is the same timestamp, so the displayed order is unchanged.
df2.sort_values(by="B")

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo
