In [1]:
import pandas as pd
import numpy as np

## Pandas has two main data structures: Series and DataFrame

In [2]:
marks = pd.Series([87,86,92,82,78])

In [3]:
marks

0    87
1    86
2    92
3    82
4    78
dtype: int64

In [4]:
marks.name = "Marks in Each Subjects"

In [5]:
# A Series maps index labels to values, much like a dictionary
# (here the labels are still the default positions 0-4).
marks

0    87
1    86
2    92
3    82
4    78
Name: Marks in Each Subjects, dtype: int64

In [6]:
# Let's replace the default integer index of marks with subject names.
marks.index = ['English','Science','Mathematics','Geography','History']

In [7]:
marks

English        87
Science        86
Mathematics    92
Geography      82
History        78
Name: Marks in Each Subjects, dtype: int64

In [8]:
# A Series can also be built from a dict: the keys become the index labels.
fruits = pd.Series(dict(apple=5, banana=12, mango=10))

In [9]:
fruits.name = "Number of fruits in refrigerator"

In [10]:
fruits

apple      5
banana    12
mango     10
Name: Number of fruits in refrigerator, dtype: int64

In [11]:
fruits['apple']

5

In [12]:
marks['English']

87

In [13]:
marks.iloc[0]

87

In [14]:
marks.iloc[[0,2]]

English        87
Mathematics    92
Name: Marks in Each Subjects, dtype: int64

In [15]:
fruits + 6

apple     11
banana    18
mango     16
Name: Number of fruits in refrigerator, dtype: int64

In [16]:
fruits

apple      5
banana    12
mango     10
Name: Number of fruits in refrigerator, dtype: int64

In [17]:
# I ate one of each fruit so
fruits - 1

apple      4
banana    11
mango      9
Name: Number of fruits in refrigerator, dtype: int64

In [18]:
marks[marks>90]

Mathematics    92
Name: Marks in Each Subjects, dtype: int64

In [19]:
marks[marks>80]

English        87
Science        86
Mathematics    92
Geography      82
Name: Marks in Each Subjects, dtype: int64

In [20]:
marks>80

English         True
Science         True
Mathematics     True
Geography       True
History        False
Name: Marks in Each Subjects, dtype: bool

So the filters above work by using this boolean Series as a mask: only the elements marked True are kept.

In [21]:
marks[(marks > marks.mean())| (marks <80)]

English        87
Science        86
Mathematics    92
History        78
Name: Marks in Each Subjects, dtype: int64

In [22]:
marks[(marks > 80) & (marks <90)]

English      87
Science      86
Geography    82
Name: Marks in Each Subjects, dtype: int64

In [23]:
#My new marks
marks['English'] = 85

In [24]:
marks

English        85
Science        86
Mathematics    92
Geography      82
History        78
Name: Marks in Each Subjects, dtype: int64

In [25]:
marks[marks < 80] = 99

In [26]:
marks

English        85
Science        86
Mathematics    92
Geography      82
History        99
Name: Marks in Each Subjects, dtype: int64

# DataFrame
It looks like a normal table, with labelled rows and columns.

In [41]:
data = {'name': ['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'],
        'city': ['Mexico City', 'Toronto', 'Prague', 'Shanghai','Manchester', 'Cairo', 'Osaka'],
        'age': [41, 28, 33, 34, 38, 31, 37],
        'marks': [88.0, 79.0, 81.0, 80.0, 68.0, 61.0, 84.0]
       }

In [42]:
# Label each row with the person's name, reusing the 'name' list from data.
df = pd.DataFrame(data, index=data['name'])

In [43]:
df

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Ann,Ann,Toronto,28,79.0
Jana,Jana,Prague,33,81.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0


In [44]:
df.columns

Index(['name', 'city', 'age', 'marks'], dtype='object')

In [45]:
df.index

Index(['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'], dtype='object')

In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Xavier to Nori
Data columns (total 4 columns):
name     7 non-null object
city     7 non-null object
age      7 non-null int64
marks    7 non-null float64
dtypes: float64(1), int64(1), object(2)
memory usage: 280.0+ bytes


In [47]:
# Summary statistics; only the numeric columns (age, marks) are included here.
df.describe()

Unnamed: 0,age,marks
count,7.0,7.0
mean,34.571429,77.285714
std,4.429339,9.446592
min,28.0,61.0
25%,32.0,73.5
50%,34.0,80.0
75%,37.5,82.5
max,41.0,88.0


In [48]:
df.dtypes

name      object
city      object
age        int64
marks    float64
dtype: object

In [50]:
df['marks']

Xavier    88.0
Ann       79.0
Jana      81.0
Yi        80.0
Robin     68.0
Amal      61.0
Nori      84.0
Name: marks, dtype: float64

In [51]:
# Positional access goes through iloc: plain [2] on a label-indexed Series
# relies on a deprecated fallback from labels to positions.
df['name'].iloc[2]

'Jana'

In [54]:
# Select a row by its index label with loc
df.loc['Xavier']

name          Xavier
city     Mexico City
age               41
marks             88
Name: Xavier, dtype: object

In [55]:
# Select a row by its integer position with iloc
df.iloc[0]

name          Xavier
city     Mexico City
age               41
marks             88
Name: Xavier, dtype: object

In [56]:
df['marks'].to_frame()

Unnamed: 0,marks
Xavier,88.0
Ann,79.0
Jana,81.0
Yi,80.0
Robin,68.0
Amal,61.0
Nori,84.0


In [57]:
df.loc['Xavier':'Yi','marks']

Xavier    88.0
Ann       79.0
Jana      81.0
Yi        80.0
Name: marks, dtype: float64

In [58]:
df['marks'] > 80

Xavier     True
Ann       False
Jana       True
Yi        False
Robin     False
Amal      False
Nori       True
Name: marks, dtype: bool

In [59]:
df[df['marks'] > 80]

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Jana,Jana,Prague,33,81.0
Nori,Nori,Osaka,37,84.0


# Modifying DataFrames

In [63]:
df

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Ann,Ann,Toronto,28,79.0
Jana,Jana,Prague,33,81.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0


In [70]:
df.drop('Jana')

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0


In [71]:
# df still contains 'Jana': drop() returned a new DataFrame and left df
# unchanged (DataFrames are mutable; drop() is just not in-place by default).
df

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Jana,Jana,Prague,33,81.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0


In [72]:
# drop() does not change the original dataframe; it returns a new one.
# A good practice is to store that result under a different name.
temp_df = df.drop('Jana')

In [73]:
temp_df

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0


In [74]:
# Alternatively, rebind df to the result to discard the row for good.
# errors='ignore' keeps this cell safe to re-run once 'Jana' is already gone.
df = df.drop('Jana', errors='ignore')

In [75]:
df

Unnamed: 0,name,city,age,marks
Xavier,Xavier,Mexico City,41,88.0
Yi,Yi,Shanghai,34,80.0
Robin,Robin,Manchester,38,68.0
Amal,Amal,Cairo,31,61.0
Nori,Nori,Osaka,37,84.0
