### What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

https://pandas.pydata.org/about/index.html

### Pandas Series

A pandas Series is like a single column in a table: a one-dimensional, labeled array that can hold data of any type.

### Importing Pandas

In [2]:
import numpy as np
import pandas as pd

### Series from lists

In [3]:
# Series of strings built from a plain Python list
country = [
    'India',
    'Pakistan',
    'USA',
    'Nepal',
    'Srilanka',
]

pd.Series(data=country)

0       India
1    Pakistan
2         USA
3       Nepal
4    Srilanka
dtype: object

In [4]:
# Series of integers; with no index given, positions 0..n-1 become the labels
runs = [13, 24, 56, 78, 100]

runs_ser = pd.Series(data=runs)
runs_ser

0     13
1     24
2     56
3     78
4    100
dtype: int64

In [5]:
# custom index: label every mark with its subject instead of 0..n-1
marks = [67, 57, 89, 100]
subjects = ['maths', 'english', 'science', 'urdu']

pd.Series(data=marks, index=subjects)

maths       67
english     57
science     89
urdu       100
dtype: int64

In [6]:
# setting a name: `name` is attached to the Series and shown in its repr.
# Note that `marks` is rebound here from the plain list to the new Series.
marks = pd.Series(data=marks, index=subjects, name='Uzair ke marks')
marks

maths       67
english     57
science     89
urdu       100
Name: Uzair ke marks, dtype: int64

### Series from dict

In [7]:
# Building from a dict: the keys become the index labels, the values the data
marks = dict(maths=67, english=57, science=89, urdu=100)

marks_series = pd.Series(data=marks, name='Uzair ke marks')
marks_series

maths       67
english     57
science     89
urdu       100
Name: Uzair ke marks, dtype: int64

### Series Attributes

In [8]:
# size
marks_series.size

4

In [9]:
# dtype
marks_series.dtype

dtype('int64')

In [10]:
# name
marks_series.name

'Uzair ke marks'

In [11]:
# is_unique -> True only when no value in the Series repeats
# A cell displays only its last expression, so the first result is printed
# explicitly instead of being evaluated and silently discarded.
print(marks_series.is_unique)

pd.Series([1,1,2,3,4,5]).is_unique

False

In [12]:
# index
marks_series.index

Index(['maths', 'english', 'science', 'urdu'], dtype='object')

In [13]:
runs_ser.index

RangeIndex(start=0, stop=5, step=1)

In [20]:
# values
marks_series.values

array([ 67,  57,  89, 100])

### Series using read_csv

In [28]:
# with one col
subs = pd.read_csv('subs.csv')
subs

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
...,...
360,231
361,226
362,155
363,144


In [30]:
# with 2 cols
vk = pd.read_csv('kohli_ipl.csv',index_col='match_no')
vk

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1
...,...
211,0
212,20
213,73
214,25


In [36]:
movies = pd.read_csv('bollywood.csv',index_col='movie')
movies

Unnamed: 0_level_0,lead
movie,Unnamed: 1_level_1
Uri: The Surgical Strike,Vicky Kaushal
Battalion 609,Vicky Ahuja
The Accidental Prime Minister (film),Anupam Kher
Why Cheat India,Emraan Hashmi
Evening Shadows,Mona Ambegaonkar
...,...
Hum Tumhare Hain Sanam,Shah Rukh Khan
Aankhen (2002 film),Amitabh Bachchan
Saathiya (film),Vivek Oberoi
Company (film),Ajay Devgn


### Series methods

In [31]:
# head and tail
subs.head()

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44


In [32]:
vk.head(3)

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13


In [33]:
vk.tail(10)

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
206,0
207,0
208,9
209,58
210,30
211,0
212,20
213,73
214,25
215,7


In [37]:
# sample
movies.sample(5)

Unnamed: 0_level_0,lead
movie,Unnamed: 1_level_1
Ek Second... Jo Zindagi Badal De?,Bhushan Agarwal
Tahaan,Victor Banerjee
Bloody Isshq,Akash
Badlapur (film),Varun Dhawan
Calcutta Mail,Anil Kapoor


In [38]:
# value_counts -> movies
movies.value_counts()

lead            
Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Zara Sheikh          1
Zareen Khan          1
Ganesh Acharya       1
Garima Agarwal       1
Aanchal Dwivedi      1
Name: count, Length: 566, dtype: int64

In [53]:
# sort_values (the inplace variant is used further down):
# order the rows by runs, highest first, and read the values of the top row
vk.sort_values(by='runs', ascending=False).iloc[0].values

array([113])

In [None]:
# `vk` is a one-column DataFrame here, and DataFrame.sort_values requires `by`
# (calling it without `by` raises TypeError).
vk.sort_values(by='runs', ascending=False)

In [None]:
# sort_index -> inplace -> movies
movies.sort_index(ascending=False,inplace=True)

In [54]:
movies

Unnamed: 0_level_0,lead
movie,Unnamed: 1_level_1
Uri: The Surgical Strike,Vicky Kaushal
Battalion 609,Vicky Ahuja
The Accidental Prime Minister (film),Anupam Kher
Why Cheat India,Emraan Hashmi
Evening Shadows,Mona Ambegaonkar
...,...
Hum Tumhare Hain Sanam,Shah Rukh Khan
Aankhen (2002 film),Amitabh Bachchan
Saathiya (film),Vivek Oberoi
Company (film),Ajay Devgn


In [57]:
vk.sort_values(by='runs', inplace=True)

In [58]:
vk

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
8,0
87,0
93,0
91,0
206,0
...,...
164,100
120,100
123,108
126,109


### Series Maths Methods

In [None]:
# count
vk.count()

In [None]:
# sum -> product
subs.sum()

In [None]:
# mean -> median -> mode -> std -> var
# Every statistic is printed: a bare `subs.mean()` that is not the last line
# of the cell is computed and then thrown away without being shown.
print(subs.mean())
print(vk.median())
print(movies.mode())
print(subs.std())
print(vk.var())

In [None]:
# min/max
subs.max()

In [None]:
# describe
subs.describe()

### Series Indexing

In [None]:
# integer indexing
x = pd.Series([12,13,14,35,46,57,58,79,9])
x

In [None]:
# negative indexing
# `x[-1]` is looked up as the *label* -1, which the default 0..n-1 index does
# not contain, so it raises KeyError. `.iloc` asks for the last position.
x.iloc[-1]

In [None]:
movies

In [None]:
# `vk[-1]` is a label lookup (there is no label -1) and raises KeyError;
# `.iloc[-1]` returns the last row by position.
vk.iloc[-1]

In [None]:
# With string labels, `marks_series[-1]` only works through pandas' deprecated
# positional fallback; `.iloc` is the explicit positional accessor.
marks_series.iloc[-1]

In [None]:
# slicing: rows at positions 5 up to (not including) 16
vk.iloc[5:16]

In [None]:
# negative slicing: the last five rows by position
vk.iloc[-5:]

In [None]:
# every second row, starting from the first
movies.iloc[::2]

In [None]:
# fancy indexing: pick several rows at once by their match_no labels.
# On a DataFrame `vk[[1,3,4,5]]` would look for *columns* named 1, 3, 4, 5
# and raise KeyError, so the row labels are requested through `.loc`.
vk.loc[[1,3,4,5]]

In [None]:
# indexing with labels -> fancy indexing
# `.loc` looks the movie title up in the index; plain `movies[...]` on a
# DataFrame would search the column names instead and raise KeyError.
movies.loc['2 States (2014 film)']

### Editing Series

In [None]:
# using indexing
# The index holds subject names, so an integer key is a *position*; `.iloc`
# states that explicitly instead of relying on the deprecated positional
# fallback of `marks_series[1] = ...`.
marks_series.iloc[1] = 100
marks_series

In [None]:
# what if an index does not exist
marks_series['evs'] = 100

In [None]:
marks_series

In [None]:
# slicing
runs_ser[2:4] = [100,100]
runs_ser

In [None]:
# fancy indexing
runs_ser[[0,3,4]] = [0,0,0]
runs_ser

In [None]:
# using index label
# `.loc` targets the row whose index label is the movie title. On a DataFrame,
# `movies['2 States (2014 film)'] = ...` would instead add a new *column* with
# that name and fill every row with 'Alia Bhatt'.
movies.loc['2 States (2014 film)'] = 'Alia Bhatt'
movies

### Copy and Views

### Series with Python Functionalities

In [None]:
# len/type/dir/sorted/max/min
# Iterating a DataFrame yields its column labels, so the built-ins that
# iterate (sorted/min/max) are applied to the data column itself; otherwise
# they would only ever see the string 'Subscribers gained'.
subs_values = subs['Subscribers gained']
print(len(subs))
print(type(subs))
print(dir(subs))
print(sorted(subs_values))
print(min(subs_values))
print(max(subs_values))

In [None]:
# type conversion
list(marks_series)

In [None]:
dict(marks_series)

In [None]:
# membership operator
# `in` on a DataFrame tests the column labels, so the index (movie titles)
# is checked explicitly.
'2 States (2014 film)' in movies.index

In [None]:
'Alia Bhatt' in movies.values

In [None]:
movies

In [None]:
# looping
for i in movies.index:
  print(i)

In [None]:
# Arithmetic Operators(Broadcasting)
100 + marks_series

In [None]:
# Relational Operators

vk >= 50

### Boolean Indexing on Series

In [None]:
# Find the number of 50s and 100s scored by Kohli
# The mask is built from the `runs` column so that rows are actually dropped.
# A DataFrame-shaped mask (`vk[vk >= 50]`) keeps every row and only replaces
# non-matching cells with NaN, so its `.size` is always the full match count.
vk[vk['runs'] >= 50].size

In [None]:
# find number of ducks (innings with 0 runs)
# Filter rows with a column-based mask; `vk[vk == 0]` on a DataFrame keeps
# all rows (NaN-filled), so its `.size` would not count the ducks.
vk[vk['runs'] == 0].size

In [None]:
# Count the number of days with more than 200 new subscribers
# A column-based mask drops the non-matching rows; a DataFrame-shaped mask
# (`subs[subs > 200]`) would keep every row and make `.size` meaningless.
subs[subs['Subscribers gained'] > 200].size

In [None]:
# find actors who have done more than 20 movies
num_movies = movies.value_counts()
num_movies[num_movies > 20]

### Plotting Graphs on Series

In [None]:
subs.plot()

In [None]:
# Count on the `lead` column itself so the wedges are labelled with plain
# actor names rather than the one-level MultiIndex keys that
# DataFrame.value_counts() produces.
movies['lead'].value_counts().head(20).plot(kind='pie')

### Some Important Series Methods

In [None]:
# astype
# between
# clip
# drop_duplicates
# isnull
# dropna
# fillna
# isin
# apply
# copy

In [None]:
import numpy as np
import pandas as pd

In [None]:
# `squeeze=True` was removed from read_csv in pandas 2.0 (TypeError today).
# Squeezing the one-column result afterwards is the documented replacement
# and yields a Series.
subs = pd.read_csv('/content/subs.csv').squeeze('columns')
subs

In [None]:
# read_csv no longer accepts `squeeze=True` (removed in pandas 2.0); with
# match_no as the index, the single remaining column is squeezed to a Series.
vk = pd.read_csv('/content/kohli_ipl.csv',index_col='match_no').squeeze('columns')
vk

In [None]:
# read_csv no longer accepts `squeeze=True` (removed in pandas 2.0); with the
# movie title as the index, the single remaining column is squeezed to a Series.
movies = pd.read_csv('/content/bollywood.csv',index_col='movie').squeeze('columns')
movies

In [None]:
# astype
import sys
sys.getsizeof(vk)

In [None]:
sys.getsizeof(vk.astype('int16'))

In [None]:
# between
vk[vk.between(51,99)].size

In [None]:
# clip
subs

In [None]:
subs.clip(100,200)

In [None]:
# drop_duplicates
temp = pd.Series([1,1,2,2,3,3,4,4])
temp

In [None]:
temp.drop_duplicates(keep='last')

In [None]:
temp.duplicated().sum()

In [None]:
vk.duplicated().sum()

In [None]:
movies.drop_duplicates()

In [None]:
temp = pd.Series([1,2,3,np.nan,5,6,np.nan,8,np.nan,10])
temp

In [None]:
temp.size

In [None]:
temp.count()

In [None]:
# isnull
temp.isnull().sum()

In [None]:
# dropna
temp.dropna()

In [None]:
# fillna
temp.fillna(temp.mean())

In [None]:
# isin
vk[(vk == 49) | (vk == 99)]

In [None]:
vk[vk.isin([49,99])]

In [None]:
# apply
movies

In [None]:
movies.apply(lambda x:x.split()[0].upper())

In [None]:
subs

In [None]:
# Compute the mean once: inside the lambda it would be recalculated for every
# element, turning a linear pass into quadratic work for the same labels.
subs_mean = subs.mean()
subs.apply(lambda x:'good day' if x > subs_mean else 'bad day')

In [None]:
subs.mean()

In [None]:
# copy

In [None]:
vk

In [None]:
new = vk.head()

In [None]:
new

In [None]:
new[1] = 1

In [None]:
new = vk.head().copy()

In [None]:
new[1] = 100

In [None]:
new

In [None]:
vk