## DataFrames

In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Creating DataFrame

In [2]:
# using lists
student_data = [
    [100,80,10],
    [90,70,7],
    [120,100,14],
    [80,50,2]
]

s1 = pd.DataFrame(student_data)
print(s1)
student = pd.DataFrame(student_data, columns=["iq","marks","package"])


     0    1   2
0  100   80  10
1   90   70   7
2  120  100  14
3   80   50   2


Unnamed: 0,iq,marks,package
0,100,80,10
1,90,70,7
2,120,100,14
3,80,50,2


In [33]:
# using dicts

student_dict = {
    'name':['nitish','ankit','rupesh','rishabh','amit','ankita'],
    'iq':[100,90,120,80,0,0],
    'marks':[80,70,100,50,0,0],
    'package':[10,7,14,2,0,0]
}

# This will automatically assigns column names from keys of dict
student = pd.DataFrame(student_dict)
print(student)
student.set_index("name", inplace=True)   # by default it autogenerates, but we can assign ourselves as well


      name   iq  marks  package
0   nitish  100     80       10
1    ankit   90     70        7
2   rupesh  120    100       14
3  rishabh   80     50        2
4     amit    0      0        0
5   ankita    0      0        0


In [4]:
# using read_csv
movies = pd.read_csv('pandas02_movies.csv')
movies.head()

Unnamed: 0,title_x,imdb_id,poster_path,wiki_link,title_y,original_title,is_adult,year_of_release,runtime,genres,imdb_rating,imdb_votes,story,summary,tagline,actors,wins_nominations,release_date
0,Uri: The Surgical Strike,tt8291224,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Uri:_The_Surgica...,Uri: The Surgical Strike,Uri: The Surgical Strike,0,2019,138,Action|Drama|War,8.4,35112,Divided over five chapters the film chronicle...,Indian army special forces execute a covert op...,,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...,4 wins,11 January 2019 (USA)
1,Battalion 609,tt9472208,,https://en.wikipedia.org/wiki/Battalion_609,Battalion 609,Battalion 609,0,2019,131,War,4.1,73,The story revolves around a cricket match betw...,The story of Battalion 609 revolves around a c...,,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...,,11 January 2019 (India)
2,The Accidental Prime Minister (film),tt6986710,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/The_Accidental_P...,The Accidental Prime Minister,The Accidental Prime Minister,0,2019,112,Biography|Drama,6.1,5549,Based on the memoir by Indian policy analyst S...,Explores Manmohan Singh's tenure as the Prime ...,,Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S...,,11 January 2019 (USA)
3,Why Cheat India,tt8108208,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Why_Cheat_India,Why Cheat India,Why Cheat India,0,2019,121,Crime|Drama,6.0,1891,The movie focuses on existing malpractices in ...,The movie focuses on existing malpractices in ...,,Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ...,,18 January 2019 (USA)
4,Evening Shadows,tt6028796,,https://en.wikipedia.org/wiki/Evening_Shadows,Evening Shadows,Evening Shadows,0,2018,102,Drama,7.3,280,While gay rights and marriage equality has bee...,Under the 'Evening Shadows' truth often plays...,,Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva...,17 wins & 1 nomination,11 January 2019 (India)


In [5]:
ipl = pd.read_csv('pandas02_ipl-matches.csv')
ipl.head()

Unnamed: 0,ID,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,method,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
0,1312200,Ahmedabad,2022-05-29,2022,Final,Rajasthan Royals,Gujarat Titans,"Narendra Modi Stadium, Ahmedabad",Rajasthan Royals,bat,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon
1,1312199,Ahmedabad,2022-05-27,2022,Qualifier 2,Royal Challengers Bangalore,Rajasthan Royals,"Narendra Modi Stadium, Ahmedabad",Rajasthan Royals,field,N,Rajasthan Royals,Wickets,7.0,,JC Buttler,"['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ ...","['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...",CB Gaffaney,Nitin Menon
2,1312198,Kolkata,2022-05-25,2022,Eliminator,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",Lucknow Super Giants,field,N,Royal Challengers Bangalore,Runs,14.0,,RM Patidar,"['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ ...","['Q de Kock', 'KL Rahul', 'M Vohra', 'DJ Hooda...",J Madanagopal,MA Gough
3,1312197,Kolkata,2022-05-24,2022,Qualifier 1,Rajasthan Royals,Gujarat Titans,"Eden Gardens, Kolkata",Gujarat Titans,field,N,Gujarat Titans,Wickets,7.0,,DA Miller,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",BNJ Oxenford,VK Sharma
4,1304116,Mumbai,2022-05-22,2022,70,Sunrisers Hyderabad,Punjab Kings,"Wankhede Stadium, Mumbai",Sunrisers Hyderabad,bat,N,Punjab Kings,Wickets,5.0,,Harpreet Brar,"['PK Garg', 'Abhishek Sharma', 'RA Tripathi', ...","['JM Bairstow', 'S Dhawan', 'M Shahrukh Khan',...",AK Chaudhary,NA Patwardhan


### DataFrame Attributes and Methods

In [6]:
# shape
movies.shape
ipl.shape

(950, 20)

In [7]:
# dtypes
movies.dtypes   # gives a series, column name is index and type is value
ipl.dtypes

ID                   int64
City                object
Date                object
Season              object
MatchNumber         object
Team1               object
Team2               object
Venue               object
TossWinner          object
TossDecision        object
SuperOver           object
WinningTeam         object
WonBy               object
Margin             float64
method              object
Player_of_Match     object
Team1Players        object
Team2Players        object
Umpire1             object
Umpire2             object
dtype: object

In [8]:
# index
movies.index
ipl.index

RangeIndex(start=0, stop=950, step=1)

In [9]:
# columns
movies.columns
ipl.columns
student.columns

Index(['iq', 'marks', 'package'], dtype='object')

In [10]:
# values
student.values
ipl.values      #gets 2d numpy array. One outer array. Each inner array contains the row.

array([[1312200, 'Ahmedabad', '2022-05-29', ...,
        "['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pandya', 'DA Miller', 'R Tewatia', 'Rashid Khan', 'R Sai Kishore', 'LH Ferguson', 'Yash Dayal', 'Mohammed Shami']",
        'CB Gaffaney', 'Nitin Menon'],
       [1312199, 'Ahmedabad', '2022-05-27', ...,
        "['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D Padikkal', 'SO Hetmyer', 'R Parag', 'R Ashwin', 'TA Boult', 'YS Chahal', 'M Prasidh Krishna', 'OC McCoy']",
        'CB Gaffaney', 'Nitin Menon'],
       [1312198, 'Kolkata', '2022-05-25', ...,
        "['Q de Kock', 'KL Rahul', 'M Vohra', 'DJ Hooda', 'MP Stoinis', 'E Lewis', 'KH Pandya', 'PVD Chameera', 'Mohsin Khan', 'Avesh Khan', 'Ravi Bishnoi']",
        'J Madanagopal', 'MA Gough'],
       ...,
       [335984, 'Delhi', '2008-04-19', ...,
        "['T Kohli', 'YK Pathan', 'SR Watson', 'M Kaif', 'DS Lehmann', 'RA Jadeja', 'M Rawat', 'D Salunkhe', 'SK Warne', 'SK Trivedi', 'MM Patel']",
        'Aleem Dar', 'GA Pratapkumar'],
    

In [11]:
# head and tail
movies.head(2)

Unnamed: 0,title_x,imdb_id,poster_path,wiki_link,title_y,original_title,is_adult,year_of_release,runtime,genres,imdb_rating,imdb_votes,story,summary,tagline,actors,wins_nominations,release_date
0,Uri: The Surgical Strike,tt8291224,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Uri:_The_Surgica...,Uri: The Surgical Strike,Uri: The Surgical Strike,0,2019,138,Action|Drama|War,8.4,35112,Divided over five chapters the film chronicle...,Indian army special forces execute a covert op...,,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...,4 wins,11 January 2019 (USA)
1,Battalion 609,tt9472208,,https://en.wikipedia.org/wiki/Battalion_609,Battalion 609,Battalion 609,0,2019,131,War,4.1,73,The story revolves around a cricket match betw...,The story of Battalion 609 revolves around a c...,,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...,,11 January 2019 (India)


In [12]:
ipl.tail(2)

Unnamed: 0,ID,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,method,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
948,335983,Chandigarh,2008-04-19,2007/08,2,Kings XI Punjab,Chennai Super Kings,"Punjab Cricket Association Stadium, Mohali",Chennai Super Kings,bat,N,Chennai Super Kings,Runs,33.0,,MEK Hussey,"['K Goel', 'JR Hopes', 'KC Sangakkara', 'Yuvra...","['PA Patel', 'ML Hayden', 'MEK Hussey', 'MS Dh...",MR Benson,SL Shastri
949,335982,Bangalore,2008-04-18,2007/08,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140.0,,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen


In [13]:
# sample
ipl.sample(5)

Unnamed: 0,ID,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,method,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
757,501215,Kochi,2011-04-18,2011,18,Kochi Tuskers Kerala,Chennai Super Kings,Nehru Stadium,Kochi Tuskers Kerala,field,N,Kochi Tuskers Kerala,Wickets,7.0,D/L,BB McCullum,"['BB McCullum', 'DPMD Jayawardene', 'PA Patel'...","['MEK Hussey', 'M Vijay', 'SK Raina', 'S Badri...",K Hariharan,AL Hill
160,1216522,,2020-10-17,2020/21,33,Rajasthan Royals,Royal Challengers Bangalore,Dubai International Cricket Stadium,Rajasthan Royals,bat,N,Royal Challengers Bangalore,Wickets,7.0,,AB de Villiers,"['RV Uthappa', 'BA Stokes', 'SV Samson', 'SPD ...","['D Padikkal', 'AJ Finch', 'V Kohli', 'AB de V...",AK Chaudhary,Nitin Menon
87,1254112,Sharjah,2021-10-02,2021,46,Mumbai Indians,Delhi Capitals,Sharjah Cricket Stadium,Delhi Capitals,field,N,Delhi Capitals,Wickets,4.0,,AR Patel,"['RG Sharma', 'Q de Kock', 'SA Yadav', 'SS Tiw...","['PP Shaw', 'S Dhawan', 'SPD Smith', 'RR Pant'...",AK Chaudhary,MA Gough
676,548332,Chennai,2012-04-21,2012,26,Chennai Super Kings,Rajasthan Royals,"MA Chidambaram Stadium, Chepauk",Rajasthan Royals,bat,N,Chennai Super Kings,Wickets,7.0,,F du Plessis,"['S Badrinath', 'F du Plessis', 'SK Raina', 'M...","['R Dravid', 'AM Rahane', 'OA Shah', 'AL Menar...",Aleem Dar,BNJ Oxenford
224,1178404,Kolkata,2019-04-14,2019,29,Kolkata Knight Riders,Chennai Super Kings,Eden Gardens,Chennai Super Kings,field,N,Chennai Super Kings,Wickets,5.0,,Imran Tahir,"['CA Lynn', 'SP Narine', 'N Rana', 'RV Uthappa...","['SR Watson', 'F du Plessis', 'SK Raina', 'AT ...",CK Nandan,RJ Tucker


In [14]:
# info ; Very useful, gives high level summary of the dataset
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1629 entries, 0 to 1628
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   title_x           1629 non-null   object 
 1   imdb_id           1629 non-null   object 
 2   poster_path       1526 non-null   object 
 3   wiki_link         1629 non-null   object 
 4   title_y           1629 non-null   object 
 5   original_title    1629 non-null   object 
 6   is_adult          1629 non-null   int64  
 7   year_of_release   1629 non-null   int64  
 8   runtime           1629 non-null   object 
 9   genres            1629 non-null   object 
 10  imdb_rating       1629 non-null   float64
 11  imdb_votes        1629 non-null   int64  
 12  story             1609 non-null   object 
 13  summary           1629 non-null   object 
 14  tagline           557 non-null    object 
 15  actors            1624 non-null   object 
 16  wins_nominations  707 non-null    object 


In [15]:
ipl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ID               950 non-null    int64  
 1   City             899 non-null    object 
 2   Date             950 non-null    object 
 3   Season           950 non-null    object 
 4   MatchNumber      950 non-null    object 
 5   Team1            950 non-null    object 
 6   Team2            950 non-null    object 
 7   Venue            950 non-null    object 
 8   TossWinner       950 non-null    object 
 9   TossDecision     950 non-null    object 
 10  SuperOver        946 non-null    object 
 11  WinningTeam      946 non-null    object 
 12  WonBy            950 non-null    object 
 13  Margin           932 non-null    float64
 14  method           19 non-null     object 
 15  Player_of_Match  946 non-null    object 
 16  Team1Players     950 non-null    object 
 17  Team2Players    

In [16]:
# describe; Gives distribution of the numerical columns
movies.describe()

Unnamed: 0,is_adult,year_of_release,imdb_rating,imdb_votes
count,1629.0,1629.0,1629.0,1629.0
mean,0.0,2010.263966,5.557459,5384.263352
std,0.0,5.381542,1.567609,14552.103231
min,0.0,2001.0,0.0,0.0
25%,0.0,2005.0,4.4,233.0
50%,0.0,2011.0,5.6,1000.0
75%,0.0,2015.0,6.8,4287.0
max,0.0,2019.0,9.4,310481.0


In [17]:
ipl.describe()

Unnamed: 0,ID,Margin
count,950.0,932.0
mean,830485.2,17.056867
std,337567.8,21.633109
min,335982.0,1.0
25%,501261.2,6.0
50%,829738.0,8.0
75%,1175372.0,19.0
max,1312200.0,146.0


In [18]:
# isnull
movies.isnull().sum()   # how many null values inside the dataset

title_x                0
imdb_id                0
poster_path          103
wiki_link              0
title_y                0
original_title         0
is_adult               0
year_of_release        0
runtime                0
genres                 0
imdb_rating            0
imdb_votes             0
story                 20
summary                0
tagline             1072
actors                 5
wins_nominations     922
release_date         107
dtype: int64

In [19]:
# duplicated
movies.duplicated().sum()   # if any entire rows are repeating

0

In [20]:
student.duplicated().sum()

0

In [21]:
# rename
student

Unnamed: 0,iq,marks,package
0,100,80,10
1,90,70,7
2,120,100,14
3,80,50,2


In [22]:
student.rename(columns={"marks":"percent","package":"LPA"},inplace=True)
student

Unnamed: 0,iq,percent,LPA
0,100,80,10
1,90,70,7
2,120,100,14
3,80,50,2


### Math Methods

In [23]:
# sum -> axis argument ; axis = 0 means axis is row and do summation col wise and axis = 1 is col and 
student.sum(axis='columns')     # axis = 0 -> col, 1 -> row

0    190
1    167
2    234
3    132
dtype: int64

In [24]:
student.mean(axis=1)

0    63.333333
1    55.666667
2    78.000000
3    44.000000
dtype: float64

In [25]:
student.var()

iq         291.666667
percent    433.333333
LPA         25.583333
dtype: float64

### Selecting cols from a DataFrame

In [26]:
# single cols
type(movies['title_x'])

pandas.core.series.Series

In [28]:
ipl['Venue'].unique()

array(['Narendra Modi Stadium, Ahmedabad', 'Eden Gardens, Kolkata',
       'Wankhede Stadium, Mumbai', 'Brabourne Stadium, Mumbai',
       'Dr DY Patil Sports Academy, Mumbai',
       'Maharashtra Cricket Association Stadium, Pune',
       'Dubai International Cricket Stadium', 'Sharjah Cricket Stadium',
       'Zayed Cricket Stadium, Abu Dhabi', 'Arun Jaitley Stadium, Delhi',
       'MA Chidambaram Stadium, Chepauk, Chennai', 'Sheikh Zayed Stadium',
       'Rajiv Gandhi International Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'MA Chidambaram Stadium',
       'Punjab Cricket Association IS Bindra Stadium', 'Wankhede Stadium',
       'M.Chinnaswamy Stadium', 'Arun Jaitley Stadium', 'Eden Gardens',
       'Sawai Mansingh Stadium',
       'Maharashtra Cricket Association Stadium',
       'Holkar Cricket Stadium',
       'Rajiv Gandhi International Stadium, Uppal',
       'M Chinnaswamy Stadium', 'Feroz Shah Kotla', 'Green Park',
       'Punjab Cricket A

In [30]:
type(movies[['title_x']])

pandas.core.frame.DataFrame

In [31]:
# multiple cols
movies[['year_of_release','actors','title_x']]

Unnamed: 0,year_of_release,actors,title_x
0,2019,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...,Uri: The Surgical Strike
1,2019,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...,Battalion 609
2,2019,Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S...,The Accidental Prime Minister (film)
3,2019,Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ...,Why Cheat India
4,2018,Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva...,Evening Shadows
...,...,...,...
1624,2001,Ajay Devgn|Sonali Bendre|Namrata Shirodkar|Pre...,Tera Mera Saath Rahen
1625,2001,Ameesha Patel|Jimmy Sheirgill|Nafisa Ali|Gulsh...,Yeh Zindagi Ka Safar
1626,2018,Vijay Arora|Asrani|Rajni Bala|Kumud Damle|Utpa...,Sabse Bada Sukh
1627,2019,Gippy Grewal|Zareen Khan|,Daaka


In [32]:
ipl[['Team1','Team2','WinningTeam']]

Unnamed: 0,Team1,Team2,WinningTeam
0,Rajasthan Royals,Gujarat Titans,Gujarat Titans
1,Royal Challengers Bangalore,Rajasthan Royals,Rajasthan Royals
2,Royal Challengers Bangalore,Lucknow Super Giants,Royal Challengers Bangalore
3,Rajasthan Royals,Gujarat Titans,Gujarat Titans
4,Sunrisers Hyderabad,Punjab Kings,Punjab Kings
...,...,...,...
945,Kolkata Knight Riders,Deccan Chargers,Kolkata Knight Riders
946,Mumbai Indians,Royal Challengers Bangalore,Royal Challengers Bangalore
947,Delhi Daredevils,Rajasthan Royals,Delhi Daredevils
948,Kings XI Punjab,Chennai Super Kings,Chennai Super Kings


### Selecting rows from a DataFrame

- **iloc** - searches using index positions
- **loc** - searches using index labels

In [39]:
# single row
movies.iloc[0]

title_x                                      Uri: The Surgical Strike
imdb_id                                                     tt8291224
poster_path         https://upload.wikimedia.org/wikipedia/en/thum...
wiki_link           https://en.wikipedia.org/wiki/Uri:_The_Surgica...
title_y                                      Uri: The Surgical Strike
original_title                               Uri: The Surgical Strike
is_adult                                                            0
year_of_release                                                  2019
runtime                                                           138
genres                                               Action|Drama|War
imdb_rating                                                       8.4
imdb_votes                                                      35112
story               Divided over five chapters  the film chronicle...
summary             Indian army special forces execute a covert op...
tagline             

In [40]:
# multiple row
movies.iloc[:5]

Unnamed: 0,title_x,imdb_id,poster_path,wiki_link,title_y,original_title,is_adult,year_of_release,runtime,genres,imdb_rating,imdb_votes,story,summary,tagline,actors,wins_nominations,release_date
0,Uri: The Surgical Strike,tt8291224,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Uri:_The_Surgica...,Uri: The Surgical Strike,Uri: The Surgical Strike,0,2019,138,Action|Drama|War,8.4,35112,Divided over five chapters the film chronicle...,Indian army special forces execute a covert op...,,Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...,4 wins,11 January 2019 (USA)
1,Battalion 609,tt9472208,,https://en.wikipedia.org/wiki/Battalion_609,Battalion 609,Battalion 609,0,2019,131,War,4.1,73,The story revolves around a cricket match betw...,The story of Battalion 609 revolves around a c...,,Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen...,,11 January 2019 (India)
2,The Accidental Prime Minister (film),tt6986710,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/The_Accidental_P...,The Accidental Prime Minister,The Accidental Prime Minister,0,2019,112,Biography|Drama,6.1,5549,Based on the memoir by Indian policy analyst S...,Explores Manmohan Singh's tenure as the Prime ...,,Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S...,,11 January 2019 (USA)
3,Why Cheat India,tt8108208,https://upload.wikimedia.org/wikipedia/en/thum...,https://en.wikipedia.org/wiki/Why_Cheat_India,Why Cheat India,Why Cheat India,0,2019,121,Crime|Drama,6.0,1891,The movie focuses on existing malpractices in ...,The movie focuses on existing malpractices in ...,,Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ...,,18 January 2019 (USA)
4,Evening Shadows,tt6028796,,https://en.wikipedia.org/wiki/Evening_Shadows,Evening Shadows,Evening Shadows,0,2018,102,Drama,7.3,280,While gay rights and marriage equality has bee...,Under the 'Evening Shadows' truth often plays...,,Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva...,17 wins & 1 nomination,11 January 2019 (India)


In [None]:
# fancy indexing
movies.iloc[[0,4,5]]

In [None]:
# loc
students

In [None]:
students.loc['nitish']

In [None]:
students.loc['nitish':'rishabh':2]

In [None]:
students.loc[['nitish','ankita','rupesh']]

In [None]:
students.iloc[[0,3,4]]

### Selecting both rows and cols

In [None]:
movies.iloc[0:3,0:3]

In [None]:
movies.loc[0:2,'title_x':'poster_path']

### Filtering a DataFrame

In [None]:
ipl.head(2)

In [None]:
# find all the final winners
mask = ipl['MatchNumber'] == 'Final'
new_df = ipl[mask]
new_df[['Season','WinningTeam']]

ipl[ipl['MatchNumber'] == 'Final'][['Season','WinningTeam']]

In [None]:
# how many super over finishes have occured
ipl[ipl['SuperOver'] == 'Y'].shape[0]

In [None]:
# how many matches has csk won in kolkata
ipl[(ipl['City'] == 'Kolkata') & (ipl['WinningTeam'] == 'Chennai Super Kings')].shape[0]

In [None]:
# toss winner is match winner in percentage
(ipl[ipl['TossWinner'] == ipl['WinningTeam']].shape[0]/ipl.shape[0])*100

In [None]:
# movies with rating higher than 8 and votes>10000
movies[(movies['imdb_rating'] > 8.5) & (movies['imdb_votes'] > 10000)].shape[0]

In [None]:
# Action movies with rating higher than 7.5
# mask1 = movies['genres'].str.split('|').apply(lambda x:'Action' in x)
mask1 = movies['genres'].str.contains('Action')
mask2 = movies['imdb_rating'] > 7.5

movies[mask1 & mask2]

In [None]:
# write a function that can return the track record of 2 teams against each other

### Adding new cols

In [None]:
# completely new
movies['Country'] = 'India'
movies.head()

In [None]:
# from existing ones
movies.dropna(inplace=True)

In [None]:
movies['lead actor'] = movies['actors'].str.split('|').apply(lambda x:x[0])
movies.head()

In [None]:
movies.info()

### Important DataFrame Functions

In [None]:
# astype
ipl.info()

In [None]:
ipl['ID'] = ipl['ID'].astype('int32')

In [None]:
ipl.info()

In [None]:
# ipl['Season'] = ipl['Season'].astype('category')
ipl['Team1'] = ipl['Team1'].astype('category')
ipl['Team2'] = ipl['Team2'].astype('category')

In [None]:
ipl.info()

In [None]:
# value_counts

In [None]:
# find which player has won most potm -> in finals and qualifiers

In [None]:
# Toss decision plot

In [None]:
# how many matches each team has played

In [None]:
# sort_values -> ascending -> na_position -> inplace -> multiple cols