In [1]:
import numpy as np
import pandas as pd

## IMDB Case-Study

In [2]:
movies = pd.read_csv('movies.csv')
movies.head()

Unnamed: 0.1,Unnamed: 0,id,budget,popularity,revenue,title,vote_average,vote_count,director_id,year,month,day
0,0,43597,237000000,150,2787965087,Avatar,7.2,11800,4762,2009,Dec,Thursday
1,1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,4763,2007,May,Saturday
2,2,43599,245000000,107,880674609,Spectre,6.3,4466,4764,2015,Oct,Monday
3,3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,4765,2012,Jul,Monday
4,5,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,4767,2007,May,Tuesday


In [3]:
# remove Unnamed col its a extra index col
movies = pd.read_csv('movies.csv',index_col=0)
movies.head()

Unnamed: 0,id,budget,popularity,revenue,title,vote_average,vote_count,director_id,year,month,day
0,43597,237000000,150,2787965087,Avatar,7.2,11800,4762,2009,Dec,Thursday
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,4763,2007,May,Saturday
2,43599,245000000,107,880674609,Spectre,6.3,4466,4764,2015,Oct,Monday
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,4765,2012,Jul,Monday
5,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,4767,2007,May,Tuesday


In [4]:
directors = pd.read_csv('directors.csv',index_col=0)
directors.head()

Unnamed: 0,director_name,id,gender
0,James Cameron,4762,Male
1,Gore Verbinski,4763,Male
2,Sam Mendes,4764,Male
3,Christopher Nolan,4765,Male
4,Andrew Stanton,4766,Male


In [5]:
data=movies.merge(directors, how='left',left_on='director_id',right_on='id')
data.drop(['director_id','id_y'],axis=1,inplace=True)

In [6]:
data.head()

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender
0,43597,237000000,150,2787965087,Avatar,7.2,11800,2009,Dec,Thursday,James Cameron,Male
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,2007,May,Saturday,Gore Verbinski,Male
2,43599,245000000,107,880674609,Spectre,6.3,4466,2015,Oct,Monday,Sam Mendes,Male
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male
4,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,2007,May,Tuesday,Sam Raimi,Male


In [7]:
# Encoding male --> o
# female --> 1

In [8]:
def encode(text):
    if text =="Male":
        return 0
    else:
        return 1

In [9]:
data.iloc[0]['gender']

'Male'

In [10]:
# passing function
encode(data.iloc[0]['gender'])

0

In [11]:
# apply function on whole gender column
data['gender'].apply(encode)

0       0
1       0
2       0
3       0
4       0
       ..
1460    0
1461    0
1462    0
1463    0
1464    1
Name: gender, Length: 1465, dtype: int64

In [12]:
data['gender_mapping'] = data['gender'].apply(encode)

In [13]:
data

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping
0,43597,237000000,150,2787965087,Avatar,7.2,11800,2009,Dec,Thursday,James Cameron,Male,0
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,2007,May,Saturday,Gore Verbinski,Male,0
2,43599,245000000,107,880674609,Spectre,6.3,4466,2015,Oct,Monday,Sam Mendes,Male,0
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0
4,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,2007,May,Tuesday,Sam Raimi,Male,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460,48363,0,3,321952,The Last Waltz,7.9,64,1978,May,Monday,Martin Scorsese,Male,0
1461,48370,27000,19,3151130,Clerks,7.4,755,1994,Sep,Tuesday,Kevin Smith,Male,0
1462,48375,0,7,0,Rampage,6.0,131,2009,Aug,Friday,Uwe Boll,Male,0
1463,48376,0,3,0,Slacker,6.4,77,1990,Jul,Friday,Richard Linklater,Male,0


#### How to find sum of revenue and budget per movie
* Multiple col


In [14]:
data[['revenue','budget']].apply(np.sum)

revenue    209866997305
budget      70353617179
dtype: int64

In [15]:
# Row wise
data[['revenue','budget']].apply(np.sum,axis=1)

0       3024965087
1       1261000000
2       1125674609
3       1334939099
4       1148871626
           ...    
1460        321952
1461       3178130
1462             0
1463             0
1464       2260920
Length: 1465, dtype: int64

### How to find Profit per movie(revenue-budget)

In [16]:
def proof(x):
    return x['revenue'] - x['budget']
    
    
data['profit'] = data[['revenue','budget']].apply(proof,axis=1)
data

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
0,43597,237000000,150,2787965087,Avatar,7.2,11800,2009,Dec,Thursday,James Cameron,Male,0,2550965087
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,2007,May,Saturday,Gore Verbinski,Male,0,661000000
2,43599,245000000,107,880674609,Spectre,6.3,4466,2015,Oct,Monday,Sam Mendes,Male,0,635674609
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0,834939099
4,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,2007,May,Tuesday,Sam Raimi,Male,0,632871626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460,48363,0,3,321952,The Last Waltz,7.9,64,1978,May,Monday,Martin Scorsese,Male,0,321952
1461,48370,27000,19,3151130,Clerks,7.4,755,1994,Sep,Tuesday,Kevin Smith,Male,0,3124130
1462,48375,0,7,0,Rampage,6.0,131,2009,Aug,Friday,Uwe Boll,Male,0,0
1463,48376,0,3,0,Slacker,6.4,77,1990,Jul,Friday,Richard Linklater,Male,0,0


#### apply function
* apply a function on a dataframe
* create a function/inbuilt function np sum
* Column on a dataframe and then you need to pass to function 

### Group By

In [18]:
# How can we know the number of movies released by a particular director, say, Christopher Nolan?


In [21]:
data['director_name'] =='Christopher Nolan'

0       False
1       False
2       False
3        True
4       False
        ...  
1460    False
1461    False
1462    False
1463    False
1464    False
Name: director_name, Length: 1465, dtype: bool

In [22]:
data.loc[data['director_name'] =='Christopher Nolan']

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0,834939099
45,43662,185000000,187,1004558444,The Dark Knight,8.2,12002,2008,Jul,Wednesday,Christopher Nolan,Male,0,819558444
58,43692,165000000,724,675120017,Interstellar,8.1,10867,2014,Nov,Wednesday,Christopher Nolan,Male,0,510120017
59,43693,160000000,167,825532764,Inception,8.1,13752,2010,Jul,Wednesday,Christopher Nolan,Male,0,665532764
74,43716,150000000,115,374218673,Batman Begins,7.5,7359,2005,Jun,Friday,Christopher Nolan,Male,0,224218673
565,44630,46000000,41,113714830,Insomnia,6.8,1148,2002,May,Friday,Christopher Nolan,Male,0,67714830
641,44793,40000000,74,109676311,The Prestige,8.0,4391,2006,Oct,Thursday,Christopher Nolan,Male,0,69676311
1341,47170,9000000,60,39723096,Memento,8.1,4028,2000,Oct,Wednesday,Christopher Nolan,Male,0,30723096


In [25]:
data.loc[data['director_name'] =='Christopher Nolan','title'].count()

8

In [26]:
### Movie of each director

In [27]:
data['director_name'].value_counts()

Steven Spielberg      26
Martin Scorsese       19
Clint Eastwood        19
Woody Allen           18
Ridley Scott          16
                      ..
Tim Hill               5
Jonathan Liebesman     5
Roman Polanski         5
Larry Charles          5
Nicole Holofcener      5
Name: director_name, Length: 199, dtype: int64

In [28]:
data['director_name'].nunique()

199

In [29]:
# groupby object
data.groupby('director_name')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000262E5EC2D10>

In [32]:
# Total no of group
data.groupby('director_name').ngroups

199

In [33]:
data.groupby('director_name').groups

{'Adam McKay': [176, 323, 366, 505, 839, 916], 'Adam Shankman': [265, 300, 350, 404, 458, 843, 999, 1231], 'Alejandro González Iñárritu': [106, 749, 1015, 1034, 1077, 1405], 'Alex Proyas': [95, 159, 514, 671, 873], 'Alexander Payne': [793, 1006, 1101, 1211, 1281], 'Andrew Adamson': [11, 43, 328, 501, 947], 'Andrew Niccol': [533, 603, 701, 722, 1439], 'Andrzej Bartkowiak': [349, 549, 754, 911, 924], 'Andy Fickman': [517, 681, 909, 926, 973, 1023], 'Andy Tennant': [314, 320, 464, 593, 676, 885], 'Ang Lee': [99, 134, 748, 840, 1089, 1110, 1132, 1184], 'Anne Fletcher': [610, 650, 736, 789, 1206], 'Antoine Fuqua': [310, 338, 424, 467, 576, 808, 818, 1105], 'Atom Egoyan': [946, 1128, 1164, 1194, 1347, 1416], 'Barry Levinson': [313, 319, 471, 594, 878, 898, 1013, 1037, 1082, 1143, 1185, 1345, 1378], 'Barry Sonnenfeld': [13, 48, 90, 205, 591, 778, 783], 'Ben Stiller': [209, 212, 547, 562, 850], 'Bill Condon': [102, 307, 902, 1233, 1381], 'Bobby Farrelly': [352, 356, 481, 498, 624, 630, 654, 80

In [34]:
## Specific person group
data.groupby('director_name').get_group('Alexander Payne')
# all movie of Alexander Payne

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
793,45163,30000000,19,105834556,About Schmidt,6.7,362,2002,Dec,Friday,Alexander Payne,,1,75834556
1006,45699,20000000,40,177243185,The Descendants,6.7,934,2011,Sep,Friday,Alexander Payne,,1,157243185
1101,46004,16000000,23,109502303,Sideways,6.9,478,2004,Oct,Friday,Alexander Payne,,1,93502303
1211,46446,12000000,29,17654912,Nebraska,7.4,636,2013,Sep,Saturday,Alexander Payne,,1,5654912
1281,46813,0,13,0,Election,6.7,270,1999,Apr,Friday,Alexander Payne,,1,0


In [35]:
data.groupby('director_name').get_group('Christopher Nolan')

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0,834939099
45,43662,185000000,187,1004558444,The Dark Knight,8.2,12002,2008,Jul,Wednesday,Christopher Nolan,Male,0,819558444
58,43692,165000000,724,675120017,Interstellar,8.1,10867,2014,Nov,Wednesday,Christopher Nolan,Male,0,510120017
59,43693,160000000,167,825532764,Inception,8.1,13752,2010,Jul,Wednesday,Christopher Nolan,Male,0,665532764
74,43716,150000000,115,374218673,Batman Begins,7.5,7359,2005,Jun,Friday,Christopher Nolan,Male,0,224218673
565,44630,46000000,41,113714830,Insomnia,6.8,1148,2002,May,Friday,Christopher Nolan,Male,0,67714830
641,44793,40000000,74,109676311,The Prestige,8.0,4391,2006,Oct,Thursday,Christopher Nolan,Male,0,69676311
1341,47170,9000000,60,39723096,Memento,8.1,4028,2000,Oct,Wednesday,Christopher Nolan,Male,0,30723096


In [36]:
data.groupby('director_name').get_group('Christopher Nolan')['title']

3       The Dark Knight Rises
45            The Dark Knight
58               Interstellar
59                  Inception
74              Batman Begins
565                  Insomnia
641              The Prestige
1341                  Memento
Name: title, dtype: object

In [37]:
data.groupby('director_name').get_group('Christopher Nolan')['title'].count()

8

In [38]:
## How can we find multiple aggregation of any feature?

In [39]:
data['year']

0       2009
1       2007
2       2015
3       2012
4       2007
        ... 
1460    1978
1461    1994
1462    2009
1463    1990
1464    1992
Name: year, Length: 1465, dtype: int64

In [40]:
data.groupby('director_name')['year'].min()

director_name
Adam McKay                     2004
Adam Shankman                  2001
Alejandro González Iñárritu    2000
Alex Proyas                    1994
Alexander Payne                1999
                               ... 
Wes Craven                     1984
Wolfgang Petersen              1981
Woody Allen                    1977
Zack Snyder                    2004
Zhang Yimou                    2002
Name: year, Length: 199, dtype: int64

In [41]:
data.groupby('director_name')['year'].max()

director_name
Adam McKay                     2015
Adam Shankman                  2012
Alejandro González Iñárritu    2015
Alex Proyas                    2016
Alexander Payne                2013
                               ... 
Wes Craven                     2011
Wolfgang Petersen              2006
Woody Allen                    2013
Zack Snyder                    2016
Zhang Yimou                    2014
Name: year, Length: 199, dtype: int64

In [42]:
# First and last movie
data.groupby('director_name')['year'].aggregate(['min','max'])

Unnamed: 0_level_0,min,max
director_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adam McKay,2004,2015
Adam Shankman,2001,2012
Alejandro González Iñárritu,2000,2015
Alex Proyas,1994,2016
Alexander Payne,1999,2013
...,...,...
Wes Craven,1984,2011
Wolfgang Petersen,1981,2006
Woody Allen,1977,2013
Zack Snyder,2004,2016


In [43]:
## We can get highest budget movie for every director

In [52]:
data.groupby('director_name')['budget'].aggregate(['min','max'])

Unnamed: 0_level_0,min,max
director_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adam McKay,26000000,100000000
Adam Shankman,11000000,80000000
Alejandro González Iñárritu,0,135000000
Alex Proyas,15000000,140000000
Alexander Payne,0,30000000
...,...,...
Wes Craven,1800000,40000000
Wolfgang Petersen,14000000,175000000
Woody Allen,0,30000000
Zack Snyder,28000000,250000000


In [53]:
data.groupby('director_name')['budget'].max()

director_name
Adam McKay                     100000000
Adam Shankman                   80000000
Alejandro González Iñárritu    135000000
Alex Proyas                    140000000
Alexander Payne                 30000000
                                 ...    
Wes Craven                      40000000
Wolfgang Petersen              175000000
Woody Allen                     30000000
Zack Snyder                    250000000
Zhang Yimou                     94000000
Name: budget, Length: 199, dtype: int64

In [54]:
### How can we filter_out the director name with max budget > 100M  ?

In [56]:
data_dir_budget = data.groupby('director_name')['budget'].max().reset_index()
data_dir_budget.head()

Unnamed: 0,director_name,budget
0,Adam McKay,100000000
1,Adam Shankman,80000000
2,Alejandro González Iñárritu,135000000
3,Alex Proyas,140000000
4,Alexander Payne,30000000


In [57]:
names = data_dir_budget.loc[data_dir_budget['budget'] >= 100000000, 'director_name']

In [58]:
names

0                       Adam McKay
2      Alejandro González Iñárritu
3                      Alex Proyas
5                   Andrew Adamson
10                         Ang Lee
                  ...             
187                    Tom Shadyac
188                     Tom Tykwer
189                     Tony Scott
195              Wolfgang Petersen
197                    Zack Snyder
Name: director_name, Length: 85, dtype: object

In [59]:
data['director_name'].isin(names)

0        True
1        True
2        True
3        True
4        True
        ...  
1460     True
1461    False
1462    False
1463    False
1464    False
Name: director_name, Length: 1465, dtype: bool

In [60]:
data.loc[data['director_name'].isin(names)]

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
0,43597,237000000,150,2787965087,Avatar,7.2,11800,2009,Dec,Thursday,James Cameron,Male,0,2550965087
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,2007,May,Saturday,Gore Verbinski,Male,0,661000000
2,43599,245000000,107,880674609,Spectre,6.3,4466,2015,Oct,Monday,Sam Mendes,Male,0,635674609
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0,834939099
4,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,2007,May,Tuesday,Sam Raimi,Male,0,632871626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1450,48267,400000,33,100000000,Mad Max,6.6,1213,1979,Apr,Thursday,George Miller,Male,0,99600000
1451,48268,200000,13,4505922,Swingers,6.8,253,1996,Oct,Friday,Doug Liman,Male,0,4305922
1452,48274,0,5,2611555,Three,6.3,31,2010,Dec,Thursday,Tom Tykwer,Male,0,2611555
1458,48335,60000,27,3221152,Pi,7.1,586,1998,Jul,Friday,Darren Aronofsky,Male,0,3161152


### Lambda Function


In [61]:
def add(a):
    a = a+10
    print(a)

In [62]:
add(2)

12


In [63]:
#lambda input:output
x = lambda a:a+10

In [64]:
x(2)

12

In [65]:
yuvraj = lambda x,y : x*y

In [66]:
yuvraj(3,4)

12

In [67]:
# Create a lambda function that takes three values and add them

In [68]:
add =  lambda a,b,c: a+b+c

In [69]:
add(10,20,30)

60

In [70]:
data.groupby('director_name')['vote_average'].max()

director_name
Adam McKay                     7.3
Adam Shankman                  7.5
Alejandro González Iñárritu    7.6
Alex Proyas                    7.3
Alexander Payne                7.4
                              ... 
Wes Craven                     7.2
Wolfgang Petersen              7.9
Woody Allen                    7.8
Zack Snyder                    7.0
Zhang Yimou                    7.2
Name: vote_average, Length: 199, dtype: float64

In [71]:
data.groupby('director_name')['vote_average'].max().sort_values()

director_name
Jason Friedberg      4.2
John Whitesell       5.4
Brian Levant         5.5
Raja Gosnell         5.6
Tim Hill             5.8
                    ... 
Christopher Nolan    8.2
Robert Zemeckis      8.2
Quentin Tarantino    8.3
Steven Spielberg     8.3
David Fincher        8.3
Name: vote_average, Length: 199, dtype: float64

In [72]:
data.groupby('director_name')['vote_average'].max().sort_values(ascending=False)

director_name
David Fincher        8.3
Steven Spielberg     8.3
Quentin Tarantino    8.3
Robert Zemeckis      8.2
Christopher Nolan    8.2
                    ... 
Tim Hill             5.8
Raja Gosnell         5.6
Brian Levant         5.5
John Whitesell       5.4
Jason Friedberg      4.2
Name: vote_average, Length: 199, dtype: float64

In [73]:
data.groupby('director_name').filter( lambda x:x['vote_average'].max()>=8.3)

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit
37,43650,185000000,75,786636033,Indiana Jones and the Kingdom of the Crystal S...,5.7,2495,2008,May,Wednesday,Steven Spielberg,Male,0,601636033
62,43697,150000000,60,333932083,The Curious Case of Benjamin Button,7.3,3292,2008,Nov,Monday,David Fincher,Male,0,183932083
105,43772,140000000,44,183345589,The BFG,6.0,1000,2016,Jun,Wednesday,Steven Spielberg,Male,0,43345589
110,43782,132000000,48,591739379,War of the Worlds,6.2,2322,2005,Jun,Tuesday,Steven Spielberg,Male,0,459739379
114,43787,130000000,89,371940071,The Adventures of Tintin,6.7,2061,2011,Oct,Tuesday,Steven Spielberg,Male,0,241940071
166,43872,102000000,65,358372926,Minority Report,7.1,2608,2002,Jun,Thursday,Steven Spielberg,Male,0,256372926
178,43884,100000000,82,425368238,Django Unchained,7.8,10099,2012,Dec,Tuesday,Quentin Tarantino,Male,0,325368238
213,43951,90000000,47,232617430,The Girl with the Dragon Tattoo,7.2,2434,2011,Dec,Wednesday,David Fincher,Male,0,142617430
219,43960,100000000,34,235926552,A.I. Artificial Intelligence,6.8,1974,2001,Jun,Friday,Steven Spielberg,Male,0,135926552
253,44018,65000000,51,84785914,Zodiac,7.3,2023,2007,Mar,Friday,David Fincher,Male,0,19785914


In [74]:
## How do we find risky movie?
## Risky movie --> low earning

In [85]:
def func(x):
    x['risky'] = x['budget'] - x['revenue'].mean() >=0
    return x

In [86]:
data_risky=data.groupby('director_name').apply(func)
data_risky

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  data_risky=data.groupby('director_name').apply(func)


Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit,risky
0,43597,237000000,150,2787965087,Avatar,7.2,11800,2009,Dec,Thursday,James Cameron,Male,0,2550965087,False
1,43598,300000000,139,961000000,Pirates of the Caribbean: At World's End,6.9,4500,2007,May,Saturday,Gore Verbinski,Male,0,661000000,False
2,43599,245000000,107,880674609,Spectre,6.3,4466,2015,Oct,Monday,Sam Mendes,Male,0,635674609,False
3,43600,250000000,112,1084939099,The Dark Knight Rises,7.6,9106,2012,Jul,Monday,Christopher Nolan,Male,0,834939099,False
4,43602,258000000,115,890871626,Spider-Man 3,5.9,3576,2007,May,Tuesday,Sam Raimi,Male,0,632871626,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460,48363,0,3,321952,The Last Waltz,7.9,64,1978,May,Monday,Martin Scorsese,Male,0,321952,False
1461,48370,27000,19,3151130,Clerks,7.4,755,1994,Sep,Tuesday,Kevin Smith,Male,0,3124130,False
1462,48375,0,7,0,Rampage,6.0,131,2009,Aug,Friday,Uwe Boll,Male,0,0,False
1463,48376,0,3,0,Slacker,6.4,77,1990,Jul,Friday,Richard Linklater,Male,0,0,False


In [87]:
data_risky.loc[data_risky['risky']]

Unnamed: 0,id_x,budget,popularity,revenue,title,vote_average,vote_count,year,month,day,director_name,gender,gender_mapping,profit,risky
7,43608,200000000,107,586090727,Quantum of Solace,6.1,2965,2008,Oct,Thursday,Marc Forster,Male,0,386090727,True
12,43614,380000000,135,1045713802,Pirates of the Caribbean: On Stranger Tides,6.4,4948,2011,May,Saturday,Rob Marshall,Male,0,665713802,True
15,43618,200000000,37,310669540,Robin Hood,6.2,1398,2010,May,Wednesday,Ridley Scott,Male,0,110669540,True
20,43624,209000000,64,303025485,Battleship,5.5,2114,2012,Apr,Wednesday,Peter Berg,Male,0,94025485,True
24,43630,210000000,3,459359555,X-Men: The Last Stand,6.3,3525,2006,May,Wednesday,Brett Ratner,Male,0,249359555,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,47224,5000000,7,3263585,The Sweet Hereafter,6.8,103,1997,May,Wednesday,Atom Egoyan,Male,0,-1736415,True
1349,47229,5000000,3,4842699,90 Minutes in Heaven,5.4,40,2015,Sep,Friday,Michael Polish,Male,0,-157301,True
1351,47233,5000000,6,0,Light Sleeper,5.7,15,1992,Aug,Friday,Paul Schrader,,1,-5000000,True
1356,47263,15000000,10,0,Dying of the Light,4.5,118,2014,Dec,Thursday,Paul Schrader,,1,-15000000,True


* Yes, there are some 131 movies whose budget was greater than average earning of its director

In [91]:
temp = lambda a,b:True if a>b else False

In [92]:
temp(20,5)

True

In [94]:
num = lambda a,b:"Dancing" if a>b else "Cooking"
num


<function __main__.<lambda>(a, b)>

In [95]:
num(20,5)

'Dancing'

In [96]:
a = 10
b = 20

In [97]:
if a>b:
    print("Dancing")
else:
    print("Cooking")

Cooking


In [98]:
## filter only the ages that are greater than 18
ages=[13,90,17,21,56,5]

In [99]:
q = lambda a:a>18

In [100]:
q(ages)

TypeError: '>' not supported between instances of 'list' and 'int'

In [101]:
filter(lambda a:a>18,ages)

<filter at 0x262ece16320>

In [103]:
list(filter(lambda a:a>18,ages))  # filter remove all false value

[90, 21, 56]

In [104]:
a = [5,4,3,6,7,8,3,4,56]

In [107]:
l=[]
for i in a:
    print(i**2)
    l.append(i**2)
    l
        

25
16
9
36
49
64
9
16
3136


In [108]:
list(map(lambda a:a**2,a))

[25, 16, 9, 36, 49, 64, 9, 16, 3136]