# Loading Data into Pandas

In [2]:
import pandas as pd
highest_movies = pd.read_csv("Data/Top 200 Highest Grossing Movies.csv")
highest_movies

Unnamed: 0,Rank,Title,Lifetime Gross,Year
0,1,Avatar,"$2,847,397,339",2009
1,2,Avengers: Endgame,"$2,797,501,328",2019
2,3,Titanic,"$2,201,647,264",1997
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015
4,5,Avengers: Infinity War,"$2,048,359,754",2018
...,...,...,...,...
195,196,Detective Chinatown 2,"$544,185,156",2018
196,197,X-Men: Apocalypse,"$543,934,105",2016
197,198,Sherlock Holmes: A Game of Shadows,"$543,848,418",2011
198,199,Despicable Me,"$543,157,985",2010


In [3]:
# shows top 5 rows in data set
highest_movies.head(5)

Unnamed: 0,Rank,Title,Lifetime Gross,Year
0,1,Avatar,"$2,847,397,339",2009
1,2,Avengers: Endgame,"$2,797,501,328",2019
2,3,Titanic,"$2,201,647,264",1997
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015
4,5,Avengers: Infinity War,"$2,048,359,754",2018


In [4]:
# shows bottom 5 rows in data set
highest_movies.tail(5)

Unnamed: 0,Rank,Title,Lifetime Gross,Year
195,196,Detective Chinatown 2,"$544,185,156",2018
196,197,X-Men: Apocalypse,"$543,934,105",2016
197,198,Sherlock Holmes: A Game of Shadows,"$543,848,418",2011
198,199,Despicable Me,"$543,157,985",2010
199,200,Cinderella,"$542,358,331",2015


# Reading Data in Pandas

In [5]:
# read headers
highest_movies.columns

Index(['Rank', 'Title', 'Lifetime Gross', 'Year'], dtype='object')

In [6]:
# read each column
highest_movies['Title']

0                                          Avatar
1                               Avengers: Endgame
2                                         Titanic
3      Star Wars: Episode VII - The Force Awakens
4                          Avengers: Infinity War
                          ...                    
195                         Detective Chinatown 2
196                             X-Men: Apocalypse
197            Sherlock Holmes: A Game of Shadows
198                                 Despicable Me
199                                    Cinderella
Name: Title, Length: 200, dtype: object

In [7]:
# read top 4 titles
highest_movies.head(4)['Title']

0                                        Avatar
1                             Avengers: Endgame
2                                       Titanic
3    Star Wars: Episode VII - The Force Awakens
Name: Title, dtype: object

In [8]:
highest_movies.iloc[1:4]

Unnamed: 0,Rank,Title,Lifetime Gross,Year
1,2,Avengers: Endgame,"$2,797,501,328",2019
2,3,Titanic,"$2,201,647,264",1997
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015


In [9]:
# read each column with multiple columns
highest_movies[['Title', 'Rank', 'Year']]

Unnamed: 0,Title,Rank,Year
0,Avatar,1,2009
1,Avengers: Endgame,2,2019
2,Titanic,3,1997
3,Star Wars: Episode VII - The Force Awakens,4,2015
4,Avengers: Infinity War,5,2018
...,...,...,...
195,Detective Chinatown 2,196,2018
196,X-Men: Apocalypse,197,2016
197,Sherlock Holmes: A Game of Shadows,198,2011
198,Despicable Me,199,2010


In [10]:
# read each row
highest_movies.iloc[0:5]

Unnamed: 0,Rank,Title,Lifetime Gross,Year
0,1,Avatar,"$2,847,397,339",2009
1,2,Avengers: Endgame,"$2,797,501,328",2019
2,3,Titanic,"$2,201,647,264",1997
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015
4,5,Avengers: Infinity War,"$2,048,359,754",2018


In [11]:
# reading lifetime gross from 3 to 9
highest_movies.iloc[3:9]['Lifetime Gross']

3    $2,069,521,700 
4    $2,048,359,754 
5    $1,901,218,408 
6    $1,671,537,444 
7    $1,663,250,487 
8    $1,518,815,515 
Name: Lifetime Gross, dtype: object

In [12]:
lg = (highest_movies['Lifetime Gross'] == '$2,069,521,700')
highest_movies[lg]

Unnamed: 0,Rank,Title,Lifetime Gross,Year


In [13]:
# iterrate through each row
for Title, row in highest_movies.iterrows():
    print(Title,row['Title'])

0 Avatar
1 Avengers: Endgame
2 Titanic
3 Star Wars: Episode VII - The Force Awakens
4 Avengers: Infinity War
5 Spider-Man: No Way Home
6 Jurassic World
7 The Lion King
8 The Avengers
9 Furious 7
10 Frozen II
11 Avengers: Age of Ultron
12 Black Panther
13 Harry Potter and the Deathly Hallows: Part 2
14 Star Wars: Episode VIII - The Last Jedi
15 Jurassic World: Fallen Kingdom
16 Frozen
17 Beauty and the Beast
18 Incredibles 2
19 The Fate of the Furious
20 Iron Man 3
21 Minions
22 Captain America: Civil War
23 Aquaman
24 The Lord of the Rings: The Return of the King
25 Spider-Man: Far from Home
26 Captain Marvel
27 Transformers: Dark of the Moon
28 Skyfall
29 Transformers: Age of Extinction
30 Jurassic Park
31 The Dark Knight Rises
32 Joker
33 Star Wars: Episode IX - The Rise of Skywalker
34 Toy Story 4
35 Toy Story 3
36 Pirates of the Caribbean: Dead Man's Chest
37 The Lion King
38 Rogue One: A Star Wars Story
39 Aladdin
40 Pirates of the Caribbean: On Stranger Tides
41 Despicable Me 3
4

In [14]:
# iterrate through each column DataFrame

In [24]:
# read a specific location
#for gross, row in highest_movies.iterrows():
    #print(gross, row['Lifetime Gross' >= 1000000000])
    
lg = (highest_movies['Lifetime Gross'] >= "$2,000,000,000")
highest_movies[lg]

Unnamed: 0,Rank,Title,Lifetime Gross,Year,Total
0,1,Avatar,"$2,847,397,339",2009,"$2,847,397,339"
1,2,Avengers: Endgame,"$2,797,501,328",2019,"$2,797,501,328"
2,3,Titanic,"$2,201,647,264",1997,"$2,201,647,264"
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015,"$2,069,521,700"
4,5,Avengers: Infinity War,"$2,048,359,754",2018,"$2,048,359,754"
...,...,...,...,...,...
195,196,Detective Chinatown 2,"$544,185,156",2018,"$544,185,156"
196,197,X-Men: Apocalypse,"$543,934,105",2016,"$543,934,105"
197,198,Sherlock Holmes: A Game of Shadows,"$543,848,418",2011,"$543,848,418"
198,199,Despicable Me,"$543,157,985",2010,"$543,157,985"


# Sorting/Describing Data

In [16]:
highest_movies.describe()

Unnamed: 0,Rank,Year
count,200.0,200.0
mean,100.5,2011.57
std,57.879185,7.045801
min,1.0,1977.0
25%,50.75,2008.0
50%,100.5,2013.0
75%,150.25,2017.0
max,200.0,2022.0


In [17]:
# for columns in alphabetical order
highest_movies.sort_values('Title')

Unnamed: 0,Rank,Title,Lifetime Gross,Year
97,98,2012,"$791,217,826",2009
39,40,Aladdin,"$1,050,693,953",2019
44,45,Alice in Wonderland,"$1,025,468,216",2010
193,194,American Sniper,"$547,459,020",2014
161,162,Ant-Man and the Wasp,"$622,674,139",2018
...,...,...,...,...
74,75,Wolf Warrior 2,"$870,325,439",2017
85,86,Wonder Woman,"$822,854,286",2017
196,197,X-Men: Apocalypse,"$543,934,105",2016
115,116,X-Men: Days of Future Past,"$746,045,700",2014


In [18]:
# for columns in reverse alphabetical order
highest_movies.sort_values('Title', ascending=False)

Unnamed: 0,Rank,Title,Lifetime Gross,Year
45,46,Zootopia,"$1,024,121,104",2016
115,116,X-Men: Days of Future Past,"$746,045,700",2014
196,197,X-Men: Apocalypse,"$543,934,105",2016
85,86,Wonder Woman,"$822,854,286",2017
74,75,Wolf Warrior 2,"$870,325,439",2017
...,...,...,...,...
161,162,Ant-Man and the Wasp,"$622,674,139",2018
193,194,American Sniper,"$547,459,020",2014
44,45,Alice in Wonderland,"$1,025,468,216",2010
39,40,Aladdin,"$1,050,693,953",2019


# Making Changes to the Data

In [19]:
# adds new column total
highest_movies['Total'] = highest_movies['Lifetime Gross']
highest_movies.head(5)

Unnamed: 0,Rank,Title,Lifetime Gross,Year,Total
0,1,Avatar,"$2,847,397,339",2009,"$2,847,397,339"
1,2,Avengers: Endgame,"$2,797,501,328",2019,"$2,797,501,328"
2,3,Titanic,"$2,201,647,264",1997,"$2,201,647,264"
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015,"$2,069,521,700"
4,5,Avengers: Infinity War,"$2,048,359,754",2018,"$2,048,359,754"


In [20]:
# drops new column total
highest_movies = highest_movies.drop(columns = ['Total'])
highest_movies.head(5)

Unnamed: 0,Rank,Title,Lifetime Gross,Year
0,1,Avatar,"$2,847,397,339",2009
1,2,Avengers: Endgame,"$2,797,501,328",2019
2,3,Titanic,"$2,201,647,264",1997
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015
4,5,Avengers: Infinity War,"$2,048,359,754",2018


In [21]:
highest_movies['Total'] = highest_movies.iloc[:,2:3].sum(axis=1)
highest_movies.head(5)

Unnamed: 0,Rank,Title,Lifetime Gross,Year,Total
0,1,Avatar,"$2,847,397,339",2009,"$2,847,397,339"
1,2,Avengers: Endgame,"$2,797,501,328",2019,"$2,797,501,328"
2,3,Titanic,"$2,201,647,264",1997,"$2,201,647,264"
3,4,Star Wars: Episode VII - The Force Awakens,"$2,069,521,700",2015,"$2,069,521,700"
4,5,Avengers: Infinity War,"$2,048,359,754",2018,"$2,048,359,754"


# Saving the Data

In [22]:
highest_movies.to_csv('modified.csv')

# Filtering Data

In [23]:
highest_movies.loc[(highest_movies['Year'] == 2018)] 

Unnamed: 0,Rank,Title,Lifetime Gross,Year,Total
4,5,Avengers: Infinity War,"$2,048,359,754",2018,"$2,048,359,754"
12,13,Black Panther,"$1,347,597,973",2018,"$1,347,597,973"
15,16,Jurassic World: Fallen Kingdom,"$1,310,466,296",2018,"$1,310,466,296"
18,19,Incredibles 2,"$1,243,089,244",2018,"$1,243,089,244"
23,24,Aquaman,"$1,148,528,393",2018,"$1,148,528,393"
62,63,Bohemian Rhapsody,"$910,809,311",2018,"$910,809,311"
79,80,Venom,"$856,085,151",2018,"$856,085,151"
96,97,Mission: Impossible - Fallout,"$791,657,398",2018,"$791,657,398"
101,102,Deadpool 2,"$785,896,609",2018,"$785,896,609"
145,146,Fantastic Beasts: The Crimes of Grindelwald,"$654,855,901",2018,"$654,855,901"
