# Set up

In [95]:
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd

# configurations
# Show full cell text instead of truncating long strings (consensus / synopsis columns).
# `None` is the supported "no limit" value; the old `-1` sentinel was deprecated in
# pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

In [2]:
# initial setup: download the guide page and parse it into a BeautifulSoup tree
base_url = "https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/2/"
# requests has no default timeout, so bound the wait instead of hanging the kernel
response = requests.get(base_url, timeout=30)
# fail loudly on 4xx/5xx rather than silently parsing an error page
response.raise_for_status()
html = response.content
soup = BeautifulSoup(html, 'lxml')

In [3]:
# keep a pretty-printed, UTF-8 encoded copy of the parsed page on disk
with open("140 Action Movies_Rotten Tomatoes.html", "wb") as html_file:
    pretty_bytes = soup.prettify("utf-8")
    html_file.write(pretty_bytes)

# Extract

In [5]:
# every <div> whose class attribute matches the string below; the later cells
# read each movie field from these elements
content_class = {"class": "col-sm-18 col-full-xs countdown-item-content"}
all_div = soup.find_all("div", content_class)

In [7]:
# first <h2> inside each content block
headings = [entry.find("h2") for entry in all_div]

In [11]:
# inspect the first heading to see which child tags are available
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>

In [28]:
# Title: text of the first <a> found in each heading
movie_titles = [h2.find("a").text for h2 in headings]
print(len(movie_titles), movie_titles[0], sep="\n")

140
Running Scared


In [38]:
# Years: read the "start-year" span text, then strip the parentheses and cast to int
years_raw = [h2.find("span", {"class": "subtle start-year"}).text for h2 in headings]
print(len(years_raw))
print(years_raw[0])                 # raw text still carries "(" and ")"
print(years_raw[0].strip("()"))     # cleaned text
years = [int(label.strip("()")) for label in years_raw]   # clean + convert in one pass
print(years[0])

140
(1986)
1986
1986


In [44]:
# Score: text of the "tMeterScore" span in each heading, kept as text
scores = [h2.find("span", {"class": "tMeterScore"}).text for h2 in headings]
print(scores[0], len(scores), sep="\n")

57%
140


In [90]:
# Adjusted Score: drop the "Adjusted Score: " label and surrounding spaces
adj_scores = [
    entry.find("div", {"class": "info countdown-adjusted-score"}).text
    .replace("Adjusted Score: ", "")
    .strip(" ")
    for entry in all_div
]
print(adj_scores[0], len(adj_scores), sep="\n")

58.22%
140


In [69]:
# Critics Consensus: element text with the "Critics Consensus: " label removed
critics_consensus = [
    entry.find("div", {"class": "info critics-consensus"}).text.replace("Critics Consensus: ", "")
    for entry in all_div
]
print(critics_consensus[0], len(critics_consensus), sep="\n")

Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.
140


In [102]:
# Synopsis: element text without the "Synopsis: " label and the " [More]" marker
synopsis = []
for entry in all_div:
    blurb = entry.find("div", {"class": "info synopsis"}).text
    synopsis.append(blurb.replace("Synopsis: ", "").replace(" [More]", ""))
print(len(synopsis))
print(synopsis[0])

140
Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this...


In [80]:
# Cast: element text with the "\nStarring: " label removed
cast_raw = (entry.find("div", {"class": "info cast"}).text for entry in all_div)
cast = [names.replace("\nStarring: ", "") for names in cast_raw]
print(len(cast), cast[0], sep="\n")

140
Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer


In [76]:
# Directors: element text with the "\nDirected By: " label removed
directors = [
    entry.find("div", {"class": "info director"}).text.replace("\nDirected By: ", "")
    for entry in all_div
]
print(len(directors), directors[0], sep="\n")

140
Peter Hyams


In [92]:
# Rank: countdown index text with the "#" removed (kept as text, not converted)
ranks = [
    entry.find("div", {"class": "countdown-index"}).text.replace("#", "")
    for entry in all_div
]
print(len(ranks), ranks[4], sep="\n")

140
136


# Store data in structured form

In [103]:
# assemble every extracted list into one table; dict order fixes the column order
rotten_tomatoes = pd.DataFrame(
    {
        "Movie_Title": movie_titles,
        "Rank": ranks,
        "Year": years,
        "Score": scores,
        "Adjusted_Score": adj_scores,
        "Director": directors,
        "Cast": cast,
        "Critics_Consensus": critics_consensus,
        "Synopsis": synopsis,
    }
)

In [104]:
# display the assembled table as the cell output
rotten_tomatoes

Unnamed: 0,Movie_Title,Rank,Year,Score,Adjusted_Score,Director,Cast,Critics_Consensus,Synopsis
0,Running Scared,140,1986,57%,58.22%,Peter Hyams,"Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer","Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.","Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this..."
1,Equilibrium,139,2002,41%,43.025%,Kurt Wimmer,"Christian Bale, Emily Watson, Taye Diggs, Angus Macfadyen",Equilibrium is a reheated mishmash of other sci-fi movies.,"In the nation of Libria, there is always peace among men. The rules of the Librian system are simple. If..."
2,Hero,138,2004,95%,100.865%,Zhang Yimou,"Jet Li, Tony Leung Chiu Wai, Maggie Cheung, Daoming Chen","With death-defying action sequences and epic historic sweep, Hero offers everything a martial arts fan could ask for.",Hero is two-time Academy Award nominee Zhang Yimou's directorial attempt at exploring the concept of a Chinese hero. During the...
3,Road House,137,1989,39%,41.989%,Rowdy Herrington,"Patrick Swayze, Kelly Lynch, Sam Elliott, Ben Gazzara",Whether Road House is simply bad or so bad it's good depends largely on the audience's fondness for Swayze -- and tolerance for violently cheesy action.,Dalton (Swayze) is a true gentleman with a degree in philosophy from NYU. He also has a flip side -...
4,Unstoppable,136,2010,87%,92.685%,Tony Scott,"Denzel Washington, Chris Pine, Rosario Dawson, Ethan Suplee","As fast, loud, and relentless as the train at the center of the story, Unstoppable is perfect popcorn entertainment -- and director Tony Scott's best movie in years.","In this action thriller from director Tony Scott, rookie train operator Will (Chris Pine) and grizzled veteran engineer Frank (Denzel..."
...,...,...,...,...,...,...,...,...,...
135,Lat sau san taam (Hard-Boiled),5,1992,94%,96.11%,John Woo,"Yun-Fat Chow, Tony Leung Chiu Wai, Anthony Wong, Teresa Mo","Boasting impactful action as well as surprising emotional resonance, Hard Boiled is a powerful thriller that hits hard in more ways than one.","Yun-Fat portrays a maverick, clarinet-playing cop nicknamed ""Tequila"" whose partner is killed in the dizzying chaos of a restaurant gunfight..."
136,The Matrix,4,1999,88%,94.881%,"Lilly Wachowski, Lana Wachowski","Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, Joe Pantoliano","Thanks to the Wachowskis' imaginative vision, The Matrix is a smartly crafted combination of spectacular action and groundbreaking special effects.","What if virtual reality wasn't just for fun, but was being used to imprison you? That's the dilemma that faces..."
137,Terminator 2: Judgment Day,3,1991,93%,99.169%,James Cameron,"Arnold Schwarzenegger, Linda Hamilton, Edward Furlong, Robert Patrick","T2 features thrilling action sequences and eye-popping visual effects, but what takes this sci-fi/ action landmark to the next level is the depth of the human (and cyborg) characters.","A sequel to the sci-fi action thriller that made him and star Arnold Schwarzenegger A-list Hollywood names, writer/director James Cameron..."
138,Die Hard,2,1988,94%,99.708%,John McTiernan,"Bruce Willis, Alan Rickman, Reginald VelJohnson, Bonnie Bedelia",Its many imitators (and sequels) have never come close to matching the taut thrills of the definitive holiday action classic.,"It's Christmas time in L.A., and there's an employee party in progress on the 30th floor of the Nakatomi Corporation..."


In [105]:
# write the table to disk: column names as the header row, no row-index column
csv_path = "Rotten_Tomatoes.csv"
rotten_tomatoes.to_csv(csv_path, header=True, index=False)

___