# Regression Open-Ended Project

-----

# Define Question / Hypothesis

**Use publicly available data to generate Roger Ebert-esque ratings for recent movies.**

In [1]:
import warnings
warnings.filterwarnings('ignore')

# Obtain Data via Scraping and APIs

- time: to add in a sleep delay when scraping
- tqdm: a nifty tool to show progress bar

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time
import tqdm
import pickle
import re
import datetime

### Scraping

- Manual - download website files locally
- Procedural - find AJAX script
- Pythonic - headless browser with Selenium

In [13]:
def scrape_eberts_review(num_pages=100):
    """
    Parses through webpage with list of movies and returns DataFrame.
    :num_pages = Number of pages to go through
    """
    url = "http://www.rogerebert.com/reviews?great_movies=0&no_stars=0&title=Cabin+in+the+Woods&filtersgreat_movies%5D%5B%5D=&filters%5Bno_stars%5D%5B%5D=&filters%5Bno_stars%5D%5B%5D=1&filters%5Btitle%5D=&filters%5Breviewers%5D=&filters%5Bgenres%5D=&page={}&sort%5Border%5D=newest"
    pages = list(range(1, num_pages))
    links = [url.format(i) for i in pages]
    
    review_list = list()
    
    for link in links:
        webpage = requests.get(link).text
        soup = BeautifulSoup(webpage, 'lxml')
        all_movies = soup('figure', {'class':'movie review'})
    
        for movie in all_movies:
            url = movie.a.get('href')
            title = movie.find_all('a')[1].text
            stars = len(movie.find_all('i', {'class':'icon-star-full'})) + 0.5 * len(movie.find_all('i', {'class':'icon-star-half'}))

            try:
                year = movie.find('span', {'class':'release-year'}).text[1:-1]
            except:
                year = ''

            review_list.append([title, stars, year, url])
    
    df = pd.DataFrame(review_list, columns = ['Title', 'EbertStars', 'Year', 'URL'])
    return df

In [20]:
review_df = scrape_eberts_review(num_pages=400)

In [21]:
print(review_df.shape)
print(review_df.dtypes)
print(review_df.head())
print(review_df.tail())

(9212, 4)
Title          object
EbertStars    float64
Year           object
URL            object
dtype: object
                 Title  EbertStars  Year                                URL
0  A Cure for Wellness         2.0  2017  /reviews/a-cure-for-wellness-2017
1      Big Little Lies         3.5  2017      /reviews/big-little-lies-2017
2       The Great Wall         3.0  2017       /reviews/the-great-wall-2017
3       American Fable         3.0  2017       /reviews/american-fable-2017
4             Lovesong         3.0  2017             /reviews/lovesong-2017
                       Title  EbertStars  Year  \
9207    Bang the Drum Slowly         4.0  1973   
9208        Visions of Eight         3.0  1973   
9209      The Mackintosh Man         1.5  1973   
9210  Jesus Christ Superstar         3.0  1973   
9211       American Graffiti         4.0  1973   

                                       URL  
9207    /reviews/bang-the-drum-slowly-1973  
9208        /reviews/visions-of-eight-197

In [24]:
def scrape_webpage(link):
    """
    Parses each individual review page and returns list of key attributes.
    :link = URL for review
    """
    link = "http://www.rogerebert.com" + link
    webpage = requests.get(link).text
    soup = BeautifulSoup(webpage, 'lxml')

    try:
        mpaa = soup.find('p', {'class':'mpaa-rating'}).strong.text[6:]
    except:
        mpaa = ''

    try: 
        runningtime = int(soup.find('p', {'class':'running-time'}).strong.text[:3].strip())
    except:
        runningtime = ''

    try:
        genres = soup.find('p', {'class':'genres'}).strong.text.replace(',', '').split()
    except:
        genres = []

    try:
        reviewbody = ' '.join([paragraph.text for paragraph in soup.find('div', {'itemprop':'reviewBody'}).find_all('p')])
    except:
        reviewbody = ''
    
    return [link, mpaa, runningtime, reviewbody]

In [27]:
scraped_list = list()

for movie in tqdm.tqdm(review_df['URL']):
    scraped_list.append(scrape_webpage(movie))
    time.sleep(0.5)

review_content = pd.DataFrame(scraped_list, columns = ['URL', 'Rating', 'Runtime', 'Review'])


  0%|          | 0/9212 [00:00<?, ?it/s][A
100%|██████████| 9212/9212 [1:54:47<00:00,  1.34it/s]


In [28]:
review_content.head()

Unnamed: 0,URL,Rating,Runtime,Review
0,http://www.rogerebert.com//reviews/a-cure-for-...,R,146,I keep forgetting the title of “A Cure for Wel...
1,http://www.rogerebert.com//reviews/big-little-...,NR,420,"HBO’s excellent “Big Little Lies,” based on th..."
2,http://www.rogerebert.com//reviews/the-great-w...,PG-13,103,"Chinese/American co-produced action-fantasy ""T..."
3,http://www.rogerebert.com//reviews/american-fa...,NR,96,11-year-old Gitty (Peyton Kennedy) listens as ...
4,http://www.rogerebert.com//reviews/lovesong-2017,NR,85,Whether you find “Lovesong” refreshingly coy a...


In [70]:
pickle.dump(review_df, open('data/review_df.pkl', 'wb'))
pickle.dump(review_content, open('data/review_content.pkl', 'wb'))

In [3]:
review_df = pickle.load(open('data/review_df.pkl', 'rb'))
review_content = pickle.load(open('data/review_content.pkl', 'rb'))

In [4]:
review_df.head()

Unnamed: 0,Title,EbertStars,Year,URL
0,A Cure for Wellness,2.0,2017,/reviews/a-cure-for-wellness-2017
1,Big Little Lies,3.5,2017,/reviews/big-little-lies-2017
2,The Great Wall,3.0,2017,/reviews/the-great-wall-2017
3,American Fable,3.0,2017,/reviews/american-fable-2017
4,Lovesong,3.0,2017,/reviews/lovesong-2017


In [5]:
def scrape_imdb_listing(df):
    """
    Searches IMDB, parses results and returns DataFrame.
    :df = DataFrame with movie titles
    """
    movie_list = list()

    for movie in tqdm.tqdm(df['Title']):
        base_url = 'http://www.imdb.com/find?q='
        url = base_url + movie +'&s=all'
        webpage = requests.get(url).text
        soup = BeautifulSoup(webpage, 'lxml')

        try:
            results = soup('table', {'class':'findList'})[0]
        except:
            continue
            
        title = results.find_all('tr')[0]
        link = title.find('a', href=True)['href']

        url = 'http://www.imdb.com' + link
        webpage = requests.get(url).text
        soup = BeautifulSoup(webpage, 'lxml')

        movie_title = soup.find('title')
        
        try: 
            rate = soup.find('span', itemprop='ratingValue').text
        except:
            rate = ''
        
        try: 
            count = soup.find('span', itemprop='ratingCount').text
        except:
            count = ''
        
        try: 
            des = soup.find('meta',{'name':'description'})['content']
        except:
            des = ''
        
        try: 
            metascore = soup.find('div', class_='metacriticScore').text
        except:
            metascore = ''

        try: 
            reviews_count = soup.find('div', class_='titleReviewbarItemBorder')
            u_reviews = reviews_count.find_all('a')[0].text.split(' ')[0]
            c_reviews = reviews_count.find_all('a')[1].text.split(' ')[0]
        except:
            u_reviews = []
            c_review = []

        try: 
            genre_items = soup.find_all('span', itemprop='genre')
            genre_list = [item.text for item in genre_items]
        except:
            genre_list = []

        try: 
            stars_items = soup.find_all('span', itemprop='actors')
            stars_list = [item.text.strip() for item in stars_items]
        except:
            stars_list = []

        try: 
            director = soup.find('span', itemprop='name').text
        except:
            director = ''

        try: 
            country = soup.find('div', class_='subtext').find_all('a', title=True)[-1].text.split(' ')[-1]
            country = re.sub('[\(\)\{\}<>]', '', country)
        except:
            country = ''
            
        try:
            rel_date = (', ').join(soup.find('div', class_='subtext').find_all('a', 
                                            title=True)[-1].text.split(' ')[:-1])
        except:
            rel_date = ''
    
        movie_list.append([movie, rate, count, des, metascore, u_reviews, c_reviews, 
                       genre_list, stars_list, director, country, rel_date])
        
        time.sleep(0.5)

    
    df = pd.DataFrame(movie_list, columns = ['Title', 'IMDB_Rating', 'Rating_Count', 
        'Description', 'Metascore', 'User_Review_Count', 'Critic_Review_Count',
        'Genre_List', 'Stars_List', 'Director', 'Country', 'Release_Date'])
    return df, movie_list

In [124]:
imdb_listings, temp_list = scrape_imdb_listing(review_df)


  0%|          | 0/9212 [00:00<?, ?it/s][A
  0%|          | 1/9212 [00:01<3:26:10,  1.34s/it][A
  0%|          | 2/9212 [00:02<3:30:39,  1.37s/it][A
  0%|          | 3/9212 [00:04<3:38:25,  1.42s/it][A
  0%|          | 4/9212 [00:05<3:45:17,  1.47s/it][A
100%|██████████| 9212/9212 [4:28:51<00:00,  1.68s/it]


In [74]:
pickle.dump(imdb_listings, open('data/imdb_listings.pkl', 'wb'))

In [6]:
imdb_listings = pickle.load(open('data/imdb_listings.pkl', 'rb'))

In [7]:
imdb_listings.head()

Unnamed: 0,Title,IMDB_Rating,Rating_Count,Description,Metascore,User_Review_Count,Critic_Review_Count,Genre_List,Stars_List,Director,Country,Release_Date
0,A Cure for Wellness,6.6,1646.0,"Directed by Gore Verbinski. With Dane DeHaan,...",\n47\n,16,90,"[Mystery, Thriller]","[Dane DeHaan,, Jason Isaacs,, Mia Goth]",Gore Verbinski,USA\n\n,"17, February, 2017"
1,Big Little Lies,,,"With Alexander Skarsgård, James Tupper, Laura ...",,[],90,"[Comedy, Crime, Drama]","[Alexander Skarsgård,, James Tupper,, Laura Dern]",Alexander Skarsgård,\n,"TV, Series, (2017–"
2,The Great Wall,6.3,14568.0,"Directed by Yimou Zhang. With Matt Damon, Tia...",\n42\n,96,152,"[Action, Adventure, Fantasy]","[Matt Damon,, Tian Jing,, Willem Dafoe]",Yimou Zhang,USA\n\n,"17, February, 2017"
3,American Fable,6.3,145.0,Directed by Anne Hamilton. With Peyton Kenned...,\n57\n,1,10,[Thriller],"[Peyton Kennedy,, Richard Schiff,, Kip Pardue]",Anne Hamilton,USA\n\n,"17, February, 2017"
4,Lovesong,6.4,153.0,"Directed by So Yong Kim. With Riley Keough, J...",\n74\n,3,14,[Drama],"[Riley Keough,, Jena Malone,, Jessie Ok Gray]",So Yong Kim,USA\n\n,"17, February, 2017"


In [8]:
imdb_listings.dtypes

Title                  object
IMDB_Rating            object
Rating_Count           object
Description            object
Metascore              object
User_Review_Count      object
Critic_Review_Count    object
Genre_List             object
Stars_List             object
Director               object
Country                object
Release_Date           object
dtype: object

# Process and Clean Data

In [9]:
review_df = pickle.load(open('data/review_df.pkl', 'rb'))

In [10]:
print(review_df.shape)
print(review_df.dtypes)
review_df.head()

(9212, 4)
Title          object
EbertStars    float64
Year           object
URL            object
dtype: object


Unnamed: 0,Title,EbertStars,Year,URL
0,A Cure for Wellness,2.0,2017,/reviews/a-cure-for-wellness-2017
1,Big Little Lies,3.5,2017,/reviews/big-little-lies-2017
2,The Great Wall,3.0,2017,/reviews/the-great-wall-2017
3,American Fable,3.0,2017,/reviews/american-fable-2017
4,Lovesong,3.0,2017,/reviews/lovesong-2017


In [11]:
def convert_year(row):
    try:
        year = int(row['Year'])
        return year
    except:
        return np.nan

In [12]:
review_df['Year'] = review_df.apply(lambda x: convert_year(x), 1)

In [13]:
review_df.head()

Unnamed: 0,Title,EbertStars,Year,URL
0,A Cure for Wellness,2.0,2017.0,/reviews/a-cure-for-wellness-2017
1,Big Little Lies,3.5,2017.0,/reviews/big-little-lies-2017
2,The Great Wall,3.0,2017.0,/reviews/the-great-wall-2017
3,American Fable,3.0,2017.0,/reviews/american-fable-2017
4,Lovesong,3.0,2017.0,/reviews/lovesong-2017


In [14]:
print(review_content.shape)
print(review_content.dtypes)
review_content.head()

(9212, 4)
URL        object
Rating     object
Runtime    object
Review     object
dtype: object


Unnamed: 0,URL,Rating,Runtime,Review
0,http://www.rogerebert.com//reviews/a-cure-for-...,R,146,I keep forgetting the title of “A Cure for Wel...
1,http://www.rogerebert.com//reviews/big-little-...,NR,420,"HBO’s excellent “Big Little Lies,” based on th..."
2,http://www.rogerebert.com//reviews/the-great-w...,PG-13,103,"Chinese/American co-produced action-fantasy ""T..."
3,http://www.rogerebert.com//reviews/american-fa...,NR,96,11-year-old Gitty (Peyton Kennedy) listens as ...
4,http://www.rogerebert.com//reviews/lovesong-2017,NR,85,Whether you find “Lovesong” refreshingly coy a...


In [15]:
def convert_runtime(row):
    try:
        runtime = int(row['Runtime'])
        return runtime
    except:
        return np.nan

In [16]:
review_content['Runtime'] = review_content.apply(lambda x: convert_runtime(x), 1)

In [17]:
review_content['URL'] = review_content['URL'].apply(lambda x: x.replace('http://www.rogerebert.com/', ''))

In [18]:
review_content.head()

Unnamed: 0,URL,Rating,Runtime,Review
0,/reviews/a-cure-for-wellness-2017,R,146.0,I keep forgetting the title of “A Cure for Wel...
1,/reviews/big-little-lies-2017,NR,420.0,"HBO’s excellent “Big Little Lies,” based on th..."
2,/reviews/the-great-wall-2017,PG-13,103.0,"Chinese/American co-produced action-fantasy ""T..."
3,/reviews/american-fable-2017,NR,96.0,11-year-old Gitty (Peyton Kennedy) listens as ...
4,/reviews/lovesong-2017,NR,85.0,Whether you find “Lovesong” refreshingly coy a...


In [19]:
print(imdb_listings.shape)
print(imdb_listings.dtypes)
imdb_listings.head()

(9194, 12)
Title                  object
IMDB_Rating            object
Rating_Count           object
Description            object
Metascore              object
User_Review_Count      object
Critic_Review_Count    object
Genre_List             object
Stars_List             object
Director               object
Country                object
Release_Date           object
dtype: object


Unnamed: 0,Title,IMDB_Rating,Rating_Count,Description,Metascore,User_Review_Count,Critic_Review_Count,Genre_List,Stars_List,Director,Country,Release_Date
0,A Cure for Wellness,6.6,1646.0,"Directed by Gore Verbinski. With Dane DeHaan,...",\n47\n,16,90,"[Mystery, Thriller]","[Dane DeHaan,, Jason Isaacs,, Mia Goth]",Gore Verbinski,USA\n\n,"17, February, 2017"
1,Big Little Lies,,,"With Alexander Skarsgård, James Tupper, Laura ...",,[],90,"[Comedy, Crime, Drama]","[Alexander Skarsgård,, James Tupper,, Laura Dern]",Alexander Skarsgård,\n,"TV, Series, (2017–"
2,The Great Wall,6.3,14568.0,"Directed by Yimou Zhang. With Matt Damon, Tia...",\n42\n,96,152,"[Action, Adventure, Fantasy]","[Matt Damon,, Tian Jing,, Willem Dafoe]",Yimou Zhang,USA\n\n,"17, February, 2017"
3,American Fable,6.3,145.0,Directed by Anne Hamilton. With Peyton Kenned...,\n57\n,1,10,[Thriller],"[Peyton Kennedy,, Richard Schiff,, Kip Pardue]",Anne Hamilton,USA\n\n,"17, February, 2017"
4,Lovesong,6.4,153.0,"Directed by So Yong Kim. With Riley Keough, J...",\n74\n,3,14,[Drama],"[Riley Keough,, Jena Malone,, Jessie Ok Gray]",So Yong Kim,USA\n\n,"17, February, 2017"


In [20]:
imdb_listings = pickle.load(open('data/imdb_listings.pkl', 'rb'))

In [21]:
imdb_listings.head()

Unnamed: 0,Title,IMDB_Rating,Rating_Count,Description,Metascore,User_Review_Count,Critic_Review_Count,Genre_List,Stars_List,Director,Country,Release_Date
0,A Cure for Wellness,6.6,1646.0,"Directed by Gore Verbinski. With Dane DeHaan,...",\n47\n,16,90,"[Mystery, Thriller]","[Dane DeHaan,, Jason Isaacs,, Mia Goth]",Gore Verbinski,USA\n\n,"17, February, 2017"
1,Big Little Lies,,,"With Alexander Skarsgård, James Tupper, Laura ...",,[],90,"[Comedy, Crime, Drama]","[Alexander Skarsgård,, James Tupper,, Laura Dern]",Alexander Skarsgård,\n,"TV, Series, (2017–"
2,The Great Wall,6.3,14568.0,"Directed by Yimou Zhang. With Matt Damon, Tia...",\n42\n,96,152,"[Action, Adventure, Fantasy]","[Matt Damon,, Tian Jing,, Willem Dafoe]",Yimou Zhang,USA\n\n,"17, February, 2017"
3,American Fable,6.3,145.0,Directed by Anne Hamilton. With Peyton Kenned...,\n57\n,1,10,[Thriller],"[Peyton Kennedy,, Richard Schiff,, Kip Pardue]",Anne Hamilton,USA\n\n,"17, February, 2017"
4,Lovesong,6.4,153.0,"Directed by So Yong Kim. With Riley Keough, J...",\n74\n,3,14,[Drama],"[Riley Keough,, Jena Malone,, Jessie Ok Gray]",So Yong Kim,USA\n\n,"17, February, 2017"


In [22]:
def convert_imdb_rating(row):
    try:
        rating = float(row['IMDB_Rating'])
        return rating
    except:
        return np.nan

def convert_rating_count(row):
    try:
        count = float(row['Rating_Count'].replace(',', ''))
        return count
    except:
        return np.nan
    
def user_review_count(row):
    try:
        count = float(row['User_Review_Count'].replace(',', ''))
        return count
    except:
        return np.nan

def critic_review_count(row):
    try:
        count = float(row['Critic_Review_Count'].replace(',', ''))
        return count
    except:
        return np.nan

def convert_metascore(row):
    try:
        score = float(row['Metascore'].strip())
        return score
    except:
        return np.nan
    
def convert_country(row):
    try:
        country = row['Country'].strip()
        return country
    except:
        return np.nan
    
def convert_release_date(row):
    try:
        rel_date = row['Release_Date'].strip()

        if 'TV' in rel_date:
            return np.nan
        else:
            try:
                rel_date = datetime.datetime.strptime(rel_date, "%d, %B, %Y")
                return rel_date
            except:
                return np.nan
            
    except:
        return np.nan
    
def convert_genre(row):
    try:
        genres = ', '.join(row['Genre_List'])
        return genres
    except:
        return np.nan
    
def convert_actors(row):
    try:
        actors = ', '.join(row['Stars_List'])
        return actors
    except:
        return np.nan

In [23]:
imdb_listings['IMDB_Rating'] = imdb_listings.apply(lambda x: convert_imdb_rating(x), 1)
imdb_listings['Rating_Count'] = imdb_listings.apply(lambda x: convert_rating_count(x), 1)
imdb_listings['User_Review_Count'] = imdb_listings.apply(lambda x: user_review_count(x), 1)
imdb_listings['Critic_Review_Count'] = imdb_listings.apply(lambda x: critic_review_count(x), 1)
imdb_listings['Metascore'] = imdb_listings.apply(lambda x: convert_metascore(x), 1)
imdb_listings['Country'] = imdb_listings.apply(lambda x: convert_country(x), 1)
imdb_listings['Release_Date'] = imdb_listings.apply(lambda x: convert_release_date(x), 1)
imdb_listings['Genre_List'] = imdb_listings.apply(lambda x: convert_genre(x), 1)
imdb_listings['Stars_List'] = imdb_listings.apply(lambda x: convert_actors(x), 1)

In [24]:
imdb_listings.head()

Unnamed: 0,Title,IMDB_Rating,Rating_Count,Description,Metascore,User_Review_Count,Critic_Review_Count,Genre_List,Stars_List,Director,Country,Release_Date
0,A Cure for Wellness,6.6,1646.0,"Directed by Gore Verbinski. With Dane DeHaan,...",47.0,16.0,90.0,"Mystery, Thriller","Dane DeHaan,, Jason Isaacs,, Mia Goth",Gore Verbinski,USA,2017-02-17
1,Big Little Lies,,,"With Alexander Skarsgård, James Tupper, Laura ...",,,90.0,"Comedy, Crime, Drama","Alexander Skarsgård,, James Tupper,, Laura Dern",Alexander Skarsgård,,NaT
2,The Great Wall,6.3,14568.0,"Directed by Yimou Zhang. With Matt Damon, Tia...",42.0,96.0,152.0,"Action, Adventure, Fantasy","Matt Damon,, Tian Jing,, Willem Dafoe",Yimou Zhang,USA,2017-02-17
3,American Fable,6.3,145.0,Directed by Anne Hamilton. With Peyton Kenned...,57.0,1.0,10.0,Thriller,"Peyton Kennedy,, Richard Schiff,, Kip Pardue",Anne Hamilton,USA,2017-02-17
4,Lovesong,6.4,153.0,"Directed by So Yong Kim. With Riley Keough, J...",74.0,3.0,14.0,Drama,"Riley Keough,, Jena Malone,, Jessie Ok Gray",So Yong Kim,USA,2017-02-17


In [25]:
combined_df = pd.merge(review_df, review_content, how='left', on='URL')

In [26]:
ebert_imdb_df = pd.merge(imdb_listings, combined_df, how='left', on='Title')
ebert_imdb_df.head()

Unnamed: 0,Title,IMDB_Rating,Rating_Count,Description,Metascore,User_Review_Count,Critic_Review_Count,Genre_List,Stars_List,Director,Country,Release_Date,EbertStars,Year,URL,Rating,Runtime,Review
0,A Cure for Wellness,6.6,1646.0,"Directed by Gore Verbinski. With Dane DeHaan,...",47.0,16.0,90.0,"Mystery, Thriller","Dane DeHaan,, Jason Isaacs,, Mia Goth",Gore Verbinski,USA,2017-02-17,2.0,2017.0,/reviews/a-cure-for-wellness-2017,R,146.0,I keep forgetting the title of “A Cure for Wel...
1,Big Little Lies,,,"With Alexander Skarsgård, James Tupper, Laura ...",,,90.0,"Comedy, Crime, Drama","Alexander Skarsgård,, James Tupper,, Laura Dern",Alexander Skarsgård,,NaT,3.5,2017.0,/reviews/big-little-lies-2017,NR,420.0,"HBO’s excellent “Big Little Lies,” based on th..."
2,The Great Wall,6.3,14568.0,"Directed by Yimou Zhang. With Matt Damon, Tia...",42.0,96.0,152.0,"Action, Adventure, Fantasy","Matt Damon,, Tian Jing,, Willem Dafoe",Yimou Zhang,USA,2017-02-17,3.0,2017.0,/reviews/the-great-wall-2017,PG-13,103.0,"Chinese/American co-produced action-fantasy ""T..."
3,American Fable,6.3,145.0,Directed by Anne Hamilton. With Peyton Kenned...,57.0,1.0,10.0,Thriller,"Peyton Kennedy,, Richard Schiff,, Kip Pardue",Anne Hamilton,USA,2017-02-17,3.0,2017.0,/reviews/american-fable-2017,NR,96.0,11-year-old Gitty (Peyton Kennedy) listens as ...
4,Lovesong,6.4,153.0,"Directed by So Yong Kim. With Riley Keough, J...",74.0,3.0,14.0,Drama,"Riley Keough,, Jena Malone,, Jessie Ok Gray",So Yong Kim,USA,2017-02-17,3.0,2017.0,/reviews/lovesong-2017,NR,85.0,Whether you find “Lovesong” refreshingly coy a...


In [27]:
pickle.dump(ebert_imdb_df, open('data/ebert_imdb_df_v1.pkl', 'wb'))

# Plan for Following Notebooks

- Exploratory Data Analysis
- Feature Engineering
- Making predictions
- Final analysis