# Block 1

In [1]:
# imported the libraries
import pandas as pd
import numpy as np

# Block 2

In [2]:
# scrape the tables from the MonsterVerse Wikipedia page and keep the one at position 3
MONSTERVERSE_URL = 'https://en.wikipedia.org/wiki/MonsterVerse'
monsterVerse_data = pd.read_html(MONSTERVERSE_URL)
# NOTE(review): position 3 is hard-coded; it presumably breaks if the page's table order changes - confirm
uncleaned_monsterVerse_df = monsterVerse_data[3]

# Block 3

In [6]:
# function for cleaning up the web scraped data
def data_cleaning(uncleaned_df):
  """
  purpose of function: making the web scraped data more presentable

  input:
  uncleaned_df = a dataframe whose first column holds the row labels.
                 the column names may be tuples (multi-level header) or plain
                 single-level names. the dataframe passed in is NOT modified.

  output:
  a new dataframe where
    - missing values are replaced with the string 'NaN'
    - the first column is the index
    - for tuple column names: the index is named after the first level of the
      first column, and every other column is named after its second level
    - rows whose cells all hold the same value (e.g. a section heading that
      spans the whole table) are removed

  note: a row is removed whenever all of its cells match, so a genuine row that
  happens to have the same value in every column is removed as well.
  """
  # replaced NaN (float) with 'NaN' (string) on a copy, so the caller's dataframe stays untouched
  filled_df = uncleaned_df.fillna('NaN')

  # got the first column name in the dataframe
  first_col = filled_df.columns[0]

  # set the first column to the index of the dataframe
  cleaned_df = filled_df.set_index(first_col)

  # only tuple (multi-level) names need flattening; plain names are kept as they are
  if isinstance(first_col, tuple):
    # renamed the index to the top level of the first column's name
    cleaned_df = cleaned_df.rename_axis(first_col[0])

    # renamed the columns to the movie title (the second level of each name)
    cleaned_df.columns = [col[1] for col in cleaned_df.columns]

  # removed rows w/ duplicates
  # (with a single column every row would trivially count as "all duplicates", so skip the check)
  if cleaned_df.shape[1] > 1:
    # True where every cell in the row holds the same value
    all_same = cleaned_df.nunique(axis=1, dropna=False) == 1

    # filter by row position instead of by label, so repeated index labels are handled too
    cleaned_df = cleaned_df.loc[~all_same.to_numpy()]

  return cleaned_df

# Block 4

In [7]:
# running the function w/ the MonsterVerse data:
# pass the scraped table through data_cleaning and keep the result for the later cells
cleaned_monsterVerse_df = data_cleaning(uncleaned_monsterVerse_df)
# print the cleaned dataframe as plain text
print(cleaned_monsterVerse_df)

                                          Godzilla  ...            Godzilla vs. Kong
Character                                           ...                             
Godzilla                               T.J. StormS  ...                          CGI
MUTO                          Matt CrossSLee RossS  ...              Archive footage
King Kong                                      NaN  ...              Eric PeteyS[57]
King Ghidorah                                  NaN  ...              Archive footage
Rodan                                          NaN  ...              Archive footage
Ishiro Serizawa                       Ken Watanabe  ...                          NaN
Vivienne Graham                      Sally Hawkins  ...                          NaN
William Stentz                    David Strathairn  ...                          NaN
Ford Brody           Aaron Taylor-JohnsonCJ AdamsY  ...                          NaN
Elle Brody                         Elizabeth Olsen  ...          

# Block 5

In [8]:
# saved the cleaned MonsterVerse dataframe as a csv file w/ the delimiter being tabs
# note: the file name ends in .csv but the content is tab-separated (sep='\t'),
# so it has to be read back with the same separator; the path is relative to the working directory
cleaned_monsterVerse_df.to_csv('cleaned_monsterVerse_data.csv',sep='\t')

# Block 6

In [9]:
# Fast and Furious: scrape the characters page, clean the table at position 2, print it, save it
FAF_URL = 'https://en.wikipedia.org/wiki/List_of_Fast_%26_Furious_characters#Characters_table'
FAF = pd.read_html(FAF_URL)

# NOTE(review): position 2 is hard-coded; it presumably breaks if the page's table order changes - confirm
uncleaned_FAF_df = FAF[2]

# same cleaning function that was used for the MonsterVerse table
cleaned_FAF_df = data_cleaning(uncleaned_FAF_df)
print(cleaned_FAF_df)

# write the cleaned Fast and Furious dataframe to disk, tab-delimited
cleaned_FAF_df.to_csv('cleaned_FAF_data.csv', sep='\t')

                       Portrayed by  ...           F9
Character                            ...             
Dominic Toretto          Vin Diesel  ...         Main
Brian O'Conner        Paul Walker †  ...  Archive [1]
Letty Ortiz      Michelle Rodriguez  ...         Main
Mia Toretto        Jordana Brewster  ...         Main
Roman Pearce          Tyrese Gibson  ...         Main
...                             ...  ...          ...
Denlinger         Samuel M. Stewart  ...          NaN
Mose Jakande         Djimon Hounsou  ...          NaN
Kiet                       Tony Jaa  ...          NaN
Kara                   Ronda Rousey  ...          NaN
Connor Rhodes       Kristofer Hivju  ...          NaN

[100 rows x 13 columns]
