In [28]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [29]:
def get_content_value(row_data):
    if row_data.find('li'):
        return [li.get_text(' ', strip=True).replace('\xa0','') for li in row_data.find_all('li')]
    else:
        return row_data.get_text(' ', strip=True).replace('\xa0','')

def get_info_box(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    info = soup.find('table', class_='infobox vevent')
    info_rows = info.find_all('tr')
    
    
    movie_info = {}
    for index, row in enumerate(info_rows):
        if index==0:
            movie_info['title'] = row.find('th').get_text()
        elif index==1:
            continue
        else:
            content_key = row.find('th').get_text(' ',strip=True)
            content_value = get_content_value(row.find('td'))
            movie_info[content_key] = content_value
    
    return movie_info
    
    

In [30]:
page = requests.get('https://en.wikipedia.org/wiki/List_of_Marvel_Cinematic_Universe_films')
soup = BeautifulSoup(page.content, 'html.parser')
movies = soup.select('.wikitable.plainrowheaders i')

base_path= 'https://www.wikipedia.org/'

movie_info_list = []

for index, movie in enumerate(movies[:30]):
    try:
        relative_path = movie.a['href']
        full_path = base_path + relative_path
        title = movie.a['title']
        
        movie_info_list.append(get_info_box(full_path))
    except:
        pass

In [31]:
movie_info_list

[{'title': 'Iron Man',
  'Directed by': 'Jon Favreau',
  'Screenplay by': ['Mark Fergus Hawk Ostby', 'Art Marcum Matt Holloway'],
  'Based on': ['Stan Lee', 'Larry Lieber', 'Don Heck', 'Jack Kirby'],
  'Produced by': ['Avi Arad', 'Kevin Feige'],
  'Starring': ['Robert Downey Jr.',
   'Terrence Howard',
   'Jeff Bridges',
   'Shaun Toub',
   'Gwyneth Paltrow'],
  'Cinematography': 'Matthew Libatique',
  'Edited by': 'Dan Lebental',
  'Music by': 'Ramin Djawadi',
  'Production company': 'Marvel Studios',
  'Distributed by': 'Paramount Pictures [N 1]',
  'Release date': ['April14,2008 ( 2008-04-14 ) (Sydney)',
   'May2,2008 ( 2008-05-02 ) (United States)'],
  'Running time': '126 minutes [4]',
  'Country': 'United States',
  'Language': 'English',
  'Budget': '$140million [5]',
  'Box office': '$585.8million [5]'},
 {'title': 'The Incredible Hulk',
  'Directed by': 'Louis Leterrier',
  'Written by': 'Zak Penn',
  'Based on': ['Stan Lee', 'Jack Kirby'],
  'Produced by': ['Avi Arad', 'Gale 

In [32]:
import json

def save_data(title,data):
    with open(title, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
        
        
def load_data(title):
    with open(title, encoding='utf-8') as f:
        return json.load(f)

In [33]:
save_data('marvel_movies.json', movie_info_list)

In [34]:
pd.read_json('marvel_movies.json')

Unnamed: 0,title,Directed by,Screenplay by,Based on,Produced by,Starring,Cinematography,Edited by,Music by,Production company,Distributed by,Release date,Running time,Country,Language,Budget,Box office,Written by,Production companies,Story by
0,Iron Man,Jon Favreau,"[Mark Fergus Hawk Ostby, Art Marcum Matt Hollo...","[Stan Lee, Larry Lieber, Don Heck, Jack Kirby]","[Avi Arad, Kevin Feige]","[Robert Downey Jr., Terrence Howard, Jeff Brid...",Matthew Libatique,Dan Lebental,Ramin Djawadi,Marvel Studios,Paramount Pictures [N 1],"[April14,2008 ( 2008-04-14 ) (Sydney), May2,20...",126 minutes [4],United States,English,$140million [5],$585.8million [5],,,
1,The Incredible Hulk,Louis Leterrier,,"[Stan Lee, Jack Kirby]","[Avi Arad, Gale Anne Hurd, Kevin Feige]","[Edward Norton, Liv Tyler, Tim Roth, Tim Blake...",Peter Menzies Jr.,"[John Wright, Rick Shaine, Vincent Tabaillon]",Craig Armstrong,,Universal Pictures,"[June8,2008 ( 2008-06-08 ) ( Gibson Amphitheat...",112 minutes [1],United States,English,$137.5–150 million [2] [3],$264.8 million [4],Zak Penn,"[Marvel Studios, Valhalla Motion Pictures]",
2,Thor,Kenneth Branagh,"[Ashley Edward Miller, Zack Stentz, Don Payne]","[Stan Lee, Larry Lieber, Jack Kirby]",Kevin Feige,"[Chris Hemsworth, Natalie Portman, Tom Hiddles...",Haris Zambarloukos [1],Paul Rubell [1],Patrick Doyle,Marvel Studios,Paramount Pictures [N 1],"[April17,2011 ( 2011-04-17 ) ( Sydney ), May6,...",114 minutes [5],United States,English,$150 million [6],$449.3million [7],,,"[J. Michael Straczynski, Mark Protosevich]"
3,Captain America: The First Avenger,Joe Johnston,Christopher Markus Stephen McFeely,"[Joe Simon, Jack Kirby]",Kevin Feige,"[Chris Evans, Tommy Lee Jones, Hugo Weaving, H...",Shelly Johnson,"[Jeffrey Ford, Robert Dalva]",Alan Silvestri,Marvel Studios,Paramount Pictures [N 1],"[July19,2011 ( 2011-07-19 ) ( El Capitan Theat...",124 minutes [4],United States,English,$140–216.7 million [5] [6],$370.6 million [7],,,
4,The Avengers,Joss Whedon,Joss Whedon,"[Stan Lee, Jack Kirby]",Kevin Feige,"[Robert Downey Jr., Chris Evans, Mark Ruffalo,...",Seamus McGarvey,"[Jeffrey Ford, Lisa Lassek]",Alan Silvestri,Marvel Studios,Walt Disney Studios Motion Pictures [N 1],"[April11,2012 ( 2012-04-11 ) ( El Capitan Thea...",143 minutes [3],United States,English,$220million [4],$1.519 billion [5],,,"[Zak Penn, Joss Whedon]"
5,Thor: The Dark World,Alan Taylor,"[Christopher Yost, Christopher Markus Stephen ...","[Stan Lee, Larry Lieber, Jack Kirby]",Kevin Feige,"[Chris Hemsworth, Natalie Portman, Tom Hiddles...",Kramer Morgenthau,"[Dan Lebental, Wyatt Smith]",Brian Tyler,Marvel Studios,Walt Disney Studios Motion Pictures,"[October22,2013 ( 2013-10-22 ) ( Leicester Squ...",112 minutes [1],United States,English,$150–170 million [2] [3],$644.8million [4],,,"[Don Payne, Robert Rodat]"
6,Captain America: The Winter Soldier,"[Anthony Russo, Joe Russo]","[Christopher Markus, Stephen McFeely]","[Joe Simon, Jack Kirby]",Kevin Feige,"[Chris Evans, Scarlett Johansson, Sebastian St...",Trent Opaloch,"[Jeffrey Ford, Matthew Schmidt]",Henry Jackman,Marvel Studios,Walt Disney Studios Motion Pictures,"[March13,2014 ( 2014-03-13 ) ( El Capitan Thea...",136 minutes [1],United States,English,$170–177 million [2] [3],$714.4 million [2],,,
7,Guardians of the Galaxy,James Gunn,,"[Dan Abnett, Andy Lanning]",Kevin Feige,"[Chris Pratt, Zoe Saldana, Dave Bautista, Vin ...",Ben Davis,"[Fred Raskin, Craig Wood, Hughes Winborne]",Tyler Bates,Marvel Studios,Walt Disney Studios Motion Pictures,"[July21,2014 ( 2014-07-21 ) ( Dolby Theatre ),...",122 minutes [1],United States,English,"[$232.3 million (gross) [2], $195.9 million (n...",$772.8 million [3],"[James Gunn, Nicole Perlman]",,
8,Avengers: Age of Ultron,Joss Whedon,,"[Stan Lee, Jack Kirby]",Kevin Feige,"[Robert Downey Jr., Chris Hemsworth, Mark Ruff...",Ben Davis,"[Jeffrey Ford, Lisa Lassek]","[Brian Tyler, Danny Elfman]",Marvel Studios,Walt Disney Studios Motion Pictures,"[April13,2015 ( 2015-04-13 ) ( Dolby Theatre )...",141 minutes [1],United States,English,"[$444–495.2 million (gross) [2] [3], $365 mill...",$1.403 billion [4],Joss Whedon,,
9,Ant-Man,Peyton Reed,"[Edgar Wright, Joe Cornish, Adam McKay, Paul R...","[Stan Lee, Larry Lieber, Jack Kirby]",Kevin Feige,"[Paul Rudd, Evangeline Lilly, Corey Stoll, Bob...",Russell Carpenter,"[Dan Lebental, Colby Parker, Jr.]",Christophe Beck,Marvel Studios,Walt Disney Studios Motion Pictures,"[June29,2015 ( 2015-06-29 ) ( Dolby Theatre ),...",117 minutes [1],United States,English,$130–169.3 million [2] [3] [4],$519.3 million [2],,,"[Edgar Wright, Joe Cornish]"


In [35]:
load_data('marvel_movies.json')

[{'title': 'Iron Man',
  'Directed by': 'Jon Favreau',
  'Screenplay by': ['Mark Fergus Hawk Ostby', 'Art Marcum Matt Holloway'],
  'Based on': ['Stan Lee', 'Larry Lieber', 'Don Heck', 'Jack Kirby'],
  'Produced by': ['Avi Arad', 'Kevin Feige'],
  'Starring': ['Robert Downey Jr.',
   'Terrence Howard',
   'Jeff Bridges',
   'Shaun Toub',
   'Gwyneth Paltrow'],
  'Cinematography': 'Matthew Libatique',
  'Edited by': 'Dan Lebental',
  'Music by': 'Ramin Djawadi',
  'Production company': 'Marvel Studios',
  'Distributed by': 'Paramount Pictures [N 1]',
  'Release date': ['April14,2008 ( 2008-04-14 ) (Sydney)',
   'May2,2008 ( 2008-05-02 ) (United States)'],
  'Running time': '126 minutes [4]',
  'Country': 'United States',
  'Language': 'English',
  'Budget': '$140million [5]',
  'Box office': '$585.8million [5]'},
 {'title': 'The Incredible Hulk',
  'Directed by': 'Louis Leterrier',
  'Written by': 'Zak Penn',
  'Based on': ['Stan Lee', 'Jack Kirby'],
  'Produced by': ['Avi Arad', 'Gale 