In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd


### Info box of COCO

In [2]:
r= requests.get("https://en.wikipedia.org/wiki/Coco_(2017_film)")
#convert into soup object
soup= bs(r.content)
contents= soup.prettify()

In [3]:
info_box= soup.find(class_= "infobox vevent")
info_rows=info_box.find_all('tr')


def get_content_value(row_data):
    if row_data.find('li'):
        return [li.get_text(" ", strip= True).replace("\xa0", " ") for li in row_data.find_all("li")]
    else:
        return row_data.get_text(" ", strip= True).replace("\xa0", " ")

movie_info= {}
for index, row in enumerate(info_rows):
    if index==0:
        movie_info['title']= row.find('th').get_text()
    elif index==1:
        continue
    else:
        content_key= row.find('th').get_text(" ", strip= True)
        content_value=get_content_value(row.find('td'))
        
        movie_info[content_key]= content_value

        
    

In [4]:
movie_info

{'title': 'Coco',
 'Directed by': 'Lee Unkrich',
 'Produced by': 'Darla K. Anderson',
 'Screenplay by': ['Adrian Molina', 'Matthew Aldrich'],
 'Story by': ['Lee Unkrich', 'Jason Katz', 'Matthew Aldrich', 'Adrian Molina'],
 'Starring': ['Anthony Gonzalez',
  'Gael García Bernal',
  'Benjamin Bratt',
  'Alanna Ubach',
  'Renée Victor',
  'Ana Ofelia Murguía',
  'Edward James Olmos'],
 'Music by': 'Michael Giacchino [1]',
 'Cinematography': ['Matt Aspbury (camera) [2]',
  'Danielle Feinberg (lighting) [2]'],
 'Edited by': 'Steve Bloom [2]',
 'Production companies': ['Walt Disney Pictures [3]',
  'Pixar Animation Studios [3]'],
 'Distributed by': 'Walt Disney Studios Motion Pictures',
 'Release date': ['October 20, 2017 ( 2017-10-20 ) ( Morelia ) [4]',
  'November 22, 2017 ( 2017-11-22 ) (United States)'],
 'Running time': '105 minutes [5]',
 'Country': 'United States',
 'Language': 'English [5]',
 'Budget': '$175–225 million [6] [7]',
 'Box office': '$807.8 million [8]'}

### Infobox of All  Disney movies

In [5]:
#load the page
r= requests.get("https://en.wikipedia.org/wiki/List_of_Walt_Disney_Pictures_films")
#convert into soup object
soup= bs(r.content)


In [6]:
d_movies= soup.select(".wikitable.sortable i")

In [7]:
def get_content_value(row_data):
    if row_data.find('li'):
        return [li.get_text(" ", strip= True).replace("\xa0", " ") for li in row_data.find_all("li")]
    #STRIP long strings
    elif row_data.find('br'):
        return [text for text in row_data.stripped_strings]
        
    else:
        return row_data.get_text(" ", strip= True).replace("\xa0", " ")

# Remove References    
def clean_tags(soup):
    for tag in soup.find_all(['sup','span']):
        tag.decompose()

def get_info_box(url):
    r= requests.get(url)
    soup= bs(r.content)
    info_box= soup.find(class_= "infobox vevent")
    info_rows=info_box.find_all('tr')

    clean_tags(soup)
    
    movie_info= {}
    for index, row in enumerate(info_rows):
        if index==0:
            movie_info['title']= row.find('th').get_text()
       
        else:
            header= row.find('th')
            if header:
                
                content_key= row.find('th').get_text(" ", strip= True)
                content_value=get_content_value(row.find('td'))
                movie_info[content_key]= content_value
    return  movie_info
    

In [8]:
get_info_box('https://en.wikipedia.org/wiki/One_Little_Indian_(film)')

{'title': 'One Little Indian',
 'Directed by': 'Bernard McEveety',
 'Produced by': 'Winston Hibler',
 'Written by': 'Harry Spalding',
 'Starring': ['James Garner',
  'Vera Miles',
  'Pat Hingle',
  'Morgan Woodward',
  'Jodie Foster'],
 'Music by': 'Jerry Goldsmith',
 'Cinematography': 'Charles F. Wheeler',
 'Edited by': 'Robert Stafford',
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'Buena Vista Distribution',
 'Release date': ['June 20, 1973'],
 'Running time': '90 Minutes',
 'Country': 'United States',
 'Language': 'English',
 'Box office': '$2 million'}

In [9]:
rt= requests.get("https://en.wikipedia.org/wiki/List_of_Walt_Disney_Pictures_films")
soup= bs(rt.content)
d_movies= soup.select(".wikitable.sortable i a")

base= "https://en.wikipedia.org/"

movie_info_list= []
for index, movie in enumerate(d_movies):
   
    try:
        path=movie['href']
        title= movie['title']
        actual_path= base+path
        movie_info_list.append(get_info_box(actual_path))
    except Exception as e:
        print(movie.get_text())
        #print(e)

Zorro the Avenger
The Sign of Zorro
True-Life Adventures


In [10]:
len(movie_info_list)

447

### Save and Load data

In [11]:
import json

def save_data(title, data):
    with open(title, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii= False, indent=2 )

In [12]:
import json

def load_data(title):
    with open(title, encoding='utf-8') as f:
        return json.load(f)
    

In [13]:
save_data('Disney_data_cleaned.json', movie_info_list)

#### Data Cleaning

In [14]:
dsny_movie= load_data('Disney_data_cleaned.json')

##### Clean up
- Convert dates into python date object
- ~~Running time strip off the text, just keep the integer~~
- Convert monetary values into numerical
- ~~remove references~~
- standardize the data
- ~~figure out to clean long strings under certain fields~~



In [15]:
# Fix running time to integer
for movie in dsny_movie:
    print(movie.get('Running time','NA'))

41 minutes (74 minutes 1966 release)
83 minutes
88 minutes
126 minutes
74 minutes
64 minutes
70 minutes
42 minutes
65 min.
71 minutes
75 minutes
94 minutes
73 minutes
75 minutes
82 minutes
68 minutes
74 minutes
96 minutes
75 minutes
84 minutes
77 minutes
92 minutes
69 minutes
81 minutes
['60 minutes (VHS version)', '71 minutes (original)']
127 minutes
92 minutes
76 minutes
75 minutes
73 minutes
85 minutes
81 minutes
70 minutes
90 min.
80 minutes
75 minutes
83 minutes
83 minutes
72 minutes
97 minutes
75 minutes
104 minutes
93 minutes
105 minutes
95 minutes
97 minutes
134 minutes
69 minutes
92 minutes
126 minutes
79 minutes
97 minutes
128 minutes
74 minutes
91 minutes
105 minutes
98 minutes
130 minutes
89 min.
93 minutes
67 minutes
98 minutes
100 minutes
118 minutes
103 Minutes
110 minutes
80 min.
79 minutes
91 minutes
91 minutes
97 minutes
118 minutes
139 minutes
92 minutes
131 mins.
87 minutes
116 minutes
93 minutes
110 min.
110 min.
131 minutes
101 minutes
108 minutes
84 minutes
78 mi

In [16]:
def running_time_to_int(running_time):
    if running_time == 'NA':
        return None 
    if isinstance(running_time, list):
        entry= running_time[0]
        return  int(entry.split(' ')[0])
    else:
        value= int(running_time.split(' ')[0])
        return value
    
for movie in dsny_movie:
    movie['Running time(int)']= running_time_to_int(movie.get('Running time','NA'))  
    
dsny_movie    

[{'title': 'Academy Award Review of ',
  'Production company': 'Walt Disney Productions',
  'Release date': ['May 19, 1937'],
  'Running time': '41 minutes (74 minutes 1966 release)',
  'Country': 'United States',
  'Language': 'English',
  'Box office': '$45.472',
  'Running time(int)': 41},
 {'title': 'Snow White and the Seven Dwarfs',
  'Directed by': ['David Hand (supervising)',
   'William Cottrell',
   'Wilfred Jackson',
   'Larry Morey',
   'Perce Pearce',
   'Ben Sharpsteen'],
  'Produced by': 'Walt Disney',
  'Written by': ['Ted Sears',
   'Richard Creedon',
   'Otto Englander',
   'Dick Rickard',
   'Earl Hurd',
   'Merrill De Maris',
   'Dorothy Ann Blank',
   'Webb Smith'],
  'Based on': ['Snow White', 'by The', 'Brothers Grimm'],
  'Starring': ['Adriana Caselotti',
   'Lucille La Verne',
   'Harry Stockwell',
   'Roy Atwell',
   'Pinto Colvig',
   'Otis Harlan',
   'Scotty Mattraw',
   'Billy Gilbert',
   'Eddie Collins',
   'Moroni Olsen',
   'Stuart Buchanan'],
  'Music 

In [17]:
for movie in dsny_movie:
    print(movie.get('Budget','NA'))

NA
$1.49 million
$2.6 million
$2.28 million
$600,000
$950,000
$858,000
NA
$788,000
NA
$1.35 million
$2.125 million
NA
$1.5 million
$1.5 million
NA
$2.9 million
$1,800,000
$3 million
NA
$4 million
$2 million
$300,000
$1.8 million
NA
$5 million
NA
$4 million
NA
NA
NA
NA
NA
NA
$700,000
NA
NA
NA
NA
NA
$6 million
under $1 million or $1,250,000
NA
$2 million
NA
NA
$2.5 million
NA
NA
$4 million
$3.6 million
NA
NA
NA
NA
$3 million
NA
$3 million
NA
NA
NA
NA
NA
NA
NA
NA
NA
$3 million
NA
NA
NA
NA
$4.4–6 million
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
$4 million
NA
$5 million
NA
NA
NA
NA
$5 million
NA
NA
NA
NA
NA
NA
$4 million
NA
NA
NA
$6.3 million
NA
NA
NA
NA
NA
NA
NA
NA
$5 million
NA
NA
NA
NA
$8 million
NA
NA
NA
NA
NA
AU$1 million
NA
NA
NA
NA
$5 million
NA
NA
NA
$7.5 million
NA
$10 million
NA
NA
$3.5 to 4 million
NA
NA
NA
$5.25 million
$20 million
NA
NA
$9 million
NA
$6-8 million
$20 million
NA
NA
$18 million
$12 million
$14 million
NA

$5 million
unknown
$20 million
$11 million
$28 million
$44 million

In [18]:
import re

In [19]:

amounts= r'thousand|million|billion'
number= r"\d+(,\d{3})*\.*\d*"

word_re= rf"\${number}(-|\sto\s|–)?({number})?\s({amounts})"
value_re= rf"\${number}"


def word_to_value(word):
    value_dict = {"thousand": 1000, "million": 1000000, "billion": 1000000000}
    return value_dict[word]

def parse_word_syntax(string):
    value_string = re.search(number, string).group()
    value = float(value_string.replace(",", ""))
    word = re.search(amounts, string, flags=re.I).group().lower()
    word_value = word_to_value(word)
    return value*word_value

def parse_value_syntax(string):
    value_string= re.search(number, string).group()
    value1= float(value_string.replace(",",""))
    
    return value1
    
def money_conversion(money):
    if money == "N/A":
        return None
    
    if isinstance(money, list):
        money = money[0]
    

    word_syntax= re.search(word_re, money, flags=re.I)
    value_syntax= re.search(value_re, money)

    if word_syntax:
        return parse_word_syntax(word_syntax.group())

    elif value_syntax:
        
        return parse_value_syntax(value_syntax.group())

    else:
        return None

print(money_conversion('$120–133 million'))  
print(money_conversion('$1,800,000'))

120000000.0
1800000.0


In [20]:
for movie in dsny_movie:
    movie['Budget (float)']= money_conversion(movie.get('Budget','NA'))
    movie['Box Office (float)']= money_conversion(movie.get('Box office','NA'))

In [21]:
dsny_movie[-40]['Budget (float)']

120000000.0

In [22]:
for movie in dsny_movie:
    print(movie['Budget (float)'])

None
1490000.0
2600000.0
2280000.0
600000.0
950000.0
858000.0
None
788000.0
None
1350000.0
2125000.0
None
1500000.0
1500000.0
None
2900000.0
1800000.0
3000000.0
None
4000000.0
2000000.0
300000.0
1800000.0
None
5000000.0
None
4000000.0
None
None
None
None
None
None
700000.0
None
None
None
None
None
6000000.0
1000000.0
None
2000000.0
None
None
2500000.0
None
None
4000000.0
3600000.0
None
None
None
None
3000000.0
None
3000000.0
None
None
None
None
None
None
None
None
None
3000000.0
None
None
None
None
4400000.0
None
None
None
None
None
None
None
None
None
None
None
4000000.0
None
5000000.0
None
None
None
None
5000000.0
None
None
None
None
None
None
4000000.0
None
None
None
6300000.0
None
None
None
None
None
None
None
None
5000000.0
None
None
None
None
8000000.0
None
None
None
None
None
1000000.0
None
None
None
None
5000000.0
None
None
None
7500000.0
None
10000000.0
None
None
3500000.0
None
None
None
5250000.0
20000000.0
None
None
9000000.0
None
6000000.0
20000000.0
None
None
18000000.0
12

In [23]:
from datetime import datetime

dates= [movie.get("Release date","NA ") for movie in dsny_movie]

def clean_date(date):
    return date.split('(')[0].strip()

def date_conversion(date):
    if isinstance(date, list):
        date= date[0]
        
    if date == 'NA':
        return None
        
    date_str = clean_date(date)
    #print(date_str)
    
    fmts= ['%B %d, %Y', '%d %B %Y']
    
    for fmt in fmts:
        try:
            return datetime.strptime(date_str, fmt)
        except:
            pass
    return None
    
for date in dates:
    print(date_conversion(date))
    print()

1937-05-19 00:00:00

1937-12-21 00:00:00

1940-02-07 00:00:00

1940-11-13 00:00:00

1941-06-27 00:00:00

1941-10-23 00:00:00

1942-08-09 00:00:00

1942-08-24 00:00:00

1943-07-17 00:00:00

1944-12-21 00:00:00

1946-04-20 00:00:00

1946-11-12 00:00:00

1947-09-27 00:00:00

1948-05-27 00:00:00

1948-11-29 00:00:00

1949-10-05 00:00:00

1950-02-22 00:00:00

1950-06-22 00:00:00

1951-07-26 00:00:00

1952-03-13 00:00:00

1953-02-05 00:00:00

1953-08-08 00:00:00

1953-11-10 00:00:00

1953-10-26 00:00:00

1954-08-17 00:00:00

1954-12-23 00:00:00

1955-05-25 00:00:00

1955-06-22 00:00:00

1955-09-14 00:00:00

1955-12-22 00:00:00

1956-06-08 00:00:00

1956-07-18 00:00:00

1956-09-04 00:00:00

1956-12-20 00:00:00

1957-06-19 00:00:00

1957-08-28 00:00:00

1957-12-25 00:00:00

1958-07-08 00:00:00

1958-08-12 00:00:00

1958-12-25 00:00:00

1959-01-29 00:00:00

1959-03-19 00:00:00

1959-06-24 00:00:00

1959-11-10 00:00:00

1960-01-21 00:00:00

1960-02-24 00:00:00

1960-05-19 00:00:00

None

1960-11

In [24]:
dsny_movie[1]

{'title': 'Snow White and the Seven Dwarfs',
 'Directed by': ['David Hand (supervising)',
  'William Cottrell',
  'Wilfred Jackson',
  'Larry Morey',
  'Perce Pearce',
  'Ben Sharpsteen'],
 'Produced by': 'Walt Disney',
 'Written by': ['Ted Sears',
  'Richard Creedon',
  'Otto Englander',
  'Dick Rickard',
  'Earl Hurd',
  'Merrill De Maris',
  'Dorothy Ann Blank',
  'Webb Smith'],
 'Based on': ['Snow White', 'by The', 'Brothers Grimm'],
 'Starring': ['Adriana Caselotti',
  'Lucille La Verne',
  'Harry Stockwell',
  'Roy Atwell',
  'Pinto Colvig',
  'Otis Harlan',
  'Scotty Mattraw',
  'Billy Gilbert',
  'Eddie Collins',
  'Moroni Olsen',
  'Stuart Buchanan'],
 'Music by': ['Frank Churchill', 'Paul Smith', 'Leigh Harline'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['December 21, 1937 ( Carthay Circle Theatre , Los Angeles , CA , premiere)'],
 'Running time': '83 minutes',
 'Country': 'United States',
 'Language': 'Engli

In [25]:
for movie in dsny_movie:
    movie['Release date (datetime)']= date_conversion(movie.get('Release date','NA'))

In [26]:
dsny_movie[1]

{'title': 'Snow White and the Seven Dwarfs',
 'Directed by': ['David Hand (supervising)',
  'William Cottrell',
  'Wilfred Jackson',
  'Larry Morey',
  'Perce Pearce',
  'Ben Sharpsteen'],
 'Produced by': 'Walt Disney',
 'Written by': ['Ted Sears',
  'Richard Creedon',
  'Otto Englander',
  'Dick Rickard',
  'Earl Hurd',
  'Merrill De Maris',
  'Dorothy Ann Blank',
  'Webb Smith'],
 'Based on': ['Snow White', 'by The', 'Brothers Grimm'],
 'Starring': ['Adriana Caselotti',
  'Lucille La Verne',
  'Harry Stockwell',
  'Roy Atwell',
  'Pinto Colvig',
  'Otis Harlan',
  'Scotty Mattraw',
  'Billy Gilbert',
  'Eddie Collins',
  'Moroni Olsen',
  'Stuart Buchanan'],
 'Music by': ['Frank Churchill', 'Paul Smith', 'Leigh Harline'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['December 21, 1937 ( Carthay Circle Theatre , Los Angeles , CA , premiere)'],
 'Running time': '83 minutes',
 'Country': 'United States',
 'Language': 'Engli

In [27]:
import pickle

def save_data_pickle(name, data):
    with open(name, 'wb') as f:
        pickle.dump(data,f)

In [28]:
def load_data_pickle(name):
    with open(name, 'rb') as f:
        return pickle.load(f)

In [29]:
save_data_pickle("DISNEY Movie Dataset_Cleaned", dsny_movie )

In [30]:
a= load_data_pickle("DISNEY Movie Dataset_Cleaned")

In [31]:
a[5]

{'title': 'Dumbo',
 'Directed by': ['Ben Sharpsteen',
  '(supervising director)',
  'Norman Ferguson',
  'Wilfred Jackson',
  'Bill Roberts',
  'Jack Kinney',
  'Samuel Armstrong'],
 'Produced by': 'Walt Disney',
 'Story by': ['Otto Englander', 'Joe Grant', 'Dick Huemer'],
 'Based on': ['Dumbo, the Flying Elephant',
  'by',
  'Helen Aberson',
  'Harold Pearl'],
 'Starring': ['Edward Brophy',
  'Verna Felton',
  'Cliff Edwards',
  'Herman Bing',
  'Sterling Holloway',
  'Margaret Wright',
  'Hall Johnson Choir'],
 'Narrated by': 'John McLeish',
 'Music by': ['Frank Churchill', 'Oliver Wallace'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['October 23, 1941 (New York City)',
  'October 31, 1941 (U.S.)'],
 'Running time': '64 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$950,000',
 'Box office': '$1.3 million (est. United States/Canada rentals, 1941)',
 'Running time(int)': 64,
 'Budget (float)'

#### Attach IMDB?Rotten Tomatoes Scores

In [32]:
disney_mov= a

In [33]:
disney_mov[1]

{'title': 'Snow White and the Seven Dwarfs',
 'Directed by': ['David Hand (supervising)',
  'William Cottrell',
  'Wilfred Jackson',
  'Larry Morey',
  'Perce Pearce',
  'Ben Sharpsteen'],
 'Produced by': 'Walt Disney',
 'Written by': ['Ted Sears',
  'Richard Creedon',
  'Otto Englander',
  'Dick Rickard',
  'Earl Hurd',
  'Merrill De Maris',
  'Dorothy Ann Blank',
  'Webb Smith'],
 'Based on': ['Snow White', 'by The', 'Brothers Grimm'],
 'Starring': ['Adriana Caselotti',
  'Lucille La Verne',
  'Harry Stockwell',
  'Roy Atwell',
  'Pinto Colvig',
  'Otis Harlan',
  'Scotty Mattraw',
  'Billy Gilbert',
  'Eddie Collins',
  'Moroni Olsen',
  'Stuart Buchanan'],
 'Music by': ['Frank Churchill', 'Paul Smith', 'Leigh Harline'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['December 21, 1937 ( Carthay Circle Theatre , Los Angeles , CA , premiere)'],
 'Running time': '83 minutes',
 'Country': 'United States',
 'Language': 'Engli

In [34]:
#http://www.omdbapi.com/?apikey=[yourkey]&


In [48]:
import requests
import urllib




def get_omdb_info(title):
    base_url= "http://www.omdbapi.com/?"
    parameters= {'apikey':'key', 't':title}
    prm_encoded= urllib.parse.urlencode(parameters) # to give in url format
    full_urlpath= base_url+ prm_encoded
    #print(full_urlpath)
    return requests.get(full_urlpath).json()


    
get_omdb_info('Snow White and the Seven Dwarfs')
#print(get_omdb_info('Snow White and the Seven Dwarfs').get('imdbRating', None))


{'Title': 'Snow White and the Seven Dwarfs',
 'Year': '1937',
 'Rated': 'Approved',
 'Released': '04 Feb 1938',
 'Runtime': '83 min',
 'Genre': 'Animation, Family, Fantasy, Musical, Romance',
 'Director': 'William Cottrell, David Hand, Wilfred Jackson, Larry Morey, Perce Pearce, Ben Sharpsteen',
 'Writer': 'Jacob Grimm (fairy tales), Wilhelm Grimm (fairy tales), Ted Sears (story adaptation), Richard Creedon (story adaptation), Otto Englander (story adaptation), Dick Rickard (story adaptation), Earl Hurd (story adaptation), Merrill De Maris (story adaptation), Dorothy Ann Blank (story adaptation), Webb Smith (story adaptation)',
 'Actors': 'Roy Atwell, Stuart Buchanan, Adriana Caselotti, Eddie Collins',
 'Plot': 'Exiled into the dangerous forest by her wicked stepmother, a princess is rescued by seven dwarf miners who make her part of their household.',
 'Language': 'English',
 'Country': 'USA',
 'Awards': 'Nominated for 1 Oscar. Another 11 wins & 5 nominations.',
 'Poster': 'https://m.

In [36]:
for index, movie in enumerate(disney_mov):
    if index%10 ==0:
        print(index)
    title= movie['title']
        
    omdb_info = get_omdb_info(title)
    movie['imdb']= omdb_info.get('imdbRating', None)
    movie['metascore'] = omdb_info.get('Metascore', None)
    

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440


In [37]:
disney_mov[1]

{'title': 'Snow White and the Seven Dwarfs',
 'Directed by': ['David Hand (supervising)',
  'William Cottrell',
  'Wilfred Jackson',
  'Larry Morey',
  'Perce Pearce',
  'Ben Sharpsteen'],
 'Produced by': 'Walt Disney',
 'Written by': ['Ted Sears',
  'Richard Creedon',
  'Otto Englander',
  'Dick Rickard',
  'Earl Hurd',
  'Merrill De Maris',
  'Dorothy Ann Blank',
  'Webb Smith'],
 'Based on': ['Snow White', 'by The', 'Brothers Grimm'],
 'Starring': ['Adriana Caselotti',
  'Lucille La Verne',
  'Harry Stockwell',
  'Roy Atwell',
  'Pinto Colvig',
  'Otis Harlan',
  'Scotty Mattraw',
  'Billy Gilbert',
  'Eddie Collins',
  'Moroni Olsen',
  'Stuart Buchanan'],
 'Music by': ['Frank Churchill', 'Paul Smith', 'Leigh Harline'],
 'Production company': 'Walt Disney Productions',
 'Distributed by': 'RKO Radio Pictures',
 'Release date': ['December 21, 1937 ( Carthay Circle Theatre , Los Angeles , CA , premiere)'],
 'Running time': '83 minutes',
 'Country': 'United States',
 'Language': 'Engli

#### Save data as CSV

In [41]:
disney_movies_info= [movie.copy() for movie in disney_mov]

In [49]:
import pandas as pd
df= pd.DataFrame(disney_movies_info)
df.head()

Unnamed: 0,title,Production company,Release date,Running time,Country,Language,Box office,Running time(int),Budget (float),Box Office (float),...,Languages,Screenplay by,Countries,Production companies,Japanese,Hepburn,Adaptation by,Animation by,Traditional,Simplified
0,Academy Award Review of,Walt Disney Productions,"[May 19, 1937]",41 minutes (74 minutes 1966 release),United States,English,$45.472,41.0,,45.472,...,,,,,,,,,,
1,Snow White and the Seven Dwarfs,Walt Disney Productions,"[December 21, 1937 ( Carthay Circle Theatre , ...",83 minutes,United States,English,$418 million,83.0,1490000.0,418000000.0,...,,,,,,,,,,
2,Pinocchio,Walt Disney Productions,"[February 7, 1940 ( Center Theatre ), February...",88 minutes,United States,English,$164 million,88.0,2600000.0,164000000.0,...,,,,,,,,,,
3,Fantasia,Walt Disney Productions,"[November 13, 1940]",126 minutes,United States,English,$76.4–$83.3 million,126.0,2280000.0,83300000.0,...,,,,,,,,,,
4,The Reluctant Dragon,Walt Disney Productions,"[June 27, 1941]",74 minutes,United States,English,"$960,000 (worldwide rentals)",74.0,600000.0,960000.0,...,,,,,,,,,,


In [50]:
df.to_csv('Disney_movie_csv')