# 0. 기본 정보
- 코드 작성자: 여서연
- 코드 작성일: 2024-09-19 ~ 2024-09-22
- 데이터 분석 목적: 가설 검정
- 데이터 출처: [The Movies Dataset](https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset?select=movies_metadata.csv)

## 라이브러리 및 기초 설정

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as plt
import koreanize_matplotlib
import seaborn as sns

In [2]:
# 열 중간 잘리지 않게 설정

pd.set_option('display.max_columns', None)

In [3]:
import warnings
warnings.filterwarnings('ignore')

## 데이터 세부 정보 확인

컬럼 설명
---
- adult(bool): 성인 영화 여부
- belongs_to_collection(json): 컬렉션 정보
- budget(int): 제작 예산
- genres(json): 장르 정보
- homepage(str): 공식 홈페이지 주소
<br></br>
- id(int): id
- imdb_id(str): IMDB 사용 id
- original_language(str): 원어 코드
- original_title(str): 원제
- overview(str): 줄거리 또는 요약 정보
<br></br>
- popularity(float): 인기도 점수
- poster_path(str): 포스터 이미지 경로
- production_companies(json): 제작사
- production_countries(json): 제작 국가
- release_date(YY-MM-DD): 개봉일
<br></br>
- revenue(int): 총 수익
- runtime(int): 상영 시간(분)
- spoken_languages(json): 사용 언어
- status(str): 상태
- tagline(str): 태그라인 (짧은 홍보 문구)
<br></br>
- title(str): 제목
- video(bool): 관련 클립 존재 여부
- vote_average(float): 사용자 평점 평균
- vote_count(int): 사용자 평점 수

In [4]:
mvmd_origin = pd.read_csv('../data/movies_metadata.csv')

In [15]:
df = mvmd_origin.copy()

In [16]:
for col in list(df.columns):
    df[col] = df[col].replace(0, np.nan)
    df[col] = df[col].replace('[]', np.nan)
    df[col] = df[col].replace('0', np.nan)

In [17]:
df

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,,5.7,173.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45461,False,,,"[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...",http://www.imdb.com/title/tt6209470/,439050,tt6209470,fa,رگ خواب,Rising and falling between a man and woman.,0.072051,/jldsYflnId4tTWPx8es3uzsB1I8.jpg,,"[{'iso_3166_1': 'IR', 'name': 'Iran'}]",,,90.0,"[{'iso_639_1': 'fa', 'name': 'فارسی'}]",Released,Rising and falling between a man and woman,Subdue,,4.0,1.0
45462,False,,,"[{'id': 18, 'name': 'Drama'}]",,111109,tt2028550,tl,Siglo ng Pagluluwal,An artist struggles to finish his work while a...,0.178241,/xZkmxsNmYXJbKVsTRLLx3pqGHx7.jpg,"[{'name': 'Sine Olivia', 'id': 19653}]","[{'iso_3166_1': 'PH', 'name': 'Philippines'}]",2011-11-17,,360.0,"[{'iso_639_1': 'tl', 'name': ''}]",Released,,Century of Birthing,,9.0,3.0
45463,False,,,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",,67758,tt0303758,en,Betrayal,"When one of her hits goes wrong, a professiona...",0.903007,/d5bX92nDsISNhu3ZT69uHwmfCGw.jpg,"[{'name': 'American World Pictures', 'id': 6165}]","[{'iso_3166_1': 'US', 'name': 'United States o...",2003-08-01,,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A deadly game of wits.,Betrayal,,3.8,6.0
45464,False,,,,,227506,tt0008536,en,Satana likuyushchiy,"In a small town live two brothers, one a minis...",0.003503,/aorBPO7ak8e8iJKT5OcqYxU3jlK.jpg,"[{'name': 'Yermoliev', 'id': 88753}]","[{'iso_3166_1': 'RU', 'name': 'Russia'}]",1917-10-21,,87.0,,Released,,Satan Triumphant,,,


In [None]:
# 결측치가 있는 행을 제거하고 결측치가 없는 데이터만 출력
df_no_missing_series = df.dropna(subset=['belongs_to_collection','genres','spoken_languages','runtime',''])

# 결측치가 없는 데이터 확인
df_no_missing_series

In [19]:
# 결측치가 있는 행을 제거하고 결측치가 없는 데이터만 출력
df_no_missing_series = df.dropna(subset=['belongs_to_collection'])

# 결측치가 없는 데이터 확인
df_no_missing_series

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,,7.7,5415.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,,6.5,92.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,,5.7,173.0
9,False,"{'id': 645, 'name': 'James Bond Collection', '...",58000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.mgm.com/view/movie/757/Goldeneye/,710,tt0113189,en,GoldenEye,James Bond must unmask the mysterious head of ...,14.686036,/5c0ovjT41KnYIHYuF4AWsTe3sKh.jpg,"[{'name': 'United Artists', 'id': 60}, {'name'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",1995-11-16,352194034.0,130.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,No limits. No fears. No substitutes.,GoldenEye,,6.6,1194.0
12,False,"{'id': 117693, 'name': 'Balto Collection', 'po...",,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",,21032,tt0112453,en,Balto,An outcast half-wolf risks his life to prevent...,12.140733,/gV5PCAVCPNxlOLFM1bKk50EqLXO.jpg,"[{'name': 'Universal Pictures', 'id': 33}, {'n...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,11348324.0,78.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Part Dog. Part Wolf. All Hero.,Balto,,7.1,423.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45355,False,"{'id': 37261, 'name': 'The Carry On Collection...",,"[{'id': 35, 'name': 'Comedy'}]",,24568,tt0061680,en,Follow That Camel,Bertram Oliphant West (also known as Bo West) ...,1.114848,/8nifhAeOQoTJReWuJTTnsUn2pBa.jpg,"[{'name': 'The Rank Organisation', 'id': 364},...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",1967-09-01,,95.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Carry On Follow That Camel,,6.1,19.0
45358,False,"{'id': 37261, 'name': 'The Carry On Collection...",,"[{'id': 35, 'name': 'Comedy'}]",,19307,tt0064133,en,Carry On Camping,Sid and Bernie keep having their amorous inten...,4.103723,/dyas4yIxnhGarAhkuD7gsRCR7eQ.jpg,"[{'name': 'The Rank Organisation', 'id': 364}]","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",1969-05-29,,88.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fun and games in the great outdoors!,Carry On Camping,,6.5,25.0
45369,False,"{'id': 37261, 'name': 'The Carry On Collection...",,"[{'id': 35, 'name': 'Comedy'}]",,21251,tt0074286,en,Carry On England,Captain S. Melly takes over as the new Command...,0.846154,/xd9qImTfWqQ1WzVtvo4CnLmH5IQ.jpg,"[{'name': 'The Rank Organisation', 'id': 364}]","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",1976-10-01,,89.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Carry On England,,4.8,15.0
45371,False,"{'id': 477208, 'name': 'DC Super Hero Girls Co...",,"[{'id': 16, 'name': 'Animation'}]",,460135,tt7158814,en,LEGO DC Super Hero Girls: Brain Drain,"When Supergirl, Wonder Woman, Batgirl, Bumbleb...",8.413734,/niLX2txdI5GlVowJlnb5Hr26QpK.jpg,"[{'name': 'Warner Bros. Animation', 'id': 2785}]","[{'iso_3166_1': 'US', 'name': 'United States o...",2017-08-30,,,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,LEGO DC Super Hero Girls: Brain Drain,,10.0,2.0


In [20]:
df_no_missing[''].isnull().mean() * 100

adult                     0.000000
belongs_to_collection     0.000000
budget                   64.663996
genres                    1.513129
homepage                 78.015131
id                        0.000000
imdb_id                   0.111259
original_language         0.000000
original_title            0.000000
overview                  1.312862
popularity                0.066756
poster_path               0.044504
production_companies     13.862928
production_countries      5.473965
release_date              0.066756
revenue                  67.000445
runtime                   2.180685
spoken_languages          1.668892
status                    0.133511
tagline                  42.768135
title                     0.089008
video                    99.910992
vote_average              1.490877
vote_count                1.379617
dtype: float64