Scrape streaming platforms for movies #16

Open
wants to merge 8 commits into base: master
148 changes: 148 additions & 0 deletions .gitignore
@@ -0,0 +1,148 @@
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytestdebug.log

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
doc/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pythonenv*

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# profiling data
.prof

# End of https://www.toptal.com/developers/gitignore/api/python


# the csv that is produced
film_ratings.csv
31 changes: 26 additions & 5 deletions GUI.py
@@ -1,12 +1,33 @@
import PySimpleGUI as sg
from find_IMDb_rating import find_movie

event, values = sg.Window('Browse the Folder', [[sg.Text('Folder Path')], [sg.Input(), sg.FolderBrowse()], [sg.OK(), sg.Cancel()] ]).read(close=True)
names,rating,genres = find_movie(values[0])
event, values = sg.Window(
'Browse the Folder',
[
[sg.Text('Folder Path')],
[sg.Input(), sg.FolderBrowse()],
[sg.OK(), sg.Cancel()]
]).read(close=True)

header = [[sg.Text('Movie Name',size=(55,1)),sg.Text('Rating',size=(8,1)),sg.Text('Genres',size=(25,1))]]
input_rows = [[sg.Text(names[i],size=(55,1)),sg.Text(rating[i],size=(3,2)),sg.Text(genres[i],size=(25,1))] for i in range(len(names))]
names, rating, genres, platforms = find_movie(values[0])

header = [
[
sg.Text('Movie Name', size=(55, 1)),
sg.Text('Rating', size=(8, 1)),
sg.Text('Genres', size=(25, 1)),
sg.Text('Platforms', size=(25, 1))
]
]

input_rows = [
[
sg.Text(names[i], size=(55, 1)),
sg.Text(rating[i], size=(3, 2)),
sg.Text(genres[i], size=(25, 1)),
sg.Text(platforms[i], size=(25, 1))
] for i in range(len(names))]

layout = header + input_rows
window = sg.Window('Movie Rating with Genres', layout, font='Courier 12')
event, values = window.read(close=True)
26 changes: 12 additions & 14 deletions README.md
@@ -1,10 +1,12 @@
# Find IMDB Ratings
<!--Remove the below lines and add yours -->
This script is used to fetch the Ratings and Genre of the films in your films folder that match with ones on IMDb, the data is scraped from IMDB's official website and store in a csv file. The csv file can be used for analysis then, sorting acc to rating etc.
This script fetches the ratings, genres, and available streaming platforms of the films in your films folder that match entries on IMDb.

The data is scraped from IMDb's official website and stored in a CSV file, which can then be used for analysis, such as sorting by rating.
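As a quick illustration of that kind of analysis, the generated `film_ratings.csv` can be sorted with pandas (a minimal sketch: the column names match the ones this PR writes, but the sample rows are made up):

```python
import pandas as pd

# Sample rows shaped like the CSV this script produces (made-up data)
df = pd.DataFrame({
    'Film Name': ['Inception', 'Tenet', 'Dunkirk'],
    'Rating': [8.8, 7.3, 7.8],
    'Genre': ['Action, Sci-Fi', 'Action, Thriller', 'War'],
    'Platforms': ['Netflix', 'None', 'Prime Video'],
})

# Sort by rating, best first -- the same ordering find_movie applies
df = df.sort_values('Rating', ascending=False)
print(df['Film Name'].tolist())  # → ['Inception', 'Dunkirk', 'Tenet']
```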

Input: -> Path of the directory which contains the films.

Output: -> A new csv file is made - 'film_ratings.csv' which contains the ratings for the films in your directory.
Output: -> A new csv file - 'film_ratings.csv' - which contains the ratings for the films in your directory.

__P.S. - Please ask to be assigned before making a PR.__<br>Creation of new issues is encouraged.

@@ -16,14 +18,14 @@ Do a git pull from the master repo before making a new Pull Request using<br>
### Prerequisites
<!--Remove the below lines and add yours -->
This program uses the external dependencies 'BeautifulSoup' (for web scraping), 'requests' (for fetching webpage content), 'pandas' (to create the CSV file), and 'os' (to read the directory). <br>
These libraries can be installed easily by using the following command: pip install -r requirements.txt
These libraries can be installed easily by using the following command: `pip install -r requirements.txt`

### How to run the script
<!--Remove the below lines and add yours -->
-> Install the requirements. <br>
-> Type the following command: python GUI.py <br>
-> Browse to the path where Films are located inside the folder <br>
-> A csv file with rating will be created in the same directory as the python file. <br>
1. Install the requirements using the `pip install -r requirements.txt` command. <br>
2. Type the following command: `python GUI.py` <br>
3. Browse to the folder that contains your films. <br>
4. A CSV file with the ratings will be created in the same directory as the Python file. <br>

### Sample use of the script
<!--Remove the below lines and add yours -->
@@ -41,12 +43,8 @@ Result: (Also stored in a csv file)


## Author
<a href="https://github.com/utkarshbajaj"> Utkarsh Bajaj </a>
[Utkarsh Bajaj](https://github.com/utkarshbajaj)

## Contributors
<ul>
<li> <a href ="https://github.com/Aravindha1234u"> Avarindha </a> </li>
<li> <a href = "https://github.com/Vipul-Bajaj"> Vipul Bajaj </a> </li>
</ul>


* [Avarindha](https://github.com/Aravindha1234u)
* [Vipul Bajaj](https://github.com/Vipul-Bajaj)
63 changes: 43 additions & 20 deletions find_IMDb_rating.py
@@ -1,12 +1,33 @@
from bs4 import BeautifulSoup
import requests
import json
import pandas as pd
import os


def get_online_platforms(movie_name):
SEARCH_URL = f'https://www.imdb.com/find?q={movie_name}'

session = requests.session()
response = session.get(SEARCH_URL)
soup = BeautifulSoup(response.text, 'html.parser')

first_movie_url = soup.find('td', attrs={'class': 'result_text'}).find('a').get('href')
# with open('test.html', 'w+') as f: f.write(soup.__str__())
movie_id = first_movie_url.split('/')[-2]

PLATFORMS_URL = f"https://www.imdb.com/watch/_ajax/box/{movie_id}"

response = session.get(PLATFORMS_URL)
soup = BeautifulSoup(response.text, 'html.parser')

all_platforms = [div.text for div in soup.find_all('div', attrs={'class': 'watchoption-modal__provider'})]

return all_platforms if len(all_platforms) > 0 else ["None"]
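The provider-parsing step above can be exercised offline on a static snippet (a sketch under the assumption that the `/watch/_ajax/box/<id>` response wraps each provider name in a `watchoption-modal__provider` div, as the selector in this PR implies):

```python
from bs4 import BeautifulSoup

# A made-up fragment shaped like the AJAX platforms response
html = """
<div class="watchoption-modal__provider">Netflix</div>
<div class="watchoption-modal__provider">Prime Video</div>
"""

soup = BeautifulSoup(html, 'html.parser')
platforms = [div.text for div in
             soup.find_all('div', attrs={'class': 'watchoption-modal__provider'})]

# Same fallback as get_online_platforms: report "None" when nothing matched
platforms = platforms if platforms else ["None"]
print(platforms)  # → ['Netflix', 'Prime Video']
```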


def find_movie(directory_path):
# Setting up session
s = requests.session()

# List containing all the films for which data has to be scraped from IMDB
films = []
@@ -15,8 +36,9 @@ def find_movie(directory_path):
names = []
ratings = []
genres = []
platforms = []

# Define path where your films are present
# For eg: "/Users/utkarsh/Desktop/films"
path = directory_path

@@ -36,51 +58,52 @@ def find_movie(directory_path):
# release = x[1]
query = "+".join(title.split(" "))
URL = "https://www.imdb.com/search/title/?title=" + query

# print(release)
try:
response = s.get(URL)

# getting content from the IMDB website
content = response.content

# print(response.status_code)

soup = BeautifulSoup(response.content, features="html.parser")
#searching all films containers found
soup = BeautifulSoup(content, features="html.parser")
# searching all film containers found
containers = soup.find_all("div", class_="lister-item-content")
for result in containers:
name1 = result.h3.a.text
name = result.h3.a.text.lower()

# Uncomment the lines below if you also want to match by year; define a year variable before this
# year = result.h3.find(
# "span", class_="lister-item-year text-muted unbold"
# ).text.lower()

# if film found (searching by name)
if title in name:
#scraping rating
rating = result.find("div",class_="inline-block ratings-imdb-rating")["data-value"]
#scraping genre
# scraping rating
rating = result.find(
"div", class_="inline-block ratings-imdb-rating")["data-value"]
# scraping genre
genre = result.p.find("span", class_="genre")
genre = genre.contents[0].strip()

# appending name, rating and genre to individual lists
names.append(name1)
ratings.append(rating)
genres.append(genre)


platforms.append(", ".join(get_online_platforms(name1)))

except Exception:
print("Try again with a valid combination of title and release year")

#storing in pandas dataframe
df = pd.DataFrame({'Film Name':names,'Rating':ratings,'Genre':genres})
# storing in pandas dataframe
df = pd.DataFrame({'Film Name': names, 'Rating': ratings, 'Genre': genres,
"Platforms": platforms})
df = df.sort_values("Rating", ascending=False)

# making csv using pandas
df.to_csv('film_ratings.csv', index=False, encoding='utf-8')

return (names, ratings, genres)
return (names, ratings, genres, platforms)
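The movie-id extraction in `get_online_platforms` above relies on IMDb title links having the form `/title/<id>/`; a tiny sketch with an example path:

```python
# A result_text link as returned by IMDb's find page (example id)
first_movie_url = "/title/tt0111161/"

# Splitting on '/' gives ['', 'title', 'tt0111161', ''], so [-2] is the id
movie_id = first_movie_url.split('/')[-2]
print(movie_id)  # → tt0111161
```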
Binary file modified requirements.txt
Binary file not shown.