Skip to content

Commit

Permalink
Initial commit including workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
yotkadata committed Jun 5, 2023
1 parent 7a1159a commit 07d2028
Show file tree
Hide file tree
Showing 16 changed files with 192,581 additions and 0 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

# Run on pushes to main and on pull requests that target main
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

# The job only needs to read the repository contents
permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    # v4/v5 of these actions replace the v3 releases, which run on the deprecated Node 16 runtime
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest
187 changes: 187 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
A movie recommender app built with streamlit.
"""

import numpy as np
import pandas as pd
import streamlit as st
from imdb import Cinemagoer
from imdb.helpers import resizeImage

from recommender import Recommender


@st.cache_data
def load_movies():
    """
    Read the prepared movie table from ``./data/movies_prepared.csv``.

    The resulting DataFrame is cached through ``st.cache_data``.
    """
    return pd.read_csv("./data/movies_prepared.csv")


@st.cache_data
def get_random_movies_to_rate(num_movies=5):
    """
    Draw ``num_movies`` distinct movies at random from the 100 best-rated ones.

    The result is cached through ``st.cache_data``.
    """
    # Keep only the 100 rows with the highest "rating", re-indexed from 0
    top_rated = (
        load_movies()
        .sort_values("rating", ascending=False)
        .reset_index(drop=True)
        .iloc[:100]
    )

    # After the re-index, labels and positions coincide, so the sampled
    # index labels can be used as positions.
    picked = np.random.choice(top_rated.index, size=num_movies, replace=False)

    return top_rated.iloc[picked]


@st.cache_data
def get_movies(num_movies=5):
    """
    Function to get movie titles and ids to be selected by the user.

    The movies are sorted alphabetically by title. ``num_movies`` limits how
    many rows are returned; pass the string "all" to get every movie.
    """
    movies = load_movies().sort_values("title").reset_index(drop=True)

    if num_movies == "all":
        return movies

    # Honour the requested limit (it was previously computed but never applied)
    return movies.head(num_movies)


@st.cache_data
def get_movie_id_from_title(title_str):
    """
    Look up the ID of the first movie whose title equals ``title_str``.

    The ID is returned as a plain ``int``.
    """
    catalogue = load_movies()
    title_matches = catalogue["title"] == title_str
    matching_ids = catalogue.loc[title_matches, "movie_id"]

    return int(matching_ids.iloc[0])


def prepare_query_favourites():
    """
    Function to prepare query to search for movies based on favourite movies.

    Renders an intro text and a multiselect over all movie titles, then
    returns a dict that maps the movie ID of every selected title to a
    rating of 5.
    """
    data = get_movies("all")

    # Adjacent string literals are joined verbatim, so every fragment has to
    # carry its own trailing space.
    st.markdown(
        "Don't know which movie to watch tonight? "
        "Just **tell us some of your favourite movies** and based on that "
        "we'll recommend you something you might like."
    )

    selected_titles = st.multiselect(
        "Select as many movies as you like. Type to filter the list.",
        data["title"],
    )

    # Every selected favourite is treated as a rating of 5
    return {get_movie_id_from_title(title): 5 for title in selected_titles}


def prepare_query_rating():
    """
    Function to prepare query to search for movies based on rating.

    Renders an intro text and one 0-5 slider for each of the movies returned
    by ``get_random_movies_to_rate(10)``, then returns a dict that maps each
    movie ID to the value chosen on its slider.
    """
    data = get_random_movies_to_rate(10)

    # Adjacent string literals are joined verbatim, so the first fragment
    # needs a trailing space.
    st.markdown(
        "Don't know which movie to watch tonight? Here are 10 randomly chosen movies. "
        "Just **rate as many of them as you like** and based on your rating we'll recommend you something you might like."
    )

    query = {}
    for movie_id, title in zip(data["movie_id"], data["title"]):
        # Key the widget on the movie ID so that two movies sharing a title
        # do not end up with the same widget identity. The ID is stored as a
        # plain int, like the keys built by prepare_query_favourites().
        query[int(movie_id)] = st.select_slider(
            title,
            options=[0, 1, 2, 3, 4, 5],
            key=f"rating_slider_{movie_id}",
        )

    return query


def recommender(rec_type="fav"):
    """
    Function to recommend movies.

    Renders the query widgets, the algorithm selector, the result-count
    slider and the start button. Once the button is pressed, the
    recommendations are calculated and each one is displayed with
    information fetched from IMDB.

    ``rec_type`` selects how the query is collected: "rating" asks the user
    to rate sample movies, any other value asks for favourite movies. It is
    also appended to the widget keys, keeping them distinct per ``rec_type``.
    """

    # Prepare query based on type
    query = (
        prepare_query_rating() if rec_type == "rating" else prepare_query_favourites()
    )

    # Show select list for algorithm to use
    method_select = st.selectbox(
        "Select algorithm",
        ["Nearest Neighbors", "Non-negative matrix factorization"],
        key="method_selector_" + rec_type,
    )

    # Translate selection into keywords
    method = "neighbors" if method_select == "Nearest Neighbors" else "nmf"

    num_movies = st.slider(
        "How many movies should we recommend?",
        min_value=1,
        max_value=10,
        value=3,
        key="num_movies_slider_" + rec_type,
    )

    # Nothing more to do until the user presses the button
    if not st.button("Recommend some movies!", key="button_" + rec_type):
        return

    with st.spinner(f"Calculating recommendations using {method_select}..."):
        recommend = Recommender(query, method=method, k=num_movies)
        _, titles = recommend.recommend()

    with st.spinner("Fetching movie information from IMDB..."):
        # Name the algorithm that was actually selected
        st.write(f"Recommended movies using {method_select}:\n")

        # One client is enough for all lookups
        imdb = Cinemagoer()
        for title in titles:
            imdb_movies = imdb.search_movie(title)
            if not imdb_movies:
                # An empty search result would otherwise raise IndexError
                st.warning(f"No IMDB entry found for '{title}'.")
                continue
            imdb_movie = imdb.get_movie(imdb_movies[0].movieID)
            display_movie(imdb_movie)


def display_movie(movie):
    """
    Function that displays a movie with information from IMDB.

    ``movie`` is a mapping-like object (in this file, the result of
    ``Cinemagoer.get_movie``) read through the keys "title", "year",
    "rating", "genres", "director", "cast", "plot outline" and
    "full-size cover url". Everything except the title is read with
    ``.get`` so that a record lacking one of these fields is still shown
    instead of failing with a KeyError.
    """
    directors = [person["name"] for person in movie.get("director") or []]
    cast = [person["name"] for person in movie.get("cast") or []]
    genres = movie.get("genres") or []
    cover_url = movie.get("full-size cover url")
    year = movie.get("year")
    rating = movie.get("rating")

    heading = f"{movie['title']} ({year})" if year is not None else f"{movie['title']}"
    rating_text = f"{rating}/10" if rating is not None else "n/a"

    # Only hint at further names when the cast list was actually truncated
    cast_text = ", ".join(cast[:10])
    if len(cast) > 10:
        cast_text += ", ..."

    col1, col2 = st.columns([1, 4])

    with col1:
        # Skip the image when no cover is available
        if cover_url:
            st.image(resizeImage(cover_url, width=200))

    with col2:
        st.header(heading)
        st.markdown(f"**IMDB-rating:** {rating_text}")
        st.markdown(f"**Genres:** {', '.join(genres)}")
        st.markdown(f"**Director(s):** {', '.join(directors)}")
        st.markdown(f"**Cast:** {cast_text}")
        st.markdown(f"{movie.get('plot outline') or ''}")
        st.divider()


# Set the page title
st.set_page_config(page_title="What should I watch tonight? | Your movie recommender")

# Page heading and subheading
st.title("What should I watch tonight?")
st.subheader("Your personal movie recommender")

# One tab per way of collecting the query, rendered in the listed order
for tab, rec_type in zip(
    st.tabs(["By favourite movies", "By rating"]),
    ("fav", "rating"),
):
    with tab:
        recommender(rec_type)
96 changes: 96 additions & 0 deletions build_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Build and save models for the recommender
"""

import pickle

import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF
from sklearn.neighbors import NearestNeighbors


def build_model_nmf(n_components: int = 2000, max_iter: int = 1000) -> str:
    """
    Fit an NMF model on the prepared ratings and pickle it.

    Reads ``data/ratings_prepared.csv``, fits ``NMF`` with the given
    ``n_components`` and ``max_iter`` and writes the fitted model to
    ``data/model_nmf.pkl``. Returns the path of that file.
    """
    file_name = "data/model_nmf.pkl"

    # Read the prepared ratings
    ratings = pd.read_csv("data/ratings_prepared.csv")

    # Sparse rating matrix: rows indexed by user_id, columns by movie_id
    values = ratings["rating"]
    coordinates = (ratings["user_id"], ratings["movie_id"])
    r_matrix = csr_matrix((values, coordinates))

    # Set up the model and announce its hyperparameters before fitting
    model = NMF(n_components=n_components, max_iter=max_iter)
    print(
        "NMF model instantiated with following hyperparameters:\n"
        f"n_components={n_components}\n"
        f"max_iter={max_iter}\n"
        "Starting to fit.\n"
    )
    model.fit(r_matrix)

    # Report the reconstruction error of the fitted model
    print(f"NMF model built. Reconstruction error: {model.reconstruction_err_}")

    # Persist the fitted model
    with open(file_name, "wb") as file:
        pickle.dump(model, file)

    return file_name


def build_model_neighbors(metric: str = "cosine", n_jobs: int = -1) -> str:
    """
    Fit a Nearest Neighbors model on the prepared ratings and pickle it.

    Reads ``data/ratings_prepared.csv``, fits ``NearestNeighbors`` with the
    given ``metric`` and ``n_jobs`` and writes the fitted model to
    ``data/model_neighbors.pkl``. Returns the path of that file.
    """
    file_name = "data/model_neighbors.pkl"

    # Read the prepared ratings
    ratings = pd.read_csv("data/ratings_prepared.csv")

    # Sparse rating matrix: rows indexed by user_id, columns by movie_id
    values = ratings["rating"]
    coordinates = (ratings["user_id"], ratings["movie_id"])
    r_matrix = csr_matrix((values, coordinates))

    # Set up the model and announce its hyperparameters before fitting
    model = NearestNeighbors(metric=metric, n_jobs=n_jobs)
    print(
        "Nearest Neighbors model instantiated with following hyperparameters:\n"
        f"metric={metric}\n"
        f"n_jobs={n_jobs}\n\n"
        "Starting to fit.\n"
    )
    model.fit(r_matrix)

    # Confirm that fitting finished
    print("Nearest neighbor model built.")

    # Persist the fitted model
    with open(file_name, "wb") as file:
        pickle.dump(model, file)

    return file_name


def main():
    """
    Build both recommender models and print where each one was saved.
    """
    # NMF first, then Nearest Neighbors
    print(f"NMF model saved to {build_model_nmf()}.")
    print(f"Nearest Neighbors model saved to {build_model_neighbors()}.")


if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions data/data_with_imdb.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
,movie_id,imdb_id,title,imdb_rating,year,genre,director,cast,cover_url,plot
0,0,0114709,Toy Story,8.3,1995.0,"['Animation', 'Adventure', 'Comedy', 'Family', 'Fantasy']",['John Lasseter'],"['Tom Hanks', 'Tim Allen', 'Don Rickles', 'Jim Varney', 'Wallace Shawn', 'John Ratzenberger', 'Annie Potts', 'John Morris', 'Erik von Detten', 'Laurie Metcalf', 'R. Lee Ermey', 'Sarah Rayne', 'Penn Jillette', 'Jack Angel', 'Spencer Aste', 'Greg Berg', 'Lisa Bradley', 'Kendall Cunningham', 'Debi Derryberry', 'Cody Dorkin', 'Bill Farmer', 'Craig Good', 'Gregory Grudt', 'Danielle Judovits', 'Sam Lasseter', 'Brittany Levenbrown', 'Sherry Lynn', 'Scott McAfee', 'Mickie McGowan', ""Ryan O'Donohue"", 'Jeff Pidgeon', 'Patrick Pinney', 'Phil Proctor', 'Jan Rabson', 'Joe Ranft', 'Andrew Stanton', 'Shane Sweet', 'Anthony Burch', 'John Lasseter']",https://m.media-amazon.com/images/M/MV5BMDU2ZWJlMjktMTRhMy00ZTA5LWEzNDgtYmNmZTEwZTViZWJkXkEyXkFqcGdeQXVyNDQ2OTk4MzI@._V1_SX200_.jpg,"A little boy named Andy loves to be in his room, playing with his toys, especially his doll named ""Woody"". But, what do the toys do when Andy is not with them, they come to life. Woody believes that his life (as a toy) is good. However, he must worry about Andy's family moving, and what Woody does not know is about Andy's birthday party. Woody does not realize that Andy's mother gave him an action figure known as Buzz Lightyear, who does not believe that he is a toy, and quickly becomes Andy's new favorite toy. Woody, who is now consumed with jealousy, tries to get rid of Buzz. Then, both Woody and Buzz are now lost. They must find a way to get back to Andy before he moves without them, but they will have to pass through a ruthless toy killer, Sid Phillips."
1,1,0113228,Grumpier Old Men,6.6,1995.0,"['Comedy', 'Romance']",['Howard Deutch'],"['Walter Matthau', 'Jack Lemmon', 'Sophia Loren', 'Ann-Margret', 'Burgess Meredith', 'Daryl Hannah', 'Kevin Pollak', 'Katie Sagona', 'Ann Morgan Guilbert', 'James Andelin', 'Marcus Klemp', 'Max Wright', 'Cheryl Hawker', 'Wayne A. Evenson', 'Allison Levine', 'John Patrick Martin', 'Adam Ward', 'Ryan Waldoch', 'James Cada', 'Jaclyn Ross', 'Kyle Christopherson', 'Jeffrey L. Smith', 'Geraldo Rivera', 'Warren Schueneman', 'Jack Mitsch', 'Sterling Robson', 'Gregory Schuneman', 'Denny Schusted', 'Michelle Johnston', 'Wallace Olson', 'Carl Johnson', 'Eugene Karels', 'Lawrence Grivna', 'Joel Edwards', 'Larissa Lowthorp', 'Johnny Luckett', 'Tammara Melloy', 'Shirley Stoler', 'Ryan Wotherspoon']",https://m.media-amazon.com/images/M/MV5BMjQxM2YyNjMtZjUxYy00OGYyLTg0MmQtNGE2YzNjYmUyZTY1XkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_SX200_.jpg,"Things don't seem to change much in Wabasha County: Max and John are still fighting after 35 years, Grandpa still drinks, smokes, and chases women , and nobody's been able to catch the fabled ""Catfish Hunter"", a gigantic catfish that actually smiles at fishermen who try to snare it. Six months ago John married the new girl in town (Ariel), and people begin to suspect that Max might be missing something similar in his life. The only joy Max claims is left in his life is fishing, but that might change with the new owner of the bait shop."
2,2,0113277,Heat,8.3,1995.0,"['Action', 'Crime', 'Drama']",['Michael Mann'],"['Al Pacino', 'Robert De Niro', 'Val Kilmer', 'Jon Voight', 'Tom Sizemore', 'Diane Venora', 'Amy Brenneman', 'Ashley Judd', 'Mykelti Williamson', 'Wes Studi', 'Ted Levine', 'Dennis Haysbert', 'William Fichtner', 'Natalie Portman', 'Tom Noonan', 'Kevin Gage', 'Hank Azaria', 'Susan Traylor', 'Kim Staunton', 'Danny Trejo', 'Henry Rollins', 'Jerry Trimble', 'Martin Ferrero', 'Ricky Harris', 'Tone Loc', 'Begonya Plaza', 'Hazelle Goodman', 'Ray Buktenica', 'Jeremy Piven', 'Xander Berkeley', 'Rick Avery', 'Brad Baldridge', 'Andrew Camuccio', 'Brian Camuccio', 'Max Daniels', 'Vince Deadrick Jr.', 'Charles Duke', 'Thomas Elfmont', 'Kenny Endoso', 'Kimberly Flynn', 'Steven Ford', 'Farrah Forke', 'Hannes Fritsch', 'Amanda Graves', 'Emily Graves', 'Niki Haris', 'Ted Harvey', 'Patricia Healy', 'Paul Herman', 'Cindy Katz', 'Brian Libby', 'Bill McIntosh', 'Dan Martin', 'Rick Marzan', 'Terry Miller', 'Paul Moyer', ""Daniel O'Haco"", 'Mario Roberts', 'Phillip Robinson', 'Thomas Rosales Jr.', 'Rainelle Saunders', 'Kai Soremekun', 'Rey Verdugo', 'Wendy L. Walsh', 'Yvonne Zima', 'Anthony Backman', 'Monica Lee Bellais', 'Peter Blackwell', 'Trevor Coppola', 'Bud Cort', 'Michele Edison', 'Annette Goodman', 'Mick Gould', 'Mary Kircher', 'David Koseruba', 'Darin Mangan', 'Melissa S. Markess', 'Andre McCoy', 'Darren Melton', 'Robert Miranda', 'Kathryn Mullen', 'Manny Perry', 'Jimmy N. Roberts', 'Iva Franks Singer', 'Jimmy Star', 'Gloria Straube', 'Viviane Vives', 'Tim Werner']",https://m.media-amazon.com/images/M/MV5BYjZjNTJlZGUtZTE1Ny00ZDc4LTgwYjUtMzk0NDgwYzZjYTk1XkEyXkFqcGdeQXVyNjU0OTQ0OTY@._V1_SX200_.jpg,"Hunters and their prey--Neil and his professional criminal crew hunt to score big money targets (banks, vaults, armored cars) and are, in turn, hunted by Lt. Vincent Hanna and his team of cops in the Robbery/Homicide police division. 
A botched job puts Hanna onto their trail while they regroup and try to put together one last big 'retirement' score. Neil and Vincent are similar in many ways, including their troubled personal lives. At a crucial moment in his life, Neil disobeys the dictum taught to him long ago by his criminal mentor--'Never have anything in your life that you can't walk out on in thirty seconds flat, if you spot the heat coming around the corner'--as he falls in love. Thus the stage is set for the suspenseful ending...."
3,3,,Seven (a.k.a. Se7en) (1995),,,,,,,
4,4,0114814,The Usual Suspects,8.5,1995.0,"['Crime', 'Drama', 'Mystery', 'Thriller']",['Bryan Singer'],"['Stephen Baldwin', 'Gabriel Byrne', 'Benicio Del Toro', 'Kevin Pollak', 'Kevin Spacey', 'Chazz Palminteri', 'Pete Postlethwaite', 'Suzy Amis', 'Giancarlo Esposito', 'Dan Hedaya', 'Paul Bartel', 'Carl Bressler', 'Phillipe Simon', 'Jack Shearer', 'Christine Estabrook', 'Clark Gregg', 'Morgan Hunter', 'Ken Daly', 'Michelle Clunie', 'Louis Lombardi', 'Frank Medrano', 'Ron Gilbert', ""Vito D'Ambrosio"", 'Gene Lythgow', 'Bob Elmore', 'David Powledge', 'Bob Pennetta', 'Billy Bates', 'Smadar Hanson', 'Castulo Guerra', 'Peter Rocca', 'Bert Williams', 'Jaime H. Campos', 'John Gillespie', 'Johnathan Gorman', 'Peter Greene', 'Michael McKay', 'Christopher McQuarrie', 'Ralph Moratz', 'Scott B. Morgan', 'Mike Nyman', 'Grace Sinden']",https://m.media-amazon.com/images/M/MV5BYTViNjMyNmUtNDFkNC00ZDRlLThmMDUtZDU2YWE4NGI2ZjVmXkEyXkFqcGdeQXVyNjU0OTQ0OTY@._V1_SX200_.jpg,"Following a truck hijack in New York, five criminals are arrested and brought together for questioning. As none of them are guilty, they plan a revenge operation against the police. The operation goes well, but then the influence of a legendary mastermind criminal called Keyser Söze is felt. It becomes clear that each one of them has wronged Söze at some point and must pay back now. The payback job leaves 27 men dead in a boat explosion, but the real question arises now: Who actually is Keyser Söze?"
Loading

0 comments on commit 07d2028

Please sign in to comment.