In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("../datasets/raw/movies_raw.csv")

In [3]:
print(df.dtypes)

movie_id               int64
title                 object
overview              object
genres                object
cast                  object
keywords              object
runtime                int64
release_year           int64
language              object
vote_average         float64
vote_count             int64
combined_features     object
dtype: object


In [4]:
df['overview'].tail(3)

4997    A woman watches time pass beside the suitcases...
4998    A weekend at the seaside brings out a sick rel...
4999    Desperate to take care of his pregnant wife be...
Name: overview, dtype: object

In [5]:

print(df.isnull().sum())

movie_id               0
title                  0
overview             219
genres                 0
cast                   0
keywords               0
runtime                0
release_year           0
language               0
vote_average           0
vote_count             0
combined_features      0
dtype: int64


In [6]:
df = df.fillna({'overview': 'no overview'})

In [7]:
import ast


def normalize_text(x):
    # Handle NaN
    if pd.isna(x):
        return ""

    # Case 1: already a real list
    if isinstance(x, list):
        return ", ".join(g.strip() for g in x if isinstance(g, str) and g.strip())

    # Case 2: string that looks like a list -> parse it
    if isinstance(x, str):
        x = x.strip()

        # empty or invalid
        if x.lower() in ["", "nan", "none", "[]"]:
            return ""

        # stringified list like "['Drama', 'Romance']"
        if x.startswith("[") and x.endswith("]"):
            try:
                parsed = ast.literal_eval(x)
                if isinstance(parsed, list):
                    return ", ".join(
                        g.strip() for g in parsed if isinstance(g, str) and g.strip()
                    )
            except Exception:
                pass  # fall through

        # already pipe or comma separated
        return x.replace(",", ", ")

    return ""


In [8]:
import re

def clean_brackets(text):
    if pd.isna(text):
        return ""
    # Remove [, ], and ' using regex
    return re.sub(r"[\[\]']", "", str(text))

In [9]:
import re
import pandas as pd

def clean_text(text):
    if pd.isna(text):
        return ""
    text = text.lower()
    text = re.sub(r"[^a-z\s,:.\'\"]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

In [10]:
df['overview'].iloc[67:75]

67    Bruce gets phone calls from a woman claiming t...
68    A 2-part original movie featuring Scorpio: An ...
69    Abel is a ghostwriter. He just finished writin...
70    Many years after a deadly terrorist siege in a...
71    Filmed version of the 2019 Stratford Festival ...
72    Following a brutal civil war, an interrogation...
73    Tune-in on Friday 5/22 @ midnight to watch Rel...
74                                          no overview
Name: overview, dtype: object

In [11]:
df['overview']= df['overview'].apply(clean_text)
df['overview'].iloc[67:75]

67    bruce gets phone calls from a woman claiming t...
68    a part original movie featuring scorpio: an ag...
69    abel is a ghostwriter. he just finished writin...
70    many years after a deadly terrorist siege in a...
71    filmed version of the stratford festival produ...
72    following a brutal civil war, an interrogation...
73    tunein on friday midnight to watch relaxing ol...
74                                          no overview
Name: overview, dtype: object

In [12]:
df['keywords'] = df['keywords'].apply(normalize_text).apply(clean_brackets)

df['keywords'].tail(5)

df["genres"] = df["genres"].apply(normalize_text).apply(lambda x: x.lower())

df["genres"].head(2)

df['genres']= df['genres'].apply(clean_brackets)
df["keywords"].iloc[87:94]
df['cast']=df['cast'].apply(normalize_text)

df['cast']= df['cast'].apply(clean_brackets)

df['runtime'] = df['runtime'].astype(int)
df['release_year']=df['release_year'].astype(int)

# Check if the keywords at index 87 is empty (length 0)
print(type(df['overview'].iloc[87]))


<class 'str'>


In [13]:
import pycountry
import pandas as pd

def convert_lang(code):
    if pd.isnull(code):
        return code
    
    # pycountry needs uppercase 2-letter codes (e.g., 'EN')
    code_clean = str(code).strip().upper()
    
    try:
        lang = pycountry.languages.get(alpha_2=code_clean)
        return lang.name.lower() if lang else code
    except (AttributeError, LookupError):
        return code

# Apply the helper function
df["language"] = df["language"].apply(convert_lang)

In [14]:
df['language']= df['language'].apply(lambda x: x.lower() if isinstance(x,str) else x)
df['language'].iloc[78]

'english'

In [15]:
def replace_with_keywords(row):
    """
    Takes a row (Series) from the DataFrame.
    If overview is 'no overview', joins keywords into a string.
    """
    # Access columns by name from the row object
    overview = str(row['overview']).lower().strip()
    keywords = row['keywords']
    genres = str(row['genres'])
    
    # Logic: if 'no overview' and keywords exist
    if overview == 'no overview' and len(keywords) > 0:
        # If keywords is a list, join it; if it's already a string, return it
        if isinstance(keywords, str):
            row['extracted_text'] = str(keywords.strip())
        return row
    
    if overview == 'no overview' and len(genres) > 0:
        if isinstance(genres, str):
            row['extracted_text'] = str(genres.strip())
        return row

    row['extracted_text'] = pd.NA
    # Otherwise, keep the original overview
    return row

df = df.apply(replace_with_keywords, axis =1)

In [16]:
mask = (df['extracted_text'].notna())

df.loc[mask, 'extracted_text'].head()

22                          france, society, confinement
74     sports, basketball, national basketball associ...
98                                       comedy, romance
119                                   documentary, music
120    archive footage, movie star, hollywood star, g...
Name: extracted_text, dtype: object

In [17]:
# df = df.apply(replace_with_keywords, axis =1)

In [18]:
# Creates a view of the data where extracted_text has values
filtered_df = df[df['extracted_text'].notna()]
print(filtered_df.count())

movie_id             170
title                170
overview             170
genres               170
cast                 170
keywords             170
runtime              170
release_year         170
language             170
vote_average         170
vote_count           170
combined_features    170
extracted_text       170
dtype: int64


In [19]:
from thefuzz import fuzz, process

def get_unique_fuzzy_keywords(input_str, threshold=70):
    # 1. Clean and split the string into a list
    raw_keywords = [k.strip() for k in input_str.split(',') if k.strip()]
    raw_keywords.sort(key= len, reverse= True)
    
    unique_keywords = []

    for kw in raw_keywords:
        # 2. Check if the keyword is similar to anything already accepted
        # If the list is empty, just add the first word
        if not unique_keywords:
            unique_keywords.append(kw)
            continue
        
        # 3. Find the best match score among already accepted words
        # extractOne returns (best_match, score)
        _, score = process.extractOne(kw, unique_keywords, scorer=fuzz.token_set_ratio)
        
        # 4. If the similarity score is low, it's a "unique" new concept
        if score < threshold:
            unique_keywords.append(kw)
            
    return unique_keywords

# Example Usage
# data = "sports, basketball, national basketball association (nba)"
# result = get_unique_fuzzy_keywords(data)

# print(result) 
# Output: ['sports', 'national basketball association (nba)']

In [20]:
mask = (df['extracted_text'].notna())

df.loc[mask,'extracted_text']= df.loc[mask,'extracted_text'].apply(get_unique_fuzzy_keywords)

In [21]:
df.loc[mask,'extracted_text'].head(5)

22                        [confinement, society, france]
74       [national basketball association (nba), sports]
98                                     [romance, comedy]
119                                 [documentary, music]
120    [biographical documentary, archive footage, li...
Name: extracted_text, dtype: object

In [22]:
def keywords_to_plot(keywords: list):
    if isinstance(keywords,list):
        keyword_text = ','.join(keywords)
        text_to_append = 'a movie that is about: '
        
        final_text = text_to_append + keyword_text
        return final_text
    

df.loc[mask,'extracted_text']= df.loc[mask,'extracted_text'].apply(keywords_to_plot)

df.loc[mask,'extracted_text'].head(5)

    



22     a movie that is about: confinement,society,france
74     a movie that is about: national basketball ass...
98                 a movie that is about: romance,comedy
119             a movie that is about: documentary,music
120    a movie that is about: biographical documentar...
Name: extracted_text, dtype: object

In [23]:
import pandas as pd
import numpy as np

def new_overview(row):
    overview = row['overview']
    extracted_text = row['extracted_text']

    # Use pd.isna() instead of 'is pd.nan'
    if overview == 'no overview':
        if not pd.isna(extracted_text):
            row['new_overview'] = extracted_text
        else:
            row['new_overview'] = 'no overview'
    else:
        # Crucial: define what happens if overview is NOT 'no overview'
        row['new_overview'] = overview

    return row

# Specify axis=1 to process by row
df = df.apply(new_overview, axis=1)

In [24]:
df[df['new_overview']=='no overview'].tail(4)

Unnamed: 0,movie_id,title,overview,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,combined_features,extracted_text,new_overview
4757,1057313,劇場の灯を消すな！サンシャイン劇場編,no overview,,"Shoko Takada, Masato Irie, Makoto Awane",,0,2020,japanese,0.0,0,"[] ['Shoko Takada', 'Masato Irie', 'Makoto Aw...",,no overview
4761,1246226,地球「Earth」20欧阳娜娜生日音乐会,no overview,,Ouyang Nana,,0,2020,chinese,0.0,0,[] ['Ouyang Nana'] [],,no overview
4888,1476822,Official髭男dism one-man tour 2019@日本武道館,no overview,,"Makoto Narazaki, Daisuke Ozasa, Masaki Matsuura",,0,2020,korean,0.0,0,"[] ['Makoto Narazaki', 'Daisuke Ozasa', 'Masa...",,no overview
4955,1012092,浦島さん,no overview,,"Sota Fukushi, Aki Hano, Makoto Awane",,0,2020,japanese,0.0,0,"[] ['Sota Fukushi', 'Aki Hano', 'Makoto Awane...",,no overview


In [25]:
# 1️⃣ Build a strict, correct mask
mask = (
    (df["overview"] == "no overview") &
    (df["extracted_text"].notna()) &
    (df["extracted_text"].str.strip() != "")
)

print(f"Rows eligible for generation: {mask.sum()}")

# 2️⃣ Safety check: exit early if nothing to process
if mask.sum() == 0:
    print("No rows match criteria. Nothing to generate.")



Rows eligible for generation: 170


In [26]:
import pandas as pd

# Set the max column width to None (unlimited)
pd.set_option('display.max_colwidth', None)

In [27]:
df['new_overview'].tail(5)

4995                                                                                                                                                                                                                                                                             after a recent and difficult divorce, alice hasn't seen her children in two months as she awaits a custody verdict. when her son calls her in the middle of the night, alice takes action, abducting the children on an illicit charter trip to the canary islands.
4996    tells the story of three young people living in the after corona and wis corona worlds. when a lost salaried worker, mochizuki yuto, who had lived all over the world, wanted to quit his job, an emergency declaration was issued due to the spread of the new coronavirus infection. the work environment of yuto, such as telework and online meetings, has changed completely. then, one day after the declaration was lifted, the eldest brother, taito and his younge

In [28]:
df.columns

Index(['movie_id', 'title', 'overview', 'genres', 'cast', 'keywords',
       'runtime', 'release_year', 'language', 'vote_average', 'vote_count',
       'combined_features', 'extracted_text', 'new_overview'],
      dtype='object')

In [29]:
columns_to_keep = [
    'movie_id', 'title', 'genres', 'cast', 'keywords', 
    'runtime', 'release_year', 'language', 
    'vote_average', 'vote_count', 'new_overview'
]

new_df = df[columns_to_keep]

In [30]:
new_df.head(2)

Unnamed: 0,movie_id,title,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,new_overview
0,670347,Bone Marrow,drama,"Parinaz Izadyar, Babak Hamidian, Javad Ezzati",,108,2020,persian,6.0,3,"bahar has divorced her husband and is now remarried living a new life. her son from her previous marriage, payam, has leukemia and now she must find her exhusband in order to save her son no matter what it takes."
1,695675,Fox Hunting,action,"Eva Huang Shengyi, Xu Jia, Eric Tsang Chi-Wai",,105,2020,chinese,9.0,1,"fox hunting adapted from wang jianxing's popular novel of the same name, the original work won the ""golden shield literature award"". it is about the kidnapping of the daughter of the chairman of a company based in a certain country abroad, and a huge ransom. the company's security captain and veteran xiao jian, in order to make up for his negligence, safeguard the dignity of the chinese people, assist the country's police officer mo tai through hardships, wipe out the terrorists, and rescue the touching stories of his compatriots."


In [31]:
new_df= new_df.rename(columns={'new_overview':'overview'})

In [32]:
new_df.head(1)

Unnamed: 0,movie_id,title,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,overview
0,670347,Bone Marrow,drama,"Parinaz Izadyar, Babak Hamidian, Javad Ezzati",,108,2020,persian,6.0,3,"bahar has divorced her husband and is now remarried living a new life. her son from her previous marriage, payam, has leukemia and now she must find her exhusband in order to save her son no matter what it takes."


In [33]:


new_df['embedding_text']= new_df['overview']


In [34]:
new_df.iloc[67:74]

Unnamed: 0,movie_id,title,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,overview,embedding_text
67,862246,Mama's Dead and Lives in the Basement,mystery,"Eugene Torres, June Cuthbertson",short film,5,2020,english,0.0,0,bruce gets phone calls from a woman claiming to be his dead mother.,bruce gets phone calls from a woman claiming to be his dead mother.
68,756167,Sitsit,horror,"Ivana Alawi, Jake Cuenca, Sarah Patricia Gill",,60,2020,tagalog,5.7,3,"a part original movie featuring scorpio: an aging photographer seeks the powers of an ancient chinese potion and turns into a hunk of a man to win the love of a sensual young maiden. but choices made might lead to a miserable and tragic picture, and, aswang: a woman with very little means adopted a stray dog who transforms into an aswang to protect her from danger.","a part original movie featuring scorpio: an aging photographer seeks the powers of an ancient chinese potion and turns into a hunk of a man to win the love of a sensual young maiden. but choices made might lead to a miserable and tragic picture, and, aswang: a woman with very little means adopted a stray dog who transforms into an aswang to protect her from danger."
69,899308,Gérard Gérard,"comedy, romance","Pierre Hancisse, Lucie Debay, Grégoire Oestermann",,21,2020,french,6.0,2,"abel is a ghostwriter. he just finished writing charlottes thesis. she is convinced that he is gerard gerard, a successful author of porn novels. abel takes advantage of the misunderstanding to seduce her...","abel is a ghostwriter. he just finished writing charlottes thesis. she is convinced that he is gerard gerard, a successful author of porn novels. abel takes advantage of the misunderstanding to seduce her..."
70,603258,Conference,drama,"Natalya Pavlenkova, Olga Lapshina, Kseniya Zueva","post-traumatic stress disorder (ptsd), sense of guilt, paralysis, based on true story, hostage-taking, grief, terrorism, guilt, terrorist attack, moscow, russia, mother daughter relationship",129,2020,russian,5.8,11,"many years after a deadly terrorist siege in a moscow theatre, survivor natalya returns to the crime scene to hold a memorial evening, finally able to confront her survivor's guilt and her estranged daughter and husband.","many years after a deadly terrorist siege in a moscow theatre, survivor natalya returns to the crime scene to hold a memorial evening, finally able to confront her survivor's guilt and her estranged daughter and husband."
71,791705,Othello,drama,"Michael Blake, Gordon S. Miller, Amelia Sargisson","jealousy, live theatre, filmed theater, shakespeare",116,2020,english,8.0,1,filmed version of the stratford festival production.,filmed version of the stratford festival production.
72,760793,Truth,drama,"Rachel Alig, Eric Paul Erickson, Jannica Olin","civil war, criminal",107,2020,english,3.0,1,"following a brutal civil war, an interrogation of a possible war criminal has a much deeper meaning buried in lies.","following a brutal civil war, an interrogation of a possible war criminal has a much deeper meaning buried in lies."
73,801489,TreeTV,"documentary, tv movie",,,309,2020,xx,6.0,1,tunein on friday midnight to watch relaxing old footage with joe pera on your tv.,tunein on friday midnight to watch relaxing old footage with joe pera on your tv.


In [35]:
new_df.columns

Index(['movie_id', 'title', 'genres', 'cast', 'keywords', 'runtime',
       'release_year', 'language', 'vote_average', 'vote_count', 'overview',
       'embedding_text'],
      dtype='object')

In [36]:
col = new_df["embedding_text"]

print("Total rows:", len(col))
print("NaNs:", col.isna().sum())
print("Non-strings:", sum(not isinstance(x, str) for x in col if pd.notna(x)))


Total rows: 5000
NaNs: 0
Non-strings: 0


In [37]:
# assert new_df["embedding_text"].notna().all()
# assert not new_df["embedding_text"].str.contains(r"\[|\]|'").any()
# assert new_df["runtime"].dtype in ["int64", "float64"]

In [38]:
import os

os.makedirs("../datasets/cleaned", exist_ok=True)
new_df.to_csv("../datasets/cleaned/movies_cleaned.csv", index=False)

In [39]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parents[0]  # moviebot/
sys.path.append(str(PROJECT_ROOT))


In [40]:
from src.data.movie_repository import MovieRepository

repo = MovieRepository()
repo.load()

df = repo.get_all_movies()
df.head()
repo.filter_movies(language=["english"], max_runtime=120).head()


Unnamed: 0,movie_id,title,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,overview,embedding_text
0,360920,The Grinch,"family, comedy, animation","Benedict Cumberbatch, Rashida Jones, Kenan Thompson","holiday, surrealism, remake, based on childrens book, christmas, illumination, dr. seuss",85,2018,english,6.87,4327,the grinch hatches a scheme to ruin christmas when the residents of whoville plan their annual holiday celebration.,the grinch hatches a scheme to ruin christmas when the residents of whoville plan their annual holiday celebration.
2,502682,Book Club,"romance, comedy, drama","Diane Keaton, Jane Fonda, Candice Bergen","new love, judge, widow, female friendship, book club, chef, get together, former lovers, older women",104,2018,english,6.229,818,four lifelong friends decide that their lives could change by becoming nasty and reading fifty shades of grey in their monthly book club to get inspiration on how to handle sexual pleasure at an elderly age.,four lifelong friends decide that their lives could change by becoming nasty and reading fifty shades of grey in their monthly book club to get inspiration on how to handle sexual pleasure at an elderly age.
3,324857,Spider-Man: Into the Spider-Verse,"animation, action, adventure, science fiction","Shameik Moore, Jake Johnson, Hailee Steinfeld","superhero, based on comic, aftercreditsstinger, alternate universe",117,2018,english,8.395,16687,"struggling to find his place in the world while juggling school and family, brooklyn teenager miles morales is unexpectedly bitten by a radioactive spider and develops unfathomable powers just like the one and only spiderman. while wrestling with the implications of his new abilities, miles discovers a super collider created by the madman wilson ""kingpin"" fisk, causing others from across the spiderverse to be inadvertently transported to his dimension.","struggling to find his place in the world while juggling school and family, brooklyn teenager miles morales is unexpectedly bitten by a radioactive spider and develops unfathomable powers just like the one and only spiderman. while wrestling with the implications of his new abilities, miles discovers a super collider created by the madman wilson ""kingpin"" fisk, causing others from across the spiderverse to be inadvertently transported to his dimension."
4,400155,Hotel Transylvania 3: Summer Vacation,"animation, comedy, family, fantasy","Adam Sandler, Andy Samberg, Selena Gomez","monster, vampire, vacation, cruise ship, summer vacation, dracula",97,2018,english,6.9,4616,"dracula, mavis, johnny and the rest of the drac pack take a vacation on a luxury monster cruise ship, where dracula falls in love with the ships captain, ericka, whos secretly a descendant of abraham van helsing, the notorious monster slayer.","dracula, mavis, johnny and the rest of the drac pack take a vacation on a luxury monster cruise ship, where dracula falls in love with the ships captain, ericka, whos secretly a descendant of abraham van helsing, the notorious monster slayer."
5,500919,211,"crime, action, thriller","Nicolas Cage, Sophie Skelton, Michael Rainey Jr.","police, pregnancy, afghanistan, based on true story, interpol, murder, bank robbery, bank heist, father daughter relationship, ride along",86,2018,english,5.312,481,"inspired by one of the longest and bloodiest reallife events in police history, officer mike chandler and a young civilian passenger find themselves underprepared and outgunned when fate puts them squarely in the crosshairs of a daring bank heist in progress by a fearless team of highlytrained and heavilyarmed men.","inspired by one of the longest and bloodiest reallife events in police history, officer mike chandler and a young civilian passenger find themselves underprepared and outgunned when fate puts them squarely in the crosshairs of a daring bank heist in progress by a fearless team of highlytrained and heavilyarmed men."


In [41]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parents[0]
sys.path.append(str(PROJECT_ROOT))


In [42]:
from src.models.embedding_model import EmbeddingModel

model = EmbeddingModel()
vec = model.embed_text("A feel-good animated movie about friendship")

vec.shape
from src.models.embedding_model import EmbeddingModel
from src.index.faiss_index import FaissIndex

texts = [
    "A happy animated movie about friendship",
    "A dark psychological thriller",
    "A romantic drama about relationships"
]

# Embed
model = EmbeddingModel()
vectors = model.embed_texts(texts)

# Build index
index = FaissIndex(dim=vectors.shape[1])
index.build(vectors)

# Query
query = model.embed_text("I want a feel good animation")
scores, indices = index.search(query, top_k=2)

indices, scores


  from .autonotebook import tqdm as notebook_tqdm
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.95it/s]


(array([0, 2], dtype=int64), array([0.5136562, 0.2117837], dtype=float32))

In [44]:
from src.data.movie_repository import MovieRepository
from src.models.embedding_model import EmbeddingModel
from src.index.index_builder import IndexBuilder

repo = MovieRepository()
repo.load()

embedder = EmbeddingModel()

builder = IndexBuilder(
    repository=repo,
    embedding_model=embedder
)

builder.build()


Batches: 100%|██████████| 250/250 [14:21<00:00,  3.45s/it]


AttributeError: 'FaissIndex' object has no attribute 'add'

In [None]:
index, movie_ids= builder.load_index()

movie_ids[:6]

array([670347, 695675, 750809, 618208, 611395, 717634], dtype=int64)

In [None]:
from src.data.movie_repository import MovieRepository
from src.models.embedding_model import EmbeddingModel
from src.index.index_builder import IndexBuilder
from src.recommender.recommendation_engine import RecommendationEngine

# Load data
repo = MovieRepository()
repo.load()

# Load index
builder = IndexBuilder(repo, EmbeddingModel())
faiss_index, mapping = builder.load_index()

# Engine
engine = RecommendationEngine(
    repository=repo,
    embedding_model=EmbeddingModel(),
    faiss_index=faiss_index,
    index_to_movie_id=mapping,
)

user_profile = {
    "query_text": "I want some feminine feel good animation",
    "genres": ["animation", "family"],
    "language": ["english"],
    "runtime": {"max": 180}
    
}

engine.recommend(user_profile)


Unnamed: 0,movie_id,title,genres,cast,keywords,runtime,release_year,language,vote_average,vote_count,overview,embedding_text,similarity_score,final_score
0,1245228,The Wonder,animation,"Leon De Luna, Cynthia Potvin, Jason Ryll",,0,2020,english,0.0,0,an animation short,an animation short,0.420171,0.720171
1,756747,A Creepshow Animated Special,"animation, horror","Joey King, Fayna Sanchez, Kiefer Sutherland",special,46,2020,english,6.2,32,"two animated tales of terror: survivor type, a man determined to stay alive alone on a deserted island no matter what the cost. twittering from the circus of the dead, a teen's family road trip includes a visit to the gravest show on earth.","two animated tales of terror: survivor type, a man determined to stay alive alone on a deserted island no matter what the cost. twittering from the circus of the dead, a teen's family road trip includes a visit to the gravest show on earth.",0.314783,0.645783
2,803835,The Missfits,,,,20,2020,english,0.0,0,"an allgirls robotics team competes to smash stereotypes and prove themselves in a maledominated field, while also trying to get through high school.","an allgirls robotics team competes to smash stereotypes and prove themselves in a maledominated field, while also trying to get through high school.",0.399203,0.399203
3,661826,Body of Truth,documentary,"Marina Abramović, Sigalit Landau, Shirin Neshat",,92,2020,english,6.6,5,"four female artists have been politicized by experiences with war, violence and suppression and integrated them into their work, using their most personal tool: their own bodies.","four female artists have been politicized by experiences with war, violence and suppression and integrated them into their work, using their most personal tool: their own bodies.",0.343396,0.376396
4,722926,Skin: A History of Nudity in the Movies,documentary,"Malcolm McDowell, Shannon Elizabeth, Sean Young","movie business, interview, archive footage, social documentary, old footage, cinema history, old hollywood, hollywood history, film archives, social & cultural documentary, new hollywood, metoo, observational documentary, sex, hayes code, history and legacy, nudity, documentary",131,2020,english,6.519,52,"the definitive documentary on the history of nudity in feature films from the early silent days to the present, studying the changes in morality that led to the use of nudity in films while emphasizing the political, sociological and artistic changes that shaped that history. skin will also study the gender inequality in presenting nude images in motion pictures and will follow the revolution that has created nude gender equality in feature films today.","the definitive documentary on the history of nudity in feature films from the early silent days to the present, studying the changes in morality that led to the use of nudity in films while emphasizing the political, sociological and artistic changes that shaped that history. skin will also study the gender inequality in presenting nude images in motion pictures and will follow the revolution that has created nude gender equality in feature films today.",0.340733,0.373328
5,737741,Gurl,drama,"Jay Tewake, Mika X., Regan Taylor",,20,2020,english,10.0,1,this short film takes place one frantic night as a beautiful young sex worker waits for her salesman lover to whisk her away from her last night on the strip to live happily ever after. but its and shes a mori drag queen. and hes not the white knight he said he was.,this short film takes place one frantic night as a beautiful young sex worker waits for her salesman lover to whisk her away from her last night on the strip to live happily ever after. but its and shes a mori drag queen. and hes not the white knight he said he was.,0.31344,0.36344
6,682758,Tomboy,"documentary, music","Samantha Maloney, Chase Noelle, Bo-Pah",,95,2020,english,0.0,0,"shot over a fiveyear period in a nuanced, cinematic style, tomboy tells the story of four women drummers, making their mark in a stereotypically masculine field. together they span sixty years of popular music celebrated through a trove of previously unseen archival footage, intimate portraiture, insightful interviews and some serious drumming skills. the film captures the intoxicating atmosphere onstage, then goes beyond to chart each artists personal journey, converging in a unique celebration of the female experience that hits hard and hits home","shot over a fiveyear period in a nuanced, cinematic style, tomboy tells the story of four women drummers, making their mark in a stereotypically masculine field. together they span sixty years of popular music celebrated through a trove of previously unseen archival footage, intimate portraiture, insightful interviews and some serious drumming skills. the film captures the intoxicating atmosphere onstage, then goes beyond to chart each artists personal journey, converging in a unique celebration of the female experience that hits hard and hits home",0.36118,0.36118
7,1293879,Fleeting Nirvana,"drama, fantasy","Michael A. Phoenix, Allison Pittel",,12,2020,english,0.0,0,"minute short film inspired by true events, you will be taken into a world of beauty, romance, and deception.","minute short film inspired by true events, you will be taken into a world of beauty, romance, and deception.",0.357318,0.357318
8,743383,Joy Run,,"Andraya Yearwood, Terry Miller, Chase Strangio",,4,2020,english,0.0,0,"joy run, a film by tourmaline, continues the creative reimagining of athletics as a genderinclusive space.","joy run, a film by tourmaline, continues the creative reimagining of athletics as a genderinclusive space.",0.356839,0.356839
9,684977,Single,"romance, comedy","Delaney Feener, Jordan Wiseley, Kaycee Campbell",,15,2020,english,6.0,2,a girl born with one arm gets set up with a guy who has one hand and she is pissed.,a girl born with one arm gets set up with a guy who has one hand and she is pissed.,0.321923,0.351923
