In [1]:
import setup_django
setup_django.init()


In [45]:
import pandas as pd
from django.conf import settings
from ratings.models import Rating
from movies.models import Movie

LINKS_SMALL_CSV = settings.DATA_DIR / 'links_small.csv'
LINKS_SMALL_CSV.exists()

True

In [7]:
qs = Rating.objects.all()
missing_movie_ids = []
for instance in qs:
    if instance.content_object is None:
        missing_movie_ids.append(instance.object_id)

_total = len(missing_movie_ids)
total_missing = list(set(missing_movie_ids))


In [12]:
print(len(total_missing), _total)

6397 57213


In [16]:
links_df = pd.read_csv(LINKS_SMALL_CSV)
links_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [21]:
ms_df = links_df.copy()[links_df.movieId.isin(total_missing)]
ms_df.head()


Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
3,4,114885,31357.0
6,7,114319,11860.0
7,8,112302,45325.0
8,9,114576,9091.0


In [23]:
ms_df.shape[0] == len(total_missing)

True

In [24]:
def enrich_imdb_col(val):
    val = str(val)
    if len(val) == 7:
        val = f"tt{val}"
        return val
    if len(val) == 6:
        val = f"tt0{val}"
        return val
    if len(val) == 5:
        val = f"tt00{val}"
        return val
    return val

In [27]:
ms_df['tt'] = ms_df['imdbId'].apply(enrich_imdb_col)

ms_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt
0,1,114709,862.0,tt0114709
3,4,114885,31357.0,tt0114885
6,7,114319,11860.0,tt0114319
7,8,112302,45325.0,tt0112302
8,9,114576,9091.0,tt0114576


In [29]:
MOVIES_CSV = settings.DATA_DIR / 'movies_metadata.csv'
MOVIES_CSV.exists()

True

In [32]:
movies_cols = ['title','overview','release_date','imdb_id']
movies_df = pd.read_csv(MOVIES_CSV, usecols=movies_cols)
movies_df.head()

Unnamed: 0,imdb_id,overview,release_date,title
0,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story
1,tt0113497,When siblings Judy and Peter discover an encha...,1995-12-15,Jumanji
2,tt0113228,A family wedding reignites the ancient feud be...,1995-12-22,Grumpier Old Men
3,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale
4,tt0113041,Just when George Banks has recovered from his ...,1995-02-10,Father of the Bride Part II


In [35]:
missing_movies_df = ms_df.merge(movies_df, left_on='tt', right_on='imdb_id')

missing_movies_df.head()


Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title
0,1,114709,862.0,tt0114709,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story
1,4,114885,31357.0,tt0114885,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale
2,7,114319,11860.0,tt0114319,tt0114319,An ugly duckling having undergone a remarkable...,1995-12-15,Sabrina
3,8,112302,45325.0,tt0112302,tt0112302,"A mischievous young boy, Tom Sawyer, witnesses...",1995-12-22,Tom and Huck
4,9,114576,9091.0,tt0114576,tt0114576,International action superstar Jean Claude Van...,1995-12-22,Sudden Death


In [40]:
missing_movies_df['id'] = missing_movies_df['movieId']
missing_movies_df['id_alt'] = missing_movies_df['tmdbId'].apply(lambda x: str(int(x)))

In [41]:
missing_movies_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title,id,id_alt
0,1,114709,862.0,tt0114709,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story,1,862
1,4,114885,31357.0,tt0114885,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale,4,31357
2,7,114319,11860.0,tt0114319,tt0114319,An ugly duckling having undergone a remarkable...,1995-12-15,Sabrina,7,11860
3,8,112302,45325.0,tt0112302,tt0112302,"A mischievous young boy, Tom Sawyer, witnesses...",1995-12-22,Tom and Huck,8,45325
4,9,114576,9091.0,tt0114576,tt0114576,International action superstar Jean Claude Van...,1995-12-22,Sudden Death,9,9091


In [52]:
final_df = missing_movies_df.copy()[['id', 'id_alt','title']]
final_df['id_alt'] = final_df['id_alt'].astype(str)

In [43]:
final_df.head()

Unnamed: 0,id,id_alt,title
0,1,862,Toy Story
1,4,31357,Waiting to Exhale
2,7,11860,Sabrina
3,8,45325,Tom and Huck
4,9,9091,Sudden Death


In [44]:
alt_id_list = final_df['id_alt'].to_list()

In [46]:
movies_qs = Movie.objects.filter(id__in=alt_id_list)

In [48]:
movies_qs

<MovieQuerySet [<Movie: Ariel (1988)>, <Movie: Judgment Night (1993)>, <Movie: Finding Nemo (2003)>, <Movie: Forrest Gump (1994)>, <Movie: American Beauty (1999)>, <Movie: Citizen Kane (1941)>, <Movie: Dancer in the Dark (2000)>, <Movie: The Fifth Element (1997)>, <Movie: My Life Without Me (2003)>, <Movie: The Endless Summer (1966)>, <Movie: Pirates of the Caribbean: The Curse of the Black Pearl (2003)>, <Movie: Kill Bill: Vol. 1 (2003)>, <Movie: Jarhead (2005)>, <Movie: Walk on Water (2004)>, <Movie: Apocalypse Now (1979)>, <Movie: Eternal Sunshine of the Spotless Mind (2004)>, <Movie: A History of Violence (2005)>, <Movie: Twelve Monkeys (1995)>, <Movie: Talk to Her (2002)>, <Movie: 8 Mile (2002)>, '...(remaining elements truncated)...']>

In [50]:
movies_qs.count()

6039

In [51]:
from django.forms.models import model_to_dict

In [None]:
movies_qs = Movie.objects.filter(id__in=alt_id_list)
for obj in movies_qs:
    data = final_df.copy()[final_df['id_alt'] == str(obj.id)]

    if data.shape[0] == 1:
        og_model_data = model_to_dict(obj)
        update_data = data.to_dict('records')[0]
        if obj.title == update_data.get('title'):
            print(og_model_data)
            og_model_data['id'] = update_data['id']
            new_model_data = {**og_model_data}
            print(new_model_data)
            obj.delete()
            Movie.objects.create(**new_model_data)

{'id': 6, 'title': 'Judgment Night', 'overview': 'While racing to a boxing match, Frank, Mike, John and Rey get more than they bargained for. A wrong turn lands them directly in the path of Fallon, a vicious, wise-cracking drug lord. After accidentally witnessing Fallon murder a disloyal henchman, the four become his unwilling prey in a savage game of cat &amp; mouse as they are mercilessly stalked through the urban jungle in this taut suspense drama', 'release_date': datetime.date(1993, 10, 15), 'rating_last_updated': datetime.datetime(2024, 1, 19, 20, 10, 31, 919273, tzinfo=datetime.timezone.utc), 'rating_count': 104, 'rating_avg': Decimal('3.95')}
{'id': 479, 'title': 'Judgment Night', 'overview': 'While racing to a boxing match, Frank, Mike, John and Rey get more than they bargained for. A wrong turn lands them directly in the path of Fallon, a vicious, wise-cracking drug lord. After accidentally witnessing Fallon murder a disloyal henchman, the four become his unwilling prey in a 

In [None]:
from ratings.tasks import task_update_movie_ratings
task_update_movie_ratings()