In [793]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from kneed import KneeLocator

import string
import re
import os

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
lemmatizer = nltk.stem.WordNetLemmatizer()
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sergejromanov/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/sergejromanov/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [794]:
current_dir = os.getcwd()  # gets current working directory
project_dir = os.path.dirname(current_dir)  
directory_path = os.path.dirname(current_dir) + os.sep + 'data' + os.sep

In [795]:
#read the pkl file for movies
df = pd.read_pickle(directory_path + 'full_movies.pkl')
#read the pkl file for shows
df_2 = pd.read_pickle(directory_path + 'full_shows.pkl')

In [796]:
#read json file activity for movies
# activity_movie = pd.read_json('./movie_activities.json')
# activity_shows = pd.read_json('./show_activities.json')
activities = pd.read_json(directory_path + 'activities.json')

In [797]:
activities.head()
activities.groupby('content_id').count().sort_values('activity', ascending=False).head(10)

Unnamed: 0_level_0,activity,user_id,datetime,category
content_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
36,104,104,104,104
347,98,98,98,98
10290,94,94,94,94
189,93,93,93,93
110,93,93,93,93
52,93,93,93,93
76,93,93,93,93
107,93,93,93,93
10050,93,93,93,93
18,92,92,92,92


## TF-IDF + kmeans clustering

In [798]:
stop_words = stopwords.words('english')
def clean_text(x):
    x = x.lower()
    x = x.translate(str.maketrans('', '', string.punctuation))
    # tokenize
    x = nltk.word_tokenize(x)
    x = [token for token in x if token not in stop_words]
    # lemmatization and pass string back
    x = ' '.join([lemmatizer.lemmatize(w) for w in x])
    return x

In [799]:
df['features'] = df['description'].apply(lambda x: clean_text(x))
df_2['features'] = df_2['description'].apply(lambda x: clean_text(x))

In [800]:
#tf-idf and kmeans for all movies
# Convert the text data to a matrix of TF-IDF features
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(df['features'])


inertia = []
#calculate the optimal number of clusters
K = range(1, 100)
for k in K:
    kmeanModel = KMeans(n_clusters=k,random_state=0)
    kmeanModel.fit(tfidf)
    inertia.append(kmeanModel.inertia_)


kn = KneeLocator(K, inertia, curve='convex', direction='decreasing')
print(kn.knee)




82


In [801]:
#tf-idf and kmeans for all tv-shows
# Convert the text data to a matrix of TF-IDF features
tfidf_vectorizer_shows = TfidfVectorizer(stop_words='english')
tfidf_shows = tfidf_vectorizer_shows.fit_transform(df_2['features'])


inertia = []
#calculate the optimal number of clusters
K = range(1, 20)
for k in K:
    kmeanModel = KMeans(n_clusters=k,random_state=0)
    kmeanModel.fit(tfidf_shows)
    inertia.append(kmeanModel.inertia_)


kn = KneeLocator(K, inertia, curve='convex', direction='decreasing')
print(kn.knee)

15


In [802]:
# Cluster the documents using KMeans
num_clusters = 13
km = KMeans(n_clusters=num_clusters)
km.fit(tfidf)

# Assign cluster labels to each document
clusters = km.labels_.tolist()
df['cluster'] = clusters
df.head()

Unnamed: 0,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features,cluster
0,The Way We Were,0,Movies,The Way We Were,,The Way We Were,A love story that begins with the attraction o...,A love story that begins with the attraction o...,"[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zy/ZY933...,[['Director Sydney Pollack']],"[abc1, abc2, drama, classic, film, feature-len...",2021-06-25 07:00:00,PG,6802.0,,,love story begin attraction opposite katie hub...,5
1,Starman,1,Movies,Starman,,Starman,An alien stranded on Earth clones himself into...,An alien stranded on Earth clones himself into...,"[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW301...,[['Director John Carpenter']],"[abc1, abc2, drama, sci-fi, fantasy, romance, ...",2021-06-25 07:00:00,PG,6612.0,,,alien stranded earth clone form young woman de...,7
2,Mr Deeds Goes To Town,2,Movies,Mr Deeds Goes To Town,,Mr Deeds Goes To Town,A small-town poet inherits a vast fortune and ...,A small-town poet inherits a vast fortune and ...,"[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW302...,"[['Cast Gary Cooper', 'Jean Arthur', 'Douglas ...","[abc1, abc2, drama, comedy-drama, film, featur...",2021-06-25 07:00:00,G,6649.0,,,smalltown poet inherits vast fortune find new ...,6
3,On The Road,3,Movies,On The Road,,On The Road,Aspiring writer Sal Paradise has his world roc...,Aspiring writer Sal Paradise has his world roc...,"[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW291...,[['Director Walter Salles']],"[abc1, abc2, usa, drama, adaptation, adventure...",2021-06-01 07:00:00,MA,8014.0,,,aspiring writer sal paradise world rocked foll...,4
4,The Boys Are Back,4,Movies,The Boys Are Back,,The Boys Are Back,"Journalist Joe Warr and his two sons, from dif...","Journalist Joe Warr and his two sons, from dif...","[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW238...,[['Director Scott Hicks']],"[abc1, abc2, drama, comedy-drama, film, single...",2021-06-11 07:00:00,M,5950.0,,,journalist joe warr two son different marriage...,0


In [803]:
# Cluster the documents using KMeans
num_clusters_shows = 8
km_shows = KMeans(n_clusters=num_clusters_shows)
km_shows.fit(tfidf_shows)

# Assign cluster labels to each document
clusters_shows = km_shows.labels_.tolist()
df_2['cluster'] = clusters_shows
df_2.reset_index(inplace=True)
df_2.head()



Unnamed: 0,index,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features,cluster
0,0,10000,199 Little Heroes,Education,The journey to school is a very special type o...,"[Enjo lives in Quinten, a forest glade in the ...",[[]],2021-06-01 06:20:00,G,"[311.0, 311.0, 312.0, 311.0, 311.0, 320.0, 311...","[ABC ME, EDUCATION]","[primary-humanities, education, abc3, refugee-...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW073...,journey school special type life experience sh...,5
1,1,10001,7.30,News,Leigh Sales presents Australia's leading night...,[The devastation caused by the recent floods i...,[[]],2022-03-14 20:00:00,,"[1906.0, 1894.0, 1879.0, 1935.0, 1891.0, 1822....","[ABC TV, ABC NEWS]","[lismore, news, childcare, flooding, abc-faves...",https://cdn.iview.abc.net.au/thumbs/i/nc/NC220...,leigh sale present australia leading nightly p...,4
2,2,10002,7.30 Mark Humphries Satire,News,Satirist Mark Humphries brings his unique pers...,[Satirist Mark Humphries goes inside the Healt...,[[]],2021-05-11 14:00:00,,"[145.0, 110.0, 118.0, 136.0, 202.0, 115.0, 241...",[ABC NEWS],"[mark, parachuting, humphries, candidates, sus...",https://cdn.iview.abc.net.au/thumbs/i/nn/NN211...,satirist mark humphries brings unique perspect...,4
3,3,10003,7.30: The Interviews,News,Revisit some of 7.30's most compelling convers...,[Musician Dave Grohl sits down with 7.30's Lei...,[[]],2022-03-08 20:00:00,,[1030.0],[ABC NEWS],"[news24, news]",https://cdn.iview.abc.net.au/thumbs/i/ns/NS224...,revisit 730s compelling conversation australia...,4
4,4,10004,72 Cutest Animals,Family,The animal kingdom continually astounds us wit...,[Cute is a notion that can mean many things to...,[[]],2022-03-05 15:03:06,G,"[1616.0, 1616.0, 1620.0, 1620.0, 1614.0, 1620....","[ABC ME, FAMILY]","[family-viewing, family-viewing-abcme, abc3]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW243...,animal kingdom continually astounds u showcase...,7


In [804]:
australian = 'Australia|australia|australian|Australian|Aboriginal|Indigenous|First People'
diverse = 'queer|lbgtqi|diversity|inclusion|disability|aboriginal|race|racism|indigenous|equal-opportunit'

In [805]:
expat_library = df[df['tags2'].str.contains(australian) | df['description'].str.contains(australian) | df['description2'].str.contains(australian) | df['title'].str.contains(australian)
]
expat_library_shows= df_2[df_2['tags2'].str.contains(australian) | df_2['description'].str.contains(australian) | df_2['description2'].str.contains(australian) | df_2['title'].str.contains(australian)
]
diversity_library = df[df['tags2'].str.contains(diverse) | df['description'].str.contains(diverse) | df['description2'].str.contains(diverse) | df['title'].str.contains(diverse)]
diversity_library_shows= df_2[df_2['tags2'].str.contains(diverse) | df_2['description'].str.contains(diverse) | df_2['description2'].str.contains(diverse) | df_2['title'].str.contains(diverse)
]

In [806]:
#reset index for df_2
df_2.reset_index(inplace=True)
#drop the column 'index'
df_2.drop(columns=['index'], inplace=True)
df_2.head()

Unnamed: 0,level_0,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features,cluster
0,0,10000,199 Little Heroes,Education,The journey to school is a very special type o...,"[Enjo lives in Quinten, a forest glade in the ...",[[]],2021-06-01 06:20:00,G,"[311.0, 311.0, 312.0, 311.0, 311.0, 320.0, 311...","[ABC ME, EDUCATION]","[primary-humanities, education, abc3, refugee-...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW073...,journey school special type life experience sh...,5
1,1,10001,7.30,News,Leigh Sales presents Australia's leading night...,[The devastation caused by the recent floods i...,[[]],2022-03-14 20:00:00,,"[1906.0, 1894.0, 1879.0, 1935.0, 1891.0, 1822....","[ABC TV, ABC NEWS]","[lismore, news, childcare, flooding, abc-faves...",https://cdn.iview.abc.net.au/thumbs/i/nc/NC220...,leigh sale present australia leading nightly p...,4
2,2,10002,7.30 Mark Humphries Satire,News,Satirist Mark Humphries brings his unique pers...,[Satirist Mark Humphries goes inside the Healt...,[[]],2021-05-11 14:00:00,,"[145.0, 110.0, 118.0, 136.0, 202.0, 115.0, 241...",[ABC NEWS],"[mark, parachuting, humphries, candidates, sus...",https://cdn.iview.abc.net.au/thumbs/i/nn/NN211...,satirist mark humphries brings unique perspect...,4
3,3,10003,7.30: The Interviews,News,Revisit some of 7.30's most compelling convers...,[Musician Dave Grohl sits down with 7.30's Lei...,[[]],2022-03-08 20:00:00,,[1030.0],[ABC NEWS],"[news24, news]",https://cdn.iview.abc.net.au/thumbs/i/ns/NS224...,revisit 730s compelling conversation australia...,4
4,4,10004,72 Cutest Animals,Family,The animal kingdom continually astounds us wit...,[Cute is a notion that can mean many things to...,[[]],2022-03-05 15:03:06,G,"[1616.0, 1616.0, 1620.0, 1620.0, 1614.0, 1620....","[ABC ME, FAMILY]","[family-viewing, family-viewing-abcme, abc3]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW243...,animal kingdom continually astounds u showcase...,7


In [807]:
# Print the top 10 terms for each cluster
order_centroids = km.cluster_centers_.argsort()[:, ::-1]
terms = tfidf_vectorizer.get_feature_names_out()
for i in range(num_clusters):
    print("Cluster %d:" % i, end='')
    for ind in order_centroids[i, :13]:
        print(' %s' % terms[ind], end='')
    print()



Cluster 0: attenborough david night sir animal magical world history reveals journey london ancient join
Cluster 1: book based search 2011 tale donaldson world tell father animated julia story adventure
Cluster 2: school play special australia episode emma country join people celebrating high indigenous story
Cluster 3: art sydney australia english opera house life stage story drama harbour creates history
Cluster 4: world new life hand girl wash year song lunar culture stay gang murder
Cluster 5: man love family stick life story war child 2012 wwi unlikely heartwarming bond
Cluster 6: stage geography city science 45 eruption film mumbai step problem asks program impact
Cluster 7: young woman home way make beach alien story stranded australian attempt company come
Cluster 8: ballet australian performed orchestra music victoria production story present love opera australia tale
Cluster 9: making attenboroughs series david behindthescenes tv went documentary look life natural technology 

In [808]:
# Print the top 10 terms for each cluster
order_centroids_shows = km_shows.cluster_centers_.argsort()[:, ::-1]
terms_shows = tfidf_vectorizer_shows.get_feature_names_out()
for i in range(num_clusters_shows):
    print("Cluster %d:" % i, end='')
    for ind in order_centroids_shows[i, :13]:
        print(' %s' % terms_shows[ind], end='')
    print()

Cluster 0: world help navigate friend wonder learns cadet need news learn live way life
Cluster 1: adventure friend little family battle epic new doctor life team story wiggle creature
Cluster 2: life series new explores world david australia australian attenborough art change planet artist
Cluster 3: news abc reporting story brings issue today original exclusive presented matter live investigation
Cluster 4: australia year big political return team story life people crime question week home
Cluster 5: stage english series music fun early history child young people world story solve
Cluster 6: science australian math teacher teach skill stage lesson fundamental experiment tenminute family like
Cluster 7: kid book animal big world word school love program meet australian based friend


In [809]:
joined_activity = pd.merge(df, activities, left_on='id', right_on='content_id', how='right')
#make from list in tag2 column to string and delete [] in the string
joined_activity['tags2'] = joined_activity['tags2'].apply(lambda x: str(x).replace('[', '').replace(']', ''))
#drop NaN values in id column
joined_activity.dropna(subset=['id'], inplace=True)
#drop columns that are not needed
joined_activity.drop(['series', 'episode_name', 'description2', 'publication_date', 'rating', 'season', 'episode', 'episode_title', 'datetime', 'duration_sec', 'tags'], axis=1, inplace=True)
joined_activity.head()

Unnamed: 0,id,category_x,title,description,image,directors_actors,tags2,features,cluster,content_id,activity,user_id,category_y
2,179.0,Family,The Penguin King With David Attenborough,"The Penguin King's story is often comical, som...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW259...,[['Host Sir David Attenborough']],"'abc2', 'uk', 'docs', 'factual', 'animals', 'n...",penguin king story often comical sometimes tra...,10.0,179,Like,10,movie
4,266.0,Documentary,Women Of Steel,Outraged at sexual harassment in the work plac...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW292...,[[]],"'abc2', 'australia', 'docs', 'factual', 'histo...",outraged sexual harassment work place denied w...,7.0,266,Dislike,10,movie
7,303.0,Comedy,Kylie Minogue Golden: Live in Concert,Filmed at various UK venues over the course of...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW314...,[['Cast Kylie Minogue']],"'abc2', 'arts', 'australia', 'music', 'perform...",filmed various uk venue course kylie minogues ...,3.0,303,Play,15,movie
9,114.0,Movies,You're Next,When the Davison family comes under attack dur...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW286...,[['Director Adam Wingard']],"'abc1', 'abc2', 'drama', 'horror', 'thriller',...",davison family come attack wedding anniversary...,1.0,114,Play,14,movie
13,60.0,Movies,Adoration,Two lifelong best friends each begin a steamy ...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW237...,[['Director Anne Fontaine']],"'abc1', 'abc2', 'drama', 'sydney', 'romance', ...",two lifelong best friend begin steamy affair o...,12.0,60,Play,17,movie


In [810]:
joined_activity_shows = pd.merge(df_2, activities, left_on='id', right_on='content_id', how='right')
#make from list in tag2 column to string and delete [] in the string
joined_activity_shows['tags2'] = joined_activity_shows['tags2'].apply(lambda x: str(x).replace('[', '').replace(']', ''))
#drop NaN values in id column
joined_activity_shows.dropna(subset=['id'], inplace=True)
#drop columns that are not needed
joined_activity_shows.drop([ 'description2', 'publication_date', 'rating', 'datetime', 'duration_sec', 'tags'], axis=1, inplace=True)
joined_activity_shows.head()

Unnamed: 0,level_0,id,title,category_x,description,directors_actors,tags2,image,features,cluster,content_id,activity,user_id,category_y
0,212.0,10212.0,Growing Up In The Early 1900s,Education,What was life like for working-class children ...,[[]],"'primary-humanities', 'education', 'abc3'",https://cdn.iview.abc.net.au/thumbs/i/ed/ED200...,life like workingclass child australian city e...,6.0,10212,Like,13,show
1,145.0,10145.0,Dementia & Us,Documentary,This two-part series follows four people with ...,[['Host Dreane Williams']],"'real-life', 'science', 'informative', 'factua...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW325...,twopart series follows four people dementia co...,2.0,10145,Like,10,show
3,230.0,10230.0,Horrible Histories,Family,A Horribly Historical look at everything from ...,"['Emily Lloyd-Saini'], 'Richard David-Caine', ...","'family-viewing-abcme', '3science-week', 'fami...",https://cdn.iview.abc.net.au/thumbs/i/zw/publi...,horribly historical look everything ancient ol...,4.0,10230,Like,11,show
5,139.0,10139.0,David Attenborough's Kingdom Of Plants,Family,David Attenborough explores a whole new dimens...,[['Host Sir David Attenborough']],"'natural-world', 'wild-docs', 'family-viewing'...",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX915...,david attenborough explores whole new dimensio...,2.0,10139,Like,15,show
6,373.0,10373.0,"One, Two, Threebies!",Kids,"Join Boxy, Tri and Roh as they dance and bounc...",[[]],"'abc4kids', 'primary-maths', 'education', 'abc3'",https://cdn.iview.abc.net.au/thumbs/i/ch/CH193...,join boxy tri roh dance bounce around discover...,2.0,10373,Like,12,show


In [811]:
#select rows where activity is 'Like'
expat_data_likes = joined_activity[joined_activity['activity'] == 'Like']
expat_data_likes.sort_values(by=['user_id'])

Unnamed: 0,id,category_x,title,description,image,directors_actors,tags2,features,cluster,content_id,activity,user_id,category_y
49380,228.0,Documentary,Joanna Lumley And The Human Swan,This inspiring film sees Joanna Lumley follow ...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW324...,[['Host Joanna Lumley']],"'abc1', 'uk', 'factual', 'docs', 'travel', 'ad...",inspiring film see joanna lumley follow advent...,0.0,228,Like,0,movie
63104,32.0,Movies,Wild Target,Longing to get out of the assassination busine...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW285...,[['Director Jonathan Lynn']],"'abc1', 'abc2', 'drama', 'comedy-drama', 'crim...",longing get assassination business hitman deci...,3.0,32,Like,0,movie
42497,272.0,Documentary,Catching A Killer: A Knock At The Door,The life of 64-year-old Hang Yin Leung and her...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW274...,[[]],"'abc1', 'uk', 'docs', 'factual', 'true-crime',...",life 64yearold hang yin leung hardworking fami...,0.0,272,Like,0,movie
42494,98.0,Movies,Three Kingdoms,"Zhao Zilong, a common man heeds the call of du...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW286...,[['Director Daniel Lee']],"'abc1', 'abc2', 'action', 'drama', 'film', 'pe...",zhao zilong common man heed call duty humblest...,5.0,98,Like,0,movie
54907,10.0,Movies,The Lost Aviator,"Set in the Golden Age of Aviation, Andrew Lanc...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW277...,[['Director Andrew Lancaster']],"'abc1', 'abc2', 'docs', 'film', 'feature-lengt...",set golden age aviation andrew lancaster follo...,10.0,10,Like,0,movie
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64067,296.0,Comedy,Shed,"An enslaved girl, Mary, endures daily torture ...",https://cdn.iview.abc.net.au/thumbs/i/ip/IP180...,[['Director Chantelle Murray']],"'abc1', 'arts', 'australia', 'drama', 'regiona...",enslaved girl mary endures daily torture world...,4.0,296,Like,30,movie
38549,302.0,Comedy,China's Artful Dissident (Simplified Chinese ...,"Follows a Chinese political artist in exile, o...",https://cdn.iview.abc.net.au/thumbs/i/iv/IV191...,[[]],"'abc1', 'docs', 'arts', 'abcarts', 'artists', ...",follows chinese political artist exile persona...,3.0,302,Like,30,movie
63418,145.0,Family,Armadillo: Narrated By David Attenborough,In the heart of Brazil lives an animal so elus...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW311...,[['Host Sir David Attenborough']],"'abc1', 'uk', 'docs', 'factual', 'nature', 'wi...",heart brazil life animal elusive ever seen wil...,0.0,145,Like,30,movie
38717,250.0,Documentary,China's Artful Dissident,"Follows a Chinese political artist in exile, o...",https://cdn.iview.abc.net.au/thumbs/i/ac/AC181...,[[]],"'abc2', 'docs', 'arts', 'abcarts', 'artists', ...",follows chinese political artist exile persona...,3.0,250,Like,30,movie


In [812]:
#select rows where activity is 'Like'
expat_data_shows_likes = joined_activity_shows[joined_activity_shows['activity'] == 'Like']
expat_data_shows_likes.sort_values(by=['user_id'])

Unnamed: 0,level_0,id,title,category_x,description,directors_actors,tags2,image,features,cluster,content_id,activity,user_id,category_y
46063,502.0,10502.0,The Five,Drama,"Jesse, a five-year-old boy, goes missing near ...","['Hannah Arterton', ['Cast O-T Fagbenle', 'Lee...","'crime-drama', 'edge-of-your-seat', 'mini-seri...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW303...,jesse fiveyearold boy go missing near home twe...,4.0,10502,Like,0,show
53314,401.0,10401.0,Play School Story Time,Kids,Get comfy on the Story Time couch as Australia...,[[]],"'4aussie', '4upper', '4liveaction', '4lower', ...",https://cdn.iview.abc.net.au/thumbs/i/ck/CK190...,get comfy story time couch australian celebrit...,7.0,10401,Like,0,show
46568,286.0,10286.0,Lest We Forget What? Education Shorts,Education,A young woman investigates Australia's remembr...,[[]],"'all-episodes', 'education', 'abc3', 'history'...",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX996...,young woman investigates australia remembrance...,5.0,10286,Like,0,show
53404,374.0,10374.0,Operation Buffalo,Drama,"In Cold War South Australia, at a remote Briti...","['Jessica De Gouw', ['Cast Ewen Leslie', 'Shak...","'compelling', 'thriller', 'ns:be-entertained',...",https://cdn.iview.abc.net.au/thumbs/i/dr/DR182...,cold war south australia remote british nuclea...,4.0,10374,Like,0,show
42898,280.0,10280.0,Lah-Lah's Stripy Sock Club: Songs,Kids,Join Lah-Lah and her adorable band of quirky a...,[[]],"'4song', 'abc4kids'",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW184...,join lahlah adorable band quirky fun loving mu...,5.0,10280,Like,0,show
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36439,377.0,10377.0,Operation Ouch!: Do Try This At Home,Family,The Docs are back in Xand's kitchen lab! Every...,"[['Host Chris Van Tulleken', 'Xand Van Tulleke...","'science', 'family-viewing-abcme', 'learn', 'e...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW246...,doc back xands kitchen lab every episode chris...,4.0,10377,Like,30,show
24732,283.0,10283.0,Laura's Choice,Documentary,"Laura Henkel is eccentric, outspoken, feisty a...",[[]],"'real-life', 'ns:be-informed', 'a:older-sink-d...",https://cdn.iview.abc.net.au/thumbs/i/dc/DC191...,laura henkel eccentric outspoken feisty 90 yea...,4.0,10283,Like,30,show
31147,253.0,10253.0,Jack Irish Specials,Drama,Jack Irish is a man getting his life back toge...,"[['Cast Guy Pearce', 'Roy Billing', 'Aaron Ped...","'a:older-sink-dink', 'crime-drama', 'adaptatio...",https://cdn.iview.abc.net.au/thumbs/i/dr/DR112...,jack irish man getting life back together form...,2.0,10253,Like,30,show
36603,83.0,10083.0,Big Block SingSong,Kids,A series of musical boxes: one loves his moust...,[[]],"'4quirky', '4sing', '4lower', '4short-sweet', ...",https://cdn.iview.abc.net.au/thumbs/i/zw/publi...,series musical box one love moustache one want...,5.0,10083,Like,30,show


In [813]:
#top-3 clusters for each user in expat_data_likes
top3 = expat_data_likes.groupby('user_id')['cluster'].value_counts().groupby(level=0).nlargest(3).reset_index(level=1, drop=True)
#make from series to dataframe
top3 = top3.to_frame()
#drop the 3rd column
top3 = top3.drop(columns=['cluster'])
#make user_id column as index
top3 = top3.reset_index()
top3

Unnamed: 0,user_id,cluster
0,0,3.0
1,0,1.0
2,0,12.0
3,1,2.0
4,1,3.0
...,...,...
88,29,1.0
89,29,0.0
90,30,3.0
91,30,2.0


In [814]:
expat_data_shows_likes.sort_values(by=['user_id'])
#top-3 clusters for each user in expat_data_likes
top3_show = expat_data_shows_likes.groupby('user_id')['cluster'].value_counts().groupby(level=0).nlargest(3).reset_index(level=1,
                                                                                                              drop=True)
#make from series to dataframe
top3_show = top3_show.to_frame()
#drop the 3rd column
top3_show = top3_show.drop(columns=['cluster'])
#make user_id column as index
top3_show = top3_show.reset_index()
top3_show

Unnamed: 0,user_id,cluster
0,0,4.0
1,0,2.0
2,0,5.0
3,1,4.0
4,1,2.0
...,...,...
88,29,2.0
89,29,5.0
90,30,4.0
91,30,2.0


In [815]:
#join top 3 clusters for each user with expat_library
recommended= pd.merge(top3, expat_library, on=['cluster'], how='left')
recommended

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features
0,0,3.0,Murray Whelan: The Brush Off,65.0,Movies,Murray Whelan: The Brush Off,,Murray Whelan: The Brush Off,Political adviser to Australia's Minister of t...,Political adviser to Australia's Minister of t...,"[ABC TV, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW281...,[['Director Sam Neill']],"[abc1, australia, drama, crime, crime-drama, 2...",2021-06-25 07:00:00,M,5436.0,,,political adviser australia minister art murra...
1,0,3.0,Designing A Legacy,223.0,Documentary,Designing A Legacy,,Designing A Legacy,"Comedian, architecture enthusiast and design n...","Comedian, architecture enthusiast and design n...","[ABC TV, LIFESTYLE, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/fa/FA190...,[['Host Tim Ross']],"[abc1, australia, lifestyle, home, creative, a...",2021-02-02 21:03:51,M,3422.0,,,comedian architecture enthusiast design nerd t...
2,0,3.0,Fighting Spirit: Wheeling Diggers' Invictus Ga...,251.0,Documentary,Fighting Spirit: Wheeling Diggers' Invictus Ga...,,Fighting Spirit: Wheeling Diggers' Invictus Ga...,This deeply moving documentary goes behind-the...,This deeply moving documentary goes behind-the...,"[ABC TV, SPORT, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/do/DO171...,[[]],"[abc1, aussie, sport, docs, factual, inspirati...",2018-10-16 21:28:00,M,3442.0,,,deeply moving documentary go behindthescenes i...
3,0,3.0,The Exhibitionists,252.0,Documentary,The Exhibitionists,,The Exhibitionists,Four friends dare to get locked in the Nationa...,Four friends dare to get locked in the Nationa...,"[ABC TV, ABC TV Plus, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/ac/AC210...,[[]],"[abc1, abc2, docs, factual, abcarts, art, cult...",2022-03-08 20:30:57,M,3452.0,,,four friend dare get locked national gallery d...
4,0,3.0,The Art of Remembrance,271.0,Documentary,The Art of Remembrance,,The Art of Remembrance,Nate Byrne explores the role of paintings and ...,Nate Byrne explores the role of paintings and ...,"[ABC TV, DOCUMENTARY, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF200...,[['Host Nate Byrne']],"[abc1, docs, factual, australia, history, hist...",2021-04-25 18:28:14,PG,1594.0,,,nate byrne explores role painting sketch repre...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,30,2.0,Christmas In Australia With Christine Anu,269.0,Documentary,Christmas In Australia With Christine Anu,,Christmas In Australia With Christine Anu,Celebrate Christmas with Christine Anu as she ...,Celebrate Christmas with Christine Anu as she ...,"[ABC TV, LIFESTYLE, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/rn/RN201...,[['Host Christine Anu']],"[abc1, lifestyle, docs, family, australia, rel...",2021-12-21 20:33:38,G,3139.0,,,celebrate christmas christine anu explores aus...
770,30,2.0,NAIDOC Week Musical Celebration,306.0,Comedy,NAIDOC Week Musical Celebration,,NAIDOC Week Musical Celebration,ABC Melbourne presents a special performance t...,ABC Melbourne presents a special performance t...,"[ABC TV Plus, ABC ARTS, ARTS & CULTURE, INDIGE...",https://cdn.iview.abc.net.au/thumbs/i/iv/IV201...,"[['Host Jacinta Parsons', 'Shelley Ware']]","[abc2, abcarts, arts, music, performance, conc...",2021-07-04 07:00:00,G,2821.0,,,abc melbourne present special performance cele...
771,30,2.0,Mark Seymour And The Undertow: Slow Dawn Live,315.0,Comedy,Mark Seymour And The Undertow: Slow Dawn Live,,Mark Seymour And The Undertow: Slow Dawn Live,Hunters & Collectors' frontman Mark Seymour pr...,Hunters & Collectors' frontman Mark Seymour pr...,"[ABC TV Plus, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW312...,[[]],"[abc2, arts, australia, music, performance, po...",2021-11-05 20:31:33,PG,2960.0,,,hunter collector frontman mark seymour present...
772,30,2.0,Ochres,326.0,Comedy,Ochres,,Ochres,"Told in five parts, Ochres was inspired by the...","Told in five parts, Ochres was inspired by the...","[ABC ARTS, ARTS & CULTURE, INDIGENOUS]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW229...,[[]],"[abcarts, arts, dance, bangarra, first-nations...",2021-06-18 07:00:00,G,3417.0,,,told five part ochre inspired special role och...


In [816]:
#shows
recommended_shows= pd.merge(top3_show, expat_library_shows, on=['cluster'], how='left')
recommended_shows

Unnamed: 0,user_id,cluster,index,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features
0,0,4.0,51,10051,Australia Debates,Panel Discussion,The funniest minds in Australia debate the fin...,"[Tom Cashman, Demi Lardner and Nat Damena argu...",[['Host Nikki Britton']],2021-06-28 21:33:30,M,"[2579.0, 2605.0, 2492.0]","[ABC TV Plus, PANEL & DISCUSSION]","[ns:be-informed, public-affairs, current-affai...",https://cdn.iview.abc.net.au/thumbs/i/fr/FR201...,funniest mind australia debate finding three i...
1,0,4.0,54,10054,Australia Remastered: Nature's Great Divide,Family,The hard line that separates the natural world...,[The narrow strait between the Indonesian isla...,[['Host Aaron Pedersen']],2021-12-07 21:27:05,G,"[3240.0, 3240.0, 3200.0]","[ABC TV, DOCUMENTARY, SCIENCE, FAMILY]","[ocean, informative, natural-world, animals, f...",https://cdn.iview.abc.net.au/thumbs/i/do/DO195...,hard line separate natural world asia australi...
2,0,4.0,60,10060,Australian Of The Year Profiles 2022,Comedy,Celebrate the achievements of our 2022 Austral...,"[A materials scientist, engineer and inventor ...",[[]],2022-01-05 17:00:00,G,"[197.0, 210.0, 198.0, 209.0, 181.0, 196.0, 210...","[ABC TV, DOCUMENTARY]","[diversity, australian-capital-territory, tasm...",https://cdn.iview.abc.net.au/thumbs/i/rk/RK210...,celebrate achievement 2022 australian year you...
3,0,4.0,144,10144,Deep Dive Into Australia's Ocean Odyssey,Education,"Focussing on marine life, interconnectedness o...",[Emily Jateff from Australian National Maritim...,[[]],2020-08-17 11:56:21,PG,"[1206.0, 1090.0, 1271.0]","[ABC ME, EDUCATION]","[secondary-maths, education, abc3]",https://cdn.iview.abc.net.au/thumbs/i/ck/CK203...,focussing marine life interconnectedness land ...
4,0,4.0,195,10195,Gardening Australia - My Garden Path,Comedy,Gardening Australia is hosted by Costa Georgia...,[We meet a horticulturalist and indoor plant c...,[[]],2021-02-12 07:00:00,G,"[372.0, 366.0, 509.0, 407.0, 385.0, 467.0, 401...","[ABC TV, LIFESTYLE, REGIONAL AUSTRALIA]","[diversity, melbourne, australian-capital-terr...",https://cdn.iview.abc.net.au/thumbs/i/rk/RK190...,gardening australia hosted costa georgiadis sc...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,30,2.0,58,10058,Australia's Ocean Odyssey: A Journey Down The ...,Family,A landmark documentary series that takes a spe...,[Journey down the East Australian Current all ...,[['Narrator Marta Dusseldorp']],2020-06-09 21:33:25,G,"[3549.0, 360.0, 3515.0, 3446.0]","[ABC TV, ABC ME, DOCUMENTARY, SCIENCE, FAMILY]","[ocean, informative, natural-world, animals, s...",https://cdn.iview.abc.net.au/thumbs/i/do/DO180...,landmark documentary series take spectacular j...
462,30,2.0,61,10061,Australian Story,News,"Putting the 'real' back into reality TV, the a...",[A daring sea rescue led by the Australian Nav...,[[]],2021-08-23 20:31:30,,"[1856.0, 1920.0, 1837.0, 1782.0, 2070.0, 1693....","[ABC TV, ABC NEWS, DOCUMENTARY]","[natural-world, politics, bob-hawke, sport, or...",https://cdn.iview.abc.net.au/thumbs/i/nc/NC210...,putting real back reality tv awardwinning seri...
463,30,2.0,408,10408,Prepping Australia,Documentary,Meet real Australian Preppers bunkering down f...,[After the 2020 supermarket shortages left her...,[[]],2021-11-16 22:30:00,G,"[601.0, 909.0, 934.0]","[ABC TV Plus, DOCUMENTARY]","[real-life, factual, docuseries, quirky, unbel...",https://cdn.iview.abc.net.au/thumbs/i/do/DO191...,meet real australian preppers bunkering uncert...
464,30,2.0,485,10485,The Art Of Australia,Documentary,The Art of Australia explores how art and arti...,[The story of how art helped European settlers...,[['Host Edmund Capon']],2021-06-18 07:00:00,G,"[3390.0, 3423.0, 3402.0]","[ABC TV, ABC ARTS, ARTS & CULTURE, DOCUMENTARY]","[informative, a:older-sink-dink, ns:be-inspire...",https://cdn.iview.abc.net.au/thumbs/i/ac/AC123...,art australia explores art artist helped shape...


In [817]:
recommended_div= pd.merge(top3, diversity_library, on=['cluster'], how='left')
recommended_shows_div= pd.merge(top3_show, diversity_library_shows, on=['cluster'], how='left')
recommended_div

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features
0,0,3.0,Australia Day Live,313.0,Comedy,Australia Day Live 2022,,Australia Day Live 2022,A concert and fireworks spectacular from Sydne...,A concert and fireworks spectacular from Sydne...,"[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rv/RV210...,"[['Host Jeremy Fernandez', 'Casey Donovan', 'J...","[abc1, aussie, australia, event, concert, musi...",2022-01-26 21:31:00,PG,7871.0,,,concert firework spectacular sydney featuring ...
1,0,1.0,,,,,,,,,,,,,,,,,,
2,0,12.0,,,,,,,,,,,,,,,,,,
3,1,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...
4,1,2.0,Ochres,326.0,Comedy,Ochres,,Ochres,"Told in five parts, Ochres was inspired by the...","Told in five parts, Ochres was inspired by the...","[ABC ARTS, ARTS & CULTURE, INDIGENOUS]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW229...,[[]],"[abcarts, arts, dance, bangarra, first-nations...",2021-06-18 07:00:00,G,3417.0,,,told five part ochre inspired special role och...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,30,3.0,Australia Day Live,313.0,Comedy,Australia Day Live 2022,,Australia Day Live 2022,A concert and fireworks spectacular from Sydne...,A concert and fireworks spectacular from Sydne...,"[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rv/RV210...,"[['Host Jeremy Fernandez', 'Casey Donovan', 'J...","[abc1, aussie, australia, event, concert, musi...",2022-01-26 21:31:00,PG,7871.0,,,concert firework spectacular sydney featuring ...
127,30,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...
128,30,2.0,Ochres,326.0,Comedy,Ochres,,Ochres,"Told in five parts, Ochres was inspired by the...","Told in five parts, Ochres was inspired by the...","[ABC ARTS, ARTS & CULTURE, INDIGENOUS]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW229...,[[]],"[abcarts, arts, dance, bangarra, first-nations...",2021-06-18 07:00:00,G,3417.0,,,told five part ochre inspired special role och...
129,30,4.0,What's For Dinner? Lunar New Year,154.0,Family,What's For Dinner? Lunar New Year,,What's For Dinner? Lunar New Year,Welcome to Penny's place for Lunar New Year! I...,Welcome to Penny's place for Lunar New Year! I...,"[ABC ME, EDUCATION, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/ck/CK204...,[[]],"[abc3, childrens, primary-humanities, educatio...",2021-02-05 06:00:00,G,783.0,,,welcome penny place lunar new year start race ...


In [818]:
#drop intersections of recommended and activity by user_id and content_id
recommended = recommended[~recommended[['user_id', 'id']].apply(frozenset, axis=1).isin(activities[['user_id', 'content_id']].apply(frozenset, axis=1))]
recommended

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features
38,1,3.0,Opera on Sydney Harbour: Carmen,322.0,Comedy,Opera on Sydney Harbour: Carmen,,Opera on Sydney Harbour: Carmen,"Opera Australia performs Bizet's Carmen, again...","Opera Australia performs Bizet's Carmen, again...","[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX954...,[['Director Cameron Kirkpatrick']],"[abc1, abcarts, arts, opera, performance, love...",2021-06-18 07:00:00,PG,8467.0,,,opera australia performs bizet carmen backdrop...
61,2,2.0,Dubboo: Life Of A Songman,267.0,Documentary,Dubboo: Life Of A Songman,,Dubboo: Life Of A Songman,Bangarra Dance Theatre joins forces with some ...,Bangarra Dance Theatre joins forces with some ...,"[ABC TV Plus, ABC ARTS, INDIGENOUS, ARTS & CUL...",https://cdn.iview.abc.net.au/thumbs/i/ac/AC194...,"[['Cast Archie Roach', 'Ursula Yovich', 'Djaka...","[abc2, indigenous, abcarts, arts, docs, factua...",2021-07-07 22:00:55,G,3379.0,,,bangarra dance theatre join force celebrated i...
74,3,3.0,Fighting Spirit: Wheeling Diggers' Invictus Ga...,251.0,Documentary,Fighting Spirit: Wheeling Diggers' Invictus Ga...,,Fighting Spirit: Wheeling Diggers' Invictus Ga...,This deeply moving documentary goes behind-the...,This deeply moving documentary goes behind-the...,"[ABC TV, SPORT, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/do/DO171...,[[]],"[abc1, aussie, sport, docs, factual, inspirati...",2018-10-16 21:28:00,M,3442.0,,,deeply moving documentary go behindthescenes i...
75,3,3.0,The Exhibitionists,252.0,Documentary,The Exhibitionists,,The Exhibitionists,Four friends dare to get locked in the Nationa...,Four friends dare to get locked in the Nationa...,"[ABC TV, ABC TV Plus, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/ac/AC210...,[[]],"[abc1, abc2, docs, factual, abcarts, art, cult...",2022-03-08 20:30:57,M,3452.0,,,four friend dare get locked national gallery d...
81,3,3.0,"Tell Your Story, Change Your World",331.0,Education,"Tell Your Story, Change Your World",,"Tell Your Story, Change Your World","Only you can tell your own story, but getting ...","Only you can tell your own story, but getting ...","[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/ck/CK189...,[[]],"[abc3, education, secondary-english]",2018-10-19 10:42:31,PG,875.0,,,tell story getting started isnt always easy fo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
734,28,1.0,Deadly Family Portraits: Electric Mimili,321.0,Comedy,Deadly Family Portraits: Electric Mimili,,Deadly Family Portraits: Electric Mimili,Robert Fielding and Zaachariaha Fielding tell ...,Robert Fielding and Zaachariaha Fielding tell ...,"[ABC TV, ABC ARTS, DOCUMENTARY, ARTS & CULTURE...",https://cdn.iview.abc.net.au/thumbs/i/ac/AC183...,[[]],"[abc1, abcarts, indigenious, docs, arts, cultu...",2019-08-09 07:00:00,G,555.0,,,robert fielding zaachariaha fielding tell stor...
750,30,3.0,Fighting Spirit: Wheeling Diggers' Invictus Ga...,251.0,Documentary,Fighting Spirit: Wheeling Diggers' Invictus Ga...,,Fighting Spirit: Wheeling Diggers' Invictus Ga...,This deeply moving documentary goes behind-the...,This deeply moving documentary goes behind-the...,"[ABC TV, SPORT, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/do/DO171...,[[]],"[abc1, aussie, sport, docs, factual, inspirati...",2018-10-16 21:28:00,M,3442.0,,,deeply moving documentary go behindthescenes i...
765,30,2.0,Brazen Hussies,235.0,Documentary,Brazen Hussies,,Brazen Hussies,A revealing documentary celebrating the legacy...,A revealing documentary celebrating the legacy...,"[ABC TV, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/do/DO190...,[['Director Catherine Dwyer']],"[abc1, featured, docs, australia, factual, his...",2022-03-08 01:04:06,M,5318.0,,,revealing documentary celebrating legacy bold ...
769,30,2.0,Christmas In Australia With Christine Anu,269.0,Documentary,Christmas In Australia With Christine Anu,,Christmas In Australia With Christine Anu,Celebrate Christmas with Christine Anu as she ...,Celebrate Christmas with Christine Anu as she ...,"[ABC TV, LIFESTYLE, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/rn/RN201...,[['Host Christine Anu']],"[abc1, lifestyle, docs, family, australia, rel...",2021-12-21 20:33:38,G,3139.0,,,celebrate christmas christine anu explores aus...


In [819]:
recommended_shows = recommended_shows[~recommended_shows[['user_id', 'id']].apply(frozenset, axis=1).isin(activities[['user_id', 'content_id']].apply(frozenset, axis=1))]
recommended_shows

Unnamed: 0,user_id,cluster,index,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features
16,1,4.0,54,10054,Australia Remastered: Nature's Great Divide,Family,The hard line that separates the natural world...,[The narrow strait between the Indonesian isla...,[['Host Aaron Pedersen']],2021-12-07 21:27:05,G,"[3240.0, 3240.0, 3200.0]","[ABC TV, DOCUMENTARY, SCIENCE, FAMILY]","[ocean, informative, natural-world, animals, f...",https://cdn.iview.abc.net.au/thumbs/i/do/DO195...,hard line separate natural world asia australi...
59,3,5.0,59,10059,Australia's Prime Ministers,Education,"This series profiles Australia's leaders, feat...","[This series profiles Australia's leaders, fea...",[[]],2022-02-28 09:00:00,G,"[183.0, 119.0, 186.0, 180.0, 199.0, 227.0, 157...","[ABC ME, EDUCATION, DOCUMENTARY]","[primary-humanities, education, abc3, gough-wh...",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX642...,series profile australia leader featuring care...
63,4,4.0,144,10144,Deep Dive Into Australia's Ocean Odyssey,Education,"Focussing on marine life, interconnectedness o...",[Emily Jateff from Australian National Maritim...,[[]],2020-08-17 11:56:21,PG,"[1206.0, 1090.0, 1271.0]","[ABC ME, EDUCATION]","[secondary-maths, education, abc3]",https://cdn.iview.abc.net.au/thumbs/i/ck/CK203...,focussing marine life interconnectedness land ...
67,4,2.0,52,10052,Australia Remastered: Australia's Oceans,Family,Explore the incredible diversity of ocean wild...,[The Southern Ocean is the only ocean that str...,[['Host Aaron Pedersen']],2020-10-25 19:03:00,G,"[3230.0, 3195.0, 3185.0]","[ABC TV, ABC ME, DOCUMENTARY, SCIENCE, FAMILY]","[ns:be-informed, natural-world, wildlife, a:ol...",https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,explore incredible diversity ocean wildlife en...
68,4,2.0,53,10053,Australia Remastered: Forces of Nature,Family,"Hosted and narrated by Aaron Pedersen, this se...",[Cyclones burst upon Australia's tropical coas...,[['Host Aaron Pedersen']],2022-01-30 18:04:10,G,"[3230.0, 3260.0, 3195.0, 3240.0]","[ABC TV, DOCUMENTARY, REGIONAL AUSTRALIA, FAMILY]","[informative, natural-world, animals, docuseri...",https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,hosted narrated aaron pedersen series explores...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,28,4.0,51,10051,Australia Debates,Panel Discussion,The funniest minds in Australia debate the fin...,"[Tom Cashman, Demi Lardner and Nat Damena argu...",[['Host Nikki Britton']],2021-06-28 21:33:30,M,"[2579.0, 2605.0, 2492.0]","[ABC TV Plus, PANEL & DISCUSSION]","[ns:be-informed, public-affairs, current-affai...",https://cdn.iview.abc.net.au/thumbs/i/fr/FR201...,funniest mind australia debate finding three i...
441,29,4.0,210,10210,Griff's Great Australian Rail Trip,Documentary,British comedian and travel enthusiast Griff R...,[Journeying from Brisbane to Cairns aboard the...,[['Host Griff Rhys Jones']],2021-06-30 07:00:00,G,"[2829.0, 2785.0, 2831.0, 2768.0, 2821.0, 2832.0]","[ABC TV, LIFESTYLE, DOCUMENTARY]","[factual, travel, lifestyle, abc1, uk, austral...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW184...,british comedian travel enthusiast griff rhys ...
453,30,4.0,60,10060,Australian Of The Year Profiles 2022,Comedy,Celebrate the achievements of our 2022 Austral...,"[A materials scientist, engineer and inventor ...",[[]],2022-01-05 17:00:00,G,"[197.0, 210.0, 198.0, 209.0, 181.0, 196.0, 210...","[ABC TV, DOCUMENTARY]","[diversity, australian-capital-territory, tasm...",https://cdn.iview.abc.net.au/thumbs/i/rk/RK210...,celebrate achievement 2022 australian year you...
456,30,4.0,210,10210,Griff's Great Australian Rail Trip,Documentary,British comedian and travel enthusiast Griff R...,[Journeying from Brisbane to Cairns aboard the...,[['Host Griff Rhys Jones']],2021-06-30 07:00:00,G,"[2829.0, 2785.0, 2831.0, 2768.0, 2821.0, 2832.0]","[ABC TV, LIFESTYLE, DOCUMENTARY]","[factual, travel, lifestyle, abc1, uk, austral...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW184...,british comedian travel enthusiast griff rhys ...


In [820]:
recommended_shows.groupby('cluster')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x2a9788250>

In [821]:
recommended_div = recommended_div[~recommended_div[['user_id', 'id']].apply(frozenset, axis=1).isin(activities[['user_id', 'content_id']].apply(frozenset, axis=1))]
recommended_div

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features
1,0,1.0,,,,,,,,,,,,,,,,,,
2,0,12.0,,,,,,,,,,,,,,,,,,
11,2,7.0,,,,,,,,,,,,,,,,,,
17,4,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...
25,5,1.0,,,,,,,,,,,,,,,,,,
26,6,3.0,Australia Day Live,313.0,Comedy,Australia Day Live 2022,,Australia Day Live 2022,A concert and fireworks spectacular from Sydne...,A concert and fireworks spectacular from Sydne...,"[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rv/RV210...,"[['Host Jeremy Fernandez', 'Casey Donovan', 'J...","[abc1, aussie, australia, event, concert, musi...",2022-01-26 21:31:00,PG,7871.0,,,concert firework spectacular sydney featuring ...
29,6,6.0,,,,,,,,,,,,,,,,,,
31,7,1.0,,,,,,,,,,,,,,,,,,
32,7,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...
40,9,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...


In [822]:
recommended_shows_div = recommended_shows_div[~recommended_shows_div[['user_id', 'id']].apply(frozenset, axis=1).isin(activities[['user_id', 'content_id']].apply(frozenset, axis=1))]
recommended_shows_div

Unnamed: 0,user_id,cluster,index,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features
0,0,4.0,,,,,,,,,,,,,,
1,0,2.0,,,,,,,,,,,,,,
2,0,5.0,,,,,,,,,,,,,,
3,1,4.0,,,,,,,,,,,,,,
4,1,2.0,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,29,2.0,,,,,,,,,,,,,,
89,29,5.0,,,,,,,,,,,,,,
90,30,4.0,,,,,,,,,,,,,,
91,30,2.0,,,,,,,,,,,,,,


In [823]:
#choose 2 rows per each user_id and each cluster
recommended_films = recommended.groupby(['user_id', 'cluster']).head(2)
recommended_films

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,tags,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features
38,1,3.0,Opera on Sydney Harbour: Carmen,322.0,Comedy,Opera on Sydney Harbour: Carmen,,Opera on Sydney Harbour: Carmen,"Opera Australia performs Bizet's Carmen, again...","Opera Australia performs Bizet's Carmen, again...","[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX954...,[['Director Cameron Kirkpatrick']],"[abc1, abcarts, arts, opera, performance, love...",2021-06-18 07:00:00,PG,8467.0,,,opera australia performs bizet carmen backdrop...
61,2,2.0,Dubboo: Life Of A Songman,267.0,Documentary,Dubboo: Life Of A Songman,,Dubboo: Life Of A Songman,Bangarra Dance Theatre joins forces with some ...,Bangarra Dance Theatre joins forces with some ...,"[ABC TV Plus, ABC ARTS, INDIGENOUS, ARTS & CUL...",https://cdn.iview.abc.net.au/thumbs/i/ac/AC194...,"[['Cast Archie Roach', 'Ursula Yovich', 'Djaka...","[abc2, indigenous, abcarts, arts, docs, factua...",2021-07-07 22:00:55,G,3379.0,,,bangarra dance theatre join force celebrated i...
74,3,3.0,Fighting Spirit: Wheeling Diggers' Invictus Ga...,251.0,Documentary,Fighting Spirit: Wheeling Diggers' Invictus Ga...,,Fighting Spirit: Wheeling Diggers' Invictus Ga...,This deeply moving documentary goes behind-the...,This deeply moving documentary goes behind-the...,"[ABC TV, SPORT, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/do/DO171...,[[]],"[abc1, aussie, sport, docs, factual, inspirati...",2018-10-16 21:28:00,M,3442.0,,,deeply moving documentary go behindthescenes i...
75,3,3.0,The Exhibitionists,252.0,Documentary,The Exhibitionists,,The Exhibitionists,Four friends dare to get locked in the Nationa...,Four friends dare to get locked in the Nationa...,"[ABC TV, ABC TV Plus, DOCUMENTARY]",https://cdn.iview.abc.net.au/thumbs/i/ac/AC210...,[[]],"[abc1, abc2, docs, factual, abcarts, art, cult...",2022-03-08 20:30:57,M,3452.0,,,four friend dare get locked national gallery d...
83,3,2.0,Stargazing: Moon and Beyond,167.0,Family,Stargazing: Moon and Beyond,,Stargazing: Moon and Beyond,Stargazing celebrates the 50th anniversary of ...,Stargazing celebrates the 50th anniversary of ...,"[ABC TV, SCIENCE, DOCUMENTARY, FAMILY, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/sc/SC180...,"[['Host Julia Zemiro', 'Brian Cox']]","[abc1, science, docs, space, family-viewing, e...",2019-07-16 21:01:50,PG,3599.0,,,stargazing celebrates 50th anniversary moon la...
98,4,2.0,Stargazing: Moon and Beyond,167.0,Family,Stargazing: Moon and Beyond,,Stargazing: Moon and Beyond,Stargazing celebrates the 50th anniversary of ...,Stargazing celebrates the 50th anniversary of ...,"[ABC TV, SCIENCE, DOCUMENTARY, FAMILY, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/sc/SC180...,"[['Host Julia Zemiro', 'Brian Cox']]","[abc1, science, docs, space, family-viewing, e...",2019-07-16 21:01:50,PG,3599.0,,,stargazing celebrates 50th anniversary moon la...
104,4,2.0,Capturing Cricket: Steve Waugh In India,232.0,Documentary,Capturing Cricket: Steve Waugh In India,,Capturing Cricket: Steve Waugh In India,"Steve Waugh, the mastermind behind Australia's...","Steve Waugh, the mastermind behind Australia's...","[ABC TV, DOCUMENTARY, SPORT]",https://cdn.iview.abc.net.au/thumbs/i/rf/RF192...,[['Host Steve Waugh']],"[abc1, australia, docs, factual, sport, divers...",2020-11-17 21:30:47,PG,3407.0,,,steve waugh mastermind behind australia golden...
142,5,2.0,Cracking COVID,242.0,Documentary,Cracking COVID,,Cracking COVID,Tracking the real-time story of Australia's sc...,Tracking the real-time story of Australia's sc...,"[ABC TV, DOCUMENTARY, SCIENCE]",https://cdn.iview.abc.net.au/thumbs/i/do/DO200...,[['Director Sonya Pemberton']],"[abc1, aussie, factual, docs, science, health,...",2021-07-13 20:29:59,PG,4496.0,,,tracking realtime story australia scientific r...
157,6,3.0,Australia Day Live,313.0,Comedy,Australia Day Live 2022,,Australia Day Live 2022,A concert and fireworks spectacular from Sydne...,A concert and fireworks spectacular from Sydne...,"[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rv/RV210...,"[['Host Jeremy Fernandez', 'Casey Donovan', 'J...","[abc1, aussie, australia, event, concert, musi...",2022-01-26 21:31:00,PG,7871.0,,,concert firework spectacular sydney featuring ...
164,6,2.0,Ready Together,215.0,Documentary,Ready Together,,Ready Together,Craig Reucassel presents the stories of two co...,Craig Reucassel presents the stories of two co...,"[ABC TV, DOCUMENTARY, LIFESTYLE, SCIENCE]",https://cdn.iview.abc.net.au/thumbs/i/do/DO202...,[['Host Craig Reucassel']],"[abc1, docs, lifestyle, factual, nature, weath...",2021-03-28 18:28:08,PG,1500.0,,,craig reucassel present story two community de...


In [824]:
recommended_shows = recommended_shows.groupby(['user_id', 'cluster']).head(2)
recommended_shows

Unnamed: 0,user_id,cluster,index,id,title,category,description,description2,directors_actors,publication_date,rating,duration_sec,tags,tags2,image,features
16,1,4.0,54,10054,Australia Remastered: Nature's Great Divide,Family,The hard line that separates the natural world...,[The narrow strait between the Indonesian isla...,[['Host Aaron Pedersen']],2021-12-07 21:27:05,G,"[3240.0, 3240.0, 3200.0]","[ABC TV, DOCUMENTARY, SCIENCE, FAMILY]","[ocean, informative, natural-world, animals, f...",https://cdn.iview.abc.net.au/thumbs/i/do/DO195...,hard line separate natural world asia australi...
59,3,5.0,59,10059,Australia's Prime Ministers,Education,"This series profiles Australia's leaders, feat...","[This series profiles Australia's leaders, fea...",[[]],2022-02-28 09:00:00,G,"[183.0, 119.0, 186.0, 180.0, 199.0, 227.0, 157...","[ABC ME, EDUCATION, DOCUMENTARY]","[primary-humanities, education, abc3, gough-wh...",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX642...,series profile australia leader featuring care...
63,4,4.0,144,10144,Deep Dive Into Australia's Ocean Odyssey,Education,"Focussing on marine life, interconnectedness o...",[Emily Jateff from Australian National Maritim...,[[]],2020-08-17 11:56:21,PG,"[1206.0, 1090.0, 1271.0]","[ABC ME, EDUCATION]","[secondary-maths, education, abc3]",https://cdn.iview.abc.net.au/thumbs/i/ck/CK203...,focussing marine life interconnectedness land ...
67,4,2.0,52,10052,Australia Remastered: Australia's Oceans,Family,Explore the incredible diversity of ocean wild...,[The Southern Ocean is the only ocean that str...,[['Host Aaron Pedersen']],2020-10-25 19:03:00,G,"[3230.0, 3195.0, 3185.0]","[ABC TV, ABC ME, DOCUMENTARY, SCIENCE, FAMILY]","[ns:be-informed, natural-world, wildlife, a:ol...",https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,explore incredible diversity ocean wildlife en...
68,4,2.0,53,10053,Australia Remastered: Forces of Nature,Family,"Hosted and narrated by Aaron Pedersen, this se...",[Cyclones burst upon Australia's tropical coas...,[['Host Aaron Pedersen']],2022-01-30 18:04:10,G,"[3230.0, 3260.0, 3195.0, 3240.0]","[ABC TV, DOCUMENTARY, REGIONAL AUSTRALIA, FAMILY]","[informative, natural-world, animals, docuseri...",https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,hosted narrated aaron pedersen series explores...
78,5,4.0,51,10051,Australia Debates,Panel Discussion,The funniest minds in Australia debate the fin...,"[Tom Cashman, Demi Lardner and Nat Damena argu...",[['Host Nikki Britton']],2021-06-28 21:33:30,M,"[2579.0, 2605.0, 2492.0]","[ABC TV Plus, PANEL & DISCUSSION]","[ns:be-informed, public-affairs, current-affai...",https://cdn.iview.abc.net.au/thumbs/i/fr/FR201...,funniest mind australia debate finding three i...
79,5,4.0,54,10054,Australia Remastered: Nature's Great Divide,Family,The hard line that separates the natural world...,[The narrow strait between the Indonesian isla...,[['Host Aaron Pedersen']],2021-12-07 21:27:05,G,"[3240.0, 3240.0, 3200.0]","[ABC TV, DOCUMENTARY, SCIENCE, FAMILY]","[ocean, informative, natural-world, animals, f...",https://cdn.iview.abc.net.au/thumbs/i/do/DO195...,hard line separate natural world asia australi...
92,5,5.0,59,10059,Australia's Prime Ministers,Education,"This series profiles Australia's leaders, feat...","[This series profiles Australia's leaders, fea...",[[]],2022-02-28 09:00:00,G,"[183.0, 119.0, 186.0, 180.0, 199.0, 227.0, 157...","[ABC ME, EDUCATION, DOCUMENTARY]","[primary-humanities, education, abc3, gough-wh...",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX642...,series profile australia leader featuring care...
101,6,2.0,53,10053,Australia Remastered: Forces of Nature,Family,"Hosted and narrated by Aaron Pedersen, this se...",[Cyclones burst upon Australia's tropical coas...,[['Host Aaron Pedersen']],2022-01-30 18:04:10,G,"[3230.0, 3260.0, 3195.0, 3240.0]","[ABC TV, DOCUMENTARY, REGIONAL AUSTRALIA, FAMILY]","[informative, natural-world, animals, docuseri...",https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,hosted narrated aaron pedersen series explores...
104,6,2.0,61,10061,Australian Story,News,"Putting the 'real' back into reality TV, the a...",[A daring sea rescue led by the Australian Nav...,[[]],2021-08-23 20:31:30,,"[1856.0, 1920.0, 1837.0, 1782.0, 2070.0, 1693....","[ABC TV, ABC NEWS, DOCUMENTARY]","[natural-world, politics, bob-hawke, sport, or...",https://cdn.iview.abc.net.au/thumbs/i/nc/NC210...,putting real back reality tv awardwinning seri...


In [825]:
recommended_films_div = recommended_div.groupby(['user_id', 'cluster']).head(2)
recommended_films_div.user_id.value_counts()

30    3
9     2
29    2
27    2
23    2
12    2
0     2
7     2
6     2
10    1
21    1
4     1
28    1
5     1
26    1
25    1
22    1
20    1
11    1
19    1
17    1
2     1
15    1
14    1
13    1
16    1
Name: user_id, dtype: int64

In [826]:
recommended_shows_div = recommended_shows_div.groupby(['user_id', 'cluster']).head(2)
recommended_shows_div.user_id.value_counts()

0     3
16    3
29    3
28    3
27    3
26    3
25    3
24    3
23    3
22    3
21    3
20    3
19    3
18    3
17    3
15    3
1     3
14    3
13    3
12    3
11    3
10    3
9     3
8     3
7     3
6     3
5     3
4     3
3     3
2     3
30    3
Name: user_id, dtype: int64

In [831]:
#drop na in id column and mix recommended_films and recommended_shows
# recommended_films.dropna(subset=['id'], inplace=True)
# recommended_shows.dropna(subset=['id'], inplace=True)
#join recommended_films and recommended_shows
recommended_mix_austalia = pd.concat([recommended_films, recommended_shows])
#choose the random 6 rows per each user_id
recommended_mix_austalia = recommended_mix_austalia.groupby('user_id').apply(lambda x: x.sample(4, replace=False))
recommended_mix_austalia.reset_index(drop=True, inplace=True)


ValueError: Cannot take a larger sample than population when 'replace=False'

In [None]:
recommended_mix_austalia.dropna(subset=['id'], inplace=True)
recommended_mix_austalia

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,...,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features,index
0,0,6.0,Stargazing: Moon and Beyond,167.0,Family,Stargazing: Moon and Beyond,,Stargazing: Moon and Beyond,Stargazing celebrates the 50th anniversary of ...,Stargazing celebrates the 50th anniversary of ...,...,https://cdn.iview.abc.net.au/thumbs/i/sc/SC180...,"[['Host Julia Zemiro', 'Brian Cox']]","[abc1, science, docs, space, family-viewing, e...",2019-07-16 21:01:50,PG,3599.0,,,stargazing celebrates 50th anniversary moon la...,
1,0,4.0,,10058.0,Family,Australia's Ocean Odyssey: A Journey Down The ...,,,A landmark documentary series that takes a spe...,[Journey down the East Australian Current all ...,...,https://cdn.iview.abc.net.au/thumbs/i/do/DO180...,[['Narrator Marta Dusseldorp']],"[ocean, informative, natural-world, animals, s...",2020-06-09 21:33:25,G,"[3549.0, 360.0, 3515.0, 3446.0]",,,landmark documentary series take spectacular j...,58.0
2,1,2.0,,10055.0,Family,Australia Remastered: Wild Australians,,,Explore some of Australia's most iconic and fa...,"[Echidnas and platypuses are unique, the only ...",...,https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,[['Host Aaron Pedersen']],"[ns:be-informed, natural-world, wildlife, a:ol...",2020-08-30 18:57:00,G,"[3310.0, 3330.0, 3285.0, 3185.0, 3230.0, 3294.0]",,,explore australia iconic fascinating animal my...,55.0
3,1,6.0,,10052.0,Family,Australia Remastered: Australia's Oceans,,,Explore the incredible diversity of ocean wild...,[The Southern Ocean is the only ocean that str...,...,https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,[['Host Aaron Pedersen']],"[ns:be-informed, natural-world, wildlife, a:ol...",2020-10-25 19:03:00,G,"[3230.0, 3195.0, 3185.0]",,,explore incredible diversity ocean wildlife en...,52.0
4,2,7.0,,10051.0,Panel Discussion,Australia Debates,,,The funniest minds in Australia debate the fin...,"[Tom Cashman, Demi Lardner and Nat Damena argu...",...,https://cdn.iview.abc.net.au/thumbs/i/fr/FR201...,[['Host Nikki Britton']],"[ns:be-informed, public-affairs, current-affai...",2021-06-28 21:33:30,M,"[2579.0, 2605.0, 2492.0]",,,funniest mind australia debate finding three i...,51.0
5,2,2.0,,10055.0,Family,Australia Remastered: Wild Australians,,,Explore some of Australia's most iconic and fa...,"[Echidnas and platypuses are unique, the only ...",...,https://cdn.iview.abc.net.au/thumbs/i/do/DO184...,[['Host Aaron Pedersen']],"[ns:be-informed, natural-world, wildlife, a:ol...",2020-08-30 18:57:00,G,"[3310.0, 3330.0, 3285.0, 3185.0, 3230.0, 3294.0]",,,explore australia iconic fascinating animal my...,55.0
6,3,4.0,Fighting Spirit: Wheeling Diggers' Invictus Ga...,251.0,Documentary,Fighting Spirit: Wheeling Diggers' Invictus Ga...,,Fighting Spirit: Wheeling Diggers' Invictus Ga...,This deeply moving documentary goes behind-the...,This deeply moving documentary goes behind-the...,...,https://cdn.iview.abc.net.au/thumbs/i/do/DO171...,[[]],"[abc1, aussie, sport, docs, factual, inspirati...",2018-10-16 21:28:00,M,3442.0,,,deeply moving documentary go behindthescenes i...,
7,3,4.0,A Few Best Men,23.0,Movies,A Few Best Men,,A Few Best Men,When English lad David announces he is getting...,When English lad David announces he is getting...,...,https://cdn.iview.abc.net.au/thumbs/i/zw/ZW285...,[['Director Stephen Elliott']],"[abc1, abc2, comedy, love, relationships, roma...",2021-06-01 07:00:00,MA,5550.0,,,english lad david announces getting married mi...,
8,4,4.0,,10290.0,Kids,Little J And Big Cuz Indigenous Languages,,,Little J and Big Cuz are Indigenous Australian...,[Nanna is unwell so Little J and Big Cuz eager...,...,https://cdn.iview.abc.net.au/thumbs/i/ck/CK194...,[[]],"[littlejin, abc4kids, education]",2021-07-31 06:00:00,G,"[760.0, 778.0, 706.0, 704.0, 720.0, 760.0, 727...",,,little j big cuz indigenous australian kid liv...,290.0
9,4,5.0,Can We Save The Reef?,152.0,Family,Can We Save The Reef?,,Can We Save The Reef?,An epic story of Australian and international ...,An epic story of Australian and international ...,...,https://cdn.iview.abc.net.au/thumbs/i/sc/SC160...,[[]],"[abc1, docs, nature, science, science-week, cl...",2018-05-12 14:58:55,G,3428.0,,,epic story australian international scientist ...,


In [828]:
# recommended_films_div.dropna(subset=['id'], inplace=True)
# recommended_shows_div.dropna(subset=['id'], inplace=True)
recommended_mix_div = pd.concat([recommended_films_div, recommended_shows_div])
#choose the random 6 rows per each user_id
recommended_mix_div = recommended_mix_div.groupby('user_id').apply(lambda x: x.sample(6, replace=False))
recommended_mix_div.reset_index(drop=True, inplace=True)

ValueError: Cannot take a larger sample than population when 'replace=False'

In [None]:
recommended_mix_div.dropna( inplace=True)

In [835]:
recommended_mix_div.user_id.value_counts().sort_index()

0     5
1     3
2     4
3     3
4     4
5     4
6     5
7     5
8     3
9     5
10    4
11    4
12    5
13    4
14    4
15    4
16    4
17    4
18    3
19    4
20    4
21    4
22    4
23    5
24    3
25    4
26    4
27    5
28    4
29    5
30    6
Name: user_id, dtype: int64

In [None]:
#recommended_mix_austalia.to_csv(directory_path + 'RECOMMENDED_mix_austalia.csv', index=False)
recommended_mix_div.to_csv(directory_path + 'RECOMMENDED_mix_div.csv', index=False)

In [832]:
recommended_mix_austalia.user_id.value_counts().sort_index()
recommended_mix_austalia.user_id.value_counts().sort_index()

1     2
2     1
3     4
4     5
5     4
6     6
7     3
8     5
9     3
10    8
11    4
12    2
13    3
14    6
15    3
16    5
17    5
18    4
19    3
21    3
22    7
23    5
24    5
25    2
26    5
27    4
28    2
29    1
30    6
Name: user_id, dtype: int64

In [834]:
df_9 = recommended_mix_austalia[recommended_mix_austalia.user_id 
    == 6  ].to_csv(directory_path + 'RECOMMENDED_FOR_Peter_austalia.csv')

df_24 = recommended_mix_austalia[recommended_mix_austalia.user_id 
    == 14 ].to_csv(directory_path + 'RECOMMENDED_FOR_Anouk_austalia.csv')  

df_19 = recommended_mix_austalia[recommended_mix_austalia.user_id 
    == 22 ].to_csv(directory_path + 'RECOMMENDED_FOR_Bob_austalia.csv')  

In [None]:
recommended_mix_austalia[recommended_mix_austalia.user_id == 9]

Unnamed: 0,user_id,cluster,episode_title,id,category,title,series,episode_name,description,description2,...,image,directors_actors,tags2,publication_date,rating,duration_sec,season,episode,features,index
18,9,0.0,,10163.0,Documentary,Employable Me Australia,,,The Rose d'Or nominated series follows six ind...,"[This uplifting, warm, and insightful series s...",...,https://cdn.iview.abc.net.au/thumbs/i/dc/DC182...,[[]],"[real-life, endearing, informative, disability...",2019-04-09 21:32:30,PG,"[239.0, 168.0, 3566.0, 3579.0, 235.0, 233.0, 3...",,,rose dor nominated series follows six individu...,163.0
19,9,9.0,The Art of Remembrance,271.0,Documentary,The Art of Remembrance,,The Art of Remembrance,Nate Byrne explores the role of paintings and ...,Nate Byrne explores the role of paintings and ...,...,https://cdn.iview.abc.net.au/thumbs/i/rf/RF200...,[['Host Nate Byrne']],"[abc1, docs, factual, australia, history, hist...",2021-04-25 18:28:14,PG,1594.0,,,nate byrne explores role painting sketch repre...,
