# Importation des modules

In [1]:
import pandas as pd
import numpy
import numpy as np
import glob
from collections import defaultdict
from operator import itemgetter
from itertools import chain

## Dictionnaire Film : Identifiant 

In [100]:
# Lookup table built from a tab-separated text file: the second field of each
# line is used as the key and the first field as the value.
dico_title = {}

with open("../Data/Titles.txt", "r", encoding="utf-8") as f:
    # Iterating the handle yields the same lines as readlines(), one at a time.
    for line in f:
        fields = line.rstrip('\n').split('\t')
        dico_title[fields[1]] = fields[0]

## Liste des fichiers csv contenant l'historique des films vus par quelques utilisateurs

In [95]:
# Gather every per-user history CSV, then sort the paths in place so the
# list order is deterministic.
files = glob.glob('../Historiques_FilmsVu_Netflix/Historique_user*.csv')
files.sort()
files

['../Historiques_FilmsVu_Netflix\\Historique_user1.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user2.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user3.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user4.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user5.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user6.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user7.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user8.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user9.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_10.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_11.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_12.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_13.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_14.csv',
 '../Historiques_FilmsVu_Netflix\\Historique_user_15.csv']

## Compte le nombre de fois qu'un film a été vu

In [5]:
def traitement_fct(filename):
    """Map step: list the distinct titles found in one viewing-history CSV.

    Each value of the ``Title`` column is cut at its first ``": "`` and
    stripped of surrounding whitespace (presumably to drop season/episode
    suffixes -- confirm against the data), then de-duplicated so that a
    title counts at most once per file.

    Args:
        filename: Path of a comma-separated file with a ``Title`` column.

    Returns:
        A list of ``(title, 1)`` tuples sorted by title, one per distinct
        title. Titles are plain ``str`` objects.

    Raises:
        KeyError: If the file has no ``Title`` column.
    """
    titres = pd.read_csv(filename, sep=',')['Title']

    # Rows with a missing title are read as NaN (a float), which has no
    # .split(); drop them instead of crashing on the first incomplete row.
    distincts = {str(t).split(": ")[0].strip() for t in titres.dropna()}

    # sorted() keeps the alphabetical order np.unique used to provide while
    # returning built-in strings rather than NumPy scalars.
    return [(titre, 1) for titre in sorted(distincts)]


def partitioner(mapped_values):
    """Group the mapper output by key.

    Args:
        mapped_values: Iterable of ``(key, value)`` pairs, e.g.
            ``("Dark", 1)``.

    Returns:
        The items view of a dict mapping each key to the list of its
        values in encounter order, e.g. ``("Dark", [1, 1, 1])``.
    """
    grouped = defaultdict(list)
    for pair in mapped_values:
        key, value = pair
        grouped[key].append(value)

    return grouped.items()


def reducer(item):
    """Reduce one grouped entry to its number of occurrences.

    Args:
        item: A ``(key, values)`` pair as produced by ``partitioner``.

    Returns:
        ``(key, n)`` where ``n`` is the number of collected values.
    """
    (key, values) = item
    occurrences = len(values)
    return key, occurrences

In [21]:
# Map: each history file yields its (title, 1) pairs.
mapped_values = map(traitement_fct, files)
# Partition: flatten the per-file lists and group the 1s by title.
partioned_values = partitioner(chain.from_iterable(mapped_values))
# Reduce: turn each group into (title, number of files containing it).
occurences = map(reducer, partioned_values)
# Rank by descending count; the sort is stable, so ties keep grouping order.
most_commons = list(occurences)
most_commons.sort(key=itemgetter(1), reverse=True)
most_commons[:30]

[('La casa de papel', 12),
 ('Black Mirror', 9),
 ('Sex Education', 9),
 ('Stranger Things', 9),
 ('Dark', 7),
 ('Élite', 7),
 ('Plan Cœur', 7),
 ('Suits', 7),
 ('13 Reasons Why', 6),
 ('Murder', 6),
 ('The End of the F***ing World', 6),
 ('Lucifer', 5),
 ('MINDHUNTER', 5),
 ('Outlander', 5),
 ('Peaky Blinders', 5),
 ('The Crown', 5),
 ('The Last Kingdom', 5),
 ('The OA', 5),
 ('The Rain', 5),
 ('Umbrella Academy', 5),
 ('Family Business', 5),
 ('You', 5),
 ('Breaking Bad', 5),
 ('Unbelievable', 5),
 ('Bird Box', 4),
 ('Huge en France', 4),
 ('Les voyageurs du temps', 4),
 ('Nightflyers', 4),
 ('Polar', 4),
 ('Zoe', 4)]

## Création dictionnaire Film : Identifiant des k films les plus vus
Les k films les plus vus par les utilisateurs dont nous disposons de l'historique

In [17]:
def id_film(liste, k, titles=None):
    """Return the identifiers of the first ``k`` known films of a ranking.

    Films of ``liste`` whose title is missing from the lookup table are
    skipped and do not count towards ``k``.

    Args:
        liste: Sequence of ``(title, count)`` entries, already ordered
            from most to least seen; only the title is read.
        k: Maximum number of films to keep.
        titles: Mapping from title to identifier. Defaults to the
            module-level ``dico_title`` when omitted.

    Returns:
        A dict ``{title: identifier}`` with at most ``k`` entries, in
        ranking order.
    """
    if titles is None:
        titles = dico_title

    dico_film_id = {}
    rank = 1  # 1-based rank of the next film to be kept
    for film in liste:
        if rank > k:
            break
        titre = film[0]
        if titre in titles:
            dico_film_id[titre] = titles[titre]
            rank += 1
    return dico_film_id

In [18]:
id_film(most_commons, 10)

{'La casa de papel': '/title/80192098/',
 'Black Mirror': '/title/70264888/',
 'Sex Education': '/title/80197526/',
 'Stranger Things': '/title/80057281/',
 'Dark': '/title/80100172/',
 'Élite': '/title/80200942/',
 'Plan Cœur': '/title/80190086/',
 '13 Reasons Why': '/title/80117470/',
 'Murder': '/title/80024057/',
 'The End of the F***ing World': '/title/80175722/',
 'Lucifer': '/title/80057918/'}

## Création dictionnaire Film : Identifiant des films vus au moins 5 fois

In [24]:
def id_film2(liste, k, titles=None):
    """Return the identifiers of the known films seen at least ``k`` times.

    Args:
        liste: Iterable of ``(title, count)`` entries.
        k: Minimum count for a film to be kept.
        titles: Mapping from title to identifier. Defaults to the
            module-level ``dico_title`` when omitted.

    Returns:
        A dict ``{title: identifier}`` in the order of ``liste``; films
        whose title is missing from the lookup table are left out.
    """
    if titles is None:
        titles = dico_title

    # Filter instead of stopping at the first count below k, so qualifying
    # films are not lost when `liste` is not sorted by descending count.
    # For a sorted ranking the result is unchanged.
    return {
        film[0]: titles[film[0]]
        for film in liste
        if film[1] >= k and film[0] in titles
    }

In [32]:
# NOTE(review): this rebinds the name `id_film2` from the function to the
# dict it returns. Running this statement a second time therefore raises
# TypeError ('dict' object is not callable); code further down reads
# `id_film2` as that dict.
id_film2 = id_film2(liste=most_commons, k=5)
id_film2

{'La casa de papel': '/title/80192098/',
 'Black Mirror': '/title/70264888/',
 'Sex Education': '/title/80197526/',
 'Stranger Things': '/title/80057281/',
 'Dark': '/title/80100172/',
 'Élite': '/title/80200942/',
 'Plan Cœur': '/title/80190086/',
 '13 Reasons Why': '/title/80117470/',
 'Murder': '/title/80024057/',
 'The End of the F***ing World': '/title/80175722/',
 'Lucifer': '/title/80057918/',
 'MINDHUNTER': '/title/80114855/',
 'Outlander': '/title/70285581/',
 'Peaky Blinders': '/title/80002479/',
 'The Crown': '/title/80025678/',
 'The Last Kingdom': '/title/80074249/',
 'The OA': '/title/80044950/',
 'The Rain': '/title/80154610/',
 'Umbrella Academy': '/title/80186863/',
 'Family Business': '/title/81010818/',
 'You': '/title/80211991/',
 'Breaking Bad': '/title/70143836/',
 'Unbelievable': '/title/80153467/'}

## Matrice Users-Films 
Cette matrice indique si oui ou non l'utilisateur a vu le film (1 : il a vu le film, 0 : il n'a pas vu le film)

In [96]:
# One row label per history file: "user1" ... "userN", in the order of `files`.
id_user = ['user' + str(i) for i in range(1, len(files) + 1)]

# At this point `id_film2` is the {title: identifier} dict (the name was
# rebound to the function's result); its keys become the matrix columns.
liste_film = list(id_film2.keys())

N = len(id_film2)
nb_users = len(files)

# Start from an all-zero nb_users x N table: nobody has seen anything yet.
result = [[0] * N for _ in range(nb_users)]

ar = numpy.array(result)
matrice_user_films = pd.DataFrame(ar, index=id_user, columns=liste_film)

films_retenus = set(liste_film)  # constant-time membership test
for i, file in enumerate(files):
    for film in traitement_fct(file):
        if film[0] in films_retenus:
            # Write through a single label-based indexer. The chained form
            # df[col][[i]] = 1 assigns into an intermediate object, which
            # triggers SettingWithCopyWarning and leaves the DataFrame
            # untouched when pandas Copy-on-Write is enabled.
            matrice_user_films.at[id_user[i], film[0]] = 1

matrice_user_films

Unnamed: 0,La casa de papel,Black Mirror,Sex Education,Stranger Things,Dark,Élite,Plan Cœur,13 Reasons Why,Murder,The End of the F***ing World,...,Peaky Blinders,The Crown,The Last Kingdom,The OA,The Rain,Umbrella Academy,Family Business,You,Breaking Bad,Unbelievable
user1,0,1,1,1,1,1,0,1,1,1,...,1,1,1,1,1,1,0,0,0,0
user2,1,1,1,1,0,1,0,1,1,0,...,1,1,1,0,0,1,0,0,1,0
user3,0,1,0,1,1,0,0,0,0,1,...,0,0,1,1,1,1,0,0,0,0
user4,1,1,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
user5,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
user6,1,1,1,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,1
user7,1,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,1,0,0,0
user8,1,1,0,1,0,0,0,0,0,0,...,0,0,1,1,0,1,0,1,0,0
user9,1,1,1,1,1,1,1,1,0,1,...,0,0,0,1,0,0,0,1,0,0
user10,1,0,1,0,1,1,1,0,0,0,...,0,0,0,0,0,0,1,1,0,0
