# Package imports

In [1]:
import networkx as nx
import glob

# Function definitions

In [2]:
def sorted_dict(d):
    """Return the ``(key, value)`` pairs of ``d`` ordered by value, largest first.

    The sort is stable, so pairs with equal values keep the relative order
    they have in ``d``.
    """
    def by_value(pair):
        return pair[1]

    pairs = list(d.items())
    pairs.sort(key=by_value, reverse=True)
    return pairs

def prediction_jaccard_with_edge(ever_seen, not_see, graph):
    """Rank unseen nodes by their edge-weighted Jaccard similarity to seen nodes.

    For every node in ``not_see``, the Jaccard coefficient with each node of
    ``ever_seen`` is multiplied by the ``"weight"`` attribute of the edge that
    joins the two nodes in ``graph``; pairs with no direct edge contribute 0.
    The products are summed per unseen node.

    Parameters
    ----------
    ever_seen : iterable
        Nodes of ``graph`` the user has already watched.
    not_see : sequence
        Candidate nodes of ``graph`` to score (iterated several times).
    graph : networkx.Graph
        Graph used both for the Jaccard coefficients and for the edge weights.

    Returns
    -------
    list of (node, score)
        One entry per node of ``not_see``, sorted by decreasing score.
    """
    # One batched call per seen node instead of one generator per pair.
    coeff_by_seen = {}
    for seen in ever_seen:
        pairs = [(seen, unseen) for unseen in not_see]
        coeff_by_seen[seen] = {
            v: coeff for _, v, coeff in nx.jaccard_coefficient(graph, pairs)
        }

    scores = {}
    for unseen in not_see:
        terms = []
        for seen, coeffs in coeff_by_seen.items():
            # Read the weight from the graph that was passed in, not from the
            # notebook-level ``G``; otherwise the film/series sub-graphs would
            # silently be scored with the edges of the full graph.
            edge = graph.get_edge_data(unseen, seen)
            terms.append(coeffs[unseen] * edge["weight"] if edge is not None else 0)
        scores[unseen] = sum(terms)

    return sorted_dict(scores)

def prediction_jaccard_without_edge(ever_seen, not_see, graph):
    """Rank unseen nodes by the sum of their Jaccard coefficients with seen nodes.

    Each node of ``not_see`` is scored with the sum, over the nodes of
    ``ever_seen``, of the Jaccard coefficient computed on ``graph``. Edge
    weights are ignored.

    Returns a list of ``(node, score)`` pairs sorted by decreasing score.
    """
    coeff_by_seen = {}
    for seen in ever_seen:
        per_unseen = {}
        for unseen in not_see:
            # A single pair is requested, so the result holds one (u, v, coefficient) triple.
            _, target, coeff = list(nx.jaccard_coefficient(graph, [(seen, unseen)]))[0]
            per_unseen[target] = coeff
        coeff_by_seen[seen] = per_unseen

    totals = {}
    for unseen in not_see:
        totals[unseen] = sum([per_unseen[unseen] for per_unseen in coeff_by_seen.values()])

    return sorted_dict(totals)

def prediction_adamic_with_edge(ever_seen, not_see, graph):
    """Rank unseen nodes by their edge-weighted Adamic-Adar index with seen nodes.

    For every node in ``not_see``, the Adamic-Adar index with each node of
    ``ever_seen`` is multiplied by the ``"weight"`` attribute of the edge that
    joins the two nodes in ``graph``; pairs with no direct edge contribute 0.
    The products are summed per unseen node.

    Parameters
    ----------
    ever_seen : iterable
        Nodes of ``graph`` the user has already watched.
    not_see : sequence
        Candidate nodes of ``graph`` to score (iterated several times).
    graph : networkx.Graph
        Graph used both for the Adamic-Adar indices and for the edge weights.

    Returns
    -------
    list of (node, score)
        One entry per node of ``not_see``, sorted by decreasing score.
    """
    # One batched call per seen node instead of one generator per pair.
    index_by_seen = {}
    for seen in ever_seen:
        pairs = [(seen, unseen) for unseen in not_see]
        index_by_seen[seen] = {
            v: index for _, v, index in nx.adamic_adar_index(graph, pairs)
        }

    scores = {}
    for unseen in not_see:
        terms = []
        for seen, indices in index_by_seen.items():
            # Read the weight from the graph that was passed in, not from the
            # notebook-level ``G``; otherwise the film/series sub-graphs would
            # silently be scored with the edges of the full graph.
            edge = graph.get_edge_data(unseen, seen)
            terms.append(indices[unseen] * edge["weight"] if edge is not None else 0)
        scores[unseen] = sum(terms)

    return sorted_dict(scores)

def prediction_adamic_without_edge(ever_seen, not_see, graph):
    """Rank unseen nodes by the sum of their Adamic-Adar indices with seen nodes.

    Each node of ``not_see`` is scored with the sum, over the nodes of
    ``ever_seen``, of the Adamic-Adar index computed on ``graph``. Edge
    weights are ignored.

    Returns a list of ``(node, score)`` pairs sorted by decreasing score.
    """
    index_by_seen = {}
    for seen in ever_seen:
        per_unseen = {}
        for unseen in not_see:
            # A single pair is requested, so the result holds one (u, v, index) triple.
            _, target, index = list(nx.adamic_adar_index(graph, [(seen, unseen)]))[0]
            per_unseen[target] = index
        index_by_seen[seen] = per_unseen

    totals = {}
    for unseen in not_see:
        totals[unseen] = sum([per_unseen[unseen] for per_unseen in index_by_seen.values()])

    return sorted_dict(totals)

# Data loading

- Titles

In [3]:
def _load_titles(path):
    """Read a tab-separated title file into a ``{first field: second field}`` dict.

    Every line is normalised before being split on tabs: the newline is
    dropped, non-breaking spaces become regular spaces and each ``*`` is
    prefixed with a backslash.
    """
    titles = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # '\\*' is backslash + asterisk. The former '\*' spelling produced the
            # same text but relies on an invalid escape sequence, which newer
            # Python versions warn about.
            content = (line.replace('\n', '')
                       .replace('\xa0', ' ')
                       .replace('*', '\\*')
                       .split('\t'))
            titles[content[0]] = content[1]
    return titles


dico_id = _load_titles("../Data/With3MostCommons/Titles.txt")
dico_id_film = _load_titles("../Data/With3MostCommons/Titles_film.txt")
dico_id_serie = _load_titles("../Data/With3MostCommons/Titles_serie.txt")
# dico_title = {value:key for key,value in dico_id.items()}        
# {key:value for key,value in dico_title.items() if key.startswith('A')}

- Nodes weight

In [4]:
dico_node = {}
dico_node_film = {}
dico_node_serie = {}

# Each line is split on tabs: the first field is the key, the second its weight
# (stored as float). The three files are read in the same way, in this order.
node_weight_sources = (
    (dico_node, "../Data/With3MostCommons/Nodes_weight.txt"),
    (dico_node_film, "../Data/With3MostCommons/Nodes_weight_film.txt"),
    (dico_node_serie, "../Data/With3MostCommons/Nodes_weight_serie.txt"),
)

for weights, path in node_weight_sources:
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            content = line.replace('\n', '').split('\t')
            weights[content[0]] = float(content[1])
        
# dico_node

- Links

In [5]:
dico_link = {}
dico_link_film = {}
dico_link_serie = {}

# Each line is split on tabs: the first two fields, re-joined with a tab, form
# the key; the third field is stored as an int. The three files are read in the
# same way, in this order.
link_sources = (
    (dico_link, "../Data/With3MostCommons/Liaisons.txt"),
    (dico_link_film, "../Data/With3MostCommons/Liaisons_film.txt"),
    (dico_link_serie, "../Data/With3MostCommons/Liaisons_serie.txt"),
)

for links, path in link_sources:
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            content = line.replace('\n', '').split('\t')
            links[f"{content[0]}\t{content[1]}"] = int(content[2])
        
# dico_link

- History

In [6]:
files = sorted(glob.glob('../Data/Historiques_FilmsVu_Netflix/CleanHistory_user*.txt'))

# Histories are keyed "user1", "user2", ... by position in the sorted file list.
# NOTE(review): the sort is lexicographic, so with ten or more files
# "...user10.txt" would come before "...user2.txt" and the key would no longer
# match the number in the file name — confirm this is acceptable.
dico_hist = {}
for i, path in enumerate(files):
    with open(path, "r", encoding="utf-8") as f:
        # One list entry per line of the file.
        dico_hist[f"user{i+1}"] = f.read().split("\n")

# dico_hist

# Graph creation

In [7]:
G = nx.Graph()
G_film = nx.Graph()
G_serie = nx.Graph()

# Every graph is filled the same way: first its weighted nodes, then its
# weighted edges (link keys are "source<TAB>target").
graph_sources = (
    (G, dico_node, dico_link),
    (G_film, dico_node_film, dico_link_film),
    (G_serie, dico_node_serie, dico_link_serie),
)

for target_graph, node_weights, link_weights in graph_sources:
    for node_id, node_weight in node_weights.items():
        target_graph.add_node(node_id, weight=node_weight)

    for link_key, link_weight in link_weights.items():
        source, target = link_key.split('\t')
        target_graph.add_edge(source, target, weight=link_weight)

# Recommender system

In [8]:
def _split_seen_unseen(history_titles, id_to_title, graph):
    """Split the nodes of ``graph`` into what the user has and has not watched.

    Parameters
    ----------
    history_titles : iterable of str
        Titles found in the user's viewing history.
    id_to_title : dict
        Maps a title id to its title.
    graph : networkx.Graph
        Graph whose nodes are title ids.

    Returns
    -------
    (seen_ids, unseen_nodes)
        ``seen_ids`` lists, in ``id_to_title`` order, the ids whose title is in
        the history; ``unseen_nodes`` lists, in graph order, the nodes that are
        not among ``seen_ids``.
    """
    # Sets make each membership test O(1); the resulting lists and their order
    # are the same as with the previous list / dict.values() scans.
    watched_titles = set(history_titles)
    seen_ids = [title_id for title_id, title in id_to_title.items() if title in watched_titles]
    seen_lookup = set(seen_ids)
    unseen_nodes = [node for node in graph.nodes() if node not in seen_lookup]
    return seen_ids, unseen_nodes


number_user = int(input(f"Entrez un numéro d'usager que vous voulez consultez (entre 1 et {len(files)}): "))
user_history = dico_hist[f"user{number_user}"]

viewing_history_user, content_not_see = _split_seen_unseen(user_history, dico_id, G)
viewing_history_user_film, content_not_see_film = _split_seen_unseen(user_history, dico_id_film, G_film)
viewing_history_user_serie, content_not_see_serie = _split_seen_unseen(user_history, dico_id_serie, G_serie)

Entrez un numéro d'usager que vous voulez consultez (entre 1 et 4): 1


### Recommendation according to the Jaccard index taking into account the weight of the edges

- With the graph `G`

In [9]:
%%time
jac_with_edge = prediction_jaccard_with_edge(viewing_history_user, content_not_see, G)
# jac_with_edge[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge][:5]

Wall time: 6min 22s


[('Saiki Kusuo no Ψ Nan : Le retour', '/title/81054849/', 83.87796124128963),
 ('Levius', '/title/80156799/', 80.90723770349287),
 ('Magi: Adventure of Sinbad', '/title/80103331/', 80.6886798997946),
 ('7SEEDS', '/title/80183051/', 78.85543066928847),
 ('Witchcraft Works', '/title/81088623/', 76.38765041411901)]

- With the graph `G_film`

In [10]:
%%time
jac_with_edge_film = prediction_jaccard_with_edge(viewing_history_user_film, content_not_see_film, G_film)
# jac_with_edge_film[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge_film][:5]

Wall time: 1min 59s


[('Gaspard Proust tapine', '/title/80134758/', 35.83279444845189),
 ('David Cross: Making America Great Again!',
  '/title/80108759/',
  35.252014912594234),
 ('Francesco De Carlo: Cose di Questo Mondo',
  '/title/81041272/',
  35.252014912594234),
 ('Jeff Dunham: Beside Himself', '/title/81074113/', 35.252014912594234),
 ('Jim Jefferies: This Is Me Now', '/title/80214743/', 35.252014912594234)]

- With the graph `G_serie`

In [11]:
%%time
jac_with_edge_serie = prediction_jaccard_with_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
# jac_with_edge_serie[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge_serie][:5]

Wall time: 1min


[('Saiki Kusuo no Ψ Nan : Le retour', '/title/81054849/', 85.94985842766081),
 ('Levius', '/title/80156799/', 82.82496390993919),
 ('Magi: Adventure of Sinbad', '/title/80103331/', 82.72789736946946),
 ('7SEEDS', '/title/80183051/', 81.82947391296351),
 ('Witchcraft Works', '/title/81088623/', 78.25723528367158)]

### Recommendation according to the Jaccard index without taking into account the weight of the edges

- With the graph `G`

In [12]:
%%time
jac_without_edge = prediction_jaccard_without_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge][:5]

Wall time: 6min 27s


[('Aziz Ansari Live at Madison Square Garden',
  '/title/80038296/',
  23.204918152773836),
 ('Aziz Ansari: Buried Alive', '/title/70290568/', 23.204918152773836),
 ('Aziz Ansari: RIGHT NOW', '/title/81098589/', 23.204918152773836),
 ('Adam Devine: Best Time of Our Lives',
  '/title/80993404/',
  23.204754596921063),
 ('ADAM SANDLER 100% FRESH', '/title/80224536/', 23.204754596921063)]

- With the graph `G_film`

In [13]:
%%time
jac_without_edge_film = prediction_jaccard_without_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge_film][:5]

Wall time: 1min 49s


[('Ace Ventura, détective chiens et chats',
  '/title/215309/',
  21.231891722031534),
 ('Action Point', '/title/80210497/', 21.231891722031534),
 ('Baby Mama', '/title/70084794/', 21.231891722031534),
 ('Billy Madison', '/title/70000782/', 21.231891722031534),
 ('Blue Mountain State: The Rise of Thadland',
  '/title/80097530/',
  21.231891722031534)]

- With the graph `G_serie`

In [14]:
%%time
jac_without_edge_serie = prediction_jaccard_without_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge_serie][:5]

Wall time: 55.5 s


[('Insatiable', '/title/80179905/', 21.028855458838557),
 ('Neo Yokio', '/title/80152350/', 20.885529750215024),
 ('Glee', '/title/70143843/', 20.81353854312795),
 ('Santa Clarita Diet', '/title/80095815/', 20.813243664339186),
 ('Ash vs. Evil Dead', '/title/80049277/', 20.788397516477467)]

### Recommendation according to the Adamic–Adar index taking into account the weight of the edges

- With the graph `G`

In [15]:
%%time
aa_with_edge = prediction_adamic_with_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge][:5]

Wall time: 7min 6s


[('La Délicatesse', '/title/70190331/', 7841.865051893523),
 ('Paris', '/title/70117694/', 7528.233196726301),
 ("Chroniques sexuelles d'une famille d'aujourd'hui",
  '/title/70242943/',
  7507.682414983971),
 ('Avril et le Monde truqué', '/title/80062096/', 7358.972688786177),
 ('Saint-Jacques... La Mecque', '/title/80007621/', 7236.134511614933)]

- With the graph `G_film`

In [16]:
%%time
aa_with_edge_film = prediction_adamic_with_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge_film][:5]

Wall time: 2min 48s


[('La Délicatesse', '/title/70190331/', 7417.477919122134),
 ('Paris', '/title/70117694/', 7104.329596799172),
 ("Chroniques sexuelles d'une famille d'aujourd'hui",
  '/title/70242943/',
  7084.207356875057),
 ('Avril et le Monde truqué', '/title/80062096/', 6939.344834993235),
 ('Saint-Jacques... La Mecque', '/title/80007621/', 6816.007876444606)]

- With the graph `G_serie`

In [17]:
%%time
aa_with_edge_serie = prediction_adamic_with_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge_serie][:5]

Wall time: 1min 24s


[('Neo Yokio', '/title/80152350/', 6187.162196110412),
 ('Lady Dynamite', '/title/80046193/', 6121.7852961236085),
 ('Haters Back Off', '/title/80095900/', 5984.4071338892645),
 ('The Politician', '/title/80241248/', 5970.30062196634),
 ('IZombie', '/title/80027159/', 5932.5677445550145)]

### Recommendation according to the Adamic–Adar index without taking into account the weight of the edges

- With the graph `G`

In [18]:
%%time
aa_without_edge = prediction_adamic_without_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge][:5]

Wall time: 7min 5s


[('Saint-Jacques... La Mecque', '/title/80007621/', 4599.718344138767),
 ('My Deer Hunter Dad', '/title/80176715/', 4581.988582582696),
 ('Colkatay Columbus', '/title/80180509/', 4479.719948195896),
 ('Avril et le Monde truqué', '/title/80062096/', 4469.920518332674),
 ('Band of Robbers', '/title/80064273/', 4469.521663430579)]

- With the graph `G_film`

In [19]:
%%time
aa_without_edge_film = prediction_adamic_without_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge_film][:5]

Wall time: 2min 36s


[('Saint-Jacques... La Mecque', '/title/80007621/', 4060.7030651806754),
 ('My Deer Hunter Dad', '/title/80176715/', 4043.457467250964),
 ('Colkatay Columbus', '/title/80180509/', 3950.182869497937),
 ('Band of Robbers', '/title/80064273/', 3929.7158594557086),
 ('PK', '/title/70303496/', 3928.1045509183677)]

- With the graph `G_serie`

In [20]:
%%time
aa_without_edge_serie = prediction_adamic_without_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge_serie][:5]

Wall time: 1min 17s


[('Haters Back Off', '/title/80095900/', 3115.5380735578924),
 ('The Politician', '/title/80241248/', 3111.9581285028553),
 ('Neo Yokio', '/title/80152350/', 3101.855898854907),
 ('Girlboss', '/title/80115671/', 3084.8448356699864),
 ('Special', '/title/80987458/', 3084.8448356699864)]

```python
def prediction_centralite_1(ever_seen, graph):
    """Rank the nodes of ``graph`` the user has not seen by degree centrality.

    Every node of ``ever_seen`` is first (re)added to ``graph`` with a
    ``weight`` attribute of 10.0, which modifies the graph passed in. The
    degree centrality of all nodes is then computed and the seen nodes are
    dropped from the ranking.

    Returns a list of ``(node, centrality)`` pairs sorted by decreasing
    centrality.
    """
    for seen in ever_seen:
        graph.add_node(seen, weight=10.0)

    ranking = sorted_dict(nx.degree_centrality(graph))
    return [entry for entry in ranking if entry[0] not in ever_seen]

%%time
# Degree-centrality ranking of the titles the user has not seen; show the ten best.
# NOTE(review): `G2` is not defined in any cell visible in this notebook — confirm which graph is intended.
cent_degree = prediction_centralite_1(viewing_history_user, G2)
cent_degree[:10]
```