# Package imports

In [1]:
import networkx as nx
import glob

# Function definitions

In [2]:
def sorted_dict(d):
    """Return the ``(key, value)`` pairs of `d` ordered by decreasing value.

    The sort is stable, so pairs with equal values keep the order in which
    they appear in `d`.
    """
    pairs = list(d.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs

def prediction_jaccard_with_edge(ever_seen, not_see, graph):
    """Score unseen nodes by edge-weighted Jaccard similarity to the seen nodes.

    For every node of `not_see`, the score is the sum, over the nodes of
    `ever_seen` it is directly linked to in `graph`, of
    ``jaccard(seen, unseen) * weight of the edge (unseen, seen)``.
    Seen nodes that share no edge with the unseen node contribute 0.

    Parameters:
        ever_seen: iterable of node ids already watched by the user.
        not_see: iterable of candidate node ids to score.
        graph: networkx graph whose edges carry a ``"weight"`` attribute.

    Returns:
        List of ``(node, score)`` pairs sorted by decreasing score
        (see `sorted_dict`).
    """
    # similarity[seen][candidate] -> Jaccard coefficient of the pair.
    similarity = {}
    for seen in ever_seen:
        pairs = [(seen, candidate) for candidate in not_see]
        similarity[seen] = {
            candidate: coefficient
            for _, candidate, coefficient in nx.jaccard_coefficient(graph, pairs)
        }

    scores = {}
    for candidate in not_see:
        total = 0
        for seen, coefficients in similarity.items():
            # Weights are read from `graph` (the argument), not from a
            # notebook-level graph, so each sub-graph uses its own edges.
            edge = graph.get_edge_data(candidate, seen)
            if edge is not None:
                total += coefficients[candidate] * edge["weight"]
        scores[candidate] = total

    return sorted_dict(scores)

def prediction_jaccard_without_edge(ever_seen, not_see, graph):
    """Score unseen nodes by their summed Jaccard similarity to the seen nodes.

    Each node of `not_see` receives the sum of its Jaccard coefficient (as
    computed by ``nx.jaccard_coefficient`` on `graph`) with every node of
    `ever_seen`. Edge weights are not used.

    Returns:
        List of ``(node, score)`` pairs sorted by decreasing score.
    """
    # coefficients[seen][candidate] -> Jaccard coefficient of the pair.
    coefficients = {}
    for seen in ever_seen:
        row = {}
        for candidate in not_see:
            _, target, value = next(iter(nx.jaccard_coefficient(graph, [(seen, candidate)])))
            row[target] = value
        coefficients[seen] = row

    totals = {}
    for candidate in not_see:
        totals[candidate] = sum([row[candidate] for row in coefficients.values()])

    return sorted_dict(totals)

def prediction_adamic_with_edge(ever_seen, not_see, graph):
    """Score unseen nodes by edge-weighted Adamic-Adar index to the seen nodes.

    For every node of `not_see`, the score is the sum, over the nodes of
    `ever_seen` it is directly linked to in `graph`, of
    ``adamic_adar(seen, unseen) * weight of the edge (unseen, seen)``.
    Seen nodes that share no edge with the unseen node contribute 0.

    Parameters:
        ever_seen: iterable of node ids already watched by the user.
        not_see: iterable of candidate node ids to score.
        graph: networkx graph whose edges carry a ``"weight"`` attribute.

    Returns:
        List of ``(node, score)`` pairs sorted by decreasing score
        (see `sorted_dict`).
    """
    # similarity[seen][candidate] -> Adamic-Adar index of the pair.
    similarity = {}
    for seen in ever_seen:
        pairs = [(seen, candidate) for candidate in not_see]
        similarity[seen] = {
            candidate: index
            for _, candidate, index in nx.adamic_adar_index(graph, pairs)
        }

    scores = {}
    for candidate in not_see:
        total = 0
        for seen, indices in similarity.items():
            # Weights are read from `graph` (the argument), not from a
            # notebook-level graph, so each sub-graph uses its own edges.
            edge = graph.get_edge_data(candidate, seen)
            if edge is not None:
                total += indices[candidate] * edge["weight"]
        scores[candidate] = total

    return sorted_dict(scores)

def prediction_adamic_without_edge(ever_seen, not_see, graph):
    """Score unseen nodes by their summed Adamic-Adar index to the seen nodes.

    Each node of `not_see` receives the sum of its Adamic-Adar index (as
    computed by ``nx.adamic_adar_index`` on `graph`) with every node of
    `ever_seen`. Edge weights are not used.

    Returns:
        List of ``(node, score)`` pairs sorted by decreasing score.
    """
    # indices[seen][candidate] -> Adamic-Adar index of the pair.
    indices = {}
    for seen in ever_seen:
        row = {}
        for candidate in not_see:
            _, target, value = next(iter(nx.adamic_adar_index(graph, [(seen, candidate)])))
            row[target] = value
        indices[seen] = row

    totals = {}
    for candidate in not_see:
        totals[candidate] = sum([row[candidate] for row in indices.values()])

    return sorted_dict(totals)

# Data import

- Titles

In [3]:
def load_titles(path):
    """Read a tab-separated ``id<TAB>title`` file into a dict ``{id: title}``.

    Before splitting on tabs, non-breaking spaces are replaced by regular
    spaces and every ``*`` is prefixed with a backslash. Empty lines are
    skipped instead of raising ``IndexError``.
    """
    titles = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.replace('\n', '')
            if not line:
                continue
            # '\\*' spells backslash + asterisk explicitly; the bare '\*'
            # form is an invalid escape sequence that newer Pythons warn about.
            content = (line.replace('\xa0', ' ')
                       .replace('*', '\\*')
                       .split('\t'))
            titles[content[0]] = content[1]
    return titles


dico_id = load_titles("../Data/Without3MostCommons/Titles.txt")
dico_id_film = load_titles("../Data/Without3MostCommons/Titles_film.txt")
dico_id_serie = load_titles("../Data/Without3MostCommons/Titles_serie.txt")

- Nodes weight

In [4]:
def load_node_weights(path):
    """Read a tab-separated ``id<TAB>weight`` file into a dict ``{id: float}``.

    Empty lines are skipped instead of raising ``IndexError``.
    """
    weights = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.replace('\n', '')
            if not line:
                continue
            content = line.split('\t')
            weights[content[0]] = float(content[1])
    return weights


dico_node = load_node_weights("../Data/Without3MostCommons/Nodes_weight.txt")
dico_node_film = load_node_weights("../Data/Without3MostCommons/Nodes_weight_film.txt")
dico_node_serie = load_node_weights("../Data/Without3MostCommons/Nodes_weight_serie.txt")

- Links

In [5]:
def load_link_weights(path):
    """Read a tab-separated ``id1<TAB>id2<TAB>weight`` file into a dict.

    Keys are the two ids joined by a tab character (``"id1\\tid2"``) and
    values are the integer weights. Empty lines are skipped instead of
    raising ``IndexError``.
    """
    links = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.replace('\n', '')
            if not line:
                continue
            content = line.split('\t')
            links[f"{content[0]}\t{content[1]}"] = int(content[2])
    return links


dico_link = load_link_weights("../Data/Without3MostCommons/Liaisons.txt")
dico_link_film = load_link_weights("../Data/Without3MostCommons/Liaisons_film.txt")
dico_link_serie = load_link_weights("../Data/Without3MostCommons/Liaisons_serie.txt")

- History

In [6]:
# History files, in lexicographic path order.
files = sorted(glob.glob('../Data/Historiques_FilmsVu_Netflix/CleanHistory_user*.txt'))

# Users are numbered by the position of their file in `files` (starting at 1),
# not by the number that appears in the file name.
dico_hist = {}
for position, path in enumerate(files, start=1):
    with open(path, "r", encoding="utf-8") as handle:
        dico_hist[f"user{position}"] = handle.read().split("\n")

# Graph creation

In [7]:
def build_graph(node_weights, link_weights):
    """Create an undirected graph from node and link weight dictionaries.

    `node_weights` maps a node id to its weight; `link_weights` maps a
    ``"id1\\tid2"`` key to the weight of the edge between the two ids.
    """
    graph = nx.Graph()
    for node, weight in node_weights.items():
        graph.add_node(node, weight=weight)
    for link, weight in link_weights.items():
        ends = link.split('\t')
        graph.add_edge(ends[0], ends[1], weight=weight)
    return graph


G = build_graph(dico_node, dico_link)
G_film = build_graph(dico_node_film, dico_link_film)
G_serie = build_graph(dico_node_serie, dico_link_serie)

# Recommender system

In [8]:
def split_history(watched_titles, titles_by_id, graph):
    """Split the nodes of `graph` into those the user has seen and the rest.

    Parameters:
        watched_titles: iterable of titles taken from the user's history.
        titles_by_id: dict mapping a node id to its title.
        graph: networkx graph whose nodes are the ids.

    Returns:
        ``(seen_ids, unseen_nodes)``: the ids of `titles_by_id` whose title
        is in `watched_titles` (in `titles_by_id` order), and the nodes of
        `graph` that are not among those ids (in node order).
    """
    # Sets make each membership test constant-time instead of a list scan.
    watched = set(watched_titles)
    seen_ids = [title_id for title_id, title in titles_by_id.items() if title in watched]
    seen_lookup = set(seen_ids)
    unseen_nodes = [node for node in graph.nodes() if node not in seen_lookup]
    return seen_ids, unseen_nodes


number_user = int(input(f"Entrez un numéro d'usager que vous voulez consultez (entre 1 et {len(files)}): "))
user_titles = dico_hist[f"user{number_user}"]

viewing_history_user, content_not_see = split_history(user_titles, dico_id, G)
viewing_history_user_film, content_not_see_film = split_history(user_titles, dico_id_film, G_film)
viewing_history_user_serie, content_not_see_serie = split_history(user_titles, dico_id_serie, G_serie)

Entrez un numéro d'usager que vous voulez consultez (entre 1 et 4): 1


### Recommendation according to the Jaccard index taking into account the weight of the edges

- With the graph `G`

In [9]:
%%time
jac_with_edge = prediction_jaccard_with_edge(viewing_history_user, content_not_see, G)
# jac_with_edge[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge][:5]

Wall time: 3min 57s


[('Saiki Kusuo no Ψ Nan : Le retour', '/title/81054849/', 83.87796124128963),
 ('Levius', '/title/80156799/', 80.90723770349287),
 ('Magi: Adventure of Sinbad', '/title/80103331/', 80.6886798997946),
 ('7SEEDS', '/title/80183051/', 78.85543066928847),
 ('Witchcraft Works', '/title/81088623/', 76.38765041411901)]

- With the graph `G_film`

In [10]:
%%time
jac_with_edge_film = prediction_jaccard_with_edge(viewing_history_user_film, content_not_see_film, G_film)
# jac_with_edge_film[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge_film][:5]

Wall time: 38.8 s


[('GODZILLA', '/title/80180373/', 22.682766993679373),
 ("GODZILLA La ville à l'aube du combat",
  '/title/80180376/',
  21.962639847098544),
 ('GODZILLA Le dévoreur de planètes', '/title/80198623/', 21.962639847098544),
 ('BLAME!', '/title/80115466/', 19.846942471923427),
 ('The Seven Deadly Sins the Movie: Prisoners of the Sky',
  '/title/81006261/',
  19.34991682437951)]

- With the graph `G_serie`

In [11]:
%%time
jac_with_edge_serie = prediction_jaccard_with_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
# jac_with_edge_serie[:10]
[(dico_id[id_title], id_title, num) for id_title,num in jac_with_edge_serie][:5]

Wall time: 31 s


[('Saiki Kusuo no Ψ Nan : Le retour', '/title/81054849/', 85.94985842766081),
 ('Levius', '/title/80156799/', 82.82496390993919),
 ('Magi: Adventure of Sinbad', '/title/80103331/', 82.72789736946946),
 ('7SEEDS', '/title/80183051/', 81.82947391296351),
 ('Witchcraft Works', '/title/81088623/', 78.25723528367158)]

### Recommendation according to the Jaccard index without taking into account the weight of the edges

- With the graph `G`

In [12]:
%%time
jac_without_edge = prediction_jaccard_without_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge][:5]

Wall time: 4min 5s


[('Insatiable', '/title/80179905/', 20.814477991445063),
 ('The Politician', '/title/80241248/', 20.59972917211894),
 ('Haters Back Off', '/title/80095900/', 20.592515938119277),
 ('Santa Clarita Diet', '/title/80095815/', 20.58754526310887),
 ('Astronomy Club: The Sketch Show', '/title/81087498/', 20.492539019703475)]

- With the graph `G_film`

In [13]:
%%time
jac_without_edge_film = prediction_jaccard_without_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge_film][:5]

Wall time: 37.5 s


[('Adam Devine: Best Time of Our Lives',
  '/title/80993404/',
  7.787316464399635),
 ('Adel Karam: Live from Beirut', '/title/80203919/', 7.787316464399635),
 ("Aditi Mittal: Things They Wouldn't Let Me Say",
  '/title/80183329/',
  7.787316464399635),
 ('Ali Wong: Baby Cobra', '/title/80101493/', 7.787316464399635),
 ('Ali Wong: Hard Knock Wife', '/title/80186940/', 7.787316464399635)]

- With the graph `G_serie`

In [14]:
%%time
jac_without_edge_serie = prediction_jaccard_without_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in jac_without_edge_serie][:5]

Wall time: 31.2 s


[('7SEEDS', '/title/80183051/', 20.228247784672487),
 ('A Certain Scientific Railgun', '/title/70308190/', 20.228247784672487),
 ('Ajin : semi-humain', '/title/80043576/', 20.228247784672487),
 ('Assassination Classroom', '/title/80045948/', 20.228247784672487),
 ('Back Street Girls -GOKUDOLS-', '/title/80996957/', 20.228247784672487)]

### Recommendation according to the Adamic–Adar index taking into account the weight of the edges

- With the graph `G`

In [None]:
%%time
aa_with_edge = prediction_adamic_with_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge][:5]

- With the graph `G_film`

In [None]:
%%time
aa_with_edge_film = prediction_adamic_with_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge_film][:5]

- With the graph `G_serie`

In [None]:
%%time
aa_with_edge_serie = prediction_adamic_with_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in aa_with_edge_serie][:5]

### Recommendation according to the Adamic–Adar index without taking into account the weight of the edges

- With the graph `G`

In [None]:
%%time
aa_without_edge = prediction_adamic_without_edge(viewing_history_user, content_not_see, G)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge][:5]

- With the graph `G_film`

In [None]:
%%time
aa_without_edge_film = prediction_adamic_without_edge(viewing_history_user_film, content_not_see_film, G_film)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge_film][:5]

- With the graph `G_serie`

In [None]:
%%time
aa_without_edge_serie = prediction_adamic_without_edge(viewing_history_user_serie, content_not_see_serie, G_serie)
[(dico_id[id_title], id_title, num) for id_title,num in aa_without_edge_serie][:5]

```python
def prediction_centralite_1(ever_seen, graph):
    """Rank the nodes outside `ever_seen` by degree centrality, highest first.

    Returns a list of ``(node, centrality)`` pairs, as produced by
    `sorted_dict` on ``nx.degree_centrality(graph)``, with the nodes of
    `ever_seen` filtered out.

    Note: `graph` is modified in place, since ``add_node`` is called on it
    for every element of `ever_seen` with ``weight=10.0``.
    """
    # NOTE(review): presumably this overwrites the weight of nodes already in
    # the graph and adds any missing ones; the purpose of the value 10.0 is
    # not visible here (TODO confirm).
    for elem in ever_seen:
        graph.add_node(elem,weight=10.0)
    # Keep only the nodes the user has not seen; order comes from sorted_dict.
    return [i for i in sorted_dict(nx.degree_centrality(graph)) if i[0] not in ever_seen]

%%time
# NOTE(review): `G2` is not defined in the visible part of this notebook;
# confirm which graph is meant before running this snippet.
cent_degree = prediction_centralite_1(viewing_history_user, G2)
cent_degree[:10]
```