In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

# Load the pipe-delimited item catalogue, report its dimensions and preview
# ten randomly chosen rows.
items = pd.read_csv("items.csv", delimiter="|")
print(items.shape)
items.sample(n=10)

(78030, 6)


Unnamed: 0,itemID,title,author,publisher,main topic,subtopics
41533,60966,Adlerjunge,Rodney Bennett,Freies Geistesleben GmbH,YF,[5AN]
25749,63906,Miles Between,Mary E Pearson,St. Martins Press-3PL,YFB,[YX]
37338,55908,Primavera Libro Para Colorear,El Blokehead,Blurb,YFB,[]
76075,68658,Broken,Alyxandra Harvey-Fitzhenry,Tradewind Books,YFB,[]
7810,48688,Armada,Ernest Cline,FISCHER TOR,FBA,"[5LF,1KB,FBA,FLP,FLQ,FLR,FLS,FLU,FYT,FYW,YFG]"
22682,42360,Displacement,Kiku Hughes,Roaring Brook Press,XADC,[YF]
21850,13263,Justus Schneck,Sabine Wittemeier,NOVA MD,YBL,"[FV,G,WN]"
66130,33020,Days to Hide,Stephanie Flynn,Small Fish Publishing,FRU,[]
4714,72863,The Extraordinary Adventures of Sherlock Holmes,Arthur Conan Doyle,Vintage Publishing,YFCF,[YFA]
32389,22766,One Creepy Street,Lee Jordan,Black Rose Writing,YFD,[]


In [2]:
# Narrow the catalogue to the identifier plus the two descriptive columns.
items = items.loc[:, ["itemID", "title", "author"]]
items.head()

Unnamed: 0,itemID,title,author
0,21310,Princess Poppy: The Big Mix Up,Janey Louise Jones
1,73018,Einfach zeichnen! Step by Step,Wiebke Krabbe
2,19194,Red Queen 1,Victoria Aveyard
3,40250,Meine Kindergarten-Freunde (Pirat),
4,46107,Mein großes Schablonen-Buch - Wilde Tiere,Elizabeth Golding


In [3]:
# Load the pipe-delimited session log and compare its full size with the
# number of rows that record an actual purchase (order > 0).
transactions = pd.read_csv("transactions.csv", delimiter="|")
print(
    "Shape: ",
    transactions.shape,
    "Filtered shape:",
    transactions.loc[transactions["order"] > 0].shape,
    sep="\n",
)
transactions.sample(n=10)

Shape: 
(365143, 5)
Filtered shape:
(16906, 5)


Unnamed: 0,sessionID,itemID,click,basket,order
227065,173601,43101,1,0,0
219318,167623,49234,0,0,1
35591,27025,69619,1,0,0
253163,193826,8084,1,0,0
280612,214754,3083,1,0,0
324548,248463,68641,1,0,0
317625,243119,25641,1,0,0
348727,266871,7294,1,0,0
123502,94324,13748,3,0,0
240765,184200,16698,1,0,0


In [4]:
# Attach item metadata to every transaction row.
merged = items.merge(transactions, on="itemID")
print(
    "Merged shape:",
    merged.shape,
    "After filter:",
    merged.loc[merged["order"] > 0].shape,
    sep="\n",
)

# Keep purchases only, then report and discard rows with missing fields.
merged = merged.loc[merged["order"] > 0]
print("Null values:", "\n", merged.isna().sum())
merged = merged.dropna()
print("Merged shape:", merged.shape, "\nMerged Dataset:")
merged.sample(n=10)

Merged shape:
(365143, 7)
After filter:
(16906, 7)
Null values: 
 itemID          0
title           0
author       1249
sessionID       0
click           0
basket          0
order           0
dtype: int64
Merged shape: (15657, 7) 
Merged Dataset:


Unnamed: 0,itemID,title,author,sessionID,click,basket,order
299926,56731,Mein Anziehpuppen-Stickerbuch: Meine Katzen,Lucy Bowman,16405,0,0,1
340570,15651,Leah on the Offbeat,Becky Albertalli,236707,0,0,1
146322,12016,Das Buch der gelöschten Wörter - Die letzten Z...,Mary E. Garner,55623,0,0,1
6445,11525,Zeit des Sturms,Andrzej Sapkowski,111623,0,0,1
313328,56245,Nähen lernen auf Papierbögen,Anika Weimert,208718,0,0,1
312843,33869,Miles & Niles - Jetzt wird's wild,"Jory John, Mac Barnett",85478,0,0,1
18497,62556,Level 4. Die Stadt der Kinder,Andreas Schlüter,101430,0,0,1
12785,14207,Der letzte Wunsch,Andrzej Sapkowski,253480,0,0,1
169083,34815,Als Hitler das rosa Kaninchen stahl,Judith Kerr,209523,0,0,1
332115,38348,All Eyes on Us,Kit Frick,93451,0,0,1


In [5]:
# Title x session matrix of order values; pairs that never occur are NaN.
merged_pivot = merged.pivot_table(
    index="title",
    columns="sessionID",
    values="order",
)
print("Pivot shape:", merged_pivot.shape)
merged_pivot

Pivot shape: (4625, 12379)


sessionID,15,54,60,90,104,128,153,177,180,194,...,279101,279104,279116,279154,279181,279199,279200,279238,279285,279309
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
#Basteln for Future,,,,,,,,,,,...,,,,,,,,,,
#Wir bleiben zuhause Kindermitmachbuch,,,,,,,,,,,...,,,,,,,,,,
#selbstschuld - Was heißt schon privat,,,,,,,,,,,...,,,,,,,,,,
... und dann kam Joselle,,,,,,,,,,,...,,,,,,,,,,
"1, 2, 3, 4 Lieblingstier",,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tiptoi® Die Welt der Musik,,,,,,,,,,,...,,,,,,,,,,
tiptoi® Tiere im Garten,,,,,,,,,,,...,,,,,,,,,,
tiptoi® Unterwegs mit der Feuerwehr,,,,,,,,,,,...,,,,,,,,,,
"verliebt, versohlt, versklavt - wenn Strenge not (gut) tut Erotischer SM-Roman",,,,,,,,,,,...,,,,,,,,,,


In [6]:
# Treat "title not ordered in this session" as an explicit zero.
merged_pivot = merged_pivot.fillna(0)
merged_pivot.sample(n=10)

sessionID,15,54,60,90,104,128,153,177,180,194,...,279101,279104,279116,279154,279181,279199,279200,279238,279285,279309
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kernstaub,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"17, Das dritte Buch der Erinnerung",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Perfect Twin - Der Aufbruch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Rat der Neun - Gegen das Schicksal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Der Sommer, als ich schön wurde",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
World of Warcraft: Der Untergang der Aspekte,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Winston - Kater Undercover,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Tintenwelt-Schuber,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bartimäus 03. Die Pforte des Magiers,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Honigkuckuckskinder,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Model oluşturmadan, item-item collaborative filtering kullanarak ve birden fazla transaction'a sahip id'leri kullanarak deneyelim:

In [11]:
def update_pivot(df):
    """Subtract each row's mean from that row's non-zero cells, in place.

    Vectorised replacement for a per-cell Python loop; the arithmetic is
    unchanged, only the execution strategy differs.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric matrix (rows are centred independently). It is modified in
        place, so pass a copy if the original values are still needed.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after centring.
    """
    # NOTE(review): this mask compares column *labels* with 0, not cell
    # values, so the row mean is taken over every column whose label is not 0
    # (zero cells included). It is kept as-is because downstream similarities
    # were produced with it; averaging only the non-zero cells would turn any
    # row holding a single distinct value into an all-zero row.
    label_mask = df.columns != 0
    row_means = df.loc[:, label_mask].mean(axis=1)

    # Shift every cell by its row mean, then restore the cells that were
    # exactly zero so that "no interaction" stays zero.
    shifted = df.sub(row_means, axis=0)
    df.iloc[:, :] = df.where(df == 0, shifted).to_numpy()
    return df

def cosine(df):
    """Return the cosine-similarity matrix computed from ``df``.

    The matrix is obtained from ``cosine_similarity(df)``; its shape is
    printed before it is handed back to the caller.
    """
    sims = cosine_similarity(df)
    print("Cosine Matrix shape:", sims.shape)
    return sims

def recommend_w_similarities(book_title, similarity, pivot_df, k):
    """Print and return the titles most similar to ``book_title``.

    Parameters
    ----------
    book_title : str
        Title to look up in ``pivot_df.index``.
    similarity : array-like, 2-D
        Square similarity matrix whose row/column order matches
        ``pivot_df.index``.
    pivot_df : pandas.DataFrame
        Frame whose index holds the titles.
    k : int
        One more than the number of recommendations wanted: ``k - 1`` titles
        are returned (the slot historically reserved for the query book
        itself). Values of ``k`` below 2 yield no recommendations.

    Returns
    -------
    pandas.Index or list
        The recommended titles, most similar first, or an empty list when
        ``book_title`` is not present in the index.
    """
    # Only the title lookup is expected to raise KeyError; keeping the try
    # narrow avoids masking unrelated KeyErrors as "book not found".
    try:
        book_index = pivot_df.index.get_loc(book_title)
    except KeyError:
        print(f"'{book_title}' adında bir kitap bulunamadı.")
        return []

    # Rank all titles from most to least similar. A stable sort makes the
    # order of tied scores deterministic.
    ranked = np.argsort(similarity[book_index], kind="stable")[::-1]

    # Remove the query book by index rather than assuming it is ranked first:
    # another title with an identical score (e.g. similarity 1.0) can outrank
    # it, in which case dropping position 0 would discard a real match and
    # recommend the book to itself.
    ranked = ranked[ranked != book_index]
    nearest_indices = ranked[: max(k - 1, 0)]
    similarity_scores = similarity[book_index, nearest_indices]

    # Pair every recommended title with its similarity score for display.
    recommendations = pivot_df.index[nearest_indices]
    recommendation_scores = list(zip(recommendations, similarity_scores))

    print(f"Kitap: {book_title}")
    print("Önerilen Kitaplar ve Benzerlik Skorları:")
    for i, (book, score) in enumerate(recommendation_scores):
        print(f"{i + 1}. {book} - Benzerlik Skoru: {score:.2f}")

    return recommendations

    
def steps(title, threshold, k, control=0,
          items_path="items.csv", transactions_path="transactions.csv"):
    """Run the whole item-item pipeline and print recommendations for ``title``.

    Parameters
    ----------
    title : str
        Book title to recommend for.
    threshold : int
        Minimum number of transaction rows (of any kind) a session must have
        to be kept.
    k : int
        Number of recommendations requested; ``k + 1`` is forwarded to
        ``recommend_w_similarities``, which reserves one slot for the query
        book itself.
    control : int, default 0
        Any non-zero value additionally prints a random sample of the
        centred pivot.
    items_path, transactions_path : str
        Locations of the pipe-delimited input files. The defaults are the
        file names that were previously hard-coded.

    Returns
    -------
    tuple
        ``(result, merged, merged_pivot)``: the centred pivot, the filtered
        purchase rows, and the zero-filled pivot before centring.
    """
    # Load the data.
    items = pd.read_csv(items_path, sep="|")
    items = items[["itemID", "title", "author"]]

    transactions = pd.read_csv(transactions_path, sep="|")
    session_counts = transactions["sessionID"].value_counts()
    valids = session_counts[session_counts >= threshold].index
    filtered_transactions = transactions[transactions["sessionID"].isin(valids)]

    merged = pd.merge(items, filtered_transactions, on="itemID")
    # Only rows with an actual purchase are considered; rows with missing
    # fields are dropped. Reassignment is used instead of in-place mutation
    # of a filtered frame.
    merged = merged[merged["order"] > 0].dropna()
    print("Merged Dataset shape:", merged.shape)

    merged_pivot = merged.pivot_table(columns="sessionID", index="title", values="order")
    print("Pivot shape:", merged_pivot.shape)
    merged_pivot = merged_pivot.fillna(0)
    # update_pivot modifies its argument, so it works on a copy and the
    # uncentred pivot stays available to the caller.
    result = update_pivot(merged_pivot.copy())
    print("Result shape:", result.shape)

    if control != 0:
        # Cap the sample size: sampling more rows than exist raises ValueError.
        print(result.sample(min(10, len(result))))

    similarities = cosine(result)
    recommend_w_similarities(title, similarities, result, k=k + 1)

    return result, merged, merged_pivot

# Title to generate recommendations for. Other titles that can be tried:
#   Harry Potter 2 and the Chamber of Secrets
#   Die letzte Königin - Das Feuer erwacht
#   Mortal Instruments 01. City of Bones
#   Warrior Cats Staffel 3/04. Die Macht der drei. Zeit der Dunkelheit
#   Sternenschweif 25: Freundschaftszauber
#   The Mortal Instruments 1-6 Slipcase
title = "Harry Potter 1 und der Stein der Weisen"

# Keep sessions with at least 5 transaction rows and ask for 5 recommendations.
# Note: this rebinds the notebook-level `merged` and `merged_pivot`.
result, merged, merged_pivot = steps(title=title, threshold=5, k=5)


Merged Dataset shape: (1067, 7)
Pivot shape: (764, 281)
Result shape: (764, 281)
Cosine Matrix shape: (764, 764)
Kitap: Harry Potter 1 und der Stein der Weisen
Önerilen Kitaplar ve Benzerlik Skorları:
1. Harry Potter 2 und die Kammer des Schreckens - Benzerlik Skoru: 0.87
2. Harry Potter 3 und der Gefangene von Askaban - Benzerlik Skoru: 0.82
3. Harry Potter 6 und der Halbblutprinz - Benzerlik Skoru: 0.71
4. Harry Potter 4 und der Feuerkelch. Taschenbuch - Benzerlik Skoru: 0.67
5. Harry Potter 5 und der Orden des Phönix - Benzerlik Skoru: 0.62


In [12]:
def search(result, str, regex=True):
    """Print the rows of ``result`` whose index label contains ``str``.

    Matching is case-insensitive.

    Parameters
    ----------
    result : pandas.DataFrame
        Frame with string index labels (e.g. book titles).
    str : str
        Text to look for. The name shadows the built-in inside this function
        only; it is kept so existing keyword callers continue to work.
    regex : bool, default True
        When True (the previous, implicit behaviour) ``str`` is interpreted
        as a regular expression. Pass False to match it literally, which is
        what is needed for titles containing characters such as ``(``,
        ``+`` or ``?``.

    Returns
    -------
    None
        The matching rows are printed, not returned.
    """
    # Locate the index labels that contain the given text. na=False makes
    # missing labels count as "no match"; otherwise they produce NA entries
    # and the boolean selection below fails.
    contains = result.index.str.contains(str, case=False, na=False, regex=regex)

    # Select the rows at those index positions.
    rows = result[contains]

    print(f"{str} içeren satırlar:")
    print(rows)
    
# List every title in the centred pivot that mentions "Harry Potter".
search(result, str="Harry Potter")

Harry Potter içeren satırlar:
sessionID                                           824     2011    2446    \
title                                                                        
Harry Potter 1 und der Stein der Weisen                0.0     0.0     0.0   
Harry Potter 2 and the Chamber of Secrets              0.0     0.0     0.0   
Harry Potter 2 und die Kammer des Schreckens           0.0     0.0     0.0   
Harry Potter 3 and the Prisoner of Azkaban             0.0     0.0     0.0   
Harry Potter 3 und der Gefangene von Askaban           0.0     0.0     0.0   
Harry Potter 4 and the Goblet of Fire                  0.0     0.0     0.0   
Harry Potter 4 und der Feuerkelch. Taschenbuch         0.0     0.0     0.0   
Harry Potter 5 and the Order of the Phoenix            0.0     0.0     0.0   
Harry Potter 5 und der Orden des Phönix                0.0     0.0     0.0   
Harry Potter 6 and the Half-Blood Prince               0.0     0.0     0.0   
Harry Potter 6 und der Halbblutpri

In [9]:
# Spot-check twenty random rows of the centred pivot.
result.sample(n=20)

sessionID,824,2011,2446,3893,4307,4518,4530,4950,5786,6058,...,268902,270350,271816,273431,273692,276655,277398,278934,279034,279199
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mein Anziehpuppen-Stickerbuch: Drei Freundinnen unterwegs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Die staubsaugende Schreckschraube,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Der kleine Vampir,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gregs Tagebuch 10 - So ein Mist!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Full Disclosure,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Biss zum Ende der Nacht,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Saeculum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Goldene Flammen,0.0,0.0,0.992883,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Die Wilden Hühner auf Klassenfahrt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Star Wars(TM) - Darth Bane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.996441,0.0,0.0,0.0,0.0,0.0,0.0,0.0
