In [3]:
import pandas as pd

ratings = pd.read_csv('data/ml-100k/u.data', sep='\t', names=["user_id", "item_id", "rating", "timestamp"])
movies = pd.read_csv('data/ml-100k/u.item', sep='|', encoding='latin-1', usecols=[0, 1], names=["item_id", "title"])

df = pd.merge(ratings, movies, on="item_id")
df.head()



Unnamed: 0,user_id,item_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


In [6]:
print(f"Toplam kullanıcı sayısı: {df['user_id'].nunique()}")
print(f"Toplam film sayısı: {df['title'].nunique()}")
print(f"Toplam puan sayısı: {len(df)}")

print("\nPuanların dağılımı:")
print(df['rating'].value_counts().sort_index())




Toplam kullanıcı sayısı: 943
Toplam film sayısı: 1664
Toplam puan sayısı: 100000

Puanların dağılımı:
rating
1     6110
2    11370
3    27145
4    34174
5    21201
Name: count, dtype: int64


In [2]:
import os
print(os.getcwd())


C:\Users\TUĞBA\OneDrive - TED Üniversitesi\Masaüstü\smart-recommender


In [15]:
rating_counts = df.groupby('title').size()
popular_titles = rating_counts[rating_counts >= 100].index
popular_movies_df = df[df['title'].isin(popular_titles)].reset_index(drop=True)
popular_movies_df = popular_movies_df.sample(n=10000, random_state=42).reset_index(drop=True)




In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(popular_movies_df['title'])
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

indices = pd.Series(popular_movies_df.index, index=popular_movies_df['title']).drop_duplicates()

def get_recommendations(title, cosine_sim=cosine_sim, indices=indices, top_n=5):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx].flatten()))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    movie_indices = [i[0] for i in sim_scores]
    return popular_movies_df['title'].iloc[movie_indices]




In [19]:
print(get_recommendations('Star Wars (1977)'))

383    Star Wars (1977)
437    Star Wars (1977)
441    Star Wars (1977)
634    Star Wars (1977)
790    Star Wars (1977)
Name: title, dtype: object


In [20]:
# 'u.item' dosyasının yolu (dosyanın doğru yerde olduğundan emin ol)
item_path = 'data/ml-100k/u.item'  # senin dosya konumuna göre ayarla

# Tür sütun isimleri (19 tür var)
genres = ['unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary',
          'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
          'Thriller', 'War', 'Western']

# u.item dosyasını oku
movies_info = pd.read_csv(item_path, sep='|', header=None, encoding='latin-1',
                          names=['item_id', 'title'] + genres)

# Türleri birleştir (örneğin türü olanları virgülle ayırarak)
def combine_genres(row):
    return ','.join([genre for genre in genres if row[genre] == 1])

movies_info['genres_combined'] = movies_info.apply(combine_genres, axis=1)


In [21]:
# popular_movies_df ve movies_info'yu item_id ile birleştir (popüler filmlerle eşleştirme)
popular_movies_df = popular_movies_df.merge(movies_info[['title', 'genres_combined']], on='title', how='left')


In [24]:
# Tür sütun isimleri
genres = ['unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime', 'Documentary',
          'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
          'Thriller', 'War', 'Western']

# 'u.item' dosyasının yolunu belirt (senin dosya konumuna göre düzenle)
item_path = 'data/ml-100k/u.item'  

# Dosyayı oku
movies_info = pd.read_csv(item_path, sep='|', header=None, encoding='latin-1',
                          names=['item_id', 'title'] + genres)

# genres_combined sütununu oluştur
def combine_genres(row):
    return ','.join([genre for genre in genres if row[genre] == 1])

movies_info['genres_combined'] = movies_info.apply(combine_genres, axis=1)

# Kontrol için ilk birkaç satırı göster
print(movies_info[['title', 'genres_combined']].head())


                                                                             title  \
1 Toy Story (1995)  01-Jan-1995  http://us.imdb.com/M/title-exact?Toy%20Story%2...   
2 GoldenEye (1995)  01-Jan-1995  http://us.imdb.com/M/title-exact?GoldenEye%20(...   
3 Four Rooms (1995) 01-Jan-1995  http://us.imdb.com/M/title-exact?Four%20Rooms%...   
4 Get Shorty (1995) 01-Jan-1995  http://us.imdb.com/M/title-exact?Get%20Shorty%...   
5 Copycat (1995)    01-Jan-1995  http://us.imdb.com/M/title-exact?Copycat%20(1995)   

                                             genres_combined  
1 Toy Story (1995)  01-Jan-1995  Animation,Children's,Comedy  
2 GoldenEye (1995)  01-Jan-1995    Action,Adventure,Thriller  
3 Four Rooms (1995) 01-Jan-1995                     Thriller  
4 Get Shorty (1995) 01-Jan-1995          Action,Comedy,Drama  
5 Copycat (1995)    01-Jan-1995         Crime,Drama,Thriller  


In [25]:
popular_movies_df = popular_movies_df.merge(movies_info[['title', 'genres_combined']], on='title', how='left')
popular_movies_df['genres_combined'] = popular_movies_df['genres_combined'].fillna('')

print(popular_movies_df[['title', 'genres_combined']].head())


                                 title genres_combined
0     Silence of the Lambs, The (1991)                
1                       Tin Cup (1996)                
2  Truth About Cats & Dogs, The (1996)                
3                        Eraser (1996)                
4         Sense and Sensibility (1995)                


In [28]:
print("Boş tür sayısı:", popular_movies_df['genres_combined'].isna().sum())
print("Boş string sayısı:", (popular_movies_df['genres_combined'] == '').sum())
print(popular_movies_df['genres_combined'].head(10))
print(movies_info[['title', 'genres_combined']].head(10))
print("Merge sonrası tür bilgisi olan film sayısı:", popular_movies_df[popular_movies_df['genres_combined'] != ''].shape[0])
print("Merge sonrası tür bilgisi olmayan film sayısı:", popular_movies_df[popular_movies_df['genres_combined'] == ''].shape[0])


Boş tür sayısı: 0
Boş string sayısı: 10000
0    
1    
2    
3    
4    
5    
6    
7    
8    
9    
Name: genres_combined, dtype: object
                                                                                                               title  \
1  Toy Story (1995)                                   01-Jan-1995  http://us.imdb.com/M/title-exact?Toy%20Story%2...   
2  GoldenEye (1995)                                   01-Jan-1995  http://us.imdb.com/M/title-exact?GoldenEye%20(...   
3  Four Rooms (1995)                                  01-Jan-1995  http://us.imdb.com/M/title-exact?Four%20Rooms%...   
4  Get Shorty (1995)                                  01-Jan-1995  http://us.imdb.com/M/title-exact?Get%20Shorty%...   
5  Copycat (1995)                                     01-Jan-1995  http://us.imdb.com/M/title-exact?Copycat%20(1995)   
6  Shanghai Triad (Yao a yao yao dao waipo qiao) (... 01-Jan-1995  http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...   
7  Twelve Monkeys (1

In [29]:
genres = ['unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime', 'Documentary',
          'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
          'Thriller', 'War', 'Western']

columns = ['item_id', 'title'] + genres

movies_info = pd.read_csv('data/ml-100k/u.item', sep='|', header=None, names=columns, encoding='latin-1', usecols=range(2+len(genres)))

def combine_genres(row):
    return ','.join([genre for genre in genres if row[genre] == 1])

movies_info['genres_combined'] = movies_info.apply(combine_genres, axis=1)

print(movies_info[['title', 'genres_combined']].head())


               title          genres_combined
0   Toy Story (1995)  Crime,Documentary,Drama
1   GoldenEye (1995)        Children's,Comedy
2  Four Rooms (1995)                         
3  Get Shorty (1995)  Children's,Drama,Horror
4     Copycat (1995)           Fantasy,Horror


In [30]:
popular_movies_df = popular_movies_df.merge(movies_info[['title', 'genres_combined']], on='title', how='left')
popular_movies_df['genres_combined'] = popular_movies_df['genres_combined'].fillna('')

print(popular_movies_df[['title', 'genres_combined']].head())


MergeError: Passing 'suffixes' which cause duplicate columns {'genres_combined_x'} is not allowed.

In [31]:
# Eğer varsa eski genres_combined sütununu sil
if 'genres_combined' in popular_movies_df.columns:
    popular_movies_df = popular_movies_df.drop(columns=['genres_combined'])

# movies_info'dan tür bilgisi olan sütunu getir ve merge et
popular_movies_df = popular_movies_df.merge(
    movies_info[['title', 'genres_combined']],
    on='title',
    how='left'
)

# Boş olanları boş string yap
popular_movies_df['genres_combined'] = popular_movies_df['genres_combined'].fillna('')

# Kontrol için birkaç satır yazdır
print(popular_movies_df[['title', 'genres_combined']].head())


                                 title genres_combined
0     Silence of the Lambs, The (1991)          Horror
1                       Tin Cup (1996)       Drama,War
2  Truth About Cats & Dogs, The (1996)       Drama,War
3                        Eraser (1996)      Children's
4         Sense and Sensibility (1995)      Horror,War


In [35]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# 1. Veri yükle (örnek: popular_movies_df, movies_info daha önce hazır olmalı)
# popular_movies_df içinde 'title' ve 'genres_combined' sütunları olmalı

# Örnek: indices'i tekil title-index eşlemesi yapacak şekilde oluştur
indices = pd.Series(popular_movies_df.index, index=popular_movies_df['title']).drop_duplicates()

# 2. TF-IDF modelini oluştur, genres_combined boşsa boş string yap
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(popular_movies_df['genres_combined'].fillna(''))

# 3. Kosinüs benzerlik matrisi
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# 4. Öneri fonksiyonu
def get_recommendations(title, cosine_sim=cosine_sim, indices=indices, top_n=5):
    if title not in indices:
        return f"'{title}' filmi veride bulunamadı."
    
    idx = indices[title]
    
    # flatten() ekledik burada:
    sim_scores = list(enumerate(cosine_sim[idx].flatten()))
    
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    movie_indices = [i[0] for i in sim_scores]
    return popular_movies_df['title'].iloc[movie_indices]


# 5. Test et
print(get_recommendations('Silence of the Lambs, The (1991)'))


15    Shawshank Redemption, The (1994)
18                  Primal Fear (1996)
32                 Philadelphia (1993)
46        It's a Wonderful Life (1946)
64           Mr. Holland's Opus (1995)
Name: title, dtype: object


In [None]:
movie_name = input("Film adı girin: ")
recommendations = get_recommendations(movie_name)
print("Önerilen filmler:")
print(recommendations)

