In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import coo_matrix
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k

In [2]:
ratings_url = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/refs/heads/master/ratings.csv"
books_url = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/refs/heads/master/books.csv"

In [3]:
ratings = pd.read_csv(ratings_url)
books =  pd.read_csv(books_url)

In [4]:
print(ratings.shape)
ratings.head()

(5976479, 3)


Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [5]:
print(books.shape)
books.head()

(10000, 23)


Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [6]:
print(ratings['rating'].unique())

ratings['rating'].value_counts(normalize=True)

[5 4 3 2 1]


rating
4    0.357906
5    0.331816
3    0.229385
2    0.060112
1    0.020781
Name: proportion, dtype: float64

In [7]:
# Залишаємо лише позитивні взаємодії
ratings = ratings[ratings['rating'] >= 4]
print(ratings)

         user_id  book_id  rating
0              1      258       5
1              2     4081       4
2              2      260       5
3              2     9296       5
5              2       26       4
...          ...      ...     ...
5976474    49925      510       5
5976475    49925      528       4
5976476    49925      722       4
5976477    49925      949       5
5976478    49925     1023       4

[4122111 rows x 3 columns]


In [8]:
# Змінюємо к-ть елементів ratings до 100000
# ratings=ratings.sample(n=100000)
# print(ratings)

In [9]:
# LightFM працює з розрідженими матрицями формату scipy.sparse, де індекси — числові (0, 1, 2, …).

user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

ratings['user_idx'] = user_encoder.fit_transform(ratings['user_id'])
ratings['item_idx'] = item_encoder.fit_transform(ratings['book_id'])

num_users = ratings['user_idx'].nunique()
num_items = ratings['item_idx'].nunique()

In [10]:
ratings.head(5)

Unnamed: 0,user_id,book_id,rating,user_idx,item_idx
0,1,258,5,0,257
1,2,4081,4,1,4080
2,2,260,5,1,259
3,2,9296,5,1,9295
5,2,26,4,1,25


In [11]:
# Формуємо матрицю взаємодій (interaction matrix)

interaction_matrix = coo_matrix(
    (np.ones(ratings.shape[0]), (ratings['user_idx'], ratings['item_idx'])),
    shape=(num_users, num_items)
)

In [12]:
# Розділення на train/test 
train, test = random_train_test_split(interaction_matrix, test_percentage=0.2, random_state=42)

In [13]:
model = LightFM(loss='warp')  # або 'bpr', 'logistic'?

In [14]:
model.fit(train, epochs=100, num_threads=10)

<lightfm.lightfm.LightFM at 0x7939be447d90>

In [15]:
# Оцінка точності
precision = precision_at_k(model, test, k=5).mean()
print(f"Precision@5: {precision:.4f}")

Precision@5: 0.0995


In [16]:
# Побудова словника відповідностей
user_idx_to_id = dict(enumerate(user_encoder.inverse_transform(range(num_users))))
item_idx_to_id = dict(enumerate(item_encoder.inverse_transform(range(num_items))))
book_id_to_title = dict(zip(books['book_id'], books['title']))

In [17]:
# Функція генерації рекомендацій
def recommend_books(user_id, n=10):
    try:
        user_idx = user_encoder.transform([user_id])[0]
    except:
        return f"Користувача з ID {user_id} не знайдено."

    scores = model.predict(np.repeat(user_idx, num_items), np.arange(num_items))
    top_items = np.argsort(-scores)[:n]

    recommended_book_ids = [item_idx_to_id[i] for i in top_items]
    titles = [book_id_to_title.get(book_id, f"[book_id {book_id}]") for book_id in recommended_book_ids]
    return titles

In [27]:
example_user_id = ratings['user_id'].sample(1).values[0]
print(f"\nРекомендації для користувача {example_user_id}:")
print(recommend_books(example_user_id))


Рекомендації для користувача 13893:
['The Grapes of Wrath', 'The Road', 'A Farewell to Arms', 'Of Mice and Men', 'East of Eden', 'Into Thin Air: A Personal Account of the Mount Everest Disaster', 'Lord of the Flies', 'The Old Man and the Sea', 'Into the Wild', '1984']


## User_interface

In [21]:
import gradio as gr

In [29]:
def inter(example_user_id):
    return recommend_books(example_user_id)

demo = gr.Interface(
    fn=inter,
    inputs=["text"],
    outputs=["text"],
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.


