In [25]:
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

# Read the dataset from 'ratings.csv' into a DataFrame
# Adjust the column names as per your dataset
column_names = ['user_id', 'isbn', 'ratings']
df = pd.read_csv('ratings.csv', usecols=column_names, dtype={'user_id': str, 'isbn': str, 'ratings': float}, na_values=['NA', 'N/A', 'missing'])

# Drop rows with missing values
df.dropna(subset=['user_id', 'isbn', 'ratings'], inplace=True)

# Define the Reader object
reader = Reader(rating_scale=(0, 10))

# Load the dataset from the DataFrame using the appropriate columns
data = Dataset.load_from_df(df[['user_id', 'isbn', 'ratings']], reader)

# Split the data into training and test sets
trainset, testset = train_test_split(data, test_size=0.25)

# Initialize KNNBasic with appropriate parameters
sim_options = {'name': 'cosine', 'user_based': True}  # Set user-based similarity
algo = KNNBasic(k=40, sim_options=sim_options)

# Fit the algorithm to the training data
algo.fit(trainset)

# Make predictions on the test set
predictions = algo.test(testset)

# Evaluate accuracy
print("RMSE:", accuracy.rmse(predictions))
print("MAE:", accuracy.mae(predictions))

# Generate recommendations for user 'user1'
user_id = 'user1'

try:
    # Get the inner user ID corresponding to 'user1'
    inner_user_id = trainset.to_inner_uid(user_id)

    # Get the items already rated by 'user1'
    already_rated = set([item[0] for item in trainset.ur[inner_user_id]])

    # Generate recommendations for items not yet rated by 'user1'
    items_to_recommend = []
    for item_id in trainset.all_items():
        if item_id not in already_rated:
            items_to_recommend.append((item_id, algo.predict(user_id, item_id).est))

    # Sort recommendations by predicted rating
    sorted_recommendations = sorted(items_to_recommend, key=lambda x: x[1], reverse=True)

    print("Top Recommendations for", user_id)
    for item_id, rating in sorted_recommendations[:10]:
        print(f"Item ID: {item_id}, Predicted Rating: {rating:.2f}")

except ValueError as e:
    print(e)  # Handle the case where 'user1' is not part of the training set


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.8927
RMSE: 2.892652908822402
MAE:  2.5098
MAE: 2.509774606062856
Top Recommendations for user1
Item ID: 0, Predicted Rating: 5.06
Item ID: 1, Predicted Rating: 5.06
Item ID: 2, Predicted Rating: 5.06
Item ID: 3, Predicted Rating: 5.06
Item ID: 4, Predicted Rating: 5.06
Item ID: 5, Predicted Rating: 5.06
Item ID: 6, Predicted Rating: 5.06
Item ID: 7, Predicted Rating: 5.06
Item ID: 8, Predicted Rating: 5.06
Item ID: 9, Predicted Rating: 5.06
