#### Step 1: Load the Data Files

In [None]:
import pandas as pd

# Column names for each ml-100k file. Because `names=` is passed, read_csv
# treats the first line of every file as data rather than as a header row.
rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']
movie_columns = [
    # descriptive fields
    'item_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL',
    # genre columns
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
user_columns = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
genre_columns = ['genre', 'genre_id']

# u.data: tab-separated user-item interactions
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=rating_columns)

# u.item: pipe-separated movie metadata, decoded as latin-1
movies = pd.read_csv('ml-100k/u.item', sep='|', names=movie_columns, encoding='latin-1')

# u.user: pipe-separated user demographics
users = pd.read_csv('ml-100k/u.user', sep='|', names=user_columns)

# u.genre: pipe-separated genre list; usecols=[0] keeps only the 'genre' column
genres = pd.read_csv('ml-100k/u.genre', sep='|', names=genre_columns, usecols=[0], encoding='latin-1')

# Preview the first rows of each dataframe to understand its structure
for frame in (ratings, movies, users, genres):
    print(frame.head())

#### Step 2: Construct the Knowledge Graph

In [10]:
# Build the knowledge graph as a list of (head, relation, tail) triples.
#
# Columns are iterated directly instead of using DataFrame.iterrows():
# iterrows builds a Series for every row (slow on large frames) and does not
# preserve dtypes across a row, so a mixed-dtype frame could turn integer ids
# into floats and leak a '.0' suffix into the formatted entity names.

# Triples for user-movie interactions (user rated movie), one per ratings row
kg_data = [
    (f'user_{user_id}', 'rated', f'movie_{item_id}')
    for user_id, item_id in zip(ratings['user_id'], ratings['item_id'])
]

# Triples for movie-genre relations: one per genre column whose value is 1.
# Rows are walked in movie order and genres in the order listed in `genres`,
# so the resulting triple order matches a row-by-row traversal.
genre_names = list(genres['genre'])
# name=None yields plain tuples, which also avoids namedtuple field renaming
# for column names that are not valid identifiers (e.g. "Sci-Fi").
genre_flag_rows = movies[genre_names].itertuples(index=False, name=None)
for item_id, genre_flags in zip(movies['item_id'], genre_flag_rows):
    movie_id = f'movie_{item_id}'
    kg_data.extend(
        (movie_id, 'belongs_to_genre', genre)
        for genre, flag in zip(genre_names, genre_flags)
        if flag == 1
    )

# Create triples for movie metadata (could add director, actors, etc. if available)
# For simplicity, we'll skip this in this example.

# Convert the list of triples into a DataFrame
kg_df = pd.DataFrame(kg_data, columns=['head', 'relation', 'tail'])

# Display the first few rows of the knowledge graph DataFrame
print(kg_df.head())


       head relation       tail
0  user_196    rated  movie_242
1  user_186    rated  movie_302
2   user_22    rated  movie_377
3  user_244    rated   movie_51
4  user_166    rated  movie_346


#### Step 3: Save the Knowledge Graph to CSV

In [11]:
# Export the knowledge graph DataFrame to a CSV file. The path is relative,
# so the file lands in the current working directory; index=False leaves the
# DataFrame index out of the file.
kg_output_path = 'knowledge_graph_movies.csv'
kg_df.to_csv(kg_output_path, index=False)

# Report the path that was actually written, so the message cannot drift
# from the real file name.
print(f"Knowledge graph saved to '{kg_output_path}'")

Knowledge graph saved to 'knowledge_graph.csv'
