## MovieDataLoader

In [34]:
import pandas as pd
import os

class MovieDataLoader:
    """Load dataset files into DataFrames and apply small clean-up steps.

    ``load`` reads a single ``.csv`` or ``.txt`` file. The static helpers
    each return a new DataFrame and never modify the one passed in, so they
    can be chained safely even when the input is a slice of another frame.
    """

    def __init__(self):
        """Initialize without parameters."""
        pass

    def load(self, file_path, encoding="utf-8"):
        """
        Load a single file into a DataFrame.

        Parameters:
        - file_path: str or os.PathLike, the path to the file to load.
        - encoding: str, encoding to use for reading the file.

        Returns:
        - DataFrame: the loaded DataFrame, or None when the file is missing,
          has an unsupported extension, or cannot be parsed. In those cases
          the error is printed instead of raised.
        """
        try:
            # Accept pathlib.Path and other os.PathLike objects as well as str.
            file_path = os.fspath(file_path)

            # Check if the file exists
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File not found: {file_path}")

            # Load data based on file extension (matched case-insensitively)
            lowered = file_path.lower()
            if lowered.endswith(".csv"):
                return pd.read_csv(file_path, engine='c', encoding=encoding)
            if lowered.endswith(".txt"):
                # Fields are separated by either '|' or a tab; a regex
                # separator requires the python engine. No header row is
                # assumed here -- see remove_first_row().
                return pd.read_csv(
                    file_path,
                    delimiter=r'[|\t]',
                    engine='python',
                    header=None,
                    encoding=encoding,
                )
            raise ValueError("Unsupported file format. Only .csv and .txt are supported.")
        except FileNotFoundError as e:
            print(f"Error: {e}")
        except Exception as e:
            # Best-effort loader: report the problem and fall through to None.
            print(f"An unexpected error occurred: {e}")
        return None

    @staticmethod
    def remove_first_row(df):
        """
        Promote the first row to column names and drop it from the data.

        Returns a new DataFrame with a fresh 0-based index; ``df`` itself is
        left unchanged. A frame with no rows is returned as an empty copy.
        """
        if len(df) == 0:
            return df.copy()
        header = df.iloc[0].tolist()
        # reset_index() returns a new object, so the caller's frame is safe.
        result = df.iloc[1:].reset_index(drop=True)
        result.columns = header
        return result

    @staticmethod
    def change_datatype(df, column, dtype):
        """
        Change the data type of a specific column.

        Call this last, after missing values have been filled: the
        conversion fails if the column still holds missing values that
        cannot be represented in ``dtype`` (for example NaN with ``int``).

        Returns a new DataFrame; ``df`` itself is left unchanged.
        """
        result = df.copy()
        result[column] = result[column].astype(dtype)
        return result

    @staticmethod
    def fill_each_other(df, col1, col2):
        """
        Fill missing values in each of two columns from the other column.

        Rows where both columns are missing are dropped. Returns a new
        DataFrame; ``df`` itself is left unchanged.
        """
        result = df.copy()
        # Compute both replacements before assigning so neither fill sees
        # the other's output.
        filled_first = result[col1].fillna(result[col2])
        filled_second = result[col2].fillna(result[col1])
        result[col1] = filled_first
        result[col2] = filled_second
        # Anything still null here was null in both columns.
        return result.dropna(subset=[col1, col2])

    @staticmethod
    def fill_val(df, column, value):
        """
        Fill missing values in a specific column with a given value.

        Returns a new DataFrame; ``df`` itself is left unchanged.
        """
        result = df.copy()
        result[column] = result[column].fillna(value)
        return result

## Load Dataset

In [39]:
# Directory that holds the dataset files, relative to this notebook.
kmrd_path = "../../kmrd-small"

# Map each dataset file name to its full path under kmrd_path.
file_paths = {
    name: f"{kmrd_path}/{name}"
    for name in (
        'countries.csv',
        'movies.txt',
        'genres.csv',
        'rates.csv',
        'peoples.txt',
        'castings.csv',
    )
}

loader = MovieDataLoader()

# movies.txt: promote the header row, back-fill the two title columns from
# each other, fill missing grade/year, then cast year to int as the last step.
movies_df = (
    loader.load(file_paths['movies.txt'])
    .pipe(MovieDataLoader.remove_first_row)
    .pipe(MovieDataLoader.fill_each_other, 'title', 'title_eng')
    .pipe(MovieDataLoader.fill_val, 'grade', 'Unknown')
    .pipe(MovieDataLoader.fill_val, 'year', 0)
    .pipe(MovieDataLoader.change_datatype, 'year', int)
)

# genres.csv: used as loaded.
genres_df = loader.load(file_paths['genres.csv'])

# rates.csv: the 'time' column is converted from Unix seconds to datetime.
rates_df = loader.load(file_paths['rates.csv'])
rates_df['time'] = pd.to_datetime(rates_df['time'], unit='s')

# peoples.txt: promote the header row, fill missing 'original' values, then
# cast the 'people' column to int.
peoples_df = (
    loader.load(file_paths['peoples.txt'])
    .pipe(MovieDataLoader.remove_first_row)
    .pipe(MovieDataLoader.fill_val, 'original', 'Unknown')
    .pipe(MovieDataLoader.change_datatype, 'people', int)
)

# castings.csv: used as loaded.
castings_df = loader.load(file_paths['castings.csv'])

In [40]:
import matplotlib.pyplot as plt

import platform

# Korean font setup: choose a font family by operating system so Korean
# labels render on both macOS and Windows. Any other platform keeps the
# previous default; add an entry here for a Korean font installed on it.
_KOREAN_FONT_BY_OS = {
    'Darwin': 'AppleGothic',     # macOS
    'Windows': 'Malgun Gothic',
}
plt.rcParams['font.family'] = _KOREAN_FONT_BY_OS.get(platform.system(), 'AppleGothic')
plt.rcParams['axes.unicode_minus'] = False  # Keep the minus sign from rendering as a broken glyph

## RandomRecommender