## Database Connection

Connecting to the Database:

In [None]:
from data_manager import DataManager

import os

# Connection settings can be overridden through environment variables so that
# credentials do not have to be edited (or committed) in the notebook itself.
# When a variable is unset, the original local-development value is used, so
# existing setups keep working unchanged.
# NOTE(review): the fallback password is still stored in plain text here --
# consider dropping it once IMDB_DB_PASSWORD is set wherever this notebook runs.
imdb_data = DataManager(
    dbname=os.environ.get('IMDB_DB_NAME', 'imdb'),
    user=os.environ.get('IMDB_DB_USER', 'samaher'),
    password=os.environ.get('IMDB_DB_PASSWORD', "CodingIsFun++"),
    host=os.environ.get('IMDB_DB_HOST', 'localhost'),
    port=os.environ.get('IMDB_DB_PORT', '5432'),
)

## Loading the Dataframe

I've extracted the data I need into 1 dataframe:
1. movie_data

In [None]:
# Get the movie data from the database.
# NOTE(review): presumably get_movie() returns a pandas DataFrame -- later cells
# read the 'official_title', 'year', 'votes', 'score' and 'country' columns
# from this object; confirm against data_manager.
movie_data = imdb_data.get_movie()

## Libraries
Libraries being used:

In [None]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt


## Difficulty Level 

The logic of determining the difficulty level of the quiz is built upon 2 elements:
- Recency: The more recent the movie, the easier it is to recognize it. This is determined based on the release date 'year'.
- Popularity: The more popular the movie, the easier it is to recognize it. This is determined based on the number of votes that are assigned to the movie on IMDb

To make the algorithm more accurate, I also added to the 'easy' category all movies that have at least 1 million votes on IMDb, regardless of their release year. In my opinion, movies that popular should always be categorized as 'easy'.

In [None]:
# function for the difficulty level

def determine_difficulty_level(year, votes):
    """Classify a movie as 'easy', 'medium', 'hard' or 'unknown'.

    Args:
        year: Release year of the movie.
        votes: Number of IMDb votes the movie received.

    Returns:
        'easy'    -- released in 2010 or later with at least 500,000 votes,
                     or released before 2010 with at least 1,000,000 votes;
        'medium'  -- released in 1990-2009 with 100,000 to 499,999 votes;
        'hard'    -- released before 1990 with fewer than 100,000 votes;
        'unknown' -- every other combination (for example a 2015 movie with
                     only a few thousand votes).
    """
    released_recently = year >= 2010
    released_earlier = year < 2010

    # Recent movies need fewer votes than older ones to count as well known.
    if (released_recently and votes >= 500000) or (released_earlier and votes >= 1000000):
        return 'easy'

    if 1990 <= year < 2010 and 100000 <= votes < 500000:
        return 'medium'

    if year < 1990 and votes < 100000:
        return 'hard'

    # Combinations not covered by the three rules above are left unclassified.
    return 'unknown'

## Score

The score is calculated this way: 

1. wrong answer: 0 points
2. correct answer:
    - 1 point if EASY
    - 2 points if MEDIUM
    - 3 points if HARD
    

In [None]:
# function calculating the score

def calculate_score(difficulty_level, is_correct):
    """Return the points earned for a single quiz answer.

    Args:
        difficulty_level: Difficulty of the question: 'easy', 'medium' or 'hard'.
        is_correct: Boolean telling whether the answer given by the user was right.

    Returns:
        0 for a wrong answer; for a correct answer 1 (easy), 2 (medium) or
        3 (hard). A correct answer with an unrecognized difficulty level
        earns 0 points instead of returning None, so the result can always
        be added to a running total.
    """
    if not is_correct:
        return 0

    points_by_difficulty = {'easy': 1, 'medium': 2, 'hard': 3}
    return points_by_difficulty.get(difficulty_level, 0)


## Highest Score

This function returns the title of the movie that has the highest IMDb score in the given dataframe.

In [None]:
def find_highest_score_movie(subset_df):
    """Return the 'official_title' of the row with the highest 'score'.

    Args:
        subset_df: DataFrame with at least the columns 'score' (numeric, or
            values convertible by ``pd.to_numeric``) and 'official_title'.

    Returns:
        The title stored in the first row that holds the maximum score.

    Raises:
        ValueError: If ``subset_df`` has no rows, or a score cannot be
            converted to a number.
    """
    # Convert on a separate array instead of writing back into the caller's
    # dataframe, so the function has no side effects on its argument.
    numeric_scores = pd.to_numeric(subset_df['score']).to_numpy()

    # np.argmax yields a *position*, so the title must be looked up with
    # positional indexing (.iloc). Label-based .loc only happens to work when
    # the dataframe carries a default 0..n-1 index.
    best_position = int(np.argmax(numeric_scores))

    return subset_df['official_title'].iloc[best_position]


## Generating Questions + Options

In [None]:
def _build_question(prompt, correct_option, distractor_pool, difficulty_level):
    """Assemble a question dict from one correct option and 3 random distractors."""
    options = [correct_option] + random.sample(distractor_pool, 3)

    # Shuffling so the correct answer does not always sit behind the same letter
    random.shuffle(options)

    # Mapping options to letters (A, B, C, D)
    options_mapping = {chr(ord('A') + i): option for i, option in enumerate(options)}

    return {
        'question': prompt,
        'options': options_mapping,
        'correct_answer': chr(ord('A') + options.index(correct_option)),
        'difficulty_level': difficulty_level,
    }


def generate_question(row, desired_difficulty, question_type, correct_answer_column):
    """Build one multiple-choice question from a movie row.

    The answer options are drawn from the module-level ``movie_data``
    dataframe.

    Args:
        row: A movie record providing 'official_title', 'year', 'votes' and
            the column named by ``correct_answer_column``.
        desired_difficulty: Difficulty level the question must have.
        question_type: Question text. The exact text
            'Which one of these movies has the highest score on IMDb?'
            selects the highest-score question; any other text is shown
            together with the movie title.
        correct_answer_column: Column of ``row`` that holds the right answer
            for the regular (non highest-score) questions.

    Returns:
        A dict with the keys 'question', 'options' (letter -> option),
        'correct_answer' (letter) and 'difficulty_level', or None when the
        row's difficulty differs from ``desired_difficulty``.

    Raises:
        ValueError: If fewer than 3 distinct wrong options are available.
    """
    official_title = row['official_title']
    correct_answer = row[correct_answer_column]

    # Determining difficulty level
    difficulty_level = determine_difficulty_level(int(row['year']), int(row['votes']))

    # Rows of another difficulty produce no question
    if difficulty_level != desired_difficulty:
        return None

    if question_type == 'Which one of these movies has the highest score on IMDb?':
        highest_score_movie = find_highest_score_movie(movie_data)

        # Wrong options must contain neither the current movie nor the
        # top-scoring movie itself; otherwise the correct title could be
        # listed twice among the four options.
        excluded_titles = {official_title, highest_score_movie}
        wrong_titles = [
            title
            for title in movie_data['official_title'].unique()
            if title not in excluded_titles
        ]
        return _build_question(question_type, highest_score_movie, wrong_titles, difficulty_level)

    # Every distinct value of the answer column except the correct one
    wrong_values = [
        value
        for value in movie_data[correct_answer_column].unique()
        if value != correct_answer
    ]
    return _build_question(
        f'{question_type} || {official_title} ||',
        correct_answer,
        wrong_values,
        difficulty_level,
    )


## THE Quiz

Well, basically, the quiz is composed of:
- 10 questions, presented one at a time
- For each question, you have 4 options including only ONE correct answer

P.S. Before starting the quiz, you have to pick the desired difficulty level.

In [None]:
def _ask_until_valid(prompt, valid_answers, normalize, error_message):
    """Prompt repeatedly until the normalized reply is one of valid_answers."""
    while True:
        # Surrounding whitespace is ignored so that e.g. " a " counts as "A"
        reply = normalize(input(prompt).strip())
        if reply in valid_answers:
            return reply
        print(error_message)


def quiz_game(data_frame=None):
    """Run an interactive quiz of up to 10 multiple-choice questions.

    The player first picks a difficulty level, then answers questions one by
    one; each answer is scored with ``calculate_score`` and the total is
    printed at the end.

    Args:
        data_frame: DataFrame whose rows are used to generate questions.
            Defaults to the module-level ``movie_data``.

    Returns:
        None. All interaction happens through ``input`` and ``print``.
    """
    if data_frame is None:
        # If no dataframe is provided, we'll use movie_data as default
        data_frame = movie_data

    # Without rows no question can ever be generated
    if len(data_frame) == 0:
        print("No movies available - cannot start the quiz.")
        return

    difficulty_levels = ['easy', 'medium', 'hard']
    choice_letters = ['A', 'B', 'C', 'D']
    # Upper bound on random draws per question, so the quiz cannot spin
    # forever when too few unused questions exist for the chosen difficulty.
    max_attempts_per_question = 10000
    total_score = 0
    used_questions = set()

    # Getting (and validating) user input for difficulty level
    user_difficulty = _ask_until_valid(
        "Choosing a difficulty level (easy, medium, hard): ",
        difficulty_levels,
        str.lower,
        "Invalid difficulty level. Please choose from: easy, medium, hard",
    )

    # List of question generators with parameters for different question types
    question_generators = [
        {'generator': generate_question, 'params': ('When was this movie released? ==> ', 'year')},
        {'generator': generate_question, 'params': ('Where was this movie produced? ==> ', 'country')},
        {'generator': generate_question, 'params': ('Which one of these movies has the highest score on IMDb?', 'score')}
    ]

    # Playing 10 questions, randomly choosing from different types of questions
    for _ in range(10):
        question_info = None

        for _attempt in range(max_attempts_per_question):
            # Randomly select a question generator with its respective parameters
            generator_info = random.choice(question_generators)
            question_type, correct_answer_column = generator_info['params']

            row = data_frame.iloc[random.randrange(len(data_frame))]

            # The generator returns None when the row has another difficulty
            candidate = generator_info['generator'](row, user_difficulty, question_type, correct_answer_column)

            # Making sure used questions do not appear again in the same quiz
            if candidate is not None and candidate['question'] not in used_questions:
                used_questions.add(candidate['question'])
                question_info = candidate
                break

        if question_info is None:
            print("Could not find another unused question for this difficulty level. Ending the quiz early.")
            break

        # Printing the question information
        print(question_info['question'])

        # Printing options with letters (A, B, C, D)
        for letter, option in question_info['options'].items():
            print(f"{letter}. {option}")

        # Getting user's choice with validation
        user_choice = _ask_until_valid(
            "Entering your choice (A, B, C, D): ",
            choice_letters,
            str.upper,
            "You typed a wrong letter. Please type again.",
        )

        # Checking if the user's choice is correct
        is_correct = user_choice == question_info['correct_answer']

        # Calculating and displaying the score
        score = calculate_score(question_info['difficulty_level'], is_correct)
        total_score += score

        # Providing feedback on the answer
        if is_correct:
            print("Correct!")
        else:
            print(f"Wrong! The correct answer is: {question_info['correct_answer']}")

        print(f"Your score for this question: {score}")
        print("----------------------------")

    print(f"Total score: {total_score}")


## Leveraging Game Score for Viz

In [None]:
        def display_histogram(self, player_name):
            """Plot a histogram of all recorded scores and mark one player's score.

            Reads ``self.game_scores``, an iterable of ``(name, score)`` pairs.

            Args:
                player_name: Name whose score is marked with a dashed red
                    vertical line. When the name occurs more than once, the
                    first matching entry is used; when it does not occur, only
                    the histogram is drawn.
            """
            # Extracting scores from game_scores
            scores = [score for _, score in self.game_scores]

            # Player's score (None when the player has no recorded score)
            player_score = next((score for name, score in self.game_scores if name == player_name), None)

            # Plotting the histogram
            plt.figure(figsize=(8, 6))
            plt.hist(scores, bins=10, alpha=0.7, color='blue', edgecolor='black')

            # Compare against None rather than truthiness, so that a
            # legitimate score of 0 is still marked on the plot.
            if player_score is not None:
                plt.axvline(x=player_score, color='red', linestyle='dashed', linewidth=2, label=f'{player_name}\'s score')
                # The legend is only meaningful when the labelled line exists
                plt.legend()

            plt.xlabel('Scores')
            plt.ylabel('Frequency')
            plt.title('Score Distribution')
            plt.grid(True)
            plt.show()

In [None]:
    def display_histogram(self, player_name):
        """Draw a bar chart with one bar per player and highlight one of them.

        Reads ``self.game_scores``, an iterable of ``(name, score)`` pairs;
        when a name occurs several times, the last score recorded for it is
        the one plotted.

        Args:
            player_name: Player whose bar is coloured red instead of blue.
                Names without a recorded score leave the chart unhighlighted.
        """
        score_by_player = {player: points for player, points in self.game_scores}
        plt.figure(figsize=(10, 6))

        # Parallel lists: bar labels and bar heights
        names = [*score_by_player.keys()]
        scores = [*score_by_player.values()]

        # One blue bar per player
        bars = plt.bar(names, scores, color='blue')

        # Recolour the bar belonging to the requested player, if present
        if player_name in score_by_player:
            highlighted = names.index(player_name)
            bars[highlighted].set_color('red')

        plt.xlabel('Players')
        plt.ylabel('Scores')
        plt.title('Players\' Scores Distribution')
        plt.xticks(rotation=45)  # Slanted labels stay readable with many players
        plt.grid(axis='y')  # Horizontal grid lines only
        plt.tight_layout()
        plt.show()

## Checking the Quiz

In [None]:
# Start an interactive quiz; called without an argument, quiz_game falls back
# to the movie_data dataframe loaded above.
quiz_game()

## Closing the Connection 

In [None]:
# Done with the database: close the connection held by imdb_data.
# NOTE(review): close_connection() is defined in data_manager -- presumably it
# releases the underlying database connection; verify there.
imdb_data.close_connection()