In [None]:
import os
import pandas as pd
import numpy as np
from tkinter import *
from tkinter import filedialog
import tkinter.font as font
from pyresparser import ResumeParser
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
class TrainModel:
    def __init__(self):
        self.model = None

    def train(self):
        # Load your CSV data
        data = pd.read_csv('training_dataset.csv')

        X = data[['Gender', 'Age', 'Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']]
        y = data['PersonalityType']

        # Convert categorical variables to numerical
        X['Gender'] = X['Gender'].apply(lambda x: 1 if x == 'Male' else 0)

        # Split the dataset into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train the model
        self.model = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
        self.model.fit(X_train, y_train)

        # Evaluate the model
        y_pred = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f'Model Accuracy: {accuracy}')

    def predict(self, test_data):
        try:
            test_predict = [int(i) for i in test_data]
            y_pred = self.model.predict([test_predict])
            return y_pred
        except:
            print("All Factors For Finding Personality Not Entered!")

In [None]:

def check_type(data):
    if type(data) == str or type(data) == str:
        return str(data).title()
    if type(data) == list or type(data) == tuple:
        str_list = ""
        for i, item in enumerate(data):
            str_list += item + ", "
        return str_list
    else:
        return str(data)

In [None]:
def prediction_result(top, aplcnt_name, cv_path, personality_values):
    top.withdraw()
    applicant_data = {"Candidate Name": aplcnt_name.get(), "CV Location": cv_path}

    age = personality_values[1]

    print("\n############# Candidate Entered Data #############\n")
    print(applicant_data, personality_values)

    # Assuming 'model' is an instance of the TrainModel class
    personality = model.predict(personality_values)
    print("\n############# Predicted Personality #############\n")
    print(personality)

    data = ResumeParser(cv_path).get_extracted_data()

    try:
        del data['name']
        if len(data['mobile_number']) < 10:
            del data['mobile_number']
    except:
        pass

    print("\n############# Resume Parsed Data #############\n")

    for key in data.keys():
        if data[key] is not None:
            print('{} : {}'.format(key, data[key]))

    result = Tk()
    result.overrideredirect(False)
    result.geometry("{0}x{1}+0+0".format(result.winfo_screenwidth(), result.winfo_screenheight()))
    result.configure(background='White')
    result.title("Predicted Personality")

    # Title
    titleFont = font.Font(family='Arial', size=40, weight='bold')
    Label(result, text="Result - Personality Prediction", foreground='green', bg='white', font=titleFont,
          pady=10, anchor=CENTER).pack(fill=BOTH)

    Label(result, text=str('{} : {}'.format("Name:", aplcnt_name.get())).title(), foreground='black', bg='white',
          anchor='w').pack(fill=BOTH)
    Label(result, text=str('{} : {}'.format("Age:", age)), foreground='black', bg='white', anchor='w').pack(fill=BOTH)
    for key in data.keys():
        if data[key] is not None:
            Label(result, text=str('{} : {}'.format(check_type(key.title()), check_type(data[key]))),
                  foreground='black', bg='white', anchor='w', width=60).pack(fill=BOTH)
    Label(result, text=str("Predicted personality: " + personality).title(), foreground='black', bg='white',
          anchor='w').pack(fill=BOTH)

    quitBtn = Button(result, text="Exit", command=lambda: result.destroy()).pack()

    terms_mean = """
# Openness:
    People who like to learn new things and enjoy new experiences usually score high in openness. Openness includes traits like being insightful and imaginative and having a wide variety of interests.

# Conscientiousness:
    People that have a high degree of conscientiousness are reliable and prompt. Traits include being organised, methodic, and thorough.

# Extraversion:
    Extraversion traits include being; energetic, talkative, and assertive (sometime seen as outspoken by Introverts). Extraverts get their energy and drive from others, while introverts are self-driven get their drive from within themselves.

# Agreeableness:
    As it perhaps sounds, these individuals are warm, friendly, compassionate and cooperative and traits include being kind, affectionate, and sympathetic. In contrast, people with lower levels of agreeableness may be more distant.

# Neuroticism:
    Neuroticism or Emotional Stability relates to the degree of negative emotions. People that score high on neuroticism often experience emotional instability and negative emotions. Characteristics typically include being moody and tense.
"""

    Label(result, text=terms_mean, foreground='green', bg='white', anchor='w', justify=LEFT).pack(fill=BOTH)

    result.mainloop()

In [None]:

def predict_person():
    root.withdraw()

    top = Toplevel()
    top.geometry('700x500')
    top.configure(background='black')
    top.title("Apply For A Job")

    titleFont = font.Font(family='Helvetica', size=20, weight='bold')
    lab = Label(top, text="Personality Prediction", foreground='red', bg='black', font=titleFont, pady=10).pack()

    job_list = ('Select Job', '101-Developer at TTC', '102-Chef at Taj', '103-Professor at MIT')
    job = StringVar(top)
    job.set(job_list[0])

    l1 = Label(top, text="Applicant Name", foreground='white', bg='black').place(x=70, y=130)
    l2 = Label(top, text="Age", foreground='white', bg='black').place(x=70, y=160)
    l3 = Label(top, text="Gender", foreground='white', bg='black').place(x=70, y=190)
    l4 = Label(top, text="Upload Resume", foreground='white', bg='black').place(x=70, y=220)
    l5 = Label(top, text="Enjoy New Experience or thing(Openness)", foreground='white', bg='black').place(x=70, y=250)
    l6 = Label(top, text="How Often You Feel Negativity(Neuroticism)", foreground='white', bg='black').place(x=70, y=280)
    l7 = Label(top, text="Wishing to do one's work well and thoroughly(Conscientiousness)", foreground='white',
               bg='black').place(x=70, y=310)
    l8 = Label(top, text="How much would you like to work with your peers(Agreeableness)", foreground='white',
               bg='black').place(x=70, y=340)
    l9 = Label(top, text="How outgoing and social interaction you like(Extraversion)", foreground='white',
               bg='black').place(x=70, y=370)

    sName = Entry(top)
    sName.place(x=450, y=130, width=160)
    age = Entry(top)
    age.place(x=450, y=160, width=160)
    gender = IntVar()
    R1 = Radiobutton(top, text="Male", variable=gender, value=1, padx=7)
    R1.place(x=450, y=190)
    R2 = Radiobutton(top, text="Female", variable=gender, value=0, padx=3)
    R2.place(x=540, y=190)
    cv = Button(top, text="Select File", command=lambda: OpenFile(cv))
    cv.place(x=450, y=220, width=160)
    openness = Entry(top)
    openness.insert(0, '1-10')
    openness.place(x=450, y=250, width=160)
    neuroticism = Entry(top)
    neuroticism.insert(0, '1-10')
    neuroticism.place(x=450, y=280, width=160)
    conscientiousness = Entry(top)


In [None]:
def OpenFile(b4):
    global loc
    name = filedialog.askopenfilename(initialdir="C:/Users/Batman/Documents/Programming/tkinter/",
                                       filetypes=(("Document", "*.docx*"), ("PDF", "*.pdf*"), ('All files', '*')),
                                       title="Choose a file."
                                       )
    try:
        filename = os.path.basename(name)
        loc = name
    except:
        filename = name
        loc = name
    b4.config(text=filename)
    return

In [None]:
if __name__ == "__main__":
    model = TrainModel()
    model.train()

    root = Tk()
    root.geometry('700x500')
    root.configure(background='white')
    root.title("Personality Prediction System")
    titleFont = font.Font(family='Helvetica', size=25, weight='bold')
    homeBtnFont = font.Font(size=12, weight='bold')
    lab = Label(root, text="Personality Prediction System", bg='white', font=titleFont, pady=30).pack()
    b2 = Button(root, padx=4, pady=4, width=30, text="Predict Personality", bg='black', foreground='white', bd=1,
                font=homeBtnFont, command=predict_person).place(relx=0.5, rely=0.5, anchor=CENTER)
    root.mainloop()

KeyError: ignored

In [4]:
import pandas as pd
links_data=pd.read_csv("/content/links.csv")
movies_data=pd.read_csv("/content/movies.csv")
rating_data=pd.read_csv("/content/ratings.csv")
links_data.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [5]:
rating_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [20]:
import pandas as pd

# Assuming you have a DataFrame named 'ratings_df'
# Replace 'ratings_df' with the actual name of your DataFrame

# Group by 'movieId' and count the number of ratings for each movie
movie_ratings_count = rating_data.groupby('movieId')['rating'].count().reset_index()

# Filter movies with more than 50 ratings
movies_with_50_ratings_or_more = movie_ratings_count[movie_ratings_count['rating'] > 50]

# Display the result
print("Movies with more than 50 ratings:")
print(movies_with_50_ratings_or_more)
import pandas as pd

# Assuming you have a DataFrame named 'rating_data'
# Assuming 'movies_with_50_ratings_or_more' is a DataFrame containing movies with more than 50 ratings

# Merge the two DataFrames on 'movieId'
merged_data = pd.merge(links_data, movies_with_50_ratings_or_more, on='movieId', how='inner')

# Now, 'merged_data' contains only the rows from 'rating_data' where 'movieId' is present in 'movies_with_50_ratings_or_more'
merged_data.head()


Movies with more than 50 ratings:
      movieId  rating
0           1     215
1           2     110
2           3      52
5           6     102
6           7      54
...       ...     ...
8287   106782      54
8354   109374      52
8358   109487      73
8457   112852      59
8673   122904      54

[436 rows x 2 columns]


Unnamed: 0,movieId,imdbId,tmdbId,rating
0,1,114709,862.0,215
1,2,113497,8844.0,110
2,3,113228,15602.0,52
3,6,113277,949.0,102
4,7,114319,11860.0,54


In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Step 1: Load the Links Data
links_df = pd.read_csv('links.csv')
# Assuming 'user_ratings' is the column representing the number of user ratings
filtered_links_df = links_df[links_df['user_ratings'] > 50]

# Step 2: Get IMDb Ratings
def get_imdb_rating(imdbId):
    id = str(int(imdbId))
    n_zeroes = 7 - len(id)
    new_id = "0" * n_zeroes + id
    URL = f"https://www.imdb.com/title/tt{new_id}/"
    headers = {
        'Content-Type': 'text/html; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0',
        'Accept-Encoding': 'gzip, deflate, br'
    }
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    imdb_rating = soup.find('span', attrs={'data-testid': 'ratingValue'})
    return float(imdb_rating.text.strip()) if imdb_rating else None

# Step 3: Scrape IMDb Reviews


# Step 4: Identify the Movie with the Highest IMDb Rating
max_rating = -1
max_rating_movie_id = None

for index, row in filtered_links_df.iterrows():
    imdb_rating = get_imdb_rating(row['imdbId'])
    if imdb_rating is not None and imdb_rating > max_rating:
        max_rating = imdb_rating
        max_rating_movie_id = row['movieId']

# Step 5: Write to README.md
with open('README.md', 'a') as readme_file:
    readme_file.write(f"The movieId of the movie with the highest IMDb rating is: {max_rating_movie_id}")


In [14]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Step 1: Load the Links Data

# Step 2: Get IMDb Ratings
def get_imdb_rating(imdbId):
    id = str(int(imdbId))
    n_zeroes = 7 - len(id)
    new_id = "0" * n_zeroes + id
    URL = f"https://www.imdb.com/title/tt{new_id}/"
    headers = {
        'Content-Type': 'text/html; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0',
        'Accept-Encoding': 'gzip, deflate, br'
    }
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    imdb_rating = soup.find('span', attrs={'data-testid': 'ratingValue'})
    return float(imdb_rating.text.strip()) if imdb_rating else None

In [16]:
print(get_imdb_rating(114709))


None


In [13]:
import requests
import numpy as np
from bs4 import FILL_IN_THE_BLANK

def scrapper(imdbId):
    id = str(int(imdbId))
    n_zeroes = 7 - len(id)
    new_id = "0"*n_zeroes + id
    URL = f"https://www.imdb.com/title/tt{new_id}/"
    request_header = {'Content-Type': 'text/html; charset=UTF-8',
                      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0',
                      'Accept-Encoding': 'gzip, deflate, br'}
    response = requests.FILL_IN_THE_BLANK(URL, headers=request_header)
    soup = FILL_IN_THE_BLANK(response.text)
    imdb_rating = soup.find('FILL_IN_THE_BLANK', attrs={'FILL_IN_THE_BLANK' : 'FILL_IN_THE_BLANK'})
    return imdb_rating.text if imdb_rating else np.nan

scrapper(114709)

ImportError: cannot import name 'FILL_IN_THE_BLANK' from 'bs4' (/usr/local/lib/python3.10/dist-packages/bs4/__init__.py)

In [24]:
import requests
from bs4 import BeautifulSoup

def get_imdb_rating(imdbId):
    try:
        id = str(int(imdbId))
        n_zeroes = 7 - len(id)
        new_id = "0" * n_zeroes + id
        URL = f"https://www.imdb.com/title/tt{new_id}/"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
        }
        response = requests.get(URL, headers=headers)
        response.raise_for_status()  # Raise an HTTPError for bad responses

        # Debugging information
        print(f"IMDb ID: {imdbId}")
        print(f"Response status code: {response.status_code}")


        soup = BeautifulSoup(response.text, 'html.parser')
        imdb_rating = soup.find('span', attrs={'data-testid': 'ratingValue'})
        return float(imdb_rating.text.strip()) if imdb_rating else None
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None
    except AttributeError as e:
        print(f"Error parsing HTML: {e}")
        return None

# Example usage:
for i in


IMDb ID: 114709
Response status code: 200
IMDb Rating for movie with IMDb ID 114709: None
