In [33]:
import pandas as pd
import json
import os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Print the current working directory
print("Current Working Directory:", os.getcwd())

# Define the relative path to the JSON file
json_file_path = os.path.join('..', 'data', 'employees.json')

# Check if the file exists
if not os.path.exists(json_file_path):
    print(f"File not found: {json_file_path}")
else:
    # Read the JSON file
    with open(json_file_path, 'r') as file:
        data = json.load(file)

    # Convert the list of objects to a DataFrame
    df = pd.DataFrame(data)

    # Define the path for the output CSV file
    csv_file_path = os.path.join('output.csv')

    # Save the DataFrame to a CSV file
    df.to_csv(csv_file_path, index=False)

    print(f"CSV file has been created successfully at {csv_file_path}.")

# Load the data from the CSV file
d = pd.read_csv('output.csv')

print(d.head())
y = d['salary']
X = pd.get_dummies(d[['jobRole', 'workLocation']])
print(X.head())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
score = model.score(X_test, y_test)

print(score)

print(f"R^2 Score: {r2}")




Current Working Directory: c:\Users\wasadmin\Documents\TEX\data_analysis
CSV file has been created successfully at output.csv.
             name   phoneNumber  jobRole workLocation    salary  role  \
0  Alan Schnitzer  123-456-7890      Ceo     New York  21207397     2   
1   Travis Arroyo  745-569-7270  Manager      St Paul    103312     1   
2  Ethan Gonzalez  395-401-3079  Manager      St Paul    106615     1   
3  John Gutierrez  231-602-7708  Manager     Hartford    126099     1   
4    Megan Miller  787-512-8658  Manager      St Paul     95538     1   

                          email      username        password       managerId  
0  Alan.Schnitzer@travelers.com    ASchnitzer    ASchnitzer@1             NaN  
1   Travis.Arroyo@travelers.com     TArroyo49     TArroyo@448  Alan Schnitzer  
2  Ethan.Gonzalez@travelers.com    EGonzalez3   EGonzalez@118  Alan Schnitzer  
3  John.Gutierrez@travelers.com  JGutierrez94  JGutierrez@324  Alan Schnitzer  
4    Megan.Miller@travelers.com   

In [34]:
filename = "model.pkl"

with open(filename, 'wb') as file:
    pickle.dump(model,file) 