In [1]:
import os
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
import warnings

warnings.filterwarnings('ignore')

# Mengubah data menjadi DataFrame
df = pd.read_csv('data_recruitment_selection.csv')
df.head(5)

Unnamed: 0,CandidateID,Name,Gender,Age,Position,ApplicationDate,Status,InterviewDate,OfferStatus
0,1,Nicole Wilson,Male,29,Data Engineer,2023-07-20,Interviewed,2023-11-24,
1,2,Christine Wright,Female,52,HR Manager,2023-11-08,Interviewed,2024-06-29,Hired
2,3,Denise Robinson,Male,45,Technical Support,2023-07-29,Rejected,2023-10-26,Hired
3,4,John Chen,Female,50,UI/UX Designer,2023-10-10,Interviewed,2023-10-09,
4,5,Denise Robinson,Male,42,Data Engineer,2023-10-12,Rejected,,


In [2]:
# Mengonversi kolom 'OfferStatus' menjadi biner
df['OfferStatus'] = df['OfferStatus'].apply(lambda x: 1 if x == 'Hired' else 0)

# Menghapus kolom yang tidak diperlukan
df = df.drop(columns=['CandidateID', 'Name', 'ApplicationDate', 'InterviewDate'])

# Menyimpan kolom untuk encoding dan scaling
categorical_cols = ['Gender', 'Position', 'Status']
numerical_cols = ['Age']

In [3]:
# Fungsi untuk preprocessing OneHotEncoder
def prepOneHotEncoder(df, col, pathPackages):
    oneHotEncoder = OneHotEncoder(handle_unknown='ignore')
    dfOneHotEncoder = pd.DataFrame(oneHotEncoder.fit_transform(df[[col]]),
                                   columns=[col + "_" + str(i+1) for i in range(len(oneHotEncoder.categories_[0]))])
    
    filename = 'prep' + col + '.pkl'
    pickle.dump(oneHotEncoder, open(pathPackages + filename, 'wb'))
    print(f"Preprocessing data {col} has been saved...")
    
    df = pd.concat([df.drop(col, axis=1), dfOneHotEncoder], axis=1)
    return df

# Fungsi untuk preprocessing StandardScaler
def prepStandardScaler(df, col, pathPackages):
    scaler = StandardScaler()
    df[col] = scaler.fit_transform(df[[col]])
    
    filename = 'prep' + col + '.pkl'
    pickle.dump(scaler, open(pathPackages + filename, 'wb'))
    print(f"Preprocessing data {col} has been saved...")
    
    return df

In [4]:
# Direktori untuk menyimpan model dan scaler
pathPackages = os.getcwd() + "\\" + "packages" + "\\"
os.makedirs(pathPackages, exist_ok=True)

# Preprocessing data
for col in categorical_cols:
    df = prepOneHotEncoder(df, col, pathPackages)

for col in numerical_cols:
    df = prepStandardScaler(df, col, pathPackages)

# Memisahkan fitur dan target
X = df.drop(columns=['OfferStatus']).values
y = df['OfferStatus'].values

# Melatih model
model = LogisticRegression()
model.fit(X, y)

# Menyimpan model
with open(pathPackages + 'modelRecruitment.pkl', 'wb') as file:
    pickle.dump(model, file)

print("Model telah dilatih dan disimpan.")

TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [5]:
import os
import pickle
import pandas as pd

data = {
    'CandidateID': [1, 2, 3, 4, 5],
    'Name': ['Nicole Wilson', 'Christine Wright', 'Denise Robinson', 'John Chen', 'Denise Robinson'],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Age': [29, 52, 45, 50, 42],
    'Position': ['Data Engineer', 'HR Manager', 'Technical Support', 'UI/UX Designer', 'Data Engineer'],
    'ApplicationDate': ['2023-07-20', '2023-11-08', '2023-07-29', '2023-10-10', '2023-10-12'],
    'Status': ['Interviewed', 'Interviewed', 'Rejected', 'Interviewed', 'Rejected'],
    'InterviewDate': ['2023-11-24', '2024-06-29', '2023-10-26', '2023-10-09', None],
    'OfferStatus': [None, 'Hired', 'Hired', None, None]
}
df = pd.DataFrame(data)
df = df[['Gender', 'Age', 'Position', 'Status']]  # Kolom yang akan digunakan untuk modelling
pickle.dump(df.columns.tolist(), open(os.path.join('packages', 'columnModelling.pkl'), 'wb'))