In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Size of the synthetic sample (one row per candidate)
n = 40

# Seed NumPy's global RNG so every run draws the same values.
# The order of the draws below matters: changing it changes the data.
np.random.seed(42)
temperature = np.random.randint(low=15, high=36, size=n)   # integers 15..35
age = np.random.randint(low=18, high=61, size=n)           # integers 18..60
holiday = np.random.randint(low=0, high=2, size=n)         # 0/1 flag
height = np.random.randint(low=150, high=200, size=n)      # integers 150..199

# Integer noise in [-5000, 4999], drawn last so the random stream is unchanged
noise = np.random.randint(low=-5000, high=5000, size=n)

# Salary is a fixed linear combination of the four features plus the noise
salaries = (
    1000 * age
    + 20 * height
    + 10000 * holiday
    + 200 * temperature
    + noise
)

# Assemble the table; insertion order of the dict fixes the CSV column order
columns = {
    "Temperature": temperature,
    "Age": age,
    "Holiday": holiday,
    "Salaries": salaries,
    "Height": height,
}
df = pd.DataFrame(columns)

# Persist without the row index so the file holds only the five columns
df.to_csv("Candidate.csv", index=False)
print("CSV file saved as Candidate.csv")

# ----------------------
# Apply Linear Regression
# ----------------------
# Fit a linear model that predicts "Salaries" from the four feature columns
# of `df`, then report the fitted parameters and the R² on a held-out split.

# Features (independent variables)
X = df[["Temperature", "Age", "Holiday", "Height"]]

# Target (dependent variable)
y = df["Salaries"]

# Split data (70% training, 30% testing); the fixed random_state makes the
# split, and therefore the reported numbers, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on rows the model has not seen
y_pred = model.predict(X_test)

# Evaluate model.
# NOTE: this is R² (coefficient of determination), not a classification
# accuracy. It is at most 1 and can be negative for a poor fit. The variable
# name and the printed labels are kept as they were for compatibility.
accuracy = r2_score(y_test, y_pred)

print("Model Coefficients:", model.coef_)
print("Model Intercept:", model.intercept_)
print("R² Accuracy Score:", accuracy)
# Scale to a percentage first and round last. Rounding before multiplying
# reintroduces binary floating-point error, e.g. round(0.57, 3) * 100 prints
# 56.99999999999999 instead of 57.0.
print("R² Accuracy Score:", round(accuracy * 100, 1), "%")


CSV file saved as Candidate.csv
Model Coefficients: [ 253.28990968 1026.61629901 7302.03262225  -26.92180132]
Model Intercept: 6779.796111352989
R² Accuracy Score: 0.9376030141759588
R² Accuracy Score: 93.8 %
