In [10]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression

In [11]:
df = pd.read_csv('Salary_Data.csv')

In [12]:
df['Salary'].fillna(df['Salary'].mean(), inplace=True)

In [13]:
# Split features and target
X = df.drop(columns=['Salary'])
y = df['Salary']

# Preprocessing: Define the preprocessing steps for numerical and categorical features
numeric_features = ['Age', 'Years of Experience']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

In [14]:
categorical_features = ['Gender', 'Education Level', 'Job Title']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])


In [15]:
# Define the regression model
regressor = LinearRegression()

# Create a pipeline that preprocesses the data and trains the regression model
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', regressor)])


In [16]:
pipeline.fit(X, y)



In [17]:
user_input = {}
for column in df.columns:
    if(column=='Age' or column=='Years of Experience' or column=='Salary'):
      user_input[column] = float(input(f"Enter value for '{column}': "));
    else:
      user_input[column] = input(f"Enter value for '{column}': ");


# Convert the input into a DataFrame
user_df = pd.DataFrame([user_input])

# Append the new row to the existing DataFrame
# df = pd.concat([df, new_row], ignore_index=True)

# print("DataFrame after adding user input:")
user_df


Enter value for 'Age': 20
Enter value for 'Gender': Male
Enter value for 'Education Level': PhD
Enter value for 'Job Title': Software Engineer
Enter value for 'Years of Experience': 5
Enter value for 'Salary': 10000


Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience,Salary
0,20.0,Male,PhD,Software Engineer,5.0,10000.0


In [18]:
# Make prediction
predicted_salary = pipeline.predict(user_df)

print("Predicted Salary:", predicted_salary)

Predicted Salary: [99523.18745274]


In [19]:
!pip install fastapi

Collecting fastapi
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/92.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting starlette<0.37.0,>=0.36.3 (from fastapi)
  Downloading starlette-0.36.3-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: starlette, fastapi
Successfully installed fastapi-0.110.0 starlette-0.36.3
