In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Create DataFrame with meaningful columns and realistic data
data = {
    "Customer_ID": range(1, 21),
    "Age": np.random.randint(20, 65, 20),
    "Income": np.random.randint(20000, 100000, 20),
    "Education_Level": np.random.choice(["High School", "Bachelor's", "Master's", "PhD"], 20),
    "Location": np.random.choice(["Urban", "Rural"], 20),
    "Purchase_History": np.random.randint(100, 1000, 20),
    "Last_Purchase_Amount": np.random.randint(50, 500, 20),
    "Product_Category_Preference": np.random.choice(["Electronics", "Clothing", "Home Goods", "Food"], 20),
    "Website_Visits": np.random.randint(5, 20, 20),
    "Avg_Time_on_Website": np.random.randint(100, 800, 20)
}

df = pd.DataFrame(data)
df.set_index(df["Customer_ID"], inplace=True)
df.drop(columns="Customer_ID", axis=1, inplace=True)
df.head()

Unnamed: 0_level_0,Age,Income,Education_Level,Location,Purchase_History,Last_Purchase_Amount,Product_Category_Preference,Website_Visits,Avg_Time_on_Website
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,64,85592,Master's,Rural,234,332,Home Goods,12,462
2,34,47746,High School,Urban,886,91,Home Goods,8,426
3,58,30648,Master's,Rural,392,99,Electronics,16,609
4,43,20174,PhD,Rural,804,344,Clothing,15,684
5,45,96925,Bachelor's,Rural,541,450,Electronics,18,652


In [3]:
def convert_categorical_to_ordinal(df, columns):
  # Create a dictionary to map unique categories to ordinal values for each column
    ordinal_dicts = {}
    for col in columns:
        ordinal_dicts[col] = {}
        for i, row in df.iterrows():
            value = row[col]
            if value not in ordinal_dicts[col]:
                ordinal_dicts[col][value] = len(ordinal_dicts[col]) + 1
            df.at[i, col] = ordinal_dicts[col][value]

    return df

In [4]:
columns_to_convert = ["Education_Level", "Location", "Product_Category_Preference"]

for column in columns_to_convert:
    df = convert_categorical_to_ordinal(df.copy(), [column])  # Convert each column separately
    
df.head()

Unnamed: 0_level_0,Age,Income,Education_Level,Location,Purchase_History,Last_Purchase_Amount,Product_Category_Preference,Website_Visits,Avg_Time_on_Website
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,64,85592,1,1,234,332,1,12,462
2,34,47746,2,2,886,91,1,8,426
3,58,30648,1,1,392,99,2,16,609
4,43,20174,3,1,804,344,3,15,684
5,45,96925,4,1,541,450,2,18,652


In [5]:
# Apply a machine learning model (linear regression)
X = df.drop("Avg_Time_on_Website", axis=1)  # Features
y = df["Avg_Time_on_Website"]  # Target variable

In [6]:
model = LinearRegression()
model.fit(X, y)

In [7]:
# Make predictions on new data
new_data = pd.DataFrame({
    "Customer_ID": [21, 22],
    "Age": [35, 42],
    "Income": [65000, 78000],
    "Education_Level": ["Bachelor's", "Master's"],
    "Location": ["Urban", "Rural"],
    "Purchase_History": [750, 820],
    "Last_Purchase_Amount": [320, 280],
    "Product_Category_Preference": ["Electronics", "Clothing"],
    "Website_Visits": [25, 35]
})

new_data.set_index(new_data["Customer_ID"], inplace=True)
new_data.drop(columns="Customer_ID", axis=1, inplace=True)
new_data

Unnamed: 0_level_0,Age,Income,Education_Level,Location,Purchase_History,Last_Purchase_Amount,Product_Category_Preference,Website_Visits
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
21,35,65000,Bachelor's,Urban,750,320,Electronics,25
22,42,78000,Master's,Rural,820,280,Clothing,35


In [8]:
new_data["Education_Level"].replace(["Bachelor's", "Master's"], value=("1","3"), inplace=True)
new_data["Location"].replace(["Urban", "Rural"], value=("2","1"), inplace=True)
new_data["Product_Category_Preference"].replace(["Electronics", "Clothing"], value=("1","4"), inplace=True)

new_data.head()

Unnamed: 0_level_0,Age,Income,Education_Level,Location,Purchase_History,Last_Purchase_Amount,Product_Category_Preference,Website_Visits
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
21,35,65000,1,2,750,320,1,25
22,42,78000,3,1,820,280,4,35


In [9]:
predictions = model.predict(new_data)
print(f"""Predicted average time on website for new customers: 
    {predictions[0].astype("int")},
    {predictions[1].astype("int")}""")

Predicted average time on website for new customers: 
    650,
    526
