In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so every run regenerates the same synthetic dataset
np.random.seed(42)

# Category pools that the Brand and Fuel_Type columns are sampled from
brands = ["Toyota", "Honda", "BMW", "Audi", "Ford", "Hyundai", "Kia"]
fuel_types = ["Petrol", "Diesel", "Electric", "Hybrid"]

n_samples = 200  # number of synthetic cars

# Draw the feature columns one at a time. The draw order (Brand, Horsepower,
# Mileage, Age, Fuel_Type, then the price noise) determines the values produced
# for the seed above, so these statements must not be reordered.
brand_col = np.random.choice(brands, n_samples)
horsepower_col = np.random.randint(70, 400, n_samples)  # in HP (upper bound exclusive)
mileage_col = np.random.randint(8, 25, n_samples)       # km per litre
age_col = np.random.randint(0, 15, n_samples)           # years old
fuel_col = np.random.choice(fuel_types, n_samples)

data = {
    "Brand": brand_col,
    "Horsepower": horsepower_col,
    "Mileage": mileage_col,
    "Age": age_col,
    "Fuel_Type": fuel_col,
}

# Synthetic price (INR): a fixed base plus linear effects of the numeric
# features and uniform integer noise. Brand and Fuel_Type do not enter it.
base_price = 500000
hp_premium = horsepower_col * 1500       # more HP, higher price
age_discount = age_col * 20000           # older cars cheaper
mileage_premium = mileage_col * 1000     # better mileage, higher price
noise = np.random.randint(-50000, 50000, n_samples)  # random noise
price = base_price + hp_premium - age_discount + mileage_premium + noise

data["Price"] = price

# Assemble the table, persist it without the index column, and preview it
df = pd.DataFrame(data)
df.to_csv("car_data.csv", index=False)
print(df.head())


In [None]:
# Structural overview: column names, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would append a stray "None" line.
df.info()

# Summary statistics for the numeric columns
print(df.describe())

# Count of missing values per column
print(df.isnull().sum())


In [None]:
# Restrict the frame to the modelling columns, in a fixed order
model_columns = ['Brand', 'Horsepower', 'Mileage', 'Age', 'Fuel_Type', 'Price']
df = df[model_columns]

# Fill any missing numeric entries with the mean of their column
column_means = df.mean(numeric_only=True)
df = df.fillna(column_means)

# One-hot encode the categorical columns; drop_first omits the first level of
# each so the remaining indicator columns are not perfectly collinear
df = pd.get_dummies(df, columns=['Brand', 'Fuel_Type'], drop_first=True)


In [None]:
from sklearn.model_selection import train_test_split

# Separate the regression target from the predictor columns
y = df['Price']
X = df.drop(columns='Price')

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares linear regression on the training split
# (fit() returns the estimator itself, so the call can be chained)
model = LinearRegression().fit(X_train, y_train)
model  # trailing expression: echoes the fitted estimator as the cell output


In [None]:
from sklearn.metrics import r2_score, mean_absolute_error

# Predict prices for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true held-out prices
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("R² Score:", r2)
print("Mean Absolute Error:", mae)


In [None]:
import matplotlib.pyplot as plt

# Actual vs. predicted prices for the test split, drawn on an explicit Axes
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Car Price Prediction")
plt.show()


In [None]:
"""
8️⃣ Real-World Applications of Car Price Prediction

1. Used Car Marketplaces (e.g., OLX, Cars24):
   The model can instantly give sellers and buyers a fair price based on car specs,
   saving time compared to manual valuation.

2. Insurance Companies:
   The predicted price can be used to estimate a vehicle’s current value for premium and claim settlement.

3. Car Dealerships:
   Provides accurate trade-in values for customers, improving trust and sales.

4. Banks / Loan Providers:
   Helps decide the loan amount for a used car and reduces risk.

5. Fleet Management Companies:
   Assists in resale planning and optimizing vehicle disposal timing.
"""
