In [2]:
import numpy as np
import pandas as pd
import random

In [15]:
# Define parameters
locations = ['Istanbul', 'Ankara', 'Izmir', 'Bursa', 'Antalya']
n_samples = 10000  # Number of samples

# Generate random data
np.random.seed(42)  # For reproducibility
data = {
    "location": [random.choice(locations) for _ in range(n_samples)],
    "size": np.random.randint(50, 300, n_samples),  # Size in square meters
    "bedrooms": np.random.randint(1, 6, n_samples),  # Number of bedrooms
    "age": np.random.randint(0, 50, n_samples),  # Age in years
    "garden": np.random.choice([0, 1], n_samples),  # Garden: yes/no
    "garage": np.random.choice([0, 1], n_samples)   # Garage: yes/no
}

# Convert to DataFrame
df = pd.DataFrame(data)

In [16]:
# Location modifiers
location_modifiers = {
    "Istanbul": 50000,
    "Ankara": 30000,
    "Izmir": 40000,
    "Bursa": 20000,
    "Antalya": 35000
}

# Add noise and nonlinearity to the price calculation
df["price"] = (
    df["size"] * np.random.uniform(450, 550, n_samples) +  # Slight variation in per square meter cost
    df["bedrooms"] * np.random.uniform(19000, 21000, n_samples) -  # Bedroom multiplier variability
    df["age"] ** 1.2 * 800 +  # Nonlinear effect of age on price
    df["garden"] * np.random.uniform(28000, 32000, n_samples) +
    df["garage"] * np.random.uniform(18000, 22000, n_samples) +
    df["location"].map(location_modifiers) +  # Location-based adjustments
    np.random.normal(0, 10000, n_samples)  # Add Gaussian noise to simulate market variability
)


In [18]:
# Save to CSV
df.to_csv("veri.csv", index=False)

# Display the first few rows
df.head(10222)


Unnamed: 0,location,size,bedrooms,age,garden,garage,price
0,Bursa,152,2,34,0,0,69344.044879
1,Bursa,229,2,1,0,0,175580.110820
2,Antalya,142,4,14,1,0,205161.117366
3,Istanbul,64,1,8,1,0,114490.923886
4,Istanbul,156,5,46,1,1,194445.154146
...,...,...,...,...,...,...,...
9995,Ankara,136,1,48,1,1,81244.752537
9996,Istanbul,229,4,31,0,1,183490.714725
9997,Antalya,226,2,10,0,1,228352.913664
9998,Izmir,188,3,46,1,1,189109.303612
