In [25]:
import pandas as pd

# Stream the CSV in fixed-size pieces rather than loading it in one go.
chunksize = 10000  # number of rows per chunk
chunk_reader = pd.read_csv(
    "/content/vehicles.csv",
    chunksize=chunksize,
    encoding='latin-1',  # swap for another encoding if the file requires it
)
for chunk in chunk_reader:
    print(chunk.head())  # preview the first rows of each chunk independently

           id                                                url  \
0  7222695916  https://prescott.craigslist.org/cto/d/prescott...   
1  7218891961  https://fayar.craigslist.org/ctd/d/bentonville...   
2  7221797935  https://keys.craigslist.org/cto/d/summerland-k...   
3  7222270760  https://worcester.craigslist.org/cto/d/west-br...   
4  7210384030  https://greensboro.craigslist.org/cto/d/trinit...   

                   region                         region_url  price  year  \
0                prescott    https://prescott.craigslist.org   6000   NaN   
1            fayetteville       https://fayar.craigslist.org  11900   NaN   
2            florida keys        https://keys.craigslist.org  21000   NaN   
3  worcester / central MA   https://worcester.craigslist.org   1500   NaN   
4              greensboro  https://greensboro.craigslist.org   4900   NaN   

  manufacturer model condition cylinders  ... size  type paint_color  \
0          NaN   NaN       NaN       NaN  ...  NaN   NaN

In [26]:
import pandas as pd

file_path = "/content/vehicles.csv"

# Restrict the load to the target (price), the tabular features and the
# free-text description; every other column in the file is skipped.
selected_columns = ['price', 'year', 'manufacturer', 'model', 'condition', 'cylinders', 'size',
                    'type', 'paint_color', 'state', 'description']
df = pd.read_csv(file_path, usecols=selected_columns, engine='python')

# Keep only the rows in which every selected column is populated.
df = df.dropna()

# Show the first few remaining rows.
print(df.head())

    price    year manufacturer                 model  condition    cylinders  \
31  15000  2013.0         ford             f-150 xlt  excellent  6 cylinders   
55  19900  2004.0         ford       f250 super duty       good  8 cylinders   
59  14000  2012.0        honda               odyssey  excellent  6 cylinders   
65  22500  2001.0         ford                  f450       good  8 cylinders   
73  15000  2017.0        dodge  charger rt 4dr sedan  excellent  8 cylinders   

         size      type paint_color  \
31  full-size     truck       black   
55  full-size    pickup        blue   
59  full-size  mini-van      silver   
65  full-size     truck       white   
73   mid-size     sedan        grey   

                                          description state  
31  2013 F-150 XLT V6 4 Door. Good condition. Leve...    al  
55  Here I have a unmolested Ford F-250 6.0 power ...    al  
59  Readily available. Excellent condition, clean ...    al  
65  Ford Super Duty F450 , XL with K

In [27]:
from sklearn.preprocessing import LabelEncoder

categorical_cols = ['manufacturer', 'model', 'condition', 'cylinders', 'size', 'type', 'paint_color', 'state']

# Replace each text category with an integer code, using one independently
# fitted encoder per column. The fitted encoders are kept in `label_encoders`
# so the codes can later be mapped back to their labels
# (`label_encoders[col].inverse_transform(...)`) or applied to new rows;
# without this the encoding would be irreversible.
# NOTE(review): the codes are arbitrary identifiers, yet the network will
# treat them as ordered magnitudes — confirm whether one-hot encoding or
# embeddings would suit these columns better.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [28]:
from sklearn.model_selection import train_test_split

# Target: the listing price. Features: every remaining column except the
# free-text description, which is not fed to the model.
y = df['price']
X = df.drop(columns=['price', 'description'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
from tensorflow import keras
from tensorflow.keras import layers

# Fully connected regression network: three ReLU hidden layers narrowing
# 128 -> 64 -> 32, followed by a single linear unit that outputs the price.
# The input width is declared with an explicit `keras.Input` object instead of
# `input_shape=` on the first Dense layer, which Keras warns against for
# Sequential models.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1),  # single output: the price
])

# Optimise mean squared error and additionally report mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# The held-out split doubles as validation data, so the val_* metrics are
# computed on the test rows after every epoch.
# NOTE(review): features and target are passed in unscaled; if the loss is
# very large or unstable, check for price outliers and consider scaling.
model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 125951685951488.0000 - mae: 60237.6172 - val_loss: 232720832.0000 - val_mae: 13019.5303
Epoch 2/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 48472472420352.0000 - mae: 40720.1406 - val_loss: 2814233344.0000 - val_mae: 49015.3281
Epoch 3/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 419917006372864.0000 - mae: 156367.2188 - val_loss: 1104407424.0000 - val_mae: 29134.7441
Epoch 4/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 62857517465600.0000 - mae: 74045.3047 - val_loss: 6887781376.0000 - val_mae: 76013.4922
Epoch 5/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 290978397159424.0000 - mae: 192026.1406 - val_loss: 243971104.0000 - val_mae: 11455.9814
Epoch 6/20
[1m1027/1027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0

<keras.src.callbacks.history.History at 0x7a0cdf1b7f50>

In [30]:
from diffusers import StableDiffusionPipeline
import torch

# Fetch the pretrained Stable Diffusion v1.4 pipeline, then move it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe = pipe.to("cuda")  # GPU only: no CPU fallback is attempted

def generate_car_image(description, predicted_price, output_path="generated_car.png"):
    """Generate, show and save a car image from a listing text and a price.

    Args:
        description: Free-text description of the vehicle, inserted verbatim
            into the prompt.
        predicted_price: Estimated price; truncated to an integer for the
            prompt.
        output_path: File the image is written to. Defaults to
            "generated_car.png", so calls that omit it write to the same
            file as before; pass a different path to avoid overwriting.

    Returns:
        The first image produced by the pipeline, so that a notebook cell
        ending in this call can display it.
    """
    # NOTE(review): the prompt is French and labels the price in euros —
    # confirm both match the data's currency and the model's prompt language.
    prompt = f"Voiture {description}, estimée à {int(predicted_price)}€, design moderne et détaillé"
    image = pipe(prompt).images[0]  # relies on the module-level `pipe`
    image.show()
    image.save(output_path)
    return image

# Example: predict the price of one held-out vehicle and illustrate it.
# The description is looked up by the sampled row's index label so that the
# text and the prediction describe the same listing; `df.iloc[0]` would pick
# the first row of the whole table, which is unrelated to the test sample.
sample_features = X_test.iloc[0:1]
predicted_price = model.predict(sample_features)[0][0]
description = df.loc[sample_features.index[0], 'description']
generate_car_image(description, predicted_price)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 352ms/step


  0%|          | 0/50 [00:00<?, ?it/s]