# Code to analyse car model using ChatGPT

This code was set up in Google Colab to free up local resources and to allow the code to run for longer periods of time.

Loading Data from Google Drive

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this runtime
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make the Drive project folder the working directory so relative file paths below resolve there
%cd /content/drive/My Drive/AI/

/content/drive/My Drive/AI


In [None]:
# Import the data-handling libraries
import numpy as np
import pandas as pd

# Load the dataset from the current working directory
df = pd.read_csv('autoevolution.csv')

# Display the frame's dimensions and its column names as the cell output
df.shape , df.columns.values

((1211, 4), array(['model', 'review', 'make', 'extracted'], dtype=object))

Add auth key

In [None]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret is never typed into (or saved with) the notebook. Falls back to an
# empty string when the variable is not set.
import os

authkey = os.environ.get("OPENAI_API_KEY", "")

In [None]:
!pip install openai==0.28.1

Collecting openai==0.28.1
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires tiktoken, which is not installed.[0m[31m
[0mSuccessfully installed openai-0.28.1


# ChatGPT

For analysis



In [None]:
# Prompt-writing guidance from OpenAI:
# https://help.openai.com/en/articles/6654000-best-practices-for-prompt-engineering-with-openai-api

import openai
import json
import time

# Configure the openai module with the key held in `authkey`
openai.api_key = authkey

def GPTextract(prompt, model="gpt-3.5-turbo"):
    ''' Extracts car exterior design descriptions from unstructured text using the OpenAI chat API.

    The request is attempted up to 3 times, pausing 5 seconds between attempts.

    prompt: string, the unstructured text to analyse (sent as the user message)
    model: string, name of the OpenAI chat model to use

    return: string holding the model's reply with newlines removed,
            or None when every attempt failed
    '''

    max_attempts = 3
    retry_delay = 5  # seconds to wait before the next attempt

    # NOTE: the whitespace inside this literal is part of the prompt and is kept as-is.
    # The example template must be valid JSON, since the model is told to stick to it.
    system_prompt = '''
                You are a powerful data analyst for car exterior designs.
                You are tasked to extract useful design adjectives for each specific car element from the unstructured text. The adjectives must describe about shape, size, colour, style, form.

                You may summarize the description string within the list to be no more than 5 words each. You must stick to the template given. If you are not able to find any useful design description for that element, you may skip it. An example is given below. Output your results in JSON format.
                Output:
                {
                  "car-model": "",
                  "year": "",
                  "type": "",
                  "overall-design": [],
                  "design-elements": {
                    "windshield": [],
                    "grille": [],
                    "hood": [],
                    "headlights": [],
                    "bumper": [],
                    "wheel": [],
                    "doors": [],
                    "roof": [],
                    "diffusor": [],
                    "spoiler": [],
                    "exhaust": []
                  }
                }
                '''

    # The messages do not change between attempts, so build them once.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=0, # temperature 0 means deterministic, 1 means random
            )
            text = response.choices[0].message["content"]
            return text.replace("\n", "")
        except Exception as e:
            # Best effort: any failure (timeout, rate limit, auth, ...) is reported and retried.
            print(e)
            if attempt < max_attempts:
                print('Request failed, retrying...')
                time.sleep(retry_delay)

    print('API is not responding, moving on...')
    return None


In [None]:
import pandas as pd
import datetime

def process_dataframe(progress_file, start_index=36, model="gpt-3.5-turbo-1106"):
    ''' Runs GPTextract over the unprocessed rows of a CSV file, saving after every processed row.

    The CSV is read from, and written back to, progress_file. Rows whose 4th
    column already holds a value are skipped, so an interrupted run can simply
    be started again.

    progress_file: string, path of the CSV file to read and update in place
    start_index: int, position of the first row to examine
    model: string, model name passed on to GPTextract

    return: None
    '''
    name_col = 0       # 1st column: label printed in the progress log
    text_col = 1       # 2nd column: text handed to GPTextract
    output_col = 3     # 4th column: where the extraction result is stored
    report_every = 10  # print a timing line after this many processed rows

    # Load progress from the file itself instead of relying on a notebook-level frame
    progress_df = pd.read_csv(progress_file)
    output_name = progress_df.columns[output_col]
    # An all-empty output column is read as float; use object so strings can be stored in it
    progress_df[output_name] = progress_df[output_name].astype(object)
    print(f"Resuming from index {start_index}")

    prev_time = datetime.datetime.now()
    print(f"Start processing at {prev_time.strftime('%H:%M:%S')}")

    processed = 0
    for index, row in progress_df.iloc[start_index:].iterrows():
        print(index, ":", row.iloc[name_col], end=" ")

        if not pd.isna(row.iloc[output_col]):
            print("(Skip)")
            continue

        try:
            output = GPTextract(row.iloc[text_col], model)
        except Exception as e:
            print(f"Error processing row {index}: {e}")
            continue

        if output is None:
            # GPTextract gave up on this row; leave it empty so a later run retries it
            print("(Failed)")
            continue

        # Store the result and persist immediately so no finished work is lost
        progress_df.at[index, output_name] = output
        progress_df.to_csv(progress_file, index=False)
        print("(Saved)")
        processed += 1

        if processed == report_every:
            # Report how long the last batch of processed rows took, in minutes
            now = datetime.datetime.now()
            duration = (now - prev_time).total_seconds() / 60
            print("Current:", now.strftime("%H:%M:%S"), "Duration (mins):", duration)
            prev_time = now
            processed = 0

    print("Processing complete. Output saved.")


In [None]:
# Process the dataset CSV; results are written back to the same file that is read
process_dataframe("autoevolution.csv")

Resuming from index 36
Start processing at 13:13:29
36 : ASTON MARTIN V8 Vantage S (Skip)
37 : ASTON MARTIN Rapide E (Skip)
38 : ASTON MARTIN Valhalla (Skip)
39 : ASTON MARTIN V12 Speedster (Skip)
40 : ASTON MARTIN Vantage Roadster (Skip)
41 : AUDI Q8 (Skip)
42 : AUDI SQ8 (Skip)
43 : AUDI S8 (Skip)
44 : AUDI R8 (Skip)
45 : AUDI A3 Sportback (Skip)
46 : AUDI A3 Sedan (Skip)
47 : AUDI SQ2 (Skip)
48 : AUDI S3 Sportback (Skip)
49 : AUDI Q5 (Skip)
50 : AUDI RS 3 Sedan (Skip)
51 : AUDI S5 Cabriolet (Skip)
52 : AUDI S3 Sedan (Skip)
53 : AUDI SQ5 (Skip)
54 : AUDI S5 Sportback (Skip)
55 : AUDI Q2 (Skip)
56 : AUDI TT RS Coupe (Skip)
57 : AUDI Q7 (Skip)
58 : AUDI RS5 Sportback (Skip)
59 : AUDI A4 Avant (Skip)
60 : AUDI A5 Sportback (Skip)
61 : AUDI SQ7 (Skip)
62 : AUDI Allroad (Skip)
63 : AUDI S4 Avant (Skip)
64 : AUDI A4 (Skip)
65 : AUDI S4 (Skip)
66 : AUDI S6 Avant (Skip)
67 : AUDI TT RS Roadster (Skip)
68 : AUDI S6 (Skip)
69 : AUDI RS7 Sportback (Skip)
70 : AUDI RS 4 Avant (Skip)
71 : AUDI RS 