# Code to analyse car make using ChatGPT

This code was set up in Google Colab to free up local resources and to allow the code to run for longer periods of time.

Loading Data from Google Drive

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Switch the working directory to the Drive project folder; relative paths
# used later in the notebook resolve against it.
%cd /content/drive/My Drive/AI/

/content/drive/My Drive/AI


Add auth key

In [3]:
import os

# Read the key from the OPENAI_API_KEY environment variable so the secret is
# never typed into (and saved with) the notebook. Falls back to an empty
# string, which is what this cell held before, when the variable is not set.
authkey = os.environ.get("OPENAI_API_KEY", "")

In [4]:
!pip install openai==0.28.1

Collecting openai==0.28.1
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/77.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m71.7/77.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires tiktoken, which is not installed.[0m[31m
[0mSuccessfully installed openai-0.28.1


# ChatGPT

For analysis



In [5]:
# https://help.openai.com/en/articles/6654000-best-practices-for-prompt-engineering-with-openai-api

import openai
import json
import time

# Set OpenAI API key (uses `authkey` from the "Add auth key" cell, which must
# have been run before this cell)
openai.api_key = authkey

def GPTextract(prompt, model="gpt-3.5-turbo", retries=3, retry_delay=5):
    ''' Extracts design keywords from unstructured brand text via the chat API.

    The request is sent with a fixed system message (instructions plus one
    worked example) followed by `prompt` as the user message.

    prompt: string, the text to analyse (sent as the user message)
    model: string, chat model name passed to the API
    retries: int, maximum number of attempts before giving up
    retry_delay: number, seconds to wait between failed attempts

    return: string with the model's reply (newlines removed), or None when
            every attempt failed
    '''

    # Built once: the conversation is identical on every attempt.
    messages = [
        {"role": "system", "content":
                '''
                You are a powerful design analyst looking at car make's brand image.
                You are tasked to extract design keywords and present it as a comma separated text.

                Do not deviate from the output tags.

                Input: Back in the olden days, a tuning house that mainly made Fiats go faster. Nowadays, a tuning house that mainly makes Fiats go faster. Part of the Stellantis empire, Abarth today mostly serves up Fiat 500s tuned to various degrees of absurdity. If you view ‘cartoonish’ as a compliment rather than an insult, you’ll be right at home here.
                Abarth A former motorsport team and independent tuning company, Abarth is now owned by Fiat and responsible for the firm’s sportiest cars. In recent years, Fiat has even tried to establish Abarth as a standalone brand – in the same way that Citroën has split off its DS luxury division. However, the major difference is that DS cars look completely different from the Citroëns on which they’re based, whereas Abarths rely on bodykits and big wheels to mark them out. The Abarth 595, for example, is a beefed-up Fiat 500 hatchback, while the Abarth 124 Spider is a hotter version of its Fiat namesake

                Output: Cartoonish, Sporty,
                '''
        },
        {"role": "user", "content": prompt}
    ]

    for attempt in range(1, retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model=model, # gpt-3.5-turbo
                messages=messages, # prompt
                temperature=0, # temperature 0 means deterministic, 1 means random
            )
            text = response.choices[0].message["content"]
            return text.replace("\n", "")
        except Exception as e:
            # Deliberately broad: any API failure (rate limit, timeout,
            # connection problem, ...) is reported and retried so that one bad
            # request does not abort a long batch run.
            print(e)
            if attempt < retries:
                print('API error, retrying...')
                time.sleep(retry_delay)

    print('API is not responding, moving on...')
    return None


In [6]:
import pandas as pd
import datetime

def process_dataframe(progress_file, start_index=None, model="gpt-4"):
    """Run GPTextract over the unprocessed rows of a CSV, saving after each row.

    Columns are addressed by position: column 0 is printed as the row label,
    column 1 holds the text passed to GPTextract, and column 2 receives the
    result. Rows whose column 2 already has a value are skipped, so the
    function can be re-run on the same file to resume interrupted work.

    progress_file: path of the CSV to read; it is overwritten in place after
                   every successfully processed row
    start_index: positional row to start from; None (default) starts at the
                 first row whose column 2 is empty
    model: model name forwarded to GPTextract

    return: None
    """
    progress_df = pd.read_csv(progress_file)
    output_col = progress_df.columns[2]
    # A completely empty output column is parsed as float64; store it as
    # object so that string results can be written into it.
    progress_df[output_col] = progress_df[output_col].astype(object)

    if start_index is None:
        # Resume at the first row that has no output yet.
        missing = progress_df[output_col].isna().to_numpy()
        start_index = int(missing.argmax()) if missing.any() else len(progress_df)
    print(f"Resuming from index {start_index}")

    start_time = datetime.datetime.now().strftime("%H:%M:%S")
    prev_time = datetime.datetime.now()
    print(f"Start processing at {start_time}")
    count = 0  # rows saved since the last timing report
    for index, row in progress_df.iloc[start_index:].iterrows():
        print(index, ":", row.iloc[0], end=" ")
        source_text = row.iloc[1]

        if not pd.isna(row.iloc[2]):
            print("(Skip)")
        elif pd.isna(source_text) or source_text == "":
            # Nothing to send to the API; end the log line and move on.
            print("(No input)")
        else:
            try:
                output = GPTextract(source_text, model)
                if output is None:
                    # GPTextract gave up after its retries; leave the cell
                    # empty so a later run picks this row up again.
                    print("(Failed)")
                else:
                    progress_df.at[index, output_col] = output
                    progress_df.to_csv(progress_file, index=False)
                    print("(Saved)")
                    count += 1
            except Exception as e:
                # Best effort: report the problem and keep going with the
                # remaining rows.
                print(f"Error processing row {index}: {e}")
                continue

        if count == 10:
            # Report wall-clock time (in minutes) taken by the last 10 saved rows.
            duration = datetime.datetime.now() - prev_time
            duration = duration.total_seconds() / 60
            print("Current:", datetime.datetime.now().strftime("%H:%M:%S"), "Duration (mins):", duration)
            prev_time = datetime.datetime.now()
            count = 0

    print("Processing complete. Output saved.")


In [7]:
# Run the extraction over brand_info.csv in the current working directory;
# results are written back to the same file as rows are processed.
process_dataframe("brand_info.csv")

Resuming from index 26
Start processing at 02:52:48
26 : FIAT (Saved)
27 : FISKER (Saved)
28 : FORD (Saved)
29 : GEELY (Saved)
30 : GENESIS (Saved)
31 : GMC (Saved)
32 : GORDON MURRAY Automotive (Saved)
33 : HINDUSTAN (Saved)
34 : HOLDEN (Saved)
35 : HONDA (Saved)
Current: 02:53:10 Duration (mins): 0.36933173333333336
36 : HYUNDAI (Saved)
37 : INEOS (Saved)
38 : INFINITI (Saved)
39 : ISUZU Rate limit reached for gpt-4 in organization org-oRwo7HlIEyRavqNVw1ukShCz on tokens per min (TPM): Limit 10000, Used 9012, Requested 1184. Please try again in 1.176s. Visit https://platform.openai.com/account/rate-limits to learn more.
Timeout error, retrying...
(Saved)
40 : JAGUAR Rate limit reached for gpt-4 in organization org-oRwo7HlIEyRavqNVw1ukShCz on tokens per min (TPM): Limit 10000, Used 9055, Requested 1427. Please try again in 2.892s. Visit https://platform.openai.com/account/rate-limits to learn more.
Timeout error, retrying...
(Saved)
41 : JEEP Rate limit reached for gpt-4 in organizatio