In [58]:
import pandas as pd
import os
from supabase import create_client, Client

In [59]:
# Get Supabase URL from environment variable
url: str = os.environ.get("NEXT_PUBLIC_SUPABASE_URL")
# Get Supabase anonymous key from environment variable 
key: str = os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY")

In [60]:
# Initialize Supabase client with URL and anonymous key
Supabase: Client = create_client(url, key)

In [4]:
# Query all records from year_make_model_table in Supabase and store the response 
response = (Supabase.table("year_make_model_table").select("*").execute())

In [5]:
# Store the queried data from Supabase response into a variable
year_make_model_data = response.data

In [6]:
# Convert the data to dataframe format
df = pd.DataFrame(year_make_model_data)

In [7]:
# Converting all values in column Model to string, as some of models are numbers, for instance BMWs 318, 328, 525
df = df.astype({'model':'string'})

In [8]:
# Confirm if the Dtype from the column Model changed to string.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               1000 non-null   int64 
 1   year             1000 non-null   object
 2   make             1000 non-null   object
 3   model            1000 non-null   string
 4   security         767 non-null    object
 5   parameter_reset  267 non-null    object
 6   created_at       1000 non-null   object
 7   updated_at       1000 non-null   object
dtypes: int64(1), object(6), string(1)
memory usage: 62.6+ KB


In [9]:
# Drop unnecessary columns
df_dropped_columns = df.drop(columns=['id', 'created_at', 'updated_at'])

In [10]:
# Check which models the df has and print only the unique values
df_dropped_columns['make'].value_counts()

make
BMW           98
Toyota        80
Ford          65
Acura         65
Honda         63
Dodge         57
Chrysler      48
Chevrolet     47
Mazda         45
Lexus         41
Oldsmobile    39
Nissan        38
GMC           36
Cadillac      34
Buick         28
Pontiac       27
Mercury       24
Audi          23
Infiniti      21
Lincoln       21
Jeep          20
Jaguar        19
Mitsubishi    19
Volkswagen    18
Plymouth      15
Saturn         7
Land Rover     1
Mini           1
Name: count, dtype: int64

In [11]:
# Function to create a df based on the make
def create_df_make(make, df):
    df_make_filtered = df[df['make'] == make]
    return df_make_filtered

In [12]:
# Filter only Ford models and create a df.
df_ford_filtered = create_df_make('Ford', df_dropped_columns)

In [13]:
# Print the head of the df
df_ford_filtered.head(5)

Unnamed: 0,year,make,model,security,parameter_reset
190,1996,Ford,Mustang,PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required
191,1996,Ford,Taurus (Duratec & SHO only),PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required
247,1997,Ford,Expedition,PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required
248,1997,Ford,Mustang,PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required
249,1997,Ford,Taurus (Duratec & SHO only),PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required


In [14]:
# Count all models under the column 'model'
df_ford_filtered['model'].value_counts()

model
Mustang                        7
Expedition                     6
Crown Victoria                 5
Explorer (4dr)                 5
Explorer Sport (2dr)           5
F-150 Heritage                 4
Taurus (Duratec & SHO only)    4
Windstar                       4
Focus                          3
Taurus                         3
Excursion                      3
Contour (V6-only)              3
Explorer Sport Trac            2
Ranger (2.3L, 3.0L, & 4.0L)    2
F-150 Harley-Davidson          2
Ranger (3.0L & 4.0L only)      2
Escape                         2
F-250 (under 8500# GVW)        2
Thunderbird                    1
Name: count, dtype: Int64

### Note: there's no need to modify the model names as shown above

In [15]:
# Count and check the value under column "security"
pats_type_list = df_ford_filtered['security'].value_counts()

In [16]:
# Print the pats type list
pats_type_list

security
PATS Type B (Stand Alone PATS Module)                                                                                                             20
PATS Type C (Instrument Cluster)                                                                                                                  19
PATS Type E (Powertrain Control Module)                                                                                                           17
PATS Type A (Stand Alone PATS Module)                                                                                                              6
Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module)     2
Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module)         1
Name: count, dtype: int64

### Security Column Data Analysis
After analyzing all data within the 'security' column, it was identified two combined pieces of information that need to be separated:

1. Security system name and theft module location
2. Multiple security systems defined by manufacturing date appearing in the same row

In [17]:
# Function to filter the Ford df based on the security type column
def filter_df_security_column(pats_type, df_filtered):
    df_ford_security_type = df_filtered[df_filtered['security'] == pats_type]
    return df_ford_security_type

In [18]:
# Function to create a df based on the pats type
def split_security_info(pats_type, df_filtered):
    
    # Call the function to create the df based on the pats type
    df_make_security_type = filter_df_security_column(pats_type, df_filtered)

    # Split the data under column 'security' between 2 columns using the char '(' the delimiter
    # Furthermore, use the method replace and regex to remove the char ')'
    df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)

    # Drop the column 'security'
    df_pats_type_dropped_column = df_make_security_type.drop(columns=['security'])

    # Return the df with the modifications done
    return df_pats_type_dropped_column

  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [19]:
# Create a df containing only PATS Type B vehicles by calling split_security_info() function
# This splits the security info into separate columns for PATS type and module location
# Returns a new df with the security column split and filtered for PATS Type B vehicles
df_pats_type_b = split_security_info('PATS Type B (Stand Alone PATS Module)', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [20]:
# Print the head to see the changes
df_pats_type_b.head(2)

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
326,1998,Ford,Crown Victoria,Parameter Reset Required,PATS Type B,Stand Alone PATS Module
328,1998,Ford,Explorer (4dr),Parameter Reset Required,PATS Type B,Stand Alone PATS Module


In [21]:
# Print the pats type list to see what is the next
pats_type_list

security
PATS Type B (Stand Alone PATS Module)                                                                                                             20
PATS Type C (Instrument Cluster)                                                                                                                  19
PATS Type E (Powertrain Control Module)                                                                                                           17
PATS Type A (Stand Alone PATS Module)                                                                                                              6
Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module)     2
Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module)         1
Name: count, dtype: int64

In [22]:
# Call the function to create the df with pats type c
df_pats_type_c = split_security_info('PATS Type C (Instrument Cluster)', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [23]:
# Print the head to see the changes
df_pats_type_c.head(2)

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
431,1999,Ford,Expedition,Parameter Reset Required,PATS Type C,Instrument Cluster
434,1999,Ford,F-150 Heritage,Parameter Reset Required,PATS Type C,Instrument Cluster


In [24]:
# Call the function to create the df with pats type e
df_pats_type_e = split_security_info('PATS Type E (Powertrain Control Module)', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [25]:
# Print the head to see the changes
df_pats_type_e.head(2)

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
429,1999,Ford,Contour (V6-only),Parameter Reset Not Required,PATS Type E,Powertrain Control Module
548,2000,Ford,Contour (V6-only),Parameter Reset Not Required,PATS Type E,Powertrain Control Module


In [26]:
# Call the function
df_pats_type_a = split_security_info('PATS Type A (Stand Alone PATS Module)', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [27]:
# Print the head to see the changes
df_pats_type_a.head(2)

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
190,1996,Ford,Mustang,Parameter Reset Not Required,PATS Type A,Stand Alone PATS Module
191,1996,Ford,Taurus (Duratec & SHO only),Parameter Reset Not Required,PATS Type A,Stand Alone PATS Module


In [28]:
# Print the pats type again
pats_type_list

security
PATS Type B (Stand Alone PATS Module)                                                                                                             20
PATS Type C (Instrument Cluster)                                                                                                                  19
PATS Type E (Powertrain Control Module)                                                                                                           17
PATS Type A (Stand Alone PATS Module)                                                                                                              6
Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module)     2
Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module)         1
Name: count, dtype: int64

In [29]:
# Function to create a df after separating the columns with pats_type and pats module location based on a especific string
def separate_pats_type_pats_ecu_location(built_date, df_make_filtered):
    # Check mark with true the rows under column 'security' that contains the string 'Built July 23rd 2000'
    df_make_filtered['Built Date'] = df_make_filtered['security'].str.contains(built_date, case=False)

    # Create a df only with the string 'Built July' under the column 'security' 
    df_make_filtered_built_date = df_make_filtered[df_make_filtered['Built Date'] == True]

    # Separate the information under column 'security' using the char '\n' as the demiliter and treat different configuration based on the built date
    df_make_filtered_built_date['security'] = df_make_filtered_built_date['security'].str.split('\n')

    # Explode the column 'security' to create separate rows for each security configuration
    # This splits nested lists into individual rows while preserving the rest of the row's values
    # Example: if a row has 2 security configs, it will create 2 separate rows with the same year/make/model
    df_make_built_date_exploded_lines = df_make_filtered_built_date.explode('security').reset_index(drop=True).drop(columns=['Built Date'])

    # Split the data under column 'security' between 2 columns using the char '(' as delimiter.
    # Furthermore, use the method replace and regex to remove the char ')'
    df_make_built_date_exploded_lines[['pats_type', 'pats_module_location']] = df_make_built_date_exploded_lines['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)

    return df_make_built_date_exploded_lines

  df_make_built_date_exploded_lines[['pats_type', 'pats_module_location']] = df_make_built_date_exploded_lines['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [30]:
# Call the function to create a df containing the string 'Built July' 
df_ford_built_july_exploded_lines = separate_pats_type_pats_ecu_location('Built July', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_filtered['Built Date'] = df_make_filtered['security'].str.contains(built_date, case=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_filtered_built_date['security'] = df_make_filtered_built_date['security'].str.split('\n')


In [31]:
# Regex to get the date information on the 'pats type' column
ford_built_july_info = df_ford_built_july_exploded_lines['pats_type'].str.extract(r'^(.*?):\s*P')

In [32]:
# Convert the data to a list
ford_built_july_info_list = ford_built_july_info[0].to_list()

In [33]:
# Print the data list to see the result
ford_built_july_info_list

['Built July 23rd 2000 or Earlier',
 'Built July 24th 2000 or Later',
 'Built July 23rd 2000 or Earlier',
 'Built July 24th 2000 or Later']

In [34]:
def update_models_with_built_date_info(df_make_built_date_exploded_lines):
    # Create a list with the models updated with their respective built date from column 'pats_type'
    updated_models = []

    # Loop to concatenate the built date information under column 'model' 
    for model in df_make_built_date_exploded_lines['model'].unique():
        # Loop over the list with the built date information and get unique values
        for data in set(ford_built_july_info_list):
            # Store the updated models under a variable
            updated_model = f'{model} / {data}'
            # Append the updated models to the list
            updated_models.append(updated_model)

    # Update the column 'model' with the new models.
    df_make_built_date_exploded_lines['model'] = updated_models

    return df_make_built_date_exploded_lines

In [36]:
# Call the function to update the models with the built date
df_ford_built_july_updated_models = update_models_with_built_date_info(df_ford_built_july_exploded_lines)

In [37]:
# Print the head to see the results
df_ford_built_july_updated_models.head(2)

Unnamed: 0,year,make,model,security,parameter_reset,pats_type,pats_module_location
0,2001,Ford,Explorer Sport (2dr) / Built July 23rd 2000 or...,Built July 23rd 2000 or Earlier: PATS Type B (...,Parameter Reset Required,Built July 23rd 2000 or Earlier: PATS Type B,Stand Alone PATS Module
1,2001,Ford,Explorer Sport (2dr) / Built July 24th 2000 or...,Built July 24th 2000 or Later: PATS Type E (Po...,Parameter Reset Required,Built July 24th 2000 or Later: PATS Type E,Powertrain Control Module


In [38]:
# Remove the built date information and keep only the pats type info on the column pats_type
df_ford_built_july_updated_models['pats_type'] = df_ford_built_july_updated_models['pats_type'].str.extract(r'(?:.*?:)?\s*(PATS.*)')

In [39]:
# Drop the security column
df_ford_built_july_final_version = df_ford_built_july_updated_models.drop(columns=['security'])

In [40]:
# Print the df to see the final results.
df_ford_built_july_final_version

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
0,2001,Ford,Explorer Sport (2dr) / Built July 23rd 2000 or...,Parameter Reset Required,PATS Type B,Stand Alone PATS Module
1,2001,Ford,Explorer Sport (2dr) / Built July 24th 2000 or...,Parameter Reset Required,PATS Type E,Powertrain Control Module
2,2001,Ford,Explorer Sport Trac / Built July 23rd 2000 or ...,Parameter Reset Required,PATS Type B,Stand Alone PATS Module
3,2001,Ford,Explorer Sport Trac / Built July 24th 2000 or ...,Parameter Reset Required,PATS Type E,Powertrain Control Module


In [41]:
# Call the function to create a df containing the string 'Built February'
df_ford_filtered_built_february = separate_pats_type_pats_ecu_location('Built February', df_ford_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_filtered['Built Date'] = df_make_filtered['security'].str.contains(built_date, case=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_filtered_built_date['security'] = df_make_filtered_built_date['security'].str.split('\n')


In [42]:
# Print the df to see the results
df_ford_filtered_built_february

Unnamed: 0,year,make,model,security,parameter_reset,pats_type,pats_module_location
0,1998,Ford,Contour (V6-only),Built February 1st or earlier: PATS Type A (St...,Parameter Reset Not Required,Built February 1st or earlier: PATS Type A,Stand Alone PATS Module
1,1998,Ford,Contour (V6-only),Built February 2nd or later: PATS Type E (Powe...,Parameter Reset Not Required,Built February 2nd or later: PATS Type E,Powertrain Control Module


In [43]:
# Extract only the built info
ford_built_february_info = df_ford_filtered_built_february['security'].str.extract(r'^(.*?):\s*P')

In [44]:
# Convert the built date in a list
ford_built_february_info_list = ford_built_february_info[0].to_list()

In [45]:
# Print the list
ford_built_february_info_list

['Built February 1st or earlier', 'Built February 2nd or later']

In [46]:
# Call the function to update the models
df_ford_filtered_built_february_updated_models = update_models_with_built_date_info(df_ford_filtered_built_february)

In [47]:
# Print the df to see the results
df_ford_filtered_built_february_updated_models

Unnamed: 0,year,make,model,security,parameter_reset,pats_type,pats_module_location
0,1998,Ford,Contour (V6-only) / Built July 23rd 2000 or Ea...,Built February 1st or earlier: PATS Type A (St...,Parameter Reset Not Required,Built February 1st or earlier: PATS Type A,Stand Alone PATS Module
1,1998,Ford,Contour (V6-only) / Built July 24th 2000 or Later,Built February 2nd or later: PATS Type E (Powe...,Parameter Reset Not Required,Built February 2nd or later: PATS Type E,Powertrain Control Module


In [48]:
# Remove the built date information and keep only the pats type info on the column pats_type
df_ford_filtered_built_february_updated_models['pats_type'] = df_ford_filtered_built_february_updated_models['pats_type'].str.extract(r'(?:.*?:)?\s*(PATS.*)') 

In [49]:
# Drop column 'security'
df_ford_filtered_built_february_final_version = df_ford_filtered_built_february_updated_models.drop(columns=['security'])

In [50]:
# Print the results
df_ford_filtered_built_february_final_version 

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
0,1998,Ford,Contour (V6-only) / Built July 23rd 2000 or Ea...,Parameter Reset Not Required,PATS Type A,Stand Alone PATS Module
1,1998,Ford,Contour (V6-only) / Built July 24th 2000 or Later,Parameter Reset Not Required,PATS Type E,Powertrain Control Module


In [51]:
# Concatenate all ford dfs after changes
df_ford_filtered_concatenated = pd.concat([df_pats_type_b, df_pats_type_c, df_pats_type_e, df_pats_type_a, df_ford_built_july_final_version, df_ford_filtered_built_february_final_version])

In [52]:
# Sort the df based on year and drop the index column
df_ford_final_version = df_ford_filtered_concatenated.sort_values(['year'])

In [53]:
df_ford_final_version['model'].value_counts()

model
Mustang                                                   7
Expedition                                                6
Crown Victoria                                            5
Explorer (4dr)                                            5
Taurus (Duratec & SHO only)                               4
Explorer Sport (2dr)                                      4
Windstar                                                  4
F-150 Heritage                                            4
Taurus                                                    3
Excursion                                                 3
Focus                                                     3
F-150 Harley-Davidson                                     2
Escape                                                    2
Ranger (2.3L, 3.0L, & 4.0L)                               2
Contour (V6-only)                                         2
Ranger (3.0L & 4.0L only)                                 2
F-250 (under 8500# GVW)           

In [62]:
data = df_ford_final_version.iloc[0]

In [69]:
type(data)

pandas.core.series.Series

In [65]:
data_dict = dict(data)

In [68]:
for key, value in data_dict.items():
    print(f'{key}: {value}')

year: 1996
make: Ford
model: Taurus (Duratec & SHO only)
parameter_reset: Parameter Reset Not Required
pats_type: PATS Type A 
pats_module_location: Stand Alone PATS Module


In [86]:
# Current code shows download from GM folder
response = Supabase.storage.from_('key-relearn-procedures').list()

In [88]:
for file in response:
    print(f'{file}')

In [None]:
# Export the Ford df to csv file
df_ford_final_version.to_csv('C:\Language_Projects\Language_Projects\Python\Flagship_1\Immo_Assistant.app\dataframe_csv_files\df_ford.csv', index=False)