In [1]:
import pandas as pd
import os
from supabase import create_client, Client

In [29]:
# Read the Supabase project URL and the anonymous API key from the environment.
# os.getenv yields None for a variable that is not set.
url: str = os.getenv("NEXT_PUBLIC_SUPABASE_URL")
key: str = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")

In [30]:
# Initialize the Supabase client with the URL and anonymous key read from the environment.
# NOTE(review): `url` / `key` are None when the environment variables are not set —
# confirm both are exported before running this cell.
Supabase: Client = create_client(url, key)

In [31]:
# Request every column ("*") of year_make_model_table and keep the raw response object.
# NOTE(review): the DataFrame built from this response has exactly 1000 rows (see the
# df.info() output below); presumably that is a server-side row cap rather than the
# table's real size — verify that no rows are being cut off.
response = (
    Supabase
    .table("year_make_model_table")
    .select("*")
    .execute()
)

In [32]:
# Pull the payload (the `data` attribute) out of the Supabase response.
year_make_model_data = response.data

In [33]:
# Build a DataFrame from the queried records.
df = pd.DataFrame(year_make_model_data)

In [34]:
# Cast the 'model' column to pandas' string dtype, because some model names are
# purely numeric (for instance the BMW 318, 328 and 525).
df = df.astype({'model':'string'})

In [35]:
# Confirm that the dtype of the 'model' column is now string.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               1000 non-null   int64 
 1   year             1000 non-null   object
 2   make             1000 non-null   object
 3   model            1000 non-null   string
 4   security         767 non-null    object
 5   parameter_reset  267 non-null    object
 6   created_at       1000 non-null   object
 7   updated_at       1000 non-null   object
dtypes: int64(1), object(6), string(1)
memory usage: 62.6+ KB


In [36]:
# Remove the identifier and timestamp columns, which are not used in the rest of this notebook.
df_dropped_columns = df.drop(['id', 'created_at', 'updated_at'], axis='columns')

In [37]:
# Function to create a df based on the make
def create_df_make(make: str, df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose 'make' column equals ``make``.

    Parameters
    ----------
    make : str
        Manufacturer name to match exactly (e.g. 'Mercury').
    df : pandas.DataFrame
        Frame that contains a 'make' column.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows. The original index labels
        are preserved; the frame is empty when nothing matches.

    Raises
    ------
    KeyError
        If ``df`` has no 'make' column.
    """
    # .copy() detaches the result from `df`, so callers can add or overwrite
    # columns on the returned frame without triggering SettingWithCopyWarning
    # and without any doubt about whether `df` itself is modified.
    df_make_filtered = df[df['make'] == make].copy()
    return df_make_filtered

In [38]:
# Keep only the rows whose make is 'Mercury' (identifier/timestamp columns already dropped).
df_mercury = create_df_make('Mercury', df_dropped_columns)

In [39]:
# Show the column dtypes and non-null counts of the Mercury subset.
df_mercury.info()

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 204 to 929
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   year             24 non-null     object
 1   make             24 non-null     object
 2   model            24 non-null     string
 3   security         24 non-null     object
 4   parameter_reset  24 non-null     object
dtypes: object(4), string(1)
memory usage: 1.1+ KB


In [40]:
# Count how many rows each distinct value of the 'model' column has.
df_mercury['model'].value_counts() 

model
Grand Marquis           5
Mountaineer (4dr)       5
Sable (Duratec only)    4
Cougar                  4
Mystique (V-6 only)     3
Sable                   3
Name: count, dtype: Int64

### Note: there's no need to modify the model names as shown above

In [41]:
# Count the occurrences of each distinct value in the 'security' column.
# Despite the `_list` suffix, value_counts() returns a pandas Series (value -> count);
# it is displayed in a later cell.
mercury_pats_type_list = df_mercury['security'].value_counts()

### Security Column Data Analysis
After analyzing all data within the 'security' column, two kinds of combined information were identified that need to be separated:

1. Security system name and theft module location
2. Multiple security systems defined by manufacturing date appearing in the same row

In [42]:
# Import the functions from the Ford notebook, as the changes needed in the Mercury df
# are basically the same as the ones done on the Ford df.
# NOTE(review): %run executes the whole Ford notebook in this kernel (its output below
# repeats the 1000-row df.info() and emits SettingWithCopyWarning), so it presumably
# also rebinds names defined earlier in this notebook — verify which names it overwrites.
%run df_ford.ipynb

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               1000 non-null   int64 
 1   year             1000 non-null   object
 2   make             1000 non-null   object
 3   model            1000 non-null   string
 4   security         767 non-null    object
 5   parameter_reset  267 non-null    object
 6   created_at       1000 non-null   object
 7   updated_at       1000 non-null   object
dtypes: int64(1), object(6), string(1)
memory usage: 62.6+ KB


  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)':

In [43]:
# Display the counts of each distinct 'security' value computed earlier for Mercury.
mercury_pats_type_list

security
PATS Type B (Stand Alone PATS Module)                                                                                                                   11
PATS Type E (Powertrain Control Module)                                                                                                                 10
PATS Type A (Stand Alone PATS Module)                                                                                                                    2
Built February 1st 1998 or Earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd 1998 or Later: PATS Type E (Powertrain Control Module)     1
Name: count, dtype: int64

In [44]:
# Call the function (defined in the Ford notebook) to split the PATS Type B rows
# into the PATS type and the module location.
df_pats_b = split_security_info('PATS Type B (Stand Alone PATS Module)', df_mercury)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [45]:
# Call the function (defined in the Ford notebook) to split the PATS Type E rows
# into the PATS type and the module location.
df_pats_e = split_security_info('PATS Type E (Powertrain Control Module)', df_mercury)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [46]:
# Call the function (defined in the Ford notebook) to split the PATS Type A rows
# into the PATS type and the module location.
df_pats_a = split_security_info('PATS Type A (Stand Alone PATS Module)', df_mercury)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_security_type[['pats_type', 'pats_module_location']] = df_make_security_type['security'].str.split('(', n=1, expand=True).replace({'\)': ''}, regex=True)


In [47]:
# Call the function (defined in the Ford notebook) to create a df from the rows whose
# 'security' value contains the string 'Built February'.
df_mercury_built_february = separate_pats_type_pats_ecu_location('Built February', df_mercury)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_make_filtered_built_date['security'] = df_make_filtered_built_date['security'].str.split('\n')


In [48]:
# Regex to get the build-date information from the 'security' column: capture everything
# from the start of the value up to the first ':' that is followed by optional
# whitespace and a 'P'.
df_mercury_built_february_info = df_mercury_built_february['security'].str.extract(r'^(.*?):\s*P')

In [49]:
# Convert the extracted build-date column (column 0, the single capture group) to a list.
mercury_built_february_info_list = df_mercury_built_february_info[0].to_list()

In [50]:
# Display the list of extracted build-date strings.
mercury_built_february_info_list

['Built February 1st 1998 or Earlier', 'Built February 2nd 1998 or Later']

In [None]:
# Call the function (defined in the Ford notebook) to update the models with the built date.
# NOTE(review): mercury_built_february_info_list is not passed to the helper, and the final
# table displayed further down shows 'Built July 23rd/24th 2000 ...' appended to the model
# names although the dates extracted here are 'Built February 1st/2nd 1998 ...'. Presumably
# the helper reads its dates from a variable left behind by the Ford notebook — verify and
# make it use the Mercury dates.
df_mercury_built_february_updated_models = update_models_with_built_date_info(df_mercury_built_february)

In [None]:
# Overwrite 'pats_type' with only the part that starts at 'PATS': the regex optionally
# skips a leading prefix ending in ':' plus whitespace, then captures from 'PATS' onwards.
# This mutates the frame in place.
df_mercury_built_february_updated_models['pats_type'] = df_mercury_built_february_updated_models['pats_type'].str.extract(r'(?:.*?:)?\s*(PATS.*)')

In [66]:
# The 'security' column has been split into separate columns, so remove it.
df_mercury_built_february_final_version = df_mercury_built_february_updated_models.drop(['security'], axis='columns')

In [67]:
# Display the frame for the build-date-dependent rows after dropping 'security'.
df_mercury_built_february_final_version

Unnamed: 0,year,make,model,parameter_reset,pats_type,pats_module_location
0,1998,Mercury,Mystique (V-6 only) / Built July 23rd 2000 or ...,Parameter Reset Not Required,PATS Type A,Stand Alone PATS Module
1,1998,Mercury,Mystique (V-6 only) / Built July 24th 2000 or ...,Parameter Reset Not Required,PATS Type E,Powertrain Control Module


In [68]:
# Stack the per-PATS-type frames and the build-date frame into one df.
# ignore_index is not set, so each piece keeps its own index labels.
df_mercury_concatenated = pd.concat([df_pats_a, df_pats_b, df_pats_e, df_mercury_built_february_final_version])

In [69]:
# Order the combined rows by year (ascending).
# The source table's 'year' column has object dtype (see the df.info() output above).
df_mercury_final_version = df_mercury_concatenated.sort_values(by=['year'])

In [None]:
# Export the Mercury df to csv file (the index is not written).
# The Windows path is a raw string so its backslashes are never read as escape sequences;
# the un-prefixed literal relied on invalid escapes such as '\L' and '\d', which newer
# Python versions warn about. The resulting path is unchanged.
df_mercury_final_version.to_csv(r'C:\Language_Projects\Language_Projects\Python\Flagship_1\Immo_Assistant.app\dataframe_csv_files\df_mercury.csv', index=False)