# In [9]:
import pandas as pd

def create_lookup_tables(df, columns_to_encode):
    """Build a value -> integer-code mapping for each requested column.

    Args:
        df: DataFrame holding the columns to encode.
        columns_to_encode: Iterable of column labels present in ``df``.

    Returns:
        A dict keyed by column label. Each entry is itself a dict that maps
        every distinct value of that column to a code ``0, 1, 2, ...``,
        numbered in the order ``Series.unique()`` returns the values.
    """
    def _codes_for(column):
        # One consecutive integer per distinct value in the column.
        distinct = df[column].unique()
        return dict(zip(distinct, range(len(distinct))))

    return {column: _codes_for(column) for column in columns_to_encode}

def encode_with_lookup_tables(df, lookup_tables):
    """Return a copy of ``df`` with the listed columns replaced by their codes.

    Args:
        df: DataFrame to encode; it is not modified.
        lookup_tables: Dict of ``column label -> {original value: code}``.
            Only the columns named here are touched.

    Returns:
        A new DataFrame in which each listed column has been passed through
        ``Series.map`` with its lookup table. Per pandas' ``Series.map``
        behaviour, values missing from a table come out as NaN.
    """
    result = df.copy()
    for column in lookup_tables:
        mapping = lookup_tables[column]
        result[column] = result[column].map(mapping)
    return result

# Input CSV to encode.
# NOTE(review): the braces are literal (this is not an f-string), so pandas
# looks for a file whose name really contains "{17}-{8}" — confirm that is the
# intended name and not an unformatted template.
file_path = 'green_tripdata_{17}-{8}clean.csv'

# Categorical columns that are replaced by integer codes
columns_to_encode = ['vendor', 'store_and_fwd_flag', 'rate_type', 'pu_location', 'do_location', 'payment_type', 'trip_type']

# Read the CSV file
df = pd.read_csv(file_path)

# Create one {original value: code} lookup table per column
lookup_tables = create_lookup_tables(df, columns_to_encode)

# Combine all lookup tables into a single dictionary keyed by
# (column, original value). Keying by the value alone would let a value that
# occurs in several columns (e.g. 1 in two different columns) overwrite the
# earlier entry, silently dropping codes from the combined table.
concatenated_lookup_table = {
    (column, value): code
    for column, lookup_table in lookup_tables.items()
    for value, code in lookup_table.items()
}

# Export the combined lookup table; the 'Column' field says which column each
# (original value, code) pair belongs to.
pd.DataFrame(
    [(column, value, code) for (column, value), code in concatenated_lookup_table.items()],
    columns=['Column', 'Original_Value', 'Encoded_Value'],
).to_csv('concatenated_lookup_table.csv', index=False)

# Export each column's lookup table to its own CSV file
for column, lookup_table in lookup_tables.items():
    pd.DataFrame(
        list(lookup_table.items()),
        columns=['Original_Value', 'Encoded_Value'],
    ).to_csv(f'{column}_lookup_table.csv', index=False)

# Encode the DataFrame using the lookup tables
encoded_df = encode_with_lookup_tables(df, lookup_tables)

# Save the encoded DataFrame next to the lookup tables
encoded_df.to_csv('green_tripdata_{17}-{8}encoded.csv', index=False)