## Import

In [1]:
import pandas as pd
import numpy as np
import time
import psutil
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression

## Metric Function

In [2]:
process = psutil.Process()

def metrics_start():
    return {"time": time.time(), "mem": process.memory_info().rss, "cpu": psutil.cpu_percent(interval=None)}

def metrics_end(start):
    end_time = time.time()
    end_mem = process.memory_info().rss
    end_cpu = psutil.cpu_percent(interval=None)
    return {"time": end_time - start["time"], "mem_used_MB": (end_mem - start["mem"]) / (1024**2), "cpu_percent": end_cpu}


## Dataset

In [3]:
df = pd.read_csv("./datasets/tesla_deliveries_dataset_2015_2025.csv")
print(df.head())
print(df.info())

   Year  Month         Region    Model  Estimated_Deliveries  \
0  2023      5         Europe  Model S                 17646   
1  2015      2           Asia  Model X                  3797   
2  2019      1  North America  Model X                  8411   
3  2021      2  North America  Model 3                  6555   
4  2016     12    Middle East  Model Y                 12374   

   Production_Units  Avg_Price_USD  Battery_Capacity_kWh  Range_km  \
0             17922       92874.27                   120       704   
1              4164       62205.65                    75       438   
2              9189      117887.32                    82       480   
3              7311       89294.91                   120       712   
4             13537      114846.78                   120       661   

   CO2_Saved_tons           Source_Type  Charging_Stations  
0         1863.42  Interpolated (Month)              12207  
1          249.46    Official (Quarter)               7640  
2          

## Analytics

In [4]:
analytics_results = []

In [5]:
# 1. Time-Series: Year-Month + 12M Rolling Avg
start = metrics_start()
df['Date'] = pd.to_datetime(df['Year'].astype(str) + '-' + df['Month'].astype(str) + '-01')
ts = df.groupby(['Model', 'Date'])['Estimated_Deliveries'].sum().reset_index()
ts = ts.sort_values(['Model', 'Date'])  # Sort by Model then Date
ts['Rolling_12M_Avg'] = ts.groupby('Model')['Estimated_Deliveries'].transform(lambda x: x.rolling(12, min_periods=1).mean())
ts_metrics = metrics_end(start)
analytics_results.append(("Time-Series", ts_metrics))
print("Metrics:", ts_metrics)
ts.head(30)

Metrics: {'time': 0.0064122676849365234, 'mem_used_MB': 1.0, 'cpu_percent': 11.1}


Unnamed: 0,Model,Date,Estimated_Deliveries,Rolling_12M_Avg
0,Cybertruck,2015-01-01,28656,28656.0
1,Cybertruck,2015-02-01,34350,31503.0
2,Cybertruck,2015-03-01,30062,31022.666667
3,Cybertruck,2015-04-01,37379,32611.75
4,Cybertruck,2015-05-01,37488,33587.0
5,Cybertruck,2015-06-01,44037,35328.666667
6,Cybertruck,2015-07-01,28908,34411.428571
7,Cybertruck,2015-08-01,38346,34903.25
8,Cybertruck,2015-09-01,44765,35999.0
9,Cybertruck,2015-10-01,30413,35440.4


In [6]:
# 2. Z-Score Anomaly Detection
print("=== 2. Z-Score Anomaly Detection ===")
start = metrics_start()
mean_del = df['Estimated_Deliveries'].mean()
std_del = df['Estimated_Deliveries'].std()
outliers = df[(df['Estimated_Deliveries'] - mean_del).abs()/std_del > 3]
outliers_metrics = metrics_end(start)
analytics_results.append(("Z-Score Anomaly", outliers_metrics))
print("Metrics:", outliers_metrics)
outliers.head(10)

=== 2. Z-Score Anomaly Detection ===
Metrics: {'time': 0.0022361278533935547, 'mem_used_MB': 0.0, 'cpu_percent': 0.0}


Unnamed: 0,Year,Month,Region,Model,Estimated_Deliveries,Production_Units,Avg_Price_USD,Battery_Capacity_kWh,Range_km,CO2_Saved_tons,Source_Type,Charging_Stations,Date
281,2015,3,Asia,Model 3,25410,28802,73659.66,60,341,1299.72,Official (Quarter),4127,2015-03-01
725,2015,6,Middle East,Model Y,22315,22673,81318.87,82,472,1579.9,Interpolated (Month),5909,2015-06-01
1341,2017,12,North America,Model 3,25704,28939,61247.57,120,661,2548.55,Official (Quarter),8388,2017-12-01
2490,2022,5,Europe,Model Y,21772,23530,89555.17,60,358,1169.16,Official (Quarter),13826,2022-05-01


In [7]:
# 3. Model-Level Z-Score
print("=== 3. Model Z-Score ===")
start = metrics_start()
df_model = df.copy()
df_model['mean_del'] = df_model.groupby('Model')['Estimated_Deliveries'].transform('mean')
df_model['std_del'] = df_model.groupby('Model')['Estimated_Deliveries'].transform('std')
df_model['Zscore'] = (df_model['Estimated_Deliveries'] - df_model['mean_del']) / df_model['std_del']
df_model_z = df_model[df_model['Zscore'].abs() > 3]
model_z_metrics = metrics_end(start)
analytics_results.append(("Model Z-Score", model_z_metrics))
print("Metrics:", model_z_metrics)
df_model_z.head(10)

=== 3. Model Z-Score ===
Metrics: {'time': 0.003569364547729492, 'mem_used_MB': 0.125, 'cpu_percent': 20.0}


Unnamed: 0,Year,Month,Region,Model,Estimated_Deliveries,Production_Units,Avg_Price_USD,Battery_Capacity_kWh,Range_km,CO2_Saved_tons,Source_Type,Charging_Stations,Date,mean_del,std_del,Zscore
281,2015,3,Asia,Model 3,25410,28802,73659.66,60,341,1299.72,Official (Quarter),4127,2015-03-01,10079.392045,3880.118224,3.951067
725,2015,6,Middle East,Model Y,22315,22673,81318.87,82,472,1579.9,Interpolated (Month),5909,2015-06-01,9896.371212,4018.76186,3.090163
729,2018,4,Europe,Model X,21473,24427,78461.34,60,334,1075.8,Interpolated (Month),11576,2018-04-01,9767.5,3786.120933,3.091687
1341,2017,12,North America,Model 3,25704,28939,61247.57,120,661,2548.55,Official (Quarter),8388,2017-12-01,10079.392045,3880.118224,4.026838


In [8]:
# 4. Supply Chain Stress Index
print("=== 4. Supply Chain Stress Index ===")
start = metrics_start()
supply_chain = df.groupby(['Year', 'Model']).agg({
    'Production_Units': 'sum',
    'Estimated_Deliveries': 'sum'
}).rename(columns={'Production_Units': 'Prod', 'Estimated_Deliveries': 'Del'}).reset_index()
supply_chain['Stress_Index'] = (supply_chain['Prod'] - supply_chain['Del']) / supply_chain['Del']
supply_chain = supply_chain.sort_values(['Year', 'Model']).reset_index(drop=True)
supply_chain_metrics = metrics_end(start)
analytics_results.append(("Supply Chain Stress", supply_chain_metrics))
print("Metrics:", supply_chain_metrics)
supply_chain.head(20)

=== 4. Supply Chain Stress Index ===
Metrics: {'time': 0.00531458854675293, 'mem_used_MB': 0.0, 'cpu_percent': 0.0}


Unnamed: 0,Year,Model,Prod,Del,Stress_Index
0,2015,Cybertruck,461503,430202,0.072759
1,2015,Model 3,566597,523716,0.081878
2,2015,Model S,544677,510293,0.067381
3,2015,Model X,466300,437366,0.066155
4,2015,Model Y,506618,473690,0.069514
5,2016,Cybertruck,511311,475256,0.075864
6,2016,Model 3,504713,467772,0.078972
7,2016,Model S,563981,523248,0.077846
8,2016,Model X,524056,485308,0.079842
9,2016,Model Y,512939,482497,0.063093


In [9]:
# 5. CO2 Savings Intensity
print("=== 5. CO2 Savings Intensity ===")
start = metrics_start()
env_index = df.groupby('Model').agg(
    CO2_per_Car = ('CO2_Saved_tons','sum'),
    Total_Del = ('Estimated_Deliveries','sum'),
    Avg_Range = ('Range_km','mean')
)
env_index['CO2_per_Car'] = env_index['CO2_per_Car'] / env_index['Total_Del']
env_index = env_index.sort_values('CO2_per_Car', ascending=False)
env_index_metrics = metrics_end(start)
analytics_results.append(("CO2 Savings Intensity", env_index_metrics))
print("Metrics:", env_index_metrics)
env_index.head(20)

=== 5. CO2 Savings Intensity ===
Metrics: {'time': 0.005942583084106445, 'mem_used_MB': 0.125, 'cpu_percent': 12.5}


Unnamed: 0_level_0,CO2_per_Car,Total_Del,Avg_Range
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Model 3,0.075691,5321919,503.221591
Model X,0.075227,5157240,500.776515
Model S,0.074841,5380385,501.223485
Cybertruck,0.074809,5109779,499.827652
Model Y,0.074379,5225284,496.238636


In [10]:
# 6. Linear Regression Forecast
print("=== 6. Linear Regression Forecast ===")
from sklearn.linear_model import LinearRegression

start = metrics_start()
yearly = df.groupby('Year')['Estimated_Deliveries'].sum().reset_index()
lr = LinearRegression()
lr.fit(yearly[['Year']], yearly['Estimated_Deliveries'])
future = pd.DataFrame({'Year':[2026,2027]})
future['Predicted_Deliveries'] = lr.predict(future[['Year']])
lr_metrics = metrics_end(start)
analytics_results.append(("Linear Regression", lr_metrics))
print("Metrics:", lr_metrics)
future

=== 6. Linear Regression Forecast ===
Metrics: {'time': 0.004916667938232422, 'mem_used_MB': 0.625, 'cpu_percent': 0.0}


Unnamed: 0,Year,Predicted_Deliveries
0,2026,2376197.0
1,2027,2375342.0


In [11]:
# 7. Price Elasticity Correlation
print("=== 7. Price Elasticity Correlation ===")
start = metrics_start()
elasticity = df['Avg_Price_USD'].corr(df['Estimated_Deliveries'])
elasticity_metrics = metrics_end(start)
print("Price–Demand Correlation:", elasticity)
print("Metrics:", elasticity_metrics)
analytics_results.append(("Price Elasticity Correlation", elasticity_metrics))

=== 7. Price Elasticity Correlation ===
Price–Demand Correlation: -0.02754583970952005
Metrics: {'time': 0.00208282470703125, 'mem_used_MB': 0.125, 'cpu_percent': 66.7}


In [12]:
# 8. K-Means Clustering
print("=== 8. K-Means Clustering ===")
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

start = metrics_start()
features = df[['Estimated_Deliveries','Battery_Capacity_kWh','Range_km','Avg_Price_USD']].copy()
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)
kmeans = KMeans(n_clusters=4, random_state=42)
df['Cluster'] = kmeans.fit_predict(X_scaled)

cluster_counts = df['Cluster'].value_counts().sort_index().reset_index()
cluster_counts.columns = ['Cluster', 'Count']
kmeans_metrics = metrics_end(start)
analytics_results.append(("K-Means Clustering", kmeans_metrics))
print("Metrics:", kmeans_metrics)
cluster_counts

=== 8. K-Means Clustering ===


  super()._check_params_vs_input(X, default_n_init=10)


Metrics: {'time': 0.29349613189697266, 'mem_used_MB': 1.75, 'cpu_percent': 96.2}


Unnamed: 0,Cluster,Count
0,0,792
1,1,514
2,2,820
3,3,514


In [13]:
# 9. Regional Performance Index
print("=== 9. Regional Performance Index ===")
start = metrics_start()
region_perf = df.groupby('Region').agg({'Estimated_Deliveries':'sum','CO2_Saved_tons':'sum','Range_km':'mean'})
region_perf['Score'] = region_perf['Estimated_Deliveries']*0.5 + region_perf['CO2_Saved_tons']*0.3 + region_perf['Range_km']*0.2
region_perf = region_perf.sort_values('Score', ascending=False)
region_metrics = metrics_end(start)
analytics_results.append(("Regional Performance", region_metrics))
print("Metrics:", region_metrics)
region_perf.head(20)

=== 9. Regional Performance Index ===
Metrics: {'time': 0.005266666412353516, 'mem_used_MB': 0.0, 'cpu_percent': 100.0}


Unnamed: 0_level_0,Estimated_Deliveries,CO2_Saved_tons,Range_km,Score
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Middle East,6698045,503797.14,502.543939,3500262.0
Asia,6539935,484357.26,495.498485,3415374.0
Europe,6494035,485135.79,499.662121,3392658.0
North America,6462592,491073.06,503.325758,3378719.0


In [14]:
# 10. Model Lifecycle Analysis
print("=== 10. Model Lifecycle Analysis ===")
start = metrics_start()
df['Year'] = df['Year'].astype(int)
lifecycle = df.sort_values(['Model', 'Year']).copy().reset_index(drop=True)
lifecycle['YoY_Del_Growth'] = lifecycle.groupby('Model')['Estimated_Deliveries'].pct_change()
lifecycle['YoY_Prod_Growth'] = lifecycle.groupby('Model')['Production_Units'].pct_change()
lifecycle['YoY_Del_Growth'] = lifecycle['YoY_Del_Growth'].round(4)
lifecycle['YoY_Prod_Growth'] = lifecycle['YoY_Prod_Growth'].round(4)
lifecycle_metrics = metrics_end(start)
analytics_results.append(("Model Lifecycle", lifecycle_metrics))
print("Metrics:", lifecycle_metrics)
lifecycle.head(20)

=== 10. Model Lifecycle Analysis ===
Metrics: {'time': 0.006338834762573242, 'mem_used_MB': 0.625, 'cpu_percent': 40.0}


Unnamed: 0,Year,Month,Region,Model,Estimated_Deliveries,Production_Units,Avg_Price_USD,Battery_Capacity_kWh,Range_km,CO2_Saved_tons,Source_Type,Charging_Stations,Date,Cluster,YoY_Del_Growth,YoY_Prod_Growth
0,2015,3,North America,Cybertruck,5716,6522,116775.0,60,357,306.09,Interpolated (Month),7441,2015-03-01,2,,
1,2015,7,North America,Cybertruck,4405,4472,60700.14,60,337,222.67,Official (Quarter),4928,2015-07-01,0,-0.2294,-0.3143
2,2015,10,Asia,Cybertruck,5675,6461,117974.08,120,716,609.49,Estimated (Region),8690,2015-10-01,3,0.2883,0.4448
3,2015,12,Middle East,Cybertruck,17203,17897,105829.14,100,565,1457.95,Interpolated (Month),8462,2015-12-01,3,2.0314,1.77
4,2015,7,Asia,Cybertruck,113,118,115067.04,120,709,12.02,Interpolated (Month),14580,2015-07-01,3,-0.9934,-0.9934
5,2015,6,North America,Cybertruck,12581,13909,64907.49,100,556,1049.26,Estimated (Region),3174,2015-06-01,1,110.3363,116.8729
6,2015,6,Europe,Cybertruck,9777,10705,66810.19,82,454,665.81,Estimated (Region),4546,2015-06-01,0,-0.2229,-0.2304
7,2015,9,Middle East,Cybertruck,6086,6274,50769.71,120,668,609.82,Official (Quarter),13800,2015-09-01,1,-0.3775,-0.4139
8,2015,2,Europe,Cybertruck,5325,5514,100975.14,82,466,372.22,Interpolated (Month),10587,2015-02-01,2,-0.125,-0.1211
9,2015,8,Europe,Cybertruck,5021,5368,116309.83,75,418,314.82,Official (Quarter),10034,2015-08-01,2,-0.0571,-0.0265


## Metrics

In [15]:
summary_df = pd.DataFrame([{"analytics": a[0], **a[1]} for a in analytics_results])
summary_df.to_csv("./results/pandas_summary.csv", index=False)
print("\n=== Pandas Analytics Summary ===")
summary_df


=== Pandas Analytics Summary ===


Unnamed: 0,analytics,time,mem_used_MB,cpu_percent
0,Time-Series,0.006412,1.0,11.1
1,Z-Score Anomaly,0.002236,0.0,0.0
2,Model Z-Score,0.003569,0.125,20.0
3,Supply Chain Stress,0.005315,0.0,0.0
4,CO2 Savings Intensity,0.005943,0.125,12.5
5,Linear Regression,0.004917,0.625,0.0
6,Price Elasticity Correlation,0.002083,0.125,66.7
7,K-Means Clustering,0.293496,1.75,96.2
8,Regional Performance,0.005267,0.0,100.0
9,Model Lifecycle,0.006339,0.625,40.0
