In [1]:
import pandas as pd
import os

In [2]:
tan_chong_stock_code = 4405

# Load each year's data
cars_2023 = pd.read_parquet("/Users/yujing/Documents/GitHub/automotive-bursa/data/cars_2023.parquet")
cars_2024 = pd.read_parquet("/Users/yujing/Documents/GitHub/automotive-bursa/data/cars_2024.parquet")
cars_2025 = pd.read_parquet("/Users/yujing/Documents/GitHub/automotive-bursa/data/cars_2025.parquet")

# Combine into one DataFrame
combined_df = pd.concat([cars_2023, cars_2024, cars_2025], ignore_index=True)

# Save combined data
combined_df.to_csv("vehicle_registration_combined.parquet", index=False)


In [3]:
combined_df.head()

Unnamed: 0,date_reg,type,maker,model,colour,fuel,state
0,2023-01-01,motokar,BMW,3 Series,black,petrol,Johor
1,2023-01-01,jip,BMW,X5,grey,petrol,Johor
2,2023-01-01,jip,BMW,X7,blue,petrol,Johor
3,2023-01-01,jip,BMW,iX,grey,electric,Kedah
4,2023-01-01,jip,BMW,iX3,black,electric,Johor


In [4]:
# Filter the data
filtered_df = combined_df[combined_df['maker'].str.contains(
    'Nissan|Subaru|Mitsubishi|Renault', case=False, na=False)]

# Reset index
filtered_df = filtered_df.reset_index(drop=True)

In [5]:
filtered_df.dtypes

date_reg    datetime64[ns]
type                object
maker               object
model               object
colour              object
fuel                object
state               object
dtype: object

In [7]:
filtered_df.head()

Unnamed: 0,date_reg,type,maker,model,colour,fuel,state
0,2023-01-01,pick_up,Nissan,Navara,grey,greendiesel,Terengganu
1,2023-01-02,motokar_pelbagai_utiliti,Mitsubishi,Xpander,black,petrol,Rakan Niaga
2,2023-01-02,motokar,Nissan,Almera,white,petrol,Kedah
3,2023-01-02,jip,Subaru,Forester,grey,petrol,Rakan Niaga
4,2023-01-03,motokar_pelbagai_utiliti,Mitsubishi,Grandis,purple,petrol,Perlis


In [61]:
# Ensure date_reg is datetime format (keeping as datetime)
filtered_df['date_reg'] = pd.to_datetime(filtered_df['date_reg'])

# Group by quarter and count number of vehicles
quarterly_counts = (
    filtered_df
    .groupby(filtered_df['date_reg'].dt.to_period('Q'))
    .size()
    .reset_index(name='Vehicle_count')
)

# Convert Period to quarter-end datetime and normalize to remove time component
quarterly_counts['Quarter'] = quarterly_counts['date_reg'].dt.to_timestamp(how='end').dt.normalize()

# Reorder columns
quarterly_counts = quarterly_counts[['Quarter', 'Vehicle_count']]

# Preview
print("=== QUARTERLY VEHICLE REGISTRATIONS ===")
print(quarterly_counts)

=== QUARTERLY VEHICLE REGISTRATIONS ===
     Quarter  Vehicle_count
0 2023-03-31           9175
1 2023-06-30           8559
2 2023-09-30           7436
3 2023-12-31           8170
4 2024-03-31           7218
5 2024-06-30           6266
6 2024-09-30           5633
7 2024-12-31           5824
8 2025-03-31           5781


In [62]:
quarterly_counts.dtypes

Quarter          datetime64[ns]
Vehicle_count             int64
dtype: object

In [63]:
# Reload the raw data
tanchong_df = pd.read_csv("/Users/yujing/Documents/GitHub/automotive-bursa/data/quarterly_financials/Tan_Chong_Motor_Holdings.csv")
tanchong_df.columns = tanchong_df.iloc[0]  # Set the first row as header
tanchong_df = tanchong_df.iloc[1:].reset_index(drop=True)  # Drop the header row from data

# Remove rows where any column contains "Financial Year"
tanchong_df = tanchong_df[~tanchong_df.apply(lambda row: row.astype(str).str.contains("Financial Year", na=False).any(), axis=1)]

# Convert dates
tanchong_df["Ann. Date"] = pd.to_datetime(tanchong_df["Ann. Date"], format="%d-%b-%Y", errors='coerce')
tanchong_df["Quarter"] = pd.to_datetime(tanchong_df["Quarter"], format="%d-%b-%Y", errors='coerce')

# Filter by years 2023–2025
tanchong_df = tanchong_df[tanchong_df["Quarter"].dt.year.between(2023, 2025)]

# Format to yyyy-mm-dd
tanchong_df["Ann. Date"] = tanchong_df["Ann. Date"].dt.strftime("%Y-%m-%d")
tanchong_df["Quarter"] = tanchong_df["Quarter"].dt.strftime("%Y-%m-%d")

# Preview
tanchong_df


Unnamed: 0,Ann. Date,Quarter,Revenue,PBT,NP,NP to SH,NP Margin,ROE,EPS,DPS,...,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
1,2025-02-28,2024-12-31,511214,-52077,-69022,-68054,-13.50%,-2.71%,-10.44,0.0,...,,,,,,,,,,
2,2024-11-26,2024-09-30,462655,-103755,-89125,-90284,-19.26%,-3.52%,-13.85,0.0,...,,,,,,,,,,
3,2024-08-30,2024-06-30,545086,-42297,-42941,-40109,-7.88%,-1.50%,-6.15,0.0,...,,,,,,,,,,
4,2024-05-24,2024-03-31,563700,-16148,-19717,-15719,-3.50%,-0.58%,-2.41,1.0,...,,,,,,,,,,
6,2024-02-29,2023-12-31,643830,-63312,-54285,-54847,-8.43%,-2.00%,-8.41,0.0,...,,,,,,,,,,
7,2023-11-27,2023-09-30,649820,-52331,-53804,-50696,-8.28%,-1.82%,-7.78,0.0,...,,,,,,,,,,
8,2023-08-28,2023-06-30,619191,-8049,-20017,-18134,-3.23%,-0.64%,-2.78,0.0,...,,,,,,,,,,
9,2023-05-24,2023-03-31,619885,2549,-5745,-5065,-0.93%,-0.18%,-0.78,1.0,...,,,,,,,,,,


In [64]:
tanchong_df.dtypes

0
Ann. Date              object
Quarter                object
Revenue                object
PBT                    object
NP                     object
NP to SH               object
NP Margin              object
ROE                    object
EPS                    object
DPS                    object
NAPS                   object
QoQ                    object
YoY                    object
Unnamed: 13_level_1    object
Unnamed: 14_level_1    object
Unnamed: 15_level_1    object
Unnamed: 16_level_1    object
Unnamed: 17_level_1    object
Unnamed: 18_level_1    object
Unnamed: 19_level_1    object
Unnamed: 20_level_1    object
Unnamed: 21_level_1    object
Unnamed: 22_level_1    object
Unnamed: 23_level_1    object
Unnamed: 24_level_1    object
Unnamed: 25_level_1    object
Unnamed: 26_level_1    object
Unnamed: 27_level_1    object
Unnamed: 28_level_1    object
Unnamed: 29_level_1    object
Unnamed: 30_level_1    object
Unnamed: 31_level_1    object
Unnamed: 32_level_1    object
Unnamed:

In [65]:
# Drop all columns starting with 'Unnamed:'
tanchong_df = tanchong_df.loc[:, ~tanchong_df.columns.str.startswith("Unnamed:")]

tanchong_df

Unnamed: 0,Ann. Date,Quarter,Revenue,PBT,NP,NP to SH,NP Margin,ROE,EPS,DPS,NAPS,QoQ,YoY
1,2025-02-28,2024-12-31,511214,-52077,-69022,-68054,-13.50%,-2.71%,-10.44,0.0,3.85,24.62%,-24.08%
2,2024-11-26,2024-09-30,462655,-103755,-89125,-90284,-19.26%,-3.52%,-13.85,0.0,3.93,-125.10%,-78.09%
3,2024-08-30,2024-06-30,545086,-42297,-42941,-40109,-7.88%,-1.50%,-6.15,0.0,4.1,-155.16%,-121.18%
4,2024-05-24,2024-03-31,563700,-16148,-19717,-15719,-3.50%,-0.58%,-2.41,1.0,4.17,71.34%,-210.35%
6,2024-02-29,2023-12-31,643830,-63312,-54285,-54847,-8.43%,-2.00%,-8.41,0.0,4.2,-8.19%,-22.67%
7,2023-11-27,2023-09-30,649820,-52331,-53804,-50696,-8.28%,-1.82%,-7.78,0.0,4.28,-179.56%,-837.07%
8,2023-08-28,2023-06-30,619191,-8049,-20017,-18134,-3.23%,-0.64%,-2.78,0.0,4.37,-258.03%,-390.42%
9,2023-05-24,2023-03-31,619885,2549,-5745,-5065,-0.93%,-0.18%,-0.78,1.0,4.41,88.67%,74.05%


In [67]:
# Ensure the 'Quarter' columns in both DataFrames are datetime type and normalized (date only)
quarterly_counts['Quarter'] = pd.to_datetime(quarterly_counts['Quarter']).dt.normalize()
tanchong_df['Quarter'] = pd.to_datetime(tanchong_df['Quarter']).dt.normalize()

# Merge the two DataFrames on 'Quarter'
merged_df = pd.merge(tanchong_df, quarterly_counts, on='Quarter', how='left')

merged_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tanchong_df['Quarter'] = pd.to_datetime(tanchong_df['Quarter']).dt.normalize()


Unnamed: 0,Ann. Date,Quarter,Revenue,PBT,NP,NP to SH,NP Margin,ROE,EPS,DPS,NAPS,QoQ,YoY,Vehicle_count
0,2025-02-28,2024-12-31,511214,-52077,-69022,-68054,-13.50%,-2.71%,-10.44,0.0,3.85,24.62%,-24.08%,5824
1,2024-11-26,2024-09-30,462655,-103755,-89125,-90284,-19.26%,-3.52%,-13.85,0.0,3.93,-125.10%,-78.09%,5633
2,2024-08-30,2024-06-30,545086,-42297,-42941,-40109,-7.88%,-1.50%,-6.15,0.0,4.1,-155.16%,-121.18%,6266
3,2024-05-24,2024-03-31,563700,-16148,-19717,-15719,-3.50%,-0.58%,-2.41,1.0,4.17,71.34%,-210.35%,7218
4,2024-02-29,2023-12-31,643830,-63312,-54285,-54847,-8.43%,-2.00%,-8.41,0.0,4.2,-8.19%,-22.67%,8170
5,2023-11-27,2023-09-30,649820,-52331,-53804,-50696,-8.28%,-1.82%,-7.78,0.0,4.28,-179.56%,-837.07%,7436
6,2023-08-28,2023-06-30,619191,-8049,-20017,-18134,-3.23%,-0.64%,-2.78,0.0,4.37,-258.03%,-390.42%,8559
7,2023-05-24,2023-03-31,619885,2549,-5745,-5065,-0.93%,-0.18%,-0.78,1.0,4.41,88.67%,74.05%,9175
