In [2]:
import numpy as np
import random


In [3]:
car_brands = ["Toyota", "Honda", "Ford", "BMW", "Tesla"]

car_models = {
    "Toyota": ["Camry", "Corolla", "RAV4"],
    "Honda": ["Civic", "Accord", "CR-V"],
    "Ford": ["Fiesta", "Focus", "Explorer"],
    "BMW": ["X3", "X5", "3 Series"],
    "Tesla": ["Model 3", "Model S", "Model X"]
}

car_prices = {
    "Camry": 25000, "Corolla": 20000, "RAV4": 30000,
    "Civic": 22000, "Accord": 26000, "CR-V": 28000,
    "Fiesta": 18000, "Focus": 20000, "Explorer": 35000,
    "X3": 42000, "X5": 55000, "3 Series": 48000,
    "Model 3": 40000, "Model S": 80000, "Model X": 90000
}


In [4]:
dtype = np.dtype([
    ("CustomerID", "int32"),
    ("CarName", "U20"),
    ("CarModel", "U20"),
    ("Units", "int32"),
    ("Amount", "float64")
])


In [5]:
dataset = np.zeros(100, dtype=dtype)

for i in range(100):
    brand = random.choice(car_brands)
    model = random.choice(car_models[brand])
    units = random.randint(1, 5)

    dataset[i] = (
        i + 1,
        brand,
        model,
        units,
        units * car_prices[model]
    )


In [6]:
dataset[:10]


array([( 1, 'Toyota', 'Camry', 4, 100000.), ( 2, 'BMW', 'X3', 5, 210000.),
       ( 3, 'Ford', 'Fiesta', 2,  36000.),
       ( 4, 'BMW', '3 Series', 3, 144000.),
       ( 5, 'Honda', 'Civic', 5, 110000.),
       ( 6, 'Tesla', 'Model 3', 3, 120000.),
       ( 7, 'BMW', '3 Series', 4, 192000.),
       ( 8, 'Tesla', 'Model 3', 5, 200000.),
       ( 9, 'BMW', 'X3', 4, 168000.), (10, 'Honda', 'Accord', 1,  26000.)],
      dtype=[('CustomerID', '<i4'), ('CarName', '<U20'), ('CarModel', '<U20'), ('Units', '<i4'), ('Amount', '<f8')])

In [7]:
dataset = np.zeros(100, dtype=dtype)


#Generate 100 customer IDs starting from 1001.
customer_ids = range(1001, 1101)   # 1001 to 1100

for i, cust_id in enumerate(customer_ids):

    #Randomly assign a car brand to each record.
    brand = random.choice(car_brands)
    
    #Based on the brand, pick a model randomly.
    model = random.choice(car_models[brand])
    
    #Generate units sold between 1 and 5.
    units = random.randint(1, 5)
    
    #Calculate Amount = Units × Model Price.
    amount = units * car_prices[model]
    
    #Store everything in a structured array
    #Print: The dataset,The number of dimensions (ndim)

    dataset[i] = (cust_id, brand, model, units, amount)


In [8]:
print("Dataset:\n", dataset)
print("\nNumber of dimensions:", dataset.ndim)


Dataset:
 [(1001, 'Honda', 'Accord', 4, 104000.)
 (1002, 'Tesla', 'Model S', 3, 240000.) (1003, 'BMW', 'X3', 4, 168000.)
 (1004, 'BMW', 'X5', 2, 110000.) (1005, 'Tesla', 'Model 3', 1,  40000.)
 (1006, 'Tesla', 'Model S', 4, 320000.) (1007, 'BMW', 'X5', 1,  55000.)
 (1008, 'Toyota', 'Corolla', 3,  60000.)
 (1009, 'Toyota', 'Camry', 2,  50000.)
 (1010, 'Toyota', 'Camry', 4, 100000.) (1011, 'Honda', 'CR-V', 5, 140000.)
 (1012, 'Ford', 'Focus', 2,  40000.) (1013, 'Ford', 'Fiesta', 3,  54000.)
 (1014, 'Honda', 'CR-V', 2,  56000.)
 (1015, 'Ford', 'Explorer', 1,  35000.)
 (1016, 'Honda', 'CR-V', 3,  84000.) (1017, 'BMW', '3 Series', 3, 144000.)
 (1018, 'Ford', 'Focus', 5, 100000.) (1019, 'Toyota', 'RAV4', 3,  90000.)
 (1020, 'Toyota', 'RAV4', 5, 150000.)
 (1021, 'BMW', '3 Series', 1,  48000.)
 (1022, 'Honda', 'Civic', 3,  66000.)
 (1023, 'BMW', '3 Series', 5, 240000.)
 (1024, 'Toyota', 'RAV4', 1,  30000.) (1025, 'Ford', 'Fiesta', 2,  36000.)
 (1026, 'Ford', 'Focus', 3,  60000.) (1027, 'BMW', 

In [9]:
#Total sales amount (sum of Amount column)


total_sales_amount = np.sum(dataset["Amount"])
total_sales_amount


11866000.0

In [10]:
#List all unique car brands


unique_brands = np.unique(dataset["CarName"])
unique_brands


array(['BMW', 'Ford', 'Honda', 'Tesla', 'Toyota'], dtype='<U20')

In [11]:
#Total units sold (sum of Units column)

total_units_sold = np.sum(dataset["Units"])
total_units_sold


307

In [12]:
#Total units sold for Tesla only

tesla_units = np.sum(dataset["Units"][dataset["CarName"] == "Tesla"])
tesla_units


66

In [13]:
#Total revenue generated by BMW

bmw_revenue = np.sum(dataset["Amount"][dataset["CarName"] == "BMW"])
bmw_revenue


2602000.0

In [14]:
#Average units sold for Toyota

toyota_avg_units = np.mean(dataset["Units"][dataset["CarName"] == "Toyota"])
toyota_avg_units


3.3333333333333335

In [15]:
#Highest amount among Tesla purchases

tesla_max_amount = np.max(dataset["Amount"][dataset["CarName"] == "Tesla"])
tesla_max_amount


450000.0

In [16]:
#Convert structured array → regular 2D NumPy array


dataset_2d = np.column_stack((
    dataset["CustomerID"],
    dataset["CarName"],
    dataset["CarModel"],
    dataset["Units"],
    dataset["Amount"]
))

dataset_2d


array([['1001', 'Honda', 'Accord', '4', '104000.0'],
       ['1002', 'Tesla', 'Model S', '3', '240000.0'],
       ['1003', 'BMW', 'X3', '4', '168000.0'],
       ['1004', 'BMW', 'X5', '2', '110000.0'],
       ['1005', 'Tesla', 'Model 3', '1', '40000.0'],
       ['1006', 'Tesla', 'Model S', '4', '320000.0'],
       ['1007', 'BMW', 'X5', '1', '55000.0'],
       ['1008', 'Toyota', 'Corolla', '3', '60000.0'],
       ['1009', 'Toyota', 'Camry', '2', '50000.0'],
       ['1010', 'Toyota', 'Camry', '4', '100000.0'],
       ['1011', 'Honda', 'CR-V', '5', '140000.0'],
       ['1012', 'Ford', 'Focus', '2', '40000.0'],
       ['1013', 'Ford', 'Fiesta', '3', '54000.0'],
       ['1014', 'Honda', 'CR-V', '2', '56000.0'],
       ['1015', 'Ford', 'Explorer', '1', '35000.0'],
       ['1016', 'Honda', 'CR-V', '3', '84000.0'],
       ['1017', 'BMW', '3 Series', '3', '144000.0'],
       ['1018', 'Ford', 'Focus', '5', '100000.0'],
       ['1019', 'Toyota', 'RAV4', '3', '90000.0'],
       ['1020', 'Toyota', '

In [17]:
#Which brand has maximum total units sold



brands = np.unique(dataset["CarName"])
brand_units = {}

for brand in brands:
    brand_units[brand] = np.sum(dataset["Units"][dataset["CarName"] == brand])


In [18]:
max_brand = max(brand_units, key=brand_units.get)
max_brand, brand_units[max_brand]


('Toyota', 80)