In [6]:
# # Exercise 03  —  Selects and aggregations
# # ----------------------------------------------------------
import pandas as pd
from pathlib import Path

# 1) Read the JSON file and use the CarNumber column as the row index
DATA_DIR = Path("..", "data")
df = (
    pd.read_json(DATA_DIR / "auto.json")
      .set_index("CarNumber")
)

# 2) Selections

# Boolean mask shared by (a) and (b): rows whose Fines value exceeds 2100
high_fines = df["Fines"] > 2100

# (a) fines > 2100
sel1 = df.loc[high_fines]

# (b) fines > 2100 and refund == 2
sel2 = df.loc[high_fines & (df["Refund"] == 2)]

# (c) rows whose Model is one of the listed models
#     (plain bracket indexing with the same mask selects the same rows)
models = ["Focus", "Corolla"]
sel3 = df.loc[df["Model"].isin(models)]

# (d) rows for specific car numbers, looked up by index label
nums = [
    "Y7689C197RUS",
    "92928M178RUS",
    "7788KT197RUS",
    "H115YO163RUS",
    "X758HY197RUS",
]
sel4 = df.loc[nums]

# 3) Aggregations over Make and Model

# (a) median of Fines per Make
median_by_make = df.groupby("Make")["Fines"].median()

# Fines grouped by (Make, Model); built once and reused for (b)-(e)
fines_by_make_model = df.groupby(["Make", "Model"])["Fines"]

# (b) median of Fines per Make and Model
median_by_make_model = fines_by_make_model.median()

# (c) number of rows (size) per Make and Model
count_by_make_model = fines_by_make_model.size()

# (d) smallest and largest Fines per Make and Model, as two columns
min_max_by_make_model = fines_by_make_model.agg(["min", "max"])

# (e) standard deviation of Fines per Make and Model
std_by_make_model = fines_by_make_model.std()

# 4) Aggregations by CarNumber (the DataFrame index)

# (a) car numbers ordered by how many fine records they have, most first
count_by_car = df["Fines"].groupby(df.index).size().sort_values(ascending=False)

# (b) all rows for the car number with the most records.
# The label is wrapped in a list so that .loc always yields a DataFrame;
# a bare label would yield a Series if that number matched exactly one row.
top1_by_count = count_by_car.index[0]
rows_top1_count = df.loc[[top1_by_count]]

# (c) car numbers ordered by the total of their fines, largest first
sum_by_car = df["Fines"].groupby(df.index).sum().sort_values(ascending=False)

# (d) all rows for the car number with the largest total
# (list-wrapped label for the same reason as in (b))
top1_by_sum = sum_by_car.index[0]
rows_top1_sum = df.loc[[top1_by_sum]]

# (e) car numbers that occur with more than one distinct Model;
#     the resulting Series is empty when there are none
multi_models = (
    df.reset_index()
      .groupby("CarNumber")["Model"]
      .nunique()
      .loc[lambda x: x > 1]
)

tests:

In [7]:
# Display the rows whose Model is one of `models`
df[df["Model"].isin(models)]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Y163O8161RUS,2,3200.000000,Ford,Focus
7184TT36RUS,1,2100.000000,Ford,Focus
X582HE161RUS,2,2000.000000,Ford,Focus
92918M178RUS,1,5700.000000,Ford,Focus
H234YH197RUS,2,6000.000000,Ford,Focus
...,...,...,...,...
Y163O8161RUS,2,1600.000000,Ford,Focus
M0309X197RUS,1,22300.000000,Ford,Focus
O673E8197RUS,2,600.000000,Ford,Focus
8610T8154RUS,1,2000.000000,Ford,Focus


In [8]:
# Number of non-null Fines values per (Make, Model).
# The column is selected with [...] instead of .agg('Fines'): .agg() is meant
# to receive an aggregation (a function or its name), not a column label.
df.groupby(['Make', 'Model'])['Fines'].count()

Make        Model  
Ford        Focus      575
            Mondeo       6
Skoda       Octavia     48
Toyota      Camry       16
            Corolla     18
Volkswagen  Golf        20
            Jetta        6
            Passat      22
            Touareg      5
Name: Fines, dtype: int64

In [9]:
# First three entries of count_by_car and their car numbers as a plain list
top3_by_count = count_by_car.iloc[:3]
top3_numbers = list(top3_by_count.index)
print("Numbers top3:", top3_numbers)

Numbers top3: ['Y7689C197RUS', '92928M178RUS', '7788KT197RUS']


In [10]:
# Total fines per car number, largest first (same computation as in the main cell)
sum_by_car = (
    df["Fines"]
      .groupby(df.index)
      .sum()
      .sort_values(ascending=False)
)
# NOTE: this rebinds top1_by_sum, which the main cell set to the index label,
# to a one-row Series holding that label together with its total.
top1_by_sum = sum_by_car.iloc[:1]
print("Top‑1 car by sum of fines:\n", top1_by_sum)

Top‑1 car by sum of fines:
 CarNumber
X758HY197RUS    242000.0
Name: Fines, dtype: float64
