In [11]:
import polars as pl
import polars.selectors as cs
import time

# Time the eager load + aggregation. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals; time.time() follows
# the wall clock and can jump if the system clock is adjusted mid-run.
start = time.perf_counter()

# Read every parquet file matching the glob into a single in-memory DataFrame.
trips = pl.read_parquet("data/taxi/yellow_tripdata_*.parquet")

# One row per VendorID, holding the sum of each numeric column in that group.
# NOTE(review): the selector also sums identifier-like numeric columns
# (e.g. RatecodeID); those totals are presumably not meaningful -- confirm
# before using them downstream.
sum_per_vendor = trips.group_by("VendorID").agg(cs.numeric().sum())
print(sum_per_vendor)

# Elapsed seconds since `start`, which includes rendering the table above.
print(f"Time: {time.perf_counter() - start:.2f}s")

shape: (4, 16)
┌──────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ VendorID ┆ passenger ┆ trip_dist ┆ RatecodeI ┆ … ┆ improveme ┆ total_amo ┆ congestio ┆ airport_f │
│ ---      ┆ _count    ┆ ance      ┆ D         ┆   ┆ nt_surcha ┆ unt       ┆ n_surchar ┆ ee        │
│ i64      ┆ ---       ┆ ---       ┆ ---       ┆   ┆ rge       ┆ ---       ┆ ge        ┆ ---       │
│          ┆ f64       ┆ f64       ┆ f64       ┆   ┆ ---       ┆ f64       ┆ ---       ┆ f64       │
│          ┆           ┆           ┆           ┆   ┆ f64       ┆           ┆ f64       ┆           │
╞══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ 2        ┆ 4.0110228 ┆ 1.9918e8  ┆ 2.9784712 ┆ … ┆ 8.9231e6  ┆ 6.2098e8  ┆ 6.2146e7  ┆ 2.868805e │
│          ┆ e7        ┆           ┆ e7        ┆   ┆           ┆           ┆           ┆ 6         │
│ 1        ┆ 1.3536686 ┆ 3.6616e7  ┆ 2.4743708 ┆ … ┆ 3.5956e6  ┆ 2.3562e8  ┆

In [16]:
# Load a single monthly file so its column names can be inspected.
df = pl.read_parquet(
    source="data/taxi/yellow_tripdata_2022-01.parquet",
)

# Plain list of column names, in file order.
print(df.columns)


['VendorID', 'tpep_pickup_datetime', 'tpep_dropoff_datetime', 'passenger_count', 'trip_distance', 'RatecodeID', 'store_and_fwd_flag', 'PULocationID', 'DOLocationID', 'payment_type', 'fare_amount', 'extra', 'mta_tax', 'tip_amount', 'tolls_amount', 'improvement_surcharge', 'total_amount', 'congestion_surcharge', 'airport_fee']


In [19]:
# Ratio of summed total_amount to summed trip_distance, per vendor row.
income_ratio = (pl.col("total_amount") / pl.col("trip_distance")).alias(
    "income_per_distance"
)
income_per_distance_per_vendor = sum_per_vendor.select(
    pl.col("VendorID"), income_ratio
)

print(income_per_distance_per_vendor)

# Rank vendors from highest to lowest ratio, then keep the first three rows.
ranked_by_income = income_per_distance_per_vendor.sort(
    "income_per_distance", descending=True
)
top_three = ranked_by_income.head(3)
print(top_three)

shape: (4, 2)
┌──────────┬─────────────────────┐
│ VendorID ┆ income_per_distance │
│ ---      ┆ ---                 │
│ i64      ┆ f64                 │
╞══════════╪═════════════════════╡
│ 2        ┆ 3.117667            │
│ 1        ┆ 6.434789            │
│ 6        ┆ 5.296493            │
│ 5        ┆ 4.731557            │
└──────────┴─────────────────────┘
shape: (3, 2)
┌──────────┬─────────────────────┐
│ VendorID ┆ income_per_distance │
│ ---      ┆ ---                 │
│ i64      ┆ f64                 │
╞══════════╪═════════════════════╡
│ 1        ┆ 6.434789            │
│ 6        ┆ 5.296493            │
│ 5        ┆ 4.731557            │
└──────────┴─────────────────────┘
