In [3]:
import polars as pl 

In [4]:
# Lazy scan: this only builds a query plan; the file is read when a downstream
# cell calls .collect(). ignore_errors=True asks the reader to keep going past
# values it cannot parse instead of raising.
data_lf = pl.scan_csv(
    "input_files/20K Financial Data.csv",
    ignore_errors=True,
)

In [5]:
# Lower bounds for each grade, highest first: a value receives the label of the
# first bound it is greater than or equal to.
_CREDIT_SCORE_FLOORS = ((85, "A"), (50, "B"), (35, "C"), (0, "D"))
_CREDIT_LIMIT_FLOORS = ((250000, "A"), (100000, "B"), (50000, "C"))


def _band_expr(column, floors, below, nan_label="E"):
    """Return a native Polars expression grading ``column`` into letter bands.

    Args:
        column: Name of the numeric column to grade.
        floors: ``(lower_bound, label)`` pairs in descending bound order.
        below: Label for values smaller than every bound.
        nan_label: Label for NaN values.

    Returns:
        A String-valued expression; null inputs produce null.
    """
    value = pl.col(column)
    banded = (
        # The previous map_elements call skipped nulls (its default), leaving
        # them null, so keep null -> null instead of falling into a grade.
        pl.when(value.is_null())
        .then(pl.lit(None, dtype=pl.String))
        # NaN failed every comparison in the old Python lambdas and ended up in
        # their final "E" branch; test for it explicitly so the result does not
        # depend on how the engine orders NaN in comparisons.
        .when(value.cast(pl.Float64).is_nan())
        .then(pl.lit(nan_label))
    )
    for floor, label in floors:
        banded = banded.when(value >= floor).then(pl.lit(label))
    return banded.otherwise(pl.lit(below))


def generate_rating_json(data_lf):
    """Derive the per-company rating columns from the raw financial data.

    Selects ``cs_company_id`` (renamed ``safeNumber``), letter grades for the
    credit score and credit limit, an "Active"/"Inactive" ``status`` label and
    ``country_code``.

    The numeric gradings are built from native ``when/then/otherwise``
    expressions rather than per-row Python lambdas, so they run inside the
    Polars engine; the resulting labels are the same as before.

    Args:
        data_lf: Frame exposing ``cs_company_id``, ``credit_score``,
            ``credit_limit``, ``is_active`` and ``country_code``.

    Returns:
        The result of ``data_lf.select(...)`` with the six columns above
        (still lazy when ``data_lf`` is lazy).
    """
    credit_limit_band = _band_expr("credit_limit", _CREDIT_LIMIT_FLOORS, below="D")

    rating_lf = data_lf.select([
        pl.col("cs_company_id").alias("safeNumber"),

        # Scores below zero fall through every bound and are graded "E".
        _band_expr("credit_score", _CREDIT_SCORE_FLOORS, below="E")
        .alias("credit_score_type"),

        credit_limit_band.alias("credit_limit_type"),

        # NOTE(review): turnover_type is graded from credit_limit, exactly as
        # the original code did. This looks like a copy-paste slip, but the
        # real turnover column is not visible here -- confirm and switch the
        # source column (and thresholds) if one exists.
        credit_limit_band.alias("turnover_type"),

        # Kept as a Python truthiness check: the dtype of is_active is not
        # visible here, and a native boolean predicate would behave differently
        # for non-boolean columns.
        pl.col("is_active")
        .map_elements(
            lambda flag: "Active" if flag else "Inactive",
            return_dtype=pl.String,
        )
        .alias("status"),

        pl.col("country_code"),
    ])

    return rating_lf

In [6]:
def generate_aggre_json(rating_lf):
    """Summarise company counts per country.

    Groups ``rating_lf`` by ``country_code`` and reports, for each country, the
    total number of rows plus how many have ``status`` equal to "Active" and to
    "Inactive". The result is ordered by the total count, largest first.

    Args:
        rating_lf: Frame with ``country_code`` and ``status`` columns.

    Returns:
        The grouped, aggregated and sorted frame.
    """
    status = pl.col("status")

    def count_with_status(label):
        # Number of non-null status values in the group equal to `label`.
        return status.filter(status == label).count()

    per_country = rating_lf.group_by("country_code").agg(
        companies_count=pl.len(),
        active_companies_count=count_with_status("Active"),
        inactive_companies_count=count_with_status("Inactive"),
    )

    return per_country.sort("companies_count", descending=True)

In [7]:
# Wire the pipeline together: per-company ratings first, then the per-country
# roll-up derived from those ratings.
rating_lf = generate_rating_json(data_lf)
aggre_lf = generate_aggre_json(rating_lf)

In [8]:
rating_lf.collect().write_csv("output_files/company_ratings.csv")

In [13]:
aggre_lf.collect().write_json("output_files/company_aggregations.json")

In [9]:

payload = {
    "rating_data": rating_lf.collect(),
    "aggregation_data": aggre_lf.collect()
}

In [12]:
# Dump the payload dict; each collected frame is shown via its plain-text repr.
print(payload)

{'rating_data': shape: (20_358, 6)
┌────────────┬───────────────────┬───────────────────┬───────────────┬────────┬──────────────┐
│ safeNumber ┆ credit_score_type ┆ credit_limit_type ┆ turnover_type ┆ status ┆ country_code │
│ ---        ┆ ---               ┆ ---               ┆ ---           ┆ ---    ┆ ---          │
│ str        ┆ str               ┆ str               ┆ str           ┆ str    ┆ str          │
╞════════════╪═══════════════════╪═══════════════════╪═══════════════╪════════╪══════════════╡
│ IE00545762 ┆ E                 ┆ D                 ┆ D             ┆ Active ┆ IE           │
│ IT07826502 ┆ B                 ┆ D                 ┆ D             ┆ Active ┆ IT           │
│ IT07826314 ┆ B                 ┆ D                 ┆ D             ┆ Active ┆ IT           │
│ IT07826766 ┆ B                 ┆ D                 ┆ D             ┆ Active ┆ IT           │
│ UK13244446 ┆ D                 ┆ D                 ┆ D             ┆ Active ┆ GB           │
│ …          ┆ 