# Stock Etablissement

In [1]:
import polars as pl
import pyarrow.parquet as pq
import sys

# Path to the input Parquet file (relative to the notebook's working directory).
filepath = "../Data/raw/StockEtablissement_utf8.parquet"

print("--- Début de la lecture 'bypass' ---")

try:
    # STEP 1: read the file with PyArrow's Parquet reader instead of Polars' own reader.
    print(f"Lecture du fichier via PyArrow : {filepath}")
    table_arrow = pq.read_table(filepath)

    # STEP 2: hand the Arrow table over to Polars. Per the Polars documentation this
    # conversion is zero-copy for the most part, so it is cheap compared to the read.
    print("Conversion de la table PyArrow en DataFrame Polars...")
    df_eta = pl.from_arrow(table_arrow)

    print("--- SUCCÈS ! ---")
    print("Le DataFrame est maintenant dans Polars, prêt pour la transformation.")
    print(df_eta.head())

except Exception as e:
    print("--- ERREUR ---", file=sys.stderr)
    print(f"Impossible de lire le fichier, même avec PyArrow : {e}", file=sys.stderr)
    # Re-raise instead of swallowing: the cells below all use `df_eta`, so hiding the
    # failure here would only defer it to a confusing NameError with no traceback
    # pointing at the real cause.
    raise

--- Début de la lecture 'bypass' ---
Lecture du fichier via PyArrow : ../Data/raw/StockEtablissement_utf8.parquet
Conversion de la table PyArrow en DataFrame Polars...
--- SUCCÈS ! ---
Le DataFrame est maintenant dans Polars, prêt pour la transformation.
shape: (5, 53)
┌───────────┬─────┬────────────┬────────────┬───┬────────────┬────────────┬────────────┬───────────┐
│ siren     ┆ nic ┆ siret      ┆ statutDiff ┆ … ┆ denominati ┆ activitePr ┆ nomenclatu ┆ caractere │
│ ---       ┆ --- ┆ ---        ┆ usionEtabl ┆   ┆ onUsuelleE ┆ incipaleEt ┆ reActivite ┆ Employeur │
│ str       ┆ i64 ┆ str        ┆ issement   ┆   ┆ tablisseme ┆ ablissemen ┆ Principale ┆ Etablisse │
│           ┆     ┆            ┆ ---        ┆   ┆ …          ┆ …          ┆ …          ┆ men…      │
│           ┆     ┆            ┆ str        ┆   ┆ ---        ┆ ---        ┆ ---        ┆ ---       │
│           ┆     ┆            ┆            ┆   ┆ str        ┆ str        ┆ str        ┆ str       │
╞═══════════╪═════╪════

In [5]:
# Per-column summary statistics (count, null_count, mean, std, min, quartiles, max).
df_eta.describe()

statistic,siren,nic,siret,statutDiffusionEtablissement,dateCreationEtablissement,trancheEffectifsEtablissement,anneeEffectifsEtablissement,activitePrincipaleRegistreMetiersEtablissement,dateDernierTraitementEtablissement,etablissementSiege,nombrePeriodesEtablissement,complementAdresseEtablissement,numeroVoieEtablissement,indiceRepetitionEtablissement,dernierNumeroVoieEtablissement,indiceRepetitionDernierNumeroVoieEtablissement,typeVoieEtablissement,libelleVoieEtablissement,codePostalEtablissement,libelleCommuneEtablissement,libelleCommuneEtrangerEtablissement,distributionSpecialeEtablissement,codeCommuneEtablissement,codeCedexEtablissement,libelleCedexEtablissement,codePaysEtrangerEtablissement,libellePaysEtrangerEtablissement,identifiantAdresseEtablissement,coordonneeLambertAbscisseEtablissement,coordonneeLambertOrdonneeEtablissement,complementAdresse2Etablissement,numeroVoie2Etablissement,indiceRepetition2Etablissement,typeVoie2Etablissement,libelleVoie2Etablissement,codePostal2Etablissement,libelleCommune2Etablissement,libelleCommuneEtranger2Etablissement,distributionSpeciale2Etablissement,codeCommune2Etablissement,codeCedex2Etablissement,libelleCedex2Etablissement,codePaysEtranger2Etablissement,libellePaysEtranger2Etablissement,dateDebut,etatAdministratifEtablissement,enseigne1Etablissement,enseigne2Etablissement,enseigne3Etablissement,denominationUsuelleEtablissement,activitePrincipaleEtablissement,nomenclatureActivitePrincipaleEtablissement,caractereEmployeurEtablissement
str,str,f64,str,str,str,str,f64,str,str,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""42151993""",42151993.0,"""42151993""","""42151993""","""38722345""","""42151993""",2483170.0,"""3047049""","""42151993""",42151993.0,42151993.0,"""11792209""","""34192894""","""6288682""","""4908736""","""4896366""","""37116678""","""41334261""","""41846468""","""41809918""","""312355""","""4896366""","""41809918""","""4896366""","""4896366""","""338770""","""338756""","""33421437""","""31584696""","""31584696""","""4896366""","""4896366""","""4896366""","""4896366""","""4896366""","""4896366""","""0""","""0""","""4896366""","""0""","""4896366""","""4896366""","""0""","""0""","""41633958""","""42151993""","""8067086""","""4868843""","""4853220""","""8569843""","""42130555""","""42130555""","""41981193"""
"""null_count""","""0""",0.0,"""0""","""0""","""3429648""","""0""",39668823.0,"""39104944""","""0""",0.0,0.0,"""30359784""","""7959099""","""35863311""","""37243257""","""37255627""","""5035315""","""817732""","""305525""","""342075""","""41839638""","""37255627""","""342075""","""37255627""","""37255627""","""41813223""","""41813237""","""8730556""","""10567297""","""10567297""","""37255627""","""37255627""","""37255627""","""37255627""","""37255627""","""37255627""","""42151993""","""42151993""","""37255627""","""42151993""","""37255627""","""37255627""","""42151993""","""42151993""","""518035""","""0""","""34084907""","""37283150""","""37298773""","""33582150""","""21438""","""21438""","""170800"""
"""mean""",,136.607893,,,"""2008-11-05 10:29:04.515152""",,2023.0,,"""2024-09-18 21:41:53.413371""",0.684679,2.226143,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"""2013-06-28 12:15:30.213130""",,,,,,,,
"""std""",,1926.657096,,,,,0.0,,,,1.306049,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""min""","""000325175""",1.0,"""00032517500016""","""O""","""0001-01-16""","""00""",2023.0,""" ""","""2024-03-22 15:40:57""",0.0,1.0,""" HAMEAU DE PARUEL""",""" 5""",""" """,""" 110""","""[ND]""",""" """,""" """,""" """,""" MAHINA""",""" BERG""","""[ND]""","""01001""","""[ND]""","""[ND]""","""99000""","""AFGHANISTAN""","""010010005_B""","""1000000.0412983083""","""1593862.9366314295""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""",,,"""[ND]""",,"""[ND]""","""[ND]""",,,"""0001-01-16""","""A""","""	INSTITUT ENSEIGNEMENT FRANCAI…",""" (FLORAC-TROIS-RIVIERES)""",""" GOLD TRANSPORT""",""" ""","""00.00Z""","""NAF1993""","""N"""
"""25%""",,13.0,,,"""2002-04-29""",,2023.0,,"""2024-03-30 05:36:01""",,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"""2008-01-01""",,,,,,,,
"""50%""",,17.0,,,"""2012-12-18""",,2023.0,,"""2025-01-14 02:30:49""",,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"""2016-02-04""",,,,,,,,
"""75%""",,25.0,,,"""2020-08-19""",,2023.0,,"""2025-02-05 02:24:10""",,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"""2022-02-09""",,,,,,,,
"""max""","""999992357""",95469.0,"""99999235700015""","""P""","""5015-04-05""","""NN""",2023.0,"""ZZZZAB""","""2025-11-01 00:10:15""",1.0,52.0,"""ŞEHREMINI MAHALLESI""","""¿""","""¿""","""[ND]""","""[ND]""","""[ND]""",""" RUE SOEUR ONESIME""","""sw8 1rn""","""ŒUILLY""","""ŠIAULIŲ R.""","""[ND]""","""98890""","""[ND]""","""[ND]""","""99516""","""ZIMBABWE""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""",,,"""[ND]""",,"""[ND]""","""[ND]""",,,"""6202-09-02""","""F""","""€€€""","""[ND]""","""[ND]""","""€XC€LL€RATOR""","""99.0Z""","""NAP""","""O"""


In [4]:
# Column names together with their Polars dtypes.
df_eta.schema

Schema([('siren', String),
        ('nic', Int64),
        ('siret', String),
        ('statutDiffusionEtablissement', String),
        ('dateCreationEtablissement', Date),
        ('trancheEffectifsEtablissement', String),
        ('anneeEffectifsEtablissement', Int64),
        ('activitePrincipaleRegistreMetiersEtablissement', String),
        ('dateDernierTraitementEtablissement',
         Datetime(time_unit='us', time_zone=None)),
        ('etablissementSiege', Boolean),
        ('nombrePeriodesEtablissement', Int64),
        ('complementAdresseEtablissement', String),
        ('numeroVoieEtablissement', String),
        ('indiceRepetitionEtablissement', String),
        ('dernierNumeroVoieEtablissement', String),
        ('indiceRepetitionDernierNumeroVoieEtablissement', String),
        ('typeVoieEtablissement', String),
        ('libelleVoieEtablissement', String),
        ('codePostalEtablissement', String),
        ('libelleCommuneEtablissement', String),
        ('libelleComm

In [6]:
# Preview the first rows of the establishments table.
df_eta.head()

siren,nic,siret,statutDiffusionEtablissement,dateCreationEtablissement,trancheEffectifsEtablissement,anneeEffectifsEtablissement,activitePrincipaleRegistreMetiersEtablissement,dateDernierTraitementEtablissement,etablissementSiege,nombrePeriodesEtablissement,complementAdresseEtablissement,numeroVoieEtablissement,indiceRepetitionEtablissement,dernierNumeroVoieEtablissement,indiceRepetitionDernierNumeroVoieEtablissement,typeVoieEtablissement,libelleVoieEtablissement,codePostalEtablissement,libelleCommuneEtablissement,libelleCommuneEtrangerEtablissement,distributionSpecialeEtablissement,codeCommuneEtablissement,codeCedexEtablissement,libelleCedexEtablissement,codePaysEtrangerEtablissement,libellePaysEtrangerEtablissement,identifiantAdresseEtablissement,coordonneeLambertAbscisseEtablissement,coordonneeLambertOrdonneeEtablissement,complementAdresse2Etablissement,numeroVoie2Etablissement,indiceRepetition2Etablissement,typeVoie2Etablissement,libelleVoie2Etablissement,codePostal2Etablissement,libelleCommune2Etablissement,libelleCommuneEtranger2Etablissement,distributionSpeciale2Etablissement,codeCommune2Etablissement,codeCedex2Etablissement,libelleCedex2Etablissement,codePaysEtranger2Etablissement,libellePaysEtranger2Etablissement,dateDebut,etatAdministratifEtablissement,enseigne1Etablissement,enseigne2Etablissement,enseigne3Etablissement,denominationUsuelleEtablissement,activitePrincipaleEtablissement,nomenclatureActivitePrincipaleEtablissement,caractereEmployeurEtablissement
str,i64,str,str,date,str,i64,str,datetime[μs],bool,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,date,str,str,str,str,str,str,str,str
"""000325175""",16,"""00032517500016""","""O""",2000-09-26,"""NN""",,"""3212ZZ""",2024-03-22 15:40:57,False,3,,,,,,,"""MANIHI COTE MONTAGNE TUAMOTU""","""98770""","""MANIHI""",,,"""98727""",,,,,,,,,,,,,,,,,,,,,,2009-05-27,"""F""",,,,,"""32.12Z""","""NAFRev2""","""N"""
"""000325175""",24,"""00032517500024""","""O""",2008-05-20,"""NN""",,,2025-01-27 02:35:48,False,2,,"""1""",,,,"""PLACE""","""LEONCE DE SEYNES""","""84000""","""AVIGNON""",,,"""84007""",,,,,"""84007cvsr_B""","""851150.0982592739""","""6317267.146094748""",,,,,,,,,,,,,,,2011-10-21,"""F""","""TAHITI PERLES CREATIONS""",,,,"""47.89Z""","""NAFRev2""","""N"""
"""000325175""",32,"""00032517500032""","""O""",2009-05-27,"""NN""",,,2025-01-27 02:31:40,False,2,"""ECONOMIS""","""6""",,,,"""AVENUE""","""FRANCOIS MAURIAC""","""84000""","""AVIGNON""",,,"""84007""",,,,,"""840072225_B""","""848084.2366812509""","""6316548.347114814""",,,,,,,,,,,,,,,2011-10-21,"""F""","""TAHITI PERLES CREATIONS""",,,,"""32.12Z""","""NAFRev2""","""N"""
"""000325175""",40,"""00032517500040""","""O""",2011-10-21,"""NN""",,"""3212ZZ""",2024-03-30 05:48:29,False,2,,"""170""",,,,"""AVENUE""","""DU COL DE L’ANGE""","""13420""","""GEMENOS""",,,"""13042""",,,,,"""130420096_B""","""913005.2782392878""","""6245869.653506171""",,,,,,,,,,,,,,,2014-01-07,"""F""","""TAHITI PERLES CREATION""",,,,"""32.12Z""","""NAFRev2""","""N"""
"""000325175""",57,"""00032517500057""","""O""",2014-01-07,"""NN""",,,2024-03-30 05:29:10,False,2,,"""61""",,,,"""RUE""","""MARX DORMOY""","""13004""","""MARSEILLE""",,,"""13204""",,,,,"""132045859_B""","""894589.9700529675""","""6247476.0260883765""",,,,,,,,,,,,,,,2018-02-07,"""F""","""TAHITI PERLES CREATION""",,,,"""32.12Z""","""NAFRev2""","""N"""


# Stock Unite Legale

In [None]:
import polars as pl
import pyarrow.parquet as pq
import sys

# Path to the input Parquet file (relative to the notebook's working directory).
filepath = "../Data/raw/StockUniteLegale_utf8.parquet"

print("--- Début de la lecture 'bypass' ---")

try:
    # STEP 1: read the file with PyArrow's Parquet reader instead of Polars' own reader.
    print(f"Lecture du fichier via PyArrow : {filepath}")
    table_arrow = pq.read_table(filepath)

    # STEP 2: hand the Arrow table over to Polars. Per the Polars documentation this
    # conversion is zero-copy for the most part, so it is cheap compared to the read.
    print("Conversion de la table PyArrow en DataFrame Polars...")
    df_unit_legal = pl.from_arrow(table_arrow)

    print("--- SUCCÈS ! ---")
    print("Le DataFrame est maintenant dans Polars, prêt pour la transformation.")
    print(df_unit_legal.head())

except Exception as e:
    print("--- ERREUR ---", file=sys.stderr)
    print(f"Impossible de lire le fichier, même avec PyArrow : {e}", file=sys.stderr)
    # Re-raise instead of swallowing: the cells below all use `df_unit_legal`, so
    # hiding the failure here would only defer it to a confusing NameError with no
    # traceback pointing at the real cause.
    raise

--- Début de la lecture 'bypass' ---
Lecture du fichier via PyArrow : ../Data/raw/StockUniteLegale_utf8.parquet
Conversion de la table PyArrow en DataFrame Polars...
--- SUCCÈS ! ---
Le DataFrame est maintenant dans Polars, prêt pour la transformation.
shape: (5, 34)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ siren     ┆ statutDif ┆ unitePurg ┆ dateCreat ┆ … ┆ nicSiegeU ┆ economieS ┆ societeMi ┆ caracter │
│ ---       ┆ fusionUni ┆ eeUniteLe ┆ ionUniteL ┆   ┆ niteLegal ┆ ocialeSol ┆ ssionUnit ┆ eEmploye │
│ str       ┆ teLegale  ┆ gale      ┆ egale     ┆   ┆ e         ┆ idaireUni ┆ eLegale   ┆ urUniteL │
│           ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ teL…      ┆ ---       ┆ egale    │
│           ┆ str       ┆ bool      ┆ date      ┆   ┆ i64       ┆ ---       ┆ str       ┆ ---      │
│           ┆           ┆           ┆           ┆   ┆           ┆ str       ┆           ┆ str      │
╞═══════════╪═══════════╪

In [9]:
# Per-column summary statistics (count, null_count, mean, std, min, quartiles, max).
df_unit_legal.describe()

statistic,siren,statutDiffusionUniteLegale,unitePurgeeUniteLegale,dateCreationUniteLegale,sigleUniteLegale,sexeUniteLegale,prenom1UniteLegale,prenom2UniteLegale,prenom3UniteLegale,prenom4UniteLegale,prenomUsuelUniteLegale,pseudonymeUniteLegale,identifiantAssociationUniteLegale,trancheEffectifsUniteLegale,anneeEffectifsUniteLegale,dateDernierTraitementUniteLegale,nombrePeriodesUniteLegale,categorieEntreprise,anneeCategorieEntreprise,dateDebut,etatAdministratifUniteLegale,nomUniteLegale,nomUsageUniteLegale,denominationUniteLegale,denominationUsuelle1UniteLegale,denominationUsuelle2UniteLegale,denominationUsuelle3UniteLegale,categorieJuridiqueUniteLegale,activitePrincipaleUniteLegale,nomenclatureActivitePrincipaleUniteLegale,nicSiegeUniteLegale,economieSocialeSolidaireUniteLegale,societeMissionUniteLegale,caractereEmployeurUniteLegale
str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,f64,str,f64,str,f64,str,str,str,str,str,str,str,str,f64,str,str,f64,str,str,str
"""count""","""28882409""","""28882409""",4267752.0,"""27710121""","""3500030""","""16301667""","""16310994""","""7451768""","""4630285""","""2939334""","""16310994""","""2866012""","""1050584""","""28882409""",2008421.0,"""28882409""",28882409.0,"""10546073""",10546073.0,"""28474772""","""28882409""","""16315955""","""5051773""","""15349351""","""4158591""","""2776349""","""2772761""",28882409.0,"""28860884""","""28860884""",28882409.0,"""6167001""","""831984""","""0"""
"""null_count""","""0""","""0""",24614657.0,"""1172288""","""25382379""","""12580742""","""12571415""","""21430641""","""24252124""","""25943075""","""12571415""","""26016397""","""27831825""","""0""",26873988.0,"""0""",0.0,"""18336336""",18336336.0,"""407637""","""0""","""12566454""","""23830636""","""13533058""","""24723818""","""26106060""","""26109648""",0.0,"""21525""","""21525""",0.0,"""22715408""","""28050425""","""28882409"""
"""mean""",,,1.0,"""2005-10-22 13:11:56.004134""",,,,,,,,,,,2023.0,"""2024-05-27 05:13:09.723577""",2.419245,,2023.0,"""2013-08-19 06:51:08.445851""",,,,,,,,3246.573881,,,19.358745,,,
"""std""",,,,,,,,,,,,,,,0.0,,1.74063,,0.0,,,,,,,,,2736.41353,,,43.78713,,,
"""min""","""000325175""","""O""",1.0,"""0001-01-16""",""" ""","""F""",""" MILA """,""" """,""" """,""" """,""" MILA """,""" """,""" ""","""00""",2023.0,"""2024-03-22 14:26:06""",1.0,"""ETI""",2023.0,"""0001-01-16""","""A""",""" ACKAH-ENYIMAH""",""" """,""" 'ASSOCIATION DES COMBATTANT…",""" AU P'TIT ZINC ""","""& CO FRAIKIN & HOANG IMMOBILI…","""& ENSEIGNEMENTS …",1000.0,"""00.00""","""NAF1993""",1.0,"""N""","""N""",
"""25%""",,,,"""1995-01-19""",,,,,,,,,,,2023.0,"""2024-03-22 14:26:06""",1.0,,2023.0,"""2008-01-01""",,,,,,,,1000.0,,,13.0,,,
"""50%""",,,,"""2010-05-01""",,,,,,,,,,,2023.0,"""2024-03-22 14:26:06""",2.0,,2023.0,"""2017-07-10""",,,,,,,,1000.0,,,16.0,,,
"""75%""",,,,"""2019-11-02""",,,,,,,,,,,2023.0,"""2024-03-22 14:26:06""",3.0,,2023.0,"""2022-09-15""",,,,,,,,5599.0,,,21.0,,,
"""max""","""999992357""","""P""",1.0,"""3023-01-06""","""€CO-MARKET""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""[ND]""","""ØNYYX""","""w931024899""","""NN""",2023.0,"""2025-11-01 00:04:39""",71.0,"""PME""",2023.0,"""9998-11-10""","""C""","""[ND]""","""[ND]""","""€URO MULTI-SERVICES""","""¿A.COM PUBLICITE""","""[ND]""","""[ND]""",9970.0,"""99.0Z""","""NAP""",76665.0,"""O""","""O""",


In [10]:
# Column names together with their Polars dtypes.
df_unit_legal.schema

Schema([('siren', String),
        ('statutDiffusionUniteLegale', String),
        ('unitePurgeeUniteLegale', Boolean),
        ('dateCreationUniteLegale', Date),
        ('sigleUniteLegale', String),
        ('sexeUniteLegale', String),
        ('prenom1UniteLegale', String),
        ('prenom2UniteLegale', String),
        ('prenom3UniteLegale', String),
        ('prenom4UniteLegale', String),
        ('prenomUsuelUniteLegale', String),
        ('pseudonymeUniteLegale', String),
        ('identifiantAssociationUniteLegale', String),
        ('trancheEffectifsUniteLegale', String),
        ('anneeEffectifsUniteLegale', Int64),
        ('dateDernierTraitementUniteLegale',
         Datetime(time_unit='us', time_zone=None)),
        ('nombrePeriodesUniteLegale', Int64),
        ('categorieEntreprise', String),
        ('anneeCategorieEntreprise', Int64),
        ('dateDebut', Date),
        ('etatAdministratifUniteLegale', String),
        ('nomUniteLegale', String),
        ('nomUsageUni

In [11]:
# Preview the first rows of the legal-units table.
df_unit_legal.head()

siren,statutDiffusionUniteLegale,unitePurgeeUniteLegale,dateCreationUniteLegale,sigleUniteLegale,sexeUniteLegale,prenom1UniteLegale,prenom2UniteLegale,prenom3UniteLegale,prenom4UniteLegale,prenomUsuelUniteLegale,pseudonymeUniteLegale,identifiantAssociationUniteLegale,trancheEffectifsUniteLegale,anneeEffectifsUniteLegale,dateDernierTraitementUniteLegale,nombrePeriodesUniteLegale,categorieEntreprise,anneeCategorieEntreprise,dateDebut,etatAdministratifUniteLegale,nomUniteLegale,nomUsageUniteLegale,denominationUniteLegale,denominationUsuelle1UniteLegale,denominationUsuelle2UniteLegale,denominationUsuelle3UniteLegale,categorieJuridiqueUniteLegale,activitePrincipaleUniteLegale,nomenclatureActivitePrincipaleUniteLegale,nicSiegeUniteLegale,economieSocialeSolidaireUniteLegale,societeMissionUniteLegale,caractereEmployeurUniteLegale
str,str,bool,date,str,str,str,str,str,str,str,str,str,str,i64,datetime[μs],i64,str,i64,date,str,str,str,str,str,str,str,i64,str,str,i64,str,str,str
"""000325175""","""O""",,2000-09-26,,"""M""","""THIERRY""",,,,"""THIERRY""",,,"""NN""",,2024-03-22 14:26:06,6,"""PME""",2023.0,2018-02-07,"""A""","""JANOYER""",,,,,,1000,"""32.12Z""","""NAFRev2""",65,,,
"""001807254""","""O""",,1972-05-01,,"""M""","""JACQUES-LUCIEN""",,,,"""JACQUES-LUCIEN""",,,"""NN""",,2024-03-22 14:26:06,5,,,2014-12-31,"""C""","""BRETON""",,,,,,1000,"""85.59A""","""NAFRev2""",22,,,
"""005410220""","""O""",True,1954-12-25,,"""M""","""GEORGES""",,,,"""GEORGES""",,,"""NN""",,2024-03-22 14:26:06,1,,,1988-03-31,"""C""","""WATTEBLED""",,,,,,1000,"""22.02""","""NAP""",15,,,
"""005410345""","""O""",True,,,"""M""","""MICHEL""",,,,"""MICHEL""",,,"""NN""",,2024-03-22 14:26:06,1,,,1984-12-25,"""C""","""DEBRAY""",,,,,,1000,"""79.06""","""NAP""",10,,,
"""005410394""","""O""",True,1954-12-25,,"""M""","""ROBERT""","""ALFRED""",,,"""ROBERT""",,,"""NN""",,2024-03-22 14:26:06,1,,,1987-12-01,"""C""","""DAULT""",,,,,,1000,"""64.42""","""NAP""",18,,,


# Detail Bilan

In [1]:
import polars as pl
import pyarrow.parquet as pq  # C'est le moteur 4x4
import sys

# Path to the input Parquet file (relative to the notebook's working directory).
filepath = "../Data/raw/ExportDetailBilan.parquet"

print("--- Début de la lecture 'bypass' ---")

try:
    # STEP 1: read with PyArrow, projecting only the `siren` and `liasse` columns so the
    # remaining columns of the file are never loaded.
    print(f"Lecture du fichier via PyArrow (le moteur robuste) : {filepath}")
    table_arrow = pq.read_table(
        filepath,
        columns=["siren", "liasse"],
    )

    # STEP 2: hand the Arrow table over to Polars. Per the Polars documentation this
    # conversion is zero-copy for the most part, so it is cheap compared to the read.
    print("Conversion de la table PyArrow en DataFrame Polars...")
    df_bilan = pl.from_arrow(table_arrow)

    print("--- SUCCÈS ! ---")
    print("Le DataFrame est maintenant dans Polars, prêt pour la transformation.")
    print(df_bilan.head())

except Exception as e:
    print("--- ERREUR ---", file=sys.stderr)
    print(f"Impossible de lire le fichier, même avec PyArrow : {e}", file=sys.stderr)
    # Re-raise instead of swallowing: the cells below all use `df_bilan`, so hiding the
    # failure here would only defer it to a confusing NameError with no traceback
    # pointing at the real cause.
    raise

--- Début de la lecture 'bypass' ---
Lecture du fichier via PyArrow (le moteur robuste) : ../Data/raw/ExportDetailBilan.parquet
Conversion de la table PyArrow en DataFrame Polars...
--- SUCCÈS ! ---
Le DataFrame est maintenant dans Polars, prêt pour la transformation.
shape: (5, 2)
┌───────────┬─────────────────────────────────┐
│ siren     ┆ liasse                          │
│ ---       ┆ ---                             │
│ str       ┆ list[struct[2]]                 │
╞═══════════╪═════════════════════════════════╡
│ 005420120 ┆ [{"HF",111571}, {"BH-BI",2559}… │
│ 005420120 ┆ [{"GK",74833}, {"AS",90304}, …… │
│ 005420120 ┆ [{"HF",151562}, {"NH",5850813}… │
│ 005420120 ┆ [{"HC",918483}, {"CU-CV",66917… │
│ 005420120 ┆ [{"AO",425420}, {"CH-CI",14383… │
└───────────┴─────────────────────────────────┘


In [2]:
# Summary statistics; for the nested `liasse` column only count / null_count are populated.
df_bilan.describe()


statistic,siren,liasse
str,str,f64
"""count""","""4976444""",4976444.0
"""null_count""","""0""",0.0
"""mean""",,
"""std""",,
"""min""","""005420120""",
"""25%""",,
"""50%""",,
"""75%""",,
"""max""","""999990542""",


In [4]:
# Column names together with their Polars dtypes (`liasse` is a list of key/value structs).
df_bilan.schema

Schema([('siren', String),
        ('liasse', List(Struct({'key': String, 'value': Int32})))])

In [3]:
# Preview the first rows of the balance-sheet detail table.
df_bilan.head()

siren,liasse
str,list[struct[2]]
"""005420120""","[{""HF"",111571}, {""BH-BI"",2559}, … {""7C"",220031}]"
"""005420120""","[{""GK"",74833}, {""AS"",90304}, … {""BV"",3518}]"
"""005420120""","[{""HF"",151562}, {""NH"",5850813}, … {""DQ"",2967321}]"
"""005420120""","[{""HC"",918483}, {""CU-CV"",66917249}, … {""FJ"",104225}]"
"""005420120""","[{""AO"",425420}, {""CH-CI"",14383}, … {""FR"",520363}]"


In [6]:
# Show the full content of the `liasse` column for the first row.
df_bilan["liasse"][0]



"{""HF"",111571}"
"{""BH-BI"",2559}"
"{""CJ-CK"",15117606}"
"{""DO"",0}"
"{""EA"",123502}"
…
"{""FR"",470896}"
"{""I4"",0}"
"{""ØH"",0}"
"{""QW"",57930}"
"{""7C"",220031}"
