## Afficher la liste des tables disponibles via Redshift Pennylane ##

In [33]:
import psycopg2
import pandas as pd
from dotenv import load_dotenv
import os

## Charger les variables d'environnement ##

In [34]:
load_dotenv(dotenv_path='../.env')

False

## Connexion Redshift Pennylane ##

In [35]:
conn = psycopg2.connect(
    host='pennylane-external.csqwamh5pldr.eu-west-1.redshift.amazonaws.com',
    port=5439,
    dbname='prod',
    user='u_289572',
    password=os.getenv('PENNYLANE_DATA_SHARING_KEY')
)

print("Connexion Redshift etablie")

Connexion Redshift etablie


## Requête pour lister TOUTES les tables disponibles ##

In [36]:
query_tables = """
SELECT 
    table_schema AS schema,
    table_name,
    table_type AS type
FROM information_schema.tables
WHERE table_schema NOT IN ('pg_catalog', 'information_schema', 'pg_internal')
ORDER BY schema, table_name;
"""

### Exécuter la requête ###

In [37]:
df_tables = pd.read_sql(query_tables, conn)

  df_tables = pd.read_sql(query_tables, conn)


### Afficher le résultat ###

In [38]:
print(f"\n{len(df_tables)} tables/views disponibles dans Redshift Pennylane:\n")
print(df_tables.to_string(index=False))


0 tables/views disponibles dans Redshift Pennylane:

Empty DataFrame
Columns: [schema, table_name, type]
Index: []


## Diagnostic : Vérifier les schémas disponibles ##

In [39]:
query_schemas = """
SELECT DISTINCT table_schema 
FROM information_schema.tables 
ORDER BY table_schema;
"""
df_schemas = pd.read_sql(query_schemas, conn)
print("Schémas disponibles :")
print(df_schemas.to_string(index=False))

  df_schemas = pd.read_sql(query_schemas, conn)


Schémas disponibles :
      table_schema
information_schema
        pg_catalog


## Liste de TOUTES les tables (sans filtre) ##

In [40]:
query_all = """
SELECT 
    table_schema AS schema,
    table_name,
    table_type AS type
FROM information_schema.tables
ORDER BY schema, table_name
LIMIT 50;
"""
df_all = pd.read_sql(query_all, conn)
print(f"\n{len(df_all)} tables trouvées :\n")
print(df_all.to_string(index=False))

  df_all = pd.read_sql(query_all, conn)



50 tables trouvées :

            schema                      table_name       type
information_schema                applicable_roles       VIEW
information_schema               check_constraints       VIEW
information_schema             column_domain_usage       VIEW
information_schema               column_privileges       VIEW
information_schema                column_udt_usage       VIEW
information_schema                         columns       VIEW
information_schema         constraint_column_usage       VIEW
information_schema          constraint_table_usage       VIEW
information_schema            data_type_privileges       VIEW
information_schema              domain_constraints       VIEW
information_schema                domain_udt_usage       VIEW
information_schema                         domains       VIEW
information_schema                   element_types       VIEW
information_schema                   enabled_roles       VIEW
information_schema information_schema_catalog_n

## Recherche de schémas métier (hors système) ##

In [41]:
# Lister TOUS les schémas accessibles
query_all_schemas = """
SELECT nspname AS schema_name
FROM pg_namespace
WHERE nspname NOT LIKE 'pg_%'
  AND nspname != 'information_schema'
ORDER BY nspname;
"""
df_custom_schemas = pd.read_sql(query_all_schemas, conn)
print("Schémas métier disponibles :")
print(df_custom_schemas.to_string(index=False))

Schémas métier disponibles :
        schema_name
         accounting
              admin
    catalog_history
                dbt
            dbt_run
                etl
          factoring
          pennylane
practice_management
             public
             s3_app
    s3_data_exports


  df_custom_schemas = pd.read_sql(query_all_schemas, conn)


## Tables disponibles dans le schéma "pennylane" ##

In [42]:
query_pennylane_tables = """
SELECT 
    table_name,
    table_type AS type
FROM information_schema.tables
WHERE table_schema = 'pennylane'
ORDER BY table_name;
"""
df_pennylane = pd.read_sql(query_pennylane_tables, conn)
print(f"\n{len(df_pennylane)} tables/views dans le schéma 'pennylane' :\n")
print(df_pennylane.to_string(index=False))


0 tables/views dans le schéma 'pennylane' :

Empty DataFrame
Columns: [table_name, type]
Index: []


  df_pennylane = pd.read_sql(query_pennylane_tables, conn)


## Requête directe sur pg_tables (spécifique Redshift) ##

In [43]:
query_direct = """
SELECT 
    schemaname AS schema,
    tablename AS table_name,
    tableowner AS owner
FROM pg_tables
WHERE schemaname = 'pennylane'
ORDER BY tablename;
"""
df_direct = pd.read_sql(query_direct, conn)
print(f"\n{len(df_direct)} tables dans 'pennylane' (via pg_tables) :\n")
print(df_direct.to_string(index=False))

  df_direct = pd.read_sql(query_direct, conn)


DatabaseError: Execution failed on sql '
SELECT 
    schemaname AS schema,
    tablename AS table_name,
    tableowner AS owner
FROM pg_tables
WHERE schemaname = 'pennylane'
ORDER BY tablename;
': permission denied for relation pg_tables


## Test d'accès aux différents schémas métier ##

In [None]:
# Tester l'accès à chaque schéma métier
schemas_to_test = ['accounting', 'admin', 'factoring', 'pennylane', 'practice_management', 'public']

for schema in schemas_to_test:
    try:
        query = f"""
        SELECT COUNT(*) as count
        FROM information_schema.tables
        WHERE table_schema = '{schema}';
        """
        result = pd.read_sql(query, conn)
        print(f"✓ Schéma '{schema}': {result['count'][0]} tables accessibles via information_schema")
    except Exception as e:
        print(f"✗ Schéma '{schema}': Erreur - {str(e)[:50]}")
Sinon, essayons une requête directe sur une table connue de Pennylane : Cellule Markdown :
## Test requête directe sur table Pennylane ##
Cellule Code :
# Essayer de requêter directement une table qui devrait exister
try:
    query_test = "SELECT * FROM pennylane.invoices LIMIT 1;"
    df_test = pd.read_sql(query_test, conn)
    print("✓ Table 'pennylane.invoices' accessible !")
    print(df_test.columns.tolist())
except Exception as e:
    print(f"✗ Erreur: {e}")

SyntaxError: only single target (not tuple) can be annotated (3805564548.py, line 15)

## Test d'accès aux différents schémas métier ##

In [None]:
# Tester l'accès à chaque schéma métier
schemas_to_test = ['accounting', 'admin', 'factoring', 'pennylane', 'practice_management', 'public']

for schema in schemas_to_test:
    try:
        query = f"""
        SELECT COUNT(*) as count
        FROM information_schema.tables
        WHERE table_schema = '{schema}';
        """
        result = pd.read_sql(query, conn)
        print(f"✓ Schéma '{schema}': {result['count'][0]} tables accessibles via information_schema")
    except Exception as e:
        print(f"✗ Schéma '{schema}': Erreur - {str(e)[:50]}")

  result = pd.read_sql(query, conn)


✓ Schéma 'accounting': 0 tables accessibles via information_schema
✓ Schéma 'admin': 0 tables accessibles via information_schema
✓ Schéma 'factoring': 0 tables accessibles via information_schema
✓ Schéma 'pennylane': 0 tables accessibles via information_schema
✓ Schéma 'practice_management': 0 tables accessibles via information_schema
✓ Schéma 'public': 0 tables accessibles via information_schema


## Test requête directe sur tables Pennylane connues ##

In [None]:
# Tables courantes documentées par Pennylane
tables_to_test = [
    'invoices',
    'customers',
    'suppliers',
    'products',
    'transactions',
    'accounts',
    'journal_entries'
]

for table in tables_to_test:
    try:
        query = f"SELECT * FROM pennylane.{table} LIMIT 1;"
        df = pd.read_sql(query, conn)
        print(f"✓ Table 'pennylane.{table}' existe ! ({len(df.columns)} colonnes)")
    except Exception as e:
        error_msg = str(e).split('\n')[0][:80]
        print(f"✗ Table 'pennylane.{table}': {error_msg}")

  df = pd.read_sql(query, conn)


✗ Table 'pennylane.invoices': Execution failed on sql 'SELECT * FROM pennylane.invoices LIMIT 1;': Relation in
✓ Table 'pennylane.customers' existe ! (18 colonnes)
✓ Table 'pennylane.suppliers' existe ! (18 colonnes)
✗ Table 'pennylane.products': Execution failed on sql 'SELECT * FROM pennylane.products LIMIT 1;': Relation pr
✗ Table 'pennylane.transactions': Execution failed on sql 'SELECT * FROM pennylane.transactions LIMIT 1;': Relatio
✗ Table 'pennylane.accounts': Execution failed on sql 'SELECT * FROM pennylane.accounts LIMIT 1;': Relation ac
✗ Table 'pennylane.journal_entries': Execution failed on sql 'SELECT * FROM pennylane.journal_entries LIMIT 1;': Rela


## Découverte de toutes les tables accessibles ##

In [None]:
# Liste étendue de tables potentielles Pennylane
tables_extended = [
    'customers', 'suppliers','general_ledger','analytical_ledger', 'invoices', 'quotes', 'bills', 'payments',
    'customer_invoices', 'supplier_invoices', 'credit_notes',
    'bank_accounts', 'bank_transactions', 'categories',
    'tax_rates', 'vat', 'contacts', 'companies',
    'line_items', 'invoice_lines', 'payment_lines',
    'estimates', 'purchase_orders','fiscal_years','trial_balance','tax_declarations','vat_declarations', 'receipts'
]

accessible_tables = []

for table in tables_extended:
    try:
        query = f"SELECT * FROM pennylane.{table} LIMIT 1;"
        df = pd.read_sql(query, conn)
        accessible_tables.append({
            'table_name': table,
            'columns': len(df.columns),
            'column_list': ', '.join(df.columns.tolist()[:5]) + '...'
        })
        print(f"✓ {table} ({len(df.columns)} colonnes)")
    except:
        pass

print(f"\n\n=== RÉSUMÉ: {len(accessible_tables)} tables accessibles ===")
df_accessible = pd.DataFrame(accessible_tables)
print(df_accessible.to_string(index=False))



=== RÉSUMÉ: 0 tables accessibles ===
Empty DataFrame
Columns: []
Index: []


  df = pd.read_sql(query, conn)


## Test connexion active##

In [44]:
# Vérifier si la connexion est toujours active
try:
    test = pd.read_sql("SELECT 1 as test;", conn)
    print("✓ Connexion active")
except Exception as e:
    print(f"✗ Connexion fermée : {e}")
    print("\nRecréation de la connexion...")
    conn = psycopg2.connect(
        host='pennylane-external.csqwamh5pldr.eu-west-1.redshift.amazonaws.com',
        port=5439,
        dbname='prod',
        user='u_289572',
        password=os.getenv('PENNYLANE_DATA_SHARING_KEY')
    )
    print("✓ Connexion rétablie")

  test = pd.read_sql("SELECT 1 as test;", conn)


✗ Connexion fermée : Execution failed on sql: SELECT 1 as test;
SSL connection has been closed unexpectedly

unable to rollback

Recréation de la connexion...
✓ Connexion rétablie


In [45]:
# Liste étendue de tables potentielles Pennylane
tables_extended = [
    'customers', 'suppliers','general_ledger','analytical_ledger', 'invoices', 'quotes', 'bills', 'payments',
    'customer_invoices', 'supplier_invoices', 'credit_notes',
    'bank_accounts', 'bank_transactions', 'categories',
    'tax_rates', 'vat', 'contacts', 'companies',
    'line_items', 'invoice_lines', 'payment_lines',
    'estimates', 'purchase_orders','fiscal_years','trial_balance','tax_declarations','vat_declarations', 'receipts'
]

accessible_tables = []

for table in tables_extended:
    try:
        query = f"SELECT * FROM pennylane.{table} LIMIT 1;"
        df = pd.read_sql(query, conn)
        accessible_tables.append({
            'table_name': table,
            'columns': len(df.columns),
            'column_list': ', '.join(df.columns.tolist()[:5]) + '...'
        })
        print(f"✓ {table} ({len(df.columns)} colonnes)")
    except:
        pass

print(f"\n\n=== RÉSUMÉ: {len(accessible_tables)} tables accessibles ===")
df_accessible = pd.DataFrame(accessible_tables)
print(df_accessible.to_string(index=False))

  df = pd.read_sql(query, conn)


✓ customers (18 colonnes)
✓ suppliers (18 colonnes)
✓ general_ledger (23 colonnes)
✓ analytical_ledger (26 colonnes)
✓ customer_invoices (17 colonnes)
✓ supplier_invoices (16 colonnes)
✓ bank_accounts (18 colonnes)
✓ bank_transactions (18 colonnes)
✓ companies (23 colonnes)
✓ fiscal_years (10 colonnes)
✓ trial_balance (16 colonnes)
✓ tax_declarations (15 colonnes)
✓ vat_declarations (10 colonnes)


=== RÉSUMÉ: 13 tables accessibles ===
       table_name  columns                                                           column_list
        customers       18                       id, name, company_id, company_name, vat_rate...
        suppliers       18                       id, name, company_id, company_name, vat_rate...
   general_ledger       23                      company_id, company_name, id, date, lettering...
analytical_ledger       26                      id, company_id, company_name, date, lettering...
customer_invoices       17                 id, created_date, issue_date, de

## Fonction helper : Connexion auto-reconnect ##

In [46]:
def get_active_connection():
    """Retourne une connexion active, la recrée si nécessaire"""
    global conn
    try:
        # Test si la connexion est active
        pd.read_sql("SELECT 1;", conn)
        return conn
    except:
        # Reconnexion si fermée
        print("⟳ Reconnexion Redshift...")
        conn = psycopg2.connect(
            host='pennylane-external.csqwamh5pldr.eu-west-1.redshift.amazonaws.com',
            port=5439,
            dbname='prod',
            user='u_289572',
            password=os.getenv('PENNYLANE_DATA_SHARING_KEY')
        )
        print("✓ Reconnecté")
        return conn

# Utilisation : remplace "conn" par "get_active_connection()" dans tes requêtes
# Exemple :
df = pd.read_sql("SELECT * FROM pennylane.customers LIMIT 1;", get_active_connection())
print(f"Test réussi : {len(df)} ligne(s)")

  pd.read_sql("SELECT 1;", conn)
  df = pd.read_sql("SELECT * FROM pennylane.customers LIMIT 1;", get_active_connection())


Test réussi : 1 ligne(s)
