<p>What data is in your Postgres database?</p>
<p>Often, team members don't know what to query because they have no idea what data is stored, or what table the data is stored in.</p>
<p>This Action lists all the tables (this output can be modified to limit the number of tables exposed).</p>
<p>For each of the tables, we display the columns that are present.</p>
<p>&nbsp;</p>
<p>If desired, there is an input parameter list that allows you to specify which schemas appear in the output.&nbsp; For example, if your Postgres database has 3 schemas (Development, Customers, superSecretData), you may not want the 3rd one displayed for all to see.&nbsp; Using the input parameter to specify ['Development','Customers'] will display only the tables in those schemas.&nbsp;&nbsp;</p>
<p>&nbsp;</p>
<p><strong>Step 1</strong>: <strong>Write query</strong>: "getAllTables". This query is used to get a list of all of the tables/schemas that are present in the PostgreSQL database.</p>
<p><strong>Step 2:&nbsp;Make the query to get all of the tables</strong>. The output is saved in&nbsp;<em>allTableNames<strong>.</strong></em></p>
<p><strong>Step 3:&nbsp;Create queries to retrieve columns from all tables.</strong>&nbsp; Based on the schemas chosen (or all of the schemas if none are chosen), create a series of queries to select every column in every table.</p>
<p><strong>Step 4:&nbsp; Query all the tables&nbsp;</strong> Loops through the list of queries to extract the column data from each table.</p>
<p><strong>Step 5: Create a JSON</strong>: This Action simplifies the data from the query. Key: table name; Value: list of columns.</p>

In [45]:
# SQL that returns one (schema, table) row per base table known to the database.
getAllTables = (
    "SELECT TABLE_SCHEMA, TABLE_NAME  FROM INFORMATION_SCHEMA.TABLES "
    "WHERE TABLE_TYPE = 'BASE TABLE'"
)
print(getAllTables)


In [46]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##
import random
import string
from typing import List
from pydantic import BaseModel, Field
from tabulate import tabulate


from beartype import beartype
@beartype
def postgresql_read_query_printer(output):
    """postgresql_read_query_printer Prints the query result as a grid table.

          :type output: iterable of rows
          :param output: Rows to display; each row is itself iterated to
                produce the cells of one table line.

          :rtype: The same output object that was passed in.
      """
    print("\n")
    # Materialise every row so the table library receives plain lists.
    table_rows = [list(row) for row in output]
    print(tabulate(table_rows, tablefmt="grid"))
    return output


@beartype
def postgresql_read_query(handle, query: str, params: list = ()) -> List:
    """postgresql_read_query Runs postgresql query with the provided parameters.

    The query is wrapped in a uniquely named PREPARE statement and then run
    with EXECUTE. The cursor and the handle are both closed before returning,
    including when one of the statements raises.

          :type handle: object
          :param handle: Object returned from task.validate(...).

          :type query: str
          :param query: Postgresql read query.

          :type params: list
          :param params: Positional parameters for the prepared statement.
                An empty value runs the statement without arguments.

          :rtype: List of Result of the Query.
      """
    cur = handle.cursor()
    try:
        # A random suffix keeps the prepared-statement name unique per call.
        random_id = ''.join(
            random.choice(string.ascii_letters + string.digits) for _ in range(32))
        statement_name = f"psycop_{random_id}"

        prepare_statement = f"PREPARE {statement_name} AS {query};"
        if not params:
            execute_statement = f"EXECUTE {statement_name};"
        else:
            parameters_tuple = tuple(params)
            if len(parameters_tuple) == 1:
                # str() of a 1-tuple ends in ",)" which is not valid SQL, so
                # render the single argument without the trailing comma.
                rendered_params = f"({parameters_tuple[0]!r})"
            else:
                rendered_params = str(parameters_tuple)
            # NOTE(review): arguments are rendered with Python repr(), so a
            # string containing a single quote comes out double-quoted, which
            # PostgreSQL reads as an identifier. Escape such values upstream.
            execute_statement = f"EXECUTE {statement_name} {rendered_params};"

        cur.execute(prepare_statement)
        cur.execute(execute_statement)
        res = cur.fetchall()
        handle.commit()
    finally:
        # Release the cursor first, then the handle, even if a statement failed.
        try:
            cur.close()
        finally:
            handle.close()
    return res


# Configure and run the task that executes postgresql_read_query for the
# table-listing query.
task = Task(Workflow())
# Credential reference for the PostgreSQL connector used by this task.
task.configure(credentialsJson='''{
    "credential_name": "segment",
    "credential_type": "CONNECTOR_TYPE_POSTGRESQL",
    "credential_id": "05050797-8929-47de-aaed-93d9fc63b244"
}''')
# "getAllTables" is the name of the notebook variable that holds the SQL text;
# presumably it is resolved through the vars() mapping passed to validate()
# below -- TODO confirm against the Task API.
task.configure(inputParamsJson='''{
    "query": "getAllTables"
    }''')
# The result is stored under the name allTableNames, which later cells read.
task.configure(outputName="allTableNames")
task.configure(printOutput=True)
(err, hdl, args) = task.validate(vars=vars())
# Only execute when validation reported no error.
if err is None:
    task.execute(postgresql_read_query, lego_printer=postgresql_read_query_printer, hdl=hdl, args=args)

<p>There may be a lot of tables, and you may want to exclude some - perhaps an entire schema - or perhaps specific tables.</p>

In [47]:
# Show the first field (the schema) of every row returned by the table listing.
for table_row in allTableNames:
    schema_name = table_row[0]
    print("record", schema_name)

In [48]:
# Build one column-listing query per table, limited to the allowed schemas.
#   tableList  -> "schema.table" names that passed the schema filter
#   queryList  -> the matching INFORMATION_SCHEMA.COLUMNS queries, same order
tableList = []
queryList = []

# Only list table data for the schemas in schema_list. An empty (or None)
# schema_list means "include every schema that appears in allTableNames".
if schema_list:
    allowedSchemas = schema_list
else:
    # dict.fromkeys de-duplicates while keeping first-seen order.
    allowedSchemas = list(dict.fromkeys(record[0] for record in allTableNames))
print("allowedSchemas",allowedSchemas)

for tableName in allTableNames:
    justSchema = tableName[0]
    justTable = tableName[1]
    schemaDotTable = f"{justSchema}.{justTable}"

    if justSchema not in allowedSchemas:
        continue

    tableList.append(schemaDotTable)
    # Double any single quote so the names stay valid SQL string literals.
    schemaLiteral = justSchema.replace("'", "''")
    tableLiteral = justTable.replace("'", "''")
    # Filter on the schema as well as the table name: two schemas may hold
    # tables with the same name, and matching on TABLE_NAME alone would merge
    # their column lists.
    queryList.append(
        "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS "
        f"WHERE TABLE_SCHEMA = '{schemaLiteral}' AND TABLE_NAME = '{tableLiteral}';"
    )

print(len(tableList))
print(queryList)



In [49]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##
import random
import string
from typing import List
from pydantic import BaseModel, Field
from tabulate import tabulate


from beartype import beartype
@beartype
def postgresql_read_query_printer(output):
    """postgresql_read_query_printer Prints the query result as a grid table.

          :type output: iterable of rows
          :param output: Rows to display; each row is itself iterated to
                produce the cells of one table line.

          :rtype: The same output object that was passed in.
      """
    print("\n")
    # Materialise every row so the table library receives plain lists.
    table_rows = [list(row) for row in output]
    print(tabulate(table_rows, tablefmt="grid"))
    return output


@beartype
def postgresql_read_query(handle, query: str, params: list = ()) -> List:
    """postgresql_read_query Runs postgresql query with the provided parameters.

    The query is wrapped in a uniquely named PREPARE statement and then run
    with EXECUTE. The cursor is closed before returning, including when one
    of the statements raises; the handle itself is left open.

          :type handle: object
          :param handle: Object returned from task.validate(...).

          :type query: str
          :param query: Postgresql read query.

          :type params: list
          :param params: Positional parameters for the prepared statement.
                An empty value runs the statement without arguments.

          :rtype: List of Result of the Query.
      """
    cur = handle.cursor()
    try:
        # A random suffix keeps the prepared-statement name unique per call.
        random_id = ''.join(
            random.choice(string.ascii_letters + string.digits) for _ in range(32))
        statement_name = f"psycop_{random_id}"

        prepare_statement = f"PREPARE {statement_name} AS {query};"
        if not params:
            execute_statement = f"EXECUTE {statement_name};"
        else:
            parameters_tuple = tuple(params)
            if len(parameters_tuple) == 1:
                # str() of a 1-tuple ends in ",)" which is not valid SQL, so
                # render the single argument without the trailing comma.
                rendered_params = f"({parameters_tuple[0]!r})"
            else:
                rendered_params = str(parameters_tuple)
            # NOTE(review): arguments are rendered with Python repr(), so a
            # string containing a single quote comes out double-quoted, which
            # PostgreSQL reads as an identifier. Escape such values upstream.
            execute_statement = f"EXECUTE {statement_name} {rendered_params};"

        cur.execute(prepare_statement)
        cur.execute(execute_statement)
        res = cur.fetchall()
        handle.commit()
    finally:
        # Release the cursor even if a statement failed. The handle is not
        # closed here -- presumably so it can be reused by further calls;
        # confirm with the caller.
        cur.close()

    return res


# Configure and run the task that executes postgresql_read_query once per
# entry of queryList.
task = Task(Workflow())
task.configure(continueOnError=False)
# Credential reference for the PostgreSQL connector used by this task.
task.configure(credentialsJson='''{
    "credential_name": "segment",
    "credential_type": "CONNECTOR_TYPE_POSTGRESQL",
    "credential_id": "05050797-8929-47de-aaed-93d9fc63b244"
}''')
# "iter_item" stands in for the current element of the iteration configured
# below -- presumably substituted by the framework; TODO confirm.
task.configure(inputParamsJson='''{
    "query": "iter_item"
    }''')
# Iterate over the notebook variable queryList, feeding each element to the
# "query" parameter.
task.configure(iterJson='''{
    "iter_enabled": true,
    "iter_list_is_const": false,
    "iter_list": "queryList",
    "iter_parameter": "query"
    }''')
# The result is stored under the name tableColumns, which the next cell reads.
task.configure(outputName="tableColumns")

task.configure(printOutput=True)
(err, hdl, args) = task.validate(vars=vars())
# Only execute when validation reported no error.
if err is None:
    task.execute(postgresql_read_query, lego_printer=postgresql_read_query_printer, hdl=hdl, args=args)

In [50]:
# Reduce every result set to a flat list holding the first field of each row,
# keeping the original keys, then rebind tableColumns to the simplified dict.
temp = {
    key: [row[0] for row in rows]
    for key, rows in tableColumns.items()
}
tableColumns = temp
print(tableColumns)