In [1]:
import os
import logging
import json
import psycopg2
import io
import sys
# Put the directory two levels above the working directory on the import path
# (presumably the repository root that holds the `Production` package -- TODO confirm).
current_directory = os.getcwd()
target_directory = os.path.abspath(os.path.join(current_directory, "..", ".."))
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if target_directory not in sys.path:
    sys.path.append(target_directory)

from Production.Backfill import GLEIF_Backfill_Helpers

In [2]:
class GLEIF_Reporting_Exceptions_Data:
    """Load GLEIF Level 2 reporting-exception records from a JSON file into PostgreSQL.

    The constructor locates (or downloads) the JSON file and opens an
    autocommit database connection. ``storing_GLEIF_data_in_database`` then
    creates the ``GLEIF_exception_data`` table, bulk-loads the records found
    under the JSON's ``"exceptions"`` key and closes the connection.
    """

    def __init__(self , bool_log = True , str_db_name = "GLEIF_test_db" , bool_downloaded = True):
        """Set up helpers, optional file logging, the input file path and the DB connection.

        Args:
            bool_log (bool): When True, configure root logging to write to
                ``../logging/GLEIF_Backfill_level_2_exceptions.log`` (overwritten each run).
            str_db_name (str): Name of the PostgreSQL database to connect to.
            bool_downloaded (bool): When True, reuse a file already present in
                ``../file_lib/Level_2_unpacked_exceptions``; when False, ask the
                backfill helpers for the download link and unpack the archive.

        Raises:
            FileExistsError: If ``../logging`` exists but is not a directory.
            FileNotFoundError: If ``bool_downloaded`` is True but the unpacked
                directory is missing or empty.
        """
        self.obj_backfill_helpers = GLEIF_Backfill_Helpers.GLEIF_Backill_Helpers(bool_Level_2_Reporting_Exceptions = True)

        if bool_log:
            logging_folder = "../logging"  # Adjust the folder path as necessary

            if os.path.exists(logging_folder) and not os.path.isdir(logging_folder):
                raise FileExistsError(f"'{logging_folder}' exists but is not a directory. Please remove or rename the file.")
            os.makedirs(logging_folder, exist_ok=True)

            logging.basicConfig(filename=f"{logging_folder}/GLEIF_Backfill_level_2_exceptions.log", level=logging.DEBUG, format='%(levelname)s: %(message)s', filemode="w")

        if not bool_downloaded:
            if not os.path.exists("../file_lib"):
                os.makedirs("../file_lib")

            str_level_2_exceptions_download_link = self.obj_backfill_helpers.get_level_download_links()
            # The helper's return value is used as the JSON path below
            # (presumably the path of the unpacked file -- verify against the helper).
            self.str_json_file_path = self.obj_backfill_helpers.unpacking_GLEIF_zip_files(str_download_link = str_level_2_exceptions_download_link , str_unpacked_zip_file_path_name = "Level_2_unpacked_exceptions" , str_zip_file_path_name = "Level_2_exceptions.zip")
        else:
            str_unpacked_directory = "../file_lib/Level_2_unpacked_exceptions"
            # os.listdir returns entries in arbitrary order; sort so that the
            # chosen file is deterministic (the lexicographically last name).
            lst_unpacked_file_names = sorted(os.listdir(str_unpacked_directory))
            if not lst_unpacked_file_names:
                raise FileNotFoundError(f"No unpacked exception file found in '{str_unpacked_directory}'.")
            self.str_json_file_path = os.path.join(str_unpacked_directory, lst_unpacked_file_names[-1])

        # SECURITY: credentials can be supplied through GLEIF_DB_* environment
        # variables. The literal fallbacks only preserve the previous behaviour
        # and should be removed from source control.
        self.conn = psycopg2.connect(
            dbname = str_db_name,
            user = os.environ.get("GLEIF_DB_USER", "Matthew_Pisinski"),
            password = os.environ.get("GLEIF_DB_PASSWORD", "matt1"),
            host = os.environ.get("GLEIF_DB_HOST", "localhost"),
            port = os.environ.get("GLEIF_DB_PORT", "5432"),
        )
        self.conn.autocommit = True
        self.cursor = self.conn.cursor()

    @staticmethod
    def _validated_identifier(str_identifier):
        """Return ``str_identifier`` if it is a plain (optionally dotted) SQL name.

        Identifiers are interpolated into SQL text unquoted (quoting would make
        them case-sensitive and stop matching the tables created here), so
        anything other than ASCII letters, digits and underscores is rejected.

        Raises:
            ValueError: If any dot-separated part is not a simple identifier.
        """
        for str_part in str(str_identifier).split("."):
            if not (str_part.isidentifier() and all(ord(char) < 128 for char in str_part)):
                raise ValueError(f"Unsafe SQL identifier: {str_identifier!r}")
        return str_identifier

    @staticmethod
    def _format_copy_field(value):
        """Render one value for PostgreSQL COPY text format (NULL marker and escapes)."""
        if value is None:
            return "\\N"
        # Backslash must be escaped first so the later escapes are not doubled.
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace("\t", "\\t")
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    def create_table(self):
        """Create the ``GLEIF_exception_data`` table if it does not already exist."""
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS GLEIF_exception_data (
            id SERIAL PRIMARY KEY,
            lei TEXT NOT NULL,
            ExceptionCategory TEXT,
            ExceptionReason Text,
            UNIQUE (lei, ExceptionCategory, ExceptionReason)
            );
        """)

    def drop_table(self , lst_table_names):
        """Drop the given tables (with CASCADE) if they exist.

        Args:
            lst_table_names (list of str): Names of the tables to drop. Each name
                must be a plain identifier, optionally schema-qualified.

        Raises:
            ValueError: If any name is not a plain identifier; nothing is
                dropped in that case.
        """
        # Validate every name before executing anything: the connection is in
        # autocommit mode, so earlier drops could not be rolled back.
        lst_safe_names = [self._validated_identifier(table_name) for table_name in lst_table_names]

        for table_name in lst_safe_names:
            self.cursor.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE;")

        self.conn.commit()

    def bulk_insert_using_copy(self , table_name , columns, data):
        """Perform a bulk insert using PostgreSQL COPY with an in-memory buffer.

        Values are written in COPY text format: ``None`` becomes SQL NULL and
        tabs, newlines, carriage returns and backslashes are escaped so they
        cannot break the row layout.

        Args:
            table_name (str): Name of the table to insert into.
            columns (list of str): Column names, in the order of the row values.
            data (iterable of tuple): Rows to insert.

        Raises:
            ValueError: If the table or a column name is not a plain identifier.
        """
        str_table_name = self._validated_identifier(table_name)
        lst_columns = [self._validated_identifier(column) for column in columns]

        buffer = io.StringIO()
        for row in data:
            buffer.write('\t'.join(self._format_copy_field(value) for value in row) + "\n")
        buffer.seek(0) #reset buffer position to the beginning

        copy_query = f"COPY {str_table_name} ({', '.join(lst_columns)}) FROM STDIN WITH DELIMITER '\t'"
        self.cursor.copy_expert(copy_query , buffer)
        self.conn.commit()

    def remove_duplicates_keep_order(self , input_list):
        """Return a new list with duplicates removed, keeping first occurrences in order.

        Items must be hashable.
        """
        return list(dict.fromkeys(input_list))

    def process_data(self , dict_leis):
        """Extract LEI / category / reason from each record and bulk-insert them.

        Args:
            dict_leis (iterable of dict): The exception records; each is
                flattened with the backfill helper before the three target
                values are pulled out.
        """
        list_tuples_exceptions = []

        for dict_exception in dict_leis:
            dict_flat = self.obj_backfill_helpers.flatten_dict(dict_input = dict_exception)
            tuple_values = self.obj_backfill_helpers.get_target_values(dict_data = dict_flat , subset_string = True , target_keys = ["LEI" , "ExceptionCategory" , "ExceptionReason"])
            list_tuples_exceptions.append(tuple(tuple_values))

        # De-duplicate within the batch; the table's UNIQUE constraint would
        # otherwise abort the whole COPY.
        list_clean_tuples_exceptions = self.remove_duplicates_keep_order(list_tuples_exceptions)
        self.bulk_insert_using_copy(data = list_clean_tuples_exceptions , table_name = "GLEIF_exception_data" , columns = ['lei' , 'ExceptionCategory' , 'ExceptionReason'])

    def storing_GLEIF_data_in_database(self):
        """Create the table, load the JSON file and insert its ``"exceptions"`` records.

        The database connection is closed afterwards, whether or not the load
        succeeded, so the instance cannot be reused for further queries.
        """
        try:
            self.create_table()

            with open(self.str_json_file_path, 'r', encoding='utf-8') as file:
                dict_leis = json.load(file)

            self.process_data(dict_leis = dict_leis["exceptions"])
        finally:
            self.conn.close()

In [3]:
# Build the loader with the constructor defaults spelled out: log to file,
# connect to the test database, and reuse the already-unpacked JSON file.
obj_gleif_reporting_exceptions = GLEIF_Reporting_Exceptions_Data(
    bool_log=True,
    str_db_name="GLEIF_test_db",
    bool_downloaded=True,
)


In [4]:
# Destructive: removes any existing exception table so the load below starts
# from an empty table.
obj_gleif_reporting_exceptions.drop_table(
    lst_table_names=["GLEIF_exception_data"],
)

In [5]:
# Create the table, load the JSON file and bulk-insert its records. This call
# also closes the DB connection, so re-run the instantiation cell before
# calling any other method on this object.
obj_gleif_reporting_exceptions.storing_GLEIF_data_in_database()
