In [1]:
from ffiec_data_connect import methods, credentials, ffiec_connection
import pandas as pd
import snowflake.snowpark as snowpark
from snowflake.connector.pandas_tools import write_pandas
import time
from zeep.exceptions import Fault
#creds = credentials.WebserviceCredentials(username="your-username", password="your-token")
#creds = credentials.WebserviceCredentials(username="whills12", password="vGzHZX7EAsPCwfXuV6pI")
#conn = ffiec_connection.FFIECConnection()

In [2]:
import datetime

In [3]:
def initialize_snowflake_connection(username,password,account,role,warehouse,database,schema):
    connection_parameters = {
        "user": username,
        "password": password,
        "account": account,
        "role": role,
        "warehouse": warehouse,
        "database": database,
        "schema": schema }
    return snowpark.session.Session.builder.configs(connection_parameters).create()

In [4]:
def initialize_ffeic_connection(username, password):
    creds = credentials.WebserviceCredentials(username=username, password=password)
    conn = ffiec_connection.FFIECConnection()
    return creds, conn

In [5]:
def print_row_count(session: snowpark.Session, t):
    row_count = session.sql(f"SELECT COUNT(*) AS row_count FROM {t};").collect()
    df = pd.DataFrame(row_count)
    return print(f'{df["ROW_COUNT"][0]} values inserted into table {t}')

In [6]:
def grab_table_name(session: snowpark.Session):
    tables = session.sql('''SELECT TABLE_NAME
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = 'PUBLIC';''').collect()
    table_names = pd.DataFrame(tables)
    table_list = table_names['TABLE_NAME'].tolist()
    table_list.remove('SCHEDULE_METADATA')
    return table_list

In [7]:
def grab_table_schema(session: snowpark.Session, table_name):
    df_schema = session.sql(f"DESCRIBE TABLE {table_name};").collect()
    schema_cols = pd.DataFrame(df_schema)
    return pd.DataFrame(columns=schema_cols['name'].tolist()), schema_cols['name'].tolist()

In [15]:
def grab_id_data(reporting_period, passed_list, creds, conn):
    skipper = 0
    limit_caller = 0
    error_list = []
    give_it_back = 0
    try:
        if not passed_list:
            limit_caller += 1
            give_it_back = 2499
            raise ValueError('The passed ID RSSD list is empty, calling the webservice')
        else:
            id_rssd_list = passed_list 
            print('Using the passed id list')
    except ValueError as e:
        print(e)
        filers = methods.collect_filers_on_reporting_period(
        session=conn,
        creds=creds,
        reporting_period=reporting_period,
        output_type="pandas")
        id_rssd_list = sorted(filers['id_rssd'].tolist())
    print('Processing period: ', reporting_period, f"list length is {len(id_rssd_list)}")
    print("The time was : ", time.ctime())
    for id_rssd in id_rssd_list:
        if limit_caller == 2500:
            break
        else:
            try:
                df = methods.collect_data(
                    session=conn,
                    creds=creds,
                    rssd_id=id_rssd,
                    reporting_period=reporting_period,
                    series="call",
                    output_type='pandas'
                )
                result_df = pd.DataFrame(columns=df['mdrm'].unique())
                for index, row in df.iterrows():
                    mdrm = row['mdrm']
                    data_type = row['data_type']
                    if data_type == 'str':
                        value = row['str_data']
                    elif data_type == 'bool':
                        value = row['bool_data']
                    elif data_type == 'float':
                        value = row['float_data']
                    elif data_type == 'int':
                        value = row['int_data']
                    else:
                        value = None
                    result_df.at[0, mdrm] = value
                result_df.insert(0, 'IDRSSD',id_rssd)
                result_df['RCON9999']=pd.to_datetime(result_df['RCON9999'], format='%Y%m%d.%f').dt.strftime('%Y-%m-%d')
                date_column = result_df.pop('RCON9999') 
                result_df.insert(1, 'RCON9999', date_column) 
                if skipper == 0:
                    final_df = result_df
                    skipper+=1
                else:
                    final_df = pd.concat([final_df,result_df]).reset_index(drop=True)
                limit_caller+=1
            except Fault as e:
                print(f'The IDRSSD: {id_rssd} is not available for reporting period {reporting_period}: printing fault statement {e}')
                error_list.append(id_rssd)
                limit_caller+=1
                continue
    if give_it_back == 0:
        id_rssd_partial = id_rssd_list[limit_caller:]
    else: 
        id_rssd_partial = id_rssd_list[give_it_back:]
    print("Records to still be processed: ", len(id_rssd_partial))
    return final_df, error_list, id_rssd_partial

In [9]:
def main(session: snowpark.Session, creds, conn, is_df,reporting_period,period_string):
    print('Grabbing Data')
    if is_df:
        passed_list = []
    else: 
        passed_df=pd.read_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\{period_string}_partial_id_list.csv')
        passed_list = passed_df['0'].tolist()
    #passed_list = [] 
    #passed_df=pd.read_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\{period_string}_partial_id_list.csv')
    #passed_list = passed_df['0'].tolist()
    #pd.read_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\{reporting_period}_error_list.csv')
    final_df, error_list, id_rssd_partial = grab_id_data(reporting_period, passed_list, creds, conn)
    print("The last data call was : ", time.ctime())
    if error_list:
        print('saving errors')
        e_df = pd.DataFrame(error_list)
        e_df.to_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\{period_string}_error_list.csv',index=False)
    if id_rssd_partial: 
        print('saving partial list')
        partial_id_df = pd.DataFrame(id_rssd_partial)
        partial_id_df.to_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\{period_string}_partial_id_list.csv',index=False)
    table_list = grab_table_name(session)
    print("Started writing data to snowflake @: ", time.ctime())
    for t in table_list:
        concat_df, col_list = grab_table_schema(session, t)
        write_df = pd.concat([concat_df,final_df.filter(items=col_list)])
        session.write_pandas(write_df,t)
        print_row_count(session, t)

In [17]:
if __name__ == "__main__":
    warehouse = "COMPUTE_WH"
    session = initialize_snowflake_connection(username= "WALKERHILLS",password = "h!123@iLLs#",account= "lrlwqdb-yib23397",role = "ACCOUNTADMIN",
                                    warehouse = "COMPUTE_WH",database = "CALLREPORTDB",schema = "PUBLIC")
    creds, conn = initialize_ffeic_connection(username='whills12',password="vGzHZX7EAsPCwfXuV6pI")
    reporting_period = '03/31/2020'
    period_string = '03_31_2020'
    main(session, creds, conn, False, reporting_period, period_string)
    session.sql(f"ALTER WAREHOUSE {warehouse} SUSPEND;")
    session.close()

Grabbing Data
Using the passed id list
Processing period:  03/31/2020 list length is 166
The time was :  Fri Dec 15 15:32:16 2023
Records to still be processed:  0
The last data call was :  Fri Dec 15 15:33:16 2023
Started writing data to snowflake @:  Fri Dec 15 15:33:18 2023
73426 values inserted into table SCHEDULE_RCCI
73426 values inserted into table SCHEDULE_RCD
73426 values inserted into table SCHEDULE_RCEII
73426 values inserted into table SCHEDULE_RCF
73426 values inserted into table SCHEDULE_RCG
73426 values inserted into table SCHEDULE_RCS
73426 values inserted into table SCHEDULE_RCT
73426 values inserted into table SCHEDULE_RCN
73426 values inserted into table SCHEDULE_RCV
73426 values inserted into table SCHEDULE_RIC
73426 values inserted into table SCHEDULE_LEO
73426 values inserted into table SCHEDULE_RCH
73426 values inserted into table SCHEDULE_RCI
73426 values inserted into table SCHEDULE_RCM
73426 values inserted into table SCHEDULE_RIBII
73426 values inserted into 

In [None]:
warehouse 

In [81]:
class snowflake_conn:
    def __init__(self, username,password,account,role,warehouse,database,schema):
        self.username = username
        self.password = password
        self.account = account
        self.role = role
        self.warehouse = warehouse
        self.database = database
        self.schema = schema
        self.connection_parameters = {
        "user": self.username,
        "password": self.password,
        "account": self.account,
        "role": self.role,
        "warehouse": self.warehouse,
        "database": self.database,
        "schema": self.schema }
    def snow_connect(self):
        session = snowpark.session.Session.builder.configs(self.connection_parameters).create()
        return session
    def snow_close(self,session):
        session.close()

In [78]:
username= "WALKERHILLS"
password = "h!123@iLLs#"
account= "lrlwqdb-yib23397"
role = "ACCOUNTADMIN"
warehouse = "COMPUTE_WH"
database = "CALLREPORTDB"
schema = "PUBLIC"

In [79]:
connection_parameters = {
"user": username,
"password": password,
"account": account,
"role": role,
"warehouse": warehouse,
"database": database,
"schema": schema }
session = snowpark.session.Session.builder.configs(connection_parameters).create()

In [24]:
session.close()

In [15]:
df_schema = session.sql("DESCRIBE TABLE SCHEDULE_ENT;").collect()
schema_cols = pd.DataFrame(df_schema)
schema_cols['name']

0    RCON9224
1    RCON9999
2    RSSD9017
3    RSSD9130
4    RSSD9200
5    RSSD9220
6      IDRSSD
Name: name, dtype: object

In [55]:
row_count = session.sql("SELECT COUNT(*) AS row_count FROM SCHEDULE_ENT;").collect()
df = pd.DataFrame(row_count)
print(f'{df["ROW_COUNT"][0]} values inserted into table _')

0 values inserted into table _


In [26]:
#table_list = grab_table_name(session)
#for t in table_list:
#    session.sql(f"UPDATE {t} SET RCON9999 = '2023-09-30'").collect()

In [54]:
#table_list = grab_table_name(session)
#for t in table_list:
#    session.sql(f"Delete from {t} where RCON9999 = '2023-06-30'").collect()

In [6]:
#for t in table_list:
#    try:
#        update_statement = f"UPDATE {t} SET RCON9999 = '2023-09-30'"
#        session.sql(update_statement)
#    except Exception as e:
#        print(f"Error updating {t}: {e}")

NameError: name 'table_list' is not defined

In [89]:
#def is_DataFrame(df): 
#    if isinstance(df, pd.DataFrame):
#        is_df = True
#        return df['0'].tolist(), is_df
#    else:
#        is_df = False
#        return df, is_df

In [96]:
file_lead = 'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\'
period_string = '06_30_2023'
partial_or_error = ('_partial_id_list.csv', '_error_list.csv')
filename = file_lead + period_string + partial_or_error[0]
print(filename)
pd.read_csv(filename)

C:\Users\whill\OneDrive\Desktop\Fall2022\ISYE6740\call-report-database\Uploading Files\06_30_2023_partial_id_list.csv


Unnamed: 0,0
0,493741
1,493844
2,494261
3,494654
4,495419
...,...
2193,998648
2194,998657
2195,998844
2196,999355


In [None]:
file_lead = 'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\'
period_string = '06_30_2023'
partial_or_error = ('_partial_id_list.csv', '_error_list.csv')
filename = file_lead + period_string + partial_or_error[0]
passed_list = pd.read_csv(filename)
passed_list = []

In [88]:
session = initialize_connection(username= "WALKERHILLS",password = "h!123@iLLs#",account= "lrlwqdb-yib23397",role = "ACCOUNTADMIN",
warehouse = "COMPUTE_WH",database = "CALLREPORTDB",schema = "PUBLIC")

df_schema = session.sql(f"SELECT COUNT(*) AS row_count FROM SCHEDULE_METADATA").collect()
schema_cols = pd.DataFrame(df_schema)
print(schema_cols)
session.close()

   ROW_COUNT
0       6062


In [80]:
for t in table_list:
    concat_df, col_list = grab_table_schema(session, t)
    write_df = pd.concat([concat_df,final_df.filter(items=col_list)])
    session.write_pandas(write_df,t)
    print_row_count(session, t)

NameError: name 'final_df' is not defined

In [92]:
is_DataFrame(pd.read_csv(f'C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\06_30_2023_partial_id_list.csv'))[1]

True

In [39]:
try:
    if not empty_list:
        raise ValueError('The list is empty')
    for i in empty_list:
        print(i)
except ValueError as e:
    print(e)

The list is empty


In [37]:
for i in empty_list:
    print(i)

In [9]:
concat_df, col_list = grab_table_schema(session, 'SCHEDULE_ENT')

In [19]:
def main(session: snowpark.Session):
    quarters = ['03/31','06/30','09/30','12/31']
    years = [x for x in range(2001,2003)]
    dates = []
    for y in years:
        for q in quarters:
            if q == '12/31' and y == 2023:
                break
            else:
                dates.append(f'{q}/{y}')
    print('Grabbing Data')
    final_df = grab_id_data(dates)   
    table_list = grab_table_name(session)
    for t in table_list:
        concat_df, col_list = grab_table_schema(session, t)
        write_df = pd.concat([concat_df,final_df.filter(items=col_list)])
        session.write_pandas(write_df,t)
        print_row_count(session, t)

In [None]:
years = '09/30/2023'
table = 'SCHEDULE_ENT'

In [35]:
quarters = ['03/31','06/30','09/30','12/31']
years = [x for x in range(2001,2023)]
dates = []
for y in years:
    for q in quarters:
        if q == '12/31' and y == 2023:
            break
        else:
            dates.append(f'{q}/{y}')
dates

['03/31/2001',
 '06/30/2001',
 '09/30/2001',
 '12/31/2001',
 '03/31/2002',
 '06/30/2002',
 '09/30/2002',
 '12/31/2002',
 '03/31/2003',
 '06/30/2003',
 '09/30/2003',
 '12/31/2003',
 '03/31/2004',
 '06/30/2004',
 '09/30/2004',
 '12/31/2004',
 '03/31/2005',
 '06/30/2005',
 '09/30/2005',
 '12/31/2005',
 '03/31/2006',
 '06/30/2006',
 '09/30/2006',
 '12/31/2006',
 '03/31/2007',
 '06/30/2007',
 '09/30/2007',
 '12/31/2007',
 '03/31/2008',
 '06/30/2008',
 '09/30/2008',
 '12/31/2008',
 '03/31/2009',
 '06/30/2009',
 '09/30/2009',
 '12/31/2009',
 '03/31/2010',
 '06/30/2010',
 '09/30/2010',
 '12/31/2010',
 '03/31/2011',
 '06/30/2011',
 '09/30/2011',
 '12/31/2011',
 '03/31/2012',
 '06/30/2012',
 '09/30/2012',
 '12/31/2012',
 '03/31/2013',
 '06/30/2013',
 '09/30/2013',
 '12/31/2013',
 '03/31/2014',
 '06/30/2014',
 '09/30/2014',
 '12/31/2014',
 '03/31/2015',
 '06/30/2015',
 '09/30/2015',
 '12/31/2015',
 '03/31/2016',
 '06/30/2016',
 '09/30/2016',
 '12/31/2016',
 '03/31/2017',
 '06/30/2017',
 '09/30/20

In [42]:
filers_set = set()
for d in dates:
    filers = methods.collect_filers_on_reporting_period(
    session=conn,
    creds=creds,
    reporting_period=d,
    output_type="list")
    df = pd.DataFrame(filers)
    filers_set.update(df['id_rssd'])
filers_set

{'3193462',
 '733447',
 '1015467',
 '191663',
 '182353',
 '440857',
 '228055',
 '983671',
 '68345',
 '477657',
 '3577428',
 '2484145',
 '40855',
 '2791795',
 '65746',
 '990156',
 '790721',
 '1469211',
 '212344',
 '786210',
 '2042684',
 '391959',
 '235558',
 '679826',
 '388753',
 '986935',
 '943004',
 '2905761',
 '3634433',
 '431323',
 '678454',
 '3374403',
 '2036843',
 '346379',
 '2742238',
 '333203',
 '540672',
 '2804882',
 '2880019',
 '233022',
 '5050',
 '790543',
 '509950',
 '505831',
 '672573',
 '118156',
 '1165540',
 '6972',
 '5582846',
 '86358',
 '926959',
 '3208441',
 '3584538',
 '12946',
 '922728',
 '925653',
 '403151',
 '387345',
 '2567123',
 '123178',
 '535753',
 '87047',
 '229801',
 '82024',
 '465672',
 '440174',
 '80374',
 '966973',
 '969741',
 '820842',
 '176325',
 '97055',
 '987848',
 '44602',
 '737632',
 '713926',
 '186557',
 '3437157',
 '855741',
 '1450620',
 '642866',
 '112332',
 '680466',
 '698649',
 '3473285',
 '844820',
 '788924',
 '3035245',
 '702751',
 '170912',
 

In [None]:
vasavi.kullanakoppal@usaa.com

In [56]:
emp_list = []
emp_list[2499:]

[]

In [65]:
listed = ["4114567","4114567"]
for l in listed:
    try:
        time_series = methods.collect_data(
            session=conn,
            creds=creds,
            rssd_id=l,
            reporting_period="9/30/2023",
            series="call",
            output_type='pandas'
        )
        time_series
    except Fault as e:
        print('no no there sorry')


no no there sorry
no no there sorry


In [10]:
df = grab_id_data([37,242,279,354,5805479,5859511],'09/30/2023')
df.filter(items=col_list)

Unnamed: 0,RCON9224,RCON9999,RSSD9017,RSSD9130,RSSD9200,RSSD9220,IDRSSD
0,,20230930,BANK OF HANCOCK COUNTY,SPARTA,GA,31087,37
1,,20230930,FIRST COMMUNITY BANK XENIA FLORA,Xenia,IL,62899,242
2,,20230930,MINEOLA COMMUNITY BANK S. S. B.,MINEOLA,TX,75773,279
3,549300T61O0X0RJMQ375,20230930,Bison State Bank,Bison,KS,675200328,354
4,,20230930,"BESSEMER TRUST COMPANY OF NEVADA, N.A.",Las Vegas,NV,89135,5805479
5,9845004BEFQAAF3AE023,20230930,"TIAA Trust, National Association",Charlotte,NC,28262,5859511


In [11]:
final_df = pd.concat([concat_df,df.filter(items=col_list)])
final_df

Unnamed: 0,RCON9224,RCON9999,RSSD9017,RSSD9130,RSSD9200,RSSD9220,IDRSSD
0,,20230930,BANK OF HANCOCK COUNTY,SPARTA,GA,31087,37
1,,20230930,FIRST COMMUNITY BANK XENIA FLORA,Xenia,IL,62899,242
2,,20230930,MINEOLA COMMUNITY BANK S. S. B.,MINEOLA,TX,75773,279
3,549300T61O0X0RJMQ375,20230930,Bison State Bank,Bison,KS,675200328,354
4,,20230930,"BESSEMER TRUST COMPANY OF NEVADA, N.A.",Las Vegas,NV,89135,5805479
5,9845004BEFQAAF3AE023,20230930,"TIAA Trust, National Association",Charlotte,NC,28262,5859511


In [20]:
result = session.write_pandas(final_df, 'SCHEDULE_ENT') 
success, nchunks,nrows, _ = result

ValueError: too many values to unpack (expected 4)

In [19]:
print(result)

<snowflake.snowpark.table.Table object at 0x0000021913059D90>


In [29]:
df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"])
df.write.mode("overwrite").save_as_table("saved_table", table_type="temporary")
session.table("saved_table").show()


stage_created_result = session.sql("create temporary stage if not exists my_stage").collect()
df.write.copy_into_location("@my_stage/copied_from_dataframe.csv")  # default CSV

-------------
|"A"  |"B"  |
-------------
|1    |2    |
|3    |4    |
-------------



[Row(rows_unloaded=2, input_bytes=8, output_bytes=28)]

In [None]:
def main(session: snowpark.Session):
    df_table = session.table("sample_product_data")

In [38]:
def match_data_type(id_rssd_set, mdrm_set):
    master_data_type_list = []
    trials = 0
    while len(mdrm_set) != 0 and trials != len(id_rssd_set):
        for id_rssd in id_rssd_set:
                print(id_rssd)
                time_series = methods.collect_data(
                session=conn,
                creds=creds,
                rssd_id=id_rssd,
                reporting_period="9/30/2023",
                series="call",
                output_type='pandas'
                )
                print(time_series)
                for mdrm in mdrm_set.copy():
                    if mdrm in time_series['mdrm'].values:
                        print('made it')
                        data_type = time_series['data_type'][time_series['mdrm']==mdrm].values[0]
                        pair_mdrm_type = (mdrm,data_type)
                        master_data_type_list.append(pair_mdrm_type)
                        mdrm_set.remove(mdrm)
                        print(mdrm_set,len(mdrm_set))
                trials +=1
                if not mdrm_set:
                    break
                #time.sleep(2)
    return master_data_type_list

In [39]:
id_rssd_set = set([37,279])
mdrm_set = set(['RCONC752','RCONF164','RCOALB61','RIADC235','RIADGW44'])
ml_ret = match_data_type(id_rssd_set,mdrm_set)
ml_ret

37
         mdrm rssd    quarter  int_data  float_data bool_data  str_data  \
0    RCONC752   37  9/30/2023       NaN         NaN      None  FFIEC051   
1    RCONF164   37  9/30/2023       0.0         NaN      None      None   
2    RCONG322   37  9/30/2023       0.0         NaN      None      None   
3    RCON1460   37  9/30/2023       0.0         NaN      None      None   
4    RIADGW44   37  9/30/2023      38.0         NaN      None      None   
..        ...  ...        ...       ...         ...       ...       ...   
578  RIADC232   37  9/30/2023       0.0         NaN      None      None   
579  RCOALB60   37  9/30/2023       0.0         NaN      None      None   
580  RCOALB61   37  9/30/2023       0.0         NaN      None      None   
581  RIADC235   37  9/30/2023       0.0         NaN      None      None   
582  RIADC234   37  9/30/2023       1.0         NaN      None      None   

    data_type  
0         str  
1         int  
2         int  
3         int  
4         int  


[('RCOALB61', 'int'),
 ('RCONF164', 'int'),
 ('RIADGW44', 'int'),
 ('RCONC752', 'str'),
 ('RIADC235', 'int')]

In [14]:
for mdrm in mdrm_set:
    if mdrm in time_series['mdrm'].values:
        print(f"{mdrm} found!")
    else:
        print(f"{mdrm} not found.")

RCOALB61 found!
RCONC752 found!
RIADC235 found!
RCONF164 found!
RIADGW44 found!


In [13]:

time_series['data_type'][time_series['mdrm']==mdrm]

4    int
Name: data_type, dtype: object

In [15]:
print("How long does it take : ", time.ctime())
df = methods.collect_data(
    session=conn,
    creds=creds,
    rssd_id="37",
    reporting_period="9/30/2023",
    series="call",
    output_type='pandas'
)
time_series
print("The time was : ", time.ctime())
result_df = pd.DataFrame(columns=df['mdrm'].unique())
for index, row in df.iterrows():
    mdrm = row['mdrm']
    data_type = row['data_type']
    if data_type == 'str':
        value = row['str_data']
    elif data_type == 'bool':
        value = row['bool_data']
    elif data_type == 'float':
        value = row['float_data']
    elif data_type == 'int':
        value = row['int_data']
    else:
        value = None
    result_df.at[0, mdrm] = value
result_df.insert(0, 'IDRSSD',37)
result_df['RCON9999']=pd.to_datetime(result_df['RCON9999'], format='%Y%m%d.%f').dt.strftime('%Y%m%d')
date_column = result_df.pop('RCON9999') 
result_df.insert(1, 'RCON9999', date_column) 
result_df

How long does it take :  Mon Dec 11 13:48:16 2023
The time was :  Mon Dec 11 13:48:17 2023


Unnamed: 0,IDRSSD,RCON9999,RCONC752,RCONF164,RCONG322,RCON1460,RIADGW44,RIADB522,RCONC027,RCONC026,...,RIADC895,RIADC894,RCONG472,RIADC896,RIADC233,RIADC232,RCOALB60,RCOALB61,RIADC235,RIADC234
0,37,20230930,FFIEC051,0.0,0.0,0.0,38.0,955.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [4]:
df= time_series

In [14]:
result_df = pd.DataFrame(columns=df['mdrm'].unique())

# Iterate through each row in the original DataFrame
for index, row in df.iterrows():
    mdrm = row['mdrm']
    data_type = row['data_type']

    # Depending on the 'data_type', select the corresponding value
    if data_type == 'str':
        value = row['str_data']
    elif data_type == 'bool':
        value = row['bool_data']
    elif data_type == 'float':
        value = row['float_data']
    elif data_type == 'int':
        value = row['int_data']
    else:
        value = None

    # Set the value in the result DataFrame
    result_df.at[0, mdrm] = value

# Transpose the result DataFrame to have one row and 583 columns
#result_df = result_df.T

# Reset the index and rename columns
#result_df = result_df.reset_index()
#result_df.columns = ['mdrm', 0]

# Display the resulting DataFrame
print(result_df)

   RCONC752 RCONF164 RCONG322 RCON1460 RIADGW44 RIADB522 RCONC027 RCONC026  \
0  FFIEC051      0.0      0.0      0.0     38.0    955.0      0.0      0.0   

  RCONHK09 RCONHK17  ... RIADC895 RIADC894 RCONG472 RIADC896 RIADC233  \
0   5162.0    964.0  ...      0.0      0.0      0.0      0.0      0.0   

  RIADC232 RCOALB60 RCOALB61 RIADC235 RIADC234  
0      0.0      0.0      0.0      0.0      1.0  

[1 rows x 583 columns]


In [15]:
result_df['RCON9999']# = '09/30/2023'

0    20230930.0
Name: RCON9999, dtype: object

In [18]:
result_df['IDRSSD'] = 37

In [19]:
result_df['IDRSSD']

0    37
Name: IDRSSD, dtype: int64

In [16]:
result_df['RCON9999']=pd.to_datetime(result_df['RCON9999'], format='%Y%m%d.%f').dt.strftime('%Y-%m-%d')

0    2023-09-30
Name: RCON9999, dtype: object

In [2]:
last_filing_date_time = methods.collect_filers_submission_date_time(
    session=conn,
    creds=creds,
    since_date="10/31/2022",
    reporting_period="9/30/2023",
)

In [6]:
df =pd.DataFrame(last_filing_date_time)
df

Unnamed: 0,rssd,datetime
0,175458,10/1/2023 8:26:23 AM
1,723158,10/1/2023 2:00:50 PM
2,359744,10/2/2023 9:15:07 AM
3,688556,10/2/2023 11:38:07 AM
4,199137,10/2/2023 2:27:02 PM
...,...,...
4619,856869,10/31/2023 12:26:27 AM
4620,543459,10/31/2023 12:30:06 AM
4621,402846,10/31/2023 12:33:03 AM
4622,666554,10/31/2023 12:59:07 AM


In [17]:
filers = methods.collect_filers_on_reporting_period(
    session=conn,
    creds=creds,
    reporting_period="06/30/2023",
    output_type="pandas"
)
filers

Unnamed: 0,id_rssd,fdic_cert_number,occ_chart_number,ots_dock_number,primary_aba_rout_number,name,state,city,address,filing_type,has_filed_for_reporting_period
0,37,10057,,16553,61107146,BANK OF HANCOCK COUNTY,GA,SPARTA,12855 BROAD STREET,051,True
1,242,3850,,,81220537,FIRST COMMUNITY BANK XENIA-FLORA,IL,XENIA,260 FRONT STREET,051,True
2,279,28868,,2523,311972526,"MINEOLA COMMUNITY BANK, SSB",TX,MINEOLA,215 W BROAD,051,True
3,354,14083,,,101107475,BISON STATE BANK,KS,BISON,223 MAIN STREET,051,True
4,457,10202,,,91208332,LOWRY STATE BANK,MN,LOWRY,400 FLORENCE AVE.,051,True
...,...,...,...,...,...,...,...,...,...,...,...
4664,5805479,59346,25287,,122402463,"BESSEMER TRUST COMPANY OF NEVADA, NATIONAL ASS...",NV,LAS VEGAS,1700 SOUTH PAVILION CENTER DRIVE SUITE...,051,True
4665,5805488,59349,25275,,121202457,"INSPIRE TRUST COMPANY, NATIONAL ASSOCIATION",NV,RENO,241 RIDGE STREET SUITE 310,041,True
4666,5805817,59337,25237,,91018470,CERIDIAN NATIONAL TRUST BANK,MN,BLOOMINGTON,3311 EAST OLD SHAKOPEE ROAD,051,True
4667,5859511,59344,25288,,53012977,"TIAA TRUST, NATIONAL ASSOCIATION",NC,CHARLOTTE,8500 ANDREW CARNEGIE BOULEVARD,051,True


In [38]:
filers.loc[(filers['id_rssd']=='493741')]

Unnamed: 0,id_rssd,fdic_cert_number,occ_chart_number,ots_dock_number,primary_aba_rout_number,name,state,city,address,filing_type,has_filed_for_reporting_period
1813,493741,197,,,74912674,ALLIANCE BANK,IN,FRANCESVILLE,101 WEST MONTGOMERY STREET,51,True


In [20]:
print(filers['id_rssd'].tolist())

['37', '242', '279', '354', '457', '505', '1155', '1351', '1454', '1669', '1856', '2161', '2732', '2750', '3252', '3458', '3720', '3971', '4156', '4231', '5069', '5135', '5210', '5461', '5751', '6039', '6329', '7009', '7045', '7072', '7456', '7634', '8462', '8631', '9357', '9553', '9807', '9955', '10250', '10849', '11145', '11640', '11837', '12030', '12142', '12311', '12647', '12946', '13103', '13251', '13457', '14155', '14650', '14753', '14865', '14977', '16551', '17110', '17147', '17259', '17950', '17978', '18050', '18827', '18836', '18854', '19356', '19936', '19972', '20053', '20633', '20857', '21256', '21359', '21658', '22048', '22543', '22552', '22730', '23456', '23504', '23643', '23755', '24006', '24378', '24659', '24949', '25357', '25647', '26765', '27070', '27548', '27614', '27847', '28013', '28152', '28675', '28732', '29476', '29636', '29878', '30052', '30502', '30650', '30810', '31134', '31255', '31657', '31826', '31835', '32234', '32766', '32971', '33147', '33259', '33370', 

In [23]:
ex = filers['id_rssd'].tolist()
ex.sort()

In [44]:
ex2 = sorted(filers['id_rssd'].tolist())
ex2

['1000052',
 '1000276',
 '1000641',
 '1000856',
 '1000959',
 '1001059',
 '1001152',
 '1001639',
 '1001648',
 '1001853',
 '100236',
 '1002373',
 '1002458',
 '1002878',
 '1002953',
 '1003455',
 '1003558',
 '100357',
 '1003839',
 '1003950',
 '1004256',
 '1004470',
 '1005075',
 '1005552',
 '100562',
 '1005655',
 '1005851',
 '1006148',
 '1006157',
 '1006513',
 '1006559',
 '1006652',
 '1007015',
 '1007051',
 '1007154',
 '1007417',
 '1007734',
 '1007752',
 '100777',
 '1007819',
 '1007873',
 '1007930',
 '1007958',
 '1008076',
 '1008151',
 '1008209',
 '100843',
 '1008432',
 '1008450',
 '1008553',
 '1008674',
 '1008955',
 '1009242',
 '1009354',
 '1009756',
 '1009840',
 '1010015',
 '101037',
 '1010574',
 '1011432',
 '1011526',
 '1011553',
 '1011638',
 '1011656',
 '1011852',
 '1011955',
 '1012251',
 '1012457',
 '1012671',
 '1013070',
 '1013650',
 '1013744',
 '1014059',
 '1014125',
 '1014246',
 '1014255',
 '1014376',
 '101439',
 '1014451',
 '1014554',
 '1014657',
 '1014853',
 '1015252',
 '1015270',

In [50]:
for i in ex2:
    print(i)

1000052
1000276
1000641
1000856
1000959
1001059
1001152
1001639
1001648
1001853
100236
1002373
1002458
1002878
1002953
1003455
1003558
100357
1003839
1003950
1004256
1004470
1005075
1005552
100562
1005655
1005851
1006148
1006157
1006513
1006559
1006652
1007015
1007051
1007154
1007417
1007734
1007752
100777
1007819
1007873
1007930
1007958
1008076
1008151
1008209
100843
1008432
1008450
1008553
1008674
1008955
1009242
1009354
1009756
1009840
1010015
101037
1010574
1011432
1011526
1011553
1011638
1011656
1011852
1011955
1012251
1012457
1012671
1013070
1013650
1013744
1014059
1014125
1014246
1014255
1014376
101439
1014451
1014554
1014657
1014853
1015252
1015270
1015458
1015560
1015832
1015841
1015850
1016174
1016231
1016259
1016558
101671
1016718
1016848
1016857
1016941
101738
1017425
1017555
1017564
1017854
1017939
1017957
1018927
101952
102333
102342
10250
102557
102874
103134
104234
104542
104971
105026
105473
105530
106359
106452
106676
107244
107570
107758
108072
108269
10849
108652
10

In [55]:
ex2[2499:]

['498625',
 '498942',
 '499154',
 '499453',
 '499501',
 '49951',
 '499743',
 '499752',
 '499855',
 '500256',
 '500658',
 '501105',
 '501132',
 '50144',
 '501459',
 '501655',
 '501767',
 '501815',
 '502111',
 '502447',
 '502559',
 '502652',
 '502746',
 '502849',
 '503547',
 '503640',
 '504142',
 '504311',
 '504535',
 '50461',
 '504647',
 '504713',
 '5047505',
 '505',
 '5050028',
 '505550',
 '505916',
 '5069',
 '506922',
 '507068',
 '507152',
 '507349',
 '507509',
 '508270',
 '508346',
 '508355',
 '509811',
 '509857',
 '509950',
 '510330',
 '510648',
 '510871',
 '510938',
 '510947',
 '511832',
 '51253',
 '513256',
 '513322',
 '513340',
 '5135',
 '513546',
 '51356',
 '5136959',
 '513854',
 '513920',
 '514048',
 '514057',
 '514066',
 '514132',
 '514356',
 '5143788',
 '51459',
 '514655',
 '514936',
 '515979',
 '516154',
 '516855',
 '516873',
 '517049',
 '517357',
 '517441',
 '517674',
 '518037',
 '518176',
 '518354',
 '51842',
 '518877',
 '519146',
 '5192496',
 '519360',
 '5193989',
 '51944

In [39]:
ex[1813:]

['3592047',
 '3593307',
 '3594005',
 '3594087',
 '359472',
 '3594797',
 '3595271',
 '3596250',
 '3596894',
 '3597211',
 '359744',
 '3599804',
 '3603961',
 '3607062',
 '360777',
 '3608751',
 '361055',
 '3610718',
 '361167',
 '361279',
 '361354',
 '361439',
 '3614837',
 '3614846',
 '3614958',
 '3614976',
 '3617856',
 '3619216',
 '362043',
 '362155',
 '3623110',
 '3623969',
 '3626184',
 '3626858',
 '3627109',
 '362717',
 '362856',
 '362940',
 '3629420',
 '3629484',
 '3630323',
 '3633016',
 '3633173',
 '363442',
 '3635029',
 '3635551',
 '3636259',
 '3636428',
 '3636446',
 '363648',
 '363657',
 '3636914',
 '3637247',
 '3637685',
 '3637706',
 '363778',
 '364131',
 '364270',
 '364430',
 '36456',
 '3645840',
 '3646397',
 '364850',
 '3650808',
 '365325',
 '3655933',
 '365745',
 '365848',
 '365950',
 '366078',
 '366238',
 '366247',
 '366359',
 '3664588',
 '366658',
 '3667132',
 '366854',
 '367150',
 '367178',
 '3671997',
 '367431',
 '367543',
 '3678857',
 '368054',
 '368344',
 '368522',
 '368804

In [49]:
ex2.index('493152')

2476

In [29]:
ex[2300]

'4536084'

In [37]:
filers.index[filers['id_rssd'] == 493741].tolist()

[]

In [36]:
filers['id_rssd']==493741

0       False
1       False
2       False
3       False
4       False
        ...  
4664    False
4665    False
4666    False
4667    False
4668    False
Name: id_rssd, Length: 4669, dtype: bool

In [None]:
[37,242,279,354,5805479]

In [13]:
df2 = pd.DataFrame(filers)
df2

Unnamed: 0,id_rssd,fdic_cert_number,occ_chart_number,ots_dock_number,primary_aba_rout_number,name,state,city,address,filing_type,has_filed_for_reporting_period
0,37,10057,,16553,61107146,BANK OF HANCOCK COUNTY,GA,SPARTA,12855 BROAD STREET,051,True
1,242,3850,,,81220537,FIRST COMMUNITY BANK XENIA-FLORA,IL,XENIA,260 FRONT STREET,051,True
2,279,28868,,2523,311972526,"MINEOLA COMMUNITY BANK, SSB",TX,MINEOLA,215 W BROAD,051,True
3,354,14083,,,101107475,BISON STATE BANK,KS,BISON,223 MAIN STREET,051,True
4,457,10202,,,91208332,LOWRY STATE BANK,MN,LOWRY,400 FLORENCE AVE.,051,True
...,...,...,...,...,...,...,...,...,...,...,...
4664,5805479,59346,25287,,122402463,"BESSEMER TRUST COMPANY OF NEVADA, NATIONAL ASS...",NV,LAS VEGAS,1700 SOUTH PAVILION CENTER DRIVE SUITE...,051,True
4665,5805488,59349,25275,,121202457,"INSPIRE TRUST COMPANY, NATIONAL ASSOCIATION",NV,RENO,241 RIDGE STREET SUITE 310,041,True
4666,5805817,59337,25237,,91018470,CERIDIAN NATIONAL TRUST BANK,MN,BLOOMINGTON,3311 EAST OLD SHAKOPEE ROAD,051,True
4667,5859511,59344,25288,,53012977,"TIAA TRUST, NATIONAL ASSOCIATION",NC,CHARLOTTE,8500 ANDREW CARNEGIE BOULEVARD,051,True


In [15]:
df2['id_rssd'].tolist()

['37',
 '242',
 '279',
 '354',
 '457',
 '505',
 '1155',
 '1351',
 '1454',
 '1669',
 '1856',
 '2161',
 '2732',
 '2750',
 '3252',
 '3458',
 '3720',
 '3971',
 '4156',
 '4231',
 '5069',
 '5135',
 '5210',
 '5461',
 '5751',
 '6039',
 '6329',
 '7009',
 '7045',
 '7072',
 '7456',
 '7634',
 '8462',
 '8631',
 '9357',
 '9553',
 '9807',
 '9955',
 '10250',
 '10849',
 '11145',
 '11640',
 '11837',
 '12030',
 '12142',
 '12311',
 '12647',
 '12946',
 '13103',
 '13251',
 '13457',
 '14155',
 '14650',
 '14753',
 '14865',
 '14977',
 '16551',
 '17110',
 '17147',
 '17259',
 '17950',
 '17978',
 '18050',
 '18827',
 '18836',
 '18854',
 '19356',
 '19936',
 '19972',
 '20053',
 '20633',
 '20857',
 '21256',
 '21359',
 '21658',
 '22048',
 '22543',
 '22552',
 '22730',
 '23456',
 '23504',
 '23643',
 '23755',
 '24006',
 '24378',
 '24659',
 '24949',
 '25357',
 '25647',
 '26765',
 '27070',
 '27548',
 '27614',
 '27847',
 '28013',
 '28152',
 '28675',
 '28732',
 '29476',
 '29636',
 '29878',
 '30052',
 '30502',
 '30650',
 '308

In [5]:
df3 = df2[df2['has_filed_for_reporting_period']==True]
df3

Unnamed: 0,id_rssd,fdic_cert_number,occ_chart_number,ots_dock_number,primary_aba_rout_number,name,state,city,address,filing_type,has_filed_for_reporting_period
0,37,10057,,16553,61107146,BANK OF HANCOCK COUNTY,GA,SPARTA,12855 BROAD STREET,051,True
1,242,3850,,,81220537,FIRST COMMUNITY BANK XENIA-FLORA,IL,XENIA,260 FRONT STREET,051,True
2,279,28868,,2523,311972526,"MINEOLA COMMUNITY BANK, SSB",TX,MINEOLA,215 W BROAD,051,True
3,354,14083,,,101107475,BISON STATE BANK,KS,BISON,223 MAIN STREET,051,True
4,457,10202,,,91208332,LOWRY STATE BANK,MN,LOWRY,400 FLORENCE AVE.,051,True
...,...,...,...,...,...,...,...,...,...,...,...
4664,5805479,59346,25287,,122402463,"BESSEMER TRUST COMPANY OF NEVADA, NATIONAL ASS...",NV,LAS VEGAS,1700 SOUTH PAVILION CENTER DRIVE SUITE...,051,True
4665,5805488,59349,25275,,121202457,"INSPIRE TRUST COMPANY, NATIONAL ASSOCIATION",NV,RENO,241 RIDGE STREET SUITE 310,041,True
4666,5805817,59337,25237,,91018470,CERIDIAN NATIONAL TRUST BANK,MN,BLOOMINGTON,3311 EAST OLD SHAKOPEE ROAD,051,True
4667,5859511,59344,25288,,53012977,"TIAA TRUST, NATIONAL ASSOCIATION",NC,CHARLOTTE,8500 ANDREW CARNEGIE BOULEVARD,051,True


In [None]:
filers = methods.collect_filers_on_reporting_period(
    session=conn,
    creds=creds,
    reporting_period="9/30/2023",
    output_type="list"
)

In [73]:
df1= pd.read_csv('C:\\Users\\whill\\OneDrive\\Desktop\\Fall2022\\ISYE6740\\call-report-database\\Uploading Files\\06_30_2023_partial_id_list.csv')

In [74]:
df1['0'].tolist()

[493741,
 493844,
 494261,
 494654,
 495419,
 495558,
 496434,
 49652,
 496845,
 496957,
 497039,
 497404,
 497570,
 497655,
 497851,
 497954,
 498148,
 498317,
 498362,
 498456,
 498531,
 49858,
 498625,
 498942,
 499154,
 499453,
 499501,
 49951,
 499743,
 499752,
 499855,
 500256,
 500658,
 501105,
 501132,
 50144,
 501459,
 501655,
 501767,
 501815,
 502111,
 502447,
 502559,
 502652,
 502746,
 502849,
 503547,
 503640,
 504142,
 504311,
 504535,
 50461,
 504647,
 504713,
 5047505,
 505,
 5050028,
 505550,
 505916,
 5069,
 506922,
 507068,
 507152,
 507349,
 507509,
 508270,
 508346,
 508355,
 509811,
 509857,
 509950,
 510330,
 510648,
 510871,
 510938,
 510947,
 511832,
 51253,
 513256,
 513322,
 513340,
 5135,
 513546,
 51356,
 5136959,
 513854,
 513920,
 514048,
 514057,
 514066,
 514132,
 514356,
 5143788,
 51459,
 514655,
 514936,
 515979,
 516154,
 516855,
 516873,
 517049,
 517357,
 517441,
 517674,
 518037,
 518176,
 518354,
 51842,
 518877,
 519146,
 5192496,
 519360,
 51