In [1]:
import requests
import pandas as pd
import csv
import json
import concurrent.futures
import os

url = 'https://politdata.nazk.gov.ua/api/getreportslist'
response = requests.get(url)
data = response.json()

df = pd.DataFrame(data).replace({None: "Дані відсутні", '"': '""'}, regex=True)

df.to_csv('spysok_zvitiv.csv', index=False, quoting=csv.QUOTE_ALL, encoding='utf-8')

all_data = []
def fetch_report_data(id):
    response = requests.get(f'https://politdata.nazk.gov.ua/api/getreport/{id}')
    if response.status_code == 200:
        return response.json()

with concurrent.futures.ThreadPoolExecutor() as executor:
    future_to_id = {executor.submit(fetch_report_data, id): id for id in df['id']}
    for future in concurrent.futures.as_completed(future_to_id):
        report_data = future.result()
        if report_data:
            all_data.append(report_data)

final_df = pd.DataFrame(all_data)

final_df.to_json('final_data.json', orient='records', force_ascii=False)

def process_data(data, key, additional_fields):
    combined_df = pd.DataFrame()
    for item in data:
        for value in item.values():
            if value and key in value:
                filtered_data = [record for record in value[key] if record and any(record.values())]
                if filtered_data:
                    temp_df = pd.DataFrame(filtered_data)
                    for field in additional_fields:
                        temp_df[field] = value.get(field, "Дані відсутні")
                    combined_df = pd.concat([combined_df, temp_df], ignore_index=True)
    return combined_df

def rename_columns(df, column_mapping):
    df.rename(columns=column_mapping, inplace=True)
    return df

with open('final_data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

categories = {
    'contributionConMoney': 'conmoney_data.csv',
    'contributionOtherCon': 'othercon_data.csv',
    'contributionCosts': 'costs_data.csv',
    'contributionOtherCosts': 'othercosts_data.csv',
    'paymentOther': 'payment_data.csv',
    'paymentGov': 'payment_gov_data.csv',  
    'tablets1': 'local_orgs_list.csv'
}

additional_fields = ['year', 'types', 'period', 'partyCode', 'partyName', 'documentId', 'officeType']

for category, output_file in categories.items():
    combined_df = process_data(data, category, additional_fields)
    if not combined_df.empty:
        combined_df.to_csv(output_file, index=False, encoding='utf-8')


file_mappings = {
    'conmoney_data.csv': {
        'rename': {
            'conMoney1': 'donor_name',
            'conMoney2': 'EDRPOU',
            'conMoney3': 'donor_type',
            'conMoney4': 'birth_date',
            'conMoney5': 'donor_location',
            'conMoney6': 'bank_name',
            'conMoney7': 'bank_EDRPOU',
            'conMoney8': 'account_number',
            'conMoney9': 'contribution_date',
            'conMoney10': 'amount',
            'conMoney11': 'refund_amount'
        },
        'output': 'vnesky.csv'
    },
    'othercon_data.csv': {
        'rename': {
            'other1': 'cathegory',
            'other2': 'donor_name',
            'other3': 'EDRPOU',
            'other4': 'donor_type',
            'other5': 'birth_date',
            'othery6': 'donor_location',
            'other7': 'contribution_date',
            'other9': 'amount',
            'other10': 'refund_amount',
        },
        'output': 'non_money_vnesky.csv'
    },
    'costs_data.csv': {
        'rename': {
            'costs1': 'cathegory',
            'costs2': 'bank_name',
            'costs3': 'EDRPOU',
            'costs4': 'account_number',
            'costs5': 'contribution_date',
            'costs6': 'amount',
            'costs7': 'refund_amount',
        },
        'output': 'state_vnesky.csv'
    },
    'othercosts_data.csv': {
        'rename': {
            'otherCosts1': 'cathegory',
            'otherCosts2': 'payment_purpose',
            'otherCosts3': 'donor_name',
            'otherCosts4': 'donor_type',
            'otherCosts6': 'contribution_date',
            'otherCosts7': 'bank_name',
            'otherCosts8': 'EDRPOU',
            'otherCosts9': 'account_number',
            'otherCosts10': 'amount',
        },
        'output': 'other_vnesky.csv'
    },
    'payment_data.csv': {
        'rename': {
            'other3': 'account_number',
            'other4': 'account_type',
            'other12': 'amount',
            'other11': 'payment_purpose2',
            'other5': 'payment_date',
            'other8': 'recipient_type',
            'other9': 'recipient_EDRPOU',
            'other10': 'recipient_location',
            'other1': 'bank_name',
            'other7': 'recipient_name',
            'other2': 'bank_EDRPOU',
            'other6': 'payment_purpose'
        },
        'output': 'vytraty.csv'
    },
    'payment_gov_data.csv': {
        'rename': {
            'paymentGov1': 'bank_name',
            'paymentGov2': 'bank_EDRPOU',
            'paymentGov3': 'account_number',
            'paymentGov5': 'account_type',
            'paymentGov6': 'payment_date',
            'paymentGov7': 'payment_purpose',
            'paymentGov8': 'recipient_name',
            'paymentGov9': 'recipient_EDRPOU',
            'paymentGov10': 'recipient_location',
            'paymentGov11': 'payment_purpose2',
            'paymentGov12': 'amount',
            'paymentGov13': 'recipient_type',
        },
        'output': 'state_vytraty.csv'
    }
}

def format_donor_location(location):
    location = location.title()
    location = location.replace('Область', 'область')
    location = location.replace('М.', 'м.')
    return location

for input_file, mapping in file_mappings.items():
    df = pd.read_csv(input_file)
    df = rename_columns(df, mapping['rename'])
    if 'donor_location' in df.columns:
        df['donor_location'] = df['donor_location'].apply(format_donor_location)
    df.to_csv(mapping['output'], index=False)



for file_name in ['vnesky.csv', 'vytraty.csv', 'non_money_vnesky.csv', 'state_vnesky.csv', 'other_vnesky.csv', 'state_vytraty.csv']:
    df = pd.read_csv(file_name)
    df.rename(columns={'partyName': 'orgName'}, inplace=True)
    local_orgs_list = pd.read_csv('local_orgs_list.csv')
    mapping_dict = dict(zip(local_orgs_list['name'], local_orgs_list['partyName']))
    df['partyName'] = df['orgName'].map(mapping_dict).fillna(df['orgName'])
    df['partyName'] = df['partyName'].str.replace(r'(?i)\**політична партія\**\s*', '', regex=True)
    df.to_csv(file_name, index=False)
    print(f"Файл {file_name} успішно оновлено.")

for category, output_file in categories.items():
    if category != 'tablets1':
        os.remove(output_file)

  df = pd.read_csv(input_file)


Файл vnesky.csv успішно оновлено.


  df = pd.read_csv(file_name)


Файл vytraty.csv успішно оновлено.
Файл non_money_vnesky.csv успішно оновлено.
Файл state_vnesky.csv успішно оновлено.
Файл other_vnesky.csv успішно оновлено.
Файл state_vytraty.csv успішно оновлено.
