In [1]:
import json
import numpy as np
import pandas as pd
import psycopg2
import os

from collections import Counter
from datetime import datetime, timedelta
from dateutil.tz import tzlocal 
from pathlib import Path
from tqdm import tqdm

## Analyse data

In [2]:
from concurrent.futures import ThreadPoolExecutor

def load_responses(resp_path):
    resp_tm = datetime.strptime(resp_path.name[6:-5], '%Y-%m-%d %H;%M;%S')
    resp_tm_str = resp_tm.replace(tzinfo=tzlocal()).isoformat()
    
    with open(resp_path, 'r', encoding='utf-8' ) as f:
        resp = json.load(f)
        if ("rows" in resp) and (resp["rows"]): 
            return [row + [resp['timestamp'], resp_tm_str] 
                    for row in resp['rows']]
        else: 
            return []

def accumulate_responses_from_folder(folder_path):
    file_path_list = list(folder_path.iterdir())
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(tqdm(executor.map(load_responses, file_path_list),
                            total=len(file_path_list), mininterval=10, leave=False, 
                            desc="Accumulate responses from folder")
                          )
        resp_list = []
        for resp_ in results: 
            resp_list += resp_
        print(f"Accumulate responses from folder\n"\
              f"\t{len(resp_list)} / {len(resp_list)} [avg={len(resp_list)/len(resp_list):.2f}]")
    
    
    columns = ['imei', 'lat', 'lng', 'speed', 'gps_datetime_origin', 'orientation', 'route_name', 
               'route_type', 'vehicle_id', 'dd', 'gpstime', 'response_datetime'] 

    return pd.DataFrame(resp_list, columns=columns)
    
def clear_data(in_df):
    unique_data = []
    
    imei_list = in_df['imei'].value_counts().index
    imei_tqdm = tqdm(
        imei_list, total=len(imei_list), mininterval=10, leave=False, desc="Clear data"
    )
    
    for imei in imei_tqdm: 
        row_data = in_df[ in_df['imei'] == imei].values.tolist()
        result = [row_data[0]]
        for row0, row1 in zip(row_data[:-1], row_data[1:]):
            if row0[1:9] != row1[1:9]: 
                result += [row1]
        unique_data += result

    df_unique = pd.DataFrame(unique_data, columns=in_df.columns)
    print(f"Clear data\n\t{len(in_df)} / {len(df_unique)} [avg {len(in_df) / len(df_unique):.02f}]")
    return df_unique

In [15]:
"kh_requests_path".upper()

'KH_REQUESTS_PATH'

In [7]:
KHARKIV_FOLDER_PATH = Path("../../data/local/kharkiv")
KH_REQUESTS_PATH = KHARKIV_FOLDER_PATH/"jsons"
KH_TABLES_PATH = KHARKIV_FOLDER_PATH/"tables"
# kharkiv_folder_path = Path("../../../pet_project/kharkiv")
kharkiv_req_list = [p for p in KH_REQUESTS_PATH.iterdir() if "trans_data_" in p.name]
kharkiv_reqs_list = sorted(kharkiv_folder_list,
                        key=lambda p: datetime.strptime(p.name[11:], '%d_%b_%Y'))
# kharkiv_folders_list

for folder_path in tqdm(kharkiv_reqs_list[:-1]):   
    print(f"Processing '{folder_path}'")
    df = accumulate_responses_from_folder(folder_path)
    df.to_parquet(KH_TABLES_PATH/'origin'/(folder_path.name + '_origin.parquet'))
    
    df_u = clear_data(df)
    df_u.to_parquet(KH_TABLES_PATH/'optimized'/(folder_path.name + '_optimized.parquet'))

  0%|          | 0/24 [00:00<?, ?it/s]

Processing '..\..\data\local\kharkiv\jsons\trans_data_17_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  32%|███▏      | 5476/17280 [00:10<00:21, 547.18it/s][A
Accumulate responses from folder:  63%|██████▎   | 10948/17280 [00:23<00:13, 465.08it/s][A
Accumulate responses from folder:  89%|████████▉ | 15463/17280 [00:33<00:03, 459.39it/s][A
                                                                                        [A

Accumulate responses from folder
	2469322 / 2469322 [avg=1.00]



Clear data:   0%|          | 0/278 [00:00<?, ?it/s][A
Clear data:  38%|███▊      | 106/278 [00:10<00:16, 10.59it/s][A
Clear data:  83%|████████▎ | 230/278 [00:20<00:04, 11.64it/s][A
                                                             [A

Clear data
	2469322 / 234708 [avg 10.52]


  4%|▍         | 1/24 [01:19<30:30, 79.60s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_18_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17233 [00:00<?, ?it/s][A
Accumulate responses from folder:  23%|██▎       | 3895/17233 [00:10<00:34, 389.16it/s][A
Accumulate responses from folder:  39%|███▉      | 6766/17233 [00:20<00:32, 318.43it/s][A
Accumulate responses from folder:  39%|███▉      | 6766/17233 [00:20<00:32, 318.43it/s][A
Accumulate responses from folder:  39%|███▉      | 6766/17233 [00:30<00:32, 318.43it/s][A
Accumulate responses from folder:  54%|█████▍    | 9326/17233 [00:30<00:27, 289.36it/s][A
Accumulate responses from folder:  69%|██████▉   | 11865/17233 [00:41<00:20, 265.52it/s][A
Accumulate responses from folder:  82%|████████▏ | 14204/17233 [00:51<00:11, 254.40it/s][A
                                                                                        [A

Accumulate responses from folder
	2080642 / 2080642 [avg=1.00]



Clear data:   0%|          | 0/241 [00:00<?, ?it/s][A
Clear data:  44%|████▎     | 105/241 [00:10<00:12, 10.47it/s][A
Clear data:  89%|████████▉ | 214/241 [00:20<00:02, 10.69it/s][A
                                                             [A

Clear data
	2080642 / 196611 [avg 10.58]


  8%|▊         | 2/24 [03:09<35:42, 97.38s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_19_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  19%|█▊        | 3226/17280 [00:10<00:43, 322.44it/s][A
Accumulate responses from folder:  37%|███▋      | 6344/17280 [00:21<00:36, 297.49it/s][A
Accumulate responses from folder:  37%|███▋      | 6344/17280 [00:21<00:36, 297.49it/s][A
Accumulate responses from folder:  49%|████▉     | 8476/17280 [00:31<00:34, 258.65it/s][A
Accumulate responses from folder:  49%|████▉     | 8476/17280 [00:31<00:34, 258.65it/s][A
Accumulate responses from folder:  64%|██████▎   | 10978/17280 [00:41<00:24, 255.16it/s][A
Accumulate responses from folder:  76%|███████▌  | 13122/17280 [00:51<00:17, 240.48it/s][A
Accumulate responses from folder:  76%|███████▌  | 13122/17280 [00:51<00:17, 240.48it/s][A
Accumulate responses from folder:  89%|████████▉ | 15391/17280 [01:01<00:08, 235.87it/s][A
                                                                                        [A

Accumulate responses from folder
	2059485 / 2059485 [avg=1.00]



Clear data:   0%|          | 0/234 [00:00<?, ?it/s][A
 12%|█▎        | 3/24 [04:39<32:57, 94.18s/it]     [A

Clear data
	2059485 / 195248 [avg 10.55]
Processing '..\..\data\local\kharkiv\jsons\trans_data_20_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17267 [00:00<?, ?it/s][A
Accumulate responses from folder:  26%|██▌       | 4465/17267 [00:10<00:28, 446.43it/s][A
Accumulate responses from folder:  47%|████▋     | 8140/17267 [00:23<00:26, 339.54it/s][A
Accumulate responses from folder:  47%|████▋     | 8140/17267 [00:23<00:26, 339.54it/s][A
Accumulate responses from folder:  64%|██████▎   | 10996/17267 [00:33<00:20, 312.21it/s][A
Accumulate responses from folder:  64%|██████▎   | 10996/17267 [00:33<00:20, 312.21it/s][A
Accumulate responses from folder:  64%|██████▎   | 10996/17267 [00:43<00:20, 312.21it/s][A
Accumulate responses from folder:  79%|███████▉  | 13705/17267 [00:43<00:12, 296.52it/s][A
Accumulate responses from folder:  79%|███████▉  | 13705/17267 [00:53<00:12, 296.52it/s][A
Accumulate responses from folder:  97%|█████████▋| 16803/17267 [00:53<00:01, 300.66it/s][A
                                                                                        [A

Accumulate responses from folder
	2485317 / 2485317 [avg=1.00]



Clear data:   0%|          | 0/279 [00:00<?, ?it/s][A
Clear data:  91%|█████████▏| 255/279 [00:10<00:00, 25.48it/s][A
                                                             [A

Clear data
	2485317 / 238214 [avg 10.43]


 17%|█▋        | 4/24 [05:56<29:02, 87.15s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_21_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17229 [00:00<?, ?it/s][A
Accumulate responses from folder:  26%|██▋       | 4546/17229 [00:10<00:27, 454.47it/s][A
Accumulate responses from folder:  53%|█████▎    | 9091/17229 [00:25<00:23, 350.85it/s][A
Accumulate responses from folder:  70%|███████   | 12117/17229 [00:35<00:15, 328.32it/s][A
Accumulate responses from folder:  87%|████████▋ | 15063/17229 [00:45<00:06, 314.53it/s][A
                                                                                        [A

Accumulate responses from folder
	2451872 / 2451872 [avg=1.00]



Clear data:   0%|          | 0/278 [00:00<?, ?it/s][A
Clear data:  85%|████████▌ | 237/278 [00:10<00:01, 23.65it/s][A
                                                             [A

Clear data
	2451872 / 232463 [avg 10.55]


 21%|██        | 5/24 [07:10<26:06, 82.45s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_22_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  23%|██▎       | 3976/17280 [00:10<00:33, 397.03it/s][A
Accumulate responses from folder:  44%|████▎     | 7541/17280 [00:23<00:31, 306.95it/s][A
Accumulate responses from folder:  44%|████▎     | 7541/17280 [00:23<00:31, 306.95it/s][A
Accumulate responses from folder:  59%|█████▉    | 10163/17280 [00:33<00:24, 288.62it/s][A
Accumulate responses from folder:  76%|███████▌  | 13160/17280 [00:43<00:14, 292.63it/s][A
Accumulate responses from folder:  97%|█████████▋| 16698/17280 [00:53<00:01, 313.72it/s][A
                                                                                        [A

Accumulate responses from folder
	2475818 / 2475818 [avg=1.00]



Clear data:   0%|          | 0/281 [00:00<?, ?it/s][A
Clear data:  90%|█████████ | 254/281 [00:10<00:01, 25.36it/s][A
                                                             [A

Clear data
	2475818 / 231225 [avg 10.71]


 25%|██▌       | 6/24 [08:27<24:11, 80.65s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_23_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17016 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▌       | 4266/17016 [00:10<00:29, 426.57it/s][A
Accumulate responses from folder:  50%|█████     | 8532/17016 [00:26<00:26, 314.29it/s][A
Accumulate responses from folder:  67%|██████▋   | 11389/17016 [00:36<00:18, 303.34it/s][A
Accumulate responses from folder:  84%|████████▎ | 14246/17016 [00:46<00:09, 292.66it/s][A
                                                                                        [A

Accumulate responses from folder
	2467718 / 2467718 [avg=1.00]



Clear data:   0%|          | 0/281 [00:00<?, ?it/s][A
Clear data:  96%|█████████▋| 271/281 [00:10<00:00, 27.10it/s][A
                                                             [A

Clear data
	2467718 / 236117 [avg 10.45]


 29%|██▉       | 7/24 [09:42<22:21, 78.93s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_24_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17025 [00:00<?, ?it/s][A
Accumulate responses from folder:  27%|██▋       | 4609/17025 [00:10<00:26, 460.59it/s][A
Accumulate responses from folder:  27%|██▋       | 4609/17025 [00:21<00:26, 460.59it/s][A
Accumulate responses from folder:  47%|████▋     | 8064/17025 [00:21<00:25, 356.21it/s][A
Accumulate responses from folder:  47%|████▋     | 8064/17025 [00:31<00:25, 356.21it/s][A
Accumulate responses from folder:  64%|██████▍   | 10876/17025 [00:31<00:19, 322.98it/s][A
Accumulate responses from folder:  80%|████████  | 13701/17025 [00:41<00:10, 307.52it/s][A
Accumulate responses from folder:  99%|█████████▉| 16917/17025 [00:51<00:00, 312.42it/s][A
                                                                                        [A

Accumulate responses from folder
	2488252 / 2488252 [avg=1.00]



Clear data:   0%|          | 0/284 [00:00<?, ?it/s][A
 33%|███▎      | 8/24 [10:55<20:30, 76.92s/it]     [A

Clear data
	2488252 / 140878 [avg 17.66]
Processing '..\..\data\local\kharkiv\jsons\trans_data_25_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▍       | 4310/17280 [00:10<00:30, 430.86it/s][A
Accumulate responses from folder:  50%|████▉     | 8619/17280 [00:25<00:26, 327.88it/s][A
Accumulate responses from folder:  66%|██████▌   | 11428/17280 [00:35<00:19, 307.79it/s][A
Accumulate responses from folder:  83%|████████▎ | 14327/17280 [00:45<00:09, 301.36it/s][A
                                                                                        [A

Accumulate responses from folder
	2131992 / 2131992 [avg=1.00]



Clear data:   0%|          | 0/243 [00:00<?, ?it/s][A
 38%|███▊      | 9/24 [12:06<18:45, 75.05s/it]     [A

Clear data
	2131992 / 79924 [avg 26.68]
Processing '..\..\data\local\kharkiv\jsons\trans_data_26_MAR_2023'



Accumulate responses from folder:   0%|          | 0/15839 [00:00<?, ?it/s][A
Accumulate responses from folder:  27%|██▋       | 4251/15839 [00:10<00:27, 424.91it/s][A
Accumulate responses from folder:  54%|█████▎    | 8501/15839 [00:23<00:21, 345.43it/s][A
Accumulate responses from folder:  73%|███████▎  | 11556/15839 [00:33<00:13, 328.56it/s][A
Accumulate responses from folder:  93%|█████████▎| 14771/15839 [00:43<00:03, 325.85it/s][A
                                                                                        [A

Accumulate responses from folder
	2085659 / 2085659 [avg=1.00]



Clear data:   0%|          | 0/233 [00:00<?, ?it/s][A
 42%|████▏     | 10/24 [13:12<16:51, 72.23s/it]    [A

Clear data
	2085659 / 80083 [avg 26.04]
Processing '..\..\data\local\kharkiv\jsons\trans_data_27_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17277 [00:00<?, ?it/s][A
Accumulate responses from folder:  22%|██▏       | 3793/17277 [00:10<00:35, 378.55it/s][A
Accumulate responses from folder:  22%|██▏       | 3793/17277 [00:23<00:35, 378.55it/s][A
Accumulate responses from folder:  33%|███▎      | 5641/17277 [00:23<00:51, 224.00it/s][A
Accumulate responses from folder:  41%|████      | 7053/17277 [00:33<00:54, 189.10it/s][A
Accumulate responses from folder:  51%|█████     | 8747/17277 [00:43<00:46, 181.75it/s][A
Accumulate responses from folder:  64%|██████▍   | 11019/17277 [00:53<00:31, 197.52it/s][A
Accumulate responses from folder:  77%|███████▋  | 13291/17277 [01:12<00:25, 159.00it/s][A
Accumulate responses from folder:  87%|████████▋ | 15101/17277 [01:22<00:13, 164.18it/s][A
Accumulate responses from folder:  87%|████████▋ | 15101/17277 [01:33<00:13, 164.18it/s][A
Accumulate responses from folder:  92%|█████████▏| 15934/17277 [01:33<00:09, 138.66it/s][A
     

Accumulate responses from folder
	2460604 / 2460604 [avg=1.00]



Clear data:   0%|          | 0/280 [00:00<?, ?it/s][A
Clear data:  65%|██████▍   | 181/280 [00:10<00:05, 18.08it/s][A
 46%|████▌     | 11/24 [15:25<19:43, 91.04s/it]              [A

Clear data
	2460604 / 93515 [avg 26.31]
Processing '..\..\data\local\kharkiv\jsons\trans_data_28_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17204 [00:00<?, ?it/s][A
Accumulate responses from folder:  17%|█▋        | 2936/17204 [00:10<00:48, 291.35it/s][A
Accumulate responses from folder:  17%|█▋        | 2936/17204 [00:21<00:48, 291.35it/s][A
Accumulate responses from folder:  26%|██▋       | 4539/17204 [00:21<01:03, 199.99it/s][A
Accumulate responses from folder:  35%|███▌      | 6034/17204 [00:31<01:02, 177.70it/s][A
Accumulate responses from folder:  44%|████▍     | 7527/17204 [00:43<01:01, 156.09it/s][A
Accumulate responses from folder:  52%|█████▏    | 8981/17204 [00:53<00:53, 152.36it/s][A
Accumulate responses from folder:  62%|██████▏   | 10670/17204 [01:03<00:41, 157.57it/s][A
Accumulate responses from folder:  72%|███████▏  | 12353/17204 [01:18<00:35, 136.37it/s][A
Accumulate responses from folder:  80%|███████▉  | 13713/17204 [01:28<00:25, 136.16it/s][A
Accumulate responses from folder:  87%|████████▋ | 15021/17204 [01:43<00:18, 120.13it/s][A
Accumu

Accumulate responses from folder
	2450625 / 2450625 [avg=1.00]



Clear data:   0%|          | 0/275 [00:00<?, ?it/s][A
Clear data:  60%|█████▉    | 164/275 [00:10<00:06, 16.26it/s][A
                                                             [A

Clear data
	2450625 / 92336 [avg 26.54]


 50%|█████     | 12/24 [17:57<21:52, 109.41s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_29_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17260 [00:00<?, ?it/s][A
Accumulate responses from folder:  17%|█▋        | 2919/17260 [00:10<00:49, 291.71it/s][A
Accumulate responses from folder:  33%|███▎      | 5771/17260 [00:22<00:46, 246.23it/s][A
Accumulate responses from folder:  33%|███▎      | 5771/17260 [00:22<00:46, 246.23it/s][A
Accumulate responses from folder:  46%|████▋     | 7986/17260 [00:32<00:39, 235.88it/s][A
Accumulate responses from folder:  58%|█████▊    | 9956/17260 [00:42<00:33, 220.86it/s][A
Accumulate responses from folder:  58%|█████▊    | 9956/17260 [00:42<00:33, 220.86it/s][A
Accumulate responses from folder:  58%|█████▊    | 9956/17260 [00:52<00:33, 220.86it/s][A
Accumulate responses from folder:  71%|███████   | 12204/17260 [00:53<00:22, 221.79it/s][A
Accumulate responses from folder:  84%|████████▎ | 14451/17260 [01:03<00:12, 222.58it/s][A
Accumulate responses from folder: 100%|█████████▉| 17238/17260 [01:13<00:00, 240.51it/s][A
       

Accumulate responses from folder
	2411648 / 2411648 [avg=1.00]



Clear data:   0%|          | 0/275 [00:00<?, ?it/s][A
Clear data:  84%|████████▎ | 230/275 [00:10<00:01, 22.98it/s][A
 54%|█████▍    | 13/24 [19:36<19:28, 106.27s/it]             [A

Clear data
	2411648 / 90164 [avg 26.75]
Processing '..\..\data\local\kharkiv\jsons\trans_data_30_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17269 [00:00<?, ?it/s][A
Accumulate responses from folder:  14%|█▍        | 2446/17269 [00:10<01:00, 244.49it/s][A
Accumulate responses from folder:  14%|█▍        | 2446/17269 [00:24<01:00, 244.49it/s][A
Accumulate responses from folder:  26%|██▌       | 4420/17269 [00:24<01:14, 172.82it/s][A
Accumulate responses from folder:  34%|███▎      | 5798/17269 [00:44<01:41, 113.41it/s][A
Accumulate responses from folder:  34%|███▎      | 5798/17269 [00:54<01:41, 113.41it/s][A
Accumulate responses from folder:  38%|███▊      | 6618/17269 [00:54<01:42, 104.02it/s][A
Accumulate responses from folder:  43%|████▎     | 7431/17269 [01:05<01:42, 95.54it/s] [A
Accumulate responses from folder:  48%|████▊     | 8319/17269 [01:15<01:35, 93.52it/s][A
Accumulate responses from folder:  53%|█████▎    | 9206/17269 [01:26<01:31, 87.89it/s][A
Accumulate responses from folder:  58%|█████▊    | 10061/17269 [01:36<01:22, 87.04it/s][A
Accumulate r

Accumulate responses from folder
	2414744 / 2414744 [avg=1.00]



Clear data:   0%|          | 0/279 [00:00<?, ?it/s][A
Clear data:  22%|██▏       | 60/279 [00:10<00:36,  6.00it/s][A
Clear data:  46%|████▌     | 129/279 [00:20<00:23,  6.48it/s][A
Clear data:  71%|███████   | 198/279 [00:31<00:13,  6.22it/s][A
                                                             [A

Clear data
	2414744 / 91490 [avg 26.39]


 58%|█████▊    | 14/24 [23:45<24:54, 149.42s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_31_MAR_2023'



Accumulate responses from folder:   0%|          | 0/17268 [00:00<?, ?it/s][A
Accumulate responses from folder:  14%|█▎        | 2340/17268 [00:10<01:03, 233.80it/s][A
Accumulate responses from folder:  27%|██▋       | 4678/17268 [00:20<00:56, 221.43it/s][A
Accumulate responses from folder:  39%|███▉      | 6814/17268 [00:35<00:58, 178.45it/s][A
Accumulate responses from folder:  48%|████▊     | 8318/17268 [00:45<00:53, 168.80it/s][A
Accumulate responses from folder:  57%|█████▋    | 9926/17268 [00:56<00:44, 166.03it/s][A
Accumulate responses from folder:  67%|██████▋   | 11618/17268 [01:06<00:33, 167.00it/s][A
Accumulate responses from folder:  67%|██████▋   | 11618/17268 [01:16<00:33, 167.00it/s][A
Accumulate responses from folder:  76%|███████▋  | 13186/17268 [01:16<00:25, 162.46it/s][A
Accumulate responses from folder:  86%|████████▋ | 14917/17268 [01:26<00:14, 165.63it/s][A
Accumulate responses from folder:  86%|████████▋ | 14917/17268 [01:36<00:14, 165.63it/s][A
Accum

Accumulate responses from folder
	2414970 / 2414970 [avg=1.00]



Clear data:   0%|          | 0/280 [00:00<?, ?it/s][A
Clear data:  73%|███████▎  | 205/280 [00:10<00:03, 20.47it/s][A
 62%|██████▎   | 15/24 [25:54<21:29, 143.24s/it]             [A

Clear data
	2414970 / 91490 [avg 26.40]
Processing '..\..\data\local\kharkiv\jsons\trans_data_01_APR_2023'



Accumulate responses from folder:   0%|          | 0/17243 [00:00<?, ?it/s][A
Accumulate responses from folder:  24%|██▍       | 4180/17243 [00:10<00:31, 417.08it/s][A
Accumulate responses from folder:  48%|████▊     | 8351/17243 [00:25<00:28, 310.53it/s][A
Accumulate responses from folder:  64%|██████▍   | 11012/17243 [00:35<00:21, 293.38it/s][A
Accumulate responses from folder:  79%|███████▉  | 13672/17243 [00:46<00:13, 273.21it/s][A
Accumulate responses from folder:  96%|█████████▌| 16492/17243 [00:56<00:02, 275.76it/s][A
                                                                                        [A

Accumulate responses from folder
	2145470 / 2145470 [avg=1.00]



Clear data:   0%|          | 0/244 [00:00<?, ?it/s][A
Clear data:  66%|██████▌   | 161/244 [00:10<00:05, 16.08it/s][A
                                                             [A

Clear data
	2145470 / 79007 [avg 27.16]


 67%|██████▋   | 16/24 [27:34<17:21, 130.13s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_02_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  23%|██▎       | 3960/17280 [00:10<00:33, 395.96it/s][A
Accumulate responses from folder:  23%|██▎       | 3960/17280 [00:20<00:33, 395.96it/s][A
Accumulate responses from folder:  37%|███▋      | 6454/17280 [00:20<00:36, 297.99it/s][A
Accumulate responses from folder:  51%|█████     | 8731/17280 [00:30<00:32, 266.24it/s][A
Accumulate responses from folder:  51%|█████     | 8731/17280 [00:30<00:32, 266.24it/s][A
Accumulate responses from folder:  51%|█████     | 8731/17280 [00:40<00:32, 266.24it/s][A
Accumulate responses from folder:  64%|██████▎   | 10977/17280 [00:40<00:25, 249.34it/s][A
Accumulate responses from folder:  77%|███████▋  | 13358/17280 [00:50<00:15, 245.26it/s][A
Accumulate responses from folder:  77%|███████▋  | 13358/17280 [00:50<00:15, 245.26it/s][A
Accumulate responses from folder:  92%|█████████▏| 15862/17280 [01:00<00:05, 246.80it/s][A
      

Accumulate responses from folder
	2110937 / 2110937 [avg=1.00]



Clear data:   0%|          | 0/241 [00:00<?, ?it/s][A
Clear data:  93%|█████████▎| 224/241 [00:10<00:00, 22.29it/s][A
                                                             [A

Clear data
	2110937 / 78772 [avg 26.80]


 71%|███████   | 17/24 [29:03<13:44, 117.80s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_03_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:   7%|▋         | 1167/17280 [00:10<02:18, 116.57it/s][A
Accumulate responses from folder:  15%|█▍        | 2537/17280 [00:20<01:54, 128.47it/s][A
Accumulate responses from folder:  26%|██▌       | 4525/17280 [00:30<01:19, 160.50it/s][A
Accumulate responses from folder:  26%|██▌       | 4525/17280 [00:41<01:19, 160.50it/s][A
Accumulate responses from folder:  33%|███▎      | 5781/17280 [00:41<01:23, 138.36it/s][A
Accumulate responses from folder:  41%|████      | 7122/17280 [00:51<01:14, 136.74it/s][A
Accumulate responses from folder:  41%|████      | 7122/17280 [00:51<01:14, 136.74it/s][A
Accumulate responses from folder:  48%|████▊     | 8229/17280 [01:01<01:10, 127.77it/s][A
Accumulate responses from folder:  48%|████▊     | 8229/17280 [01:01<01:10, 127.77it/s][A
Accumulate responses from folder:  55%|█████▌    | 9519/17280 [01:11<01:00, 127.95it/s][A
Accumulate

Accumulate responses from folder
	2451629 / 2451629 [avg=1.00]



Clear data:   0%|          | 0/282 [00:00<?, ?it/s][A
Clear data:  74%|███████▍  | 208/282 [00:10<00:03, 20.80it/s][A
                                                             [A

Clear data
	2451629 / 93261 [avg 26.29]


 75%|███████▌  | 18/24 [31:39<12:56, 129.47s/it]

Processing '..\..\data\local\kharkiv\jsons\trans_data_04_APR_2023'



Accumulate responses from folder:   0%|          | 0/17278 [00:00<?, ?it/s][A
Accumulate responses from folder:  19%|█▊        | 3230/17278 [00:10<00:43, 322.88it/s][A
Accumulate responses from folder:  37%|███▋      | 6459/17278 [00:23<00:41, 262.85it/s][A
Accumulate responses from folder:  53%|█████▎    | 9169/17278 [00:33<00:30, 266.07it/s][A
Accumulate responses from folder:  69%|██████▉   | 11952/17278 [00:43<00:19, 270.53it/s][A
Accumulate responses from folder:  85%|████████▌ | 14735/17278 [00:54<00:09, 271.75it/s][A
                                                                                        [A

Accumulate responses from folder
	2436496 / 2436496 [avg=1.00]



Clear data:   0%|          | 0/271 [00:00<?, ?it/s][A
 79%|███████▉  | 19/24 [33:03<09:37, 115.57s/it]   [A

Clear data
	2436496 / 91893 [avg 26.51]
Processing '..\..\data\local\kharkiv\jsons\trans_data_05_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  24%|██▍       | 4229/17280 [00:10<00:30, 422.85it/s][A
Accumulate responses from folder:  24%|██▍       | 4229/17280 [00:25<00:30, 422.85it/s][A
Accumulate responses from folder:  48%|████▊     | 8323/17280 [00:25<00:28, 318.88it/s][A
Accumulate responses from folder:  64%|██████▍   | 11045/17280 [00:35<00:20, 300.56it/s][A
Accumulate responses from folder:  64%|██████▍   | 11045/17280 [00:45<00:20, 300.56it/s][A
Accumulate responses from folder:  78%|███████▊  | 13423/17280 [00:45<00:13, 277.53it/s][A
Accumulate responses from folder:  96%|█████████▌| 16551/17280 [00:55<00:02, 289.42it/s][A
                                                                                        [A

Accumulate responses from folder
	2431420 / 2431420 [avg=1.00]



Clear data:   0%|          | 0/275 [00:00<?, ?it/s][A
 83%|████████▎ | 20/24 [34:21<06:57, 104.32s/it]   [A

Clear data
	2431420 / 91999 [avg 26.43]
Processing '..\..\data\local\kharkiv\jsons\trans_data_06_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▍       | 4278/17280 [00:10<00:30, 427.78it/s][A
Accumulate responses from folder:  50%|████▉     | 8556/17280 [00:26<00:27, 315.69it/s][A
Accumulate responses from folder:  66%|██████▌   | 11328/17280 [00:36<00:19, 300.94it/s][A
Accumulate responses from folder:  82%|████████▏ | 14100/17280 [00:46<00:10, 290.31it/s][A
                                                                                        [A

Accumulate responses from folder
	2392553 / 2392553 [avg=1.00]



Clear data:   0%|          | 0/274 [00:00<?, ?it/s][A
 88%|████████▊ | 21/24 [35:37<04:47, 95.86s/it]    [A

Clear data
	2392553 / 90331 [avg 26.49]
Processing '..\..\data\local\kharkiv\jsons\trans_data_07_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▌       | 4356/17280 [00:10<00:29, 434.92it/s][A
Accumulate responses from folder:  43%|████▎     | 7437/17280 [00:21<00:29, 335.46it/s][A
Accumulate responses from folder:  43%|████▎     | 7437/17280 [00:21<00:29, 335.46it/s][A
Accumulate responses from folder:  43%|████▎     | 7437/17280 [00:31<00:29, 335.46it/s][A
Accumulate responses from folder:  58%|█████▊    | 10098/17280 [00:31<00:23, 304.22it/s][A
Accumulate responses from folder:  74%|███████▍  | 12753/17280 [00:41<00:15, 289.32it/s][A
Accumulate responses from folder:  91%|█████████ | 15749/17280 [00:51<00:05, 292.78it/s][A
                                                                                        [A

Accumulate responses from folder
	2442172 / 2442172 [avg=1.00]



Clear data:   0%|          | 0/276 [00:00<?, ?it/s][A
 92%|█████████▏| 22/24 [36:53<03:00, 90.05s/it]    [A

Clear data
	2442172 / 93372 [avg 26.16]
Processing '..\..\data\local\kharkiv\jsons\trans_data_08_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▍       | 4237/17280 [00:10<00:30, 423.56it/s][A
Accumulate responses from folder:  48%|████▊     | 8353/17280 [00:25<00:27, 320.20it/s][A
Accumulate responses from folder:  48%|████▊     | 8353/17280 [00:25<00:27, 320.20it/s][A
Accumulate responses from folder:  65%|██████▍   | 11168/17280 [00:35<00:20, 304.87it/s][A
Accumulate responses from folder:  65%|██████▍   | 11168/17280 [00:45<00:20, 304.87it/s][A
Accumulate responses from folder:  79%|███████▉  | 13639/17280 [00:45<00:12, 283.50it/s][A
Accumulate responses from folder:  97%|█████████▋| 16744/17280 [00:55<00:01, 292.60it/s][A
                                                                                        [A

Accumulate responses from folder
	2139128 / 2139128 [avg=1.00]



Clear data:   0%|          | 0/243 [00:00<?, ?it/s][A
 96%|█████████▌| 23/24 [38:08<01:25, 85.43s/it]    [A

Clear data
	2139128 / 81808 [avg 26.15]
Processing '..\..\data\local\kharkiv\jsons\trans_data_09_APR_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:  25%|██▍       | 4260/17280 [00:10<00:30, 425.96it/s][A
Accumulate responses from folder:  42%|████▏     | 7185/17280 [00:20<00:29, 336.78it/s][A
Accumulate responses from folder:  42%|████▏     | 7185/17280 [00:20<00:29, 336.78it/s][A
Accumulate responses from folder:  57%|█████▋    | 9841/17280 [00:30<00:24, 303.97it/s][A
Accumulate responses from folder:  57%|█████▋    | 9841/17280 [00:30<00:24, 303.97it/s][A
Accumulate responses from folder:  72%|███████▏  | 12507/17280 [00:40<00:16, 287.47it/s][A
Accumulate responses from folder:  72%|███████▏  | 12507/17280 [00:40<00:16, 287.47it/s][A
Accumulate responses from folder:  91%|█████████ | 15737/17280 [00:50<00:05, 299.92it/s][A
                                                                                        [A

Accumulate responses from folder
	2104581 / 2104581 [avg=1.00]



Clear data:   0%|          | 0/240 [00:00<?, ?it/s][A
100%|██████████| 24/24 [39:21<00:00, 98.41s/it]    [A

Clear data
	2104581 / 78996 [avg 26.64]





In [20]:
# [p.name[11:-18] for p in file_path_list]

In [23]:
file_path_list = list((KH_TABLES_PATH/"optimized").iterdir())
file_path_list = sorted(file_path_list,
                        key=lambda p: datetime.strptime(p.name[11:-18], '%d_%b_%Y'))
file_path_list[-3:]

[WindowsPath('../../data/local/kharkiv/tables/optimized/trans_data_11_APR_2023_optimized.parquet'),
 WindowsPath('../../data/local/kharkiv/tables/optimized/trans_data_12_APR_2023_optimized.parquet'),
 WindowsPath('../../data/local/kharkiv/tables/optimized/trans_data_13_APR_2023_optimized.parquet')]

In [24]:
df_l = pd.read_parquet(file_path_list[-1])

In [25]:
df['route_name'].value_counts().index

Index(['3', '27', '8', '20', '204', '6', '119', '1', '63', '24', '35', '16А',
       '16', '19', '304', '272', '55', '11', '31', '45', '46', '34', '56',
       '72', '170', '7', '13', '267', '109', '57', '106', '40', '140', '32',
       '260', '5', '112', '7А', '51', '49', '222', '96', '58', '243', '29',
       '80', '67', '77', '33', '12', '81', '53', '94', '25', '68', '18', '21',
       '23', '47', '99', '64', '28', '83', '224', '241', '273', '78', '118',
       '22', '41'],
      dtype='object')

In [26]:
df_u['route_name'].value_counts().index

Index(['3', '27', '8', '20', '204', '6', '1', '119', '24', '35', '16А', '16',
       '63', '19', '304', '11', '34', '272', '31', '55', '56', '45', '7', '13',
       '170', '46', '72', '5', '7А', '40', '32', '49', '267', '112', '106',
       '140', '260', '109', '57', '51', '222', '12', '18', '29', '58', '33',
       '80', '81', '96', '94', '77', '68', '53', '243', '67', '23', '21', '83',
       '241', '47', '25', '64', '78', '28', '99', '273', '118', '224', '41',
       '22'],
      dtype='object')

In [27]:
df_l['route_name'].value_counts().index

Index(['3', '27', '20', '8', '204', '6', '35', '24', '63', '119', '1', '19',
       '272', '304', '16', '16А', '34', '45', '170', '11', '13', '31', '55',
       '7', '7А', '72', '46', '56', '112', '51', '68', '106', '58', '33', '32',
       '49', '260', '109', '57', '96', '140', '40', '77', '5', '222', '94',
       '267', '81', '25', '80', '52', '18', '29', '12', '53', '67', '83',
       '243', '241', '23', '273', '47', '78', '21', '64', '22', '41', '99',
       '28', '118', '224'],
      dtype='object')

In [28]:
df[df['route_name'] == '56А']

Unnamed: 0,imei,lat,lng,speed,gps_datetime_origin,orientation,route_name,route_type,vehicle_id,dd,gpstime,response_datetime


In [29]:
df[df['route_name'] == '56Рђ']['route_name'].astype('string')

Series([], Name: route_name, dtype: string)

In [29]:
import pyarrow.parquet as pq
pfile = pq.read_table(file_path_list[-2])

In [30]:
pfile

pyarrow.Table
imei: int64
lat: int64
lng: int64
speed: int64
gps_datetime_origin: int64
orientation: int64
route_name: string
route_type: int64
vehicle_id: int64
dd: int64
gpstime: string
response_datetime: string
----
imei: [[353976014023992,353976014023992,353976014023992,353976014023992,353976014023992,...,352625696583044,352625696583044,352625696583044,352625696583044,352625696583044],[352625696583044,352625696583044,352625696583044,352625696583044,352625696583044,...,353976014241727,353976014241727,353976014241727,353976014241727,353976014241727]]
lat: [[369808,370071,368210,367470,366741,...,237099,237099,251362,253208,253410],[259300,259159,259102,259220,259090,...,260399,260410,260448,260429,260380]]
lng: [[961990,961880,959579,958759,957932,...,984879,984879,977291,976440,976349],[970169,968700,968250,968281,967739,...,944111,944019,944099,945011,945950]]
speed: [[0,5,17,17,12,...,13,0,21,23,17],[26,27,12,0,24,...,0,0,0,28,11]]
gps_datetime_origin: [[0,0,0,0,0,...,0,0,0,0,0],[

In [27]:
import pyarrow

In [16]:
BATCH_SIZE = 3_000
connection_config= dict({
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD']
})

monitor = TransGPSCVMonitor(connection_config=connection_config, data_model="kharkiv")

# file_path_list =list(Path("../data/local/tables/").iterdir())
file_path_list =list(Path("D:/projects/pet_project/tables").iterdir())
file_path_list = sorted(file_path_list, 
                        key=lambda p: datetime.strptime(p.name[9:-4], '%d_%b_%Y') )

for folder_path in tqdm(kharkiv_folders_list[:-1]):
    df = accumulate_responses_from_folder(folder_path)
    df_u = clear_data(df)       
    
    df_cur = df_u[:]

    batch_tqdm = tqdm(df_cur.groupby(np.arange(len(df_cur)) // BATCH_SIZE), 
                     mininterval=10)
    for batch_number, batch_df in batch_tqdm:
        batch_df = batch_df.where(pd.notnull(batch_df), None)
        monitor.write_to_db(batch_df.to_dict('records'))

  0%|          | 0/3 [00:00<?, ?it/s]
Accumulate responses from folder:   0%|          | 0/12739 [00:00<?, ?it/s][A
Accumulate responses from folder:   3%|▎         | 433/12739 [00:02<00:58, 209.60it/s][A
Accumulate responses from folder:   7%|▋         | 853/12739 [00:05<01:20, 147.01it/s][A
Accumulate responses from folder:   9%|▉         | 1168/12739 [00:07<01:20, 143.31it/s][A
Accumulate responses from folder:  12%|█▏        | 1465/12739 [00:10<01:30, 125.08it/s][A
Accumulate responses from folder:  14%|█▎        | 1724/12739 [00:13<01:36, 113.78it/s][A
Accumulate responses from folder:  15%|█▌        | 1958/12739 [00:17<01:53, 95.33it/s] [A
Accumulate responses from folder:  18%|█▊        | 2263/12739 [00:19<01:35, 109.17it/s][A
Accumulate responses from folder:  20%|█▉        | 2525/12739 [00:21<01:29, 113.90it/s][A
Accumulate responses from folder:  22%|██▏       | 2768/12739 [00:23<01:27, 114.56it/s][A
Accumulate responses from folder:  24%|██▍       | 3050/12739 [00:

Accumulate responses from folder
	1616588 / 1616588 [avg=1.00]



Clear data:   0%|          | 0/273 [00:00<?, ?it/s][A
Clear data:   8%|▊         | 23/273 [00:02<00:22, 11.36it/s][A
Clear data:  18%|█▊        | 48/273 [00:04<00:18, 11.89it/s][A
Clear data:  26%|██▋       | 72/273 [00:06<00:16, 11.93it/s][A
Clear data:  36%|███▋      | 99/273 [00:08<00:14, 12.06it/s][A
Clear data:  47%|████▋     | 127/273 [00:10<00:11, 12.61it/s][A
Clear data:  56%|█████▌    | 153/273 [00:12<00:09, 12.50it/s][A
Clear data:  66%|██████▌   | 180/273 [00:14<00:07, 12.52it/s][A
Clear data:  75%|███████▌  | 206/273 [00:17<00:05, 11.74it/s][A
Clear data:  86%|████████▌ | 235/273 [00:19<00:03, 12.44it/s][A
                                                             [A

Clear data
	1616588 / 153418 [avg 10.54]



  0%|          | 0/52 [00:00<?, ?it/s][A
 19%|█▉        | 10/52 [00:10<00:44,  1.05s/it][A
 42%|████▏     | 22/52 [00:20<00:27,  1.09it/s][A
100%|██████████| 52/52 [00:40<00:00,  1.28it/s][A
 33%|███▎      | 1/3 [02:53<05:46, 173.17s/it]
Accumulate responses from folder:   0%|          | 0/10997 [00:00<?, ?it/s][A
Accumulate responses from folder:   6%|▌         | 652/10997 [00:02<00:31, 325.93it/s][A
Accumulate responses from folder:  12%|█▏        | 1304/10997 [00:04<00:33, 287.63it/s][A
Accumulate responses from folder:  17%|█▋        | 1924/10997 [00:06<00:30, 296.95it/s][A
Accumulate responses from folder:  23%|██▎       | 2523/10997 [00:08<00:29, 291.25it/s][A
Accumulate responses from folder:  28%|██▊       | 3109/10997 [00:10<00:28, 276.51it/s][A
Accumulate responses from folder:  34%|███▍      | 3714/10997 [00:12<00:25, 284.65it/s][A
Accumulate responses from folder:  39%|███▉      | 4291/10997 [00:14<00:23, 285.74it/s][A
Accumulate responses from folder:  44%|███

Accumulate responses from folder
	740660 / 740660 [avg=1.00]



Clear data:   0%|          | 0/239 [00:00<?, ?it/s][A
Clear data:  33%|███▎      | 79/239 [00:02<00:04, 39.10it/s][A
Clear data:  72%|███████▏  | 171/239 [00:04<00:01, 43.07it/s][A
                                                             [A
  0%|          | 0/24 [00:00<?, ?it/s][A

Clear data
	740660 / 70474 [avg 10.51]
There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportVehicle.KharkivTransportVehicle'> to inserted in DB



100%|██████████| 24/24 [00:13<00:00,  1.74it/s][A
 67%|██████▋   | 2/3 [04:01<01:51, 111.68s/it]
Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:   4%|▍         | 669/17280 [00:02<00:49, 333.68it/s][A
Accumulate responses from folder:   8%|▊         | 1344/17280 [00:04<00:47, 335.91it/s][A
Accumulate responses from folder:  12%|█▏        | 2016/17280 [00:06<00:47, 322.15it/s][A
Accumulate responses from folder:  15%|█▌        | 2662/17280 [00:08<00:46, 316.67it/s][A
Accumulate responses from folder:  19%|█▉        | 3298/17280 [00:10<00:44, 315.36it/s][A
Accumulate responses from folder:  23%|██▎       | 3930/17280 [00:12<00:43, 307.21it/s][A
Accumulate responses from folder:  26%|██▋       | 4546/17280 [00:14<00:43, 291.63it/s][A
Accumulate responses from folder:  30%|██▉       | 5132/17280 [00:17<00:46, 261.55it/s][A
Accumulate responses from folder:  33%|███▎      | 5666/17280 [00:20<00:48, 240.18it/s][A
Accu

Accumulate responses from folder
	2075486 / 2075486 [avg=1.00]



Clear data:   0%|          | 0/241 [00:00<?, ?it/s][A
Clear data:  11%|█         | 27/241 [00:02<00:16, 13.02it/s][A
Clear data:  22%|██▏       | 54/241 [00:04<00:14, 13.15it/s][A
Clear data:  34%|███▎      | 81/241 [00:06<00:13, 12.19it/s][A
Clear data:  44%|████▍     | 106/241 [00:08<00:11, 11.93it/s][A
Clear data:  54%|█████▍    | 130/241 [00:10<00:09, 11.43it/s][A
Clear data:  63%|██████▎   | 153/241 [00:13<00:07, 11.17it/s][A
Clear data:  73%|███████▎  | 176/241 [00:15<00:05, 10.98it/s][A
Clear data:  83%|████████▎ | 199/241 [00:17<00:03, 11.09it/s][A
Clear data:  95%|█████████▌| 229/241 [00:19<00:00, 12.19it/s][A
                                                             [A
  0%|          | 0/68 [00:00<?, ?it/s][A

Clear data
	2075486 / 201332 [avg 10.31]



 15%|█▍        | 10/68 [00:10<00:59,  1.03s/it][A
 37%|███▋      | 25/68 [00:20<00:34,  1.24it/s][A

There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportVehicle.KharkivTransportVehicle'> to inserted in DB



 59%|█████▉    | 40/68 [00:33<00:23,  1.19it/s][A
 76%|███████▋  | 52/68 [00:43<00:13,  1.19it/s][A
100%|██████████| 68/68 [00:57<00:00,  1.19it/s][A
100%|██████████| 3/3 [06:44<00:00, 134.70s/it]


In [6]:
monitor.objects_unique

{PTETA.utils.transport.kharkiv.KharkivTransportOperator.KharkivTransportOperator: {KharkivTransportOperator(id=-1, name='UNKNOWN')},
 PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute: {KharkivTransportRoute(id=-1),
  KharkivTransportRoute(id=1),
  KharkivTransportRoute(id=10),
  KharkivTransportRoute(id=11),
  KharkivTransportRoute(id=12),
  KharkivTransportRoute(id=13),
  KharkivTransportRoute(id=14),
  KharkivTransportRoute(id=15),
  KharkivTransportRoute(id=16),
  KharkivTransportRoute(id=17),
  KharkivTransportRoute(id=18),
  KharkivTransportRoute(id=19),
  KharkivTransportRoute(id=2),
  KharkivTransportRoute(id=20),
  KharkivTransportRoute(id=21),
  KharkivTransportRoute(id=22),
  KharkivTransportRoute(id=23),
  KharkivTransportRoute(id=24),
  KharkivTransportRoute(id=25),
  KharkivTransportRoute(id=5),
  KharkivTransportRoute(id=6),
  KharkivTransportRoute(id=7),
  KharkivTransportRoute(id=8),
  KharkivTransportRoute(id=9)},
 PTETA.utils.transport.kharkiv

In [7]:
operator_list, route_list, vehicle_list, avl_data_list = monitor.decompose_response(batch_df.to_dict('records'))

for obj_list in [operator_list, route_list, vehicle_list]:
    new_obj = monitor.get_new_objs(obj_list)
    if new_obj:
        print(f"There are {len(new_obj)} new {new_obj[0].__class__} to inserted in DB")
        monitor.update_db(new_obj)

for i, (vehicle, route) in enumerate(zip(vehicle_list, route_list)):
    avl_data_list[i].vehicle_id = monitor.vehicle_to_id[vehicle]
    avl_data_list[i].route_id = monitor.route_to_id[route]


There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> to inserted in DB


KeyError: KharkivTransportRoute(id=None)

In [8]:
i, (vehicle, route)

(732,
 (KharkivTransportVehicle(id=None, imei='353976014249548', name='None', owner_id=-1),
  KharkivTransportRoute(id=None)))

In [9]:
route.id, route.name, route.type

(None, '27', 1)

In [None]:
route.__

In [10]:
[(r.id, r.name, r.type) for r in monitor.objects_unique[route.__class__]]

[(8, '8', 1),
 (25, '28', 1),
 (9, '208', 3),
 (16, '3', 2),
 (23, '75', 3),
 (6, '204', 3),
 (22, '68', 3),
 (15, '206', 3),
 (14, '212', 3),
 (19, '51', 2),
 (-1, 'UNKNOWN', -1),
 (17, '1', 2),
 (21, '20', 1),
 (18, '6', 2),
 (7, '45', 2),
 (12, '35', 2),
 (11, '46', 2),
 (1, '52', 2),
 (2, '55', 2),
 (5, '24', 2),
 (20, '13', 2),
 (13, '72', 3),
 (10, '34', 2),
 (24, '27', 2)]

NameError: name 'batch_df' is not defined

In [7]:
#     df_cur = df_u[:]

batch_tqdm = tqdm(df_cur.groupby(np.arange(len(df_cur)) // BATCH_SIZE), 
                  miniters=40)
for batch_number, batch_df in batch_tqdm:
    batch_df = batch_df.where(pd.notnull(batch_df), None)
    monitor.write_to_db(batch_df.to_dict('records'))

  0%|          | 0/124 [00:00<?, ?it/s]

There are 2 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> to inserted in DB
Error raised while select <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> '[KharkivTransportRoute(id=None, name='52', type=2), KharkivTransportRoute(id=None, name='55', type=2)]'





SyntaxError: syntax error at or near "AND"
LINE 1: SELECT "name", "type" FROM kharkiv.route  WHERE ( AND "name"...
                                                          ^


In [32]:
from psycopg2.extras import RealDictCursor
connection_config= dict({
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD']
})
conn = psycopg2.connect(**connection_config)

# cur = connection.cursor(cursor_factory = RealDictCursor)

In [54]:
with conn.cursor() as cur : 
    cur.execute('SELECT id, "name", "type" FROM kharkiv.route;')
    columns = [desc[0] for desc in cur.description]
    real_dict = [dict(zip(columns, row)) for row in cur.fetchall()]
    print(real_dict)

[{'id': -1, 'name': 'UNKNOWN', 'type': -1}, {'id': 1, 'name': '52', 'type': 2}, {'id': 2, 'name': '55', 'type': 2}, {'id': 3, 'name': '24', 'type': 2}, {'id': 4, 'name': '204', 'type': 3}, {'id': 5, 'name': '45', 'type': 2}, {'id': 6, 'name': '8', 'type': 1}, {'id': 7, 'name': '208', 'type': 3}, {'id': 8, 'name': '34', 'type': 2}, {'id': 9, 'name': '46', 'type': 2}, {'id': 10, 'name': '35', 'type': 2}, {'id': 11, 'name': '72', 'type': 3}, {'id': 12, 'name': '212', 'type': 3}, {'id': 13, 'name': '206', 'type': 3}, {'id': 14, 'name': '3', 'type': 2}, {'id': 15, 'name': '1', 'type': 2}, {'id': 16, 'name': '6', 'type': 2}, {'id': 17, 'name': '51', 'type': 2}, {'id': 18, 'name': '13', 'type': 2}, {'id': 19, 'name': '20', 'type': 1}, {'id': 20, 'name': '68', 'type': 3}, {'id': 21, 'name': '75', 'type': 3}, {'id': 22, 'name': '27', 'type': 2}, {'id': 23, 'name': '28', 'type': 1}, {'id': 24, 'name': '27', 'type': 1}, {'id': 25, 'name': '281', 'type': 3}, {'id': 26, 'name': '119', 'type': 2}, {

In [55]:
**real_dict

SyntaxError: invalid syntax (2672971826.py, line 1)

['id', 'name']

In [13]:
cv_req_path = Path(r"D:\projects\pet_project\trans_data_23_FEB_2023")
len([p.name for p in cv_req_path.iterdir()]), [p.name for p in cv_req_path.iterdir()][30_000]

(54701, 'trans_2023-02-23 14;31;48.json')

In [14]:
with open(cv_req_path/'trans_2023-02-23 14;31;48.json', 'r', encoding='utf-8') as file:
    resp = json.load(file)

In [15]:
resp

{'355227045533718': {'id': 156,
  'imei': '355227045533718',
  'name': 'A79',
  'stateCode': 'used',
  'stateName': 'used',
  'lat': 48.26898333333333,
  'lng': 25.927051666666667,
  'speed': '022.4',
  'orientation': '220.37',
  'gpstime': '2023-02-23 13:31:46',
  'routeId': 2,
  'routeName': '11',
  'routeColour': 'green',
  'inDepo': False,
  'busNumber': '9034',
  'perevId': 1,
  'perevName': 'Денисівка',
  'remark': '9034 DNSNK',
  'online': True,
  'idBusTypes': 1},
 '355227046451662': {'id': 394,
  'imei': '355227046451662',
  'name': 'H76',
  'stateCode': 'used',
  'stateName': 'used',
  'lat': 48.292473333333334,
  'lng': 25.935496666666666,
  'speed': '000.0',
  'orientation': '089.35',
  'gpstime': '2023-02-23 13:31:46',
  'routeId': 31,
  'routeName': '6/6a',
  'routeColour': 'deeppink',
  'inDepo': False,
  'busNumber': '350',
  'perevId': 6,
  'perevName': 'ЧТУ',
  'remark': 'Тролейбус 350 DNSNTNK',
  'online': True,
  'idBusTypes': 2},
 '355228042956811': {'id': 413,
  '

In [45]:
response_datetime

odict_items([('id', -1), ('name', 'UNKNOWN')])

## TransportRoute

In [7]:
TransportRoute.__insert_columns__().replace('"', '').split(', ')[1:]

['route_name', 'route_colour']

In [28]:
cols = ["routeId", 'routeName', 'routeColour']
route_list = [TransportRoute.from_response_row(row) for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(route_list), route_list[:3]
TransportRoute.insert_many_in_table(conn, route_list)

In [32]:
TransportRoute.get_table(conn)

[TransportRoute(id=37, name='T', colour='coral'),
 TransportRoute(id=31, name='6/6a', colour='deeppink'),
 TransportRoute(id=21, name='38', colour='deeppink'),
 TransportRoute(id=41, name='10A', colour='black'),
 TransportRoute(id=20, name='A', colour='navy'),
 TransportRoute(id=42, name='39', colour='coral'),
 TransportRoute(id=23, name='19', colour='teal'),
 TransportRoute(id=19, name='10', colour='black'),
 TransportRoute(id=11, name='9', colour='magenta'),
 TransportRoute(id=2, name='11', colour='green'),
 TransportRoute(id=6, name='4', colour='magenta'),
 TransportRoute(id=27, name='3/3a', colour='green'),
 TransportRoute(id=16, name='2', colour='green'),
 TransportRoute(id=4, name='5', colour='orange'),
 TransportRoute(id=3, name='12', colour='blue'),
 TransportRoute(id=12, name='20', colour='maroon'),
 TransportRoute(id=7, name='6', colour='sienna'),
 TransportRoute(id=10, name='34', colour='navy'),
 TransportRoute(id=45, name='1', colour='navy'),
 TransportRoute(id=9, name='27'

In [13]:
# df_sum[TransportRoute.__insert_columns__().replace('"', '').split(', ')[1:]].value_counts()

In [27]:
conn.rollback()

In [45]:
SQL_big_req = " ".join([create_sql_req(t) for t in trans_vehicle_list])

In [29]:
route_list = TransportRoute.get_table(conn)[:]
len(route_list)

31

In [30]:
for r in route_list[::2]: 
    r.id += 1
TransportRoute.are_in_table(conn, route_list)

[False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False]

In [31]:
TransportRoute.are_in_table(conn, route_list)

[False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False]

In [14]:
for r in route_list: 
    r.name += "_"
    r.id += 100

In [16]:
# sql = f"""INSERT INTO pteta.route("id", "routeName", "routeColour") VALUES """ + \
#               ", ".join([f"""({obj.id}, '{obj.name}', '{obj.colour}')"""
#                         for obj in route_list]) + ";"
# sql

In [17]:
# route_list

In [18]:
TransportRoute.insert_many_in_table(conn, route_list)

## Test TransportVehicle

In [33]:
TransportVehicle.__insert_columns__()

'"imei", "name", "bus_number", "remark", "perev_id"'

In [45]:
cols = ["imei", "name", "busNumber", "remark", "perevId"]
vehicle_list = [TransportVehicle.from_response_row(row) for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(vehicle_list), vehicle_list[:3]
TransportVehicle.insert_many_in_table(conn, vehicle_list)

In [47]:
vehicle_list = TransportVehicle.get_table(conn)
len(vehicle_list)

99

In [48]:
TransportVehicle.are_in_table(conn, vehicle_list[:5])

[True, True, True, True, True]

In [49]:
vehicle_list[5].is_in_table(conn)

True

In [50]:
obj = vehicle_list[0]
obj.imei += "-"
obj.is_in_table(conn)

False

In [51]:
obj.insert_in_table(conn)

In [52]:
for v in vehicle_list[:10]: 
    v.imei += '_'

In [15]:
TransportVehicle.insert_many_in_table(conn, vehicle_list[:10])

In [31]:
obj.insert_in_table(conn)
obj.is_in_table(conn)

In [14]:
vehicle_list[0].id = None
print(vehicle_list[0])
vehicle_list[0].update_id_from_table(conn)
print(vehicle_list[0])

TransportVehicle(id=None, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)
TransportVehicle(id=1, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)


## TransportOperator

In [41]:
operator_list = TransportOperator.get_table(conn)
len(operator_list)

6

In [37]:
TransportOperator.__insert_columns__()

'id, "perev_name"'

In [40]:
cols = ['perevId', "perevName"]
operator_list = [TransportOperator.from_response_row(row) 
              for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(operator_list), operator_list[:3]
TransportOperator.insert_many_in_table(conn, operator_list)

In [42]:
TransportOperator.are_in_table(conn, operator_list[:])

[True, True, True, True, True, True]

In [43]:
operator_list[0].is_in_table(conn)

True

In [44]:
obj = operator_list[0]
obj.id += 100
obj.name = "-_-" + obj.name
obj.is_in_table(conn)

False

In [14]:
obj.insert_in_table(conn)

In [15]:
for op in operator_list: 
    op.id += 100
    op.name = "-_-" + op.name
    
TransportOperator.insert_many_in_table(conn, operator_list)

## TransportAVLData

In [9]:
avl_data_list = TransportAVLData.get_table(conn)

In [11]:
# avl_data_list

In [14]:
%%timeit
TransportAVLData.are_in_table(conn, avl_data_list)

57.4 ms ± 3.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
%%timeit
avl_data_list[0].is_in_table(conn)

42.1 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [49]:
file_path_list[0].name[:-4]

'data_for_01_DEC_2022'

In [58]:
file_path = Path(f"../../../pet_project/tables")
file_path_list = [p for p in file_path.iterdir() if ".csv" in p.name]
(file_path/"parquet").mkdir(exist_ok=True)

# errors in 22
for folder_path in tqdm(file_path_list[:]):   
    print(f"Processing '{folder_path}'")
    df = pd.read_csv(folder_path)
    
    df["busNumber"] = df["busNumber"].astype(str)
    df.to_parquet(folder_path.parent/'parquet'/(folder_path.name[:-4] + '.parquet'))

  0%|          | 0/126 [00:00<?, ?it/s]

Processing '..\..\..\pet_project\tables\data_for_01_DEC_2022.csv'


  1%|          | 1/126 [00:00<01:44,  1.20it/s]

Processing '..\..\..\pet_project\tables\data_for_01_FEB_2023.csv'


  2%|▏         | 2/126 [00:01<01:22,  1.50it/s]

Processing '..\..\..\pet_project\tables\data_for_01_JAN_2023.csv'


  2%|▏         | 3/126 [00:02<01:39,  1.24it/s]

Processing '..\..\..\pet_project\tables\data_for_01_NOV_2022.csv'


  3%|▎         | 4/126 [00:04<02:27,  1.21s/it]

Processing '..\..\..\pet_project\tables\data_for_02_DEC_2022.csv'


  4%|▍         | 5/126 [00:05<02:36,  1.30s/it]

Processing '..\..\..\pet_project\tables\data_for_02_FEB_2023.csv'


  5%|▍         | 6/126 [00:06<02:35,  1.29s/it]

Processing '..\..\..\pet_project\tables\data_for_02_JAN_2023.csv'


  6%|▌         | 7/126 [00:08<02:51,  1.44s/it]

Processing '..\..\..\pet_project\tables\data_for_02_NOV_2022.csv'


  6%|▋         | 8/126 [00:10<03:11,  1.62s/it]

Processing '..\..\..\pet_project\tables\data_for_03_DEC_2022.csv'


  7%|▋         | 9/126 [00:11<02:27,  1.26s/it]

Processing '..\..\..\pet_project\tables\data_for_03_FEB_2023.csv'


  8%|▊         | 10/126 [00:12<02:16,  1.17s/it]

Processing '..\..\..\pet_project\tables\data_for_03_JAN_2023.csv'


  9%|▊         | 11/126 [00:13<02:28,  1.29s/it]

Processing '..\..\..\pet_project\tables\data_for_03_NOV_2022.csv'


 10%|▉         | 12/126 [00:15<02:37,  1.38s/it]

Processing '..\..\..\pet_project\tables\data_for_04_DEC_2022.csv'


 10%|█         | 13/126 [00:16<02:20,  1.24s/it]

Processing '..\..\..\pet_project\tables\data_for_04_FEB_2023.csv'


 11%|█         | 14/126 [00:16<01:53,  1.02s/it]

Processing '..\..\..\pet_project\tables\data_for_04_JAN_2023.csv'


 12%|█▏        | 15/126 [00:18<02:10,  1.18s/it]

Processing '..\..\..\pet_project\tables\data_for_04_NOV_2022.csv'


 13%|█▎        | 16/126 [00:19<02:23,  1.31s/it]

Processing '..\..\..\pet_project\tables\data_for_05_DEC_2022.csv'


 13%|█▎        | 17/126 [00:21<02:49,  1.55s/it]

Processing '..\..\..\pet_project\tables\data_for_05_FEB_2023.csv'


 14%|█▍        | 18/126 [00:24<03:18,  1.84s/it]

Processing '..\..\..\pet_project\tables\data_for_05_JAN_2023.csv'


 15%|█▌        | 19/126 [00:26<03:15,  1.83s/it]

Processing '..\..\..\pet_project\tables\data_for_05_NOV_2022.csv'


 16%|█▌        | 20/126 [00:27<03:06,  1.76s/it]

Processing '..\..\..\pet_project\tables\data_for_06_DEC_2022.csv'


 17%|█▋        | 21/126 [00:29<03:05,  1.77s/it]

Processing '..\..\..\pet_project\tables\data_for_06_FEB_2023.csv'


 17%|█▋        | 22/126 [00:31<02:54,  1.68s/it]

Processing '..\..\..\pet_project\tables\data_for_06_JAN_2023.csv'


 18%|█▊        | 23/126 [00:32<02:43,  1.59s/it]

Processing '..\..\..\pet_project\tables\data_for_06_NOV_2022.csv'


 19%|█▉        | 24/126 [00:33<02:34,  1.52s/it]

Processing '..\..\..\pet_project\tables\data_for_07_DEC_2022.csv'


 20%|█▉        | 25/126 [00:35<02:36,  1.55s/it]

Processing '..\..\..\pet_project\tables\data_for_07_FEB_2023.csv'


 21%|██        | 26/126 [00:37<02:34,  1.55s/it]

Processing '..\..\..\pet_project\tables\data_for_07_JAN_2023.csv'


 21%|██▏       | 27/126 [00:38<02:21,  1.43s/it]

Processing '..\..\..\pet_project\tables\data_for_07_NOV_2022.csv'


 22%|██▏       | 28/126 [00:38<01:45,  1.07s/it]

Processing '..\..\..\pet_project\tables\data_for_08_DEC_2022.csv'


 23%|██▎       | 29/126 [00:39<01:52,  1.16s/it]

Processing '..\..\..\pet_project\tables\data_for_08_FEB_2023.csv'


 24%|██▍       | 30/126 [00:41<02:18,  1.44s/it]

Processing '..\..\..\pet_project\tables\data_for_08_JAN_2023.csv'


 25%|██▍       | 31/126 [00:43<02:20,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_08_NOV_2022.csv'


 25%|██▌       | 32/126 [00:45<02:30,  1.60s/it]

Processing '..\..\..\pet_project\tables\data_for_09_DEC_2022.csv'


 26%|██▌       | 33/126 [00:46<02:27,  1.59s/it]

Processing '..\..\..\pet_project\tables\data_for_09_FEB_2023.csv'


 27%|██▋       | 34/126 [00:48<02:30,  1.64s/it]

Processing '..\..\..\pet_project\tables\data_for_09_JAN_2023.csv'


 28%|██▊       | 35/126 [00:50<02:29,  1.64s/it]

Processing '..\..\..\pet_project\tables\data_for_09_NOV_2022.csv'


 29%|██▊       | 36/126 [00:52<02:32,  1.69s/it]

Processing '..\..\..\pet_project\tables\data_for_09_OCT_2022.csv'


 29%|██▉       | 37/126 [00:52<01:54,  1.29s/it]

Processing '..\..\..\pet_project\tables\data_for_10_DEC_2022.csv'


 30%|███       | 38/126 [00:53<01:47,  1.22s/it]

Processing '..\..\..\pet_project\tables\data_for_10_FEB_2023.csv'


 31%|███       | 39/126 [00:54<01:34,  1.08s/it]

Processing '..\..\..\pet_project\tables\data_for_10_JAN_2023.csv'


 32%|███▏      | 40/126 [00:55<01:45,  1.22s/it]

Processing '..\..\..\pet_project\tables\data_for_10_NOV_2022.csv'


 33%|███▎      | 41/126 [00:57<02:01,  1.43s/it]

Processing '..\..\..\pet_project\tables\data_for_10_OCT_2022.csv'


 33%|███▎      | 42/126 [00:59<02:03,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_11_DEC_2022.csv'


 34%|███▍      | 43/126 [01:00<01:54,  1.38s/it]

Processing '..\..\..\pet_project\tables\data_for_11_FEB_2023.csv'


 35%|███▍      | 44/126 [01:01<01:33,  1.14s/it]

Processing '..\..\..\pet_project\tables\data_for_11_JAN_2023.csv'


 36%|███▌      | 45/126 [01:02<01:28,  1.10s/it]

Processing '..\..\..\pet_project\tables\data_for_11_NOV_2022.csv'


 37%|███▋      | 46/126 [01:03<01:43,  1.30s/it]

Processing '..\..\..\pet_project\tables\data_for_11_OCT_2022.csv'


 37%|███▋      | 47/126 [01:05<01:41,  1.29s/it]

Processing '..\..\..\pet_project\tables\data_for_12_DEC_2022.csv'


 38%|███▊      | 48/126 [01:06<01:38,  1.27s/it]

Processing '..\..\..\pet_project\tables\data_for_12_FEB_2023.csv'


 39%|███▉      | 49/126 [01:07<01:29,  1.16s/it]

Processing '..\..\..\pet_project\tables\data_for_12_JAN_2023.csv'


 40%|███▉      | 50/126 [01:08<01:34,  1.24s/it]

Processing '..\..\..\pet_project\tables\data_for_12_NOV_2022.csv'


 40%|████      | 51/126 [01:08<01:09,  1.07it/s]

Processing '..\..\..\pet_project\tables\data_for_12_OCT_2022.csv'


 41%|████▏     | 52/126 [01:10<01:19,  1.07s/it]

Processing '..\..\..\pet_project\tables\data_for_13_DEC_2022.csv'


 42%|████▏     | 53/126 [01:11<01:29,  1.23s/it]

Processing '..\..\..\pet_project\tables\data_for_13_FEB_2023.csv'


 43%|████▎     | 54/126 [01:13<01:40,  1.39s/it]

Processing '..\..\..\pet_project\tables\data_for_13_JAN_2023.csv'


 44%|████▎     | 55/126 [01:15<01:40,  1.41s/it]

Processing '..\..\..\pet_project\tables\data_for_13_NOV_2022.csv'


 44%|████▍     | 56/126 [01:15<01:27,  1.25s/it]

Processing '..\..\..\pet_project\tables\data_for_13_OCT_2022.csv'


 45%|████▌     | 57/126 [01:17<01:39,  1.45s/it]

Processing '..\..\..\pet_project\tables\data_for_14_DEC_2022.csv'


 46%|████▌     | 58/126 [01:19<01:36,  1.42s/it]

Processing '..\..\..\pet_project\tables\data_for_14_JAN_2023.csv'


 47%|████▋     | 59/126 [01:20<01:31,  1.36s/it]

Processing '..\..\..\pet_project\tables\data_for_14_NOV_2022.csv'


 48%|████▊     | 60/126 [01:22<01:37,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_14_OCT_2022.csv'


 48%|████▊     | 61/126 [01:23<01:33,  1.44s/it]

Processing '..\..\..\pet_project\tables\data_for_15_DEC_2022.csv'


 49%|████▉     | 62/126 [01:25<01:37,  1.52s/it]

Processing '..\..\..\pet_project\tables\data_for_15_JAN_2023.csv'


 50%|█████     | 63/126 [01:26<01:26,  1.37s/it]

Processing '..\..\..\pet_project\tables\data_for_15_NOV_2022.csv'


 51%|█████     | 64/126 [01:26<01:04,  1.04s/it]

Processing '..\..\..\pet_project\tables\data_for_15_OCT_2022.csv'


 52%|█████▏    | 65/126 [01:27<01:10,  1.15s/it]

Processing '..\..\..\pet_project\tables\data_for_16_DEC_2022.csv'


 52%|█████▏    | 66/126 [01:28<01:07,  1.12s/it]

Processing '..\..\..\pet_project\tables\data_for_16_JAN_2023.csv'


 53%|█████▎    | 67/126 [01:30<01:05,  1.11s/it]

Processing '..\..\..\pet_project\tables\data_for_16_NOV_2022.csv'


 54%|█████▍    | 68/126 [01:31<01:13,  1.27s/it]

Processing '..\..\..\pet_project\tables\data_for_16_OCT_2022.csv'


 55%|█████▍    | 69/126 [01:32<01:11,  1.25s/it]

Processing '..\..\..\pet_project\tables\data_for_17_DEC_2022.csv'


 56%|█████▌    | 70/126 [01:33<00:58,  1.05s/it]

Processing '..\..\..\pet_project\tables\data_for_17_JAN_2023.csv'


 56%|█████▋    | 71/126 [01:35<01:05,  1.18s/it]

Processing '..\..\..\pet_project\tables\data_for_17_NOV_2022.csv'


 57%|█████▋    | 72/126 [01:36<01:05,  1.22s/it]

Processing '..\..\..\pet_project\tables\data_for_17_OCT_2022.csv'


 58%|█████▊    | 73/126 [01:38<01:13,  1.38s/it]

Processing '..\..\..\pet_project\tables\data_for_18_DEC_2022.csv'


 59%|█████▊    | 74/126 [01:39<01:07,  1.31s/it]

Processing '..\..\..\pet_project\tables\data_for_18_JAN_2023.csv'


 60%|█████▉    | 75/126 [01:40<01:05,  1.28s/it]

Processing '..\..\..\pet_project\tables\data_for_18_NOV_2022.csv'


 60%|██████    | 76/126 [01:42<01:10,  1.41s/it]

Processing '..\..\..\pet_project\tables\data_for_18_OCT_2022.csv'


 61%|██████    | 77/126 [01:43<01:12,  1.48s/it]

Processing '..\..\..\pet_project\tables\data_for_19_DEC_2022.csv'


 62%|██████▏   | 78/126 [01:44<01:05,  1.36s/it]

Processing '..\..\..\pet_project\tables\data_for_19_JAN_2023.csv'


 63%|██████▎   | 79/126 [01:46<01:03,  1.35s/it]

Processing '..\..\..\pet_project\tables\data_for_19_NOV_2022.csv'


 63%|██████▎   | 80/126 [01:47<01:01,  1.34s/it]

Processing '..\..\..\pet_project\tables\data_for_19_OCT_2022.csv'


 64%|██████▍   | 81/126 [01:49<01:04,  1.44s/it]

Processing '..\..\..\pet_project\tables\data_for_20_DEC_2022.csv'


 65%|██████▌   | 82/126 [01:50<01:01,  1.39s/it]

Processing '..\..\..\pet_project\tables\data_for_20_JAN_2023.csv'


 66%|██████▌   | 83/126 [01:51<00:56,  1.31s/it]

Processing '..\..\..\pet_project\tables\data_for_20_NOV_2022.csv'


 67%|██████▋   | 84/126 [01:52<00:54,  1.29s/it]

Processing '..\..\..\pet_project\tables\data_for_20_OCT_2022.csv'


 67%|██████▋   | 85/126 [01:54<00:57,  1.40s/it]

Processing '..\..\..\pet_project\tables\data_for_21_DEC_2022.csv'


 68%|██████▊   | 86/126 [01:55<00:52,  1.31s/it]

Processing '..\..\..\pet_project\tables\data_for_21_JAN_2023.csv'


 69%|██████▉   | 87/126 [01:56<00:48,  1.24s/it]

Processing '..\..\..\pet_project\tables\data_for_21_NOV_2022.csv'


 70%|██████▉   | 88/126 [01:58<00:52,  1.38s/it]

Processing '..\..\..\pet_project\tables\data_for_21_OCT_2022.csv'


 71%|███████   | 89/126 [02:00<00:59,  1.60s/it]

Processing '..\..\..\pet_project\tables\data_for_22_DEC_2022.csv'


 71%|███████▏  | 90/126 [02:02<00:59,  1.65s/it]

Processing '..\..\..\pet_project\tables\data_for_22_JAN_2023.csv'


 72%|███████▏  | 91/126 [02:03<00:54,  1.55s/it]

Processing '..\..\..\pet_project\tables\data_for_22_NOV_2022.csv'


 73%|███████▎  | 92/126 [02:05<00:54,  1.60s/it]

Processing '..\..\..\pet_project\tables\data_for_22_OCT_2022.csv'


 74%|███████▍  | 93/126 [02:06<00:48,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_23_DEC_2022.csv'


 75%|███████▍  | 94/126 [02:07<00:47,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_23_JAN_2023.csv'


 75%|███████▌  | 95/126 [02:10<00:53,  1.73s/it]

Processing '..\..\..\pet_project\tables\data_for_23_NOV_2022.csv'


 76%|███████▌  | 96/126 [02:12<00:52,  1.75s/it]

Processing '..\..\..\pet_project\tables\data_for_23_OCT_2022.csv'


 77%|███████▋  | 97/126 [02:13<00:50,  1.73s/it]

Processing '..\..\..\pet_project\tables\data_for_24_DEC_2022.csv'


 78%|███████▊  | 98/126 [02:14<00:44,  1.57s/it]

Processing '..\..\..\pet_project\tables\data_for_24_JAN_2023.csv'


 79%|███████▊  | 99/126 [02:16<00:38,  1.43s/it]

Processing '..\..\..\pet_project\tables\data_for_24_NOV_2022.csv'


 79%|███████▉  | 100/126 [02:17<00:34,  1.31s/it]

Processing '..\..\..\pet_project\tables\data_for_24_OCT_2022.csv'


 80%|████████  | 101/126 [02:19<00:37,  1.52s/it]

Processing '..\..\..\pet_project\tables\data_for_25_DEC_2022.csv'


 81%|████████  | 102/126 [02:20<00:33,  1.41s/it]

Processing '..\..\..\pet_project\tables\data_for_25_JAN_2023.csv'


 82%|████████▏ | 103/126 [02:21<00:35,  1.52s/it]

Processing '..\..\..\pet_project\tables\data_for_25_NOV_2022.csv'


 83%|████████▎ | 104/126 [02:24<00:36,  1.68s/it]

Processing '..\..\..\pet_project\tables\data_for_25_OCT_2022.csv'


 83%|████████▎ | 105/126 [02:25<00:36,  1.74s/it]

Processing '..\..\..\pet_project\tables\data_for_26_DEC_2022.csv'


 84%|████████▍ | 106/126 [02:27<00:34,  1.75s/it]

Processing '..\..\..\pet_project\tables\data_for_26_NOV_2022.csv'


 85%|████████▍ | 107/126 [02:28<00:29,  1.58s/it]

Processing '..\..\..\pet_project\tables\data_for_26_OCT_2022.csv'


 86%|████████▌ | 108/126 [02:30<00:28,  1.58s/it]

Processing '..\..\..\pet_project\tables\data_for_27_DEC_2022.csv'


 87%|████████▋ | 109/126 [02:31<00:24,  1.42s/it]

Processing '..\..\..\pet_project\tables\data_for_27_NOV_2022.csv'


 87%|████████▋ | 110/126 [02:32<00:20,  1.30s/it]

Processing '..\..\..\pet_project\tables\data_for_27_OCT_2022.csv'


 88%|████████▊ | 111/126 [02:34<00:23,  1.59s/it]

Processing '..\..\..\pet_project\tables\data_for_28_DEC_2022.csv'


 89%|████████▉ | 112/126 [02:36<00:23,  1.68s/it]

Processing '..\..\..\pet_project\tables\data_for_28_JAN_2023.csv'
Processing '..\..\..\pet_project\tables\data_for_28_NOV_2022.csv'


 90%|█████████ | 114/126 [02:38<00:15,  1.30s/it]

Processing '..\..\..\pet_project\tables\data_for_28_OCT_2022.csv'


 91%|█████████▏| 115/126 [02:40<00:16,  1.46s/it]

Processing '..\..\..\pet_project\tables\data_for_29_DEC_2022.csv'


 92%|█████████▏| 116/126 [02:41<00:14,  1.47s/it]

Processing '..\..\..\pet_project\tables\data_for_29_JAN_2023.csv'


 93%|█████████▎| 117/126 [02:43<00:12,  1.40s/it]

Processing '..\..\..\pet_project\tables\data_for_29_NOV_2022.csv'


 94%|█████████▎| 118/126 [02:44<00:11,  1.44s/it]

Processing '..\..\..\pet_project\tables\data_for_29_OCT_2022.csv'


 94%|█████████▍| 119/126 [02:45<00:09,  1.43s/it]

Processing '..\..\..\pet_project\tables\data_for_30_DEC_2022.csv'


 95%|█████████▌| 120/126 [02:47<00:09,  1.54s/it]

Processing '..\..\..\pet_project\tables\data_for_30_JAN_2023.csv'


 96%|█████████▌| 121/126 [02:49<00:08,  1.69s/it]

Processing '..\..\..\pet_project\tables\data_for_30_NOV_2022.csv'


 97%|█████████▋| 122/126 [02:51<00:06,  1.62s/it]

Processing '..\..\..\pet_project\tables\data_for_30_OCT_2022.csv'


 98%|█████████▊| 123/126 [02:52<00:04,  1.59s/it]

Processing '..\..\..\pet_project\tables\data_for_31_DEC_2022.csv'


 98%|█████████▊| 124/126 [02:54<00:02,  1.48s/it]

Processing '..\..\..\pet_project\tables\data_for_31_JAN_2023.csv'


 99%|█████████▉| 125/126 [02:55<00:01,  1.37s/it]

Processing '..\..\..\pet_project\tables\data_for_31_OCT_2022.csv'


100%|██████████| 126/126 [02:56<00:00,  1.40s/it]


In [53]:
Path.mkdir?

[1;31mSignature:[0m [0mPath[0m[1;33m.[0m[0mmkdir[0m[1;33m([0m[0mself[0m[1;33m,[0m [0mmode[0m[1;33m=[0m[1;36m511[0m[1;33m,[0m [0mparents[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m [0mexist_ok[0m[1;33m=[0m[1;32mFalse[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m Create a new directory at this given path.
[1;31mFile:[0m      c:\program files\python38\lib\pathlib.py
[1;31mType:[0m      function


In [None]:
(file_path/"parquet").mkdir