In [2]:
import json
import numpy as np
import pandas as pd
import psycopg2
import os

from collections import Counter
from datetime import datetime, timedelta
from dateutil.tz import tzlocal 
from pathlib import Path
from tqdm import tqdm

## Analyse data

In [2]:
[p for p in kharkiv_folder_path.iterdir()]

[WindowsPath('../../../pet_project/kharkiv/new 1.txt'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_01_FEB_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_02_FEB_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_03_FEB_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_25_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_26_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_27_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_28_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_29_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_30_JAN_2023'),
 WindowsPath('../../../pet_project/kharkiv/trans_data_31_JAN_2023')]

In [61]:
[p for p in Path("../../../pet_project/kharkiv").iterdir()]

[WindowsPath('../../../pet_project/kharkiv/archive'),
 WindowsPath('../../../pet_project/kharkiv/new 1.txt')]

In [9]:
from concurrent.futures import ThreadPoolExecutor

def load_responses(resp_path):
    resp_tm = datetime.strptime(resp_path.name[6:-5], '%Y-%m-%d %H;%M;%S')
    resp_tm_str = resp_tm.replace(tzinfo=tzlocal()).isoformat()
    
    with open(resp_path) as f:
        resp = json.load(f)
        if ("rows" in resp) and (resp["rows"]): 
            return [row + [resp['timestamp'], resp_tm_str] 
                    for row in resp['rows']]
        else: 
            return []

def accumulate_responses_from_folder(folder_path):
    file_path_list = list(folder_path.iterdir())
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(tqdm(executor.map(load_responses, file_path_list),
                            total=len(file_path_list), mininterval=10, leave=False, 
                            desc="Accumulate responses from folder")
                          )
        resp_list = []
        for resp_ in results: 
            resp_list += resp_
        print(f"Accumulate responses from folder\n"\
              f"\t{len(resp_list)} / {len(resp_list)} [avg={len(resp_list)/len(resp_list):.2f}]")
    
    
    columns = ['imei', 'lat', 'lng', 'speed', 'gps_datetime_origin', 'orientation', 'route_name', 
               'route_type', 'vehicle_id', 'dd', 'gpstime', 'response_datetime'] 

    return pd.DataFrame(resp_list, columns=columns)
    
def clear_data(in_df):
    unique_data = []
    
    imei_list = in_df['imei'].value_counts().index
    imei_tqdm = tqdm(
        imei_list, total=len(imei_list), mininterval=10, leave=False, desc="Clear data"
    )
    
    for imei in imei_tqdm: 
        row_data = in_df[ in_df['imei'] == imei].values.tolist()
        result = [row_data[0]]
        for row0, row1 in zip(row_data[:-1], row_data[1:]):
            if row0[1:9] != row1[1:9]: 
                result += [row1]
        unique_data += result

    df_unique = pd.DataFrame(unique_data, columns=in_df.columns)
    print(f"Clear data\n\t{len(in_df)} / {len(df_unique)} [avg {len(in_df) / len(df_unique):.02f}]")
    return df_unique

In [72]:
kharkiv_folder_path = Path("../../data/local/jsons/kharkiv")
# kharkiv_folder_path = Path("../../../pet_project/kharkiv")
kharkiv_folder_list = [p for p in kharkiv_folder_path.iterdir() if "trans_data_" in p.name]
kharkiv_folders_list = sorted(kharkiv_folder_list,
                        key=lambda p: datetime.strptime(p.name[11:], '%d_%b_%Y'))
kharkiv_folders_list

[WindowsPath('../../data/local/jsons/kharkiv/trans_data_06_FEB_2023'),
 WindowsPath('../../data/local/jsons/kharkiv/trans_data_07_FEB_2023'),
 WindowsPath('../../data/local/jsons/kharkiv/trans_data_08_FEB_2023'),
 WindowsPath('../../data/local/jsons/kharkiv/trans_data_09_FEB_2023')]

In [71]:
[p for p in (kharkiv_folder_path/'archive').iterdir()]

[WindowsPath('../../data/local/jsons/kharkiv/archive/optimized'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/origin'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_03_FEB_2023_optimized.parquet'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_03_FEB_2023_origin.parquet'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_04_FEB_2023_optimized.parquet'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_04_FEB_2023_origin.parquet'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_05_FEB_2023_optimized.parquet'),
 WindowsPath('../../data/local/jsons/kharkiv/archive/trans_data_05_FEB_2023_origin.parquet')]

In [70]:
(kharkiv_folder_path/"archive/optimized").mkdir()

In [73]:
kharkiv_folder_path = Path("../../data/local/jsons/kharkiv")
# kharkiv_folder_path = Path("../../../pet_project/kharkiv")
kharkiv_folder_list = [p for p in kharkiv_folder_path.iterdir() if "trans_data_" in p.name]
kharkiv_folders_list = sorted(kharkiv_folder_list,
                        key=lambda p: datetime.strptime(p.name[11:], '%d_%b_%Y'))
# kharkiv_folders_list

for folder_path in tqdm(kharkiv_folders_list[:-1]):   
    print(f"Processing '{folder_path}'")
    df = accumulate_responses_from_folder(folder_path)
    df.to_parquet(folder_path.parent/'archive/origin'/(folder_path.name + '_origin.parquet'))
    
    df_u = clear_data(df)
    df_u.to_parquet(folder_path.parent/'archive/optimized'/(folder_path.name + '_optimized.parquet'))

  0%|          | 0/3 [00:00<?, ?it/s]

Processing '..\..\data\local\jsons\kharkiv\trans_data_06_FEB_2023'



Accumulate responses from folder:   0%|          | 0/16453 [00:00<?, ?it/s][A
Accumulate responses from folder:   9%|▊         | 1415/16453 [00:02<00:21, 707.40it/s][A
Accumulate responses from folder:  17%|█▋        | 2830/16453 [00:04<00:19, 695.72it/s][A
Accumulate responses from folder:  26%|██▌       | 4222/16453 [00:06<00:18, 662.27it/s][A
Accumulate responses from folder:  34%|███▎      | 5550/16453 [00:10<00:24, 449.85it/s][A
Accumulate responses from folder:  40%|████      | 6588/16453 [00:14<00:26, 377.19it/s][A
Accumulate responses from folder:  45%|████▌     | 7449/16453 [00:17<00:24, 366.34it/s][A
Accumulate responses from folder:  50%|█████     | 8248/16453 [00:19<00:23, 346.17it/s][A
Accumulate responses from folder:  55%|█████▌    | 9075/16453 [00:21<00:20, 362.24it/s][A
Accumulate responses from folder:  60%|█████▉    | 9838/16453 [00:24<00:18, 350.41it/s][A
Accumulate responses from folder:  65%|██████▍   | 10691/16453 [00:26<00:15, 369.97it/s][A
Accumulat

Accumulate responses from folder
	2266644 / 2266644 [avg=1.00]



Clear data:   0%|          | 0/279 [00:00<?, ?it/s][A
Clear data:  18%|█▊        | 50/279 [00:02<00:09, 24.31it/s][A
Clear data:  35%|███▌      | 99/279 [00:04<00:08, 21.02it/s][A
Clear data:  51%|█████     | 142/279 [00:06<00:06, 20.72it/s][A
Clear data:  66%|██████▌   | 184/279 [00:08<00:04, 20.62it/s][A
Clear data:  84%|████████▎ | 233/279 [00:10<00:02, 21.43it/s][A
                                                             [A

Clear data
	2266644 / 213429 [avg 10.62]


 33%|███▎      | 1/3 [01:10<02:20, 70.32s/it]

Processing '..\..\data\local\jsons\kharkiv\trans_data_07_FEB_2023'



Accumulate responses from folder:   0%|          | 0/17081 [00:00<?, ?it/s][A
Accumulate responses from folder:   8%|▊         | 1375/17081 [00:02<00:22, 686.63it/s][A
Accumulate responses from folder:  16%|█▌        | 2769/17081 [00:04<00:20, 692.54it/s][A
Accumulate responses from folder:  24%|██▍       | 4155/17081 [00:06<00:18, 682.93it/s][A
Accumulate responses from folder:  32%|███▏      | 5522/17081 [00:09<00:20, 566.37it/s][A
Accumulate responses from folder:  39%|███▉      | 6703/17081 [00:11<00:19, 530.95it/s][A
Accumulate responses from folder:  46%|████▌     | 7797/17081 [00:13<00:18, 508.96it/s][A
Accumulate responses from folder:  52%|█████▏    | 8835/17081 [00:16<00:16, 487.10it/s][A
Accumulate responses from folder:  58%|█████▊    | 9822/17081 [00:18<00:15, 472.40it/s][A
Accumulate responses from folder:  64%|██████▍   | 10928/17081 [00:20<00:12, 494.80it/s][A
Accumulate responses from folder:  70%|██████▉   | 11928/17081 [00:23<00:11, 448.27it/s][A
Accumula

Accumulate responses from folder
	2389542 / 2389542 [avg=1.00]



Clear data:   0%|          | 0/280 [00:00<?, ?it/s][A
Clear data:  15%|█▌        | 43/280 [00:02<00:12, 19.52it/s][A
Clear data:  30%|██▉       | 83/280 [00:04<00:10, 18.81it/s][A
Clear data:  43%|████▎     | 121/280 [00:06<00:08, 18.63it/s][A
Clear data:  57%|█████▋    | 159/280 [00:08<00:06, 18.72it/s][A
Clear data:  72%|███████▏  | 201/280 [00:10<00:04, 19.51it/s][A
Clear data:  91%|█████████ | 254/280 [00:12<00:01, 20.81it/s][A
 67%|██████▋   | 2/3 [02:07<01:02, 62.69s/it]                [A

Clear data
	2389542 / 227580 [avg 10.50]
Processing '..\..\data\local\jsons\kharkiv\trans_data_08_FEB_2023'



Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:   8%|▊         | 1416/17280 [00:02<00:22, 706.98it/s][A
Accumulate responses from folder:  16%|█▋        | 2830/17280 [00:04<00:20, 705.46it/s][A
Accumulate responses from folder:  25%|██▍       | 4241/17280 [00:06<00:18, 693.73it/s][A
Accumulate responses from folder:  33%|███▎      | 5629/17280 [00:09<00:19, 584.83it/s][A
Accumulate responses from folder:  40%|███▉      | 6840/17280 [00:12<00:22, 468.41it/s][A
Accumulate responses from folder:  45%|████▌     | 7854/17280 [00:15<00:22, 426.18it/s][A
Accumulate responses from folder:  51%|█████     | 8759/17280 [00:18<00:21, 394.83it/s][A
Accumulate responses from folder:  55%|█████▌    | 9583/17280 [00:20<00:20, 379.06it/s][A
Accumulate responses from folder:  60%|██████    | 10368/17280 [00:22<00:18, 382.34it/s][A
Accumulate responses from folder:  65%|██████▍   | 11148/17280 [00:25<00:16, 368.58it/s][A
Accumula

Accumulate responses from folder
	2490106 / 2490106 [avg=1.00]



Clear data:   0%|          | 0/278 [00:00<?, ?it/s][A
Clear data:  12%|█▏        | 34/278 [00:02<00:14, 16.94it/s][A
Clear data:  24%|██▍       | 68/278 [00:04<00:12, 16.34it/s][A
Clear data:  37%|███▋      | 103/278 [00:06<00:10, 16.77it/s][A
Clear data:  50%|█████     | 139/278 [00:08<00:08, 17.17it/s][A
Clear data:  63%|██████▎   | 175/278 [00:10<00:05, 17.29it/s][A
Clear data:  79%|███████▉  | 221/278 [00:12<00:02, 19.13it/s][A
                                                             [A

Clear data
	2490106 / 241456 [avg 10.31]


100%|██████████| 3/3 [03:14<00:00, 64.76s/it]


In [23]:
df

Unnamed: 0,imei,lat,lng,speed,gps_datetime_origin,orientation,route_name,route_type,vehcle_id,dd,gpstime,response_datetime
0,352625698666219,368439,962440,0,0,315,55,2,2736,23,2023-02-02T02:33:03+00:00,2023-02-02T04:33:15+02:00
1,352625698666219,368439,962440,0,0,315,55,2,2736,28,2023-02-02T02:33:08+00:00,2023-02-02T04:33:20+02:00
2,352625698666219,368439,962440,0,0,315,55,2,2736,33,2023-02-02T02:33:13+00:00,2023-02-02T04:33:25+02:00
3,352625698666219,368439,962440,0,0,315,55,2,2736,38,2023-02-02T02:33:18+00:00,2023-02-02T04:33:30+02:00
4,352625698666219,368439,962440,0,0,315,55,2,2736,43,2023-02-02T02:33:23+00:00,2023-02-02T04:33:35+02:00
...,...,...,...,...,...,...,...,...,...,...,...,...
1936189,354017114806397,368710,962059,0,0,316,55,2,2714,1788,2023-02-02T21:03:37+00:00,2023-02-02T23:03:50+02:00
1936190,354017114806397,368710,962059,0,0,316,55,2,2714,1793,2023-02-02T21:03:42+00:00,2023-02-02T23:03:55+02:00
1936191,354017114806397,368710,962059,0,0,316,55,2,2714,1798,2023-02-02T21:03:47+00:00,2023-02-02T23:04:00+02:00
1936192,354017114806397,368710,962059,0,0,316,55,2,2714,1803,2023-02-02T21:03:52+00:00,2023-02-02T23:04:05+02:00


In [25]:
df_1 = pd.read_parquet(folder_path.parent/'archive'/(folder_path.name + '_origin.parquet') )

In [26]:
df_1

Unnamed: 0,imei,lat,lng,speed,gps_datetime_origin,orientation,route_name,route_type,vehcle_id,dd,gpstime,response_datetime
0,352625698666219,368439,962440,0,0,315,55,2,2736,23,2023-02-02T02:33:03+00:00,2023-02-02T04:33:15+02:00
1,352625698666219,368439,962440,0,0,315,55,2,2736,28,2023-02-02T02:33:08+00:00,2023-02-02T04:33:20+02:00
2,352625698666219,368439,962440,0,0,315,55,2,2736,33,2023-02-02T02:33:13+00:00,2023-02-02T04:33:25+02:00
3,352625698666219,368439,962440,0,0,315,55,2,2736,38,2023-02-02T02:33:18+00:00,2023-02-02T04:33:30+02:00
4,352625698666219,368439,962440,0,0,315,55,2,2736,43,2023-02-02T02:33:23+00:00,2023-02-02T04:33:35+02:00
...,...,...,...,...,...,...,...,...,...,...,...,...
1936189,354017114806397,368710,962059,0,0,316,55,2,2714,1788,2023-02-02T21:03:37+00:00,2023-02-02T23:03:50+02:00
1936190,354017114806397,368710,962059,0,0,316,55,2,2714,1793,2023-02-02T21:03:42+00:00,2023-02-02T23:03:55+02:00
1936191,354017114806397,368710,962059,0,0,316,55,2,2714,1798,2023-02-02T21:03:47+00:00,2023-02-02T23:04:00+02:00
1936192,354017114806397,368710,962059,0,0,316,55,2,2714,1803,2023-02-02T21:03:52+00:00,2023-02-02T23:04:05+02:00


In [4]:
import PTETA
import importlib
# importlib.reload(PTETA.utils.transport.kharkiv)

try: del KharkivTransportAVLData
except: pass 
finally: from PTETA.utils.transport.kharkiv.KharkivTransportAVLData import KharkivTransportAVLData
    
try: del KharkivTransportOperator
except: pass 
finally: from PTETA.utils.transport.kharkiv.KharkivTransportOperator import KharkivTransportOperator
    
try: del KharkivTransportRoute
except: pass 
finally: from PTETA.utils.transport.kharkiv.KharkivTransportRoute import KharkivTransportRoute
    
try: del KharkivTransportVehicle
except: pass 
finally: from PTETA.utils.transport.kharkiv.KharkivTransportVehicle import KharkivTransportVehicle

try: del TransGPSCVMonitor
except: pass 
finally: from aws.src.TransGPSCVMonitor import TransGPSCVMonitor

In [13]:
folder_path

WindowsPath('../../../pet_project/kharkiv/trans_data_03_FEB_2023')

In [16]:
BATCH_SIZE = 3_000
connection_config= dict({
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD']
})

monitor = TransGPSCVMonitor(connection_config=connection_config, data_model="kharkiv")

# file_path_list =list(Path("../data/local/tables/").iterdir())
file_path_list =list(Path("D:/projects/pet_project/tables").iterdir())
file_path_list = sorted(file_path_list, 
                        key=lambda p: datetime.strptime(p.name[9:-4], '%d_%b_%Y') )

for folder_path in tqdm(kharkiv_folders_list[:-1]):
    
    df = accumulate_responses_from_folder(folder_path)
    df_u = clear_data(df)
    
#     df_sum = pd.read_csv(df_path, encoding='utf-8', low_memory=False)
        
#     if "response_datetime" not in df_sum: 
#         df_sum['response_datetime'] = None

#     if not df_sum['imei'].dtype == 'O':
#         df_sum['imei'] = df_sum['imei'].astype(str, copy=True)

#     if not df_sum['busNumber'].dtype == 'O':
#         df_sum['busNumber'] = df_sum['busNumber'].astype(str, copy=True)
        
    
    df_cur = df_u[:]

    batch_tqdm = tqdm(df_cur.groupby(np.arange(len(df_cur)) // BATCH_SIZE), 
                     mininterval=10)
    for batch_number, batch_df in batch_tqdm:
        batch_df = batch_df.where(pd.notnull(batch_df), None)
        monitor.write_to_db(batch_df.to_dict('records'))

  0%|          | 0/3 [00:00<?, ?it/s]
Accumulate responses from folder:   0%|          | 0/12739 [00:00<?, ?it/s][A
Accumulate responses from folder:   3%|▎         | 433/12739 [00:02<00:58, 209.60it/s][A
Accumulate responses from folder:   7%|▋         | 853/12739 [00:05<01:20, 147.01it/s][A
Accumulate responses from folder:   9%|▉         | 1168/12739 [00:07<01:20, 143.31it/s][A
Accumulate responses from folder:  12%|█▏        | 1465/12739 [00:10<01:30, 125.08it/s][A
Accumulate responses from folder:  14%|█▎        | 1724/12739 [00:13<01:36, 113.78it/s][A
Accumulate responses from folder:  15%|█▌        | 1958/12739 [00:17<01:53, 95.33it/s] [A
Accumulate responses from folder:  18%|█▊        | 2263/12739 [00:19<01:35, 109.17it/s][A
Accumulate responses from folder:  20%|█▉        | 2525/12739 [00:21<01:29, 113.90it/s][A
Accumulate responses from folder:  22%|██▏       | 2768/12739 [00:23<01:27, 114.56it/s][A
Accumulate responses from folder:  24%|██▍       | 3050/12739 [00:

Accumulate responses from folder
	1616588 / 1616588 [avg=1.00]



Clear data:   0%|          | 0/273 [00:00<?, ?it/s][A
Clear data:   8%|▊         | 23/273 [00:02<00:22, 11.36it/s][A
Clear data:  18%|█▊        | 48/273 [00:04<00:18, 11.89it/s][A
Clear data:  26%|██▋       | 72/273 [00:06<00:16, 11.93it/s][A
Clear data:  36%|███▋      | 99/273 [00:08<00:14, 12.06it/s][A
Clear data:  47%|████▋     | 127/273 [00:10<00:11, 12.61it/s][A
Clear data:  56%|█████▌    | 153/273 [00:12<00:09, 12.50it/s][A
Clear data:  66%|██████▌   | 180/273 [00:14<00:07, 12.52it/s][A
Clear data:  75%|███████▌  | 206/273 [00:17<00:05, 11.74it/s][A
Clear data:  86%|████████▌ | 235/273 [00:19<00:03, 12.44it/s][A
                                                             [A

Clear data
	1616588 / 153418 [avg 10.54]



  0%|          | 0/52 [00:00<?, ?it/s][A
 19%|█▉        | 10/52 [00:10<00:44,  1.05s/it][A
 42%|████▏     | 22/52 [00:20<00:27,  1.09it/s][A
100%|██████████| 52/52 [00:40<00:00,  1.28it/s][A
 33%|███▎      | 1/3 [02:53<05:46, 173.17s/it]
Accumulate responses from folder:   0%|          | 0/10997 [00:00<?, ?it/s][A
Accumulate responses from folder:   6%|▌         | 652/10997 [00:02<00:31, 325.93it/s][A
Accumulate responses from folder:  12%|█▏        | 1304/10997 [00:04<00:33, 287.63it/s][A
Accumulate responses from folder:  17%|█▋        | 1924/10997 [00:06<00:30, 296.95it/s][A
Accumulate responses from folder:  23%|██▎       | 2523/10997 [00:08<00:29, 291.25it/s][A
Accumulate responses from folder:  28%|██▊       | 3109/10997 [00:10<00:28, 276.51it/s][A
Accumulate responses from folder:  34%|███▍      | 3714/10997 [00:12<00:25, 284.65it/s][A
Accumulate responses from folder:  39%|███▉      | 4291/10997 [00:14<00:23, 285.74it/s][A
Accumulate responses from folder:  44%|███

Accumulate responses from folder
	740660 / 740660 [avg=1.00]



Clear data:   0%|          | 0/239 [00:00<?, ?it/s][A
Clear data:  33%|███▎      | 79/239 [00:02<00:04, 39.10it/s][A
Clear data:  72%|███████▏  | 171/239 [00:04<00:01, 43.07it/s][A
                                                             [A
  0%|          | 0/24 [00:00<?, ?it/s][A

Clear data
	740660 / 70474 [avg 10.51]
There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportVehicle.KharkivTransportVehicle'> to inserted in DB



100%|██████████| 24/24 [00:13<00:00,  1.74it/s][A
 67%|██████▋   | 2/3 [04:01<01:51, 111.68s/it]
Accumulate responses from folder:   0%|          | 0/17280 [00:00<?, ?it/s][A
Accumulate responses from folder:   4%|▍         | 669/17280 [00:02<00:49, 333.68it/s][A
Accumulate responses from folder:   8%|▊         | 1344/17280 [00:04<00:47, 335.91it/s][A
Accumulate responses from folder:  12%|█▏        | 2016/17280 [00:06<00:47, 322.15it/s][A
Accumulate responses from folder:  15%|█▌        | 2662/17280 [00:08<00:46, 316.67it/s][A
Accumulate responses from folder:  19%|█▉        | 3298/17280 [00:10<00:44, 315.36it/s][A
Accumulate responses from folder:  23%|██▎       | 3930/17280 [00:12<00:43, 307.21it/s][A
Accumulate responses from folder:  26%|██▋       | 4546/17280 [00:14<00:43, 291.63it/s][A
Accumulate responses from folder:  30%|██▉       | 5132/17280 [00:17<00:46, 261.55it/s][A
Accumulate responses from folder:  33%|███▎      | 5666/17280 [00:20<00:48, 240.18it/s][A
Accu

Accumulate responses from folder
	2075486 / 2075486 [avg=1.00]



Clear data:   0%|          | 0/241 [00:00<?, ?it/s][A
Clear data:  11%|█         | 27/241 [00:02<00:16, 13.02it/s][A
Clear data:  22%|██▏       | 54/241 [00:04<00:14, 13.15it/s][A
Clear data:  34%|███▎      | 81/241 [00:06<00:13, 12.19it/s][A
Clear data:  44%|████▍     | 106/241 [00:08<00:11, 11.93it/s][A
Clear data:  54%|█████▍    | 130/241 [00:10<00:09, 11.43it/s][A
Clear data:  63%|██████▎   | 153/241 [00:13<00:07, 11.17it/s][A
Clear data:  73%|███████▎  | 176/241 [00:15<00:05, 10.98it/s][A
Clear data:  83%|████████▎ | 199/241 [00:17<00:03, 11.09it/s][A
Clear data:  95%|█████████▌| 229/241 [00:19<00:00, 12.19it/s][A
                                                             [A
  0%|          | 0/68 [00:00<?, ?it/s][A

Clear data
	2075486 / 201332 [avg 10.31]



 15%|█▍        | 10/68 [00:10<00:59,  1.03s/it][A
 37%|███▋      | 25/68 [00:20<00:34,  1.24it/s][A

There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportVehicle.KharkivTransportVehicle'> to inserted in DB



 59%|█████▉    | 40/68 [00:33<00:23,  1.19it/s][A
 76%|███████▋  | 52/68 [00:43<00:13,  1.19it/s][A
100%|██████████| 68/68 [00:57<00:00,  1.19it/s][A
100%|██████████| 3/3 [06:44<00:00, 134.70s/it]


In [6]:
monitor.objects_unique

{PTETA.utils.transport.kharkiv.KharkivTransportOperator.KharkivTransportOperator: {KharkivTransportOperator(id=-1, name='UNKNOWN')},
 PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute: {KharkivTransportRoute(id=-1),
  KharkivTransportRoute(id=1),
  KharkivTransportRoute(id=10),
  KharkivTransportRoute(id=11),
  KharkivTransportRoute(id=12),
  KharkivTransportRoute(id=13),
  KharkivTransportRoute(id=14),
  KharkivTransportRoute(id=15),
  KharkivTransportRoute(id=16),
  KharkivTransportRoute(id=17),
  KharkivTransportRoute(id=18),
  KharkivTransportRoute(id=19),
  KharkivTransportRoute(id=2),
  KharkivTransportRoute(id=20),
  KharkivTransportRoute(id=21),
  KharkivTransportRoute(id=22),
  KharkivTransportRoute(id=23),
  KharkivTransportRoute(id=24),
  KharkivTransportRoute(id=25),
  KharkivTransportRoute(id=5),
  KharkivTransportRoute(id=6),
  KharkivTransportRoute(id=7),
  KharkivTransportRoute(id=8),
  KharkivTransportRoute(id=9)},
 PTETA.utils.transport.kharkiv

In [7]:
operator_list, route_list, vehicle_list, avl_data_list = monitor.decompose_response(batch_df.to_dict('records'))

for obj_list in [operator_list, route_list, vehicle_list]:
    new_obj = monitor.get_new_objs(obj_list)
    if new_obj:
        print(f"There are {len(new_obj)} new {new_obj[0].__class__} to inserted in DB")
        monitor.update_db(new_obj)

for i, (vehicle, route) in enumerate(zip(vehicle_list, route_list)):
    avl_data_list[i].vehicle_id = monitor.vehicle_to_id[vehicle]
    avl_data_list[i].route_id = monitor.route_to_id[route]


There are 1 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> to inserted in DB


KeyError: KharkivTransportRoute(id=None)

In [8]:
i, (vehicle, route)

(732,
 (KharkivTransportVehicle(id=None, imei='353976014249548', name='None', owner_id=-1),
  KharkivTransportRoute(id=None)))

In [9]:
route.id, route.name, route.type

(None, '27', 1)

In [None]:
route.__

In [10]:
[(r.id, r.name, r.type) for r in monitor.objects_unique[route.__class__]]

[(8, '8', 1),
 (25, '28', 1),
 (9, '208', 3),
 (16, '3', 2),
 (23, '75', 3),
 (6, '204', 3),
 (22, '68', 3),
 (15, '206', 3),
 (14, '212', 3),
 (19, '51', 2),
 (-1, 'UNKNOWN', -1),
 (17, '1', 2),
 (21, '20', 1),
 (18, '6', 2),
 (7, '45', 2),
 (12, '35', 2),
 (11, '46', 2),
 (1, '52', 2),
 (2, '55', 2),
 (5, '24', 2),
 (20, '13', 2),
 (13, '72', 3),
 (10, '34', 2),
 (24, '27', 2)]

NameError: name 'batch_df' is not defined

In [7]:
#     df_cur = df_u[:]

batch_tqdm = tqdm(df_cur.groupby(np.arange(len(df_cur)) // BATCH_SIZE), 
                  miniters=40)
for batch_number, batch_df in batch_tqdm:
    batch_df = batch_df.where(pd.notnull(batch_df), None)
    monitor.write_to_db(batch_df.to_dict('records'))

  0%|          | 0/124 [00:00<?, ?it/s]

There are 2 new <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> to inserted in DB
Error raised while select <class 'PTETA.utils.transport.kharkiv.KharkivTransportRoute.KharkivTransportRoute'> '[KharkivTransportRoute(id=None, name='52', type=2), KharkivTransportRoute(id=None, name='55', type=2)]'





SyntaxError: syntax error at or near "AND"
LINE 1: SELECT "name", "type" FROM kharkiv.route  WHERE ( AND "name"...
                                                          ^


In [32]:
from psycopg2.extras import RealDictCursor
connection_config= dict({
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD']
})
conn = psycopg2.connect(**connection_config)

# cur = connection.cursor(cursor_factory = RealDictCursor)

In [54]:
with conn.cursor() as cur : 
    cur.execute('SELECT id, "name", "type" FROM kharkiv.route;')
    columns = [desc[0] for desc in cur.description]
    real_dict = [dict(zip(columns, row)) for row in cur.fetchall()]
    print(real_dict)

[{'id': -1, 'name': 'UNKNOWN', 'type': -1}, {'id': 1, 'name': '52', 'type': 2}, {'id': 2, 'name': '55', 'type': 2}, {'id': 3, 'name': '24', 'type': 2}, {'id': 4, 'name': '204', 'type': 3}, {'id': 5, 'name': '45', 'type': 2}, {'id': 6, 'name': '8', 'type': 1}, {'id': 7, 'name': '208', 'type': 3}, {'id': 8, 'name': '34', 'type': 2}, {'id': 9, 'name': '46', 'type': 2}, {'id': 10, 'name': '35', 'type': 2}, {'id': 11, 'name': '72', 'type': 3}, {'id': 12, 'name': '212', 'type': 3}, {'id': 13, 'name': '206', 'type': 3}, {'id': 14, 'name': '3', 'type': 2}, {'id': 15, 'name': '1', 'type': 2}, {'id': 16, 'name': '6', 'type': 2}, {'id': 17, 'name': '51', 'type': 2}, {'id': 18, 'name': '13', 'type': 2}, {'id': 19, 'name': '20', 'type': 1}, {'id': 20, 'name': '68', 'type': 3}, {'id': 21, 'name': '75', 'type': 3}, {'id': 22, 'name': '27', 'type': 2}, {'id': 23, 'name': '28', 'type': 1}, {'id': 24, 'name': '27', 'type': 1}, {'id': 25, 'name': '281', 'type': 3}, {'id': 26, 'name': '119', 'type': 2}, {

In [55]:
**real_dict

SyntaxError: invalid syntax (2672971826.py, line 1)

['id', 'name']

In [37]:
a = [r for r in cur]

In [40]:
[item for item in a[0]]

['id', 'name']

In [43]:
b = a[0]

In [45]:
b.items()

odict_items([('id', -1), ('name', 'UNKNOWN')])

## TransportRoute

In [7]:
TransportRoute.__insert_columns__().replace('"', '').split(', ')[1:]

['route_name', 'route_colour']

In [28]:
cols = ["routeId", 'routeName', 'routeColour']
route_list = [TransportRoute.from_response_row(row) for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(route_list), route_list[:3]
TransportRoute.insert_many_in_table(conn, route_list)

In [32]:
TransportRoute.get_table(conn)

[TransportRoute(id=37, name='T', colour='coral'),
 TransportRoute(id=31, name='6/6a', colour='deeppink'),
 TransportRoute(id=21, name='38', colour='deeppink'),
 TransportRoute(id=41, name='10A', colour='black'),
 TransportRoute(id=20, name='A', colour='navy'),
 TransportRoute(id=42, name='39', colour='coral'),
 TransportRoute(id=23, name='19', colour='teal'),
 TransportRoute(id=19, name='10', colour='black'),
 TransportRoute(id=11, name='9', colour='magenta'),
 TransportRoute(id=2, name='11', colour='green'),
 TransportRoute(id=6, name='4', colour='magenta'),
 TransportRoute(id=27, name='3/3a', colour='green'),
 TransportRoute(id=16, name='2', colour='green'),
 TransportRoute(id=4, name='5', colour='orange'),
 TransportRoute(id=3, name='12', colour='blue'),
 TransportRoute(id=12, name='20', colour='maroon'),
 TransportRoute(id=7, name='6', colour='sienna'),
 TransportRoute(id=10, name='34', colour='navy'),
 TransportRoute(id=45, name='1', colour='navy'),
 TransportRoute(id=9, name='27'

In [13]:
# df_sum[TransportRoute.__insert_columns__().replace('"', '').split(', ')[1:]].value_counts()

In [27]:
conn.rollback()

In [45]:
SQL_big_req = " ".join([create_sql_req(t) for t in trans_vehicle_list])

In [29]:
route_list = TransportRoute.get_table(conn)[:]
len(route_list)

31

In [30]:
for r in route_list[::2]: 
    r.id += 1
TransportRoute.are_in_table(conn, route_list)

[False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False]

In [31]:
TransportRoute.are_in_table(conn, route_list)

[False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False]

In [14]:
for r in route_list: 
    r.name += "_"
    r.id += 100

In [16]:
# sql = f"""INSERT INTO pteta.route("id", "routeName", "routeColour") VALUES """ + \
#               ", ".join([f"""({obj.id}, '{obj.name}', '{obj.colour}')"""
#                         for obj in route_list]) + ";"
# sql

In [17]:
# route_list

In [18]:
TransportRoute.insert_many_in_table(conn, route_list)

## Test TransportVehicle

In [33]:
TransportVehicle.__insert_columns__()

'"imei", "name", "bus_number", "remark", "perev_id"'

In [45]:
cols = ["imei", "name", "busNumber", "remark", "perevId"]
vehicle_list = [TransportVehicle.from_response_row(row) for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(vehicle_list), vehicle_list[:3]
TransportVehicle.insert_many_in_table(conn, vehicle_list)

In [47]:
vehicle_list = TransportVehicle.get_table(conn)
len(vehicle_list)

99

In [48]:
TransportVehicle.are_in_table(conn, vehicle_list[:5])

[True, True, True, True, True]

In [49]:
vehicle_list[5].is_in_table(conn)

True

In [50]:
obj = vehicle_list[0]
obj.imei += "-"
obj.is_in_table(conn)

False

In [51]:
obj.insert_in_table(conn)

In [52]:
for v in vehicle_list[:10]: 
    v.imei += '_'

In [15]:
TransportVehicle.insert_many_in_table(conn, vehicle_list[:10])

In [31]:
obj.insert_in_table(conn)
obj.is_in_table(conn)

In [14]:
vehicle_list[0].id = None
print(vehicle_list[0])
vehicle_list[0].update_id_from_table(conn)
print(vehicle_list[0])

TransportVehicle(id=None, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)
TransportVehicle(id=1, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)


In [34]:
for v in vehicle_list[:10]: 
    v.imei += '__'

vehicle_list[:13]

[TransportVehicle(id=222, imei='355227045600830-__', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37),
 TransportVehicle(id=2, imei='355227046451662__', name='H76', busNumber='350', remark='Тролейбус 350 DNSNTNK', perevId=6, routeId=31),
 TransportVehicle(id=3, imei='355227045369527__', name='A6', busNumber='3557', remark='3557 DNSNK ', perevId=12, routeId=21),
 TransportVehicle(id=4, imei='355227045540176__', name='A83', busNumber='5150', remark=' 5150 DNS', perevId=7, routeId=41),
 TransportVehicle(id=5, imei='355227046453387__', name='H75', busNumber='3627', remark='3627 DNSNTNK', perevId=1, routeId=20),
 TransportVehicle(id=6, imei='355228042084283__', name='A207', busNumber='1032', remark='1032 DNSNK', perevId=13, routeId=42),
 TransportVehicle(id=7, imei='355227045371655__', name='A1', busNumber='6513', remark='6513 DNSNK', perevId=7, routeId=23),
 TransportVehicle(id=8, imei='355227046451407__', name='H68', busNumber='0855', remark='0855 DNSNTNK

## TransportOperator

In [41]:
operator_list = TransportOperator.get_table(conn)
len(operator_list)

6

In [37]:
TransportOperator.__insert_columns__()

'id, "perev_name"'

In [40]:
cols = ['perevId', "perevName"]
operator_list = [TransportOperator.from_response_row(row) 
              for row in df_sum[cols].drop_duplicates().to_dict('records')]
len(operator_list), operator_list[:3]
TransportOperator.insert_many_in_table(conn, operator_list)

In [42]:
TransportOperator.are_in_table(conn, operator_list[:])

[True, True, True, True, True, True]

In [43]:
operator_list[0].is_in_table(conn)

True

In [44]:
obj = operator_list[0]
obj.id += 100
obj.name = "-_-" + obj.name
obj.is_in_table(conn)

False

In [14]:
obj.insert_in_table(conn)

In [15]:
for op in operator_list: 
    op.id += 100
    op.name = "-_-" + op.name
    
TransportOperator.insert_many_in_table(conn, operator_list)

## TransportAVLData

In [9]:
avl_data_list = TransportAVLData.get_table(conn)

In [11]:
# avl_data_list

In [14]:
%%timeit
TransportAVLData.are_in_table(conn, avl_data_list)

57.4 ms ± 3.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
%%timeit
avl_data_list[0].is_in_table(conn)

42.1 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
