In [1]:
from datetime import datetime, timedelta
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd

from collections import Counter

## Analyse data

In [2]:
# Disabled loader: reads the last 10 CSV tables from ../data/local/tables/ into df_sum.
# NOTE(review): while this cell stays commented out, df_sum is never defined.
# NOTE(review): the `else` branch discards the result of df_sum.append(...), so even when
# enabled only the first table would be kept (and DataFrame.append was removed in pandas 2.0);
# collect the frames in a list and call pd.concat once before re-enabling.
# df_sum = None
# for df_path in tqdm(list(Path("../data/local/tables/").iterdir())[-10:]):
#     if df_sum is None:
#         df_sum = pd.read_csv(df_path, encoding='utf-8', low_memory=False)
#     else :
#         df_sum.append(pd.read_csv(df_path, encoding='utf-8', low_memory=False))

In [3]:
# Show the duplicated rows of df_sum.
# NOTE(review): df_sum is only assigned in the loader cell above, which is fully
# commented out, so this cell raises NameError on a fresh kernel.
df_sum[df_sum.duplicated()]

NameError: name 'df_sum' is not defined

In [4]:
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
import pytz

import json
import requests
import os
import time
# from tqdm.auto import tqdm


from PTETA.utils.transport.TransportOperator import TransportOperator
from PTETA.utils.transport.TransportRoute import TransportRoute
from PTETA.utils.transport.TransportVehicle import TransportVehicle
from PTETA.utils.transport.TransportAVLData import TransportAVLData
from psycopg2.extensions import connection as Connection
import psycopg2

In [5]:
from apscheduler.schedulers.background import BackgroundScheduler

# Last tracker snapshot seen by TransGPSCVMonitor.request_data, keyed by IMEI.
# Module-level so that consecutive calls can be diffed against each other.
response_prev = dict()

# Drop a previous definition of the class so re-running this cell rebinds it cleanly.
try:
    del TransGPSCVMonitor
except NameError:
    # Fresh kernel: the class has not been defined yet, nothing to delete.
    pass

class TransGPSCVMonitor:
    """Polls the trans-gps.cv.ua tracker endpoint and caches the DB lookup tables.

    On construction it opens a psycopg2 connection and loads the operator, route
    and vehicle tables into ``<entity>_to_id`` / ``id_to_<entity>`` dictionaries.
    ``request_data`` fetches one snapshot and returns only the records whose
    ``gpstime`` changed since the previous snapshot; ``run`` schedules it
    periodically with APScheduler.
    """

    # Endpoint used when no REQUEST_URI is supplied.
    DEFAULT_REQUEST_URI = 'http://www.trans-gps.cv.ua/map/tracker/?selectedRoutesStr='
    # Seconds to wait for the tracker before giving up on a single request.
    REQUEST_TIMEOUT = 5

    # Open DB connection; assigned in __init__. The annotation is quoted so the
    # class body does not need the psycopg2 name at definition time.
    db_connection: "Connection" = None

    # Lookup caches; each reload* method replaces the pair with fresh dicts.
    operator_to_id: dict = dict()
    id_to_operator: dict = dict()

    route_to_id: dict = dict()
    id_to_route: dict = dict()

    vehicle_to_id: dict = dict()
    id_to_vehicle: dict = dict()

    def __init__(self, connection_config: dict, **kwarg) -> None:
        """Connect to the database and load the lookup caches.

        Args:
            connection_config: keyword arguments forwarded to ``psycopg2.connect``.
            **kwarg: optional overrides:
                REQUEST_URI    - tracker endpoint (default ``DEFAULT_REQUEST_URI``);
                START_DATE     - string in ``datetime_format`` (default: now - 1 day);
                                 stored only, ``run`` does not use it;
                END_DATE       - string in ``datetime_format`` (default: now + 30 days),
                                 passed to the scheduler as the job's end date;
                REQ_TIME_DELTA - polling interval in seconds (default 1.1).
        """
        self.db_connection = psycopg2.connect(**connection_config)

        self.reloadTransportOperators()
        self.reloadTransportRoutes()
        self.reloadTransportVehicles()

        self.datetime_format = '%Y-%m-%d %H:%M:%S'

        self.REQUEST_URI = kwarg.get('REQUEST_URI', self.DEFAULT_REQUEST_URI)
        self.START_DATE = \
            kwarg.get('START_DATE', (datetime.now() - timedelta(days=1)).strftime(self.datetime_format))
        self.END_DATE = \
            kwarg.get('END_DATE', (datetime.now() + timedelta(days=30)).strftime(self.datetime_format))
        self.REQ_TIME_DELTA = kwarg.get('REQ_TIME_DELTA', 1.1)

    def reloadTransportOperators(self):
        """Rebuild the operator caches from ``TransportOperator.get_table``."""
        operator_list = TransportOperator.get_table(self.db_connection)
        self.operator_to_id = {operator: operator.id for operator in operator_list}
        self.id_to_operator = {operator.id: operator for operator in operator_list}

    def reloadTransportRoutes(self):
        """Rebuild the route caches from ``TransportRoute.get_table``."""
        route_list = TransportRoute.get_table(self.db_connection)
        self.route_to_id = {route: route.id for route in route_list}
        self.id_to_route = {route.id: route for route in route_list}

    def reloadTransportVehicles(self):
        """Rebuild the vehicle caches from ``TransportVehicle.get_table``."""
        vehicle_list = TransportVehicle.get_table(self.db_connection)
        self.vehicle_to_id = {vehicle: vehicle.id for vehicle in vehicle_list}
        self.id_to_vehicle = {vehicle.id: vehicle for vehicle in vehicle_list}

    @classmethod
    def request_data(cls, request_uri=DEFAULT_REQUEST_URI):
        """Fetch one tracker snapshot and return the records that changed.

        A record is returned when its IMEI was absent from the previous snapshot
        or its ``gpstime`` differs from the previous one. Every returned record
        gets a timezone-aware UTC ``response_datetime``. The module-level
        ``response_prev`` is replaced by the new snapshot only on success.

        Args:
            request_uri: URL of the tracker endpoint.

        Returns:
            list: changed records; an empty list when the request fails or the
            body is not a JSON object (the error is printed, never raised).
        """
        global response_prev

        dt_now = datetime.now()
        # Same aware-UTC timestamp as utcnow().replace(tzinfo=pytz.utc), without the
        # deprecated utcnow().
        dt_tz_now = datetime.now(pytz.utc)
        stamp = dt_now.strftime('%Y-%m-%d %H;%M;%S')

        # Bound before the try so the failure path can still report and return it.
        optimized_data_list = list()

        try:
            # Without a timeout requests can block forever and Timeout is never raised.
            response = requests.get(request_uri, timeout=cls.REQUEST_TIMEOUT)
            # Turn 4xx/5xx answers into HTTPError instead of parsing an error page.
            response.raise_for_status()
            response_cur = json.loads(response.text)
            if not isinstance(response_cur, dict):
                raise ValueError(
                    f"expected a JSON object keyed by imei, got {type(response_cur).__name__}")
        except (requests.Timeout, requests.ConnectionError, requests.HTTPError,
                ValueError) as err:
            # ValueError also covers json.JSONDecodeError (empty or malformed body).
            print(f"{stamp} : error while trying to GET data\n"
                  f"\t{err}\n")
        else:
            for imei, record in response_cur.items():
                is_new = imei not in response_prev
                if is_new or response_prev[imei]['gpstime'] != record['gpstime']:
                    record['response_datetime'] = dt_tz_now
                    optimized_data_list.append(record)

            response_prev = response_cur

        print(stamp, len(optimized_data_list))
        return optimized_data_list

    def run(self):
        """Poll the configured endpoint every ``REQ_TIME_DELTA`` seconds until interrupted.

        Blocks the calling thread, printing the current time every 10 seconds;
        a ``KeyboardInterrupt`` shuts the scheduler down.
        """
        scheduler = BackgroundScheduler(job_defaults={'max_instances': 8})
        scheduler.add_job(
            self.request_data,
            'interval',
            # Poll the URI this instance was configured with, not the method default.
            kwargs={'request_uri': self.REQUEST_URI},
            seconds=self.REQ_TIME_DELTA,
            end_date=self.END_DATE,
            id='listener')

        scheduler.start()

        try:
            print('Scheduler started!')
            while True:
                time.sleep(10)
                print(datetime.now())
        except KeyboardInterrupt:
            if scheduler.state:
                scheduler.shutdown()

In [6]:
# DB settings: host and password come from the environment, never from the notebook.
connection_config = {
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD'],
}

# Constructing the monitor opens the DB connection and loads the lookup caches.
monitor = TransGPSCVMonitor(connection_config)

# Fetch one snapshot and show how many records it returned.
req = monitor.request_data()
len(req)

2022-12-04 02;24;01 7


7

In [7]:
def decompose(req):
    """Split one tracker record into operator, route, vehicle and AVL objects.

    Args:
        req: mapping for a single record; the keys read are 'perevId', 'perevName',
            'routeId', 'routeName', 'routeColour', 'imei', 'name', 'busNumber',
            'remark', 'lat', 'lng', 'speed', 'orientation', 'gpstime', 'inDepo'
            and 'response_datetime'.

    Returns:
        tuple: ``(TransportOperator, TransportRoute, TransportVehicle, TransportAVLData)``.
        The vehicle's first argument and the AVL record's seventh argument are
        passed as ``None``.
    """
    operator = TransportOperator(req['perevId'], req['perevName'])
    route = TransportRoute(req['routeId'], req['routeName'], req['routeColour'])

    vehicle_keys = ('imei', 'name', 'busNumber', 'remark', 'perevId', 'routeId')
    vehicle = TransportVehicle(None, *(req[key] for key in vehicle_keys))

    avl_keys = ('lat', 'lng', 'speed', 'orientation', 'gpstime', 'inDepo')
    avl_data = TransportAVLData(
        *(req[key] for key in avl_keys),
        None,
        req['response_datetime'],
    )

    return operator, route, vehicle, avl_data

def _ensure_in_db(conn, entity, known, reload_cache, label, verbose=0):
    """Make sure ``entity`` exists in its table and refresh the matching cache.

    Nothing is queried when ``entity`` is already a key of ``known``. Otherwise it is
    inserted if the table lacks it, ``reload_cache`` is called, and the presence of
    the entity in the table is checked again.

    Raises:
        ValueError: if the entity is still not in the table afterwards.
    """
    if entity in known:
        if verbose > 0:
            print(f"It ok. {label} is in ")
        return

    if not entity.is_in_table(conn):
        entity.insert_in_table(conn)
    reload_cache()
    if verbose > 0:
        print(f"New {label} inserted in DB")
    if not entity.is_in_table(conn):
        raise ValueError(f"{label} is still missing from the DB after insert: {entity!r}")


def write_to_DB(conn, req, verbose=0):
    """Persist one tracker record: operator, route, vehicle, then the AVL record.

    Uses the module-level ``monitor`` for its lookup caches and reload methods.

    Args:
        conn: DB connection handed to the entity objects' table methods.
        req: single tracker record, as accepted by ``decompose``.
        verbose: values above 0 print what happened at every step.

    Raises:
        ValueError: if an operator, route or vehicle cannot be found in the DB
            after the insert attempt.
    """
    trans_operator, trans_route, trans_vehicle, trans_AVLData = decompose(req)

    # Each cache attribute is read right before its step, so it is always the
    # current dict even though the reload methods replace these dicts.
    _ensure_in_db(conn, trans_operator, monitor.operator_to_id,
                  monitor.reloadTransportOperators, "trans_operator", verbose)
    _ensure_in_db(conn, trans_route, monitor.route_to_id,
                  monitor.reloadTransportRoutes, "trans_route", verbose)
    _ensure_in_db(conn, trans_vehicle, monitor.vehicle_to_id,
                  monitor.reloadTransportVehicles, "trans_vehicle", verbose)

    # The AVL record references the vehicle by its DB id, so resolve that first.
    trans_vehicle.update_id_from_table(conn)
    trans_AVLData.vehicleId = trans_vehicle.id

    if not trans_AVLData.is_in_table(conn):
        trans_AVLData.insert_in_table(conn)
        if verbose > 0:
            print("trans_AVLData inserted in DB")
    elif verbose > 0:
        print("trans_AVLData is already in DB")
    

In [8]:
# Second DB connection, used directly by the scratch cells below.
# It is built from the same host/database/user/password values as connection_config.
conn = psycopg2.connect(
    host=os.environ['RDS_HOSTNAME'],
    database="pteta_db",
    user="postgres",
    password=os.environ['RDS_PTETA_DB_PASSWORD'])

In [9]:
decompose(req[0])[3]

TransportAVLData(lat=48.265935, lng=25.98969, speed='000.0', orientation='000.00', gpstime='2022-12-04 01:22:07', inDepo=True, vehicleId=None, response_datetime=datetime.datetime(2022, 12, 4, 0, 23, 13, 561571, tzinfo=<UTC>))

In [12]:
# len([decompose(r)[-1] for r in req 
#      if decompose(r)[-1].imei not in set([k.imei for k in monitor.vehicle_to_id.keys() ])])

compare by TransportVehicle
42 not in DB
55 in DB 

compare by imei
18 not in DB
79 in DB 

In [39]:
# decompose returns (operator, route, vehicle, avl_data); the vehicle is at index 2.
# The previous [-1] now selects the AVL record, not the vehicle shown in the output.
decompose(req[0])[2]

TransportVehicle(id=None, imei='355227046507810', name='A199', busNumber='0359', remark='0359 DNSNK', perevId=13, routeId=11)

In [23]:
# Persist every record of the current snapshot.
# tqdm wraps the list itself (not enumerate) so the bar knows the total.
for i, row in enumerate(tqdm(req)):
    write_to_DB(conn, row)

97it [00:14,  6.90it/s]


In [22]:
# Vehicle object (third item of each decompose result) for every record in the snapshot.
trans_vehicle_list = [parts[2] for parts in map(decompose, req)]
len(trans_vehicle_list)

100

In [27]:
# trans_AVLData_list

In [41]:
# Look up the cached DB id of the first record's vehicle.
# decompose returns (operator, route, vehicle, avl_data), so the vehicle is index 2;
# [-1] would use the AVL record as the key.
monitor.vehicle_to_id[decompose(req[0])[2]]

149

In [39]:
%%timeit
a = [t.is_in_table(conn) for t in trans_vehicle_list]

7.12 s ± 766 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [70]:
conn.rollback()

In [45]:
# Concatenate one SQL statement per vehicle into a single space-separated string.
# NOTE(review): create_sql_req is not defined in any visible cell — confirm where it
# comes from, otherwise this cell fails on a fresh kernel.
SQL_big_req = " ".join([create_sql_req(t) for t in trans_vehicle_list])

In [13]:
route_list = TransportRoute.get_table(conn)[:]
len(route_list)

33

In [24]:
%%timeit
[r.is_in_table(conn) for r in route_list]

2.36 s ± 389 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
TransportRoute.is_set_in_table(conn, route_list)

66.7 ms ± 5.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
# for r in route_list[::2]: 
#     r.id += 1
# TransportRoute.are_in_table(conn, route_list)

In [19]:
# TransportRoute.are_in_table(conn, route_list)

In [14]:
# Turn the loaded routes into new test rows: append "_" to each name and shift each id by 100.
# NOTE: this mutates route_list in place, so re-running the cell appends another "_"
# and adds another 100 each time.
for r in route_list: 
    r.name += "_"
    r.id += 100

In [16]:
# sql = f"""INSERT INTO pteta.route("id", "routeName", "routeColour") VALUES """ + \
#               ", ".join([f"""({obj.id}, '{obj.name}', '{obj.colour}')"""
#                         for obj in route_list]) + ";"
# sql

In [17]:
# route_list

In [18]:
TransportRoute.insert_many_in_table(conn, route_list)

## Test TransportVehicle

In [11]:
vehicle_list = TransportVehicle.get_table(conn)
len(vehicle_list)

201

In [16]:
# Alter the first vehicle's imei and check whether the altered object is in the table.
# NOTE: obj is the same object as vehicle_list[0], so the list entry is modified too,
# and every re-run appends another "-".
obj = vehicle_list[0]
obj.imei += "-"
obj.is_in_table(conn)

False

In [17]:
%%timeit
[r.is_in_table(conn) for r in vehicle_list]

14.4 s ± 1.81 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [20]:
%%timeit
TransportVehicle.are_in_table(conn, vehicle_list)

99.8 ms ± 36.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
# TransportVehicle.are_in_table(conn, vehicle_list)

In [24]:
obj

TransportVehicle(id=222, imei='355227045600830-', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)

In [31]:
obj.insert_in_table(conn)
obj.is_in_table(conn)

In [10]:
TransportVehicle.insert_many_in_table(conn, vehicle_list[:10])

INSERT INTO pteta.vehicle("imei", "name", "busNumber", "remark", "perevId", "routeId") VALUES('355227045600830', 'A178', '310', 'Тролейбус 310 DNSNK', 6,  37), ('355227046451662', 'H76', '350', 'Тролейбус 350 DNSNTNK', 6,  31), ('355227045369527', 'A6', '3557', '3557 DNSNK ', 12,  21), ('355227045540176', 'A83', '5150', ' 5150 DNS', 7,  41), ('355227046453387', 'H75', '3627', '3627 DNSNTNK', 1,  20), ('355228042084283', 'A207', '1032', '1032 DNSNK', 13,  42), ('355227045371655', 'A1', '6513', '6513 DNSNK', 7,  23), ('355227046451407', 'H68', '0855', '0855 DNSNTNK', 13,  42), ('355227046578332', 'A189', '6518', '6518 DNSNK ', 7,  19), ('355228043199379', 'H99', '1176', '1176 DNSNTNK', 12,  21);


In [14]:
vehicle_list[0].id = None
print(vehicle_list[0])
vehicle_list[0].update_id_from_table(conn)
print(vehicle_list[0])

TransportVehicle(id=None, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)
TransportVehicle(id=1, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)


In [34]:
# Append "__" to the imei of the first ten vehicles, then display the first thirteen.
# NOTE: in-place and cumulative — suffixes added by earlier cells or earlier runs of
# this cell remain on the objects.
for v in vehicle_list[:10]: 
    v.imei += '__'

vehicle_list[:13]

[TransportVehicle(id=222, imei='355227045600830-__', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37),
 TransportVehicle(id=2, imei='355227046451662__', name='H76', busNumber='350', remark='Тролейбус 350 DNSNTNK', perevId=6, routeId=31),
 TransportVehicle(id=3, imei='355227045369527__', name='A6', busNumber='3557', remark='3557 DNSNK ', perevId=12, routeId=21),
 TransportVehicle(id=4, imei='355227045540176__', name='A83', busNumber='5150', remark=' 5150 DNS', perevId=7, routeId=41),
 TransportVehicle(id=5, imei='355227046453387__', name='H75', busNumber='3627', remark='3627 DNSNTNK', perevId=1, routeId=20),
 TransportVehicle(id=6, imei='355228042084283__', name='A207', busNumber='1032', remark='1032 DNSNK', perevId=13, routeId=42),
 TransportVehicle(id=7, imei='355227045371655__', name='A1', busNumber='6513', remark='6513 DNSNK', perevId=7, routeId=23),
 TransportVehicle(id=8, imei='355227046451407__', name='H68', busNumber='0855', remark='0855 DNSNTNK