In [7]:
from datetime import datetime, timedelta
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import os

from collections import Counter

## Analyse data

In [17]:
# df_sum = None
# for df_path in tqdm(list(Path("../data/local/tables/").iterdir())[-10:]):
#     if df_sum is None: 
#         df_sum = pd.read_csv(df_path, encoding='utf-8', low_memory=False)
#     else : 
#         df_sum.append(pd.read_csv(df_path, encoding='utf-8', low_memory=False))

In [3]:
# Show the rows of df_sum that pandas flags as duplicates.
# NOTE(review): df_sum is only built by the loading loop in the previous cell, which is
# commented out, so this cell raises NameError on a fresh kernel.
df_sum[df_sum.duplicated()]

NameError: name 'df_sum' is not defined

In [4]:
from datetime import datetime, timedelta
import pytz

import json
import requests
import time

from PTETA.utils.transport.TransportOperator import TransportOperator
from PTETA.utils.transport.TransportRoute import TransportRoute
from PTETA.utils.transport.TransportVehicle import TransportVehicle
from PTETA.utils.transport.TransportAVLData import TransportAVLData
from psycopg2.extensions import connection as Connection
import psycopg2

from apscheduler.schedulers.background import BackgroundScheduler
from PTETA.utils.transport.BaseDBAccessDataclass import BaseDBAccessDataclass
from typing import List, Union
from PTETA.utils.transport.BaseDBAccessDataclass import BaseDBAccessDataclass

In [23]:
# Last tracker payload seen by TransGPSCVMonitor.request_data, which reads and
# replaces this module-level dict to work out which records changed between polls.
response_prev = dict()

# Drop a class definition left over from a previous run of this cell so the
# definition below starts from a clean name.
try:
    del TransGPSCVMonitor
except NameError:
    # First run in this kernel: nothing to delete.
    pass

class TransGPSCVMonitor:
    """Poll the trans-gps.cv.ua tracker and store new AVL records in PostgreSQL.

    The monitor keeps in-memory copies of the operator, route and vehicle
    tables so that every response can be checked for objects the database
    has not seen yet before the AVL rows themselves are inserted.

    Annotations that name driver/project types are quoted so they are not
    evaluated while the class body executes.
    """

    db_connection: "Connection" = None

    # Object -> database id lookups, rebuilt by the reload_* methods.
    operator_to_id: dict = dict()
    route_to_id: dict = dict()
    vehicle_to_id: dict = dict()

    # Class -> set of objects already present in the database. This is only a
    # placeholder: every instance gets its own dict in __init__, so monitors
    # bound to different databases do not overwrite each other's cache.
    objects_unique: dict = dict()

    def __init__(self, connection_config: dict, **kwarg: dict) -> None:
        """Connect to the database and load the reference tables.

        Args:
            connection_config: keyword arguments forwarded to ``psycopg2.connect``.
            **kwarg: optional overrides ``REQUEST_URI``, ``START_DATE``,
                ``END_DATE`` and ``REQ_TIME_DELTA`` (polling interval in seconds).
        """
        self.db_connection = psycopg2.connect(**connection_config)

        self.objects_unique = {}
        self.reload_operators()
        self.reload_routes()
        self.reload_vehicles()

        self.datetime_format = '%Y-%m-%d %H:%M:%S'

        self.REQUEST_URI = \
            kwarg.get('REQUEST_URI', 'http://www.trans-gps.cv.ua/map/tracker/?selectedRoutesStr=')
        self.START_DATE = \
            kwarg.get('START_DATE', (datetime.now() - timedelta(days=1)).strftime(self.datetime_format))
        self.END_DATE = \
            kwarg.get('END_DATE', (datetime.now() + timedelta(days=30)).strftime(self.datetime_format))
        self.REQ_TIME_DELTA = kwarg.get('REQ_TIME_DELTA', 1.1)

    def reload_operators(self):
        """Refresh the cached operator set and operator -> id mapping from the database."""
        operator_list = TransportOperator.get_table(self.db_connection)
        self.objects_unique[TransportOperator] = set(operator_list)
        self.operator_to_id = {operator: operator.id for operator in operator_list}

    def reload_routes(self):
        """Refresh the cached route set and route -> id mapping from the database."""
        route_list = TransportRoute.get_table(self.db_connection)
        self.objects_unique[TransportRoute] = set(route_list)
        self.route_to_id = {route: route.id for route in route_list}

    def reload_vehicles(self):
        """Refresh the cached vehicle set and vehicle -> id mapping from the database."""
        vehicle_list = TransportVehicle.get_table(self.db_connection)
        self.objects_unique[TransportVehicle] = set(vehicle_list)
        self.vehicle_to_id = {vehicle: vehicle.id for vehicle in vehicle_list}

    @classmethod
    def request_data(cls, request_uri='http://www.trans-gps.cv.ua/map/tracker/?selectedRoutesStr=',
                     timeout=10):
        """Fetch the tracker payload and return the records that changed since the last call.

        The payload is expected to be a JSON object keyed by device id whose
        values carry a ``gpstime`` field. A record is returned when its key is
        new or its ``gpstime`` differs from the previous payload; each returned
        record is stamped with ``response_datetime``. The previous payload is
        kept in the module-level ``response_prev``.

        Args:
            request_uri: URL of the tracker endpoint.
            timeout: seconds to wait for the HTTP response; without it a hung
                connection would block the job indefinitely.

        Returns:
            A list of record dicts. The list is empty when nothing changed, the
            request failed, or the body could not be parsed.
        """
        global response_prev

        dt_now = datetime.now()
        dt_tz_now = datetime.now(tz=pytz.utc)

        try:
            request = requests.get(request_uri, timeout=timeout)
            if (request is None) or (request.text is None):
                return []
            # NOTE(review): the HTTP status is not checked, so requests.HTTPError
            # is never raised here; consider request.raise_for_status().
            response_cur = json.loads(request.text)
        except (requests.Timeout, requests.ConnectionError, requests.HTTPError, ValueError) as err:
            # ValueError covers a body that is not valid JSON.
            print(f"{dt_now.strftime('%Y-%m-%d %H;%M;%S')} : error while trying to GET data\n"
                  f"\t{err}\n")
            # Keep response_prev untouched so the next successful poll is
            # still compared against the last good payload.
            return []

        optimized_data_list = []
        for imei, record in response_cur.items():
            if imei not in response_prev or response_prev[imei]['gpstime'] != record['gpstime']:
                record['response_datetime'] = dt_tz_now
                optimized_data_list.append(record)

        response_prev = response_cur
        return optimized_data_list

    def get_new_objs(self, obj_list: List["BaseDBAccessDataclass"]) -> List["BaseDBAccessDataclass"]:
        """Return the distinct objects from ``obj_list`` that are not in the local cache.

        Objects whose class has no cache entry are all treated as new.
        """
        return [obj for obj in set(obj_list)
                if obj not in self.objects_unique.get(obj.__class__, ())]

    def update_db(self, obj_list: List["BaseDBAccessDataclass"]):
        """Insert the objects from ``obj_list`` that the database does not contain yet.

        All objects are assumed to share the class of the first element. An
        empty list is a no-op, and the bulk insert is skipped when every
        object is already stored.
        """
        if not obj_list:
            return

        current_class = obj_list[0].__class__
        are_in_db_list = current_class.are_in_table(self.db_connection, obj_list)

        obj_to_insert = [obj
                         for obj, is_in in zip(obj_list, are_in_db_list)
                         if not is_in]

        if obj_to_insert:
            current_class.insert_many_in_table(self.db_connection, obj_to_insert)

    @classmethod
    def decompose_response(cls, response: List[dict]) -> tuple:
        """Split response rows into parallel operator, route, vehicle and AVL lists.

        Returns:
            ``(operator_list, route_list, vehicle_list, avl_data_list)``; index
            ``i`` of every list is built from ``response[i]``.
        """
        operator_list, route_list = list(), list()
        vehicle_list, avl_data_list = list(), list()

        for row in response:
            operator_list.append(TransportOperator.from_response_row(row))
            route_list.append(TransportRoute.from_response_row(row))
            vehicle_list.append(TransportVehicle.from_response_row(row))
            avl_data_list.append(TransportAVLData.from_response_row(row))

        return operator_list, route_list, vehicle_list, avl_data_list

    def write_to_db(self, response):
        """Persist one batch of tracker records.

        Unseen operators, routes and vehicles are inserted first (in that
        order) and the matching cache is reloaded; then every AVL row gets the
        database id of its vehicle and the rows are bulk-inserted.
        """
        if not response:
            # Nothing fetched (or the request failed): no rows to insert.
            return

        operator_list, route_list, vehicle_list, avl_data_list = self.decompose_response(response)

        for obj_list in [operator_list, route_list, vehicle_list]:
            new_obj = self.get_new_objs(obj_list)
            if not new_obj:
                continue

            print(f"There are {len(new_obj)} new {new_obj[0].__class__} to inserted in DB")
            self.update_db(new_obj)
            if isinstance(new_obj[0], TransportOperator):
                self.reload_operators()
            elif isinstance(new_obj[0], TransportRoute):
                self.reload_routes()
            elif isinstance(new_obj[0], TransportVehicle):
                self.reload_vehicles()

        for vehicle, avl_data in zip(vehicle_list, avl_data_list):
            avl_data.vehicleId = self.vehicle_to_id[vehicle]

        TransportAVLData.insert_many_in_table(self.db_connection, avl_data_list)

    def run(self):
        """Poll the tracker on a background scheduler until interrupted.

        NOTE(review): the job only calls ``request_data``; its return value is
        discarded by the scheduler, so nothing is written to the database here.
        """
        scheduler = BackgroundScheduler(job_defaults={'max_instances': 8})
        scheduler.add_job(
            self.request_data,
            'interval',
            # Use the URI configured on this instance rather than the method default.
            args=[self.REQUEST_URI],
            seconds=self.REQ_TIME_DELTA,
            end_date=self.END_DATE,
            id='listener')

        scheduler.start()

        try:
            print('Scheduler started!')
            while True:
                time.sleep(10)
                print(datetime.now())
        except KeyboardInterrupt:
            if scheduler.state:
                scheduler.shutdown()

In [24]:
# Host and password are read from the environment so no credentials are stored in the notebook.
connection_config = {
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD'],
}

monitor = TransGPSCVMonitor(connection_config)

# One poll of the tracker; the cell displays how many records came back.
req = monitor.request_data()
len(req)

88

In [None]:
# NOTE(review): `ti` is not defined anywhere in the visible notebook and the cell was never
# executed (In [None]); it looks like an unfinished stub and would raise NameError if run.
ti
# monitor.write_to_db

In [27]:
import time

# Wall-clock cost of a single write_to_db call, printed in milliseconds.
t1_start = time.perf_counter()
monitor.write_to_db(req)
t1_stop = time.perf_counter()

elapsed_ms = (t1_stop - t1_start) * 1_000
print(f"{elapsed_ms:.3f}")

127.979


In [18]:
# Number of vehicles in the monitor's in-memory vehicle -> id mapping.
len(monitor.vehicle_to_id)

190

In [20]:
# Number of vehicles returned by a fresh read of the table, for comparison with the cached mapping.
# NOTE(review): `conn` is created in the following cell; run that one first on a fresh kernel.
len(TransportVehicle.get_table(conn))

203

In [12]:
# Stand-alone psycopg2 connection used by the table-level checks in this notebook.
conn = psycopg2.connect(**{
    'host': os.environ['RDS_HOSTNAME'],
    'database': "pteta_db",
    'user': "postgres",
    'password': os.environ['RDS_PTETA_DB_PASSWORD'],
})

In [40]:
# operator_list, route_list, vehicle_list, avl_data_list
# decompose_response is a classmethod of TransGPSCVMonitor; no free function of that
# name is defined in this notebook, so the bare call raises NameError on a fresh kernel.
# Result order: (operator_list, route_list, vehicle_list, avl_data_list).
dto_lists = TransGPSCVMonitor.decompose_response(req)

In [None]:
# Display the monitor's cached operator -> id mapping.
monitor.operator_to_id

In [52]:
# Sanity check of set.difference: keeps the elements of the left set that are absent from the argument.
set([1, 2, 4]).difference([2, 3, 4])

{1}

In [56]:
# Add a made-up operator (id 100) to the first decomposed list so the new-object check has something to find.
dto_lists[0].append(TransportOperator(100, name='noname'))

In [60]:
# Add a made-up route to the second decomposed list for the same new-object check.
dto_lists[1].append(TransportRoute(23, name='19-', colour='teal---'))

In [65]:
# dto_lists[2].append(
#     TransportVehicle(id=None, imei='355227045371655', name='A1', busNumber='6513', remark='6513 DNSNK', perevId=7, routeId=23))

In [66]:
# Vehicles present in the response but missing from the monitor's cache.
# The cached sets live in monitor.objects_unique, keyed by class; the class defined in
# this notebook has no operator_set / route_set / vehicle_set attributes.
# set(dto_lists[0]).difference(monitor.objects_unique[TransportOperator])
# set(dto_lists[1]).difference(monitor.objects_unique[TransportRoute])
set(dto_lists[2]).difference(monitor.objects_unique[TransportVehicle])



# resp_operator_set = set(operator_list)
#     monitor.operator_set

{TransportVehicle(id=None, imei='355227044854750', name='A22', busNumber='7665', remark='7665 DNSNK', perevId=1, routeId=20),
 TransportVehicle(id=None, imei='355227045369527', name='A6', busNumber='3557', remark='3557 DNSNK ', perevId=12, routeId=45),
 TransportVehicle(id=None, imei='355227045541257', name='A57', busNumber='9043', remark='0325 DNSNK', perevId=2, routeId=5),
 TransportVehicle(id=None, imei='355227045598570', name='А107', busNumber='1125', remark='1125 DNSNK', perevId=2, routeId=12),
 TransportVehicle(id=None, imei='355227046299129', name='A186', busNumber='389', remark='Тролейбус 389 DNSNK', perevId=6, routeId=37),
 TransportVehicle(id=None, imei='355227046313847', name='A184', busNumber='387', remark=' тролейбус 387 DNSNK', perevId=6, routeId=31),
 TransportVehicle(id=None, imei='355227046450953', name='H26', busNumber='0443', remark='0443 DNSNTNK', perevId=2, routeId=5),
 TransportVehicle(id=None, imei='355227046450961', name='H21', busNumber='2058', remark='2058 DNS

In [35]:
# 'perevName' field of the first tracker record (passed as the operator name in the next cell).
req[0]['perevName']

'Віталій / Олег Бойко '

In [None]:
# Build an operator by hand from the first record's 'perevId' / 'perevName' fields.
TransportOperator(req[0]['perevId'], req[0]['perevName'])

## TransportRoute

In [70]:
# Roll back the open transaction on `conn` (presumably to recover after a failed statement — verify).
conn.rollback()

In [45]:
# Join one SQL fragment per vehicle into a single space-separated string.
# NOTE(review): neither `create_sql_req` nor `trans_vehicle_list` is defined in the visible
# notebook; this cell depends on state from a removed cell.
SQL_big_req = " ".join([create_sql_req(t) for t in trans_vehicle_list])

In [13]:
# Load the route table (the [:] makes a shallow copy of the returned sequence) and display its length.
route_list = TransportRoute.get_table(conn)[:]
len(route_list)

33

In [18]:
# for r in route_list[::2]: 
#     r.id += 1
# TransportRoute.are_in_table(conn, route_list)

In [19]:
# TransportRoute.are_in_table(conn, route_list)

In [14]:
# Turn the loaded routes into throw-away test rows: shift each id by 100 and tag the name.
# Not idempotent: every re-run shifts the ids and extends the names again.
for route in route_list:
    route.id += 100
    route.name += "_"

In [16]:
# sql = f"""INSERT INTO pteta.route("id", "routeName", "routeColour") VALUES """ + \
#               ", ".join([f"""({obj.id}, '{obj.name}', '{obj.colour}')"""
#                         for obj in route_list]) + ";"
# sql

In [17]:
# route_list

In [18]:
# Bulk-insert the modified routes; this writes to the connected database.
TransportRoute.insert_many_in_table(conn, route_list)

## Test TransportVehicle

In [8]:
# Load the vehicle table and display how many objects came back.
vehicle_list = TransportVehicle.get_table(conn)
len(vehicle_list)

190

In [10]:
# Batch membership check for the first five vehicles; the result holds one flag per object.
TransportVehicle.are_in_table(conn, vehicle_list[:5])

[True, True, True, True, True]

In [11]:
# Single-object membership check on an unmodified vehicle.
vehicle_list[5].is_in_table(conn)

True

In [12]:
# `obj` aliases vehicle_list[0], so the list entry is modified as well.
obj = vehicle_list[0]
# Alter the IMEI so the object no longer matches a stored row; the check below displayed False.
obj.imei += "-"
obj.is_in_table(conn)

False

In [13]:
# Insert the modified vehicle through the single-object insert method (writes to the database).
obj.insert_in_table(conn)

In [14]:
# Tag the IMEI of the first ten vehicles so they count as new rows.
# Not idempotent: every re-run appends another underscore.
for vehicle in vehicle_list[:10]:
    vehicle.imei = vehicle.imei + '_'

In [15]:
# Bulk-insert the first ten (modified) vehicles; this writes to the connected database.
TransportVehicle.insert_many_in_table(conn, vehicle_list[:10])

In [31]:
# Insert `obj`, then query whether the table now contains it.
obj.insert_in_table(conn)
obj.is_in_table(conn)

In [10]:
# Same bulk insert as above; the recorded output is the generated INSERT statement
# (presumably printed by insert_many_in_table at the time — verify).
TransportVehicle.insert_many_in_table(conn, vehicle_list[:10])

INSERT INTO pteta.vehicle("imei", "name", "busNumber", "remark", "perevId", "routeId") VALUES('355227045600830', 'A178', '310', 'Тролейбус 310 DNSNK', 6,  37), ('355227046451662', 'H76', '350', 'Тролейбус 350 DNSNTNK', 6,  31), ('355227045369527', 'A6', '3557', '3557 DNSNK ', 12,  21), ('355227045540176', 'A83', '5150', ' 5150 DNS', 7,  41), ('355227046453387', 'H75', '3627', '3627 DNSNTNK', 1,  20), ('355228042084283', 'A207', '1032', '1032 DNSNK', 13,  42), ('355227045371655', 'A1', '6513', '6513 DNSNK', 7,  23), ('355227046451407', 'H68', '0855', '0855 DNSNTNK', 13,  42), ('355227046578332', 'A189', '6518', '6518 DNSNK ', 7,  19), ('355228043199379', 'H99', '1176', '1176 DNSNTNK', 12,  21);


In [14]:
# Clear the id, then let update_id_from_table fill it back in from the database.
# The two prints show the object before and after the lookup.
vehicle_list[0].id = None
print(vehicle_list[0])
vehicle_list[0].update_id_from_table(conn)
print(vehicle_list[0])

TransportVehicle(id=None, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)
TransportVehicle(id=1, imei='355227045600830', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37)


In [34]:
# Tag the first ten IMEIs once more, then show the first thirteen vehicles to
# compare modified entries with untouched ones.
for vehicle in vehicle_list[:10]:
    vehicle.imei = vehicle.imei + '__'

vehicle_list[:13]

[TransportVehicle(id=222, imei='355227045600830-__', name='A178', busNumber='310', remark='Тролейбус 310 DNSNK', perevId=6, routeId=37),
 TransportVehicle(id=2, imei='355227046451662__', name='H76', busNumber='350', remark='Тролейбус 350 DNSNTNK', perevId=6, routeId=31),
 TransportVehicle(id=3, imei='355227045369527__', name='A6', busNumber='3557', remark='3557 DNSNK ', perevId=12, routeId=21),
 TransportVehicle(id=4, imei='355227045540176__', name='A83', busNumber='5150', remark=' 5150 DNS', perevId=7, routeId=41),
 TransportVehicle(id=5, imei='355227046453387__', name='H75', busNumber='3627', remark='3627 DNSNTNK', perevId=1, routeId=20),
 TransportVehicle(id=6, imei='355228042084283__', name='A207', busNumber='1032', remark='1032 DNSNK', perevId=13, routeId=42),
 TransportVehicle(id=7, imei='355227045371655__', name='A1', busNumber='6513', remark='6513 DNSNK', perevId=7, routeId=23),
 TransportVehicle(id=8, imei='355227046451407__', name='H68', busNumber='0855', remark='0855 DNSNTNK

## TransportOperator

In [10]:
# Load the operator table and display how many objects came back.
operator_list = TransportOperator.get_table(conn)
len(operator_list)

8

In [11]:
# Batch membership check for every loaded operator; the result holds one flag per object.
TransportOperator.are_in_table(conn, operator_list[:])

[True, True, True, True, True, True, True, True]

In [12]:
# Single-object membership check on an unmodified operator.
operator_list[0].is_in_table(conn)

True

In [13]:
# `obj` aliases operator_list[0], so the list entry is modified as well.
obj = operator_list[0]
# Shift the id and prefix the name so the object no longer matches a stored row;
# the check below displayed False.
obj.id += 100
obj.name = "-_-" + obj.name
obj.is_in_table(conn)

False

In [14]:
# Insert the modified operator through the single-object insert method (writes to the database).
obj.insert_in_table(conn)

In [15]:
# Turn every loaded operator into a throw-away test row (id shifted by 100, name prefixed),
# then bulk-insert them. Not idempotent: re-running shifts and prefixes again.
for operator in operator_list:
    operator.name = "-_-" + operator.name
    operator.id += 100

TransportOperator.insert_many_in_table(conn, operator_list)

## TransportAVLData

In [9]:
# Load the AVL table through `conn` (no slice or limit is applied here).
avl_data_list = TransportAVLData.get_table(conn)

In [11]:
# avl_data_list

In [14]:
%%timeit
# Time the batch membership check over every loaded AVL row.
TransportAVLData.are_in_table(conn, avl_data_list)

57.4 ms ± 3.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
%%timeit
# Time the single-object membership check, for comparison with the batch call.
avl_data_list[0].is_in_table(conn)

42.1 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
