In [22]:
from datetime import datetime, timedelta
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd

from collections import Counter

## Analyse data

In [2]:
# Load up to ten CSV tables and stack them into a single DataFrame.
# The paths are sorted because Path.iterdir() yields entries in arbitrary
# order, which would make "the last ten" differ between machines/runs.
table_paths = sorted(Path("../data/local/tables/").iterdir())[-10:]

# Read every file first and concatenate once. The previous version called
# DataFrame.append, which returns a new frame instead of mutating df_sum,
# so everything after the first file was silently dropped.
frames = [
    pd.read_csv(df_path, encoding='utf-8', low_memory=False)
    for df_path in tqdm(table_paths)
]

# Keep the original contract: df_sum stays None when there is nothing to load.
df_sum = pd.concat(frames, ignore_index=True) if frames else None

100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


In [3]:
# Display the rows of df_sum that DataFrame.duplicated() flags as repeats.
df_sum[df_sum.duplicated()]

Unnamed: 0,id,imei,name,stateCode,stateName,lat,lng,speed,orientation,gpstime,routeId,routeName,routeColour,inDepo,busNumber,perevId,perevName,remark,online,idBusTypes


In [4]:
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
import pytz

import json
import requests
import os
import time
# from tqdm.auto import tqdm


from PTETA.utils.transport.TransportOperator import TransportOperator
from PTETA.utils.transport.TransportRoute import TransportRoute
from PTETA.utils.transport.TransportVehicle import TransportVehicle
from PTETA.utils.transport.TransportAVLData import TransportAVLData
from psycopg2.extensions import connection as Connection
import psycopg2

from apscheduler.schedulers.background import BackgroundScheduler


# Previous tracker snapshot; TransGPSCVMonitor.request_data reads and replaces
# it through ``global response_prev`` to detect changed records.
response_prev = dict()

# Drop a class object left over from an earlier run of this cell.
try:
    del TransGPSCVMonitor
except NameError:
    # Nothing to delete yet (first run); any other error should surface.
    pass

class TransGPSCVMonitor:
    """Poll the tracker endpoint and keep lookup caches of the DB tables.

    Constructing an instance opens a psycopg2 connection and loads the
    operator, route and vehicle tables into ``*_to_id`` / ``id_to_*``
    dictionaries. ``request_data`` fetches one snapshot and returns only the
    records that are new or whose ``gpstime`` changed since the previous
    snapshot; ``run`` schedules that fetch on a fixed interval.
    """

    # psycopg2 connection; assigned per instance in __init__.
    db_connection: "Connection" = None

    # Lookup caches; each pair is replaced wholesale by its reload* method.
    operator_to_id: dict = dict()
    id_to_operator: dict = dict()

    route_to_id: dict = dict()
    id_to_route: dict = dict()

    vehicle_to_id: dict = dict()
    id_to_vehicle: dict = dict()

    def __init__(self, connection_config: dict, **kwarg: dict) -> None:
        """Connect to the database, fill the caches and read the settings.

        Args:
            connection_config: keyword arguments forwarded to
                ``psycopg2.connect``.
            **kwarg: optional overrides ``REQUEST_URI``, ``START_DATE``,
                ``END_DATE`` (both formatted with ``datetime_format``) and
                ``REQ_TIME_DELTA`` (polling interval in seconds).
        """
        self.db_connection = psycopg2.connect(**connection_config)

        self.reloadTransportOperators()
        self.reloadTransportRoutes()
        self.reloadTransportVehicles()

        self.datetime_format = '%Y-%m-%d %H:%M:%S'

        self.REQUEST_URI = \
            kwarg.get('REQUEST_URI', 'http://www.trans-gps.cv.ua/map/tracker/?selectedRoutesStr=')
        # START_DATE is stored but not read anywhere else in this class.
        self.START_DATE = \
            kwarg.get('START_DATE', (datetime.now() - timedelta(days=1)).strftime(self.datetime_format))
        self.END_DATE = \
            kwarg.get('END_DATE', (datetime.now() + timedelta(days=30)).strftime(self.datetime_format))
        self.REQ_TIME_DELTA = kwarg.get('REQ_TIME_DELTA',  1.1)

    def reloadTransportOperators(self):
        """Rebuild both operator lookup dictionaries from the DB table."""
        operator_list = TransportOperator.get_table(self.db_connection)
        self.operator_to_id = {operator: operator.id for operator in operator_list}
        self.id_to_operator = {operator.id: operator for operator in operator_list}

    def reloadTransportRoutes(self):
        """Rebuild both route lookup dictionaries from the DB table."""
        route_list = TransportRoute.get_table(self.db_connection)
        self.route_to_id = {route: route.id for route in route_list}
        self.id_to_route = {route.id: route for route in route_list}

    def reloadTransportVehicles(self):
        """Rebuild both vehicle lookup dictionaries from the DB table."""
        vehicle_list = TransportVehicle.get_table(self.db_connection)
        self.vehicle_to_id = {vehicle: vehicle.id for vehicle in vehicle_list}
        self.id_to_vehicle = {vehicle.id: vehicle for vehicle in vehicle_list}

    @classmethod
    def request_data(cls, request_uri='http://www.trans-gps.cv.ua/map/tracker/?selectedRoutesStr=',
                     timeout=10):
        """Fetch one snapshot and return the records that changed.

        A record is returned when its key was absent from the previous
        snapshot or when its ``gpstime`` differs from the previous one. Every
        returned record gets a timezone-aware UTC ``response_datetime``. The
        module-level ``response_prev`` is replaced by the new snapshot only
        after a successful fetch and parse.

        Args:
            request_uri: URL that returns the snapshot as a JSON object.
            timeout: seconds to wait for the server before giving up, so a
                stalled request cannot block a scheduler worker forever.

        Returns:
            list of record dicts; empty when nothing changed or when the
            request or parsing failed (the failure is printed, not raised).
        """
        global response_prev

        dt_now = datetime.now()
        dt_tz_now = datetime.now(pytz.utc)
        log_stamp = dt_now.strftime('%Y-%m-%d %H;%M;%S')

        # Bound before the request so the failure paths below still have a
        # list to report and return (previously an UnboundLocalError).
        optimized_data_list = list()

        try:
            response = requests.get(request_uri, timeout=timeout)
            # Turn 4xx/5xx answers into HTTPError so the handler below sees them.
            response.raise_for_status()
            response_cur = json.loads(response.text)
        except (requests.Timeout, requests.ConnectionError, requests.HTTPError) as err:
            print(f"{log_stamp} : error while trying to GET data\n"
                  f"\t{err}\n")
        except ValueError as err:
            # json.JSONDecodeError is a ValueError: empty or malformed body.
            print(f"{log_stamp} : error while trying to parse response\n"
                  f"\t{err}\n")
        else:
            for imei, record in response_cur.items():
                is_new = imei not in response_prev
                if is_new or response_prev[imei]['gpstime'] != record['gpstime']:
                    record['response_datetime'] = dt_tz_now
                    optimized_data_list.append(record)

            response_prev = response_cur

        print(log_stamp, len(optimized_data_list))
        return optimized_data_list

    def run(self):
        """Poll the endpoint every ``REQ_TIME_DELTA`` seconds until interrupted.

        Blocks the calling thread, printing the current time every ten
        seconds, and shuts the scheduler down on the way out.
        """
        scheduler = BackgroundScheduler(job_defaults={'max_instances': 8})
        scheduler.add_job(
                self.request_data,
                'interval',
                # Use the configured URI instead of the method's default.
                args=[self.REQUEST_URI],
                seconds=self.REQ_TIME_DELTA,
                end_date=self.END_DATE,
                id='listener')

        scheduler.start()

        try:
            print('Scheduler started!')
            while True:
                time.sleep(10)
                print(datetime.now())
        except KeyboardInterrupt:
            # Interrupting the kernel is the intended way to stop polling.
            pass
        finally:
            # Also stop the background threads when leaving on another error.
            if scheduler.state:
                scheduler.shutdown()
            
        


In [5]:
# Database settings handed to TransGPSCVMonitor; host and password are read
# from the environment so no credentials live in the notebook.
connection_config = dict(
    host=os.environ['RDS_HOSTNAME'],
    database="pteta_db",
    user="postgres",
    password=os.environ['RDS_PTETA_DB_PASSWORD'],
)

monitor = TransGPSCVMonitor(connection_config)

In [7]:
# monitor.run()

In [6]:
# Fetch one snapshot with the default request_uri; request_data returns the
# records it considers new or changed.
req = monitor.request_data()
# Show the number of returned records together with the records themselves.
len(req), req

2022-12-03 02;59;56 97


(97,
 [{'id': 215,
   'imei': '355227045601259',
   'name': 'A133',
   'stateCode': 'used',
   'stateName': 'used',
   'lat': 48.26555666666667,
   'lng': 25.989718333333332,
   'speed': '000.0',
   'orientation': '000.00',
   'gpstime': '2022-12-03 01:58:27',
   'routeId': 27,
   'routeName': '3/3a',
   'routeColour': 'green',
   'inDepo': True,
   'busNumber': '358',
   'perevId': 6,
   'perevName': 'ЧТУ',
   'remark': 'Тролейбус 358 DNSNK',
   'online': True,
   'idBusTypes': 2,
   'response_datetime': datetime.datetime(2022, 12, 3, 0, 59, 56, 161686, tzinfo=<UTC>)},
  {'id': 231,
   'imei': '355227045600582',
   'name': 'A146',
   'stateCode': 'used',
   'stateName': 'used',
   'lat': 48.251115,
   'lng': 25.952376666666666,
   'speed': '018.3',
   'orientation': '277.07',
   'gpstime': '2022-12-02 20:53:51',
   'routeId': 31,
   'routeName': '6/6a',
   'routeColour': 'deeppink',
   'inDepo': False,
   'busNumber': '385',
   'perevId': 6,
   'perevName': 'ЧТУ',
   'remark': 'Тролей

In [7]:
# Inspect the first returned record to see which fields it carries.
req[0]

{'id': 215,
 'imei': '355227045601259',
 'name': 'A133',
 'stateCode': 'used',
 'stateName': 'used',
 'lat': 48.26555666666667,
 'lng': 25.989718333333332,
 'speed': '000.0',
 'orientation': '000.00',
 'gpstime': '2022-12-03 01:58:27',
 'routeId': 27,
 'routeName': '3/3a',
 'routeColour': 'green',
 'inDepo': True,
 'busNumber': '358',
 'perevId': 6,
 'perevName': 'ЧТУ',
 'remark': 'Тролейбус 358 DNSNK',
 'online': True,
 'idBusTypes': 2,
 'response_datetime': datetime.datetime(2022, 12, 3, 0, 59, 56, 161686, tzinfo=<UTC>)}

In [19]:
def decompose(req):
    """Split one raw tracker record into its four transport objects.

    Args:
        req: mapping holding the tracker fields read below ('perevId',
            'routeId', 'imei', 'lat', 'gpstime', 'response_datetime', ...).

    Returns:
        Tuple ``(TransportOperator, TransportRoute, TransportVehicle,
        TransportAVLData)`` built from that record.
    """
    operator = TransportOperator(req['perevId'], req['perevName'])
    route = TransportRoute(req['routeId'], req['routeName'], req['routeColour'])

    # The leading None is the vehicle's first constructor argument, which the
    # raw record does not provide.
    vehicle_args = [
        req[key]
        for key in ('imei', 'name', 'busNumber', 'remark', 'perevId', 'routeId')
    ]
    vehicle = TransportVehicle(None, *vehicle_args)

    # The None before response_datetime is a placeholder that write_to_DB
    # fills in later through ``vehicleId``.
    avl_args = [
        req[key]
        for key in ('lat', 'lng', 'speed', 'orientation', 'gpstime', 'inDepo')
    ]
    avl_data = TransportAVLData(*avl_args, None, req['response_datetime'])

    return operator, route, vehicle, avl_data

def _ensure_registered(conn, entity, label, cache, reload_cache, verbose=0):
    """Make sure ``entity`` is in its DB table and refresh the stale cache.

    Args:
        conn: DB connection passed to the entity's table methods.
        entity: object exposing ``is_in_table`` and ``insert_in_table``.
        label: name used in the progress and error messages.
        cache: monitor dictionary keyed by entities of this kind.
        reload_cache: zero-argument callable that reloads that dictionary.
        verbose: print progress messages when greater than 0.

    Raises:
        ValueError: if the entity is still not in the table afterwards.
    """
    if entity in cache:
        if verbose > 0:
            print(f"It ok. {label} is in ")
        return

    if not entity.is_in_table(conn):
        entity.insert_in_table(conn)
    # The cache missed this entity, so refresh it whether or not we inserted.
    reload_cache()
    if verbose > 0:
        print(f"New {label} inserted in DB")
    if not entity.is_in_table(conn):
        raise ValueError(f"{label} is missing from the DB after insert: {entity!r}")


def write_to_DB(conn, req, verbose=0):
    """Store one raw tracker record and the entities it refers to.

    The operator, route and vehicle are registered first, in that order;
    the AVL row is then inserted unless it is already in the table. Uses the
    notebook-level ``monitor`` for the lookup caches.

    Args:
        conn: DB connection used for every table check and insert.
        req: raw tracker record, as accepted by ``decompose``.
        verbose: print progress messages when greater than 0.

    Raises:
        ValueError: if an operator, route or vehicle cannot be found in its
            table even after the insert attempt.
    """
    trans_operator, trans_route, trans_vehicle, trans_AVLData = decompose(req)

    _ensure_registered(conn, trans_operator, "trans_operator",
                       monitor.operator_to_id, monitor.reloadTransportOperators, verbose)
    _ensure_registered(conn, trans_route, "trans_route",
                       monitor.route_to_id, monitor.reloadTransportRoutes, verbose)
    _ensure_registered(conn, trans_vehicle, "trans_vehicle",
                       monitor.vehicle_to_id, monitor.reloadTransportVehicles, verbose)

    # decompose() builds the vehicle without an id; fetch it from the table so
    # the AVL row can reference the vehicle.
    trans_vehicle.update_id_from_table(conn)
    trans_AVLData.vehicleId = trans_vehicle.id

    if not trans_AVLData.is_in_table(conn):
        trans_AVLData.insert_in_table(conn)
        if verbose > 0:
            print("trans_AVLData inserted in DB")
    else:
        if verbose > 0:
            print("trans_AVLData is already in DB")
    

In [9]:
# Separate connection for the ad-hoc cells below; host and password are read
# from the environment.
conn = psycopg2.connect(**{
    "host": os.environ['RDS_HOSTNAME'],
    "database": "pteta_db",
    "user": "postgres",
    "password": os.environ['RDS_PTETA_DB_PASSWORD'],
})

In [12]:
# Index 3 of the tuple returned by decompose() is the TransportAVLData object.
decompose(req[0])[3]

TransportAVLData(lat=48.26555666666667, lng=25.989718333333332, speed='000.0', orientation='000.00', gpstime='2022-12-03 01:58:27', inDepo=True, vehicleId=None, response_datetime=datetime.datetime(2022, 12, 3, 0, 59, 56, 161686, tzinfo=<UTC>))

In [11]:
# NOTE(review): despite its name, this variable holds the TransportVehicle —
# index 2 of the tuple returned by decompose() — not the AVL data.
trans_AVLData = decompose(req[0])[2]
print(trans_AVLData)
# Look the vehicle up in the DB; the second print shows its id filled in.
trans_AVLData.update_id_from_table(conn)
print(trans_AVLData)

TransportVehicle(id=None, imei='355227045601259', name='A133', busNumber='358', remark='Тролейбус 358 DNSNK', perevId=6, routeId=27)
TransportVehicle(id=72, imei='355227045601259', name='A133', busNumber='358', remark='Тролейбус 358 DNSNK', perevId=6, routeId=27)


In [10]:
# Check whether the object currently bound to trans_AVLData is already stored.
# NOTE(review): which object that is depends on the cell that last assigned
# the name — confirm before relying on the result.
trans_AVLData.is_in_table(conn)

False

In [28]:
# Count fetched records whose imei is not among the vehicles cached from the DB.
# The set of known imeis is built once instead of once per record, and the raw
# record's 'imei' field is read directly: it is the value decompose() hands to
# TransportVehicle, so there is no need to build all four objects per record
# or to depend on the position of the vehicle in decompose()'s return tuple.
known_imeis = {vehicle.imei for vehicle in monitor.vehicle_to_id}
sum(1 for r in req if r['imei'] not in known_imeis)

18

compare by TransportVehicle
42 not in DB
55 in DB 

compare by imei
18 not in DB
79 in DB 

In [39]:
# The vehicle is element 2 of decompose()'s return tuple; the last element is
# the TransportAVLData object.
decompose(req[0])[2]

TransportVehicle(id=None, imei='355227046507810', name='A199', busNumber='0359', remark='0359 DNSNK', perevId=13, routeId=11)

In [23]:
# Store every fetched record. Iterating the list directly (no unused
# enumerate index) lets tqdm see its length and show total and ETA.
for row in tqdm(req):
    write_to_DB(conn, row)

97it [00:14,  6.90it/s]


It ok. trans_operator is in 
It ok. trans_route is in 
It ok. trans_vehicle is in 
trans_AVLData is already in DB


In [42]:
# The vehicle is element 2 of decompose()'s return tuple; the last element is
# the TransportAVLData object.
decompose(req[0])[2]

TransportVehicle(id=None, imei='355227046507810', name='A199', busNumber='0359', remark='0359 DNSNK', perevId=13, routeId=11)

In [41]:
# Look up the cached DB id of the first record's vehicle. vehicle_to_id is
# keyed by vehicles, which decompose() returns at index 2 (the last element is
# the AVL data and would not be a key of this dictionary).
monitor.vehicle_to_id[decompose(req[0])[2]]

149