In [1]:
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

In [2]:
import os
import glob
import subprocess
import sys
from multiprocessing import Process
import socket
import sys

from dateutil.parser import parse
from confluent_kafka import Producer
import csv
import pandas as pd

In [9]:
import datetime
import json
import pandas as pd

In [10]:
help(FileSystemEventHandler)

Help on class FileSystemEventHandler in module watchdog.events:

class FileSystemEventHandler(builtins.object)
 |  Base file system event handler that you can override methods from.
 |  
 |  Methods defined here:
 |  
 |  dispatch(self, event)
 |      Dispatches events to the appropriate methods.
 |      
 |      :param event:
 |          The event object representing the file system event.
 |      :type event:
 |          :class:`FileSystemEvent`
 |  
 |  on_any_event(self, event)
 |      Catch-all event handler.
 |      
 |      :param event:
 |          The event object representing the file system event.
 |      :type event:
 |          :class:`FileSystemEvent`
 |  
 |  on_created(self, event)
 |      Called when a file or directory is created.
 |      
 |      :param event:
 |          Event representing file/directory creation.
 |      :type event:
 |          :class:`DirCreatedEvent` or :class:`FileCreatedEvent`
 |  
 |  on_deleted(self, event)
 |      Called when a file or dire

In [12]:
class Watcher:

    def __init__(self, directory=".", handler=FileSystemEventHandler()):
        self.observer = Observer()
        self.handler = handler
        self.directory = directory

    def run(self):
        self.observer.schedule(
            self.handler, self.directory, recursive=True)
        self.observer.start()
        print("\nWatcher Running in {}/\n".format(self.directory))
        try:
            while True:
                time.sleep(1)
        except:
            self.observer.stop()
        self.observer.join()
        print("\nWatcher Terminated\n")


class MyHandler(FileSystemEventHandler):


    def __init__(self,isc_id):
        self.root = 'Orlando_converted'
        self.decoder_path = 'PurdueDecoder.exe'
        self.produce = MessageProducer()
        self.isc_id = isc_id
        self.last_timestamp = None

            
    def on_moved(self, event):
#         print(f"Indi - {event}")
        try:
            path_dest = event.dest_path
            all_split = path_dest.split('/')
            city, year, month, day, isc, filename  = all_split[0], all_split[1], all_split[2], all_split[3], all_split[4],all_split[5] 
            self.check_destination_directory(year, month, day, isc)
            out_file = f"{self.root}/{year}/{month}/{day}/{isc}/{filename}.txt"
            temp = out_file.split('/')[-1].split('_')
            curr_ts = pd.to_datetime(f"{temp[2]}-{temp[3]}-{temp[4]} {temp[5][:2]}:{temp[5][2:4]}:00")
            last_ts  = self.last_timestamp
            
            
            process = subprocess.Popen(["wine", f"{self.decoder_path }", f"{path_dest}", f"{out_file}"])
            (output, err) = process.communicate()  

            #This makes the wait possible
            p_status = process.wait()
            self.produce.generate_messages(isc, out_file)
            
#             produce message from last to current timestamp
            self.produce.generate_message_bit_mask(isc, last_ts, curr_ts)


            if(last_ts is None):
                self.last_timestamp = curr_ts
            if((last_ts is not None)and (curr_ts>last_ts))
                self.last_timestamp= curr_ts
            

        except Exception as e:
            print(f"deocde failed for event {event} due to {e}")
        
#         subprocess.Popen(["cp",f"{path_dest}",f"{self.root}/{year}/{month}/{day}/{isc}"])
        
        
        
    def check_destination_directory(self, year, month, day, isc):
        root = self.root
        
        
        if not os.path.exists(f"{root}/{year}"):
            os.makedirs(f"{root}/{year}")
            
        if not os.path.exists(f"{root}/{year}/{month}"):
            os.makedirs(f"{root}/{year}/{month}")
            
        if not os.path.exists(f"{root}/{year}/{month}/{day}"):
            os.makedirs(f"{root}/{year}/{month}/{day}")
            
        if not os.path.exists(f"{root}/{year}/{month}/{day}/{isc}"):
            os.makedirs(f"{root}/{year}/{month}/{day}/{isc}")



In [5]:
class ISCWatcher:
    def __init__(self, directory=".", handler=FileSystemEventHandler()):
        self.observer = Observer()
        self.handler = handler
        self.directory = directory
        

    def run(self):
        self.observer.schedule(
            self.handler, self.directory, recursive=False)
        self.observer.start()
        print("\nWatcher Running in {}/\n".format(self.directory))
        try:
            while True:
                time.sleep(1)
        except:
            self.observer.stop()
        self.observer.join()
        print("\nWatcher Terminated\n")
        
class ISChandler(FileSystemEventHandler):
    def __init__(self):
        self.root = 'Orlando_converted'
        self.decoder_path = 'PurdueDecoder.exe'
        self.all_isc_dict = {}
        
    def on_created(self, event):
        print(event.is_directory)
        if(event.is_directory):
            isc_path = event.src_path
            isc_id = event.src_path.split('/')[-1]
            if(isc_id not in self.all_isc_dict):
                print(f"start monitoring {isc_id} at path {isc_path}")
                print(event) # Your code here
                p = Process(target=self.start_process, args=(isc_path,isc_id))
                p.start()
#                 p.join()
#                 start new process and join 

    def start_process(self,dest,isc_id):
        watcher = Watcher(dest, handler = MyHandler(isc_id))
        watcher.run()
        
        
                
                
    
    

In [6]:
class MessageProducer:
    
    def __init__(self):
        self.file_path = None
        self.topic = 'atspm'
        

        self.conf = {'bootstrap.servers': "localhost:9092",
                'client.id': socket.gethostname()}
        self.producer = Producer(self.conf)
        
        
    def acked(self,err, msg):
#         print(f"called {msg}")
        if err is not None:
            print("Failed to deliver message: %s: %s" % (str(msg.value()), str(err)))
        else:
            print(f"Message produced: {msg.value()}")
            
    def generate_message_bit_mask(self, isc_id, last_ts, curr_ts):
        result = {}

        result['ts']=curr_ts
        result['SignalID']=isc_id
        result['flag']  = 1
        jresult = json.dumps(result)
        self.producer.produce('atspm_bit_mask' , key=isc_id, value=jresult, callback=self.acked)
        
        if((last_ts is not None)and (curr_ts>last_ts)):
            all_ts = pd.date_range(start=last_ts,end=curr_ts, freq= '1min')
            if(len(all_ts)>2):
                for each_ts in all_ts[1:-1]:
                    result = {}
                    result['ts']=each_ts
                    result['SignalID']=isc_id
                    result['flag']  = 0
                    jresult = json.dumps(result)
                    self.producer.produce('atspm_bit_mask' , key=isc_id, value=jresult, callback=self.acked)
            
            
            
            
    def generate_messages(self,isc_id, path):
        df_file = pd.read_csv(path,sep=',',skiprows = 6,header=None,names = ['Timestamp','EventCode','EventParam'] )
        
#         print(f"df_file {df_file.shape}")
        for i,each_row  in df_file.iterrows():
            result = {}
            result["SignalID"] = isc_id
            dt = datetime.datetime.strptime(each_row['Timestamp'], '%m/%d/%Y %H:%M:%S.%f')
            unix_time =  int(time.mktime(dt.timetuple()) * 1000)
            str_time = dt.strftime('%Y-%m-%dT%H:%M:%S.%f')
            result["ts"] = unix_time
            result["EventCode"] = each_row['EventCode']
            result["EventParam"] = each_row['EventParam']

            # Convert dict to json as message format
            jresult = json.dumps(result)
#             print(f"{jresult}")
            self.producer.produce(self.topic , key=isc_id, value=jresult, callback=self.acked)
    
        self.producer.flush()
        
            
        
    

In [7]:
temp_path = '/home/yash/interruptions/real_time_pipeline/Orlando_converted/2022/12/02/1020/TRAF_1020_2022_12_02_1759.dat.txt'
isc_id = '1020'

In [10]:
watcher = ISCWatcher('Orlando/2022/12/04', handler = ISChandler())

In [11]:
watcher.run()


Watcher Running in Orlando/2022/12/04/

True
start monitoring 1020 at path Orlando/2022/12/04/1020
<DirCreatedEvent: event_type=created, src_path='Orlando/2022/12/04/1020', is_directory=True>

Watcher Running in Orlando/2022/12/04/1020/

True
start monitoring 1025 at path Orlando/2022/12/04/1025
<DirCreatedEvent: event_type=created, src_path='Orlando/2022/12/04/1025', is_directory=True>

Watcher Running in Orlando/2022/12/04/1025/

deocde failed for event <FileMovedEvent: src_path='Orlando/2022/12/04/1020/TRAF_1020_2022_12_04_0004.dat.E59EDAbD', dest_path='Orlando/2022/12/04/1020/TRAF_1020_2022_12_04_0004.dat', is_directory=False> due to [Errno 2] No such file or directory: 'Orlando_converted/2022/12/04/1020/TRAF_1020_2022_12_04_0004.dat.txt'
deocde failed for event <FileMovedEvent: src_path='Orlando/2022/12/04/1020/TRAF_1020_2022_12_04_0001.dat.9e06C873', dest_path='Orlando/2022/12/04/1020/TRAF_1020_2022_12_04_0001.dat', is_directory=False> due to [Errno 2] No such file or directory:

In [2]:
tekm = 'Orlando_converted/2022/12/02/1020/TRAF_1020_2022_12_02_1759.dat.txt'

In [5]:
temp = tekm.split('/')[-1].split('_')

In [10]:
curr_ts =  pd.to_datetime(f"{temp[2]}-{temp[3]}-{temp[4]} {temp[5][:2]}:{temp[5][2:4]}:00")

In [12]:
curr_ts

Timestamp('2022-12-02 17:59:00')

In [14]:
old_ts =  'Orlando_converted/2022/12/02/1020/TRAF_1020_2022_12_02_1757.dat.txt'
temp =  old_ts.split('/')[-1].split('_')
old_ts = pd.to_datetime(f"{temp[2]}-{temp[3]}-{temp[4]} {temp[5][:2]}:{temp[5][2:4]}:00")

In [15]:
old_ts

Timestamp('2022-12-02 17:57:00')

In [16]:
curr_ts<old_ts

False

In [24]:
pd.date_range(start=old_ts,end=curr_ts, freq= '1min')[1:-1]

DatetimeIndex(['2022-12-02 17:58:00'], dtype='datetime64[ns]', freq='T')

In [19]:
old_ts

Timestamp('2022-12-02 17:57:00')