# In [None]:
import os
import json
import requests
from kafka import KafkaProducer, KafkaConsumer
import boto3
import time

# Initialize Kafka Producer
# Values are serialized to UTF-8 encoded JSON before being sent. The broker
# address comes from KAFKA_BOOTSTRAP_SERVERS and falls back to a local broker.
producer = KafkaProducer(
    bootstrap_servers=os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'localhost:9092'),
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Initialize S3 Client
# Credentials are looked up in the standard AWS environment variables so that
# real secrets do not have to live in the source file. The literal fallbacks
# are the placeholder values this script has always shipped with.
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID', 'id'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY', 'key'),
    region_name=os.getenv('AWS_REGION', 'region')
)
# Bucket that every upload in this script writes to.
bucket_name = os.getenv('S3_BUCKET_NAME', 'transit-optimization')


# Utility to validate JSON
def is_valid_json(data):
    """Report whether *data* can be serialized with ``json.dumps``.

    Returns:
        True when serialization succeeds, False when ``json.dumps`` raises
        ``TypeError`` or ``ValueError``.
    """
    try:
        json.dumps(data)
    except (TypeError, ValueError):
        return False
    else:
        return True


# Functions for fetching data
def get_weather_data(lat, lon, timeout=10):
    """Fetch current weather for a coordinate from the OpenWeatherMap API.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or an empty dict when the request fails
        (connection error, timeout, HTTP error status, or a body that is not
        valid JSON).
    """
    url = 'https://api.openweathermap.org/data/2.5/weather'
    # Passing the query as `params` lets requests URL-encode every value.
    params = {
        'lat': lat,
        'lon': lon,
        'exclude': 'minutely,hourly,alerts',  # Adjust exclusions as needed
        'appid': 'apikey',
        'units': 'metric',
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching weather data for lat={lat}, lon={lon}: {e}")
        return {}


def get_traffic_data(origin, destination, timeout=10):
    """Request directions between two places from the Google Directions API.

    Args:
        origin: Start of the route, passed as the ``origin`` query parameter.
        destination: End of the route, passed as the ``destination`` query
            parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://maps.googleapis.com/maps/api/directions/json'
    # `params` URL-encodes the values, so place names containing spaces,
    # '&' or '#' cannot corrupt the query string.
    params = {
        'origin': origin,
        'destination': destination,
        'key': 'apikey',
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the caller.
    response.raise_for_status()
    return response.json()


def get_transit_data(timeout=10):
    """Fetch the 511.org ``traffic/events`` feed and return it as parsed JSON.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON document.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the body cannot be decoded or is not valid JSON.
    """
    url = 'http://api.511.org/traffic/events'
    params = {'api_key': 'apikey'}
    response = requests.get(url, params=params, timeout=timeout)
    # Reject HTTP error responses so an error body is never treated as data.
    response.raise_for_status()
    # 'utf-8-sig' strips a leading byte-order mark that would break json.loads.
    data = response.content.decode('utf-8-sig')
    return json.loads(data)


import requests
import json
from datetime import datetime


import requests
import json
from datetime import datetime

def _delay_seconds(aimed, expected):
    """Return ``expected - aimed`` in seconds for two ISO-8601 strings, or None.

    None is returned when either value is missing or cannot be parsed, so one
    malformed timestamp does not abort processing of the whole feed.
    """
    if not aimed or not expected:
        return None
    try:
        # fromisoformat() on older Python versions rejects a trailing "Z".
        aimed_dt = datetime.fromisoformat(aimed.replace("Z", "+00:00"))
        expected_dt = datetime.fromisoformat(expected.replace("Z", "+00:00"))
        return (expected_dt - aimed_dt).total_seconds()
    except (AttributeError, TypeError, ValueError):
        # Non-string value, unparsable text, or a naive/aware mismatch.
        return None


# Function to fetch transit stop monitoring data and calculate delays with enriched data
def get_rtsm_data(timeout=10):
    """Fetch 511.org StopMonitoring data and flatten it into per-visit records.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with one dict per monitored stop visit, holding line, vehicle
        and stop fields plus ``ArrivalDelaySeconds`` / ``DepartureDelaySeconds``
        (expected minus aimed time, or None when not computable). Returns None
        when the request fails or the body cannot be decoded as JSON.
    """
    # Define the endpoint URL with all necessary parameters
    url = 'http://api.511.org/transit/StopMonitoring'
    params = {
        'api_key': 'apikey',
        'agency': 'AC',  # Replace 'AC' with the actual agency/operator ID
        'format': 'json'
    }

    try:
        # Send GET request with parameters; the timeout prevents a stalled
        # connection from blocking the caller indefinitely.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Decode the response content explicitly using utf-8-sig
        data = response.content.decode('utf-8-sig')  # Handle UTF-8 BOM
        parsed_data = json.loads(data)  # Parse the decoded JSON
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers both UnicodeDecodeError and json.JSONDecodeError.
        print(f"Error fetching data: {e}")
        return None

    # `or {}` / `or []` also cover keys that are present but set to null,
    # which a plain .get(key, default) would pass through as None.
    deliveries = (
        (parsed_data.get("ServiceDelivery") or {})
        .get("StopMonitoringDelivery")
        or []
    )
    # Accept either a single delivery object or a list of them.
    if isinstance(deliveries, dict):
        deliveries = [deliveries]

    # Process each monitored stop visit and calculate delays
    enriched_data = []
    for delivery in deliveries:
        for visit in delivery.get("MonitoredStopVisit") or []:
            journey = visit.get("MonitoredVehicleJourney") or {}
            monitored_call = journey.get("MonitoredCall") or {}
            location = journey.get("VehicleLocation") or {}

            # Extract times
            aimed_arrival = monitored_call.get("AimedArrivalTime")
            expected_arrival = monitored_call.get("ExpectedArrivalTime")
            aimed_departure = monitored_call.get("AimedDepartureTime")
            expected_departure = monitored_call.get("ExpectedDepartureTime")

            # Append all relevant details to the enriched data list
            enriched_data.append({
                "RecordedAtTime": visit.get("RecordedAtTime"),
                "LineRef": journey.get("LineRef"),
                "DirectionRef": journey.get("DirectionRef"),
                "PublishedLineName": journey.get("PublishedLineName"),
                "OperatorRef": journey.get("OperatorRef"),
                "OriginName": journey.get("OriginName"),
                "DestinationName": journey.get("DestinationName"),
                "VehicleLocationLatitude": location.get("Latitude"),
                "VehicleLocationLongitude": location.get("Longitude"),
                "Bearing": journey.get("Bearing"),
                "Occupancy": journey.get("Occupancy"),
                "StopPointRef": monitored_call.get("StopPointRef"),
                "StopPointName": monitored_call.get("StopPointName"),
                "AimedArrivalTime": aimed_arrival,
                "ExpectedArrivalTime": expected_arrival,
                "ArrivalDelaySeconds": _delay_seconds(aimed_arrival, expected_arrival),
                "AimedDepartureTime": aimed_departure,
                "ExpectedDepartureTime": expected_departure,
                "DepartureDelaySeconds": _delay_seconds(aimed_departure, expected_departure),
                "VehicleAtStop": monitored_call.get("VehicleAtStop"),
                "DestinationDisplay": monitored_call.get("DestinationDisplay")
            })

    return enriched_data



# Producer functions
def produce_weather_data():
    """Publish weather for each vehicle position to the 'weather-data' topic.

    Vehicle coordinates come from get_rtsm_data(). For every record with
    usable coordinates the weather is fetched and sent to Kafka. The loop
    stops once more than 60 seconds have elapsed since it started.
    """
    # get_rtsm_data() returns None when the fetch fails; iterate nothing then.
    rtsm_data = get_rtsm_data() or []
    start_time = time.time()
    for stop in rtsm_data:
        if time.time() - start_time > 60:
            print("Terminating the loop after one minute")
            break

        lat = stop.get("VehicleLocationLatitude")
        lon = stop.get("VehicleLocationLongitude")
        # Empty strings are as unusable as None for a coordinate lookup.
        if lat in (None, "") or lon in (None, ""):
            print(f"Skipping stop with invalid coordinates: {stop}")
            continue

        weather_data = get_weather_data(lat, lon)
        # get_weather_data() returns {} on failure; do not publish that as data.
        if weather_data and is_valid_json(weather_data):
            producer.send('weather-data', value=weather_data)
            print(f"Weather data for lat={lat}, lon={lon} sent to Kafka")
        else:
            print(f"Invalid weather data for lat={lat}, lon={lon}, not sent to Kafka")


def produce_traffic_data(origin='San Francisco', destination='Los Angeles'):
    """Fetch directions for a route and publish them to the 'traffic-data' topic.

    Args:
        origin: Start of the route handed to get_traffic_data().
        destination: End of the route handed to get_traffic_data().

    The defaults reproduce the route this function previously hard-coded, so
    existing zero-argument calls behave as before.
    """
    data = get_traffic_data(origin, destination)
    if is_valid_json(data):
        producer.send('traffic-data', value=data)
        print("Traffic data sent to Kafka")
    else:
        print("Invalid traffic data, not sent to Kafka")


def produce_transit_data():
    """Send the result of get_transit_data() to the 'transit-data' Kafka topic.

    Nothing is sent when the payload is not JSON-serializable.
    """
    payload = get_transit_data()
    if not is_valid_json(payload):
        print("Invalid transit data, not sent to Kafka")
        return

    producer.send('transit-data', value=payload)
    print("Transit data sent to Kafka")


# RTSM Data: Fetch and Store in S3
def store_rtsm_data_in_s3():
    """Upload the current get_rtsm_data() result to S3 as one JSON object.

    The object is always written under the same key, so each successful call
    replaces the previous upload. Empty, missing, or non-serializable data is
    reported and skipped.
    """
    snapshot = get_rtsm_data()
    if not snapshot or not is_valid_json(snapshot):
        print("Invalid RTSM data. Not uploaded to S3.")
        return

    object_key = "rtsm-data/rtsm_data.json"
    serialized = json.dumps(snapshot)
    s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=serialized)
    print(f"RTSM data uploaded to S3 as {object_key}")


# Upload to S3
def upload_to_s3(data, file_name, folder):
    """Write *data* as JSON to ``<folder>/<file_name>`` in the configured bucket.

    Args:
        data: Object to serialize with ``json.dumps``.
        file_name: Name of the object inside *folder*.
        folder: Key prefix the object is stored under.

    Data that is not JSON-serializable is reported and not uploaded.
    """
    if not is_valid_json(data):
        print(f"Invalid JSON data. File {file_name} not uploaded to S3.")
        return

    object_key = f"{folder}/{file_name}"
    serialized = json.dumps(data)
    s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=serialized)
    print(f"Data uploaded to S3 as {object_key}")


# Kafka Consumer
def consume_kafka_topic(topic_name, group_id, folder_name):
    """Drain a Kafka topic and upload each message to S3 as its own JSON file.

    Args:
        topic_name: Topic to read from; also used as the file-name prefix.
        group_id: Consumer group used for offset tracking.
        folder_name: S3 key prefix the files are written under.

    Iteration ends once no message arrives for 5 seconds
    (``consumer_timeout_ms=5000``). The consumer is always closed afterwards,
    including when an upload or deserialization error propagates.
    """
    consumer = KafkaConsumer(
        topic_name,
        bootstrap_servers=[os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'localhost:9092')],
        auto_offset_reset='earliest',
        enable_auto_commit=True,
        group_id=group_id,
        value_deserializer=lambda x: json.loads(x.decode('utf-8')),
        consumer_timeout_ms=5000
    )

    try:
        for message in consumer:
            data = message.value
            # The offset makes the file name unique per message within a partition.
            file_name = f"{topic_name}_data_{message.offset}.json"
            upload_to_s3(data, file_name, folder_name)
            print(f"Message with offset {message.offset} from {topic_name} uploaded to S3.")
    finally:
        # This function is called on every polling cycle; without an explicit
        # close each call would leave a consumer and its connections behind.
        consumer.close()


# Consumer Functions
def consume_weather_data():
    """Drain the 'weather-data' topic into the 'weather-data' S3 folder."""
    consume_kafka_topic(
        topic_name='weather-data',
        group_id='weather-consumer-group',
        folder_name='weather-data',
    )


def consume_traffic_data():
    """Drain the 'traffic-data' topic into the 'traffic-data' S3 folder."""
    consume_kafka_topic(
        topic_name='traffic-data',
        group_id='traffic-consumer-group',
        folder_name='traffic-data',
    )


def consume_transit_data():
    """Drain the 'transit-data' topic into the 'transit-data' S3 folder."""
    consume_kafka_topic(
        topic_name='transit-data',
        group_id='transit-consumer-group',
        folder_name='transit-data',
    )


# Main function that triggers all actions
def main():
    """Run one full pipeline cycle: produce, archive RTSM data, then consume.

    Each step runs in its own try/except so that a failure in one source
    (for example a single API being unreachable) is reported without
    preventing the remaining steps from running.
    """
    steps = (
        # Produce data
        produce_weather_data,
        produce_traffic_data,
        produce_transit_data,
        # Store RTSM data in S3
        store_rtsm_data_in_s3,
        # Consume data
        # NOTE(review): traffic data is produced above but consume_traffic_data
        # is not invoked here; confirm whether that omission is intentional.
        consume_weather_data,
        consume_transit_data,
    )
    for step in steps:
        try:
            step()
        except Exception as e:
            # Top-level boundary: report and carry on with the next step.
            print(f"Error: {e}")


if __name__ == '__main__':
    # Pause between pipeline cycles: five minutes, expressed in seconds.
    cycle_pause_seconds = 300
    # Repeat the pipeline indefinitely until the process is stopped.
    while True:
        main()
        time.sleep(cycle_pause_seconds)

