In [1]:
pip install kafka-python

Defaulting to user installation because normal site-packages is not writeable
Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
     -------------------------------------- 246.5/246.5 kB 3.8 MB/s eta 0:00:00
Installing collected packages: kafka-python
Successfully installed kafka-python-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [5]:
from kafka import KafkaProducer
import requests
import json

producer = KafkaProducer(bootstrap_servers='localhost:9092', value_serializer=lambda v: json.dumps(v).encode('utf-8'))
def get_weather_data(city):
    api_key = 'apikey'
    url = f'http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}'
    response = requests.get(url)
    return response.json()

def produce_weather_data():
    city = 'San Francisco'
    data = get_weather_data(city)
    producer.send('weather-data', value=data)
    print("Weather data sent to Kafka")

produce_weather_data()

Weather data sent to Kafka


In [2]:
from kafka import KafkaProducer
import requests
import json

# Initialize Kafka producer
producer = KafkaProducer(
    bootstrap_servers='localhost:9092', 
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Function to get weather data from OpenWeather API
def get_weather_data(city):
    api_key = 'apikey'
    url = f'http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}'
    response = requests.get(url)
    return response.json()

# Function to assign temperature and visibility ranges based on conditions
def assign_ranges(data):
    # Assign Temperature Range
    temp = data.get("main", {}).get("temp", None)
    if temp is not None:
        if temp <= 273.15:  # 0°C (32°F) and below
            data["Temperature_Range"] = "0-32F"
        elif 273.15 < temp <= 288.71:  # 32°F to 60°F
            data["Temperature_Range"] = "32-60F"
        elif 288.71 < temp <= 299.82:  # 60°F to 80°F
            data["Temperature_Range"] = "60-80F"
        else:  # Above 80°F
            data["Temperature_Range"] = "80+F"
    
    # Assign Visibility Range
    visibility = data.get("visibility", None)
    if visibility is not None:
        if visibility <= 1609:  # 0-1 mile
            data["Visibility_Range"] = "0-1 mile"
        elif 1609 < visibility <= 8046:  # 1-5 miles
            data["Visibility_Range"] = "1-5 miles"
        else:  # Greater than 5 miles
            data["Visibility_Range"] = "5+ miles"

    return data

# Function to fetch, transform, and send weather data to Kafka
def produce_weather_data():
    city = 'San Francisco'
    data = get_weather_data(city)
    
    # Add ranges to the data
    transformed_data = assign_ranges(data)
    
    # Send transformed data to Kafka
    producer.send('weather-data', value=transformed_data)
    print("Weather data with ranges sent to Kafka")

# Run the function to produce weather data
produce_weather_data()

Weather data with ranges sent to Kafka


In [2]:
from kafka import KafkaProducer
import requests
import json
import time

# Initialize Kafka producer
producer = KafkaProducer(
    bootstrap_servers='localhost:9092', 
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# List of Bay Area cities
bay_area_cities = [
    "San Francisco", "Oakland", "San Jose", "Berkeley", "Santa Clara",
    "Sunnyvale", "Fremont", "Palo Alto", "Mountain View", "Hayward",
    "Milpitas", "Redwood City", "Daly City", "Union City", "San Mateo",
    "Cupertino", "South San Francisco"
]

# Function to get weather data from OpenWeather API
def get_weather_data(city):
    api_key = 'apikey'
    url = f'http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}&units=metric'  # Fetch data in Celsius
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch data for {city}: {e}")
        return None

# Function to assign temperature and visibility ranges based on conditions
def assign_ranges(data):
    if not data:
        return None
    
    # Assign Temperature Range
    temp = data.get("main", {}).get("temp", None)
    if temp is not None:
        if temp <= 0:  # 0°C and below
            data["Temperature_Range"] = "0°C and below"
        elif 0 < temp <= 15:  # 1°C to 15°C
            data["Temperature_Range"] = "1°C to 15°C"
        elif 15 < temp <= 27:  # 16°C to 27°C
            data["Temperature_Range"] = "16°C to 27°C"
        else:  # Above 27°C
            data["Temperature_Range"] = "Above 27°C"
    
    # Assign Visibility Range
    visibility = data.get("visibility", None)
    if visibility is not None:
        if visibility <= 1000:  # 0-1 km
            data["Visibility_Range"] = "0-1 km"
        elif 1000 < visibility <= 5000:  # 1-5 km
            data["Visibility_Range"] = "1-5 km"
        else:  # Greater than 5 km
            data["Visibility_Range"] = "5+ km"

    return data

# Function to fetch, transform, and send weather data to Kafka for Bay Area cities
def produce_weather_data_for_bay_area():
    for city in bay_area_cities:
        data = get_weather_data(city)
        if data:
            # Add ranges to the data
            transformed_data = assign_ranges(data)
            if transformed_data:
                # Add city name for context
                transformed_data["city"] = city
                # Send transformed data to Kafka
                producer.send('weather-data', value=transformed_data)
                print(f"Weather data for {city} with ranges sent to Kafka")
        else:
            print(f"Skipping {city} due to missing or invalid data")
        time.sleep(1)  # To avoid hitting the API rate limit

# Run the function to produce weather data for Bay Area cities
produce_weather_data_for_bay_area()

Weather data for San Francisco with ranges sent to Kafka
Weather data for Oakland with ranges sent to Kafka
Weather data for San Jose with ranges sent to Kafka
Weather data for Berkeley with ranges sent to Kafka
Weather data for Santa Clara with ranges sent to Kafka
Weather data for Sunnyvale with ranges sent to Kafka
Weather data for Fremont with ranges sent to Kafka
Weather data for Palo Alto with ranges sent to Kafka
Weather data for Mountain View with ranges sent to Kafka
Weather data for Hayward with ranges sent to Kafka
Weather data for Milpitas with ranges sent to Kafka
Weather data for Redwood City with ranges sent to Kafka
Weather data for Daly City with ranges sent to Kafka
Weather data for Union City with ranges sent to Kafka
Weather data for San Mateo with ranges sent to Kafka
Weather data for Cupertino with ranges sent to Kafka
Weather data for South San Francisco with ranges sent to Kafka


In [None]:
!pip uninstall kafka

In [4]:
import requests
import json

def get_transit_data():
    url = 'http://api.511.org/traffic/events?api_key=d5b207db-af6a-466d-9402-47aa3e9af391'
    response = requests.get(url)
    data = response.content.decode('utf-8-sig')
    return json.loads(data)

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         value_serializer=lambda v: json.dumps(v).encode('utf-8')
                        )
def produce_transit_data():
    data = get_transit_data()
    producer.send('transit-data', value=data)
    print("Transit data sent to Kafka")

produce_transit_data()

Transit data sent to Kafka


In [6]:
import requests
import json
from kafka import KafkaProducer
def get_transit_data():
    api_key = 'apikey'  # Replace with your API key
    operator_id = 'AC'  # Replace with the specific operator ID
    url = f'http://api.511.org/transit/StopMonitoring?api_key={api_key}&agency={operator_id}'
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.content.decode('utf-8-sig')
        return json.loads(data)
    

producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         value_serializer=lambda v: json.dumps(v).encode('utf-8')
                        )
def produce_transit_data():
    data = get_transit_data()
    producer.send('rtsm', value=data)
    print("Transit data sent to Kafka")

produce_transit_data()

Transit data sent to Kafka


In [4]:
from kafka import KafkaProducer
import requests
import json

producer = KafkaProducer(bootstrap_servers='localhost:9092', value_serializer=lambda v: json.dumps(v).encode('utf-8'))
def get_traffic_data(origin, destination):
    api_key = 'apikey'
    url = f'https://maps.googleapis.com/maps/api/directions/json?origin={origin}&destination={destination}&key={api_key}'
    response = requests.get(url)
    return response.json()

def produce_traffic_data():
    origin = 'San Francisco'
    destination = 'Los Angeles'
    data = get_traffic_data(origin, destination)
    producer.send('traffic-data', value=data)
    print("Traffic data sent to Kafka")

produce_traffic_data()

Traffic data sent to Kafka


In [4]:
import os
os.environ['AWS_ACCESS_KEY'] = 'yourkey'
os.environ['AWS_SECRET_KEY'] = 'yourkey'

In [2]:
!pip install boto3

Defaulting to user installation because normal site-packages is not writeable


In [6]:
import boto3
from kafka import KafkaConsumer
import json
import os

# Initialize the S3 client
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY'),
    aws_secret_access_key=os.getenv('AWS_SECRET_KEY'),
    region_name='us-east-1'
)
bucket_name = 'transitopti'

def upload_to_s3(data, file_name, folder):
    key = f"{folder}/{file_name}"
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=json.dumps(data))
    print(f"Data uploaded to S3 as {key}")

def consume_kafka_topic(topic_name, group_id, folder_name):
    consumer = KafkaConsumer(
        topic_name,
        bootstrap_servers=['localhost:9092'],
        auto_offset_reset='earliest',  
        enable_auto_commit=True,
        group_id=group_id,  # Consumer group name
        value_deserializer=lambda x: json.loads(x.decode('utf-8')),
        consumer_timeout_ms = 5000
    )
    
    for message in consumer:
        data = message.value
        file_name = f"{topic_name}data{message.offset}.json"
        upload_to_s3(data, file_name, folder_name)
        print(f"Message with offset {message.offset} from {topic_name} uploaded to S3.")

def consume_traffic_data():
    consume_kafka_topic('traffic-data', 'traffic-consumer-group', 'traffic-data')

def consume_transit_data():
    consume_kafka_topic('transit-data', 'transit-consumer-group', 'transit-data')

def consume_weather_data():
    consume_kafka_topic('weather-data', 'weather-consumer-group', 'weather-data')
consume_weather_data()

Data uploaded to S3 as weather-data/weather-datadata0.json
Message with offset 0 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata1.json
Message with offset 1 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata2.json
Message with offset 2 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata3.json
Message with offset 3 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata0.json
Message with offset 0 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata1.json
Message with offset 1 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata2.json
Message with offset 2 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata3.json
Message with offset 3 from weather-data uploaded to S3.
Data uploaded to S3 as weather-data/weather-datadata4.json
Message with offset 4

In [8]:
import requests
import json

# Function to fetch transit stop monitoring data with format=json
def get_stop_monitoring_data():
    # Define the endpoint URL with all necessary parameters
    url = 'http://api.511.org/transit/StopMonitoring'
    params = {
        'api_key': 'apikey',
        'agency': 'AC',  # Replace 'AC' with the actual agency/operator ID
        'format': 'json'
    }
    
    try:
        # Send GET request with parameters
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors
        
        # Decode the response content explicitly using utf-8-sig
        data = response.content.decode('utf-8-sig')  # Handle UTF-8 BOM
        return json.loads(data)  # Parse the decoded JSON
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    
# Fetch stop monitoring data and upload it to S3
def upload_stop_monitoring_data():
    data = get_stop_monitoring_data()
    if data:
        file_name = "stop_monitoring_data.json"
        upload_to_s3(data, file_name, 'stop-monitoring-data')
    else:
        print("No data to upload.")

# Call the function to upload stop monitoring data
upload_stop_monitoring_data()

Data uploaded to S3 as stop-monitoring-data/stop_monitoring_data.json


In [12]:
import requests
import json
from datetime import datetime

# Function to fetch transit stop monitoring data and calculate delays
def get_stop_monitoring_data_with_delays():
    # Define the endpoint URL with all necessary parameters
    url = 'http://api.511.org/transit/StopMonitoring'
    params = {
        'api_key': 'apikey',
        'agency': 'AC',  # Replace 'AC' with the actual agency/operator ID
        'format': 'json'
    }
    
    try:
        # Send GET request with parameters
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors
        
        # Decode the response content explicitly using utf-8-sig
        data = response.content.decode('utf-8-sig')  # Handle UTF-8 BOM
        parsed_data = json.loads(data)  # Parse the decoded JSON

        # Extract relevant information
        monitored_visits = (
            parsed_data.get("ServiceDelivery", {})
            .get("StopMonitoringDelivery", {})
            .get("MonitoredStopVisit", [])
        )

        # Process each monitored stop visit and calculate delays
        delays = []
        for visit in monitored_visits:
            journey = visit.get("MonitoredVehicleJourney", {})
            monitored_call = journey.get("MonitoredCall", {})

            # Extract times
            aimed_arrival = monitored_call.get("AimedArrivalTime")
            expected_arrival = monitored_call.get("ExpectedArrivalTime")
            aimed_departure = monitored_call.get("AimedDepartureTime")
            expected_departure = monitored_call.get("ExpectedDepartureTime")

            # Parse times and calculate delays
            arrival_delay = None
            departure_delay = None

            if aimed_arrival and expected_arrival:
                aimed_arrival_dt = datetime.fromisoformat(aimed_arrival.replace("Z", "+00:00"))
                expected_arrival_dt = datetime.fromisoformat(expected_arrival.replace("Z", "+00:00"))
                arrival_delay = (expected_arrival_dt - aimed_arrival_dt).total_seconds()

            if aimed_departure and expected_departure:
                aimed_departure_dt = datetime.fromisoformat(aimed_departure.replace("Z", "+00:00"))
                expected_departure_dt = datetime.fromisoformat(expected_departure.replace("Z", "+00:00"))
                departure_delay = (expected_departure_dt - aimed_departure_dt).total_seconds()

            # Append relevant details to the delays list
            delays.append({
                "StopPointName": monitored_call.get("StopPointName"),
                "AimedArrivalTime": aimed_arrival,
                "ExpectedArrivalTime": expected_arrival,
                "ArrivalDelaySeconds": arrival_delay,
                "AimedDepartureTime": aimed_departure,
                "ExpectedDepartureTime": expected_departure,
                "DepartureDelaySeconds": departure_delay
            })

        return delays

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
# Fetch stop monitoring data and upload it to S3
def upload_stop_monitoring_data_with_delays():
    data = get_stop_monitoring_data_with_delays()
    if data:
        file_name = "stop_monitoring_data.json"
        upload_to_s3(data, file_name, 'stop-monitoring-data')
    else:
        print("No data to upload.")

# Call the function to upload stop monitoring data
upload_stop_monitoring_data_with_delays()

Data uploaded to S3 as stop-monitoring-data/stop_monitoring_data.json
