In [331]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np    


In [332]:
#clean meteorological data time stamps and eliminate seemingly irrelevant columns
meteorological_df = pd.read_excel('hw03-data/Sensor Data/Meteorological Data.xlsx')
if 'Date' in meteorological_df.columns:
    meteorological_df['timeStamp'] = pd.to_datetime(meteorological_df['Date'], errors='coerce')
else:
    print('Column "Date" not found')
meteorological_df = meteorological_df.drop(columns=['Date'])
meteorological_df = meteorological_df.drop(columns=['Unnamed: 3'])
meteorological_df = meteorological_df.drop(columns=['Elevation (m)'])

meteorological_df.head()

Unnamed: 0,Wind Direction,Wind Speed (m/s),timeStamp
0,190.5,4.0,2016-04-01 00:00:00
1,203.3,5.0,2016-04-01 03:00:00
2,201.1,5.2,2016-04-01 06:00:00
3,204.9,4.1,2016-04-01 09:00:00
4,207.0,3.6,2016-04-01 12:00:00


In [333]:
if 'Wind Direction' in meteorological_df.columns:
    meteorological_df['direction'] = (meteorological_df['Wind Direction'] / 45).round()

meteorological_df = meteorological_df[~meteorological_df['direction'].isna()]

#meteorological_df['direction'] = meteorological_df['direction'].replace({9.0: 'B'})
meteorological_df['direction'] = meteorological_df['direction'].replace({1.0: 'N'})
meteorological_df['direction'] = meteorological_df['direction'].replace({2.0: 'NE'})
meteorological_df['direction'] = meteorological_df['direction'].replace({3.0: 'E'})
meteorological_df['direction'] = meteorological_df['direction'].replace({4.0: 'SE'})    
meteorological_df['direction'] = meteorological_df['direction'].replace({5.0: 'S'})
meteorological_df['direction'] = meteorological_df['direction'].replace({6.0: 'SW'})   
meteorological_df['direction'] = meteorological_df['direction'].replace({7.0: 'W'})
meteorological_df['direction'] = meteorological_df['direction'].replace({8.0: 'NW'})

# Create colors column
meteorological_df['colors'] = meteorological_df['direction'].apply(lambda x: color_mapping.get(x, 'gray'))
""" for index, row in meteorological_df.iterrows():
    if row['direction'] == 0.0:
        print(row['Wind Direction'])
        print(row['Wind Direction']/45) """

#meteorological_df = meteorological_df[meteorological_df['colors'] != 'gray']

#meteorological_df.head()
# Check unique values in the direction column
unique_directions = meteorological_df['direction'].unique()
print("Unique Directions:", unique_directions)

Unique Directions: ['SE' 'S' 'SW' 'W' 'NW' 0.0 'E' 'N' 'NE']


In [334]:
#clean sensor data time stamps 
sensor_df = pd.read_excel('hw03-data/Sensor Data/Sensor Data.xlsx')
if 'Date Time ' in sensor_df.columns:
    sensor_df['timeStamp'] = pd.to_datetime(sensor_df['Date Time '], errors='coerce')
else:
    print('Column "Date Time" not found')
sensor_df = sensor_df.drop(columns=['Date Time '])
#sensor_df.head()

In [335]:
# Count unique dates in both DataFrames
sensor_dates = sensor_df['timeStamp'].dt.date.unique()
meteorological_dates = meteorological_df['timeStamp'].dt.date.unique()

In [336]:
# Find common dates
common_dates = set(sensor_dates) & set(meteorological_dates)

# Filter both DataFrames to keep only rows with common dates
sensor_df_filtered = sensor_df[sensor_df['timeStamp'].dt.date.isin(common_dates)]
meteorological_df_filtered = meteorological_df[meteorological_df['timeStamp'].dt.date.isin(common_dates)]

In [337]:
# Count unique dates in both DataFrames
sensor_date_count = sensor_df_filtered['timeStamp'].dt.date.nunique()
meteorological_date_count = meteorological_df_filtered['timeStamp'].dt.date.nunique()

# Print the counts
print(f"Unique Dates in Sensor Data: {sensor_date_count}")
print(f"Unique Dates in Meteorological Data: {meteorological_date_count}")

Unique Dates in Sensor Data: 89
Unique Dates in Meteorological Data: 89


In [338]:
# Group by date and calculate the average wind direction and speed
meteorological_df_filtered['day'] = meteorological_df_filtered['timeStamp'].dt.round('1d')
average_meteorological_df_filtered = meteorological_df_filtered.groupby('day').size().reset_index(name='Calls')
average_meteorological_df_filtered = meteorological_df_filtered.groupby('day').agg(
    Average_Wind_Direction=('Wind Direction', 'mean'),
    Average_Wind_Speed=('Wind Speed (m/s)', 'mean')
).reset_index()
average_meteorological_df_filtered.head()

Unnamed: 0,day,Average_Wind_Direction,Average_Wind_Speed
0,2016-04-01,201.36,4.38
1,2016-04-02,257.028571,2.328571
2,2016-04-03,262.266667,2.644444
3,2016-04-04,299.128571,3.871429
4,2016-04-05,301.844444,1.522222


In [342]:

# Load the sensor factory data
sensor_factory_df = pd.read_excel('sensors_factories.xlsx')
sensor_factory_df['x'] = sensor_factory_df['x'] - 100
sensor_factory_df['y'] = sensor_factory_df['y'] - 100

# Create a scatter plot for sensor factories
fig = px.scatter(
    x=sensor_factory_df['x'], 
    y=sensor_factory_df['y'], 
    color=sensor_factory_df['Type'], 
    text=sensor_factory_df['Label']
)

# Update layout for scatter plot
fig.update_layout(
    height=1000, 
    width=1000, 
    title_text="Average Wind Direction Per Day with Factories and Sensors", 
    yaxis=dict(range=[-100, 100], title='Latitude'),
    xaxis=dict(range=[-100, 100], title='Longitude'),
)

fig.update_traces(textposition='top center')

# Generate x values for the lines
x_line = np.linspace(-100, 100, 100)  # Adjust range as needed

# Initialize a set to keep track of added directions
added_directions = set()

# Add lines to the figure
for index, row in meteorological_df_filtered.iterrows():
    slope = row['Wind Direction'] 
    line_color = row['colors']  
    direction = row['direction'] 

    # Calculate the y values based on the slope
    y_line = slope * x_line

    # Check if the direction has already been added to the legend
    if direction not in added_directions:
        fig.add_trace(go.Scatter( 
            x=x_line,  # X values for the line
            y=y_line,  # Y values calculated from the slope
            mode='lines',  # Draw as a line
            line=dict(color=line_color, width=1),  # Set the line color
            name=direction,  # Use direction as the legend entry
            showlegend=True  # Show legend for this line
        ))
        added_directions.add(direction)  # Add direction to the set
    else:
        # If the direction is already in the legend, we can add a line without a legend entry
        fig.add_trace(go.Scatter(
            x=x_line,  # X values for the line
            y=y_line,  # Y values calculated from the slope
            mode='lines',  
            line=dict(color=line_color, width=2),  # Set the line color
            name='',  # No legend entry for these lines
            showlegend=False  # Do not show in the legend
        ))

# Show the plot
fig.show()
