In [15]:
import pandas as pd
import altair as alt

# Load the Seoul bike-rental dataset from the project repository.
# The file is decoded as latin-1 (several column names contain the '°' character).
url = 'https://raw.githubusercontent.com/yuwangy/Seoul_bike_rental_viz/main/SeoulBikeData.csv'
data = pd.read_csv(url, encoding='latin-1')

# Parse the day/month/year strings in 'Date' into datetimes so the .dt
# accessor can be used for the derived columns below.
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')

# 'Month' (ordinal attribute): calendar month number taken from the date.
data['Month'] = data['Date'].dt.month

# 'Weekend' (nominal attribute): 'Yes' when the date falls on a weekend,
# 'No' otherwise. dayofweek counts from Monday = 0, so Saturday and Sunday
# are 5 and 6. The comparison is vectorised instead of a per-row lambda.
data['Weekend'] = (data['Date'].dt.dayofweek >= 5).map({True: 'Yes', False: 'No'})

# A cell renders only its last expression, so the frame is previewed once,
# here, after all derived columns exist.
data.head(10)



Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day,Month,Weekend
0,2017-12-01,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
1,2017-12-01,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
2,2017-12-01,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
3,2017-12-01,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
4,2017-12-01,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
5,2017-12-01,100,5,-6.4,37,1.5,2000,-18.7,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
6,2017-12-01,181,6,-6.6,35,1.3,2000,-19.5,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
7,2017-12-01,460,7,-7.4,38,0.9,2000,-19.3,0.0,0.0,0.0,Winter,No Holiday,Yes,12,No
8,2017-12-01,930,8,-7.6,37,1.1,2000,-19.8,0.01,0.0,0.0,Winter,No Holiday,Yes,12,No
9,2017-12-01,490,9,-6.5,27,0.5,1928,-22.4,0.23,0.0,0.0,Winter,No Holiday,Yes,12,No


In [16]:
# Structural overview of the prepared frame.

# Dimensions as (rows, columns)
print(data.shape)

# Column dtypes and non-null counts. DataFrame.info() writes its report
# straight to stdout and returns None, so it is called bare; wrapping it in
# print() would add a stray "None" line after the report.
data.info()

# Column labels
print(data.columns)



(8760, 16)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Date                       8760 non-null   datetime64[ns]
 1   Rented Bike Count          8760 non-null   int64         
 2   Hour                       8760 non-null   int64         
 3   Temperature(°C)            8760 non-null   float64       
 4   Humidity(%)                8760 non-null   int64         
 5   Wind speed (m/s)           8760 non-null   float64       
 6   Visibility (10m)           8760 non-null   int64         
 7   Dew point temperature(°C)  8760 non-null   float64       
 8   Solar Radiation (MJ/m2)    8760 non-null   float64       
 9   Rainfall(mm)               8760 non-null   float64       
 10  Snowfall (cm)              8760 non-null   float64       
 11  Seasons                    8760 non-null   object        


In [17]:
# Obtain the extremes of each numeric attribute to establish its value range.


def report_extremes(frame, column, high_label, low_label):
    """Print and return the largest and smallest value of one column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that holds the column.
    column : str
        Label of the column to summarise.
    high_label, low_label : str
        Text printed in front of the maximum and the minimum respectively.

    Returns
    -------
    tuple
        ``(maximum, minimum)`` of ``frame[column]``.
    """
    highest = frame[column].max()
    lowest = frame[column].min()
    # Maximum is reported first, then the minimum.
    print(f'{high_label}: {highest}')
    print(f'{low_label}: {lowest}')
    return highest, lowest


# Each call binds the same module-level names the notebook used before, so
# any later cell that reads them keeps working.

# Rented Bike Count
max_rented_bike_count, min_rented_bike_count = report_extremes(
    data, 'Rented Bike Count', 'maximum bikes rented', 'minimum bikes rented')

# Temperature
highest_temperature, lowest_temperature = report_extremes(
    data, 'Temperature(°C)', 'highest temperature', 'lowest temperature')

# Humidity (label typo 'lowest humidit' corrected to 'lowest humidity')
highest_humidity, lowest_humidity = report_extremes(
    data, 'Humidity(%)', 'highest humidity', 'lowest humidity')

# Wind speed
max_wind_speed, min_wind_speed = report_extremes(
    data, 'Wind speed (m/s)', 'maximum wind speed', 'minimum wind speed')

# Visibility
max_visibility, min_visibility = report_extremes(
    data, 'Visibility (10m)', 'maximum visibility', 'minimum visibility')

# Dew point temperature
highest_dew_point_temperature, lowest_dew_point_temperature = report_extremes(
    data, 'Dew point temperature(°C)',
    'highest dew point temperature', 'lowest dew point temperature')

# Solar radiation
max_solar_radiation, min_solar_radiation = report_extremes(
    data, 'Solar Radiation (MJ/m2)', 'maximum solar radiation', 'minimum solar radiation')

# Rainfall
max_rainfall, min_rainfall = report_extremes(
    data, 'Rainfall(mm)', 'maximum rainfall', 'minimum rainfall')

# Snowfall
max_snowfall, min_snowfall = report_extremes(
    data, 'Snowfall (cm)', 'maximum snowfall', 'minimum snowfall')



maximum bikes rented: 3556
minimum bikes rented: 0
highest temperature: 39.4
lowest temperature: -17.8
highest humidity: 98
lowest humidit: 0
maximum wind speed: 7.4
minimum wind speed: 0.0
maximum visibility: 2000
minimum visibility: 27
highest dew point temperature: 27.2
lowest dew point temperature: -30.6
maximum solar radiation: 3.52
minimum solar radiation: 0.0
maximum rainfall: 35.0
minimum rainfall: 0.0
maximum snowfall: 8.8
minimum snowfall: 0.0


In [22]:
# Load the data-abstraction table for the Seoul Bike Rental dataset: a CSV
# that lists, per attribute, its semantics, attribute type and cardinality.
# NOTE(review): the rendered table shows the attribute names under an
# auto-generated 'Unnamed: 0' header; consider naming that column, after
# confirming no later cell relies on the current label.
# NOTE: `url` is rebound here and no longer points at the main dataset.
url = 'https://raw.githubusercontent.com/yuwangy/Seoul_bike_rental_viz/main/Seoul%20Bike%20Rental%20Data%20Abstraction.csv'
data_abstraction = pd.read_csv(filepath_or_buffer=url)

# Bare final expression so the notebook renders the table.
data_abstraction



Unnamed: 0.1,Unnamed: 0,Semantics,Attribute Type,Cardinality
0,Date,The specific day of the month of the year,Temporal,01-12-2017 to 30-11-2018
1,Rented Bike Count,The specific number of bikes rented in given hour,Quantitative,0 - 3556 bikes rented
2,Hour,The specific hour of the day ...,Ordinal,24
3,Temperature (°C),The temperature in given hour,Quantitative,-17.8 - 39.4 °C
4,Humidity (%),The humidity in given hour,Quantitative,0 - 98 % humidity
5,Wind speed (m/s),The wind speed in given hour,Quantitative,0 - 7.4 m/s wind speed
6,Visibility (10m),The visibility (how far you can see clearly) i...,Quantitative,27 - 2000 (10m) visibility
7,Dew point temperature (°C),The dew point temperature in given hour,Quantitative,-30.6 - 27.2 °C dew point temperature
8,Solar Radiation (MJ/m2),The amount of solar radiation in given hour,Quantitative,0 - 3.52 (MJ/m2)
9,Rainfall (mm),The amount of rain in given hour,Quantitative,0 - 35 (mm)
