In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [3]:
# import csv
# bike_sharing = pd.read_csv(r'

### Bring in Capital Bike Share DataSet

In [4]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
bike_sharing = fetch_ucirepo(id=275) 
  
# data (as pandas dataframes) 
X = bike_sharing.data.features 
y = bike_sharing.data.targets 
  
# metadata 
print(bike_sharing.metadata) 
  
# variable information 
print(bike_sharing.variables) 

{'uci_id': 275, 'name': 'Bike Sharing', 'repository_url': 'https://archive.ics.uci.edu/dataset/275/bike+sharing+dataset', 'data_url': 'https://archive.ics.uci.edu/static/public/275/data.csv', 'abstract': 'This dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.', 'area': 'Social Science', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 17389, 'num_features': 13, 'feature_types': ['Integer', 'Real'], 'demographics': [], 'target_col': ['cnt'], 'index_col': ['instant'], 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2013, 'last_updated': 'Sun Mar 10 2024', 'dataset_doi': '10.24432/C5W894', 'creators': ['Hadi Fanaee-T'], 'intro_paper': {'ID': 422, 'type': 'NATIVE', 'title': 'Event labeling combining ensemble detectors and background knowledge', 'authors': 'Hadi Fanaee-T, João Gama', 'venue': 'Progress

In [5]:
X.head()

Unnamed: 0,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0
1,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0
2,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0
3,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0
4,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0


In [6]:
y.head()

Unnamed: 0,cnt
0,16
1,40
2,32
3,13
4,1


In [7]:
print(X)

           dteday  season  yr  mnth  hr  holiday  weekday  workingday  \
0      2011-01-01       1   0     1   0        0        6           0   
1      2011-01-01       1   0     1   1        0        6           0   
2      2011-01-01       1   0     1   2        0        6           0   
3      2011-01-01       1   0     1   3        0        6           0   
4      2011-01-01       1   0     1   4        0        6           0   
...           ...     ...  ..   ...  ..      ...      ...         ...   
17374  2012-12-31       1   1    12  19        0        1           1   
17375  2012-12-31       1   1    12  20        0        1           1   
17376  2012-12-31       1   1    12  21        0        1           1   
17377  2012-12-31       1   1    12  22        0        1           1   
17378  2012-12-31       1   1    12  23        0        1           1   

       weathersit  temp   atemp   hum  windspeed  
0               1  0.24  0.2879  0.81     0.0000  
1               1  0.

In [8]:
print(bike_sharing)

{'data': {'ids':        instant
0            1
1            2
2            3
3            4
4            5
...        ...
17374    17375
17375    17376
17376    17377
17377    17378
17378    17379

[17379 rows x 1 columns], 'features':            dteday  season  yr  mnth  hr  holiday  weekday  workingday  \
0      2011-01-01       1   0     1   0        0        6           0   
1      2011-01-01       1   0     1   1        0        6           0   
2      2011-01-01       1   0     1   2        0        6           0   
3      2011-01-01       1   0     1   3        0        6           0   
4      2011-01-01       1   0     1   4        0        6           0   
...           ...     ...  ..   ...  ..      ...      ...         ...   
17374  2012-12-31       1   1    12  19        0        1           1   
17375  2012-12-31       1   1    12  20        0        1           1   
17376  2012-12-31       1   1    12  21        0        1           1   
17377  2012-12-31       1   1    1

In [9]:
bike_sharing_concat = pd.concat([X,y],axis=1)

In [10]:
bike_sharing_concat

Unnamed: 0,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,16
1,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,40
2,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,32
3,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,13
4,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,2012-12-31,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,119
17375,2012-12-31,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,89
17376,2012-12-31,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,90
17377,2012-12-31,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,61
