In [17]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
import glob
import warnings
from sklearn.linear_model import LinearRegression

warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'

## Getting the data

In [2]:
filenamesList = glob.glob('ADFC/*.csv')

In [3]:
list_of_dfs = []
for filename in filenamesList:
    df_data = pd.read_csv(filename, index_col=0)
    df_data['date'] = pd.to_datetime(df_data['date'])
    df_data = df_data.drop('timestamp', 1)
    df_data['group'] = filename
    list_of_dfs.append(df_data)
df = pd.concat(list_of_dfs)

In [4]:
df["number"] = df["group"].str.extract('(\d+)', expand=True)
df['number'] = df['number'].astype(float)

In [5]:
eco_bike_data = pd.read_excel('ADFC/fetched_eco_bike_data.xlsx')

In [6]:
merged_df = pd.merge(df, eco_bike_data, on='number', how='left')

In [7]:
merged_df

Unnamed: 0,date,comptage,group,number,city,latitude,longitude,id_list,Distance to center [km],fetched_url
0,2017-11-28 00:00:00,0,ADFC/Rostock_11.csv,11.0,Berlin,52.457758,13.519360,100032237.0,11.0239,https://data.eco-counter.com/public2/?id=10003...
1,2017-11-28 00:00:00,0,ADFC/Rostock_11.csv,11.0,Düsseldorf,51.231660,6.763200,100047280.0,1.1481,https://data.eco-counter.com/public2/?id=10004...
2,2017-11-28 00:00:00,0,ADFC/Rostock_11.csv,11.0,Köln,50.920549,6.944770,100045094.0,2.2490,https://data.eco-counter.com/public2/?id=10004...
3,2017-11-28 00:00:00,0,ADFC/Rostock_11.csv,11.0,Rostock,54.184589,12.237238,100041597.0,12.4519,https://data.eco-counter.com/public2/?id=10004...
4,2017-11-28 00:00:00,0,ADFC/Rostock_11.csv,11.0,Stuttgart,48.784240,9.147031,100063203.0,2.5009,https://data.eco-counter.com/public2/?id=10006...
...,...,...,...,...,...,...,...,...,...,...
84226512,2021-09-29 23:45:00,3,ADFC/Heidelberg_1.csv,1.0,Köln,50.913017,6.948681,100019755.0,2.9272,https://data.eco-counter.com/public2/?id=10001...
84226513,2021-09-29 23:45:00,3,ADFC/Heidelberg_1.csv,1.0,Rostock,54.102950,12.074080,100005392.0,3.7428,https://data.eco-counter.com/public2/?id=10000...
84226514,2021-09-29 23:45:00,3,ADFC/Heidelberg_1.csv,1.0,Stuttgart,48.799126,9.210372,100034882.0,3.1989,https://data.eco-counter.com/public2/?id=10003...
84226515,2021-09-29 23:45:00,3,ADFC/Heidelberg_1.csv,1.0,Osnabrück,52.279965,8.055056,100050631.0,1.0234,https://data.eco-counter.com/public2/?id=10005...


In [15]:
#merged_df.groupby(merged_df['date']).agg({'comptage': 'sum', 'Distance to center [km]': 'mean'})

In [12]:
final_df = merged_df.groupby(merged_df['Distance to center [km]']).agg({'comptage': 'sum'})

In [28]:
final_df['distance_km'] = final_df.index
final_df

Unnamed: 0_level_0,comptage,distance_km
Distance to center [km],Unnamed: 1_level_1,Unnamed: 2_level_1
0.0691,54831590,0.0691
0.1432,54831590,0.1432
0.2782,10889367,0.2782
0.3123,54831590,0.3123
0.3506,54831590,0.3506
...,...,...
11.4888,5415041,11.4888
11.4972,21590214,11.4972
11.8009,9814600,11.8009
12.4519,11095313,12.4519


In [55]:
fig = px.line(final_df, x='distance_km', y='comptage',
              labels={
                     "distance_km": "Distance to center [km]",
                     "comptage": "comptage"
                 })
fig.show()

In [63]:
fig = px.scatter(final_df, x='distance_km', y='comptage', trendline="ols", labels={
                     "distance_km": "Distance to center [km]",
                     "comptage": "comptage"
                 }, trendline_scope="overall", title="Comptage against Distance to center")
#fig.update_traces(mode = 'lines')
fig.show()

In [44]:
model = LinearRegression()
model.fit(final_df[['distance_km']], final_df[['comptage']])

LinearRegression()

In [45]:
y_pred = model.predict(final_df[['distance_km']])
x_values = final_df['distance_km']