In [9]:
from google.cloud import bigquery
from datetime import datetime, timedelta
import pandas as pd
import os

# Export the key-file location through GOOGLE_APPLICATION_CREDENTIALS before
# the client is constructed.
credentials_path = "../key-vlille-gcp.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

# BigQuery client used to run the query further down.
client = bigquery.Client()

In [39]:
# Per-record bike availability ratio, joined with the station coordinates,
# for a one-hour window of records whose state is 'EN SERVICE'.
#
# * SAFE_DIVIDE returns NULL instead of failing the whole query with a
#   division-by-zero error when a record has 0 bikes and 0 free docks.
# * The two tables are combined with an explicit INNER JOIN rather than a
#   comma cross join filtered in WHERE (same rows, clearer intent).
# * NOTE(review): timestamps are shifted by a fixed +2 hours, presumably
#   UTC -> local summer time; a fixed offset does not follow DST -- confirm
#   before reusing this query for other dates.
query = """
SELECT
  TIMESTAMP_ADD(record_timestamp, INTERVAL 2 HOUR) AS record_timestamp_ptz,
  latitude, longitude,
  SAFE_DIVIDE(nb_velos_dispo, nb_velos_dispo + nb_places_dispo) AS taux_velos_dispo
FROM
  `vlille_gcp_dataset.records`
INNER JOIN
  `vlille_gcp_dataset.stations`
ON
  id = station_id
WHERE
  record_timestamp >= TIMESTAMP_SUB('2023-10-20', INTERVAL 2 HOUR)
  AND record_timestamp < TIMESTAMP_SUB('2023-10-20', INTERVAL 1 HOUR)
  AND etat = 'EN SERVICE'
ORDER BY
  record_timestamp_ptz ASC
"""

# Submit the query, then block until it finishes; `rows` iterates the result.
query_job = client.query(query)
rows = query_job.result()

In [40]:
# Copy the selected fields of every result row into a DataFrame, one frame
# row per result row, using a single column list for both steps.
columns = ['record_timestamp_ptz', 'latitude', 'longitude', 'taux_velos_dispo']
data = [tuple(getattr(row, name) for name in columns) for row in rows]
df = pd.DataFrame(data, columns=columns)

In [41]:
# Collapse the frame to one row per timestamp: every other column becomes the
# list of values recorded at that timestamp.
def _as_list(values):
    """Return the values of one group as a plain Python list."""
    return list(values)


list_columns = ['latitude', 'longitude', 'taux_velos_dispo']
df = df.groupby('record_timestamp_ptz').agg(
    {name: _as_list for name in list_columns}
)

In [44]:
# Make sure the timestamp index is a DatetimeIndex so it can be formatted.
timestamp_index = pd.to_datetime(df.index)
df.index = timestamp_index

In [46]:
# Replace each timestamp with its 'YYYY-MM-DD HH:MM' label; anything finer
# than the minute is not part of the format and is therefore dropped.
minute_format = '%Y-%m-%d %H:%M'
df.index = df.index.strftime(minute_format)

In [55]:
# Parse the index labels back into a DatetimeIndex (needed for resampling).
minute_index = pd.to_datetime(df.index)
df.index = minute_index

In [56]:
# Upsample to a regular one-minute grid: a minute that has no row of its own
# repeats the most recent earlier row (forward fill).
# NOTE(review): upsampling presumably requires unique index labels -- confirm
# that no two groups share the same minute.
per_minute = df.resample('1min')
df = per_minute.ffill()

In [None]:
# Turn the index back into 'YYYY-MM-DD HH:MM' strings.
index_labels = df.index.strftime('%Y-%m-%d %H:%M')
df.index = index_labels

In [58]:
# Write the frame to data.json as a JSON object keyed by index label, each
# value mapping column name -> cell value.
output_path = 'data.json'
df.to_json(output_path, orient='index')