In [1]:
from google.cloud import bigquery
import os

from function.config import Config

## Check data in BigQuery

In [4]:
# Create a BigQuery client

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = Config.GOOGLE_CREDENTIALS

client = bigquery.Client()

# request to check the data
query = f"""
WITH MaxTimestamp AS (
    SELECT MAX(record_timestamp) AS max_timestamp
    FROM `{Config.PROJECT_NAME}.{Config.DATASET_ID}.{Config.TABLE_ID}`
)
SELECT *
FROM `{Config.PROJECT_NAME}.{Config.DATASET_ID}.{Config.TABLE_ID}`
WHERE record_timestamp = (SELECT max_timestamp FROM MaxTimestamp)
ORDER BY record_timestamp DESC
"""

query_job = client.query(query)
df = query_job.to_dataframe()
df

Unnamed: 0,station_id,etat,nb_velos_dispo,nb_places_dispo,etat_connexion,derniere_maj,record_timestamp
0,1,RÉFORMÉ,0,0,DÉCONNECTÉ,2022-11-29 10:47:16.181000+00:00,2024-08-29 09:58:06+00:00
1,105,RÉFORMÉ,0,0,DÉCONNECTÉ,2022-11-29 10:47:16.183000+00:00,2024-08-29 09:58:06+00:00
2,106,RÉFORMÉ,0,0,DÉCONNECTÉ,2022-11-29 10:47:16.183000+00:00,2024-08-29 09:58:06+00:00
3,109,RÉFORMÉ,0,0,DÉCONNECTÉ,2022-11-29 10:47:16.184000+00:00,2024-08-29 09:58:06+00:00
4,112,RÉFORMÉ,0,0,DÉCONNECTÉ,2022-11-29 10:47:16.184000+00:00,2024-08-29 09:58:06+00:00
...,...,...,...,...,...,...,...
284,274,EN SERVICE,8,12,CONNECTÉ,2024-08-29 09:57:15.848000+00:00,2024-08-29 09:58:06+00:00
285,275,IN_MAINTENANCE,1,3,DÉCONNECTÉ,2024-08-29 09:57:15.848000+00:00,2024-08-29 09:58:06+00:00
286,300,EN SERVICE,19,1,CONNECTÉ,2024-08-29 09:57:15.859000+00:00,2024-08-29 09:58:06+00:00
287,301,EN SERVICE,4,16,CONNECTÉ,2024-08-29 09:57:15.860000+00:00,2024-08-29 09:58:06+00:00


## Populate 'stations' table

In [7]:
from google.cloud import bigquery
import requests
import os
import sys

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "function/key-vlille-gcp-etl.json"
project_id = "vlille-gcp-etl"
dataset_id = "vlille_dataset"
client = bigquery.Client()

# empty the table
query = f"""
DELETE FROM `{project_id}.{dataset_id}.stations`
"""
query_job = client.query(query)

In [9]:
# Populate the stations table once with a query:
url = "https://data.lillemetropole.fr/geoserver/wfs?SERVICE=WFS&REQUEST=GetFeature&VERSION=2.0.0&TYPENAMES=dsp_ilevia%3Avlille_temps_reel&OUTPUTFORMAT=application%2Fjson"
response = requests.get(url)
data = response.json()

# Get the 42th station
data['features'][42]

{'type': 'Feature',
 'id': 'vlille_temps_reel.35',
 'geometry': {'type': 'Point', 'coordinates': [3.043307, 50.62899]},
 'geometry_name': 'geom',
 'properties': {'objectid': 35,
  'nom': 'LECLERC',
  'adresse': 'Place du Maréchal Leclerc',
  'code_insee': None,
  'commune': 'Lille',
  'etat': 'EN SERVICE',
  'type': 'AVEC TPE',
  'nb_places_dispo': 15,
  'nb_velos_dispo': 11,
  'etat_connexion': 'CONNECTÉ',
  'x': 3.043307,
  'y': 50.62899,
  'date_modification': '2024-08-29T09:59:16.890Z'}}

In [10]:
rows_to_insert = []
for record in data["features"]:
    rows_to_insert.append(
        (
            record["properties"]["objectid"], # id = objectid
            record["properties"]["nom"],
            record["properties"]["adresse"],
            record["properties"]["commune"],
            record["properties"]["type"],
            record["properties"]["x"],
            record["properties"]["y"],
        )
    )


table_id = project_id + '.' + dataset_id + '.stations'
try:
    table = client.get_table(table_id)
    client.insert_rows(table, rows_to_insert)
    print("Station's rows inserted into table {}".format(table_id))
except Exception as e:
    print(e)

Station's rows inserted into table vlille-gcp-etl.vlille_dataset.stations
