In [10]:
from google.cloud import bigquery
import os

from function.config import Config

## Check data in BigQuery

In [16]:
# Create a BigQuery client (no explicit project or credentials are passed).
client = bigquery.Client()

# Fully-qualified table name, built once so the two references in the query
# below cannot drift apart.
table_ref = f"{Config.PROJECT_NAME}.{Config.DATASET_ID}.{Config.TABLE_ID}"

# Select every row whose record_timestamp equals the table-wide maximum.
# There is deliberately no ORDER BY: the WHERE clause pins record_timestamp to
# a single value, so sorting on that column could not change the row order.
query = f"""
WITH MaxTimestamp AS (
    SELECT MAX(record_timestamp) AS max_timestamp
    FROM `{table_ref}`
)
SELECT *
FROM `{table_ref}`
WHERE record_timestamp = (SELECT max_timestamp FROM MaxTimestamp)
"""

# Run the query and materialise the result as a pandas DataFrame.
query_job = client.query(query)
df = query_job.to_dataframe()

# Display a handful of randomly chosen rows as the cell output.
df.sample(5)

Unnamed: 0,station_id,etat,nb_velos_dispo,nb_places_dispo,etat_connexion,derniere_maj,record_timestamp
29,25,EN SERVICE,13,19,CONNECTÉ,2024-08-17 20:05:14.492000+00:00,2024-08-17 20:07:01+00:00
217,188,EN SERVICE,9,3,CONNECTÉ,2024-08-17 20:05:14.751000+00:00,2024-08-17 20:07:01+00:00
262,242,EN SERVICE,8,9,CONNECTÉ,2024-08-17 20:05:14.814000+00:00,2024-08-17 20:07:01+00:00
28,306,EN SERVICE,7,13,CONNECTÉ,2024-08-17 20:05:14.836000+00:00,2024-08-17 20:07:01+00:00
211,179,EN SERVICE,17,3,CONNECTÉ,2024-08-17 20:05:14.748000+00:00,2024-08-17 20:07:01+00:00


## Populate 'stations' table

In [9]:
from google.cloud import bigquery
import requests
import os
import sys

# Point the Google client libraries at a local key file.
# NOTE(review): presumably a service-account key; the path is hard-coded and
# relative to the working directory -- confirm it should not come from config.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "function/key-vlille-gcp-etl.json"
project_id = "vlille-gcp-etl"
dataset_id = "vlille_dataset"
client = bigquery.Client()

# Populate the stations table once with a query:
url = "https://data.lillemetropole.fr/geoserver/wfs?SERVICE=WFS&REQUEST=GetFeature&VERSION=2.0.0&TYPENAMES=dsp_ilevia%3Avlille_temps_reel&OUTPUTFORMAT=application%2Fjson"
# A timeout keeps the cell from hanging forever if the endpoint stalls.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error status instead of trying to parse an error
# page as JSON on the next line.
response.raise_for_status()
data = response.json()

# Inspect the properties of the feature at index 42 (an arbitrary sample).
data['features'][42]['properties']

{'objectid': 184,
 'nom': 'RUE DU PARC',
 'adresse': '20 rue du Parc',
 'code_insee': None,
 'commune': 'La Madeleine',
 'etat': 'EN SERVICE',
 'type': 'AVEC TPE',
 'nb_places_dispo': 8,
 'nb_velos_dispo': 4,
 'etat_connexion': 'CONNECTÉ',
 'x': 3.0678,
 'y': 50.651107,
 'date_modification': '2024-08-17T20:05:14.750Z'}

In [5]:
# Build one tuple per feature returned by the WFS endpoint.
# NOTE(review): the tuple field order presumably has to match the column order
# of the `stations` table schema -- confirm against the table definition.
rows_to_insert = [
    (
        record["properties"]["objectid"],  # id = objectid
        record["properties"]["nom"],
        record["properties"]["adresse"],
        record["properties"]["commune"],
        record["properties"]["type"],
        # coordinates[1] is emitted before coordinates[0]; presumably
        # (latitude, longitude) given GeoJSON's lon/lat order -- TODO confirm.
        record["geometry"]["coordinates"][1],
        record["geometry"]["coordinates"][0],
    )
    for record in data["features"]
]


table_id = project_id + '.' + dataset_id + '.stations'
# NOTE(review): nothing here guards against a second run; re-executing this
# cell presumably inserts the same stations again -- verify before re-running.
try:
    table = client.get_table(table_id)
    # insert_rows reports per-row failures through its return value rather
    # than by raising, so the result must be inspected before claiming success.
    errors = client.insert_rows(table, rows_to_insert)
    if errors:
        print("Errors while inserting rows into table {}: {}".format(table_id, errors))
    else:
        print("Station's rows inserted into table {}".format(table_id))
except Exception as e:
    # Request-level failures (e.g. from get_table) are printed rather than
    # re-raised, so the notebook keeps running.
    print(e)

Station's rows inserted into table vlille-gcp-etl.vlille_dataset.stations
