# Fetch testing

Testing ground for the `fetch.py` script.

In [6]:
import os
from pathlib import Path
from dotenv import load_dotenv
from google.cloud import storage
from google.cloud.storage import Client, Blob


In [3]:
# Populate the environment from the .env file before reading the settings.
load_dotenv()
BUCKET = os.getenv("BUCKET")
PROJECT_ID = os.getenv("PROJECT_ID")
bucket_url = f"gs://{BUCKET}"
fname = "calls.csv"
# A gs:// URL always uses "/" as its separator. os.path.join uses the OS
# separator (a backslash on Windows), so build the URL explicitly instead.
csvpath = f"{bucket_url}/{fname}"
csvpath

'gs://service-data-lake/calls.csv'

Test whether a GCS object exists.

In [4]:
# Storage client bound to the project read from the environment.
gcs = storage.Client(project=PROJECT_ID)

In [5]:
# Print each bucket returned by the client.
for listed_bucket in gcs.list_buckets():
    print(listed_bucket)

<Bucket: service-data-lake>
<Bucket: tf-state-service>


In [14]:
# NOTE(review): this bucket name does not appear in the list_buckets() output
# recorded earlier in this notebook — confirm it is the intended target.
bucket_name = "service-calls-data-lake"
# Alternative input file:
# fpath = Path("../data/city-wards-boundary.geojson")
fpath = Path("../data/toronto_fsa.geojson")
# The object name is relative to the bucket (no gs://<bucket>/ prefix needed)
# and always uses "/" as its separator. os.path.join would use the OS
# separator (a backslash on Windows), so build the key explicitly.
gcsobj = f"geojson/{fpath.name}"
bucket_client = gcs.bucket(bucket_name)
blob_client = bucket_client.blob(gcsobj)
# upload_from_filename is documented to take the path as a str.
blob_client.upload_from_filename(str(fpath))

In [15]:
from tempfile import TemporaryDirectory

In [16]:
# Nested TemporaryDirectory contexts: print the path each one hands out.
outer_ctx = TemporaryDirectory()
with outer_ctx as tmp1:
    print(tmp1)
    inner_ctx = TemporaryDirectory()
    with inner_ctx as tmp2:
        print(tmp2)

/tmp/tmpdlxb1lmf
/tmp/tmp1juacnbk


In [17]:
import pandas as pd

In [18]:
# Write a small throwaway frame (three identical rows, columns A and B)
# directly to a gs:// URL in the bucket.
gcspq = f"gs://{bucket_name}/raw/pq/test.parquet"
df = pd.DataFrame({"A": [4] * 3, "B": [9] * 3})
df.to_parquet(gcspq)

Testing for errors when parsing ward name and ID number

In [11]:
def parse_ward(ward):
    """Split a ward field such as ``"Some Name (01)"`` into ``(name, id)``.

    The ID is read from the two characters that follow the first ``"("``.

    Parameters
    ----------
    ward : str
        Raw ward field.

    Returns
    -------
    tuple[str, int]
        ``(ward_name, ward_id)``. Falls back to ``("None", 0)`` when the field
        has no ``"("`` or when the characters after it are not an integer.
    """
    try:
        idx = ward.index("(")
        parsed_id = int(ward[idx + 1 : idx + 3])
    except ValueError as e:
        print("error: ", e)
        if "substring not found" in str(e):
            print("Ward field did not have '(' to search for ID")
        # Fall back for *every* ValueError, not only the missing "(" case, so
        # the caller never sees an unbound (or stale) ward_id when int() fails.
        return "None", 0
    # Take everything before "(" and strip the separating whitespace, rather
    # than assuming exactly one space: `ward[: idx - 1]` drops a real character
    # when there is no space and wraps around to `ward[:-1]` when idx == 0.
    return ward[:idx].rstrip(), parsed_id


ward = "some msg"
ward_name, ward_id = parse_ward(ward)
print(ward_name, ward_id)

error:  substring not found
Ward field did not have '(' to search for ID
None 0


In [14]:
# Check that the message given to ValueError is visible via both str() and
# repr() of the caught exception.
try:
    raise ValueError("substring not found")
except ValueError as e:
    report = f"msg: {e}\ntype: {type(e)}\nstr: {str(e)}\nrepr: {repr(e)}"
    print(report)
    if "substring not found" in repr(e):
        print("yay")

msg: substring not found
type: <class 'ValueError'>
str: substring not found
repr: ValueError('substring not found')
yay


In [9]:
# List the attribute names available on a ValueError instance.
exc_attrs = dir(ValueError())
print(exc_attrs)

['__cause__', '__class__', '__context__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__suppress_context__', '__traceback__', 'args', 'with_traceback']


## storage.Blob.exists

What if there are no files?

In [7]:
# Existence check for an object name that is expected not to be in the bucket.
blob_path = "not_real"
bucket = gcs.bucket(BUCKET)
exists = Blob(name=blob_path, bucket=bucket).exists(client=gcs)

In [8]:
# Display the result of the existence check from the previous cell.
exists

False

In [10]:
# Probe what happens when the bucket name is an empty string: the recorded
# output shows the IndexError branch being taken.
try:
    foo = gcs.bucket(bucket_name="")
except IndexError as e:
    is_out_of_range = "out of range" in repr(e)
    if is_out_of_range:
        print("Empty string")

Empty string
