Skip to content

Commit

Permalink
Add a generic VHS storage lookup helper
Browse files Browse the repository at this point in the history
  • Loading branch information
alexwlchan committed Oct 8, 2018
1 parent 549ccfc commit b592043
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 2 deletions.
54 changes: 54 additions & 0 deletions archive/archive_api/src/storage.py
@@ -0,0 +1,54 @@
# -*- encoding: utf-8

import json


class VHSNotFound(Exception):
    """Signals that the requested item does not exist in VHS."""


class VHSError(Exception):
    """Signals an unexpected failure while reading from the VHS."""


def read_from_vhs(dynamodb_resource, table_name, s3_client, bucket_name, id):
    """
    Fetch the JSON-decoded contents of a resource from VHS.

    The row with the given ``id`` is looked up in the DynamoDB index table,
    then the S3 object named by that row's "location" field is downloaded
    and parsed as JSON.

    This function assumes that the DynamoDB index table contains a "location"
    field with a HybridRecord(namespace, key) instance.

    :param dynamodb_resource: Object whose ``Table(table_name)`` returns a
        table exposing ``get_item(Key=...)``.
    :param table_name: Name of the DynamoDB index table.
    :param s3_client: Object exposing ``get_object(Bucket=..., Key=...)``
        whose result has a readable "Body".
    :param bucket_name: Not consulted by the lookup -- the bucket is always
        taken from the "namespace" of the stored location.  Kept in the
        signature so existing callers continue to work.
    :param id: Value of the "id" key of the row to look up.
    :returns: The JSON-decoded contents of the S3 object.
    :raises VHSNotFound: If the index table has no row for ``id``.
    :raises VHSError: If DynamoDB or S3 cannot be read, if the row has no
        usable "location", or if the S3 object is not valid JSON.
    """
    table = dynamodb_resource.Table(table_name)

    try:
        item_response = table.get_item(Key={"id": id})
    except Exception as err:
        raise VHSError(f"Error reading from DynamoDB: {err!r}") from err

    try:
        item = item_response["Item"]
    except KeyError:
        # The KeyError is only how absence is detected; it carries no useful
        # information for the caller, so keep it out of the traceback.
        raise VHSNotFound(id) from None

    try:
        location = item["location"]
        bucket = location["namespace"]
        key = location["key"]
    except (KeyError, TypeError) as err:
        # TypeError covers a "location" that is present but is not a mapping
        # (for example a plain string), which is just as malformed as a
        # missing key.
        raise VHSError(f"Malformed item in DynamoDB: {item!r}") from err

    try:
        body = s3_client.get_object(Bucket=bucket, Key=key)["Body"].read()
    except Exception as err:
        raise VHSError(f"Error retrieving from S3: {err!r}") from err

    try:
        return json.loads(body)
    except ValueError as err:
        raise VHSError(f"Error decoding S3 contents as JSON: {err!r}") from err
87 changes: 87 additions & 0 deletions archive/archive_api/src/tests/test_storage.py
@@ -0,0 +1,87 @@
# -*- encoding: utf-8

import json

import pytest

from storage import VHSNotFound, VHSError, read_from_vhs


def test_can_read_from_vhs(dynamodb_resource, vhs_table_name, s3_client, bucket):
    """A payload stored in S3 and indexed in DynamoDB comes back decoded."""
    expected = {"id": "123"}
    object_key = "123.txt"

    # Upload the payload, then write the index row that points at it.
    s3_client.put_object(
        Bucket=bucket, Key=object_key, Body=json.dumps(expected)
    )
    index_row = {
        "id": "123",
        "location": {"key": object_key, "namespace": bucket},
    }
    dynamodb_resource.Table(vhs_table_name).put_item(Item=index_row)

    actual = read_from_vhs(
        dynamodb_resource, vhs_table_name, s3_client, bucket, id="123"
    )
    assert actual == expected


def test_dynamodb_error_is_vhserror(dynamodb_resource, s3_client):
    """Reading through a table name that no fixture creates is a VHSError."""
    expected_failure = pytest.raises(
        VHSError, match="Error reading from DynamoDB"
    )

    with expected_failure:
        read_from_vhs(
            dynamodb_resource,
            "no-such-table",
            s3_client,
            "no-such-bucket",
            id="123",
        )


def test_missing_dynamodb_table_is_vhsnotfounderror(dynamodb_resource, vhs_table_name, s3_client):
    """
    Looking up an id with no row in the index table raises VHSNotFound.

    Despite the test's name, the table itself exists here (it is supplied by
    the ``vhs_table_name`` fixture); it is the row that is absent, because
    nothing is written to the table before the lookup.
    """
    expected_failure = pytest.raises(VHSNotFound, match="123")

    with expected_failure:
        read_from_vhs(
            dynamodb_resource,
            vhs_table_name,
            s3_client,
            "no-such-bucket",
            id="123",
        )


def test_malformed_dynamodb_row_is_vhserror(dynamodb_resource, vhs_table_name, s3_client):
    """A row whose location lacks "key"/"namespace" is reported as malformed."""
    # Deliberately misspelled field names inside the location.
    bad_row = {
        "id": "123",
        "location": {"k_y": "123.txt", "n_m_s_a_e": "bukkit"},
    }
    dynamodb_resource.Table(vhs_table_name).put_item(Item=bad_row)

    expected_failure = pytest.raises(
        VHSError, match="Malformed item in DynamoDB"
    )
    with expected_failure:
        read_from_vhs(
            dynamodb_resource, vhs_table_name, s3_client, "bukkit", id="123"
        )


def test_missing_s3_object_is_vhserror(dynamodb_resource, vhs_table_name, s3_client):
    """An index row pointing at an object this test never uploads is a VHSError."""
    # The row is well-formed, but nothing is put into S3 beforehand.
    dangling_row = {
        "id": "123",
        "location": {"key": "123.txt", "namespace": "bukkit"},
    }
    dynamodb_resource.Table(vhs_table_name).put_item(Item=dangling_row)

    expected_failure = pytest.raises(
        VHSError, match="Error retrieving from S3"
    )
    with expected_failure:
        read_from_vhs(
            dynamodb_resource, vhs_table_name, s3_client, "bukkit", id="123"
        )


def test_non_json_in_s3_is_vhserror(dynamodb_resource, vhs_table_name, s3_client, bucket):
    """An S3 object whose body is not JSON is reported as a VHSError."""
    object_key = "123.txt"

    # Store a body that cannot be parsed as JSON, then index it.
    s3_client.put_object(Bucket=bucket, Key=object_key, Body="<<notJson>>")
    index_row = {
        "id": "123",
        "location": {"key": object_key, "namespace": bucket},
    }
    dynamodb_resource.Table(vhs_table_name).put_item(Item=index_row)

    expected_failure = pytest.raises(
        VHSError, match="Error decoding S3 contents as JSON"
    )
    with expected_failure:
        read_from_vhs(
            dynamodb_resource, vhs_table_name, s3_client, bucket, id="123"
        )


@pytest.fixture
def vhs_table_name(dynamodb_client, random_alpha):
    """
    Yield the name of a DynamoDB table keyed on a string "id" attribute.

    The table is named after the ``random_alpha`` fixture value.  It is
    created before the test (a ResourceInUseException from the create/wait
    step is ignored) and deleted once the test has finished.
    """
    name = random_alpha

    key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
    attribute_definitions = [{"AttributeName": "id", "AttributeType": "S"}]
    throughput = {"ReadCapacityUnits": 1, "WriteCapacityUnits": 1}

    try:
        dynamodb_client.create_table(
            TableName=name,
            KeySchema=key_schema,
            AttributeDefinitions=attribute_definitions,
            ProvisionedThroughput=throughput,
        )
        # Block until the table is reported as existing.
        waiter = dynamodb_client.get_waiter("table_exists")
        waiter.wait(TableName=name)
    except dynamodb_client.exceptions.ResourceInUseException:
        pass

    yield name

    # Teardown: drop the table after the test has used it.
    dynamodb_client.delete_table(TableName=name)
5 changes: 3 additions & 2 deletions shared_conftest.py
Expand Up @@ -21,6 +21,7 @@ def pytest_runtest_setup(item):
boto3.setup_default_session(region_name="eu-west-1")


@pytest.fixture
def random_alpha():
    """Provide a string of ten randomly chosen lowercase ASCII letters."""
    letters = [random.choice(string.ascii_lowercase) for _ in range(10)]
    return "".join(letters)

Expand Down Expand Up @@ -257,14 +258,14 @@ def elasticsearch_url(docker_services, elasticsearch_hostname):


@pytest.fixture
def elasticsearch_index(docker_services, elasticsearch_url):
def elasticsearch_index(docker_services, elasticsearch_url, random_alpha):
docker_services.wait_until_responsive(
timeout=60.0,
pause=0.1,
check=_is_responsive(elasticsearch_url, lambda r: r.status_code == 401),
)

index_name = random_alpha()
index_name = random_alpha
resp = requests.put(
f"{elasticsearch_url}/{index_name}", auth=("elastic", "changeme")
)
Expand Down

0 comments on commit b592043

Please sign in to comment.