Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refact bot_data, sending file to aws s3 #284

Merged
merged 3 commits into from
Sep 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ drf-yasg = "*"
gunicorn = "*"
gevent = "*"
packaging = "*"
boto3 = "*"

[dev-packages]
"flake8" = "*"
Expand Down
60 changes: 53 additions & 7 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,7 @@ You can set environment variables in your OS, write on ```.env``` file or pass v
| BOTHUB_NLP_BASE_URL | ```string``` | ```http://localhost:2657/``` | The bothub-nlp production application URL. Used to proxy requests.
| CHECK_ACCESSIBLE_API_URL | ```string``` | ```http://localhost/api/repositories/``` | URL used by ```bothub.health.check.check_accessible_api``` to make a HTTP request. The response status code must be 200.
| SEND_EMAILS | ```boolean``` | ```True``` | Send emails flag.
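| BOTHUB_ENGINE_AWS_S3_BUCKET_NAME | ```string``` | ```None``` | Name of the AWS S3 bucket that receives the trained bot data files. When the bucket name or either credential is missing, nothing is uploaded.
| BOTHUB_ENGINE_AWS_ACCESS_KEY_ID | ```string``` | ```None``` | AWS access key ID used to upload bot data files to the S3 bucket.
| BOTHUB_ENGINE_AWS_SECRET_ACCESS_KEY | ```string``` | ```None``` | AWS secret access key used to upload bot data files to the S3 bucket.
| BOTHUB_ENGINE_AWS_REGION_NAME | ```string``` | ```us-east-1``` | AWS region used by the S3 client.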
14 changes: 11 additions & 3 deletions bothub/api/v2/nlp/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import json
import requests

from django.db import models
from django.utils.translation import gettext_lazy as _
Expand All @@ -24,6 +25,7 @@
from bothub.common.models import RepositoryUpdate
from bothub.common.models import Repository
from bothub.common import languages
from bothub.utils import send_bot_data_file_aws


def check_auth(request):
Expand Down Expand Up @@ -499,18 +501,24 @@ class RepositoryUpdateInterpretersViewSet(
def retrieve(self, request, *args, **kwargs):
    """Return an update's id, repository UUID and base64-encoded bot data.

    ``update.bot_data`` holds the URL of the stored training archive. The
    archive is downloaded and base64-encoded for the response. When no URL
    is stored, or the download fails or answers with an error status, an
    empty payload is returned instead of failing the request.
    """
    check_auth(request)
    update = self.get_object()
    bot_data = b''
    # Nothing was uploaded for this update: skip the pointless request.
    if update.bot_data:
        try:
            # Bound each network wait so an unresponsive storage host
            # cannot hang the worker indefinitely.
            download = requests.get(update.bot_data, timeout=30)
            # A non-2xx body is an error document, not a model archive;
            # never hand it to the caller as bot data.
            download.raise_for_status()
            bot_data = base64.b64encode(download.content)
        except requests.RequestException:
            bot_data = b''
    return Response({
        'update_id': update.id,
        'repository_uuid': update.repository.uuid,
        # NOTE(review): str() of a bytes object yields "b'...'" rather
        # than the bare base64 text. Kept unchanged so existing consumers
        # are not broken; confirm what the NLP service expects.
        'bot_data': str(bot_data)
    })

def create(self, request, *args, **kwargs):
    """Store the training result posted for a repository update.

    Expects ``id`` (primary key of the ``RepositoryUpdate``) and
    ``bot_data`` (base64-encoded archive) in the request data. The decoded
    archive is handed to ``send_bot_data_file_aws`` and whatever URL it
    returns is saved through ``save_training``.

    Raises:
        ValidationError: ``bot_data`` is missing or is not valid base64
            (answered as HTTP 400 instead of an unhandled error).
    """
    # Imported locally so this method does not depend on the module's
    # import block already providing it.
    from rest_framework.exceptions import ValidationError

    check_auth(request)
    update_id = request.data.get('id')
    update = get_object_or_404(RepositoryUpdate, pk=update_id)
    try:
        bot_data = base64.b64decode(request.data.get('bot_data'))
    except (TypeError, ValueError):
        # TypeError: field absent (None); ValueError covers binascii.Error
        # (bad padding) and non-ASCII text.
        raise ValidationError(
            {'bot_data': _('bot_data must be a base64-encoded payload.')})
    # NOTE(review): the upload helper returns '' when S3 is not configured
    # or the upload is rejected; the update is then saved as trained with
    # an empty bot_data URL.
    update.save_training(send_bot_data_file_aws(update_id, bot_data))
    return Response({})
33 changes: 33 additions & 0 deletions bothub/common/migrations/0035_auto_20190902_1455.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 2.1.5 on 2019-09-02 14:55

from django.db import migrations, models
from bothub.utils import send_bot_data_file_aws
from bothub.common.models import RepositoryUpdate


def update_repository(apps, schema_editor):
    """Move every stored bot_data payload to S3 and keep only its URL.

    Before this migration ``bot_data`` holds the base64 text of the
    training archive. Each non-empty payload is decoded back to bytes,
    uploaded with ``send_bot_data_file_aws`` and replaced by the returned
    URL, so that the column can afterwards be altered to a ``URLField``.

    When the upload helper returns an empty string the column is still
    overwritten with it, because the payload cannot be kept once the field
    becomes a URL. A warning is printed so the loss is not silent.
    """
    import base64

    # Use the historical model: the live class may gain columns in later
    # migrations that do not exist yet when this one runs.
    RepositoryUpdate = apps.get_model('common', 'RepositoryUpdate')

    # Collect primary keys first so only one (potentially large) payload
    # is held in memory at a time.
    pending_pks = list(
        RepositoryUpdate.objects
        .exclude(bot_data__exact='')
        .values_list('pk', flat=True))

    for pk in pending_pks:
        update = RepositoryUpdate.objects.get(pk=pk)
        # The column stores base64 *text*; the upload helper wraps its
        # argument in io.BytesIO and therefore needs the raw bytes.
        url = send_bot_data_file_aws(pk, base64.b64decode(update.bot_data))
        if not url:
            print(
                'WARNING: bot_data of repository_update {} was not uploaded '
                '(S3 not configured or upload failed); '
                'stored value is cleared'.format(pk))
        update.bot_data = url
        update.save(
            update_fields=[
                'bot_data',
            ])
        print('Updating bot_data repository_update {}'.format(str(pk)))


class Migration(migrations.Migration):
    # Replaces the content of RepositoryUpdate.bot_data with a URL and then
    # changes the field type to match.

    dependencies = [
        ('common', '0034_repository_nlp_server'),
    ]

    # Order matters: the data step (update_repository) rewrites the stored
    # values before the field is altered to a URLField.
    # No reverse function is passed to RunPython, so this migration cannot
    # be unapplied.
    operations = [
        migrations.RunPython(update_repository),
        migrations.AlterField(
            model_name='repositoryupdate',
            name='bot_data',
            field=models.URLField(blank=True, verbose_name='bot data'),
        ),
    ]
10 changes: 4 additions & 6 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import uuid
import base64
import requests

from functools import reduce
Expand Down Expand Up @@ -489,10 +488,9 @@ class Meta:
created_at = models.DateTimeField(
_('created at'),
auto_now_add=True)
bot_data = models.TextField(
bot_data = models.URLField(
_('bot data'),
blank=True,
editable=False)
blank=True)
by = models.ForeignKey(
User,
models.CASCADE,
Expand Down Expand Up @@ -660,7 +658,7 @@ def save_training(self, bot_data):
raise RepositoryUpdateAlreadyTrained()

self.trained_at = timezone.now()
self.bot_data = base64.b64encode(bot_data).decode('utf8')
self.bot_data = bot_data
self.repository.total_updates += 1
self.repository.save()
self.save(
Expand All @@ -670,7 +668,7 @@ def save_training(self, bot_data):
])

def get_bot_data(self):
return base64.b64decode(self.bot_data)
return self.bot_data

def train_fail(self):
self.failed_at = timezone.now()
Expand Down
2 changes: 1 addition & 1 deletion bothub/common/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def test_train(self):
update = self.repository.current_update()
update.start_training(self.owner)

bot_data = b'bot_data__()\\\\//?(*)'
bot_data = 'https://s3.amazonaws.com'

update.save_training(bot_data)
self.assertEqual(
Expand Down
12 changes: 4 additions & 8 deletions bothub/common/views.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from django.shortcuts import get_object_or_404
from django.http import HttpResponse
from django.http import HttpResponseRedirect
from django.core.exceptions import ValidationError
from django.contrib.admin.views.decorators import staff_member_required
from .models import RepositoryUpdate


@staff_member_required
def download_bot_data(self, update_id):  # pragma: no cover
    """Redirect a staff user to the stored bot data file of an update.

    Args:
        self: the incoming request. This is a plain function view; the
            parameter name is kept unchanged for compatibility.
        update_id: primary key of the ``RepositoryUpdate``.

    Raises:
        Http404: no update has this id, or the update has no bot data URL.
        ValidationError: the update has not been trained yet.
    """
    from django.http import Http404

    update = get_object_or_404(RepositoryUpdate, pk=update_id)
    if not update.trained_at:
        raise ValidationError(f'Update #{update.pk} not trained at.')
    bot_data_url = update.get_bot_data()
    if not bot_data_url:
        # An empty Location header resolves to the current URL, which
        # would send the browser straight back to this view.
        raise Http404(f'Update #{update.pk} has no bot data file.')
    return HttpResponseRedirect(bot_data_url)
44 changes: 44 additions & 0 deletions bothub/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import io
import uuid
import boto3
from decouple import config
from botocore.exceptions import ClientError
from collections import OrderedDict


Expand All @@ -10,3 +15,42 @@ def cast_supported_languages(i):

def cast_empty_str_to_none(value):
return value or None


def send_bot_data_file_aws(id, bot_data):
    """Upload a bot data archive to AWS S3 and return its URL.

    The object is stored under
    ``repository_<id>/bot_data_<random uuid>.tar.gz`` with content type
    ``application/gzip``.

    Args:
        id: identifier used in the object key prefix. The name shadows the
            builtin but is kept for compatibility with existing callers.
        bot_data: bytes-like content of the archive (it is wrapped in
            ``io.BytesIO``, so text is not accepted).

    Returns:
        ``<endpoint url>/<bucket>/<object key>`` on success, or an empty
        string when the access key, secret key or bucket name is not
        configured, or when S3 rejects the upload with a ``ClientError``.
    """
    import logging

    aws_access_key_id = config('BOTHUB_ENGINE_AWS_ACCESS_KEY_ID', default='')
    aws_secret_access_key = config(
        'BOTHUB_ENGINE_AWS_SECRET_ACCESS_KEY', default='')
    aws_bucket_name = config('BOTHUB_ENGINE_AWS_S3_BUCKET_NAME', default='')
    # A variable that is exported but empty (docker-compose forwards it
    # without a fallback) would otherwise override the default region.
    aws_region_name = config(
        'BOTHUB_ENGINE_AWS_REGION_NAME', default='us-east-1') or 'us-east-1'

    # Uploading is optional: without full configuration there is no URL.
    if not all([aws_access_key_id, aws_secret_access_key, aws_bucket_name]):
        return ''

    object_key = f'repository_{id}/bot_data_{uuid.uuid4()}.tar.gz'
    archive = io.BytesIO(bot_data)

    s3_client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=aws_region_name,
    )
    try:
        s3_client.upload_fileobj(
            archive,
            aws_bucket_name,
            object_key,
            ExtraArgs={'ContentType': 'application/gzip'}
        )
    except ClientError:
        # Best effort: report the failure with its traceback and let the
        # caller carry on with an empty URL.
        logging.getLogger(__name__).exception(
            'Could not upload bot_data to S3 bucket %s as %s',
            aws_bucket_name, object_key)
        return ''

    return '{}/{}/{}'.format(
        s3_client.meta.endpoint_url,
        aws_bucket_name,
        object_key
    )
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ services:
- CHECK_ACCESSIBLE_API_URL=${CHECK_ACCESSIBLE_API_URL}
- SEND_EMAILS=${SEND_EMAILS:-true}
- SUPPORTED_LANGUAGES=${SUPPORTED_LANGUAGES:-en|pt}
- BOTHUB_ENGINE_AWS_ACCESS_KEY_ID=${BOTHUB_ENGINE_AWS_ACCESS_KEY_ID}
- BOTHUB_ENGINE_AWS_SECRET_ACCESS_KEY=${BOTHUB_ENGINE_AWS_SECRET_ACCESS_KEY}
- BOTHUB_ENGINE_AWS_S3_BUCKET_NAME=${BOTHUB_ENGINE_AWS_S3_BUCKET_NAME}
- BOTHUB_ENGINE_AWS_REGION_NAME=${BOTHUB_ENGINE_AWS_REGION_NAME}


networks:
Expand Down