Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validation to minimal train #207

Merged
merged 4 commits into from
Sep 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bothub/api/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ class Meta:
'available_request_authorization',
'request_authorization',
'ready_for_train',
'requirements_to_train',
'languages_ready_for_train',
'votes_sum',
'created_at',
]
Expand Down
20 changes: 20 additions & 0 deletions bothub/common/migrations/0021_auto_20180921_1259.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 2.0.6 on 2018-09-21 12:59

import django.core.validators
from django.db import migrations, models
import re


# Schema migration for the `common` app that redefines the `intent` field of
# the `repositoryexample` model.
class Migration(migrations.Migration):

# This migration builds on the state left by the previous `common` migration.
dependencies = [
('common', '0020_auto_20180813_1320'),
]

# Single operation: `intent` becomes a 64-character CharField whose default is
# 'no_intent' and whose value must match the lowercase letters / digits /
# underscore / hyphen pattern compiled below.
operations = [
migrations.AlterField(
model_name='repositoryexample',
name='intent',
field=models.CharField(default='no_intent', help_text='Example intent reference', max_length=64, validators=[django.core.validators.RegexValidator(re.compile('^[-a-z0-9_]+\\Z'), 'Enter a valid value consisting of lowercase letters, numbers, underscores or hyphens.', 'invalid')], verbose_name='intent'),
),
]
105 changes: 81 additions & 24 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import requests

from functools import reduce
from django.db import models
from django.utils.translation import gettext as _
from django.utils import timezone
Expand Down Expand Up @@ -168,19 +169,25 @@ def languages_status(self):
))

@property
def ready_for_train(self):
updates = self.updates.filter(training_started_at=None)

if RepositoryExample.objects.filter(
models.Q(repository_update__in=updates) |
models.Q(deleted_in__in=updates)).exists():
return True
def requirements_to_train(self):
    """Map each pending update's language to its unmet requirements.

    Only updates whose training has not started are inspected. Languages
    whose update reports no outstanding requirement are left out, so an
    empty dict means nothing is blocking any pending update.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    unmet_by_language = {}
    for update in pending_updates:
        # Read the property once: it is evaluated on every access.
        unmet = update.requirements_to_train
        if unmet:
            unmet_by_language[update.language] = unmet
    return unmet_by_language

if RepositoryTranslatedExample.objects.filter(
repository_update__in=updates).exists():
return True
@property
def languages_ready_for_train(self):
    """Map each pending update's language to its `ready_for_train` flag.

    Only updates whose training has not started are considered.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    return {
        update.language: update.ready_for_train
        for update in pending_updates
    }

return False
@property
def ready_for_train(self):
    """Tell whether at least one pending update can be trained.

    Only updates whose training has not started are considered. Returns
    ``False`` when there is no such update.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    # `any` stops at the first ready update, so the remaining updates do
    # not have to evaluate their (query-backed) readiness checks.
    return any(update.ready_for_train for update in pending_updates)

@property
def votes_sum(self):
Expand Down Expand Up @@ -311,6 +318,9 @@ class Meta:
verbose_name_plural = _('repository updates')
ordering = ['-created_at']

MIN_EXAMPLES_PER_INTENT = 2
MIN_EXAMPLES_PER_ENTITY = 2

repository = models.ForeignKey(
Repository,
models.CASCADE,
Expand Down Expand Up @@ -361,26 +371,73 @@ def examples(self):
examples = examples.exclude(deleted_in__isnull=False)
return examples

@property
def requirements_to_train(self):
    """List the reasons why this update cannot be trained yet.

    Returns:
        A list of translated, human-readable messages. An empty list
        means every requirement is met. When the update was already
        trained, or its training already started, the list holds only
        the message describing that state.
    """
    try:
        self.validate_init_train()
    except RepositoryUpdateAlreadyTrained:
        return [_('This bot version has already been trained.')]
    except RepositoryUpdateAlreadyStartedTraining:
        return [_('This bot version is being trained.')]

    reasons = []

    # Nothing was added, translated or deleted in this update.
    if not self.added.exists() and \
            not self.translated_added.exists() and \
            not self.deleted.exists():
        reasons.append(
            _('There was no change in this bot version. No ' +
              'examples or translations for {} have been added or ' +
              'removed.').format(
                languages.VERBOSE_LANGUAGES.get(self.language)))

    intents = self.examples.values_list('intent', flat=True)

    if '' in intents:
        reasons.append(_('All examples need have a intent.'))

    # Intents with fewer examples than the minimum. The empty `order_by()`
    # clears any ordering so it does not take part in the grouping.
    weak_intents = self.examples.values('intent').annotate(
        intent_count=models.Count('id')).order_by().exclude(
            intent_count__gte=self.MIN_EXAMPLES_PER_INTENT)
    # Iterating an empty queryset is a no-op, so no separate `.exists()`
    # query is needed before the loop.
    for weak_intent in weak_intents:
        reasons.append(
            _('Intent "{}" has only {} examples. ' +
              'Minimum is {}.').format(
                weak_intent.get('intent'),
                weak_intent.get('intent_count'),
                self.MIN_EXAMPLES_PER_INTENT))

    # Same check per entity value, restricted to examples that carry at
    # least one entity.
    # NOTE(review): the count appears to be per joined entity row rather
    # than per distinct example (test_entity_dont_have_min_examples passes
    # with two entity rows on one example) - confirm this is intended.
    weak_entities = self.examples.annotate(
        es_count=models.Count('entities')).filter(
            es_count__gte=1).values(
                'entities__entity__value').annotate(
                    entities_count=models.Count('id')).order_by().exclude(
                        entities_count__gte=self.MIN_EXAMPLES_PER_ENTITY)
    for weak_entity in weak_entities:
        reasons.append(
            _('Entity "{}" has only {} examples. ' +
              'Minimum is {}.').format(
                weak_entity.get('entities__entity__value'),
                weak_entity.get('entities_count'),
                self.MIN_EXAMPLES_PER_ENTITY))

    return reasons

@property
def ready_for_train(self):
    """Tell whether this update meets every training requirement.

    Returns ``True`` exactly when `requirements_to_train` is empty.
    """
    # Emptiness check instead of `len(...) is 0`: `is` compares identity,
    # which is not a reliable way to compare integers.
    return not self.requirements_to_train

def start_training(self, by):
def validate_init_train(self, by=None):
    """Check that training may be started for this update.

    Args:
        by: optional user starting the training. When given, the user
            must have write authorization on the repository.

    Raises:
        RepositoryUpdateAlreadyTrained: `trained_at` is already set.
        RepositoryUpdateAlreadyStartedTraining: `training_started_at`
            is already set.
        TrainingNotAllowed: `by` was given but cannot write.
    """
    if self.trained_at:
        raise RepositoryUpdateAlreadyTrained()
    if self.training_started_at:
        raise RepositoryUpdateAlreadyStartedTraining()
    if not by:
        # No user supplied: only the state checks above apply.
        return
    if not self.repository.get_user_authorization(by).can_write:
        raise TrainingNotAllowed()

authorization = self.repository.get_user_authorization(by)
if not authorization.can_write:
raise TrainingNotAllowed()

def start_training(self, by):
self.validate_init_train(by)
self.by = by
self.training_started_at = timezone.now()
self.save(
Expand Down Expand Up @@ -435,7 +492,7 @@ class Meta:
intent = models.CharField(
_('intent'),
max_length=64,
blank=True,
default='no_intent',
help_text=_('Example intent reference'),
validators=[validate_item_key])
created_at = models.DateTimeField(
Expand Down
101 changes: 96 additions & 5 deletions bothub/common/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,10 +705,26 @@ def setUp(self):
name='Test',
slug='test',
language=languages.LANGUAGE_EN)
self.example = RepositoryExample.objects.create(
self.example_1 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
self.example_2 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
self.example_3 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='bye!',
intent='bye')
self.example_4 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='good bye',
intent='bye')
self.example_5 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hellow',
intent='greet')

# The visible setUp fixture provides three `greet` and two `bye` examples, so
# the repository is expected to report itself as ready for training.
def test_be_true(self):
self.assertTrue(self.repository.ready_for_train)
Expand All @@ -720,15 +736,20 @@ def test_be_false(self):
def test_be_true_when_new_translate(self):
    """New translations make an already-trained repository ready again."""
    # Start training on the pending update so only later changes count.
    self.repository.current_update().start_training(self.owner)
    # Translate two `greet` fixture examples to Portuguese.
    RepositoryTranslatedExample.objects.create(
        original_example=self.example_1,
        language=languages.LANGUAGE_PT,
        text='oi')
    RepositoryTranslatedExample.objects.create(
        original_example=self.example_2,
        language=languages.LANGUAGE_PT,
        text='olá')
    # NOTE(review): the result is unused; presumably this call makes sure
    # a fresh update exists for the repository language - confirm.
    self.repository.current_update()
    self.assertTrue(self.repository.ready_for_train)

def test_be_true_when_deleted_example(self):
    """Deleting an example after training makes the repository ready."""
    self.repository.current_update()
    self.repository.current_update().start_training(self.owner)
    # `greet` still has two of its three fixture examples afterwards.
    self.example_1.delete()
    self.assertTrue(self.repository.ready_for_train)


Expand All @@ -743,6 +764,10 @@ def setUp(self):
language=languages.LANGUAGE_EN)

def test_be_true(self):
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
Expand All @@ -753,15 +778,23 @@ def test_be_false(self):
self.assertFalse(self.repository.current_update().ready_for_train)

def test_new_translate(self):
    """Translations added after training make that language trainable."""
    example_1 = RepositoryExample.objects.create(
        repository_update=self.repository.current_update(),
        text='hi',
        intent='greet')
    example_2 = RepositoryExample.objects.create(
        repository_update=self.repository.current_update(),
        text='hello',
        intent='greet')
    # Start training on the English update before translating.
    self.repository.current_update().start_training(self.owner)
    RepositoryTranslatedExample.objects.create(
        original_example=example_1,
        language=languages.LANGUAGE_PT,
        text='oi')
    RepositoryTranslatedExample.objects.create(
        original_example=example_2,
        language=languages.LANGUAGE_PT,
        text='olá')
    # The Portuguese update now has two `greet` translations.
    self.assertTrue(self.repository.current_update(
        languages.LANGUAGE_PT).ready_for_train)

Expand All @@ -770,10 +803,68 @@ def test_when_deleted(self):
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hellow',
intent='greet')
self.repository.current_update().start_training(self.owner)
example.delete()
self.assertTrue(self.repository.current_update().ready_for_train)

# Two examples saved with an empty intent must keep the current update from
# being ready for training.
# Both entity rows below are attached to the first example and cover the same
# span (0-7) with the same entity name.
def test_empty_intent(self):
example = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='douglas',
intent='')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='douglas',
intent='')
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=7,
entity='name')
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=7,
entity='name')
self.assertFalse(self.repository.current_update().ready_for_train)

# A single `greet` example must not be enough for the update to be ready.
# NOTE(review): presumably because one example is below
# MIN_EXAMPLES_PER_INTENT (2) - confirm against the model.
def test_intent_dont_have_min_examples(self):
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
self.assertFalse(self.repository.current_update().ready_for_train)

# Checks the per-entity minimum: with two `greet` examples in place, one `hi`
# entity row leaves the update not ready, and a second `hi` entity row makes
# it ready.
# Note that both entity rows are attached to the same example.
def test_entity_dont_have_min_examples(self):
example = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=2,
entity='hi')
# Only one `hi` entity row so far: expected to be below the minimum.
self.assertFalse(self.repository.current_update().ready_for_train)
RepositoryExampleEntity.objects.create(
repository_example=example,
start=1,
end=2,
entity='hi')
# A second `hi` entity row is expected to satisfy the minimum.
self.assertTrue(self.repository.current_update().ready_for_train)


class RequestRepositoryAuthorizationTestCase(TestCase):
def setUp(self):
Expand Down