diff --git a/bothub/api/serializers/repository.py b/bothub/api/serializers/repository.py
index 1cd6f9cfd..d8b79efd7 100644
--- a/bothub/api/serializers/repository.py
+++ b/bothub/api/serializers/repository.py
@@ -26,6 +26,7 @@ class Meta:
             'name',
             'slug',
             'language',
+            'use_language_model_featurizer',
             'categories',
             'description',
             'is_private',
@@ -65,6 +66,7 @@ class Meta:
             'slug',
             'language',
             'available_languages',
+            'use_language_model_featurizer',
             'categories',
             'categories_list',
             'description',
diff --git a/bothub/common/migrations/0023_repository_use_language_model_featurizer.py b/bothub/common/migrations/0023_repository_use_language_model_featurizer.py
new file mode 100644
index 000000000..cb1c91f42
--- /dev/null
+++ b/bothub/common/migrations/0023_repository_use_language_model_featurizer.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.0.6 on 2018-10-02 17:47
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('common', '0022_repositoryupdate_training_log'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='repository',
+            name='use_language_model_featurizer',
+            field=models.BooleanField(default=True, help_text='You can use language featurizer to get words similarity. You need less examples to create a great bot.', verbose_name='Use language model featurizer'),
+        ),
+    ]
diff --git a/bothub/common/migrations/0024_repositoryupdate_use_language_model_featurizer.py b/bothub/common/migrations/0024_repositoryupdate_use_language_model_featurizer.py
new file mode 100644
index 000000000..7343fe2f0
--- /dev/null
+++ b/bothub/common/migrations/0024_repositoryupdate_use_language_model_featurizer.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.0.6 on 2018-10-02 17:54
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('common', '0023_repository_use_language_model_featurizer'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='repositoryupdate',
+            name='use_language_model_featurizer',
+            field=models.BooleanField(default=True),
+        ),
+    ]
diff --git a/bothub/common/models.py b/bothub/common/models.py
index 3f441dbf5..fffbe749f 100644
--- a/bothub/common/models.py
+++ b/bothub/common/models.py
@@ -99,6 +99,12 @@ class Meta:
         validators=[
             languages.validate_language,
         ])
+    use_language_model_featurizer = models.BooleanField(
+        _('Use language model featurizer'),
+        help_text=_('You can use language featurizer to get words '
+                    'similarity. You need less examples to create a great '
+                    'bot.'),
+        default=True)
     categories = models.ManyToManyField(
         RepositoryCategory,
         help_text=CATEGORIES_HELP_TEXT)
@@ -293,7 +299,13 @@ def current_update(self, language=None):
         language = language or self.language
         repository_update, created = self.updates.get_or_create(
             language=language,
-            training_started_at=None)
+            training_started_at=None,
+            # Only seed the flag on creation; filtering on it would fork a
+            # second pending update whenever the repository setting changes.
+            defaults={
+                'use_language_model_featurizer':
+                    self.use_language_model_featurizer,
+            })
         return repository_update
 
     def last_trained_update(self, language=None):
@@ -337,6 +349,8 @@ class Meta:
         validators=[
             languages.validate_language,
         ])
+    # Featurizer setting this update is trained with (set by start_training).
+    use_language_model_featurizer = models.BooleanField(default=True)
     created_at = models.DateTimeField(
         _('created at'),
         auto_now_add=True)
@@ -434,6 +448,14 @@ def requirements_to_train(self):
 
     @property
     def ready_for_train(self):
+        last_trained_update = self.repository.last_trained_update(
+            language=self.language)
+        # A featurizer setting that differs from the last trained update
+        # makes this update ready regardless of requirements_to_train.
+        if last_trained_update:
+            if last_trained_update.use_language_model_featurizer != \
+                    self.repository.use_language_model_featurizer:
+                return True
-        return len(self.requirements_to_train) is 0
+        return len(self.requirements_to_train) == 0
 
     def validate_init_train(self, by=None):
@@ -450,10 +472,14 @@ def start_training(self, by):
         self.validate_init_train(by)
         self.by = by
         self.training_started_at = timezone.now()
+        # Snapshot the repository's featurizer setting at training start.
+        self.use_language_model_featurizer = self.repository \
+            .use_language_model_featurizer
         self.save(
             update_fields=[
                 'by',
                 'training_started_at',
+                'use_language_model_featurizer',
             ])
 
     def save_training(self, bot_data):
diff --git a/bothub/common/tests.py b/bothub/common/tests.py
index ddb224909..83649d7b1 100644
--- a/bothub/common/tests.py
+++ b/bothub/common/tests.py
@@ -1045,3 +1045,48 @@ def test_set_label_to_none(self):
 
         name_entity.set_label(None)
         self.assertIsNone(name_entity.label)
+
+
+class UseLanguageModelFeaturizerTestCase(TestCase):
+    def setUp(self):
+        self.language = languages.LANGUAGE_EN
+
+        self.owner = User.objects.create_user('owner@user.com', 'user')
+
+        self.repository = Repository.objects.create(
+            owner=self.owner,
+            name='Test',
+            slug='test',
+            language=self.language,
+            use_language_model_featurizer=True)
+
+        RepositoryExample.objects.create(
+            repository_update=self.repository.current_update(),
+            text='my name is Douglas',
+            intent='greet')
+        RepositoryExample.objects.create(
+            repository_update=self.repository.current_update(),
+            text='my name is John',
+            intent='greet')
+
+    def test_change_ready_for_train(self):
+        self.assertTrue(self.repository.ready_for_train)
+        current_update = self.repository.current_update()
+        current_update.start_training(self.owner)
+        current_update.save_training(b'')
+        self.assertFalse(self.repository.ready_for_train)
+        self.repository.use_language_model_featurizer = False
+        self.repository.save()
+        self.assertTrue(self.repository.ready_for_train)
+        self.repository.use_language_model_featurizer = True
+        self.repository.save()
+        self.assertFalse(self.repository.ready_for_train)
+
+    def test_equal_repository_value_after_train(self):
+        current_update = self.repository.current_update()
+        self.repository.use_language_model_featurizer = False
+        self.repository.save()
+        self.assertTrue(current_update.use_language_model_featurizer)
+        current_update.start_training(self.owner)
+        current_update.save_training(b'')
+        self.assertFalse(current_update.use_language_model_featurizer)
diff --git a/setup.py b/setup.py
index 08e3f945f..538deaf23 100644
--- a/setup.py
+++ b/setup.py
@@ -2,9 +2,9 @@
 
 
 setup(
-    name='bothub',
+    name='bothub-engine',
     version='1.16.0',
-    description='bothub',
+    description='Bothub Engine',
     packages=find_packages(),
     install_requires=[
         'python-decouple',