From b30f46ae24da66b4b33d0f295ea6eb396c6c2bff Mon Sep 17 00:00:00 2001 From: helllllllder Date: Wed, 1 Dec 2021 10:08:10 -0300 Subject: [PATCH 1/2] Add validators for the text in examples Validate that the text contains letters and that its word count is within the configured limit --- README.md | 1 + bothub/api/v2/knowledge_base/serializers.py | 2 + bothub/api/v2/repository/serializers.py | 5 +- bothub/api/v2/repository/validators.py | 16 +++ bothub/api/v2/tests/test_examples.py | 97 ++++++++++++++++++- .../migrations/0112_auto_20211130_1834.py | 30 ++++++ bothub/common/models.py | 17 +++- bothub/settings.py | 4 + 8 files changed, 165 insertions(+), 7 deletions(-) create mode 100644 bothub/common/migrations/0112_auto_20211130_1834.py diff --git a/README.md b/README.md index 205bd9d84..7bc12e889 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,7 @@ You can set environment variables in your OS, write on ```.env``` file or pass v | REPOSITORY_NLP_LOG_LIMIT | ```int``` | ```10000``` | Limit of query size to repository log. | REPOSITORY_RESTRICT_ACCESS_NLP_LOGS | ```list``` | ```[]``` | Restricts log access to a particular or multiple intelligences | REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT | ```int``` | ```450``` | Limit of characters in the knowledge base description +| REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT | ```int``` | ```200``` | Limit of words for the example sentence text | ELASTICSEARCH_DSL | ```string``` | ```es:9200``` | URL Elasticsearch. | ELASTICSEARCH_NUMBER_OF_SHARDS | ```int``` | ```1``` | Specify the number of shards for the indexes. | ELASTICSEARCH_NUMBER_OF_REPLICAS | ```int``` | ```1``` | Specify the number of replicas for the indexes. 
diff --git a/bothub/api/v2/knowledge_base/serializers.py b/bothub/api/v2/knowledge_base/serializers.py index 39c8b02e0..356c35534 100644 --- a/bothub/api/v2/knowledge_base/serializers.py +++ b/bothub/api/v2/knowledge_base/serializers.py @@ -1,3 +1,4 @@ +from bothub.api.v2.repository.validators import ExampleTextHasLettersValidator from rest_framework import serializers from bothub.common import languages @@ -50,6 +51,7 @@ class Meta: ] read_only_fields = ["created_at", "last_update"] + text = serializers.CharField(required=False, validators=[ExampleTextHasLettersValidator()]) knowledge_base = serializers.PrimaryKeyRelatedField( queryset=QAKnowledgeBase.objects ) diff --git a/bothub/api/v2/repository/serializers.py b/bothub/api/v2/repository/serializers.py index ce76457fe..c5a786779 100644 --- a/bothub/api/v2/repository/serializers.py +++ b/bothub/api/v2/repository/serializers.py @@ -50,6 +50,8 @@ CanContributeInRepositoryVersionValidator, CanCreateRepositoryInOrganizationValidator, ExampleWithIntentOrEntityValidator, + ExampleTextHasLettersValidator, + ExampleTextHasLimitedWordsValidator, IntentValidator, ) from ..translation.validators import ( @@ -1249,7 +1251,7 @@ class Meta: ref_name = None id = serializers.PrimaryKeyRelatedField(read_only=True, style={"show": False}) - text = EntityText(style={"entities_field": "entities"}, required=False) + text = EntityText(style={"entities_field": "entities"}, required=False, validators=[ExampleTextHasLettersValidator(), ExampleTextHasLimitedWordsValidator()]) repository = serializers.PrimaryKeyRelatedField( queryset=Repository.objects, validators=[CanContributeInRepositoryValidator()], @@ -1322,6 +1324,7 @@ def create(self, validated_data): intent, created = RepositoryIntent.objects.get_or_create( repository_version=version_id, text=intent_text ) + validated_data.update({"intent": intent}) example = self.Meta.model.objects.create(**validated_data) for entity_data in entities_data: diff --git 
a/bothub/api/v2/repository/validators.py b/bothub/api/v2/repository/validators.py index 71a7070df..4ef036733 100644 --- a/bothub/api/v2/repository/validators.py +++ b/bothub/api/v2/repository/validators.py @@ -1,4 +1,5 @@ import re +from django.conf import settings from django.utils.translation import ugettext_lazy as _ from django.shortcuts import get_object_or_404 @@ -77,6 +78,21 @@ def set_context(self, serializer): self.request = serializer.context.get("request") +class ExampleTextHasLettersValidator(object): + def __call__(self, value): + reg = re.compile(r".[a-zA-Z_]") + if not reg.match(value): + raise ValidationError(_("Enter a valid value that have letters in it")) + + +class ExampleTextHasLimitedWordsValidator(object): + def __call__(self, value): + count = len(value.split()) + if count > settings.REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT: + raise ValidationError(_("Enter a valid value that is in the range of 200 words")) + + + class APIExceptionCustom(APIException): """Readers error class""" diff --git a/bothub/api/v2/tests/test_examples.py b/bothub/api/v2/tests/test_examples.py index fd77a8c14..4d8f4acf1 100644 --- a/bothub/api/v2/tests/test_examples.py +++ b/bothub/api/v2/tests/test_examples.py @@ -1,4 +1,5 @@ import json +from django.conf import settings from django.test import TestCase from django.test import RequestFactory @@ -12,9 +13,10 @@ from bothub.api.v2.tests.utils import create_user_and_token from bothub.api.v2.examples.views import ExamplesViewSet +from bothub.api.v2.repository.views import RepositoryExampleViewSet -class ListExamplesAPITestCase(TestCase): +class DefaultExamplesAPITestCase(TestCase): def setUp(self): self.factory = RequestFactory() self.owner, self.owner_token = create_user_and_token("owner") @@ -79,13 +81,15 @@ def setUp(self): repository_version_language=self.repository_2.current_version( languages.LANGUAGE_PT ), - text="oi", + text="oi ", intent=self.example2_intent_1, ) self.translation_6 = 
RepositoryTranslatedExample.objects.create( original_example=self.example_6, language=languages.LANGUAGE_EN, text="hi" ) + +class ListExamplesAPITestCase(DefaultExamplesAPITestCase): def request(self, data={}, token=None): authorization_header = ( {"HTTP_AUTHORIZATION": "Token {}".format(token.key)} if token else {} @@ -247,3 +251,92 @@ def test_filter_entity(self): ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(content_data.get("count"), 1) + + +class CreateExamplesAPITestCase(DefaultExamplesAPITestCase): + def request(self, data, token): + authorization_header = ( + {"HTTP_AUTHORIZATION": "Token {}".format(token.key)} + ) + request = self.factory.post( + "/v2/repository/example/", + json.dumps(data), + content_type="application/json", + **authorization_header, + ) + + response = RepositoryExampleViewSet.as_view({"post": "create"})(request) + response.render() + content_data = json.loads(response.content) + return (response, content_data) + + def test_ok(self): + data = { + "repository": str(self.repository.uuid), + "repository_version": self.repository.current_version().repository_version.pk, + "text": "testing 123 yés ///????³³²²¹¹£ ++++-----", + "language": "en", + "entities": [ + { + "start": 9, + "end": 11, + "entity": "numero", + } + ], + "intent": str(self.example_intent_1.pk), + "is_corrected": False + } + + response, content_data = self.request( + data, + self.owner_token, + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + def test_text_without_letters(self): + data = { + "repository": str(self.repository.uuid), + "repository_version": self.repository.current_version().repository_version.pk, + "text": " ---- //// -----", + "language": "en", + "entities": [ + { + "start": 9, + "end": 11, + "entity": "numero", + } + ], + "intent": str(self.example_intent_1.pk), + "is_corrected": False + } + response, content_data = self.request( + data, + self.owner_token, + ) + + self.assertEqual(response.status_code, 
status.HTTP_400_BAD_REQUEST) + + def test_text_words_limit(self): + limit = settings.REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT + 1 + text = " ".join(['teste' for x in range(limit)]) + data = { + "repository": str(self.repository.uuid), + "repository_version": self.repository.current_version().repository_version.pk, + "text": text, + "language": "en", + "entities": [ + { + "start": 9, + "end": 11, + "entity": "numero", + } + ], + "intent": str(self.example_intent_1.pk), + "is_corrected": False + } + response, content_data = self.request( + data, + self.owner_token, + ) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/bothub/common/migrations/0112_auto_20211130_1834.py b/bothub/common/migrations/0112_auto_20211130_1834.py new file mode 100644 index 000000000..f4a882ae6 --- /dev/null +++ b/bothub/common/migrations/0112_auto_20211130_1834.py @@ -0,0 +1,30 @@ +# Generated by Django 3.2.8 on 2021-11-30 18:34 + +import django.core.validators +from django.db import migrations, models +import re + + +class Migration(migrations.Migration): + + dependencies = [ + ('common', '0111_auto_20210908_1135'), + ] + + operations = [ + migrations.AlterField( + model_name='qatext', + name='text', + field=models.TextField(help_text='QA context text', max_length=25000, validators=[django.core.validators.RegexValidator(re.compile('.[a-zA-Z_]'), 'Enter a valid value that have letters in it', 'invalid')], verbose_name='text'), + ), + migrations.AlterField( + model_name='repositoryexample', + name='text', + field=models.TextField(help_text='Example text', validators=[django.core.validators.RegexValidator(re.compile('.[a-zA-Z_]'), 'Enter a valid value that have letters in it', 'invalid')], verbose_name='text'), + ), + migrations.AlterField( + model_name='repositorytranslatedexample', + name='text', + field=models.TextField(help_text='Translation text', validators=[django.core.validators.RegexValidator(re.compile('.[a-zA-Z_]'), 'Enter a valid value that have 
letters in it', 'invalid')], verbose_name='text'), + ), + ] diff --git a/bothub/common/models.py b/bothub/common/models.py index d24bb13e0..39247465b 100644 --- a/bothub/common/models.py +++ b/bothub/common/models.py @@ -5,7 +5,7 @@ from django.conf import settings from django.core.exceptions import ValidationError from django.core.mail import send_mail -from django.core.validators import RegexValidator, _lazy_re_compile +from django.core.validators import RegexValidator, _lazy_re_compile, ProhibitNullCharactersValidator from django.db import models from django.db.models import Sum, Q, IntegerField, Case, When, Count from django.dispatch import receiver @@ -25,6 +25,7 @@ from .exceptions import TrainingNotAllowed from .. import utils + item_key_regex = _lazy_re_compile(r"^[-a-z0-9_]+\Z") validate_item_key = RegexValidator( item_key_regex, @@ -35,6 +36,12 @@ "invalid", ) +validate_text = RegexValidator( + _lazy_re_compile(r".[-a-zA-Z_]"), + _("Enter a valid value that have letters in it"), + "invalid", +) + def can_t_be_other(value): # pragma: no cover if value == "other": @@ -1428,7 +1435,7 @@ class Meta: RepositoryVersionLanguage, models.CASCADE, related_name="added", editable=False ) text = models.TextField( - _("text"), help_text=_("Example text"), blank=False, null=False + _("text"), help_text=_("Example text"), blank=False, null=False, validators=[validate_text] ) intent = models.ForeignKey(RepositoryIntent, models.CASCADE) created_at = models.DateTimeField(_("created at"), auto_now_add=True) @@ -1541,7 +1548,7 @@ class Meta: help_text=_("Translation language"), validators=[languages.validate_language], ) - text = models.TextField(_("text"), help_text=_("Translation text")) + text = models.TextField(_("text"), help_text=_("Translation text"), validators=[validate_text]) created_at = models.DateTimeField(_("created at"), auto_now_add=True) objects = RepositoryTranslatedExampleManager() @@ -2333,7 +2340,9 @@ class QAtext(models.Model): knowledge_base = 
models.ForeignKey( QAKnowledgeBase, on_delete=models.CASCADE, related_name="texts" ) - text = models.TextField(_("text"), help_text=_("QA context text"), max_length=25000) + text = models.TextField( + _("text"), help_text=_("QA context text"), max_length=25000, validators=[validate_text] + ) language = models.CharField( _("language"), max_length=5, diff --git a/bothub/settings.py b/bothub/settings.py index ca166360e..d988f72d9 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -77,6 +77,7 @@ CONNECT_CERTIFICATE_GRPC_CRT=(str, None), REPOSITORY_RESTRICT_ACCESS_NLP_LOGS=(list, []), REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT=(int, 450), + REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT=(int, 200), ELASTICSEARCH_DSL=(str, "localhost:9200"), ELASTICSEARCH_REPOSITORYNLPLOG_INDEX=(str, "ai_repositorynlplog"), ELASTICSEARCH_REPOSITORYQANLPLOG_INDEX=(str, "ai_repositoryqanlplog"), @@ -443,6 +444,9 @@ # Limit of characters for the knowledge base description REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT = env.list("REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT", default=450) +# Limit of words for the example sentence +REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT = env.list("REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT", default=200) + # django_redis CACHES = { From 08af2f46f543826843b6d23c9a2d993ef5d96a60 Mon Sep 17 00:00:00 2001 From: helllllllder Date: Wed, 1 Dec 2021 10:22:09 -0300 Subject: [PATCH 2/2] flake8 --- bothub/api/v2/repository/validators.py | 9 +++++++-- bothub/common/models.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/bothub/api/v2/repository/validators.py b/bothub/api/v2/repository/validators.py index 4ef036733..dc61ec51a 100644 --- a/bothub/api/v2/repository/validators.py +++ b/bothub/api/v2/repository/validators.py @@ -89,8 +89,13 @@ class ExampleTextHasLimitedWordsValidator(object): def __call__(self, value): count = len(value.split()) if count > settings.REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT: - raise ValidationError(_("Enter a valid value that is in the 
range of 200 words")) - + raise ValidationError( + _( + "Enter a valid value that is in the range of " + + str(settings.REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT) + + " words" + ) + ) class APIExceptionCustom(APIException): diff --git a/bothub/common/models.py b/bothub/common/models.py index 39247465b..56900b54e 100644 --- a/bothub/common/models.py +++ b/bothub/common/models.py @@ -5,7 +5,7 @@ from django.conf import settings from django.core.exceptions import ValidationError from django.core.mail import send_mail -from django.core.validators import RegexValidator, _lazy_re_compile, ProhibitNullCharactersValidator +from django.core.validators import RegexValidator, _lazy_re_compile from django.db import models from django.db.models import Sum, Q, IntegerField, Case, When, Count from django.dispatch import receiver