Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add automatic evaluate #542

Merged
merged 10 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mozilla-django-oidc = "~=1.2.4"

[dev-packages]
"flake8" = "*"
requests-mock = "~=1.8.0"
coverage = "*"
ipython = "*"
autopep8 = "*"
Expand Down
2 changes: 1 addition & 1 deletion bothub/api/v2/evaluate/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def update(self, instance, validated_data):
class RepositoryEvaluateResultVersionsSerializer(serializers.ModelSerializer):
class Meta:
model = RepositoryEvaluateResult
fields = ["id", "language", "created_at", "version"]
fields = ["id", "language", "created_at", "version", "cross_validation"]
ref_name = None

language = serializers.SerializerMethodField()
Expand Down
1 change: 1 addition & 0 deletions bothub/api/v2/evaluate/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ class ResultsListViewSet(
permission_classes = [IsAuthenticated, RepositoryEvaluateResultPermission]
filter_class = EvaluateResultsFilter
filter_backends = [OrderingFilter, DjangoFilterBackend]
filter_fields = ["cross_validation"]
ordering_fields = ["created_at"]

def retrieve(self, request, *args, **kwargs):
Expand Down
1 change: 0 additions & 1 deletion bothub/api/v2/repository/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,6 @@ class TrainSerializer(serializers.Serializer):
class EvaluateSerializer(serializers.Serializer):
language = serializers.ChoiceField(LANGUAGE_CHOICES, required=True)
repository_version = serializers.IntegerField(required=False)
cross_validation = serializers.BooleanField(default=False)


class RepositoryAutoTranslationSerializer(serializers.Serializer):
Expand Down
50 changes: 25 additions & 25 deletions bothub/api/v2/repository/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def words_distribution(self, request, **kwargs):
)
def evaluate(self, request, **kwargs):
"""
Evaluate repository using Bothub NLP service
Manual evaluate repository using Bothub NLP service
"""
repository = self.get_object()
user_authorization = repository.get_user_authorization(request.user)
Expand All @@ -405,19 +405,13 @@ def evaluate(self, request, **kwargs):
serializer = EvaluateSerializer(data=request.data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

if not repository.evaluations(language=request.data.get("language")).count():
raise APIException(
detail=_("You need to have at least " + "one registered test phrase")
) # pragma: no cover

if len(repository.intents()) <= 1:
raise APIException(
detail=_("You need to have at least " + "two registered intents")
) # pragma: no cover
try:
request = repository.request_nlp_manual_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)

request = repository.request_nlp_evaluate( # pragma: no cover
user_authorization, serializer.data
)
if request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": request.status_code}, code=request.status_code
Expand All @@ -427,27 +421,33 @@ def evaluate(self, request, **kwargs):
@action(
detail=True,
methods=["POST"],
url_name="repository-evaluate-crossvalidation",
url_name="repository-automatic-evaluate",
lookup_fields=["uuid"],
serializer_class=EvaluateSerializer,
)
def evaluate_crossvalidation(self, request, **kwargs):
def automatic_evaluate(self, request, **kwargs):
"""
Cross validation evaluate repository using Bothub NLP service
Automatic evaluate repository using Bothub NLP service
"""
repository = self.get_object()
user_authorization = repository.get_user_authorization(request.user)
if not user_authorization.can_write:
raise PermissionDenied() # pragma: no cover
serializer = EvaluateSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
raise PermissionDenied()
serializer = EvaluateSerializer(data=request.data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

task = celery_app.send_task( # pragma: no cover
name="evaluate_crossvalidation",
args=[serializer.data, str(user_authorization)],
)
task.wait() # pragma: no cover
return Response(task.result) # pragma: nocover
try:
request = repository.request_nlp_automatic_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)

if request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": request.status_code}, code=request.status_code
) # pragma: no cover
return Response(request.json()) # pragma: no cover


@method_decorator(
Expand Down
13 changes: 6 additions & 7 deletions bothub/api/v2/tests/test_nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,7 @@ def request(self, token, repository_version=""):
authorization_header = {"HTTP_AUTHORIZATION": "Bearer {}".format(token)}
request = self.factory.get(
"/v2/repository/nlp/authorization/info/{}/".format(token),
{
"repository_version": repository_version
},
{"repository_version": repository_version},
**authorization_header
)
response = RepositoryAuthorizationInfoViewSet.as_view({"get": "retrieve"})(
Expand All @@ -209,13 +207,12 @@ def test_list_all_repository_intents(self):
def test_list_repository_intents_with_filter_version(self):
response, content_data = self.request(
str(self.repository_authorization.uuid),
repository_version=self.repository_version_language.pk
repository_version=self.repository_version.pk,
)
self.assertEqual(len(response.data.get("intents")), 1)

response, content_data = self.request(
str(self.repository_authorization.uuid),
repository_version="0"
str(self.repository_authorization.uuid), repository_version="0"
)
self.assertEqual(len(response.data.get("intents")), 0)

Expand All @@ -235,7 +232,9 @@ def setUp(self):
)

self.repository_authorization = RepositoryAuthorization.objects.create(
user=self.user, repository=self.repository, role=RepositoryAuthorization.ROLE_ADMIN
user=self.user,
repository=self.repository,
role=RepositoryAuthorization.ROLE_ADMIN,
)

self.repository_version = RepositoryVersion.objects.create(
Expand Down
6 changes: 3 additions & 3 deletions bothub/api/v2/tests/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -2308,7 +2308,7 @@ def test_permission_denied(self):
self.assertEqual(response[0].status_code, status.HTTP_403_FORBIDDEN)


class EvaluateCrossValidationTestCase(TestCase):
class EvaluateAutomaticTestCase(TestCase):
def setUp(self):
self.factory = RequestFactory()

Expand All @@ -2330,14 +2330,14 @@ def request(self, repository, data={}, token=None):
authorization_header = {"HTTP_AUTHORIZATION": "Token {}".format(token.key)}

request = self.factory.post(
"/v2/repository/repository-details/{}/evaluate_crossvalidation/".format(
"/v2/repository/repository-details/{}/automatic_evaluate/".format(
str(repository.uuid)
),
data,
**authorization_header,
)

response = RepositoryViewSet.as_view({"post": "evaluate_crossvalidation"})(
response = RepositoryViewSet.as_view({"post": "automatic_evaluate"})(
request, uuid=repository.uuid
)

Expand Down
72 changes: 68 additions & 4 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,50 @@ def save(
self.__use_name_entities = self.use_name_entities
self.__use_analyze_char = self.use_analyze_char

def have_at_least_one_test_phrase_registered(self, language: str) -> bool:
    """Tell whether any evaluation entry exists for ``language``.

    Counts the result of ``self.evaluations(language=language)`` and
    reports whether that count is greater than zero.
    """
    test_phrase_count = self.evaluations(language=language).count()
    return test_phrase_count > 0

def have_at_least_two_intents_registered(self) -> bool:
    """Tell whether ``self.intents()`` yields two or more intents."""
    registered_intents = self.intents()
    return len(registered_intents) >= 2

def have_at_least_fifteen_examples_registered(self, language: str) -> bool:
    """Tell whether ``language`` has fifteen or more registered examples.

    Counts the result of ``self.examples(language=language)``.
    """
    train_phrase_count = self.examples(language=language).count()
    return train_phrase_count >= 15

def have_at_least_three_examples_for_each_intent(self, language: str) -> bool:
    """Tell whether every intent has at least three examples in ``language``.

    For each intent returned by ``self.intents()``, the examples for
    ``language`` are filtered by ``intent__text`` and counted. The result is
    ``True`` only when every intent reaches the threshold; with no intents at
    all the result is vacuously ``True``.

    The comparison is ``>= 3``: exactly three examples satisfies "at least
    three", matching the method name and the validation message shown to the
    user. (The previous ``> 3`` silently required four.)
    """
    # The example set depends only on the language, so build it once.
    # NOTE(review): assumes ``examples()`` returns a Django-style queryset
    # whose ``filter`` yields a fresh object and leaves the base untouched.
    language_examples = self.examples(language=language)
    # Generator form lets ``all`` stop at the first intent that falls short.
    return all(
        language_examples.filter(intent__text=intent).count() >= 3
        for intent in self.intents()
    )

def validate_if_can_run_manual_evaluate(self, language: str) -> None:
    """Check the preconditions for a manual evaluate run.

    Two checks run in order, and the first one that fails raises
    ``ValidationError`` with its message passed through ``_``:

    1. at least one test phrase is registered for ``language``;
    2. at least two intents are registered.

    Returns ``None`` when both checks pass.
    """
    has_test_phrase = self.have_at_least_one_test_phrase_registered(
        language=language
    )
    if not has_test_phrase:
        raise ValidationError(
            _("You need to have at least " + "one registered test phrase")
        )

    has_enough_intents = self.have_at_least_two_intents_registered()
    if not has_enough_intents:
        raise ValidationError(
            _("You need to have at least " + "two registered intents")
        )

def validate_if_can_run_automatic_evaluate(self, language: str) -> None:
    """Check the preconditions for an automatic evaluate run.

    Three checks run in order, and the first one that fails raises
    ``ValidationError`` with its message passed through ``_``:

    1. at least two intents are registered;
    2. at least fifteen examples are registered for ``language``;
    3. every intent passes ``have_at_least_three_examples_for_each_intent``
       for ``language``.

    Returns ``None`` when all checks pass.
    """
    has_enough_intents = self.have_at_least_two_intents_registered()
    if not has_enough_intents:
        raise ValidationError(
            _("You need to have at least " + "two registered intents")
        )

    has_enough_examples = self.have_at_least_fifteen_examples_registered(
        language=language
    )
    if not has_enough_examples:
        raise ValidationError(
            _("You need to have at least " + "fifteen registered train phrases")
        )

    every_intent_covered = self.have_at_least_three_examples_for_each_intent(
        language=language
    )
    if not every_intent_covered:
        raise ValidationError(
            _("You need to have at least " + "three train phrases for each intent")
        )

def request_nlp_train(self, user_authorization, data):
try: # pragma: no cover
if data.get("repository_version"):
Expand Down Expand Up @@ -521,17 +565,37 @@ def request_nlp_words_distribution(self, user_authorization, data):
code=status.HTTP_503_SERVICE_UNAVAILABLE,
)

def request_nlp_evaluate(self, user_authorization, data):
def request_nlp_manual_evaluate(self, user_authorization, data):
self.validate_if_can_run_manual_evaluate(language=data.get("language"))

try: # pragma: no cover
url = f"{self.nlp_server if self.nlp_server else settings.BOTHUB_NLP_BASE_URL}evaluate/"
data = {
"language": data.get("language"),
"repository_version": data.get("repository_version"),
"cross_validation": data.get("cross_validation", False)
"cross_validation": False,
}
headers = {
"Authorization": f"Bearer {user_authorization.uuid}"
headers = {"Authorization": f"Bearer {user_authorization.uuid}"}
r = requests.post(url, data=json.dumps(data), headers=headers)

return r # pragma: no cover
except requests.exceptions.ConnectionError: # pragma: no cover
raise APIException( # pragma: no cover
{"status_code": status.HTTP_503_SERVICE_UNAVAILABLE},
code=status.HTTP_503_SERVICE_UNAVAILABLE,
)

def request_nlp_automatic_evaluate(self, user_authorization, data):
self.validate_if_can_run_automatic_evaluate(language=data.get("language"))

try: # pragma: no cover
url = f"{self.nlp_server if self.nlp_server else settings.BOTHUB_NLP_BASE_URL}evaluate/"
data = {
"language": data.get("language"),
"repository_version": data.get("repository_version"),
"cross_validation": True,
}
headers = {"Authorization": f"Bearer {user_authorization.uuid}"}
r = requests.post(url, data=json.dumps(data), headers=headers)

return r # pragma: no cover
Expand Down
7 changes: 4 additions & 3 deletions bothub/common/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,10 @@ def intent_suggestions(intent_id, language, authorization_token): # pragma: no
suggestions = request_nlp(
authorization_token, None, "intent_sentence_suggestion", data
)
random.shuffle(suggestions["suggested_sentences"])
if suggestions["suggested_sentences"]:
dataset[intent.text] = suggestions["suggested_sentences"][
suggested_sentences = suggestions.get("suggested_sentences", [])
if suggested_sentences:
random.shuffle(suggested_sentences)
dataset[intent.text] = suggested_sentences[
: settings.N_SENTENCES_TO_GENERATE
]
else:
Expand Down
Loading