From cce45745c7a284efc540fa7899db9d7465c5fc6e Mon Sep 17 00:00:00 2001 From: Helder Souza <42891390+helllllllder@users.noreply.github.com> Date: Tue, 3 May 2022 15:59:10 -0300 Subject: [PATCH 1/6] Develop (#709) (#710) * Fix: Remove ai from project (#707) * remove document deletion from delete_nlp_logs task * inconsistency number and debug errors fixed * add user_email to remove_authorizations_project * Feature/health check blocklist (#708) * remove document deletion from delete_nlp_logs task * add a blocklist for not saving logs depending on the authorization user * inconsistency number and debug errors fixed * change the REPOSITORY_BLOCK_USER_LOGS values from users to repository authorizations * change readme * pass on sonarcloud * change admins settings * transform uuid into string * convert uuid into string at test_blocked_user --- README.md | 1 + bothub/api/grpc/connect_grpc_client.py | 8 +++- bothub/api/v2/nlp/serializers.py | 8 +++- bothub/api/v2/repository/views.py | 2 +- bothub/api/v2/tests/test_logs.py | 53 ++++++++++++++++++++++++++ bothub/common/tasks.py | 6 ++- bothub/settings.py | 9 +++-- docker-compose.yml | 31 ++++++++++----- 8 files changed, 99 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index d239aac1..1846f7f8 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,7 @@ You can set environment variables in your OS, write on ```.env``` file or pass v | ELASTICSEARCH_LOGS_DELETE_AGE | ```string``` | ```90d``` | Specify the ILM delete age, when the index will be deleted. | GUNICORN_WORKERS | ``` int ``` | ``` multiprocessing.cpu_count() * 2 + 1 ``` | Gunicorn number of workers. | USE_ELASTICSEARCH | ```boolean``` | ```true``` | Change the logic in requirements_to_train to use either elasticsearch or postgres. +| REPOSITORY_BLOCK_USER_LOGS | ```list``` | ```[]``` | List of repository authorization(api bearer) that won't save logs ## Roadmap diff --git a/bothub/api/grpc/connect_grpc_client.py b/bothub/api/grpc/connect_grpc_client.py index 79fb94db..07b0cd51 100644 --- a/bothub/api/grpc/connect_grpc_client.py +++ b/bothub/api/grpc/connect_grpc_client.py @@ -57,14 +57,18 @@ def get_authorization_classifier( return next(classifier).get("uuid") - def remove_authorization(self, project_uuid: str, authorization_uuid: str): + def remove_authorization( + self, project_uuid: str, authorization_uuid: str, user_email: str + ): classifier_uuid = self.get_authorization_classifier( project_uuid, authorization_uuid ) stub = project_pb2_grpc.ProjectControllerStub(self.channel) stub.DestroyClassifier( - project_pb2.ClassifierDestroyRequest(uuid=classifier_uuid) + project_pb2.ClassifierDestroyRequest( + uuid=classifier_uuid, user_email=user_email + ) ) def create_classifier(self, **kwargs): diff --git a/bothub/api/v2/nlp/serializers.py b/bothub/api/v2/nlp/serializers.py index 1acbccb5..21446817 100644 --- a/bothub/api/v2/nlp/serializers.py +++ b/bothub/api/v2/nlp/serializers.py @@ -1,5 +1,7 @@ from rest_framework import serializers +from django.conf import settings + from bothub.common.models import ( QAKnowledgeBase, QALogs, @@ -45,8 +47,12 @@ class Meta: ) def create(self, validated_data): + repository_auth = validated_data.get("user") + user = repository_auth.user + if str(repository_auth.pk) in settings.REPOSITORY_BLOCK_USER_LOGS: + return validated_data log_intent = validated_data.pop("log_intent") - validated_data.update({"user": validated_data.get("user").user}) + validated_data.update({"user": user}) instance = self.Meta.model(**validated_data) instance.save() diff --git a/bothub/api/v2/repository/views.py b/bothub/api/v2/repository/views.py index 3fc582f0..a69918f1 100644 --- a/bothub/api/v2/repository/views.py +++ b/bothub/api/v2/repository/views.py @@ -336,7 +336,7 @@ def remove_repository_project(self, request, **kwargs): task = celery_app.send_task( name="remove_authorizations_project", - args=[project_uuid, list(authorizations_uuids)], + args=[project_uuid, list(authorizations_uuids), request.user.email], ) task.wait() diff --git a/bothub/api/v2/tests/test_logs.py b/bothub/api/v2/tests/test_logs.py index 710a2b1e..93e1a147 100644 --- a/bothub/api/v2/tests/test_logs.py +++ b/bothub/api/v2/tests/test_logs.py @@ -23,6 +23,7 @@ from bothub.common.documents.repositorynlplog import REPOSITORYNLPLOG_INDEX_NAME +@tag("elastic") class RepositoryNLPLogTestCase(TestCase): def setUp(self): self.factory = RequestFactory() @@ -59,6 +60,58 @@ def request(self, data): content_data = json.loads(response.content) return (response, content_data) + def list_request(self, data, token=None): + authorization_header = ( + {"HTTP_AUTHORIZATION": "Token {}".format(token.key)} if token else {} + ) + request = self.factory.get("/v2/repository/log/", data, **authorization_header) + response = RepositoryNLPLogViewSet.as_view({"get": "list"})(request) + response.render() + content_data = json.loads(response.content) + return (response, content_data) + + def test_blocked_user(self): + with self.settings(REPOSITORY_BLOCK_USER_LOGS=[str(self.repository_auth.pk)]): + data = { + "text": "test", + "user_agent": "python-requests/2.20.1", + "from_backend": True, + "user": str(self.repository_auth.pk), + "repository_version_language": int( + self.repository.current_version().pk + ), + "nlp_log": json.dumps( + { + "intent": {"name": "bias", "confidence": 0.9994810899625854}, + "intent_ranking": [ + {"name": "bias", "confidence": 0.9994810819625244}, + {"name": "doubt", "confidence": 0.039216167263031006}, + {"name": "negative", "confidence": 0.0}, + {"name": "affirmative", "confidence": 0.0}, + ], + "labels_list": [], + "entities_list": [], + "entities": {}, + "text": "test", + "repository_version": int(self.repository.current_version().pk), + "language": str(self.repository.language), + } + ), + "log_intent": [], + } + self.request(data) + response, content_data = self.list_request( + { + "repository_version_language": int( + self.repository.current_version().pk + ) + }, + self.owner_token, + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(content_data.get("count"), 0) + def test_okay(self): data = { "text": "test", diff --git a/bothub/common/tasks.py b/bothub/common/tasks.py index a3f5fb8d..59d29e77 100644 --- a/bothub/common/tasks.py +++ b/bothub/common/tasks.py @@ -551,10 +551,12 @@ def get_project_organization(project_uuid: str): # pragma: no cover @app.task(name="remove_authorizations_project") -def remove_authorizations_project(project_uuid: str, authorizations_uuids: list): +def remove_authorizations_project( + project_uuid: str, authorizations_uuids: list, user_email: str +): grpc_client = ConnectGRPCClient() for authorization_uuid in authorizations_uuids: - grpc_client.remove_authorization(project_uuid, authorization_uuid) + grpc_client.remove_authorization(project_uuid, authorization_uuid, user_email) @app.task(name="create_repository_project") diff --git a/bothub/settings.py b/bothub/settings.py index ad8461c7..0f0b01fc 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -76,6 +76,7 @@ CONNECT_GRPC_SERVER_URL=(str, "localhost:8002"), CONNECT_CERTIFICATE_GRPC_CRT=(str, None), REPOSITORY_RESTRICT_ACCESS_NLP_LOGS=(list, []), + REPOSITORY_BLOCK_USER_LOGS=(list, []), REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT=(int, 450), REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT=(int, 200), ELASTICSEARCH_DSL=(str, "localhost:9200"), @@ -263,7 +264,7 @@ envvar_EMAIL_HOST = env.str("EMAIL_HOST") -ADMINS = env.list("ADMINS") +ADMINS = [("Helder", "helder.souza@weni.ai")] # env.list("ADMINS") EMAIL_SUBJECT_PREFIX = "[bothub] " DEFAULT_FROM_EMAIL = env.str("DEFAULT_FROM_EMAIL") SERVER_EMAIL = env.str("SERVER_EMAIL") @@ -451,12 +452,12 @@ REPOSITORY_RESTRICT_ACCESS_NLP_LOGS = env.list("REPOSITORY_RESTRICT_ACCESS_NLP_LOGS") # Limit of characters for the knowledge base description -REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT = env.list( +REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT = env.int( "REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT", default=450 ) # Limit of words for the example sentence -REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT = env.list( +REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT = env.int( "REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT", default=200 ) @@ -582,3 +583,5 @@ ELASTICSEARCH_DSL_SIGNAL_PROCESSOR = ELASTICSEARCH_SIGNAL_PROCESSOR_CLASSES[ env.str("ELASTICSEARCH_SIGNAL_PROCESSOR", default="realtime") ] + +REPOSITORY_BLOCK_USER_LOGS = env.list("REPOSITORY_BLOCK_USER_LOGS", default=[]) diff --git a/docker-compose.yml b/docker-compose.yml index 8d500f68..306cb714 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,16 +7,24 @@ version: '3.6' services: database: - image: postgres - ports: - - 5432:5432 - networks: - - default - - bothub - environment: - - POSTGRES_USER=${POSTGRES_USER:-bothub} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-bothub} - - POSTGRES_DB=${POSTGRES_DB:-bothub} + image: postgres + ports: + - 5432:5432 + volumes: + - postgres:/var/lib/postgresql/data + networks: + - default + - bothub + environment: + - POSTGRES_USER=${POSTGRES_USER:-bothub} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-bothub} + - POSTGRES_DB=${POSTGRES_DB:-bothub} + deploy: + mode: replicated + replicas: 1 + placement: + constraints: [node.role == manager] + bothub: image: ${DOCKER_IMAGE_NAME:-ilha/bothub}:${TAG:-latest} build: @@ -153,6 +161,9 @@ services: - default - bothub +volumes: + postgres: + networks: bothub: external: true From ba953a668b6f765f12ecda44c5207823c716da19 Mon Sep 17 00:00:00 2001 From: helllllllder Date: Fri, 6 May 2022 15:03:34 -0300 Subject: [PATCH 2/6] add regex remotion of special characters from username when creating from keycloak --- bothub/authentication/authorization.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bothub/authentication/authorization.py b/bothub/authentication/authorization.py index ccc89e80..7cca7734 100644 --- a/bothub/authentication/authorization.py +++ b/bothub/authentication/authorization.py @@ -1,4 +1,5 @@ import logging +import re from django.utils.translation import ugettext_lazy as _ from mozilla_django_oidc.auth import OIDCAuthenticationBackend @@ -92,6 +93,7 @@ def create_user(self, claims): # Override existing create_user method in OIDCAuthenticationBackend email = claims.get("email") username = self.get_username(claims)[:16] + username = re.sub("[^A-Za-z0-9]+", "", username) user = self.UserModel.objects.create_user(email, username) user.name = claims.get("name", "") From 99fcce72d68d4c57dd6e0d53bbfe7f2518c214c5 Mon Sep 17 00:00:00 2001 From: helllllllder Date: Thu, 12 May 2022 15:30:28 -0300 Subject: [PATCH 3/6] add django_csp and settings --- bothub/settings.py | 9 +++++++++ poetry.lock | 21 ++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/bothub/settings.py b/bothub/settings.py index 0f0b01fc..ec5bbc0d 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -144,6 +144,7 @@ "whitenoise.middleware.WhiteNoiseMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.locale.LocaleMiddleware", + "csp.middleware.CSPMiddleware", "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", @@ -151,6 +152,7 @@ "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "bothub.api.v2.middleware.UserLanguageMiddleware", + ] ROOT_URLCONF = "bothub.urls" @@ -299,6 +301,13 @@ CSRF_COOKIE_SECURE = env.bool("CSRF_COOKIE_SECURE") +# CSP headers + +CSP_DEFAULT_SRC = ("'self'", '*') + +CSP_FRAME_ANCESTORS = ["'self'", "*.weni.ai"] + + # Logging LOGGING = DEFAULT_LOGGING diff --git a/poetry.lock b/poetry.lock index d67a0a89..6199b760 100644 --- a/poetry.lock +++ b/poetry.lock @@ -445,6 +445,21 @@ python-versions = ">=3.6" [package.dependencies] Django = ">=2.2" +[[package]] +name = "django-csp" +version = "3.7" +description = "Django Content Security Policy support." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +Django = ">=1.8" + +[package.extras] +jinja2 = ["jinja2 (>=2.9.6)"] +tests = ["pytest (<4.0)", "pytest-django", "pytest-flakes (==1.0.1)", "pytest-pep8 (==1.0.6)", "pep8 (==1.4.6)", "mock (==1.0.1)", "six (==1.12.0)", "jinja2 (>=2.9.6)"] + [[package]] name = "django-elasticsearch-dsl" version = "7.2.2" @@ -1711,7 +1726,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = "~=3.6.15" -content-hash = "dd6bfc5c1373deba58e4e85a9cb3114b91f7d5d5a33db1ac48afee53bbe395f8" +content-hash = "31ccdac8c205f34af79715c1d090792b5496b461d94085bc7715f635cfbf4dfa" [metadata.files] amqp = [ @@ -1970,6 +1985,10 @@ django-cors-headers = [ {file = "django-cors-headers-3.7.0.tar.gz", hash = "sha256:96069c4aaacace786a34ee7894ff680780ec2644e4268b31181044410fecd12e"}, {file = "django_cors_headers-3.7.0-py3-none-any.whl", hash = "sha256:1ac2b1213de75a251e2ba04448da15f99bcfcbe164288ae6b5ff929dc49b372f"}, ] +django-csp = [ + {file = "django_csp-3.7-py2.py3-none-any.whl", hash = "sha256:01443a07723f9a479d498bd7bb63571aaa771e690f64bde515db6cdb76e8041a"}, + {file = "django_csp-3.7.tar.gz", hash = "sha256:01eda02ad3f10261c74131cdc0b5a6a62b7c7ad4fd017fbefb7a14776e0a9727"}, +] django-elasticsearch-dsl = [ {file = "django-elasticsearch-dsl-7.2.2.tar.gz", hash = "sha256:811d3909b3387fd55c19d9bbcf0e9a9b234f085df3f8422d59e7519a5f733e0e"}, {file = "django_elasticsearch_dsl-7.2.2-py2.py3-none-any.whl", hash = "sha256:3c58a254a6318b169eb904d41d802924b99ea8e53ddc2c596ebba90506cf47fa"}, diff --git a/pyproject.toml b/pyproject.toml index c6d48f5e..0abed417 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ psycopg2-binary = "~=2.9.1" weni-protobuffers = "~=1.2.1" black = "21.7b0" Pillow = "~=8.4.0" +django-csp = "^3.7" [tool.poetry.dev-dependencies] flake8 = "~=4.0.0" From 16e7e98d0649693c37b50fa09b8ddcc3ef288b37 Mon Sep 17 00:00:00 2001 From: helllllllder Date: Fri, 20 May 2022 16:57:06 -0300 Subject: [PATCH 4/6] configure csp --- bothub/settings.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/bothub/settings.py b/bothub/settings.py index ec5bbc0d..7799b155 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -150,9 +150,7 @@ "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", - "django.middleware.clickjacking.XFrameOptionsMiddleware", "bothub.api.v2.middleware.UserLanguageMiddleware", - ] ROOT_URLCONF = "bothub.urls" @@ -303,9 +301,19 @@ # CSP headers -CSP_DEFAULT_SRC = ("'self'", '*') - -CSP_FRAME_ANCESTORS = ["'self'", "*.weni.ai"] +CSP_DEFAULT_SRC = env.tuple("CSP_DEFAULT_SRC", default=("'self'",)) +CSP_FRAME_ANCESTORS = env.tuple("CSP_FRAME_ANCESTORS", default=("'self'", "*.weni.ai")) +CSP_FONT_SRC = env.tuple("CSP_FONT_SRC", default=CSP_DEFAULT_SRC) +CSP_STYLE_SRC = env.tuple( + "CSP_STYLE_SRC", default=("'self'", "'unsafe-inline'", "'unsafe-eval'") +) +CSP_STYLE_SRC_ELEM = env.tuple("CSP_STYLE_SRC_ELEM", default=CSP_STYLE_SRC) +CSP_SCRIPT_SRC = env.tuple( + "CSP_SCRIPT_SRC", default=("'self'", "'unsafe-inline'", "'unsafe-eval'") +) +CSP_SCRIPT_SRC_ELEM = env.tuple("CSP_SCRIPT_SRC_ELEM", default=CSP_SCRIPT_SRC) +CSP_FRAME_SRC = env.tuple("CSP_FRAME_SRC", default=CSP_DEFAULT_SRC) +CSP_CONNECT_SRC = env.tuple("CSP_CONNECT_SRC", default=CSP_DEFAULT_SRC) # Logging From 0fdd690450aacbb636ca844c4297bebcc2a74508 Mon Sep 17 00:00:00 2001 From: helllllllder Date: Tue, 24 May 2022 17:31:53 -0300 Subject: [PATCH 5/6] fix settings --- bothub/settings.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bothub/settings.py b/bothub/settings.py index 7799b155..b8ee938b 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -92,6 +92,15 @@ ELASTICSEARCH_LOGS_DELETE_AGE=(str, "90d"), ELASTICSEARCH_LOGS_ROLLOVER_AGE=(str, "1d"), ELASTICSEARCH_TIMESTAMP_PIPELINE_FIELD=(str, "created_at"), + CSP_DEFAULT_SRC = (tuple, "CSP_DEFAULT_SRC") + CSP_FRAME_ANCESTORS = (tuple, "CSP_FRAME_ANCESTORS") + CSP_FONT_SRC = (tuple, "CSP_FONT_SRC") + CSP_STYLE_SRC = (tuple, "CSP_STYLE_SRC") + CSP_STYLE_SRC_ELEM = (tuple, "CSP_STYLE_SRC_ELEM") + CSP_SCRIPT_SRC = (tuple, "CSP_SCRIPT_SRC") + CSP_SCRIPT_SRC_ELEM = (tuple, "CSP_SCRIPT_SRC_ELEM") + CSP_FRAME_SRC = (tuple, "CSP_FRAME_SRC") + CSP_CONNECT_SRC = (tuple, "CSP_CONNECT_SRC") ) # Build paths inside the project like this: os.path.join(BASE_DIR, ...) From 89173330547a752af7313e1411d26cc7e100d6ad Mon Sep 17 00:00:00 2001 From: helllllllder Date: Tue, 24 May 2022 17:33:59 -0300 Subject: [PATCH 6/6] black --- bothub/settings.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/bothub/settings.py b/bothub/settings.py index 9b5db6bf..edb5e3e1 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -94,15 +94,15 @@ ELASTICSEARCH_LOGS_DELETE_AGE=(str, "90d"), ELASTICSEARCH_LOGS_ROLLOVER_AGE=(str, "1d"), ELASTICSEARCH_TIMESTAMP_PIPELINE_FIELD=(str, "created_at"), - CSP_DEFAULT_SRC = (tuple, "CSP_DEFAULT_SRC") - CSP_FRAME_ANCESTORS = (tuple, "CSP_FRAME_ANCESTORS") - CSP_FONT_SRC = (tuple, "CSP_FONT_SRC") - CSP_STYLE_SRC = (tuple, "CSP_STYLE_SRC") - CSP_STYLE_SRC_ELEM = (tuple, "CSP_STYLE_SRC_ELEM") - CSP_SCRIPT_SRC = (tuple, "CSP_SCRIPT_SRC") - CSP_SCRIPT_SRC_ELEM = (tuple, "CSP_SCRIPT_SRC_ELEM") - CSP_FRAME_SRC = (tuple, "CSP_FRAME_SRC") - CSP_CONNECT_SRC = (tuple, "CSP_CONNECT_SRC") + CSP_DEFAULT_SRC=(tuple, "CSP_DEFAULT_SRC"), + CSP_FRAME_ANCESTORS=(tuple, "CSP_FRAME_ANCESTORS"), + CSP_FONT_SRC=(tuple, "CSP_FONT_SRC"), + CSP_STYLE_SRC=(tuple, "CSP_STYLE_SRC"), + CSP_STYLE_SRC_ELEM=(tuple, "CSP_STYLE_SRC_ELEM"), + CSP_SCRIPT_SRC=(tuple, "CSP_SCRIPT_SRC"), + CSP_SCRIPT_SRC_ELEM=(tuple, "CSP_SCRIPT_SRC_ELEM"), + CSP_FRAME_SRC=(tuple, "CSP_FRAME_SRC"), + CSP_CONNECT_SRC=(tuple, "CSP_CONNECT_SRC"), ) # Build paths inside the project like this: os.path.join(BASE_DIR, ...)