From 7451f9a29ebbadf4b5091096f4aeb55bbb5baf20 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Thu, 4 Jun 2015 14:50:05 -0700 Subject: [PATCH 01/13] Adding a flat auth list option to permissions --- sqlshare_rest/test/api/permissions.py | 49 ++++++++++++++++++++++ sqlshare_rest/views/dataset_permissions.py | 49 +++++++++++++++------- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/sqlshare_rest/test/api/permissions.py b/sqlshare_rest/test/api/permissions.py index dd57b4d..b0f703d 100644 --- a/sqlshare_rest/test/api/permissions.py +++ b/sqlshare_rest/test/api/permissions.py @@ -601,6 +601,55 @@ def test_sharing_tokens(self): emails = data["emails"] self.assertEquals(emails, ["test_user1@example.com"]) + def test_flat_auth_list(self): + owner = "permissions_flat_user1" + dataset_name = "ds_flat1" + other_user1 = "permissions_flat_user2" + other_user2 = "permissions_flat_user3" + self.remove_users.append(owner) + self.remove_users.append(other_user1) + self.remove_users.append(other_user2) + + backend = get_backend() + backend.get_user(other_user1) + backend.get_user(other_user2) + ds1 = create_dataset_from_query(owner, dataset_name, "SELECT(1)") + + permissions_url = reverse("sqlshare_view_dataset_permissions", kwargs={'owner':owner, 'name':dataset_name}) + new_data = { "authlist": [ other_user1, other_user2, "test@example.com", "not_email_but_whatever"] } + + owner_auth_headers = self.get_auth_header_for_username(owner) + response = self.client.put(permissions_url, data=json.dumps(new_data), **owner_auth_headers) + self.assertEquals(response.status_code, 200) + self.assertEquals(response.content.decode("utf-8"), "") + + response = self.client.get(permissions_url, **owner_auth_headers) + self.assertEquals(response.status_code, 200) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(data["is_public"], False) + self.assertEquals(data["is_shared"], True) + + accounts = data["accounts"] + lookup = {} + for account in accounts: + lookup[account["login"]] = True + + self.assertEquals(lookup, { "permissions_flat_user2": True, "permissions_flat_user3": True }) + + lookup = {} + emails = data["emails"] + for email in emails: + lookup[email] = True + + self.assertEquals(lookup, { "test@example.com": True, "not_email_but_whatever": True }) + # empty out the memory outbox: + mail.outbox = [] + # Now make sure we send 1 email + send_new_emails() + # empty out the memory outbox: + mail.outbox = [] + + @classmethod def setUpClass(cls): def _run_query(sql): diff --git a/sqlshare_rest/views/dataset_permissions.py b/sqlshare_rest/views/dataset_permissions.py index 3779518..4e1117a 100644 --- a/sqlshare_rest/views/dataset_permissions.py +++ b/sqlshare_rest/views/dataset_permissions.py @@ -10,6 +10,7 @@ from sqlshare_rest.dao.dataset import set_dataset_accounts, set_dataset_emails from sqlshare_rest.dao.dataset import add_account_to_dataset from sqlshare_rest.dao.user import get_user +from sqlshare_rest.models import User from sqlshare_rest.logger import getLogger logger = getLogger(__name__) @@ -60,10 +61,25 @@ def _set_dataset_permissions(request, dataset): data = json.loads(request.body.decode("utf-8")) accounts = data.get("accounts", []) - is_shared = False + emails = data.get("emails", []) logger.info("PUT dataset permissions; owner: %s; " "name: %s" % (dataset.owner.username, dataset.name), request) + + if "authlist" in data: + authlist = data["authlist"] + + existing_users = User.objects.filter(username__in=authlist) + existing_accounts = {} + for user in 
existing_users: + existing_accounts[user.username] = True + + for item in authlist: + if item in existing_accounts: + accounts.append(item) + else: + emails.append(item) + for account in accounts: logger.info("PUT dataset permissions; owner: %s; name: %s; " "set account: %s" % (dataset.owner.username, @@ -71,26 +87,17 @@ def _set_dataset_permissions(request, dataset): account), request) - try: - set_dataset_accounts(dataset, accounts, save_dataset=False) - if len(accounts): - is_shared = True - except InvalidAccountException: - return get400() - - emails = data.get("emails", []) - if len(emails): - is_shared = True - for email in emails: logger.info("PUT dataset permissions; owner: %s; name: %s; " "set email: %s" % (dataset.owner.username, dataset.name, email), request) - set_dataset_emails(dataset, emails, save_dataset=False) - dataset.is_shared = is_shared + try: + _store_dataset_permissions(dataset, accounts, emails) + except InvalidAccountException: + return get400() if "is_public" in data: dataset.is_public = data["is_public"] @@ -102,6 +109,20 @@ def _set_dataset_permissions(request, dataset): return HttpResponse() +def _store_dataset_permissions(dataset, accounts, emails): + is_shared = False + + set_dataset_accounts(dataset, accounts, save_dataset=False) + if len(accounts): + is_shared = True + + set_dataset_emails(dataset, emails, save_dataset=False) + if len(emails): + is_shared = True + + dataset.is_shared = is_shared + + @csrf_exempt @protected_resource() def add_token_access(request, token): From e5c78a66200bba59a534914afccab97acde6aa0a Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 11:50:13 -0700 Subject: [PATCH 02/13] Adding snapshots, implemented in sqlite3 --- sqlshare_rest/backend/base.py | 28 +++++- sqlshare_rest/backend/mysql.py | 17 ---- sqlshare_rest/backend/sqlite3.py | 11 +++ sqlshare_rest/dao/dataset.py | 22 +++++ sqlshare_rest/models.py | 2 + sqlshare_rest/test/api/snapshot.py | 85 +++++++++++++++++ sqlshare_rest/test/backend/sqlite3.py | 28 ++++++ sqlshare_rest/tests.py | 1 + sqlshare_rest/urls.py | 4 + sqlshare_rest/util/queue_triggers.py | 9 ++ sqlshare_rest/util/snapshot_queue.py | 130 ++++++++++++++++++++++++++ sqlshare_rest/views/__init__.py | 6 ++ sqlshare_rest/views/dataset.py | 42 ++++++++- 13 files changed, 365 insertions(+), 20 deletions(-) create mode 100644 sqlshare_rest/test/api/snapshot.py create mode 100644 sqlshare_rest/util/snapshot_queue.py diff --git a/sqlshare_rest/backend/base.py b/sqlshare_rest/backend/base.py index 048b56f..294125f 100644 --- a/sqlshare_rest/backend/base.py +++ b/sqlshare_rest/backend/base.py @@ -25,8 +25,32 @@ def create_db_user(self, username, password): def create_db_schema(self, username, schema): self._not_implemented("create_db_schema") - def create_snapshot(self, source_dataset, destination_datset, user): - self._not_implemented("create_snapshot") + def create_snapshot_dataset(self, source_dataset, destination, user): + name = destination.name + table_name = self._get_table_name_for_dataset(name) + sql = self._get_view_sql_for_dataset(table_name, user) + + destination.sql = sql + destination.snapshot_finished = False + destination.snapshot_source = source_dataset + destination.save() + + def load_snapshot_table(self, dataset, user): + source_dataset = dataset.snapshot_source + table_name = self._get_table_name_for_dataset(dataset.name) + + self._create_snapshot_table(source_dataset, table_name, user) + self._create_view_of_snapshot(dataset, user) + + def _create_view_of_snapshot(self, dataset, 
user): + sql = self._get_snapshot_view_sql(dataset) + self.run_query(sql, user) + + def _get_snapshot_view_sql(self, dataset): + self._not_implemented("_get_snapshot_view_sql") + + def _create_snapshot_table(self, source_dataset, table_name, user): + self._not_implemented("_create_snapshot_table") def remove_db_user(self, db_username): self._not_implemented("remove_db_user") diff --git a/sqlshare_rest/backend/mysql.py b/sqlshare_rest/backend/mysql.py index e9ae2ca..c33aa79 100644 --- a/sqlshare_rest/backend/mysql.py +++ b/sqlshare_rest/backend/mysql.py @@ -85,23 +85,6 @@ def get_preview_sql_for_dataset(self, dataset_name, user): def get_preview_sql_for_query(self, sql): return "SELECT * FROM (%s) as x LIMIT 100" % sql - def _create_snapshot_sql(self, source_dataset, destination_datset): - """ - Requires the source to be quoted, the destination to not be. - - Source could be another user's dataset, so we can't quote that. - """ - return "CREATE TABLE `%s` AS SELECT * FROM %s" % (destination_datset, - source_dataset) - - def create_snapshot(self, source_dataset, destination_datset, user): - table_name = self._get_table_name_for_dataset(destination_datset) - sql = self._create_snapshot_sql(source_dataset, table_name) - self.run_query(sql, user) - self.create_view(destination_datset, - self._get_view_sql_for_dataset(table_name, user), - user) - def _add_read_access_sql(self, dataset, owner, reader): return "GRANT SELECT ON `%s`.`%s` TO `%s`" % (owner.schema, dataset, diff --git a/sqlshare_rest/backend/sqlite3.py b/sqlshare_rest/backend/sqlite3.py index c7ed09d..fb03290 100644 --- a/sqlshare_rest/backend/sqlite3.py +++ b/sqlshare_rest/backend/sqlite3.py @@ -49,6 +49,17 @@ def get_qualified_name(self, dataset): def create_db_schema(self, db_username, schema_name): return + def _create_snapshot_table(self, source_dataset, table_name, user): + sql = "CREATE TABLE `%s` AS SELECT * FROM %s" % (table_name, + source_dataset.name) + + self.run_query(sql, user) + + def _get_snapshot_view_sql(self, dataset): + table_name = self._get_table_name_for_dataset(dataset.name) + return "CREATE VIEW `%s` AS SELECT * FROM `%s`" % (dataset.name, + table_name) + def run_query(self, sql, username, params=None, return_cursor=False): cursor = connection.cursor() cursor.execute(sql, params) diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py index 016b836..911e5b2 100644 --- a/sqlshare_rest/dao/dataset.py +++ b/sqlshare_rest/dao/dataset.py @@ -91,6 +91,28 @@ def create_dataset_from_query(username, dataset_name, sql): backend.close_user_connection(user) +def create_dataset_from_snapshot(user, dataset_name, source): + backend = get_backend() + try: + (model, created) = Dataset.objects.get_or_create(name=dataset_name, + owner=user) + if not created: + # Clear out the existing dataset, so we can create + # the new view properly + backend.delete_dataset(dataset_name, user) + + backend.create_snapshot_dataset(source, model, user) + + model.preview_is_finished = False + model.preview_error = None + + return model + except Exception: + raise + finally: + backend.close_user_connection(user) + + def create_preview_for_dataset(dataset): # Remove all existing sample data queries previous = Query.objects.filter(is_preview_for=dataset) diff --git a/sqlshare_rest/models.py b/sqlshare_rest/models.py index 94b2f13..a2f79b9 100644 --- a/sqlshare_rest/models.py +++ b/sqlshare_rest/models.py @@ -49,6 +49,8 @@ class Dataset(models.Model): preview_is_finished = models.BooleanField(default=False) preview_error = 
models.TextField(null=True) rows_total = models.IntegerField(null=True) + snapshot_source = models.ForeignKey('Dataset', null=True) + snapshot_finished = models.NullBooleanField() class Meta: unique_together = (("name", "owner"),) diff --git a/sqlshare_rest/test/api/snapshot.py b/sqlshare_rest/test/api/snapshot.py new file mode 100644 index 0000000..fc80dff --- /dev/null +++ b/sqlshare_rest/test/api/snapshot.py @@ -0,0 +1,85 @@ +from django.test import TestCase +from unittest2 import skipIf +from django.db import connection +import json +from datetime import datetime +from dateutil import parser +from django.utils import timezone +from sqlshare_rest.util.db import get_backend +from sqlshare_rest.test import missing_url +from django.test.utils import override_settings +from django.test.client import Client +from django.core.urlresolvers import reverse +from sqlshare_rest.test.api.base import BaseAPITest +from sqlshare_rest.dao.dataset import create_dataset_from_query +from sqlshare_rest.models import Query, Dataset +from sqlshare_rest.util.query_queue import process_queue +from sqlshare_rest.util.snapshot_queue import process_snapshot_queue +from testfixtures import LogCapture +import csv + +import six +if six.PY2: + from StringIO import StringIO +elif six.PY3: + from io import StringIO + +@skipIf(missing_url("sqlshare_view_dataset_list"), "SQLShare REST URLs not configured") +@override_settings(MIDDLEWARE_CLASSES = ( + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.auth.middleware.RemoteUserMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', + ), + SQLSHARE_QUERY_CACHE_DB="test_ss_query_db", + AUTHENTICATION_BACKENDS = ('django.contrib.auth.backends.ModelBackend',) + ) + +class SnapshotAPITest(BaseAPITest): + def setUp(self): + # Try to cleanup from any previous test runs... 
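+        # (assumption: usernames appended to remove_users are dropped by
+        # BaseAPITest's teardown, matching its use in the other API tests)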
+ self.remove_users = [] + self.client = Client() + + def test_snapshot(self): + owner = "snapshot_user1" + dataset_name = "snap_source1" + + self.remove_users.append(owner) + + backend = get_backend() + backend.get_user(owner) + + Dataset.objects.all().delete() + ds1 = create_dataset_from_query(owner, dataset_name, "SELECT(1)") + + url = reverse("sqlshare_dataset_snapshot", kwargs={ 'owner': owner, + 'name': dataset_name}) + + owner_auth_headers = self.get_auth_header_for_username(owner) + + self.assertEquals(len(Dataset.objects.all()), 1) + Query.objects.all().delete() + self.assertEquals(len(Query.objects.all()), 0) + + new_data = { + "name": "snap_destination1", + "description": "Snapshot created in test", + "is_public": True, + } + + with LogCapture() as l: + response = self.client.post(url, data=json.dumps(new_data), content_type="application/json", **owner_auth_headers) + self.assertEquals(response.status_code, 201) + self.assertEquals(response.content.decode("utf-8"), "") + self.assertEquals(response["Location"], "http://testserver/v3/db/dataset/snapshot_user1/snap_destination1") + self.assertTrue(self._has_log(l, owner, None, 'sqlshare_rest.views.dataset', 'INFO', 'POST dataset snapshot; owner: snapshot_user1; name: snap_source1; destination_name: snap_destination1; is_public: True')) + + + self.assertEquals(len(Dataset.objects.all()), 2) + self.assertEquals(len(Query.objects.all()), 0) + + process_snapshot_queue(verbose=True) + self.assertEquals(len(Query.objects.all()), 1) diff --git a/sqlshare_rest/test/backend/sqlite3.py b/sqlshare_rest/test/backend/sqlite3.py index b791e6f..f832433 100644 --- a/sqlshare_rest/test/backend/sqlite3.py +++ b/sqlshare_rest/test/backend/sqlite3.py @@ -2,6 +2,9 @@ from sqlshare_rest.models import Dataset, User from sqlshare_rest.parser import Parser from sqlshare_rest.util.db import is_sqlite3, get_backend +from sqlshare_rest.dao.dataset import create_dataset_from_query +from sqlshare_rest.util.snapshot_queue import process_snapshot_queue +from django.db.utils import OperationalError import unittest import six if six.PY2: @@ -125,3 +128,28 @@ def test_create_table_from_parser_with_values(self): raise finally: backend.close_user_connection(user) + + def test_snapshot(self): + backend = get_backend() + owner = "test_user_snapshot1" + user = backend.get_user(owner) + + + ds_source = create_dataset_from_query(owner, "s3_snap_source1", "SELECT (1), (2), (4), (8)") + result2 = backend.run_query("SELECT * FROM s3_snap_source1", user) + self.assertEquals([(1, 2, 4, 8,)], result2) + + ds_source = Dataset.objects.get(name="s3_snap_source1", owner=user) + ds_dest = Dataset.objects.create(name="s3_snap_destination", owner=user) + backend.create_snapshot_dataset(ds_source, ds_dest, user) + + self.assertRaises(OperationalError, backend.run_query, "SELECT * FROM s3_snap_destination", user) + + process_snapshot_queue(verbose=True) + + result4 = backend.run_query("SELECT * FROM table_s3_snap_destination", user) + self.assertEquals([(1, 2, 4, 8,)], result4) + + + result3 = backend.run_query("SELECT * FROM s3_snap_destination", user) + self.assertEquals([(1, 2, 4, 8,)], result3) diff --git a/sqlshare_rest/tests.py b/sqlshare_rest/tests.py index 748b42f..c9602eb 100644 --- a/sqlshare_rest/tests.py +++ b/sqlshare_rest/tests.py @@ -17,4 +17,5 @@ from sqlshare_rest.test.api.sql import RunQueryAPITest from sqlshare_rest.test.api.user_override import UserOverrideAPITest from sqlshare_rest.test.api.cancel_query import CancelQueryAPITest +from sqlshare_rest.test.api.snapshot 
import SnapshotAPITest from sqlshare_rest.test.logging import TestLogging diff --git a/sqlshare_rest/urls.py b/sqlshare_rest/urls.py index bcf2c64..49948a6 100644 --- a/sqlshare_rest/urls.py +++ b/sqlshare_rest/urls.py @@ -20,6 +20,10 @@ url('v3/db/dataset/tagged/(?P.*)', 'dataset_list.dataset_tagged_list', name="sqlshare_view_dataset_tagged_list"), + url('v3/db/dataset/(?P[^/].*)/(?P[^/].*)/snapshot', + 'dataset.snapshot', + name="sqlshare_dataset_snapshot"), + url('v3/db/dataset/(?P[^/].*)/(?P[^/].*)', 'dataset.dataset', name="sqlshare_view_dataset"), diff --git a/sqlshare_rest/util/queue_triggers.py b/sqlshare_rest/util/queue_triggers.py index 663e8ef..090ae00 100644 --- a/sqlshare_rest/util/queue_triggers.py +++ b/sqlshare_rest/util/queue_triggers.py @@ -1,5 +1,6 @@ import socket +SNAPSHOT_QUEUE_PORT_NUMBER = 1997 UPLOAD_QUEUE_PORT_NUMBER = 1998 QUERY_QUEUE_PORT_NUMBER = 1999 @@ -18,3 +19,11 @@ def trigger_upload_queue_processing(): client.connect(('localhost', UPLOAD_QUEUE_PORT_NUMBER)) except socket.error as ex: pass + + +def trigger_snapshot_processing(): + try: + client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client.connect(('localhost', SNAPSHOT_QUEUE_PORT_NUMBER)) + except socket.error as ex: + pass diff --git a/sqlshare_rest/util/snapshot_queue.py b/sqlshare_rest/util/snapshot_queue.py new file mode 100644 index 0000000..19baff0 --- /dev/null +++ b/sqlshare_rest/util/snapshot_queue.py @@ -0,0 +1,130 @@ +from sqlshare_rest.models import Dataset +from sqlshare_rest.dao.dataset import create_preview_for_dataset +from sqlshare_rest.util.db import get_backend +from time import sleep +from sqlshare_rest.util.queue_triggers import trigger_snapshot_processing +from sqlshare_rest.util.queue_triggers import SNAPSHOT_QUEUE_PORT_NUMBER +import atexit + +import socket +from threading import Thread + +import six + +if six.PY2: + from Queue import Queue +elif six.PY3: + from queue import Queue + + +def process_snapshot_queue(thread_count=0, run_once=True, verbose=False): + q = Queue() + + def worker(): + """ + Get a dataset snapshot from the queue, and materialize its table. + """ + backend = get_backend() + keep_looping = True + while keep_looping: + oldest = q.get() + if verbose: + print("Processing snapshot: %s" % oldest.pk) + user = oldest.owner + backend = get_backend() + try: + backend.load_snapshot_table(oldest, user) + create_preview_for_dataset(oldest) + oldest.snapshot_finished = True + oldest.save() + # Do some work here + except Exception as ex: + if verbose: + print("Error on %s: %s" % (oldest.pk, str(ex))) + oldest.snapshot_finished = True + oldest.save() + finally: + backend.close_user_connection(user) + + q.task_done() + if verbose: + print("Finished snapshot %s." % oldest.pk) + if run_once: + keep_looping = False + + def periodic_check(): + """ + Every 5 seconds, do a check for new snapshots. Just in case something + needs processing, but didn't call trigger_snapshot_processing() itself. 
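+        This is only a safety net; trigger_snapshot_processing() over the
+        local socket is the primary notification path.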
+ """ + while True: + sleep(5) + if verbose: + print("Triggering periodic processing.") + trigger_snapshot_processing() + + filtered = get_initial_filter_list() + + if run_once: + try: + oldest = filtered.order_by('id')[:1].get() + except Dataset.DoesNotExist: + return + q.put(oldest) + worker() + else: + # Track the oldest query, so we only select ones newer that + newest_pk = 0 + for i in range(thread_count): + t = Thread(target=worker) + t.setDaemon(True) + t.start() + + # Start with any queries already in the queue: + for dataset in filtered: + if dataset.pk > newest_pk: + newest_pk = dataset.pk + if verbose: + print("Adding dataset ID %s to the queue." % upload.pk) + q.put(dataset) + + # Just in case things get off the rails - maybe a connection to the + # server gets blocked? - periodically trigger a check for new queries + kicker = Thread(target=periodic_check) + kicker.setDaemon(True) + kicker.start() + + # Start the socket server for getting notifications of new queries + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Make it so we can run the server right away after killing it + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('localhost', SNAPSHOT_QUEUE_PORT_NUMBER)) + + # Make sure we close our socket when we're killed. + def close_socket(): + server.close() + + atexit.register(close_socket) + + server.listen(5) + while True: + (clientsocket, address) = server.accept() + # We don't actually have a protocol to speak... + clientsocket.close() + + snapshots = Dataset.objects.filter(snapshot_source__isnull=False, + snapshot_finished=False, + pk__gt=newest_pk) + for snapshot in snapshots: + if snapshot.pk > newest_pk: + newest_pk = snapshot.pk + if verbose: + print("Adding snapshot ID %s to the queue." 
% snapshot.pk) + q.put(snapshot) + + q.join() + + +def get_initial_filter_list(): + return Dataset.objects.filter(snapshot_source__isnull=False, + snapshot_finished=False) diff --git a/sqlshare_rest/views/__init__.py b/sqlshare_rest/views/__init__.py index c50603d..f741496 100644 --- a/sqlshare_rest/views/__init__.py +++ b/sqlshare_rest/views/__init__.py @@ -18,6 +18,12 @@ def get_oauth_user(request): raise Exception("Invalid token - no client app") +def get405(): + response = HttpResponse("") + response.status_code = 405 + return response + + def get404(): response = HttpResponse("") response.status_code = 404 diff --git a/sqlshare_rest/views/dataset.py b/sqlshare_rest/views/dataset.py index bf7daae..84e5e6c 100644 --- a/sqlshare_rest/views/dataset.py +++ b/sqlshare_rest/views/dataset.py @@ -6,10 +6,11 @@ from django.utils import timezone from sqlshare_rest.util.db import get_backend from sqlshare_rest.models import Dataset, User, Query -from sqlshare_rest.views import get_oauth_user, get403, get404 +from sqlshare_rest.views import get_oauth_user, get403, get404, get405 from sqlshare_rest.views.sql import response_for_query from sqlshare_rest.dao.user import get_user from sqlshare_rest.dao.dataset import create_dataset_from_query +from sqlshare_rest.dao.dataset import create_dataset_from_snapshot from sqlshare_rest.dao.dataset import create_preview_for_dataset from sqlshare_rest.dao.dataset import get_dataset_by_owner_and_name from sqlshare_rest.util.query import get_sample_data_for_query @@ -48,6 +49,45 @@ def download(request, owner, name): return response_for_query(sql, user, download_name) +@csrf_exempt +@protected_resource() +def snapshot(request, owner, name): + get_oauth_user(request) + if request.META['REQUEST_METHOD'] != "POST": + return get405() + + try: + dataset = get_dataset_by_owner_and_name(owner, name) + except Dataset.DoesNotExist: + return get404() + except User.DoesNotExist: + return get404() + except Exception as ex: + raise + + user = get_user(request) + if not dataset.user_has_read_access(user): + return get403() + + values = json.loads(request.body.decode("utf-8")) + new_name = values["name"] + description = values["description"] + is_public = getattr(values, "is_public", True) + logger.info("POST dataset snapshot; owner: %s; name: %s; " + "destination_name: %s; is_public: %s" % (owner, + name, + new_name, + is_public), + request) + + new_dataset = create_dataset_from_snapshot(user, new_name, dataset) + + response = HttpResponse("") + response["location"] = new_dataset.get_url() + response.status_code = 201 + return response + + @csrf_exempt @protected_resource() def dataset(request, owner, name): From 968c4238dd9e94014f28e66ca701b79728ac0341 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 12:39:12 -0700 Subject: [PATCH 03/13] Implementing snapshots for mysql --- sqlshare_rest/backend/mysql.py | 14 +++++++++ sqlshare_rest/test/api/dataset.py | 1 + sqlshare_rest/test/backend/mysql.py | 48 ++++++++++++----------------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/sqlshare_rest/backend/mysql.py b/sqlshare_rest/backend/mysql.py index c33aa79..1bf95cb 100644 --- a/sqlshare_rest/backend/mysql.py +++ b/sqlshare_rest/backend/mysql.py @@ -199,6 +199,20 @@ def get_query_sample_sql(self, query_id): QUERY_SCHEMA = self.get_query_cache_db_name() return "SELECT * FROM %s.query_%s LIMIT 100" % (QUERY_SCHEMA, query_id) + def _create_snapshot_table(self, source_dataset, table_name, user): + sql = "CREATE TABLE `%s` AS SELECT * FROM %s" % 
(table_name, + source_dataset.name) + + self.run_query(sql, user) + + def _get_snapshot_view_sql(self, dataset): + table_name = self._get_table_name_for_dataset(dataset.name) + return ("CREATE OR REPLACE VIEW `%s` AS " + "SELECT * FROM `%s`.`%s`" % (dataset.name, + dataset.owner.schema, + table_name)) + + def _get_column_definitions_for_cursor(self, cursor): import pymysql # XXX - is defining this a sign that this is a mistake? diff --git a/sqlshare_rest/test/api/dataset.py b/sqlshare_rest/test/api/dataset.py index c516a6e..3026b03 100644 --- a/sqlshare_rest/test/api/dataset.py +++ b/sqlshare_rest/test/api/dataset.py @@ -346,6 +346,7 @@ def test_valid_no_permissions(self): owner = "put_user2" ds1_name = "dataset_1c" self.remove_users.append(owner) + self.remove_users.append("not_owner") auth_headers = self.get_auth_header_for_username(owner) url = reverse("sqlshare_view_dataset", kwargs={ 'owner': owner, 'name': ds1_name}) diff --git a/sqlshare_rest/test/backend/mysql.py b/sqlshare_rest/test/backend/mysql.py index bf75c74..965212d 100644 --- a/sqlshare_rest/test/backend/mysql.py +++ b/sqlshare_rest/test/backend/mysql.py @@ -1,5 +1,7 @@ from sqlshare_rest.test import CleanUpTestCase from sqlshare_rest.util.db import is_mysql, get_backend +from sqlshare_rest.dao.dataset import create_dataset_from_query +from sqlshare_rest.util.snapshot_queue import process_snapshot_queue from sqlshare_rest.parser import Parser from django.db import connection from django.conf import settings @@ -241,45 +243,33 @@ def test_create_table_sql(self): self.assertEquals(sql, "CREATE TABLE `test_table1` (`Column1` INT, `Column2` FLOAT, `Column3` VARCHAR(400)) ENGINE InnoDB CHARACTER SET utf8 COLLATE utf8_bin") - def test_snapshot_sql(self): - backend = get_backend() - sql = backend._create_snapshot_sql("old", "new") - self.assertEquals(sql, "CREATE TABLE `new` AS SELECT * FROM old") def test_snapshot(self): - self.remove_users.append("test_user_snapshot1") + owner = "test_user_snapshot1" + self.remove_users.append(owner) backend = get_backend() - user = backend.get_user("test_user_snapshot1") - - handle = StringIO("z,y,x\n1,3,4\n2,10,12") - - parser = Parser() - parser.guess(handle.read(1024*20)) - handle.seek(0) - parser.parse(handle) + user = backend.get_user(owner) try: - backend.create_dataset_from_parser("test_dataset1", parser, user) - result = backend.run_query("SELECT * FROM %s.table_test_dataset1" % user.schema, user) - self.assertEquals(((1, 3, 4, ), (2, 10, 12, )), result) - result2 = backend.run_query("SELECT * FROM %s.test_dataset1" % user.schema, user) - self.assertEquals(((1, 3, 4, ), (2, 10, 12, )), result2) + from pymysql.err import ProgrammingError + ds_source = create_dataset_from_query(owner, "my_snap_source1", "SELECT (1), (2), (4), (8)") + result2 = backend.run_query("SELECT * FROM test_user_snapshot1.my_snap_source1", user) + self.assertEquals(((1, 2, 4, 8,),), result2) + + ds_source = Dataset.objects.get(name="my_snap_source1", owner=user) + ds_dest = Dataset.objects.create(name="my_snap_destination", owner=user) + backend.create_snapshot_dataset(ds_source, ds_dest, user) - backend.create_snapshot("`test_user_snapshot1`.`test_dataset1`", "test_snapshot1", user) + self.assertRaises(ProgrammingError, backend.run_query, "SELECT * FROM test_user_snapshot1.my_snap_destination", user) - # Make sure the snapshot has the right initial data - result3 = backend.run_query("SELECT * FROM test_user_snapshot1.test_snapshot1", user) - self.assertEquals(((1, 3, 4, ), (2, 10, 12, )), result3) + 
process_snapshot_queue(verbose=True) - # Update the original backing table - # make sure the original dataset is updated, but the snapshot isn't - backend.run_query("INSERT INTO table_test_dataset1 VALUES (3,14,15)", user) - result4 = backend.run_query("SELECT * FROM %s.test_dataset1" % user.schema, user) - self.assertEquals(((1, 3, 4, ), (2, 10, 12, ), (3, 14, 15, )), result4) + result4 = backend.run_query("SELECT * FROM test_user_snapshot1.table_my_snap_destination", user) + self.assertEquals(((1, 2, 4, 8,),), result4) - result5 = backend.run_query("SELECT * FROM test_user_snapshot1.test_snapshot1", user) - self.assertEquals(((1, 3, 4, ), (2, 10, 12, )), result5) + result3 = backend.run_query("SELECT * FROM test_user_snapshot1.my_snap_destination", user) + self.assertEquals(((1, 2, 4, 8,),), result3) except Exception: raise finally: From bec9a5a84658875306c9bac7bc7baf833e695c98 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 12:39:36 -0700 Subject: [PATCH 04/13] pep8 fix --- sqlshare_rest/backend/mysql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlshare_rest/backend/mysql.py b/sqlshare_rest/backend/mysql.py index 1bf95cb..defdbce 100644 --- a/sqlshare_rest/backend/mysql.py +++ b/sqlshare_rest/backend/mysql.py @@ -212,7 +212,6 @@ def _get_snapshot_view_sql(self, dataset): dataset.owner.schema, table_name)) - def _get_column_definitions_for_cursor(self, cursor): import pymysql # XXX - is defining this a sign that this is a mistake? From 32d5a455049a5a7c422d423c386cee0209dad768 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 13:52:02 -0700 Subject: [PATCH 05/13] implementing mssql --- sqlshare_rest/backend/mssql.py | 21 +++++++++++++++++ sqlshare_rest/test/api/permissions.py | 1 + sqlshare_rest/test/backend/mssql.py | 34 +++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/sqlshare_rest/backend/mssql.py b/sqlshare_rest/backend/mssql.py index 8abb564..ed2def7 100644 --- a/sqlshare_rest/backend/mssql.py +++ b/sqlshare_rest/backend/mssql.py @@ -113,6 +113,27 @@ def remove_schema(self, schema): def _disconnect_connection(self, connection): connection["connection"].close() + def _create_snapshot_table(self, source_dataset, table_name, user): + source_schema = source_dataset.owner.schema + sql = "SELECT * INTO [%s].[%s] FROM [%s].[%s]" % (source_schema, + table_name, + user.schema, + source_dataset.name) + + self.run_query(sql, user, return_cursor=True).close() + + def _create_view_of_snapshot(self, dataset, user): + sql = self._get_snapshot_view_sql(dataset) + self.run_query(sql, user, return_cursor=True).close() + + def _get_snapshot_view_sql(self, dataset): + table_name = self._get_table_name_for_dataset(dataset.name) + return ("CREATE VIEW [%s].[%s] AS " + "SELECT * FROM [%s].[%s]" % (dataset.owner.schema, + dataset.name, + dataset.owner.schema, + table_name)) + def create_view(self, name, sql, user, column_names=None): import pyodbc if column_names: diff --git a/sqlshare_rest/test/api/permissions.py b/sqlshare_rest/test/api/permissions.py index b0f703d..084e33b 100644 --- a/sqlshare_rest/test/api/permissions.py +++ b/sqlshare_rest/test/api/permissions.py @@ -665,6 +665,7 @@ def _run_query(sql): _run_query("drop login permissions_preview_user8") _run_query("drop login permissions_preview_user2") _run_query("drop login permissions_preview_user6") + _run_query("drop login permissions_preview_user7") _run_query("drop login permissions_token_user1") _run_query("drop login permissions_xpublic_user1") _run_query("drop login 
permissions_user1") diff --git a/sqlshare_rest/test/backend/mssql.py b/sqlshare_rest/test/backend/mssql.py index 6d9ff2e..ac52779 100644 --- a/sqlshare_rest/test/backend/mssql.py +++ b/sqlshare_rest/test/backend/mssql.py @@ -1,6 +1,8 @@ from sqlshare_rest.test import CleanUpTestCase from sqlshare_rest.models import Dataset from sqlshare_rest.parser import Parser +from sqlshare_rest.dao.dataset import create_dataset_from_query +from sqlshare_rest.util.snapshot_queue import process_snapshot_queue from django.db import connection from sqlshare_rest.util.db import is_mssql, is_sql_azure, get_backend import unittest @@ -424,6 +426,37 @@ def test_public_permissions_control(self): backend.close_user_connection(user2) backend.close_user_connection(user3) + def test_snapshot(self): + import pyodbc + owner = "test_user_snapshot1" + self.remove_users.append(owner) + backend = get_backend() + user = backend.get_user(owner) + + try: + ds_source = create_dataset_from_query(owner, "my_snap_source1", "SELECT (1), (2), (4), (8)") + result2 = backend.run_query("SELECT * FROM test_user_snapshot1.my_snap_source1", user) + self.assertEquals(result2[0][2], 4) + + ds_source = Dataset.objects.get(name="my_snap_source1", owner=user) + ds_dest = Dataset.objects.create(name="my_snap_destination", owner=user) + backend.create_snapshot_dataset(ds_source, ds_dest, user) + + self.assertRaises(pyodbc.ProgrammingError, backend.run_query, "SELECT * FROM test_user_snapshot1.my_snap_destination", user) + + process_snapshot_queue(verbose=True) + + result4 = backend.run_query("SELECT * FROM [test_user_snapshot1].[table_my_snap_destination]", user) + self.assertEquals(result4[0][2], 4) + + + result3 = backend.run_query("SELECT * FROM [test_user_snapshot1].[my_snap_destination]", user) + self.assertEquals(result3[0][2], 4) + except Exception: + raise + finally: + backend.close_user_connection(user) + @classmethod def setUpClass(cls): @@ -459,6 +492,7 @@ def _run_query(sql): _run_query("drop login test_remove_user1") _run_query("drop login test_remove_user3") _run_query("drop login test_mysql_qualified_dataset_name") + _run_query("drop login test_user_snapshot1") def setUp(self): # Try to cleanup from any previous test runs... From 69863aeb00ee42dc4ae0beb3be9d4828b368f0c3 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 14:13:55 -0700 Subject: [PATCH 06/13] Management command for running snapshots --- .../commands/run_snapshots_queue.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 sqlshare_rest/management/commands/run_snapshots_queue.py diff --git a/sqlshare_rest/management/commands/run_snapshots_queue.py b/sqlshare_rest/management/commands/run_snapshots_queue.py new file mode 100644 index 0000000..dad8714 --- /dev/null +++ b/sqlshare_rest/management/commands/run_snapshots_queue.py @@ -0,0 +1,30 @@ +from sqlshare_rest.util.snapshot_queue import process_snapshot_queue +from django.core.management.base import BaseCommand +from optparse import make_option + + +class Command(BaseCommand): + help = "This processes snapshots of datasets." 
+ + option_list = BaseCommand.option_list + ( + make_option('--run-once', + dest='run_once', + default=False, + action="store_true", + help='This will only process one item in the queue'), + + make_option('--verbose', + dest='verbose', + default=False, + action="store_true", + help='Prints status info to standard out'), + ) + + def handle(self, *args, **options): + verbose = options["verbose"] + if options["run_once"]: + process_snapshot_queue(verbose=verbose) + else: + process_snapshot_queue(run_once=False, + thread_count=10, + verbose=verbose) From cd2b51fe12c612f75651943ae177aab0b92e80cf Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Tue, 9 Jun 2015 14:17:36 -0700 Subject: [PATCH 07/13] Fixing a bad variable name --- sqlshare_rest/util/snapshot_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlshare_rest/util/snapshot_queue.py b/sqlshare_rest/util/snapshot_queue.py index 19baff0..e4567d7 100644 --- a/sqlshare_rest/util/snapshot_queue.py +++ b/sqlshare_rest/util/snapshot_queue.py @@ -85,7 +85,7 @@ def periodic_check(): if dataset.pk > newest_pk: newest_pk = dataset.pk if verbose: - print("Adding dataset ID %s to the queue." % upload.pk) + print("Adding dataset ID %s to the queue." % dataset.pk) q.put(dataset) # Just in case things get off the rails - maybe a connection to the From 08d4ce5edea23c2f6f4cb4f9d4247c6dea96b928 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Wed, 10 Jun 2015 12:18:07 -0700 Subject: [PATCH 08/13] Adding paging/ordering to the owner list --- sqlshare_rest/test/api/dataset_list.py | 42 ++++++++++++++++++++++++++ sqlshare_rest/views/dataset_list.py | 22 ++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/sqlshare_rest/test/api/dataset_list.py b/sqlshare_rest/test/api/dataset_list.py index dacd77a..cc6c2d6 100644 --- a/sqlshare_rest/test/api/dataset_list.py +++ b/sqlshare_rest/test/api/dataset_list.py @@ -279,6 +279,48 @@ def build_lookup(data): self.assertTrue("ds_shared" not in lookup["ds_list_user8"]) self.assertTrue("ds_owned" not in lookup["ds_list_user7"]) + def test_pagination(self): + owner = "test_pagination_owner" + self.remove_users.append(owner) + datasets = [] + for i in range(200): + ds = create_dataset_from_query(owner, "test_paging_%s" % i, "SELECT (%s)" % i) + ds.is_public = True + if i < 120: + ds.description = "Find the elephant" + ds.save() + + auth_headers = self.get_auth_header_for_username(owner) + url = reverse("sqlshare_view_dataset_list") + + response = self.client.get(url, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 200) + + response = self.client.get(url, { "page": 1, "page_size": 50, "order_by": "updated" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_199") + + response = self.client.get(url, { "page": 2, "page_size": 50, "order_by": "updated" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_149") + + response = self.client.get(url, { "page": 100, "page_size": 50, "order_by": "updated" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 0) + + response = self.client.get(url, { "page": 10, "page_size": 10, "order_by": "updated" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 10) + + response = 
self.client.get(url, { "page": 1 }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_0") + + @classmethod def setUpClass(cls): def _run_query(sql): diff --git a/sqlshare_rest/views/dataset_list.py b/sqlshare_rest/views/dataset_list.py index 3f81601..17e8660 100644 --- a/sqlshare_rest/views/dataset_list.py +++ b/sqlshare_rest/views/dataset_list.py @@ -21,6 +21,8 @@ def dataset_list(request): user = get_user(request) datasets = get_datasets_owned_by_user(user) + datasets = _filter_list_from_request(datasets, request) + data = [] for dataset in datasets: data.append(dataset.json_data()) @@ -60,6 +62,26 @@ def dataset_tagged_list(request, tag): return HttpResponse(json.dumps(data)) +def _filter_list_from_request(query_set, request): + if "order_by" in request.GET: + if request.GET["order_by"] == "updated": + query_set = query_set.order_by("-date_modified") + else: + query_set = query_set.order_by("pk") + + if "page" in request.GET: + page_size = 50 + if "page_size" in request.GET: + page_size = int(request.GET["page_size"]) + + page_num = int(request.GET["page"]) + start = (page_num - 1) * page_size + end = start + page_size + query_set = query_set[start:end] + + return query_set + + @csrf_exempt @protected_resource() def dataset_all_list(request): From 7dd7be3ee33c7f8fe9553ebae6eee5c9d3a21283 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Wed, 10 Jun 2015 12:50:22 -0700 Subject: [PATCH 09/13] pagination on shared and public --- sqlshare_rest/dao/dataset.py | 66 ++++++++++++++++++-------- sqlshare_rest/test/api/dataset_list.py | 50 +++++++++++++++++-- sqlshare_rest/views/dataset_list.py | 30 ++---------- 3 files changed, 98 insertions(+), 48 deletions(-) diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py index 911e5b2..f46920f 100644 --- a/sqlshare_rest/dao/dataset.py +++ b/sqlshare_rest/dao/dataset.py @@ -5,49 +5,50 @@ from sqlshare_rest.exceptions import InvalidAccountException -def get_datasets_owned_by_user(user): +def get_datasets_owned_by_user(user, request, page_list=True): # Django auth user vs sqlshare user backend = get_backend() user_obj = backend.get_user(user.username) - return Dataset.objects.filter(owner=user_obj) + base = Dataset.objects.filter(owner=user_obj) + return _filter_list_from_request(base, request, page_list) -def get_datasets_shared_with_user(user): +def get_datasets_shared_with_user(user, request, page_list=True): # Django auth user vs sqlshare user backend = get_backend() user_obj = backend.get_user(user.username) - return Dataset.objects.filter(shared_with__in=[user_obj]) + base = Dataset.objects.filter(shared_with__in=[user_obj]) + return _filter_list_from_request(base, request, page_list) -def get_public_datasets(): - return Dataset.objects.filter(is_public=True) +def get_public_datasets(request, page_list=True): + base = Dataset.objects.filter(is_public=True) + return _filter_list_from_request(base, request, page_list) -def _get_all_dataset_querysets(user): - return (get_datasets_owned_by_user(user), - get_datasets_shared_with_user(user), - get_public_datasets()) +def _get_all_dataset_querysets(user, request): + return (get_datasets_owned_by_user(user, request, page_list=False), + get_datasets_shared_with_user(user, request, page_list=False), + get_public_datasets(request, page_list=False)) def _dataset_unique_list(mine, shared, public): - datasets = list(mine) - datasets.extend(list(shared)) - datasets.extend(list(public)) + 
return mine | shared | public - return list(set(datasets)) +def get_all_datasets_for_user(user, request): + mine, shared, public = _get_all_dataset_querysets(user, request) + unique = _dataset_unique_list(mine, shared, public) -def get_all_datasets_for_user(user): - mine, shared, public = _get_all_dataset_querysets(user) - return _dataset_unique_list(mine, shared, public) + return _page_dataset_list(unique, request) -def get_all_datasets_tagged_for_user(user, tag_label): +def get_all_datasets_tagged_for_user(user, request, tag_label): try: tags = Tag.objects.filter(tag__iexact=tag_label) except Tag.DoesNotExist: return [] - datasets = get_all_datasets_for_user(user) + datasets = get_all_datasets_for_user(user, request) dataset_tags = DatasetTag.objects.filter(dataset__in=datasets, tag__in=tags) @@ -347,3 +348,30 @@ def _update_tag_popularity(tag_label): count = DatasetTag.objects.filter(tag=tag_obj).count() tag_obj.popularity = count tag_obj.save() + + +def _filter_list_from_request(query_set, request, page_list): + if "order_by" in request.GET: + if request.GET["order_by"] == "updated": + query_set = query_set.order_by("-date_modified") + else: + query_set = query_set.order_by("pk") + + if page_list: + query_set = _page_dataset_list(query_set, request) + + return query_set + + +def _page_dataset_list(query_set, request): + if "page" in request.GET: + page_size = 50 + if "page_size" in request.GET: + page_size = int(request.GET["page_size"]) + + page_num = int(request.GET["page"]) + start = (page_num - 1) * page_size + end = start + page_size + query_set = query_set[start:end] + + return query_set diff --git a/sqlshare_rest/test/api/dataset_list.py b/sqlshare_rest/test/api/dataset_list.py index cc6c2d6..0f9719b 100644 --- a/sqlshare_rest/test/api/dataset_list.py +++ b/sqlshare_rest/test/api/dataset_list.py @@ -281,16 +281,32 @@ def build_lookup(data): def test_pagination(self): owner = "test_pagination_owner" + public = "test_pagination_public" + shared = "test_pagination_shared" self.remove_users.append(owner) - datasets = [] + self.remove_users.append(public) + self.remove_users.append(shared) + + backend = get_backend() + backend.get_user(public) + backend.get_user(shared) + auth_headers = self.get_auth_header_for_username(owner) + public_auth_headers = self.get_auth_header_for_username(public) + shared_auth_headers = self.get_auth_header_for_username(shared) + + never_seen = create_dataset_from_query(public, "test_paging_public_owner_first", "SELECT (1)") + + account_data = { "accounts": [ shared ] } for i in range(200): - ds = create_dataset_from_query(owner, "test_paging_%s" % i, "SELECT (%s)" % i) + dataset_name = "test_paging_%s" % i + ds = create_dataset_from_query(owner, dataset_name, "SELECT (%s)" % i) ds.is_public = True + set_dataset_accounts(ds, [ shared ]) + if i < 120: ds.description = "Find the elephant" ds.save() - auth_headers = self.get_auth_header_for_username(owner) url = reverse("sqlshare_view_dataset_list") response = self.client.get(url, **auth_headers) @@ -320,6 +336,34 @@ def test_pagination(self): self.assertEquals(len(data), 50) self.assertEquals(data[0]["name"], "test_paging_0") + url = reverse("sqlshare_view_dataset_shared_list") + + response = self.client.get(url, **shared_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 200) + + response = self.client.get(url, { "page": 1, "page_size": 50, "order_by": "updated" }, **shared_auth_headers) + data = json.loads(response.content.decode("utf-8")) + 
self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_199") + + url = reverse("sqlshare_view_dataset_all_list") + + response = self.client.get(url, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertTrue(len(data) >= 200) + + response = self.client.get(url, { "page": 1, "page_size": 50, "order_by": "updated" }, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_199") + + new_public = create_dataset_from_query(public, "test_paging_public_owner", "SELECT (1)") + response = self.client.get(url, { "page": 1, "page_size": 50, "order_by": "updated" }, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_public_owner") + self.assertEquals(data[1]["name"], "test_paging_199") @classmethod def setUpClass(cls): diff --git a/sqlshare_rest/views/dataset_list.py b/sqlshare_rest/views/dataset_list.py index 17e8660..4819f49 100644 --- a/sqlshare_rest/views/dataset_list.py +++ b/sqlshare_rest/views/dataset_list.py @@ -19,9 +19,7 @@ def dataset_list(request): get_oauth_user(request) user = get_user(request) - datasets = get_datasets_owned_by_user(user) - - datasets = _filter_list_from_request(datasets, request) + datasets = get_datasets_owned_by_user(user, request) data = [] for dataset in datasets: @@ -38,7 +36,7 @@ def dataset_shared_list(request): user = get_user(request) - datasets = get_datasets_shared_with_user(user) + datasets = get_datasets_shared_with_user(user, request) data = [] for dataset in datasets: @@ -53,7 +51,7 @@ def dataset_tagged_list(request, tag): get_oauth_user(request) user = get_user(request) - datasets = get_all_datasets_tagged_for_user(user, tag_label=tag) + datasets = get_all_datasets_tagged_for_user(user, request, tag_label=tag) data = [] for dataset in datasets: @@ -62,33 +60,13 @@ def dataset_tagged_list(request, tag): return HttpResponse(json.dumps(data)) -def _filter_list_from_request(query_set, request): - if "order_by" in request.GET: - if request.GET["order_by"] == "updated": - query_set = query_set.order_by("-date_modified") - else: - query_set = query_set.order_by("pk") - - if "page" in request.GET: - page_size = 50 - if "page_size" in request.GET: - page_size = int(request.GET["page_size"]) - - page_num = int(request.GET["page"]) - start = (page_num - 1) * page_size - end = start + page_size - query_set = query_set[start:end] - - return query_set - - @csrf_exempt @protected_resource() def dataset_all_list(request): get_oauth_user(request) user = get_user(request) - datasets = get_all_datasets_for_user(user) + datasets = get_all_datasets_for_user(user, request) data = [] for dataset in datasets: From 09d0fb2c351bf83f6fda0fe7c1f931f788222f4e Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Wed, 10 Jun 2015 13:07:46 -0700 Subject: [PATCH 10/13] Adding some stuff for db engines with less refined timestamps... 
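
MySQL DATETIME columns only store whole seconds unless fractional-second
precision is requested (added in MySQL 5.6.4), so datasets created within
the same second tie on date_modified. Adding "-pk" as a secondary sort key
keeps the "updated" ordering deterministic.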
---
 sqlshare_rest/dao/dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py
index f46920f..1b93831 100644
--- a/sqlshare_rest/dao/dataset.py
+++ b/sqlshare_rest/dao/dataset.py
@@ -353,7 +353,9 @@ def _update_tag_popularity(tag_label):
 def _filter_list_from_request(query_set, request, page_list):
     if "order_by" in request.GET:
         if request.GET["order_by"] == "updated":
-            query_set = query_set.order_by("-date_modified")
+            # mysql doesn't have the timestamp resolution needed to
+            # order by date modified alone during unit tests
+            query_set = query_set.order_by("-date_modified", "-pk")
         else:
             query_set = query_set.order_by("pk")

From 94b25bbc53452a8c6ee2cb302f57fb7cff6c62f1 Mon Sep 17 00:00:00 2001
From: Patrick Michaud
Date: Wed, 10 Jun 2015 13:44:40 -0700
Subject: [PATCH 11/13] Adds searching

---
 sqlshare_rest/dao/dataset.py           |  5 +++++
 sqlshare_rest/test/api/dataset_list.py | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py
index 1b93831..a0fbe41 100644
--- a/sqlshare_rest/dao/dataset.py
+++ b/sqlshare_rest/dao/dataset.py
@@ -3,6 +3,7 @@ from sqlshare_rest.models import DatasetSharingEmail
 from sqlshare_rest.models import Query
 from sqlshare_rest.exceptions import InvalidAccountException
+from django.db.models import Q


 def get_datasets_owned_by_user(user, request, page_list=True):
@@ -351,6 +352,10 @@ def _update_tag_popularity(tag_label):


 def _filter_list_from_request(query_set, request, page_list):
+    if "q" in request.GET:
+        q = request.GET["q"]
+        query_set = query_set.filter(Q(name__icontains=q) | Q(description__icontains=q))
+
     if "order_by" in request.GET:
         if request.GET["order_by"] == "updated":
             # mysql doesn't have the timestamp resolution needed to
diff --git a/sqlshare_rest/test/api/dataset_list.py b/sqlshare_rest/test/api/dataset_list.py
index 0f9719b..c9e43c7 100644
--- a/sqlshare_rest/test/api/dataset_list.py
+++ b/sqlshare_rest/test/api/dataset_list.py
@@ -365,6 +365,17 @@ def test_pagination(self):
         self.assertEquals(data[0]["name"], "test_paging_public_owner")
         self.assertEquals(data[1]["name"], "test_paging_199")

+        # Now for searching...
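+        # 120 of the datasets created above carry "Find the elephant"
+        # in their description, so a full search returns 120 results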
+ response = self.client.get(url, { "q": "elephant" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 120) + + response = self.client.get(url, { "q": "elephant", "page": 1, "page_size": 50, "order_by": "updated" }, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 50) + self.assertEquals(data[0]["name"], "test_paging_119") + + @classmethod def setUpClass(cls): def _run_query(sql): From fb16188edb890ea2eb544626402c8f811c0c0b87 Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Wed, 10 Jun 2015 14:00:30 -0700 Subject: [PATCH 12/13] pep8 fix --- sqlshare_rest/dao/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py index a0fbe41..429c419 100644 --- a/sqlshare_rest/dao/dataset.py +++ b/sqlshare_rest/dao/dataset.py @@ -354,7 +354,8 @@ def _update_tag_popularity(tag_label): def _filter_list_from_request(query_set, request, page_list): if "q" in request.GET: q = request.GET["q"] - query_set = query_set.filter(Q(name__icontains=q) | Q(description__icontains=q)) + query_set = query_set.filter(Q(name__icontains=q) | + Q(description__icontains=q)) if "order_by" in request.GET: if request.GET["order_by"] == "updated": From 6e74d6d5f25ba2be69bbe1114db2822c031b465a Mon Sep 17 00:00:00 2001 From: Patrick Michaud Date: Wed, 10 Jun 2015 16:33:24 -0700 Subject: [PATCH 13/13] View dataset list by recentness --- sqlshare_rest/dao/dataset.py | 10 +++- sqlshare_rest/models.py | 9 ++++ sqlshare_rest/test/api/dataset_list.py | 75 ++++++++++++++++++++++++++ sqlshare_rest/urls.py | 3 ++ sqlshare_rest/views/dataset.py | 7 ++- sqlshare_rest/views/dataset_list.py | 17 ++++++ 6 files changed, 119 insertions(+), 2 deletions(-) diff --git a/sqlshare_rest/dao/dataset.py b/sqlshare_rest/dao/dataset.py index 429c419..88e9aaf 100644 --- a/sqlshare_rest/dao/dataset.py +++ b/sqlshare_rest/dao/dataset.py @@ -1,7 +1,7 @@ from sqlshare_rest.util.db import get_backend from sqlshare_rest.models import Dataset, User, SharingEmail, DatasetTag, Tag from sqlshare_rest.models import DatasetSharingEmail -from sqlshare_rest.models import Query +from sqlshare_rest.models import Query, RecentDatasetView from sqlshare_rest.exceptions import InvalidAccountException from django.db.models import Q @@ -27,6 +27,14 @@ def get_public_datasets(request, page_list=True): return _filter_list_from_request(base, request, page_list) +def get_recent_datasets_viewed_by_user(user, request, page_list=True): + base = RecentDatasetView.objects.filter(user=user).order_by("-timestamp", + "-pk") + paged = _page_dataset_list(base, request) + + return map(lambda x: x.dataset, paged) + + def _get_all_dataset_querysets(user, request): return (get_datasets_owned_by_user(user, request, page_list=False), get_datasets_shared_with_user(user, request, page_list=False), diff --git a/sqlshare_rest/models.py b/sqlshare_rest/models.py index a2f79b9..1b87741 100644 --- a/sqlshare_rest/models.py +++ b/sqlshare_rest/models.py @@ -130,6 +130,15 @@ def user_has_read_access(self, user): return False +class RecentDatasetView(models.Model): + dataset = models.ForeignKey(Dataset) + user = models.ForeignKey(User) + timestamp = models.DateTimeField(null=True) + + class Meta: + unique_together = (('dataset', 'user'),) + + class SharingEmail(models.Model): email = models.CharField(max_length=200) diff --git a/sqlshare_rest/test/api/dataset_list.py b/sqlshare_rest/test/api/dataset_list.py index 
c9e43c7..208dae0 100644 --- a/sqlshare_rest/test/api/dataset_list.py +++ b/sqlshare_rest/test/api/dataset_list.py @@ -16,6 +16,7 @@ from django.utils import timezone import json from testfixtures import LogCapture +from time import sleep @skipIf(missing_url("sqlshare_view_dataset_list"), "SQLShare REST URLs not configured") @@ -375,6 +376,80 @@ def test_pagination(self): self.assertEquals(len(data), 50) self.assertEquals(data[0]["name"], "test_paging_119") + def test_recent_datasets(self): + owner = "test_recents_owner" + public = "test_recents_public" + + self.remove_users.append(owner) + self.remove_users.append(public) + + ds1 = create_dataset_from_query(owner, "recent_ds1", "SELECT (1)") + ds2 = create_dataset_from_query(owner, "recent_ds2", "SELECT (1)") + + add_public_access(ds1) + add_public_access(ds2) + + url = reverse("sqlshare_view_dataset_recent_list") + + auth_headers = self.get_auth_header_for_username(owner) + public_auth_headers = self.get_auth_header_for_username(public) + + # initial state - neither user has visited any dataset + response = self.client.get(url, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 0) + + response = self.client.get(url, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 0) + + # Test public user ds1/ds2 - still no items for the owner + ds1_url = reverse("sqlshare_view_dataset", kwargs={ 'owner': owner, + 'name': "recent_ds1"}) + ds2_url = reverse("sqlshare_view_dataset", kwargs={ 'owner': owner, + 'name': "recent_ds2"}) + + response = self.client.get(ds1_url, **public_auth_headers) + sleep(1.1) + response = self.client.get(ds2_url, **public_auth_headers) + + response = self.client.get(url, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 0) + + response = self.client.get(url, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 2) + + self.assertEquals(data[0]["name"], "recent_ds2") + self.assertEquals(data[1]["name"], "recent_ds1") + + response = self.client.get(ds2_url, **auth_headers) + sleep(1.1) + response = self.client.get(ds1_url, **auth_headers) + + response = self.client.get(url, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 2) + self.assertEquals(data[0]["name"], "recent_ds1") + self.assertEquals(data[1]["name"], "recent_ds2") + + response = self.client.get(url, **public_auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 2) + + self.assertEquals(data[0]["name"], "recent_ds2") + self.assertEquals(data[1]["name"], "recent_ds1") + + # Go and visit #1 again, make sure we have things in the right order. 
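+        # (the re-read below hits recent_ds2, so it should move back to the top)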
+ sleep(1.1) + response = self.client.get(ds2_url, **auth_headers) + response = self.client.get(url, **auth_headers) + data = json.loads(response.content.decode("utf-8")) + self.assertEquals(len(data), 2) + self.assertEquals(data[0]["name"], "recent_ds2") + self.assertEquals(data[1]["name"], "recent_ds1") + @classmethod def setUpClass(cls): diff --git a/sqlshare_rest/urls.py b/sqlshare_rest/urls.py index 49948a6..f4fe10b 100644 --- a/sqlshare_rest/urls.py +++ b/sqlshare_rest/urls.py @@ -28,6 +28,9 @@ 'dataset.dataset', name="sqlshare_view_dataset"), + url('v3/db/dataset/recent', 'dataset_list.dataset_recent_list', + name="sqlshare_view_dataset_recent_list"), + url('v3/db/dataset/shared', 'dataset_list.dataset_shared_list', name="sqlshare_view_dataset_shared_list"), diff --git a/sqlshare_rest/views/dataset.py b/sqlshare_rest/views/dataset.py index 84e5e6c..20142b8 100644 --- a/sqlshare_rest/views/dataset.py +++ b/sqlshare_rest/views/dataset.py @@ -5,7 +5,7 @@ from datetime import datetime from django.utils import timezone from sqlshare_rest.util.db import get_backend -from sqlshare_rest.models import Dataset, User, Query +from sqlshare_rest.models import Dataset, User, Query, RecentDatasetView from sqlshare_rest.views import get_oauth_user, get403, get404, get405 from sqlshare_rest.views.sql import response_for_query from sqlshare_rest.dao.user import get_user @@ -126,6 +126,11 @@ def _get_dataset(request, owner, name): dataset.last_viewed = timezone.now() dataset.save() + get_or_create = RecentDatasetView.objects.get_or_create + recent_view, created = get_or_create(dataset=dataset, user=user) + recent_view.timestamp = timezone.now() + recent_view.save() + data = dataset.json_data() if dataset.preview_is_finished: diff --git a/sqlshare_rest/views/dataset_list.py b/sqlshare_rest/views/dataset_list.py index 4819f49..9f15814 100644 --- a/sqlshare_rest/views/dataset_list.py +++ b/sqlshare_rest/views/dataset_list.py @@ -1,6 +1,7 @@ from sqlshare_rest.views import get_oauth_user from sqlshare_rest.dao.dataset import get_datasets_owned_by_user from sqlshare_rest.dao.dataset import get_datasets_shared_with_user +from sqlshare_rest.dao.dataset import get_recent_datasets_viewed_by_user from sqlshare_rest.dao.dataset import get_all_datasets_tagged_for_user from sqlshare_rest.dao.dataset import get_all_datasets_for_user from sqlshare_rest.dao.user import get_user @@ -60,6 +61,22 @@ def dataset_tagged_list(request, tag): return HttpResponse(json.dumps(data)) +@csrf_exempt +@protected_resource() +def dataset_recent_list(request): + get_oauth_user(request) + + user = get_user(request) + datasets = get_recent_datasets_viewed_by_user(user, request) + + data = [] + for dataset in datasets: + data.append(dataset.json_data()) + + logger.info("GET my dataset list", request) + return HttpResponse(json.dumps(data)) + + @csrf_exempt @protected_resource() def dataset_all_list(request):
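
A client-side sketch of the dataset-list parameters added in patches 08, 09,
and 11 (page, page_size, order_by=updated, and q). This is illustrative only
and not part of the patch series: the host name, the bearer token, and the use
of the requests library are assumptions, not anything these patches define.

import requests

BASE = "https://sqlshare.example.edu/v3/db/dataset"  # hypothetical host
HEADERS = {"Authorization": "Bearer ACCESS_TOKEN"}   # hypothetical OAuth token


def iter_datasets(extra_params=None):
    """Yield every dataset in the owner's list, newest first, page by page."""
    page = 1
    while True:
        params = dict(extra_params or {})
        params.update({"page": page, "page_size": 50, "order_by": "updated"})
        resp = requests.get(BASE, params=params, headers=HEADERS)
        resp.raise_for_status()
        datasets = resp.json()
        if not datasets:  # past the last page the server returns an empty list
            return
        for dataset in datasets:
            yield dataset
        page += 1


# Search is just another query parameter, matched against name and description:
for ds in iter_datasets({"q": "elephant"}):
    print(ds["name"])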