Skip to content
This repository has been archived by the owner on Jan 28, 2021. It is now read-only.

Commit

Permalink
Merge branch 'develop' into new_reqs
Browse files Browse the repository at this point in the history
  • Loading branch information
vegitron committed Jun 11, 2015
2 parents 1e4d3a1 + 004599f commit 24fa7ea
Show file tree
Hide file tree
Showing 22 changed files with 843 additions and 88 deletions.
28 changes: 26 additions & 2 deletions sqlshare_rest/backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,32 @@ def create_db_user(self, username, password):
def create_db_schema(self, username, schema):
    """
    Backend hook for creating a database schema.

    The base implementation only hands its own name to
    self._not_implemented(); concrete backends are expected to
    override it.
    """
    hook_name = "create_db_schema"
    self._not_implemented(hook_name)

def create_snapshot(self, source_dataset, destination_datset, user):
self._not_implemented("create_snapshot")
def create_snapshot_dataset(self, source_dataset, destination, user):
    """
    Mark `destination` as a pending snapshot of `source_dataset`.

    Only model bookkeeping happens here: the destination receives the
    view SQL for its backing table name, is flagged as unfinished, is
    linked to its source and is saved.  No table is created by this
    method.
    """
    backing_table = self._get_table_name_for_dataset(destination.name)

    destination.sql = self._get_view_sql_for_dataset(backing_table, user)
    destination.snapshot_source = source_dataset
    destination.snapshot_finished = False
    destination.save()

def load_snapshot_table(self, dataset, user):
    """
    Materialise a snapshot dataset.

    Asks the backend to build the backing table from
    dataset.snapshot_source, then to create the view of that table.
    """
    origin = dataset.snapshot_source
    backing_table = self._get_table_name_for_dataset(dataset.name)

    # Table first, view second: the view is a view *of* the snapshot.
    self._create_snapshot_table(origin, backing_table, user)
    self._create_view_of_snapshot(dataset, user)

def _create_view_of_snapshot(self, dataset, user):
sql = self._get_snapshot_view_sql(dataset)
self.run_query(sql, user)

def _get_snapshot_view_sql(self, dataset):
self._not_implemented("_get_snapshot_view_sql")

def _create_snapshot_table(self, source_dataset, table_name, user):
self._not_implemented("_create_snapshot_table")

def remove_db_user(self, db_username):
self._not_implemented("remove_db_user")
Expand Down
21 changes: 21 additions & 0 deletions sqlshare_rest/backend/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ def remove_schema(self, schema):
def _disconnect_connection(self, connection):
connection["connection"].close()

def _create_snapshot_table(self, source_dataset, table_name, user):
source_schema = source_dataset.owner.schema
sql = "SELECT * INTO [%s].[%s] FROM [%s].[%s]" % (source_schema,
table_name,
user.schema,
source_dataset.name)

self.run_query(sql, user, return_cursor=True).close()

def _create_view_of_snapshot(self, dataset, user):
sql = self._get_snapshot_view_sql(dataset)
self.run_query(sql, user, return_cursor=True).close()

def _get_snapshot_view_sql(self, dataset):
table_name = self._get_table_name_for_dataset(dataset.name)
return ("CREATE VIEW [%s].[%s] AS "
"SELECT * FROM [%s].[%s]" % (dataset.owner.schema,
dataset.name,
dataset.owner.schema,
table_name))

def create_view(self, name, sql, user, column_names=None):
import pyodbc
if column_names:
Expand Down
30 changes: 13 additions & 17 deletions sqlshare_rest/backend/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,6 @@ def get_preview_sql_for_dataset(self, dataset_name, user):
def get_preview_sql_for_query(self, sql):
    """
    Wrap `sql` as a subquery and limit the result to 100 rows.
    """
    template = "SELECT * FROM (%s) as x LIMIT 100"
    return template % sql

def _create_snapshot_sql(self, source_dataset, destination_datset):
"""
Requires the source to be quoted, the destination to not be.
Source could be another user's dataset, so we can't quote that.
"""
return "CREATE TABLE `%s` AS SELECT * FROM %s" % (destination_datset,
source_dataset)

def create_snapshot(self, source_dataset, destination_datset, user):
table_name = self._get_table_name_for_dataset(destination_datset)
sql = self._create_snapshot_sql(source_dataset, table_name)
self.run_query(sql, user)
self.create_view(destination_datset,
self._get_view_sql_for_dataset(table_name, user),
user)

def _add_read_access_sql(self, dataset, owner, reader):
return "GRANT SELECT ON `%s`.`%s` TO `%s`" % (owner.schema,
dataset,
Expand Down Expand Up @@ -216,6 +199,19 @@ def get_query_sample_sql(self, query_id):
QUERY_SCHEMA = self.get_query_cache_db_name()
return "SELECT * FROM %s.query_%s LIMIT 100" % (QUERY_SCHEMA, query_id)

def _create_snapshot_table(self, source_dataset, table_name, user):
sql = "CREATE TABLE `%s` AS SELECT * FROM %s" % (table_name,
source_dataset.name)

self.run_query(sql, user)

def _get_snapshot_view_sql(self, dataset):
table_name = self._get_table_name_for_dataset(dataset.name)
return ("CREATE OR REPLACE VIEW `%s` AS "
"SELECT * FROM `%s`.`%s`" % (dataset.name,
dataset.owner.schema,
table_name))

def _get_column_definitions_for_cursor(self, cursor):
import pymysql
# XXX - is defining this a sign that this is a mistake?
Expand Down
11 changes: 11 additions & 0 deletions sqlshare_rest/backend/sqlite3.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ def get_qualified_name(self, dataset):
def create_db_schema(self, db_username, schema_name):
    """
    No-op for this backend: nothing is created and None is returned.
    """
    return None

def _create_snapshot_table(self, source_dataset, table_name, user):
sql = "CREATE TABLE `%s` AS SELECT * FROM %s" % (table_name,
source_dataset.name)

self.run_query(sql, user)

def _get_snapshot_view_sql(self, dataset):
table_name = self._get_table_name_for_dataset(dataset.name)
return "CREATE VIEW `%s` AS SELECT * FROM `%s`" % (dataset.name,
table_name)

def run_query(self, sql, username, params=None, return_cursor=False):
cursor = connection.cursor()
cursor.execute(sql, params)
Expand Down
106 changes: 86 additions & 20 deletions sqlshare_rest/dao/dataset.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,63 @@
from sqlshare_rest.util.db import get_backend
from sqlshare_rest.models import Dataset, User, SharingEmail, DatasetTag, Tag
from sqlshare_rest.models import DatasetSharingEmail
from sqlshare_rest.models import Query
from sqlshare_rest.models import Query, RecentDatasetView
from sqlshare_rest.exceptions import InvalidAccountException
from django.db.models import Q


def get_datasets_owned_by_user(user):
def get_datasets_owned_by_user(user, request, page_list=True):
# Django auth user vs sqlshare user
backend = get_backend()
user_obj = backend.get_user(user.username)
return Dataset.objects.filter(owner=user_obj)
base = Dataset.objects.filter(owner=user_obj)
return _filter_list_from_request(base, request, page_list)


def get_datasets_shared_with_user(user):
def get_datasets_shared_with_user(user, request, page_list=True):
# Django auth user vs sqlshare user
backend = get_backend()
user_obj = backend.get_user(user.username)
return Dataset.objects.filter(shared_with__in=[user_obj])
base = Dataset.objects.filter(shared_with__in=[user_obj])
return _filter_list_from_request(base, request, page_list)


def get_public_datasets():
return Dataset.objects.filter(is_public=True)
def get_public_datasets(request, page_list=True):
base = Dataset.objects.filter(is_public=True)
return _filter_list_from_request(base, request, page_list)


def _get_all_dataset_querysets(user):
return (get_datasets_owned_by_user(user),
get_datasets_shared_with_user(user),
get_public_datasets())
def get_recent_datasets_viewed_by_user(user, request, page_list=True):
    """
    Return the datasets `user` has viewed, most recent first.

    RecentDatasetView rows for the user are ordered by descending
    timestamp (primary key as tie-breaker), paged according to
    `request` by _page_dataset_list(), and mapped to their datasets.

    The result is always a real list.  The previous `map(...)` call
    produced a lazy, single-pass iterator on Python 3, which callers
    could neither len() nor iterate twice.

    NOTE(review): `page_list` is accepted for signature parity with the
    other list helpers but is not consulted here - paging is always
    applied when the request asks for it.
    """
    recent_views = RecentDatasetView.objects.filter(user=user).order_by(
        "-timestamp", "-pk")
    paged = _page_dataset_list(recent_views, request)

    return [view.dataset for view in paged]


def _get_all_dataset_querysets(user, request):
    """
    Return the (owned, shared, public) dataset query sets for `user`.

    Each one is filtered from `request` but left unpaged
    (page_list=False).
    """
    owned = get_datasets_owned_by_user(user, request, page_list=False)
    shared = get_datasets_shared_with_user(user, request, page_list=False)
    public = get_public_datasets(request, page_list=False)
    return (owned, shared, public)


def _dataset_unique_list(mine, shared, public):
datasets = list(mine)
datasets.extend(list(shared))
datasets.extend(list(public))
return mine | shared | public

return list(set(datasets))

def get_all_datasets_for_user(user, request):
mine, shared, public = _get_all_dataset_querysets(user, request)
unique = _dataset_unique_list(mine, shared, public)

def get_all_datasets_for_user(user):
mine, shared, public = _get_all_dataset_querysets(user)
return _dataset_unique_list(mine, shared, public)
return _page_dataset_list(unique, request)


def get_all_datasets_tagged_for_user(user, tag_label):
def get_all_datasets_tagged_for_user(user, request, tag_label):
try:
tags = Tag.objects.filter(tag__iexact=tag_label)
except Tag.DoesNotExist:
return []
datasets = get_all_datasets_for_user(user)
datasets = get_all_datasets_for_user(user, request)

dataset_tags = DatasetTag.objects.filter(dataset__in=datasets,
tag__in=tags)
Expand Down Expand Up @@ -91,6 +101,28 @@ def create_dataset_from_query(username, dataset_name, sql):
backend.close_user_connection(user)


def create_dataset_from_snapshot(user, dataset_name, source):
    """
    Create (or re-create) `dataset_name` for `user` as a snapshot of the
    dataset `source`, and return the Dataset model.

    An existing dataset of that name is first removed from the backend
    so the new view can be created cleanly.  The user's backend
    connection is closed whether or not the operation succeeds; any
    exception propagates to the caller unchanged.
    """
    backend = get_backend()
    try:
        (model, created) = Dataset.objects.get_or_create(name=dataset_name,
                                                         owner=user)
        if not created:
            # Clear out the existing dataset, so we can create
            # the new view properly
            backend.delete_dataset(dataset_name, user)

        # Reset the preview state *before* handing the model to the
        # backend: the base create_snapshot_dataset() saves the model,
        # so resetting afterwards would leave a stale preview flag or
        # error in the database when a dataset is re-snapshotted.
        model.preview_is_finished = False
        model.preview_error = None

        backend.create_snapshot_dataset(source, model, user)

        return model
    finally:
        backend.close_user_connection(user)


def create_preview_for_dataset(dataset):
# Remove all existing sample data queries
previous = Query.objects.filter(is_preview_for=dataset)
Expand Down Expand Up @@ -325,3 +357,37 @@ def _update_tag_popularity(tag_label):
count = DatasetTag.objects.filter(tag=tag_obj).count()
tag_obj.popularity = count
tag_obj.save()


# Apply the optional search / ordering / paging controls found in
# request.GET to `query_set` and return the resulting query set.
#   q         - keep rows whose name or description contains the text
#               (case-insensitive, via icontains).
#   order_by  - the value "updated" sorts by -date_modified, then -pk.
#   page_list - when true, the result goes through _page_dataset_list().
# NOTE(review): indentation is not visible in this rendering, so it cannot
# be told from here whether the `else` (order by "pk") pairs with the
# outer `"order_by" in request.GET` test or with the inner `== "updated"`
# test - confirm against the real file before restructuring.
def _filter_list_from_request(query_set, request, page_list):
if "q" in request.GET:
q = request.GET["q"]
# Match the search text against either column.
query_set = query_set.filter(Q(name__icontains=q) |
Q(description__icontains=q))

if "order_by" in request.GET:
if request.GET["order_by"] == "updated":
# mysql doesn't have the timestamp resolution needed to be
# able to just filter by date modified during unit tests
query_set = query_set.order_by("-date_modified", "-pk")
else:
query_set = query_set.order_by("pk")

# Paging is optional so callers can combine several query sets first.
if page_list:
query_set = _page_dataset_list(query_set, request)

return query_set


def _page_dataset_list(query_set, request):
if "page" in request.GET:
page_size = 50
if "page_size" in request.GET:
page_size = int(request.GET["page_size"])

page_num = int(request.GET["page"])
start = (page_num - 1) * page_size
end = start + page_size
query_set = query_set[start:end]

return query_set
30 changes: 30 additions & 0 deletions sqlshare_rest/management/commands/run_snapshots_queue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from sqlshare_rest.util.snapshot_queue import process_snapshot_queue
from django.core.management.base import BaseCommand
from optparse import make_option


class Command(BaseCommand):
    """
    Management command that runs the dataset snapshot queue.

    With --run-once, process_snapshot_queue() is called with only the
    verbose flag; otherwise it is called with run_once=False and
    thread_count=10 as well.
    """
    help = "This processes snapshots of datasets."

    option_list = BaseCommand.option_list + (
        make_option(
            '--run-once',
            action="store_true",
            dest='run_once',
            default=False,
            help='This will only process one item in the queue',
        ),
        make_option(
            '--verbose',
            action="store_true",
            dest='verbose',
            default=False,
            help='Prints status info to standard out',
        ),
    )

    def handle(self, *args, **options):
        queue_kwargs = {"verbose": options["verbose"]}
        if not options["run_once"]:
            # Continuous mode: keep going, using a pool of 10 threads.
            queue_kwargs.update(run_once=False, thread_count=10)

        process_snapshot_queue(**queue_kwargs)
11 changes: 11 additions & 0 deletions sqlshare_rest/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class Dataset(models.Model):
preview_is_finished = models.BooleanField(default=False)
preview_error = models.TextField(null=True)
rows_total = models.IntegerField(null=True)
snapshot_source = models.ForeignKey('Dataset', null=True)
snapshot_finished = models.NullBooleanField()

class Meta:
unique_together = (("name", "owner"),)
Expand Down Expand Up @@ -128,6 +130,15 @@ def user_has_read_access(self, user):
return False


class RecentDatasetView(models.Model):
    """
    Links a user to a dataset together with a timestamp.

    There is at most one row per (dataset, user) pair, enforced by
    Meta.unique_together.
    """
    dataset = models.ForeignKey(Dataset)
    user = models.ForeignKey(User)
    # Nullable, so a row may exist without a recorded time.
    timestamp = models.DateTimeField(null=True)

    class Meta:
        unique_together = (("dataset", "user"),)


class SharingEmail(models.Model):
email = models.CharField(max_length=200)

Expand Down
1 change: 1 addition & 0 deletions sqlshare_rest/test/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ def test_valid_no_permissions(self):
owner = "put_user2"
ds1_name = "dataset_1c"
self.remove_users.append(owner)
self.remove_users.append("not_owner")
auth_headers = self.get_auth_header_for_username(owner)
url = reverse("sqlshare_view_dataset", kwargs={ 'owner': owner,
'name': ds1_name})
Expand Down
Loading

0 comments on commit 24fa7ea

Please sign in to comment.