Skip to content

Commit

Permalink
sessions: Migrate to custom indexed session model.
Browse files Browse the repository at this point in the history
Fixes #19490 and has other performance benefits.

Create RealmSession model with fields: realm, user and ip_address,
with combined index on (realm, user) and an index on ip_address.

Migrate code to use the new model, zerver/lib/sessions' has most impact.

Create data_migration to copy over data from
django_session to zerver_realmsession table.
  • Loading branch information
abdelrahman725 committed Apr 17, 2024
1 parent db86027 commit efa5c75
Show file tree
Hide file tree
Showing 18 changed files with 331 additions and 73 deletions.
2 changes: 1 addition & 1 deletion zerver/actions/realm_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def do_deactivate_realm(realm: Realm, *, acting_user: Optional[UserProfile]) ->
# Note: This is intentionally outside the transaction because it
# is unsafe to modify sessions inside transactions with the
# cached_db session plugin we're using, and our session engine
# declared in zerver/lib/safe_session_cached_db.py enforces this.
# declared in zerver/models/sessions.py enforces this.
delete_realm_user_sessions(realm)


Expand Down
14 changes: 8 additions & 6 deletions zerver/lib/cache_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Any, Callable, Dict, Iterable, Tuple

from django.conf import settings
from django.contrib.sessions.models import Session
from django.db import connection
from django.db.models import QuerySet
from django.utils.timezone import now as timezone_now
Expand All @@ -21,11 +20,13 @@
user_profile_by_api_key_cache_key,
user_profile_cache_key_id,
)
from zerver.lib.safe_session_cached_db import SessionStore

# from zerver.lib.safe_session_cached_db import SessionStore
from zerver.lib.sessions import session_engine
from zerver.lib.users import get_all_api_keys
from zerver.models import Client, UserProfile
from zerver.models import Client, RealmSession, UserProfile
from zerver.models.clients import get_client_cache_key
from zerver.models.sessions import SessionStore


def user_cache_items(
Expand All @@ -45,9 +46,10 @@ def client_cache_items(items_for_remote_cache: Dict[str, Tuple[Client]], client:


def session_cache_items(
items_for_remote_cache: Dict[str, Dict[str, object]], session: Session
items_for_remote_cache: Dict[str, Dict[str, object]], session: RealmSession
) -> None:
if settings.SESSION_ENGINE != "zerver.lib.safe_session_cached_db":
if settings.SESSION_ENGINE != "zerver.models.sessions":
# if settings.SESSION_ENGINE != "zerver.models.sessions":
# If we're not using the cached_db session engine, we there
# will be no store.cache_key attribute, and in any case we
# don't need to fill the cache, since it won't exist.
Expand Down Expand Up @@ -95,7 +97,7 @@ def get_users() -> QuerySet[UserProfile]:
3600 * 24 * 7,
10000,
),
"session": (Session.objects.all, session_cache_items, 3600 * 24 * 7, 10000),
"session": (RealmSession.objects.all, session_cache_items, 3600 * 24 * 7, 10000),
}


Expand Down
3 changes: 3 additions & 0 deletions zerver/lib/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ class MessagePartial(TypedDict):
"zerver_realmplayground",
"zerver_realmreactivationstatus",
"zerver_realmuserdefault",
"zerver_realmsession",
"zerver_recipient",
"zerver_scheduledemail",
"zerver_scheduledemail_users",
Expand Down Expand Up @@ -243,6 +244,8 @@ class MessagePartial(TypedDict):
"zerver_submessage",
# Drafts don't need to be exported as they are supposed to be more ephemeral.
"zerver_draft",
# sessions are treated like others secrets, so shouldn't be exported
"zerver_realmsession",
# For any tables listed below here, it's a bug that they are not present in the export.
}

Expand Down
28 changes: 0 additions & 28 deletions zerver/lib/safe_session_cached_db.py

This file was deleted.

45 changes: 19 additions & 26 deletions zerver/lib/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from django.conf import settings
from django.contrib.auth import SESSION_KEY, get_user_model
from django.contrib.sessions.backends.base import SessionBase
from django.contrib.sessions.models import Session
from django.db.models import Q
from django.utils.timezone import now as timezone_now

from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime
from zerver.models import Realm, UserProfile
from zerver.models.users import get_user_profile_by_id
from zerver.lib.utils import assert_is_not_none
from zerver.models import Realm, RealmSession, UserProfile


class SessionEngine(Protocol):
Expand All @@ -31,45 +31,38 @@ def get_session_dict_user(session_dict: Mapping[str, int]) -> Optional[int]:
return None


def get_session_user_id(session: Session) -> Optional[int]:
return get_session_dict_user(session.get_decoded())
def user_sessions(user_profile: UserProfile) -> List[RealmSession]:
return list(RealmSession.objects.filter(user=user_profile))


def user_sessions(user_profile: UserProfile) -> List[Session]:
return [s for s in Session.objects.all() if get_session_user_id(s) == user_profile.id]


def delete_session(session: Session) -> None:
def delete_session(session: RealmSession) -> None:
session_engine.SessionStore(session.session_key).delete()


def delete_user_sessions(user_profile: UserProfile) -> None:
for session in Session.objects.all():
if get_session_user_id(session) == user_profile.id:
delete_session(session)
for session in RealmSession.objects.filter(realm=user_profile.realm, user=user_profile):
delete_session(session)


def delete_realm_user_sessions(realm: Realm) -> None:
realm_user_ids = set(UserProfile.objects.filter(realm=realm).values_list("id", flat=True))
for session in Session.objects.all():
if get_session_user_id(session) in realm_user_ids:
delete_session(session)
print(RealmSession.objects.filter(realm=realm))
for session in RealmSession.objects.filter(realm=realm):
delete_session(session)


def delete_all_user_sessions() -> None:
for session in Session.objects.all():
for session in RealmSession.objects.all():
delete_session(session)


def delete_all_deactivated_user_sessions() -> None:
for session in Session.objects.all():
user_profile_id = get_session_user_id(session)
if user_profile_id is None: # nocoverage # TODO: Investigate why we lost coverage on this
continue
user_profile = get_user_profile_by_id(user_profile_id)
if not user_profile.is_active or user_profile.realm.deactivated:
logging.info("Deactivating session for deactivated user %s", user_profile.id)
delete_session(session)
for session in RealmSession.objects.filter(
Q(user__is_active=False) | Q(realm__deactivated=True)
):
# user_profile is guaranteed to not be none, since none values are excluded from the query.
user_profile = assert_is_not_none(session.user)
logging.info("Deactivating session for deactivated user %s", user_profile.id)
delete_session(session)


def set_expirable_session_var(
Expand Down
2 changes: 1 addition & 1 deletion zerver/lib/test_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2001,7 +2001,7 @@ def capture_send_event_calls(

def get_row_ids_in_all_tables() -> Iterator[Tuple[str, Set[int]]]:
all_models = apps.get_models(include_auto_created=True)
ignored_tables = {"django_session"}
ignored_tables = {"django_session", "zerver_realmsession"}

for model in all_models:
table_name = model._meta.db_table
Expand Down
50 changes: 50 additions & 0 deletions zerver/migrations/0510_sessions_create_custom_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Generated by Django 4.2.11 on 2024-04-17 04:28

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("zerver", "0509_fix_emoji_metadata"),
]

operations = [
migrations.CreateModel(
name="RealmSession",
fields=[
(
"session_key",
models.CharField(
max_length=40, primary_key=True, serialize=False, verbose_name="session key"
),
),
("session_data", models.TextField(verbose_name="session data")),
("expire_date", models.DateTimeField(db_index=True, verbose_name="expire date")),
("ip_address", models.GenericIPAddressField(null=True)),
(
"realm",
models.ForeignKey(
null=True, on_delete=django.db.models.deletion.CASCADE, to="zerver.realm"
),
),
(
"user",
models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"indexes": [
models.Index(
fields=["realm", "user"], name="zerver_realmsession_realm_user_idx"
),
models.Index(fields=["ip_address"], name="zerver_realmsession_ip_address_idx"),
],
},
),
]
111 changes: 111 additions & 0 deletions zerver/migrations/0511_sessions_data_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Generated by Django 4.2.11 on 2024-04-17 04:30

from django.db import connection, migrations, transaction
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from psycopg2.sql import SQL, Composable, Identifier

# This migration copies over data from existing sessions table "django_session"
# to the custom session table "zerver_realmsession" which is introduced in previous migration.

# Since django_session is a large table we want to do the insertion as performantly as possible,
# approach is pretty simple: drop all indexes, copy data, create back the indexes,
# because creating an index on pre-existing data is quicker than updating it incrementally as each row is loaded.

# copy and drop operations are done in one transaction (because we can't write to postgres then update its scheme (i.e. create the indexes) in the same transaction),
# create indexes is done in a separate transaction


def migrate_data_from_django_session_to_realmsession(
apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
session_key_index_name = ""
with transaction.atomic():
with connection.cursor() as cursor:
# Fetch indexes names
cursor.execute(
SQL("""SELECT indexname FROM pg_indexes WHERE tablename = 'zerver_realmsession' """)
)

indexes_tuples = cursor.fetchall()
# Drop all indexes
for index in indexes_tuples:
if index[0] == "zerver_realmsession_pkey":
query: Composable = SQL(
"""ALTER TABLE zerver_realmsession drop constraint zerver_realmsession_pkey"""
)
else:
if "session_key" in index[0]:
session_key_index_name = index[0]

query = SQL("""DROP INDEX {index_name}""").format(
index_name=Identifier(index[0])
)

cursor.execute(query)

# Copy over data
cursor.execute(SQL("INSERT INTO zerver_realmsession (SELECT * FROM django_session)"))

# Create back the indexes
with transaction.atomic():
with connection.cursor() as cursor:
# Primary key creates an index by default
cursor.execute(
SQL(
"""ALTER TABLE zerver_realmsession ADD CONSTRAINT zerver_realmsession_pkey PRIMARY KEY (session_key)"""
)
)

# That index was auto generated, we create the index with the same name, but if a custom name won't be a problem then it would be simpler to add our custom name.
cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS {index_name} ON zerver_realmsession (session_key varchar_pattern_ops)"""
).format(index_name=Identifier(session_key_index_name))
)

cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS zerver_realmsession_expire_date_idx ON zerver_realmsession (expire_date)"""
)
)

cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS zerver_realmsession_realm_idx ON zerver_realmsession (realm_id)"""
)
)

cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS zerver_realmsession_user_idx ON zerver_realmsession (user_id)"""
)
)

cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS zerver_realmsession_realm_user_idx ON zerver_realmsession (realm_id, user_id)"""
)
)

cursor.execute(
SQL(
"""CREATE INDEX IF NOT EXISTS zerver_realmsession_ip_address_idx ON zerver_realmsession (ip_address)"""
)
)

# To do
# if above queries are successful, Should we delete data from django_session table ?
# TRUNCATE TABLE django_session.


class Migration(migrations.Migration):
atomic = False

dependencies = [
("zerver", "0510_sessions_create_custom_model"),
]

operations = [
migrations.RunPython(migrate_data_from_django_session_to_realmsession),
]
1 change: 1 addition & 0 deletions zerver/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from zerver.models.scheduled_jobs import (
ScheduledMessageNotificationEmail as ScheduledMessageNotificationEmail,
)
from zerver.models.sessions import RealmSession as RealmSession
from zerver.models.streams import DefaultStream as DefaultStream
from zerver.models.streams import DefaultStreamGroup as DefaultStreamGroup
from zerver.models.streams import Stream as Stream
Expand Down
20 changes: 20 additions & 0 deletions zerver/models/nocache_sessions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import Dict, Type, Union

from django.contrib.sessions.backends.db import SessionStore as DbSessionStore
from typing_extensions import override

from zerver.models import RealmSession
from zerver.models.sessions import create_realm_session_instance


# Used in tests.
class SessionStore(DbSessionStore):
@classmethod
@override
def get_model_class(cls) -> Type[RealmSession]:
return RealmSession

@override
def create_model_instance(self, data: Dict[str, Union[int, str]]) -> RealmSession:
session_object: RealmSession = super().create_model_instance(data) # type: ignore[assignment] # https://github.com/typeddjango/django-stubs/issues/2056
return create_realm_session_instance(session_object, data)

0 comments on commit efa5c75

Please sign in to comment.