Skip to content

Commit

Permalink
sessions: Migrate to custom indexed session model.
Browse files Browse the repository at this point in the history
Fixes #19490 and has other performance benefits.

Create RealmSession model with custom fields: realm, user and ip_address,
indexed by (realm, user) and ip_address.

Migrate code to use that model, "zerver/lib/sessions" has most impact.

Create data_migration to copy over data from
django_session table to zerver_realmsession table.
  • Loading branch information
abdelrahman725 committed May 14, 2024
1 parent 3f80bc1 commit 9cf24bc
Show file tree
Hide file tree
Showing 19 changed files with 422 additions and 75 deletions.
2 changes: 1 addition & 1 deletion zerver/actions/realm_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ def do_deactivate_realm(realm: Realm, *, acting_user: Optional[UserProfile]) ->
# Note: This is intentionally outside the transaction because it
# is unsafe to modify sessions inside transactions with the
# cached_db session plugin we're using, and our session engine
# declared in zerver/lib/safe_session_cached_db.py enforces this.
# declared in zerver/models/sessions.py enforces this.
delete_realm_user_sessions(realm)


Expand Down
14 changes: 8 additions & 6 deletions zerver/lib/cache_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Any, Callable, Dict, Iterable, Tuple

from django.conf import settings
from django.contrib.sessions.models import Session
from django.db import connection
from django.db.models import QuerySet
from django.utils.timezone import now as timezone_now
Expand All @@ -21,11 +20,13 @@
user_profile_by_api_key_cache_key,
user_profile_cache_key_id,
)
from zerver.lib.safe_session_cached_db import SessionStore

# from zerver.lib.safe_session_cached_db import SessionStore
from zerver.lib.sessions import session_engine
from zerver.lib.users import get_all_api_keys
from zerver.models import Client, UserProfile
from zerver.models import Client, RealmSession, UserProfile
from zerver.models.clients import get_client_cache_key
from zerver.models.sessions import SessionStore


def user_cache_items(
Expand All @@ -45,9 +46,10 @@ def client_cache_items(items_for_remote_cache: Dict[str, Tuple[Client]], client:


def session_cache_items(
items_for_remote_cache: Dict[str, Dict[str, object]], session: Session
items_for_remote_cache: Dict[str, Dict[str, object]], session: RealmSession
) -> None:
if settings.SESSION_ENGINE != "zerver.lib.safe_session_cached_db":
if settings.SESSION_ENGINE != "zerver.models.sessions":
# if settings.SESSION_ENGINE != "zerver.models.sessions":
# If we're not using the cached_db session engine, we there
# will be no store.cache_key attribute, and in any case we
# don't need to fill the cache, since it won't exist.
Expand Down Expand Up @@ -95,7 +97,7 @@ def get_users() -> QuerySet[UserProfile]:
3600 * 24 * 7,
10000,
),
"session": (Session.objects.all, session_cache_items, 3600 * 24 * 7, 10000),
"session": (RealmSession.objects.all, session_cache_items, 3600 * 24 * 7, 10000),
}


Expand Down
3 changes: 3 additions & 0 deletions zerver/lib/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ class MessagePartial(TypedDict):
"zerver_realmplayground",
"zerver_realmreactivationstatus",
"zerver_realmuserdefault",
"zerver_realmsession",
"zerver_recipient",
"zerver_scheduledemail",
"zerver_scheduledemail_users",
Expand Down Expand Up @@ -245,6 +246,8 @@ class MessagePartial(TypedDict):
"zerver_submessage",
# Drafts don't need to be exported as they are supposed to be more ephemeral.
"zerver_draft",
# sessions are treated like others secrets, so shouldn't be exported
"zerver_realmsession",
# For any tables listed below here, it's a bug that they are not present in the export.
}

Expand Down
28 changes: 0 additions & 28 deletions zerver/lib/safe_session_cached_db.py

This file was deleted.

44 changes: 18 additions & 26 deletions zerver/lib/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from django.conf import settings
from django.contrib.auth import SESSION_KEY, get_user_model
from django.contrib.sessions.backends.base import SessionBase
from django.contrib.sessions.models import Session
from django.db.models import Q
from django.utils.timezone import now as timezone_now

from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime
from zerver.models import Realm, UserProfile
from zerver.models.users import get_user_profile_by_id
from zerver.lib.utils import assert_is_not_none
from zerver.models import Realm, RealmSession, UserProfile


class SessionEngine(Protocol):
Expand All @@ -31,45 +31,37 @@ def get_session_dict_user(session_dict: Mapping[str, int]) -> Optional[int]:
return None


def get_session_user_id(session: Session) -> Optional[int]:
return get_session_dict_user(session.get_decoded())
def user_sessions(user_profile: UserProfile) -> List[RealmSession]:
return list(RealmSession.objects.filter(user=user_profile))


def user_sessions(user_profile: UserProfile) -> List[Session]:
return [s for s in Session.objects.all() if get_session_user_id(s) == user_profile.id]


def delete_session(session: Session) -> None:
def delete_session(session: RealmSession) -> None:
session_engine.SessionStore(session.session_key).delete()


def delete_user_sessions(user_profile: UserProfile) -> None:
for session in Session.objects.all():
if get_session_user_id(session) == user_profile.id:
delete_session(session)
for session in RealmSession.objects.filter(realm=user_profile.realm, user=user_profile):
delete_session(session)


def delete_realm_user_sessions(realm: Realm) -> None:
realm_user_ids = set(UserProfile.objects.filter(realm=realm).values_list("id", flat=True))
for session in Session.objects.all():
if get_session_user_id(session) in realm_user_ids:
delete_session(session)
for session in RealmSession.objects.filter(realm=realm):
delete_session(session)


def delete_all_user_sessions() -> None:
for session in Session.objects.all():
for session in RealmSession.objects.all():
delete_session(session)


def delete_all_deactivated_user_sessions() -> None:
for session in Session.objects.all():
user_profile_id = get_session_user_id(session)
if user_profile_id is None: # nocoverage # TODO: Investigate why we lost coverage on this
continue
user_profile = get_user_profile_by_id(user_profile_id)
if not user_profile.is_active or user_profile.realm.deactivated:
logging.info("Deactivating session for deactivated user %s", user_profile.id)
delete_session(session)
for session in RealmSession.objects.filter(
Q(user__is_active=False) | Q(realm__deactivated=True)
):
# user_profile is guaranteed to not be none, since none values are excluded from the query.
user_profile = assert_is_not_none(session.user)
logging.info("Deactivating session for deactivated user %s", user_profile.id)
delete_session(session)


def set_expirable_session_var(
Expand Down
2 changes: 1 addition & 1 deletion zerver/lib/test_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2004,7 +2004,7 @@ def capture_send_event_calls(

def get_row_ids_in_all_tables() -> Iterator[Tuple[str, Set[int]]]:
all_models = apps.get_models(include_auto_created=True)
ignored_tables = {"django_session"}
ignored_tables = {"django_session", "zerver_realmsession"}

for model in all_models:
table_name = model._meta.db_table
Expand Down
15 changes: 15 additions & 0 deletions zerver/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,21 @@ def process_request(self, request: HttpRequest) -> Optional[HttpResponse]:

return render(request, "zerver/invalid_realm.html", status=404)

prev_session_ip_address = request.session.get("ip_address")
request.session["ip_address"] = request.META["REMOTE_ADDR"]
request.session["realm_id"] = request_notes.realm.id

# we mark the session as modified only if the ip_address changes
# otherwise we should set modified as False to prevent early unnecessary saving of session
# which triggers extra queries.
if (
prev_session_ip_address is not None
and prev_session_ip_address != request.session["ip_address"]
):
request.session.modified = True
else:
request.session.modified = False

set_tag("realm", request_notes.realm.string_id)

# Check that we're not using the non-canonical form of a REALM_HOSTS subdomain
Expand Down
53 changes: 53 additions & 0 deletions zerver/migrations/0521_sessions_create_custom_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.11 on 2024-04-17 04:28

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
(
"zerver",
"0520_attachment_zerver_attachment_realm_create_time",
),
]

operations = [
migrations.CreateModel(
name="RealmSession",
fields=[
(
"session_key",
models.CharField(
max_length=40, primary_key=True, serialize=False, verbose_name="session key"
),
),
("session_data", models.TextField(verbose_name="session data")),
("expire_date", models.DateTimeField(db_index=True, verbose_name="expire date")),
("ip_address", models.GenericIPAddressField(null=True)),
(
"realm",
models.ForeignKey(
null=True, on_delete=django.db.models.deletion.CASCADE, to="zerver.realm"
),
),
(
"user",
models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"indexes": [
models.Index(
fields=["realm", "user"], name="zerver_realmsession_realm_user_idx"
),
models.Index(fields=["ip_address"], name="zerver_realmsession_ip_address_idx"),
],
},
),
]
108 changes: 108 additions & 0 deletions zerver/migrations/0522_sessions_data_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Generated by Django 4.2.11 on 2024-04-17 04:30

from django.contrib.sessions.models import Session
from django.db import connection, migrations, transaction
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from psycopg2.sql import SQL, Composable, Identifier

# This migration copies over data from existing sessions table "django_session"
# to the custom session table "zerver_realmsession" which is introduced in previous migration.

# Since "django_session" is a large table we want to do the insertion as performantly as possible,
# approach is pretty simple: drop all indexes, copy data, create back the indexes,
# because creating an index on pre-existing data is quicker than updating it incrementally as each row is loaded.

# Both drop indexes and copy data are done in one transaction
# (because we can't write to postgres then update its scheme (i.e. create the indexes) in the same transaction),
# so creating back the indexes should be in a separate transaction.


def migrate_data_from_django_session_to_realmsession(
apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
# Just in case django_session table was empty, then no need to do all of this.
if not Session.objects.exists():
return

session_key_index_name = ""
with transaction.atomic():
with connection.cursor() as cursor:
# Fetch indexes names.
cursor.execute(
SQL("""SELECT indexname FROM pg_indexes WHERE tablename = 'zerver_realmsession' """)
)

indexes_tuples = cursor.fetchall()
# Drop all indexes.
for index in indexes_tuples:
if index[0] == "zerver_realmsession_pkey":
query: Composable = SQL(
"""ALTER TABLE zerver_realmsession drop constraint zerver_realmsession_pkey"""
)
else:
if "session_key" in index[0]:
session_key_index_name = index[0]

query = SQL("""DROP INDEX {index_name}""").format(
index_name=Identifier(index[0])
)

cursor.execute(query)

# Copy over data.
cursor.execute(SQL("INSERT INTO zerver_realmsession (SELECT * FROM django_session)"))

# If Insertion is successful, Should we clear django_session table ?
# Session.objects.all().delete()

# Create back the indexes
with transaction.atomic():
with connection.cursor() as cursor:
# Primary key creates an index by default.
cursor.execute(
SQL(
"""
ALTER TABLE zerver_realmsession ADD CONSTRAINT
zerver_realmsession_pkey PRIMARY KEY (session_key)
"""
)
)

cursor.execute(
SQL(
# {index_name} was auto generated by django, we create the index with the same name just in case,
# as i'm not really sure if a custom name in this case would be a problem.
"""
CREATE INDEX IF NOT EXISTS {index_name}
ON zerver_realmsession (session_key varchar_pattern_ops);
CREATE INDEX IF NOT EXISTS zerver_realmsession_expire_date_idx
ON zerver_realmsession (expire_date);
CREATE INDEX IF NOT EXISTS zerver_realmsession_realm_idx
ON zerver_realmsession (realm_id);
CREATE INDEX IF NOT EXISTS zerver_realmsession_user_idx
ON zerver_realmsession (user_id);
CREATE INDEX IF NOT EXISTS zerver_realmsession_realm_user_idx
ON zerver_realmsession (realm_id, user_id);
CREATE INDEX IF NOT EXISTS zerver_realmsession_ip_address_idx
ON zerver_realmsession (ip_address);
"""
).format(index_name=Identifier(session_key_index_name))
)


class Migration(migrations.Migration):
atomic = False

dependencies = [
("zerver", "0521_sessions_create_custom_model"),
]

operations = [
migrations.RunPython(migrate_data_from_django_session_to_realmsession),
]
1 change: 1 addition & 0 deletions zerver/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from zerver.models.scheduled_jobs import (
ScheduledMessageNotificationEmail as ScheduledMessageNotificationEmail,
)
from zerver.models.sessions import RealmSession as RealmSession
from zerver.models.streams import DefaultStream as DefaultStream
from zerver.models.streams import DefaultStreamGroup as DefaultStreamGroup
from zerver.models.streams import Stream as Stream
Expand Down

0 comments on commit 9cf24bc

Please sign in to comment.