-
-
Notifications
You must be signed in to change notification settings - Fork 7.5k
/
test_fixtures.py
436 lines (357 loc) · 15.1 KB
/
test_fixtures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
import glob
import json
import os
import re
import shutil
import subprocess
import sys
import time
from importlib import import_module
from io import StringIO
from typing import Any, List, Set
from django.apps import apps
from django.conf import settings
from django.core.management import call_command
from django.db import DEFAULT_DB_ALIAS, ProgrammingError, connection, connections
from django.db.utils import OperationalError
from django.utils.module_loading import module_has_submodule
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import (
TEMPLATE_DATABASE_DIR,
get_dev_uuid_var_path,
is_digest_obsolete,
run,
write_new_digest,
)
BACKEND_DATABASE_TEMPLATE = "zulip_test_template"
UUID_VAR_DIR = get_dev_uuid_var_path()
IMPORTANT_FILES = [
"zilencer/management/commands/populate_db.py",
"zerver/actions/create_realm.py",
"zerver/lib/bulk_create.py",
"zerver/lib/generate_test_data.py",
"zerver/lib/server_initialization.py",
"tools/setup/postgresql-init-test-db",
"tools/setup/postgresql-init-dev-db",
"zerver/migrations/0258_enable_online_push_notifications_default.py",
]
VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION = """
NOTE!!!!
We are rebuilding your database for a one-time transition.
We have a hashing scheme that we use to detect whether any
important files used in the construction of the database
have changed.
We are changing that scheme so it only uses one file
instead of a directory of files.
In order to prevent errors due to this transition, we are
doing a one-time rebuild of your database. This should
be the last time this happens (for this particular reason,
at least), unless you go back to older branches.
"""
def migration_paths() -> List[str]:
    """Return every file whose content should invalidate the migration digest:
    all per-app migration modules, plus the dev requirements lockfile."""
    paths = glob.glob("*/migrations/*.py")
    paths.append("requirements/dev.txt")
    return paths
class Database:
    """Bookkeeping for one Zulip database (dev or test).

    Uses content digests of important files/settings and of migration
    files, plus a recorded `showmigrations` snapshot, to decide whether
    the database needs a full rebuild, just migrations, or nothing.
    """

    def __init__(self, platform: str, database_name: str, settings: str) -> None:
        # platform is "dev" or "test" and namespaces the digest files.
        self.database_name = database_name
        self.settings = settings
        # Digest over IMPORTANT_FILES plus important_settings().
        self.digest_name = "db_files_hash_for_" + platform
        self.migration_status_file = "migration_status_" + platform
        self.migration_status_path = os.path.join(
            UUID_VAR_DIR,
            self.migration_status_file,
        )
        # Digest over all migration files (see migration_paths()).
        self.migration_digest_file = "migrations_hash_" + database_name

    def important_settings(self) -> List[str]:
        """Return JSON-serialized values of settings that affect initial
        database content; a change in any of them forces a rebuild."""

        def get(setting_name: str) -> str:
            # Default to {} so a removed setting still serializes stably.
            value = getattr(settings, setting_name, {})
            return json.dumps(value, sort_keys=True)

        return [
            get("LOCAL_DATABASE_PASSWORD"),
            get("INTERNAL_BOTS"),
            get("REALM_INTERNAL_BOTS"),
            get("DISABLED_REALM_INTERNAL_BOTS"),
        ]

    def run_db_migrations(self) -> None:
        """Run pending migrations, then record the new migration status."""
        # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
        # the command line rather than just calling the migration
        # functions, because Django doesn't support changing settings like
        # what the database is at runtime.
        # Also we export ZULIP_DB_NAME which is ignored by dev platform but
        # recognised by test platform and used to migrate correct db.
        manage_py = [
            "env",
            "DJANGO_SETTINGS_MODULE=" + self.settings,
            "ZULIP_DB_NAME=" + self.database_name,
            "./manage.py",
        ]

        run([*manage_py, "migrate", "--no-input"])

        run([*manage_py, "get_migration_status", "--output=" + self.migration_status_file])

    def what_to_do_with_migrations(self) -> str:
        """Compare the recorded migration status with the current one.

        Returns "scrap" (rebuild from scratch), "migrate" (run new
        migrations), or "migrations_are_latest" (nothing to do).
        """
        status_fn = self.migration_status_path
        # NOTE: intentionally shadows the django.conf.settings import
        # within this method; this is the settings *module path* string.
        settings = self.settings

        if not os.path.exists(status_fn):
            return "scrap"

        with open(status_fn) as f:
            previous_migration_status = f.read()

        current_migration_status = get_migration_status(settings=settings)
        all_curr_migrations = extract_migrations_as_list(current_migration_status)
        all_prev_migrations = extract_migrations_as_list(previous_migration_status)

        # Fewer migrations now, or a previously-seen migration missing,
        # means history was rewritten (e.g. a branch switch): rebuild.
        if len(all_curr_migrations) < len(all_prev_migrations):
            return "scrap"

        for migration in all_prev_migrations:
            if migration not in all_curr_migrations:
                return "scrap"

        if len(all_curr_migrations) == len(all_prev_migrations):
            return "migrations_are_latest"

        return "migrate"

    def database_exists(self) -> bool:
        """Return whether self.database_name exists on the server; False
        if we cannot even connect (OperationalError)."""
        try:
            connection = connections[DEFAULT_DB_ALIAS]

            with connection.cursor() as cursor:
                cursor.execute(
                    "SELECT 1 from pg_database WHERE datname=%s;",
                    [self.database_name],
                )
                return_value = bool(cursor.fetchone())
            connections.close_all()
            return return_value
        except OperationalError:
            return False

    def files_or_settings_have_changed(self) -> bool:
        """Return whether IMPORTANT_FILES or important_settings() differ
        from what the recorded digest was computed over."""
        database_name = self.database_name

        # Deal with legacy hash files. We can kill off this code when
        # enough time has passed since April 2020 that we're not
        # worried about anomalies doing `git bisect`--probably a few
        # months is sufficient.
        legacy_status_dir = os.path.join(UUID_VAR_DIR, database_name + "_db_status")
        if os.path.exists(legacy_status_dir):
            print(VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION)

            # Remove the old digest for several reasons:
            #   - tidiness
            #   - preventing false positives if you bisect
            #   - make this only a one-time headache (generally)
            shutil.rmtree(legacy_status_dir)

            # Return True to force a one-time rebuild.
            return True

        return is_digest_obsolete(
            self.digest_name,
            IMPORTANT_FILES,
            self.important_settings(),
        )

    def template_status(self) -> str:
        # This function returns a status string specifying the type of
        # state the template db is in and thus the kind of action required.
        # One of: "needs_rebuild", "run_migrations", "current".
        if not self.database_exists():
            # TODO: It's possible that `database_exists` will
            #       return `False` even though the database
            #       exists, but we just have the wrong password,
            #       probably due to changing the secrets file.
            #
            #       The only problem this causes is that we waste
            #       some time rebuilding the whole database, but
            #       it's better to err on that side, generally.
            return "needs_rebuild"

        if self.files_or_settings_have_changed():
            return "needs_rebuild"

        # Here we hash and compare our migration files before doing
        # the work of seeing what to do with them; if there are no
        # changes, we can safely assume we don't need to run
        # migrations without spending a few 100ms parsing all the
        # Python migration code.
        if not self.is_migration_digest_obsolete():
            return "current"

        """
        NOTE:
            We immediately update the digest, assuming our
            callers will do what it takes to run the migrations.

            Ideally our callers would just do it themselves
            AFTER the migrations actually succeeded, but the
            caller codepaths are kind of complicated here.
        """

        self.write_new_migration_digest()

        migration_op = self.what_to_do_with_migrations()
        if migration_op == "scrap":
            return "needs_rebuild"

        if migration_op == "migrate":
            return "run_migrations"

        return "current"

    def is_migration_digest_obsolete(self) -> bool:
        """Return whether any migration file changed since the last digest."""
        return is_digest_obsolete(
            self.migration_digest_file,
            migration_paths(),
        )

    def write_new_migration_digest(self) -> None:
        """Record a fresh digest over the current migration files."""
        write_new_digest(
            self.migration_digest_file,
            migration_paths(),
        )

    def write_new_db_digest(self) -> None:
        """Record a fresh digest over IMPORTANT_FILES and settings."""
        write_new_digest(
            self.digest_name,
            IMPORTANT_FILES,
            self.important_settings(),
        )
# Singleton describing the development database (`zulip`).
DEV_DATABASE = Database(
    platform="dev",
    database_name="zulip",
    settings="zproject.settings",
)

# Singleton describing the backend-test template database.
TEST_DATABASE = Database(
    platform="test",
    database_name="zulip_test_template",
    settings="zproject.test_settings",
)
def update_test_databases_if_required(rebuild_test_database: bool = False) -> None:
    """Bring the zulip_test_template database in sync with our migrations,
    using the cheapest sufficient action:

    * If only new migrations are needed, run just those against the
      template database.
    * Otherwise, rebuild the test template database from scratch.

    The default behavior suffices for `test-backend`, whose runner clones
    directly from the template database.  Pass `rebuild_test_database=True`
    (used by our frontend and API tests) to also drop and re-clone the
    zulip_test database from the template, giving those suites a fresh copy.
    """
    status = TEST_DATABASE.template_status()
    if status == "needs_rebuild":
        run(["tools/rebuild-test-database"])
        TEST_DATABASE.write_new_db_digest()
    elif status == "run_migrations":
        TEST_DATABASE.run_db_migrations()
        run(["tools/setup/generate-fixtures"])
    elif rebuild_test_database:
        run(["tools/setup/generate-fixtures"])
def get_migration_status(**options: Any) -> str:
    """Return the output of Django's `showmigrations --list`, with ANSI
    color codes stripped.

    Recognized options: verbosity, app_label, database, no_color,
    settings, traceback.
    """
    # Import each app's management module so custom commands register
    # before we invoke call_command().
    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module(".management", app_config.name)

    command_args = ["--list"]
    app_label = options.get("app_label")
    if app_label:
        command_args.append(app_label)

    buffer = StringIO()
    call_command(
        "showmigrations",
        *command_args,
        database=options.get("database", DEFAULT_DB_ALIAS),
        no_color=options.get("no_color", False),
        settings=options.get("settings", os.environ["DJANGO_SETTINGS_MODULE"]),
        stdout=buffer,
        traceback=options.get("traceback", True),
        verbosity=options.get("verbosity", 1),
    )
    connections.close_all()

    # showmigrations may emit bold-on/bold-off escape sequences; drop them.
    return re.sub(r"\x1b\[(1|0)m", "", buffer.getvalue())
def extract_migrations_as_list(migration_status: str) -> List[str]:
    """Parse `showmigrations --list` output into a list of migration names.

    Each migration line looks like ` [X] 0001_initial` (applied) or
    ` [ ] 0002_foo` (unapplied); we return the bare `NNNN_name` parts.
    """
    # Bug fix: the character class was previously `[X| ]`, where the `|`
    # is a literal inside a class (a typo for "X or space") and would
    # also match a bogus `[|]` marker.  `[X ]` matches exactly the
    # applied/unapplied markers showmigrations emits.
    MIGRATIONS_RE = re.compile(r"\[[X ]\] (\d+_.+)\n")
    return MIGRATIONS_RE.findall(migration_status)
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
    """Drop test databases leaked by crashed test-backend runs.

    The logic in zerver/lib/test_runner.py tries to delete all the
    temporary test databases generated by test-backend threads, but it
    cannot guarantee it handles all race conditions correctly.  This
    catch-all deletes any leaked ones:

    * Every database named like zulip_test_template_* is dropped,
    * unless it is registered in a file under TEMPLATE_DATABASE_DIR by a
      currently running test-backend invocation,
    * and that file is less than expiry_time seconds old.

    This should ensure we ~never break a running test-backend process,
    while eventually deleting all leaked databases.  Returns the number
    of databases dropped.
    """
    registry_files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))

    # Every database on the server whose name looks like a test clone.
    test_databases: Set[str] = set()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            test_databases = {
                row[0] for row in cursor.fetchall() if "zulip_test_template_" in row[0]
            }
    except ProgrammingError:
        pass

    databases_in_use: Set[str] = set()
    for registry_file in registry_files:
        if round(time.time()) - os.path.getmtime(registry_file) < expiry_time:
            with open(registry_file) as f:
                databases_in_use.update(
                    f"zulip_test_template_{line}".rstrip() for line in f
                )
        else:
            # Any test-backend run older than expiry_time can be
            # cleaned up, both the database and the file listing its
            # databases.
            os.remove(registry_file)

    databases_to_drop = test_databases - databases_in_use
    if not databases_to_drop:
        return 0

    # Names come straight from pg_database (not user input), so building
    # the DROP statements by interpolation is safe here.
    commands = "\n".join(f"DROP DATABASE IF EXISTS {db};" for db in databases_to_drop)
    subprocess.run(
        ["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost", "postgres", "zulip_test"],
        input=commands,
        check=True,
        text=True,
    )
    return len(databases_to_drop)
def remove_test_run_directories(expiry_time: int = 60 * 60) -> int:
    """Delete test-backend run directories older than expiry_time seconds,
    returning how many were removed."""
    removed_count = 0
    for run_dir in glob.glob(os.path.join(UUID_VAR_DIR, "test-backend", "run_*")):
        if round(time.time()) - os.path.getmtime(run_dir) > expiry_time:
            try:
                shutil.rmtree(run_dir)
            except FileNotFoundError:
                # A concurrent cleanup may have already removed it.
                continue
            removed_count += 1
    return removed_count
def reset_zulip_test_database() -> None:
    """
    This function is used to reset the zulip_test database fastest way possible,
    i.e. First, it deletes the database and then clones it from zulip_test_template.
    This function is used with puppeteer tests, so it can quickly reset the test
    database after each run.
    """
    from zerver.lib.test_runner import destroy_test_databases

    # Make sure default database is 'zulip_test'.
    assert connections["default"].settings_dict["NAME"] == "zulip_test"

    # Clearing all the active PSQL sessions with 'zulip_test'.
    run(
        [
            "env",
            "PGHOST=localhost",
            "PGUSER=zulip_test",
            "scripts/setup/terminate-psql-sessions",
            "zulip_test",
        ]
    )

    destroy_test_databases()
    # Pointing default database to test database template, so we can instantly clone it.
    settings.DATABASES["default"]["NAME"] = BACKEND_DATABASE_TEMPLATE
    connection = connections["default"]
    clone_database_suffix = "clone"
    # Django's clone_test_db copies the template at the Postgres level,
    # producing a database named <template>_<suffix>.
    connection.creation.clone_test_db(
        suffix=clone_database_suffix,
    )
    settings_dict = connection.creation.get_test_db_clone_settings(clone_database_suffix)
    # We manually rename the clone database to 'zulip_test' because when cloning it,
    # its name is set to original database name + some suffix.
    # Also, we need it to be 'zulip_test' so that our running server can recognize it.
    with connection.cursor() as cursor:
        cursor.execute("ALTER DATABASE zulip_test_template_clone RENAME TO zulip_test;")
    settings_dict["NAME"] = "zulip_test"
    # connection.settings_dict must be updated in place for changes to be
    # reflected in django.db.connections. If the following line assigned
    # connection.settings_dict = settings_dict, new threads would connect
    # to the default database instead of the appropriate clone.
    connection.settings_dict.update(settings_dict)
    connection.close()