/
tests_models.py
389 lines (295 loc) · 15.7 KB
/
tests_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# coding: utf-8
import copy
import os
import shutil

from elasticsearch_dsl import Search
from elasticsearch_dsl.query import MatchAll
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings

from zds.settings import BASE_DIR
from zds.member.factories import ProfileFactory, StaffProfileFactory
from zds.tutorialv2.factories import PublishableContentFactory, ContainerFactory, ExtractFactory, publish_content
from zds.tutorialv2.models.models_database import PublishedContent, FakeChapter, PublishableContent
from zds.forum.factories import TopicFactory, PostFactory, Topic, Post
from zds.forum.tests.tests_views import create_category
from zds.searchv2.models import ESIndexManager
# Work on a private deep copy of the application settings: assigning
# ``settings.ZDS_APP`` directly would only alias the global dict, so the two
# path overrides below would be written into the real settings and stay there
# for every other test module loaded in the same process.
overrided_zds_app = copy.deepcopy(settings.ZDS_APP)
overrided_zds_app['content']['repo_private_path'] = os.path.join(BASE_DIR, 'contents-private-test')
overrided_zds_app['content']['repo_public_path'] = os.path.join(BASE_DIR, 'contents-public-test')
@override_settings(MEDIA_ROOT=os.path.join(BASE_DIR, 'media-test'))
@override_settings(ZDS_APP=overrided_zds_app)
@override_settings(ES_SEARCH_INDEX={'name': 'zds_search_test', 'shards': 5, 'replicas': 0})
class ESIndexManagerTests(TestCase):
    """Tests of ``ESIndexManager`` run against the index described by ``settings.ES_SEARCH_INDEX``.

    Every test is reported as skipped when the manager says it is not connected to Elasticsearch.
    """

    def setUp(self):
        """Create the users, the forum and a freshly reset index used by every test."""
        # don't build PDF to speed up the tests
        settings.ZDS_APP['content']['build_pdf_when_published'] = False
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category()
        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        self.manager.reset_es_index(self.indexable)
        self.manager.setup_custom_analyzer()
        self.manager.refresh_index()

    def _skip_unless_connected(self):
        """Skip the running test when the manager is not connected to Elasticsearch."""
        if not self.manager.connected_to_es:
            self.skipTest('Elasticsearch is not available')

    def _search_all(self):
        """Execute a match-all search through ``self.manager`` and return the response.

        ``Search.query()`` returns a modified copy, so the call has to be chained: calling it on its own line
        and dropping the result leaves the search untouched.
        """
        return self.manager.setup_search(Search().query(MatchAll())).execute()

    def _create_published_tutorial(self):
        """Create a tutorial containing one chapter with one extract, publish it, and return both.

        :return: the ``(content, chapter)`` pair, where ``content`` is the database object whose public version
                 has just been set and ``chapter`` is the container created in its draft.
        """
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        return tuto, chapter1

    def test_setup_functions(self):
        """Test the behavior of the reset_es_index(), setup_custom_analyzer() and clear_es_index() functions"""
        self._skip_unless_connected()

        custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
        manager = ESIndexManager(**custom_index)

        # in the beginning: the void
        self.assertNotIn(manager.index, self.manager.es.cat.indices())
        self.assertEqual(manager.index, custom_index['name'])
        self.assertEqual(manager.number_of_shards, custom_index['shards'])
        self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

        # 1. Creation:
        models = [Topic, Post]
        manager.reset_es_index(models)
        self.assertIn(manager.index, manager.es.cat.indices())  # index in !

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertEqual(index_settings['provided_name'], manager.index)
        # the settings API hands the two counters back as strings
        self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
        self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

        # test mappings
        mappings = manager.es.indices.get_mapping(index=manager.index)
        self.assertIn(manager.index, mappings)
        mappings = mappings[manager.index]['mappings']
        for model in models:
            self.assertIn(model.get_es_document_type(), mappings)

        # 2. Analyzer: absent right after the creation, present once set up
        self.assertNotIn('analysis', index_settings)
        manager.setup_custom_analyzer()

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertIn('analysis', index_settings)

        # 3. Clearing
        manager.clear_es_index()
        self.assertNotIn(manager.index, self.manager.es.cat.indices())  # back to the void

    def test_custom_analyzer(self):
        """Test our custom analyzer"""
        self._skip_unless_connected()

        test_sentences = [
            # stemming:
            ('programmation programmer programmateur programmes', ['program', 'program', 'program', 'program']),
            # keep "c" intact:
            ('apprendre à programmer en C', ['aprendr', 'program', 'langage_c']),
            # remove HTML and some special characters:
            ('<p>« test ! », en hurlant …</p>', ['test', 'hurlant']),
            # keep "c++" and "linux" intact:
            ('écrire un programme en C++ avec Linux', ['ecrir', 'program', 'c++', 'linux']),
            # elision:
            ('c\'est de l\'arnaque', ['arnaqu']),
        ]

        for sentence, expected_tokens in test_sentences:
            tokens = self.manager.analyze_sentence(sentence)
            self.assertEqual(len(tokens), len(expected_tokens))
            self.assertEqual(tokens, expected_tokens)

    def test_indexation(self):
        """Test the indexation and deletion of the different documents"""
        self._skip_unless_connected()

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user)
        post = PostFactory(topic=topic, author=self.user, position=1)

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto, chapter1 = self._create_published_tutorial()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. index all
        for model in self.indexable[1:]:  # do not try to index FakeChapter, it would be useless
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = self._search_all()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug,
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags, not the keys: iterating the dict itself yields its (always truthy) names
        self.assertTrue(all(must_contain.values()))

        # 2. Test what reindexation will do:
        new_topic = TopicFactory(forum=self.forum, author=self.user)
        new_post = PostFactory(topic=new_topic, author=self.user, position=1)

        if_force_reindexing = 0
        if_not_force_reindexing = 0
        for model in self.indexable[1:]:  # once again; indexing FakeChapter would be useless
            if_force_reindexing += len(model.get_es_indexable(force_reindexing=True))
            if_not_force_reindexing += len(model.get_es_indexable(force_reindexing=False))

        self.assertEqual(if_force_reindexing, 6)
        self.assertEqual(if_not_force_reindexing, 2)  # only new topic and post

        # just to be sure, let's explicitly look into the outcome:
        self.assertNotIn(topic.pk, [t.pk for t in Topic.get_es_indexable(force_reindexing=False)])
        self.assertIn(new_topic.pk, [t.pk for t in Topic.get_es_indexable(force_reindexing=False)])
        self.assertNotIn(post.pk, [p.pk for p in Post.get_es_indexable(force_reindexing=False)])
        self.assertIn(new_post.pk, [p.pk for p in Post.get_es_indexable(force_reindexing=False)])

        published.es_flagged = True  # since force reindexation deletes the chapter, let's reindex the content
        published.save()
        self.manager.refresh_index()

        for model in self.indexable[1:]:  # ok, so let's index that
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        results = self._search_all()
        self.assertEqual(len(results), 6)  # good!

        # 3. Test single deletion:
        new_post = Post.objects.get(pk=new_post.pk)

        self.manager.delete_document(new_post)
        self.manager.refresh_index()

        results = self._search_all()
        self.assertEqual(len(results), 5)  # one is missing

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != new_post.es_id)

        # 4. Test "delete_by_query_deletion":
        topic = Topic.objects.get(pk=topic.pk)
        new_topic = Topic.objects.get(pk=new_topic.pk)

        self.manager.delete_by_query(Topic.get_es_document_type(), MatchAll())  # the two topics are deleted
        self.manager.refresh_index()

        results = self._search_all()
        self.assertEqual(len(results), 3)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != new_topic.es_id)
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != topic.es_id)

        # 5. Test that the deletion of an object also triggers its deletion in ES
        post = Post.objects.get(pk=post.pk)
        post.delete()
        self.manager.refresh_index()

        results = self._search_all()
        self.assertEqual(len(results), 2)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != post.es_id)

        # 6. Test full desindexation:
        for model in self.indexable:
            self.manager.clear_indexing_of_model(model)

        # note "topic" is gone since "post" is gone, due to relationships at the Django level
        new_topic = Topic.objects.get(pk=new_topic.pk)
        new_post = Post.objects.get(pk=new_post.pk)

        self.assertFalse(new_topic.es_already_indexed)
        self.assertTrue(new_topic.es_flagged)
        self.assertFalse(new_post.es_already_indexed)
        self.assertTrue(new_post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

    def test_special_case_of_contents(self):
        """Test that the old publishedcontent does not stay when a new one is created"""
        self._skip_unless_connected()

        # 1. Create a middle-tutorial, publish it, then index it
        tuto, _ = self._create_published_tutorial()

        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(first_publication.es_already_indexed)
        self.assertFalse(first_publication.es_flagged)

        results = self._search_all()
        self.assertEqual(len(results), 2)  # get 2 results, one for the content and one for the chapter

        self.assertEqual(PublishedContent.objects.count(), 1)

        # 2. Change the title, which will trigger a change in the slug
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        versioned = tuto.load_version(sha=tuto.sha_draft)

        tuto.title = u'un titre complètement différent!'
        tuto.save()

        versioned.repo_update_top_container(tuto.title, tuto.slug, u'osef', u'osef')
        second_publication = publish_content(tuto, versioned, True)

        tuto.sha_public = versioned.current_version
        tuto.sha_draft = versioned.current_version
        tuto.public_version = second_publication
        tuto.save()

        self.assertEqual(PublishedContent.objects.count(), 2)  # now there are two objects ...
        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        self.assertTrue(first_publication.must_redirect)  # .. including the first one, for redirection

        self.manager.refresh_index()

        results = self._search_all()
        self.assertEqual(len(results), 0)  # the old one is gone (and we need to reindex to get the new one)

        # 3. Check if indexation brings the new one, and not the old one
        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        second_publication = PublishedContent.objects.get(pk=second_publication.pk)

        results = self._search_all()
        self.assertEqual(len(results), 2)  # Still 2, not 4 !

        found_old = False
        found_new = False

        for hit in results:
            if hit.meta.doc_type == PublishedContent.get_es_document_type():
                if hit.meta.id == first_publication.es_id:
                    found_old = True
                if hit.meta.id == second_publication.es_id:
                    found_new = True

        self.assertTrue(found_new)
        self.assertFalse(found_old)

    def tearDown(self):
        """Remove the directories written by the test, re-enable the PDF build and delete the index."""
        test_directories = (
            settings.ZDS_APP['content']['repo_private_path'],
            settings.ZDS_APP['content']['repo_public_path'],
            settings.MEDIA_ROOT,
        )
        for directory in test_directories:
            if os.path.isdir(directory):
                shutil.rmtree(directory)

        # re-activate PDF build
        settings.ZDS_APP['content']['build_pdf_when_published'] = True

        # delete index:
        self.manager.clear_es_index()