-
Notifications
You must be signed in to change notification settings - Fork 0
/
pgday_02_02_24
624 lines (523 loc) · 24.4 KB
/
pgday_02_02_24
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// SETUP //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
# Start a detached PostgreSQL 16 container named "postgres" (superuser password: postgres).
docker run -d -it -e POSTGRES_PASSWORD=postgres --name postgres postgres:16
# Open a shell inside the container; the remaining commands are typed in that shell.
docker exec -it postgres bash
# Install git for the clone below; wget and bzip2 are presumably needed by the download script.
apt update
apt install -y git wget bzip2
# Fetch the omdb-postgresql repository and work from its directory.
git clone https://github.com/credativ/omdb-postgresql.git
cd omdb-postgresql
# Presumably fetches the OMDB data files -- see the repository's README to confirm.
./download
# Switch to the postgres OS user before importing and connecting.
su postgres
# Presumably creates and loads the "omdb" database that psql connects to next.
./import
psql omdb
-- Indexes for category lookups by name and by parent, and for the
-- movie/category link table by category.
CREATE INDEX ON categories (name);
CREATE INDEX ON categories (parent_id);
CREATE INDEX ON movie_categories (category_id);
-- Refresh planner statistics for the three tables used below.
ANALYZE categories, movie_categories, movies;
-- Turn the psql pager off so results print straight to the terminal.
\pset pager off
////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// CONTEXT //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
\d+ movies
\d+ movie_categories
\d+ categories
// We'll choose some movie
SELECT m.id, m.name
FROM movies AS m
WHERE m.id = 77;
id | name
----+---------
77 | Memento
(1 row)
// It has 18 categories
SELECT mc.movie_id, mc.category_id
FROM movie_categories AS mc
WHERE mc.movie_id = 77;
movie_id | category_id
----------+-------------
77 | 49
77 | 79
77 | 80
77 | 134
77 | 141
77 | 219
77 | 301
77 | 303
77 | 329
77 | 684
77 | 1086
77 | 4865
77 | 10214
77 | 10215
77 | 10222
77 | 10225
77 | 10228
77 | 10282
(18 rows)
// What are those categories ?
SELECT cat.name, cat.parent_id
FROM movie_categories AS link
INNER JOIN categories AS cat ON cat.id = link.category_id
WHERE link.movie_id = 77;
name | parent_id
-----------------------------+-----------
Cult Favorite | 4
Mysterie | 53
Crime | 1
Justice Drama | 18
Psychological Thriller | 53
Psychodrama | 18
Black And White Film | 10
Independent Film | 7
Backflash | 1030
Non-linear Timeline | 1030
neo-noir | 1080
Slow Motion | 1057
Exciting | 10213
Emotionen > Spannend | 10213
Intellectually Stimulating | 10213
Bleak | 10213
Goofy | 10213
Nominated for Academy Award | 4
(18 rows)
// We have parent categories
// Now try to fetch categories AND their parent categories
//
-- Seed with the categories attached to movie 77, then keep adding each
-- row's parent category. UNION removes ancestors reached more than once.
WITH RECURSIVE lineage AS (
    SELECT cat.name, cat.id, cat.parent_id, cat.root_id
    FROM movie_categories AS link
    INNER JOIN categories AS cat ON cat.id = link.category_id
    WHERE link.movie_id = 77
  UNION
    SELECT parent.name, parent.id, parent.parent_id, parent.root_id
    FROM lineage AS child
    INNER JOIN categories AS parent ON parent.id = child.parent_id
)
SELECT lineage.name, lineage.id, lineage.parent_id
FROM lineage
ORDER BY lineage.id, lineage.parent_id;
name | id | parent_id
-----------------------------+-------+-----------
Genre | 1 |
Standing | 4 |
Filmmaking Movement | 6 |
Production | 7 |
Term | 9 |
Technical Format | 10 | 9
Drama | 18 | 1
Cult Favorite | 49 | 4
Thriller | 53 | 1
Mysterie | 79 | 53
Crime | 80 | 1
Justice Drama | 134 | 18
Psychological Thriller | 141 | 53
Psychodrama | 219 | 18
Black And White Film | 301 | 10
Independent Film | 303 | 7
Backflash | 329 | 1030
Non-linear Timeline | 684 | 1030
Time in Film | 1030 | 9
Post Production | 1056 | 9
Visual Effects (VFX) | 1057 | 1056
US-American Cinema | 1080 | 3277
neo-noir | 1086 | 1080
North American Cinema | 3277 | 6
Slow Motion | 4865 | 1057
Emotions | 10213 | 9
Exciting | 10214 | 10213
Emotionen > Spannend | 10215 | 10213
Intellectually Stimulating | 10222 | 10213
Bleak | 10225 | 10213
Goofy | 10228 | 10213
Nominated for Academy Award | 10282 | 4
National Film Registry | 19877 | 4
(33 rows)
// We now get more rows (33 instead of 18), including important ancestor categories such as 'Thriller'
////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// PROBLEM //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now, how do we find movies with Thriller + Justice Drama + Nominated for Academy Award?
// naive join wouldn't work
-- Naive attempt: keep movies tagged directly with all three category names.
-- Grouping by m.id (not only by the title) keeps different movies that share
-- a title apart, and counting DISTINCT names stops two categories with the
-- same name from being counted twice.
-- NOTE: the result listing that follows in this file was captured with the
-- earlier GROUP BY on the title alone; re-run the query to refresh it.
SELECT m.name
FROM movies AS m
INNER JOIN movie_categories AS mc ON mc.movie_id = m.id
INNER JOIN categories AS c ON c.id = mc.category_id
WHERE c.name IN ('Thriller', 'Justice Drama', 'Nominated for Academy Award')
GROUP BY m.id, m.name
-- 3 = number of requested category names
HAVING count(DISTINCT c.name) = 3
ORDER BY m.name;
name
------------------------
Blackout
Collateral
Conviction
Flatliners
Ground Zero
Hunger
Hush
Kojak
Les misérables
Melissa
Michael Clayton
Money
Paranoia
Ransom
Rear Window
RoboCop
Season 3
Season 4
Season 5
Sleuth
Straw Dogs
The Firm
The French Connection
The Godfather Part III
The Letter
The Reckoning
Traffic
Venom
// => NO memento
// We have to use RECURSIVE CTE + a GROUP BY trick
-- Expand each requested category into itself plus all of its descendants.
-- "group" carries the requested category NAME down the whole subtree, so a
-- match at any depth is credited to the category that was asked for.
-- (Labelling rows with the direct parent's id would split a subtree deeper
-- than one level into several groups, and id-based groups would count two
-- categories that share a name as two different groups.)
WITH RECURSIVE cte_cs AS (
    SELECT c.id, c.name AS "group", c.name
    FROM categories AS c
    WHERE c.name IN ('Thriller', 'Nominated for Academy Award', 'Justice Drama')
  UNION
    SELECT c2.id, cte_cs."group", c2.name
    FROM categories AS c2
    INNER JOIN cte_cs ON cte_cs.id = c2.parent_id
)
SELECT m.id, m.name, string_agg(cte_cs.name, ',')
FROM movies AS m
INNER JOIN movie_categories AS mc ON mc.movie_id = m.id
INNER JOIN cte_cs ON cte_cs.id = mc.category_id
GROUP BY m.id, m.name
-- 3 = number of requested category names; every one must be represented
HAVING count(DISTINCT cte_cs."group") = 3
ORDER BY m.id;
id | name | string_agg
-------+-----------------------------+----------------------------------------------------------------------------------
77 | Memento | Mysterie,Justice Drama,Nominated for Academy Award,Psychological Thriller
180 | Minority Report | Justice Drama,Nominated for Academy Award,Political Thriller
240 | The Godfather Part II | Justice Drama,Conspiracy Thriller,Nominated for Academy Award
242 | The Godfather Part III | Nominated for Academy Award,Thriller,Justice Drama
334 | Magnolia | Nominated for Academy Award,Psychological Thriller,Justice Drama
729 | The Firm | Thriller,Justice Drama,Nominated for Academy Award
737 | Witness for the Prosecution | Thriller,Courtroom Drama,Nominated for Academy Award
1051 | The French Connection | Justice Drama,Nominated for Academy Award,Thriller
1538 | Collateral | Thriller,Justice Drama,Psychological Thriller,Nominated for Academy Award
3580 | Changeling | Justice Drama,Conspiracy Thriller,Courtroom Drama,Nominated for Academy Award
4566 | Michael Clayton | Nominated for Academy Award,Thriller,Justice Drama
9008 | The Insider | Justice Drama,Nominated for Academy Award,Conspiracy Thriller,Political Thriller
61650 | The Letter | Thriller,Nominated for Academy Award,Courtroom Drama
(13 rows)
// some are here because of subcategories ('Courtroom Drama', subcategory of 'Justice Drama' for example)
-- Plan for the recursive "all three categories" search. "group" carries the
-- requested category name down the whole subtree, so descendants at any
-- depth count towards the category that was asked for.
-- NOTE: the plan recorded below in this file was captured with the earlier
-- form of the recursive term (group = direct parent's id); re-run EXPLAIN to
-- refresh the exact costs and widths.
omdb=# EXPLAIN WITH RECURSIVE cte_cs AS (
    SELECT c.id, c.name AS "group", c.name
    FROM categories AS c
    WHERE c.name IN ('Thriller', 'Nominated for Academy Award', 'Justice Drama')
  UNION
    SELECT c2.id, cte_cs."group", c2.name
    FROM categories AS c2
    INNER JOIN cte_cs ON cte_cs.id = c2.parent_id
)
SELECT m.id, m.name, string_agg(cte_cs.name, ',')
FROM movies AS m
INNER JOIN movie_categories AS mc ON mc.movie_id = m.id
INNER JOIN cte_cs ON cte_cs.id = mc.category_id
GROUP BY m.id, m.name
-- 3 = number of requested category names; every one must be represented
HAVING count(DISTINCT cte_cs."group") = 3
ORDER BY m.id;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=137395.76..147278.68 rows=1121 width=57)
Group Key: m.id
Filter: (count(DISTINCT cte_cs."group") = 3)
CTE cte_cs
-> Recursive Union (cost=12.88..4315.36 rows=1553 width=27)
-> Bitmap Heap Scan on categories c (cost=12.88..23.87 rows=3 width=27)
Recheck Cond: (name = ANY ('{Thriller,"Nominated for Academy Award","Justice Drama"}'::text[]))
-> Bitmap Index Scan on categories_name_idx (cost=0.00..12.88 rows=3 width=0)
Index Cond: (name = ANY ('{Thriller,"Nominated for Academy Award","Justice Drama"}'::text[]))
-> Hash Join (cost=0.97..427.60 rows=155 width=27)
Hash Cond: (c2.parent_id = cte_cs_1.id)
-> Seq Scan on categories c2 (cost=0.00..368.87 rows=14987 width=27)
-> Hash (cost=0.60..0.60 rows=30 width=8)
-> WorkTable Scan on cte_cs cte_cs_1 (cost=0.00..0.60 rows=30 width=8)
-> Sort (cost=133080.40..134710.19 rows=651914 width=65)
Sort Key: m.id, cte_cs."group"
-> Hash Join (cost=10089.34..43384.02 rows=651914 width=65)
Hash Cond: (mc.movie_id = m.id)
-> Merge Join (cost=113.67..18704.04 rows=651914 width=48)
Merge Cond: (mc.category_id = cte_cs.id)
-> Index Scan using movie_categories_category_id_idx on movie_categories mc (cost=0.29..8317.67 rows=197715 width=16)
-> Sort (cost=113.38..117.26 rows=1553 width=48)
Sort Key: cte_cs.id
-> CTE Scan on cte_cs (cost=0.00..31.06 rows=1553 width=48)
-> Hash (cost=5639.52..5639.52 rows=224252 width=25)
-> Seq Scan on movies m (cost=0.00..5639.52 rows=224252 width=25)
JIT:
Functions: 39
Options: Inlining false, Optimization false, Expressions true, Deforming true
(29 rows)
////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////// ARRAY //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now let's see how we can aggregate all of this into arrays using array_agg.
-- For every movie, collect its categories plus all their ancestors, then
-- fold the category names into one array per movie.
WITH RECURSIVE movie_lineage AS (
    SELECT link.movie_id, cat.id, cat.name, cat.parent_id
    FROM movie_categories AS link
    INNER JOIN categories AS cat ON cat.id = link.category_id
  UNION
    SELECT child.movie_id, parent.id, parent.name, parent.parent_id
    FROM movie_lineage AS child
    INNER JOIN categories AS parent ON parent.id = child.parent_id
),
names_per_movie AS (
    SELECT movie_lineage.movie_id, array_agg(movie_lineage.name) AS array_agg
    FROM movie_lineage
    GROUP BY movie_lineage.movie_id
)
SELECT npm.movie_id, mv.name, npm.array_agg
FROM names_per_movie AS npm
INNER JOIN movies AS mv ON mv.id = npm.movie_id
ORDER BY npm.movie_id
LIMIT 5 OFFSET 51;
movie_id | name | array_agg
------------------------------------------------------------------------------------------------------------
73 | American History X | {"Social Drama",Crime,Violence,"US-American Cinema",Ernst,Bleak,Disquieting,"Nominated for Academy Award",Drama,Genre,Adult,"North American Cinema",Emotions,Standing,Audience,"Filmmaking Movement",Term}
74 | War of the Worlds | {Familienfilm,"Hollywood Film","Apocalypse & Post-Apocalypse","Family Drama",Blockbuster,"Disaster Film",Novel,"US-American Cinema","Popcorn Movie",Mainstream,Remake,Audience,Production,Sci-Fi,Drama,Standing,Genre,"Literary Fiction","North American Cinema",Source,"Filmmaking Movement"}
75 | Mars Attacks! | {"Black Comedy",Satire,"Multiple Roles",Cameo,Sci-Fi,"US-American Cinema","Popcorn Movie",Mainstream,Hommage,35-mm,"Actors Playing Themselves",Funny,Feel-good,"Intellectually Stimulating",Goofy,Comedy,Actors,"Guest Appearance",Genre,"North American Cinema",Standing,Class,"Filming Material",Emotions,Characters,"Filmmaking Movement","Technical Format",Term}
76 | Before Sunrise | {"Romance Film","Low Budget Film","Romantic Comedy","Independent Film","Chick Flick",Romantic,Berührend,"Intellectually Stimulating","Romantic Drama",Production,Comedy,Audience,Emotions,Drama,Genre,Term}
77 | Memento | {"Cult Favorite",Mysterie,Crime,"Justice Drama","Psychological Thriller",Psychodrama,"Black And White Film","Independent Film",Backflash,"Non-linear Timeline",neo-noir,"Slow Motion",Exciting,"Emotionen > Spannend","Intellectually Stimulating",Bleak,Goofy,"Nominated for Academy Award",Standing,Thriller,Genre,Drama,"Technical Format",Production,"Time in Film","US-American Cinema","Visual Effects (VFX)",Emotions,Term,"North American Cinema","Post Production","Filmmaking Movement"}
(5 rows)
///////////////////////////////////
///// New movies table ! ////////
/////////////////////////////////
// Copy table
-- Work on a copy of movies (same columns, defaults, constraints and indexes).
CREATE TABLE movies_array (LIKE movies INCLUDING ALL);
INSERT INTO movies_array
SELECT * FROM movies;
-- The new column: every category name that applies to the movie.
-- The author chose varchar(64)[] over text[], citing TOAST storage.
-- NOTE(review): PostgreSQL decides on TOAST by value size, not by element
-- type -- confirm this rationale before relying on it.
ALTER TABLE movies_array
    ADD COLUMN categories varchar(64)[];
// Now write, for each movie, an array of all its categories (ancestors included)
BEGIN;
-- Expand every movie's categories to include all ancestors, aggregate the
-- names per movie, and store the array in movies_array.categories.
WITH RECURSIVE movie_lineage AS (
    SELECT link.movie_id, cat.id, cat.name, cat.parent_id
    FROM movie_categories AS link
    INNER JOIN categories AS cat ON cat.id = link.category_id
  UNION
    SELECT child.movie_id, parent.id, parent.name, parent.parent_id
    FROM movie_lineage AS child
    INNER JOIN categories AS parent ON parent.id = child.parent_id
),
names_per_movie AS (
    SELECT movie_lineage.movie_id, array_agg(movie_lineage.name) AS category_names
    FROM movie_lineage
    GROUP BY movie_lineage.movie_id
)
UPDATE movies_array AS mar
SET categories = names_per_movie.category_names
FROM names_per_movie
WHERE names_per_movie.movie_id = mar.id;
-- Eyeball the first rows before committing.
SELECT id, name, categories
FROM movies_array
ORDER BY id
LIMIT 15;
COMMIT;
-- GIN index on the array column, then fresh statistics for the planner.
CREATE INDEX ON movies_array USING GIN (categories);
ANALYZE movies_array;
///////////////////////////////////
///////// Now using it //////////
/////////////////////////////////
// Now fetch the films that have all of several categories
-- One containment test (@>) per required category name.
SELECT ma.id, ma.name
FROM movies_array AS ma
WHERE ma.categories @> ARRAY['Thriller']::varchar(64)[]
  AND ma.categories @> ARRAY['Nominated for Academy Award']::varchar(64)[]
  AND ma.categories @> ARRAY['Justice Drama']::varchar(64)[]
ORDER BY ma.id;
id | name
-------+-----------------------------
77 | Memento
180 | Minority Report
240 | The Godfather Part II
242 | The Godfather Part III
334 | Magnolia
729 | The Firm
737 | Witness for the Prosecution
1051 | The French Connection
1538 | Collateral
3580 | Changeling
4566 | Michael Clayton
9008 | The Insider
61650 | The Letter
(13 rows)
// Can be simplified with
-- A single containment test with all three names in one array.
SELECT movies_array.id, movies_array.name
FROM movies_array
WHERE movies_array.categories
      @> ARRAY['Justice Drama', 'Nominated for Academy Award', 'Thriller']::varchar(64)[]
ORDER BY movies_array.id;
-- Same statement under EXPLAIN to inspect the plan.
EXPLAIN
SELECT movies_array.id, movies_array.name
FROM movies_array
WHERE movies_array.categories
      @> ARRAY['Justice Drama', 'Nominated for Academy Award', 'Thriller']::varchar(64)[]
ORDER BY movies_array.id;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Sort (cost=34.18..34.19 rows=1 width=25)
Sort Key: id
-> Bitmap Heap Scan on movies_array (cost=30.16..34.17 rows=1 width=25)
Recheck Cond: (categories @> '{"Justice Drama","Nominated for Academy Award",Thriller}'::character varying(64)[])
-> Bitmap Index Scan on movies_array_categories_idx (cost=0.00..30.16 rows=1 width=0)
Index Cond: (categories @> '{"Justice Drama","Nominated for Academy Award",Thriller}'::character varying(64)[])
(6 rows)
// we can even search "related" movies, by varying how many categories we filter on
-- && (overlap) keeps movies that have at least one of the listed categories.
SELECT ma.id, ma.name
FROM movies_array AS ma
WHERE ma.categories && ARRAY['Bleak', 'Justice Drama']::varchar(64)[]
ORDER BY ma.id;
-- Movies without any category are still easy to spot: their array is NULL.
SELECT ma.id, ma.name
FROM movies_array AS ma
WHERE ma.categories IS NULL;
////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////// LTREE //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////
/////// New category table ///////
/////////////////////////////////
-- ltree provides the ltree and ltxtquery types used below. IF NOT EXISTS
-- keeps this step re-runnable when the extension is already installed.
CREATE EXTENSION IF NOT EXISTS ltree;
-- Copy of categories that will receive the ltree-specific columns.
CREATE TABLE categories_ltree (LIKE categories INCLUDING ALL);
-- Explicit column lists so the copy does not depend on column order.
INSERT INTO categories_ltree (id, parent_id, root_id, name)
SELECT id, parent_id, root_id, name
FROM categories;
-- name_sanitized: the name rewritten as a valid ltree label;
-- name_tree: the full path from the root category down to this one.
ALTER TABLE categories_ltree
    ADD COLUMN name_sanitized text,
    ADD COLUMN name_tree ltree;
///////////////////////////////////
//// Sanitizing category name ////
/////////////////////////////////
-- Raw category names cannot be converted to ltree as-is, because they
-- contain spaces and symbols:
SELECT cl.id, cl.name, text2ltree(cl.name)
FROM categories_ltree AS cl
ORDER BY cl.id;
SELECT text2ltree('Apocalypse & Post-Apocalypse');
ERROR: ltree syntax error at character 11
-- One row per run of [a-zA-Z0-9_] characters found in each category name.
SELECT c.name, m.tokens AS regexp_matches
FROM categories AS c
CROSS JOIN LATERAL regexp_matches(c.name, '[a-zA-Z0-9_]+', 'g') AS m (tokens);
-- Same idea with \w and the position of each token inside the name.
-- The pattern has a single capture group, so each match array has one element.
SELECT c.name, (f.match)[1] AS unnest, f.sort_order
FROM categories AS c
CROSS JOIN LATERAL regexp_matches(c.name, '(\w+)', 'g') WITH ORDINALITY AS f (match, sort_order);
-- Preview: join the word tokens of each category name with '_'.
-- The ORDER BY inside string_agg pins the tokens to their original position;
-- ordering a subquery does not guarantee the aggregate's input order.
-- Grouping by id as well keeps two categories that share a name from being
-- merged into a single, doubled label.
SELECT c.name, string_agg((f.match)[1], '_' ORDER BY f.sort_order)
FROM categories AS c
CROSS JOIN LATERAL regexp_matches(c.name, '(\w+)', 'g') WITH ORDINALITY AS f (match, sort_order)
GROUP BY c.id, c.name;
// Now sanitize every category name
//
-- Store, for every category, its name rewritten as word tokens joined by '_'.
-- The ORDER BY inside string_agg guarantees the tokens keep their original
-- position; ordering a subquery does not bind the aggregate's input order.
-- A name without any word character produces no row here, so its
-- name_sanitized stays NULL.
WITH cte_sanitized AS (
    SELECT c.id,
           string_agg((f.match)[1], '_' ORDER BY f.sort_order) AS name
    FROM categories AS c
    CROSS JOIN LATERAL regexp_matches(c.name, '(\w+)', 'g') WITH ORDINALITY AS f (match, sort_order)
    GROUP BY c.id
)
UPDATE categories_ltree AS ctg_ltree
SET name_sanitized = cte_sanitized.name
FROM cte_sanitized
WHERE cte_sanitized.id = ctg_ltree.id;
// ltree is now compatible
SELECT cl.id, cl.name, cl.name_sanitized, text2ltree(cl.name_sanitized)
FROM categories_ltree AS cl
ORDER BY cl.id
LIMIT 10;
///////////////////////////////////
/////// Ltree categories ////////
/////////////////////////////////
// Setting proper tree on categories
-- Build each category's dotted path from its root downwards: roots start
-- with their own sanitized name, children append theirs to the parent's path.
WITH RECURSIVE paths AS (
    SELECT root.id, root.name_sanitized AS tree, root.parent_id
    FROM categories_ltree AS root
    WHERE root.parent_id IS NULL
  UNION
    SELECT child.id,
           parent.tree || '.' || child.name_sanitized AS tree,
           child.parent_id
    FROM paths AS parent
    INNER JOIN categories_ltree AS child ON child.parent_id = parent.id
)
UPDATE categories_ltree AS ctg_ltree
SET name_tree = text2ltree(paths.tree)
FROM paths
WHERE paths.id = ctg_ltree.id;
-- Index the paths right away and refresh the statistics.
CREATE INDEX ON categories_ltree USING GIST (name_tree);
ANALYZE categories_ltree;
// Few examples of LTREE
-- Categories whose path contains any of the three labels.
SELECT cl.id, cl.parent_id, cl.root_id, cl.name, cl.name_sanitized, cl.name_tree
FROM categories_ltree AS cl
WHERE cl.name_tree @ 'Justice_Drama | Thriller | Nominated_for_Academy_Award'::ltxtquery;
id | parent_id | root_id | name | name_sanitized | name_tree
-------+-----------+---------+-----------------------------+-----------------------------+--------------------------------------------------
10282 | 4 | 4 | Nominated for Academy Award | Nominated_for_Academy_Award | Standing.Nominated_for_Academy_Award
142 | 53 | 1 | Erotic Thriller | Erotic_Thriller | Genre.Thriller.Erotic_Thriller
16286 | 53 | 1 | Giallo | Giallo | Genre.Thriller.Giallo
106 | 53 | 1 | Political Thriller | Political_Thriller | Genre.Thriller.Political_Thriller
141 | 53 | 1 | Psychological Thriller | Psychological_Thriller | Genre.Thriller.Psychological_Thriller
53 | 1 | 1 | Thriller | Thriller | Genre.Thriller
76 | 53 | 1 | Conspiracy Thriller | Conspiracy_Thriller | Genre.Thriller.Conspiracy_Thriller
79 | 53 | 1 | Mysterie | Mysterie | Genre.Thriller.Mysterie
15176 | 13200 | 8 | Thriller | Thriller | Keyword.Plot.Arts_and_Culture.Film.Film.Thriller
134 | 18 | 1 | Justice Drama | Justice_Drama | Genre.Drama.Justice_Drama
135 | 134 | 1 | Courtroom Drama | Courtroom_Drama | Genre.Drama.Justice_Drama.Courtroom_Drama
(11 rows)
///////////////////////////////////
/// Adding LTREEs into movies ///
/////////////////////////////////
// new table again
-- Copy of movies that stores each movie's categories as ltree paths.
CREATE TABLE movies_arrayltree (LIKE movies INCLUDING ALL);
INSERT INTO movies_arrayltree
SELECT * FROM movies;
ALTER TABLE movies_arrayltree
    ADD COLUMN categories ltree[];
-- No recursion here: only the directly attached categories are aggregated,
-- each one as its name_tree path.
WITH paths_per_movie AS (
    SELECT link.movie_id, array_agg(cat.name_tree) AS category_paths
    FROM movie_categories AS link
    INNER JOIN categories_ltree AS cat ON cat.id = link.category_id
    GROUP BY link.movie_id
)
UPDATE movies_arrayltree AS mar
SET categories = paths_per_movie.category_paths
FROM paths_per_movie
WHERE paths_per_movie.movie_id = mar.id;
CREATE INDEX ON movies_arrayltree USING GIST (categories);
ANALYZE movies_arrayltree;
// now using it!
-- One ltxtquery test per required label; each test is applied to the
-- movie's array of category paths.
SELECT mal.id, mal.name
FROM movies_arrayltree AS mal
WHERE mal.categories @ 'Justice_Drama'::ltxtquery
  AND mal.categories @ 'Nominated_for_Academy_Award'::ltxtquery
  AND mal.categories @ 'Thriller'::ltxtquery
ORDER BY mal.id;
id | name
-------+-----------------------------
77 | Memento
180 | Minority Report
240 | The Godfather Part II
242 | The Godfather Part III
334 | Magnolia
729 | The Firm
737 | Witness for the Prosecution
1051 | The French Connection
1538 | Collateral
3580 | Changeling
4566 | Michael Clayton
9008 | The Insider
61650 | The Letter
(13 rows)
-- Plan for the three-label ltree search.
EXPLAIN
SELECT movies_arrayltree.id, movies_arrayltree.name
FROM movies_arrayltree
WHERE movies_arrayltree.categories @ 'Justice_Drama'::ltxtquery
  AND movies_arrayltree.categories @ 'Nominated_for_Academy_Award'::ltxtquery
  AND movies_arrayltree.categories @ 'Thriller'::ltxtquery
ORDER BY movies_arrayltree.id;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=8.31..8.32 rows=1 width=25)
Sort Key: id
-> Index Scan using movies_arrayltree_categories_idx on movies_arrayltree (cost=0.28..8.30 rows=1 width=25)
Index Cond: ((categories @ 'Justice_Drama'::ltxtquery) AND (categories @ 'Nominated_for_Academy_Award'::ltxtquery) AND (categories @ 'Thriller'::ltxtquery))
(4 rows)
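// Now we even have advanced search: ltxtquery supports & (and), | (or) and ! (not).
// Note: each condition is satisfied as soon as ONE path of the array matches it, so
// 'Justice_Drama & !Courtroom_Drama' keeps movies having at least one Justice_Drama path that is not under Courtroom_Drama.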
-- Each ltxtquery combines a required label with labels that the same path
-- must not contain.
SELECT mal.id, mal.name
FROM movies_arrayltree AS mal
WHERE mal.categories @ 'Justice_Drama & !Courtroom_Drama'::ltxtquery
  AND mal.categories @ 'Nominated_for_Academy_Award'::ltxtquery
  AND mal.categories @ 'Thriller & !Conspiracy_Thriller & !Political_Thriller'::ltxtquery
ORDER BY mal.id;
id | name
------+------------------------
77 | Memento
242 | The Godfather Part III
334 | Magnolia
729 | The Firm
1051 | The French Connection
1538 | Collateral
4566 | Michael Clayton
(7 rows)