Skip to content

Commit

Permalink
Dedup Policy Tests (#613)
Browse files Browse the repository at this point in the history
* dedup tests: add basic tests for dedup system, continuing from #611
- ensure config merge works correctly
  • Loading branch information
ikreymer committed Jan 27, 2021
1 parent aee458b commit 78a9888
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
6 changes: 6 additions & 0 deletions pywb/warcserver/warcserver.py
Expand Up @@ -62,9 +62,15 @@ def __init__(self, config_file='./config.yaml', custom_config=None):
if 'proxy' in custom_config and 'proxy' in config:
custom_config['proxy'].update(config['proxy'])
if 'recorder' in custom_config and 'recorder' in config:
if isinstance(custom_config['recorder'], str):
custom_config['recorder'] = {'source_coll': custom_config['recorder']}

if isinstance(config['recorder'], str):
config['recorder'] = {'source_coll': config['recorder']}

config['recorder'].update(custom_config['recorder'])
custom_config['recorder'] = config['recorder']

config.update(custom_config)

super(WarcServer, self).__init__(debug=config.get('debug', False))
Expand Down
12 changes: 12 additions & 0 deletions tests/config_test_record_dedup.yaml
@@ -0,0 +1,12 @@
# pywb test configuration used by the recorder dedup tests.
debug: true

collections_root: _test_colls

# Recorder settings: the collection to record from and the dedup policy.
# These keys must be nested under `recorder` to be read as a mapping.
recorder:
  source_coll: live
  dedup_policy: skip

collections:
  'live': '$live'


52 changes: 52 additions & 0 deletions tests/test_record_dedup.py
@@ -0,0 +1,52 @@
from .base_config_test import BaseConfigTest, CollsDirMixin, BaseTestClass
from pywb.manager.manager import main as manager
from pywb.warcserver.test.testutils import to_path, HttpBinLiveTests, FakeRedisTests

from fakeredis import FakeStrictRedis

from warcio import ArchiveIterator

import os
import time
import json

import pytest


# ============================================================================
class TestRecordDedup(HttpBinLiveTests, CollsDirMixin, BaseConfigTest, FakeRedisTests, BaseTestClass):
    """Record the same URL twice and check that only one capture is kept.

    The app is configured from ``config_test_record_dedup.yaml``. Later tests
    inspect state (collection directory, redis index, WARC file) left behind by
    earlier ones, so they are presumably meant to run in definition order.
    """

    @classmethod
    def setup_class(cls):
        """Start the test app and open a fake redis handle for inspection."""
        # 'recorder' is given as a plain string here while the YAML config is
        # loaded alongside it, so both are passed through the config merge.
        super(TestRecordDedup, cls).setup_class(
            'config_test_record_dedup.yaml',
            custom_config={'recorder': 'live'},
        )
        cls.redis = FakeStrictRedis.from_url("redis://localhost/0")

    def test_init_coll(self):
        """Create the ``test-dedup`` collection and verify its archive dir."""
        manager(['init', 'test-dedup'])

        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        assert os.path.isdir(archive_dir)

    def test_record_1(self):
        """Fetch the same httpbin URL twice through the record endpoint."""

        def fetch_and_check():
            # One recorded request; the echoed query arg confirms the response.
            resp = self.testapp.get(
                '/test-dedup/record/mp_/http://httpbin.org/get?A=B',
                headers={"Referer": "http://httpbin.org/"},
            )
            assert '"A": "B"' in resp.text

        fetch_and_check()

        # NOTE(review): presumably waits so the second request lands on a
        # different timestamp second than the first -- confirm.
        time.sleep(1.2)

        fetch_and_check()

    def test_single_redis_entry(self):
        """The collection's cdxj sorted set should hold exactly one entry."""
        entries = self.redis.zrange("pywb:test-dedup:cdxj", 0, -1)
        assert len(entries) == 1

    def test_single_warc_record(self):
        """Exactly one WARC file exists, holding one response/request pair."""
        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        warc_names = os.listdir(archive_dir)
        assert len(warc_names) == 1

        with open(os.path.join(archive_dir, warc_names[0]), 'rb') as stream:
            rec_types = [rec.rec_type for rec in ArchiveIterator(stream)]

        # ensure only one response/request pair written
        assert rec_types == ['response', 'request']

0 comments on commit 78a9888

Please sign in to comment.