Skip to content

Commit

Permalink
Merge pull request #511 from fomars/develop
Browse files Browse the repository at this point in the history
fix LUNAPARK-2927: ignore quotes in tags
  • Loading branch information
fomars committed Feb 20, 2018
2 parents 23ea9eb + e6fb04c commit 787673a
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 7 deletions.
7 changes: 5 additions & 2 deletions yandextank/aggregator/tank_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,14 @@ def __init__(self, generator):
self.drain = None
self.stats_drain = None

@staticmethod
def load_config():
    """Return the default aggregator configuration shipped with the package.

    Reads ``config/phout.json`` from the package's bundled resources and
    parses it into a Python object.
    """
    raw = resource_string(__name__, 'config/phout.json')
    return json.loads(raw.decode('utf8'))

def start_test(self):
self.reader = self.generator.get_reader()
self.stats_reader = self.generator.get_stats_reader()
aggregator_config = json.loads(
resource_string(__name__, 'config/phout.json').decode('utf8'))
aggregator_config = self.load_config()
verbose_histogram = True
if verbose_histogram:
logger.info("using verbose histogram")
Expand Down
3 changes: 3 additions & 0 deletions yandextank/aggregator/tests/phout2927
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1502376593.698 "Technology 797 208 12 521 56 670 31 315 0 404
1502376594.699 "/v1/tech/ru-RU/latest/maps/jsapi", 750 206 11 452 81 602 24 315 0 404
1502376597.698 #3 669 146 9 410 104 581 18 315 0 404
5 changes: 5 additions & 0 deletions yandextank/aggregator/tests/phout2927res.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
{"tagged": {"\"Technology": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 521, "total": 521, "len": 1, "min": 521}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0]}, "min": 797, "max": 797, "len": 1, "hist": {"data": [1], "bins": [800.0]}, "total": 797}, "interval_event": {"max": 670, "total": 670, "len": 1, "min": 670}, "receive_time": {"max": 56, "total": 56, "len": 1, "min": 56}, "connect_time": {"max": 208, "total": 208, "len": 1, "min": 208}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 31, "total": 31, "len": 1, "min": 31}, "send_time": {"max": 12, "total": 12, "len": 1, "min": 12}, "net_code": {"count": {"0": 1}}}}, "overall": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 521, "total": 521, "len": 1, "min": 521}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0, 797.0]}, "min": 797, "max": 797, "len": 1, "hist": {"data": [1], "bins": [800.0]}, "total": 797}, "interval_event": {"max": 670, "total": 670, "len": 1, "min": 670}, "receive_time": {"max": 56, "total": 56, "len": 1, "min": 56}, "connect_time": {"max": 208, "total": 208, "len": 1, "min": 208}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 31, "total": 31, "len": 1, "min": 31}, "send_time": {"max": 12, "total": 12, "len": 1, "min": 12}, "net_code": {"count": {"0": 1}}}, "ts": 1502376593},
{"tagged": {"\"/v1/tech/ru-RU/latest/maps/jsapi\",": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 452, "total": 452, "len": 1, "min": 452}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0]}, "min": 750, "max": 750, "len": 1, "hist": {"data": [1], "bins": [760.0]}, "total": 750}, "interval_event": {"max": 602, "total": 602, "len": 1, "min": 602}, "receive_time": {"max": 81, "total": 81, "len": 1, "min": 81}, "connect_time": {"max": 206, "total": 206, "len": 1, "min": 206}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 24, "total": 24, "len": 1, "min": 24}, "send_time": {"max": 11, "total": 11, "len": 1, "min": 11}, "net_code": {"count": {"0": 1}}}}, "overall": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 452, "total": 452, "len": 1, "min": 452}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0, 750.0]}, "min": 750, "max": 750, "len": 1, "hist": {"data": [1], "bins": [760.0]}, "total": 750}, "interval_event": {"max": 602, "total": 602, "len": 1, "min": 602}, "receive_time": {"max": 81, "total": 81, "len": 1, "min": 81}, "connect_time": {"max": 206, "total": 206, "len": 1, "min": 206}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 24, "total": 24, "len": 1, "min": 24}, "send_time": {"max": 11, "total": 11, "len": 1, "min": 11}, "net_code": {"count": {"0": 1}}}, "ts": 1502376594},
{"tagged": {"": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 410, "total": 410, "len": 1, "min": 410}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0]}, "min": 669, "max": 669, "len": 1, "hist": {"data": [1], "bins": [670.0]}, "total": 669}, "interval_event": {"max": 581, "total": 581, "len": 1, "min": 581}, "receive_time": {"max": 104, "total": 104, "len": 1, "min": 104}, "connect_time": {"max": 146, "total": 146, "len": 1, "min": 146}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 18, "total": 18, "len": 1, "min": 18}, "send_time": {"max": 9, "total": 9, "len": 1, "min": 9}, "net_code": {"count": {"0": 1}}}}, "overall": {"size_in": {"max": 315, "total": 315, "len": 1, "min": 315}, "latency": {"max": 410, "total": 410, "len": 1, "min": 410}, "interval_real": {"q": {"q": [50, 75, 80, 85, 90, 95, 98, 99, 100], "value": [669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669.0]}, "min": 669, "max": 669, "len": 1, "hist": {"data": [1], "bins": [670.0]}, "total": 669}, "interval_event": {"max": 581, "total": 581, "len": 1, "min": 581}, "receive_time": {"max": 104, "total": 104, "len": 1, "min": 104}, "connect_time": {"max": 146, "total": 146, "len": 1, "min": 146}, "proto_code": {"count": {"404": 1}}, "size_out": {"max": 18, "total": 18, "len": 1, "min": 18}, "send_time": {"max": 9, "total": 9, "len": 1, "min": 9}, "net_code": {"count": {"0": 1}}}, "ts": 1502376597}
]
26 changes: 23 additions & 3 deletions yandextank/aggregator/tests/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import json

import numpy as np
from pkg_resources import resource_filename
import pytest
from queue import Queue

from yandextank.aggregator import TankAggregator
from yandextank.aggregator.aggregator import Aggregator, DataPoller
from yandextank.aggregator.chopper import TimeChopper

from conftest import MAX_TS, random_split
from yandextank.common.util import Drain

with open(resource_filename("yandextank.aggregator", 'config/phout.json')) as f:
AGGR_CONFIG = json.load(f)
from yandextank.plugins.Phantom.reader import string_to_df

AGGR_CONFIG = TankAggregator.load_config()


class TestPipeline(object):
Expand Down Expand Up @@ -49,3 +52,20 @@ def producer():
drain = Drain(pipeline, results_queue)
drain.run()
assert results_queue.qsize() == MAX_TS

@pytest.mark.parametrize('phout, results', [
    ('yandextank/aggregator/tests/phout2927', 'yandextank/aggregator/tests/phout2927res.jsonl')
])
def test_invalid_ammo(self, phout, results):
    """Malformed phout lines (e.g. stray quotes in tags) must not break aggregation.

    Feeds a fixture containing quoted/invalid tag fields through the full
    aggregation pipeline and compares each produced data point against the
    pre-recorded expected output (one JSON object per aggregated second).
    """
    # Parse every phout line into a frame up front; the reader tolerates
    # quote characters in the tag column (LUNAPARK-2927).
    with open(phout) as phout_file:
        chunks = [string_to_df(line) for line in phout_file]
    with open(results) as expected_file:
        expected = json.load(expected_file)
    aggregated = Aggregator(
        TimeChopper(DataPoller(source=chunks, poll_period=0), cache_size=3),
        AGGR_CONFIG,
        True)
    for produced, reference in zip(aggregated, expected):
        assert produced == reference
12 changes: 10 additions & 2 deletions yandextank/plugins/Phantom/reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Phantom phout format reader. Read chunks from phout and produce data frames
"""
from _csv import QUOTE_NONE

import pandas as pd
import numpy as np
import logging
Expand All @@ -9,6 +11,8 @@
import datetime
import itertools as itt

from pandas.parser import CParserError

from yandextank.common.interfaces import StatsReader

try:
Expand Down Expand Up @@ -42,8 +46,12 @@

def string_to_df(data):
start_time = time.time()
chunk = pd.read_csv(
StringIO(data), sep='\t', names=phout_columns, dtype=dtypes)
try:
chunk = pd.read_csv(StringIO(data), sep='\t', names=phout_columns, dtype=dtypes, quoting=QUOTE_NONE)
except CParserError as e:
logger.error(e.message)
logger.error('Incorrect phout data: {}'.format(data))
return

chunk['receive_ts'] = chunk.send_ts + chunk.interval_real / 1e6
chunk['receive_sec'] = chunk.receive_ts.astype(np.int64)
Expand Down

0 comments on commit 787673a

Please sign in to comment.