diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClass.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClass.java index 99897d8a564..35ec45acba8 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClass.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClass.java @@ -75,8 +75,7 @@ private void addField(String name, DataType type, SummaryTransform transform) { SummaryClassField sf = fields.get(name); if (!SummaryClassField.convertDataType(type, transform).equals(sf.getType())) { deployLogger.log(Level.WARNING, "Conflicting definition of field " + name + ". " + - "Declared as type " + sf.getType() + " and " + - type); + "Declared as type " + sf.getType() + " and " + type); } } else { fields.put(name, new SummaryClassField(name, type, transform)); diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClassField.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClassField.java index 3416b15cd63..e1bfa90cc36 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClassField.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/SummaryClassField.java @@ -33,11 +33,12 @@ public enum Type { LONGDATA("longdata"), XMLSTRING("xmlstring"), FEATUREDATA("featuredata"), - JSONSTRING("jsonstring"); + JSONSTRING("jsonstring"), + TENSOR("tensor"); private String name; - private Type(String name) { + Type(String name) { this.name = name; } @@ -88,7 +89,7 @@ public static Type convertDataType(DataType fieldType, SummaryTransform transfor } else if (fval instanceof PredicateFieldValue) { return Type.STRING; } else if (fval instanceof TensorFieldValue) { - return Type.JSONSTRING; + return Type.TENSOR; } else if (fieldType instanceof CollectionDataType) { if (transform != null && transform.equals(SummaryTransform.POSITIONS)) { return Type.XMLSTRING; diff --git a/config-model/src/test/derived/tensor/summary.cfg b/config-model/src/test/derived/tensor/summary.cfg index 3882af1b70b..5e5507836b0 100644 --- a/config-model/src/test/derived/tensor/summary.cfg +++ b/config-model/src/test/derived/tensor/summary.cfg @@ -1,27 +1,27 @@ -defaultsummaryid 1113059691 -classes[0].id 1113059691 -classes[0].name "default" -classes[0].fields[0].name "f1" -classes[0].fields[0].type "jsonstring" -classes[0].fields[1].name "f3" -classes[0].fields[1].type "jsonstring" -classes[0].fields[2].name "f4" -classes[0].fields[2].type "jsonstring" -classes[0].fields[3].name "rankfeatures" -classes[0].fields[3].type "featuredata" -classes[0].fields[4].name "summaryfeatures" -classes[0].fields[4].type "featuredata" -classes[0].fields[5].name "documentid" -classes[0].fields[5].type "longstring" -classes[1].id 457955124 -classes[1].name "attributeprefetch" -classes[1].fields[0].name "f2" -classes[1].fields[0].type "jsonstring" -classes[1].fields[1].name "f3" -classes[1].fields[1].type "jsonstring" -classes[1].fields[2].name "f4" -classes[1].fields[2].type "jsonstring" -classes[1].fields[3].name "rankfeatures" -classes[1].fields[3].type "featuredata" -classes[1].fields[4].name "summaryfeatures" -classes[1].fields[4].type "featuredata" \ No newline at end of file +defaultsummaryid 289405525 +classes[].id 289405525 +classes[].name "default" +classes[].fields[].name "f1" +classes[].fields[].type "tensor" +classes[].fields[].name "f3" +classes[].fields[].type "tensor" +classes[].fields[].name "f4" +classes[].fields[].type "tensor" 
+classes[].fields[].name "rankfeatures" +classes[].fields[].type "featuredata" +classes[].fields[].name "summaryfeatures" +classes[].fields[].type "featuredata" +classes[].fields[].name "documentid" +classes[].fields[].type "longstring" +classes[].id 1860420340 +classes[].name "attributeprefetch" +classes[].fields[].name "f2" +classes[].fields[].type "tensor" +classes[].fields[].name "f3" +classes[].fields[].type "tensor" +classes[].fields[].name "f4" +classes[].fields[].type "tensor" +classes[].fields[].name "rankfeatures" +classes[].fields[].type "featuredata" +classes[].fields[].name "summaryfeatures" +classes[].fields[].type "featuredata" diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java index 0e54adae932..7046cb91cd3 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java @@ -16,11 +16,11 @@ import com.yahoo.data.access.simple.Value; import com.yahoo.data.access.Inspector; - /** - * @author Bj\u00f8rn Borud + * @author Bjørn Borud */ public class DataField extends DocsumField implements VariableLengthField { + public DataField(String name) { super(name); } @@ -67,4 +67,5 @@ public int sizeOfLength() { public Object convert(Inspector value) { return convert(value.asData(Value.empty().asData())); } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java index 2f0768d4e8b..3b414e288da 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java @@ -23,6 +23,7 @@ * @author Bjørn Borud */ public final class DocsumDefinitionSet { + public static final int SLIME_MAGIC_ID = 0x55555555; private final static Logger log = Logger.getLogger(DocsumDefinitionSet.class.getName()); @@ -73,6 +74,7 @@ public final void lazyDecode(String summaryClass, byte[] data, FastHit hit) { buffer.order(ByteOrder.LITTLE_ENDIAN); long docsumClassId = buffer.getInt(); if (docsumClassId != SLIME_MAGIC_ID) { + // TODO: Not used, remove - bratseth 2017-01-016 DocsumDefinition docsumDefinition = lookupDocsum(docsumClassId); Docsum docsum = new Docsum(docsumDefinition, data); hit.addSummary(docsum); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java index 3aa02f57a1e..1e14fea26b7 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java @@ -13,8 +13,8 @@ import com.yahoo.log.LogLevel; /** - * @author Bj\u00f8rn Borud - * @author Steinar Knutsen + * @author Bjørn Borud + * @author Steinar Knutsen */ public abstract class DocsumField { @@ -22,26 +22,26 @@ public abstract class DocsumField { private static FieldFactory fieldFactory; private static class FieldFactory { + Map> constructors = new HashMap<>(); - void put(final String typename, - final Class fieldClass) - throws NoSuchMethodException, SecurityException - { - final Constructor constructor = fieldClass.getConstructor(String.class); + void put(String typename, Class fieldClass) + throws NoSuchMethodException, SecurityException { + Constructor constructor = 
fieldClass.getConstructor(String.class); constructors.put(typename, constructor); } - DocsumField create(final String typename, final String name, final LegacyEmulationConfig emulConfig) + DocsumField create(String typename, String name, LegacyEmulationConfig emulConfig) throws InstantiationException, IllegalAccessException, - IllegalArgumentException, InvocationTargetException - { + IllegalArgumentException, InvocationTargetException { DocsumField f = constructors.get(typename).newInstance(name); f.emulConfig = emulConfig; return f; } } + private LegacyEmulationConfig emulConfig; + final LegacyEmulationConfig getEmulConfig() { return emulConfig; } static { @@ -61,27 +61,27 @@ DocsumField create(final String typename, final String name, final LegacyEmulati fieldFactory.put("jsonstring", StructDataField.class); fieldFactory.put("featuredata", FeatureDataField.class); fieldFactory.put("xmlstring", XMLField.class); - } catch (final Exception e) { - log.log(LogLevel.ERROR, - "Could not initialize docsum decoding properly.", e); + fieldFactory.put("tensor", TensorField.class); + } catch (Exception e) { + log.log(LogLevel.ERROR, "Could not initialize docsum decoding properly.", e); } } protected String name; - protected DocsumField(final String name) { + protected DocsumField(String name) { this.name = name; } /* for unit test only */ - static DocsumField create(final String name, final String typename) { + static DocsumField create(String name, String typename) { return create(name, typename, new LegacyEmulationConfig()); } - public static DocsumField create(final String name, final String typename, LegacyEmulationConfig emulConfig) { + public static DocsumField create(String name, String typename, LegacyEmulationConfig emulConfig) { try { return fieldFactory.create(typename, name, emulConfig); - } catch (final Exception e) { + } catch (Exception e) { throw new RuntimeException("Unknown field type '" + typename + "'", e); } } @@ -90,7 +90,7 @@ public String getName() { return name; } - public boolean isCompressed(final ByteBuffer b) { + public boolean isCompressed(ByteBuffer b) { return false; } @@ -106,8 +106,8 @@ public boolean isCompressed(final ByteBuffer b) { public abstract Object decode(ByteBuffer b); /** - * Get the number of bytes this field occupies in the given buffer and set - * the position of the first byte after this field. + * Get the number of bytes this field occupies in the given buffer + * AND SET(!) the position to the first byte after this field. */ public abstract int getLength(ByteBuffer b); @@ -116,4 +116,5 @@ public boolean isCompressed(final ByteBuffer b) { * for this field. **/ public abstract Object convert(Inspector value); + } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java index fe0c4a35d1e..91e67174c4c 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java @@ -174,8 +174,7 @@ public void setPartId(int partId, int rowBits) { } /** - * - * @param useRowInIndexUri Sets whether to use the row in the index uri. See FastSearcher for details. + * Sets whether to use the row in the index uri. See FastSearcher for details. 
*/ public void setUseRowInIndexUri(boolean useRowInIndexUri) { this.useRowInIndexUri = useRowInIndexUri; @@ -189,7 +188,7 @@ public int getColumn() { } /** - * @return the row number where this hit originated, or 0 if not known + * Returns the row number where this hit originated, or 0 if not known * */ public int getRow() { if (rowBits == 0) { @@ -214,17 +213,20 @@ public int getRow() { * filled returns the following types, even when the field has no actual value:

* * */ @Override @@ -277,10 +279,7 @@ void addSummary(DocsumDefinition docsumDef, Inspector value) { for (DocsumField field : docsumDef.getFields()) { String fieldName = field.getName(); if (value.type() == Type.STRING && - (field instanceof LongstringField || - field instanceof StringField || - field instanceof XMLField)) - { + (field instanceof LongstringField || field instanceof StringField || field instanceof XMLField)) { setDocsumFieldIfNotPresent(fieldName, new LazyString(field, value)); } else { Inspector f = value.field(fieldName); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java index d61a15723ac..87ee906e592 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/JSONField.java @@ -1,7 +1,6 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.fastsearch; - import java.nio.ByteBuffer; import com.yahoo.io.SlowInflate; @@ -10,13 +9,13 @@ import com.yahoo.data.access.*; import com.yahoo.data.access.simple.Value; - /** - * Class representing a JSON string field in the result set + * A hit field containing JSON structured data * - * @author Steinar Knutsen + * @author Steinar Knutsen */ public class JSONField extends DocsumField implements VariableLengthField { + public JSONField(String name) { super(name); } @@ -104,9 +103,7 @@ Inspector stringify(Inspector value) { } } - private static class ArrConv extends CompatibilityConverter - implements ArrayTraverser - { + private static class ArrConv extends CompatibilityConverter implements ArrayTraverser { @Override public void entry(int idx, Inspector value) { target.add(stringify(value)); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java index 617f382f462..3ad8ba510aa 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongdataField.java @@ -5,7 +5,6 @@ */ package com.yahoo.prelude.fastsearch; - import java.nio.ByteBuffer; import com.yahoo.io.SlowInflate; @@ -13,11 +12,11 @@ import com.yahoo.data.access.simple.Value; import com.yahoo.data.access.Inspector; - /** - * @author Bj\u00f8rn Borud + * @author Bjørn Borud */ public class LongdataField extends DocsumField implements VariableLengthField { + public LongdataField(String name) { super(name); } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java index 744476beaa5..2cb3ac51873 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/LongstringField.java @@ -5,18 +5,17 @@ */ package com.yahoo.prelude.fastsearch; - import java.nio.ByteBuffer; import com.yahoo.io.SlowInflate; import com.yahoo.text.Utf8; import com.yahoo.data.access.Inspector; - /** - * @author Bj\u00f8rn Borud + * @author Bjørn Borud */ public class LongstringField extends DocsumField implements VariableLengthField { + public LongstringField(String name) { super(name); } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java index e9c19590102..d30ab3e87e2 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java @@ -12,9 +12,8 @@ import com.yahoo.data.access.Inspector; /** - * @author Bj\u00f8rn Borud + * @author Bjørn Borud */ - public class ShortField extends DocsumField { static final short EMPTY_VALUE = Short.MIN_VALUE; diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java index 671188e4cae..f5be5ffb678 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java @@ -13,9 +13,10 @@ /** - * @author Bj\u00f8rn Borud + * @author Bjørn Borud */ public class StringField extends DocsumField implements VariableLengthField { + public StringField(String name) { super(name); } @@ -59,4 +60,5 @@ public int sizeOfLength() { public Object convert(Inspector value) { return value.asString(""); } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java index f0f4b82c22a..c83f209e8f0 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java @@ -8,7 +8,7 @@ import com.yahoo.prelude.hitfield.JSONString; /** - * Class representing a XML rendered structured data field in the result set + * A hit field containing JSON structured data */ public class StructDataField extends JSONField { @@ -22,9 +22,7 @@ public String toString() { } public Object convert(Inspector value) { - if (getEmulConfig().stringBackedStructuredData() || - value.type() == Type.STRING) - { + if (getEmulConfig().stringBackedStructuredData() || value.type() == Type.STRING) { return super.convert(value); } return new StructuredData(value); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/TensorField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TensorField.java new file mode 100644 index 00000000000..0d97a5bdaf7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TensorField.java @@ -0,0 +1,66 @@ +package com.yahoo.prelude.fastsearch; + +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.simple.Value; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.TypedBinaryFormat; + +import java.nio.ByteBuffer; +import java.util.Optional; + +/** + * A tensor field. 
Tensors are encoded as a data field where the data (following the length) + * is encoded in a tensor binary format defined by com.yahoo.tensor.serialization.TypedBinaryFormat + * + * @author bratseth + */ +public class TensorField extends DocsumField implements VariableLengthField { + + public TensorField(String name) { + super(name); + } + + @Override + public Tensor decode(ByteBuffer buffer) { + int length = buffer.getInt(); + if (length == 0) return null; + ByteBuffer contentBuffer = ByteBuffer.wrap(buffer.array(), buffer.arrayOffset() + buffer.position(), length); + Tensor tensor = TypedBinaryFormat.decode(Optional.empty(), new GrowableByteBuffer(contentBuffer)); + buffer.position(buffer.position() + length); + return tensor; + } + + @Override + public Tensor decode(ByteBuffer b, FastHit hit) { + Tensor tensor = decode(b); + hit.setField(name, tensor); + return tensor; + } + + @Override + public String toString() { + return "field " + getName() + " type tensor"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + int length = b.getInt(); + b.position(offset + length); + return length; + } + + @Override + public int sizeOfLength() { + return 4; + } + + @Override + public Object convert(Inspector value) { + byte[] content = value.asData(Value.empty().asData()); + if (content.length == 0) return null; + return TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(content)); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java index f169533f8db..2ff7c4864e2 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java @@ -5,8 +5,11 @@ * Interface to easier find the start of the actual data for variable length * fields. * - * @author Steinar Knutsen + * @author Steinar Knutsen */ public interface VariableLengthField { - public int sizeOfLength(); + + /** Returns the size of the length preceding the data of this field, in bytes */ + int sizeOfLength(); + } diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java index 26787e442fc..d9b1a9a57ae 100644 --- a/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java @@ -2,7 +2,7 @@ package com.yahoo.prelude.hitfield; /** - * A representation of some random data with unknown semantics + * A representation of some binary data with unknown semantics * * @author arnej27959 */ diff --git a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java index d8321a579b3..bd33e3bcd63 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/federation/http/HTTPSearcher.java @@ -134,7 +134,7 @@ * The content of any additional response header fields. 
* * - * @author Arne Bergene Fossaa + * @author Arne Bergene Fossaa */ public abstract class HTTPSearcher extends ClusterSearcher { diff --git a/container-search/src/test/java/com/yahoo/fs4/test/PacketDecoderTestCase.java b/container-search/src/test/java/com/yahoo/fs4/test/PacketDecoderTestCase.java index 6e750ea0102..34bfefbe98b 100644 --- a/container-search/src/test/java/com/yahoo/fs4/test/PacketDecoderTestCase.java +++ b/container-search/src/test/java/com/yahoo/fs4/test/PacketDecoderTestCase.java @@ -17,9 +17,10 @@ /** * Tests the PacketDecoder * - * @author Bjorn Borud + * @author Bjørn Borud */ public class PacketDecoderTestCase { + static byte[] queryResultPacketData = new byte[] {0,0,0,104, 0,0,0,217-256, @@ -44,7 +45,6 @@ public void testOnePacket () throws BufferTooSmallException { data.put(queryResultPacketData); data.flip(); - // not really necessary for testing, but these help visualize // the state the buffer should be in so a reader of this test // will not have to diff --git a/container-search/src/test/java/com/yahoo/fs4/test/RankFeaturesTestCase.java b/container-search/src/test/java/com/yahoo/fs4/test/RankFeaturesTestCase.java index 5cf9aeda978..64153363ca5 100644 --- a/container-search/src/test/java/com/yahoo/fs4/test/RankFeaturesTestCase.java +++ b/container-search/src/test/java/com/yahoo/fs4/test/RankFeaturesTestCase.java @@ -109,7 +109,7 @@ private static Map decode(TensorType type, byte[] encodedPropert if (key.contains(".type")) { result.put(key, Utf8.toString(value)); } else { - result.put(key, TypedBinaryFormat.decode(type, value)); + result.put(key, TypedBinaryFormat.decode(Optional.of(type), GrowableByteBuffer.wrap(value))); } } return result; diff --git a/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java b/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java index 47a3003371e..f45d1e0fd2e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java @@ -4,81 +4,28 @@ import com.yahoo.config.subscription.ConfigGetter; import com.yahoo.container.search.LegacyEmulationConfig; -import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; -import com.yahoo.prelude.fastsearch.Docsum; -import com.yahoo.prelude.fastsearch.DocsumDefinition; -import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; -import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.prelude.hitfield.RawData; import com.yahoo.prelude.hitfield.XMLString; import com.yahoo.prelude.hitfield.JSONString; import com.yahoo.search.result.NanNumber; import com.yahoo.search.result.StructuredData; -import com.yahoo.document.DocumentId; -import com.yahoo.document.GlobalId; import com.yahoo.slime.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.serialization.TypedBinaryFormat; import org.junit.Test; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.CoreMatchers.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; public class SlimeSummaryTestCase { - public static DocsumDefinitionSet createDocsumDefinitionSet(String configID) { - DocumentdbInfoConfig config = new ConfigGetter<>(DocumentdbInfoConfig.class).getConfig(configID); - return new DocsumDefinitionSet(config.documentdb(0)); - } - - public static DocsumDefinitionSet 
createDocsumDefinitionSet(String configID, LegacyEmulationConfig legacyEmulationConfig) { - DocumentdbInfoConfig config = new ConfigGetter<>(DocumentdbInfoConfig.class).getConfig(configID); - return new DocsumDefinitionSet(config.documentdb(0), legacyEmulationConfig); - } - - public byte[] makeEmptyDocsum() { - Slime slime = new Slime(); - Cursor docsum = slime.setObject(); - byte[] tmp = BinaryFormat.encode(slime); - ByteBuffer buf = ByteBuffer.allocate(tmp.length + 4); - buf.order(ByteOrder.LITTLE_ENDIAN); - buf.putInt(DocsumDefinitionSet.SLIME_MAGIC_ID); - buf.order(ByteOrder.BIG_ENDIAN); - buf.put(tmp); - return buf.array(); - } - - public byte[] makeDocsum() { - Slime slime = new Slime(); - Cursor docsum = slime.setObject(); - docsum.setLong("integer_field", 4); - docsum.setLong("short_field", 2); - docsum.setLong("byte_field", 1); - docsum.setDouble("float_field", 4.5); - docsum.setDouble("double_field", 8.75); - docsum.setLong("int64_field", 8); - docsum.setString("string_field", "string_value"); - docsum.setData("data_field", "data_value".getBytes(StandardCharsets.UTF_8)); - docsum.setString("longstring_field", "longstring_value"); - docsum.setData("longdata_field", "longdata_value".getBytes(StandardCharsets.UTF_8)); - docsum.setString("xmlstring_field", "xmlstring_value"); - { - Cursor field = docsum.setObject("jsonstring_field"); - field.setLong("foo", 1); - field.setLong("bar", 2); - } - byte[] tmp = BinaryFormat.encode(slime); - ByteBuffer buf = ByteBuffer.allocate(tmp.length + 4); - buf.order(ByteOrder.LITTLE_ENDIAN); - buf.putInt(DocsumDefinitionSet.SLIME_MAGIC_ID); - buf.order(ByteOrder.BIG_ENDIAN); - buf.put(tmp); - return buf.array(); - } - @Test public void testDecodingEmpty() { String summary_cf = "file:src/test/java/com/yahoo/prelude/fastsearch/summary.cfg"; @@ -87,13 +34,13 @@ public void testDecodingEmpty() { byte[] docsum = makeEmptyDocsum(); FastHit hit = new FastHit(); set.lazyDecode("default", docsum, hit); - assertThat(hit.getField("integer_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("short_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("byte_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("float_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("double_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("int64_field"), equalTo((Object) NanNumber.NaN)); - assertThat(hit.getField("string_field"), equalTo((Object)"")); + assertThat(hit.getField("integer_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("short_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("byte_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("float_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("double_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("int64_field"), equalTo(NanNumber.NaN)); + assertThat(hit.getField("string_field"), equalTo("")); assertThat(hit.getField("data_field"), instanceOf(RawData.class)); assertThat(hit.getField("data_field").toString(), equalTo("")); assertThat(hit.getField("longstring_field"), equalTo((Object)"")); @@ -103,6 +50,14 @@ public void testDecodingEmpty() { assertThat(hit.getField("xmlstring_field").toString(), equalTo("")); // assertThat(hit.getField("jsonstring_field"), instanceOf(JSONString.class)); assertThat(hit.getField("jsonstring_field").toString(), equalTo("")); + // Empty tensors are represented by null because we don't have type information here to create the right empty tensor + 
assertNull(hit.getField("tensor_field1")); + assertNull(hit.getField("tensor_field2")); + } + + private DocsumDefinitionSet createDocsumDefinitionSet(String configID, LegacyEmulationConfig legacyEmulationConfig) { + DocumentdbInfoConfig config = new ConfigGetter<>(DocumentdbInfoConfig.class).getConfig(configID); + return new DocsumDefinitionSet(config.documentdb(0), legacyEmulationConfig); } @Test @@ -127,22 +82,39 @@ public void testDecodingEmptyWithoutForcedFill() { assertThat(hit.getField("xmlstring_field"), equalTo(null)); assertThat(hit.getField("xmlstring_field"), equalTo(null)); assertThat(hit.getField("jsonstring_field"), equalTo(null)); + assertNull(hit.getField("tensor_field1")); + assertNull(hit.getField("tensor_field2")); + } + + private byte[] makeEmptyDocsum() { + Slime slime = new Slime(); + Cursor docsum = slime.setObject(); + byte[] tmp = BinaryFormat.encode(slime); + ByteBuffer buf = ByteBuffer.allocate(tmp.length + 4); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.putInt(DocsumDefinitionSet.SLIME_MAGIC_ID); + buf.order(ByteOrder.BIG_ENDIAN); + buf.put(tmp); + return buf.array(); } @Test public void testDecoding() { + Tensor tensor1 = Tensor.from("tensor(x{},y{}):{{x:foo,y:bar}:0.1}"); + Tensor tensor2 = Tensor.from("tensor(x[],y[1]):{{x:0,y:0}:-0.3}"); + String summary_cf = "file:src/test/java/com/yahoo/prelude/fastsearch/summary.cfg"; DocsumDefinitionSet set = createDocsumDefinitionSet(summary_cf); - byte[] docsum = makeDocsum(); + byte[] docsum = makeDocsum(tensor1, tensor2); FastHit hit = new FastHit(); set.lazyDecode("default", docsum, hit); - assertThat(hit.getField("integer_field"), equalTo((Object)new Integer(4))); - assertThat(hit.getField("short_field"), equalTo((Object)new Short((short)2))); - assertThat(hit.getField("byte_field"), equalTo((Object)new Byte((byte)1))); - assertThat(hit.getField("float_field"), equalTo((Object)new Float(4.5f))); - assertThat(hit.getField("double_field"), equalTo((Object)new Double(8.75))); - assertThat(hit.getField("int64_field"), equalTo((Object)new Long(8L))); - assertThat(hit.getField("string_field"), equalTo((Object)"string_value")); + assertThat(hit.getField("integer_field"), equalTo(4)); + assertThat(hit.getField("short_field"), equalTo((short)2)); + assertThat(hit.getField("byte_field"), equalTo((byte)1)); + assertThat(hit.getField("float_field"), equalTo(4.5f)); + assertThat(hit.getField("double_field"), equalTo(8.75)); + assertThat(hit.getField("int64_field"), equalTo(8L)); + assertThat(hit.getField("string_field"), equalTo("string_value")); assertThat(hit.getField("data_field"), instanceOf(RawData.class)); assertThat(hit.getField("data_field").toString(), equalTo("data_value")); assertThat(hit.getField("longstring_field"), equalTo((Object)"longstring_value")); @@ -168,5 +140,43 @@ public void testDecoding() { assertThat(value.field("foo").asLong(), equalTo(1L)); assertThat(value.field("bar").asLong(), equalTo(2L)); } + assertEquals(tensor1, hit.getField("tensor_field1")); + assertEquals(tensor2, hit.getField("tensor_field2")); + } + + private DocsumDefinitionSet createDocsumDefinitionSet(String configID) { + DocumentdbInfoConfig config = new ConfigGetter<>(DocumentdbInfoConfig.class).getConfig(configID); + return new DocsumDefinitionSet(config.documentdb(0)); + } + + private byte[] makeDocsum(Tensor tensor1, Tensor tensor2) { + Slime slime = new Slime(); + Cursor docsum = slime.setObject(); + docsum.setLong("integer_field", 4); + docsum.setLong("short_field", 2); + docsum.setLong("byte_field", 1); + 
docsum.setDouble("float_field", 4.5); + docsum.setDouble("double_field", 8.75); + docsum.setLong("int64_field", 8); + docsum.setString("string_field", "string_value"); + docsum.setData("data_field", "data_value".getBytes(StandardCharsets.UTF_8)); + docsum.setString("longstring_field", "longstring_value"); + docsum.setData("longdata_field", "longdata_value".getBytes(StandardCharsets.UTF_8)); + docsum.setString("xmlstring_field", "xmlstring_value"); + { + Cursor field = docsum.setObject("jsonstring_field"); + field.setLong("foo", 1); + field.setLong("bar", 2); + } + docsum.setData("tensor_field1", TypedBinaryFormat.encode(tensor1)); + docsum.setData("tensor_field2", TypedBinaryFormat.encode(tensor2)); + byte[] tmp = BinaryFormat.encode(slime); + ByteBuffer buf = ByteBuffer.allocate(tmp.length + 4); + buf.order(ByteOrder.LITTLE_ENDIAN); + buf.putInt(DocsumDefinitionSet.SLIME_MAGIC_ID); + buf.order(ByteOrder.BIG_ENDIAN); + buf.put(tmp); + return buf.array(); } + } diff --git a/container-search/src/test/java/com/yahoo/prelude/fastsearch/summary.cfg b/container-search/src/test/java/com/yahoo/prelude/fastsearch/summary.cfg index a188754db19..e46904b17d0 100644 --- a/container-search/src/test/java/com/yahoo/prelude/fastsearch/summary.cfg +++ b/container-search/src/test/java/com/yahoo/prelude/fastsearch/summary.cfg @@ -3,7 +3,7 @@ documentdb[0].name test documentdb[0].summaryclass[1] documentdb[0].summaryclass[0].name default documentdb[0].summaryclass[0].id 0 -documentdb[0].summaryclass[0].fields[12] +documentdb[0].summaryclass[0].fields[14] documentdb[0].summaryclass[0].fields[0].name integer_field documentdb[0].summaryclass[0].fields[0].type integer documentdb[0].summaryclass[0].fields[1].name short_field @@ -28,3 +28,7 @@ documentdb[0].summaryclass[0].fields[10].name xmlstring_field documentdb[0].summaryclass[0].fields[10].type xmlstring documentdb[0].summaryclass[0].fields[11].name jsonstring_field documentdb[0].summaryclass[0].fields[11].type jsonstring +documentdb[0].summaryclass[0].fields[12].name tensor_field1 +documentdb[0].summaryclass[0].fields[12].type tensor +documentdb[0].summaryclass[0].fields[13].name tensor_field2 +documentdb[0].summaryclass[0].fields[13].type tensor diff --git a/document/src/main/java/com/yahoo/document/TensorDataType.java b/document/src/main/java/com/yahoo/document/TensorDataType.java index dbaf6ee7763..808d2612825 100644 --- a/document/src/main/java/com/yahoo/document/TensorDataType.java +++ b/document/src/main/java/com/yahoo/document/TensorDataType.java @@ -41,7 +41,7 @@ public boolean isValueCompatible(FieldValue value) { if (value == null) return false; if ( ! TensorFieldValue.class.isAssignableFrom(value.getClass())) return false; TensorFieldValue tensorValue = (TensorFieldValue)value; - return tensorValue.getDataType().getTensorType().isAssignableTo(tensorType); + return tensorType.isAssignableTo(tensorValue.getDataType().getTensorType()); } /** Returns the type of the tensor this field can hold */ diff --git a/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java index 9d8e9a83b5e..c013dde6757 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java +++ b/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java @@ -70,7 +70,7 @@ public void assign(Object o) { } public void assignTensor(Optional tensor) { - if (tensor.isPresent() && ! 
dataType.getTensorType().isAssignableTo(tensor.get().type())) + if (tensor.isPresent() && ! tensor.get().type().isAssignableTo(dataType.getTensorType())) throw new IllegalArgumentException("Type mismatch: Cannot assign tensor of type " + tensor.get().type() + " to field of type " + dataType.getTensorType()); this.tensor = tensor; diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java index 6e9495b1437..ec0d1bce406 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java @@ -278,7 +278,8 @@ public void read(FieldBase field, TensorFieldValue value) { int encodedTensorLength = buf.getInt1_4Bytes(); if (encodedTensorLength > 0) { byte[] encodedTensor = getBytes(null, encodedTensorLength); - value.assign(TypedBinaryFormat.decode(value.getDataType().getTensorType(), encodedTensor)); + value.assign(TypedBinaryFormat.decode(Optional.of(value.getDataType().getTensorType()), + GrowableByteBuffer.wrap(encodedTensor))); } else { value.clear(); } diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index 8bfe7b7d919..b22f33a5fd0 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -233,6 +233,38 @@ TEST("requireThatStringFieldValueCanBeSerialized") { TEST_DO(checkStringFieldValueWithAnnotation()); } +TEST("require that strings can be redeserialized") { + StringFieldValue value("foo"); + nbostream streamNotAnnotated; + VespaDocumentSerializer serializer(streamNotAnnotated); + serializer.write(value); + + Span::UP root(new Span(2, 3)); + SpanTree::UP tree(new SpanTree("test", std::move(root))); + AnnotationType annotation_type(42, "foo_type"); + tree->annotate(tree->getRoot(), annotation_type); + + setSpanTree(value, *tree); + + nbostream streamAnnotated; + VespaDocumentSerializer serializerAnnotated(streamAnnotated); + serializerAnnotated.write(value); + + StringFieldValue deserialized; + { + VespaDocumentDeserializer deserializer(repo, streamAnnotated, 8); + deserializer.read(deserialized); + } + EXPECT_EQUAL("foo", deserialized.getValueRef()); + EXPECT_TRUE(deserialized.hasSpanTrees()); + { + VespaDocumentDeserializer deserializer(repo, streamNotAnnotated, 8); + deserializer.read(deserialized); + } + EXPECT_EQUAL("foo", deserialized.getValueRef()); + EXPECT_FALSE(deserialized.hasSpanTrees()); +} + template void checkRawFieldValue(const string &val) { RawFieldValue value(val); diff --git a/document/src/vespa/document/fieldvalue/stringfieldvalue.cpp b/document/src/vespa/document/fieldvalue/stringfieldvalue.cpp index c37edbfea5f..04580f906fa 100644 --- a/document/src/vespa/document/fieldvalue/stringfieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/stringfieldvalue.cpp @@ -80,6 +80,11 @@ StringFieldValue::SpanTrees StringFieldValue::getSpanTrees() const { return trees; } +void +StringFieldValue::doClearSpanTrees() { + _annotationData.reset(); +} + const SpanTree * StringFieldValue::findTree(const SpanTrees & trees, const stringref & name) { for(const auto & tree : trees) { diff --git a/document/src/vespa/document/fieldvalue/stringfieldvalue.h b/document/src/vespa/document/fieldvalue/stringfieldvalue.h index 35981c5ba30..bb3c8ddf9d5 
100644 --- a/document/src/vespa/document/fieldvalue/stringfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/stringfieldvalue.h @@ -47,10 +47,16 @@ class StringFieldValue : public LiteralFieldValuehasSpanTrees() : false; } static const SpanTree *findTree(const SpanTrees &trees, const vespalib::stringref &name); + void clearSpanTrees() { + if (_annotationData) { + doClearSpanTrees(); + } + } using LiteralFieldValueB::operator=; DECLARE_IDENTIFIABLE(StringFieldValue); private: + void doClearSpanTrees(); class AnnotationData { public: diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp index 82b1c4e03ed..0ee9c7f4b32 100644 --- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp @@ -278,6 +278,8 @@ void VespaDocumentDeserializer::read(StringFieldValue &value) { value.setSpanTrees(vespalib::ConstBufferRef(_stream.peek(), serializedAnnotationsSize), _repo, _version, _stream.isLongLivedBuffer()); _stream.adjustReadPos(serializedAnnotationsSize); + } else { + value.clearSpanTrees(); } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/RunVespaLocal.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/RunVespaLocal.java index a62d1fdc57b..b6c9675f076 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/RunVespaLocal.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/RunVespaLocal.java @@ -48,7 +48,7 @@ public class RunVespaLocal { private final Docker docker; private final Logger logger = Logger.getLogger("RunVespaLocal"); - RunVespaLocal() { + public RunVespaLocal() { this.docker = DockerTestUtils.getDocker(); } @@ -56,7 +56,7 @@ public class RunVespaLocal { * Pulls the base image and builds the vespa-local image * @param vespaBaseImage Vespa docker image to use as base for the image that the config-server and nodes will run */ - void buildVespaLocalImage(DockerImage vespaBaseImage) throws ExecutionException, InterruptedException, IOException { + public void buildVespaLocalImage(DockerImage vespaBaseImage) throws ExecutionException, InterruptedException, IOException { if (!docker.imageIsDownloaded(vespaBaseImage)) { logger.info("Pulling " + vespaBaseImage.asString() + " (This may take a while)"); docker.pullImageAsync(vespaBaseImage).get(); @@ -69,7 +69,7 @@ void buildVespaLocalImage(DockerImage vespaBaseImage) throws ExecutionException, /** * Starts config server, provisions numNodesToProvision and puts them in ready state */ - void startLocalZoneWithNodes(int numNodesToProvision) throws IOException { + public void startLocalZoneWithNodes(int numNodesToProvision) throws IOException { logger.info("Starting config-server"); LocalZoneUtils.startConfigServerIfNeeded(docker, environmentBuilder.build()); @@ -89,7 +89,7 @@ void startLocalZoneWithNodes(int numNodesToProvision) throws IOException { * be stored, the path must exist and must be writeable by user, * normally /home/docker/container-storage */ - void startNodeAdminInIDE(PathResolver pathResolver) { + public void startNodeAdminInIDE(PathResolver pathResolver) { logger.info("Starting node-admin"); environmentBuilder.pathResolver(pathResolver); new ComponentsProviderImpl( @@ -105,7 +105,7 @@ void startNodeAdminInIDE(PathResolver pathResolver) { * @param pathToContainerStorage Path to where the container data will be stored, the path 
must exist and must * be writeable by user, normally /home/docker/container-storage */ - void startNodeAdminAsContainer(Path pathToNodeAdminApp, Path pathToContainerStorage) throws UnknownHostException { + public void startNodeAdminAsContainer(Path pathToNodeAdminApp, Path pathToContainerStorage) throws UnknownHostException { logger.info("Starting node-admin"); String parentHostHostname = LocalZoneUtils.NODE_ADMIN_HOSTNAME; LocalZoneUtils.startNodeAdminIfNeeded(docker, environmentBuilder.build(), pathToContainerStorage); @@ -130,7 +130,7 @@ void startNodeAdminAsContainer(Path pathToNodeAdminApp, Path pathToContainerStor * Packages, deploys an app and waits for the node to come up * @param pathToApp Path to the directory of the application to deploy */ - void deployApplication(Path pathToApp) { + public void deployApplication(Path pathToApp) { logger.info("Packaging application"); LocalZoneUtils.packageApp(pathToApp); logger.info("Deploying application"); @@ -146,7 +146,7 @@ void deployApplication(Path pathToApp) { } } - void deleteApplication() { + public void deleteApplication() { logger.info("Deleting application"); LocalZoneUtils.deleteApplication(); } diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index a4b297ffb69..c9ceda982e0 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -2,7 +2,6 @@ #include "docentry.h" #include -#include #include namespace storage { diff --git a/persistencetypes/src/persistence/spi/types.h b/persistencetypes/src/persistence/spi/types.h index 5f93935dc2b..d88fc5eb5ae 100644 --- a/persistencetypes/src/persistence/spi/types.h +++ b/persistencetypes/src/persistence/spi/types.h @@ -95,10 +95,10 @@ using DocumentId = document::DocumentId; using GlobalId = document::GlobalId; using TimestampList = std::vector; using string = vespalib::string; -using DocumentUP = std::unique_ptr; -using DocumentIdUP = std::unique_ptr; -using DocumentSP = std::shared_ptr; -using DocumentUpdateSP = std::shared_ptr; +using DocumentUP = std::unique_ptr; +using DocumentIdUP = std::unique_ptr; +using DocumentSP = std::shared_ptr; +using DocumentUpdateSP = std::shared_ptr; enum IncludedVersions { NEWEST_DOCUMENT_ONLY, diff --git a/searchcore/src/tests/proton/docsummary/docsummary.cpp b/searchcore/src/tests/proton/docsummary/docsummary.cpp index a0b947e11f8..d0705e7b538 100644 --- a/searchcore/src/tests/proton/docsummary/docsummary.cpp +++ b/searchcore/src/tests/proton/docsummary/docsummary.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include LOG_SETUP("docsummary_test"); @@ -300,10 +302,18 @@ class Test : public vespalib::TestApp uint32_t id, uint32_t resultClassID); + void + assertTensor(const Tensor::UP &exp, + const std::string &fieldName, + const DocsumReply &reply, + uint32_t id, + uint32_t resultClassID); + bool assertSlime(const std::string &exp, const DocsumReply &reply, - uint32_t id); + uint32_t id, + bool relaxed = false); void requireThatAdapterHandlesAllFieldTypes(); @@ -400,8 +410,26 @@ Test::assertString(const std::string & exp, const std::string & fieldName, } +void +Test::assertTensor(const Tensor::UP & exp, const std::string & fieldName, + const DocsumReply & reply, + uint32_t id, uint32_t resultClassID) +{ + GeneralResultPtr res = getResult(reply, id, resultClassID); + const void *data = res->GetEntry(fieldName.c_str())->_stringval; + size_t len = res->GetEntry(fieldName.c_str())->_stringlen; + 
EXPECT_EQUAL(exp.get() == nullptr, len == 0u); + if (exp) { + vespalib::nbostream serialized(data, len); + Tensor::UP tensor = vespalib::tensor::TypedBinaryFormat::deserialize(serialized); + EXPECT_TRUE(tensor.get() != nullptr); + EXPECT_EQUAL(*exp, *tensor); + } +} + + bool -Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id) +Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id, bool relaxed) { const DocsumReply::Docsum & docsum = reply.docsums[id]; uint32_t classId; @@ -414,6 +442,14 @@ Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id) size_t decodeRes = vespalib::slime::BinaryFormat::decode(serialized, slime); ASSERT_EQUAL(decodeRes, serialized.size); + if (relaxed) { + vespalib::slime::SimpleBuffer buf; + vespalib::slime::JsonFormat::encode(slime, buf, false); + vespalib::Slime tmpSlime; + size_t used = vespalib::slime::JsonFormat::decode(buf.get(), tmpSlime); + EXPECT_EQUAL(buf.get().size, used); + slime = std::move(tmpSlime); + } vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); EXPECT_EQUAL(exp.size(), used); @@ -726,7 +762,7 @@ Test::requireThatAttributesAreUsed() endElement(). endField(). startAttributeField("bj"). - addTensor(createTensor({ {{}, 3} }, { "x", "y"})). + addTensor(createTensor({ {{{"x","f"},{"y","g"}}, 3} }, { "x", "y"})). endField(). endDocument(), 2); @@ -755,9 +791,8 @@ Test::requireThatAttributesAreUsed() *rep, 0, rclass)); EXPECT_TRUE(assertString("[[\"quux\",7],[\"qux\",6]]", "bi", *rep, 0, rclass)); - EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," - "\"cells\":[{\"address\":{},\"value\":3}]}", - "bj", *rep, 0, rclass)); + TEST_DO(assertTensor(createTensor({ {{{"x","f"},{"y","g"}}, 3} }, { "x", "y"}), + "bj", *rep, 0, rclass)); // empty doc EXPECT_TRUE(search::attribute::isUndefined @@ -771,7 +806,7 @@ Test::requireThatAttributesAreUsed() EXPECT_TRUE(assertString("[]", "bg", *rep, 1, rclass)); EXPECT_TRUE(assertString("[]", "bh", *rep, 1, rclass)); EXPECT_TRUE(assertString("[]", "bi", *rep, 1, rclass)); - EXPECT_TRUE(assertString("", "bj", *rep, 1, rclass)); + TEST_DO(assertTensor(Tensor::UP(), "bj", *rep, 1, rclass)); proton::IAttributeManager::SP attributeManager = dc._ddb->getReadySubDB()->getAttributeManager(); @@ -785,14 +820,13 @@ Test::requireThatAttributesAreUsed() attributeFieldWriter. 
execute("bj", [&]() { bjTensorAttr->setTensor(3, - *createTensor({ {{}, 4} }, { "x"})); + *createTensor({ {{{"x", "a"},{"y", "b"}}, 4} }, { "x"})); bjTensorAttr->commit(); }); attributeFieldWriter.sync(); DocsumReply::UP rep2 = dc._ddb->getDocsums(req); - EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," - "\"cells\":[{\"address\":{},\"value\":4}]}", - "bj", *rep2, 1, rclass)); + TEST_DO(assertTensor(createTensor({ {{{"x","a"},{"y","b"}}, 4} }, { "x", "y"}), + "bj", *rep2, 1, rclass)); DocsumRequest req3; req3.resultClassName = "class3"; @@ -802,9 +836,8 @@ Test::requireThatAttributesAreUsed() EXPECT_TRUE(assertSlime("{bd:[],be:[],bf:[],bg:[]," "bh:[],bi:[]," - "bj:{dimensions:['x','y']," - "cells:[{address:{},value:4.0}]}}", - *rep3, 0)); + "bj:'0x01020178017901016101624010000000000000'}", + *rep3, 0, true)); } diff --git a/searchcore/src/tests/proton/docsummary/summary.cfg b/searchcore/src/tests/proton/docsummary/summary.cfg index 52f300ae3e0..33fd90f4c82 100644 --- a/searchcore/src/tests/proton/docsummary/summary.cfg +++ b/searchcore/src/tests/proton/docsummary/summary.cfg @@ -85,7 +85,7 @@ classes[3].fields[7].type "jsonstring" classes[3].fields[8].name "bi" classes[3].fields[8].type "jsonstring" classes[3].fields[9].name "bj" -classes[3].fields[9].type "jsonstring" +classes[3].fields[9].type "tensor" classes[4].name "class4" classes[4].id 4 classes[4].fields[1] diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp index e9496093cfe..27a50c9c57f 100644 --- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -150,6 +150,7 @@ class Test : public vespalib::TestApp { void checkString(const string &str, const FieldValue *value); void checkData(const search::RawBuf &data, const FieldValue *value); + void checkTensor(const Tensor::UP &tensor, const FieldValue *value); template void checkArray(const char *(&str)[N], const FieldValue *value); void setSummaryField(const string &name); @@ -170,7 +171,7 @@ class Test : public vespalib::TestApp { void requireThatSearchDataTypeUsesDefaultDataTypes(); void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); void requireThatPredicateIsPrinted(); - void requireThatTensorIsPrinted(); + void requireThatTensorIsNotConverted(); const DocumentType &getDocType() const { return *_documentType; } Document makeDocument(); StringFieldValue annotateTerm(const string &term); @@ -245,7 +246,7 @@ Test::Main() TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes()); TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); TEST_CALL(requireThatPredicateIsPrinted()); - TEST_CALL(requireThatTensorIsPrinted()); + TEST_CALL(requireThatTensorIsNotConverted()); TEST_DONE(); } @@ -430,6 +431,17 @@ void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); } +void Test::checkTensor(const Tensor::UP &tensor, const FieldValue *value) { + ASSERT_TRUE(value); + const TensorFieldValue *s = dynamic_cast(value); + ASSERT_TRUE(s); + const Tensor::UP &tvalue = s->getAsTensorPtr(); + EXPECT_EQUAL(tensor.get() != nullptr, tvalue.get() != nullptr); + if (tensor) { + EXPECT_EQUAL(*tensor, *tvalue); + } +} + template void Test::checkArray(const char *(&str)[N], const FieldValue *value) { ASSERT_TRUE(value); @@ -649,7 +661,7 @@ createTensor(const TensorCells &cells, const 
TensorDimensions &dimensions) { } void -Test::requireThatTensorIsPrinted() +Test::requireThatTensorIsNotConverted() { TensorFieldValue tensorFieldValue; tensorFieldValue = createTensor({ {{{"x", "4"}, {"y", "5"}}, 7} }, @@ -658,30 +670,17 @@ Test::requireThatTensorIsPrinted() doc.setRepo(*_documentRepo); doc.setValue("tensor", tensorFieldValue); - FieldBlock expect1("{ dimensions: [ 'x', 'y' ], cells: [" - "{ address: { x:'4', y:'5' }, value: 7.0 }" - "] }"); - - TEST_CALL(checkString(expect1.json, + TEST_CALL(checkTensor(createTensor({ {{{"x", "4"}, {"y", "5"}}, 7} }, + {"x", "y"}), SFC::convertSummaryField(false, *doc.getValue("tensor"), - false).get())); - TEST_CALL(checkData(expect1.binary, - SFC::convertSummaryField(false, - *doc.getValue("tensor"), - true).get())); + true).get())); doc.setValue("tensor", TensorFieldValue()); - FieldBlock expect2("{ }"); - - TEST_CALL(checkString(expect2.json, + TEST_CALL(checkTensor(Tensor::UP(), SFC::convertSummaryField(false, *doc.getValue("tensor"), - false).get())); - TEST_CALL(checkData(expect2.binary, - SFC::convertSummaryField(false, - *doc.getValue("tensor"), - true).get())); + true).get())); } } // namespace diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp index 855126bd064..4f61873b938 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp @@ -3,11 +3,16 @@ #include "documentstoreadapter.h" #include "summaryfieldconverter.h" #include +#include +#include +#include +#include #include LOG_SETUP(".proton.docsummary.documentstoreadapter"); using namespace document; using namespace search::docsummary; +using vespalib::tensor::Tensor; namespace proton { @@ -76,6 +81,18 @@ DocumentStoreAdapter::writeField(const FieldValue &value, ResType type) std::pair buf = value.getAsRaw(); return _resultPacker.AddLongData(buf.first, buf.second); } + case RES_TENSOR: + { + vespalib::nbostream serialized; + if (value.getClass().inherits(TensorFieldValue::classId)) { + const TensorFieldValue &tvalue = static_cast(value); + const std::unique_ptr &tensor = tvalue.getAsTensorPtr(); + if (tensor) { + vespalib::tensor::TypedBinaryFormat::serialize(serialized, *tensor); + } + } + return _resultPacker.AddSerializedTensor(serialized.peek(), serialized.size()); + } default: LOG(warning, "Unknown docsum field type: %s. 
Add empty field", diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp index 0c4d739be79..c74442bd72b 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include @@ -472,7 +474,7 @@ class SummaryFieldValueConverter : protected ConstFieldValueVisitor } virtual void visit(const TensorFieldValue &value) override { - _field_value = _structuredFieldConverter.convert(value); + visitPrimitive(value); } public: @@ -632,12 +634,11 @@ class SlimeFiller : public ConstFieldValueVisitor { virtual void visit(const TensorFieldValue &value) override { const auto &tensor = value.getAsTensorPtr(); + vespalib::nbostream s; if (tensor) { - vespalib::tensor::SlimeBinaryFormat::serialize(_inserter, *tensor); - } else { - // No tensor value => empty object - _inserter.insertObject(); + vespalib::tensor::TypedBinaryFormat::serialize(s, *tensor); } + _inserter.insertData(vespalib::slime::Memory(s.peek(), s.size())); } public: diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index b0704b2d148..cb8a3dc7680 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -8,7 +8,8 @@ #include "attributedfw.h" #include "docsumstate.h" #include -#include +#include +#include #include #include @@ -44,7 +45,13 @@ ResType inferType(const IAttributeVector & vec) { } else if (vec.isFloatingPointType()) { retval = (fw == sizeof(float)) ? 
RES_FLOAT : RES_DOUBLE; } else { - retval = RES_STRING; + BasicType::Type t = vec.getBasicType(); + switch (t) { + case BasicType::TENSOR: + retval = RES_TENSOR; + default: + retval = RES_STRING; + } } } } @@ -133,34 +140,27 @@ SingleAttrDFW::WriteField(uint32_t docid, target->append(s, slen); return (sizeof(slen) + slen); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { + vespalib::nbostream str; BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { const tensor::TensorAttribute &tv = static_cast(v); const auto tensor = tv.getTensor(docid); - vespalib::string str; if (tensor) { - auto slime = - vespalib::tensor::SlimeBinaryFormat::serialize(*tensor); - vespalib::slime::SimpleBuffer buf; - vespalib::slime::JsonFormat::encode(*slime, buf, true); - str = buf.get().make_string(); - } else { - // No tensor value => empty object - str = ""; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); } - uint32_t slen = str.size(); - target->append(&slen, sizeof(slen)); - target->append(str.c_str(), slen); - return (sizeof(slen) + slen); } default: break; - }; + } + uint32_t slen = str.size(); + target->append(&slen, sizeof(slen)); + target->append(str.peek(), slen); + return (sizeof(slen) + slen); } - /* FALLTHROUGH */ + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: @@ -222,7 +222,7 @@ SingleAttrDFW::insertField(uint32_t docid, target.insertLong(val); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { @@ -230,17 +230,17 @@ SingleAttrDFW::insertField(uint32_t docid, static_cast(v); const auto tensor = tv.getTensor(docid); if (tensor) { - vespalib::tensor::SlimeBinaryFormat::serialize(target, *tensor); - } else { - // No tensor value => no object + vespalib::nbostream str; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); + target.insertData(vespalib::slime::Memory(str.peek(), str.size())); } - return; } default: - break; - }; + ; + } } - /* FALLTHROUGH */ + break; + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp index be188e9a871..446ca347bff 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp @@ -164,6 +164,7 @@ CopyDFW::insertField(uint32_t /*docid*/, target.insertString(value); break; } + case RES_TENSOR: case RES_LONG_DATA: case RES_DATA: { uint32_t len; @@ -259,6 +260,7 @@ CopyDFW::WriteField(uint32_t docid, break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp index a837fca3bdb..27b20f8b1bd 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp @@ -97,6 +97,7 @@ DocsumFormat::addEmpty(ResType type, search::RawBuf &target) case RES_LONG_DATA: case RES_XMLSTRING: case RES_JSONSTRING: + case RES_TENSOR: case RES_FEATUREDATA: return addLongData(target, "", 0); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index f790d38e70e..4273c89d7f5 100644 --- 
a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -151,6 +151,7 @@ DynamicDocsumWriter::RepackDocsum(GeneralResult *gres, written += slen; break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; uint32_t dlen = entry->_get_length(); @@ -304,6 +305,7 @@ static void convertEntry(GetDocsumsState *state, inserter.insertString(Memory(ptr, len)); break; case RES_DATA: + case RES_TENSOR: case RES_LONG_DATA: entry->_resolve_field(&ptr, &len, &state->_docSumFieldSpace); inserter.insertData(Memory(ptr, len)); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index e35408a796c..58a245a364a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -31,6 +31,7 @@ enum ResType { RES_LONG_DATA, RES_XMLSTRING, RES_JSONSTRING, + RES_TENSOR, RES_FEATUREDATA }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp index a08b0d11d5e..aa3029b9535 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp @@ -55,6 +55,7 @@ ResultConfig::GetResTypeName(ResType type) case RES_LONG_DATA: return "longdata"; case RES_XMLSTRING: return "xmlstring"; case RES_JSONSTRING: return "jsonstring"; + case RES_TENSOR: return "tensor"; case RES_FEATUREDATA: return "featuredata"; } return "unknown-type"; @@ -172,6 +173,8 @@ ResultConfig::ReadConfig(const vespa::config::search::SummaryConfig &cfg, const rc = resClass->AddConfigEntry(fieldname, RES_XMLSTRING); } else if (strcmp(fieldtype, "jsonstring") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_JSONSTRING); + } else if (strcmp(fieldtype, "tensor") == 0) { + rc = resClass->AddConfigEntry(fieldname, RES_TENSOR); } else if (strcmp(fieldtype, "featuredata") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_FEATUREDATA); } else { // FAIL: unknown field type diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h index 7082a5636cc..4ae1b14e486 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h @@ -164,6 +164,8 @@ class ResultConfig case RES_DATA: case RES_LONG_DATA: return (b == RES_DATA || b == RES_LONG_DATA); + case RES_TENSOR: + return (b == RES_TENSOR); case RES_FEATUREDATA: return (b == RES_FEATUREDATA); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp index 83d504f2429..bda2f5c1b74 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp @@ -121,6 +121,7 @@ ResultPacker::AddEmpty() case RES_JSONSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: return AddLongString(NULL, 0); + case RES_TENSOR: return AddSerializedTensor(NULL, 0); case RES_LONG_DATA: return AddLongData(NULL, 0); } } @@ -250,6 +251,17 @@ ResultPacker::AddLongData(const char *buf, uint32_t buflen) } +bool +ResultPacker::AddSerializedTensor(const char *buf, uint32_t buflen) +{ + if (CheckEntry(RES_TENSOR)) { + _buf.append(&buflen, sizeof(buflen)); + 
_buf.append(buf, buflen); + } + return !_error; +} + + bool ResultPacker::GetDocsumBlob(const char **buf, uint32_t *buflen) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h index 8280ebe0980..634084fea6a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h @@ -230,6 +230,14 @@ class ResultPacker **/ bool AddLongData(const char *buf, uint32_t buflen); + /* + * Add a 'tensor' field to the docsum blob we are currently creating. + * + * @return true(ok)/false(error). + * @param buf pointer to serialized tensor to add. + * @param buflen length of serialized tensor to add. + **/ + bool AddSerializedTensor(const char *buf, uint32_t buflen); /** * Obtain a pointer to, and the length of, the created docsum diff --git a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp index 92ebe07d457..47cc0cf3a33 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp @@ -419,6 +419,7 @@ GeneralResult::unpack(const char *buf, const size_t buflen) break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t ldlen; diff --git a/storage/src/vespa/storage/visiting/visitor.h b/storage/src/vespa/storage/visiting/visitor.h index cea4a590f3f..bdf301a0d94 100644 --- a/storage/src/vespa/storage/visiting/visitor.h +++ b/storage/src/vespa/storage/visiting/visitor.h @@ -11,20 +11,20 @@ #pragma once -#include -#include +#include "visitormessagesession.h" +#include "memory_bounded_trace.h" #include #include #include #include #include -#include -#include #include #include #include #include #include +#include +#include namespace document { class Document; diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp index 3875122d9e8..8c0d2f5ac35 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp @@ -5,8 +5,6 @@ #include #include #include -#include -#include using namespace document; using namespace search::fef; @@ -64,7 +62,7 @@ void HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const char *sortData, size_t sortDataSize) { document::Document::UP doc(new document::Document(_docType, DocumentId("doc::"))); - StorageDocument::SP sdoc(new StorageDocument(std::move(doc))); + StorageDocument::LP sdoc(new StorageDocument(std::move(doc), SharedFieldPathMap(), 0)); ASSERT_TRUE(sdoc->valid()); MatchData md(MatchData::params()); hc.addHit(sdoc, docId, md, score, sortData, sortDataSize); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index e583fe55a51..f65d4a1d100 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -2,7 +2,6 @@ #include "hitcollector.h" #include -#include #include LOG_SETUP(".searchvisitor.hitcollector"); @@ -32,14 +31,14 @@ HitCollector::getDocSum(const search::DocumentIdT & docId) const } bool -HitCollector::addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score) +HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & 
data, double score) { Hit h(doc, docId, data, score); return addHit(h); } bool -HitCollector::addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, +HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score, const void * sortData, size_t sortDataLen) { Hit h(doc, docId, data, score, sortData, sortDataLen); @@ -109,17 +108,17 @@ void HitCollector::fillSearchResult(vdslib::SearchResult & searchResult) { sortByDocId(); - for (HitVector::const_iterator it(_hits.begin()), mt(_hits.end()); it != mt; it++) { - vespalib::string documentId(it->getDocument()->docDoc().getId().toString()); - search::DocumentIdT docId = it->getDocId(); - SearchResult::RankType rank = it->getRankScore(); + for (const Hit & hit : _hits) { + vespalib::string documentId(hit.getDocument()->docDoc().getId().toString()); + search::DocumentIdT docId = hit.getDocId(); + SearchResult::RankType rank = hit.getRankScore(); LOG(debug, "fillSearchResult: gDocId(%s), lDocId(%u), rank(%f)", documentId.c_str(), docId, (float)rank); - if (it->getSortBlob().empty()) { + if (hit.getSortBlob().empty()) { searchResult.addHit(docId, documentId.c_str(), rank); } else { - searchResult.addHit(docId, documentId.c_str(), rank, it->getSortBlob().c_str(), it->getSortBlob().size()); + searchResult.addHit(docId, documentId.c_str(), rank, hit.getSortBlob().c_str(), hit.getSortBlob().size()); } } } @@ -138,9 +137,9 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram, names.emplace_back(resolver.name_of(i)); } FeatureSet::SP retval = FeatureSet::SP(new FeatureSet(names, _hits.size())); - for (HitVector::iterator it(_hits.begin()), mt(_hits.end()); it != mt; ++it) { - rankProgram.run(it->getDocId(), it->getMatchData()); - uint32_t docId = it->getDocId(); + for (const Hit & hit : _hits) { + rankProgram.run(hit.getDocId(), hit.getMatchData()); + uint32_t docId = hit.getDocId(); search::feature_t * f = retval->getFeaturesByIndex(retval->addDocId(docId)); for (uint32_t j = 0; j < names.size(); ++j) { f[j] = *resolver.resolve_number(j); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index ea4a20c187b..5f9d1d4a4bb 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -22,7 +22,7 @@ class HitCollector : public vsm::IDocSumCache class Hit { public: - Hit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & matchData, + Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score, const void * sortData, size_t sortDataLen) : _docid(docId), _score(score), @@ -35,10 +35,10 @@ class HitCollector : public vsm::IDocSumCache _matchData.emplace_back(*matchData.resolveTermField(handle)); } } - Hit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score) + Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score) : Hit(doc, docId, matchData, score, nullptr, 0) {} search::DocumentIdT getDocId() const { return _docid; } - const vsm::StorageDocument::SP & getDocument() const { return _document; } + const vsm::StorageDocument::LP & getDocument() const { return _document; } const std::vector &getMatchData() const { return _matchData; } search::feature_t getRankScore() const { return _score; } const 
vespalib::string & getSortBlob() const { return _sortBlob; } @@ -70,7 +70,7 @@ class HitCollector : public vsm::IDocSumCache private: uint32_t _docid; double _score; - vsm::StorageDocument::SP _document; + vsm::StorageDocument::LP _document; std::vector _matchData; vespalib::string _sortBlob; }; @@ -104,7 +104,7 @@ class HitCollector : public vsm::IDocSumCache * @param data The match data for the hit. * @return true if the document was added to the heap **/ - bool addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score); + bool addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score); /** * Adds a hit to this hit collector. @@ -118,7 +118,7 @@ class HitCollector : public vsm::IDocSumCache * @param sortDataLen The length of the sortdata. * @return true if the document was added to the heap **/ - bool addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, + bool addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score, const void * sortData, size_t sortDataLen); /** diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 061194ec99f..40c1d1f46db 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -4,27 +4,12 @@ #include "searchenvironment.h" #include "searchvisitor.h" #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include #include #include -#include -#include -#include #include -#include -#include #include #include @@ -33,37 +18,16 @@ LOG_SETUP(".visitor.instance.searchvisitor"); namespace storage { -using vsm::VSMAdapter; using vsm::DocsumFilter; -using vsm::DocsumTools; -using vsm::DocsumToolsPtr; -using vsm::DocSumCache; -using vsm::FieldIdTSearcherMap; -using vsm::FieldPathMapT; -using vsm::FieldSearcher; -using vsm::FieldSearchSpecMap; -using vsm::VsmfieldsHandle; using vsm::FieldPath; using vsm::StorageDocument; using vsm::StringFieldIdTMap; -using search::IAttributeManager; using search::AttributeGuard; -using search::AttributeManager; using search::AttributeVector; using search::attribute::IAttributeVector; -using search::EmptyQueryNodeResult; -using search::Query; -using search::QueryPacketT; -using search::FeatureSet; -using search::fs4transport::FS4Packet_DOCSUM; -using search::fs4transport::FS4Packet_EOL; -using search::fs4transport::PacketArray; -using namespace search::docsummary; -using namespace search::aggregation; -using namespace search::expression; +using search::aggregation::HitsAggregationResult; +using search::expression::ConfigureStaticParams; using vdslib::Parameters; -using vdslib::DocumentList; - class ForceWordfolderInit { @@ -87,8 +51,7 @@ createMultiValueAttribute(const vespalib::string & name, const document::FieldVa { const document::DataType * ndt = fv.getDataType(); if (ndt->inherits(document::CollectionDataType::classId)) { - ndt = &(static_cast(ndt)) - ->getNestedType(); + ndt = &(static_cast(ndt))->getNestedType(); } LOG(debug, "Create %s attribute '%s' with data type '%s' (%s)", arrayType ? 
"array" : "weighted set", name.c_str(), ndt->getName().c_str(), fv.getClass().name()); @@ -133,7 +96,7 @@ createAttribute(const vespalib::string & name, const document::FieldValue & fv) } SearchVisitor::SummaryGenerator::SummaryGenerator() : - search::aggregation::HitsAggregationResult::SummaryGenerator(), + HitsAggregationResult::SummaryGenerator(), _callback(), _docsumState(_callback), _docsumFilter(), @@ -142,7 +105,7 @@ SearchVisitor::SummaryGenerator::SummaryGenerator() : { } -vespalib::ConstBufferRef SearchVisitor::SummaryGenerator::fillSummary(search::AttributeVector::DocId lid, const search::aggregation::HitsAggregationResult::SummaryClassType & summaryClass) +vespalib::ConstBufferRef SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const HitsAggregationResult::SummaryClassType & summaryClass) { if (_docsumWriter != NULL) { _rawBuf.reset(); @@ -155,14 +118,14 @@ vespalib::ConstBufferRef SearchVisitor::SummaryGenerator::fillSummary(search::At void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) { - search::aggregation::HitsAggregationResult & hitsAggr(static_cast(obj)); + HitsAggregationResult & hitsAggr(static_cast(obj)); hitsAggr.setSummaryGenerator(_summaryGenerator); _numHitsAggregators++; } bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const { - return obj.getClass().inherits(search::aggregation::HitsAggregationResult::classId); + return obj.getClass().inherits(HitsAggregationResult::classId); } SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) : @@ -320,7 +283,7 @@ void SearchVisitor::init(const Parameters & params) if ( params.get("query", queryBlob) ) { LOG(spam, "Received query blob of %zd bytes", queryBlob.size()); QueryTermData resultAddOn; - _query = Query(resultAddOn, QueryPacketT(static_cast(queryBlob.data()), queryBlob.size())); + _query = search::Query(resultAddOn, search::QueryPacketT(queryBlob.data(), queryBlob.size())); LOG(debug, "Query tree: '%s'", _query.asString().c_str()); _searchBuffer->reserve(0x10000); @@ -464,7 +427,7 @@ SearchVisitor::RankController::processHintedAttributes(const IndexEnvironment & found = (attributeFields[j]._field == fid); } if (!found) { - search::AttributeGuard::UP attr(attrMan.getAttribute(name)); + AttributeGuard::UP attr(attrMan.getAttribute(name)); if (attr->valid()) { LOG(debug, "Add attribute '%s' with field id '%u' to the list of needed attributes", name.c_str(), fid); attributeFields.push_back(AttrInfo(fid, std::move(attr))); @@ -558,23 +521,24 @@ bool SearchVisitor::RankController::collectMatchedDocument(bool hasSorting, SearchVisitor & visitor, const std::vector & tmpSortBuffer, - const vsm::StorageDocument::SP & document) + const StorageDocument::LP & document) { bool amongTheBest(false); uint32_t docId = _rankProcessor->getDocId(); if (!hasSorting) { - amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), _rankProcessor->getRankScore()); + amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), + _rankProcessor->getRankScore()); if (amongTheBest && _dumpFeatures) { - _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore()); + _dumpProcessor->getHitCollector().addHit(StorageDocument::LP(NULL), docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore()); } } else { size_t pos = visitor.fillSortBuffer(); LOG(spam, "SortBlob is 
%ld bytes", pos); - amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), _rankProcessor->getRankScore(), - &tmpSortBuffer[0], pos); + amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), + _rankProcessor->getRankScore(), &tmpSortBuffer[0], pos); if (amongTheBest && _dumpFeatures) { - _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore(), - &tmpSortBuffer[0], pos); + _dumpProcessor->getHitCollector().addHit(StorageDocument::LP(NULL), docId, _dumpProcessor->getMatchData(), + _dumpProcessor->getRankScore(), &tmpSortBuffer[0], pos); } } return amongTheBest; @@ -617,13 +581,13 @@ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldR } void -SearchVisitor::SyntheticFieldsController::onDocument(vsm::StorageDocument & document) +SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument & document) { (void) document; } void -SearchVisitor::SyntheticFieldsController::onDocumentMatch(vsm::StorageDocument & document, +SearchVisitor::SyntheticFieldsController::onDocumentMatch(StorageDocument & document, const vespalib::string & documentId) { document.setField(_documentIdFId, document::FieldValue::UP(new document::StringFieldValue(documentId))); @@ -633,9 +597,9 @@ void SearchVisitor::registerAdditionalFields(const std::vector & docsumSpec, std::vector & fieldList) { - for (size_t i = 0; i < docsumSpec.size(); ++i) { - fieldList.push_back(docsumSpec[i].getOutputName()); - const std::vector & inputNames = docsumSpec[i].getInputNames(); + for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) { + fieldList.push_back(spec.getOutputName()); + const std::vector & inputNames = spec.getInputNames(); for (size_t j = 0; j < inputNames.size(); ++j) { fieldList.push_back(inputNames[j]); if (document::PositionDataType::isZCurveFieldName(inputNames[j])) { @@ -691,11 +655,10 @@ SearchVisitor::setupScratchDocument(const StringFieldIdTMap & fieldsInQuery) _fieldSearchSpecMap.documentTypeMap().size()); } _fieldsUnion = fieldsInQuery.map(); - for(vsm::StringFieldIdTMapT::const_iterator it(_fieldSearchSpecMap.nameIdMap().map().begin()), - mt(_fieldSearchSpecMap.nameIdMap().map().end()); it != mt; it++) { - if (_fieldsUnion.find(it->first) == _fieldsUnion.end()) { - LOG(debug, "Adding field '%s' from _fieldSearchSpecMap", it->first.c_str()); - _fieldsUnion[it->first] = it->second; + for(const auto & entry :_fieldSearchSpecMap.nameIdMap().map()) { + if (_fieldsUnion.find(entry.first) == _fieldsUnion.end()) { + LOG(debug, "Adding field '%s' from _fieldSearchSpecMap", entry.first.c_str()); + _fieldsUnion[entry.first] = entry.second; } } // Init based on default document type and mapping from field name to field id @@ -707,7 +670,8 @@ SearchVisitor::setupScratchDocument(const StringFieldIdTMap & fieldsInQuery) void SearchVisitor::setupDocsumObjects() { - std::unique_ptr docsumFilter(new DocsumFilter(_vsmAdapter->getDocsumTools(), _rankController.getRankProcessor()->getHitCollector())); + std::unique_ptr docsumFilter(new DocsumFilter(_vsmAdapter->getDocsumTools(), + _rankController.getRankProcessor()->getHitCollector())); docsumFilter->init(_fieldSearchSpecMap.nameIdMap(), *_fieldPathMap); docsumFilter->setSnippetModifiers(_snippetModifierManager.getModifiers()); _summaryGenerator.setFilter(std::move(docsumFilter)); @@ -747,53 +711,52 @@ SearchVisitor::setupDocsumObjects() void 
SearchVisitor::setupAttributeVectors() { - const FieldPathMapT & fm = *_fieldPathMap; - for (FieldPathMapT::const_iterator it(fm.begin()), mt(fm.end()); it != mt; it++) { - if ( ! it->empty() ) { - vespalib::string attrName(it->front().getName()); - for (FieldPath::const_iterator ft(it->begin()+1), fmt(it->end()); ft != fmt; ft++) { - attrName.append("."); - attrName.append(ft->getName()); - } + for (const FieldPath & fieldPath : *_fieldPathMap) { + if ( ! fieldPath.empty() ) { + setupAttributeVector(fieldPath); + } + } +} - enum FieldDataType { - OTHER = 0, - ARRAY, - WSET - }; - FieldDataType typeSeen = OTHER; - for (FieldPath::const_iterator ft(it->begin()), fmt(it->end()); ft != fmt; ft++) { - int dataTypeId(ft->getDataType().getClass().id()); - if (dataTypeId == document::ArrayDataType::classId) { - typeSeen = ARRAY; - } else if (dataTypeId == document::MapDataType::classId) { - typeSeen = ARRAY; - } else if (dataTypeId == document::WeightedSetDataType::classId) { - typeSeen = WSET; - } - } - const document::FieldValue & fv = it->back().getFieldValueToSet(); - AttributeVector::SP attr; - if (typeSeen == ARRAY) { - attr = createMultiValueAttribute(attrName, fv, true); - } else if (typeSeen == WSET) { - attr = createMultiValueAttribute (attrName, fv, false); - } else { - attr = createAttribute(attrName, fv); - } +void SearchVisitor::setupAttributeVector(const FieldPath &fieldPath) { + vespalib::string attrName(fieldPath.front().getName()); + for (FieldPath::const_iterator ft(fieldPath.begin() + 1), fmt(fieldPath.end()); ft != fmt; ft++) { + attrName.append("."); + attrName.append(ft->getName()); + } - if (attr.get()) { - LOG(debug, "Adding attribute '%s' for field '%s' with data type '%s' (%s)", - attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); - if ( ! _attrMan.add(attr) ) { - LOG(warning, "Failed adding attribute '%s' for field '%s' with data type '%s' (%s)", - attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); - } - } else { - LOG(debug, "Cannot setup attribute for field '%s' with data type '%s' (%s). Aggregation and sorting will not work for this field", - attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); - } + enum FieldDataType { OTHER = 0, ARRAY, WSET }; + FieldDataType typeSeen = OTHER; + for (const document::FieldPathEntry & entry : fieldPath) { + int dataTypeId(entry.getDataType().getClass().id()); + if (dataTypeId == document::ArrayDataType::classId) { + typeSeen = ARRAY; + } else if (dataTypeId == document::MapDataType::classId) { + typeSeen = ARRAY; + } else if (dataTypeId == document::WeightedSetDataType::classId) { + typeSeen = WSET; + } + } + const document::FieldValue & fv = fieldPath.back().getFieldValueToSet(); + AttributeVector::SP attr; + if (typeSeen == ARRAY) { + attr = createMultiValueAttribute(attrName, fv, true); + } else if (typeSeen == WSET) { + attr = createMultiValueAttribute (attrName, fv, false); + } else { + attr = createAttribute(attrName, fv); + } + + if (attr.get()) { + LOG(debug, "Adding attribute '%s' for field '%s' with data type '%s' (%s)", + attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); + if ( ! 
_attrMan.add(attr) ) { + LOG(warning, "Failed adding attribute '%s' for field '%s' with data type '%s' (%s)", + attr->getName().c_str(), attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); } + } else { + LOG(debug, "Cannot setup attribute for field '%s' with data type '%s' (%s). Aggregation and sorting will not work for this field", + attrName.c_str(), fv.getDataType()->getName().c_str(), fv.getClass().name()); } } @@ -801,8 +764,7 @@ void SearchVisitor::setupAttributeVectorsForSorting(const search::common::SortSpec & sortList) { if ( ! sortList.empty() ) { - for (size_t i(0), m(sortList.size()); i < m; i++) { - const search::common::SortInfo & sInfo(sortList[i]); + for (const search::common::SortInfo & sInfo : sortList) { vsm::FieldIdT fid = _fieldSearchSpecMap.nameIdMap().fieldNo(sInfo._field); if ( fid != StringFieldIdTMap::npos ) { AttributeGuard::UP attr(_attrMan.getAttribute(sInfo._field)); @@ -851,7 +813,7 @@ SearchVisitor::setupGrouping(const std::vector & groupingBlob) grouping.select(attr2Doc, attr2Doc); LOG(debug, "Grouping # %ld with id(%d)", i, grouping.getId()); try { - search::expression::ConfigureStaticParams stuff(_attrCtx.get(), &_docTypeMapping.getCurrentDocumentType()); + ConfigureStaticParams stuff(_attrCtx.get(), &_docTypeMapping.getCurrentDocumentType()); grouping.configureStaticStuff(stuff); HitsResultPreparator preparator(_summaryGenerator); grouping.select(preparator, preparator); @@ -870,13 +832,13 @@ SearchVisitor::setupGrouping(const std::vector & groupingBlob) class SingleDocumentStore : public vsm::IDocSumCache { public: - SingleDocumentStore(const vsm::StorageDocument & doc) : _doc(doc) { } + SingleDocumentStore(const StorageDocument & doc) : _doc(doc) { } virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const { (void) docId; return _doc; } private: - const vsm::StorageDocument & _doc; + const StorageDocument & _doc; }; bool @@ -904,30 +866,20 @@ SearchVisitor::handleDocuments(const document::BucketId&, return; } document::DocumentId emptyId; - LOG(debug, "SearchVisitor '%s' handling block of %zu documents", - _id.c_str(), entries.size()); + LOG(debug, "SearchVisitor '%s' handling block of %zu documents", _id.c_str(), entries.size()); size_t highestFieldNo(_fieldSearchSpecMap.nameIdMap().highestFieldNo()); - const document::DocumentType* defaultDocType = - _docTypeMapping.getDefaultDocumentType(); + const document::DocumentType* defaultDocType = _docTypeMapping.getDefaultDocumentType(); assert(defaultDocType); - for (size_t i = 0; i< entries.size(); ++i) { - spi::DocEntry& entry(*entries[i]); - vsm::StorageDocument::SP document( - new StorageDocument(entry.releaseDocument())); - document->fieldPathMap(_fieldPathMap); - document->setFieldCount(highestFieldNo); + for (const auto & entry : entries) { + StorageDocument::LP document(new StorageDocument(entry->releaseDocument(), _fieldPathMap, highestFieldNo)); try { - document->init(); if (defaultDocType != NULL - && !compatibleDocumentTypes(*defaultDocType, - document->docDoc().getType())) + && !compatibleDocumentTypes(*defaultDocType, document->docDoc().getType())) { - LOG(debug, "Skipping document of type '%s' when " - "handling only documents of type '%s'", - document->docDoc().getType().getName().c_str(), - defaultDocType->getName().c_str()); + LOG(debug, "Skipping document of type '%s' when handling only documents of type '%s'", + document->docDoc().getType().getName().c_str(), defaultDocType->getName().c_str()); } else { if (handleDocument(document)) { 
_backingDocuments.push_back(document); @@ -935,14 +887,13 @@ SearchVisitor::handleDocuments(const document::BucketId&, } } catch (const std::exception & e) { LOG(warning, "Caught exception handling document '%s'. Exception='%s'", - document->docDoc().getId().getScheme().toString().c_str(), - e.what()); + document->docDoc().getId().getScheme().toString().c_str(), e.what()); } } } bool -SearchVisitor::handleDocument(const vsm::StorageDocument::SP & document) +SearchVisitor::handleDocument(const StorageDocument::LP & document) { bool needToKeepDocument(false); _syntheticFieldsController.onDocument(*document); @@ -994,8 +945,7 @@ void SearchVisitor::group(const document::Document & doc, search::HitRank rank, bool all) { LOG(spam, "Group all: %s", all ? "true" : "false"); - for(GroupingList::iterator it(_groupingList.begin()), mt(_groupingList.end()); it != mt; it++) { - GroupingEntry & grouping(*it); + for(GroupingEntry & grouping : _groupingList) { if (all == grouping->getAll()) { grouping.aggregate(doc, rank); LOG(spam, "Actually group document with id '%s'", doc.getId().getScheme().toString().c_str()); @@ -1004,11 +954,10 @@ SearchVisitor::group(const document::Document & doc, search::HitRank rank, bool } bool -SearchVisitor::match(const vsm::StorageDocument & doc) +SearchVisitor::match(const StorageDocument & doc) { - for (FieldIdTSearcherMap::iterator it = _fieldSearcherMap.begin(), mt = _fieldSearcherMap.end(); it != mt; it++) { - FieldSearcher & fSearch = *(*it); - fSearch.search(doc); + for (vsm::FieldSearcherContainer & fSearch : _fieldSearcherMap) { + fSearch->search(doc); } bool hit(_query.evaluate()); if (hit) { @@ -1025,8 +974,7 @@ SearchVisitor::match(const vsm::StorageDocument & doc) void SearchVisitor::fillAttributeVectors(const vespalib::string & documentId, const StorageDocument & document) { - for (size_t i(0), im(_attributeFields.size()); i < im; i++) { - const AttrInfo & finfo = _attributeFields[i]; + for (const AttrInfo & finfo : _attributeFields) { const AttributeGuard &finfoGuard(*finfo._attr); bool isPosition = finfoGuard->getClass().inherits(search::IntegerAttribute::classId) && document::PositionDataType::isZCurveFieldName(finfoGuard->getName()); LOG(debug, "Filling attribute '%s', isPosition='%s'", finfoGuard->getName().c_str(), isPosition ? 
"true" : "false"); @@ -1062,9 +1010,9 @@ size_t SearchVisitor::fillSortBuffer() { size_t pos(0); - for(size_t i(0), m(_sortList.size()); i != m; i++) { + for (size_t index : _sortList) { + const AttrInfo & finfo = _attributeFields[index]; int written(0); - const AttrInfo & finfo = _attributeFields[_sortList[i]]; const AttributeGuard &finfoGuard(*finfo._attr); LOG(debug, "Adding sortdata for document %d for attribute '%s'", finfoGuard->getNumDocs() - 1, finfoGuard->getName().c_str()); @@ -1132,8 +1080,8 @@ void SearchVisitor::generateGroupingResults() { vdslib::SearchResult & searchResult(_queryResult->getSearchResult()); - for (GroupingList::iterator it(_groupingList.begin()), mt(_groupingList.end()); it != mt; it++) { - Grouping & grouping(**it); + for (auto & groupingPtr : _groupingList) { + Grouping & grouping(*groupingPtr); LOG(debug, "grouping before postAggregate: %s", grouping.asString().c_str()); grouping.postAggregate(); grouping.postMerge(); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index bc8d72c4177..f2025e8c161 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -2,6 +2,12 @@ #pragma once +#include "hitcollector.h" +#include "indexenvironment.h" +#include "queryenvironment.h" +#include "rankmanager.h" +#include "rankprocessor.h" +#include "searchenvironment.h" #include #include #include @@ -20,12 +26,6 @@ #include #include #include -#include "hitcollector.h" -#include "indexenvironment.h" -#include "queryenvironment.h" -#include "rankmanager.h" -#include "rankprocessor.h" -#include "searchenvironment.h" using namespace search::aggregation; @@ -60,8 +60,7 @@ class SearchVisitor : public Visitor { _ascending(true), _converter(NULL), _attr(std::move(attr)) - { - } + { } /** * Construct a new object. * @@ -75,8 +74,7 @@ class SearchVisitor : public Visitor { _ascending(ascending), _converter(converter), _attr(std::move(attr)) - { - } + { } vsm::FieldIdT _field; bool _ascending; const search::common::BlobConverter * _converter; @@ -189,7 +187,7 @@ class SearchVisitor : public Visitor { bool collectMatchedDocument(bool hasSorting, SearchVisitor & visitor, const std::vector & tmpSortBuffer, - const vsm::StorageDocument::SP & documentId); + const vsm::StorageDocument::LP & documentId); /** * Callback function that is called when visiting is completed. * Perform second phase ranking and calculate summary features / rank features if asked for. @@ -308,7 +306,7 @@ class SearchVisitor : public Visitor { * @param document Document to process. * @return true if the underlying buffer is needed later on, then it must be kept. */ - bool handleDocument(const vsm::StorageDocument::SP & document); + bool handleDocument(const vsm::StorageDocument::LP & document); /** * Collect the given document for grouping. 
@@ -379,7 +377,7 @@ class SearchVisitor : public Visitor { size_t _limit; }; typedef std::vector< GroupingEntry > GroupingList; - typedef std::vector DocumentVector; + typedef std::vector DocumentVector; class SummaryGenerator : public HitsAggregationResult::SummaryGenerator { @@ -394,7 +392,7 @@ class SearchVisitor : public Visitor { private: vsm::GetDocsumsStateCallback _callback; GetDocsumsState _docsumState; - std::unique_ptr _docsumFilter; + std::unique_ptr _docsumFilter; search::docsummary::IDocsumWriter * _docsumWriter; search::RawBuf _rawBuf; }; @@ -448,6 +446,8 @@ class SearchVisitor : public Visitor { RankController _rankController; DocumentVector _backingDocuments; vsm::StringFieldIdTMapT _fieldsUnion; + + void setupAttributeVector(const vsm::FieldPath &fieldPath); }; class SearchVisitorFactory : public VisitorFactory { diff --git a/vbench/src/tests/app_dumpurl/app_dumpurl_test.cpp b/vbench/src/tests/app_dumpurl/app_dumpurl_test.cpp index 48a8ce156f0..2135d53acba 100644 --- a/vbench/src/tests/app_dumpurl/app_dumpurl_test.cpp +++ b/vbench/src/tests/app_dumpurl/app_dumpurl_test.cpp @@ -12,14 +12,14 @@ bool endsWith(const Memory &mem, const string &str) { } void readUntil(Input &input, SimpleBuffer &buffer, const string &end) { - ByteInput in(input, 256); + ByteInput in(input); while (!endsWith(buffer.get(), end)) { int c = in.get(); if (c < 0) { return; } buffer.reserve(1).data[0] = c; - buffer.commit(1, 0); + buffer.commit(1); } } diff --git a/vbench/src/tests/app_vbench/app_vbench_test.cpp b/vbench/src/tests/app_vbench/app_vbench_test.cpp index 37b613210a0..d1285255959 100644 --- a/vbench/src/tests/app_vbench/app_vbench_test.cpp +++ b/vbench/src/tests/app_vbench/app_vbench_test.cpp @@ -12,14 +12,14 @@ bool endsWith(const Memory &mem, const string &str) { } void readUntil(Input &input, SimpleBuffer &buffer, const string &end) { - ByteInput in(input, 256); + ByteInput in(input); while (!endsWith(buffer.get(), end)) { int c = in.get(); if (c < 0) { return; } buffer.reserve(1).data[0] = c; - buffer.commit(1, 0); + buffer.commit(1); } } diff --git a/vbench/src/tests/buffered_output/buffered_output_test.cpp b/vbench/src/tests/buffered_output/buffered_output_test.cpp index 79bd490c71c..9514d619f77 100644 --- a/vbench/src/tests/buffered_output/buffered_output_test.cpp +++ b/vbench/src/tests/buffered_output/buffered_output_test.cpp @@ -14,7 +14,7 @@ TEST("buffered output") { dst.printf("%d + %d = %d\n", 2, 2, 4); } { // verify data - LineReader src(buffer, 3); + LineReader src(buffer); string str; EXPECT_TRUE(src.readLine(str)); EXPECT_EQUAL("abc", str); diff --git a/vbench/src/tests/byte_input/byte_input_test.cpp b/vbench/src/tests/byte_input/byte_input_test.cpp index 251b16449be..28bc3c7b3ad 100644 --- a/vbench/src/tests/byte_input/byte_input_test.cpp +++ b/vbench/src/tests/byte_input/byte_input_test.cpp @@ -12,7 +12,7 @@ TEST("byte input") { } EXPECT_EQUAL(8u, buffer.get().size); { - ByteInput in(buffer, 3); + ByteInput in(buffer); EXPECT_EQUAL('a', in.get()); EXPECT_EQUAL('b', in.get()); EXPECT_EQUAL('c', in.get()); @@ -20,7 +20,7 @@ TEST("byte input") { } EXPECT_EQUAL(4u, buffer.get().size); { - ByteInput in(buffer, 3); + ByteInput in(buffer); EXPECT_EQUAL('e', in.get()); EXPECT_EQUAL('f', in.get()); EXPECT_EQUAL('g', in.get()); @@ -30,7 +30,7 @@ TEST("byte input") { } EXPECT_EQUAL(0u, buffer.get().size); { - ByteInput in(buffer, 3); + ByteInput in(buffer); EXPECT_EQUAL(-1, in.get()); } EXPECT_EQUAL(0u, buffer.get().size); diff --git 
a/vbench/src/tests/http_client/http_client_test.cpp b/vbench/src/tests/http_client/http_client_test.cpp index ce763e09b7d..55cb899f5cb 100644 --- a/vbench/src/tests/http_client/http_client_test.cpp +++ b/vbench/src/tests/http_client/http_client_test.cpp @@ -14,14 +14,14 @@ bool endsWith(const Memory &mem, const string &str) { } void readUntil(Input &input, SimpleBuffer &buffer, const string &end) { - ByteInput in(input, 256); + ByteInput in(input); while (!endsWith(buffer.get(), end)) { int c = in.get(); if (c < 0) { return; } buffer.reserve(1).data[0] = c; - buffer.commit(1, 0); + buffer.commit(1); } } diff --git a/vbench/src/tests/http_connection/http_connection_test.cpp b/vbench/src/tests/http_connection/http_connection_test.cpp index dc03a9e7e8e..986bc18ebff 100644 --- a/vbench/src/tests/http_connection/http_connection_test.cpp +++ b/vbench/src/tests/http_connection/http_connection_test.cpp @@ -15,7 +15,7 @@ TEST("http connection") { EXPECT_FALSE(client.fresh()); EXPECT_TRUE(client.mayReuse(5.1)); server.reset(); - client.stream().obtain(1, 1); // trigger eof + client.stream().obtain(); // trigger eof EXPECT_FALSE(client.mayReuse(5.1)); } diff --git a/vbench/src/tests/http_connection_pool/http_connection_pool_test.cpp b/vbench/src/tests/http_connection_pool/http_connection_pool_test.cpp index 06f158e7b84..2cd749d0426 100644 --- a/vbench/src/tests/http_connection_pool/http_connection_pool_test.cpp +++ b/vbench/src/tests/http_connection_pool/http_connection_pool_test.cpp @@ -19,7 +19,7 @@ TEST_MT_F("http connection pool", 2, ServerSocket()) { EXPECT_TRUE(conn.get() == 0); conn = pool.getConnection(ServerSpec("localhost", f1.port())); EXPECT_TRUE(conn.get() != 0); - conn->stream().obtain(1, 1); // trigger eof + conn->stream().obtain(); // trigger eof pool.putConnection(std::move(conn)); EXPECT_TRUE(conn.get() == 0); conn = pool.getConnection(ServerSpec("localhost", f1.port())); @@ -38,7 +38,7 @@ TEST_MT_FFFF("stress http connection pool", 256, ServerSocket(), Timer(), HttpCo HttpConnection::UP conn = f3.getConnection(ServerSpec("localhost", f1.port())); EXPECT_TRUE(conn.get() != 0); if (thread_id > (num_threads / 2)) { - conn->stream().obtain(1, 1); // trigger eof + conn->stream().obtain(); // trigger eof } f3.putConnection(std::move(conn)); EXPECT_TRUE(conn.get() == 0); diff --git a/vbench/src/tests/line_reader/line_reader_test.cpp b/vbench/src/tests/line_reader/line_reader_test.cpp index 631b2ffc765..1b893a4ca15 100644 --- a/vbench/src/tests/line_reader/line_reader_test.cpp +++ b/vbench/src/tests/line_reader/line_reader_test.cpp @@ -16,7 +16,7 @@ TEST("line reader") { dst.append("zzz"); } { - LineReader src(buffer, 3); + LineReader src(buffer); string str; EXPECT_TRUE(src.readLine(str)); EXPECT_EQUAL("foo", str); diff --git a/vbench/src/tests/mapped_file_input/mapped_file_input_test.cpp b/vbench/src/tests/mapped_file_input/mapped_file_input_test.cpp index 639b91bf66b..2edbda02ec0 100644 --- a/vbench/src/tests/mapped_file_input/mapped_file_input_test.cpp +++ b/vbench/src/tests/mapped_file_input/mapped_file_input_test.cpp @@ -12,7 +12,7 @@ TEST("mapped file input") { { MappedFileInput file(TEST_PATH("file.txt")); EXPECT_TRUE(!file.tainted()); - LineReader reader(file, 3); + LineReader reader(file); string line; EXPECT_TRUE(reader.readLine(line)); EXPECT_EQUAL("file content", line); diff --git a/vbench/src/tests/simple_buffer/simple_buffer_test.cpp b/vbench/src/tests/simple_buffer/simple_buffer_test.cpp index f666438d721..f0be5766e55 100644 --- 
a/vbench/src/tests/simple_buffer/simple_buffer_test.cpp +++ b/vbench/src/tests/simple_buffer/simple_buffer_test.cpp @@ -16,7 +16,7 @@ TEST("simple buffer") { SimpleBuffer buf; TEST_DO(checkBuffer("", buf)); { // read from empty buffer - EXPECT_EQUAL(0u, buf.obtain(1, 1).size); + EXPECT_EQUAL(0u, buf.obtain().size); } { // write to buffer WritableMemory mem = buf.reserve(10); @@ -25,20 +25,20 @@ TEST("simple buffer") { mem.data[0] = 'a'; mem.data[1] = 'b'; mem.data[2] = 'c'; - EXPECT_EQUAL(&buf, &buf.commit(3, 0)); + EXPECT_EQUAL(&buf, &buf.commit(3)); mem = buf.reserve(0); TEST_DO(checkBuffer("abc", buf)); EXPECT_EQUAL(0u, mem.size); } - { // unaligned read across end (last byte not evicted) - Memory mem = buf.obtain(2, 1); + { // read without evicting last byte + Memory mem = buf.obtain(); TEST_DO(checkBuffer("abc", buf)); - TEST_DO(checkMemory("ab", mem)); + TEST_DO(checkMemory("abc", mem)); EXPECT_EQUAL(&buf, &buf.evict(2)); - mem = buf.obtain(2, 1); + mem = buf.obtain(); TEST_DO(checkBuffer("c", buf)); TEST_DO(checkMemory("c", mem)); - mem = buf.obtain(2, 1); + mem = buf.obtain(); TEST_DO(checkBuffer("c", buf)); TEST_DO(checkMemory("c", mem)); } @@ -47,21 +47,21 @@ TEST("simple buffer") { EXPECT_EQUAL(10u, mem.size); TEST_DO(checkBuffer("c", buf)); mem.data[0] = 'd'; - EXPECT_EQUAL(&buf, &buf.commit(1, 0)); + EXPECT_EQUAL(&buf, &buf.commit(1)); mem = buf.reserve(5); TEST_DO(checkBuffer("cd", buf)); EXPECT_EQUAL(5u, mem.size); } - { // aligned read until end - Memory mem = buf.obtain(1, 1); + { // read until end + Memory mem = buf.obtain(); TEST_DO(checkBuffer("cd", buf)); - TEST_DO(checkMemory("c", mem)); + TEST_DO(checkMemory("cd", mem)); EXPECT_EQUAL(&buf, &buf.evict(1)); - mem = buf.obtain(1, 1); + mem = buf.obtain(); TEST_DO(checkBuffer("d", buf)); TEST_DO(checkMemory("d", mem)); EXPECT_EQUAL(&buf, &buf.evict(1)); - mem = buf.obtain(1, 1); + mem = buf.obtain(); TEST_DO(checkBuffer("", buf)); TEST_DO(checkMemory("", mem)); } diff --git a/vbench/src/tests/socket/socket_test.cpp b/vbench/src/tests/socket/socket_test.cpp index 4e58465c5a8..e49a31773f3 100644 --- a/vbench/src/tests/socket/socket_test.cpp +++ b/vbench/src/tests/socket/socket_test.cpp @@ -17,7 +17,7 @@ struct Agent { out.append("\n"); } void read(const char *prefix) { - LineReader reader(*socket, 32); + LineReader reader(*socket); for (size_t lines = 0; true; ++lines) { string line; reader.readLine(line); @@ -60,7 +60,7 @@ TEST("socket") { serverThread.join(); { server.socket.reset(); - LineReader reader(*client.socket, 32); + LineReader reader(*client.socket); string line; EXPECT_FALSE(reader.readLine(line)); EXPECT_TRUE(line.empty()); diff --git a/vbench/src/vbench/core/buffered_output.h b/vbench/src/vbench/core/buffered_output.h index f274ce9f71c..80ad163869e 100644 --- a/vbench/src/vbench/core/buffered_output.h +++ b/vbench/src/vbench/core/buffered_output.h @@ -21,7 +21,7 @@ class BufferedOutput void ensureFree(size_t bytes) { if ((_pos + bytes) > _data.size) { - _data = _output.commit(_pos, 0).reserve(std::max(bytes, _chunkSize)); + _data = _output.commit(_pos).reserve(std::max(bytes, _chunkSize)); _pos = 0; } } @@ -29,7 +29,7 @@ class BufferedOutput public: BufferedOutput(Output &output, size_t chunkSize) : _output(output), _data(), _pos(), _chunkSize(chunkSize) {} - ~BufferedOutput() { _output.commit(_pos, 0); } + ~BufferedOutput() { _output.commit(_pos); } BufferedOutput &append(char c) { ensureFree(1); diff --git a/vbench/src/vbench/core/byte_input.h b/vbench/src/vbench/core/byte_input.h index 
34468a2ffeb..7294c9682bb 100644 --- a/vbench/src/vbench/core/byte_input.h +++ b/vbench/src/vbench/core/byte_input.h @@ -16,17 +16,15 @@ class ByteInput Input &_input; Memory _data; size_t _pos; - size_t _chunkSize; public: /** * Wrap an Input to read one byte at a time. * * @param input the underlying Input - * @param chunkSize how much data to request from the input per transaction **/ - ByteInput(Input &input, size_t chunkSize) - : _input(input), _data(), _pos(0), _chunkSize(chunkSize) {} + ByteInput(Input &input) + : _input(input), _data(), _pos(0) {} ~ByteInput() { _input.evict(_pos); } /** @@ -38,7 +36,7 @@ class ByteInput if (_pos < _data.size) { return (_data.data[_pos++] & 0xff); } else { - _data = _input.evict(_pos).obtain(_chunkSize, 1); + _data = _input.evict(_pos).obtain(); if ((_pos = 0) < _data.size) { return (_data.data[_pos++] & 0xff); } diff --git a/vbench/src/vbench/core/input.h b/vbench/src/vbench/core/input.h index 6b767ca1997..eceb973a94a 100644 --- a/vbench/src/vbench/core/input.h +++ b/vbench/src/vbench/core/input.h @@ -14,14 +14,11 @@ namespace vbench { struct Input { /** - * Obtain more input data. You will never obtain more data than - * requested, but you may obtain less. + * Obtain more input data. * * @return the obtained input data - * @param bytes the number of bytes requested - * @param lowMark minimum bytes in byffer before refilling **/ - virtual Memory obtain(size_t bytes, size_t lowMark) = 0; + virtual Memory obtain() = 0; /** * Evict processed input data. Never evict more data than you have diff --git a/vbench/src/vbench/core/input_file_reader.h b/vbench/src/vbench/core/input_file_reader.h index de80a023719..919b188d44a 100644 --- a/vbench/src/vbench/core/input_file_reader.h +++ b/vbench/src/vbench/core/input_file_reader.h @@ -22,7 +22,7 @@ class InputFileReader : public Taintable public: InputFileReader(const string &name) - : _file(name), _lines(_file, 4096) {} + : _file(name), _lines(_file) {} /** * Read a single line from the input file and put it into diff --git a/vbench/src/vbench/core/line_reader.cpp b/vbench/src/vbench/core/line_reader.cpp index 5bc8160ccd2..4a893a1b9fc 100644 --- a/vbench/src/vbench/core/line_reader.cpp +++ b/vbench/src/vbench/core/line_reader.cpp @@ -13,8 +13,8 @@ void stripCR(string &dst) { } } // namespace vbench:: -LineReader::LineReader(Input &input, size_t chunkSize) - : _input(input, chunkSize) +LineReader::LineReader(Input &input) + : _input(input) { } diff --git a/vbench/src/vbench/core/line_reader.h b/vbench/src/vbench/core/line_reader.h index c26dea96db2..ba5e9e63fc7 100644 --- a/vbench/src/vbench/core/line_reader.h +++ b/vbench/src/vbench/core/line_reader.h @@ -22,9 +22,8 @@ class LineReader * Wrap an Input to read one line at a time. * * @param input the underlying Input - * @param chunkSize how much data to request from the input per transaction **/ - LineReader(Input &input, size_t chunkSize); + LineReader(Input &input); /** * Read the next line of input. Lines are separated by '\n'. 
'\r' diff --git a/vbench/src/vbench/core/mapped_file_input.cpp b/vbench/src/vbench/core/mapped_file_input.cpp index c4edc619e5d..0a20b91c375 100644 --- a/vbench/src/vbench/core/mapped_file_input.cpp +++ b/vbench/src/vbench/core/mapped_file_input.cpp @@ -25,9 +25,9 @@ MappedFileInput::MappedFileInput(const string &name) } Memory -MappedFileInput::obtain(size_t bytes, size_t) +MappedFileInput::obtain() { - return Memory(_data + _pos, std::min(bytes, (_size - _pos))); + return Memory(_data + _pos, (_size - _pos)); } Input & diff --git a/vbench/src/vbench/core/mapped_file_input.h b/vbench/src/vbench/core/mapped_file_input.h index 94f0e21dbf2..197dbf55959 100644 --- a/vbench/src/vbench/core/mapped_file_input.h +++ b/vbench/src/vbench/core/mapped_file_input.h @@ -24,7 +24,7 @@ class MappedFileInput : public Input, public: MappedFileInput(const string &name); Memory get() const { return Memory(_data, _size); } - virtual Memory obtain(size_t bytes, size_t lowMark); + virtual Memory obtain(); virtual Input &evict(size_t bytes); virtual const Taint &tainted() const { return _taint; } }; diff --git a/vbench/src/vbench/core/output.h b/vbench/src/vbench/core/output.h index 410e0f72643..6ad218c9718 100644 --- a/vbench/src/vbench/core/output.h +++ b/vbench/src/vbench/core/output.h @@ -27,9 +27,8 @@ struct Output * * @return this object, for chaining * @param bytes number of bytes to commit - * @param hiMark maximum number of unflushed bytes after commit **/ - virtual Output &commit(size_t bytes, size_t hiMark) = 0; + virtual Output &commit(size_t bytes) = 0; virtual ~Output() {} }; diff --git a/vbench/src/vbench/core/simple_buffer.cpp b/vbench/src/vbench/core/simple_buffer.cpp index 1e1d0a8e4ae..b37696b7e8a 100644 --- a/vbench/src/vbench/core/simple_buffer.cpp +++ b/vbench/src/vbench/core/simple_buffer.cpp @@ -12,9 +12,9 @@ SimpleBuffer::SimpleBuffer() } Memory -SimpleBuffer::obtain(size_t bytes, size_t) +SimpleBuffer::obtain() { - return Memory(&_data[0], std::min(bytes, _used)); + return Memory(&_data[0], _used); } Input & @@ -34,7 +34,7 @@ SimpleBuffer::reserve(size_t bytes) } Output & -SimpleBuffer::commit(size_t bytes, size_t) +SimpleBuffer::commit(size_t bytes) { assert(bytes <= (_data.size() - _used)); _used += bytes; diff --git a/vbench/src/vbench/core/simple_buffer.h b/vbench/src/vbench/core/simple_buffer.h index 173d774daf6..3cf55037917 100644 --- a/vbench/src/vbench/core/simple_buffer.h +++ b/vbench/src/vbench/core/simple_buffer.h @@ -26,10 +26,10 @@ class SimpleBuffer : public Input, public: SimpleBuffer(); Memory get() const { return Memory(&_data[0], _used); } - virtual Memory obtain(size_t bytes, size_t lowMark); + virtual Memory obtain(); virtual Input &evict(size_t bytes); virtual WritableMemory reserve(size_t bytes); - virtual Output &commit(size_t bytes, size_t hiMark); + virtual Output &commit(size_t bytes); bool operator==(const SimpleBuffer &rhs) const; }; diff --git a/vbench/src/vbench/core/socket.cpp b/vbench/src/vbench/core/socket.cpp index 265f41c3040..84975adf4ae 100644 --- a/vbench/src/vbench/core/socket.cpp +++ b/vbench/src/vbench/core/socket.cpp @@ -5,6 +5,8 @@ namespace vbench { +constexpr size_t READ_SIZE = 32768; + Socket::Socket(std::unique_ptr socket) : _socket(std::move(socket)), _input(), @@ -38,20 +40,20 @@ Socket::~Socket() } Memory -Socket::obtain(size_t bytes, size_t lowMark) +Socket::obtain() { - if (_input.get().size < bytes && _input.get().size < lowMark && !_eof && !_taint) { - WritableMemory buf = _input.reserve(bytes - _input.get().size); + if 
((_input.get().size == 0) && !_eof && !_taint) { + WritableMemory buf = _input.reserve(READ_SIZE); ssize_t res = _socket->Read(buf.data, buf.size); if (res > 0) { - _input.commit(res, 0); + _input.commit(res); } else if (res < 0) { _taint.reset("socket read error"); } else { _eof = true; } } - return _input.obtain(bytes, 1); + return _input.obtain(); } Input & @@ -68,11 +70,11 @@ Socket::reserve(size_t bytes) } Output & -Socket::commit(size_t bytes, size_t hiMark) +Socket::commit(size_t bytes) { - _output.commit(bytes, 0); - while (_output.get().size > hiMark && !_taint) { - Memory buf = _output.obtain(_output.get().size, 1); + _output.commit(bytes); + while ((_output.get().size > 0) && !_taint) { + Memory buf = _output.obtain(); ssize_t res = _socket->Write(buf.data, buf.size); if (res > 0) { _output.evict(res); diff --git a/vbench/src/vbench/core/socket.h b/vbench/src/vbench/core/socket.h index 52c27dfc618..707ea6359aa 100644 --- a/vbench/src/vbench/core/socket.h +++ b/vbench/src/vbench/core/socket.h @@ -25,10 +25,10 @@ class Socket : public Stream Socket(const string host, int port); virtual ~Socket(); virtual bool eof() const { return _eof; } - virtual Memory obtain(size_t bytes, size_t lowMark); + virtual Memory obtain(); virtual Input &evict(size_t bytes); virtual WritableMemory reserve(size_t bytes); - virtual Output &commit(size_t bytes, size_t hiMark); + virtual Output &commit(size_t bytes); virtual const Taint &tainted() const { return _taint; } }; diff --git a/vbench/src/vbench/http/http_client.cpp b/vbench/src/vbench/http/http_client.cpp index a83ad41b41a..3503329991d 100644 --- a/vbench/src/vbench/http/http_client.cpp +++ b/vbench/src/vbench/http/http_client.cpp @@ -24,7 +24,7 @@ HttpClient::writeRequest() { bool HttpClient::readStatus() { - LineReader reader(_conn->stream(), READ_SIZE); + LineReader reader(_conn->stream()); if (reader.readLine(_line) && (splitstr(_line, "\t ", _split) >= 2)) { if (_split[0] == "HTTP/1.0") { _header.version = 0; @@ -53,7 +53,7 @@ HttpClient::readStatus() bool HttpClient::readHeaders() { - LineReader reader(_conn->stream(), READ_SIZE); + LineReader reader(_conn->stream()); while (reader.readLine(_line)) { if (_line.empty()) { return true; @@ -99,7 +99,7 @@ bool HttpClient::readContent(size_t len) { Input &input = _conn->stream(); while (len > 0) { - Memory mem = input.obtain(READ_SIZE, 1); + Memory mem = input.obtain(); mem.size = std::min(len, mem.size); if (mem.size == 0) { _handler.handleFailure(strfmt("short read: missing %zu bytes", len)); @@ -115,7 +115,7 @@ HttpClient::readContent(size_t len) { bool HttpClient::readChunkSize(bool first, size_t &size) { - LineReader reader(_conn->stream(), READ_SIZE); + LineReader reader(_conn->stream()); if (!first && (!reader.readLine(_line) || !_line.empty())) { return false; } @@ -130,7 +130,7 @@ HttpClient::readChunkSize(bool first, size_t &size) bool HttpClient::skipTrailers() { - LineReader reader(_conn->stream(), READ_SIZE); + LineReader reader(_conn->stream()); while (reader.readLine(_line)) { if (_line.empty()) { return true; @@ -164,7 +164,7 @@ HttpClient::readContent() } Input &input = _conn->stream(); for (;;) { - Memory mem = input.obtain(READ_SIZE, 1); + Memory mem = input.obtain(); if (mem.size == 0) { if (_conn->stream().tainted()) { _handler.handleFailure(strfmt("read error: '%s'", @@ -182,7 +182,7 @@ bool HttpClient::perform() { writeRequest(); - if (!_conn->fresh() && (_conn->stream().obtain(READ_SIZE, 1).size == 0)) { + if (!_conn->fresh() && (_conn->stream().obtain().size == 0)) { 
_conn.reset(new HttpConnection(_conn->server())); writeRequest(); } diff --git a/vbench/src/vbench/http/http_client.h b/vbench/src/vbench/http/http_client.h index b80dca2c819..7497d0c8621 100644 --- a/vbench/src/vbench/http/http_client.h +++ b/vbench/src/vbench/http/http_client.h @@ -17,7 +17,6 @@ namespace vbench { class HttpClient { private: - static const size_t READ_SIZE = 8000; static const size_t WRITE_SIZE = 2000; struct HeaderInfo { diff --git a/vbench/src/vbench/test/simple_http_result_handler.cpp b/vbench/src/vbench/test/simple_http_result_handler.cpp index a0896381f33..a09a4d30a22 100644 --- a/vbench/src/vbench/test/simple_http_result_handler.cpp +++ b/vbench/src/vbench/test/simple_http_result_handler.cpp @@ -24,7 +24,7 @@ SimpleHttpResultHandler::handleContent(const Memory &data) { WritableMemory wm = _content.reserve(data.size); memcpy(wm.data, data.data, data.size); - _content.commit(data.size, 0); + _content.commit(data.size); } void diff --git a/vespajlib/src/main/java/com/yahoo/io/GrowableBufferOutputStream.java b/vespajlib/src/main/java/com/yahoo/io/GrowableBufferOutputStream.java index 85b249432d4..b8dfedc8ede 100644 --- a/vespajlib/src/main/java/com/yahoo/io/GrowableBufferOutputStream.java +++ b/vespajlib/src/main/java/com/yahoo/io/GrowableBufferOutputStream.java @@ -9,13 +9,11 @@ import java.util.Iterator; import java.nio.ByteBuffer; - /** - * - * @author Bjorn Borud + * @author Bjørn Borud */ public class GrowableBufferOutputStream extends OutputStream { -// private static final int MINIMUM_BUFFERSIZE = (64 * 1024); + private ByteBuffer lastBuffer; private ByteBuffer directBuffer; private LinkedList bufferList = new LinkedList<>(); diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java index a66caa8dd35..a4b1a02f95c 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java @@ -207,8 +207,9 @@ public static Builder of(TensorType type, DimensionSizes sizes) { for (int i = 0; i < sizes.dimensions(); i++ ) { Optional size = type.dimensions().get(i).size(); if (size.isPresent() && size.get() < sizes.size(i)) - throw new IllegalArgumentException("Size of " + type.dimensions() + " is " + sizes.size(i) + - " but cannot be larger than " + size.get()); + throw new IllegalArgumentException("Size of dimension " + type.dimensions().get(i).name() + " is " + + sizes.size(i) + + " but cannot be larger than " + size.get() + " in " + type); } return new BoundBuilder(type, sizes); diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java index b0132693fa3..f934c4fcaf9 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java @@ -75,19 +75,19 @@ public Optional indexOfDimension(String dimension) { } /** - * Returns whether a tensor of the given type can be assigned to this type, - * i.e of this type is a generalization of the given type. + * Returns whether this type can be assigned to the given type, + * i.e if the given type is a generalization of this type. 
*/ - public boolean isAssignableTo(TensorType other) { - if (other.dimensions().size() != this.dimensions().size()) return false; - for (int i = 0; i < other.dimensions().size(); i++) { + public boolean isAssignableTo(TensorType generalization) { + if (generalization.dimensions().size() != this.dimensions().size()) return false; + for (int i = 0; i < generalization.dimensions().size(); i++) { Dimension thisDimension = this.dimensions().get(i); - Dimension otherDimension = other.dimensions().get(i); - if (thisDimension.isIndexed() != otherDimension.isIndexed()) return false; - if ( ! thisDimension.name().equals(otherDimension.name())) return false; - if (thisDimension.size().isPresent()) { - if ( ! otherDimension.size().isPresent()) return false; - if (otherDimension.size().get() > thisDimension.size().get() ) return false; + Dimension generalizationDimension = generalization.dimensions().get(i); + if (thisDimension.isIndexed() != generalizationDimension.isIndexed()) return false; + if ( ! thisDimension.name().equals(generalizationDimension.name())) return false; + if (generalizationDimension.size().isPresent()) { + if ( ! thisDimension.size().isPresent()) return false; + if (thisDimension.size().get() > generalizationDimension.size().get() ) return false; } } return true; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/BinaryFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/BinaryFormat.java index 9b0ccdcb6c8..a6949fdf57f 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/BinaryFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/BinaryFormat.java @@ -6,6 +6,8 @@ import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; +import java.util.Optional; + /** * Representation of a specific binary format with functions for serializing a Tensor object into * this format or de-serializing binary data into a Tensor object. @@ -23,9 +25,9 @@ interface BinaryFormat { /** * Deserialize the given binary data into a Tensor object. * - * @param type the expected abstract type of the tensor to serialize + * @param type the expected abstract type of the tensor to deserialize, or empty to use type information from the data * @param buffer the buffer containing the tensor binary data */ - Tensor decode(TensorType type, GrowableByteBuffer buffer); + Tensor decode(Optional<TensorType> type, GrowableByteBuffer buffer); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/DenseBinaryFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/DenseBinaryFormat.java index 0a97576d5b7..5a34f59fe89 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/DenseBinaryFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/DenseBinaryFormat.java @@ -6,9 +6,9 @@ import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; -import com.yahoo.text.Utf8; import java.util.Iterator; +import java.util.Optional; /** * Implementation of a dense binary format for a tensor on the form: @@ -46,36 +46,39 @@ private void encodeCells(GrowableByteBuffer buffer, Tensor tensor) { } @Override - public Tensor decode(TensorType type, GrowableByteBuffer buffer) { - DimensionSizes sizes = decodeDimensionSizes(type, buffer); + public Tensor decode(Optional<TensorType> optionalType, GrowableByteBuffer buffer) { + TensorType type; + DimensionSizes sizes; + if (optionalType.isPresent()) { + type = optionalType.get(); + TensorType serializedType = decodeType(buffer); + if ( !
serializedType.isAssignableTo(type)) + throw new IllegalArgumentException("Type/instance mismatch: A tensor of type " + serializedType + + " cannot be assigned to type " + type); + sizes = sizesFromType(serializedType); + } + else { + type = decodeType(buffer); + sizes = sizesFromType(type); + } Tensor.Builder builder = Tensor.Builder.of(type, sizes); decodeCells(sizes, buffer, (IndexedTensor.BoundBuilder)builder); return builder.build(); } - private DimensionSizes decodeDimensionSizes(TensorType type, GrowableByteBuffer buffer) { + private TensorType decodeType(GrowableByteBuffer buffer) { int dimensionCount = buffer.getInt1_4Bytes(); - if (type.dimensions().size() != dimensionCount) - throw new IllegalArgumentException("Type/instance mismatch: Instance has " + dimensionCount + - " dimensions but type is " + type); - - DimensionSizes.Builder builder = new DimensionSizes.Builder(dimensionCount); - for (int i = 0; i < dimensionCount; i++) { - TensorType.Dimension expectedDimension = type.dimensions().get(i); - - String encodedName = buffer.getUtf8String(); - int encodedSize = buffer.getInt1_4Bytes(); - - if ( ! expectedDimension.name().equals(encodedName)) - throw new IllegalArgumentException("Type/instance mismatch: Instance has '" + encodedName + - "' as dimension " + i + " but type is " + type); - - if (expectedDimension.size().isPresent() && expectedDimension.size().get() < encodedSize) - throw new IllegalArgumentException("Type/instance mismatch: Instance has size " + encodedSize + - " in " + expectedDimension + " in type " + type); + TensorType.Builder builder = new TensorType.Builder(); + for (int i = 0; i < dimensionCount; i++) + builder.indexed(buffer.getUtf8String(), buffer.getInt1_4Bytes()); + return builder.build(); + } - builder.set(i, encodedSize); - } + /** Returns dimension sizes from a type consisting of fully specified, indexed dimensions only */ + private DimensionSizes sizesFromType(TensorType type) { + DimensionSizes.Builder builder = new DimensionSizes.Builder(type.dimensions().size()); + for (int i = 0; i < type.dimensions().size(); i++) + builder.set(i, type.dimensions().get(i).size().get()); return builder.build(); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/SparseBinaryFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/SparseBinaryFormat.java index 8ab23c8d77c..7609f6748f4 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/SparseBinaryFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/SparseBinaryFormat.java @@ -53,26 +53,29 @@ private void encodeAddress(GrowableByteBuffer buffer, TensorAddress address) { } @Override - public Tensor decode(TensorType type, GrowableByteBuffer buffer) { - consumeAndValidateDimensions(type, buffer); + public Tensor decode(Optional<TensorType> optionalType, GrowableByteBuffer buffer) { + TensorType type; + if (optionalType.isPresent()) { + type = optionalType.get(); + TensorType serializedType = decodeType(buffer); + if ( !
serializedType.isAssignableTo(type)) + throw new IllegalArgumentException("Type/instance mismatch: A tensor of type " + serializedType + + " cannot be assigned to type " + type); + } + else { + type = decodeType(buffer); + } Tensor.Builder builder = Tensor.Builder.of(type); decodeCells(buffer, builder, type); return builder.build(); } - private void consumeAndValidateDimensions(TensorType type, GrowableByteBuffer buffer) { - int dimensionCount = buffer.getInt1_4Bytes(); - if (type.dimensions().size() != dimensionCount) - throw new IllegalArgumentException("Type/instance mismatch: Instance has " + dimensionCount + - " dimensions but type is " + type); - - for (int i = 0; i < dimensionCount; ++i) { - TensorType.Dimension expectedDimension = type.dimensions().get(i); - String encodedName = buffer.getUtf8String(); - if ( ! expectedDimension.name().equals(encodedName)) - throw new IllegalArgumentException("Type/instance mismatch: Instance has '" + encodedName + - "' as dimension " + i + " but type is " + type); - } + private TensorType decodeType(GrowableByteBuffer buffer) { + int numDimensions = buffer.getInt1_4Bytes(); + TensorType.Builder builder = new TensorType.Builder(); + for (int i = 0; i < numDimensions; ++i) + builder.mapped(buffer.getUtf8String()); + return builder.build(); } private void decodeCells(GrowableByteBuffer buffer, Tensor.Builder builder, TensorType type) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/TypedBinaryFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/TypedBinaryFormat.java index 19c1810d928..6413602c532 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/TypedBinaryFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/TypedBinaryFormat.java @@ -7,6 +7,8 @@ import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; +import java.util.Optional; + /** * Class used by clients for serializing a Tensor object into binary format or * de-serializing binary data into a Tensor object. 
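The decode entry point in the next hunk relies on the TensorType.isAssignableTo rewrite earlier in this patch: serialized data is accepted when its type is assignable to the requested type. A minimal sketch of that rule, illustrative only and not part of the patch, using only TensorType.fromSpec and isAssignableTo as they appear in this change set:

import com.yahoo.tensor.TensorType;

// Sketch: which serialized types the decoders will accept for a requested (expected) type.
public class AssignabilityExample {

    public static void main(String[] args) {
        TensorType bound   = TensorType.fromSpec("tensor(x[2],y[2])");
        TensorType unbound = TensorType.fromSpec("tensor(x[],y[])");
        TensorType smaller = TensorType.fromSpec("tensor(x[1],y[1])");

        System.out.println(smaller.isAssignableTo(bound));   // true: every bound size (1) is within the bound (2)
        System.out.println(bound.isAssignableTo(unbound));   // true: unbound dimensions generalize any size
        System.out.println(bound.isAssignableTo(smaller));   // false: size 2 exceeds the bound 1
        System.out.println(unbound.isAssignableTo(bound));   // false: an unbound size cannot satisfy a bound
    }
}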
@@ -38,8 +40,15 @@ public static byte[] encode(Tensor tensor) { return result; } - public static Tensor decode(TensorType type, byte[] data) { - GrowableByteBuffer buffer = GrowableByteBuffer.wrap(data); + /** + * Decodes the given binary data into a tensor + * + * @param type the type to decode and validate to, or empty to use the type given in the data + * @param buffer the buffer containing the data, use GrowableByteBuffer.wrap(byte[]) if you have a byte array + * @return the resulting tensor + * @throws IllegalArgumentException if the tensor data is invalid + */ + public static Tensor decode(Optional<TensorType> type, GrowableByteBuffer buffer) { int formatType = buffer.getInt1_4Bytes(); switch (formatType) { case SPARSE_BINARY_FORMAT_TYPE: return new SparseBinaryFormat().decode(type, buffer); diff --git a/vespajlib/src/test/java/com/yahoo/tensor/serialization/DenseBinaryFormatTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/serialization/DenseBinaryFormatTestCase.java index 15e82e6b15c..9cf48bd0fdf 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/serialization/DenseBinaryFormatTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/serialization/DenseBinaryFormatTestCase.java @@ -2,14 +2,18 @@ package com.yahoo.tensor.serialization; import com.google.common.collect.Sets; +import com.yahoo.io.GrowableByteBuffer; import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; import org.junit.Ignore; import org.junit.Test; import java.util.Arrays; +import java.util.Optional; import java.util.Set; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; /** * Tests for the dense binary format. @@ -26,6 +30,19 @@ public void testSerialization() { assertSerialization("tensor(x[],y[]):{{x:0,y:0}:2.0, {x:0,y:1}:3.0, {x:1,y:0}:4.0, {x:1,y:1}:5.0}"); assertSerialization("tensor(x[1],y[2],z[3]):{{y:0,x:0,z:0}:2.0}"); } + + @Test + public void testSerializationToSeparateType() { + assertSerialization(Tensor.from("tensor(x[1],y[1]):{{x:0,y:0}:2.0}"), TensorType.fromSpec("tensor(x[],y[])")); + assertSerialization(Tensor.from("tensor(x[1],y[1]):{{x:0,y:0}:2.0}"), TensorType.fromSpec("tensor(x[2],y[2])")); + try { + assertSerialization(Tensor.from("tensor(x[2],y[2]):{{x:0,y:0}:2.0}"), TensorType.fromSpec("tensor(x[1],y[1])")); + fail("Expected exception"); + } + catch (IllegalArgumentException expected) { + assertEquals("Type/instance mismatch: A tensor of type tensor(x[2],y[2]) cannot be assigned to type tensor(x[1],y[1])", expected.getMessage()); + } + } @Test public void requireThatSerializationFormatDoNotChange() { @@ -45,8 +62,12 @@ private void assertSerialization(String tensorString) { } private void assertSerialization(Tensor tensor) { + assertSerialization(tensor, tensor.type()); + } + + private void assertSerialization(Tensor tensor, TensorType expectedType) { byte[] encodedTensor = TypedBinaryFormat.encode(tensor); - Tensor decodedTensor = TypedBinaryFormat.decode(tensor.type(), encodedTensor); + Tensor decodedTensor = TypedBinaryFormat.decode(Optional.of(expectedType), GrowableByteBuffer.wrap(encodedTensor)); assertEquals(tensor, decodedTensor); } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/serialization/SparseBinaryFormatTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/serialization/SparseBinaryFormatTestCase.java index 283aa90cf65..79c4c7938c1 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/serialization/SparseBinaryFormatTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/serialization/SparseBinaryFormatTestCase.java @@ -2,13
+2,17 @@ package com.yahoo.tensor.serialization; import com.google.common.collect.Sets; +import com.yahoo.io.GrowableByteBuffer; import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; import org.junit.Test; import java.util.Arrays; +import java.util.Optional; import java.util.Set; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; /** * Tests for the sparse binary format. @@ -28,6 +32,17 @@ public void testSerialization() { assertSerialization("tensor(x{},y{},z{}):{{y:0,x:0,z:3}:2.0,{y:1,x:0,z:6}:3.0}"); } + @Test + public void testSerializationToSeparateType() { + try { + assertSerialization(Tensor.from("tensor(x{},y{}):{{x:0,y:0}:2.0}"), TensorType.fromSpec("tensor(x{})")); + fail("Expected exception"); + } + catch (IllegalArgumentException expected) { + assertEquals("Type/instance mismatch: A tensor of type tensor(x{},y{}) cannot be assigned to type tensor(x{})", expected.getMessage()); + } + } + @Test public void requireThatSerializationFormatDoNotChange() { byte[] encodedTensor = new byte[] {1, // binary format type @@ -45,8 +60,13 @@ private void assertSerialization(String tensorString) { } private void assertSerialization(Tensor tensor) { + assertSerialization(tensor, tensor.type()); + } + + private void assertSerialization(Tensor tensor, TensorType expectedType) { byte[] encodedTensor = TypedBinaryFormat.encode(tensor); - Tensor decodedTensor = TypedBinaryFormat.decode(tensor.type(), encodedTensor); + Tensor decodedTensor = TypedBinaryFormat.decode(Optional.of(expectedType), + GrowableByteBuffer.wrap(encodedTensor)); assertEquals(tensor, decodedTensor); } diff --git a/vespalib/src/vespa/vespalib/util/generationholder.cpp b/vespalib/src/vespa/vespalib/util/generationholder.cpp index d8f36793938..a33ba5e0cec 100644 --- a/vespalib/src/vespa/vespalib/util/generationholder.cpp +++ b/vespalib/src/vespa/vespalib/util/generationholder.cpp @@ -4,27 +4,13 @@ namespace vespalib { -GenerationHeldBase::~GenerationHeldBase() -{ -} - -GenerationHeldMalloc::GenerationHeldMalloc(size_t size, void *data) - : GenerationHeldBase(size), - _data(data) -{ -} - -GenerationHeldMalloc::~GenerationHeldMalloc() -{ - free(_data); -}; +GenerationHeldBase::~GenerationHeldBase() { } GenerationHolder::GenerationHolder() : _hold1List(), _hold2List(), _heldBytes(0) -{ -} +{ } GenerationHolder::~GenerationHolder() { diff --git a/vespalib/src/vespa/vespalib/util/generationholder.h b/vespalib/src/vespa/vespalib/util/generationholder.h index b4c8fa8a541..33aad9c3f88 100644 --- a/vespalib/src/vespa/vespalib/util/generationholder.h +++ b/vespalib/src/vespa/vespalib/util/generationholder.h @@ -25,18 +25,8 @@ class GenerationHeldBase _size(size) { } - virtual ~GenerationHeldBase(void); - size_t getSize(void) const { return _size; } -}; - -class GenerationHeldMalloc : public GenerationHeldBase -{ - void *_data; - -public: - GenerationHeldMalloc(size_t size, void *data); - - virtual ~GenerationHeldMalloc(void); + virtual ~GenerationHeldBase(); + size_t getSize() const { return _size; } }; template diff --git a/vsm/src/tests/docsum/docsum.cpp b/vsm/src/tests/docsum/docsum.cpp index c084bd5593f..4ebd7061b69 100644 --- a/vsm/src/tests/docsum/docsum.cpp +++ b/vsm/src/tests/docsum/docsum.cpp @@ -245,35 +245,6 @@ DocsumTest::requireThatJSONDocsumWriterHandlesMap() } } -void -DocsumTest::testDocSumCache() -{ - Document::SP d1(new TestDocument(0, 1)); - d1->setField(0, FieldValue::UP(new StringFieldValue("aa"))); - Document::SP d2(new TestDocument(1, 2)); - d2->setField(0, 
FieldValue::UP(new StringFieldValue("bbb"))); - d2->setField(1, FieldValue::UP(new StringFieldValue("cccc"))); - DocSumCache cac1; - cac1.push_back(d1); - cac1.push_back(d2); - EXPECT_EQUAL(cac1.cache().size(), 2u); - - Document::SP d3(new TestDocument(2, 1)); - d3->setField(0, FieldValue::UP(new StringFieldValue("ddddd"))); - DocSumCache cac2; - cac2.push_back(d3); - cac1.insert(cac2); - EXPECT_EQUAL(cac1.cache().size(), 3u); - - Document::SP d4(new TestDocument(2, 1)); - d4->setField(0, FieldValue::UP(new StringFieldValue("eeeeee"))); - DocSumCache cac3; - cac3.push_back(d4); - cac1.insert(cac3); - EXPECT_EQUAL(cac1.cache().size(), 3u); - EXPECT_EQUAL(2u, cac1.getDocSum(2).getDocId()); -} - int DocsumTest::Main() { @@ -282,7 +253,6 @@ DocsumTest::Main() testFlattenDocsumWriter(); testJSONDocsumWriter(); requireThatJSONDocsumWriterHandlesMap(); - testDocSumCache(); TEST_DONE(); } diff --git a/vsm/src/tests/document/document.cpp b/vsm/src/tests/document/document.cpp index a824d59a788..12b321f7c80 100644 --- a/vsm/src/tests/document/document.cpp +++ b/vsm/src/tests/document/document.cpp @@ -41,11 +41,8 @@ DocumentTest::testStorageDocument() ASSERT_TRUE((*fpmap)[1].size() == 1); ASSERT_TRUE((*fpmap)[2].size() == 0); - StorageDocument sdoc(std::move(doc)); + StorageDocument sdoc(std::move(doc), fpmap, 3); ASSERT_TRUE(sdoc.valid()); - sdoc.setFieldCount(3); - sdoc.fieldPathMap(fpmap); - sdoc.init(); EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); @@ -69,16 +66,10 @@ DocumentTest::testStorageDocument() EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString()); EXPECT_EQUAL(std::string("quux"), sdoc.getField(2)->getAsString()); - // reset cached field values - sdoc.init(); - EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); - EXPECT_TRUE(sdoc.getField(2) == NULL); - EXPECT_TRUE(!sdoc.setField(3, FieldValue::UP(new StringFieldValue("thud")))); SharedFieldPathMap fim; - StorageDocument s2(fim); + StorageDocument s2(std::make_unique(), fim, 0); EXPECT_EQUAL(vespalib::string("null::"), s2.docDoc().getId().toString()); } diff --git a/vsm/src/tests/searcher/searcher.cpp b/vsm/src/tests/searcher/searcher.cpp index 28e97f5e726..5bb47b7d2ae 100644 --- a/vsm/src/tests/searcher/searcher.cpp +++ b/vsm/src/tests/searcher/searcher.cpp @@ -300,9 +300,7 @@ performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & f // setup document SharedFieldPathMap sfim(new FieldPathMapT()); sfim->push_back(FieldPath()); - StorageDocument doc(sfim); - doc.setFieldCount(1); - doc.init(); + StorageDocument doc(std::make_unique(), sfim, 1); doc.setField(0, document::FieldValue::UP(fv.clone())); fs.search(doc); diff --git a/vsm/src/vespa/vsm/common/CMakeLists.txt b/vsm/src/vespa/vsm/common/CMakeLists.txt index 7f3618ac12c..9be6703eba9 100644 --- a/vsm/src/vespa/vsm/common/CMakeLists.txt +++ b/vsm/src/vespa/vsm/common/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_add_library(vsm_vsmcommon OBJECT SOURCES charbuffer.cpp - docsum.cpp document.cpp documenttypemapping.cpp fieldmodifier.cpp diff --git a/vsm/src/vespa/vsm/common/docsum.cpp b/vsm/src/vespa/vsm/common/docsum.cpp deleted file mode 100644 index 3d32f30aa87..00000000000 --- a/vsm/src/vespa/vsm/common/docsum.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "docsum.h" -#include -#include -#include -#include -#include - -#define DEBUGMASK 0x00 - -using search::DocumentIdT; - -namespace vsm { - -using document::FieldValue; -using document::StringFieldValue; - -IMPLEMENT_DUPLICATE(DocSumCache); - -DocSumCache::DocSumCache() : - _list() -{ } - -DocSumCache::~DocSumCache() { } - -const Document & DocSumCache::getDocSum(const DocumentIdT & docId) const -{ - DocSumCacheT::const_iterator found = _list.find(docId); - return *found->second; -} - -void DocSumCache::push_back(const Document::SP & docSum) -{ - _list[docSum->getDocId()] = docSum; -} - -void DocSumCache::insert(const DocSumCache & dc) -{ - for (DocSumCacheT::const_iterator itr = dc._list.begin(); itr != dc._list.end(); ++itr) { - if (_list.find(itr->first) == _list.end()) { - _list[itr->first] = itr->second; - } - } -} - -} diff --git a/vsm/src/vespa/vsm/common/docsum.h b/vsm/src/vespa/vsm/common/docsum.h index bde2625f7a8..9bfaf7eac05 100644 --- a/vsm/src/vespa/vsm/common/docsum.h +++ b/vsm/src/vespa/vsm/common/docsum.h @@ -1,11 +1,9 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include -#include +#include "document.h" -namespace vsm -{ +namespace vsm { /** Will represent a cache of the document summaries. -> Actual docsums will be @@ -20,20 +18,5 @@ class IDocSumCache virtual ~IDocSumCache() { } }; -class DocSumCache : public search::Object, public IDocSumCache -{ - public: - typedef vespalib::hash_map DocSumCacheT; - DUPLICATE(DocSumCache); - DocSumCache(); - virtual ~DocSumCache(); - virtual const Document & getDocSum(const search::DocumentIdT & docId) const; - void push_back(const Document::SP & doc); - void insert(const DocSumCache & dc); - const DocSumCacheT & cache() const { return _list; } - private: - DocSumCacheT _list; -}; - } diff --git a/vsm/src/vespa/vsm/common/document.cpp b/vsm/src/vespa/vsm/common/document.cpp index cc7414b8107..94aa1a0909b 100644 --- a/vsm/src/vespa/vsm/common/document.cpp +++ b/vsm/src/vespa/vsm/common/document.cpp @@ -57,24 +57,14 @@ FieldIdT StringFieldIdTMap::fieldNo(const vespalib::string & fName) const size_t StringFieldIdTMap::highestFieldNo() const { size_t maxFNo(0); - for (StringFieldIdTMapT::const_iterator it = _map.begin(), mt = _map.end(); it != mt; it++) { - if (it->second >= maxFNo) { - maxFNo = it->second + 1; + for (const auto & field : _map) { + if (field.second >= maxFNo) { + maxFNo = field.second + 1; } } return maxFNo; } -Document::Document(const DocumentIdT & doc, size_t maxField) : - _docId(doc), - _fieldCount(maxField) -{ } - -Document::Document() : - _docId(0), - _fieldCount(0) -{ } - Document::~Document() { } } diff --git a/vsm/src/vespa/vsm/common/document.h b/vsm/src/vespa/vsm/common/document.h index b4fc7c86d8e..a0341aaf7c0 100644 --- a/vsm/src/vespa/vsm/common/document.h +++ b/vsm/src/vespa/vsm/common/document.h @@ -47,14 +47,12 @@ typedef vespalib::stringref FieldRef; class Document { public: - typedef std::shared_ptr SP; - Document(); - Document(const search::DocumentIdT & doc, size_t maxFieldCount); + Document(size_t maxFieldCount) : _docId(0), _fieldCount(maxFieldCount) { } + Document(search::DocumentIdT doc, size_t maxFieldCount) : _docId(doc), _fieldCount(maxFieldCount) { } virtual ~Document(); const search::DocumentIdT & getDocId() const { return _docId; } size_t getFieldCount() const { return _fieldCount; } void setDocId(const search::DocumentIdT & v) { _docId = v; } - void setFieldCount(size_t v) { _fieldCount = v; } 
virtual const document::FieldValue * getField(FieldIdT fId) const = 0; /** Returns true, if not possible to set. @@ -62,7 +60,7 @@ class Document virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) = 0; private: search::DocumentIdT _docId; - size_t _fieldCount; + const size_t _fieldCount; }; } diff --git a/vsm/src/vespa/vsm/common/storagedocument.cpp b/vsm/src/vespa/vsm/common/storagedocument.cpp index 4963e1661f0..1cb636a80c7 100644 --- a/vsm/src/vespa/vsm/common/storagedocument.cpp +++ b/vsm/src/vespa/vsm/common/storagedocument.cpp @@ -2,51 +2,29 @@ #include "storagedocument.h" #include #include -#include #include LOG_SETUP(".vsm.storagedocument"); namespace vsm { -StorageDocument::StorageDocument(const SharedFieldPathMap & fim) : - Document(), - _doc(new document::Document()), +StorageDocument::StorageDocument(document::Document::UP doc, const SharedFieldPathMap & fim, size_t fieldNoLimit) : + Document(fieldNoLimit), + _doc(std::move(doc)), _fieldMap(fim), - _cachedFields(), - _backedFields() -{ } - -StorageDocument::StorageDocument(document::Document::UP doc) : - Document(), - _doc(doc.release()), - _fieldMap(), - _cachedFields(), + _cachedFields(getFieldCount()), _backedFields() { } StorageDocument::~StorageDocument() { } -void StorageDocument::init() -{ - _cachedFields.clear(); - _cachedFields.resize(getFieldCount()); -} - namespace { FieldPath _emptyFieldPath; StorageDocument::SubDocument _empySubDocument(NULL, _emptyFieldPath.begin(), _emptyFieldPath.end()); } -void StorageDocument::SubDocument::swap(SubDocument & rhs) -{ - std::swap(_fieldValue, rhs._fieldValue); - std::swap(_it, rhs._it); - std::swap(_mt, rhs._mt); -} - - -const StorageDocument::SubDocument & StorageDocument::getComplexField(FieldIdT fId) const +const StorageDocument::SubDocument & +StorageDocument::getComplexField(FieldIdT fId) const { if (_cachedFields[fId].getFieldValue() == NULL) { const FieldPath & fp = (*_fieldMap)[fId]; @@ -68,7 +46,7 @@ const StorageDocument::SubDocument & StorageDocument::getComplexField(FieldIdT f return _cachedFields[fId]; } -void StorageDocument::saveCachedFields() +void StorageDocument::saveCachedFields() const { size_t m(_cachedFields.size()); _backedFields.reserve(m); @@ -80,7 +58,8 @@ void StorageDocument::saveCachedFields() } } -const document::FieldValue * StorageDocument::getField(FieldIdT fId) const +const document::FieldValue * +StorageDocument::getField(FieldIdT fId) const { return getComplexField(fId).getFieldValue(); } diff --git a/vsm/src/vespa/vsm/common/storagedocument.h b/vsm/src/vespa/vsm/common/storagedocument.h index cf0638cb3c3..3c1f2ddc375 100644 --- a/vsm/src/vespa/vsm/common/storagedocument.h +++ b/vsm/src/vespa/vsm/common/storagedocument.h @@ -11,49 +11,50 @@ typedef document::FieldPath FieldPath; // field path to navigate a field value typedef std::vector FieldPathMapT; // map from field id to field path typedef std::shared_ptr SharedFieldPathMap; -class StorageDocument : public Document -{ - public: - typedef vespalib::LinkedPtr SP; - class SubDocument { - public: - SubDocument() : - _fieldValue(NULL) - { } - SubDocument(document::FieldValue * fv, FieldPath::const_iterator it, FieldPath::const_iterator mt) : - _fieldValue(fv), - _it(it), - _mt(mt) - { } - const document::FieldValue * getFieldValue() const { return _fieldValue; } - void setFieldValue(document::FieldValue * fv) { _fieldValue = fv; } - FieldPath::const_iterator begin() const { return _it; } - FieldPath::const_iterator end() const { return _mt; } - void swap(SubDocument & rhs); - 
private: - document::FieldValue * _fieldValue; - FieldPath::const_iterator _it; - FieldPath::const_iterator _mt; - }; - StorageDocument(const SharedFieldPathMap & fim); - StorageDocument(const document::Document & doc); - StorageDocument(document::Document::UP doc); - virtual ~StorageDocument(); - void init(); - const document::Document & docDoc() const { return *_doc; } - void fieldPathMap(const SharedFieldPathMap & fim) { _fieldMap = fim; } - const SharedFieldPathMap & fieldPathMap() const { return _fieldMap; } - bool valid() const { return _doc.get() != NULL; } - const SubDocument & getComplexField(FieldIdT fId) const; - virtual const document::FieldValue * getField(FieldIdT fId) const; - virtual bool setField(FieldIdT fId, document::FieldValue::UP fv); - void saveCachedFields(); - private: - typedef vespalib::CloneablePtr<document::Document> DocumentContainer; - DocumentContainer _doc; - SharedFieldPathMap _fieldMap; - mutable std::vector<SubDocument> _cachedFields; - mutable std::vector _backedFields; +class StorageDocument : public Document { +public: + typedef vespalib::LinkedPtr<StorageDocument> LP; + + class SubDocument { + public: + SubDocument() : _fieldValue(NULL) {} + SubDocument(document::FieldValue *fv, FieldPath::const_iterator it, FieldPath::const_iterator mt) : + _fieldValue(fv), + _it(it), + _mt(mt) + { } + + const document::FieldValue *getFieldValue() const { return _fieldValue; } + void setFieldValue(document::FieldValue *fv) { _fieldValue = fv; } + FieldPath::const_iterator begin() const { return _it; } + FieldPath::const_iterator end() const { return _mt; } + void swap(SubDocument &rhs) { + std::swap(_fieldValue, rhs._fieldValue); + std::swap(_it, rhs._it); + std::swap(_mt, rhs._mt); + } + private: + document::FieldValue *_fieldValue; + FieldPath::const_iterator _it; + FieldPath::const_iterator _mt; + }; +public: + StorageDocument(document::Document::UP doc, const SharedFieldPathMap &fim, size_t fieldNoLimit); + StorageDocument(const StorageDocument &) = delete; + StorageDocument & operator = (const StorageDocument &) = delete; + ~StorageDocument(); + + const document::Document &docDoc() const { return *_doc; } + bool valid() const { return _doc.get() != NULL; } + const SubDocument &getComplexField(FieldIdT fId) const; + const document::FieldValue *getField(FieldIdT fId) const override; + bool setField(FieldIdT fId, document::FieldValue::UP fv) override ; + void saveCachedFields() const; +private: + document::Document::UP _doc; + SharedFieldPathMap _fieldMap; + mutable std::vector<SubDocument> _cachedFields; + mutable std::vector _backedFields; }; }
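Taken together, the serialization changes make tensor binary data self-describing: the dimension structure is written with the data, so a client may decode against an expected type (validated via isAssignableTo) or with no type at all. A minimal round-trip sketch, illustrative only and not part of the patch, using only APIs that appear in this change set (Tensor.from, TensorType.fromSpec, TypedBinaryFormat.encode/decode, GrowableByteBuffer.wrap):

import com.yahoo.io.GrowableByteBuffer;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.serialization.TypedBinaryFormat;

import java.util.Optional;

public class TensorRoundTrip {

    public static void main(String[] args) {
        Tensor original = Tensor.from("tensor(x[2]):{{x:0}:1.0,{x:1}:2.0}");
        byte[] data = TypedBinaryFormat.encode(original);

        // No expected type: the tensor type is reconstructed from the serialized dimension information.
        Tensor selfDescribed = TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(data));

        // Expected type given: the serialized type tensor(x[2]) must be assignable to it; tensor(x[]) qualifies.
        Tensor validated = TypedBinaryFormat.decode(Optional.of(TensorType.fromSpec("tensor(x[])")),
                                                    GrowableByteBuffer.wrap(data));

        System.out.println(selfDescribed.equals(original));  // expected: true
        System.out.println(validated.equals(original));      // expected: true, mirroring the dense format test case above
    }
}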