diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala
index ddaedf3..f83ffcb 100644
--- a/src/main/scala/Main.scala
+++ b/src/main/scala/Main.scala
@@ -47,11 +47,11 @@ object Main extends IOApp {
       filePaths =>
         for {
           // Schema for validations
-          schema <- TestData.mkSchemaShexIO()
+          schema <- TestData.mkSchemaShaclIO()
           // Trigger for validations
-          trigger = TestData.mkTriggerShex()
+          trigger = TestData.mkTriggerShacl
           // Validator settings
-          validatorConfiguration = ValidatorConfiguration(schema, trigger, haltOnErrored = true)
+          validatorConfiguration = ValidatorConfiguration(schema, trigger, haltOnInvalid = true, haltOnErrored = true)
           // RDF extractors: all types ready to be tested
           // - List extractor
           listExtractor = ListExtractor(
@@ -78,7 +78,7 @@ object Main extends IOApp {
             itemTimeout = None)
           // Validator instance
-          validator = Validator(validatorConfiguration, fileExtractor)
+          validator = new Validator(validatorConfiguration, listExtractor)
           // Open validation stream
           app <- validator.validate // Init
           // .delayBy(10.minute)
diff --git a/src/main/scala/validation/Validator.scala b/src/main/scala/validation/Validator.scala
index 6607969..0c1fe79 100644
--- a/src/main/scala/validation/Validator.scala
+++ b/src/main/scala/validation/Validator.scala
@@ -48,9 +48,8 @@ import org.apache.kafka.common.KafkaException
  * such cases
  *
  */
-case class Validator[A](configuration: ValidatorConfiguration,
-                        private val extractor: StreamExtractor[A]) {
-  // Shorthands for useful information
+class Validator[A](configuration: ValidatorConfiguration,
+                   private val extractor: StreamExtractor[A]) {
 
   /**
    * Schema against which this validator's data is validated
@@ -65,7 +64,7 @@ case class Validator[A](configuration: ValidatorConfiguration,
    */
   val validationTrigger: ValidationTrigger = configuration.trigger
 
-  // Expose useful extractor information
+  // Shorthands for useful information
 
   /**
    * Source from which this validator's data arrives
@@ -77,11 +76,21 @@
    */
   val dataFormat: DataFormat = extractor.format
 
+  // Expose useful extractor information
+
   /**
    * Data inference applied by this validator
    */
   val dataInference: InferenceEngine = extractor.inference
 
+  /**
+   * Alternative constructor that automatically builds the validator
+   * configuration given a schema and a trigger
+   */
+  def this(extractor: StreamExtractor[A], schema: Schema, trigger: ValidationTrigger) = {
+    this(ValidatorConfiguration(schema, trigger), extractor)
+  }
+
   /**
    * Main method of the validator, produces a Stream of validation results
    * from input data, fetching and processing items as specified in
@@ -197,7 +206,7 @@ case class Validator[A](configuration: ValidatorConfiguration,
 /**
  * Helper utilities for all Validators
  */
-object Validator {
+private[comet] object Validator {
 
   /**
    * Auxiliary messages emitted by the validator
diff --git a/src/test/scala/utils/Samples.scala b/src/test/scala/utils/Samples.scala
index 9a2ed53..e273d00 100644
--- a/src/test/scala/utils/Samples.scala
+++ b/src/test/scala/utils/Samples.scala
@@ -88,6 +88,8 @@ object Samples {
    * @param format Format of the output RDF string
    * @param min    Minimum temperature of the item
    * @param max    Maximum temperature of the item
+   * @param valid  Whether the produced item should comply with the schemas
+   *               generated for testing or not
    * @return A String containing RDF data with the format and contents
    *         specified by the user
    *
@@ -97,9 +99,11 @@
    */
   def mkRdfItem(format: DataFormat = TURTLE,
                 min: Double = minValidTemperature,
-                max: Double = maxValidTemperature): String = {
+                max: Double = maxValidTemperature,
+                valid: Boolean = true): String = {
     val dateFormatted = dateFormatter.format(new Date())
-    val temperature = Random.between(min, max)
+    // Force an out-of-range temperature to make the item invalid when requested
+    val temperature = if (valid) Random.between(min, max) else minValidTemperature - 1
     // Format with US locale to have dots, not commas
     val temperatureFormatted = String.format(Locale.US, "%.2f", temperature)
@@ -112,8 +116,8 @@ object Samples {
       |         xmlns:ex="http://example.org/"
       |         xmlns:xsd="http://www.w3.org/2001/XMLSchema#">
       |
-      |  <ex:readingDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">$dateFormatted</ex:readingDate>
+      |  <ex:readingDateTime rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">$dateFormatted</ex:readingDateTime>
       |  <ex:readingTemperature rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">$temperatureFormatted</ex:readingTemperature>
       |  <ex:status>OK</ex:status>
@@ -126,12 +130,12 @@ object Samples {
       |{
       |  "@id" : "ex:reading",
       |  "@type" : "ex:sensorReading",
-      |  "readingDate" : "$dateFormatted",
+      |  "readingDateTime" : "$dateFormatted",
       |  "readingTemperature" : "$temperatureFormatted",
       |  "status" : "OK",
       |  "@context" : {
-      |    "readingDate" : {
-      |      "@id" : "http://example.org/readingDate",
+      |    "readingDateTime" : {
+      |      "@id" : "http://example.org/readingDateTime",
       |      "@type" : "http://www.w3.org/2001/XMLSchema#dateTime"
       |    },
       |    "readingTemperature" : {
@@ -155,7 +159,7 @@ object Samples {
       |@prefix ex: <http://example.org/> .
       |
       |ex:reading a ex:sensorReading ;
-      |  ex:readingDatetime "$dateFormatted"^^xsd:dateTime ;
+      |  ex:readingDateTime "$dateFormatted"^^xsd:dateTime ;
       |  ex:readingTemperature "$temperatureFormatted"^^xsd:decimal ;
       |  ex:status "OK" .
       |""".stripMargin.strip
@@ -188,7 +192,7 @@ object Samples {
       |
       |# Filters of a valid sensor reading
       |ex:ValidReading {
-      |  ex:readingDatetime xsd:dateTime ; # Has a VALID timestamp
+      |  ex:readingDateTime xsd:dateTime ; # Has a VALID timestamp
       |  ex:readingTemperature xsd:decimal MININCLUSIVE $minValidTemperature MAXINCLUSIVE $maxValidTemperature + ; # 1+ readings in valid range
       |  ex:status [ "OK" "RUNNING" ] # Status must be one of the allowed values
       |}
@@ -210,7 +214,7 @@ object Samples {
       |  "expressions" : [
       |    {
       |      "type" : "TripleConstraint",
-      |      "predicate" : "http://example.org/readingDate",
+      |      "predicate" : "http://example.org/readingDateTime",
       |      "valueExpr" : {
       |        "type" : "NodeConstraint",
       |        "datatype" : "http://www.w3.org/2001/XMLSchema#dateTime"
@@ -290,7 +294,7 @@ object Samples {
       |
       |
       |
-      |      <sh:path rdf:resource="http://example.org/readingDate"/>
+      |      <sh:path rdf:resource="http://example.org/readingDateTime"/>
       |
       |
       |
@@ -313,7 +317,7 @@ object Samples {
     case JSONLD => f"""
       |{
-      |  "@graph" : [ {"
+      |  "@graph" : [ {
       |    "@id" : "_:b0",
       |    "@type" : "sh:PropertyShape",
       |    "datatype" : "xsd:decimal",
@@ -332,7 +336,7 @@ object Samples {
       |    "@id" : "_:b2",
       |    "@type" : "sh:PropertyShape",
       |    "datatype" : "xsd:dateTime",
-      |    "path" : "ex:readingDate"
+      |    "path" : "ex:readingDateTime"
       |  }, {
       |    "@id" : "ex:ValidReading",
       |    "@type" : "sh:NodeShape",
@@ -348,7 +352,7 @@ object Samples {
       |      "@id" : "http://www.w3.org/ns/shacl#maxInclusive",
       |      "@type" : "http://www.w3.org/2001/XMLSchema#decimal"
       |    },
-      |    "miSHaclEXnInclusive" : {
+      |    "minInclusive" : {
       |      "@id" : "http://www.w3.org/ns/shacl#minInclusive",
       |      "@type" : "http://www.w3.org/2001/XMLSchema#decimal"
       |    },
@@ -395,14 +399,14 @@ object Samples {
       |ex:ValidReading a sh:NodeShape ;
       |  sh:targetClass ex:sensorReading ;
       |  sh:property [
-      |    sh:path ex:readingDate ;
+      |    sh:path ex:readingDateTime ;
       |    sh:datatype xsd:dateTime ;
       |  ] ;
       |  sh:property [
       |    sh:path ex:readingTemperature ;
       |    sh:datatype xsd:decimal ;
-      |    sh:minInclusive 18.00;
-      |    sh:maxInclusive 20.00 ;
+      |    sh:minInclusive $minValidTemperature ;
+      |    sh:maxInclusive $maxValidTemperature ;
       |    sh:minCount 1 ; # 1+ readings
       |  ] ;
       |  sh:property [
@@ -413,7 +417,7 @@ object Samples {
       |  ] .
       |""".stripMargin.strip
     }
-    Schemas.fromString(schemaText, format.name, Schemas.shEx.name)
+    Schemas.fromString(schemaText, format.name, Schemas.shaclex.name)
   }
 }
diff --git a/src/test/scala/validation/ouputs/DummyTest.scala b/src/test/scala/validation/ouputs/DummyTest.scala
deleted file mode 100644
index 28ce2e4..0000000
--- a/src/test/scala/validation/ouputs/DummyTest.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package org.ragna.comet
-package validation.ouputs
-
-import cats.effect.IO
-import cats.effect.testing.scalatest.AsyncIOSpec
-import org.scalatest.freespec.AsyncFreeSpec
-import org.scalatest.matchers.should.Matchers
-
-class DummyTest extends AsyncFreeSpec with AsyncIOSpec with Matchers {
-  "My Code " - {
-    "works" in {
-      IO(1).asserting(_ shouldBe 1)
-    }
-  }
-}
diff --git a/src/test/scala/validation/ouputs/SchemaTests.scala b/src/test/scala/validation/ouputs/SchemaTests.scala
new file mode 100644
index 0000000..50541af
--- /dev/null
+++ b/src/test/scala/validation/ouputs/SchemaTests.scala
@@ -0,0 +1,247 @@
+package org.ragna.comet
+package validation.ouputs
+
+import data.DataFormat
+import data.DataFormat.*
+import implicits.RDFElementImplicits.rdfFromString
+import schema.ShExSchemaFormat
+import schema.ShExSchemaFormat.*
+import stream.extractors.StreamExtractor
+import stream.extractors.list.ListExtractor
+import trigger.ShapeMapFormat.*
+import trigger.TriggerModeType.{SHAPEMAP, TARGET_DECLARATIONS}
+import trigger.{ShapeMapFormat, TriggerModeType, ValidationTrigger}
+import utils.Samples
+import validation.Validator
+import validation.configuration.ValidatorConfiguration
+import validation.result.ResultStatus.*
+import validation.result.ValidationResult
+
+import cats.effect.IO
+import cats.effect.testing.scalatest.AsyncIOSpec
+import es.weso.schema.Schema
+import org.scalatest.freespec.AsyncFreeSpec
+import org.scalatest.matchers.should.Matchers
+
+/**
+ * Test suite checking that the validation mechanism works when using either
+ * ShEx or SHACL schemas
+ *
+ * The testing goes as follows:
+ *
+ * - The same RDF data example will be validated against a given schema,
+ *   though slightly modified to test both valid and invalid validations
+ *
+ * - Single RDF items will be tested as a single item stream
+ *   fed to a stream-validator to check that the results are the expected ones
+ *
+ * - The List extractor will be used for testing, since it is the simplest way
+ *   we have to test in-memory data
+ *
+ * Tests are nested as follows to cover all possibilities:
+ *
+ * - Per Schema type/engine (ShEx, SHACL, etc.)
+ *   - Per Schema syntax
+ *     - Per expected validation result (valid or invalid)
+ *       - Per input RDF data format (Turtle, JSON-LD, RDF/XML...)
+ *
+ * @note The testing of the validation mechanism could be considered redundant,
+ *       since we should be able to trust SHaclEX as a validation library
+ *
+ *       Still, it is unstable, and it is better to double-check the SHaclEX
+ *       validator in our streaming context
+ */
+//noinspection RedundantDefaultArgument
+class SchemaTests extends AsyncFreeSpec with AsyncIOSpec with Matchers {
+
+  /**
+   * Shortcut for generating single-item results through comet's stream validators,
+   * which is the main logic required for these tests
+   *
+   * Given the details of the validation (input data/schema format, expected result...),
+   * this creates a validation stream that complies with it and returns its
+   * eventual result
+   *
+   * @param rdfFormat    Format of the RDF item that we want to get through validation
+   * @param schemaFormat Format of the Schema to be used for validation
+   * @param valid        Whether the produced RDF item should yield a VALID
+   *                     validation result or not
+   * @return The [[ValidationResult]] of getting an RDF item (of format [[rdfFormat]])
+   *         through a validator using a Schema (of format/engine [[schemaFormat]]),
+   *         following the data and schema templates in [[Samples]]
+   */
+  private def mkSingleValidationResult(rdfFormat: DataFormat,
+                                       schemaFormat: DataFormat | ShExSchemaFormat,
+                                       valid: Boolean = true): IO[ValidationResult] = {
+    for {
+      // Make the schema
+      schema <- schemaFormat match {
+        case df: DataFormat => Samples.SchemaSamples.mkSchemaShaclIO(df)
+        case sf: ShExSchemaFormat => Samples.SchemaSamples.mkSchemaShExIO(sf)
+      }
+      // Make the validation trigger (inferred from schema type)
+      trigger = schemaFormat match {
+        case _: DataFormat => Samples.TriggerSamples.mkTriggerShacl
+        case _: ShExSchemaFormat => Samples.TriggerSamples.mkTriggerShex(COMPACT)
+      }
+      // Make the RDF item and get it into a list extractor
+      rdfItem = Samples.RdfSamples.mkRdfItem(rdfFormat, valid = valid)
+      extractor = ListExtractor(items = List(rdfItem), format = rdfFormat)
+
+      // Open validation stream and collect the validation results
+      validator = new Validator(extractor, schema, trigger)
+      results: List[ValidationResult] <- validator.validate.compile.toList
+      // Debug aid: print the result of items that are expected to be invalid
+      _ <- if (!valid) IO.println(results.head) else IO.unit
+    } yield results.head
+  }
+
+  "ShEx schemas" - {
+    "using ShExC syntax" - {
+      "validate VALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(TURTLE, SHEXC, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(JSONLD, SHEXC, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(RDFXML, SHEXC, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+      }
+
+      "do not validate INVALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(TURTLE, SHEXC, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(JSONLD, SHEXC, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(RDFXML, SHEXC, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+      }
+    }
+
+  }
+
+  "SHACL schemas" - {
+    "using TURTLE syntax" - {
+      "validate VALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = TURTLE, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = TURTLE, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = TURTLE, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+      }
+
+      "do not validate INVALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = TURTLE, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = TURTLE, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = TURTLE, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+      }
+    }
+
+    "using JSON-LD syntax" - {
+      "validate VALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = JSONLD, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = JSONLD, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = JSONLD, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+      }
+
+      "do not validate INVALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = JSONLD, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = JSONLD, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = JSONLD, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+      }
+    }
+
+    "using RDF/XML syntax" - {
+      "validate VALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = RDFXML, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = RDFXML, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = RDFXML, valid = true)
+            .asserting(_.status shouldBe VALID)
+        }
+      }
+
+      "do not validate INVALID RDF data" - {
+        "in Turtle format" in {
+          mkSingleValidationResult(rdfFormat = TURTLE, schemaFormat = RDFXML, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in JSON-LD format" in {
+          mkSingleValidationResult(rdfFormat = JSONLD, schemaFormat = RDFXML, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+
+        "in RDF/XML format" in {
+          mkSingleValidationResult(rdfFormat = RDFXML, schemaFormat = RDFXML, valid = false)
+            .asserting(_.status shouldBe INVALID)
+        }
+      }
+    }
+  }
+}
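
For reference, a minimal usage sketch of the two Validator constructors touched by this diff; it is not taken verbatim from the repository, and `schema`, `trigger` and `listExtractor` stand in for the values built in Main.scala and Samples.scala:

// Primary constructor: the configuration is built explicitly,
// including both halting flags added in this change.
val configuration =
  ValidatorConfiguration(schema, trigger, haltOnInvalid = true, haltOnErrored = true)
val strictValidator = new Validator(configuration, listExtractor)

// Alternative constructor: the configuration is derived from the schema
// and trigger, keeping ValidatorConfiguration's default halting behaviour.
val defaultValidator = new Validator(listExtractor, schema, trigger)

// Either way, `validate` yields a stream of validation results that can be
// compiled to a list, as done in SchemaTests.scala.
val results: IO[List[ValidationResult]] =
  defaultValidator.validate.compile.toList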