Skip to content

Commit

Permalink
Merge pull request #14 from wmde/monotext
Browse files Browse the repository at this point in the history
Monotext
  • Loading branch information
addshore committed Jan 26, 2016
2 parents dfaba44 + ee46b66 commit 64161fe
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 0 deletions.
@@ -0,0 +1,75 @@
package main.java.org.wikidata.analyzer.Processor;

import org.wikidata.wdtk.datamodel.interfaces.*;

import java.util.*;

/**
 * MonolingualTextProcessor for wikidata-analysis.
 *
 * Tallies, per language code, how many monolingual text values occur in the
 * snaks of the processed item and property documents. For every statement the
 * main snak, all qualifier snaks and all reference snaks are inspected.
 * Totals are accumulated in the map handed to the constructor.
 *
 * @author Addshore
 */
public class MonolingualTextProcessor implements EntityDocumentProcessor {

    /** Language code mapped to the number of monolingual text values seen so far. */
    private final Map<String, Long> counters;

    /**
     * @param counters map that receives the per-language totals; it is updated in place
     */
    public MonolingualTextProcessor(Map<String, Long> counters) {
        this.counters = counters;
    }

    /**
     * Counts the monolingual text values found in every statement of the item.
     *
     * @param item item document whose statements are scanned
     */
    @Override
    public void processItemDocument(ItemDocument item) {
        this.countStatements(item.getAllStatements());
    }

    /**
     * Counts the monolingual text values found in every statement of the property.
     *
     * @param property property document whose statements are scanned
     */
    @Override
    public void processPropertyDocument(PropertyDocument property) {
        this.countStatements(property.getAllStatements());
    }

    /**
     * Walks the statements and inspects, in order, the main snak, the
     * qualifier snaks and the snaks of each reference.
     */
    private void countStatements(Iterator<Statement> statements) {
        while (statements.hasNext()) {
            Statement current = statements.next();

            this.countSnak(current.getClaim().getMainSnak());
            this.countSnaks(current.getClaim().getAllQualifiers());

            for (Reference reference : current.getReferences()) {
                this.countSnaks(reference.getAllSnaks());
            }
        }
    }

    /** Inspects every snak produced by the iterator. */
    private void countSnaks(Iterator<Snak> snaks) {
        while (snaks.hasNext()) {
            this.countSnak(snaks.next());
        }
    }

    /**
     * Adds one to the counter of the snak's language code when the snak is a
     * value snak carrying a monolingual text value; any other snak is ignored.
     */
    private void countSnak(Snak snak) {
        if (!(snak instanceof ValueSnak)) {
            return;
        }

        Value held = ((ValueSnak) snak).getValue();
        if (!(held instanceof MonolingualTextValue)) {
            return;
        }

        this.addOne(((MonolingualTextValue) held).getLanguageCode());
    }

    /** Raises the named counter by one; a counter without an entry starts from zero. */
    private void addOne(String languageCode) {
        long before = this.counters.containsKey(languageCode)
                ? this.counters.get(languageCode)
                : 0L;
        this.counters.put(languageCode, before + 1L);
    }

}
Expand Up @@ -130,6 +130,12 @@ public void run() throws IOException {
controller.registerEntityDocumentProcessor(new MetricProcessor(metricsCounters), null, true);
}

// MonolingualText
Map<String, Long> monotextCounters = new HashMap<>();
if (processors.contains("MonolingualText")) {
controller.registerEntityDocumentProcessor(new MonolingualTextProcessor(monotextCounters), null, true);
}

// Map
JSONObject mapGeoData = new JSONObject();
JSONObject mapGraphData = new JSONObject();
Expand Down Expand Up @@ -175,6 +181,14 @@ public void run() throws IOException {
metricsJsonWriter.close();
}

// MonolingualText
if (processors.contains("MonolingualText")) {
File monotextJsonFile = new File(outputDir.getAbsolutePath() + File.separator + "monotext.json");
BufferedWriter monotextJsonWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(monotextJsonFile)));
new JSONObject(monotextCounters).writeJSONString(monotextJsonWriter);
monotextJsonWriter.close();
}

// Map
if (processors.contains("Map")) {
System.out.println("Writing map wdlabel.json");
Expand Down
@@ -0,0 +1,69 @@
package test.java.org.wikidata.analyzer.Processor;

import junit.framework.TestCase;
import main.java.org.wikidata.analyzer.Processor.MonolingualTextProcessor;
import org.wikidata.wdtk.datamodel.helpers.*;
import org.wikidata.wdtk.datamodel.implementation.ItemIdValueImpl;
import org.wikidata.wdtk.datamodel.implementation.PropertyIdValueImpl;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;

import java.util.HashMap;
import java.util.Map;

/**
 * Exercises MonolingualTextProcessor with one item document and one property
 * document and checks the per-language totals it writes into the counter map.
 *
 * @author Addshore
 */
public class MonolingualTextProcessorTest extends TestCase {

    /**
     * Asserts that the named counter exists and holds the expected value.
     *
     * @param counters map filled by the processor under test
     * @param counter  language code to look up
     * @param expected expected total for that language code
     */
    private void assertCounter(Map<String, Long> counters, String counter, int expected) {
        boolean present = counters.containsKey(counter);
        assertTrue("Assert counter name exists '" + counter + "'", present);

        long actual = counters.get(counter);
        assertEquals("Assert counter '" + counter + "'value correct", (long) expected, actual);
    }

    /**
     * Feeds the processor an item whose single statement has a main value (en),
     * two qualifiers (de, fr) and two references (fr, pt), followed by a
     * property whose single statement has a main value (pt), then verifies
     * the resulting totals.
     */
    public void testProcessItemDocument() throws Exception {
        Map<String, Long> counters = new HashMap<>();
        MonolingualTextProcessor processor = new MonolingualTextProcessor(counters);

        // Property ids shared by the fixtures below.
        PropertyIdValue p1 = PropertyIdValueImpl.create("P1", "bar");
        PropertyIdValue p2 = PropertyIdValueImpl.create("P2", "Foo");

        // Property fixture: a single statement with a "pt" main value.
        PropertyIdValue propertyId = PropertyIdValueImpl.create("P42", "foo");
        PropertyDocument propertyDocument = PropertyDocumentBuilder
                .forPropertyIdAndDatatype(propertyId, "foo")
                .withStatement(
                        StatementBuilder
                                .forSubjectAndProperty(propertyId, p1)
                                .withValue(Datamodel.makeMonolingualTextValue("text", "pt"))
                                .build()
                )
                .build();

        // Item fixture: main value "en", qualifiers "de" and "fr", references "fr" and "pt".
        ItemIdValue itemId = ItemIdValueImpl.create("Q42", "foo");
        ItemDocument itemDocument = ItemDocumentBuilder
                .forItemId(itemId)
                .withStatement(
                        StatementBuilder
                                .forSubjectAndProperty(itemId, p1)
                                .withValue(Datamodel.makeMonolingualTextValue("text", "en"))
                                .withQualifier(Datamodel.makeValueSnak(
                                        p1, Datamodel.makeMonolingualTextValue("text", "de")))
                                .withQualifier(Datamodel.makeValueSnak(
                                        p1, Datamodel.makeMonolingualTextValue("text", "fr")))
                                .withReference(ReferenceBuilder.newInstance()
                                        .withPropertyValue(p2, Datamodel.makeMonolingualTextValue("text", "fr"))
                                        .build())
                                .withReference(ReferenceBuilder.newInstance()
                                        .withPropertyValue(p2, Datamodel.makeMonolingualTextValue("text", "pt"))
                                        .build())
                                .build()
                )
                .build();

        processor.processItemDocument(itemDocument);
        processor.processPropertyDocument(propertyDocument);

        // Expected totals, checked in a fixed order.
        String[] languages = {"en", "de", "fr", "pt"};
        int[] expectedTotals = {1, 1, 2, 2};
        for (int i = 0; i < languages.length; i++) {
            this.assertCounter(counters, languages[i], expectedTotals[i]);
        }
    }

}

0 comments on commit 64161fe

Please sign in to comment.