Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from wmde/monotext
Monotext
- Loading branch information
Showing
3 changed files
with
158 additions
and
0 deletions.
There are no files selected for viewing
75 changes: 75 additions & 0 deletions
75
java/analyzer/src/main/java/org/wikidata/analyzer/Processor/MonolingualTextProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package main.java.org.wikidata.analyzer.Processor; | ||
|
||
import org.wikidata.wdtk.datamodel.interfaces.*; | ||
|
||
import java.util.*; | ||
|
||
/** | ||
* MonolingualTextProcessor for wikidata-analysis | ||
* | ||
* Counts the languages used in monolingual text value snaks | ||
* | ||
* @author Addshore | ||
*/ | ||
public class MonolingualTextProcessor implements EntityDocumentProcessor { | ||
|
||
private Map<String, Long> counters; | ||
|
||
public MonolingualTextProcessor(Map<String, Long> counters) { | ||
this.counters = counters; | ||
} | ||
|
||
private void increment(String counter) { | ||
this.increment(counter, 1); | ||
} | ||
|
||
private void increment(String counter, int quantity) { | ||
this.initiateCounterIfNotReady(counter); | ||
this.counters.put(counter, this.counters.get(counter) + (long) quantity); | ||
} | ||
|
||
private void initiateCounterIfNotReady(String counter) { | ||
if (!this.counters.containsKey(counter)) { | ||
this.counters.put(counter, (long) 0); | ||
} | ||
} | ||
|
||
@Override | ||
public void processItemDocument(ItemDocument item) { | ||
for (Iterator<Statement> statements = item.getAllStatements(); statements.hasNext(); ) { | ||
this.processStatement( statements.next() ); | ||
} | ||
} | ||
|
||
@Override | ||
public void processPropertyDocument(PropertyDocument property) { | ||
for (Iterator<Statement> statements = property.getAllStatements(); statements.hasNext(); ) { | ||
this.processStatement( statements.next() ); | ||
} | ||
} | ||
|
||
private void processStatement( Statement statement ) { | ||
this.processSnak(statement.getClaim().getMainSnak()); | ||
for (Iterator<Snak> qualifierSnaks = statement.getClaim().getAllQualifiers(); qualifierSnaks.hasNext(); ) { | ||
Snak qualifierSnak = qualifierSnaks.next(); | ||
this.processSnak(qualifierSnak); | ||
} | ||
for (Reference reference : statement.getReferences()) { | ||
for (Iterator<Snak> referenceSnaks = reference.getAllSnaks(); referenceSnaks.hasNext(); ) { | ||
Snak referenceSnak = referenceSnaks.next(); | ||
this.processSnak(referenceSnak); | ||
} | ||
} | ||
} | ||
|
||
private void processSnak( Snak snak ) { | ||
if (snak instanceof ValueSnak) { | ||
Value value = ((ValueSnak) snak).getValue(); | ||
if (value instanceof MonolingualTextValue) { | ||
MonolingualTextValue textValue = (MonolingualTextValue) value; | ||
this.increment( textValue.getLanguageCode() ); | ||
} | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
.../analyzer/src/test/java/org/wikidata/analyzer/Processor/MonolingualTextProcessorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package test.java.org.wikidata.analyzer.Processor; | ||
|
||
import junit.framework.TestCase; | ||
import main.java.org.wikidata.analyzer.Processor.MonolingualTextProcessor; | ||
import org.wikidata.wdtk.datamodel.helpers.*; | ||
import org.wikidata.wdtk.datamodel.implementation.ItemIdValueImpl; | ||
import org.wikidata.wdtk.datamodel.implementation.PropertyIdValueImpl; | ||
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; | ||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; | ||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; | ||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author Addshore | ||
*/ | ||
public class MonolingualTextProcessorTest extends TestCase { | ||
|
||
private void assertCounter( Map<String, Long> counters, String counter, int expected ) { | ||
assertTrue( "Assert counter name exists '" + counter + "'", counters.containsKey( counter ) ); | ||
assertEquals( "Assert counter '" + counter + "'value correct", (long)expected, (long)counters.get( counter ) ); | ||
} | ||
|
||
public void testProcessItemDocument() throws Exception { | ||
Map<String, Long> counters = new HashMap<>(); | ||
MonolingualTextProcessor processor = new MonolingualTextProcessor(counters); | ||
|
||
ItemIdValue itemId = ItemIdValueImpl.create("Q42", "foo"); | ||
ItemDocument itemDocument = ItemDocumentBuilder.forItemId(itemId) | ||
.withStatement( | ||
StatementBuilder | ||
.forSubjectAndProperty(itemId, PropertyIdValueImpl.create("P1", "bar")) | ||
.withValue(Datamodel.makeMonolingualTextValue("text", "en")) | ||
.withQualifier(Datamodel.makeValueSnak(PropertyIdValueImpl.create("P1", "bar"), Datamodel.makeMonolingualTextValue("text", "de"))) | ||
.withQualifier(Datamodel.makeValueSnak(PropertyIdValueImpl.create("P1", "bar"), Datamodel.makeMonolingualTextValue("text", "fr"))) | ||
.withReference(ReferenceBuilder.newInstance().withPropertyValue( | ||
PropertyIdValueImpl.create("P2", "Foo"), | ||
Datamodel.makeMonolingualTextValue("text", "fr") | ||
).build()) | ||
.withReference(ReferenceBuilder.newInstance().withPropertyValue( | ||
PropertyIdValueImpl.create("P2", "Foo"), | ||
Datamodel.makeMonolingualTextValue("text", "pt") | ||
).build()) | ||
.build() | ||
) | ||
.build(); | ||
|
||
PropertyIdValue propertyId = PropertyIdValueImpl.create("P42", "foo"); | ||
PropertyDocument propertyDocument = PropertyDocumentBuilder.forPropertyIdAndDatatype(propertyId, "foo") | ||
.withStatement( | ||
StatementBuilder | ||
.forSubjectAndProperty(propertyId, PropertyIdValueImpl.create("P1", "bar")) | ||
.withValue(Datamodel.makeMonolingualTextValue("text", "pt")) | ||
.build() | ||
) | ||
.build(); | ||
|
||
processor.processItemDocument( itemDocument ); | ||
processor.processPropertyDocument( propertyDocument ); | ||
|
||
this.assertCounter(counters, "en", 1); | ||
this.assertCounter(counters, "de", 1); | ||
this.assertCounter(counters, "fr", 2 ); | ||
this.assertCounter(counters, "pt", 2 ); | ||
} | ||
|
||
} |