Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cbf72b8
Showing
6 changed files
with
1,378 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
JPMML-Piglet | ||
============ | ||
|
||
A Java UDF for evaluating PMML models on the Apache Pig platform (http://pig.apache.org/). | ||
|
||
# Prerequisites # | ||
|
||
* Apache Pig 0.14.0 or newer. | ||
|
||
# Installation # | ||
|
||
Enter the project root directory and build using [Apache Maven](http://maven.apache.org/): | ||
``` | ||
mvn clean install | ||
``` | ||
|
||
The build produces an uber-JAR file `target/jpmml-piglet-distributable-1.0-SNAPSHOT.jar`. | ||
|
||
# Usage # | ||
|
||
Add the uber-JAR file to the Apache Pig classpath: | ||
``` | ||
REGISTER target/jpmml-piglet-distributable-1.0-SNAPSHOT.jar; | ||
``` | ||
|
||
The following example scores the `src/etc/Iris.csv` CSV file with the `src/etc/DecisionTreeIris.pmml` PMML file. | ||
|
||
Importing data from the CSV file: | ||
``` | ||
iris_input = LOAD 'src/etc/Iris.csv' USING PigStorage(',') | ||
AS (Sepal_Length:double, Sepal_Width:double, Petal_Length:double, Petal_Width:double); | ||
DESCRIBE iris_input; | ||
``` | ||
|
||
Defining a Java UDF for the PMML file: | ||
``` | ||
DEFINE iris_treemodel org.jpmml.piglet.PMMLFunc('src/etc/DecisionTreeIris.pmml'); | ||
``` | ||
|
||
Scoring data using the Java UDF: | ||
``` | ||
iris_prediction = FOREACH iris_input GENERATE iris_treemodel(*); | ||
DESCRIBE iris_prediction; | ||
DUMP iris_prediction; | ||
``` | ||
|
||
# License # | ||
|
||
JPMML-Piglet is dual-licensed under the [GNU Affero General Public License (AGPL) version 3.0](http://www.gnu.org/licenses/agpl-3.0.html) and a commercial license. | ||
|
||
# Additional information # | ||
|
||
Please contact [info@openscoring.io](mailto:info@openscoring.io) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for JPMML-Piglet.

  Compiles the project for Java 1.7 and, during the package phase, produces a
  shaded JAR named <artifactId>-distributable-<version>.jar that bundles the
  PMML evaluator. Apache Pig and Hadoop are declared with scope "provided".
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.jpmml</groupId>
  <artifactId>jpmml-piglet</artifactId>
  <version>1.0-SNAPSHOT</version>

  <licenses>
    <license>
      <name>GNU Affero General Public License (AGPL) version 3.0</name>
      <url>http://www.gnu.org/licenses/agpl-3.0.html</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <properties>
    <!-- Pin the source encoding so the build does not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- The only compile-scope dependency. -->
    <dependency>
      <groupId>org.jpmml</groupId>
      <artifactId>pmml-evaluator</artifactId>
      <version>1.2.8</version>
    </dependency>

    <!-- Any Pig release in the closed range 0.14.0 through 0.15.0. -->
    <dependency>
      <groupId>org.apache.pig</groupId>
      <artifactId>pig</artifactId>
      <version>[0.14.0, 0.15.0]</version>
      <scope>provided</scope>
    </dependency>

    <!-- Any Hadoop 1.x core release in the closed range 1.0.0 through 1.2.1. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>[1.0.0, 1.2.1]</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile sources and emit bytecode at the Java 1.7 level. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.3</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>

      <!-- Run the enforcer's requireJavaVersion rule with version 1.7. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-enforcer-plugin</artifactId>
        <version>1.4.1</version>
        <executions>
          <execution>
            <goals>
              <goal>enforce</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <rules>
            <requireJavaVersion>
              <version>1.7</version>
            </requireJavaVersion>
          </rules>
        </configuration>
      </plugin>

      <!-- Build the distributable uber-JAR during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>2.4.2</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <createDependencyReducedPom>false</createDependencyReducedPom>
          <!--
            Use the project.* expressions: the bare forms of artifactId and
            version are deprecated and make Maven emit model warnings.
          -->
          <finalName>${project.artifactId}-distributable-${project.version}</finalName>
          <!--
            NOTE(review): minimizeJar drops classes that are not statically
            referenced; confirm that classes loaded reflectively by the
            evaluator survive in the shaded JAR.
          -->
          <minimizeJar>true</minimizeJar>
          <relocations>
            <!--
              Moves the Guava packages under a versioned name; presumably to
              avoid clashing with another Guava on the Pig/Hadoop classpath.
            -->
            <relocation>
              <pattern>com.google.common</pattern>
              <shadedPattern>com.google.common19_0</shadedPattern>
            </relocation>
          </relocations>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
<?xml version="1.0"?>
<!--
  PMML 4.2 classification tree for the Iris data set, exported by Rattle/PMML.
  Predicts Species from the four sepal/petal measurements and also reports the
  per-class probabilities.
-->
<PMML version="4.2"
      xmlns="http://www.dmg.org/PMML-4_2"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2/pmml-4-2.xsd">
  <Header copyright="Copyright (c) 2015 vfed" description="RPart Decision Tree Model">
    <Extension name="user" value="vfed" extender="Rattle/PMML"/>
    <Application name="Rattle/PMML" version="1.4"/>
    <Timestamp>2015-12-14 02:09:09</Timestamp>
  </Header>

  <!-- One categorical target plus four continuous inputs with their closed value ranges. -->
  <DataDictionary numberOfFields="5">
    <DataField name="Species" optype="categorical" dataType="string">
      <Value value="setosa"/>
      <Value value="versicolor"/>
      <Value value="virginica"/>
    </DataField>
    <DataField name="Sepal_Length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9"/>
    </DataField>
    <DataField name="Sepal_Width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="2" rightMargin="4.4"/>
    </DataField>
    <DataField name="Petal_Length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="1" rightMargin="6.9"/>
    </DataField>
    <DataField name="Petal_Width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="0.1" rightMargin="2.5"/>
    </DataField>
  </DataDictionary>

  <TreeModel modelName="RPart_Model"
             functionName="classification"
             algorithmName="rpart"
             splitCharacteristic="binarySplit"
             missingValueStrategy="defaultChild"
             noTrueChildStrategy="returnLastPrediction">
    <!-- Species is the predicted field; the four measurements are active inputs. -->
    <MiningSchema>
      <MiningField name="Species" usageType="predicted"/>
      <MiningField name="Sepal_Length" usageType="active"/>
      <MiningField name="Sepal_Width" usageType="active"/>
      <MiningField name="Petal_Length" usageType="active"/>
      <MiningField name="Petal_Width" usageType="active"/>
    </MiningSchema>

    <!-- The predicted label followed by one probability column per class. -->
    <Output>
      <OutputField name="Predicted_Species" optype="categorical" dataType="string" feature="predictedValue"/>
      <OutputField name="Probability_setosa" optype="continuous" dataType="double" feature="probability" value="setosa"/>
      <OutputField name="Probability_versicolor" optype="continuous" dataType="double" feature="probability" value="versicolor"/>
      <OutputField name="Probability_virginica" optype="continuous" dataType="double" feature="probability" value="virginica"/>
    </Output>

    <!-- Root: all 150 records, 50 per class. -->
    <Node id="1" score="setosa" recordCount="150" defaultChild="3">
      <True/>
      <ScoreDistribution value="setosa" recordCount="50" confidence="0.333333333333333"/>
      <ScoreDistribution value="versicolor" recordCount="50" confidence="0.333333333333333"/>
      <ScoreDistribution value="virginica" recordCount="50" confidence="0.333333333333333"/>

      <!-- Leaf: Petal_Length below 2.45 holds only setosa records. -->
      <Node id="2" score="setosa" recordCount="50">
        <SimplePredicate field="Petal_Length" operator="lessThan" value="2.45"/>
        <ScoreDistribution value="setosa" recordCount="50" confidence="1"/>
        <ScoreDistribution value="versicolor" recordCount="0" confidence="0"/>
        <ScoreDistribution value="virginica" recordCount="0" confidence="0"/>
      </Node>

      <!-- Petal_Length of 2.45 or more: split again on Petal_Width at 1.75. -->
      <Node id="3" score="versicolor" recordCount="100" defaultChild="7">
        <SimplePredicate field="Petal_Length" operator="greaterOrEqual" value="2.45"/>
        <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
        <ScoreDistribution value="versicolor" recordCount="50" confidence="0.5"/>
        <ScoreDistribution value="virginica" recordCount="50" confidence="0.5"/>

        <!-- Leaf: 54 records, 49 versicolor and 5 virginica. -->
        <Node id="6" score="versicolor" recordCount="54">
          <SimplePredicate field="Petal_Width" operator="lessThan" value="1.75"/>
          <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
          <ScoreDistribution value="versicolor" recordCount="49" confidence="0.907407407407407"/>
          <ScoreDistribution value="virginica" recordCount="5" confidence="0.0925925925925926"/>
        </Node>

        <!-- Leaf: 46 records, 1 versicolor and 45 virginica. -->
        <Node id="7" score="virginica" recordCount="46">
          <SimplePredicate field="Petal_Width" operator="greaterOrEqual" value="1.75"/>
          <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
          <ScoreDistribution value="versicolor" recordCount="1" confidence="0.0217391304347826"/>
          <ScoreDistribution value="virginica" recordCount="45" confidence="0.978260869565217"/>
        </Node>
      </Node>
    </Node>
  </TreeModel>
</PMML>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species | ||
5.1,3.5,1.4,0.2,setosa | ||
4.9,3,1.4,0.2,setosa | ||
4.7,3.2,1.3,0.2,setosa | ||
4.6,3.1,1.5,0.2,setosa | ||
5,3.6,1.4,0.2,setosa | ||
5.4,3.9,1.7,0.4,setosa | ||
4.6,3.4,1.4,0.3,setosa | ||
5,3.4,1.5,0.2,setosa | ||
4.4,2.9,1.4,0.2,setosa | ||
4.9,3.1,1.5,0.1,setosa | ||
5.4,3.7,1.5,0.2,setosa | ||
4.8,3.4,1.6,0.2,setosa | ||
4.8,3,1.4,0.1,setosa | ||
4.3,3,1.1,0.1,setosa | ||
5.8,4,1.2,0.2,setosa | ||
5.7,4.4,1.5,0.4,setosa | ||
5.4,3.9,1.3,0.4,setosa | ||
5.1,3.5,1.4,0.3,setosa | ||
5.7,3.8,1.7,0.3,setosa | ||
5.1,3.8,1.5,0.3,setosa | ||
5.4,3.4,1.7,0.2,setosa | ||
5.1,3.7,1.5,0.4,setosa | ||
4.6,3.6,1,0.2,setosa | ||
5.1,3.3,1.7,0.5,setosa | ||
4.8,3.4,1.9,0.2,setosa | ||
5,3,1.6,0.2,setosa | ||
5,3.4,1.6,0.4,setosa | ||
5.2,3.5,1.5,0.2,setosa | ||
5.2,3.4,1.4,0.2,setosa | ||
4.7,3.2,1.6,0.2,setosa | ||
4.8,3.1,1.6,0.2,setosa | ||
5.4,3.4,1.5,0.4,setosa | ||
5.2,4.1,1.5,0.1,setosa | ||
5.5,4.2,1.4,0.2,setosa | ||
4.9,3.1,1.5,0.2,setosa | ||
5,3.2,1.2,0.2,setosa | ||
5.5,3.5,1.3,0.2,setosa | ||
4.9,3.6,1.4,0.1,setosa | ||
4.4,3,1.3,0.2,setosa | ||
5.1,3.4,1.5,0.2,setosa | ||
5,3.5,1.3,0.3,setosa | ||
4.5,2.3,1.3,0.3,setosa | ||
4.4,3.2,1.3,0.2,setosa | ||
5,3.5,1.6,0.6,setosa | ||
5.1,3.8,1.9,0.4,setosa | ||
4.8,3,1.4,0.3,setosa | ||
5.1,3.8,1.6,0.2,setosa | ||
4.6,3.2,1.4,0.2,setosa | ||
5.3,3.7,1.5,0.2,setosa | ||
5,3.3,1.4,0.2,setosa | ||
7,3.2,4.7,1.4,versicolor | ||
6.4,3.2,4.5,1.5,versicolor | ||
6.9,3.1,4.9,1.5,versicolor | ||
5.5,2.3,4,1.3,versicolor | ||
6.5,2.8,4.6,1.5,versicolor | ||
5.7,2.8,4.5,1.3,versicolor | ||
6.3,3.3,4.7,1.6,versicolor | ||
4.9,2.4,3.3,1,versicolor | ||
6.6,2.9,4.6,1.3,versicolor | ||
5.2,2.7,3.9,1.4,versicolor | ||
5,2,3.5,1,versicolor | ||
5.9,3,4.2,1.5,versicolor | ||
6,2.2,4,1,versicolor | ||
6.1,2.9,4.7,1.4,versicolor | ||
5.6,2.9,3.6,1.3,versicolor | ||
6.7,3.1,4.4,1.4,versicolor | ||
5.6,3,4.5,1.5,versicolor | ||
5.8,2.7,4.1,1,versicolor | ||
6.2,2.2,4.5,1.5,versicolor | ||
5.6,2.5,3.9,1.1,versicolor | ||
5.9,3.2,4.8,1.8,versicolor | ||
6.1,2.8,4,1.3,versicolor | ||
6.3,2.5,4.9,1.5,versicolor | ||
6.1,2.8,4.7,1.2,versicolor | ||
6.4,2.9,4.3,1.3,versicolor | ||
6.6,3,4.4,1.4,versicolor | ||
6.8,2.8,4.8,1.4,versicolor | ||
6.7,3,5,1.7,versicolor | ||
6,2.9,4.5,1.5,versicolor | ||
5.7,2.6,3.5,1,versicolor | ||
5.5,2.4,3.8,1.1,versicolor | ||
5.5,2.4,3.7,1,versicolor | ||
5.8,2.7,3.9,1.2,versicolor | ||
6,2.7,5.1,1.6,versicolor | ||
5.4,3,4.5,1.5,versicolor | ||
6,3.4,4.5,1.6,versicolor | ||
6.7,3.1,4.7,1.5,versicolor | ||
6.3,2.3,4.4,1.3,versicolor | ||
5.6,3,4.1,1.3,versicolor | ||
5.5,2.5,4,1.3,versicolor | ||
5.5,2.6,4.4,1.2,versicolor | ||
6.1,3,4.6,1.4,versicolor | ||
5.8,2.6,4,1.2,versicolor | ||
5,2.3,3.3,1,versicolor | ||
5.6,2.7,4.2,1.3,versicolor | ||
5.7,3,4.2,1.2,versicolor | ||
5.7,2.9,4.2,1.3,versicolor | ||
6.2,2.9,4.3,1.3,versicolor | ||
5.1,2.5,3,1.1,versicolor | ||
5.7,2.8,4.1,1.3,versicolor | ||
6.3,3.3,6,2.5,virginica | ||
5.8,2.7,5.1,1.9,virginica | ||
7.1,3,5.9,2.1,virginica | ||
6.3,2.9,5.6,1.8,virginica | ||
6.5,3,5.8,2.2,virginica | ||
7.6,3,6.6,2.1,virginica | ||
4.9,2.5,4.5,1.7,virginica | ||
7.3,2.9,6.3,1.8,virginica | ||
6.7,2.5,5.8,1.8,virginica | ||
7.2,3.6,6.1,2.5,virginica | ||
6.5,3.2,5.1,2,virginica | ||
6.4,2.7,5.3,1.9,virginica | ||
6.8,3,5.5,2.1,virginica | ||
5.7,2.5,5,2,virginica | ||
5.8,2.8,5.1,2.4,virginica | ||
6.4,3.2,5.3,2.3,virginica | ||
6.5,3,5.5,1.8,virginica | ||
7.7,3.8,6.7,2.2,virginica | ||
7.7,2.6,6.9,2.3,virginica | ||
6,2.2,5,1.5,virginica | ||
6.9,3.2,5.7,2.3,virginica | ||
5.6,2.8,4.9,2,virginica | ||
7.7,2.8,6.7,2,virginica | ||
6.3,2.7,4.9,1.8,virginica | ||
6.7,3.3,5.7,2.1,virginica | ||
7.2,3.2,6,1.8,virginica | ||
6.2,2.8,4.8,1.8,virginica | ||
6.1,3,4.9,1.8,virginica | ||
6.4,2.8,5.6,2.1,virginica | ||
7.2,3,5.8,1.6,virginica | ||
7.4,2.8,6.1,1.9,virginica | ||
7.9,3.8,6.4,2,virginica | ||
6.4,2.8,5.6,2.2,virginica | ||
6.3,2.8,5.1,1.5,virginica | ||
6.1,2.6,5.6,1.4,virginica | ||
7.7,3,6.1,2.3,virginica | ||
6.3,3.4,5.6,2.4,virginica | ||
6.4,3.1,5.5,1.8,virginica | ||
6,3,4.8,1.8,virginica | ||
6.9,3.1,5.4,2.1,virginica | ||
6.7,3.1,5.6,2.4,virginica | ||
6.9,3.1,5.1,2.3,virginica | ||
5.8,2.7,5.1,1.9,virginica | ||
6.8,3.2,5.9,2.3,virginica | ||
6.7,3.3,5.7,2.5,virginica | ||
6.7,3,5.2,2.3,virginica | ||
6.3,2.5,5,1.9,virginica | ||
6.5,3,5.2,2,virginica | ||
6.2,3.4,5.4,2.3,virginica | ||
5.9,3,5.1,1.8,virginica |
Oops, something went wrong.