Skip to content

Commit

Permalink
Import
Browse files Browse the repository at this point in the history
  • Loading branch information
vruusmann committed Jan 5, 2016
0 parents commit cbf72b8
Show file tree
Hide file tree
Showing 6 changed files with 1,378 additions and 0 deletions.
661 changes: 661 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions README.md
@@ -0,0 +1,56 @@
JPMML-Piglet
============

A Java UDF for evaluating PMML models on the Apache Pig platform (http://pig.apache.org/).

# Prerequisites #

* Apache Pig 0.14.0 or newer.

# Installation #

Enter the project root directory and build using [Apache Maven](http://maven.apache.org/):
```
mvn clean install
```

The build produces an uber-JAR file `target/jpmml-piglet-distributable-1.0-SNAPSHOT.jar`.

# Usage #

Add the uber-JAR file to the Apache Pig classpath:
```
REGISTER target/jpmml-piglet-distributable-1.0-SNAPSHOT.jar;
```

The following example scores the `src/etc/Iris.csv` CSV file with the `src/etc/DecisionTreeIris.pmml` PMML file.

Importing data from the CSV file:
```
iris_input = LOAD 'src/etc/Iris.csv' USING PigStorage(',')
AS (Sepal_Length:double, Sepal_Width:double, Petal_Length:double, Petal_Width:double);
DESCRIBE iris_input;
```

Defining a Java UDF for the PMML file:
```
DEFINE iris_treemodel org.jpmml.piglet.PMMLFunc('src/etc/DecisionTreeIris.pmml');
```

Scoring data using the Java UDF:
```
iris_prediction = FOREACH iris_input GENERATE iris_treemodel(*);
DESCRIBE iris_prediction;
DUMP iris_prediction;
```

# License #

JPMML-Piglet is dual-licensed under the [GNU Affero General Public License (AGPL) version 3.0](http://www.gnu.org/licenses/agpl-3.0.html) and a commercial license.

# Additional information #

Please contact [info@openscoring.io](mailto:info@openscoring.io).
95 changes: 95 additions & 0 deletions pom.xml
@@ -0,0 +1,95 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for JPMML-Piglet (org.jpmml:jpmml-piglet).
  The shade plugin, bound to the package phase, assembles an uber-JAR whose
  file name is built from the project coordinates (see finalName below).
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.jpmml</groupId>
  <artifactId>jpmml-piglet</artifactId>
  <version>1.0-SNAPSHOT</version>

  <licenses>
    <license>
      <name>GNU Affero General Public License (AGPL) version 3.0</name>
      <url>http://www.gnu.org/licenses/agpl-3.0.html</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <properties>
    <!-- Pin the source encoding so the build does not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- PMML evaluation engine; compile scope, so it is bundled into the uber-JAR. -->
    <dependency>
      <groupId>org.jpmml</groupId>
      <artifactId>pmml-evaluator</artifactId>
      <version>1.2.8</version>
    </dependency>

    <!-- Pig and Hadoop are "provided": needed to compile, expected on the runtime classpath. -->
    <dependency>
      <groupId>org.apache.pig</groupId>
      <artifactId>pig</artifactId>
      <version>[0.14.0, 0.15.0]</version>
      <scope>provided</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>[1.0.0, 1.2.1]</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile for Java 1.7 source and bytecode level. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.3</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>

      <!-- Check the JDK version used to run the build against the 1.7 requirement. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-enforcer-plugin</artifactId>
        <version>1.4.1</version>
        <executions>
          <execution>
            <goals>
              <goal>enforce</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <rules>
            <requireJavaVersion>
              <version>1.7</version>
            </requireJavaVersion>
          </rules>
        </configuration>
      </plugin>

      <!-- Build the distributable uber-JAR during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>2.4.2</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <createDependencyReducedPom>false</createDependencyReducedPom>
          <!--
            Use the project.* expressions: the bare ${artifactId} and ${version}
            forms are deprecated and make Maven emit model warnings.
            The resulting file name is unchanged.
          -->
          <finalName>${project.artifactId}-distributable-${project.version}</finalName>
          <!-- NOTE(review): minimizeJar prunes classes the plugin considers unused; confirm nothing loaded reflectively is dropped. -->
          <minimizeJar>true</minimizeJar>
          <relocations>
            <!-- Relocate the bundled Guava packages; presumably to avoid clashing with the Guava copy on the Pig/Hadoop classpath (TODO confirm). -->
            <relocation>
              <pattern>com.google.common</pattern>
              <shadedPattern>com.google.common19_0</shadedPattern>
            </relocation>
          </relocations>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
72 changes: 72 additions & 0 deletions src/etc/DecisionTreeIris.pmml
@@ -0,0 +1,72 @@
<?xml version="1.0"?>
<!--
  PMML 4.2 classification tree for the Iris data set.
  Target field: Species (setosa, versicolor, virginica).
  Active fields: Sepal_Length, Sepal_Width, Petal_Length, Petal_Width.
-->
<PMML version="4.2"
      xmlns="http://www.dmg.org/PMML-4_2"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2/pmml-4-2.xsd">
  <Header copyright="Copyright (c) 2015 vfed" description="RPart Decision Tree Model">
    <Extension name="user" value="vfed" extender="Rattle/PMML"/>
    <Application name="Rattle/PMML" version="1.4"/>
    <Timestamp>2015-12-14 02:09:09</Timestamp>
  </Header>

  <!-- One categorical target plus four continuous measurements, each with its closed value interval. -->
  <DataDictionary numberOfFields="5">
    <DataField name="Species" optype="categorical" dataType="string">
      <Value value="setosa"/>
      <Value value="versicolor"/>
      <Value value="virginica"/>
    </DataField>
    <DataField name="Sepal_Length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9"/>
    </DataField>
    <DataField name="Sepal_Width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="2" rightMargin="4.4"/>
    </DataField>
    <DataField name="Petal_Length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="1" rightMargin="6.9"/>
    </DataField>
    <DataField name="Petal_Width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="0.1" rightMargin="2.5"/>
    </DataField>
  </DataDictionary>

  <TreeModel modelName="RPart_Model"
             functionName="classification"
             algorithmName="rpart"
             splitCharacteristic="binarySplit"
             missingValueStrategy="defaultChild"
             noTrueChildStrategy="returnLastPrediction">
    <MiningSchema>
      <MiningField name="Species" usageType="predicted"/>
      <MiningField name="Sepal_Length" usageType="active"/>
      <MiningField name="Sepal_Width" usageType="active"/>
      <MiningField name="Petal_Length" usageType="active"/>
      <MiningField name="Petal_Width" usageType="active"/>
    </MiningSchema>

    <!-- Outputs: the predicted class label and one probability per class. -->
    <Output>
      <OutputField name="Predicted_Species" optype="categorical" dataType="string" feature="predictedValue"/>
      <OutputField name="Probability_setosa" optype="continuous" dataType="double" feature="probability" value="setosa"/>
      <OutputField name="Probability_versicolor" optype="continuous" dataType="double" feature="probability" value="versicolor"/>
      <OutputField name="Probability_virginica" optype="continuous" dataType="double" feature="probability" value="virginica"/>
    </Output>

    <!-- Root node: always matches (True predicate); all 150 training records. -->
    <Node id="1" score="setosa" recordCount="150" defaultChild="3">
      <True/>
      <ScoreDistribution value="setosa" recordCount="50" confidence="0.333333333333333"/>
      <ScoreDistribution value="versicolor" recordCount="50" confidence="0.333333333333333"/>
      <ScoreDistribution value="virginica" recordCount="50" confidence="0.333333333333333"/>

      <!-- Leaf: Petal_Length below 2.45 scores setosa. -->
      <Node id="2" score="setosa" recordCount="50">
        <SimplePredicate field="Petal_Length" operator="lessThan" value="2.45"/>
        <ScoreDistribution value="setosa" recordCount="50" confidence="1"/>
        <ScoreDistribution value="versicolor" recordCount="0" confidence="0"/>
        <ScoreDistribution value="virginica" recordCount="0" confidence="0"/>
      </Node>

      <!-- Petal_Length of 2.45 or more: split further on Petal_Width. -->
      <Node id="3" score="versicolor" recordCount="100" defaultChild="7">
        <SimplePredicate field="Petal_Length" operator="greaterOrEqual" value="2.45"/>
        <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
        <ScoreDistribution value="versicolor" recordCount="50" confidence="0.5"/>
        <ScoreDistribution value="virginica" recordCount="50" confidence="0.5"/>

        <!-- Leaf: Petal_Width below 1.75 scores versicolor. -->
        <Node id="6" score="versicolor" recordCount="54">
          <SimplePredicate field="Petal_Width" operator="lessThan" value="1.75"/>
          <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
          <ScoreDistribution value="versicolor" recordCount="49" confidence="0.907407407407407"/>
          <ScoreDistribution value="virginica" recordCount="5" confidence="0.0925925925925926"/>
        </Node>

        <!-- Leaf: Petal_Width of 1.75 or more scores virginica. -->
        <Node id="7" score="virginica" recordCount="46">
          <SimplePredicate field="Petal_Width" operator="greaterOrEqual" value="1.75"/>
          <ScoreDistribution value="setosa" recordCount="0" confidence="0"/>
          <ScoreDistribution value="versicolor" recordCount="1" confidence="0.0217391304347826"/>
          <ScoreDistribution value="virginica" recordCount="45" confidence="0.978260869565217"/>
        </Node>
      </Node>
    </Node>
  </TreeModel>
</PMML>
151 changes: 151 additions & 0 deletions src/etc/Iris.csv
@@ -0,0 +1,151 @@
Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
5.1,3.5,1.4,0.2,setosa
4.9,3,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.4,3.7,1.5,0.2,setosa
4.8,3.4,1.6,0.2,setosa
4.8,3,1.4,0.1,setosa
4.3,3,1.1,0.1,setosa
5.8,4,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.1,3.5,1.4,0.3,setosa
5.7,3.8,1.7,0.3,setosa
5.1,3.8,1.5,0.3,setosa
5.4,3.4,1.7,0.2,setosa
5.1,3.7,1.5,0.4,setosa
4.6,3.6,1,0.2,setosa
5.1,3.3,1.7,0.5,setosa
4.8,3.4,1.9,0.2,setosa
5,3,1.6,0.2,setosa
5,3.4,1.6,0.4,setosa
5.2,3.5,1.5,0.2,setosa
5.2,3.4,1.4,0.2,setosa
4.7,3.2,1.6,0.2,setosa
4.8,3.1,1.6,0.2,setosa
5.4,3.4,1.5,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa
4.9,3.1,1.5,0.2,setosa
5,3.2,1.2,0.2,setosa
5.5,3.5,1.3,0.2,setosa
4.9,3.6,1.4,0.1,setosa
4.4,3,1.3,0.2,setosa
5.1,3.4,1.5,0.2,setosa
5,3.5,1.3,0.3,setosa
4.5,2.3,1.3,0.3,setosa
4.4,3.2,1.3,0.2,setosa
5,3.5,1.6,0.6,setosa
5.1,3.8,1.9,0.4,setosa
4.8,3,1.4,0.3,setosa
5.1,3.8,1.6,0.2,setosa
4.6,3.2,1.4,0.2,setosa
5.3,3.7,1.5,0.2,setosa
5,3.3,1.4,0.2,setosa
7,3.2,4.7,1.4,versicolor
6.4,3.2,4.5,1.5,versicolor
6.9,3.1,4.9,1.5,versicolor
5.5,2.3,4,1.3,versicolor
6.5,2.8,4.6,1.5,versicolor
5.7,2.8,4.5,1.3,versicolor
6.3,3.3,4.7,1.6,versicolor
4.9,2.4,3.3,1,versicolor
6.6,2.9,4.6,1.3,versicolor
5.2,2.7,3.9,1.4,versicolor
5,2,3.5,1,versicolor
5.9,3,4.2,1.5,versicolor
6,2.2,4,1,versicolor
6.1,2.9,4.7,1.4,versicolor
5.6,2.9,3.6,1.3,versicolor
6.7,3.1,4.4,1.4,versicolor
5.6,3,4.5,1.5,versicolor
5.8,2.7,4.1,1,versicolor
6.2,2.2,4.5,1.5,versicolor
5.6,2.5,3.9,1.1,versicolor
5.9,3.2,4.8,1.8,versicolor
6.1,2.8,4,1.3,versicolor
6.3,2.5,4.9,1.5,versicolor
6.1,2.8,4.7,1.2,versicolor
6.4,2.9,4.3,1.3,versicolor
6.6,3,4.4,1.4,versicolor
6.8,2.8,4.8,1.4,versicolor
6.7,3,5,1.7,versicolor
6,2.9,4.5,1.5,versicolor
5.7,2.6,3.5,1,versicolor
5.5,2.4,3.8,1.1,versicolor
5.5,2.4,3.7,1,versicolor
5.8,2.7,3.9,1.2,versicolor
6,2.7,5.1,1.6,versicolor
5.4,3,4.5,1.5,versicolor
6,3.4,4.5,1.6,versicolor
6.7,3.1,4.7,1.5,versicolor
6.3,2.3,4.4,1.3,versicolor
5.6,3,4.1,1.3,versicolor
5.5,2.5,4,1.3,versicolor
5.5,2.6,4.4,1.2,versicolor
6.1,3,4.6,1.4,versicolor
5.8,2.6,4,1.2,versicolor
5,2.3,3.3,1,versicolor
5.6,2.7,4.2,1.3,versicolor
5.7,3,4.2,1.2,versicolor
5.7,2.9,4.2,1.3,versicolor
6.2,2.9,4.3,1.3,versicolor
5.1,2.5,3,1.1,versicolor
5.7,2.8,4.1,1.3,versicolor
6.3,3.3,6,2.5,virginica
5.8,2.7,5.1,1.9,virginica
7.1,3,5.9,2.1,virginica
6.3,2.9,5.6,1.8,virginica
6.5,3,5.8,2.2,virginica
7.6,3,6.6,2.1,virginica
4.9,2.5,4.5,1.7,virginica
7.3,2.9,6.3,1.8,virginica
6.7,2.5,5.8,1.8,virginica
7.2,3.6,6.1,2.5,virginica
6.5,3.2,5.1,2,virginica
6.4,2.7,5.3,1.9,virginica
6.8,3,5.5,2.1,virginica
5.7,2.5,5,2,virginica
5.8,2.8,5.1,2.4,virginica
6.4,3.2,5.3,2.3,virginica
6.5,3,5.5,1.8,virginica
7.7,3.8,6.7,2.2,virginica
7.7,2.6,6.9,2.3,virginica
6,2.2,5,1.5,virginica
6.9,3.2,5.7,2.3,virginica
5.6,2.8,4.9,2,virginica
7.7,2.8,6.7,2,virginica
6.3,2.7,4.9,1.8,virginica
6.7,3.3,5.7,2.1,virginica
7.2,3.2,6,1.8,virginica
6.2,2.8,4.8,1.8,virginica
6.1,3,4.9,1.8,virginica
6.4,2.8,5.6,2.1,virginica
7.2,3,5.8,1.6,virginica
7.4,2.8,6.1,1.9,virginica
7.9,3.8,6.4,2,virginica
6.4,2.8,5.6,2.2,virginica
6.3,2.8,5.1,1.5,virginica
6.1,2.6,5.6,1.4,virginica
7.7,3,6.1,2.3,virginica
6.3,3.4,5.6,2.4,virginica
6.4,3.1,5.5,1.8,virginica
6,3,4.8,1.8,virginica
6.9,3.1,5.4,2.1,virginica
6.7,3.1,5.6,2.4,virginica
6.9,3.1,5.1,2.3,virginica
5.8,2.7,5.1,1.9,virginica
6.8,3.2,5.9,2.3,virginica
6.7,3.3,5.7,2.5,virginica
6.7,3,5.2,2.3,virginica
6.3,2.5,5,1.9,virginica
6.5,3,5.2,2,virginica
6.2,3.4,5.4,2.3,virginica
5.9,3,5.1,1.8,virginica

0 comments on commit cbf72b8

Please sign in to comment.