Skip to content

Commit

Permalink
Updates and fixes for Hadoop 2.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
kbeedkar committed Feb 5, 2015
1 parent 085307b commit 91efa99
Show file tree
Hide file tree
Showing 5 changed files with 456 additions and 460 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ example illustrates:
>
>
> $ MGFSM_HOME/bin/mgfsm -i /path/to/input/dir/ -o /path/to/output/dir/ -s σ -g γ -l λ -m d
Note: The current version of MG-FSM is tested with *Hadoop 0.20.2-cdh3u6*.
Note: The current version of MG-FSM is tested with *Hadoop 2.6.0*.

### Output format
Frequent sequences are written to a file in the output directory, where each line in the file has the following format:
Expand Down
261 changes: 122 additions & 139 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,147 +4,130 @@
<groupId>de.mpii</groupId>
<artifactId>mgfsm</artifactId>
<version>0.0.1-SNAPSHOT</version>

<repositories>
<repository>
<id>cloudera-repo-releases</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
</repository>
</repositories>


<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>

<dependencies>


<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>${java.version}</version>
<systemPath>${java.home}/../lib/tools.jar</systemPath>
<scope>system</scope>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>${java.version}</version>
<systemPath>${java.home}/../lib/tools.jar</systemPath>
<scope>system</scope>
</dependency>




<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2-cdh3u1</version>
</dependency>

<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>6.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-math</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>0.22.0</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math</artifactId>
<version>2.2</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.1</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.2</version>
</dependency>

<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>


<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-integration</artifactId>
<version>0.7</version>
</dependency>



</dependencies>


<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>

<plugin>
<!-- Build an executable JAR -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>de.mpii.fsm.driver.FsmDriver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>

<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>de.mpii.fsm.driver.FsmDriver</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>


</plugins>
</build>


<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>6.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-math</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math</artifactId>
<version>2.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-integration</artifactId>
<version>0.7</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<plugin>
<!-- Build an executable JAR -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>de.mpii.fsm.driver.FsmDriver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>de.mpii.fsm.driver.FsmDriver</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>

</project>
23 changes: 22 additions & 1 deletion src/main/java/de/mpii/fsm/input/InputFileConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;

import de.mpii.fsm.util.IntArrayWritable;
Expand All @@ -26,6 +30,7 @@
* hadoop jar fsm.jar org.apache.mahout.fsm.InputFileConverter /home/szoup/test/input1 /user/szoup/test
*
* @author Spyros Zoupanos
* @author Kaustubh Beedkar (kbeedkar@uni-mannheim.de)
*/
public class InputFileConverter {

Expand All @@ -38,7 +43,23 @@ public static void main(String[] args) throws IOException {
Job job = new Job();
Configuration conf = job.getConfiguration();
FileSystem fs = FileSystem.get(URI.create(args[1]), conf);
SequenceFile.Writer fileWriter = new SequenceFile.Writer(fs, conf, new Path(args[1]), LongWritable.class, IntArrayWritable.class);

Path path = new Path(fs.getUri());

LongWritable itemKey = new LongWritable();
IntArrayWritable itemValue = new IntArrayWritable();


/** GzipCodec might not work */
CompressionCodec Codec = new GzipCodec();

Option optPath = SequenceFile.Writer.file(path);
Option optKey = SequenceFile.Writer.keyClass(itemKey.getClass());
Option optValue = SequenceFile.Writer.valueClass(itemValue.getClass());
Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

SequenceFile.Writer fileWriter = SequenceFile.createWriter(conf, optPath, optKey, optValue, optCom);


String strLine;
long counter = 0;
Expand Down
15 changes: 12 additions & 3 deletions src/main/java/de/mpii/fsm/mgfsm/FsmJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
Expand Down Expand Up @@ -47,6 +48,7 @@
*
* @author Iris Miliaraki
* @author Spyros Zoupanos
* @author Kaustubh Beedkar (kbeedkar@uni-mannheim.de)
*/
public class FsmJob {

Expand Down Expand Up @@ -159,16 +161,23 @@ public static void runFsmJob() throws Exception {

// Sequence file for frequent 1 items
String fListURI = "fList";
FileSystem fs1 = FileSystem.get(URI.create(fListURI), conf);
//FileSystem fs1 = FileSystem.get(URI.create(fListURI), conf);
Path fListPath = new Path(fListURI);

commonConfig.setFlistPath(fListPath);

IntArrayWritable itemKey = new IntArrayWritable();
LongWritable itemValue = new LongWritable();

//CompressionCodec Codec = new GzipCodec();
SequenceFile.Writer writer = null;
writer = SequenceFile.createWriter(fs1, conf, fListPath, itemKey.getClass(), itemValue.getClass());

Option optPath = SequenceFile.Writer.file(fListPath);
Option optKey = SequenceFile.Writer.keyClass(itemKey.getClass());
Option optValue = SequenceFile.Writer.valueClass(itemValue.getClass());
//Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

writer = SequenceFile.createWriter(conf, optPath, optKey, optValue);

int totalPartitions = 0;
long partitionCurrSize = 0;

Expand Down
Loading

0 comments on commit 91efa99

Please sign in to comment.