Skip to content

Commit

Permalink
Merge pull request #84 from dgunning/Morpheus_Excel
Browse files Browse the repository at this point in the history
Morpheus excel
  • Loading branch information
Zavster committed Jan 6, 2018
2 parents ae40463 + 569ebcb commit 8d5bce4
Show file tree
Hide file tree
Showing 14 changed files with 893 additions and 41 deletions.
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>

<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
Expand Down
33 changes: 33 additions & 0 deletions src/main/java/com/zavtech/morpheus/frame/DataFrameRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import com.zavtech.morpheus.source.CsvSourceOptions;
import com.zavtech.morpheus.source.DbSourceOptions;
import com.zavtech.morpheus.source.ExcelSourceOptions;
import com.zavtech.morpheus.source.JsonSourceOptions;

/**
Expand Down Expand Up @@ -73,6 +74,38 @@ public interface DataFrameRead {
*/
<R> DataFrame<R,String> csv(Consumer<CsvSourceOptions<R>> configurator);

/**
* Reads a DataFrame from an Excel InputStream
* @param is the input stream to read from
* @param <R> the row key type
* @return the resulting DataFrame, with String column keys
*/
<R> DataFrame<R,String> excel(InputStream is);

/**
* Reads a DataFrame from Excel content located at the URL specified
* @param url the URL to read the Excel content from
* @param <R> the row key type
* @return the resulting DataFrame, with String column keys
*/
<R> DataFrame<R,String> excel(URL url);

/**
* Reads a DataFrame from an Excel resource identified by name
* @param resource a filename or URL
* @param <R> the row key type
* @return the resulting DataFrame, with String column keys
*/
<R> DataFrame<R,String> excel(String resource);

/**
* Reads a DataFrame from an Excel resource based on the options configurator
* @param configurator the configurator that is handed the Excel options to set up
* @param <R> the row key type
* @return the resulting DataFrame, with String column keys
*/
<R> DataFrame<R,String> excel(Consumer<ExcelSourceOptions<R>> configurator);

/**
* Reads a DataFrame from a JSON file
* @param file the input file
Expand Down
26 changes: 20 additions & 6 deletions src/main/java/com/zavtech/morpheus/reference/XDataFrameRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,7 @@
import com.zavtech.morpheus.frame.DataFrame;
import com.zavtech.morpheus.frame.DataFrameRead;
import com.zavtech.morpheus.frame.DataFrameSource;
import com.zavtech.morpheus.source.CsvSource;
import com.zavtech.morpheus.source.CsvSourceOptions;
import com.zavtech.morpheus.source.DbSource;
import com.zavtech.morpheus.source.DbSourceOptions;
import com.zavtech.morpheus.source.JsonSource;
import com.zavtech.morpheus.source.JsonSourceOptions;
import com.zavtech.morpheus.source.*;

/**
* The default implementation of the DataFrame read interface
Expand All @@ -46,6 +41,7 @@ class XDataFrameRead implements DataFrameRead {
DataFrameSource.register(new CsvSource<>());
DataFrameSource.register(new JsonSource<>());
DataFrameSource.register(new DbSource<>());
DataFrameSource.register(new ExcelSource<>());
}

/**
Expand Down Expand Up @@ -81,6 +77,24 @@ public <R> DataFrame<R,String> csv(Consumer<CsvSourceOptions<R>> configurator) {
return DataFrameSource.lookup(CsvSource.class).read(configurator);
}

/**
 * Reads Excel content from an input stream by delegating to the configurator based overload
 * @param is    the input stream to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(InputStream is) {
    final Consumer<ExcelSourceOptions<R>> streamConfig = opts -> opts.setInputStream(is);
    return excel(streamConfig);
}

/**
 * Reads Excel content from a URL by delegating to the configurator based overload
 * @param url   the URL to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(URL url) {
    return excel(opts -> opts.setURL(url));
}


/**
 * Reads Excel content from a named resource by delegating to the configurator based overload
 * @param resource  the resource name passed on to the Excel options
 * @param <R>       the row key type
 * @return          the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(String resource) {
    return excel(opts -> opts.setResource(resource));
}

/**
* Reads a DataFrame from Excel content using the source that DataFrameSource.lookup returns for ExcelSource
* @param configurator the configurator for Excel options, passed straight through to the source
* @param <R> the row key type
* @return the DataFrame produced by the source
*/
@Override
@SuppressWarnings("unchecked")
public <R> DataFrame<R,String> excel(Consumer<ExcelSourceOptions<R>> configurator) {
// NOTE(review): the lookup is keyed on the raw ExcelSource.class token, which is presumably why the unchecked suppression is needed - confirm against DataFrameSource.lookup
return DataFrameSource.lookup(ExcelSource.class).read(configurator);
}

@Override
public <R, C> DataFrame<R, C> json(File file) {
return json(options -> options.setFile(file));
Expand Down
70 changes: 37 additions & 33 deletions src/main/java/com/zavtech/morpheus/source/CsvSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ private void initFrame(DataBatch<R> batch) {
final String colName = headers[i] != null ? headers[i] : "Column-" + i;
try {
final String[] rawValues = batch.colData(i);
final Optional<Parser<?>> userParser = getParser(colName);
final Optional<Parser<?>> userParser = getParser(options.getFormats(), colName);
final Optional<Class<?>> colType = getColumnType(colName);
if (colType.isPresent()) {
final Class<?> type = colType.get();
Expand Down Expand Up @@ -406,29 +406,7 @@ private Optional<Class<?>> getColumnType(String colName) {
}


/**
 * Resolves the user configured parser for a column. A parser registered under the exact
 * column name wins; otherwise each String parser key is applied as a regular expression
 * to the column name and the first key that matches supplies the parser.
 * @param colName   the column name
 * @return          the parser match, empty if nothing is configured for the column
 */
private Optional<Parser<?>> getParser(String colName) {
    final Formats formats = options.getFormats();
    final Parser<?> direct = formats.getParser(colName);
    if (direct != null) {
        return Optional.of(direct);
    }
    for (Object key : formats.getParserKeys()) {
        // Only String keys can act as column name patterns
        if (key instanceof String && colName.matches(key.toString())) {
            return Optional.ofNullable(formats.getParserOrFail(key.toString()));
        }
    }
    return Optional.empty();
}



/**
Expand Down Expand Up @@ -474,13 +452,35 @@ private void processBatch(DataBatch<R> batch) {
}
}

/**
 * Returns the user configured parser for a column name. An exact match on the column name
 * is tried first; failing that, every String parser key is treated as a regular expression
 * and the first one the column name matches decides the parser.
 * @param formats   the formats to look up parsers in
 * @param colName   the column name
 * @return          the parser match, empty if there is none
 */
protected static Optional<Parser<?>> getParser(Formats formats, String colName) {
    final Parser<?> exactMatch = formats.getParser(colName);
    if (exactMatch != null) {
        return Optional.of(exactMatch);
    }
    for (Object key : formats.getParserKeys()) {
        if (!(key instanceof String)) {
            continue;   // non-String keys cannot be used as column name patterns
        }
        final String pattern = (String) key;
        if (colName.matches(pattern)) {
            final Parser<?> matched = formats.getParserOrFail(pattern);
            return Optional.ofNullable(matched);
        }
    }
    return Optional.empty();
}


/**
* A class that represents a batch of raw CSV that needs to be parsed into type specific values
* @param <X> the row key type
*/
private class DataBatch<X> {
protected static class DataBatch<X> {

private Array<X> keys;
private int rowCount;
Expand All @@ -492,23 +492,27 @@ private class DataBatch<X> {
* @param colCount the column count for this batch
*/
private DataBatch(CsvSourceOptions<X> request, int colCount) {
this.keys = Array.of(request.getRowAxisType(), request.getReadBatchSize());
this.data = new String[colCount][request.getReadBatchSize()];
this( request.getRowAxisType(), request.getReadBatchSize(), colCount);
}

/**
 * Constructor
 * @param rowAxisType   the row key type, used to create the keys array
 * @param readBatchSize the batch size, which is the length of the keys array and of each column vector
 * @param colCount      the column count for this batch
 */
protected DataBatch(Class<X> rowAxisType, int readBatchSize, int colCount) {
    keys = Array.of(rowAxisType, readBatchSize);
    data = new String[colCount][readBatchSize];
}

/**
* Returns the row count for this batch
* @return the populated row count
*/
private int rowCount() {
protected int rowCount() {
return rowCount;
}

/**
* Returns the keys for this batch
* @return the keys for this batch
*/
private Array<X> keys() {
protected Array<X> keys() {
return keys;
}

Expand All @@ -517,14 +521,14 @@ private Array<X> keys() {
* @param colIndex the column index
* @return the column vector
*/
private String[] colData(int colIndex) {
protected String[] colData(int colIndex) {
return data[colIndex];
}

/**
* Resets this batch so that it can be used again
*/
private void clear() {
protected void clear() {
this.rowCount = 0;
this.keys.fill(null);
for (int i=0; i<data.length; ++i) {
Expand All @@ -540,7 +544,7 @@ private void clear() {
* @param rowValues the row value tokens
* @return the row index in batch
*/
private int addRow(X rowKey, String[] rowValues) {
protected int addRow(X rowKey, String[] rowValues) {
this.keys.setValue(rowCount, rowKey);
for (int i=0; i<rowValues.length; ++i) {
this.data[i][rowCount] = rowValues[i];
Expand All @@ -554,7 +558,7 @@ private int addRow(X rowKey, String[] rowValues) {
* @param rowValues the row value tokens
* @return the row index in batch
*/
private int addRow(int rowKey, String[] rowValues) {
protected int addRow(int rowKey, String[] rowValues) {
this.keys.setInt(rowCount, rowKey);
for (int i=0; i<rowValues.length; ++i) {
this.data[i][rowCount] = rowValues[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public void setInputStream(InputStream is) {
* Applies to resource to load CSV content from
* @param resource the resource to load from (file, URL or Classpath resource)
*/
public final void setResource(String resource) {
public void setResource(String resource) {
Objects.requireNonNull(resource, "The resource cannot be null");
this.resource = Resource.of(resource);
}
Expand Down

0 comments on commit 8d5bce4

Please sign in to comment.