Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Morpheus excel #84

Merged
merged 2 commits into from
Jan 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>

<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
Expand Down
33 changes: 33 additions & 0 deletions src/main/java/com/zavtech/morpheus/frame/DataFrameRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import com.zavtech.morpheus.source.CsvSourceOptions;
import com.zavtech.morpheus.source.DbSourceOptions;
import com.zavtech.morpheus.source.ExcelSourceOptions;
import com.zavtech.morpheus.source.JsonSourceOptions;

/**
Expand Down Expand Up @@ -73,6 +74,38 @@ public interface DataFrameRead {
*/
<R> DataFrame<R,String> csv(Consumer<CsvSourceOptions<R>> configurator);

/**
 * Reads a DataFrame from an Excel InputStream
 * @param is    the input stream to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
<R> DataFrame<R,String> excel(InputStream is);

/**
 * Reads a DataFrame from Excel content located at the given URL
 * @param url   the URL to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
<R> DataFrame<R,String> excel(URL url);

/**
 * Reads a DataFrame from an Excel resource identified by name
 * @param resource  the resource to read from, such as a filename or URL
 * @param <R>       the row key type
 * @return          the resulting DataFrame
 */
<R> DataFrame<R,String> excel(String resource);

/**
 * Reads a DataFrame from an Excel resource based on the options configurator
 * @param configurator  the configurator that sets up the Excel options
 * @param <R>           the row key type
 * @return              the resulting DataFrame
 */
<R> DataFrame<R,String> excel(Consumer<ExcelSourceOptions<R>> configurator);

/**
* Reads a DataFrame from a JSON file
* @param file the input file
Expand Down
26 changes: 20 additions & 6 deletions src/main/java/com/zavtech/morpheus/reference/XDataFrameRead.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,7 @@
import com.zavtech.morpheus.frame.DataFrame;
import com.zavtech.morpheus.frame.DataFrameRead;
import com.zavtech.morpheus.frame.DataFrameSource;
import com.zavtech.morpheus.source.CsvSource;
import com.zavtech.morpheus.source.CsvSourceOptions;
import com.zavtech.morpheus.source.DbSource;
import com.zavtech.morpheus.source.DbSourceOptions;
import com.zavtech.morpheus.source.JsonSource;
import com.zavtech.morpheus.source.JsonSourceOptions;
import com.zavtech.morpheus.source.*;

/**
* The default implementation of the DataFrame read interface
Expand All @@ -46,6 +41,7 @@ class XDataFrameRead implements DataFrameRead {
DataFrameSource.register(new CsvSource<>());
DataFrameSource.register(new JsonSource<>());
DataFrameSource.register(new DbSource<>());
DataFrameSource.register(new ExcelSource<>());
}

/**
Expand Down Expand Up @@ -81,6 +77,24 @@ public <R> DataFrame<R,String> csv(Consumer<CsvSourceOptions<R>> configurator) {
return DataFrameSource.lookup(CsvSource.class).read(configurator);
}

/**
 * Reads a DataFrame from Excel content supplied by an input stream, by
 * delegating to the configurator based overload.
 * @param is    the input stream to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(InputStream is) {
    return excel(excelOptions -> excelOptions.setInputStream(is));
}

/**
 * Reads a DataFrame from Excel content located at a URL, by delegating
 * to the configurator based overload.
 * @param url   the URL to read from
 * @param <R>   the row key type
 * @return      the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(URL url) {
    return excel(excelOptions -> excelOptions.setURL(url));
}


/**
 * Reads a DataFrame from a named Excel resource, by delegating to the
 * configurator based overload.
 * @param resource  the resource to read from
 * @param <R>       the row key type
 * @return          the resulting DataFrame
 */
@Override
public <R> DataFrame<R,String> excel(String resource) {
    return excel(excelOptions -> excelOptions.setResource(resource));
}

/**
 * Reads a DataFrame using the registered ExcelSource, passing it the
 * caller supplied options configurator.
 * @param configurator  the configurator for Excel options
 * @param <R>           the row key type
 * @return              the resulting DataFrame
 */
@Override
// NOTE(review): the unchecked suppression is presumably needed because the
// source is looked up through the raw ExcelSource class literal - confirm.
@SuppressWarnings("unchecked")
public <R> DataFrame<R,String> excel(Consumer<ExcelSourceOptions<R>> configurator) {
return DataFrameSource.lookup(ExcelSource.class).read(configurator);
}

@Override
public <R, C> DataFrame<R, C> json(File file) {
return json(options -> options.setFile(file));
Expand Down
70 changes: 37 additions & 33 deletions src/main/java/com/zavtech/morpheus/source/CsvSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ private void initFrame(DataBatch<R> batch) {
final String colName = headers[i] != null ? headers[i] : "Column-" + i;
try {
final String[] rawValues = batch.colData(i);
final Optional<Parser<?>> userParser = getParser(colName);
final Optional<Parser<?>> userParser = getParser(options.getFormats(), colName);
final Optional<Class<?>> colType = getColumnType(colName);
if (colType.isPresent()) {
final Class<?> type = colType.get();
Expand Down Expand Up @@ -406,29 +406,7 @@ private Optional<Class<?>> getColumnType(String colName) {
}


/**
* Returns the user configured parser for column name
* @param colName the column name
* @return the parser match
*/
private Optional<Parser<?>> getParser(String colName) {
final Formats formats = options.getFormats();
final Parser<?> userParser = formats.getParser(colName);
if (userParser != null) {
return Optional.of(userParser);
} else {
for (Object key : formats.getParserKeys()) {
if (key instanceof String) {
final String keyString = key.toString();
if (colName.matches(keyString)) {
final Parser<?> parser = formats.getParserOrFail(keyString);
return Optional.ofNullable(parser);
}
}
}
return Optional.empty();
}
}



/**
Expand Down Expand Up @@ -474,13 +452,35 @@ private void processBatch(DataBatch<R> batch) {
}
}

/**
 * Resolves the user configured parser for a column name.
 * An exact lookup by column name is tried first; failing that, every
 * String key registered in the formats is treated as a regular expression
 * and the parser of the first key the column name matches is returned.
 * @param formats   the formats holding the registered parsers
 * @param colName   the column name
 * @return          the parser match, empty if no key applies
 */
protected static Optional<Parser<?>> getParser(Formats formats, String colName) {
    final Parser<?> exactMatch = formats.getParser(colName);
    if (exactMatch != null) {
        return Optional.of(exactMatch);
    }
    for (Object candidate : formats.getParserKeys()) {
        if (!(candidate instanceof String)) {
            continue;
        }
        final String pattern = candidate.toString();
        if (colName.matches(pattern)) {
            final Parser<?> matched = formats.getParserOrFail(pattern);
            return Optional.ofNullable(matched);
        }
    }
    return Optional.empty();
}


/**
* A class that represents a batch of raw CSV that needs to be parsed into type specific values
* @param <X> the row key type
*/
private class DataBatch<X> {
protected static class DataBatch<X> {

private Array<X> keys;
private int rowCount;
Expand All @@ -492,23 +492,27 @@ private class DataBatch<X> {
* @param colCount the column count for this batch
*/
private DataBatch(CsvSourceOptions<X> request, int colCount) {
this.keys = Array.of(request.getRowAxisType(), request.getReadBatchSize());
this.data = new String[colCount][request.getReadBatchSize()];
this( request.getRowAxisType(), request.getReadBatchSize(), colCount);
}

/**
 * Constructor
 * @param rowAxisType   the key type passed to Array.of to create the keys array
 * @param readBatchSize the length of the keys array and of each column vector
 * @param colCount      the number of column vectors to allocate
 */
protected DataBatch(Class<X> rowAxisType, int readBatchSize, int colCount) {
this.keys = Array.of(rowAxisType, readBatchSize);
// raw values are held column-major: data[colIndex][rowIndex]
this.data = new String[colCount][readBatchSize];
}

/**
* Returns the row count for this batch
* @return the populated row count
*/
private int rowCount() {
protected int rowCount() {
return rowCount;
}

/**
* Returns the keys for this batch
* @return the keys for this batch
*/
private Array<X> keys() {
protected Array<X> keys() {
return keys;
}

Expand All @@ -517,14 +521,14 @@ private Array<X> keys() {
* @param colIndex the column index
* @return the column vector
*/
private String[] colData(int colIndex) {
protected String[] colData(int colIndex) {
return data[colIndex];
}

/**
* Resets this batch so that it can be used again
*/
private void clear() {
protected void clear() {
this.rowCount = 0;
this.keys.fill(null);
for (int i=0; i<data.length; ++i) {
Expand All @@ -540,7 +544,7 @@ private void clear() {
* @param rowValues the row value tokens
* @return the row index in batch
*/
private int addRow(X rowKey, String[] rowValues) {
protected int addRow(X rowKey, String[] rowValues) {
this.keys.setValue(rowCount, rowKey);
for (int i=0; i<rowValues.length; ++i) {
this.data[i][rowCount] = rowValues[i];
Expand All @@ -554,7 +558,7 @@ private int addRow(X rowKey, String[] rowValues) {
* @param rowValues the row value tokens
* @return the row index in batch
*/
private int addRow(int rowKey, String[] rowValues) {
protected int addRow(int rowKey, String[] rowValues) {
this.keys.setInt(rowCount, rowKey);
for (int i=0; i<rowValues.length; ++i) {
this.data[i][rowCount] = rowValues[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public void setInputStream(InputStream is) {
* Applies to resource to load CSV content from
* @param resource the resource to load from (file, URL or Classpath resource)
*/
public final void setResource(String resource) {
public void setResource(String resource) {
Objects.requireNonNull(resource, "The resource cannot be null");
this.resource = Resource.of(resource);
}
Expand Down