Skip to content
This repository has been archived by the owner on Sep 6, 2023. It is now read-only.

Commit

Permalink
Replace the deprecated method to read BigQuery table (#10)
Browse files Browse the repository at this point in the history
* Replace the deprecated method to read BigQuery

* Change the version to 0.2.1

* Modify README

* Fix
  • Loading branch information
yu-iskw committed Jul 20, 2020
1 parent 16508e0 commit 516ec3a
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 3 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ It is horizontally-scalable on top of distributed system, since apache beam can
mvn clean package
# Run kuromoji-for-bigquery via the compiled JAR file
java -cp $(pwd)/target/kuromoji-for-bigquery-bundled-0.2.0.jar \
java -cp $(pwd)/target/kuromoji-for-bigquery-bundled-0.2.1.jar \
com.github.yuiskw.beam.Kuromoji4BigQuery \
--project=test-project-id
--schema=id:integer
Expand All @@ -71,6 +71,12 @@ java -cp $(pwd)/target/kuromoji-for-bigquery-bundled-0.2.0.jar \
--workerMachineType=n1-standard-2
```

## Versions
|kuromoji-for-bigquery|Apache Beam|kuromoji|
|---------------------|-----------|--------|
|0.1.0 |2.1.0 |0.7.7 |
|0.2.x |2.20.0 |0.7.7 |

## License

Copyright (c) 2017 Yu Ishikawa.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

<groupId>com.github.yuiskw</groupId>
<artifactId>kuromoji-for-bigquery</artifactId>
<version>0.2.0</version>
<version>0.2.1</version>

<packaging>jar</packaging>

Expand Down
30 changes: 29 additions & 1 deletion src/main/java/com/github/yuiskw/beam/Kuromoji4BigQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ public static void main(String[] args) {
.setProjectId(projectId)
.setDatasetId(inputDatasetId)
.setTableId(inputTableId);
BigQueryIO.Read reader = BigQueryIO.read().from(inputTableRef);
BigQueryIO.TypedRead<TableRow> reader = BigQueryIO.readTableRows()
.withMethod(BigQueryIO.TypedRead.Method.DIRECT_READ) // to use the BigQuery Storage API
.withSelectedFields(getSelectedFields(schemaMap, tokenizedColumn))
.from(inputTableRef);

// Output
TableReference outputTableRef = new TableReference()
Expand Down Expand Up @@ -214,4 +217,29 @@ public static TableSchema convertToTableSchema(
TableSchema schema = new TableSchema().setFields(fields);
return schema;
}

/**
 * Builds the list of column names to select from the input table.
 *
 * <p>The result holds every key of {@code schemaMap} in the map's iteration order. If
 * {@code tokenizedColumn} is not already one of those keys, it is appended as the last element.
 *
 * @param schemaMap a map of BigQuery table schema whose keys are the column names
 * @param tokenizedColumn column to be tokenized
 * @return a new list with the schema column names, followed by the tokenized column when absent
 */
public static List<String> getSelectedFields(
    LinkedHashMap<String, String> schemaMap,
    String tokenizedColumn) {
  // Start from a copy of the schema's column names, keeping their order.
  List<String> selected = new ArrayList<>(schemaMap.keySet());

  // The tokenized column is already selected; nothing to append.
  if (selected.contains(tokenizedColumn)) {
    return selected;
  }

  selected.add(tokenizedColumn);
  return selected;
}
}
23 changes: 23 additions & 0 deletions src/test/java/com/github/yuiskw/beam/Kuromoji4BigQueryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*/
package com.github.yuiskw.beam;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;

Expand Down Expand Up @@ -80,6 +81,28 @@ public void testParseSchemaWithSpaces() {
assertEquals("float", parents.get("height"));
}

/**
 * When the tokenized column is not part of the schema, it is appended after the schema columns.
 */
@Test
public void testGetSelectedFields1() {
  String tokenizedColumn = "text";
  LinkedHashMap<String, String> schemaMap = new LinkedHashMap<String, String>();
  schemaMap.put("x", "interger");
  schemaMap.put("y", "string");
  List<String> expected = Arrays.asList("x", "y", "text");
  List<String> selectedFields = Kuromoji4BigQuery.getSelectedFields(schemaMap, tokenizedColumn);
  // assertEquals takes (expected, actual); this order keeps failure messages accurate.
  assertEquals(expected, selectedFields);
}

/**
 * When the tokenized column is already part of the schema, it is not added a second time.
 */
@Test
public void testGetSelectedFields2() {
  String tokenizedColumn = "text";
  LinkedHashMap<String, String> schemaMap = new LinkedHashMap<String, String>();
  schemaMap.put("x", "interger");
  schemaMap.put("text", "string");
  List<String> expected = Arrays.asList("x", "text");
  List<String> selectedFields = Kuromoji4BigQuery.getSelectedFields(schemaMap, tokenizedColumn);
  // assertEquals takes (expected, actual); this order keeps failure messages accurate.
  assertEquals(expected, selectedFields);
}


/**
Test Query
Expand Down

0 comments on commit 516ec3a

Please sign in to comment.