From 3dc9fb078fc2be4ec23fb1f85fd27045df74d318 Mon Sep 17 00:00:00 2001 From: Nick Peihl Date: Fri, 28 Jan 2022 15:05:47 -0500 Subject: [PATCH] Add `bytesUsed` to Shapefile `parseInBatches`. This modifies `zipBatchIterators` to return an object with keys `progress` and `data` rather than an array. --- .../shapefile/src/lib/parsers/parse-dbf.ts | 12 ++++--- .../src/lib/parsers/parse-shapefile.ts | 11 ++++-- .../shapefile/src/lib/parsers/parse-shp.ts | 20 ++++++++--- .../src/lib/streaming/zip-batch-iterators.ts | 36 ++++++++++++------- 4 files changed, 54 insertions(+), 25 deletions(-) diff --git a/modules/shapefile/src/lib/parsers/parse-dbf.ts b/modules/shapefile/src/lib/parsers/parse-dbf.ts index e34b88bd7c..e6cf71a39b 100644 --- a/modules/shapefile/src/lib/parsers/parse-dbf.ts +++ b/modules/shapefile/src/lib/parsers/parse-dbf.ts @@ -5,7 +5,7 @@ type DBFRowsOutput = object[]; interface DBFTableOutput { schema?: Schema; - rows: DBFRowsOutput; + data: DBFRowsOutput; } type DBFHeader = { @@ -111,7 +111,7 @@ export function parseDBF( switch (options.tables && options.tables.format) { case 'table': // TODO - parse columns - return {schema, rows: data}; + return {schema, data}; case 'rows': default: @@ -139,13 +139,17 @@ export async function* parseDBFInBatches( } if (parser.result.data.length > 0) { - yield parser.result.data; + yield { + data: parser.result.data + }; parser.result.data = []; } } parser.end(); if (parser.result.data.length > 0) { - yield parser.result.data; + yield { + data: parser.result.data + }; } } /** diff --git a/modules/shapefile/src/lib/parsers/parse-shapefile.ts b/modules/shapefile/src/lib/parsers/parse-shapefile.ts index d574b83b41..cd89c875e4 100644 --- a/modules/shapefile/src/lib/parsers/parse-shapefile.ts +++ b/modules/shapefile/src/lib/parsers/parse-shapefile.ts @@ -17,6 +17,8 @@ interface ShapefileOutput { shx?: SHXOutput; header: SHPHeader; data: object[]; + bytesUsed?: number; + bytesTotal?: number; } /** * Parsing of file in batches @@ 
-77,10 +79,11 @@ export async function* parseShapefileInBatches( for await (const item of iterator) { let geometries: any; let properties: any; + const {bytesUsed, bytesTotal} = item.progress; if (!propertyIterable) { - geometries = item; + geometries = item.data; } else { - [geometries, properties] = item; + [geometries, properties] = item.data; } const geojsonGeometries = parseGeometries(geometries); @@ -94,7 +97,9 @@ export async function* parseShapefileInBatches( prj, shx, header: shapeHeader, - data: features + data: features, + bytesUsed, + bytesTotal }; } } diff --git a/modules/shapefile/src/lib/parsers/parse-shp.ts b/modules/shapefile/src/lib/parsers/parse-shp.ts index 24973a527c..6a12fe350d 100644 --- a/modules/shapefile/src/lib/parsers/parse-shp.ts +++ b/modules/shapefile/src/lib/parsers/parse-shp.ts @@ -23,6 +23,7 @@ type SHPResult = { geometries: []; header?: {}; error?: string; + progress: {bytesUsed?: number; bytesTotal?: number; rows?: number}; }; class SHPParser { @@ -30,7 +31,8 @@ class SHPParser { binaryReader = new BinaryChunkReader({maxRewindBytes: SHP_RECORD_HEADER_SIZE}); state = STATE.EXPECTING_HEADER; result: SHPResult = { - geometries: [] + geometries: [], + progress: {} }; constructor(options?: LoaderOptions) { @@ -45,7 +47,6 @@ class SHPParser { end() { this.binaryReader.end(); this.state = parseState(this.state, this.result, this.binaryReader, this.options); - // this.result.progress.bytesUsed = this.binaryReader.bytesUsed(); if (this.state !== STATE.END) { this.state = STATE.ERROR; this.result.error = 'SHP incomplete file'; @@ -81,13 +82,19 @@ export async function* parseSHPInBatches( } if (parser.result.geometries.length > 0) { - yield parser.result.geometries; + yield { + data: parser.result.geometries, + progress: parser.result.progress + }; parser.result.geometries = []; } } parser.end(); if (parser.result.geometries.length > 0) { - yield parser.result.geometries; + yield { + data: parser.result.geometries, + progress: parser.result.progress + }; 
} return; @@ -130,7 +137,7 @@ function parseState( result.header = parseSHPHeader(dataView); result.progress = { - bytesUsed: 0, + bytesUsed: SHP_HEADER_SIZE, bytesTotal: result.header.length, rows: 0 }; @@ -178,6 +185,9 @@ function parseState( result.currentIndex++; result.progress.rows = result.currentIndex - 1; + // +8 because the content length field in the record's header + // excludes the 8-byte record header itself + result.progress.bytesUsed += recordHeader.byteLength + 8; } } diff --git a/modules/shapefile/src/lib/streaming/zip-batch-iterators.ts b/modules/shapefile/src/lib/streaming/zip-batch-iterators.ts index 90686af019..d5c1239e99 100644 --- a/modules/shapefile/src/lib/streaming/zip-batch-iterators.ts +++ b/modules/shapefile/src/lib/streaming/zip-batch-iterators.ts @@ -1,3 +1,8 @@ +type Batch = { + data: number[] | number[][]; + progress?: {bytesUsed: number; bytesTotal: number; rows: number}; +}; + /** * Zip two iterators together * @@ -7,23 +12,23 @@ export async function* zipBatchIterators( iterator1: AsyncIterator, iterator2: AsyncIterator -): AsyncGenerator { - let batch1 = []; - let batch2 = []; +): AsyncGenerator { + let batch1 = {data: []}; + let batch2 = {data: []}; let iterator1Done: boolean = false; let iterator2Done: boolean = false; // TODO - one could let all iterators flow at full speed using `Promise.race` // however we might end up with a big temporary buffer while (!iterator1Done && !iterator2Done) { - if (batch1.length === 0 && !iterator1Done) { + if (batch1.data.length === 0 && !iterator1Done) { const {value, done} = await iterator1.next(); if (done) { iterator1Done = true; } else { batch1 = value; } - } else if (batch2.length === 0 && !iterator2Done) { + } else if (batch2.data.length === 0 && !iterator2Done) { const {value, done} = await iterator2.next(); if (done) { iterator2Done = true; @@ -46,17 +51,22 @@ * @param batch2 * @return array | null */ -function extractBatch(batch1: number[], 
batch2: number[]): number[][] | null { - const batchLength: number = Math.min(batch1.length, batch2.length); - if (batchLength === 0) { +function extractBatch(batch1: Batch, batch2: Batch): Batch | null { + const {data: data1, progress} = batch1; + const {data: data2} = batch2; + const dataLength: number = Math.min(data1.length, data2.length); + if (dataLength === 0) { return null; } // Non interleaved arrays - const batch: number[][] = [batch1.slice(0, batchLength), batch2.slice(0, batchLength)]; + const result: any = { + progress, + data: [data1.slice(0, dataLength), data2.slice(0, dataLength)] + }; - // Modify the 2 batches - batch1.splice(0, batchLength); - batch2.splice(0, batchLength); - return batch; + // Modify the 2 data arrays + data1.splice(0, dataLength); + data2.splice(0, dataLength); + return result; }