Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add progress to parseShapefileInBatches. #2073

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 8 additions & 4 deletions modules/shapefile/src/lib/parsers/parse-dbf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type DBFRowsOutput = object[];

interface DBFTableOutput {
schema?: Schema;
rows: DBFRowsOutput;
data: DBFRowsOutput;
}

type DBFHeader = {
Expand Down Expand Up @@ -111,7 +111,7 @@ export function parseDBF(
switch (options.tables && options.tables.format) {
case 'table':
// TODO - parse columns
return {schema, rows: data};
return {schema, data};

case 'rows':
default:
Expand Down Expand Up @@ -139,13 +139,17 @@ export async function* parseDBFInBatches(
}

if (parser.result.data.length > 0) {
yield parser.result.data;
yield {
data: parser.result.data
};
parser.result.data = [];
}
}
parser.end();
if (parser.result.data.length > 0) {
yield parser.result.data;
yield {
data: parser.result.data
};
}
}
/**
Expand Down
11 changes: 8 additions & 3 deletions modules/shapefile/src/lib/parsers/parse-shapefile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ interface ShapefileOutput {
shx?: SHXOutput;
header: SHPHeader;
data: object[];
bytesUsed?: number;
bytesTotal?: number;
}
/**
* Parsing of file in batches
Expand Down Expand Up @@ -77,10 +79,11 @@ export async function* parseShapefileInBatches(
for await (const item of iterator) {
let geometries: any;
let properties: any;
const {bytesUsed, bytesTotal} = item.progress;
if (!propertyIterable) {
geometries = item;
geometries = item.data;
} else {
[geometries, properties] = item;
[geometries, properties] = item.data;
}

const geojsonGeometries = parseGeometries(geometries);
Expand All @@ -94,7 +97,9 @@ export async function* parseShapefileInBatches(
prj,
shx,
header: shapeHeader,
data: features
data: features,
bytesUsed,
bytesTotal
};
}
}
Expand Down
20 changes: 15 additions & 5 deletions modules/shapefile/src/lib/parsers/parse-shp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ type SHPResult = {
geometries: [];
header?: {};
error?: string;
progress: {bytesUsed?: number; totalBytes?: number};
};

class SHPParser {
options?: any = {};
binaryReader = new BinaryChunkReader({maxRewindBytes: SHP_RECORD_HEADER_SIZE});
state = STATE.EXPECTING_HEADER;
result: SHPResult = {
geometries: []
geometries: [],
progress: {}
};

constructor(options?: LoaderOptions) {
Expand All @@ -45,7 +47,6 @@ class SHPParser {
end() {
this.binaryReader.end();
this.state = parseState(this.state, this.result, this.binaryReader, this.options);
// this.result.progress.bytesUsed = this.binaryReader.bytesUsed();
if (this.state !== STATE.END) {
this.state = STATE.ERROR;
this.result.error = 'SHP incomplete file';
Expand Down Expand Up @@ -81,13 +82,19 @@ export async function* parseSHPInBatches(
}

if (parser.result.geometries.length > 0) {
yield parser.result.geometries;
yield {
data: parser.result.geometries,
progress: parser.result.progress
};
parser.result.geometries = [];
}
}
parser.end();
if (parser.result.geometries.length > 0) {
yield parser.result.geometries;
yield {
data: parser.result.geometries,
progress: parser.result.progress
};
}

return;
Expand Down Expand Up @@ -130,7 +137,7 @@ function parseState(

result.header = parseSHPHeader(dataView);
result.progress = {
bytesUsed: 0,
bytesUsed: SHP_HEADER_SIZE,
bytesTotal: result.header.length,
rows: 0
};
Expand Down Expand Up @@ -178,6 +185,9 @@ function parseState(

result.currentIndex++;
result.progress.rows = result.currentIndex - 1;
// +8 because the content length field in the record's header
// excludes the 8-byte record header itself
result.progress.bytesUsed += recordHeader.byteLength + 8;
}
}

Expand Down
36 changes: 23 additions & 13 deletions modules/shapefile/src/lib/streaming/zip-batch-iterators.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
type Batch = {
data: number[] | number[][];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is `data` a union (`number[] | number[][]`)? It seems like you'd only need one type.

progress?: {bytesUsed: number; totalBytes: number; rows: number};
};

/**
* Zip two iterators together
*
Expand All @@ -7,23 +12,23 @@
export async function* zipBatchIterators(
iterator1: AsyncIterator<any[]>,
iterator2: AsyncIterator<any[]>
): AsyncGenerator<number[][], void, unknown> {
let batch1 = [];
let batch2 = [];
): AsyncGenerator<Batch, void, unknown> {
let batch1 = {data: []};
let batch2 = {data: []};
let iterator1Done: boolean = false;
let iterator2Done: boolean = false;

// TODO - one could let all iterators flow at full speed using `Promise.race`
// however we might end up with a big temporary buffer
while (!iterator1Done && !iterator2Done) {
if (batch1.length === 0 && !iterator1Done) {
if (batch1.data.length === 0 && !iterator1Done) {
const {value, done} = await iterator1.next();
if (done) {
iterator1Done = true;
} else {
batch1 = value;
}
} else if (batch2.length === 0 && !iterator2Done) {
} else if (batch2.data.length === 0 && !iterator2Done) {
const {value, done} = await iterator2.next();
if (done) {
iterator2Done = true;
Expand All @@ -46,17 +51,22 @@ export async function* zipBatchIterators(
* @param batch2
* @return array | null
*/
function extractBatch(batch1: number[], batch2: number[]): number[][] | null {
const batchLength: number = Math.min(batch1.length, batch2.length);
if (batchLength === 0) {
function extractBatch(batch1: Batch, batch2: Batch): Batch | null {
const {data: data1, progress} = batch1;
const {data: data2} = batch2;
const dataLength: number = Math.min(data1.length, data2.length);
if (dataLength === 0) {
return null;
}

// Non interleaved arrays
const batch: number[][] = [batch1.slice(0, batchLength), batch2.slice(0, batchLength)];
const result: any = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be `result: Batch` instead of `result: any`?

progress,
data: [data1.slice(0, dataLength), data2.slice(0, dataLength)]
};

// Modify the 2 batches
batch1.splice(0, batchLength);
batch2.splice(0, batchLength);
return batch;
// Modify the 2 data arrays
data1.splice(0, dataLength);
data2.splice(0, dataLength);
return result;
}