Skip to content

Commit

Permalink
Nodejs ser/de methods (#3250)
Browse files Browse the repository at this point in the history
* wip: serde for lazyframe

* add ser/de for series,df,ldf & expr
  • Loading branch information
universalmind303 committed Apr 28, 2022
1 parent 56df87e commit 8da6c04
Show file tree
Hide file tree
Showing 15 changed files with 559 additions and 363 deletions.
5 changes: 0 additions & 5 deletions nodejs-polars/__tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@ describe("dataframe", () => {
pl.Series("foo", [1, 2, 9], pl.Int16),
pl.Series("bar", [6, 2, 8], pl.Int16),
]);
test("to/fromBinary round trip", () => {
const buf = df.toBinary();
const actual = pl.DataFrame.fromBinary(buf);
expect(df).toStrictEqual(actual);
});

test("dtypes", () => {
const expected = [pl.Float64, pl.Utf8];
Expand Down
91 changes: 91 additions & 0 deletions nodejs-polars/__tests__/serde.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import pl from "@polars";
/**
 * Round-trip tests for the `serialize` / `deserialize` pair exposed on
 * LazyDataFrame, Expr, DataFrame and Series, plus failure cases for an
 * unknown format name and for a buffer decoded with the wrong format.
 */
describe("serde", () => {
  // The two format names exercised by every round-trip test below.
  const formats = ["json", "bincode"] as const;

  // Fresh fixtures per test so that no state is shared between cases.
  const makeFrame = () => pl.DataFrame({
    foo: [1, 2],
    bar: [2, 3]
  });
  const makeSeries = () => pl.Series("foo", [1, 2, 3]);

  for (const format of formats) {
    test(`lazyframe:${format}`, () => {
      const lazy = pl.scanCSV("../examples/datasets/foods1.csv");
      const restored = pl.LazyDataFrame.deserialize(lazy.serialize(format), format);
      const expected = lazy.collectSync();

      expect(restored.collectSync()).toFrameEqual(expected);
    });
  }

  for (const format of formats) {
    test(`expr:${format}`, () => {
      const original = pl.cols("foo", "bar").sortBy("other");
      const restored = pl.Expr.deserialize(original.serialize(format), format);

      // Expressions are compared through their string representation.
      expect(restored.toString()).toEqual(original.toString());
    });
  }

  for (const format of formats) {
    test(`dataframe:${format}`, () => {
      const frame = makeFrame();
      const restored = pl.DataFrame.deserialize(frame.serialize(format), format);

      expect(frame).toFrameEqual(restored);
    });
  }

  test("dataframe:unsupported", () => {
    const frame = makeFrame();
    const bincode = frame.serialize("bincode");

    // unknown format on the way out
    expect(() => frame.serialize("yaml" as any)).toThrow();
    // unknown format on the way in
    expect(() => pl.DataFrame.deserialize(bincode, "yaml" as any)).toThrow();
    // bincode payload decoded as json
    expect(() => pl.DataFrame.deserialize(bincode, "json")).toThrow();
  });

  for (const format of formats) {
    test(`series:${format}`, () => {
      const series = makeSeries();
      const restored = pl.Series.deserialize(series.serialize(format), format);

      expect(series).toSeriesEqual(restored);
    });
  }

  test("series:unsupported", () => {
    const series = makeSeries();
    const bincode = series.serialize("bincode");

    // unknown format on the way out
    expect(() => series.serialize("yaml" as any)).toThrow();
    // unknown format on the way in
    expect(() => pl.Series.deserialize(bincode, "yaml" as any)).toThrow();
    // bincode payload decoded as json
    expect(() => pl.Series.deserialize(bincode, "json")).toThrow();
  });
});
54 changes: 43 additions & 11 deletions nodejs-polars/__tests__/struct.test.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,50 @@

import pl from "@polars";
import {DataType} from "@polars/datatypes";

describe("struct", () => {
describe("series", () => {
test("series <--> array round trip", () => {
const data = [
{utf8: "a", f64: 1, },
{utf8: "b", f64: 2, }
];
const name = "struct";
const s = pl.Series(name, data);
expect(s.name).toEqual(name);
expect(s.toArray()).toEqual(data);
// A Series built from an array of objects keeps its name and converts
// back to the same array of objects.
test("series <--> array round trip", () => {
  const name = "struct";
  const rows = [
    {utf8: "a", f64: 1},
    {utf8: "b", f64: 2}
  ];
  const series = pl.Series(name, rows);

  expect(series.name).toEqual(name);
  expect(series.toArray()).toEqual(rows);
});
// `pl.struct` over a list of Series should equal `DataFrame.toStruct`
// over the same columns once both carry the same name.
test("pli.struct", () => {
  const expected = pl.DataFrame({foo: [1], bar: [2]}).toStruct("foo");
  const fields = [pl.Series("foo", [1]), pl.Series("bar", [2])];
  const actual = pl.struct(fields).rename("foo");

  expect(actual).toSeriesEqual(expected);
});
// Selecting a `pl.struct` expression over columns should equal
// `DataFrame.toStruct` with the same name.
test("pli.struct dataframe", () => {
  const frame = pl.DataFrame({foo: [1], bar: [2]});
  const structExpr = pl.struct(pl.cols("foo", "bar")).alias("s");
  const actual = frame.select(structExpr).toSeries();

  expect(actual).toSeriesEqual(frame.toStruct("s"));
});
// A struct Series converts to one plain object per row, with nulls preserved.
test("struct toArray", () => {
  const frame = pl.DataFrame({
    foo: [1, 10, 100],
    bar: [2, null, 200]
  });
  const rows = frame.toStruct("foobar").toArray();

  expect(rows).toEqual([
    {foo: 1, bar: 2},
    {foo: 10, bar: null},
    {foo: 100, bar: 200}
  ]);
});
});
114 changes: 86 additions & 28 deletions nodejs-polars/polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {
ExprOrString
} from "./utils";

import {Arithmetic, Sample} from "./shared_traits";
import {Arithmetic, Deserialize, Sample, Serialize} from "./shared_traits";
import {col} from "./lazy/functions";

const inspect = Symbol.for("nodejs.util.inspect.custom");
Expand Down Expand Up @@ -222,7 +222,7 @@ interface WriteMethods {
╰─────┴─────┴─────╯
```
*/
export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, WriteMethods {
export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, WriteMethods, Serialize {
/** @ignore */
_df: any
dtypes: DataType[]
Expand Down Expand Up @@ -1163,11 +1163,6 @@ export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, Wri
* ```
*/
tail(length?: number): DataFrame
/** serializes the DataFrame to a [bincode buffer](https://docs.rs/bincode/latest/bincode/index.html)
* @example
* pl.DataFrame.fromBinary(df.toBinary())
*/
toBinary(): Buffer
/** @deprecated *since 0.4.0* use {@link writeCSV} */
toCSV(destOrOptions?, options?);
/**
Expand All @@ -1184,14 +1179,9 @@ export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, Wri
*/
toRecords(): Record<string, any>[]

/**
* @deprecated
* @since 0.4.0
* @use {@link writeJSON}
* this will be removed in a later version to prevent collision with native `toJSON` method
* */
toJSON(options?: WriteJsonOptions): string
toJSON(destination: string | Writable, options?: WriteJsonOptions): void
/** compat with `JSON.stringify` */
toJSON(): string

/**
* Converts dataframe object into column oriented javascript objects
* @example
Expand All @@ -1211,6 +1201,28 @@ export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, Wri
toParquet(destination?, options?)
toSeries(index?: number): Series
toString(): string
/**
Convert a ``DataFrame`` to a ``Series`` of type ``Struct``
@param name Name for the struct Series
@example
```
>>> df = pl.DataFrame({
... "a": [1, 2, 3, 4, 5],
... "b": ["one", "two", "three", "four", "five"],
... })
>>> df.toStruct("nums")
shape: (5,)
Series: 'nums' [struct[2]{'a': i64, 'b': str}]
[
{1,"one"}
{2,"two"}
{3,"three"}
{4,"four"}
{5,"five"}
]
```
*/
toStruct(name: string): Series
/**
* Transpose a DataFrame over the diagonal.
*
Expand Down Expand Up @@ -1293,6 +1305,45 @@ export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, Wri
*/
unique(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"): DataFrame
unique(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"}): DataFrame
/**
Decompose a struct into its fields. The fields will be inserted into the `DataFrame` at the
location of the `struct` column.
@param names Names of the struct columns that will be decomposed by its fields
@example
```
>>> df = pl.DataFrame({
... "int": [1, 2],
... "str": ["a", "b"],
... "bool": [true, null],
... "list": [[1, 2], [3]],
... })
... .toStruct("my_struct")
... .toFrame()
>>> df
shape: (2, 1)
┌─────────────────────────────┐
│ my_struct │
│ --- │
│ struct[4]{'int',...,'list'} │
╞═════════════════════════════╡
│ {1,"a",true,[1, 2]} │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ {2,"b",null,[3]} │
└─────────────────────────────┘
>>> df.unnest("my_struct")
shape: (2, 4)
┌─────┬─────┬──────┬────────────┐
│ int ┆ str ┆ bool ┆ list │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ bool ┆ list [i64] │
╞═════╪═════╪══════╪════════════╡
│ 1 ┆ a ┆ true ┆ [1, 2] │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2 ┆ b ┆ null ┆ [3] │
└─────┴─────┴──────┴────────────┘
```
*/
unnest(names: string | string[]): DataFrame
/**
* Aggregate the columns of this DataFrame to their variance value.
* @example
Expand Down Expand Up @@ -1779,8 +1830,8 @@ export const _DataFrame = (_df: any): DataFrame => {
return wrap("sum");
},
tail: (length=5) => wrap("tail", length),
toBinary() {
return _df.toBincode();
// Serializes this DataFrame by delegating to the underlying `_df.serialize`,
// forwarding `format` unchanged and returning whatever that call returns.
serialize(format) {
return _df.serialize(format);
},
toCSV(...args) {
return this.writeCSV(...args);
Expand All @@ -1804,8 +1855,13 @@ export const _DataFrame = (_df: any): DataFrame => {
toRecords() {
return _df.toObjects();
},
toJSON(arg0?, options?): any {
return _df.toJs();
toJSON(...args: any[]) {
  // `JSON.stringify` calls `toJSON(key)`, and the key is "" when this frame is
  // the root value being stringified; in that case return the JS form.
  // Any other call gets the json serialization rendered as a string.
  const [key] = args;

  return key === "" ? _df.toJs() : _df.serialize("json").toString();
},
toObject() {
return this.getColumns().reduce((acc, curr) => {
Expand Down Expand Up @@ -1893,6 +1949,9 @@ export const _DataFrame = (_df: any): DataFrame => {

},
toSeries: (index = 0) => _Series(_df.selectAtIdx(index) as any) as any,
// Wraps the result of the underlying `_df.toStruct(name)` in a Series via `_Series`.
toStruct(name) {
return _Series(_df.toStruct(name));
},
toString() {
return _df.toString();
},
Expand Down Expand Up @@ -1923,6 +1982,11 @@ export const _DataFrame = (_df: any): DataFrame => {

return df;
},
unnest(names) {
  // Accept a single name or a list; the underlying call always receives an array.
  const columns = Array.isArray(names) ? names : [names];

  return _DataFrame(_df.unnest(columns));
},
var() {
return wrap("var");
},
Expand Down Expand Up @@ -2019,7 +2083,7 @@ export const _DataFrame = (_df: any): DataFrame => {
});
};

export interface DataFrameConstructor {
export interface DataFrameConstructor extends Deserialize<DataFrame> {
(): DataFrame
(data: any, options?: {
columns?: any[],
Expand All @@ -2028,10 +2092,6 @@ export interface DataFrameConstructor {
inferSchemaLength?: number,
}): DataFrame
isDataFrame(arg: any): arg is DataFrame;
/**
* @param binary used to serialize/deserialize dataframe. This will only work with the output from expr.toBinary().
*/
fromBinary(binary: Buffer): DataFrame
}
function DataFrameConstructor(data?, options?): DataFrame {

Expand All @@ -2058,13 +2118,11 @@ function objToDF(obj: Record<string, Array<any>>): any {
return new pli.JsDataFrame(columns);
}
const isDataFrame = (anyVal: any): anyVal is DataFrame => anyVal?.[Symbol.toStringTag] === "DataFrame";
const fromBinary = (buf: Buffer) => {
return _DataFrame(pli.JsDataFrame.fromBincode(buf));
};


export const DataFrame: DataFrameConstructor = Object.assign(
DataFrameConstructor, {
isDataFrame,
fromBinary
deserialize: (buf, fmt) => _DataFrame(pli.JsDataFrame.deserialize(buf, fmt))
}
);
3 changes: 3 additions & 0 deletions nodejs-polars/polars/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { DataType } from "./datatypes";
import * as func from "./functions";
import * as io from "./io";
import * as cfg from "./cfg";
import * as ldf from "./lazy/dataframe";
import pli from "./internals/polars_internal";
import {
funcs as lazy,
Expand All @@ -16,6 +17,7 @@ import {
namespace pl {
export import Expr = lazyExpr.Expr
export import DataFrame = df.DataFrame
export import LazyDataFrame = ldf.LazyDataFrame
export import Series = series.Series;
export type LazyGroupBy = lazyGroupBy;
export type When = _when.When;
Expand Down Expand Up @@ -81,6 +83,7 @@ namespace pl {
export import pearsonCorr = lazy.pearsonCorr
export import quantile = lazy.quantile
export import select = lazy.select
export import struct = lazy.struct
export import spearmanRankCorr = lazy.spearmanRankCorr
export import tail = lazy.tail
export import list = lazy.list
Expand Down
Loading

0 comments on commit 8da6c04

Please sign in to comment.