From 8da6c04e31f9974bd854fe7a2a9113d0610497c0 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Thu, 28 Apr 2022 12:02:48 -0500 Subject: [PATCH] Nodejs ser/de methods (#3250) * wip: serde for lazyframe * add ser/de for series,df,ldf & expr --- nodejs-polars/__tests__/dataframe.test.ts | 5 - nodejs-polars/__tests__/serde.test.ts | 91 ++++++++ nodejs-polars/__tests__/struct.test.ts | 54 ++++- nodejs-polars/polars/dataframe.ts | 114 +++++++--- nodejs-polars/polars/index.ts | 3 + nodejs-polars/polars/lazy/dataframe.ts | 32 ++- nodejs-polars/polars/lazy/expr.ts | 42 ++-- nodejs-polars/polars/lazy/functions.ts | 71 ++++++- nodejs-polars/polars/native-polars.js | 245 +--------------------- nodejs-polars/polars/series/series.ts | 31 +-- nodejs-polars/polars/shared_traits.ts | 19 ++ nodejs-polars/src/dataframe.rs | 62 +++--- nodejs-polars/src/lazy/dataframe.rs | 45 ++++ nodejs-polars/src/lazy/dsl.rs | 62 ++++++ nodejs-polars/src/series.rs | 46 +++- 15 files changed, 559 insertions(+), 363 deletions(-) create mode 100644 nodejs-polars/__tests__/serde.test.ts diff --git a/nodejs-polars/__tests__/dataframe.test.ts b/nodejs-polars/__tests__/dataframe.test.ts index 61a2bfec..6c783186 100644 --- a/nodejs-polars/__tests__/dataframe.test.ts +++ b/nodejs-polars/__tests__/dataframe.test.ts @@ -7,11 +7,6 @@ describe("dataframe", () => { pl.Series("foo", [1, 2, 9], pl.Int16), pl.Series("bar", [6, 2, 8], pl.Int16), ]); - test("to/fromBinary round trip", () => { - const buf = df.toBinary(); - const actual = pl.DataFrame.fromBinary(buf); - expect(df).toStrictEqual(actual); - }); test("dtypes", () => { const expected = [pl.Float64, pl.Utf8]; diff --git a/nodejs-polars/__tests__/serde.test.ts b/nodejs-polars/__tests__/serde.test.ts new file mode 100644 index 00000000..6700ec24 --- /dev/null +++ b/nodejs-polars/__tests__/serde.test.ts @@ -0,0 +1,91 @@ +import pl from "@polars"; +describe("serde", () => { + test("lazyframe:json", () => { + const df = pl.scanCSV("../examples/datasets/foods1.csv"); + const buf = df.serialize("json"); + const deserde = pl.LazyDataFrame.deserialize(buf, "json"); + const expected = df.collectSync(); + const actual = deserde.collectSync(); + expect(actual).toFrameEqual(expected); + }); + test("lazyframe:bincode", () => { + const df = pl.scanCSV("../examples/datasets/foods1.csv"); + const buf = df.serialize("bincode"); + const deserde = pl.LazyDataFrame.deserialize(buf, "bincode"); + const expected = df.collectSync(); + const actual = deserde.collectSync(); + expect(actual).toFrameEqual(expected); + }); + test("expr:json", () => { + const expr = pl.cols("foo", "bar").sortBy("other"); + + const buf = expr.serialize("json"); + const actual = pl.Expr.deserialize(buf, "json"); + + expect(actual.toString()).toEqual(expr.toString()); + }); + test("expr:bincode", () => { + const expr = pl.cols("foo", "bar").sortBy("other"); + const buf = expr.serialize("bincode"); + const actual = pl.Expr.deserialize(buf, "bincode"); + + expect(actual.toString()).toEqual(expr.toString()); + }); + test("dataframe:json", () => { + const df = pl.DataFrame({ + foo: [1, 2], + bar: [2, 3] + }); + const buf = df.serialize("json"); + const expected = pl.DataFrame.deserialize(buf, "json"); + expect(df).toFrameEqual(expected); + }); + test("dataframe:bincode", () => { + const df = pl.DataFrame({ + foo: [1, 2], + bar: [2, 3] + }); + const buf = df.serialize("bincode"); + const expected = pl.DataFrame.deserialize(buf, "bincode"); + expect(df).toFrameEqual(expected); + }); + + test("dataframe:unsupported", () => { + const df = pl.DataFrame({ + foo: [1, 2], + bar: [2, 3] + }); + const ser = () => df.serialize("yaml" as any); + const buf = df.serialize("bincode"); + const de = () => pl.DataFrame.deserialize(buf, "yaml" as any); + const mismatch = () => pl.DataFrame.deserialize(buf, "json"); + expect(ser).toThrow(); + expect(de).toThrow(); + expect(mismatch).toThrow(); + }); + test("series:json", () => { + const s = pl.Series("foo", [1, 2, 3]); + + const buf = s.serialize("json"); + const expected = pl.Series.deserialize(buf, "json"); + expect(s).toSeriesEqual(expected); + }); + test("series:bincode", () => { + const s = pl.Series("foo", [1, 2, 3]); + + const buf = s.serialize("bincode"); + const expected = pl.Series.deserialize(buf, "bincode"); + expect(s).toSeriesEqual(expected); + }); + + test("series:unsupported", () => { + const s = pl.Series("foo", [1, 2, 3]); + const ser = () => s.serialize("yaml" as any); + const buf = s.serialize("bincode"); + const de = () => pl.Series.deserialize(buf, "yaml" as any); + const mismatch = () => pl.Series.deserialize(buf, "json"); + expect(ser).toThrow(); + expect(de).toThrow(); + expect(mismatch).toThrow(); + }); +}); diff --git a/nodejs-polars/__tests__/struct.test.ts b/nodejs-polars/__tests__/struct.test.ts index 0e440751..e78d67a4 100644 --- a/nodejs-polars/__tests__/struct.test.ts +++ b/nodejs-polars/__tests__/struct.test.ts @@ -1,18 +1,50 @@ import pl from "@polars"; -import {DataType} from "@polars/datatypes"; describe("struct", () => { - describe("series", () => { - test("series <--> array round trip", () => { - const data = [ - {utf8: "a", f64: 1, }, - {utf8: "b", f64: 2, } - ]; - const name = "struct"; - const s = pl.Series(name, data); - expect(s.name).toEqual(name); - expect(s.toArray()).toEqual(data); + test("series <--> array round trip", () => { + const data = [ + {utf8: "a", f64: 1, }, + {utf8: "b", f64: 2, } + ]; + const name = "struct"; + const s = pl.Series(name, data); + expect(s.name).toEqual(name); + expect(s.toArray()).toEqual(data); + }); + test("pli.struct", () => { + const expected = pl.DataFrame({ + foo: [1], + bar: [2] + }).toStruct("foo"); + const foo = pl.Series("foo", [1]); + const bar = pl.Series("bar", [2]); + const actual = pl.struct([foo, bar]).rename("foo"); + expect(actual).toSeriesEqual(expected); + }); + test("pli.struct dataframe", () => { + const df = pl.DataFrame({ + foo: [1], + bar: [2] }); + const actual = df + .select(pl.struct(pl.cols("foo", "bar")).alias("s")) + .toSeries(); + expect(actual).toSeriesEqual(df.toStruct("s")); + }); + test("struct toArray", () => { + const actual = pl.DataFrame({ + foo: [1, 10, 100], + bar: [2, null, 200] + }) + .toStruct("foobar") + .toArray(); + + const expected = [ + {foo: 1, bar: 2}, + {foo: 10, bar: null}, + {foo: 100, bar: 200} + ]; + expect(actual).toEqual(expected); }); }); diff --git a/nodejs-polars/polars/dataframe.ts b/nodejs-polars/polars/dataframe.ts index 11708d65..53d77204 100644 --- a/nodejs-polars/polars/dataframe.ts +++ b/nodejs-polars/polars/dataframe.ts @@ -23,7 +23,7 @@ import { ExprOrString } from "./utils"; -import {Arithmetic, Sample} from "./shared_traits"; +import {Arithmetic, Deserialize, Sample, Serialize} from "./shared_traits"; import {col} from "./lazy/functions"; const inspect = Symbol.for("nodejs.util.inspect.custom"); @@ -222,7 +222,7 @@ interface WriteMethods { ╰─────┴─────┴─────╯ ``` */ -export interface DataFrame extends Arithmetic, Sample, WriteMethods { +export interface DataFrame extends Arithmetic, Sample, WriteMethods, Serialize { /** @ignore */ _df: any dtypes: DataType[] @@ -1163,11 +1163,6 @@ export interface DataFrame extends Arithmetic, Sample, Wri * ``` */ tail(length?: number): DataFrame - /** serializes the DataFrame to a [bincode buffer](https://docs.rs/bincode/latest/bincode/index.html) - * @example - * pl.DataFrame.fromBinary(df.toBinary()) - */ - toBinary(): Buffer /** @deprecated *since 0.4.0* use {@link writeCSV} */ toCSV(destOrOptions?, options?); /** @@ -1184,14 +1179,9 @@ export interface DataFrame extends Arithmetic, Sample, Wri */ toRecords(): Record[] - /** - * @deprecated - * @since 0.4.0 - * @use {@link writeJSON} - * this will be removed in a later version to prevent collision with native `toJSON` method - * */ - toJSON(options?: WriteJsonOptions): string - toJSON(destination: string | Writable, options?: WriteJsonOptions): void + /** compat with `JSON.stringify` */ + toJSON(): string + /** * Converts dataframe object into column oriented javascript objects * @example @@ -1211,6 +1201,28 @@ export interface DataFrame extends Arithmetic, Sample, Wri toParquet(destination?, options?) toSeries(index?: number): Series toString(): string + /** + Convert a ``DataFrame`` to a ``Series`` of type ``Struct`` + @param name Name for the struct Series + @example + ``` + >>> df = pl.DataFrame({ + ... "a": [1, 2, 3, 4, 5], + ... "b": ["one", "two", "three", "four", "five"], + ... }) + >>> df.toStruct("nums") + shape: (5,) + Series: 'nums' [struct[2]{'a': i64, 'b': str}] + [ + {1,"one"} + {2,"two"} + {3,"three"} + {4,"four"} + {5,"five"} + ] + ``` + */ + toStruct(name: string): Series /** * Transpose a DataFrame over the diagonal. * @@ -1293,6 +1305,45 @@ export interface DataFrame extends Arithmetic, Sample, Wri */ unique(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"): DataFrame unique(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"}): DataFrame + /** + Decompose a struct into its fields. The fields will be inserted in to the `DataFrame` on the + location of the `struct` type. + @param names Names of the struct columns that will be decomposed by its fields + @example + ``` + >>> df = pl.DataFrame({ + ... "int": [1, 2], + ... "str": ["a", "b"], + ... "bool": [true, null], + ... "list": [[1, 2], [3]], + ... }) + ... .toStruct("my_struct") + ... .toFrame() + >>> df + shape: (2, 1) + ┌─────────────────────────────┐ + │ my_struct │ + │ --- │ + │ struct[4]{'int',...,'list'} │ + ╞═════════════════════════════╡ + │ {1,"a",true,[1, 2]} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {2,"b",null,[3]} │ + └─────────────────────────────┘ + >>> df.unnest("my_struct") + shape: (2, 4) + ┌─────┬─────┬──────┬────────────┐ + │ int ┆ str ┆ bool ┆ list │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ bool ┆ list [i64] │ + ╞═════╪═════╪══════╪════════════╡ + │ 1 ┆ a ┆ true ┆ [1, 2] │ + ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 2 ┆ b ┆ null ┆ [3] │ + └─────┴─────┴──────┴────────────┘ + ``` + */ + unnest(names: string | string[]): DataFrame /** * Aggregate the columns of this DataFrame to their variance value. * @example @@ -1779,8 +1830,8 @@ export const _DataFrame = (_df: any): DataFrame => { return wrap("sum"); }, tail: (length=5) => wrap("tail", length), - toBinary() { - return _df.toBincode(); + serialize(format) { + return _df.serialize(format); }, toCSV(...args) { return this.writeCSV(...args); @@ -1804,8 +1855,13 @@ export const _DataFrame = (_df: any): DataFrame => { toRecords() { return _df.toObjects(); }, - toJSON(arg0?, options?): any { - return _df.toJs(); + toJSON(...args: any[]) { + // this is passed by `JSON.stringify` when calling `toJSON()` + if(args[0] === "") { + return _df.toJs(); + } + + return _df.serialize("json").toString(); }, toObject() { return this.getColumns().reduce((acc, curr) => { @@ -1893,6 +1949,9 @@ export const _DataFrame = (_df: any): DataFrame => { }, toSeries: (index = 0) => _Series(_df.selectAtIdx(index) as any) as any, + toStruct(name) { + return _Series(_df.toStruct(name)); + }, toString() { return _df.toString(); }, @@ -1923,6 +1982,11 @@ export const _DataFrame = (_df: any): DataFrame => { return df; }, + unnest(names) { + names = Array.isArray(names) ? names : [names]; + + return _DataFrame(_df.unnest(names)); + }, var() { return wrap("var"); }, @@ -2019,7 +2083,7 @@ export const _DataFrame = (_df: any): DataFrame => { }); }; -export interface DataFrameConstructor { +export interface DataFrameConstructor extends Deserialize { (): DataFrame (data: any, options?: { columns?: any[], @@ -2028,10 +2092,6 @@ export interface DataFrameConstructor { inferSchemaLength?: number, }): DataFrame isDataFrame(arg: any): arg is DataFrame; - /** - * @param binary used to serialize/deserialize dataframe. This will only work with the output from expr.toBinary(). - */ - fromBinary(binary: Buffer): DataFrame } function DataFrameConstructor(data?, options?): DataFrame { @@ -2058,13 +2118,11 @@ function objToDF(obj: Record>): any { return new pli.JsDataFrame(columns); } const isDataFrame = (anyVal: any): anyVal is DataFrame => anyVal?.[Symbol.toStringTag] === "DataFrame"; -const fromBinary = (buf: Buffer) => { - return _DataFrame(pli.JsDataFrame.fromBincode(buf)); -}; + export const DataFrame: DataFrameConstructor = Object.assign( DataFrameConstructor, { isDataFrame, - fromBinary + deserialize: (buf, fmt) => _DataFrame(pli.JsDataFrame.deserialize(buf, fmt)) } ); diff --git a/nodejs-polars/polars/index.ts b/nodejs-polars/polars/index.ts index 1d2b9df0..510c323d 100644 --- a/nodejs-polars/polars/index.ts +++ b/nodejs-polars/polars/index.ts @@ -4,6 +4,7 @@ import { DataType } from "./datatypes"; import * as func from "./functions"; import * as io from "./io"; import * as cfg from "./cfg"; +import * as ldf from "./lazy/dataframe"; import pli from "./internals/polars_internal"; import { funcs as lazy, @@ -16,6 +17,7 @@ import { namespace pl { export import Expr = lazyExpr.Expr export import DataFrame = df.DataFrame + export import LazyDataFrame = ldf.LazyDataFrame export import Series = series.Series; export type LazyGroupBy = lazyGroupBy; export type When = _when.When; @@ -81,6 +83,7 @@ namespace pl { export import pearsonCorr = lazy.pearsonCorr export import quantile = lazy.quantile export import select = lazy.select + export import struct = lazy.struct export import spearmanRankCorr = lazy.spearmanRankCorr export import tail = lazy.tail export import list = lazy.list diff --git a/nodejs-polars/polars/lazy/dataframe.ts b/nodejs-polars/polars/lazy/dataframe.ts index b0ca0b12..6d7b399b 100644 --- a/nodejs-polars/polars/lazy/dataframe.ts +++ b/nodejs-polars/polars/lazy/dataframe.ts @@ -1,6 +1,7 @@ import {DataFrame, _DataFrame} from "../dataframe"; import {Expr, exprToLitOrExpr} from "./expr"; +import pli from "../internals/polars_internal"; import { columnOrColumnsStrict, ColumnSelection, @@ -9,8 +10,8 @@ import { selectionToExprList, ValueOrArray } from "../utils"; -import pli from "../internals/polars_internal"; import {LazyGroupBy} from "./groupby"; +import {Deserialize, Serialize} from "../shared_traits"; type LazyJoinOptions = { @@ -33,7 +34,7 @@ type LazyOptions = { /** * Representation of a Lazy computation graph / query. */ -export interface LazyDataFrame { +export interface LazyDataFrame extends Serialize { /** @ignore */ _ldf: any; get columns(): string[] @@ -249,6 +250,10 @@ export interface LazyDataFrame { * @see {@link DataFrame.tail} */ tail(length?: number): LazyDataFrame + /** + * compatibility with `JSON.stringify` + */ + toJSON(): String /** * Drop duplicate rows from this DataFrame. * Note that this fails if there is a column of type `List` in the DataFrame. @@ -256,8 +261,8 @@ export interface LazyDataFrame { * @param subset - subset to drop duplicates for * @param keep "first" | "last" */ - unique(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"): LazyDataFrame - unique(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"}): LazyDataFrame + unique(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"): LazyDataFrame + unique(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"}): LazyDataFrame /** * Aggregate the columns in the DataFrame to their variance value. */ @@ -552,6 +557,18 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => { tail(length=5) { return _LazyDataFrame(_ldf.tail(length)); }, + toJSON(...args: any[]) { + // this is passed by `JSON.stringify` when calling `toJSON()` + if(args[0] === "") { + return JSON.parse(_ldf.serialize("json").toString()); + } + + return _ldf.serialize("json").toString(); + + }, + serialize(format) { + return _ldf.serialize(format); + }, withColumn(expr) { return _LazyDataFrame(_ldf.withColumn(expr._expr)); }, @@ -568,3 +585,10 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => { }, }; }; + + +export interface LazyDataFrameConstructor extends Deserialize {} + +export const LazyDataFrame: LazyDataFrameConstructor = Object.assign(_LazyDataFrame, { + deserialize: (buf, fmt) => _LazyDataFrame(pli.JsLazyFrame.deserialize(buf, fmt)) +}); diff --git a/nodejs-polars/polars/lazy/expr.ts b/nodejs-polars/polars/lazy/expr.ts index 5c1d94b9..0bf23b39 100644 --- a/nodejs-polars/polars/lazy/expr.ts +++ b/nodejs-polars/polars/lazy/expr.ts @@ -10,7 +10,7 @@ import { import {Series} from "../series/series"; import * as expr from "./expr/"; -import {Arithmetic, Comparison, Cumulative, Rolling, Round, Sample} from "../shared_traits"; +import {Arithmetic, Comparison, Cumulative, Deserialize, Rolling, Round, Sample, Serialize} from "../shared_traits"; export interface Expr extends Rolling, @@ -18,7 +18,8 @@ export interface Expr extends Comparison, Cumulative, Sample, - Round { + Round, + Serialize { /** @ignore */ _expr: any; get date(): expr.Datetime; @@ -27,12 +28,9 @@ export interface Expr extends get struct(): expr.Struct; [Symbol.toStringTag](): string; [INSPECT_SYMBOL](): string; - /** serializes the Expr to a [bincode buffer](https://docs.rs/bincode/latest/bincode/index.html) - * @example - * pl.Expr.fromBinary(expr.toBinary()) - */ - toBinary(): Buffer toString(): string; + /** compat with `JSON.stringify` */ + toJSON(): string; /** Take absolute values */ abs(): Expr aggGroups(): Expr @@ -536,15 +534,12 @@ export const _Expr = (_expr: any): Expr => { return _Expr(unwrap(method, ...args)); }; - const wrapNullArgs = (method: string) => () => wrap(method); const wrapExprArg = (method: string, lit=false) => (other: any) => { const expr = exprToLitOrExpr(other, lit).inner(); return wrap(method, expr); }; - type anyfunc = (...args: any[]) => any - const wrapUnaryWithDefault = (method: string, key: string, otherwise): anyfunc => (val=otherwise) => wrap(method, {[key]: val?.[key] ?? val}); const rolling = (method: string) => (opts, weights?, minPeriods?, center?): Expr => { const windowSize = opts?.["windowSize"] ?? (typeof opts === "number" ? opts : null); @@ -569,12 +564,20 @@ export const _Expr = (_expr: any): Expr => { [INSPECT_SYMBOL]() { return _expr.toString(); }, - toBinary() { - return null as any; + serialize(format) { + return _expr.serialize(format); }, toString() { return _expr.toString(); }, + toJSON(...args: any[]) { + // this is passed by `JSON.stringify` when calling `toJSON()` + if(args[0] === "") { + return _expr.toJs(); + } + + return _expr.serialize("json").toString(); + }, get str() { return expr.StringFunctions(_expr); }, @@ -1048,13 +1051,10 @@ export const _Expr = (_expr: any): Expr => { }; }; -export interface ExprConstructor { +export interface ExprConstructor extends Deserialize { isExpr(arg: any): arg is Expr; - /** - * @param binary used to serialize/deserialize expr. This will only work with the output from expr.toBinary(). - */ - fromBinary(binary: Buffer): Expr } + const isExpr = (anyVal: any): anyVal is Expr => { try { return anyVal?.[Symbol.toStringTag]?.() === "Expr"; @@ -1063,12 +1063,12 @@ const isExpr = (anyVal: any): anyVal is Expr => { } }; -const fromBinary = (buf: Buffer) => { - return null as any; - // return _Expr(pli.JsExpr.from_bincode(buf)); + +const deserialize = (buf, format) => { + return _Expr(pli.JsExpr.deserialize(buf, format)); }; -export const Expr: ExprConstructor = Object.assign(_Expr, {isExpr, fromBinary}); +export const Expr: ExprConstructor = Object.assign(_Expr, {isExpr, deserialize}); /** @ignore */ export const exprToLitOrExpr = (expr: any, stringToLit = true): Expr => { diff --git a/nodejs-polars/polars/lazy/functions.ts b/nodejs-polars/polars/lazy/functions.ts index e8667183..2d48ca5d 100644 --- a/nodejs-polars/polars/lazy/functions.ts +++ b/nodejs-polars/polars/lazy/functions.ts @@ -166,7 +166,10 @@ export function arange(opts: any, high?, step?, eager?): Series | Expr { return _Expr(pli.arange(low, high, step)); } } - +/** Alias for `pl.col("*")` */ +export function all(): Expr { + return col("*"); +} /** * __Find the indexes that would sort the columns.__ * ___ @@ -431,6 +434,72 @@ export function list(column: ExprOrString): Expr { } +/** + Collect several columns into a Series of dtype Struct + Parameters + ---------- + @param exprs + Columns/Expressions to collect into a Struct + @param eager + Evaluate immediately + + Examples + -------- + ``` + >>> pl.DataFrame( + ... { + ... "int": [1, 2], + ... "str": ["a", "b"], + ... "bool": [True, None], + ... "list": [[1, 2], [3]], + ... } + ... ).select([pl.struct(pl.all()).alias("my_struct")]) + shape: (2, 1) + ┌───────────────────────┐ + │ my_struct │ + │ --- │ + │ struct{int, ... list} │ + ╞═══════════════════════╡ + │ {1,"a",true,[1, 2]} │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ {2,"b",null,[3]} │ + └───────────────────────┘ + + // Only collect specific columns as a struct: + >>> df = pl.DataFrame({ + ... "a": [1, 2, 3, 4], + ... "b": ["one", "two", "three", "four"], + ... "c": [9, 8, 7, 6] + ... }) + >>> df.withColumn(pl.struct(pl.col(["a", "b"])).alias("a_and_b")) + shape: (4, 4) + ┌─────┬───────┬─────┬───────────────────────────────┐ + │ a ┆ b ┆ c ┆ a_and_b │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ i64 ┆ struct[2]{'a': i64, 'b': str} │ + ╞═════╪═══════╪═════╪═══════════════════════════════╡ + │ 1 ┆ one ┆ 9 ┆ {1,"one"} │ + ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 2 ┆ two ┆ 8 ┆ {2,"two"} │ + ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 3 ┆ three ┆ 7 ┆ {3,"three"} │ + ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 4 ┆ four ┆ 6 ┆ {4,"four"} │ + └─────┴───────┴─────┴───────────────────────────────┘ +``` +*/ +export function struct(exprs: Series[]): Series +export function struct(exprs: ExprOrString | ExprOrString[]): Expr +export function struct(exprs: ExprOrString | ExprOrString[] | Series[]): Expr | Series { + exprs = Array.isArray(exprs) ? exprs : [exprs]; + + if (Series.isSeries(exprs[0])) { + return select(_Expr(pli.asStruct(exprs.map(e => pli.lit(e.inner()))))).toSeries(); + } + exprs = selectionToExprList(exprs); + + return _Expr(pli.asStruct(exprs)); +} // // export function collect_all() {} // // export function all() {} // fold // // export function any() {} // fold diff --git a/nodejs-polars/polars/native-polars.js b/nodejs-polars/polars/native-polars.js index 0523dcdf..7e9a2d38 100644 --- a/nodejs-polars/polars/native-polars.js +++ b/nodejs-polars/polars/native-polars.js @@ -236,248 +236,5 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { version, toggleStringCache, JsDataFrame, readCsv, readJson, readParquet, readIpc, readAvro, fromRows, DataType, horizontalConcat, JsLazyGroupBy, JsLazyFrame, scanCsv, scanParquet, scanIpc, JsExpr, When, WhenThen, WhenThenThen, when, col, count, first, last, cols, dtypeCols, arange, pearsonCorr, spearmanRankCorr, cov, argsortBy, lit, range, concatLst, concatStr, JsSeries, seriesSetAtIdxStr, seriesSetAtIdxF64, seriesSetAtIdxF32, seriesSetAtIdxU8, seriesSetAtIdxU16, seriesSetAtIdxU32, seriesSetAtIdxU64, seriesSetAtIdxI8, seriesSetAtIdxI16, seriesSetAtIdxI32, seriesSetAtIdxI64, seriesSetWithMaskStr, seriesSetWithMaskF64, seriesSetWithMaskF32, seriesSetWithMaskU8, seriesSetWithMaskU16, seriesSetWithMaskU32, seriesSetWithMaskU64, seriesSetWithMaskI8, seriesSetWithMaskI16, seriesSetWithMaskI32, seriesSetWithMaskI64, seriesGetF32, seriesGetF64, seriesGetU8, seriesGetU16, seriesGetU32, seriesGetU64, seriesGetI8, seriesGetI16, seriesGetI32, seriesGetI64, seriesGetDate, seriesGetDatetime, seriesGetDuration, seriesGetStr, seriesAddU8, seriesAddU16, seriesAddU32, seriesAddU64, seriesAddI8, seriesAddI16, seriesAddI32, seriesAddI64, seriesAddDatetime, seriesAddDuration, seriesAddF32, seriesAddF64, seriesSubU8, seriesSubU16, seriesSubU32, seriesSubU64, seriesSubI8, seriesSubI16, seriesSubI32, seriesSubI64, seriesSubDatetime, seriesSubDuration, seriesSubF32, seriesSubF64, seriesDivU8, seriesDivU16, seriesDivU32, seriesDivU64, seriesDivI8, seriesDivI16, seriesDivI32, seriesDivI64, seriesDivF32, seriesDivF64, seriesMulU8, seriesMulU16, seriesMulU32, seriesMulU64, seriesMulI8, seriesMulI16, seriesMulI32, seriesMulI64, seriesMulF32, seriesMulF64, seriesRemU8, seriesRemU16, seriesRemU32, seriesRemU64, seriesRemI8, seriesRemI16, seriesRemI32, seriesRemI64, seriesRemF32, seriesRemF64, seriesAddU8Rhs, seriesAddU16Rhs, seriesAddU32Rhs, seriesAddU64Rhs, seriesAddI8Rhs, seriesAddI16Rhs, seriesAddI32Rhs, seriesAddI64Rhs, seriesAddF32Rhs, seriesAddF64Rhs, seriesSubU8Rhs, seriesSubU16Rhs, seriesSubU32Rhs, seriesSubU64Rhs, seriesSubI8Rhs, seriesSubI16Rhs, seriesSubI32Rhs, seriesSubI64Rhs, seriesSubF32Rhs, seriesSubF64Rhs, seriesDivU8Rhs, seriesDivU16Rhs, seriesDivU32Rhs, seriesDivU64Rhs, seriesDivI8Rhs, seriesDivI16Rhs, seriesDivI32Rhs, seriesDivI64Rhs, seriesDivF32Rhs, seriesDivF64Rhs, seriesMulU8Rhs, seriesMulU16Rhs, seriesMulU32Rhs, seriesMulU64Rhs, seriesMulI8Rhs, seriesMulI16Rhs, seriesMulI32Rhs, seriesMulI64Rhs, seriesMulF32Rhs, seriesMulF64Rhs, seriesRemU8Rhs, seriesRemU16Rhs, seriesRemU32Rhs, seriesRemU64Rhs, seriesRemI8Rhs, seriesRemI16Rhs, seriesRemI32Rhs, seriesRemI64Rhs, seriesRemF32Rhs, seriesRemF64Rhs, seriesEqU8, seriesEqU16, seriesEqU32, seriesEqU64, seriesEqI8, seriesEqI16, seriesEqI32, seriesEqI64, seriesEqF32, seriesEqF64, seriesEqStr, seriesNeqU8, seriesNeqU16, seriesNeqU32, seriesNeqU64, seriesNeqI8, seriesNeqI16, seriesNeqI32, seriesNeqI64, seriesNeqF32, seriesNeqF64, seriesNeqStr, seriesGtU8, seriesGtU16, seriesGtU32, seriesGtU64, seriesGtI8, seriesGtI16, seriesGtI32, seriesGtI64, seriesGtF32, seriesGtF64, seriesGtStr, seriesGtEqU8, seriesGtEqU16, seriesGtEqU32, seriesGtEqU64, seriesGtEqI8, seriesGtEqI16, seriesGtEqI32, seriesGtEqI64, seriesGtEqF32, seriesGtEqF64, seriesGtEqStr, seriesLtU8, seriesLtU16, seriesLtU32, seriesLtU64, seriesLtI8, seriesLtI16, seriesLtI32, seriesLtI64, seriesLtF32, seriesLtF64, seriesLtStr, seriesLtEqU8, seriesLtEqU16, seriesLtEqU32, seriesLtEqU64, seriesLtEqI8, seriesLtEqI16, seriesLtEqI32, seriesLtEqI64, seriesLtEqF32, seriesLtEqF64, seriesLtEqStr } = nativeBinding -module.exports.version = version -module.exports.toggleStringCache = toggleStringCache -module.exports.JsDataFrame = JsDataFrame -module.exports.readCsv = readCsv -module.exports.readJson = readJson -module.exports.readParquet = readParquet -module.exports.readIpc = readIpc -module.exports.readAvro = readAvro -module.exports.fromRows = fromRows -module.exports.DataType = DataType -module.exports.horizontalConcat = horizontalConcat -module.exports.JsLazyGroupBy = JsLazyGroupBy -module.exports.JsLazyFrame = JsLazyFrame -module.exports.scanCsv = scanCsv -module.exports.scanParquet = scanParquet -module.exports.scanIpc = scanIpc -module.exports.JsExpr = JsExpr -module.exports.When = When -module.exports.WhenThen = WhenThen -module.exports.WhenThenThen = WhenThenThen -module.exports.when = when -module.exports.col = col -module.exports.count = count -module.exports.first = first -module.exports.last = last -module.exports.cols = cols -module.exports.dtypeCols = dtypeCols -module.exports.arange = arange -module.exports.pearsonCorr = pearsonCorr -module.exports.spearmanRankCorr = spearmanRankCorr -module.exports.cov = cov -module.exports.argsortBy = argsortBy -module.exports.lit = lit -module.exports.range = range -module.exports.concatLst = concatLst -module.exports.concatStr = concatStr -module.exports.JsSeries = JsSeries -module.exports.seriesSetAtIdxStr = seriesSetAtIdxStr -module.exports.seriesSetAtIdxF64 = seriesSetAtIdxF64 -module.exports.seriesSetAtIdxF32 = seriesSetAtIdxF32 -module.exports.seriesSetAtIdxU8 = seriesSetAtIdxU8 -module.exports.seriesSetAtIdxU16 = seriesSetAtIdxU16 -module.exports.seriesSetAtIdxU32 = seriesSetAtIdxU32 -module.exports.seriesSetAtIdxU64 = seriesSetAtIdxU64 -module.exports.seriesSetAtIdxI8 = seriesSetAtIdxI8 -module.exports.seriesSetAtIdxI16 = seriesSetAtIdxI16 -module.exports.seriesSetAtIdxI32 = seriesSetAtIdxI32 -module.exports.seriesSetAtIdxI64 = seriesSetAtIdxI64 -module.exports.seriesSetWithMaskStr = seriesSetWithMaskStr -module.exports.seriesSetWithMaskF64 = seriesSetWithMaskF64 -module.exports.seriesSetWithMaskF32 = seriesSetWithMaskF32 -module.exports.seriesSetWithMaskU8 = seriesSetWithMaskU8 -module.exports.seriesSetWithMaskU16 = seriesSetWithMaskU16 -module.exports.seriesSetWithMaskU32 = seriesSetWithMaskU32 -module.exports.seriesSetWithMaskU64 = seriesSetWithMaskU64 -module.exports.seriesSetWithMaskI8 = seriesSetWithMaskI8 -module.exports.seriesSetWithMaskI16 = seriesSetWithMaskI16 -module.exports.seriesSetWithMaskI32 = seriesSetWithMaskI32 -module.exports.seriesSetWithMaskI64 = seriesSetWithMaskI64 -module.exports.seriesGetF32 = seriesGetF32 -module.exports.seriesGetF64 = seriesGetF64 -module.exports.seriesGetU8 = seriesGetU8 -module.exports.seriesGetU16 = seriesGetU16 -module.exports.seriesGetU32 = seriesGetU32 -module.exports.seriesGetU64 = seriesGetU64 -module.exports.seriesGetI8 = seriesGetI8 -module.exports.seriesGetI16 = seriesGetI16 -module.exports.seriesGetI32 = seriesGetI32 -module.exports.seriesGetI64 = seriesGetI64 -module.exports.seriesGetDate = seriesGetDate -module.exports.seriesGetDatetime = seriesGetDatetime -module.exports.seriesGetDuration = seriesGetDuration -module.exports.seriesGetStr = seriesGetStr -module.exports.seriesAddU8 = seriesAddU8 -module.exports.seriesAddU16 = seriesAddU16 -module.exports.seriesAddU32 = seriesAddU32 -module.exports.seriesAddU64 = seriesAddU64 -module.exports.seriesAddI8 = seriesAddI8 -module.exports.seriesAddI16 = seriesAddI16 -module.exports.seriesAddI32 = seriesAddI32 -module.exports.seriesAddI64 = seriesAddI64 -module.exports.seriesAddDatetime = seriesAddDatetime -module.exports.seriesAddDuration = seriesAddDuration -module.exports.seriesAddF32 = seriesAddF32 -module.exports.seriesAddF64 = seriesAddF64 -module.exports.seriesSubU8 = seriesSubU8 -module.exports.seriesSubU16 = seriesSubU16 -module.exports.seriesSubU32 = seriesSubU32 -module.exports.seriesSubU64 = seriesSubU64 -module.exports.seriesSubI8 = seriesSubI8 -module.exports.seriesSubI16 = seriesSubI16 -module.exports.seriesSubI32 = seriesSubI32 -module.exports.seriesSubI64 = seriesSubI64 -module.exports.seriesSubDatetime = seriesSubDatetime -module.exports.seriesSubDuration = seriesSubDuration -module.exports.seriesSubF32 = seriesSubF32 -module.exports.seriesSubF64 = seriesSubF64 -module.exports.seriesDivU8 = seriesDivU8 -module.exports.seriesDivU16 = seriesDivU16 -module.exports.seriesDivU32 = seriesDivU32 -module.exports.seriesDivU64 = seriesDivU64 -module.exports.seriesDivI8 = seriesDivI8 -module.exports.seriesDivI16 = seriesDivI16 -module.exports.seriesDivI32 = seriesDivI32 -module.exports.seriesDivI64 = seriesDivI64 -module.exports.seriesDivF32 = seriesDivF32 -module.exports.seriesDivF64 = seriesDivF64 -module.exports.seriesMulU8 = seriesMulU8 -module.exports.seriesMulU16 = seriesMulU16 -module.exports.seriesMulU32 = seriesMulU32 -module.exports.seriesMulU64 = seriesMulU64 -module.exports.seriesMulI8 = seriesMulI8 -module.exports.seriesMulI16 = seriesMulI16 -module.exports.seriesMulI32 = seriesMulI32 -module.exports.seriesMulI64 = seriesMulI64 -module.exports.seriesMulF32 = seriesMulF32 -module.exports.seriesMulF64 = seriesMulF64 -module.exports.seriesRemU8 = seriesRemU8 -module.exports.seriesRemU16 = seriesRemU16 -module.exports.seriesRemU32 = seriesRemU32 -module.exports.seriesRemU64 = seriesRemU64 -module.exports.seriesRemI8 = seriesRemI8 -module.exports.seriesRemI16 = seriesRemI16 -module.exports.seriesRemI32 = seriesRemI32 -module.exports.seriesRemI64 = seriesRemI64 -module.exports.seriesRemF32 = seriesRemF32 -module.exports.seriesRemF64 = seriesRemF64 -module.exports.seriesAddU8Rhs = seriesAddU8Rhs -module.exports.seriesAddU16Rhs = seriesAddU16Rhs -module.exports.seriesAddU32Rhs = seriesAddU32Rhs -module.exports.seriesAddU64Rhs = seriesAddU64Rhs -module.exports.seriesAddI8Rhs = seriesAddI8Rhs -module.exports.seriesAddI16Rhs = seriesAddI16Rhs -module.exports.seriesAddI32Rhs = seriesAddI32Rhs -module.exports.seriesAddI64Rhs = seriesAddI64Rhs -module.exports.seriesAddF32Rhs = seriesAddF32Rhs -module.exports.seriesAddF64Rhs = seriesAddF64Rhs -module.exports.seriesSubU8Rhs = seriesSubU8Rhs -module.exports.seriesSubU16Rhs = seriesSubU16Rhs -module.exports.seriesSubU32Rhs = seriesSubU32Rhs -module.exports.seriesSubU64Rhs = seriesSubU64Rhs -module.exports.seriesSubI8Rhs = seriesSubI8Rhs -module.exports.seriesSubI16Rhs = seriesSubI16Rhs -module.exports.seriesSubI32Rhs = seriesSubI32Rhs -module.exports.seriesSubI64Rhs = seriesSubI64Rhs -module.exports.seriesSubF32Rhs = seriesSubF32Rhs -module.exports.seriesSubF64Rhs = seriesSubF64Rhs -module.exports.seriesDivU8Rhs = seriesDivU8Rhs -module.exports.seriesDivU16Rhs = seriesDivU16Rhs -module.exports.seriesDivU32Rhs = seriesDivU32Rhs -module.exports.seriesDivU64Rhs = seriesDivU64Rhs -module.exports.seriesDivI8Rhs = seriesDivI8Rhs -module.exports.seriesDivI16Rhs = seriesDivI16Rhs -module.exports.seriesDivI32Rhs = seriesDivI32Rhs -module.exports.seriesDivI64Rhs = seriesDivI64Rhs -module.exports.seriesDivF32Rhs = seriesDivF32Rhs -module.exports.seriesDivF64Rhs = seriesDivF64Rhs -module.exports.seriesMulU8Rhs = seriesMulU8Rhs -module.exports.seriesMulU16Rhs = seriesMulU16Rhs -module.exports.seriesMulU32Rhs = seriesMulU32Rhs -module.exports.seriesMulU64Rhs = seriesMulU64Rhs -module.exports.seriesMulI8Rhs = seriesMulI8Rhs -module.exports.seriesMulI16Rhs = seriesMulI16Rhs -module.exports.seriesMulI32Rhs = seriesMulI32Rhs -module.exports.seriesMulI64Rhs = seriesMulI64Rhs -module.exports.seriesMulF32Rhs = seriesMulF32Rhs -module.exports.seriesMulF64Rhs = seriesMulF64Rhs -module.exports.seriesRemU8Rhs = seriesRemU8Rhs -module.exports.seriesRemU16Rhs = seriesRemU16Rhs -module.exports.seriesRemU32Rhs = seriesRemU32Rhs -module.exports.seriesRemU64Rhs = seriesRemU64Rhs -module.exports.seriesRemI8Rhs = seriesRemI8Rhs -module.exports.seriesRemI16Rhs = seriesRemI16Rhs -module.exports.seriesRemI32Rhs = seriesRemI32Rhs -module.exports.seriesRemI64Rhs = seriesRemI64Rhs -module.exports.seriesRemF32Rhs = seriesRemF32Rhs -module.exports.seriesRemF64Rhs = seriesRemF64Rhs -module.exports.seriesEqU8 = seriesEqU8 -module.exports.seriesEqU16 = seriesEqU16 -module.exports.seriesEqU32 = seriesEqU32 -module.exports.seriesEqU64 = seriesEqU64 -module.exports.seriesEqI8 = seriesEqI8 -module.exports.seriesEqI16 = seriesEqI16 -module.exports.seriesEqI32 = seriesEqI32 -module.exports.seriesEqI64 = seriesEqI64 -module.exports.seriesEqF32 = seriesEqF32 -module.exports.seriesEqF64 = seriesEqF64 -module.exports.seriesEqStr = seriesEqStr -module.exports.seriesNeqU8 = seriesNeqU8 -module.exports.seriesNeqU16 = seriesNeqU16 -module.exports.seriesNeqU32 = seriesNeqU32 -module.exports.seriesNeqU64 = seriesNeqU64 -module.exports.seriesNeqI8 = seriesNeqI8 -module.exports.seriesNeqI16 = seriesNeqI16 -module.exports.seriesNeqI32 = seriesNeqI32 -module.exports.seriesNeqI64 = seriesNeqI64 -module.exports.seriesNeqF32 = seriesNeqF32 -module.exports.seriesNeqF64 = seriesNeqF64 -module.exports.seriesNeqStr = seriesNeqStr -module.exports.seriesGtU8 = seriesGtU8 -module.exports.seriesGtU16 = seriesGtU16 -module.exports.seriesGtU32 = seriesGtU32 -module.exports.seriesGtU64 = seriesGtU64 -module.exports.seriesGtI8 = seriesGtI8 -module.exports.seriesGtI16 = seriesGtI16 -module.exports.seriesGtI32 = seriesGtI32 -module.exports.seriesGtI64 = seriesGtI64 -module.exports.seriesGtF32 = seriesGtF32 -module.exports.seriesGtF64 = seriesGtF64 -module.exports.seriesGtStr = seriesGtStr -module.exports.seriesGtEqU8 = seriesGtEqU8 -module.exports.seriesGtEqU16 = seriesGtEqU16 -module.exports.seriesGtEqU32 = seriesGtEqU32 -module.exports.seriesGtEqU64 = seriesGtEqU64 -module.exports.seriesGtEqI8 = seriesGtEqI8 -module.exports.seriesGtEqI16 = seriesGtEqI16 -module.exports.seriesGtEqI32 = seriesGtEqI32 -module.exports.seriesGtEqI64 = seriesGtEqI64 -module.exports.seriesGtEqF32 = seriesGtEqF32 -module.exports.seriesGtEqF64 = seriesGtEqF64 -module.exports.seriesGtEqStr = seriesGtEqStr -module.exports.seriesLtU8 = seriesLtU8 -module.exports.seriesLtU16 = seriesLtU16 -module.exports.seriesLtU32 = seriesLtU32 -module.exports.seriesLtU64 = seriesLtU64 -module.exports.seriesLtI8 = seriesLtI8 -module.exports.seriesLtI16 = seriesLtI16 -module.exports.seriesLtI32 = seriesLtI32 -module.exports.seriesLtI64 = seriesLtI64 -module.exports.seriesLtF32 = seriesLtF32 -module.exports.seriesLtF64 = seriesLtF64 -module.exports.seriesLtStr = seriesLtStr -module.exports.seriesLtEqU8 = seriesLtEqU8 -module.exports.seriesLtEqU16 = seriesLtEqU16 -module.exports.seriesLtEqU32 = seriesLtEqU32 -module.exports.seriesLtEqU64 = seriesLtEqU64 -module.exports.seriesLtEqI8 = seriesLtEqI8 -module.exports.seriesLtEqI16 = seriesLtEqI16 -module.exports.seriesLtEqI32 = seriesLtEqI32 -module.exports.seriesLtEqI64 = seriesLtEqI64 -module.exports.seriesLtEqF32 = seriesLtEqF32 -module.exports.seriesLtEqF64 = seriesLtEqF64 -module.exports.seriesLtEqStr = seriesLtEqStr +module.exports = nativeBinding \ No newline at end of file diff --git a/nodejs-polars/polars/series/series.ts b/nodejs-polars/polars/series/series.ts index 1fc856f7..819d644c 100644 --- a/nodejs-polars/polars/series/series.ts +++ b/nodejs-polars/polars/series/series.ts @@ -8,7 +8,7 @@ import {SeriesDateFunctions} from "./datetime"; import {SeriesStructFunctions} from "./struct"; import {InvalidOperationError} from "../error"; import {RankMethod} from "../utils"; -import {Arithmetic, Comparison, Cumulative, Rolling, Round, Sample} from "../shared_traits"; +import {Arithmetic, Comparison, Cumulative, Deserialize, Rolling, Round, Sample, Serialize} from "../shared_traits"; import {col} from "../lazy/functions"; const inspect = Symbol.for("nodejs.util.inspect.custom"); @@ -19,7 +19,8 @@ export interface Series extends Comparison, Cumulative, Round, - Sample { + Sample, + Serialize { inner(): any name: string dtype: DataType @@ -988,11 +989,7 @@ export interface Series extends */ toObject(): {name: string, datatype: string, values: any[]} toFrame(): DataFrame - /** serializes the Series to a [bincode buffer](https://docs.rs/bincode/latest/bincode/index.html) - * @example - * pl.Series.fromBinary(series.toBincode()) - */ - toBinary(): Buffer + /** compat with `JSON.stringify */ toJSON(): string /** Returns an iterator over the values */ values(): IterableIterator @@ -1060,6 +1057,9 @@ export function _Series(_s: any): Series { toString() { return _s.toString(); }, + serialize(format) { + return _s.serialize(format); + }, [Symbol.toStringTag]() { return "Series"; }, @@ -1677,11 +1677,16 @@ export function _Series(_s: any): Series { toBinary() { return _s.toBinary(); }, - toJSON() { - return _s.toJson().toString(); + toJSON(...args: any[]) { + // this is passed by `JSON.stringify` when calling `toJSON()` + if(args[0] === "") { + return _s.toJs(); + } + + return _s.serialize("json").toString(); }, toObject() { - return JSON.parse(_s.toJson().toString()); + return _s.toJs(); }, unique(maintainOrder?) { if(maintainOrder) { @@ -1692,7 +1697,6 @@ export function _Series(_s: any): Series { }, valueCounts() { return null as any; - }, values() { return this[Symbol.iterator](); @@ -1720,7 +1724,7 @@ export function _Series(_s: any): Series { }); } -export interface SeriesConstructor { +export interface SeriesConstructor extends Deserialize { (values: any): Series (name: string, values: any[], dtype?): Series @@ -1776,5 +1780,6 @@ const of = (...values: any[]): Series => { export const Series: SeriesConstructor = Object.assign(SeriesConstructor, { isSeries, from, - of + of, + deserialize: (buf, fmt) => _Series(pli.JsSeries.deserialize(buf, fmt)) }); diff --git a/nodejs-polars/polars/shared_traits.ts b/nodejs-polars/polars/shared_traits.ts index 2c9a93ac..2b1aa636 100644 --- a/nodejs-polars/polars/shared_traits.ts +++ b/nodejs-polars/polars/shared_traits.ts @@ -481,3 +481,22 @@ export interface DateFunctions { */ year(): T; } + +export interface Serialize { + /** + * Serializes object to desired format via [serde](https://serde.rs/) + * + * @param format [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode) + * + */ + serialize(format: "json" | "bincode"): Buffer +} +export interface Deserialize { + /** + * De-serializes buffer via [serde](https://serde.rs/) + * @param buf buffer to deserialize + * @param format [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode) + * + */ + deserialize(buf: Buffer, format: "json" | "bincode"): T +} diff --git a/nodejs-polars/src/dataframe.rs b/nodejs-polars/src/dataframe.rs index b8a78d4c..9e529957 100644 --- a/nodejs-polars/src/dataframe.rs +++ b/nodejs-polars/src/dataframe.rs @@ -386,6 +386,42 @@ pub fn from_rows( #[napi] impl JsDataFrame { + #[napi] + pub fn to_js(&self, env: Env) -> napi::Result { + env.to_js_value(&self.df) + } + + #[napi] + pub fn serialize(&self, format: String) -> napi::Result { + let buf = match format.as_ref() { + "bincode" => bincode::serialize(&self.df) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::to_vec(&self.df) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(Buffer::from(buf)) + } + + #[napi(factory)] + pub fn deserialize(buf: Buffer, format: String) -> napi::Result { + let df: DataFrame = match format.as_ref() { + "bincode" => bincode::deserialize(&buf) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::from_slice(&buf) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(df.into()) + } #[napi(constructor)] pub fn from_columns(columns: Array) -> napi::Result { let len = columns.len(); @@ -1037,32 +1073,6 @@ impl JsDataFrame { let df = self.df.unnest(names).map_err(JsPolarsErr::from)?; Ok(df.into()) } - #[napi(factory)] - pub fn from_bincode(buf: Buffer) -> napi::Result { - let df: DataFrame = bincode::deserialize(&buf).unwrap(); - - Ok(df.into()) - } - #[napi] - pub fn to_bincode(&self) -> napi::Result { - let buf = bincode::serialize(&self.df).unwrap(); - Ok(Buffer::from(buf)) - } - #[napi] - pub fn to_json(&self, pretty: Option) -> napi::Result { - let pretty = pretty.unwrap_or(false); - if pretty { - let bytes = serde_json::to_vec_pretty(&self.df)?; - Ok(bytes.into()) - } else { - let bytes = serde_json::to_vec(&self.df)?; - Ok(bytes.into()) - } - } - #[napi] - pub fn to_js(&self, env: Env) -> napi::Result { - env.to_js_value(&self.df) - } #[napi] pub fn to_row(&self, idx: f64, env: Env) -> napi::Result { let idx = idx as i64; diff --git a/nodejs-polars/src/lazy/dataframe.rs b/nodejs-polars/src/lazy/dataframe.rs index bbc474c2..0da18002 100644 --- a/nodejs-polars/src/lazy/dataframe.rs +++ b/nodejs-polars/src/lazy/dataframe.rs @@ -44,6 +44,51 @@ impl JsLazyGroupBy { } #[napi] impl JsLazyFrame { + #[napi] + pub fn to_js(&self, env: Env) -> napi::Result { + env.to_js_value(&self.ldf.logical_plan) + } + + #[napi] + pub fn serialize(&self, format: String) -> napi::Result { + let buf = match format.as_ref() { + "bincode" => bincode::serialize(&self.ldf.logical_plan) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::to_vec(&self.ldf.logical_plan) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(Buffer::from(buf)) + } + + #[napi(factory)] + pub fn deserialize(buf: Buffer, format: String) -> napi::Result { + // Safety + // we skipped the serializing/deserializing of the static in lifetime in `DataType` + // so we actually don't have a lifetime at all when serializing. + + // &[u8] still has a lifetime. But its ok, because we drop it immediately + // in this scope + let bytes: &[u8] = &buf; + let bytes = unsafe { std::mem::transmute::<&'_ [u8], &'static [u8]>(bytes) }; + let lp: LogicalPlan = match format.as_ref() { + "bincode" => bincode::deserialize(bytes) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::from_slice(bytes) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(LazyFrame::from(lp).into()) + } + #[napi] pub fn describe_plan(&self) -> String { self.ldf.describe_plan() diff --git a/nodejs-polars/src/lazy/dsl.rs b/nodejs-polars/src/lazy/dsl.rs index f6d2b24b..f169ac8e 100644 --- a/nodejs-polars/src/lazy/dsl.rs +++ b/nodejs-polars/src/lazy/dsl.rs @@ -45,6 +45,50 @@ impl ToExprs for Vec<&JsExpr> { #[napi] impl JsExpr { + #[napi] + pub fn to_js(&self, env: Env) -> napi::Result { + env.to_js_value(&self.inner) + } + + #[napi] + pub fn serialize(&self, format: String) -> napi::Result { + let buf = match format.as_ref() { + "bincode" => bincode::serialize(&self.inner) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::to_vec(&self.inner) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(Buffer::from(buf)) + } + + #[napi(factory)] + pub fn deserialize(buf: Buffer, format: String) -> napi::Result { + // Safety + // we skipped the serializing/deserializing of the static in lifetime in `DataType` + // so we actually don't have a lifetime at all when serializing. + + // &[u8] still has a lifetime. But its ok, because we drop it immediately + // in this scope + let bytes: &[u8] = &buf; + let bytes = unsafe { std::mem::transmute::<&'_ [u8], &'static [u8]>(bytes) }; + let expr: Expr = match format.as_ref() { + "bincode" => bincode::deserialize(bytes) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::from_slice(bytes) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supportd options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(expr.into()) + } #[napi] pub fn __add__(&self, rhs: &JsExpr) -> napi::Result { Ok(dsl::binary_expr(self.inner.clone(), Operator::Plus, rhs.inner.clone()).into()) @@ -1419,3 +1463,21 @@ fn concat_str(s: Vec<&JsExpr>, sep: String) -> JsExpr { let s = s.to_exprs(); dsl::concat_str(s, &sep).into() } + +#[napi] +fn min_exprs(exprs: Vec<&JsExpr>) -> JsExpr { + let exprs = exprs.to_exprs(); + polars::lazy::dsl::min_exprs(exprs).into() +} + +#[napi] +fn max_exprs(exprs: Vec<&JsExpr>) -> JsExpr { + let exprs = exprs.to_exprs(); + polars::lazy::dsl::max_exprs(exprs).into() +} + +#[napi] +fn as_struct(exprs: Vec<&JsExpr>) -> JsExpr { + let exprs = exprs.to_exprs(); + polars::lazy::dsl::as_struct(&exprs).into() +} diff --git a/nodejs-polars/src/series.rs b/nodejs-polars/src/series.rs index f17fe5f6..814f804d 100644 --- a/nodejs-polars/src/series.rs +++ b/nodejs-polars/src/series.rs @@ -23,6 +23,42 @@ impl From for JsSeries { #[napi] impl JsSeries { + #[napi] + pub fn to_js(&self, env: Env) -> napi::Result { + env.to_js_value(&self.series) + } + + #[napi] + pub fn serialize(&self, format: String) -> napi::Result { + let buf = match format.as_ref() { + "bincode" => bincode::serialize(&self.series) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::to_vec(&self.series) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supported options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(Buffer::from(buf)) + } + + #[napi(factory)] + pub fn deserialize(buf: Buffer, format: String) -> napi::Result { + let series: Series = match format.as_ref() { + "bincode" => bincode::deserialize(&buf) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + "json" => serde_json::from_slice(&buf) + .map_err(|err| napi::Error::from_reason(format!("{:?}", err)))?, + _ => { + return Err(napi::Error::from_reason( + "unexpected format. \n supported options are 'json', 'bincode'".to_owned(), + )) + } + }; + Ok(series.into()) + } // // FACTORIES // @@ -1175,16 +1211,6 @@ impl JsSeries { None } } - #[napi] - pub fn to_json(&self) -> napi::Result { - let buf = serde_json::to_vec(&self.series).unwrap(); - Ok(Buffer::from(buf)) - } - #[napi] - pub fn to_binary(&self) -> napi::Result { - let buf = bincode::serialize(&self.series).unwrap(); - Ok(Buffer::from(buf)) - } } macro_rules! impl_set_at_idx_wrap {