Skip to content

Commit 71ae1cf

Browse files
authored
Add caching to custom_func javascript to boost perf (see benchmark results) (#67)
* Add caching to custom_func `javascript` to boost perf (see benchmark results) * export caches so caller can adjust their caps if needed * separate javascript into javascript and javascript_with_context to avoid unnecessary JSONify of contextual node which is pretty expensive; also address PR comment * clear global vars after javascript program is executed
1 parent 9c017ae commit 71ae1cf

File tree

10 files changed

+187
-38
lines changed

10 files changed

+187
-38
lines changed

cli/cmd/transformCmd.go

+2-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"path/filepath"
88
"strings"
99

10+
"github.com/jf-tech/go-corelib/ios"
1011
"github.com/jf-tech/go-corelib/jsons"
1112
"github.com/jf-tech/go-corelib/strs"
1213
"github.com/spf13/cobra"
@@ -36,17 +37,8 @@ func init() {
3637
&input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)")
3738
}
3839

39-
// TODO: move to go-corelib.
40-
func fileExists(file string) bool {
41-
fi, err := os.Stat(file)
42-
if os.IsNotExist(err) {
43-
return false
44-
}
45-
return !fi.IsDir()
46-
}
47-
4840
func openFile(label string, filepath string) (io.ReadCloser, error) {
49-
if !fileExists(schema) {
41+
if !ios.FileExists(schema) {
5042
return nil, fmt.Errorf("%s file '%s' does not exist", label, filepath)
5143
}
5244
return os.Open(filepath)

customfuncs/.snapshots/TestDumpBuiltinCustomFuncNames

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"ifElse",
1515
"isEmpty",
1616
"javascript",
17+
"javascript_with_context",
1718
"lower",
1819
"splitIntoJsonArray",
1920
"substring",

customfuncs/customFuncs.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ var builtinPublishedCustomFuncs = map[string]CustomFuncType{
4242
"dateTimeLayoutToRFC3339": dateTimeLayoutToRFC3339,
4343
"dateTimeToEpoch": dateTimeToEpoch,
4444
"dateTimeToRFC3339": dateTimeToRFC3339,
45-
"eval": eval,
4645
"floor": floor,
4746
"ifElse": ifElse,
4847
"isEmpty": isEmpty,
4948
"javascript": javascript,
49+
"javascript_with_context": javascriptWithContext,
5050
"lower": lower,
5151
"splitIntoJsonArray": splitIntoJsonArray,
5252
"substring": substring,
@@ -59,6 +59,7 @@ var builtinHiddenBackCompatCustomFuncs = map[string]CustomFuncType{
5959
// keep these custom funcs lexically sorted
6060
"dateTimeToRfc3339": dateTimeToRFC3339, // deprecated; use dateTimeToRFC3339.
6161
"dateTimeWithLayoutToRfc3339": dateTimeLayoutToRFC3339, // deprecated; use dateTimeLayoutToRFC3339.
62+
"eval": eval, // deprecated; use javascript.
6263
"external": external, // deprecated; use "external" decl.
6364
}
6465

customfuncs/javascript.go

+86-6
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8+
"unsafe"
89

910
node "github.com/antchfx/xmlquery"
1011
"github.com/dop251/goja"
12+
"github.com/jf-tech/go-corelib/caches"
1113
"github.com/jf-tech/go-corelib/strs"
1214

1315
"github.com/jf-tech/omniparser/nodes"
@@ -62,20 +64,92 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac
6264
}
6365
}
6466

65-
func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (string, error) {
67+
// For debugging/testing purpose so we can easily disable all the caches. But not exported. We always
68+
// want caching in production.
69+
var disableCache = false
70+
71+
// JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used
72+
// across multiple goroutines and across different *goja.Runtime.
73+
var JSProgramCache = caches.NewLoadingCache() // per schema so won't have too many, no need to put a hard cap.
74+
// JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared
75+
// across multiple goroutines.
76+
var JSRuntimeCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap.
77+
// NodeToJSONCache caches *node.Node tree to translated JSON string.
78+
var NodeToJSONCache = caches.NewLoadingCache(100) // per transform, plus expensive, a smaller cap.
79+
80+
func getProgram(js string) (*goja.Program, error) {
81+
if disableCache {
82+
return goja.Compile("", js, false)
83+
}
84+
p, err := JSProgramCache.Get(js, func(interface{}) (interface{}, error) {
85+
return goja.Compile("", js, false)
86+
})
87+
if err != nil {
88+
return nil, err
89+
}
90+
return p.(*goja.Program), nil
91+
}
92+
93+
func ptrAddrStr(p unsafe.Pointer) string {
94+
return strconv.FormatUint(uint64(uintptr(p)), 16)
95+
}
96+
97+
func getRuntime(ctx *transformctx.Ctx) *goja.Runtime {
98+
if disableCache {
99+
return goja.New()
100+
}
101+
// a VM can be reused as long as not across thread. We don't have access to
102+
// thread/goroutine id (nor do we want to use some hack to get it, see
103+
// https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an
104+
// indicator - omniparser runs on a single thread per transform. And ctx is
105+
// is per transform.
106+
addr := ptrAddrStr(unsafe.Pointer(ctx))
107+
vm, _ := JSRuntimeCache.Get(addr, func(interface{}) (interface{}, error) {
108+
return goja.New(), nil
109+
})
110+
return vm.(*goja.Runtime)
111+
}
112+
113+
func getNodeJSON(n *node.Node) string {
114+
if disableCache {
115+
return nodes.JSONify2(n)
116+
}
117+
addr := ptrAddrStr(unsafe.Pointer(n))
118+
j, _ := NodeToJSONCache.Get(addr, func(interface{}) (interface{}, error) {
119+
return nodes.JSONify2(n), nil
120+
})
121+
return j.(string)
122+
}
123+
124+
// javascriptWithContext is a custom_func that runs a javascript with optional arguments and
125+
// with current node JSON, if the context node is provided.
126+
func javascriptWithContext(ctx *transformctx.Ctx, n *node.Node, js string, args ...string) (string, error) {
66127
if len(args)%2 != 0 {
67128
return "", errors.New("invalid number of args to 'javascript'")
68129
}
69-
vm := goja.New()
70-
vm.Set(argNameNode, nodes.JSONify2(n))
130+
program, err := getProgram(js)
131+
if err != nil {
132+
return "", fmt.Errorf("invalid javascript: %s", err.Error())
133+
}
134+
runtime := getRuntime(ctx)
135+
var varnames []string
136+
defer func() {
137+
for i := range varnames {
138+
runtime.Set(varnames[i], nil)
139+
}
140+
}()
71141
for i := 0; i < len(args)/2; i++ {
72-
n, v, err := parseArgTypeAndValue(args[i*2], args[i*2+1])
142+
varname, val, err := parseArgTypeAndValue(args[i*2], args[i*2+1])
73143
if err != nil {
74144
return "", err
75145
}
76-
vm.Set(n, v)
146+
runtime.Set(varname, val)
147+
varnames = append(varnames, varname)
148+
}
149+
if n != nil {
150+
runtime.Set(argNameNode, getNodeJSON(n))
77151
}
78-
v, err := vm.RunString(js)
152+
v, err := runtime.RunProgram(program)
79153
if err != nil {
80154
return "", err
81155
}
@@ -86,3 +160,9 @@ func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (s
86160
return v.String(), nil
87161
}
88162
}
163+
164+
// javascript is a custom_func that runs a javascript with optional arguments and without context
165+
// node JSON provided.
166+
func javascript(ctx *transformctx.Ctx, js string, args ...string) (string, error) {
167+
return javascriptWithContext(ctx, nil, js, args...)
168+
}

customfuncs/javascript_test.go

+51-15
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"strings"
55
"testing"
66

7-
node "github.com/antchfx/xmlquery"
87
"github.com/stretchr/testify/assert"
98

109
"github.com/jf-tech/omniparser/nodes"
@@ -128,7 +127,14 @@ func TestJavascript(t *testing.T) {
128127
name: "invalid javascript",
129128
js: "var;",
130129
args: nil,
131-
err: "SyntaxError: (anonymous): Line 1:4 Unexpected token ; (and 1 more errors)",
130+
err: "invalid javascript: SyntaxError: (anonymous): Line 1:4 Unexpected token ; (and 1 more errors)",
131+
expected: "",
132+
},
133+
{
134+
name: "javascript throws",
135+
js: "throw 'failure';",
136+
args: nil,
137+
err: "failure at <eval>:1:7(1)",
132138
expected: "",
133139
},
134140
{
@@ -160,8 +166,14 @@ func TestJavascript(t *testing.T) {
160166
expected: "",
161167
},
162168
} {
163-
t.Run(test.name, func(t *testing.T) {
164-
ret, err := javascript(nil, testNode, test.js, test.args...)
169+
testFn := func(t *testing.T) {
170+
var ret string
171+
var err error
172+
if strings.Contains(test.js, "_node") {
173+
ret, err = javascriptWithContext(nil, testNode, test.js, test.args...)
174+
} else {
175+
ret, err = javascript(nil, test.js, test.args...)
176+
}
165177
if test.err != "" {
166178
assert.Error(t, err)
167179
assert.Equal(t, test.err, err.Error())
@@ -170,19 +182,34 @@ func TestJavascript(t *testing.T) {
170182
assert.NoError(t, err)
171183
assert.Equal(t, test.expected, ret)
172184
}
185+
}
186+
t.Run(test.name+" (without cache)", func(t *testing.T) {
187+
disableCache = true
188+
testFn(t)
189+
})
190+
t.Run(test.name+" (with cache)", func(t *testing.T) {
191+
disableCache = false
192+
testFn(t)
173193
})
174194
}
175195
}
176196

177-
// The following benchmarks compare 'ifElse' vs 'eval' vs 'javascript' and show that
178-
// while the flexibility and power increase from left to right, the performance
179-
// decreases, dramatically:
180-
//
181-
// BenchmarkIfElse-4 4385941 270 ns/op 133 B/op 2 allocs/op
182-
// BenchmarkEval-4 642870 1843 ns/op 576 B/op 11 allocs/op
183-
// BenchmarkJavascript-4 5566 214070 ns/op 136756 B/op 1704 allocs/op
184-
//
185-
// So use 'javascript' only when expression/logic complexity warrants the performance tradeoff.
197+
func TestJavascriptClearVarsAfterRunProgram(t *testing.T) {
198+
r, err := javascriptWithContext(nil, nil, `v1 + v2`, "v1:int", "1", "v2:int", "2")
199+
assert.NoError(t, err)
200+
assert.Equal(t, "3", r)
201+
// Note v1 should be cleared before second run.
202+
r, err = javascriptWithContext(nil, nil, `v3 + v4 + v1`, "v3:int", "10", "v4:int", "20")
203+
assert.NoError(t, err)
204+
assert.Equal(t, "30", r)
205+
}
206+
207+
// go test -bench=. -benchmem -benchtime=30s
208+
// BenchmarkIfElse-4 234978459 152 ns/op 69 B/op 1 allocs/op
209+
// BenchmarkEval-4 19715643 1871 ns/op 576 B/op 11 allocs/op
210+
// BenchmarkJavascriptWithNoCache-4 165547 218455 ns/op 136733 B/op 1704 allocs/op
211+
// BenchmarkJavascriptWithCache-4 17685051 2047 ns/op 272 B/op 15 allocs/op
212+
186213
var (
187214
benchTitles = []string{"", "Dr", "Sir"}
188215
benchNames = []string{"", "Jane", "John"}
@@ -237,9 +264,10 @@ func BenchmarkEval(b *testing.B) {
237264
}
238265
}
239266

240-
func BenchmarkJavascript(b *testing.B) {
267+
func benchmarkJavascript(b *testing.B, cache bool) {
268+
disableCache = !cache
241269
for i := 0; i < b.N; i++ {
242-
ret, err := javascript(nil, &node.Node{}, `
270+
ret, err := javascript(nil, `
243271
if (!title) {
244272
""
245273
} else if (!name) {
@@ -257,3 +285,11 @@ func BenchmarkJavascript(b *testing.B) {
257285
}
258286
}
259287
}
288+
289+
func BenchmarkJavascriptWithNoCache(b *testing.B) {
290+
benchmarkJavascript(b, false)
291+
}
292+
293+
func BenchmarkJavascriptWithCache(b *testing.B) {
294+
benchmarkJavascript(b, true)
295+
}

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/go-chi/chi v4.1.2+incompatible
1313
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
1414
github.com/google/uuid v1.1.2
15-
github.com/jf-tech/go-corelib v0.0.3
15+
github.com/jf-tech/go-corelib v0.0.4
1616
github.com/spf13/cobra v1.0.0
1717
github.com/spf13/pflag v1.0.5 // indirect
1818
github.com/stretchr/testify v1.6.1

go.sum

+2-3
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uG
6464
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
6565
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
6666
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
67-
github.com/jf-tech/go-corelib v0.0.3 h1:8Dwg5MXZ6p6mUowMJpBPAeAAISmDzUcCUQoPFjYy6IA=
68-
github.com/jf-tech/go-corelib v0.0.3/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg=
67+
github.com/jf-tech/go-corelib v0.0.4 h1:XP5w5bumH/zR6/EZGzD4webeZ1BPU62xZvraAiyIqdc=
68+
github.com/jf-tech/go-corelib v0.0.4/go.mod h1:0+Fejzd53JtexKE5VI8I06WiBNATLIURRJgPrv4Yysg=
6969
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
7070
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
7171
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
@@ -116,7 +116,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
116116
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
117117
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
118118
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
119-
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
120119
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
121120
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
122121
github.com/tkuchiki/go-timezone v0.2.0 h1:yyZVHtQRVZ+wvlte5HXvSpBkR0dPYnPEIgq9qqAqltk=

handlers/omni/v2/transform/invokeCustomFunc_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func TestInvokeCustomFunc_Success(t *testing.T) {
7474
{Const: strs.StrPtr("-"), kind: KindConst},
7575
{
7676
CustomFunc: &CustomFuncDecl{
77-
Name: "javascript",
77+
Name: "javascript_with_context",
7878
Args: []*Decl{
7979
{Const: strs.StrPtr("var n=JSON.parse(_node); '['+n.B+'/'+n.C+']'"), kind: KindConst},
8080
},

samples/omniv2/json/2_multiple_objects.schema.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"title": { "xpath": "title" },
1515
"custom_func_demo": {
1616
"custom_func": {
17-
"name": "javascript",
17+
"name": "javascript_with_context",
1818
"args": [
1919
{ "const": "var n = JSON.parse(_node);n.author+'/'+n.title+'/'+press" },
2020
{ "const": "press:string" }, { "xpath": "../../name" }

samples/omniv2/json/json_test.go

+40
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
package json
22

33
import (
4+
"bytes"
5+
"io/ioutil"
46
"testing"
57

68
"github.com/bradleyjkemp/cupaloy"
79
"github.com/jf-tech/go-corelib/jsons"
810

11+
"github.com/jf-tech/omniparser"
912
"github.com/jf-tech/omniparser/samples/sampleutil"
13+
"github.com/jf-tech/omniparser/transformctx"
1014
)
1115

1216
func Test1_Single_Object(t *testing.T) {
@@ -23,3 +27,39 @@ func Test3_XPathDynamic(t *testing.T) {
2327
cupaloy.SnapshotT(t, jsons.BPJ(sampleutil.SampleTestCommon(t,
2428
"./3_xpathdynamic.schema.json", "./3_xpathdynamic.input.json")))
2529
}
30+
31+
var benchSchemaFile = "./2_multiple_objects.schema.json"
32+
var benchInputFile = "./2_multiple_objects.input.json"
33+
var benchSchema omniparser.Schema
34+
var benchInput []byte
35+
36+
func init() {
37+
schema, err := ioutil.ReadFile(benchSchemaFile)
38+
if err != nil {
39+
panic(err)
40+
}
41+
benchSchema, err = omniparser.NewSchema("bench", bytes.NewReader(schema))
42+
if err != nil {
43+
panic(err)
44+
}
45+
benchInput, err = ioutil.ReadFile(benchInputFile)
46+
if err != nil {
47+
panic(err)
48+
}
49+
}
50+
51+
func Benchmark2_Multiple_Objects(b *testing.B) {
52+
for i := 0; i < b.N; i++ {
53+
transform, err := benchSchema.NewTransform(
54+
"bench", bytes.NewReader(benchInput), &transformctx.Ctx{})
55+
if err != nil {
56+
panic(err)
57+
}
58+
for transform.Next() {
59+
_, err := transform.Read()
60+
if err != nil {
61+
panic(err)
62+
}
63+
}
64+
}
65+
}

0 commit comments

Comments
 (0)