@@ -5,9 +5,11 @@ import (
5
5
"fmt"
6
6
"strconv"
7
7
"strings"
8
+ "unsafe"
8
9
9
10
node "github.com/antchfx/xmlquery"
10
11
"github.com/dop251/goja"
12
+ "github.com/jf-tech/go-corelib/caches"
11
13
"github.com/jf-tech/go-corelib/strs"
12
14
13
15
"github.com/jf-tech/omniparser/nodes"
@@ -62,20 +64,92 @@ func parseArgTypeAndValue(argDecl, argValue string) (name string, value interfac
62
64
}
63
65
}
64
66
65
- func javascript (_ * transformctx.Ctx , n * node.Node , js string , args ... string ) (string , error ) {
67
+ // For debugging/testing purpose so we can easily disable all the caches. But not exported. We always
68
+ // want caching in production.
69
+ var disableCache = false
70
+
71
+ // JSProgramCache caches *goja.Program. A *goja.Program is compiled javascript and it can be used
72
+ // across multiple goroutines and across different *goja.Runtime.
73
+ var JSProgramCache = caches .NewLoadingCache () // per schema so won't have too many, no need to put a hard cap.
74
+ // JSRuntimeCache caches *goja.Runtime. A *goja.Runtime is a javascript VM. It can *not* be shared
75
+ // across multiple goroutines.
76
+ var JSRuntimeCache = caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap.
77
+ // NodeToJSONCache caches *node.Node tree to translated JSON string.
78
+ var NodeToJSONCache = caches .NewLoadingCache (100 ) // per transform, plus expensive, a smaller cap.
79
+
80
+ func getProgram (js string ) (* goja.Program , error ) {
81
+ if disableCache {
82
+ return goja .Compile ("" , js , false )
83
+ }
84
+ p , err := JSProgramCache .Get (js , func (interface {}) (interface {}, error ) {
85
+ return goja .Compile ("" , js , false )
86
+ })
87
+ if err != nil {
88
+ return nil , err
89
+ }
90
+ return p .(* goja.Program ), nil
91
+ }
92
+
93
// ptrAddrStr renders the address held by p as a lowercase hexadecimal string
// without any prefix or padding (a nil pointer yields "0").
func ptrAddrStr(p unsafe.Pointer) string {
	const hexBase = 16
	addr := uintptr(p)
	return strconv.FormatUint(uint64(addr), hexBase)
}
96
+
97
+ func getRuntime (ctx * transformctx.Ctx ) * goja.Runtime {
98
+ if disableCache {
99
+ return goja .New ()
100
+ }
101
+ // a VM can be reused as long as not across thread. We don't have access to
102
+ // thread/goroutine id (nor do we want to use some hack to get it, see
103
+ // https://golang.org/doc/faq#no_goroutine_id). Instead, we use ctx as an
104
+ // indicator - omniparser runs on a single thread per transform. And ctx is
105
+ // is per transform.
106
+ addr := ptrAddrStr (unsafe .Pointer (ctx ))
107
+ vm , _ := JSRuntimeCache .Get (addr , func (interface {}) (interface {}, error ) {
108
+ return goja .New (), nil
109
+ })
110
+ return vm .(* goja.Runtime )
111
+ }
112
+
113
+ func getNodeJSON (n * node.Node ) string {
114
+ if disableCache {
115
+ return nodes .JSONify2 (n )
116
+ }
117
+ addr := ptrAddrStr (unsafe .Pointer (n ))
118
+ j , _ := NodeToJSONCache .Get (addr , func (interface {}) (interface {}, error ) {
119
+ return nodes .JSONify2 (n ), nil
120
+ })
121
+ return j .(string )
122
+ }
123
+
124
+ // javascriptWithContext is a custom_func that runs a javascript with optional arguments and
125
+ // with current node JSON, if the context node is provided.
126
+ func javascriptWithContext (ctx * transformctx.Ctx , n * node.Node , js string , args ... string ) (string , error ) {
66
127
if len (args )% 2 != 0 {
67
128
return "" , errors .New ("invalid number of args to 'javascript'" )
68
129
}
69
- vm := goja .New ()
70
- vm .Set (argNameNode , nodes .JSONify2 (n ))
130
+ program , err := getProgram (js )
131
+ if err != nil {
132
+ return "" , fmt .Errorf ("invalid javascript: %s" , err .Error ())
133
+ }
134
+ runtime := getRuntime (ctx )
135
+ var varnames []string
136
+ defer func () {
137
+ for i := range varnames {
138
+ runtime .Set (varnames [i ], nil )
139
+ }
140
+ }()
71
141
for i := 0 ; i < len (args )/ 2 ; i ++ {
72
- n , v , err := parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
142
+ varname , val , err := parseArgTypeAndValue (args [i * 2 ], args [i * 2 + 1 ])
73
143
if err != nil {
74
144
return "" , err
75
145
}
76
- vm .Set (n , v )
146
+ runtime .Set (varname , val )
147
+ varnames = append (varnames , varname )
148
+ }
149
+ if n != nil {
150
+ runtime .Set (argNameNode , getNodeJSON (n ))
77
151
}
78
- v , err := vm . RunString ( js )
152
+ v , err := runtime . RunProgram ( program )
79
153
if err != nil {
80
154
return "" , err
81
155
}
@@ -86,3 +160,9 @@ func javascript(_ *transformctx.Ctx, n *node.Node, js string, args ...string) (s
86
160
return v .String (), nil
87
161
}
88
162
}
163
+
164
+ // javascript is a custom_func that runs a javascript with optional arguments and without context
165
+ // node JSON provided.
166
+ func javascript (ctx * transformctx.Ctx , js string , args ... string ) (string , error ) {
167
+ return javascriptWithContext (ctx , nil , js , args ... )
168
+ }
0 commit comments