/
client.go
296 lines (268 loc) · 8.25 KB
/
client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
package core
import (
"crypto/tls"
"encoding/csv"
"encoding/json"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"strings"
"time"
browser "github.com/EDDYCJY/fake-useragent"
"github.com/PuerkitoBio/goquery"
jar "github.com/juju/persistent-cookiejar"
log "github.com/sirupsen/logrus"
"golang.org/x/text/encoding"
"golang.org/x/text/transform"
)
// HTTPClient wraps the standard http.Client and adds convenience methods for common scenarios
type HTTPClient struct {
// Embedded standard client: its fields and methods (Jar, Timeout, Transport, ...) are promoted
*http.Client
// PersistentJar is the cookie jar used by EnsureCookie and SaveCookies;
// NewClient also installs the same jar as the embedded client's Jar
PersistentJar *jar.Jar
// UserAgent is the User-Agent header value that Do sets on outgoing requests
UserAgent string
}
// ClientOptions are HTTPClient options that can be passed as args at startup
type ClientOptions struct {
// UserAgent is copied to HTTPClient.UserAgent by NewClient
UserAgent string `desc:"User agent for requests sent from scripts. Leave empty to use fake browser agent"`
// Timeout is the whole-request timeout in seconds; NewClient converts it to a time.Duration
Timeout int64 `desc:"Request timeout in seconds"`
// WithoutTLS makes NewClient set InsecureSkipVerify on the transport, i.e. it turns off
// TLS certificate verification rather than TLS itself
WithoutTLS bool `desc:"Disable TLS for some gauges"`
// Proxy is the proxy URL; NewClient only installs it when it is non-empty and parses as a URL
Proxy string `desc:"HTTP client proxy (for example, you can use mitm for local development)"`
}
// RequestOptions are additional per-request options.
// A nil *RequestOptions is accepted by the client methods and means "no extra options".
type RequestOptions struct {
// When set to true, the request is sent with a fake browser user-agent
// (generated by browser.MacOSX()) instead of the client's configured one
FakeAgent bool
// Headers to set on the request; they are applied after the default headers,
// so they can override User-Agent and Cache-Control
Headers map[string]string
// When set to true, cookies received in the response are expired in the jar
// right after the request, so they are not kept for further requests
SkipCookies bool
}
// Client is the default client for scripts.
// It will be reinitialized during server creation.
// This default value will be used in tests.
// Note that it is built at package initialization time, so NewClient (including its
// cookie jar setup) runs as soon as the package is loaded.
var Client = NewClient(ClientOptions{
UserAgent: "whitewater.guide robot",
Timeout: 60,
WithoutTLS: false,
Proxy: "",
})
// NewClient constructs a new HTTPClient from the given options.
//
// The client uses a persistent cookie jar stored at /tmp/cookies/gorge.cookies and a
// clone of http.DefaultTransport with keep-alives disabled. If the jar cannot be
// created the error is reported through log.Fatalf.
//
// Options:
//   - Proxy: when non-empty it is parsed as a URL and used for every request; a value
//     that cannot be parsed is reported with a warning and no proxy is installed.
//   - WithoutTLS: disables TLS certificate verification on the transport.
//   - Timeout: whole-request timeout in seconds (0 means no timeout, as in http.Client).
//   - UserAgent: stored on the returned client and used by Do.
func NewClient(opts ClientOptions) *HTTPClient {
	persJar, err := jar.New(&jar.Options{
		Filename: "/tmp/cookies/gorge.cookies",
	})
	if err != nil {
		log.Fatalf("Failed to initialize cookie jar: %v", err)
		// Safeguard in case the logger is configured not to terminate the process.
		return nil
	}

	transport := http.DefaultTransport.(*http.Transport).Clone()
	transport.DisableKeepAlives = true

	if opts.Proxy != "" {
		proxyURL, perr := url.Parse(opts.Proxy)
		if perr != nil {
			// Do not silently fall back to direct connections: make the misconfiguration visible.
			log.Warnf("Ignoring invalid HTTP proxy %q: %v", opts.Proxy, perr)
		} else {
			transport.Proxy = http.ProxyURL(proxyURL)
		}
	}

	if opts.WithoutTLS {
		transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	}

	return &HTTPClient{
		Client: &http.Client{
			Jar:       persJar,
			Transport: transport,
			Timeout:   time.Duration(opts.Timeout) * time.Second,
		},
		PersistentJar: persJar,
		UserAgent:     opts.UserAgent,
	}
}
// EnsureCookie guarantees that the jar holds cookies for fromURL, so that they are sent
// with further requests. Some scripts will not return correct data unless cookies are present.
//
// The URL is fetched when the jar has no cookies for it, or unconditionally when force
// is true. The response body is discarded; only the cookies matter.
func (client *HTTPClient) EnsureCookie(fromURL string, force bool) error {
	parsed, err := url.Parse(fromURL)
	if err != nil {
		return WrapErr(err, "failed to parse cookie URL").With("url", fromURL)
	}

	// The jar is always queried, even when force is set, exactly as before.
	stored := client.PersistentJar.Cookies(parsed)
	if !force && len(stored) > 0 {
		return nil
	}

	resp, err := client.Get(fromURL, nil)
	if err != nil {
		return WrapErr(err, "failed to fetch cookie URL").With("url", fromURL)
	}
	resp.Body.Close()
	return nil
}
// SaveCookies writes the persistent jar to disk, so that cookies survive a service restart.
// Saving is best effort: a failure to save is deliberately ignored.
func (client *HTTPClient) SaveCookies() {
	_ = client.PersistentJar.Save()
}
// Do is the same as http.Client.Do, but sets extra headers on the request first.
//
// Headers are applied in this order:
//  1. User-Agent: the client's UserAgent, or a fake browser agent (browser.MacOSX())
//     when opts.FakeAgent is set or when no UserAgent is configured.
//  2. Cache-Control: no-cache.
//  3. Everything in opts.Headers, which may override the two above.
//
// When opts.SkipCookies is set, every cookie received in the response is immediately
// expired in the client's jar, so it is not sent with further requests.
//
// opts may be nil. The response and error of the underlying http.Client.Do call are
// returned unchanged.
func (client *HTTPClient) Do(req *http.Request, opts *RequestOptions) (*http.Response, error) {
	// Requests built as struct literals may carry a nil header map; Set would panic on it.
	if req.Header == nil {
		req.Header = make(http.Header)
	}

	ua := client.UserAgent
	// An empty configured agent means "use a fake browser agent", as the
	// ClientOptions.UserAgent description promises.
	if ua == "" || (opts != nil && opts.FakeAgent) {
		ua = browser.MacOSX()
	}
	req.Header.Set("User-Agent", ua)
	req.Header.Set("Cache-Control", "no-cache")
	if opts != nil {
		for k, v := range opts.Headers {
			req.Header.Set(k, v)
		}
	}

	resp, err := client.Client.Do(req)

	// Without a jar no cookies are stored, so there is nothing to remove.
	if opts != nil && opts.SkipCookies && resp != nil && client.Jar != nil {
		received := resp.Cookies()
		for _, rc := range received {
			// A negative MaxAge marks the cookie for deletion.
			rc.MaxAge = -1
		}
		if len(received) > 0 {
			client.Jar.SetCookies(resp.Request.URL, received)
		}
	}
	return resp, err
}
// Get is the same as http.Client.Get, but the request goes through Do,
// so it carries the extra headers and honours the per-request options.
func (client *HTTPClient) Get(url string, opts *RequestOptions) (resp *http.Response, err error) {
	req, reqErr := http.NewRequest(http.MethodGet, url, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	return client.Do(req, opts)
}
// GetAsString performs a GET request and returns the whole response body as a string.
// The body is always closed before returning. The HTTP status code is not inspected.
func (client *HTTPClient) GetAsString(url string, opts *RequestOptions) (string, error) {
	resp, err := client.Get(url, opts)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, readErr := ioutil.ReadAll(resp.Body)
	if readErr != nil {
		return "", readErr
	}
	return string(body), nil
}
// GetAsJSON performs a GET request and decodes the response body as JSON into dest.
// dest must be a pointer, as required by the JSON decoder. The body is closed before returning.
func (client *HTTPClient) GetAsJSON(url string, dest interface{}, opts *RequestOptions) error {
	resp, err := client.Get(url, opts)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	decoder := json.NewDecoder(resp.Body)
	return decoder.Decode(dest)
}
// GetAsXML performs a GET request and decodes the response body as XML into dest.
// dest must be a pointer, as required by the XML decoder. The body is closed before returning.
func (client *HTTPClient) GetAsXML(url string, dest interface{}, opts *RequestOptions) error {
	resp, err := client.Get(url, opts)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	decoder := xml.NewDecoder(resp.Body)
	return decoder.Decode(dest)
}
// Doc extends goquery.Document with a Close() method
type Doc struct {
// Embedded parsed document: all goquery.Document methods are promoted
*goquery.Document
// resp is the HTTP response the document was parsed from; Close closes its body
resp *http.Response
}
// Close releases the body of the HTTP response this document was built from.
// The error returned by closing the body is ignored.
func (doc *Doc) Close() {
	_ = doc.resp.Body.Close()
}
// GetAsDoc performs a GET request and parses the response as an HTML document for goquery.
//
// On success the caller owns the returned Doc and must call its Close method to release
// the response body. On failure nil is returned together with the error, and the
// response body (if any) has already been closed.
func (client *HTTPClient) GetAsDoc(url string, opts *RequestOptions) (*Doc, error) {
	resp, err := client.Get(url, opts)
	if err != nil {
		return nil, err
	}

	// Sometimes this returns a document with an empty body
	qdoc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		// No Doc is handed to the caller on this path, so nobody else can close the body.
		resp.Body.Close()
		return nil, err
	}

	return &Doc{
		Document: qdoc,
		resp:     resp,
	}, nil
}
// PostForm is like http.Client.PostForm but with extra options.
// The form values are URL-encoded into the request body and the request is sent through Do.
// Besides the response, the request that was built is returned as well.
func (client *HTTPClient) PostForm(url string, data url.Values, opts *RequestOptions) (resp *http.Response, req *http.Request, err error) {
	form := strings.NewReader(data.Encode())
	req, err = http.NewRequest(http.MethodPost, url, form)
	if err != nil {
		return nil, req, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	resp, err = client.Do(req, opts)
	return resp, req, err
}
// PostFormAsString posts a form via PostForm and returns the whole response body as a string,
// together with the request that was sent. The body is closed before returning.
func (client *HTTPClient) PostFormAsString(url string, data url.Values, opts *RequestOptions) (result string, req *http.Request, err error) {
	resp, req, err := client.PostForm(url, data, opts)
	if err != nil {
		return "", req, err
	}
	defer resp.Body.Close()

	payload, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", req, err
	}
	result = string(payload)
	return result, req, nil
}
// CSVStreamOptions contains common options for streaming data from CSV files
type CSVStreamOptions struct {
// CSV separator symbol. Zero value keeps the csv.Reader default
Comma rune
// Decoder used to transcode the response body before CSV parsing.
// When nil, the body is read as is (defaults to UTF-8)
Decoder *encoding.Decoder
// Number of rows at the beginning of file that do not contain data
HeaderHeight int
// Expected number of columns in data rows; it is also passed to csv.Reader.FieldsPerRecord.
// Rows with a different number of columns are meant to stop the stream with an error.
// For header rows this is ignored
NumColumns int
// Extra HTTPClient options (embedded pointer, may be nil)
*RequestOptions
}
// StreamCSV reads the CSV file at the given URL and streams it by calling handler for each data row.
//
// Rows are processed as follows:
//   - The first opts.HeaderHeight rows are skipped; their column count is not validated.
//   - When opts.NumColumns is non-zero, a data row with a different number of columns
//     stops the stream with an error (the row is attached to the error).
//   - When opts.NumColumns is zero, the first record fixes the expected column count
//     (csv.Reader behaviour) and data rows that disagree with it are skipped.
//   - Any other read error, or an error returned by handler, stops the stream and is returned.
//
// The slice passed to handler is reused between rows (csv.Reader.ReuseRecord), so handler
// must copy it if the values are needed after it returns.
func (client *HTTPClient) StreamCSV(url string, handler func(row []string) error, opts CSVStreamOptions) error {
	resp, err := client.Get(url, opts.RequestOptions)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// Deferred calls run in reverse order: the rest of the body is drained before it is closed.
	defer io.Copy(ioutil.Discard, resp.Body) //nolint:errcheck

	var reader io.Reader = resp.Body
	if opts.Decoder != nil {
		reader = transform.NewReader(resp.Body, opts.Decoder)
	}

	csvReader := csv.NewReader(reader)
	csvReader.ReuseRecord = true
	csvReader.FieldsPerRecord = opts.NumColumns
	if opts.Comma != 0 {
		csvReader.Comma = opts.Comma
	}

	headerRowsLeft := opts.HeaderHeight
	var row []string
	for {
		row, err = csvReader.Read()
		if err == io.EOF {
			break
		}

		// A field count mismatch is not fatal by itself: the reader still returns the
		// record, and what to do with it depends on whether we are still in the header.
		fieldCountMismatch := false
		if err != nil {
			perr, ok := err.(*csv.ParseError)
			if !ok || perr.Err != csv.ErrFieldCount {
				return WrapErr(err, "csv stream error")
			}
			fieldCountMismatch = true
		}

		if headerRowsLeft > 0 {
			headerRowsLeft--
			continue
		}

		// Past the header, an explicitly requested column count is enforced.
		if opts.NumColumns != 0 && len(row) != opts.NumColumns {
			return NewErr(fmt.Errorf("unexpected csv row with %d columns instead of %d", len(row), opts.NumColumns)).With("row", row)
		}
		// No column count requested: rows that disagree with the first record are skipped.
		if fieldCountMismatch {
			continue
		}

		if err = handler(row); err != nil {
			return err
		}
	}
	return nil
}