/
data.headers.xml
108 lines (93 loc) · 4.9 KB
/
data.headers.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
<?xml version="1.0" encoding="UTF-8"?>
<table xmlns:i="http://query.yahooapis.com/v1/schema/internalTable.xsd" xmlns="http://query.yahooapis.com/v1/schema/table.xsd">
<meta>
<author>Marcel Duran</author>
<description>Requests (GET) a URL and retrieves its content (non-binary) and headers. The User-Agent string (ua) is optional.</description>
<sampleQuery>select * from {table} where url="http://www.yahoo.com/"</sampleQuery>
<sampleQuery>select * from {table} where url in ("http://www.yahoo.com/","http://www.google.com/") and ua="Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.1)"</sampleQuery>
</meta>
<bindings>
<select itemPath="" produces="XML">
<inputs>
<key id="url" type="xs:string" paramType="variable" required="true"/>
<key id="ua" type="xs:string" paramType="variable" required="false"/>
</inputs>
<execute>
<![CDATA[
var resp, header, tag, redirect, location, redir, count, decodeError,
data = <resources/>,
headers = <headers/>,
reTagCase = /(^|\-)(\w)/g,
tagCase = function (a, b, c) {
return b + c.toUpperCase();
},
request = function (compress, charset) {
y.log('request with' + (compress ? '' : 'out') + ' compression');
var req = y.rest(url);
// set user agent
if (ua) {
req.header('User-Agent', ua);
}
// compression
if (compress) {
req.header('Accept-Encoding', 'gzip,deflate');
req.decompress(true);
}
if (charset) {
req.forceCharset(charset);
}
// fetch url
return req.get();
};
resp = request(true);
y.log('response length: ' + (resp.response && resp.response.length));
// check redirect
redirect = y.diagnostics.redirect;
count = redirect && redirect.length();
if (count) {
count = (count % 2) ? count : count / 2;
for (i = 0; i < count; i += 1) {
redir = redirect[i];
y.log('redirect: ' + redir.@status + ' = ' + redir.toString());
location = redir.toString();
if (location) {
data.resources +=
<redirect>
<url>{url}</url>
<status>{redir.@status}</status>
<headers>
<Location>{location}</Location>
</headers>
</redirect>;
url = location;
}
}
}
// get headers
for (header in resp.headers) {
tag = header.replace(reTagCase, tagCase);
headers.headers += <{tag}>{resp.headers[header]}</{tag}>;
}
// set result
data.resources += <url>{url}</url>;
data.resources += <status>{resp.status}</status>;
data.resources += headers;
// get uncompressed response for non-binary
if ((header = resp.headers) && (header = header['content-type']) &&
header.indexOf('image') && header.indexOf('x-shockwave-flash') < 0) {
decodeError = y.diagnostics.url.(@['error'] == "Unable to parse data using default charset utf-8").(url == url);
y.log('decode error: ' + decodeError);
// try again with different charset if error occurs
if (!resp.response && decodeError) {
resp = request(false, 'ISO-8859-1');
y.log('response iso length: ' + (resp.response && resp.response.length));
}
}
data.resources += <content>{resp.response}</content>;
response.maxAge = 300;
response.object = data;
]]>
</execute>
</select>
</bindings>
</table>