Permalink
Browse files

Initial version.

  • Loading branch information...
0 parents commit ee5b25a10d0d760423571fbe3895345cb0644df5 @umitanuki committed Sep 2, 2011
Showing with 1,238 additions and 0 deletions.
  1. +39 −0 META.json
  2. +23 −0 Makefile
  3. +37 −0 README.md
  4. +141 −0 connutil.c
  5. +11 −0 connutil.h
  6. +84 −0 doc/s3_fdw.md
  7. +887 −0 s3_fdw.c
  8. +5 −0 s3_fdw.control
  9. +11 −0 s3_fdw.sql
@@ -0,0 +1,39 @@
+{
+ "name": "s3_fdw",
+ "abstract": "foreign-data wrapper for Amazon S3",
+ "description": "This module provides reading files located in Amazon S3 privately, using COPY mechanism.",
+ "version": "0.1.0",
+ "maintainer": "Hitoshi Harada <umi.tanuki@gmail.com>",
+ "license": "postgresql",
+ "provides": {
+ "s3_fdw": {
+ "abstract": "fdw for Amazon S3",
+ "version": "0.1.0",
+ "file": "s3_fdw.sql",
+ "docfile": "doc/s3_fdw.md"
+ }
+ },
+ "resources": {
+ "bugtracker": {
+ "web": "http://github.com/umitanuki/s3_fdw/issues/"
+ },
+ "repository": {
+ "url": "git://github.com/umitanuki/s3_fdw.git",
+ "web": "http://github.com/umitanuki/s3_fdw",
+ "type": "git"
+ }
+ },
+ "release_status": "unstable",
+ "meta-spec": {
+ "version": "1.0.0",
+ "url": "http://pgxn.org/meta/spec.txt"
+ },
+ "tags": [
+ "fdw",
+ "web",
+ "internet",
+ "amazon",
+ "cloud",
+ "bulkload"
+ ]
+}
@@ -0,0 +1,23 @@
+
+MODULE_big = s3_fdw
+OBJS = s3_fdw.o connutil.o# copy_patched.o
+EXTENSION = $(MODULE_big)
+EXTVERSION = 0.1.0
+EXTSQL = $(MODULE_big)--$(EXTVERSION).sql
+DATA = $(EXTSQL)
+EXTRA_CLEAN += $(EXTSQL)
+SHLIB_LINK = -lcurl -lssl -lcrypto
+
+#DOCS = doc/$(MODULES).md
+REGRESS = $(MODULE_big)
+
+all: $(EXTSQL)
+
+$(EXTSQL): $(MODULE_big).sql
+ cp $< $@
+
+
+
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
@@ -0,0 +1,37 @@
+s3\_fdw
+=======
+
+s3\_fdw provides a foreign-data wrapper (FDW) for Amazon S3 files,
+using the builtin COPY format.
+
+To build it, just do this:
+
+ make
+ make install
+
+If you encounter an error such as:
+
+ make: pg_config: Command not found
+
+Be sure that you have `pg_config` installed and in your path. If you used a
+package management system such as RPM to install PostgreSQL, be sure that the
+`-devel` package is also installed. If necessary tell the build process where
+to find it:
+
+ env PG_CONFIG=/path/to/pg_config make && make installcheck && make install
+
+Once `make install` is done, connect to your database with psql or other client
+and type
+
+ CREATE EXTENSION s3_fdw;
+
+then you'll see the FDW is installed. With the FDW, create a server, a user mapping,
+and a foreign table. You'll need an Amazon S3 access key ID and secret access key to
+authenticate private access to your data. Consult the AWS documentation for those keys.
+The access information is stored in the user mapping. The foreign table stores the
+options for COPY as well as the hostname, bucketname and filename.
+
+Dependencies
+------------
+The `s3_fdw` extension depends on libcurl and OpenSSL. You need their development
+packages installed in the system path.
@@ -0,0 +1,141 @@
+#include "openssl/hmac.h"
+
+#include "postgres.h"
+#include "lib/stringinfo.h"
+
+#include "connutil.h"
+
+static char *sign_by_secretkey(char *input, char *secretkey);
+static int b64_encode(const uint8 *src, unsigned len, uint8 *dst);
+
+/*
+ * Constructs GMT-style string
+ *
+ * Formats the current time as a date string of the form
+ * "Fri, 02 Sep 2011 12:34:56 +0000". The time is broken down with gmtime(),
+ * so the fields are UTC; the trailing "+0000" is a literal in the format.
+ *
+ * timer is passed straight to time(): when it is non-NULL the current
+ * calendar time is also stored through it.
+ *
+ * Returns a palloc0'd buffer of 256 bytes holding the formatted string.
+ *
+ * NOTE(review): gmtime() uses static storage and may return NULL; its result
+ * is handed to strftime() unchecked.
+ * NOTE(review): %a and %b are locale-dependent in strftime() -- presumably
+ * the C locale is in effect for LC_TIME here; confirm.
+ */
+char *
+httpdate(time_t *timer)
+{
+ char *datestring;
+ time_t t;
+ struct tm *gt;
+
+ t = time(timer);
+ gt = gmtime(&t);
+ datestring = (char *) palloc0(256 * sizeof(char));
+ strftime(datestring, 256 * sizeof(char), "%a, %d %b %Y %H:%M:%S +0000", gt);
+ return datestring;
+}
+
+/*
+ * Construct signed string for the Authorization header,
+ * following the Amazon S3 REST API spec.
+ *
+ * method      HTTP verb, written as the first line of the string to sign
+ * datestring  written as the Date line of the string to sign
+ * bucket,
+ * file        combined into the resource "/bucket/file"
+ * secretkey   key handed to sign_by_secretkey()
+ *
+ * Returns the value produced by sign_by_secretkey() for the assembled
+ * string to sign.
+ *
+ * NOTE(review): bucket and file are copied verbatim, no URL-encoding is
+ * applied -- presumably callers pass names that need none; confirm.
+ * NOTE(review): "resource" and the StringInfo buffer are never pfree'd --
+ * presumably left to the surrounding memory context; confirm.
+ */
+char *
+s3_signature(char *method, char *datestring,
+ char *bucket, char *file, char *secretkey)
+{
+ size_t rs_size;
+ char *resource;
+ StringInfoData buf;
+
+ rs_size = strlen(bucket) + strlen(file) + 3; /* 3 = '/' + '/' + '\0' */
+ resource = (char *) palloc0(rs_size);
+
+ snprintf(resource, rs_size, "/%s/%s", bucket, file);
+ initStringInfo(&buf);
+ /*
+ * StringToSign = HTTP-Verb + "\n" +
+ * Content-MD5 + "\n" +
+ * Content-Type + "\n" +
+ * Date + "\n" +
+ * CanonicalizedAmzHeaders +
+ * CanonicalizedResource;
+ *
+ * Content-MD5 and Content-Type are emitted as empty lines below, and
+ * nothing is appended for CanonicalizedAmzHeaders.
+ */
+ appendStringInfo(&buf, "%s\n", method);
+ appendStringInfo(&buf, "\n");
+ appendStringInfo(&buf, "\n");
+ appendStringInfo(&buf, "%s\n", datestring);
+// appendStringInfo(&buf, "");
+ appendStringInfo(&buf, "%s", resource);
+
+//elog(INFO, "StringToSign:%s", buf.data);
+ return sign_by_secretkey(buf.data, secretkey);
+}
+
+static char *
+sign_by_secretkey(char *input, char *secretkey)
+{
+ HMAC_CTX ctx;
+ /*
+  * Purpose of this function: compute the HMAC-SHA1 of the NUL-terminated
+  * "input", keyed with the NUL-terminated "secretkey", base64-encode the
+  * digest and return a pstrdup'd copy of the encoded text.
+  *
+  * "result" receives the raw digest. An HMAC-SHA1 digest is 20 bytes (not
+  * 30 characters, as this comment used to say), so 256 bytes is more than
+  * enough; HMAC_Final() reports the actual length through "len".
+  *
+  * NOTE(review): a stack-allocated HMAC_CTX with HMAC_CTX_init() and
+  * HMAC_CTX_cleanup() is the pre-1.1.0 OpenSSL API; the context type is
+  * opaque in later releases. None of the HMAC_* return values are checked.
+  */
+ char result[256];
+ unsigned int len;
+ /*
+  * b64_encode() emits four characters for every three input bytes, so the
+  * 20-byte digest becomes 28 characters. b64_encode() does not terminate
+  * its output; the NUL is stored explicitly after the call.
+  */
+ char b64_result[256];
+ int b64_len;
+
+ HMAC_CTX_init(&ctx);
+ HMAC_Init(&ctx, secretkey, strlen(secretkey), EVP_sha1());
+ HMAC_Update(&ctx, (unsigned char *) input, strlen(input));
+ HMAC_Final(&ctx, (unsigned char *) result, &len);
+ HMAC_CTX_cleanup(&ctx);
+
+ b64_len = b64_encode((unsigned char *) result, len, (unsigned char *) b64_result);
+ b64_result[b64_len] = '\0';
+
+ return pstrdup(b64_result);
+}
+
+/*
+ * BASE64 - duplicated :(
+ *
+ * Encodes "len" bytes starting at "src" into "dst" using the alphabet
+ * A-Z, a-z, 0-9, '+', '/' with '=' padding, and returns the number of bytes
+ * written to "dst".
+ *
+ * The output is not NUL-terminated and "dst" is not bounds-checked: the
+ * caller must supply a large enough buffer and add any terminator itself.
+ *
+ * A '\n' is written each time 76 encoded characters have been produced
+ * since the start of the output or since the previous newline.
+ *
+ * Trailing input that does not fill a three-byte group is padded: two
+ * leftover bytes (pos == 0) give three symbols and one '=', one leftover
+ * byte (pos == 1) gives two symbols and "==".
+ *
+ * NOTE(review): "duplicated" presumably means this was copied from an
+ * existing PostgreSQL base64 encoder -- TODO confirm the origin.
+ */
+
+static int
+b64_encode(const uint8 *src, unsigned len, uint8 *dst)
+{
+ static const unsigned char _base64[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+ uint8 *p,
+ *lend = dst + 76;
+ const uint8 *s,
+ *end = src + len;
+ int pos = 2;
+ unsigned long buf = 0;
+
+ s = src;
+ p = dst;
+
+ while (s < end)
+ {
+ buf |= *s << (pos << 3);
+ pos--;
+ s++;
+
+ /*
+ * write it out
+ *
+ * pos starts at 2 and drops by one per input byte, so it goes negative
+ * once three bytes (24 bits) sit in buf; they are emitted as four
+ * 6-bit symbols, most significant first, and the accumulator is reset.
+ */
+ if (pos < 0)
+ {
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = _base64[(buf >> 6) & 0x3f];
+ *p++ = _base64[buf & 0x3f];
+
+ pos = 2;
+ buf = 0;
+ }
+ if (p >= lend)
+ {
+ *p++ = '\n';
+ lend = p + 76;
+ }
+ }
+ if (pos != 2)
+ {
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
+ *p++ = '=';
+ }
+
+ return p - dst;
+}
@@ -0,0 +1,11 @@
+#ifndef _S3_CONNUTIL_H_
+#define _S3_CONNUTIL_H_
+
+#include <time.h>
+
+extern char *httpdate(time_t *timer);
+extern char *s3_signature(char *method, char *datestring,
+ char *bucket, char *file, char *secretkey);
+
+
+#endif /* _S3_CONNUTIL_H_ */
@@ -0,0 +1,84 @@
+s3\_fdw
+=======
+
+Synopsis
+--------
+
+ db1=# CREATE EXTENSION s3_fdw;
+ CREATE EXTENSION
+
+ db1=# CREATE SERVER amazon_s3 FOREIGN DATA WRAPPER s3_fdw;
+ CREATE SERVER
+
+ db1=# CREATE USER MAPPING FOR CURRENT_USER SERVER amazon_s3
+ OPTIONS (
+ accesskey 'your-access-key-id',
+ secretkey 'your-secret-access-key'
+ );
+ CREATE USER MAPPING
+
+ db1=# CREATE FOREIGN TABLE log20110901(
+ atime timestamp,
+ method text, elapse int,
+ session text
+ ) SERVER amazon_s3
+ OPTIONS (
+ hostname 's3-ap-northeast-1.amazonaws.com',
+ bucketname 'umitanuki-dbtest',
+ filename 'log20110901.txt',
+ delimiter E'\t'
+ );
+ CREATE FOREIGN TABLE
+
+Description
+-----------
+
+This module provides a foreign-data wrapper for Amazon S3 files.
+The procedure for setting up a foreign table is shown above.
+First, run `create extension` for this module. Then run
+`create server` with any name you like and no options,
+since server options are not supported yet. After that, run
+`create user mapping` for the current user with the mandatory options
+`accesskey` and `secretkey`. These are provided to you by Amazon.
+
+Finally, run `create foreign table` for your file. At the moment you
+need to define one table per file, as with file\_fdw in contrib.
+s3\_fdw supports all the COPY options that file\_fdw does, as
+well as these additional mandatory options:
+
+ - hostname
+ - bucketname
+ - filename
+
+Take the access URL of your S3 file, split it into these
+three options, and specify each one separately.
+
+Roadmap
+-------
+
+ - gz file support
+ - bucket files bulk load
+ - normal URL option rather than split path
+ - windows support
+
+Caveat
+------
+
+This module is still under development. You may encounter
+unpredictable behavior when using this program.
+
+In particular, s3\_fdw forks the backend and calls mkfifo to read
+and write in parallel, so it does not work on platforms
+where fork / mkfifo are unavailable.
+
+Support
+-------
+
+Go to http://github.com/umitanuki/s3_fdw
+Feel free to report any bugs or issues you find.
+
+Author
+------
+
+Hitoshi Harada
+
Oops, something went wrong.

0 comments on commit ee5b25a

Please sign in to comment.