Permalink
Browse files

Added probabilistic counter.

See the paper "Probabilistic Counting Algorithms for Data Base Applications" by Philippe Flajolet and G. Nigel Martin (1985) for more details.
  • Loading branch information...
1 parent f6dbaef commit 0e271220e9c0ec2e4b85088e768be9525eea6a44 @tvondra committed Oct 28, 2011
View
18 probabilistic/Makefile
@@ -0,0 +1,18 @@
+# Builds the probabilistic_counter extension as one shared library made of
+# several object files. PGXS expects only one of MODULES / MODULE_big /
+# PROGRAM to be set, so only MODULE_big (together with OBJS) is used here.
+MODULE_big = probabilistic_counter
+OBJS = probabilistic_counter.o probabilistic.o
+
+EXTENSION = probabilistic_counter
+DATA = probabilistic_counter--1.0.sql
+
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+
+# Extra prerequisites only; the compile and link recipes come from PGXS.
+all: probabilistic_counter.so
+
+probabilistic_counter.so: probabilistic.o probabilistic_counter.o
+
+# both sources include probabilistic.h, so rebuild them when it changes
+probabilistic.o: probabilistic.c probabilistic.h
+
+probabilistic_counter.o: probabilistic_counter.c probabilistic.h
View
199 probabilistic/probabilistic.c
@@ -0,0 +1,199 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "probabilistic.h"
+#include "postgres.h"
+#include "libpq/md5.h"
+
+#define HASH_LENGTH 16
+
+int pc_estimate(ProbabilisticCounter pc); /* also declared in probabilistic.h */
+
+int pc_get_r(const unsigned char * buffer, int byteFrom, int bytes); /* first zero bit within a slice */
+int pc_get_min_bit(const unsigned char * buffer, int byteFrom, int bytes); /* first set bit within a slice */
+
+void pc_hash_text(unsigned char * buffer, char salt, const char * element, int elen); /* MD5 of a text element into buffer */
+void pc_hash_int(unsigned char * buffer, char salt, int element); /* MD5 of an int element into buffer */
+
+/*
+ * Allocates and initializes a new counter.
+ *
+ * nbytes - width (in bytes) of each bitmap slice cut out of one hash; must
+ *          be at least 1 because it is later used as a divisor
+ * nsalts - number of salted hashes computed per element (at least 1); each
+ *          salt owns HASH_LENGTH bytes of the bitmap
+ *
+ * The result is a single palloc'd block: the varlena length word overlays
+ * the 'length' field and the zeroed bitmap follows the header fields.
+ * Raises ERROR when an argument is out of range.
+ *
+ * NOTE: nbytes larger than HASH_LENGTH is still accepted so existing callers
+ * keep working, but such a counter has zero slices per hash and therefore
+ * never records anything.
+ */
+ProbabilisticCounter pc_create(int nbytes, int nsalts) {
+
+    /* a value with a 4-byte varlena header cannot exceed 1GB - 1 */
+    const int max_varlena_size = 0x3FFFFFFF;
+    const int header_size = (int) offsetof(ProbabilisticCounterData, bitmap);
+    int total_size;
+    ProbabilisticCounter p;
+
+    if (nbytes < 1)
+        elog(ERROR, "number of bytes per bitmap must be positive (got %d)", nbytes);
+
+    /* the upper bound also keeps nsalts * HASH_LENGTH from overflowing an int */
+    if (nsalts < 1 || nsalts > (max_varlena_size - header_size) / HASH_LENGTH)
+        elog(ERROR, "number of salts is out of range (got %d)", nsalts);
+
+    /* one size for both the allocation and the varlena header */
+    total_size = header_size + nsalts * HASH_LENGTH;
+
+    /* palloc0 hands back zeroed memory, so the bitmap starts out empty */
+    p = (ProbabilisticCounter) palloc0(total_size);
+
+    SET_VARSIZE(p, total_size);
+
+    p->nbytes = nbytes;
+    p->nsalts = nsalts;
+
+    return p;
+
+}
+
+/*
+ * Size in bytes of a counter with the given parameters: the header fields
+ * in front of the bitmap plus HASH_LENGTH bitmap bytes per salt. This is the
+ * size a created counter carries in its varlena length word, not
+ * sizeof(ProbabilisticCounterData) + bitmap, which would also count the
+ * struct's placeholder byte and trailing padding.
+ *
+ * nbytes does not influence the size and is accepted only for symmetry
+ * with pc_create().
+ */
+int pc_size(int nbytes, int nsalts) {
+    return (int) offsetof(ProbabilisticCounterData, bitmap) + nsalts * HASH_LENGTH;
+}
+
+/*
+ * Returns the position of the first set bit inside a slice of the buffer.
+ *
+ * The slice covers 'bytes' bytes starting at byte 'byteFrom'. Bits are
+ * numbered from the least significant bit of the first byte upwards and the
+ * returned position is relative to the start of the slice. When no bit of
+ * the slice is set, HASH_LENGTH*8 is returned.
+ */
+int pc_get_min_bit(const unsigned char * buffer, int byteFrom, int bytes) {
+
+    int offset;
+    int shift;
+
+    for (offset = 0; offset < bytes; offset++) {
+
+        unsigned char current = buffer[byteFrom + offset];
+
+        /* a byte without any set bit can be skipped as a whole */
+        if (current == 0)
+            continue;
+
+        for (shift = 0; shift < 8; shift++) {
+            if ((current >> shift) & 0x1)
+                return offset * 8 + shift;
+        }
+    }
+
+    return (HASH_LENGTH*8);
+
+}
+
+/*
+ * Returns the position of the first zero bit inside a slice of the bitmap.
+ *
+ * The slice covers 'bytes' bytes starting at byte 'byteFrom'. Bits are
+ * numbered from the least significant bit of the first byte upwards and the
+ * returned position is relative to the start of the slice.
+ *
+ * When every bit of the slice is set, the result is the slice width in bits
+ * (bytes * 8). Returning a larger constant would inflate the estimate for
+ * slices narrower than the whole hash. An empty slice (bytes <= 0) yields 0.
+ */
+int pc_get_r(const unsigned char * buffer, int byteFrom, int bytes) {
+
+    int offset;
+    int shift;
+
+    for (offset = 0; offset < bytes; offset++) {
+
+        unsigned char current = buffer[byteFrom + offset];
+
+        /* a saturated byte holds no zero bit */
+        if (current == 0xFF)
+            continue;
+
+        for (shift = 0; shift < 8; shift++) {
+            if (((current >> shift) & 0x1) == 0)
+                return offset * 8 + shift;
+        }
+    }
+
+    /* the slice is completely filled */
+    return (bytes > 0) ? bytes * 8 : 0;
+
+}
+
+/*
+ * Computes the current estimate of the number of distinct elements.
+ *
+ * For every (salt, slice) bitmap the position of the first zero bit is
+ * obtained from pc_get_r(). The positions are averaged over all bitmaps and
+ * the estimate is 2^average divided by the correction constant from the
+ * paper. The value is converted to int on return.
+ */
+int pc_estimate(ProbabilisticCounter pc) {
+
+    float mean_r = 0;
+    int salt_no;
+
+    for (salt_no = 0; salt_no < pc->nsalts; salt_no++) {
+
+        /* slices per hash, and the total number of bitmaps in the counter */
+        int slices = HASH_LENGTH / pc->nbytes;
+        int bitmaps = slices * pc->nsalts;
+        int slice_no;
+
+        for (slice_no = 0; slice_no < slices; slice_no++) {
+
+            int first_byte = (salt_no * HASH_LENGTH) + (slice_no * pc->nbytes);
+            int r = pc_get_r(pc->bitmap, first_byte, pc->nbytes);
+
+            /* add this bitmap's share of the average */
+            mean_r += (float) r / bitmaps;
+
+        }
+    }
+
+    return powf(2, mean_r)/0.77351; /* magic constant, as listed in the paper */
+
+}
+
+/*
+ * Computes the MD5 hash of the salt byte followed by the element bytes.
+ *
+ * buffer  - receives the hash (the callers pass HASH_LENGTH bytes)
+ * salt    - one-byte salt placed in front of the element
+ * element - element data, elen bytes (no terminator required)
+ *
+ * Raises ERROR if the hash cannot be computed.
+ */
+void pc_hash_text(unsigned char * buffer, char salt, const char * element, int elen) {
+
+    /* heap instead of a stack VLA: text values can be far larger than the stack */
+    unsigned char *item = (unsigned char *) palloc(elen + 1);
+
+    /*
+     * The salt occupies the first byte and the element follows it. Copying
+     * the element to the start of the buffer would overwrite the salt and
+     * leave the last hashed byte uninitialized.
+     */
+    item[0] = (unsigned char) salt;
+    memcpy(item + 1, element, elen);
+
+    if (! pg_md5_binary(item, elen + 1, buffer))
+        elog(ERROR, "could not compute MD5 hash of the element");
+
+    pfree(item);
+
+}
+
+/*
+ * Computes the MD5 hash of the salt byte followed by the bytes of an int.
+ *
+ * buffer  - receives the hash (the callers pass HASH_LENGTH bytes)
+ * salt    - one-byte salt placed in front of the element
+ * element - value to hash; its in-memory representation is hashed as-is
+ *
+ * Raises ERROR if the hash cannot be computed.
+ */
+void pc_hash_int(unsigned char * buffer, char salt, int element) {
+
+    unsigned char item[1 + sizeof(element)];
+
+    /*
+     * The salt occupies the first byte and the value follows it. Copying the
+     * value to the start of the buffer would overwrite the salt and leave
+     * the last hashed byte uninitialized.
+     */
+    item[0] = (unsigned char) salt;
+    memcpy(item + 1, &element, sizeof(element));
+
+    if (! pg_md5_binary(item, sizeof(item), buffer))
+        elog(ERROR, "could not compute MD5 hash of the element");
+
+}
+
+/*
+ * Adds a text element (elen bytes at 'element') to the counter.
+ *
+ * For each salt the element is hashed once; the hash is cut into slices of
+ * pc->nbytes bytes and, per slice, the position of the first set bit is
+ * recorded in the bitmap that belongs to that salt and slice.
+ *
+ * Raises ERROR for a counter whose nbytes is not positive.
+ */
+void pc_add_element_text(ProbabilisticCounter pc, char * element, int elen) {
+
+    unsigned char hash[HASH_LENGTH];
+    int slices;
+    int slice_bits;
+    int salt, slice;
+
+    /* nbytes is used as a divisor below */
+    if (pc->nbytes < 1)
+        elog(ERROR, "invalid probabilistic counter (nbytes = %d)", pc->nbytes);
+
+    slices = HASH_LENGTH / pc->nbytes;
+    slice_bits = pc->nbytes * 8;
+
+    for (salt = 0; salt < pc->nsalts; salt++) {
+
+        /* compute the hash using the salt */
+        pc_hash_text(hash, salt, element, elen);
+
+        for (slice = 0; slice < slices; slice++) {
+
+            int offset = slice * pc->nbytes;
+            int bit = pc_get_min_bit(hash, offset, pc->nbytes);
+
+            /*
+             * No bit is set in this slice of the hash, so pc_get_min_bit
+             * returned its "not found" value. Using it as a bit index would
+             * write outside the slice (and past the bitmap for the last
+             * salt), so there is nothing to record.
+             */
+            if (bit >= slice_bits)
+                continue;
+
+            /* set the bit in the bitmap of this salt and slice */
+            pc->bitmap[(HASH_LENGTH * salt) + offset + bit / 8] |= (0x1 << (bit % 8));
+
+        }
+    }
+}
+
+/*
+ * Adds an int element to the counter.
+ *
+ * For each salt the element is hashed once; the hash is cut into slices of
+ * pc->nbytes bytes and, per slice, the position of the first set bit is
+ * recorded in the bitmap that belongs to that salt and slice.
+ *
+ * Raises ERROR for a counter whose nbytes is not positive.
+ */
+void pc_add_element_int(ProbabilisticCounter pc, int element) {
+
+    unsigned char hash[HASH_LENGTH];
+    int slices;
+    int slice_bits;
+    int salt, slice;
+
+    /* nbytes is used as a divisor below */
+    if (pc->nbytes < 1)
+        elog(ERROR, "invalid probabilistic counter (nbytes = %d)", pc->nbytes);
+
+    slices = HASH_LENGTH / pc->nbytes;
+    slice_bits = pc->nbytes * 8;
+
+    for (salt = 0; salt < pc->nsalts; salt++) {
+
+        /* compute the hash using the salt */
+        pc_hash_int(hash, salt, element);
+
+        for (slice = 0; slice < slices; slice++) {
+
+            int offset = slice * pc->nbytes;
+            int bit = pc_get_min_bit(hash, offset, pc->nbytes);
+
+            /*
+             * No bit is set in this slice of the hash, so pc_get_min_bit
+             * returned its "not found" value. Using it as a bit index would
+             * write outside the slice (and past the bitmap for the last
+             * salt), so there is nothing to record.
+             */
+            if (bit >= slice_bits)
+                continue;
+
+            /* set the bit in the bitmap of this salt and slice */
+            pc->bitmap[(HASH_LENGTH * salt) + offset + bit / 8] |= (0x1 << (bit % 8));
+
+        }
+    }
+}
+
+/* Clears all bitmap bytes of the counter; nbytes and nsalts stay untouched. */
+void pc_reset(ProbabilisticCounter pc) {
+    int remaining = pc->nsalts * HASH_LENGTH;
+    unsigned char *cursor = pc->bitmap;
+
+    while (remaining-- > 0)
+        *cursor++ = 0;
+}
View
40 probabilistic/probabilistic.h
@@ -0,0 +1,40 @@
+#ifndef PROBABILISTIC_H
+#define PROBABILISTIC_H
+
+/* Length of hash (MD5 => 16B) */
+#define HASH_LENGTH 16
+
+/* This is an implementation of "probabilistic counter" as described in the
+ * article "Probabilistic Counting Algorithms for Data Base Applications",
+ * published by Flajolet and Martin in 1985. */
+typedef struct ProbabilisticCounterData {
+
+    /* length of the whole value in bytes; this field overlays the varlena
+     * length word and is written with SET_VARSIZE by pc_create */
+    int length;
+
+    /* number of bytes per bitmap (each hash is cut into slices this wide) */
+    int nbytes;
+
+    /* number of salts (each salt owns HASH_LENGTH bytes of the bitmap) */
+    int nsalts;
+
+    /* the bitmaps, nsalts * HASH_LENGTH bytes allocated together with the
+     * struct (uses the very same trick as the varlena type in include/c.h) */
+    unsigned char bitmap[1];
+
+} ProbabilisticCounterData;
+
+typedef ProbabilisticCounterData* ProbabilisticCounter;
+
+/* creates an empty counter with the given bytes per bitmap and number of salts */
+ProbabilisticCounter pc_create(int nbytes, int nsalts);
+
+/* size (in bytes) of a counter with the given parameters */
+int pc_size(int nbytes, int nsalts);
+
+/* add an element to the counter */
+void pc_add_element_text(ProbabilisticCounter pc, char * element, int elen);
+void pc_add_element_int(ProbabilisticCounter pc, int element);
+
+/* print info about the counter (declared here only; probabilistic.c does
+ * not define it) */
+void pc_print_info(ProbabilisticCounter pc);
+
+/* get current estimate */
+int pc_estimate(ProbabilisticCounter pc);
+
+/* clear all bitmaps of the counter */
+void pc_reset(ProbabilisticCounter pc);
+
+#endif /* PROBABILISTIC_H */
View
100 probabilistic/probabilistic_counter--1.0.sql
@@ -0,0 +1,100 @@
+-- PROBABILISTIC COUNTING ESTIMATOR
+
+-- shell type
+CREATE TYPE probabilistic_estimator;
+
+-- get estimator size (in bytes) for the given bytes per bitmap / number of salts
+-- (STRICT: the C function reads both arguments without NULL checks)
+CREATE FUNCTION probabilistic_size(int, int) RETURNS int
+    AS 'MODULE_PATHNAME', 'probabilistic_size'
+    LANGUAGE C STRICT;
+
+-- creates a new estimator with the given bytes per bitmap / number of salts
+-- (STRICT: the C function reads both arguments without NULL checks)
+CREATE FUNCTION probabilistic_init(int, int) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_init'
+    LANGUAGE C STRICT;
+
+-- add a text item to the estimator (the C function handles NULLs itself)
+CREATE FUNCTION probabilistic_add_item(probabilistic_estimator, text) RETURNS void
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_text'
+    LANGUAGE C;
+
+-- add an int item to the estimator (the C function handles NULLs itself)
+CREATE FUNCTION probabilistic_add_item(probabilistic_estimator, int) RETURNS void
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_int'
+    LANGUAGE C;
+
+-- transition functions: state, item, bytes per bitmap, number of salts
+-- (not STRICT, they create the estimator when the state is still NULL)
+CREATE FUNCTION probabilistic_add_item_agg(probabilistic_estimator, text, integer, integer) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_agg_text'
+    LANGUAGE C;
+
+CREATE FUNCTION probabilistic_add_item_agg(probabilistic_estimator, int, integer, integer) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_agg_int'
+    LANGUAGE C;
+
+-- transition functions using the built-in default estimator parameters
+CREATE FUNCTION probabilistic_add_item_agg2(probabilistic_estimator, text) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_agg2_text'
+    LANGUAGE C;
+
+CREATE FUNCTION probabilistic_add_item_agg2(probabilistic_estimator, int) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_add_item_agg2_int'
+    LANGUAGE C;
+
+-- get current estimate of the distinct values (as a real number)
+CREATE FUNCTION probabilistic_get_estimate(probabilistic_estimator) RETURNS real
+    AS 'MODULE_PATHNAME', 'probabilistic_get_estimate'
+    LANGUAGE C STRICT;
+
+-- reset the estimator (start counting from the beginning)
+-- (STRICT: the C function reads the estimator without a NULL check)
+CREATE FUNCTION probabilistic_reset(probabilistic_estimator) RETURNS void
+    AS 'MODULE_PATHNAME', 'probabilistic_reset'
+    LANGUAGE C STRICT;
+
+-- length of the estimator (in bytes, including the header)
+CREATE FUNCTION length(probabilistic_estimator) RETURNS int
+    AS 'MODULE_PATHNAME', 'probabilistic_length'
+    LANGUAGE C STRICT;
+
+-- text input / output functions of the data type
+CREATE FUNCTION probabilistic_in(cstring) RETURNS probabilistic_estimator
+    AS 'MODULE_PATHNAME', 'probabilistic_in'
+    LANGUAGE C STRICT;
+
+CREATE FUNCTION probabilistic_out(probabilistic_estimator) RETURNS cstring
+    AS 'MODULE_PATHNAME', 'probabilistic_out'
+    LANGUAGE C STRICT;
+
+-- data type for the probabilistic-counting based distinct estimator
+CREATE TYPE probabilistic_estimator (
+    INPUT = probabilistic_in,
+    OUTPUT = probabilistic_out,
+    LIKE = bytea
+);
+
+-- probabilistic-counting based aggregates
+-- item / bytes per bitmap / number of salts
+CREATE AGGREGATE probabilistic_distinct(text, int, int)
+(
+    sfunc = probabilistic_add_item_agg,
+    stype = probabilistic_estimator,
+    finalfunc = probabilistic_get_estimate
+);
+
+CREATE AGGREGATE probabilistic_distinct(int, int, int)
+(
+    sfunc = probabilistic_add_item_agg,
+    stype = probabilistic_estimator,
+    finalfunc = probabilistic_get_estimate
+);
+
+-- the same aggregates with the default estimator parameters
+CREATE AGGREGATE probabilistic_distinct(text)
+(
+    sfunc = probabilistic_add_item_agg2,
+    stype = probabilistic_estimator,
+    finalfunc = probabilistic_get_estimate
+);
+
+CREATE AGGREGATE probabilistic_distinct(int)
+(
+    sfunc = probabilistic_add_item_agg2,
+    stype = probabilistic_estimator,
+    finalfunc = probabilistic_get_estimate
+);
View
480 probabilistic/probabilistic_counter.c
@@ -0,0 +1,480 @@
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "probabilistic.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+#define VAL(CH) ((CH) - '0') /* digit character -> its numeric value */
+#define DIG(VAL) ((VAL) + '0') /* numeric value -> its digit character */
+
+/*
+ * Version-1 calling convention records, one per function exported to SQL.
+ * Every name must match the function definition exactly, otherwise the
+ * function manager finds no info record for it.
+ */
+PG_FUNCTION_INFO_V1(probabilistic_add_item_text);
+PG_FUNCTION_INFO_V1(probabilistic_add_item_int);
+
+PG_FUNCTION_INFO_V1(probabilistic_add_item_agg_text);
+PG_FUNCTION_INFO_V1(probabilistic_add_item_agg_int);
+
+PG_FUNCTION_INFO_V1(probabilistic_add_item_agg2_text);
+PG_FUNCTION_INFO_V1(probabilistic_add_item_agg2_int);
+
+PG_FUNCTION_INFO_V1(probabilistic_get_estimate);
+PG_FUNCTION_INFO_V1(probabilistic_size);
+PG_FUNCTION_INFO_V1(probabilistic_init);
+PG_FUNCTION_INFO_V1(probabilistic_reset);
+PG_FUNCTION_INFO_V1(probabilistic_in);
+PG_FUNCTION_INFO_V1(probabilistic_out);
+PG_FUNCTION_INFO_V1(probabilistic_recv);
+PG_FUNCTION_INFO_V1(probabilistic_send);
+PG_FUNCTION_INFO_V1(probabilistic_length);
+
+/* prototypes of the functions defined below */
+Datum probabilistic_add_item_text(PG_FUNCTION_ARGS);
+Datum probabilistic_add_item_int(PG_FUNCTION_ARGS);
+
+Datum probabilistic_add_item_agg_text(PG_FUNCTION_ARGS);
+Datum probabilistic_add_item_agg_int(PG_FUNCTION_ARGS);
+
+Datum probabilistic_add_item_agg2_text(PG_FUNCTION_ARGS);
+Datum probabilistic_add_item_agg2_int(PG_FUNCTION_ARGS);
+
+Datum probabilistic_get_estimate(PG_FUNCTION_ARGS);
+Datum probabilistic_size(PG_FUNCTION_ARGS);
+Datum probabilistic_init(PG_FUNCTION_ARGS);
+Datum probabilistic_reset(PG_FUNCTION_ARGS);
+Datum probabilistic_in(PG_FUNCTION_ARGS);
+Datum probabilistic_out(PG_FUNCTION_ARGS);
+Datum probabilistic_recv(PG_FUNCTION_ARGS);
+Datum probabilistic_send(PG_FUNCTION_ARGS);
+Datum probabilistic_length(PG_FUNCTION_ARGS);
+
+/*
+ * probabilistic_add_item(counter, text)
+ *
+ * Adds a text item to an existing counter. A NULL counter raises an error,
+ * a NULL item is ignored. Returns void: the bits are set in the counter
+ * value obtained from the first argument.
+ */
+Datum
+probabilistic_add_item_text(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    text * item;
+
+    /* this function never creates a counter, it needs an existing one */
+    if (PG_ARGISNULL(0))
+        elog(ERROR, "probabilistic counter must not be NULL");
+
+    /* a NULL item carries no value to add */
+    if (PG_ARGISNULL(1))
+        PG_RETURN_VOID();
+
+    pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+
+    /* get the new item */
+    item = PG_GETARG_TEXT_P(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_text(pc, VARDATA(item), VARSIZE(item) - VARHDRSZ);
+
+    PG_RETURN_VOID();
+
+}
+
+/*
+ * probabilistic_add_item(counter, int)
+ *
+ * Adds an int item to an existing counter. A NULL counter raises an error,
+ * a NULL item is ignored. Returns void: the bits are set in the counter
+ * value obtained from the first argument.
+ */
+Datum
+probabilistic_add_item_int(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    int item;
+
+    /* this function never creates a counter, it needs an existing one */
+    if (PG_ARGISNULL(0))
+        elog(ERROR, "probabilistic counter must not be NULL");
+
+    /* a NULL item carries no value to add */
+    if (PG_ARGISNULL(1))
+        PG_RETURN_VOID();
+
+    pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+
+    /* get the new item */
+    item = PG_GETARG_INT32(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_int(pc, item);
+
+    PG_RETURN_VOID();
+
+}
+
+/*
+ * Transition function of probabilistic_distinct(text, int, int).
+ *
+ * Arguments: state, item, bytes per bitmap, number of salts. The counter is
+ * created from arguments 2 and 3 when the state is still NULL; later calls
+ * update the state they are given. NULL items are skipped and the state is
+ * returned untouched. Raises ERROR when the counter has to be created and
+ * one of its parameters is NULL.
+ */
+Datum
+probabilistic_add_item_agg_text(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    text * item;
+
+    /* a NULL item must not be detoasted - hand the state back as it is */
+    if (PG_ARGISNULL(1)) {
+        if (PG_ARGISNULL(0))
+            PG_RETURN_NULL();
+        PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+    }
+
+    /* is the counter created? (if not, create it from the parameters) */
+    if (PG_ARGISNULL(0)) {
+        if (PG_ARGISNULL(2) || PG_ARGISNULL(3))
+            elog(ERROR, "bytes per bitmap and number of salts must not be NULL");
+        pc = pc_create(PG_GETARG_INT32(2), PG_GETARG_INT32(3));
+    } else {
+        pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+    }
+
+    /* get the new item */
+    item = PG_GETARG_TEXT_P(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_text(pc, VARDATA(item), VARSIZE(item) - VARHDRSZ);
+
+    /* return the updated bytea */
+    PG_RETURN_BYTEA_P(pc);
+
+}
+
+/*
+ * Transition function of probabilistic_distinct(int, int, int).
+ *
+ * Arguments: state, item, bytes per bitmap, number of salts. The counter is
+ * created from arguments 2 and 3 when the state is still NULL; later calls
+ * update the state they are given. NULL items are skipped and the state is
+ * returned untouched. Raises ERROR when the counter has to be created and
+ * one of its parameters is NULL.
+ */
+Datum
+probabilistic_add_item_agg_int(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    int item;
+
+    /* a NULL item is not a value to count - hand the state back as it is */
+    if (PG_ARGISNULL(1)) {
+        if (PG_ARGISNULL(0))
+            PG_RETURN_NULL();
+        PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+    }
+
+    /* is the counter created? (if not, create it from the parameters) */
+    if (PG_ARGISNULL(0)) {
+        if (PG_ARGISNULL(2) || PG_ARGISNULL(3))
+            elog(ERROR, "bytes per bitmap and number of salts must not be NULL");
+        pc = pc_create(PG_GETARG_INT32(2), PG_GETARG_INT32(3));
+    } else {
+        pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+    }
+
+    /* get the new item */
+    item = PG_GETARG_INT32(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_int(pc, item);
+
+    /* return the updated bytea */
+    PG_RETURN_BYTEA_P(pc);
+
+}
+
+/*
+ * Transition function of probabilistic_distinct(text).
+ *
+ * Arguments: state, item. The counter is created with default parameters
+ * when the state is still NULL; later calls update the state they are
+ * given. NULL items are skipped and the state is returned untouched.
+ */
+Datum
+probabilistic_add_item_agg2_text(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    text * item;
+
+    /* a NULL item must not be detoasted - hand the state back as it is */
+    if (PG_ARGISNULL(1)) {
+        if (PG_ARGISNULL(0))
+            PG_RETURN_NULL();
+        PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+    }
+
+    /*
+     * Is the counter created? If not, create it with 4 bytes per bitmap and
+     * 64 salts. pc_create() takes (nbytes, nsalts); passing 64 as nbytes
+     * exceeds HASH_LENGTH, which leaves zero slices per hash, so the counter
+     * would never record any item.
+     * NOTE(review): the values follow the original literals in the order
+     * pc_create() declares them - confirm the intended defaults.
+     */
+    if (PG_ARGISNULL(0)) {
+        pc = pc_create(4, 64);
+    } else {
+        pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+    }
+
+    /* get the new item */
+    item = PG_GETARG_TEXT_P(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_text(pc, VARDATA(item), VARSIZE(item) - VARHDRSZ);
+
+    /* return the updated bytea */
+    PG_RETURN_BYTEA_P(pc);
+
+}
+
+/*
+ * Transition function of probabilistic_distinct(int).
+ *
+ * Arguments: state, item. The counter is created with default parameters
+ * when the state is still NULL; later calls update the state they are
+ * given. NULL items are skipped and the state is returned untouched.
+ */
+Datum
+probabilistic_add_item_agg2_int(PG_FUNCTION_ARGS)
+{
+
+    ProbabilisticCounter pc;
+    int item;
+
+    /* a NULL item is not a value to count - hand the state back as it is */
+    if (PG_ARGISNULL(1)) {
+        if (PG_ARGISNULL(0))
+            PG_RETURN_NULL();
+        PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+    }
+
+    /*
+     * Is the counter created? If not, create it with 4 bytes per bitmap and
+     * 64 salts. pc_create() takes (nbytes, nsalts); passing 64 as nbytes
+     * exceeds HASH_LENGTH, which leaves zero slices per hash, so the counter
+     * would never record any item.
+     * NOTE(review): the values follow the original literals in the order
+     * pc_create() declares them - confirm the intended defaults.
+     */
+    if (PG_ARGISNULL(0)) {
+        pc = pc_create(4, 64);
+    } else {
+        pc = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+    }
+
+    /* get the new item */
+    item = PG_GETARG_INT32(1);
+
+    /* in-place update works only if executed as aggregate */
+    pc_add_element_int(pc, item);
+
+    /* return the updated bytea */
+    PG_RETURN_BYTEA_P(pc);
+
+}
+
+/*
+ * probabilistic_get_estimate(counter)
+ *
+ * Returns the current distinct estimate of the counter as a float4 (the
+ * integer result of pc_estimate is converted on return).
+ */
+Datum
+probabilistic_get_estimate(PG_FUNCTION_ARGS)
+{
+    ProbabilisticCounter counter = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+    int distinct = pc_estimate(counter);
+
+    PG_RETURN_FLOAT4(distinct);
+}
+
+/*
+ * probabilistic_init(int, int)
+ *
+ * Creates a new, empty counter; both arguments are handed to pc_create in
+ * the given order.
+ */
+Datum
+probabilistic_init(PG_FUNCTION_ARGS)
+{
+    int first_param = PG_GETARG_INT32(0);
+    int second_param = PG_GETARG_INT32(1);
+
+    PG_RETURN_BYTEA_P(pc_create(first_param, second_param));
+}
+
+/*
+ * probabilistic_size(int, int)
+ *
+ * Returns what pc_size reports for the two arguments (bytes per bitmap,
+ * number of salts).
+ */
+Datum
+probabilistic_size(PG_FUNCTION_ARGS)
+{
+    int bytes_per_bitmap = PG_GETARG_INT32(0);
+    int salts = PG_GETARG_INT32(1);
+    int size = pc_size(bytes_per_bitmap, salts);
+
+    PG_RETURN_INT32(size);
+}
+
+/*
+ * length(counter) - varlena size of the counter value, as reported by VARSIZE.
+ */
+Datum
+probabilistic_length(PG_FUNCTION_ARGS)
+{
+    ProbabilisticCounter counter = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+
+    PG_RETURN_INT32(VARSIZE(counter));
+}
+
+/*
+ * probabilistic_reset(counter) - clears the bitmaps of the counter value
+ * obtained from the argument and returns void.
+ */
+Datum
+probabilistic_reset(PG_FUNCTION_ARGS)
+{
+    ProbabilisticCounter counter = (ProbabilisticCounter)PG_GETARG_BYTEA_P(0);
+
+    pc_reset(counter);
+
+    PG_RETURN_VOID();
+}
+
+
+/*
+ * probabilistic_in - text input function of the probabilistic_estimator type
+ *
+ * The parsing follows the printable representation of a byte array: either
+ * the hex form (backslash, 'x', then hex digits) or the traditional escaped
+ * form, where non-printable characters must be passed as '\nnn' (octal) and
+ * '\' must be passed as '\\'.
+ * ereport(ERROR, ...) if bad form.
+ *
+ * BUGS:
+ * The input is scanned twice.
+ * The error checking of input is minimal.
+ * NOTE(review): the decoded bytes are returned as they are; nothing in
+ * this function checks that they form a consistent counter (nbytes,
+ * nsalts, bitmap length).
+ */
+Datum
+probabilistic_in(PG_FUNCTION_ARGS)
+{
+ char *inputText = PG_GETARG_CSTRING(0);
+ char *tp;
+ char *rp;
+ int bc;
+ bytea *result;
+
+ /* Recognize hex input */
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+
+ bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
+ result = palloc(bc);
+ bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
+ SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
+ /* Else, it's the traditional escaped style; the first pass only counts the output bytes in bc */
+ for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+ {
+ if (tp[0] != '\\')
+ tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ tp += 4;
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ tp += 2;
+ else
+ {
+ /*
+ * one backslash, not followed by another or ### valid octal
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type bytea")));
+ }
+ }
+
+ bc += VARHDRSZ; /* total size including the varlena header */
+
+ result = (bytea *) palloc(bc);
+ SET_VARSIZE(result, bc);
+
+ tp = inputText; /* second pass: decode into the result */
+ rp = VARDATA(result);
+ while (*tp != '\0')
+ {
+ if (tp[0] != '\\')
+ *rp++ = *tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ {
+ bc = VAL(tp[1]); /* bc is reused to assemble the octal value */
+ bc <<= 3;
+ bc += VAL(tp[2]);
+ bc <<= 3;
+ *rp++ = bc + VAL(tp[3]);
+
+ tp += 4;
+ }
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ {
+ *rp++ = '\\';
+ tp += 2;
+ }
+ else
+ {
+ /*
+ * We should never get here. The first pass should not allow it.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type bytea")));
+ }
+ }
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * probabilistic_out - text output function of the probabilistic_estimator type
+ *
+ * Converts the value to the printable representation of a byte array, in
+ * the format selected by bytea_output: hex (backslash, 'x', then hex
+ * digits) or the traditional escaped format, in which non-printable
+ * characters are printed as '\nnn' (octal) and '\' as '\\'.
+ * elog(ERROR, ...) for any other bytea_output value.
+ */
+Datum
+probabilistic_out(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_PP(0);
+ char *result;
+ char *rp;
+
+ if (bytea_output == BYTEA_OUTPUT_HEX)
+ {
+ /* Print hex format */
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); /* 2 digits per byte + 2-char prefix + terminator */
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+ {
+ /* Print traditional escaped format */
+ char *vp;
+ int len;
+ int i;
+
+ len = 1; /* empty string has 1 char */
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) /* first pass: compute the output length */
+ {
+ if (*vp == '\\')
+ len += 2;
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ len += 4;
+ else
+ len++;
+ }
+ rp = result = (char *) palloc(len);
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) /* second pass: write the escaped bytes */
+ {
+ if (*vp == '\\')
+ {
+ *rp++ = '\\';
+ *rp++ = '\\';
+ }
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ {
+ int val; /* holds unprintable chars */
+
+ val = *vp;
+ rp[0] = '\\';
+ rp[3] = DIG(val & 07); /* octal digits are written last to first */
+ val >>= 3;
+ rp[2] = DIG(val & 07);
+ val >>= 3;
+ rp[1] = DIG(val & 03);
+ rp += 4;
+ }
+ else
+ *rp++ = *vp;
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized bytea_output setting: %d",
+ bytea_output);
+ rp = result = NULL; /* keep compiler quiet */
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * probabilistic_recv - converts the external binary format to a value
+ *
+ * Copies the not yet consumed part of the message buffer into a newly
+ * allocated varlena value.
+ */
+Datum
+probabilistic_recv(PG_FUNCTION_ARGS)
+{
+    StringInfo msg = (StringInfo) PG_GETARG_POINTER(0);
+    int payload_len = msg->len - msg->cursor;
+    bytea *value = (bytea *) palloc(payload_len + VARHDRSZ);
+
+    SET_VARSIZE(value, payload_len + VARHDRSZ);
+    pq_copymsgbytes(msg, VARDATA(value), payload_len);
+
+    PG_RETURN_BYTEA_P(value);
+}
+
+/*
+ * probabilistic_send - converts a value to the binary format
+ *
+ * Nothing needs converting, a copy of the input is returned.
+ */
+Datum
+probabilistic_send(PG_FUNCTION_ARGS)
+{
+    bytea *copy = PG_GETARG_BYTEA_P_COPY(0);
+
+    PG_RETURN_BYTEA_P(copy);
+}
View
6 probabilistic/probabilistic_counter.control
@@ -0,0 +1,6 @@
+# probabilistic estimator control
+comment = 'Aggregation functions and data type for distinct estimation based on Probabilistic counting.'
+default_version = '1.0'
+relocatable = true
+
+module_pathname = '$libdir/probabilistic_counter'

0 comments on commit 0e27122

Please sign in to comment.