Permalink
Browse files

MB-6657: Use 64 bits type for file offsets on Windows

Windows follows the LLP64 data model:
http://en.wikipedia.org/wiki/LLP64#64-bit_data_models

This means both the int and long int types have a size of 32 bits,
regardless of whether it's a 32-bit or a 64-bit Windows system.

And Windows defines the type off_t as being a signed long integer:
http://msdn.microsoft.com/en-us/library/323b6b3k.aspx

Therefore we can't use off_t on Windows if we deal with files that
can have a size of 2 GB or more.

When off_t is used to represent offsets, arithmetic operations on
those offsets produce negative values whenever the result should be
2 GB or larger. Such negative values are then converted by os_win.c
operations into unsigned 32-bit integers, stored as the 'Offset'
member of a structure of type OVERLAPPED.
This means we can end up overwriting previous data in the file,
because couchstore opens files in read/write mode (and not in
append-only mode) and does "pwrite" operations at an offset stored in
the db record, which is of type off_t and gets incremented after every
write operation.

Other databases and VMs use a custom 64-bit integer type on Windows to
represent file offsets, rather than the type off_t.

References:

SQLite:

http://www2.sqlite.org/cgi/src/artifact/eabd00b813577d36bd66271cb08dd64ea0589dac
(function seekWinFile for example)

Python:

http://bugs.python.org/issue12517

Erlang:

https://github.com/erlang/otp/blob/2d03ab7ffde828d3b56f8b7738fd0c00d0f5b630/erts/emulator/drivers/win32/win_efile.c#L1048

Change-Id: Ib49ac055e3073480f696576cc17c791d0349b773
  • Loading branch information...
1 parent 146c965 commit 243e9b89b3358f40959efabdc1ecace8ee6ac24b @fdmanana fdmanana committed with Mar 20, 2013
@@ -10,6 +10,31 @@
extern "C" {
#endif
+
+#ifdef _WIN32
+ /**
+ * Windows follows the LLP64 data model:
+ * http://en.wikipedia.org/wiki/LLP64#64-bit_data_models
+ *
+ * This means both the int and long int types have a size of 32 bits,
+ * regardless of whether it's a 32-bit or a 64-bit Windows system.
+ *
+ * And Windows defines the type off_t as being a signed long integer:
+ * http://msdn.microsoft.com/en-us/library/323b6b3k.aspx
+ *
+ * This means we can't use off_t on Windows if we deal with files
+ * that can have a size of 2 GB or more.
+ *
+ **/
+# if defined(_MSC_VER) || defined(__BORLANDC__)
+ typedef __int64 cs_off_t;
+# else
+ typedef long long int cs_off_t;
+# endif
+#else
+ typedef off_t cs_off_t;
+#endif
+
/** Document content metadata flags */
typedef uint8_t couchstore_content_meta_flags;
enum {
@@ -63,7 +88,7 @@ extern "C" {
uint64_t doc_count; /**< Total number of (non-deleted) documents */
uint64_t deleted_count; /**< Total number of deleted documents */
uint64_t space_used; /**< Disk space actively used by docs */
- off_t header_position; /**< File offset of current header */
+ cs_off_t header_position; /**< File offset of current header */
} DbInfo;
@@ -62,7 +62,7 @@ extern "C" {
* @return number of bytes read (which may be less than nbytes),
* or a value <= 0 if an error occurred
*/
- ssize_t (*pread)(couch_file_handle handle, void *buf, size_t nbytes, off_t offset);
+ ssize_t (*pread)(couch_file_handle handle, void *buf, size_t nbytes, cs_off_t offset);
/**
* Write a chunk of data to a given offset in the file.
@@ -74,15 +74,15 @@ extern "C" {
* @return number of bytes written (which may be less than nbytes),
* or a value <= 0 if an error occurred
*/
- ssize_t (*pwrite)(couch_file_handle handle, const void *buf, size_t nbytes, off_t offset);
+ ssize_t (*pwrite)(couch_file_handle handle, const void *buf, size_t nbytes, cs_off_t offset);
/**
* Move to the end of the file.
*
* @param handle file handle to move the filepointer in
* @return the offset (from beginning of the file), or -1 if the operation failed
*/
- off_t (*goto_eof)(couch_file_handle handle);
+ cs_off_t (*goto_eof)(couch_file_handle handle);
/**
* Flush the buffers to disk
View
@@ -182,7 +182,7 @@ static couchstore_error_t flush_mr_partial(couchfile_modify_result *res, size_t
char reducebuf[30];
size_t reducesize = 0;
uint64_t subtreesize = 0;
- off_t diskpos;
+ cs_off_t diskpos;
size_t disk_size;
sized_buf final_key = {NULL, 0};
View
@@ -35,7 +35,7 @@ static couchstore_error_t read_db_root(Db *db, node_pointer **root,
}
// Attempts to initialize the database from a header at the given file position
-static couchstore_error_t find_header_at_pos(Db *db, off_t pos)
+static couchstore_error_t find_header_at_pos(Db *db, cs_off_t pos)
{
int errcode = COUCHSTORE_SUCCESS;
raw_file_header *header_buf = NULL;
@@ -135,7 +135,7 @@ static couchstore_error_t write_header(Db *db)
encode_root(root, db->header.by_id_root);
root += idrootsize;
encode_root(root, db->header.local_docs_root);
- off_t pos;
+ cs_off_t pos;
couchstore_error_t errcode = db_write_header(db, &writebuf, &pos);
if (errcode == COUCHSTORE_SUCCESS) {
db->header.position = pos;
@@ -166,7 +166,7 @@ uint64_t couchstore_get_header_position(Db *db)
LIBCOUCHSTORE_API
couchstore_error_t couchstore_commit(Db *db)
{
- off_t curpos = db->file_pos;
+ cs_off_t curpos = db->file_pos;
sized_buf zerobyte = {"\0", 1};
size_t seqrootsize = 0, idrootsize = 0, localrootsize = 0;
if (db->header.by_seq_root) {
@@ -377,7 +377,7 @@ static int by_id_read_docinfo(DocInfo **pInfo, sized_buf *k, sized_buf *v)
}
//Fill in doc from reading file.
-static couchstore_error_t bp_to_doc(Doc **pDoc, Db *db, off_t bp, couchstore_open_options options)
+static couchstore_error_t bp_to_doc(Doc **pDoc, Db *db, cs_off_t bp, couchstore_open_options options)
{
couchstore_error_t errcode = COUCHSTORE_SUCCESS;
int bodylen = 0;
View
@@ -15,7 +15,7 @@
/** Read bytes from the database file, skipping over the header-detection bytes at every block
boundary. */
-static couchstore_error_t read_skipping_prefixes(Db* db, off_t *pos, ssize_t len, void *dst) {
+static couchstore_error_t read_skipping_prefixes(Db* db, cs_off_t *pos, ssize_t len, void *dst) {
if (*pos % COUCH_BLOCK_SIZE == 0) {
++*pos;
}
@@ -43,18 +43,18 @@ static couchstore_error_t read_skipping_prefixes(Db* db, off_t *pos, ssize_t len
/** Common subroutine of pread_bin, pread_compressed and pread_header.
Parameters and return value are the same as for pread_bin,
except the 'header' parameter which is 1 if reading a header, 0 otherwise. */
-static int pread_bin_internal(Db *db, off_t pos, char **ret_ptr, int header)
+static int pread_bin_internal(Db *db, cs_off_t pos, char **ret_ptr, int header)
{
struct {
uint32_t chunk_len;
uint32_t crc32;
} info;
-
+
couchstore_error_t err = read_skipping_prefixes(db, &pos, sizeof(info), &info);
if (err < 0) {
return err;
}
-
+
info.chunk_len = ntohl(info.chunk_len) & ~0x80000000;
if (header) {
if (info.chunk_len < 4 || info.chunk_len > MAX_HEADER_SIZE)
@@ -75,17 +75,17 @@ static int pread_bin_internal(Db *db, off_t pos, char **ret_ptr, int header)
free(buf);
return err;
}
-
+
*ret_ptr = buf;
return info.chunk_len;
}
-int pread_header(Db *db, off_t pos, char **ret_ptr)
+int pread_header(Db *db, cs_off_t pos, char **ret_ptr)
{
return pread_bin_internal(db, pos + 1, ret_ptr, 1);
}
-int pread_compressed(Db *db, off_t pos, char **ret_ptr)
+int pread_compressed(Db *db, cs_off_t pos, char **ret_ptr)
{
char *compressed_buf;
char *new_buf;
@@ -115,7 +115,7 @@ int pread_compressed(Db *db, off_t pos, char **ret_ptr)
return (int) uncompressed_len;
}
-int pread_bin(Db *db, off_t pos, char **ret_ptr)
+int pread_bin(Db *db, cs_off_t pos, char **ret_ptr)
{
return pread_bin_internal(db, pos, ret_ptr, 0);
}
View
@@ -13,9 +13,9 @@
#include "crc32.h"
#include "util.h"
-static ssize_t raw_write(Db *db, const sized_buf *buf, off_t pos)
+static ssize_t raw_write(Db *db, const sized_buf *buf, cs_off_t pos)
{
- off_t write_pos = pos;
+ cs_off_t write_pos = pos;
size_t buf_pos = 0;
char blockprefix = 0;
ssize_t written;
@@ -46,9 +46,9 @@ static ssize_t raw_write(Db *db, const sized_buf *buf, off_t pos)
return (ssize_t)(write_pos - pos);
}
-couchstore_error_t db_write_header(Db *db, sized_buf *buf, off_t *pos)
+couchstore_error_t db_write_header(Db *db, sized_buf *buf, cs_off_t *pos)
{
- off_t write_pos = db->file_pos;
+ cs_off_t write_pos = db->file_pos;
ssize_t written;
uint32_t size = htonl(buf->size + 4); //Len before header includes hash len.
uint32_t crc32 = htonl(hash_crc32(buf->buf, buf->size));
@@ -81,10 +81,10 @@ couchstore_error_t db_write_header(Db *db, sized_buf *buf, off_t *pos)
return COUCHSTORE_SUCCESS;
}
-int db_write_buf(Db *db, const sized_buf *buf, off_t *pos, size_t *disk_size)
+int db_write_buf(Db *db, const sized_buf *buf, cs_off_t *pos, size_t *disk_size)
{
- off_t write_pos = db->file_pos;
- off_t end_pos = write_pos;
+ cs_off_t write_pos = db->file_pos;
+ cs_off_t end_pos = write_pos;
ssize_t written;
uint32_t size = htonl(buf->size | 0x80000000);
uint32_t crc32 = htonl(hash_crc32(buf->buf, buf->size));
@@ -120,7 +120,7 @@ int db_write_buf(Db *db, const sized_buf *buf, off_t *pos, size_t *disk_size)
return 0;
}
-int db_write_buf_compressed(Db *db, const sized_buf *buf, off_t *pos, size_t *disk_size)
+int db_write_buf_compressed(Db *db, const sized_buf *buf, cs_off_t *pos, size_t *disk_size)
{
int errcode = 0;
sized_buf to_write;
@@ -138,4 +138,3 @@ int db_write_buf_compressed(Db *db, const sized_buf *buf, off_t *pos, size_t *di
free(to_write.buf);
return errcode;
}
-
View
@@ -47,9 +47,9 @@ static couchstore_error_t write_doc(Db *db, const Doc *doc, uint64_t *bp,
{
couchstore_error_t errcode;
if (writeopts & COMPRESS_DOC_BODIES) {
- errcode = db_write_buf_compressed(db, &doc->data, (off_t *) bp, disk_size);
+ errcode = db_write_buf_compressed(db, &doc->data, (cs_off_t *) bp, disk_size);
} else {
- errcode = db_write_buf(db, &doc->data, (off_t *) bp, disk_size);
+ errcode = db_write_buf(db, &doc->data, (cs_off_t *) bp, disk_size);
}
return errcode;
View
@@ -230,11 +230,11 @@ extern "C" {
Db *db = getDb(ls);
int64_t arg = static_cast<int64_t>(luaL_checknumber(ls, 2));
- off_t location(0);
+ cs_off_t location(0);
if (arg < 1) {
location = db->file_pos + arg;
} else {
- location = static_cast<off_t>(arg);
+ location = static_cast<cs_off_t>(arg);
}
const char* path = couchstore_get_db_filename(db);
View
@@ -287,7 +287,7 @@ static couchstore_error_t compact_seq_fetchcb(couchfile_lookup_request *rq, void
}
if(bp != 0) {
- off_t new_bp = 0;
+ cs_off_t new_bp = 0;
// Copy the document from the old db file to the new one:
size_t new_size = 0;
sized_buf item;
View
@@ -68,19 +68,19 @@ extern "C" {
or to NULL if the length is zero. Caller is responsible for freeing this buffer!
On failure, value pointed to is unaltered.
@return The length of the chunk (zero is a valid length!), or a negative error code */
- int pread_bin(Db *db, off_t pos, char **ret_ptr);
+ int pread_bin(Db *db, cs_off_t pos, char **ret_ptr);
/** Reads a compressed chunk from the file at a given position.
Parameters and return value are the same as for pread_bin. */
- int pread_compressed(Db *db, off_t pos, char **ret_ptr);
+ int pread_compressed(Db *db, cs_off_t pos, char **ret_ptr);
/** Reads a file header from the file at a given position.
Parameters and return value are the same as for pread_bin. */
- int pread_header(Db *db, off_t pos, char **ret_ptr);
+ int pread_header(Db *db, cs_off_t pos, char **ret_ptr);
- couchstore_error_t db_write_header(Db *db, sized_buf *buf, off_t *pos);
- int db_write_buf(Db *db, const sized_buf *buf, off_t *pos, size_t *disk_size);
- int db_write_buf_compressed(Db *db, const sized_buf *buf, off_t *pos, size_t *disk_size);
+ couchstore_error_t db_write_header(Db *db, sized_buf *buf, cs_off_t *pos);
+ int db_write_buf(Db *db, const sized_buf *buf, cs_off_t *pos, size_t *disk_size);
+ int db_write_buf_compressed(Db *db, const sized_buf *buf, cs_off_t *pos, size_t *disk_size);
struct _os_error *get_os_error_store(void);
extern pthread_key_t os_err_key;
View
@@ -35,7 +35,7 @@ typedef struct file_buffer {
struct buffered_file_handle *owner;
size_t capacity;
size_t length;
- off_t offset;
+ cs_off_t offset;
uint8_t dirty;
uint8_t bytes[1];
} file_buffer;
@@ -79,12 +79,12 @@ static void free_buffer(file_buffer* buf) {
// Write as many bytes as possible into the buffer, returning the count
-static size_t write_to_buffer(file_buffer* buf, const void *bytes, size_t nbyte, off_t offset)
+static size_t write_to_buffer(file_buffer* buf, const void *bytes, size_t nbyte, cs_off_t offset)
{
if (buf->length == 0) {
// If buffer is empty, align it to start at the current offset:
buf->offset = offset;
- } else if (offset < buf->offset || offset > buf->offset + (off_t)buf->length) {
+ } else if (offset < buf->offset || offset > buf->offset + (cs_off_t)buf->length) {
// If it's out of range, don't write anything
return 0;
}
@@ -122,8 +122,8 @@ static couchstore_error_t flush_buffer(file_buffer* buf) {
//////// BUFFER READS:
-static size_t read_from_buffer(file_buffer* buf, void *bytes, size_t nbyte, off_t offset) {
- if (offset < buf->offset || offset >= buf->offset + (off_t)buf->length) {
+static size_t read_from_buffer(file_buffer* buf, void *bytes, size_t nbyte, cs_off_t offset) {
+ if (offset < buf->offset || offset >= buf->offset + (cs_off_t)buf->length) {
return 0;
}
size_t offset_in_buffer = (size_t)(offset - buf->offset);
@@ -134,7 +134,7 @@ static size_t read_from_buffer(file_buffer* buf, void *bytes, size_t nbyte, off_
}
-static couchstore_error_t load_buffer_from(file_buffer* buf, off_t offset, size_t nbyte) {
+static couchstore_error_t load_buffer_from(file_buffer* buf, cs_off_t offset, size_t nbyte) {
if (buf->dirty) {
// If buffer contains data to be written, flush it first:
couchstore_error_t err = flush_buffer(buf);
@@ -168,7 +168,7 @@ static couchstore_error_t load_buffer_from(file_buffer* buf, off_t offset, size_
//////// BUFFER MANAGEMENT:
-static file_buffer* find_buffer(buffered_file_handle* h, off_t offset) {
+static file_buffer* find_buffer(buffered_file_handle* h, cs_off_t offset) {
offset = offset - offset % READ_BUFFER_CAPACITY;
// Find a buffer for this offset, or use the last one:
file_buffer* buffer = h->first_buffer;
@@ -261,7 +261,7 @@ static void buffered_close(couch_file_handle handle)
h->raw_ops->close(h->raw_ops_handle);
}
-static ssize_t buffered_pread(couch_file_handle handle, void *buf, size_t nbyte, off_t offset)
+static ssize_t buffered_pread(couch_file_handle handle, void *buf, size_t nbyte, cs_off_t offset)
{
#if LOG_BUFFER
//fprintf(stderr, "r");
@@ -288,7 +288,7 @@ static ssize_t buffered_pread(couch_file_handle handle, void *buf, size_t nbyte,
}
} else*/ {
// Move the buffer to cover the remainder of the data to be read.
- off_t block_start = offset - (offset % READ_BUFFER_CAPACITY);
+ cs_off_t block_start = offset - (offset % READ_BUFFER_CAPACITY);
err = load_buffer_from(buffer, block_start, (size_t)(offset + nbyte - block_start));
if (err < 0) {
return err;
@@ -306,7 +306,7 @@ static ssize_t buffered_pread(couch_file_handle handle, void *buf, size_t nbyte,
return total_read;
}
-static ssize_t buffered_pwrite(couch_file_handle handle, const void *buf, size_t nbyte, off_t offset)
+static ssize_t buffered_pwrite(couch_file_handle handle, const void *buf, size_t nbyte, cs_off_t offset)
{
#if LOG_BUFFER
//fprintf(stderr, "w");
@@ -354,7 +354,7 @@ static ssize_t buffered_pwrite(couch_file_handle handle, const void *buf, size_t
return nbyte_written;
}
-static off_t buffered_goto_eof(couch_file_handle handle)
+static cs_off_t buffered_goto_eof(couch_file_handle handle)
{
buffered_file_handle *h = (buffered_file_handle*)handle;
return h->raw_ops->goto_eof(h->raw_ops_handle);
Oops, something went wrong.

0 comments on commit 243e9b8

Please sign in to comment.