Skip to content

Commit

Permalink
- add an index setting for "max_terms_count"
Browse files Browse the repository at this point in the history
- default to translog.durability = async when creating an index, but set it to 'request' after the index is created
- don't hardcode nested/total_fields.limit in update_settings.rs
  • Loading branch information
eeeebbbbrrrr committed Dec 25, 2020
1 parent 929e201 commit 0e8b0ab
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
14 changes: 14 additions & 0 deletions INDEX-MANAGEMENT.md
Expand Up @@ -149,11 +149,25 @@ Type: integer
Default: 1000
Range: [1, INT_32_MAX]
```

The maximum number of fields in an index. Field and object mappings, as well as field aliases
count towards this limit. The default value is 1000.

See: https://www.elastic.co/guide/en/elasticsearch/reference/master/mapping-settings-limit.html

#### `max_terms_count`
```
Type: integer
Default: 65535
Range: [1, INT_32_MAX]
```

The maximum number of terms that can be used in a Terms Query.

Increasing this limit might be necessary for performing large [cross-index joins](CROSS-INDEX-JOINS.md)
when the ZomboDB Search Accelerator is not installed.

See: https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules.html#index-max-terms-count

### Network Options

Expand Down
29 changes: 28 additions & 1 deletion src/access_method/options.rs
Expand Up @@ -19,6 +19,7 @@ const DEFAULT_OPTIMIZE_AFTER: i32 = 0;
const DEFAULT_MAX_RESULT_WINDOW: i32 = 10000;
const DEFAULT_NESTED_FIELDS_LIMIT: i32 = 1000;
const DEFAULT_TOTAL_FIELDS_LIMIT: i32 = 1000;
const DEFAULT_MAX_TERMS_COUNT: i32 = 65535;
const DEFAULT_URL: &str = "default";
const DEFAULT_TYPE_NAME: &str = "doc";
const DEFAULT_REFRESH_INTERVAL: &str = "-1";
Expand Down Expand Up @@ -56,6 +57,7 @@ struct ZDBIndexOptionsInternal {
refresh_interval_offset: i32,
nested_fields_limit: i32,
total_fields_limit: i32,
max_terms_count: i32,
alias_offset: i32,
uuid_offset: i32,
translog_durability_offset: i32,
Expand Down Expand Up @@ -94,6 +96,7 @@ impl ZDBIndexOptionsInternal {
ops.max_result_window = DEFAULT_MAX_RESULT_WINDOW;
ops.nested_fields_limit = DEFAULT_NESTED_FIELDS_LIMIT;
ops.total_fields_limit = DEFAULT_TOTAL_FIELDS_LIMIT;
ops.max_terms_count = DEFAULT_MAX_TERMS_COUNT;
ops.nested_object_date_detection = false;
ops.nested_object_numeric_detection = false;
ops
Expand Down Expand Up @@ -236,6 +239,7 @@ pub struct ZDBIndexOptions {
max_result_window: i32,
nested_fields_limit: i32,
total_field_limit: i32,
max_terms_count: i32,
alias: String,
uuid: String,
translog_durability: String,
Expand Down Expand Up @@ -270,6 +274,7 @@ impl ZDBIndexOptions {
max_result_window: internal.max_result_window,
nested_fields_limit: internal.nested_fields_limit,
total_field_limit: internal.total_fields_limit,
max_terms_count: internal.max_terms_count,
alias: internal.alias(&heap_relation, &relation),
uuid: internal.uuid(&heap_relation, &relation),
links: options.map_or_else(|| internal.links(), |v| Some(v)),
Expand Down Expand Up @@ -353,6 +358,10 @@ impl ZDBIndexOptions {
self.total_field_limit
}

/// Returns the `max_terms_count` option stored on these index options.
///
/// This is the maximum number of terms that can be used in a Terms Query; the
/// value is forwarded to Elasticsearch as the index-level `max_terms_count`
/// setting when the index is created and when its settings are updated.
pub fn max_terms_count(&self) -> i32 {
self.max_terms_count
}

/// Returns the alias stored on these index options, borrowed as a string slice.
pub fn alias(&self) -> &str {
&self.alias
}
Expand Down Expand Up @@ -628,7 +637,7 @@ extern "C" fn validate_text_mapping(value: *const std::os::raw::c_char) {
.expect("invalid nested_object_text_mapping");
}

const NUM_REL_OPTS: usize = 22;
const NUM_REL_OPTS: usize = 23;
#[allow(clippy::unneeded_field_pattern)] // b/c of offset_of!()
#[pg_guard]
pub unsafe extern "C" fn amoptions(
Expand Down Expand Up @@ -692,6 +701,11 @@ pub unsafe extern "C" fn amoptions(
opttype: pg_sys::relopt_type_RELOPT_TYPE_INT,
offset: offset_of!(ZDBIndexOptionsInternal, total_fields_limit) as i32,
},
pg_sys::relopt_parse_elt {
optname: "max_terms_count".as_pg_cstr(),
opttype: pg_sys::relopt_type_RELOPT_TYPE_INT,
offset: offset_of!(ZDBIndexOptionsInternal, max_terms_count) as i32,
},
pg_sys::relopt_parse_elt {
optname: "alias".as_pg_cstr(),
opttype: pg_sys::relopt_type_RELOPT_TYPE_STRING,
Expand Down Expand Up @@ -939,6 +953,19 @@ pub unsafe fn init() {
pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE
},
);
pg_sys::add_int_reloption(
RELOPT_KIND_ZDB,
"max_terms_count".as_pg_cstr(),
"The maximum number of terms that can be used in Terms Query. The default value is 65535."
.as_pg_cstr(),
DEFAULT_MAX_TERMS_COUNT,
1,
std::i32::MAX,
#[cfg(feature = "pg13")]
{
pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE
},
);
pg_sys::add_string_reloption(
RELOPT_KIND_ZDB,
"alias".as_pg_cstr(),
Expand Down
12 changes: 7 additions & 5 deletions src/elasticsearch/create_index.rs
Expand Up @@ -52,10 +52,11 @@ impl ElasticsearchCreateIndexRequest {
"number_of_replicas": 0,
"refresh_interval": "-1",
"query.default_field": "zdb_all",
"translog.durability": self.elasticsearch.options.translog_durability(),
"translog.durability": "async",
"mapping.nested_fields.limit": self.elasticsearch.options.nested_fields_limit(),
"mapping.total_fields.limit": self.elasticsearch.options.total_fields_limit(),
"max_result_window": self.elasticsearch.options.max_result_window()
"max_result_window": self.elasticsearch.options.max_result_window(),
"max_terms_count": self.elasticsearch.options.max_terms_count()
} }
} else {
// we can do an index-level sort on zdb_ctid:asc
Expand All @@ -64,12 +65,13 @@ impl ElasticsearchCreateIndexRequest {
"number_of_replicas": 0,
"refresh_interval": "-1",
"query.default_field": "zdb_all",
"translog.durability": self.elasticsearch.options.translog_durability(),
"translog.durability": "async",
"mapping.nested_fields.limit": self.elasticsearch.options.nested_fields_limit(),
"mapping.total_fields.limit": self.elasticsearch.options.total_fields_limit(),
"max_result_window": self.elasticsearch.options.max_result_window(),
"max_terms_count": self.elasticsearch.options.max_terms_count(),
"sort.field": "zdb_ctid",
"sort.order": "asc",
"max_result_window": self.elasticsearch.options.max_result_window()
"sort.order": "asc"
} }
};

Expand Down
5 changes: 3 additions & 2 deletions src/elasticsearch/update_settings.rs
Expand Up @@ -15,11 +15,12 @@ impl ElasticsearchUpdateSettingsRequest {
{
"index": {
"max_result_window": self.0.options.max_result_window(),
"mapping.nested_fields.limit": 1000,
"mapping.total_fields.limit": 1000000,
"mapping.nested_fields.limit": self.0.options.nested_fields_limit(),
"mapping.total_fields.limit": self.0.options.total_fields_limit(),
"refresh_interval": self.0.options.refresh_interval().as_str(),
"number_of_replicas": self.0.options.replicas(),
"translog.durability": self.0.options.translog_durability(),
"max_terms_count": self.0.options.max_terms_count()
}
}
}),
Expand Down

0 comments on commit 0e8b0ab

Please sign in to comment.