Use indexmap for deterministic behavior

This commit uses IndexMap as a drop-in replacement for std::collections::HashMap. The std hashmap has a non-deterministic iteration order, which makes error reporting non-deterministic (for example, when typechecking a file with multiple errors, running Nickel several times on the same file gives a different output each time, which is quite bad for the user, who might think they solved an issue when they didn't). IndexMap uses the same algorithm and is on par performance-wise, but has a deterministic iteration order (which is insertion order). Maps not related to records or types (other hashmaps, used for example in the cache to map file ids to parsed terms) are left unchanged.
tweag · Apr 7, 2023 · 3521698 · 3521698
1 parent 53dcc80
commit 3521698
Show file tree

Hide file tree

Showing 18 changed files with 100 additions and 107 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,7 +23,7 @@ default = ["markdown", "repl", "doc"]
 markdown = ["termimad"]
 repl = ["rustyline", "rustyline-derive", "ansi_term"]
 repl-wasm = ["wasm-bindgen", "js-sys", "serde_repr"]
-doc = [ "comrak" ]
+doc = ["comrak"]
 
 [build-dependencies]
 lalrpop = "0.19.9"
@@ -67,6 +67,7 @@ once_cell = "1.17.1"
 typed-arena = "2.0.2"
 malachite = {version = "0.3.2", features = ["enable_serde"] }
 malachite-q = "0.3.2"
+indexmap = {version = "1.9.3", features = ["serde"] }
 
 [dev-dependencies]
 pretty_assertions = "1.3.0"

diff --git a/lsp/nls/src/linearization/building.rs b/lsp/nls/src/linearization/building.rs
@@ -5,7 +5,7 @@ use log::debug;
 use nickel_lang::{
     cache::Cache,
     identifier::Ident,
-    term::record::Field,
+    term::{record::Field, IndexMap},
     typecheck::{linearization::LinearizationState, UnifType},
     types::TypeF,
 };
@@ -105,7 +105,7 @@ impl<'b> Building<'b> {
     pub(super) fn register_fields(
         &mut self,
         current_file: FileId,
-        record_fields: &HashMap<Ident, Field>,
+        record_fields: &IndexMap<Ident, Field>,
         record: ItemId,
         env: &mut Environment,
     ) {

diff --git a/src/deserialize.rs b/src/deserialize.rs
@@ -1,7 +1,6 @@
 //! Deserialization of an evaluated program to plain Rust types.
 
 use malachite::{num::conversion::traits::RoundingFrom, rounding_modes::RoundingMode};
-use std::collections::HashMap;
 use std::iter::ExactSizeIterator;
 
 use serde::de::{
@@ -12,7 +11,7 @@ use serde::de::{
 use crate::identifier::Ident;
 use crate::term::array::{self, Array};
 use crate::term::record::Field;
-use crate::term::{RichTerm, Term};
+use crate::term::{IndexMap, RichTerm, Term};
 
 macro_rules! deserialize_number {
     ($method:ident, $type:tt, $visit:ident) => {
@@ -387,12 +386,12 @@ where
 }
 
 struct RecordDeserializer {
-    iter: <HashMap<Ident, Field> as IntoIterator>::IntoIter,
+    iter: <IndexMap<Ident, Field> as IntoIterator>::IntoIter,
     field: Option<Field>,
 }
 
 impl RecordDeserializer {
-    fn new(map: HashMap<Ident, Field>) -> Self {
+    fn new(map: IndexMap<Ident, Field>) -> Self {
         RecordDeserializer {
             iter: map.into_iter(),
             field: None,
@@ -444,7 +443,7 @@ impl<'de> MapAccess<'de> for RecordDeserializer {
 }
 
 fn visit_record<'de, V>(
-    record: HashMap<Ident, Field>,
+    record: IndexMap<Ident, Field>,
     visitor: V,
 ) -> Result<V::Value, RustDeserializationError>
 where

diff --git a/src/eval/merge.rs b/src/eval/merge.rs
@@ -56,11 +56,11 @@ use crate::error::{EvalError, IllegalPolymorphicTailAction};
 use crate::label::{Label, MergeLabel};
 use crate::position::TermPos;
 use crate::term::{
+    IndexMap,
     record::{self, Field, FieldDeps, FieldMetadata, RecordAttrs, RecordData},
     BinaryOp, RichTerm, Term, TypeAnnotation,
 };
 use crate::transform::Closurizable;
-use std::collections::HashMap;
 
 /// Merging mode. Merging is used both to combine standard data and to apply contracts defined as
 /// records.
@@ -257,11 +257,11 @@ pub fn merge<C: Cache>(
                 });
             }
 
-            let hashmap::SplitResult {
+            let split::SplitResult {
                 left,
                 center,
                 right,
-            } = hashmap::split(r1.fields, r2.fields);
+            } = split::split(r1.fields, r2.fields);
 
             match mode {
                 MergeMode::Contract(label) if !r2.attrs.open && !left.is_empty() => {
@@ -301,7 +301,7 @@ Append `, ..` at the end of the record contract, as in `{some_field | SomeContra
                 .chain(right.keys())
                 .cloned()
                 .collect();
-            let mut m = HashMap::with_capacity(left.len() + center.len() + right.len());
+            let mut m = IndexMap::with_capacity(left.len() + center.len() + right.len());
             let mut env = Environment::new();
 
             // Merging recursive records is the one operation that may override recursive fields. To
@@ -671,24 +671,24 @@ impl RevertClosurize for Vec<PendingContract> {
     }
 }
 
-pub mod hashmap {
-    use std::collections::HashMap;
+pub mod split {
+    use crate::term::IndexMap;
 
     pub struct SplitResult<K, V1, V2> {
-        pub left: HashMap<K, V1>,
-        pub center: HashMap<K, (V1, V2)>,
-        pub right: HashMap<K, V2>,
+        pub left: IndexMap<K, V1>,
+        pub center: IndexMap<K, (V1, V2)>,
+        pub right: IndexMap<K, V2>,
     }
 
-    /// Split two hashmaps m1 and m2 in three parts (left,center,right), where left holds bindings
+    /// Split two maps m1 and m2 in three parts (left,center,right), where left holds bindings
     /// `(key,value)` where key is not in `m2.keys()`, right is the dual (keys of m2 that are not
     /// in m1), and center holds bindings for keys that are both in m1 and m2.
-    pub fn split<K, V1, V2>(m1: HashMap<K, V1>, m2: HashMap<K, V2>) -> SplitResult<K, V1, V2>
+    pub fn split<K, V1, V2>(m1: IndexMap<K, V1>, m2: IndexMap<K, V2>) -> SplitResult<K, V1, V2>
     where
         K: std::hash::Hash + Eq,
     {
-        let mut left = HashMap::new();
-        let mut center = HashMap::new();
+        let mut left = IndexMap::new();
+        let mut center = IndexMap::new();
         let mut right = m2;
 
         for (key, value) in m1 {
@@ -712,8 +712,8 @@ pub mod hashmap {
 
         #[test]
         fn all_left() -> Result<(), String> {
-            let mut m1 = HashMap::new();
-            let m2 = HashMap::<isize, isize>::new();
+            let mut m1 = IndexMap::new();
+            let m2 = IndexMap::<isize, isize>::new();
 
             m1.insert(1, 1);
             let SplitResult {
@@ -735,8 +735,8 @@ pub mod hashmap {
 
         #[test]
         fn all_right() -> Result<(), String> {
-            let m1 = HashMap::<isize, isize>::new();
-            let mut m2 = HashMap::new();
+            let m1 = IndexMap::<isize, isize>::new();
+            let mut m2 = IndexMap::new();
 
             m2.insert(1, 1);
             let SplitResult {
@@ -760,8 +760,8 @@ pub mod hashmap {
 
         #[test]
         fn all_center() -> Result<(), String> {
-            let mut m1 = HashMap::new();
-            let mut m2 = HashMap::new();
+            let mut m1 = IndexMap::new();
+            let mut m2 = IndexMap::new();
 
             m1.insert(1, 1);
             m2.insert(1, 2);
@@ -786,8 +786,8 @@ pub mod hashmap {
 
         #[test]
         fn mixed() -> Result<(), String> {
-            let mut m1 = HashMap::new();
-            let mut m2 = HashMap::new();
+            let mut m1 = IndexMap::new();
+            let mut m2 = IndexMap::new();
 
             m1.insert(1, 1);
             m1.insert(2, 1);

diff --git a/src/eval/operation.rs b/src/eval/operation.rs
@@ -23,6 +23,7 @@ use crate::{
     serialize::ExportFormat,
     stdlib::internals,
     term::{
+        IndexMap,
         array::{Array, ArrayAttrs},
         make as mk_term,
         record::{self, Field, FieldMetadata, RecordData},
@@ -47,7 +48,7 @@ use md5::digest::Digest;
 use simple_counter::*;
 use unicode_segmentation::UnicodeSegmentation;
 
-use std::{collections::HashMap, convert::TryFrom, iter::Extend, rc::Rc};
+use std::{convert::TryFrom, iter::Extend, rc::Rc};
 
 generate_counter!(FreshVariableCounter, usize);
 
@@ -3484,11 +3485,11 @@ fn eq<C: Cache>(
         (Term::SealingKey(s1), Term::SealingKey(s2)) => Ok(EqResult::Bool(s1 == s2)),
         (Term::Enum(id1), Term::Enum(id2)) => Ok(EqResult::Bool(id1 == id2)),
         (Term::Record(r1), Term::Record(r2)) => {
-            let merge::hashmap::SplitResult {
+            let merge::split::SplitResult {
                 left,
                 center,
                 right,
-            } = merge::hashmap::split(r1.fields, r2.fields);
+            } = merge::split::split(r1.fields, r2.fields);
 
             // As for other record operations, we ignore optional fields without a definition.
             if !left.values().all(Field::is_empty_optional)
@@ -3662,7 +3663,7 @@ impl RecordDataExt for RecordData {
     where
         F: FnMut(Ident, RichTerm) -> RichTerm,
     {
-        let fields: Result<HashMap<_, _>, _> = self
+        let fields: Result<IndexMap<_, _>, _> = self
             .fields
             .into_iter()
             .filter_map(|(id, field)| {

diff --git a/src/parser/grammar.lalrpop b/src/parser/grammar.lalrpop
@@ -35,7 +35,6 @@
 //! corresponding more precise return type. Other rules that produce or just
 //! propagate general uniterms have to return a `UniTerm`.
 use std::{
-    collections::HashMap,
     ffi::OsString,
     convert::TryFrom,
 };
@@ -290,7 +289,7 @@ Applicative: UniTerm = {
     NOpPre<AsTerm<RecordOperand>>,
     RecordOperand,
     "match" "{" <cases: (MatchCase ",")*> <last: MatchCase?> "}" => {
-        let mut acc = HashMap::with_capacity(cases.len());
+        let mut acc = IndexMap::with_capacity(cases.len());
         let mut default = None;
 
         for case in cases.into_iter().map(|x| x.0).chain(last.into_iter()) {

diff --git a/src/parser/utils.rs b/src/parser/utils.rs
@@ -1,7 +1,6 @@
 //! Various helpers and companion code for the parser are put here to keep the grammar definition
 //! uncluttered.
-use std::collections::hash_map::Entry;
-use std::collections::HashMap;
+use indexmap::map::Entry;
 use std::fmt::Debug;
 use std::rc::Rc;
 
@@ -17,10 +16,9 @@ use crate::{
     mk_fun,
     position::{RawSpan, TermPos},
     term::{
+        *,
         make as mk_term,
         record::{Field, FieldMetadata, RecordAttrs, RecordData},
-        BinaryOp, LabeledType, LetMetadata, Rational, RichTerm, StrChunk, Term, TypeAnnotation,
-        UnaryOp,
     },
     types::{TypeF, Types},
 };
@@ -146,7 +144,7 @@ impl FieldDef {
 
             match path_elem {
                 FieldPathElem::Ident(id) => {
-                    let mut fields = HashMap::new();
+                    let mut fields = IndexMap::new();
                     fields.insert(id, acc);
                     Field::from(RichTerm::new(
                         Term::Record(RecordData {
@@ -161,7 +159,7 @@ impl FieldDef {
 
                     if let Some(static_access) = static_access {
                         let id = Ident::new_with_pos(static_access, exp.pos);
-                        let mut fields = HashMap::new();
+                        let mut fields = IndexMap::new();
                         fields.insert(id, acc);
                         Field::from(RichTerm::new(
                             Term::Record(RecordData {
@@ -446,10 +444,10 @@ pub fn build_record<I>(fields: I, attrs: RecordAttrs) -> Term
 where
     I: IntoIterator<Item = (FieldPathElem, Field)> + Debug,
 {
-    let mut static_fields = HashMap::new();
+    let mut static_fields = IndexMap::new();
     let mut dynamic_fields = Vec::new();
 
-    fn insert_static_field(static_fields: &mut HashMap<Ident, Field>, id: Ident, field: Field) {
+    fn insert_static_field(static_fields: &mut IndexMap<Ident, Field>, id: Ident, field: Field) {
         match static_fields.entry(id) {
             Entry::Occupied(mut occpd) => {
                 // temporarily putting an empty field in the entry to take the previous value.

diff --git a/src/pretty.rs b/src/pretty.rs
@@ -3,17 +3,15 @@ use crate::identifier::Ident;
 use crate::parser::lexer::KEYWORDS;
 
 use crate::term::{
+    *,
     record::{Field, FieldMetadata},
-    BinaryOp, MergePriority, Number, RichTerm, StrChunk, Term, TypeAnnotation, UnaryOp,
 };
-use crate::types::{EnumRows, EnumRowsF, RecordRowF, RecordRows, RecordRowsF, TypeF, Types};
+use crate::types::*;
 
 use malachite::num::{basic::traits::Zero, conversion::traits::ToSci};
 pub use pretty::{DocAllocator, DocBuilder, Pretty};
 use regex::Regex;
 
-use std::collections::HashMap;
-
 #[derive(Clone, Copy, Eq, PartialEq)]
 enum StringRenderStyle {
     Monoline,
@@ -39,7 +37,7 @@ fn min_interpolate_sign(text: &str) -> usize {
         .unwrap_or(1)
 }
 
-fn sorted_map<K: Ord, V>(m: &'_ HashMap<K, V>) -> Vec<(&'_ K, &'_ V)> {
+fn sorted_map<K: Ord, V>(m: &'_ IndexMap<K, V>) -> Vec<(&'_ K, &'_ V)> {
     let mut ret: Vec<(&K, &V)> = m.iter().collect();
     ret.sort_by_key(|(k, _)| *k);
     ret
@@ -229,7 +227,11 @@ where
             .append(self.text(","))
     }
 
-    fn fields(&'a self, fields: &HashMap<Ident, Field>, with_doc: bool) -> DocBuilder<'a, Self, A> {
+    fn fields(
+        &'a self,
+        fields: &IndexMap<Ident, Field>,
+        with_doc: bool,
+    ) -> DocBuilder<'a, Self, A> {
         self.intersperse(
             sorted_map(fields)
                 .iter()

diff --git a/src/serialize.rs b/src/serialize.rs
@@ -6,7 +6,7 @@ use crate::{
     term::{
         array::{Array, ArrayAttrs},
         record::RecordData,
-        Number, RichTerm, Term, TypeAnnotation,
+        Number, RichTerm, Term, TypeAnnotation, IndexMap,
     },
 };
 
@@ -17,7 +17,7 @@ use serde::{
 
 use malachite::num::conversion::traits::IsInteger;
 
-use std::{collections::HashMap, fmt, io, rc::Rc, str::FromStr};
+use std::{fmt, io, rc::Rc, str::FromStr};
 
 /// Available export formats.
 // If you add or remove variants, remember to update the CLI docs in `src/bin/nickel.rs'
@@ -117,7 +117,6 @@ where
 }
 
 /// Serializer for a record. Serialize fields in alphabetical order to get a deterministic output
-/// (by default, `HashMap`'s randomness implies a randomized order of fields in the output).
 pub fn serialize_record<S>(record: &RecordData, serializer: S) -> Result<S::Ok, S::Error>
 where
     S: Serializer,
@@ -147,7 +146,7 @@ pub fn deserialize_record<'de, D>(deserializer: D) -> Result<RecordData, D::Erro
 where
     D: Deserializer<'de>,
 {
-    let fields = HashMap::deserialize(deserializer)?;
+    let fields = IndexMap::deserialize(deserializer)?;
     Ok(RecordData::with_field_values(fields))
 }