// indexed.rs: the index-based `CorrSetInner` implementation (`IndexedCorrSet`).
use crate::{
utils::{self, Captures},
Question, Row, User,
};
use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
use indexical::{define_index_type, IndexedDomain};
use super::CorrSetInner;
/// Newtype around a borrowed [`Question`].
///
/// It is the value type that `QuestionIdx` is declared for, and the element
/// type of the `questions` domain in `IndexedCorrSet`. The derived equality
/// and hashing go through the referenced `Question`.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct QuestionRef<'a>(pub &'a Question);
/// Newtype around a borrowed [`User`].
///
/// It is the value type that `UserIdx` is declared for, and the element type
/// of the `users` domain in `IndexedCorrSet`. The derived equality and hashing
/// go through the referenced `User`.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct UserRef<'a>(pub &'a User);
// Declares `QuestionIdx`, the index type for `QuestionRef` values, backed by a `u16`.
// The max-index check is disabled whenever `debug_assertions` is off, so it is
// only active in builds that have debug assertions enabled.
// NOTE(review): a `u16` backing presumably limits how many distinct questions
// can be indexed; confirm the exact limit against the `define_index_type!` docs.
define_index_type! {
pub struct QuestionIdx for QuestionRef<'a> = u16;
DISABLE_MAX_INDEX_CHECK = cfg!(not(debug_assertions));
}
// Declares `UserIdx`, the index type for `UserRef` values, backed by a `u32`.
// The max-index check is disabled whenever `debug_assertions` is off, so it is
// only active in builds that have debug assertions enabled.
// NOTE(review): a `u32` backing presumably limits how many distinct users can
// be indexed; confirm the exact limit against the `define_index_type!` docs.
define_index_type! {
pub struct UserIdx for UserRef<'a> = u32;
DISABLE_MAX_INDEX_CHECK = cfg!(not(debug_assertions));
}
/// `CorrSetInner` implementation that refers to users and questions by index
/// (`UserIdx` / `QuestionIdx`) and stores scores in hash maps keyed by those
/// indices.
pub struct IndexedCorrSet<'a> {
// Domain of the distinct users found in the input rows.
users: IndexedDomain<UserRef<'a>>,
// Domain of the distinct questions found in the input rows.
questions: IndexedDomain<QuestionRef<'a>>,
// Scores grouped first by question, then by user.
q_to_score: HashMap<QuestionIdx, HashMap<UserIdx, u32>>,
// Per-user total: the sum of that user's grouped per-question scores.
grand_totals: HashMap<UserIdx, u32>,
}
impl<'a> CorrSetInner<'a> for IndexedCorrSet<'a> {
type Q = QuestionIdx;
type Scratch = ();
fn build(data: &'a [Row]) -> Self {
let (users, questions): (HashSet<_>, HashSet<_>) = data
.iter()
.map(|row| (UserRef(&row.user), QuestionRef(&row.question)))
.unzip();
let users = IndexedDomain::from_iter(users);
let questions = IndexedDomain::from_iter(questions);
let q_to_score = utils::group_by(data.iter().map(|r| {
(
questions.index(&(QuestionRef(&r.question))),
users.index(&(UserRef(&r.user))),
r.score,
)
}));
let u_to_score = utils::group_by(data.iter().map(|r| {
(
users.index(&(UserRef(&r.user))),
questions.index(&(QuestionRef(&r.question))),
r.score,
)
}));
let grand_totals = u_to_score
.iter()
.map(|(user, scores)| {
let total = scores.values().sum::<u32>();
(*user, total)
})
.collect::<HashMap<_, _>>();
IndexedCorrSet {
users,
questions,
q_to_score,
grand_totals,
}
}
fn iter_qs(&self) -> impl Iterator<Item = QuestionIdx> + Captures<'a> + '_ {
self.questions.indices()
}
fn to_question(&self, q: Self::Q) -> &'a Question {
self.questions.value(q).0
}
fn init_scratch(&self) -> Self::Scratch {}
fn corr_set(&self, _: &mut (), qs: &[Self::Q]) -> f64 {
let (qs_scores, grand_scores): (Vec<_>, Vec<_>) = self
.users
.indices()
.filter_map(|u| {
let total = qs
.iter()
.map(|q| self.q_to_score[q].get(&u).copied())
.sum::<Option<u32>>()?;
let grand_total = self.grand_totals[&u];
Some((total as f64, grand_total as f64))
})
.unzip();
utils::correlation(&qs_scores, &grand_scores)
}
}
// Tests for this backend, compiled only under `cfg(test)`.
// NOTE(review): `test_inner!` is defined elsewhere in the crate; presumably it
// expands to a shared `CorrSetInner` test suite instantiated as `indexed` for
// `IndexedCorrSet`. Confirm against the macro definition.
#[cfg(test)]
mod test {
use super::*;
use crate::test_inner;
test_inner!(indexed, IndexedCorrSet);
}