Skip to content

Commit e1a3029

Browse files
committed
Measure space allocated for data
1 parent de73f7b commit e1a3029

File tree

13 files changed

+110
-35
lines changed

13 files changed

+110
-35
lines changed

Cargo.lock

+63-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danny-base/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ bincode = "1.0"
2020
packed_simd = { version = "0.3.4", package = "packed_simd_2" }
2121
lazy_static = "1.3"
2222
statrs = "0.11"
23+
deepsize = "0.2"
2324

2425
[dev-dependencies]
2526
probabilistic-collections = "0.5"

danny-base/src/lsh.rs

+2-1
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ use crate::types::*;
33
use rand::distributions::{Distribution, Normal, Uniform};
44
use rand::Rng;
55
use std::clone::Clone;
6+
use deepsize::DeepSizeOf;
67

78
pub trait LSHFunction {
89
type Input;
@@ -18,7 +19,7 @@ pub trait LSHFunction {
1819
}
1920
}
2021

21-
#[derive(Clone, Abomonation, Debug, Hash)]
22+
#[derive(Clone, Abomonation, Debug, Hash, DeepSizeOf)]
2223
pub struct TensorPool {
2324
left: Vec<u16>,
2425
right: Vec<u16>,

danny-base/src/sketch.rs

+7-6
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
use deepsize::DeepSizeOf;
12
use crate::lsh::*;
23
use crate::types::*;
34
use packed_simd::u64x2;
@@ -15,7 +16,7 @@ pub trait Sketcher {
1516
fn sketch(&self, v: &Self::Input) -> Self::Output;
1617
}
1718

18-
pub trait BitBasedSketch: Clone + Copy {
19+
pub trait BitBasedSketch: Clone + Copy + DeepSizeOf {
1920
fn different_bits(&self, other: &Self) -> u32;
2021
fn same_bits(&self, other: &Self) -> u32;
2122
fn num_bits(&self) -> usize;
@@ -32,7 +33,7 @@ pub trait FromCosine: Sized {
3233
}
3334

3435
/// A 0-bits sketch
35-
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq)]
36+
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq, DeepSizeOf)]
3637
pub struct Sketch0;
3738

3839
impl BitBasedSketch for Sketch0 {
@@ -84,7 +85,7 @@ where
8485
}
8586
}
8687

87-
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq)]
88+
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq, DeepSizeOf)]
8889
pub struct Sketch64 {
8990
data: u64,
9091
}
@@ -142,7 +143,7 @@ where
142143
}
143144
}
144145

145-
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq)]
146+
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq, DeepSizeOf)]
146147
pub struct Sketch128 {
147148
data: [u64; 2],
148149
}
@@ -270,7 +271,7 @@ where
270271
}
271272
}
272273

273-
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq)]
274+
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq, DeepSizeOf)]
274275
pub struct Sketch256 {
275276
data: [u64; 4],
276277
}
@@ -357,7 +358,7 @@ where
357358
}
358359
}
359360

360-
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq)]
361+
#[derive(Debug, Clone, Copy, Abomonation, Hash, Eq, PartialEq, DeepSizeOf)]
361362
pub struct Sketch512 {
362363
data: [u64; 8],
363364
}

danny-base/src/types.rs

+4-3
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,9 @@ use std::collections::BTreeSet;
66
use std::collections::HashMap;
77
use std::fmt::Debug;
88
use std::hash::Hash;
9+
use deepsize::DeepSizeOf;
910

10-
#[derive(Clone, Default, Eq, Ord, Hash, PartialEq, PartialOrd, Abomonation, Copy, Debug)]
11+
#[derive(Clone, Default, Eq, Ord, Hash, PartialEq, PartialOrd, Abomonation, Copy, Debug, DeepSizeOf)]
1112
pub struct ElementId(pub u32);
1213

1314
impl Into<u64> for ElementId {
@@ -16,7 +17,7 @@ impl Into<u64> for ElementId {
1617
}
1718
}
1819

19-
#[derive(Serialize, Deserialize, Clone, Abomonation, Default)]
20+
#[derive(Serialize, Deserialize, Clone, Abomonation, Default, DeepSizeOf)]
2021
pub struct Vector {
2122
data: Vec<f32>,
2223
}
@@ -68,7 +69,7 @@ impl Vector {
6869
}
6970
}
7071

71-
#[derive(Abomonation, Serialize, Deserialize, Clone, Debug, Default)]
72+
#[derive(Abomonation, Serialize, Deserialize, Clone, Debug, Default, DeepSizeOf)]
7273
pub struct BagOfWords {
7374
pub universe: u32,
7475
words: Vec<u32>,

danny/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ packed_simd = { version = "0.3.4", package = "packed_simd_2" }
4444
progress-logger = "0.1.1"
4545
sha2 = "0.8"
4646
pprof = {version = "0.4.1", features = ["flamegraph", "protobuf"]}
47+
deepsize = "0.2"
4748

4849
[build-dependencies]
4950
vergen = "3.1"

danny/src/cartesian.rs

+3-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
use crate::operators::Route;
2+
use deepsize::DeepSizeOf;
23

34
/// Utilities to compute the (self) cartesian product of a stream.
45
///
@@ -47,7 +48,7 @@ impl SelfCartesian {
4748
}
4849
}
4950

50-
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Abomonation)]
51+
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Abomonation, DeepSizeOf)]
5152
pub enum Marker {
5253
Left,
5354
Right,
@@ -75,7 +76,7 @@ impl Marker {
7576
}
7677
}
7778

78-
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Abomonation)]
79+
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Abomonation, DeepSizeOf)]
7980
pub struct CartesianKey(pub u8, pub u8);
8081

8182
impl CartesianKey {

danny/src/config.rs

-1
Original file line number | Diff line number | Diff line change
@@ -399,7 +399,6 @@ impl Hosts {
399399
pub fn parse_hosts(arg: &str) -> Result<Hosts, String> {
400400
use std::fs::File;
401401
use std::io::{BufRead, BufReader};
402-
use std::path::PathBuf;
403402

404403
let path = PathBuf::from(arg);
405404
if path.is_file() {

danny/src/join.rs

+6-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
1+
use crate::sysmonitor::DATASTRUCTURES_BYTES;
12
use crate::cartesian::*;
23
use crate::logging::*;
34
use crate::operators::*;
5+
use deepsize::DeepSizeOf;
46
use channels::pact::Pipeline;
57
use std::cell::RefCell;
68
use std::collections::HashMap;
@@ -68,8 +70,8 @@ impl<G, K, V> Join<G, K, V> for Stream<G, (K, V)>
6870
where
6971
G: Scope,
7072
G::Timestamp: ToStepId,
71-
K: KeyData + Ord + std::fmt::Debug,
72-
V: ExchangeData + KeyPayload,
73+
K: KeyData + Ord + std::fmt::Debug + DeepSizeOf,
74+
V: ExchangeData + KeyPayload + DeepSizeOf,
7375
{
7476
fn self_join_map<F, I, O>(&self, balance: Balance, mut f: F) -> Stream<G, O>
7577
where
@@ -108,6 +110,8 @@ where
108110

109111
input.for_each(|t, data| {
110112
let data = data.replace(Vec::new());
113+
DATASTRUCTURES_BYTES
114+
.fetch_add(data.deep_size_of(), std::sync::atomic::Ordering::SeqCst);
111115
for (k, v) in data {
112116
stash
113117
.borrow_mut()

0 commit comments

Comments
 (0)