-
Notifications
You must be signed in to change notification settings - Fork 118
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
357 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule proto
deleted from
083464
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
generated | ||
generated/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
syntax = "proto3"; | ||
|
||
import "messages.proto"; | ||
|
||
option java_package = "com.whylogs.core.constraint"; | ||
option java_outer_classname = "Constraints"; | ||
option java_multiple_files = true; | ||
|
||
/* constraints specify one of the following binary boolean relationships. */ | ||
// Comparison operator carried in the `op` field of SummaryConstraintMsg and | ||
// ValueConstraintMsg. | ||
// NOTE(review): the names read as less-than, less-or-equal, equal, not-equal, | ||
// greater-or-equal and greater-than; confirm against the code that evaluates them. | ||
enum Op { | ||
// Zero value: what a proto3 reader sees when `op` was never set. | ||
unused = 0; | ||
LT = 1; | ||
LE = 2; | ||
EQ = 3; | ||
NE = 4; | ||
GE = 5; | ||
GT = 6; | ||
} | ||
|
||
/* Summary constraints specify a relationship between a summary field and a literal value, | ||
or between two summary fields. | ||
e.g. 'min' < 6 | ||
'std_dev' < 2.17 | ||
'min' > 'avg' | ||
*/ | ||
message SummaryConstraintMsg { | ||
// Name of this constraint. | ||
string name = 1; | ||
// Summary field that the constraint is about (e.g. 'min' in the examples above). | ||
string first_field = 2; | ||
// What `first_field` is compared with: at most one of another summary field | ||
// (`second_field`) or a literal number (`value`) can be set. | ||
oneof second { | ||
string second_field = 3; | ||
double value = 4; | ||
} | ||
// Relationship to test between `first_field` and the `second` operand. | ||
Op op = 5; | ||
// NOTE(review): effect of this flag is not visible in this file — confirm with the consumer. | ||
bool verbose = 6; | ||
} | ||
|
||
/* ValueConstraints express a binary boolean relationship between an implied numeric value and a literal. | ||
These are applied to every incoming value that is processed by whylogs. */ | ||
message ValueConstraintMsg { | ||
// Name of this constraint. | ||
string name = 1; | ||
// Literal that the implied incoming value is compared against. | ||
double value = 2; | ||
// Relationship to test between the incoming value and `value`. | ||
Op op = 3; | ||
// NOTE(review): effect of this flag is not visible in this file — confirm with the consumer. | ||
bool verbose = 4; | ||
} | ||
|
||
// List wrapper around ValueConstraintMsg. A map value cannot itself be | ||
// `repeated`, so DatasetConstraintMsg.value_constraints uses this message | ||
// as its value type. | ||
message ValueConstraintMsgs { | ||
repeated ValueConstraintMsg constraints = 1; | ||
} | ||
|
||
// List wrapper around SummaryConstraintMsg. A map value cannot itself be | ||
// `repeated`, so DatasetConstraintMsg.summary_constraints uses this message | ||
// as its value type. | ||
message SummaryConstraintMsgs { | ||
repeated SummaryConstraintMsg constraints = 1; | ||
} | ||
|
||
// All constraints for one dataset: dataset properties plus two string-keyed | ||
// maps of constraint lists. | ||
// NOTE(review): map keys are presumably column/feature names — confirm with the writer. | ||
message DatasetConstraintMsg { | ||
// DatasetProperties comes from the imported messages.proto. | ||
DatasetProperties properties = 1; | ||
// Value constraints, grouped per key. Map iteration order is undefined. | ||
map<string, ValueConstraintMsgs> value_constraints = 2; | ||
// Summary constraints, grouped per key. Map iteration order is undefined. | ||
map<string, SummaryConstraintMsgs> summary_constraints = 3; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
syntax = "proto3"; | ||
|
||
import "google/protobuf/wrappers.proto"; | ||
|
||
option java_package = "com.whylogs.core.message"; | ||
option java_outer_classname = "Messages"; | ||
option java_multiple_files = true; | ||
|
||
// Basic tallies; embedded by ColumnMessage (and by ColumnSummary in the | ||
// summaries schema). | ||
message Counters { | ||
// Plain int64: an explicit 0 and "never set" look the same to a proto3 reader. | ||
int64 count = 1; | ||
|
||
// Wrapper-typed (message) fields, so an absent value can be told apart | ||
// from an explicit 0. | ||
google.protobuf.Int64Value true_count = 2; | ||
google.protobuf.Int64Value null_count = 3; | ||
} | ||
|
||
// A data type from the nested `Type` enum together with a ratio. | ||
message InferredType { | ||
// Candidate types. Nested inside InferredType, so `UNKNOWN` here does not | ||
// clash with the top-level ModelType.UNKNOWN. | ||
enum Type { | ||
// Zero value: what a proto3 reader sees when `type` was never set. | ||
UNKNOWN = 0; | ||
NULL = 1; | ||
FRACTIONAL = 2; | ||
INTEGRAL = 3; | ||
BOOLEAN = 4; | ||
STRING = 5; | ||
} | ||
|
||
Type type = 1; | ||
// NOTE(review): presumably the share of observed values that matched `type`; | ||
// range and denominator are not visible here — confirm. | ||
double ratio = 2; | ||
} | ||
|
||
// Count, min, max and sum tracked as doubles (count is int64). Used as the | ||
// `doubles` arm of the NumbersMessage.numbers oneof. | ||
message DoublesMessage { | ||
int64 count = 1; | ||
double min = 2; | ||
double max = 3; | ||
double sum = 4; | ||
} | ||
|
||
// Count, min, max and sum tracked as int64. Used as the `longs` arm of the | ||
// NumbersMessage.numbers oneof. | ||
message LongsMessage { | ||
int64 count = 1; | ||
int64 min = 2; | ||
int64 max = 3; | ||
int64 sum = 4; | ||
} | ||
|
||
// State from which a variance can be recovered: a count, a mean, and `sum`, | ||
// which the inline note below defines as sample variance * (n-1). | ||
message VarianceMessage { | ||
int64 count = 1; | ||
double sum = 2; // sample variance * (n-1) | ||
double mean = 3; | ||
} | ||
|
||
// Serialized frequent-numbers sketch plus one sizing parameter. Referenced | ||
// by NumbersMessage.frequent_numbers. | ||
message FrequentNumbersSketchMessage { | ||
// Opaque sketch bytes; the serialization format is not defined in this file. | ||
bytes sketch = 1; | ||
// NOTE(review): presumably log2 of the sketch's maximum size — confirm. | ||
int32 lg_max_k = 2; | ||
} | ||
|
||
// Serialized frequent-items sketch plus one sizing parameter. Referenced by | ||
// ColumnMessage.frequent_items. | ||
message FrequentItemsSketchMessage { | ||
// Opaque sketch bytes; the serialization format is not defined in this file. | ||
bytes sketch = 1; | ||
// NOTE(review): presumably log2 of the sketch's maximum size — confirm. | ||
int32 lg_max_k = 2; | ||
} | ||
|
||
// Numeric tracking state: variance, a doubles-or-longs aggregate, and | ||
// several serialized sketches. | ||
message NumbersMessage { | ||
VarianceMessage variance = 1; | ||
// At most one of the two aggregates is set; setting one clears the other. | ||
oneof numbers { | ||
DoublesMessage doubles = 2; | ||
LongsMessage longs = 3; | ||
} | ||
|
||
// sketches | ||
// The three bytes fields are opaque here; their formats are not defined | ||
// in this file. | ||
bytes histogram = 4; | ||
bytes theta = 5; | ||
bytes compact_theta = 6; | ||
FrequentNumbersSketchMessage frequent_numbers = 7; | ||
} | ||
|
||
// String tracking state: a count plus serialized sketches. | ||
message StringsMessage { | ||
int64 count = 1; | ||
|
||
// sketches | ||
// Opaque bytes; the serialization formats are not defined in this file. | ||
bytes theta = 2; | ||
bytes items = 3; | ||
bytes compact_theta = 4; | ||
} | ||
|
||
|
||
// Type information: per-type counts plus an inferred type. | ||
message SchemaMessage { | ||
// NOTE(review): the int32 keys are presumably InferredType.Type numbers | ||
// (map keys cannot be enums) — confirm. The camelCase name differs from the | ||
// snake_case used by the other fields; it is left as-is because renaming | ||
// would change generated accessors. | ||
map<int32, int64> typeCounts = 1; | ||
InferredType inferred_type = 2; | ||
} | ||
|
||
// Everything recorded for one named column: counters, schema, number and | ||
// string trackers, and sketches. | ||
message ColumnMessage { | ||
string name = 1; | ||
Counters counters = 2; | ||
SchemaMessage schema = 3; | ||
NumbersMessage numbers = 4; | ||
StringsMessage strings = 5; | ||
// NOTE(review): SchemaMessage also carries an `inferred_type`; which of the | ||
// two is authoritative is not visible here — confirm. | ||
InferredType inferred_type = 6; | ||
FrequentItemsSketchMessage frequent_items = 7; | ||
// HllSketchMessage is declared further down in this file. | ||
HllSketchMessage cardinality_tracker = 8; | ||
} | ||
|
||
// Dataset-level properties: schema version, session identity, timestamps, | ||
// and free-form string maps. | ||
message DatasetProperties { | ||
uint32 schema_major_version = 1; | ||
uint32 schema_minor_version = 2; | ||
|
||
string session_id = 3; | ||
// NOTE(review): both timestamps are raw int64; epoch and unit (seconds vs | ||
// milliseconds) are not stated in this file — confirm with the writer. | ||
int64 session_timestamp = 4; | ||
int64 data_timestamp = 5; | ||
// String-to-string maps; iteration order is undefined. | ||
map<string, string> tags = 6; | ||
map<string, string> metadata = 7; | ||
// TODO: store other configuration here | ||
} | ||
|
||
// Labels, the names of the prediction/target/score fields, and a flattened | ||
// square matrix of NumbersMessage cells. | ||
message ScoreMatrixMessage { | ||
repeated string labels = 1; | ||
string prediction_field = 2; | ||
string target_field = 3; | ||
string score_field = 4; | ||
|
||
// a flattened NxN matrix (N = len(labels)) | ||
// NOTE(review): row-major vs column-major order is not stated — confirm. | ||
// Field number jumps from 4 to 10, leaving 5-9 unused. | ||
repeated NumbersMessage scores = 10; | ||
} | ||
|
||
// Accumulators for regression metrics over a prediction field and a target field. | ||
// NOTE(review): the sum_* names suggest running sums of |diff|, diff and | ||
// diff^2 between prediction and target; the exact definition of "diff" is | ||
// not visible here — confirm. | ||
message RegressionMetricsMessage{ | ||
string prediction_field = 1; | ||
string target_field = 2; | ||
uint64 count = 3; | ||
double sum_abs_diff = 4; | ||
double sum_diff = 5; | ||
double sum2_diff = 6; | ||
} | ||
|
||
// Kind of model that a ModelMetricsMessage describes. | ||
// Enum value names are scoped as siblings of the enum itself, so this | ||
// top-level `UNKNOWN` prevents any other top-level enum in the same package | ||
// from declaring a value with that name. | ||
enum ModelType { | ||
// Zero value: what a proto3 reader sees when the field was never set. | ||
UNKNOWN = 0; | ||
CLASSIFICATION = 1; | ||
REGRESSION = 2; | ||
EMBEDDINGS = 3; | ||
} | ||
|
||
// Model metrics: a model type plus a score matrix and regression metrics. | ||
// NOTE(review): presumably only the sub-message matching `modelType` is | ||
// populated, but nothing here enforces that (they are not in a oneof) — confirm. | ||
// Field names are camelCase, unlike most of this file; they are left as-is | ||
// because renaming would change generated accessors. | ||
message ModelMetricsMessage { | ||
ScoreMatrixMessage scoreMatrix = 1; | ||
ModelType modelType = 2; | ||
RegressionMetricsMessage regressionMetrics = 3; | ||
} | ||
|
||
// Model-level profile: the names of the output fields plus model metrics. | ||
message ModelProfileMessage { | ||
repeated string output_fields = 1; | ||
// Reserving fields for ModelMessage | ||
// (Numbers 2-9 are simply left unused; there is no `reserved` statement.) | ||
|
||
ModelMetricsMessage metrics = 10; | ||
} | ||
|
||
// A whole dataset profile: dataset properties, a string-keyed map of | ||
// ColumnMessage, and an optional model profile. | ||
message DatasetProfileMessage { | ||
DatasetProperties properties = 1; | ||
// NOTE(review): keys are presumably column names (ColumnMessage also has | ||
// its own `name`) — confirm. Map iteration order is undefined. | ||
map<string, ColumnMessage> columns = 2; | ||
// reserve other fields for dataset level data | ||
// NOTE(review): `modeProfile` looks like a typo for `modelProfile`. The wire | ||
// format only depends on the number 10, but a rename would change generated | ||
// accessors and the JSON key, so it is left unchanged here. | ||
ModelProfileMessage modeProfile = 10; | ||
} | ||
|
||
/** | ||
* The following section is for transmission and reconstruction of the dataset | ||
* in WhyLogs backend | ||
*/ | ||
// A chunk of a dataset's columns, tagged with the marker of the dataset it | ||
// belongs to. Used as the `columns` arm of MessageSegment.item. | ||
message ColumnsChunkSegment { | ||
// UUID is required to aggregate to the original message | ||
// This should map back to the original dataset | ||
string marker = 1; | ||
repeated ColumnMessage columns = 2; | ||
} | ||
|
||
// The dataset-properties part of a segmented dataset. Used as the `metadata` | ||
// arm of MessageSegment.item. | ||
message DatasetMetadataSegment { | ||
// NOTE(review): presumably the same dataset identifier as | ||
// ColumnsChunkSegment.marker — confirm. | ||
string marker = 1; | ||
DatasetProperties properties = 2; | ||
} | ||
|
||
// A segment of a dataset profile. This can be used to compose the | ||
// original object back | ||
message MessageSegment { | ||
string marker = 1; | ||
// Payload of this segment: at most one of dataset metadata or a chunk of | ||
// columns is set. | ||
oneof item { | ||
DatasetMetadataSegment metadata = 2; | ||
ColumnsChunkSegment columns = 3; | ||
} | ||
} | ||
|
||
// Serialized sketch used by ColumnMessage.cardinality_tracker, plus one | ||
// sizing parameter. | ||
message HllSketchMessage { | ||
// Opaque sketch bytes; the serialization format is not defined in this file. | ||
bytes sketch = 1; | ||
// NOTE(review): presumably log2 of the sketch's k parameter — confirm. | ||
int32 lg_k = 2; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
syntax = "proto3"; | ||
|
||
import "messages.proto"; | ||
|
||
option java_package = "com.whylogs.core.message"; | ||
option java_outer_classname = "Summaries"; | ||
option java_multiple_files = true; | ||
|
||
// A unique-count estimate with upper and lower values. | ||
// NOTE(review): `upper`/`lower` are presumably confidence bounds on | ||
// `estimate`; the confidence level is not stated here — confirm. | ||
message UniqueCountSummary { | ||
double estimate = 1; | ||
double upper = 2; | ||
double lower = 3; | ||
} | ||
|
||
// A list of string values, each with an estimate. | ||
message FrequentStringsSummary { | ||
// One string value and its estimate. Here the estimate is a double and is | ||
// field 2; the item types in FrequentNumbersSummary and FrequentItemsSummary | ||
// use an int64 estimate as field 1. | ||
message FrequentItem { | ||
string value = 1; | ||
double estimate = 2; | ||
} | ||
repeated FrequentItem items = 1; | ||
} | ||
|
||
// Frequent numeric values, kept in two separate lists: one for double | ||
// values and one for int64 values. | ||
message FrequentNumbersSummary { | ||
// A double value with its int64 estimate and a rank. | ||
// NOTE(review): whether rank is 0- or 1-based is not visible here — confirm. | ||
message FrequentDoubleItem { | ||
int64 estimate = 1; | ||
double value = 2; | ||
int32 rank = 3; | ||
} | ||
// Same shape as FrequentDoubleItem, with an int64 value. | ||
message FrequentLongItem { | ||
int64 estimate = 1; | ||
int64 value = 2; | ||
int32 rank = 3; | ||
} | ||
repeated FrequentDoubleItem doubles = 1; | ||
repeated FrequentLongItem longs = 2; | ||
} | ||
|
||
// A list of frequent items, each with an int64 estimate and a string value. | ||
message FrequentItemsSummary { | ||
// NOTE(review): `json_value` is presumably the item encoded as JSON text; | ||
// the encoding rules are not visible here — confirm. | ||
message FrequentItem { | ||
int64 estimate = 1; | ||
string json_value = 2; | ||
} | ||
repeated FrequentItem items = 1; | ||
} | ||
|
||
// Summary of string data: a unique-count estimate plus frequent strings. | ||
message StringsSummary { | ||
UniqueCountSummary unique_count = 1; | ||
FrequentStringsSummary frequent = 2; | ||
} | ||
|
||
// Summary form of type information: an inferred type plus per-type counts. | ||
message SchemaSummary { | ||
// InferredType comes from the imported messages.proto. | ||
InferredType inferred_type = 1; | ||
// Keyed by string here, whereas SchemaMessage.typeCounts in messages.proto | ||
// is keyed by int32. | ||
// NOTE(review): keys are presumably InferredType.Type names — confirm. | ||
map<string, int64> type_counts = 2; | ||
} | ||
|
||
// Histogram data: a start/end/width description with counts, plus max, min, | ||
// a list of bins and a total `n`. | ||
// NOTE(review): how `bins` relates to `counts` (edges vs centres, and | ||
// whether len(bins) == len(counts) + 1) is not visible here — confirm. | ||
message HistogramSummary { | ||
double start = 1; | ||
double end = 2; | ||
double width = 3; | ||
repeated int64 counts = 4; | ||
|
||
// Note the declaration order: max is field 5, min is field 6. | ||
double max = 5; | ||
double min = 6; | ||
repeated double bins = 7; | ||
// NOTE(review): presumably the total number of observations — confirm. | ||
int64 n = 8; | ||
|
||
} | ||
|
||
// Two parallel lists of doubles. | ||
// NOTE(review): presumably quantile_values[i] is the value at quantiles[i], | ||
// so both lists must have equal length; nothing in the schema enforces | ||
// this — confirm. | ||
message QuantileSummary { | ||
repeated double quantiles = 1; | ||
repeated double quantile_values = 2; | ||
} | ||
|
||
// Summary of numeric data: scalar statistics followed by nested histogram, | ||
// unique-count, quantile and frequent-number summaries. | ||
message NumberSummary { | ||
// uint64 here, whereas the count fields of DoublesMessage, LongsMessage and | ||
// VarianceMessage in messages.proto are int64. | ||
uint64 count = 1; | ||
double min = 2; | ||
double max = 3; | ||
double mean = 4; | ||
double stddev = 5; | ||
|
||
HistogramSummary histogram = 6; | ||
UniqueCountSummary unique_count = 7; | ||
QuantileSummary quantiles = 8; | ||
FrequentNumbersSummary frequent_numbers = 9; | ||
|
||
// NOTE(review): the rule that decides "discrete" is not visible here — confirm. | ||
bool is_discrete = 10; | ||
} | ||
|
||
// Summary of a single column: counters, schema, number and string | ||
// summaries, frequent items and a unique count. | ||
message ColumnSummary { | ||
// Counters comes from the imported messages.proto. | ||
Counters counters = 1; | ||
SchemaSummary schema = 2; | ||
NumberSummary number_summary = 3; | ||
StringsSummary string_summary = 4; | ||
FrequentItemsSummary frequent_items = 5; | ||
// NOTE(review): NumberSummary and StringsSummary each carry their own | ||
// `unique_count`; how this column-level one relates to them is not visible | ||
// here — confirm. | ||
UniqueCountSummary unique_count = 6; | ||
} | ||
|
||
// Summary of a dataset: its properties plus a string-keyed map of column summaries. | ||
message DatasetSummary { | ||
// DatasetProperties comes from the imported messages.proto. | ||
DatasetProperties properties = 1; | ||
// NOTE(review): keys are presumably column names — confirm. | ||
// Map iteration order is undefined. | ||
map<string, ColumnSummary> columns = 2; | ||
} | ||
|
||
// A string-keyed collection of DatasetSummary. | ||
// NOTE(review): what the key identifies (profile name, session id, ...) is | ||
// not visible here — confirm. | ||
message DatasetSummaries { | ||
map<string, DatasetSummary> profiles = 1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
"""WhyLabs version number.""" | ||
|
||
__version__ = "0.4.4-dev0" | ||
__version__ = "0.4.4-dev3" |