Skip to content

Commit

Permalink
Merge 49e424c into 242db7f
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Jan 5, 2021
2 parents 242db7f + 49e424c commit 5a68e30
Show file tree
Hide file tree
Showing 19 changed files with 764 additions and 295 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: whylogs CI

on:
push:
branches: [ mainline ]
branches: [ mainline, release]
pull_request:
branches: [ mainline ]
branches: [ mainline, release]

jobs:
test:
Expand All @@ -14,7 +14,7 @@ jobs:
fail-fast: false
max-parallel: 6
matrix:
python-version: [ 3.6,3.7,3.8]
python-version: [3.6,3.7, 3.8]
os: [ubuntu-latest, macOS-latest]

steps:
Expand Down Expand Up @@ -48,11 +48,13 @@ jobs:
with:
flag-name: run-${{ matrix.os }}-${{matrix.python-version}}
parallel: true
file: ./coverage.xml
finish:
needs: test
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
uses: AndreMiras/coveralls-python-action@develop
with:
parallel-finished: true
parallel-finished: true
file: ./coverage.xml
17 changes: 15 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ def setup(app):
from recommonmark.transform import AutoStructify

app.add_config_value(
"recommonmark_config", {"auto_toc_tree_section": "Contents", "enable_eval_rst": True, "enable_math": True, "enable_inline_math": True,}, True,
"recommonmark_config",
{
"auto_toc_tree_section": "Contents",
"enable_eval_rst": True,
"enable_math": True,
"enable_inline_math": True,
},
True,
)
app.add_transform(AutoStructify)

Expand Down Expand Up @@ -249,7 +256,13 @@ def setup(app):
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
("index", "user_guide.tex", "whylogs Documentation", "WhyLabs", "manual",),
(
"index",
"user_guide.tex",
"whylogs Documentation",
"WhyLabs",
"manual",
),
]

# The name of an image file (relative to this directory) to place at the top of
Expand Down
5 changes: 4 additions & 1 deletion examples/configure_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
# Create a whylogs logging session
session = get_or_create_session()
# Log statistics for the dataset with config options
with session.logger(dataset_name="lending-club", dataset_timestamp=df["issue_d"].max(),) as ylog:
with session.logger(
dataset_name="lending-club",
dataset_timestamp=df["issue_d"].max(),
) as ylog:
ylog.log_dataframe(df)
# Note that the logger is active within this context
print("Logger is active:", ylog.is_active())
Expand Down
17 changes: 15 additions & 2 deletions scripts/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,28 @@ def run(
parent_folder = os.path.dirname(os.path.realpath(input_path))
basename = os.path.splitext(os.path.basename(input_path))[0]
epoch_minutes = int(time.time() / 60)
output_base = "{}.{}-{}-{}".format(basename, epoch_minutes, random.randint(100000, 999999), random.randint(100000, 999999),)
output_base = "{}.{}-{}-{}".format(
basename,
epoch_minutes,
random.randint(100000, 999999),
random.randint(100000, 999999),
)
output_prefix = os.path.join(parent_folder, output_base)

output_base = output_prefix
binary_output_path = output_base + ".bin"
json_output_path = output_base + ".json"

# Process records
reader = csv_reader(input_path, fmt, parse_dates=parse_dates, nrows=nrows, sep=separator, dropna=dropna, infer_dtypes=infer_dtypes,)
reader = csv_reader(
input_path,
fmt,
parse_dates=parse_dates,
nrows=nrows,
sep=separator,
dropna=dropna,
infer_dtypes=infer_dtypes,
)
profiles = {}
for record in reader:
dt = record.get(datetime_col, datetime.utcnow())
Expand Down
38 changes: 38 additions & 0 deletions scripts/segments_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
import time
from whylogs.core.datasetprofile import dataframe_profile
from whylogs import get_or_create_session

if __name__ == "__main__":
    # Demo script: profile the same CSV through four differently configured
    # session loggers, reading back the per-segment profiles each time.
    csv_path = "data/lending-club-accepted-10.csv"
    df = pd.read_csv(csv_path)
    print(df.head())
    session = get_or_create_session()

    # 1) Explicit segments: one tag list per segment, each tag a key/value dict.
    rent_segment = [{"key": "home_ownership", "value": "RENT"}]
    mortgage_segment = [{"key": "home_ownership", "value": "MORTGAGE"}]
    with session.logger(
        "segment",
        segments=[rent_segment, mortgage_segment],
        cache=1,
    ) as seg_logger:
        print(session.get_config())
        seg_logger.log_dataframe(df)
        # NOTE(review): attribute spelling is kept exactly as the script uses
        # it; confirm against the logger class before renaming.
        segment_profiles = seg_logger.segemented_profiles

    # 2) Segment by a single key, with rotation enabled ("s").
    #    The dataframe is logged twice, two seconds apart.
    with session.logger(
        "my_rotated_seg",
        segments=["home_ownership"],
        with_rotation_time="s",
        cache=1,
    ) as seg_logger:
        print(session.get_config())
        seg_logger.log_dataframe(df)
        time.sleep(2)
        seg_logger.log_dataframe(df)
        segment_profiles = seg_logger.segemented_profiles

    # 3) Segment by two keys; first log straight from the CSV file, then,
    #    after a two-second pause, from the in-memory dataframe.
    with session.logger(
        "my_rotated_seg_two_keys",
        segments=["home_ownership", "sub_grade"],
        with_rotation_time="s",
        cache=1,
    ) as seg_logger:
        print(session.get_config())
        seg_logger.log_csv(csv_path)
        time.sleep(2)
        seg_logger.log_dataframe(df)
        segment_profiles = seg_logger.segemented_profiles

    # 4) Same logging sequence as (3), but with a single segment key and
    #    profile_full_dataset=True; the logger's `profile` attribute is read
    #    in addition to the segmented profiles.
    with session.logger(
        "my_rotated_seg_two_keys",
        segments=["home_ownership"],
        profile_full_dataset=True,
        with_rotation_time="s",
        cache=1,
    ) as seg_logger:
        print(session.get_config())
        seg_logger.log_csv(csv_path)
        time.sleep(2)
        seg_logger.log_dataframe(df)
        segment_profiles = seg_logger.segemented_profiles
        full_profile = seg_logger.profile
15 changes: 6 additions & 9 deletions scripts/session_dataloging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,12 @@

if __name__ == "__main__":
df = pd.read_csv("data/lending-club-accepted-10.csv")

session = get_or_create_session()
with session.logger("test", with_rotation_time='s',cache=1) as logger:
profile = logger.log_dataframe(df)
with session.logger("test", with_rotation_time="s", cache=1) as logger:
logger.log_dataframe(df)
time.sleep(2)
profile = logger.log_dataframe(df)
profile = logger.log_dataframe(df)
logger.log_dataframe(df)
logger.log_dataframe(df)
time.sleep(2)
profile = logger.log_dataframe(df)



logger.log_dataframe(df)
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ def generate_proto(sr_path, dst_path):
sys.stderr.write("Unable to locate proto source files")
sys.exit(-1)

protoc_command = [protoc, "-I", sr_path, "--python_out={}".format(dst_path),] + proto_files
protoc_command = [
protoc,
"-I",
sr_path,
"--python_out={}".format(dst_path),
] + proto_files
if protoc is None:
sys.stderr.write("protoc is not installed nor found in ../src. Please compile it " "or install the binary package.\n")
sys.exit(-1)
Expand Down
47 changes: 31 additions & 16 deletions src/whylogs/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
.. autodata:: ALL_SUPPORTED_FORMATS
"""
from logging import getLogger
from typing import List
from typing import List, Dict, Union, Optional

import typing
# import typing
import yaml as yaml
from marshmallow import Schema, fields, post_load, validate

Expand All @@ -18,6 +18,10 @@
"""Supported output formats for whylogs writer configuration"""


SegmentTag = Dict[str, any]
SegmentTags = List[SegmentTag]


class WriterConfig:
"""
Config for whylogs writers
Expand Down Expand Up @@ -56,8 +60,8 @@ def __init__(
type: str,
formats: List[str],
output_path: str,
path_template: typing.Optional[str] = None,
filename_template: typing.Optional[str] = None,
path_template: Optional[str] = None,
filename_template: Optional[str] = None,
):
self.type = type
self.formats = formats
Expand Down Expand Up @@ -116,6 +120,14 @@ class SessionConfig:
A list of `WriterConfig` objects defining writer outputs
verbose : bool, default=False
Output verbosity
with_rotation_time : str, default=None
Time interval at which profiles are rotated. One of:
"s" for seconds
"m" for minutes
"h" for hours
"d" for days
cache : int, default=None
Number of dataset profiles to keep cached in the logger during rotation
segments : list, default=None
Either a list of keys (strings) or a list of segment tag lists, where each tag is a dict of the form {"key": ..., "value": ...}
"""

def __init__(
Expand All @@ -124,15 +136,17 @@ def __init__(
pipeline: str,
writers: List[WriterConfig],
verbose: bool = False,
with_rotation_time: str =None,
cache :int = None,
with_rotation_time: str = None,
cache: int = None,
segments: Optional[Union[List[str], List[SegmentTags]]] = None,
full_dataset_profile: bool = True,
):
self.project = project
self.pipeline = pipeline
self.verbose = verbose
self.writers = writers
self.with_rotation_time = with_rotation_time
self.cache =cache
self.cache = cache

def to_yaml(self, stream=None):
"""
Expand Down Expand Up @@ -191,8 +205,9 @@ class SessionConfigSchema(Schema):

project = fields.Str(required=True)
pipeline = fields.Str(required=True)
with_rotation_time=fields.Str(required=False, validate=validate.OneOf(["s","m","h","d"]))
cache= fields.Int(required=False)
with_rotation_time = fields.Str(
required=False, validate=validate.OneOf(["s", "m", "h", "d"]))
cache = fields.Int(required=False)
verbose = fields.Bool(missing=False)
writers = fields.List(
fields.Nested(WriterConfigSchema),
Expand Down Expand Up @@ -227,13 +242,13 @@ def load_config():

logger = getLogger(__name__)
cfg_candidates = {
"enviroment": os.environ.get("WHYLOGS_CONFIG"),
"current_dir": WHYLOGS_YML,
"home_dir": os.path.join(os.path.expanduser("~"), WHYLOGS_YML),
"opt" : os.path.join("/opt/whylogs/", WHYLOGS_YML),
"enviroment": os.environ.get("WHYLOGS_CONFIG"),
"current_dir": WHYLOGS_YML,
"home_dir": os.path.join(os.path.expanduser("~"), WHYLOGS_YML),
"opt": os.path.join("/opt/whylogs/", WHYLOGS_YML),
}

location_found=None
location_found = None

for k, fpath in cfg_candidates.items():
logger.debug(f"Attempting to load config file: {fpath}")
Expand All @@ -242,8 +257,8 @@ def load_config():

try:
with open(fpath, "rt") as f:
session_config=SessionConfig.from_yaml(f)
location_found= {k, fpath}
session_config = SessionConfig.from_yaml(f)
location_found = {k, fpath}
return session_config
except IOError as e:
logger.warning("Failed to load YAML config", e)
Expand Down

0 comments on commit 5a68e30

Please sign in to comment.