-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from vsoch/add/metrics
adding first example of metrics extraction
- Loading branch information
Showing
5 changed files
with
316 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
__author__ = "Vanessa Sochat" | ||
__copyright__ = "Copyright 2020-2021, Vanessa Sochat" | ||
__license__ = "MPL 2.0" | ||
|
||
from abc import abstractmethod | ||
from collections.abc import Mapping | ||
from caliper.logger import logger | ||
import os | ||
|
||
# Absolute path of the directory that contains this module; used as the
# anchor for locating the bundled metrics "collection" folder.
here = os.path.dirname(os.path.abspath(__file__))
|
||
|
||
class MetricBase:
    """Interface shared by metrics computed for a single tag or commit.

    The hooks are marked with ``abstractmethod`` to document what a metric
    is expected to provide. Because this class does not use ``ABCMeta``,
    the markers are not enforced: the class can be instantiated and every
    hook returns ``None`` until a subclass overrides it.
    """

    name = "metric"
    description = "Extract a metric for a particular tag or commit"

    @abstractmethod
    def extract(self, git):
        """Run the metric over the repository handle ``git``."""
        return None

    @abstractmethod
    def _extract(self, git, commit):
        """Compute the metric for one ``commit`` of ``git``."""
        return None

    @abstractmethod
    def get_file_results(self):
        """Return the extracted results at file granularity."""
        return None

    @abstractmethod
    def get_summed_results(self):
        """Return the extracted results summed across files."""
        return None
|
||
|
||
class ChangeMetricBase(MetricBase):
    """Interface for metrics computed between two tags or commits.

    Differs from ``MetricBase`` only in the ``_extract`` hook, which takes
    a pair of commits instead of a single one.
    """

    name = "changemetric"
    description = "Extract a metric between two tags or commits"

    @abstractmethod
    def _extract(self, git, commit1, commit2):
        """Compute the metric between ``commit1`` and ``commit2``."""
        return None
|
||
|
||
class MetricFinder(Mapping):
    """Read-only cache of the metrics installed in a collection folder.

    Inspired by spack packages: every sub-folder of ``metrics_path`` that
    contains a ``metric.py`` is registered under the folder name, mapped to
    the dotted import path of the metric class that file is expected to
    define.
    """

    # Class-level default; each instance rebinds its own dict in update().
    _metrics = {}

    def __init__(self, metrics_path=None):
        """Build the cache.

        Arguments:
          metrics_path: folder to scan; defaults to the "collection" folder
            next to this module.
        """
        self.metrics_path = metrics_path or os.path.join(here, "collection")
        self.update()

    def update(self):
        """Rescan ``metrics_path`` and replace the cached lookup."""
        self._metrics = self._find_metrics()

    def _find_metrics(self):
        """Return a dict of metric name -> dotted class path.

        Plain files in the collection folder are ignored, and folders
        without a ``metric.py`` are skipped with a debug message.
        """
        metrics = {}

        # os.listdir returns entries in arbitrary order; sort so iteration
        # over the finder is deterministic across platforms.
        for metric_name in sorted(os.listdir(self.metrics_path)):
            metric_dir = os.path.join(self.metrics_path, metric_name)

            # Skip files in collection folder
            if os.path.isfile(metric_dir):
                continue

            # Continue if the file doesn't exist
            metric_file = os.path.join(metric_dir, "metric.py")
            if not os.path.exists(metric_file):
                logger.debug(
                    "%s does not appear to have a metric.py, skipping." % metric_dir
                )
                continue

            # The class name means we split by underscore, capitalize, and join
            class_name = "".join(
                part.capitalize() for part in metric_name.split("_")
            )

            # NOTE(review): the module prefix is fixed even when a custom
            # metrics_path is given -- confirm that is intended.
            metrics[metric_name] = "caliper.metrics.collection.%s.metric.%s" % (
                metric_name,
                class_name,
            )
        return metrics

    def __getitem__(self, name):
        """Return the class path for ``name``, or None when it is unknown.

        Kept lenient (no KeyError) so existing callers that test the result
        for None keep working.
        """
        return self._metrics.get(name)

    def __contains__(self, name):
        """Report whether ``name`` is a known metric.

        Overridden because the Mapping mixin decides membership by catching
        KeyError from ``__getitem__``, which never raises here; without this
        override every name would appear to be present.
        """
        return name in self._metrics

    def get(self, name, default=None):
        """Return the class path for ``name``, or ``default`` when unknown.

        Overridden for the same reason as ``__contains__``: the mixin
        version would never fall back to ``default``.
        """
        return self._metrics.get(name, default)

    def __iter__(self):
        return iter(self._metrics)

    def __len__(self):
        return len(self._metrics)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
__author__ = "Vanessa Sochat" | ||
__copyright__ = "Copyright 2020-2021, Vanessa Sochat" | ||
__license__ = "MPL 2.0" | ||
|
||
from caliper.metrics.base import ChangeMetricBase | ||
import os | ||
|
||
import git as gitpython | ||
|
||
# strftime pattern applied to a commit's authored datetime when building
# the "timestamp" field of an extracted record.
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
# Tree-ish diffed against when a tagged commit has no parent. This value is
# the SHA-1 of git's empty tree object.
EMPTY_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
|
||
|
||
class Changedlines(ChangeMetricBase):
    """Count lines added and removed between each tag and its parent commit.

    Results are stored per change, keyed by a "<parent>..<tag>" label; each
    value is a list with one record (dict) per file touched by the tagged
    commit.
    """

    name = "changedlines"
    description = "count lines added and removed between versions"

    def __init__(self):
        self._data = {}

    @property
    def rawdata(self):
        """The lookup of change label -> list of per-file records."""
        return self._data

    def extract(self, git):
        """Extract per-file change records for every tag in the repository.

        Arguments:
          git: object exposing a ``folder`` attribute that points at the
            local clone to open with GitPython.
        """
        repo = gitpython.Repo(git.folder)
        for tag in repo.tags:
            # A root commit has no parent; diff it against the empty tree.
            parent = tag.commit.parents[0] if tag.commit.parents else EMPTY_SHA

            # Derive the diff name
            # NOTE(review): the parent side of the label is the parent
            # commit's message, not a tag name or sha -- confirm intended.
            tag2 = "EMPTY" if isinstance(parent, str) else parent.message.strip()
            index = "%s..%s" % (tag2, tag)

            # A ChangeMetric stores tag diffs
            self._data[index] = self._extract(git, tag.commit, parent)

    def _extract(self, git, commit1, commit2):
        """Build one record per file changed by ``commit1``.

        Arguments:
          git: object exposing the repository ``folder``.
          commit1: the commit being described.
          commit2: its parent commit (or the empty-tree sha as a string).

        Returns:
          A list of dicts: the per-file stats reported for ``commit1``
          extended with "object", "commit", "author", "timestamp" and
          "size" entries.
        """
        diffs = {diff.a_path: diff for diff in commit1.diff(commit2)}
        data = []

        # The stats on the commit is a summary of all the changes for this
        # commit, we'll iterate through it to get the information we need.
        for filepath, stats in commit1.stats.files.items():

            # Select the diff for the path in the stats
            diff = diffs.get(filepath)

            # Was the path renamed? Look for a rename whose other side is
            # this path. When nothing matches, diff stays None instead of
            # silently taking on an unrelated file's diff.
            if diff is None:
                diff = next(
                    (
                        candidate
                        for candidate in diffs.values()
                        if candidate.renamed and candidate.b_path == filepath
                    ),
                    None,
                )

            # Copy so the stats mapping handed out by the commit is not
            # mutated, then add the additional information.
            record = dict(stats)
            record.update(
                {
                    "object": os.path.join(git.folder, filepath),
                    "commit": commit1.hexsha,
                    "author": commit1.author.email,
                    "timestamp": commit1.authored_datetime.strftime(DATE_TIME_FORMAT),
                    # No matching diff means no blob sizes to compare.
                    "size": diff_size(diff) if diff is not None else 0,
                }
            )
            data.append(record)

        return data

    def get_file_results(self):
        """return a lookup of changes, where each change has a list of files"""
        return self._data

    def get_summed_results(self):
        """Get summed values (e.g., lines changed) across files"""
        summary_keys = ["size", "insertions", "deletions", "lines"]
        results = {}
        for index, items in self._data.items():
            totals = dict.fromkeys(summary_keys, 0)
            for item in items:
                for key in summary_keys:
                    # Records missing a key contribute nothing to its total.
                    totals[key] += item.get(key, 0)
            results[index] = totals
        return results
|
||
|
||
def diff_size(diff):
    """Return the size difference between the two blobs of a diff.

    Arguments:
      diff: object exposing ``a_blob``, ``b_blob``, ``new_file`` and
        ``deleted_file``; blobs expose ``size``.

    Returns:
      ``b_blob.size`` when the diff is flagged as a new file and has no
      a-side blob, the negated ``a_blob.size`` when it is flagged as a
      deleted file and has no b-side blob, and ``a_blob.size`` minus
      ``b_blob.size`` otherwise.
    """
    a_side = diff.a_blob
    b_side = diff.b_blob

    # New file: only the b-side blob exists
    if not a_side and diff.new_file:
        return b_side.size

    # Deletion (should be negative): only the a-side blob exists
    if not b_side and diff.deleted_file:
        return -1 * a_side.size

    return a_side.size - b_side.size