Skip to content

Commit

Permalink
Added Interestingness Scoring for Colored Bar and Line charts (lux-or…
Browse files Browse the repository at this point in the history
…g#59)

* Modular Scores

Added scoring functions for skew, kurtosis, and number of peaks

* Correlation, Mutual Information, Skew

* Removing old unused files

* Added Interestingness Scoring for Colored Bar and Line charts

Also added test for this case in test_interestingness.py

* Bug fix Pandas Executor

Fixed bug where vis' stats and metadata were not being calculated in a specific case

* Updated PandasExecutor

Updated PandasExecutor to recompute stats and metadata only for colored charts, since non-colored charts do not require this data to compute their interestingness scores.

Reverted test_performance to previous version since performance was improved
  • Loading branch information
19thyneb committed Aug 14, 2020
1 parent c821a76 commit b04472d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
5 changes: 5 additions & 0 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ def execute_aggregate(view: Vis,isFiltered = True):
for col in columns[1:]:
view.data[col] = view.data[col].fillna(0)
assert len(list(view.data[groupby_attr.attribute])) == len(all_attr_vals), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute}`."
#need to compute the statistics and metadata for the view's data if no new rows were added
else:
if view.data.cardinality is None and has_color:
view.data.compute_stats()
view.data.compute_dataset_metadata()
view.data = view.data.sort_values(by=groupby_attr.attribute, ascending=True)
view.data = view.data.reset_index()
view.data = view.data.drop(columns="index")
Expand Down
25 changes: 23 additions & 2 deletions lux/interestingness/interestingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,29 @@ def interestingness(vis:Vis ,ldf:LuxDataFrame) -> int:
elif (n_msr == 3):
return 0.1
# colored line and barchart cases
elif ((vis.mark == "line" or vis.mark == "bar") and n_dim == 2):
return 0.2
elif (vis.mark == "line" and n_dim == 2):
return 0.15
elif (vis.mark == "bar" and n_dim == 2):
from scipy.stats import chi2_contingency
measure_column = vis.get_attr_by_data_model("measure")[0].attribute
dimension_columns = vis.get_attr_by_data_model("dimension")

groupby_column = dimension_columns[0].attribute
color_column = dimension_columns[1].attribute

contingency_table = []
groupby_cardinality = vis.data.cardinality[groupby_column]
groupby_unique_vals = vis.data.unique_values[groupby_column]
for c in range(0, groupby_cardinality):
contingency_table.append(vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][measure_column])
score = 0.12
#ValueError results if an entire column of the contingency table is 0, can happen if an applied filter results in
#a category having no counts
try:
score = min(0.13, chi2_contingency(contingency_table)[0])
except ValueError:
pass
return(score)
# Default
else:
return -1
Expand Down
16 changes: 16 additions & 0 deletions tests/test_interestingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,22 @@ def test_interestingness_1_1_1():
#check for top recommended Filter graph score is not none
assert interestingness(df.recommendation['Filter'][0],df) != None

def test_interestingness_1_2_0():
    """Check the interestingness score of a Vis with a y and a color clause.

    Loads the cars dataset, builds a Vis from ``Name`` on the ``y`` channel
    and ``Cylinders`` on the ``color`` channel, refreshes it against the
    dataframe, and asserts that ``interestingness`` returns exactly 0.13.
    """
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause
    from lux.interestingness.interestingness import interestingness

    df = pd.read_csv("lux/data/car.csv")
    y_clause = Clause(attribute="Name", channel="y")
    color_clause = Clause(attribute="Cylinders", channel="color")

    new_vis = Vis([y_clause, color_clause])
    new_vis.refresh_source(df)

    # NOTE(review): 0.13 is presumably the upper bound that the scorer applies
    # to the chi-squared statistic for colored bar charts -- confirm against
    # lux/interestingness/interestingness.py if that bound ever changes.
    assert interestingness(new_vis, df) == 0.13

def test_interestingness_0_2_0():
df = pd.read_csv("lux/data/car.csv")
df["Year"] = pd.to_datetime(df["Year"], format='%Y')
Expand Down

0 comments on commit b04472d

Please sign in to comment.