references.bib

@article{kumar_shapley_2020,
	title = {Shapley {Residuals}: {Quantifying} the limits of the {Shapley} value for explanations},
	abstract = {Popular feature importance techniques compute additive approximations to nonlinear models by first defining a cooperative game describing the value of different subsets of the model’s features, then calculating the resulting game’s Shapley values to attribute credit additively between the features. However, the specific modeling settings in which the Shapley values are a poor approximation for the true game have not been well-described. In this paper we utilize an interpretation of Shapley values as the result of an orthogonal projection between vector spaces to calculate a residual representing the kernel component of that projection. We provide an algorithm for computing these residuals, characterize different modeling settings based on the value of the residuals, and demonstrate that they capture information about model predictions that Shapley values cannot.},
	language = {en},
	author = {Kumar, I. Elizabeth and Scheidegger, Carlos and Venkatasubramanian, Suresh and Friedler, Sorelle A.},
	year = {2020},
	pages = {10},
	file = {Kumar et al. - Shapley Residuals Quantifying the limits of the S.pdf:/home/zduey/Zotero/storage/Y3NSGDP7/Kumar et al. - Shapley Residuals Quantifying the limits of the S.pdf:application/pdf},
}

@article{kumar_problems_2020,
	title = {Problems with {Shapley}-value-based explanations as feature importance measures},
	url = {https://arxiv.org/abs/2002.11097},
	abstract = {Game-theoretic formulations of feature importance have become popular as a way to "explain" machine learning models. These methods define a cooperative game between the features of a model and distribute influence among these input elements using some form of the game's unique Shapley values. Justification for these methods rests on two pillars: their desirable mathematical properties, and their applicability to specific motivations for explanations. We show that mathematical problems arise when Shapley values are used for feature importance and that the solutions to mitigate these necessarily induce further complexity, such as the need for causal reasoning. We also draw on additional literature to argue that Shapley values do not provide explanations which suit human-centric goals of explainability.},
	journal = {arXiv:2002.11097 [cs, stat]},
	author = {Kumar, I. Elizabeth and Venkatasubramanian, Suresh and Scheidegger, Carlos and Friedler, Sorelle},
	month = jun,
	year = {2020},
	eprint = {2002.11097},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/7ANERNKA/Kumar et al. - 2020 - Problems with Shapley-value-based explanations as .pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/MVVHUE2U/2002.html:text/html},
}

@article{frye_asymmetric_2020,
	title = {Asymmetric {Shapley} values: incorporating causal knowledge into model-agnostic explainability},
	shorttitle = {Asymmetric {Shapley} values},
	url = {https://arxiv.org/abs/1910.06358},
	abstract = {Explaining AI systems is fundamental both to the development of high performing models and to the trust placed in them by their users. The Shapley framework for explainability has strength in its general applicability combined with its precise, rigorous foundation: it provides a common, model-agnostic language for AI explainability and uniquely satisfies a set of intuitive mathematical axioms. However, Shapley values are too restrictive in one significant regard: they ignore all causal structure in the data. We introduce a less restrictive framework, Asymmetric Shapley values (ASVs), which are rigorously founded on a set of axioms, applicable to any AI system, and flexible enough to incorporate any causal structure known to be respected by the data. We demonstrate that ASVs can (i) improve model explanations by incorporating causal information, (ii) provide an unambiguous test for unfair discrimination in model predictions, (iii) enable sequentially incremental explanations in time-series models, and (iv) support feature-selection studies without the need for model retraining.},
	journal = {arXiv:1910.06358 [cs, stat]},
	author = {Frye, Christopher and Rowat, Colin and Feige, Ilya},
	month = oct,
	year = {2020},
	eprint = {1910.06358},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/BGJ9S2G8/Frye et al. - 2020 - Asymmetric Shapley values incorporating causal kn.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/5CELMEPS/1910.html:text/html},
}

@article{viswanathan_model_2021,
	title = {Model {Explanations} via the {Axiomatic} {Causal} {Lens}},
	url = {https://arxiv.org/abs/2109.03890},
	abstract = {Explaining the decisions of black-box models has been a central theme in the study of trustworthy ML. Numerous measures have been proposed in the literature; however, none of them have been able to adopt a provably causal take on explainability. Building upon Halpern and Pearl's formal definition of a causal explanation, we derive an analogous set of axioms for the classification setting, and use them to derive three explanation measures. Our first measure is a natural adaptation of Chockler and Halpern's notion of causal responsibility, whereas the other two correspond to existing game-theoretic influence measures. We present an axiomatic treatment for our proposed indices, showing that they can be uniquely characterized by a set of desirable properties. We compliment this with computational analysis, providing probabilistic approximation schemes for all of our proposed measures. Thus, our work is the first to formally bridge the gap between model explanations, game-theoretic influence, and causal analysis.},
	journal = {arXiv:2109.03890 [cs]},
	author = {Viswanathan, Vignesh and Zick, Yair},
	month = sep,
	year = {2021},
	eprint = {2109.03890},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/UKMXBC6E/Viswanathan and Zick - 2021 - Model Explanations via the Axiomatic Causal Lens.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/MTSJPEW4/2109.html:text/html},
}

@article{basu_shapley_2020,
	title = {On {Shapley} {Credit} {Allocation} for {Interpretability}},
	url = {https://arxiv.org/abs/2012.05506},
	abstract = {We emphasize the importance of asking the right question when interpreting the decisions of a learning model. We discuss a natural extension of the theoretical machinery from Janzing et. al. 2020, which answers the question "Why did my model predict a person has cancer?" for answering a more involved question, "What caused my model to predict a person has cancer?" While the former quantifies the direct effects of variables on the model, the latter also accounts for indirect effects, thereby providing meaningful insights wherever human beings can reason in terms of cause and effect. We propose three broad categories for interpretations: observational, model-specific and causal each of which are significant in their own right. Furthermore, this paper quantifies feature relevance by weaving different natures of interpretations together with different measures as characteristic functions for Shapley symmetrization. Besides the widely used expected value of the model, we also discuss measures of statistical uncertainty and dispersion as informative candidates, and their merits in generating explanations for each data point, some of which are used in this context for the first time. These measures are not only useful for studying the influence of variables on the model output, but also on the predictive performance of the model, and for that we propose relevant characteristic functions that are also used for the first time.},
	journal = {arXiv:2012.05506 [cs, stat]},
	author = {Basu, Debraj},
	month = dec,
	year = {2020},
	eprint = {2012.05506},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/S3IJYAT2/Basu - 2020 - On Shapley Credit Allocation for Interpretability.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/7QLRGHIY/2012.html:text/html},
}

@article{aas_explaining_2020,
	title = {Explaining individual predictions when features are dependent: {More} accurate approximations to {Shapley} values},
	shorttitle = {Explaining individual predictions when features are dependent},
	url = {https://arxiv.org/abs/1903.10464},
	abstract = {Explaining complex or seemingly simple machine learning models is an important practical problem. We want to explain individual predictions from a complex machine learning model by learning simple, interpretable explanations. Shapley values is a game theoretic concept that can be used for this purpose. The Shapley value framework has a series of desirable theoretical properties, and can in principle handle any predictive model. Kernel SHAP is a computationally efficient approximation to Shapley values in higher dimensions. Like several other existing methods, this approach assumes that the features are independent, which may give very wrong explanations. This is the case even if a simple linear model is used for predictions. In this paper, we extend the Kernel SHAP method to handle dependent features. We provide several examples of linear and non-linear models with various degrees of feature dependence, where our method gives more accurate approximations to the true Shapley values. We also propose a method for aggregating individual Shapley values, such that the prediction can be explained by groups of dependent variables.},
	journal = {arXiv:1903.10464 [cs, stat]},
	author = {Aas, Kjersti and Jullum, Martin and Løland, Anders},
	month = feb,
	year = {2020},
	eprint = {1903.10464},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Statistics - Methodology},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/3GIVUBNY/Aas et al. - 2020 - Explaining individual predictions when features ar.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/7THWCEBV/1903.html:text/html},
}

@article{wachter_counterfactual_2018,
	title = {Counterfactual {Explanations} without {Opening} the {Black} {Box}: {Automated} {Decisions} and the {GDPR}},
	shorttitle = {Counterfactual {Explanations} without {Opening} the {Black} {Box}},
	url = {https://arxiv.org/abs/1711.00399},
	abstract = {There has been much discussion of the right to explanation in the EU General Data Protection Regulation, and its existence, merits, and disadvantages. Implementing a right to explanation that opens the black box of algorithmic decision-making faces major legal and technical barriers. Explaining the functionality of complex algorithmic decision-making systems and their rationale in specific cases is a technically challenging problem. Some explanations may offer little meaningful information to data subjects, raising questions around their value. Explanations of automated decisions need not hinge on the general public understanding how algorithmic systems function. Even though such interpretability is of great importance and should be pursued, explanations can, in principle, be offered without opening the black box. Looking at explanations as a means to help a data subject act rather than merely understand, one could gauge the scope and content of explanations according to the specific goal or action they are intended to support. From the perspective of individuals affected by automated decision-making, we propose three aims for explanations: (1) to inform and help the individual understand why a particular decision was reached, (2) to provide grounds to contest the decision if the outcome is undesired, and (3) to understand what would need to change in order to receive a desired result in the future, based on the current decision-making model. We assess how each of these goals finds support in the GDPR. We suggest data controllers should offer a particular type of explanation, unconditional counterfactual explanations, to support these three aims. These counterfactual explanations describe the smallest change to the world that can be made to obtain a desirable outcome, or to arrive at the closest possible world, without needing to explain the internal logic of the system.},
	journal = {arXiv:1711.00399 [cs]},
	author = {Wachter, Sandra and Mittelstadt, Brent and Russell, Chris},
	month = mar,
	year = {2018},
	eprint = {1711.00399},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/T5WB9TGX/Wachter et al. - 2018 - Counterfactual Explanations without Opening the Bl.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/FS83KRWJ/1711.html:text/html},
}

@article{miller_explanation_2018,
	title = {Explanation in {Artificial} {Intelligence}: {Insights} from the {Social} {Sciences}},
	shorttitle = {Explanation in {Artificial} {Intelligence}},
	url = {https://arxiv.org/abs/1706.07269},
	abstract = {There has been a recent resurgence in the area of explainable artificial intelligence as researchers and practitioners seek to make their algorithms more understandable. Much of this research is focused on explicitly explaining decisions or actions to a human observer, and it should not be controversial to say that looking at how humans explain to each other can serve as a useful starting point for explanation in artificial intelligence. However, it is fair to say that most work in explainable artificial intelligence uses only the researchers' intuition of what constitutes a `good' explanation. There exists vast and valuable bodies of research in philosophy, psychology, and cognitive science of how people define, generate, select, evaluate, and present explanations, which argues that people employ certain cognitive biases and social expectations towards the explanation process. This paper argues that the field of explainable artificial intelligence should build on this existing research, and reviews relevant papers from philosophy, cognitive psychology/science, and social psychology, which study these topics. It draws out some important findings, and discusses ways that these can be infused with work on explainable artificial intelligence.},
	journal = {arXiv:1706.07269 [cs]},
	author = {Miller, Tim},
	month = aug,
	year = {2018},
	eprint = {1706.07269},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/94X9889C/Miller - 2018 - Explanation in Artificial Intelligence Insights f.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/RBRMXCGA/1706.html:text/html},
}

@article{janzing_feature_2019,
	title = {Feature relevance quantification in explainable {AI}: {A} causal problem},
	shorttitle = {Feature relevance quantification in explainable {AI}},
	url = {https://arxiv.org/abs/1910.13413},
	abstract = {We discuss promising recent contributions on quantifying feature relevance using Shapley values, where we observed some confusion on which probability distribution is the right one for dropped features. We argue that the confusion is based on not carefully distinguishing between observational and interventional conditional probabilities and try a clarification based on Pearl's seminal work on causality. We conclude that unconditional rather than conditional expectations provide the right notion of dropping features in contradiction to the theoretical justification of the software package SHAP. Parts of SHAP are unaffected because unconditional expectations (which we argue to be conceptually right) are used as approximation for the conditional ones, which encouraged others to `improve' SHAP in a way that we believe to be flawed.},
	journal = {arXiv:1910.13413 [cs, stat]},
	author = {Janzing, Dominik and Minorics, Lenon and Blöbaum, Patrick},
	month = nov,
	year = {2019},
	eprint = {1910.13413},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/XJUYQIE2/Janzing et al. - 2019 - Feature relevance quantification in explainable AI.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/VISIUP9P/1910.html:text/html},
}

@article{sundararajan_many_2020,
	title = {The many {Shapley} values for model explanation},
	url = {https://arxiv.org/abs/1908.08474},
	abstract = {The Shapley value has become a popular method to attribute the prediction of a machine-learning model on an input to its base features. The use of the Shapley value is justified by citing [16] showing that it is the \emph{unique} method that satisfies certain good properties (\emph{axioms}). There are, however, a multiplicity of ways in which the Shapley value is operationalized in the attribution problem. These differ in how they reference the model, the training data, and the explanation context. These give very different results, rendering the uniqueness result meaningless. Furthermore, we find that previously proposed approaches can produce counterintuitive attributions in theory and in practice---for instance, they can assign non-zero attributions to features that are not even referenced by the model. In this paper, we use the axiomatic approach to study the differences between some of the many operationalizations of the Shapley value for attribution, and propose a technique called Baseline Shapley (BShap) that is backed by a proper uniqueness result. We also contrast BShap with Integrated Gradients, another extension of Shapley value to the continuous setting.},
	journal = {arXiv:1908.08474 [cs, econ]},
	author = {Sundararajan, Mukund and Najmi, Amir},
	month = feb,
	year = {2020},
	eprint = {1908.08474},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Economics - Theoretical Economics},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/4JVWLJLH/Sundararajan and Najmi - 2020 - The many Shapley values for model explanation.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/QYQAV8EB/1908.html:text/html},
}

@article{alvarez-melis_robustness_2018,
	title = {On the {Robustness} of {Interpretability} {Methods}},
	url = {https://arxiv.org/abs/1806.08049},
	abstract = {We argue that robustness of explanations---i.e., that similar inputs should give rise to similar explanations---is a key desideratum for interpretability. We introduce metrics to quantify robustness and demonstrate that current methods do not perform well according to these metrics. Finally, we propose ways that robustness can be enforced on existing interpretability approaches.},
	journal = {arXiv:1806.08049 [cs, stat]},
	author = {Alvarez-Melis, David and Jaakkola, Tommi S.},
	month = jun,
	year = {2018},
	eprint = {1806.08049},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/F2IGQ84P/Alvarez-Melis and Jaakkola - 2018 - On the Robustness of Interpretability Methods.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/4ELNZNA9/1806.html:text/html},
}

@article{lundberg_unified_2017,
	title = {A {Unified} {Approach} to {Interpreting} {Model} {Predictions}},
	url = {https://arxiv.org/abs/1705.07874},
	abstract = {Understanding why a model makes a certain prediction can be as crucial as the prediction's accuracy in many applications. However, the highest accuracy for large modern datasets is often achieved by complex models that even experts struggle to interpret, such as ensemble or deep learning models, creating a tension between accuracy and interpretability. In response, various methods have recently been proposed to help users interpret the predictions of complex models, but it is often unclear how these methods are related and when one method is preferable over another. To address this problem, we present a unified framework for interpreting predictions, SHAP (SHapley Additive exPlanations). SHAP assigns each feature an importance value for a particular prediction. Its novel components include: (1) the identification of a new class of additive feature importance measures, and (2) theoretical results showing there is a unique solution in this class with a set of desirable properties. The new class unifies six existing methods, notable because several recent methods in the class lack the proposed desirable properties. Based on insights from this unification, we present new methods that show improved computational performance and/or better consistency with human intuition than previous approaches.},
	journal = {arXiv:1705.07874 [cs, stat]},
	author = {Lundberg, Scott and Lee, Su-In},
	month = nov,
	year = {2017},
	eprint = {1705.07874},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/UJ3XLMXU/Lundberg and Lee - 2017 - A Unified Approach to Interpreting Model Predictio.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/X2B34IZ9/1705.html:text/html},
}

@article{zhao_causal_2021,
	title = {Causal {Interpretations} of {Black}-{Box} {Models}},
	volume = {39},
	issn = {0735-0015},
	url = {https://doi.org/10.1080/07350015.2019.1624293},
	doi = {10.1080/07350015.2019.1624293},
	abstract = {The fields of machine learning and causal inference have developed many concepts, tools, and theory that are potentially useful for each other. Through exploring the possibility of extracting causal interpretations from black-box machine-trained models, we briefly review the languages and concepts in causal inference that may be interesting to machine learning researchers. We start with the curious observation that Friedman’s partial dependence plot has exactly the same formula as Pearl’s back-door adjustment and discuss three requirements to make causal interpretations: a model with good predictive performance, some domain knowledge in the form of a causal diagram and suitable visualization tools. We provide several illustrative examples and find some interesting and potentially causal relations using visualization tools for black-box models.},
	number = {1},
	journal = {Journal of Business \& Economic Statistics},
	author = {Zhao, Qingyuan and Hastie, Trevor},
	month = jan,
	year = {2021},
	publisher = {Taylor \& Francis},
	keywords = {Back-door adjustment, Data visualization, Machine learning, Mediation analysis, Partial dependence plot},
	pages = {272--281},
	file = {Full Text PDF:/home/zduey/Zotero/storage/GZDPTGH3/Zhao and Hastie - 2021 - Causal Interpretations of Black-Box Models.pdf:application/pdf;Snapshot:/home/zduey/Zotero/storage/X7A3JL4V/07350015.2019.html:text/html},
}

@article{chen_true_2020,
	title = {True to the {Model} or {True} to the {Data}?},
	url = {https://arxiv.org/abs/2006.16234},
	abstract = {A variety of recent papers discuss the application of Shapley values, a concept for explaining coalitional games, for feature attribution in machine learning. However, the correct way to connect a machine learning model to a coalitional game has been a source of controversy. The two main approaches that have been proposed differ in the way that they condition on known features, using either (1) an interventional or (2) an observational conditional expectation. While previous work has argued that one of the two approaches is preferable in general, we argue that the choice is application dependent. Furthermore, we argue that the choice comes down to whether it is desirable to be true to the model or true to the data. We use linear models to investigate this choice. After deriving an efficient method for calculating observational conditional expectation Shapley values for linear models, we investigate how correlation in simulated data impacts the convergence of observational conditional expectation Shapley values. Finally, we present two real data examples that we consider to be representative of possible use cases for feature attribution -- (1) credit risk modeling and (2) biological discovery. We show how a different choice of value function performs better in each scenario, and how possible attributions are impacted by modeling choices.},
	journal = {arXiv:2006.16234 [cs, stat]},
	author = {Chen, Hugh and Janizek, Joseph D. and Lundberg, Scott and Lee, Su-In},
	month = jun,
	year = {2020},
	eprint = {2006.16234},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/8FE69TGW/Chen et al. - 2020 - True to the Model or True to the Data.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/S6CRJGH7/2006.html:text/html},
}

@article{frye_shapley_2021,
	title = {Shapley explainability on the data manifold},
	url = {https://arxiv.org/abs/2006.01272},
	abstract = {Explainability in AI is crucial for model development, compliance with regulation, and providing operational nuance to predictions. The Shapley framework for explainability attributes a model's predictions to its input features in a mathematically principled and model-agnostic way. However, general implementations of Shapley explainability make an untenable assumption: that the model's features are uncorrelated. In this work, we demonstrate unambiguous drawbacks of this assumption and develop two solutions to Shapley explainability that respect the data manifold. One solution, based on generative modelling, provides flexible access to data imputations; the other directly learns the Shapley value-function, providing performance and stability at the cost of flexibility. While "off-manifold" Shapley values can (i) give rise to incorrect explanations, (ii) hide implicit model dependence on sensitive attributes, and (iii) lead to unintelligible explanations in higher-dimensional data, on-manifold explainability overcomes these problems.},
	journal = {arXiv:2006.01272 [cs, stat]},
	author = {Frye, Christopher and de Mijolla, Damien and Begley, Tom and Cowton, Laurence and Stanley, Megan and Feige, Ilya},
	month = feb,
	year = {2021},
	eprint = {2006.01272},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/X4893DII/Frye et al. - 2021 - Shapley explainability on the data manifold.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/ZQEIQ8I2/2006.html:text/html},
}

@inproceedings{hancox-li_epistemic_2021,
	title = {Epistemic values in feature importance methods: {Lessons} from feminist epistemology},
	shorttitle = {Epistemic values in feature importance methods},
	url = {https://arxiv.org/abs/2101.12737},
	doi = {10.1145/3442188.3445943},
	abstract = {As the public seeks greater accountability and transparency from machine learning algorithms, the research literature on methods to explain algorithms and their outputs has rapidly expanded. Feature importance methods form a popular class of explanation methods. In this paper, we apply the lens of feminist epistemology to recent feature importance research. We investigate what epistemic values are implicitly embedded in feature importance methods and how or whether they are in conflict with feminist epistemology. We offer some suggestions on how to conduct research on explanations that respects feminist epistemic values, taking into account the importance of social context, the epistemic privileges of subjugated knowers, and adopting more interactional ways of knowing.},
	booktitle = {Proceedings of the 2021 {ACM} Conference on Fairness, Accountability, and Transparency},
	author = {Hancox-Li, Leif and Kumar, I. Elizabeth},
	month = mar,
	year = {2021},
	eprint = {2101.12737},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Computers and Society},
	pages = {817--826},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/8EUNVGRD/Hancox-Li and Kumar - 2021 - Epistemic values in feature importance methods Le.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/RF9UJ9CK/2101.html:text/html},
}

@article{wang_shapley_2021,
	title = {Shapley {Flow}: {A} {Graph}-based {Approach} to {Interpreting} {Model} {Predictions}},
	shorttitle = {Shapley {Flow}},
	url = {https://arxiv.org/abs/2010.14592},
	abstract = {Many existing approaches for estimating feature importance are problematic because they ignore or hide dependencies among features. A causal graph, which encodes the relationships among input variables, can aid in assigning feature importance. However, current approaches that assign credit to nodes in the causal graph fail to explain the entire graph. In light of these limitations, we propose Shapley Flow, a novel approach to interpreting machine learning models. It considers the entire causal graph, and assigns credit to \textit{edges} instead of treating nodes as the fundamental unit of credit assignment. Shapley Flow is the unique solution to a generalization of the Shapley value axioms to directed acyclic graphs. We demonstrate the benefit of using Shapley Flow to reason about the impact of a model's input on its output. In addition to maintaining insights from existing approaches, Shapley Flow extends the flat, set-based, view prevalent in game theory based explanation methods to a deeper, \textit{graph-based}, view. This graph-based view enables users to understand the flow of importance through a system, and reason about potential interventions.},
	journal = {arXiv:2010.14592 [cs, stat]},
	author = {Wang, Jiaxuan and Wiens, Jenna and Lundberg, Scott},
	month = feb,
	year = {2021},
	eprint = {2010.14592},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/5JPLBW8R/Wang et al. - 2021 - Shapley Flow A Graph-based Approach to Interpreti.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/SY289RS7/2010.html:text/html},
}

@techreport{singal_flow-based_2021,
	author = {Singal, Raghav and Michailidis, George and Ng, Hoiyi},
	title = {Flow-{Based} {Attribution} in {Graphical} {Models}: {A} {Recursive} {Shapley} {Approach}},
	shorttitle = {Flow-{Based} {Attribution} in {Graphical} {Models}},
	type = {{SSRN} {Scholarly} {Paper}},
	number = {ID 3845526},
	institution = {Social Science Research Network},
	address = {Rochester, NY},
	year = {2021},
	month = may,
	doi = {10.2139/ssrn.3845526},
	url = {https://papers.ssrn.com/abstract=3845526},
	language = {en},
	keywords = {effect propagation, Graphical model, interpretable ML, network flow, Shapley value},
	abstract = {We study the attribution problem in a graphical model, wherein the objective is to quantify how the effect of changes at the source nodes propagates through the graph. We develop a model-agnostic flow-based attribution method, called recursive Shapley value (RSV). RSV generalizes a number of existing node-based methods and uniquely satisfies a set of flow-based axioms. In addition to admitting a natural characterization for linear models and facilitating mediation analysis for non-linear models, RSV satisfies a mix of desirable properties discussed in the recent literature, including implementation invariance, sensitivity, monotonicity, and affine scale invariance.},
	file = {Full Text PDF:/home/zduey/Zotero/storage/CTRIYGQS/Singal et al. - 2021 - Flow-Based Attribution in Graphical Models A Recu.pdf:application/pdf;Snapshot:/home/zduey/Zotero/storage/3S68KTJF/papers.html:text/html},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{heskes_causal_2020,
	title = {Causal {Shapley} Values: Exploiting Causal Knowledge to Explain Individual Predictions of Complex Models},
	shorttitle = {Causal {Shapley} Values},
	url = {http://arxiv.org/abs/2011.01625},
	abstract = {Shapley values underlie one of the most popular model-agnostic methods within explainable artificial intelligence. These values are designed to attribute the difference between a model's prediction and an average baseline to the different features used as input to the model. Being based on solid game-theoretic principles, Shapley values uniquely satisfy several desirable properties, which is why they are increasingly used to explain the predictions of possibly complex and highly non-linear machine learning models. Shapley values are well calibrated to a user's intuition when features are independent, but may lead to undesirable, counterintuitive explanations when the independence assumption is violated. In this paper, we propose a novel framework for computing Shapley values that generalizes recent work that aims to circumvent the independence assumption. By employing Pearl's do-calculus, we show how these 'causal' Shapley values can be derived for general causal graphs without sacrificing any of their desirable properties. Moreover, causal Shapley values enable us to separate the contribution of direct and indirect effects. We provide a practical implementation for computing causal Shapley values based on causal chain graphs when only partial information is available and illustrate their utility on a real-world example.},
	author = {Heskes, Tom and Sijben, Evi and Bucur, Ioan Gabriel and Claassen, Tom},
	month = nov,
	year = {2020},
	eprint = {2011.01625},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, I.2.6},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/PULDFKDP/Heskes et al. - 2020 - Causal Shapley Values Exploiting Causal Knowledge.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/Y72YX7Q2/2011.html:text/html},
}

% Conference paper (IEEE S&P 2016). The ISSN is kept in its own field so it is not printed as a free-text note.
@inproceedings{datta_algorithmic_2016,
	title = {Algorithmic Transparency via Quantitative Input Influence: Theory and Experiments with Learning Systems},
	shorttitle = {Algorithmic Transparency via Quantitative Input Influence},
	doi = {10.1109/SP.2016.42},
	abstract = {Algorithmic systems that employ machine learning play an increasing role in making substantive decisions in modern society, ranging from online personalization to insurance and credit decisions to predictive policing. But their decision-making processes are often opaque---it is difficult to explain why a certain decision was made. We develop a formal foundation to improve the transparency of such decision-making systems. Specifically, we introduce a family of Quantitative Input Influence (QII) measures that capture the degree of influence of inputs on outputs of systems. These measures provide a foundation for the design of transparency reports that accompany system decisions (e.g., explaining a specific credit decision) and for testing tools useful for internal and external oversight (e.g., to detect algorithmic discrimination). Distinctively, our causal QII measures carefully account for correlated inputs while measuring influence. They support a general class of transparency queries and can, in particular, explain decisions about individuals (e.g., a loan decision) and groups (e.g., disparate impact based on gender). Finally, since single inputs may not always have high influence, the QII measures also quantify the joint influence of a set of inputs (e.g., age and income) on outcomes (e.g. loan decisions) and the marginal influence of individual inputs within such a set (e.g., income). Since a single input may be part of multiple influential sets, the average marginal influence of the input is computed using principled aggregation measures, such as the Shapley value, previously applied to measure influence in voting. Further, since transparency reports could compromise privacy, we explore the transparency-privacy tradeoff and prove that a number of useful transparency reports can be made differentially private with very little addition of noise. 
Our empirical validation with standard machine learning algorithms demonstrates that QII measures are a useful transparency mechanism when black box access to the learning system is available. In particular, they provide better explanations than standard associative measures for a host of scenarios that we consider. Further, we show that in the situations we consider, QII is efficiently approximable and can be made differentially private while preserving accuracy.},
	booktitle = {2016 {IEEE} Symposium on Security and Privacy ({SP})},
	author = {Datta, Anupam and Sen, Shayak and Zick, Yair},
	month = may,
	year = {2016},
	issn = {2375-1207},
	keywords = {Algorithm design and analysis, Atmospheric measurements, Correlation, Decision making, fairness, machine learning, Machine learning algorithms, Particle measurements, Privacy, transparency},
	pages = {598--617},
	file = {IEEE Xplore Full Text PDF:/home/zduey/Zotero/storage/CCDBVYG4/Datta et al. - 2016 - Algorithmic Transparency via Quantitative Input In.pdf:application/pdf},
}

% Survey article (SIGKDD Explorations 22(1)). Title is unbraced so styles control casing; the spaced hyphen is typeset as a dash.
@article{moraffah_causal_2020,
	title = {Causal Interpretability for Machine Learning -- Problems, Methods and Evaluation},
	volume = {22},
	issn = {1931-0145},
	url = {https://doi.org/10.1145/3400051.3400058},
	doi = {10.1145/3400051.3400058},
	abstract = {Machine learning models have had discernible achievements in a myriad of applications. However, most of these models are black-boxes, and it is obscure how the decisions are made by them. This makes the models unreliable and untrustworthy. To provide insights into the decision making processes of these models, a variety of traditional interpretable models have been proposed. Moreover, to generate more human-friendly explanations, recent work on interpretability tries to answer questions related to causality such as "Why does this model makes such decisions?" or "Was it a specific feature that caused the decision made by the model?". In this work, models that aim to answer causal questions are referred to as causal interpretable models. The existing surveys have covered concepts and methodologies of traditional interpretability. In this work, we present a comprehensive survey on causal interpretable models from the aspects of the problems and methods. In addition, this survey provides in-depth insights into the existing evaluation metrics for measuring interpretability, which can help practitioners understand for what scenarios each evaluation metric is suitable.},
	number = {1},
	journal = {ACM SIGKDD Explorations Newsletter},
	author = {Moraffah, Raha and Karami, Mansooreh and Guo, Ruocheng and Raglin, Adrienne and Liu, Huan},
	month = may,
	year = {2020},
	keywords = {machine learning, causal inference, counterfactuals, explainability, interpretability},
	pages = {18--33},
	file = {Submitted Version:/home/zduey/Zotero/storage/8WG6SZJK/Moraffah et al. - 2020 - Causal Interpretability for Machine Learning - Pro.pdf:application/pdf},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{doshi-velez_towards_2017,
	title = {Towards A Rigorous Science of Interpretable Machine Learning},
	url = {http://arxiv.org/abs/1702.08608},
	abstract = {As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.},
	author = {Doshi-Velez, Finale and Kim, Been},
	month = mar,
	year = {2017},
	eprint = {1702.08608},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/KEARZIVD/Doshi-Velez and Kim - 2017 - Towards A Rigorous Science of Interpretable Machin.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/4N79WZYX/1702.html:text/html},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{merrick_explanation_2020,
	title = {The Explanation Game: Explaining Machine Learning Models Using {Shapley} Values},
	shorttitle = {The Explanation Game},
	url = {http://arxiv.org/abs/1909.08128},
	abstract = {A number of techniques have been proposed to explain a machine learning model's prediction by attributing it to the corresponding input features. Popular among these are techniques that apply the Shapley value method from cooperative game theory. While existing papers focus on the axiomatic motivation of Shapley values, and efficient techniques for computing them, they offer little justification for the game formulations used, and do not address the uncertainty implicit in their methods' outputs. For instance, the popular SHAP algorithm's formulation may give substantial attributions to features that play no role in the model. In this work, we illustrate how subtle differences in the underlying game formulations of existing methods can cause large differences in the attributions for a prediction. We then present a general game formulation that unifies existing methods, and enables straightforward confidence intervals on their attributions. Furthermore, it allows us to interpret the attributions as contrastive explanations of an input relative to a distribution of reference inputs. We tie this idea to classic research in cognitive psychology on contrastive explanations, and propose a conceptual framework for generating and interpreting explanations for ML models, called formulate, approximate, explain (FAE). We apply this framework to explain black-box models trained on two UCI datasets and a Lending Club dataset.},
	author = {Merrick, Luke and Taly, Ankur},
	month = jun,
	year = {2020},
	eprint = {1909.08128},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/HDRYNUGT/Merrick and Taly - 2020 - The Explanation Game Explaining Machine Learning .pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/SYWGYHED/1909.html:text/html},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{sundararajan_axiomatic_2017,
	title = {Axiomatic Attribution for Deep Networks},
	url = {http://arxiv.org/abs/1703.01365},
	abstract = {We study the problem of attributing the prediction of a deep network to its input features, a problem previously studied by several other works. We identify two fundamental axioms---Sensitivity and Implementation Invariance that attribution methods ought to satisfy. We show that they are not satisfied by most known attribution methods, which we consider to be a fundamental weakness of those methods. We use the axioms to guide the design of a new attribution method called Integrated Gradients. Our method requires no modification to the original network and is extremely simple to implement; it just needs a few calls to the standard gradient operator. We apply this method to a couple of image models, a couple of text models and a chemistry model, demonstrating its ability to debug networks, to extract rules from a network, and to enable users to engage with models better.},
	author = {Sundararajan, Mukund and Taly, Ankur and Yan, Qiqi},
	month = jun,
	year = {2017},
	eprint = {1703.01365},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/8TXIB4YW/Sundararajan et al. - 2017 - Axiomatic Attribution for Deep Networks.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/TTYSV2ID/1703.html:text/html},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{adler_auditing_2016,
	title = {Auditing Black-box Models for Indirect Influence},
	url = {http://arxiv.org/abs/1602.07043},
	abstract = {Data-trained predictive models see widespread use, but for the most part they are used as black boxes which output a prediction or score. It is therefore hard to acquire a deeper understanding of model behavior, and in particular how different features influence the model prediction. This is important when interpreting the behavior of complex models, or asserting that certain problematic attributes (like race or gender) are not unduly influencing decisions. In this paper, we present a technique for auditing black-box models, which lets us study the extent to which existing models take advantage of particular features in the dataset, without knowing how the models work. Our work focuses on the problem of indirect influence: how some features might indirectly influence outcomes via other, related features. As a result, we can find attribute influences even in cases where, upon further direct examination of the model, the attribute is not referred to by the model at all. Our approach does not require the black-box model to be retrained. This is important if (for example) the model is only accessible via an API, and contrasts our work with other methods that investigate feature influence like feature selection. We present experimental evidence for the effectiveness of our procedure using a variety of publicly available datasets and models. We also validate our procedure using techniques from interpretable learning and feature selection, as well as against other black-box auditing procedures.},
	author = {Adler, Philip and Falk, Casey and Friedler, Sorelle A. and Rybeck, Gabriel and Scheidegger, Carlos and Smith, Brandon and Venkatasubramanian, Suresh},
	month = nov,
	year = {2016},
	eprint = {1602.07043},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/RKTVET6T/Adler et al. - 2016 - Auditing Black-box Models for Indirect Influence.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/6K723JTG/1602.html:text/html},
}

% Journal article (ASMB 17(4)). Export residue is removed from note so nothing spurious prints; the publisher copyright line is kept in its own field.
@article{lipovetsky_analysis_2001,
	title = {Analysis of regression in game theory approach},
	volume = {17},
	copyright = {Copyright © 2001 John Wiley \& Sons, Ltd.},
	issn = {1526-4025},
	url = {http://onlinelibrary.wiley.com/doi/abs/10.1002/asmb.446},
	doi = {10.1002/asmb.446},
	abstract = {Working with multiple regression analysis a researcher usually wants to know a comparative importance of predictors in the model. However, the analysis can be made difficult because of multicollinearity among regressors, which produces biased coefficients and negative inputs to multiple determination from presumably useful regressors. To solve this problem we apply a tool from the co-operative games theory, the Shapley Value imputation. We demonstrate the theoretical and practical advantages of the Shapley Value and show that it provides consistent results in the presence of multicollinearity.},
	language = {en},
	number = {4},
	journal = {Applied Stochastic Models in Business and Industry},
	author = {Lipovetsky, Stan and Conklin, Michael},
	year = {2001},
	keywords = {co-operative games, multicollinearity, regressors net effects, Shapley Value},
	pages = {319--330},
	file = {Full Text PDF:/home/zduey/Zotero/storage/RJ3M8WEE/Lipovetsky and Conklin - 2001 - Analysis of regression in game theory approach.pdf:application/pdf},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{mase_explaining_2020,
	title = {Explaining black box decisions by {Shapley} cohort refinement},
	url = {http://arxiv.org/abs/1911.00467},
	abstract = {We introduce a variable importance measure to quantify the impact of individual input variables to a black box function. Our measure is based on the Shapley value from cooperative game theory. Many measures of variable importance operate by changing some predictor values with others held fixed, potentially creating unlikely or even logically impossible combinations. Our cohort Shapley measure uses only observed data points. Instead of changing the value of a predictor we include or exclude subjects similar to the target subject on that predictor to form a similarity cohort. Then we apply Shapley value to the cohort averages. We connect variable importance measures from explainable AI to function decompositions from global sensitivity analysis. We introduce a squared cohort Shapley value that splits previously studied Shapley effects over subjects, consistent with a Shapley axiom.},
	author = {Mase, Masayoshi and Owen, Art B. and Seiler, Benjamin},
	month = oct,
	year = {2020},
	eprint = {1911.00467},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning, Economics - Econometrics},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/Q6P55WEM/Mase et al. - 2020 - Explaining black box decisions by Shapley cohort r.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/D6WHQTBP/1911.html:text/html},
}

% Journal article (KAIS 41(3)). The accented surname uses a BibTeX special-character escape so sorting and labels work under classic BibTeX as well as Biber.
@article{strumbelj_explaining_2014,
	title = {Explaining prediction models and individual predictions with feature contributions},
	volume = {41},
	issn = {0219-3116},
	url = {https://doi.org/10.1007/s10115-013-0679-x},
	doi = {10.1007/s10115-013-0679-x},
	abstract = {We present a sensitivity analysis-based method for explaining prediction models that can be applied to any type of classification or regression model. Its advantage over existing general methods is that all subsets of input features are perturbed, so interactions and redundancies between features are taken into account. Furthermore, when explaining an additive model, the method is equivalent to commonly used additive model-specific methods. We illustrate the method’s usefulness with examples from artificial and real-world data sets and an empirical analysis of running times. Results from a controlled experiment with 122 participants suggest that the method’s explanations improved the participants’ understanding of the model.},
	language = {en},
	number = {3},
	journal = {Knowledge and Information Systems},
	author = {{\v{S}}trumbelj, Erik and Kononenko, Igor},
	month = dec,
	year = {2014},
	pages = {647--665},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note. Inline math in the abstract is real LaTeX math.
@misc{anders_fairwashing_2020,
	title = {Fairwashing Explanations with Off-Manifold Detergent},
	url = {http://arxiv.org/abs/2007.09969},
	abstract = {Explanation methods promise to make black-box classifiers more transparent. As a result, it is hoped that they can act as proof for a sensible, fair and trustworthy decision-making process of the algorithm and thereby increase its acceptance by the end-users. In this paper, we show both theoretically and experimentally that these hopes are presently unfounded. Specifically, we show that, for any classifier $g$, one can always construct another classifier $\tilde{g}$ which has the same behavior on the data (same train, validation, and test error) but has arbitrarily manipulated explanation maps. We derive this statement theoretically using differential geometry and demonstrate it experimentally for various explanation methods, architectures, and datasets. Motivated by our theoretical insights, we then propose a modification of existing explanation methods which makes them significantly more robust.},
	author = {Anders, Christopher J. and Pasliev, Plamen and Dombrowski, Ann-Kathrin and M{\"u}ller, Klaus-Robert and Kessel, Pan},
	month = jul,
	year = {2020},
	eprint = {2007.09969},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/DTKG9ZMT/Anders et al. - 2020 - Fairwashing Explanations with Off-Manifold Deterge.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/YN2HLE3D/2007.html:text/html},
}

% Journal article (DKE 68(10)). Author names are stored in full with escaped accents; the third author's compound surname is hyphenated.
@article{strumbelj_explaining_2009,
	title = {Explaining instance classifications with interactions of subsets of feature values},
	volume = {68},
	issn = {0169-023X},
	url = {https://www.sciencedirect.com/science/article/pii/S0169023X09000056},
	doi = {10.1016/j.datak.2009.01.004},
	abstract = {In this paper, we present a novel method for explaining the decisions of an arbitrary classifier, independent of the type of classifier. The method works at the instance level, decomposing the model’s prediction for an instance into the contributions of the attributes’ values. We use several artificial data sets and several different types of models to show that the generated explanations reflect the decision-making properties of the explained model and approach the concepts behind the data set as the prediction quality of the model increases. The usefulness of the method is justified by a successful application on a real-world breast cancer recurrence prediction problem.},
	language = {en},
	number = {10},
	journal = {Data \& Knowledge Engineering},
	author = {{\v{S}}trumbelj, Erik and Kononenko, Igor and Robnik-{\v{S}}ikonja, Marko},
	month = oct,
	year = {2009},
	keywords = {Machine learning, Classification, Data mining, Explanation, Knowledge discovery, Visualization},
	pages = {886--904},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of being stuffed into journal and note.
@misc{lundberg_consistent_2019,
	title = {Consistent Individualized Feature Attribution for Tree Ensembles},
	url = {http://arxiv.org/abs/1802.03888},
	abstract = {Interpreting predictions from tree ensemble methods such as gradient boosting machines and random forests is important, yet feature attribution for trees is often heuristic and not individualized for each prediction. Here we show that popular feature attribution methods are inconsistent, meaning they can lower a feature's assigned importance when the true impact of that feature actually increases. This is a fundamental problem that casts doubt on any comparison between features. To address it we turn to recent applications of game theory and develop fast exact tree solutions for SHAP (SHapley Additive exPlanation) values, which are the unique consistent and locally accurate attribution values. We then extend SHAP values to interaction effects and define SHAP interaction values. We propose a rich visualization of individualized feature attributions that improves over classic attribution summaries and partial dependence plots, and a unique "supervised" clustering (clustering based on feature attributions). We demonstrate better agreement with human intuition through a user study, exponential improvements in run time, improved clustering performance, and better identification of influential features. An implementation of our algorithm has also been merged into XGBoost and LightGBM, see http://github.com/slundberg/shap for details.},
	author = {Lundberg, Scott M. and Erion, Gabriel G. and Lee, Su-In},
	month = mar,
	year = {2019},
	eprint = {1802.03888},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/U4U8PJWL/Lundberg et al. - 2019 - Consistent Individualized Feature Attribution for .pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/4ZYR8J75/1802.html:text/html},
}

% Journal article (Nature Machine Intelligence 2(1)). Web-export metadata is removed from note so it does not print; the publisher is kept in its own field.
@article{lundberg_local_2020,
	title = {From local explanations to global understanding with explainable {AI} for trees},
	volume = {2},
	copyright = {2020 The Author(s), under exclusive licence to Springer Nature Limited},
	issn = {2522-5839},
	url = {https://www.nature.com/articles/s42256-019-0138-9},
	doi = {10.1038/s42256-019-0138-9},
	abstract = {Tree-based machine learning models such as random forests, decision trees and gradient boosted trees are popular nonlinear predictive models, yet comparatively little attention has been paid to explaining their predictions. Here we improve the interpretability of tree-based models through three main contributions. (1) A polynomial time algorithm to compute optimal explanations based on game theory. (2) A new type of explanation that directly measures local feature interaction effects. (3) A new set of tools for understanding global model structure based on combining many local explanations of each prediction. We apply these tools to three medical machine learning problems and show how combining many high-quality local explanations allows us to represent global structure while retaining local faithfulness to the original model. These tools enable us to (1) identify high-magnitude but low-frequency nonlinear mortality risk factors in the US population, (2) highlight distinct population subgroups with shared risk characteristics, (3) identify nonlinear interaction effects among risk factors for chronic kidney disease and (4) monitor a machine learning model deployed in a hospital by identifying which features are degrading the model’s performance over time. Given the popularity of tree-based machine learning models, these improvements to their interpretability have implications across a broad set of domains.},
	language = {en},
	number = {1},
	journal = {Nature Machine Intelligence},
	author = {Lundberg, Scott M. and Erion, Gabriel and Chen, Hugh and DeGrave, Alex and Prutkin, Jordan M. and Nair, Bala and Katz, Ronit and Himmelfarb, Jonathan and Bansal, Nisha and Lee, Su-In},
	month = jan,
	year = {2020},
	publisher = {Nature Publishing Group},
	keywords = {Computer science, Medical research, Software},
	pages = {56--67},
	file = {Accepted Version:/home/zduey/Zotero/storage/RMYWZMY6/Lundberg et al. - 2020 - From local explanations to global understanding wi.pdf:application/pdf;Snapshot:/home/zduey/Zotero/storage/WUFSK7HW/s42256-019-0138-9.html:text/html},
}

% Journal article (The American Statistician 41(1)). Publisher information sits in the publisher field rather than a printed note.
@article{kruskal_relative_1987,
	title = {Relative Importance by Averaging Over Orderings},
	volume = {41},
	issn = {0003-1305},
	url = {https://www.jstor.org/stable/2684310},
	doi = {10.2307/2684310},
	abstract = {Many ways have been suggested for explicating the ambiguous concept of relative importance for independent variables in a multiple regression setting. There are drawbacks to all the explications, but a relatively acceptable one is available when the independent variables have a relevant, known ordering: consider the proportion of variance of the dependent variable linearly accounted for by the first independent variable; then consider the proportion of remaining variance linearly accounted for by the second independent variable; and so on. When, however, the independent variables do not have a relevant ordering, that approach fails. The primary suggestion of this article is to rescue the idea by averaging relative importance over all orderings of the independent variables. Variations and extensions of the idea are described.},
	number = {1},
	journal = {The American Statistician},
	author = {Kruskal, William},
	year = {1987},
	publisher = {American Statistical Association, Taylor \& Francis, Ltd.},
	pages = {6--10},
}

% Journal article (SIAM/ASA JUQ 2(1)). Title braces protect the two proper nouns only; abstract text uses plain letters instead of PDF ligature glyphs.
@article{owen_sobol_2014,
	title = {{Sobol'} Indices and {Shapley} Value},
	volume = {2},
	issn = {2166-2525},
	url = {http://epubs.siam.org/doi/10.1137/130936233},
	doi = {10.1137/130936233},
	abstract = {Global sensitivity analysis measures the importance of some input variables to a function f by looking at the impact on f of making large random perturbations to subsets of those variables. Using measures like those of Sobol’ we can attribute importance to input variables based on the extent to which they help predict the target function f. There is a longstanding literature in economics and game theory that considers how to attribute the value of a team effort to individual members of that team. The primary result, known as Shapley value, is the unique method satisfying some intuitively necessary criteria. In this paper we find the Shapley value of individual variables when we take ‘variance explained’ as their combined value. The result does not match either of the usual Sobol’ indices. It is instead bracketed between them, for variance explained or indeed any totally monotone game. Because those indices are comparatively easy to compute, Sobol’ indices provide effectively computable bounds for the Shapley value.},
	language = {en},
	number = {1},
	journal = {SIAM/ASA Journal on Uncertainty Quantification},
	author = {Owen, Art B.},
	month = jan,
	year = {2014},
	pages = {245--251},
	file = {Owen - 2014 - Sobol' Indices and Shapley Value.pdf:/home/zduey/Zotero/storage/LTQ9LPAZ/Owen - 2014 - Sobol' Indices and Shapley Value.pdf:application/pdf},
}

% Journal article (SIAM/ASA JUQ 5(1)). Publisher sits in its own field instead of a printed note; the ISSN matches the other entry for this journal in this file.
@article{owen_shapley_2017,
	title = {On {Shapley} Value for Measuring Importance of Dependent Inputs},
	volume = {5},
	issn = {2166-2525},
	url = {https://epubs.siam.org/doi/10.1137/16M1097717},
	doi = {10.1137/16M1097717},
	abstract = {This paper makes the case for using Shapley value to quantify the importance of random input variables to a function. Alternatives based on the ANOVA decomposition can run into conceptual and computational problems when the input variables are dependent. Our main goal here is to show that Shapley value removes the conceptual problems. We do this with some simple examples where Shapley value leads to intuitively reasonable nearly closed form answers.},
	number = {1},
	journal = {SIAM/ASA Journal on Uncertainty Quantification},
	author = {Owen, Art B. and Prieur, Cl{\'e}mentine},
	month = jan,
	year = {2017},
	publisher = {Society for Industrial and Applied Mathematics},
	keywords = {65C20, functional ANOVA, Sobol' indices, variable importance},
	pages = {986--1002},
	file = {Submitted Version:/home/zduey/Zotero/storage/JHABMJXQ/Owen and Prieur - 2017 - On Shapley Value for Measuring Importance of Depen.pdf:application/pdf},
}

@article{song_shapley_2016,
	title = {Shapley {Effects} for {Global} {Sensitivity} {Analysis}: {Theory} and {Computation}},
	volume = {4},
	shorttitle = {Shapley {Effects} for {Global} {Sensitivity} {Analysis}},
	url = {https://epubs.siam.org/doi/abs/10.1137/15M1048070},
	doi = {10.1137/15M1048070},
	abstract = {Variance-based global sensitivity analysis decomposes the variance of the output of a computer model, resulting from uncertainty about the model's inputs, into variance components associated with each input's contribution. The two most common variance-based sensitivity measures, the first-order effects and the total effects, may fail to sum to the total variance. They are often used together in sensitivity analysis, because neither of them adequately deals with interactions in the way the inputs affect the output. Therefore Owen proposed an alternative sensitivity measure, based on the concept of the Shapley value in game theory, and showed it always sums to the correct total variance if inputs are independent. We analyze Owen's measure, which we call the Shapley effect, in the case of dependent inputs. We show empirically how the first-order and total effects, even when used together, may fail to appropriately measure how sensitive the output is to uncertainty in the inputs when there is probabilistic dependence or structural interaction among the inputs. Because they involve all subsets of the inputs, Shapley effects could be expensive to compute if the number of inputs is large. We propose a Monte Carlo algorithm that makes accurate approximation of Shapley effects computationally affordable, and we discuss efficient allocation of the computation budget in this algorithm.},
	number = {1},
	journal = {SIAM/ASA Journal on Uncertainty Quantification},
	author = {Song, Eunhye and Nelson, Barry L. and Staum, Jeremy},
	month = jan,
	year = {2016},
	publisher = {Society for Industrial and Applied Mathematics},
	keywords = {Shapley value, 62K99, 62P30, 68U20, computer experiments, global sensitivity},
	pages = {1060--1083},
}

@techreport{selbst_intuitive_2018,
	address = {Rochester, NY},
	type = {{SSRN} {Scholarly} {Paper}},
	title = {The {Intuitive} {Appeal} of {Explainable} {Machines}},
	url = {https://papers.ssrn.com/abstract=3126971},
	abstract = {Algorithmic decision-making has become synonymous with inexplicable decision-making, but what makes algorithms so difficult to explain? This Article examines what sets machine learning apart from other ways of developing rules for decision-making and the problem these properties pose for explanation. We show that machine learning models can be both inscrutable and nonintuitive and that these are related, but distinct, properties. Calls for explanation have treated these problems as one and the same, but disentangling the two reveals that they demand very different responses. Dealing with inscrutability requires providing a sensible description of the rules; addressing nonintuitiveness requires providing a satisfying explanation for why the rules are what they are. Existing laws like the Fair Credit Reporting Act (FCRA), the Equal Credit Opportunity Act (ECOA), and the General Data Protection Regulation (GDPR), as well as techniques within machine learning, are focused almost entirely on the problem of inscrutability. While such techniques could allow a machine learning system to comply with existing law, doing so may not help if the goal is to assess whether the basis for decision-making is normatively defensible. In most cases, intuition serves as the unacknowledged bridge between a descriptive account and a normative evaluation. But because machine learning is often valued for its ability to uncover statistical relationships that defy intuition, relying on intuition is not a satisfying approach. This Article thus argues for other mechanisms for normative evaluation. To know why the rules are what they are, one must seek explanations of the process behind a model’s development, not just explanations of the model itself.},
	language = {en},
	number = {ID 3126971},
	institution = {Social Science Research Network},
	author = {Selbst, Andrew D. and Barocas, Solon},
	month = mar,
	year = {2018},
	doi = {10.2139/ssrn.3126971},
	keywords = {machine learning, algorithmic accountability, big data, discrimination, explanations, law and technology, privacy},
	file = {Full Text PDF:/home/zduey/Zotero/storage/UYWA9SSJ/Selbst and Barocas - 2018 - The Intuitive Appeal of Explainable Machines.pdf:application/pdf;Snapshot:/home/zduey/Zotero/storage/KM6ZRQFI/papers.html:text/html},
}

@misc{hooker_unrestricted_2021,
	title = {Unrestricted {Permutation} forces {Extrapolation}: {Variable} {Importance} {Requires} at least {One} {More} {Model}, or {There} {Is} {No} {Free} {Variable} {Importance}},
	shorttitle = {Unrestricted {Permutation} forces {Extrapolation}},
	url = {http://arxiv.org/abs/1905.03151},
	abstract = {This paper reviews and advocates against the use of permute-and-predict (PaP) methods for interpreting black box functions. Methods such as the variable importance measures proposed for random forests, partial dependence plots, and individual conditional expectation plots remain popular because they are both model-agnostic and depend only on the pre-trained model output, making them computationally efficient and widely available in software. However, numerous studies have found that these tools can produce diagnostics that are highly misleading, particularly when there is strong dependence among features. The purpose of our work here is to (i) review this growing body of literature, (ii) provide further demonstrations of these drawbacks along with a detailed explanation as to why they occur, and (iii) advocate for alternative measures that involve additional modeling. In particular, we describe how breaking dependencies between features in hold-out data places undue emphasis on sparse regions of the feature space by forcing the original model to extrapolate to regions where there is little to no data. We explore these effects across various model setups and find support for previous claims in the literature that PaP metrics can vastly over-emphasize correlated features in both variable importance measures and partial dependence plots. As an alternative, we discuss and recommend more direct approaches that involve measuring the change in model performance after muting the effects of the features under investigation.},
	eprint = {1905.03151},
	archiveprefix = {arXiv},
	author = {Hooker, Giles and Mentch, Lucas and Zhou, Siyu},
	month = oct,
	year = {2021},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Statistics - Methodology, 62G08, I.5.1},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/UCG9KCQC/Hooker et al. - 2021 - Unrestricted Permutation forces Extrapolation Var.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/NRWDQU25/1905.html:text/html},
}

@misc{covert_understanding_2020,
	title = {Understanding {Global} {Feature} {Contributions} {With} {Additive} {Importance} {Measures}},
	url = {http://arxiv.org/abs/2004.00668},
	abstract = {Understanding the inner workings of complex machine learning models is a long-standing problem and most recent research has focused on local interpretability. To assess the role of individual input features in a global sense, we explore the perspective of defining feature importance through the predictive power associated with each feature. We introduce two notions of predictive power (model-based and universal) and formalize this approach with a framework of additive importance measures, which unifies numerous methods in the literature. We then propose SAGE, a model-agnostic method that quantifies predictive power while accounting for feature interactions. Our experiments show that SAGE can be calculated efficiently and that it assigns more accurate importance values than other methods.},
	eprint = {2004.00668},
	archiveprefix = {arXiv},
	author = {Covert, Ian and Lundberg, Scott and Lee, Su-In},
	month = oct,
	year = {2020},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/G9KF2TYD/Covert et al. - 2020 - Understanding Global Feature Contributions With Ad.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/YPWC4IAV/2004.html:text/html},
}

@misc{miller_explainable_2017,
	title = {Explainable {AI}: {Beware} of {Inmates} {Running} the {Asylum} {Or}: {How} {I} {Learnt} to {Stop} {Worrying} and {Love} the {Social} and {Behavioural} {Sciences}},
	shorttitle = {Explainable {AI}},
	url = {http://arxiv.org/abs/1712.00547},
	abstract = {In his seminal book `The Inmates are Running the Asylum: Why High-Tech Products Drive Us Crazy And How To Restore The Sanity' [2004, Sams Indianapolis, IN, USA], Alan Cooper argues that a major reason why software is often poorly designed (from a user perspective) is that programmers are in charge of design decisions, rather than interaction designers. As a result, programmers design software for themselves, rather than for their target audience, a phenomenon he refers to as the `inmates running the asylum'. This paper argues that explainable AI risks a similar fate. While the re-emergence of explainable AI is positive, this paper argues most of us as AI researchers are building explanatory agents for ourselves, rather than for the intended users. But explainable AI is more likely to succeed if researchers and practitioners understand, adopt, implement, and improve models from the vast and valuable bodies of research in philosophy, psychology, and cognitive science, and if evaluation of these models is focused more on people than on technology. From a light scan of literature, we demonstrate that there is considerable scope to infuse more results from the social and behavioural sciences into explainable AI, and present some key results from these fields that are relevant to explainable AI.},
	eprint = {1712.00547},
	archiveprefix = {arXiv},
	author = {Miller, Tim and Howe, Piers and Sonenberg, Liz},
	month = dec,
	year = {2017},
	keywords = {Computer Science - Artificial Intelligence},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/WXMHL6MU/Miller et al. - 2017 - Explainable AI Beware of Inmates Running the Asyl.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/BQAKBDMF/1712.html:text/html},
}

@inproceedings{kaur_interpreting_2020,
	venue = {Honolulu, HI, USA},
	title = {Interpreting {Interpretability}: {Understanding} {Data} {Scientists}' {Use} of {Interpretability} {Tools} for {Machine} {Learning}},
	isbn = {978-1-4503-6708-0},
	shorttitle = {Interpreting {Interpretability}},
	url = {https://dl.acm.org/doi/10.1145/3313831.3376219},
	doi = {10.1145/3313831.3376219},
	abstract = {Machine learning (ML) models are now routinely deployed in domains ranging from criminal justice to healthcare. With this newfound ubiquity, ML has moved beyond academia and grown into an engineering discipline. To that end, interpretability tools have been designed to help data scientists and machine learning practitioners better understand how ML models work. However, there has been little evaluation of the extent to which these tools achieve this goal. We study data scientists’ use of two existing interpretability tools, the InterpretML implementation of GAMs and the SHAP Python package. We conduct a contextual inquiry (N=11) and a survey (N=197) of data scientists to observe how they use interpretability tools to uncover common issues that arise when building and evaluating ML models. Our results indicate that data scientists over-trust and misuse interpretability tools. Furthermore, few of our participants were able to accurately describe the visualizations output by these tools. We highlight qualitative themes for data scientists’ mental models of interpretability tools. We conclude with implications for researchers and tool designers, and contextualize our findings in the social science literature.},
	language = {en},
	booktitle = {Proceedings of the 2020 {CHI} {Conference} on {Human} {Factors} in {Computing} {Systems}},
	publisher = {ACM},
	author = {Kaur, Harmanpreet and Nori, Harsha and Jenkins, Samuel and Caruana, Rich and Wallach, Hanna and Wortman Vaughan, Jennifer},
	month = apr,
	year = {2020},
	pages = {1--14},
	file = {Kaur et al. - 2020 - Interpreting Interpretability Understanding Data .pdf:/home/zduey/Zotero/storage/CCK4PPX9/Kaur et al. - 2020 - Interpreting Interpretability Understanding Data .pdf:application/pdf},
}

@misc{lage_evaluation_2019,
	title = {An {Evaluation} of the {Human}-{Interpretability} of {Explanation}},
	url = {http://arxiv.org/abs/1902.00006},
	abstract = {Recent years have seen a boom in interest in machine learning systems that can provide a human-understandable rationale for their predictions or decisions. However, exactly what kinds of explanation are truly human-interpretable remains poorly understood. This work advances our understanding of what makes explanations interpretable under three specific tasks that users may perform with machine learning systems: simulation of the response, verification of a suggested response, and determining whether the correctness of a suggested response changes under a change to the inputs. Through carefully controlled human-subject experiments, we identify regularizers that can be used to optimize for the interpretability of machine learning systems. Our results show that the type of complexity matters: cognitive chunks (newly defined concepts) affect performance more than variable repetitions, and these trends are consistent across tasks and domains. This suggests that there may exist some common design principles for explanation systems.},
	eprint = {1902.00006},
	archiveprefix = {arXiv},
	author = {Lage, Isaac and Chen, Emily and He, Jeffrey and Narayanan, Menaka and Kim, Been and Gershman, Sam and Doshi-Velez, Finale},
	month = aug,
	year = {2019},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/2VA34WDE/Lage et al. - 2019 - An Evaluation of the Human-Interpretability of Exp.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/UBG5YAST/1902.html:text/html},
}

@misc{marx_disentangling_2019,
	title = {Disentangling {Influence}: {Using} {Disentangled} {Representations} to {Audit} {Model} {Predictions}},
	shorttitle = {Disentangling {Influence}},
	url = {http://arxiv.org/abs/1906.08652},
	abstract = {Motivated by the need to audit complex and black box models, there has been extensive research on quantifying how data features influence model predictions. Feature influence can be direct (a direct influence on model outcomes) and indirect (model outcomes are influenced via proxy features). Feature influence can also be expressed in aggregate over the training or test data or locally with respect to a single point. Current research has typically focused on one of each of these dimensions. In this paper, we develop disentangled influence audits, a procedure to audit the indirect influence of features. Specifically, we show that disentangled representations provide a mechanism to identify proxy features in the dataset, while allowing an explicit computation of feature influence on either individual outcomes or aggregate-level outcomes. We show through both theory and experiments that disentangled influence audits can both detect proxy features and show, for each individual or in aggregate, which of these proxy features affects the classifier being audited the most. In this respect, our method is more powerful than existing methods for ascertaining feature influence.},
	eprint = {1906.08652},
	archiveprefix = {arXiv},
	author = {Marx, Charles T. and Phillips, Richard Lanas and Friedler, Sorelle A. and Scheidegger, Carlos and Venkatasubramanian, Suresh},
	month = jun,
	year = {2019},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/QTYDVHU4/Marx et al. - 2019 - Disentangling Influence Using Disentangled Repres.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/ULQXUADX/1906.html:text/html},
}

@misc{slack_fooling_2020,
	title = {Fooling {LIME} and {SHAP}: {Adversarial} {Attacks} on {Post} hoc {Explanation} {Methods}},
	shorttitle = {Fooling {LIME} and {SHAP}},
	url = {http://arxiv.org/abs/1911.02508},
	abstract = {As machine learning black boxes are increasingly being deployed in domains such as healthcare and criminal justice, there is growing emphasis on building tools and techniques for explaining these black boxes in an interpretable manner. Such explanations are being leveraged by domain experts to diagnose systematic errors and underlying biases of black boxes. In this paper, we demonstrate that post hoc explanations techniques that rely on input perturbations, such as LIME and SHAP, are not reliable. Specifically, we propose a novel scaffolding technique that effectively hides the biases of any given classifier by allowing an adversarial entity to craft an arbitrary desired explanation. Our approach can be used to scaffold any biased classifier in such a way that its predictions on the input data distribution still remain biased, but the post hoc explanations of the scaffolded classifier look innocuous. Using extensive evaluation with multiple real-world datasets (including COMPAS), we demonstrate how extremely biased (racist) classifiers crafted by our framework can easily fool popular explanation techniques such as LIME and SHAP into generating innocuous explanations which do not reflect the underlying biases.},
	eprint = {1911.02508},
	archiveprefix = {arXiv},
	author = {Slack, Dylan and Hilgard, Sophie and Jia, Emily and Singh, Sameer and Lakkaraju, Himabindu},
	month = feb,
	year = {2020},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/NJ8V5V7H/Slack et al. - 2020 - Fooling LIME and SHAP Adversarial Attacks on Post.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/K42YQWX3/1911.html:text/html},
}

@misc{dhamdhere_shapley_2020,
	title = {The {Shapley} {Taylor} {Interaction} {Index}},
	url = {http://arxiv.org/abs/1902.05622},
	abstract = {The attribution problem, that is the problem of attributing a model's prediction to its base features, is well-studied. We extend the notion of attribution to also apply to feature interactions. The Shapley value is a commonly used method to attribute a model's prediction to its base features. We propose a generalization of the Shapley value called Shapley-Taylor index that attributes the model's prediction to interactions of subsets of features up to some size k. The method is analogous to how the truncated Taylor Series decomposes the function value at a certain point using its derivatives at a different point. In fact, we show that the Shapley Taylor index is equal to the Taylor Series of the multilinear extension of the set-theoretic behavior of the model. We axiomatize this method using the standard Shapley axioms -- linearity, dummy, symmetry and efficiency -- and an additional axiom that we call the interaction distribution axiom. This new axiom explicitly characterizes how interactions are distributed for a class of functions that model pure interaction. We contrast the Shapley-Taylor index against the previously proposed Shapley Interaction index (cf. [9]) from the cooperative game theory literature. We also apply the Shapley Taylor index to three models and identify interesting qualitative insights.},
	eprint = {1902.05622},
	archiveprefix = {arXiv},
	author = {Dhamdhere, Kedar and Agarwal, Ashish and Sundararajan, Mukund},
	month = feb,
	year = {2020},
	keywords = {Economics - Theoretical Economics, Computer Science - Computer Science and Game Theory},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/5CEJFHKS/Dhamdhere et al. - 2020 - The Shapley Taylor Interaction Index.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/9F4GQJ8J/1902.html:text/html},
}

@article{chockler_responsibility_2004,
	title = {Responsibility and {Blame}: {A} {Structural}-{Model} {Approach}},
	volume = {22},
	issn = {1076-9757},
	shorttitle = {Responsibility and {Blame}},
	url = {https://jair.org/index.php/jair/article/view/10386},
	doi = {10.1613/jair.1391},
	abstract = {Causality is typically treated an all-or-nothing concept; either A is a cause of B or it is not. We extend the definition of causality introduced by Halpern and Pearl (2004a) to take into account the degree of responsibility of A for B. For example, if someone wins an election 11–0, then each person who votes for him is less responsible for the victory than if he had won 6–5. We then define a notion of degree of blame, which takes into account an agent’s epistemic state. Roughly speaking, the degree of blame of A for B is the expected degree of responsibility of A for B, taken over the epistemic state of an agent.},
	language = {en},
	journal = {Journal of Artificial Intelligence Research},
	author = {Chockler, Hana and Halpern, Joseph Y.},
	month = oct,
	year = {2004},
	pages = {93--115},
	file = {Chockler and Halpern - 2004 - Responsibility and Blame A Structural-Model Appro.pdf:/home/zduey/Zotero/storage/SB63F57U/Chockler and Halpern - 2004 - Responsibility and Blame A Structural-Model Appro.pdf:application/pdf},
}

@article{halpern_causes_2005,
	title = {Causes and {Explanations}: {A} {Structural}-{Model} {Approach}. {Part} {II}: {Explanations}},
	volume = {56},
	issn = {0007-0882},
	shorttitle = {Causes and {Explanations}},
	url = {https://www.jstor.org/stable/3541871},
	abstract = {We propose new definitions of (causal) explanation, using structural equations to model counterfactuals. The definition is based on the notion of actual cause, as defined and motivated in a companion article. Essentially, an explanation is a fact that is not known for certain but, if found to be true, would constitute an actual cause of the fact to be explained, regardless of the agent's initial uncertainty. We show that the definition handles well a number of problematic examples from the literature.},
	number = {4},
	journal = {The British Journal for the Philosophy of Science},
	author = {Halpern, Joseph Y. and Pearl, Judea},
	year = {2005},
	publisher = {Oxford University Press, The British Society for the Philosophy of Science},
	pages = {889--911},
}

@article{halpern_causes_2005-1,
	title = {Causes and {Explanations}: {A} {Structural}-{Model} {Approach}. {Part} {I}: {Causes}},
	volume = {56},
	issn = {0007-0882},
	shorttitle = {Causes and {Explanations}},
	url = {https://www.jstor.org/stable/3541870},
	abstract = {We propose a new definition of actual causes, using structural equations to model counterfactuals. We show that the definition yields a plausible and elegant account of causation that handles well examples which have caused problems for other definitions and resolves major difficulties in the traditional account.},
	number = {4},
	journal = {The British Journal for the Philosophy of Science},
	author = {Halpern, Joseph Y. and Pearl, Judea},
	year = {2005},
	publisher = {Oxford University Press, The British Society for the Philosophy of Science},
	pages = {843--887},
	file = {JSTOR Full Text PDF:/home/zduey/Zotero/storage/P52TA4EX/Halpern and Pearl - 2005 - Causes and Explanations A Structural-Model Approa.pdf:application/pdf},
}

@article{strumbelj_efficient_2010,
	title = {An {Efficient} {Explanation} of {Individual} {Classifications} using {Game} {Theory}},
	volume = {11},
	issn = {1532-4435},
	abstract = {We present a general method for explaining individual predictions of classification models. The method is based on fundamental concepts from coalitional game theory and predictions are explained with contributions of individual feature values. We overcome the method's initial exponential time complexity with a sampling-based approximation. In the experimental part of the paper we use the developed method on models generated by several well-known machine learning algorithms on both synthetic and real-world data sets. The results demonstrate that the method is efficient and that the explanations are intuitive and useful.},
	journal = {The Journal of Machine Learning Research},
	author = {{\v{S}}trumbelj, Erik and Kononenko, Igor},
	month = mar,
	year = {2010},
	pages = {1--18},
	file = {Full Text PDF:/home/zduey/Zotero/storage/AFHUK3KC/Strumbelj and Kononenko - 2010 - An Efficient Explanation of Individual Classificat.pdf:application/pdf},
}

@misc{lipton_mythos_2017,
	title = {The {Mythos} of {Model} {Interpretability}},
	url = {http://arxiv.org/abs/1606.03490},
	abstract = {Supervised machine learning models boast remarkable predictive capabilities. But can you trust your model? Will it work in deployment? What else can it tell you about the world? We want models to be not only good, but interpretable. And yet the task of interpretation appears underspecified. Papers provide diverse and sometimes non-overlapping motivations for interpretability, and offer myriad notions of what attributes render models interpretable. Despite this ambiguity, many papers proclaim interpretability axiomatically, absent further explanation. In this paper, we seek to refine the discourse on interpretability. First, we examine the motivations underlying interest in interpretability, finding them to be diverse and occasionally discordant. Then, we address model properties and techniques thought to confer interpretability, identifying transparency to humans and post-hoc explanations as competing notions. Throughout, we discuss the feasibility and desirability of different notions, and question the oft-made assertions that linear models are interpretable and that deep neural networks are not.},
	eprint = {1606.03490},
	archiveprefix = {arXiv},
	author = {Lipton, Zachary C.},
	month = mar,
	year = {2017},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Neural and Evolutionary Computing},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/E25VGWID/Lipton - 2017 - The Mythos of Model Interpretability.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/TLJT62DT/1606.html:text/html},
}

@misc{chen_l-shapley_2018,
	title = {L-{Shapley} and {C}-{Shapley}: {Efficient} {Model} {Interpretation} for {Structured} {Data}},
	shorttitle = {L-{Shapley} and {C}-{Shapley}},
	url = {http://arxiv.org/abs/1808.02610},
	abstract = {We study instancewise feature importance scoring as a method for model interpretation. Any such method yields, for each predicted instance, a vector of importance scores associated with the feature vector. Methods based on the Shapley score have been proposed as a fair way of computing feature attributions of this kind, but incur an exponential complexity in the number of features. This combinatorial explosion arises from the definition of the Shapley value and prevents these methods from being scalable to large data sets and complex models. We focus on settings in which the data have a graph structure, and the contribution of features to the target variable is well-approximated by a graph-structured factorization. In such settings, we develop two algorithms with linear complexity for instancewise feature importance scoring. We establish the relationship of our methods to the Shapley value and another closely related concept known as the Myerson value from cooperative game theory. We demonstrate on both language and image data that our algorithms compare favorably with other methods for model interpretation.},
	eprint = {1808.02610},
	archiveprefix = {arXiv},
	author = {Chen, Jianbo and Song, Le and Wainwright, Martin J. and Jordan, Michael I.},
	month = aug,
	year = {2018},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/ENIL4AB9/Chen et al. - 2018 - L-Shapley and C-Shapley Efficient Model Interpret.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/TUMBZIHT/1808.html:text/html},
}

@misc{rudin_stop_2019,
	title = {Stop {Explaining} {Black} {Box} {Machine} {Learning} {Models} for {High} {Stakes} {Decisions} and {Use} {Interpretable} {Models} {Instead}},
	url = {http://arxiv.org/abs/1811.10154},
	abstract = {Black box machine learning models are currently being used for high stakes decision-making throughout society, causing problems throughout healthcare, criminal justice, and in other domains. People have hoped that creating methods for explaining these black box models will alleviate some of these problems, but trying to \textit{explain} black box models, rather than creating models that are \textit{interpretable} in the first place, is likely to perpetuate bad practices and can potentially cause catastrophic harm to society. There is a way forward -- it is to design models that are inherently interpretable. This manuscript clarifies the chasm between explaining black boxes and using inherently interpretable models, outlines several key reasons why explainable black boxes should be avoided in high-stakes decisions, identifies challenges to interpretable machine learning, and provides several example applications where interpretable models could potentially replace black box models in criminal justice, healthcare, and computer vision.},
	eprint = {1811.10154},
	archiveprefix = {arXiv},
	author = {Rudin, Cynthia},
	month = sep,
	year = {2019},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/ELE5BJLL/Rudin - 2019 - Stop Explaining Black Box Machine Learning Models .pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/3LWSNIC9/1811.html:text/html},
}

@article{gromping_estimators_2007,
	title = {Estimators of {Relative} {Importance} in {Linear} {Regression} {Based} on {Variance} {Decomposition}},
	volume = {61},
	issn = {0003-1305},
	url = {https://www.jstor.org/stable/27643865},
	abstract = {Assigning shares of "relative importance" to each of a set of regressors is one of the key goals of researchers applying linear regression, particularly in sciences that work with observational data. Although the topic is quite old, advances in computational capabilities have led to increased applications of computer-intensive methods like averaging over orderings that enable a reasonable decomposition of the model variance. This article serves two purposes: to reconcile the large and somewhat fragmented body of recent literature on relative importance and to investigate the theoretical and empirical properties of the key competitors for decomposition of model variance.},
	number = {2},
	journal = {The American Statistician},
	author = {Gr{\"o}mping, Ulrike},
	year = {2007},
	publisher = {American Statistical Association, Taylor \& Francis, Ltd.},
	pages = {139--147},
}

@inproceedings{sun_axiomatic_2011,
	title = {Axiomatic {Attribution} for {Multilinear} {Functions}},
	url = {http://arxiv.org/abs/1102.0989},
	doi = {10.1145/1993574.1993601},
	abstract = {We study the attribution problem, that is, the problem of attributing a change in the value of a characteristic function to its independent variables. We make three contributions. First, we propose a formalization of the problem based on a standard cost sharing model. Second, we show that there is a unique attribution method that satisfies Dummy, Additivity, Conditional Nonnegativity, Affine Scale Invariance, and Anonymity for all characteristic functions that are the sum of a multilinear function and an additive function. We term this the Aumann-Shapley-Shubik method. Conversely, we show that such a uniqueness result does not hold for characteristic functions outside this class. Third, we study multilinear characteristic functions in detail; we describe a computationally efficient implementation of the Aumann-Shapley-Shubik method and discuss practical applications to pay-per-click advertising and portfolio analysis.},
	booktitle = {Proceedings of the 12th {ACM} Conference on Electronic Commerce},
	series = {{EC} '11},
	author = {Sun, Yi and Sundararajan, Mukund},
	year = {2011},
	eprint = {1102.0989},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Computer Science and Game Theory, J.4, K.6.0},
	pages = {177},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/2DJ32IXB/Sun and Sundararajan - 2011 - Axiomatic Attribution for Multilinear Functions.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/67DGTD68/1102.html:text/html},
}

@misc{janzing_quantifying_2021,
	title = {Quantifying intrinsic causal contributions via structure preserving interventions},
	url = {http://arxiv.org/abs/2007.00714},
	abstract = {We propose a new notion of causal contribution which describes the 'intrinsic' part of the contribution of a node on a target node in a DAG. We show that in some scenarios the existing causal quantification methods failed to capture this notion exactly. By recursively writing each node as a function of the upstream noise terms, we separate the intrinsic information added by each node from the one obtained from its ancestors. To interpret the intrinsic information as a causal contribution, we consider 'structure-preserving interventions' that randomize each node in a way that mimics the usual dependence on the parents and do not perturb the observed joint distribution. To get a measure that is invariant across arbitrary orderings of nodes we propose Shapley based symmetrization. We describe our contribution analysis for variance and entropy, but contributions for other target metrics can be defined analogously.},
	author = {Janzing, Dominik and Blöbaum, Patrick and Minorics, Lenon and Faller, Philipp and Mastakouri, Atalanti},
	month = nov,
	year = {2021},
	eprint = {2007.00714},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Statistics - Machine Learning, Computer Science - Information Theory},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/9DNVNPL9/Janzing et al. - 2021 - Quantifying intrinsic causal contributions via str.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/5YSWDAV7/2007.html:text/html},
}

@inproceedings{mittelstadt_explaining_2019,
	title = {Explaining {Explanations} in {AI}},
	url = {http://arxiv.org/abs/1811.01439},
	doi = {10.1145/3287560.3287574},
	abstract = {Recent work on interpretability in machine learning and AI has focused on the building of simplified models that approximate the true criteria used to make decisions. These models are a useful pedagogical device for teaching trained professionals how to predict what decisions will be made by the complex system, and most importantly how the system might break. However, when considering any such model it's important to remember Box's maxim that "All models are wrong but some are useful." We focus on the distinction between these models and explanations in philosophy and sociology. These models can be understood as a "do it yourself kit" for explanations, allowing a practitioner to directly answer "what if questions" or generate contrastive explanations without external assistance. Although a valuable ability, giving these models as explanations appears more difficult than necessary, and other forms of explanation may not have the same trade-offs. We contrast the different schools of thought on what makes an explanation, and suggest that machine learning might benefit from viewing the problem more broadly.},
	booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
	author = {Mittelstadt, Brent and Russell, Chris and Wachter, Sandra},
	month = jan,
	year = {2019},
	eprint = {1811.01439},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence},
	pages = {279--288},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/PAH4X62E/Mittelstadt et al. - 2019 - Explaining Explanations in AI.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/U5FHLQII/1811.html:text/html},
}

@misc{kilbertus_avoiding_2018,
	title = {Avoiding {Discrimination} through {Causal} {Reasoning}},
	url = {http://arxiv.org/abs/1706.02744},
	abstract = {Recent work on fairness in machine learning has focused on various statistical discrimination criteria and how they trade off. Most of these criteria are observational: They depend only on the joint distribution of predictor, protected attribute, features, and outcome. While convenient to work with, observational criteria have severe inherent limitations that prevent them from resolving matters of fairness conclusively. Going beyond observational criteria, we frame the problem of discrimination based on protected attributes in the language of causal reasoning. This viewpoint shifts attention from "What is the right fairness criterion?" to "What do we want to assume about the causal data generating process?" Through the lens of causality, we make several contributions. First, we crisply articulate why and when observational criteria fail, thus formalizing what was before a matter of opinion. Second, our approach exposes previously ignored subtleties and why they are fundamental to the problem. Finally, we put forward natural causal non-discrimination criteria and develop algorithms that satisfy them.},
	author = {Kilbertus, Niki and Rojas-Carulla, Mateo and Parascandolo, Giambattista and Hardt, Moritz and Janzing, Dominik and Schölkopf, Bernhard},
	month = jan,
	year = {2018},
	eprint = {1706.02744},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Computers and Society},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/2FZKDFK4/Kilbertus et al. - 2018 - Avoiding Discrimination through Causal Reasoning.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/Q42MSMSH/1706.html:text/html},
}

@misc{frye_shapley_2021-1,
	title = {Shapley explainability on the data manifold},
	url = {http://arxiv.org/abs/2006.01272},
	abstract = {Explainability in AI is crucial for model development, compliance with regulation, and providing operational nuance to predictions. The Shapley framework for explainability attributes a model's predictions to its input features in a mathematically principled and model-agnostic way. However, general implementations of Shapley explainability make an untenable assumption: that the model's features are uncorrelated. In this work, we demonstrate unambiguous drawbacks of this assumption and develop two solutions to Shapley explainability that respect the data manifold. One solution, based on generative modelling, provides flexible access to data imputations; the other directly learns the Shapley value-function, providing performance and stability at the cost of flexibility. While "off-manifold" Shapley values can (i) give rise to incorrect explanations, (ii) hide implicit model dependence on sensitive attributes, and (iii) lead to unintelligible explanations in higher-dimensional data, on-manifold explainability overcomes these problems.},
	author = {Frye, Christopher and de Mijolla, Damien and Begley, Tom and Cowton, Laurence and Stanley, Megan and Feige, Ilya},
	month = feb,
	year = {2021},
	eprint = {2006.01272},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {arXiv Fulltext PDF:/home/zduey/Zotero/storage/33B6A2PZ/Frye et al. - 2021 - Shapley explainability on the data manifold.pdf:application/pdf;arXiv.org Snapshot:/home/zduey/Zotero/storage/9ZA3QR78/2006.html:text/html},
}

@article{castro_polynomial_2009,
	title = {Polynomial calculation of the {Shapley} value based on sampling},
	volume = {36},
	issn = {0305-0548},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0305054808000804},
	doi = {10.1016/j.cor.2008.04.004},
	abstract = {In this paper we develop a polynomial method based on sampling theory that can be used to estimate the Shapley value (or any semivalue) for cooperative games. Besides analyzing the complexity problem, we examine some desirable statistical properties of the proposed approach and provide some computational results.},
	language = {en},
	number = {5},
	journal = {Computers \& Operations Research},
	author = {Castro, Javier and Gómez, Daniel and Tejada, Juan},
	month = may,
	year = {2009},
	pages = {1726--1730},
	file = {Castro et al. - 2009 - Polynomial calculation of the Shapley value based .pdf:/home/zduey/Zotero/storage/9EFHG6XH/Castro et al. - 2009 - Polynomial calculation of the Shapley value based .pdf:application/pdf},
}

@incollection{shapley_value_1953,
	address = {Princeton, NJ},
	title = {A {Value} for {$n$}-{Person} {Games}},
	volume = {2},
	booktitle = {Contributions to the {Theory} of {Games}},
	publisher = {Princeton University Press},
	author = {Shapley, Lloyd S.},
	editor = {Kuhn, Harold W. and Tucker, Albert W.},
	year = {1953},
	pages = {307--317},
}

@book{pearl_causal_2016,
	author    = {Pearl, Judea and Glymour, Madelyn and Jewell, Nicholas P.},
	title     = {Causal {Inference} in {Statistics}: {A} {Primer}},
	publisher = {Wiley},
	year      = {2016},
}

@book{pearl_causality_2009,
	author    = {Pearl, Judea},
	title     = {Causality: {Models}, {Reasoning}, and {Inference}},
	edition   = {Second},
	publisher = {Cambridge University Press},
	year      = {2009},
	isbn      = {978-0-521-89560-6},
}

@book{peters_elements_2017,
	address = {Cambridge, MA},
	title = {Elements of {Causal} {Inference}},
	isbn = {978-0-262-03731-0},
	publisher = {The MIT Press},
	author = {Peters, Jonas and Janzing, Dominik and Schölkopf, Bernhard},
	year = {2017},
}

@book{cunningham_causal_2021,
	author    = {Cunningham, Scott},
	title     = {Causal {Inference}: {The} {Mixtape}},
	publisher = {Yale University Press},
	address   = {New Haven, CT},
	year      = {2021},
	isbn      = {978-0-300-25168-5},
}

@misc{dominguez-olmedo_adversarial_nodate,
	title = {On the {Adversarial} {Robustness} of {Causal} {Algorithmic} {Recourse}},
	abstract = {Algorithmic recourse seeks to provide actionable recommendations for individuals to overcome unfavorable outcomes made by automated decision-making systems. The individual then exerts time and effort to positively change their circumstances. Recourse recommendations should ideally be robust to reasonably small changes in the circumstances of the individual seeking recourse. In this work, we formulate the adversarially robust recourse problem and show that methods that offer minimally costly recourse fail to be robust. We restrict ourselves to linear classifiers, and show that the adversarially robust recourse problem reduces to the standard recourse problem for some modified classifier with a shifted decision boundary. Finally, we derive bounds on the extra cost incurred by individuals seeking robust recourse, and discuss how to regulate this cost between the individual and the decision-maker.},
	language = {en},
	author = {Dominguez-Olmedo, Ricardo and Karimi, Amir-Hossein and Schölkopf, Bernhard},
	pagetotal = {11},
	file = {Dominguez-Olmedo et al. - On the Adversarial Robustness of Causal Algorithmi.pdf:/home/zduey/Zotero/storage/KZ7P6J83/Dominguez-Olmedo et al. - On the Adversarial Robustness of Causal Algorithmi.pdf:application/pdf},
}

@misc{karimi_algorithmic_2020,
	title = {Algorithmic {Recourse}: from {Counterfactual} {Explanations} to {Interventions}},
	shorttitle = {Algorithmic {Recourse}},
	url = {http://arxiv.org/abs/2002.06278},
	abstract = {As machine learning is increasingly used to inform consequential decision-making (e.g., pre-trial bail and loan approval), it becomes important to explain how the system arrived at its decision, and also suggest actions to achieve a favorable decision. Counterfactual explanations –“how the world would have (had) to be different for a desirable outcome to occur”– aim to satisfy these criteria. Existing works have primarily focused on designing algorithms to obtain counterfactual explanations for a wide range of settings. However, it has largely been overlooked that ultimately, one of the main objectives is to allow people to act rather than just understand. In layman’s terms, counterfactual explanations inform an individual where they need to get to, but not how to get there. In this work, we rely on causal reasoning to caution against the use of counterfactual explanations as a recommendable set of actions for recourse. Instead, we propose a shift of paradigm from recourse via nearest counterfactual explanations to recourse through minimal interventions, shifting the focus from explanations to interventions.},
	language = {en},
	author = {Karimi, Amir-Hossein and Schölkopf, Bernhard and Valera, Isabel},
	month = oct,
	year = {2020},
	eprint = {2002.06278},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {Karimi et al. - 2020 - Algorithmic Recourse from Counterfactual Explanat.pdf:/home/zduey/Zotero/storage/FNAPQ3PK/Karimi et al. - 2020 - Algorithmic Recourse from Counterfactual Explanat.pdf:application/pdf},
}

@book{lindeman_introduction_1980,
	address = {Glenview, IL},
	title = {Introduction to {Bivariate} and {Multivariate} {Analysis}},
	publisher = {Scott, Foresman},
	author = {Lindeman, Richard and Merenda, Peter and Gold, Ruth},
	year = {1980},
}

@article{stufken_hierarchical_1992,
	title = {On {Hierarchical} {Partitioning}},
	volume = {46},
	issn = {0003-1305},
	url = {http://www.jstor.org/stable/2684415},
	number = {1},
	journal = {The American Statistician},
	publisher = {American Statistical Association, Taylor \& Francis, Ltd.},
	author = {Stufken, John},
	year = {1992},
	pages = {70--71},
	file = {JSTOR Full Text PDF:/home/zduey/Zotero/storage/GXWILIRU/Dallal et al. - 1992 - Letters to the Editor.pdf:application/pdf},
}

@misc{feldman_proportional_2005,
	title = {The {Proportional} {Value} of a {Cooperative} {Game}},
	abstract = {The proportional value is the unique strictly consistent TU and NTU value which, in two-player TU games, gives players equal proportional gains from cooperation. Strict consistency means consistency with respect to the Hart and Mas-Colell (1989) reduced game. The proportional value is a nonlinear analog of the Shapley (1953) value in TU games and the egalitarian value (Kalai and Samet (1985)) in NTU games. It is derived from a ratio potential similar to the Hart and Mas-Colell (1989) difference potential. The proportional value is monotonic and is in the core of a log-convex game. It is also the unique equilibrium payoff configuration in a variation of the noncooperative bargaining game of Hart and Mas-Colell (1996) where players’ probabilities of participation at any point in the game are proportional to their expected payoff at that time. Thus, it is a model of endogenous power in cooperative games. Application to cost allocation problems is considered.},
	language = {en},
	author = {Feldman, Barry},
	year = {2005},
	pagetotal = {30},
	file = {Feldman - The Proportional Value of a Cooperative Game.pdf:/home/zduey/Zotero/storage/H6746EMN/Feldman - The Proportional Value of a Cooperative Game.pdf:application/pdf},
}

@misc{williamson_efficient_2020,
	title = {Efficient nonparametric statistical inference on population feature importance using {Shapley} values},
	url = {http://arxiv.org/abs/2006.09481},
	abstract = {The true population-level importance of a variable in a prediction task provides useful knowledge about the underlying data-generating mechanism and can help in deciding which measurements to collect in subsequent experiments. Valid statistical inference on this importance is a key component in understanding the population of interest. We present a computationally efficient procedure for estimating and obtaining valid statistical inference on the Shapley Population Variable Importance Measure (SPVIM). Although the computational complexity of the true SPVIM scales exponentially with the number of variables, we propose an estimator based on randomly sampling only Θ(n) feature subsets given n observations. We prove that our estimator converges at an asymptotically optimal rate. Moreover, by deriving the asymptotic distribution of our estimator, we construct valid confidence intervals and hypothesis tests. Our procedure has good finite-sample performance in simulations, and for an in-hospital mortality prediction task produces similar variable importance estimates when different machine learning algorithms are applied.},
	language = {en},
	author = {Williamson, Brian D. and Feng, Jean},
	month = jun,
	year = {2020},
	eprint = {2006.09481},
	archiveprefix = {arXiv},
	keywords = {Statistics - Machine Learning, Statistics - Methodology},
	file = {Williamson and Feng - 2020 - Efficient nonparametric statistical inference on p.pdf:/home/zduey/Zotero/storage/2G9QDMG2/Williamson and Feng - 2020 - Efficient nonparametric statistical inference on p.pdf:application/pdf},
}

@misc{covert_explaining_2020,
	title = {Explaining by {Removing}: {A} {Unified} {Framework} for {Model} {Explanation}},
	url = {http://arxiv.org/abs/2011.14878},
	abstract = {Researchers have proposed a wide variety of model explanation approaches, but it remains unclear how most methods are related or when one method is preferable to another. We describe a new unified class of methods, removal-based explanations, that are based on the principle of simulating feature removal to quantify each feature’s influence. These methods vary in several respects, so we develop a framework that characterizes each method along three dimensions: 1) how the method removes features, 2) what model behavior the method explains, and 3) how the method summarizes each feature’s influence. Our framework unifies 26 existing methods, including several of the most widely used approaches: SHAP, LIME, Meaningful Perturbations, and permutation tests. This newly understood class of explanation methods has rich connections that we examine using tools that have been largely overlooked by the explainability literature. To anchor removal-based explanations in cognitive psychology, we show that feature removal is a simple application of subtractive counterfactual reasoning. Ideas from cooperative game theory shed light on the relationships and trade-offs among different methods, and we derive conditions under which all removal-based explanations have information-theoretic interpretations. Through this analysis, we develop a unified framework that helps practitioners better understand model explanation tools, and that offers a strong theoretical foundation upon which future explainability research can build.},
	language = {en},
	author = {Covert, Ian C. and Lundberg, Scott and Lee, Su-In},
	year = {2020},
	eprint = {2011.14878},
	archiveprefix = {arXiv},
	pagetotal = {90},
	file = {Covert - Explaining by Removing A Uniﬁed Framework for Mod.pdf:/home/zduey/Zotero/storage/CBIS6B8C/Covert - Explaining by Removing A Uniﬁed Framework for Mod.pdf:application/pdf},
}

@misc{beckers_causal_2022,
	title = {Causal {Explanations} and {XAI}},
	url = {http://arxiv.org/abs/2201.13169},
	abstract = {Although standard Machine Learning models are optimized for making predictions about observations, more and more they are used for making predictions about the results of actions. An important goal of Explainable Artificial Intelligence (XAI) is to compensate for this mismatch by offering explanations about the predictions of an ML-model which ensure that they are reliably action-guiding. As action-guiding explanations are causal explanations, the literature on this topic is starting to embrace insights from the literature on causal models. Here I take a step further down this path by formally defining the causal notions of sufficient explanations and counterfactual explanations. I show how these notions relate to (and improve upon) existing work, and motivate their adequacy by illustrating how different explanations are action-guiding under different circumstances. Moreover, this work is the first to offer a formal definition of actual causation that is founded entirely in action-guiding explanations. Although the definitions are motivated by a focus on XAI, the analysis of causal explanation and actual causation applies in general. I also touch upon the significance of this work for fairness in AI by showing how actual causation can be used to improve the idea of path-specific counterfactual fairness.},
	language = {en},
	author = {Beckers, Sander},
	month = feb,
	year = {2022},
	eprint = {2201.13169},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence},
	file = {Beckers - 2022 - Causal Explanations and XAI.pdf:/home/zduey/Zotero/storage/4PW6ECDH/Beckers - 2022 - Causal Explanations and XAI.pdf:application/pdf},
}

@misc{pawelczyk_carla_2021,
	title = {{CARLA}: {A} {Python} {Library} to {Benchmark} {Algorithmic} {Recourse} and {Counterfactual} {Explanation} {Algorithms}},
	shorttitle = {{CARLA}},
	url = {http://arxiv.org/abs/2108.00783},
	abstract = {Counterfactual explanations provide means for prescriptive model explanations by suggesting actionable feature changes (e.g., increase income) that allow individuals to achieve favourable outcomes in the future (e.g., insurance approval). Choosing an appropriate method is a crucial aspect for meaningful counterfactual explanations. As documented in recent reviews, there exists a quickly growing literature with available methods. Yet, in the absence of widely available open–source implementations, the decision in favour of certain models is primarily based on what is readily available. Going forward – to guarantee meaningful comparisons across explanation methods – we present CARLA (Counterfactual And Recourse LibrAry), a python library for benchmarking counterfactual explanation methods across both different data sets and different machine learning models. In summary, our work provides the following contributions: (i) an extensive benchmark of 11 popular counterfactual explanation methods, (ii) a benchmarking framework for research on future counterfactual explanation methods, and (iii) a standardized set of integrated evaluation measures and data sets for transparent and extensive comparisons of these methods. We have open sourced CARLA and our experimental results on Github, making them available as competitive baselines. We welcome contributions from other research groups and practitioners.},
	language = {en},
	urldate = {2022-04-30},
	author = {Pawelczyk, Martin and Bielawski, Sascha and van den Heuvel, Johannes and Richter, Tobias and Kasneci, Gjergji},
	month = aug,
	year = {2021},
	eprint = {2108.00783},
	archiveprefix = {arXiv},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning},
	file = {Pawelczyk et al. - 2021 - CARLA A Python Library to Benchmark Algorithmic R.pdf:/home/zduey/Zotero/storage/YC3TQ5C7/Pawelczyk et al. - 2021 - CARLA A Python Library to Benchmark Algorithmic R.pdf:application/pdf},
}