Skip to content

Commit

Permalink
RuleMiner now also uses the previously introduced Constraints class to
Browse files Browse the repository at this point in the history
apply rule consequent item constraints.

FP-growth-based constrained association rule mining is now complete. Next up: FP-stream!
  • Loading branch information
wimleers committed Mar 1, 2011
1 parent 70f723b commit 89c7b20
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 48 deletions.
27 changes: 7 additions & 20 deletions code/Analytics/Analyst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ namespace Analytics {
* The constraint type.
*/
void Analyst::addFrequentItemsetItemConstraint(ItemName item, ItemConstraintType type) {
if (!this->frequentItemsetItemConstraints.contains(type))
this->frequentItemsetItemConstraints.insert(type, QSet<ItemName>());
this->frequentItemsetItemConstraints[type].insert(item);
this->frequentItemsetItemConstraints.addItemConstraint(item, type);
}

/**
Expand All @@ -40,11 +38,9 @@ namespace Analytics {
// frequent itemsets.
// By also applying these item constraints to frequent itemset
// generation, we reduce the amount of work to be done to a minimum.
this->addFrequentItemsetItemConstraint(item, type);
this->frequentItemsetItemConstraints.addItemConstraint(item, type);

if (!this->ruleConsequentItemConstraints.contains(type))
this->ruleConsequentItemConstraints.insert(type, QSet<ItemName>());
this->ruleConsequentItemConstraints[type].insert(item);
this->ruleConsequentItemConstraints.addItemConstraint(item, type);
}


Expand All @@ -62,27 +58,18 @@ namespace Analytics {
void Analyst::performMining(const QList<QStringList> & transactions) {
qDebug() << "starting mining, # transactions: " << transactions.size();
FPGrowth * fpgrowth = new FPGrowth(transactions, ceil(this->minSupport * 4000));
for (int type = CONSTRAINT_POSITIVE_MATCH_ALL; type <= CONSTRAINT_NEGATIVE_MATCH_ANY; type++)
fpgrowth->setItemConstraints(this->frequentItemsetItemConstraints[(ItemConstraintType) type], (ItemConstraintType) type);
fpgrowth->setConstraints(this->frequentItemsetItemConstraints);
fpgrowth->setConstraintsToPreprocess(this->ruleConsequentItemConstraints);
QList<ItemList> frequentItemsets = fpgrowth->mineFrequentItemsets();
qDebug() << "frequent itemset mining complete, # frequent itemsets:" << frequentItemsets.size();

ItemList requirements;
foreach (ItemName name, this->ruleConsequentItemConstraints[CONSTRAINT_POSITIVE_MATCH_ANY]) {
#ifdef DEBUG
requirements.append(Item(fpgrowth->getItemID(name), fpgrowth->getItemIDNameHash()));
#else
requirements.append(Item(fpgrowth->getItemID(name)));
#endif
}

QList<AssociationRule> associationRules = RuleMiner::mineAssociationRules(frequentItemsets, this->minConfidence, requirements, fpgrowth);
this->ruleConsequentItemConstraints = fpgrowth->getPreprocessedConstraints();
QList<AssociationRule> associationRules = RuleMiner::mineAssociationRules(frequentItemsets, this->minConfidence, this->ruleConsequentItemConstraints, fpgrowth);
qDebug() << "mining association rules complete, # association rules:" << associationRules.size();

qDebug() << associationRules;

delete fpgrowth;
// exit(1);
}

}
5 changes: 3 additions & 2 deletions code/Analytics/Analyst.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <QStringList>

#include "Item.h"
#include "Constraints.h"
#include "FPGrowth.h"
#include "RuleMiner.h"
#include "FPNode.h"
Expand All @@ -32,8 +33,8 @@ namespace Analytics {
float minSupport;
float minConfidence;

QHash<ItemConstraintType, QSet<ItemName> > frequentItemsetItemConstraints;
QHash<ItemConstraintType, QSet<ItemName> > ruleConsequentItemConstraints;
Constraints frequentItemsetItemConstraints;
Constraints ruleConsequentItemConstraints;
};
}

Expand Down
4 changes: 4 additions & 0 deletions code/Analytics/Constraints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ namespace Analytics {
ItemConstraintType constraintType;
for (int i = CONSTRAINT_POSITIVE_MATCH_ALL; i <= CONSTRAINT_NEGATIVE_MATCH_ANY; i++) {
constraintType = (ItemConstraintType) i;

if (!this->itemConstraints.contains(constraintType))
continue;

foreach (ItemName constraint, this->itemConstraints[constraintType]) {
// Map ItemNames to ItemIDs.
if (constraint.compare(name) == 0) {
Expand Down
26 changes: 6 additions & 20 deletions code/Analytics/FPGrowth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,6 @@ namespace Analytics {
delete this->tree;
}

/**
* Set the requirements for frequent itemsets. Wildcards are allowed, e.g.
* "episode:*" will match "episode:foo", "episode:bar", etc.
*
* Note: wildcard items will be expanded to their corresponding item ids in
* FPGrowth::scanTransactions().
*
* @param constraints
* A list of constraints.
* @param type
* The item constraint type.
*/
void FPGrowth::setItemConstraints(const QSet<ItemName> & constraints, ItemConstraintType type) {
this->constraints.setItemConstraints(constraints, type);
}

/**
* Mine frequent itemsets. (First scan the transactions, then build the
* FP-tree, then generate the frequent itemsets from there.)
Expand Down Expand Up @@ -267,6 +251,7 @@ namespace Analytics {

// Consider this item for use with constraints.
this->constraints.preprocessItem(itemName, itemID);
this->constraintsToPreprocess.preprocessItem(itemName, itemID);
}
else
itemID = this->itemNameIDHash[itemName];
Expand All @@ -276,13 +261,14 @@ namespace Analytics {
}

// Discard infrequent items' SupportCount.
foreach (ItemID id, this->totalFrequentSupportCounts.keys()) {
if (this->totalFrequentSupportCounts[id] < this->minSupportAbsolute) {
this->totalFrequentSupportCounts.remove(id);
foreach (itemID, this->totalFrequentSupportCounts.keys()) {
if (this->totalFrequentSupportCounts[itemID] < this->minSupportAbsolute) {
this->totalFrequentSupportCounts.remove(itemID);

// Remove infrequent items' ids from the preprocessed
// constraints.
this->constraints.removeItem(id);
this->constraints.removeItem(itemID);
this->constraintsToPreprocess.removeItem(itemID);
}
}

Expand Down
7 changes: 5 additions & 2 deletions code/Analytics/FPGrowth.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ namespace Analytics {
FPGrowth(const QList<QStringList> & transactions, SupportCount minSupportAbsolute);
~FPGrowth();

// Core functionality.
void setItemConstraints(const QSet<ItemName> & constraints, ItemConstraintType type);
void setConstraints(const Constraints & constraints) { this->constraints = constraints; }
void setConstraintsToPreprocess(const Constraints & constraints) { this->constraintsToPreprocess = constraints; }
const Constraints & getPreprocessedConstraints() const { return this->constraintsToPreprocess; }

QList<ItemList> mineFrequentItemsets();

// Ability to calculate support for any itemset; necessary to
Expand Down Expand Up @@ -59,6 +61,7 @@ namespace Analytics {
// Properties.
FPTree * tree;
Constraints constraints;
Constraints constraintsToPreprocess;
QList<QStringList> transactions;

SupportCount minSupportAbsolute;
Expand Down
6 changes: 3 additions & 3 deletions code/Analytics/RuleMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ namespace Analytics {
/**
* An exact implementation of algorithm 6.2 on page 351 in the textbook.
*/
QList<AssociationRule> RuleMiner::mineAssociationRules(QList<ItemList> frequentItemsets, float minimumConfidence, ItemList ruleConsequentRequirements, const FPGrowth * fpgrowth) {
QList<AssociationRule> RuleMiner::mineAssociationRules(QList<ItemList> frequentItemsets, float minimumConfidence, const Constraints & ruleConsequentConstraints, const FPGrowth * fpgrowth) {
QList<AssociationRule> associationRules;
QList<ItemList> consequents;
bool hasConstraints = !ruleConsequentRequirements.empty();
bool hasConstraints = !ruleConsequentConstraints.empty();

QList<SupportCount> frequentItemsetsSupportCounts = RuleMiner::calculateSupportCountsForFrequentItemsets(frequentItemsets);

Expand All @@ -29,7 +29,7 @@ namespace Analytics {
// Store this consequent whenever no constraints are
// defined, or when constraints are defined and the
// consequent matches the constraints.
if (!hasConstraints || consequent.contains(ruleConsequentRequirements[0]))
if (!hasConstraints || ruleConsequentConstraints.matchItemset(consequent))
consequents.append(consequent);
}

Expand Down
3 changes: 2 additions & 1 deletion code/Analytics/RuleMiner.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define RULEMINER_H

#include "Item.h"
#include "Constraints.h"
#include "FPGrowth.h"
#include <QList>

Expand All @@ -14,7 +15,7 @@ namespace Analytics {

class RuleMiner {
public:
static QList<AssociationRule> mineAssociationRules(QList<ItemList> frequentItemsets, float minimumConfidence, ItemList ruleConsequentRequirements, const FPGrowth * fpgrowth);
static QList<AssociationRule> mineAssociationRules(QList<ItemList> frequentItemsets, float minimumConfidence, const Constraints & ruleConsequentConstraints, const FPGrowth * fpgrowth);

protected:
static QList<SupportCount> calculateSupportCountsForFrequentItemsets(QList<ItemList> frequentItemsets);
Expand Down

0 comments on commit 89c7b20

Please sign in to comment.