In [2]:
from __future__ import division # safety with double division
from pyomo.environ import *
from pyomo.opt import SolverFactory
Opt = SolverFactory("gurobi")
M1 = AbstractModel()
M1.name = "Clustering Centroid-Distance LP"

M2 = AbstractModel()
M2.name = "Clustering Assignment LP"

## Parameters
- **d**: number of dimensions
- **n**: number of points to cluster
- **k**: number of clusters to generate

In [2]:
M1.NumberOfDimensions = Param(within=NonNegativeIntegers)
M1.NumberOfPoints = Param(within=NonNegativeIntegers)
M1.NumberOfClusters = Param(within=NonNegativeIntegers)

## Set
- **Dimension Index (D)**: Set consisting of all possible possible dimensions an arbitrary point i.e. $[x_1, x_2, x_3 ... x_d]$
- **Points (P)**: Set consisting of all indexes for Points in the system. $[p_1, p_2, p_3 ... p_n]$
- **Cluster Index (C)**: Set consisting of possible ClusterIndex. $[c_1, c_2, c_3 ... c_k]$

In [1]:
M1.DimensionIndex = RangeSet(1,M1.NumberOfDimensions)
M1.PointsIndex = RangeSet(1,M1.NumberOfPoints)
M1.ClusterIndex = RangeSet(1,M1.NumberOfClusters)

NameError: name 'RangeSet' is not defined

## Inputs
- **Point**: $P_{i,j}$ where $i$ $\in$ PointsIndex and $j$ $\in$ DimensionIndex 

In [4]:
M1.Point = Param(M1.PointsIndex,M1.DimensionIndex, within=Reals)

## Variables
- **Centroid**: $C_{i,d}$ where i $\in$ ClusterIndex and d $\in$ dimensionalIndex 
- **Assignment**: $A_{i,j}$ where i $\in$ pointsIndex and j $\in$ clusteringIndex


In [5]:
M1.Centroid=Var(M1.ClusterIndex, M1.DimensionIndex, within=Reals)
M1.Assignment=Var(M1.PointsIndex, M1.ClusterIndex, within=Binary)
M1.Slack_Plus = Var(M1.PointsIndex,M1.ClusterIndex,M1.DimensionIndex, within=NonNegativeReals)
M1.Slack_Minus = Var(M1.PointsIndex,M1.ClusterIndex,M1.DimensionIndex, within=NonNegativeReals)

## Objective Function
$$ \sum_{i \in Points}\sum_{j \in Clusters}\sum_{x \in Dimensions} A_{i,j}\cdot(S^{+}_{i,j,x}+S^{-}_{i,j,x}) $$

In [6]:
def ObjectiveFunction(M):
    return sum( \
        M.Assignment[i,j]*(M.Slack_Plus[i,j,x]+M.Slack_Minus[i,j,x])\
               for i in M.PointsIndex \
               for j in M.ClusterIndex \
               for x in M.DimensionIndex)
M.Distance = Objective(rule=ObjectiveFunction, sense=minimize)
    

## Constraints

### Constraint 1: Distance Constraint
Used to convert distance metric into 1-norm
$$0=P_{i,x}-C_{j,x}+(S^{+}_{i,j,x}-S^{-}_{i,j,x}) \qquad \forall i \in P, j\in C, x \in d $$

In [7]:
def DistanceConstraint(M,i,j,x):
    return 0 == M.Assignment[i,j]*(M.Point[i,x]-M.Centroid[j,x])+M.Slack_Plus[i,j,x]-M.Slack_Minus[i,j,x]
M.Norm = Constraint(M.PointsIndex, M.ClusterIndex, M.DimensionIndex, rule = DistanceConstraint)

### Constraint 2: Only assign to 1 cluster
This constraint ensures that for any arbitrary $P_i$ it is assigned to a singular Centroid $C_j$
$$ 1= \sum_{j\in C} A_{i,j} \qquad \forall i \in P $$


In [8]:
def SingularAssignment(M,i):
    return 1==sum(M.Assignment[i,j] for j in M.ClusterIndex)
M.SingularBalanceConstraint = Constraint(M.PointsIndex, rule = SingularAssignment)

### Constraint 3: Non-Empty Cluster
By definition an arbitrary cluster $C_j$ cannot be empty with respects to the number of points assigned to it
$$ 1 \leq \sum_{i\in P} A_{i,j} \qquad \forall j \in C $$

In [9]:
def NonEmptyAssignments(M,j):
    return 1<=sum(M.Assignment[i,j] for i in M.PointsIndex)
M.NonEmptyConstraint=Constraint(M.ClusterIndex, rule=NonEmptyAssignments)

## Create Problem and Solver Instance

In [10]:
instance = M.create_instance("Data/simpleTest.dat")
# Indicate which solver to use
Opt = SolverFactory("gurobi")

# Generate a solution
Soln = Opt.solve(instance)
instance.solutions.load_from(Soln)

# Print the output
print("Termination Condition was "+str(Soln.Solver.Termination_condition))
display(instance)

ERROR: Solver (gurobi) returned non-zero return code (1)
ERROR: Solver log: Academic license - for non-commercial use only Read LP
    format model from file
    /home/wongaz/CodingWorkspace/ClusteringThesis/tmp_snam4oc.pyomo.lp Reading
    time = 0.00 seconds x45: 7 rows, 45 columns, 17 nonzeros Optimize a model
    with 7 rows, 45 columns and 17 nonzeros Model has 32 quadratic objective
    terms Model has 16 quadratic constraints Variable types: 37 continuous, 8
    integer (8 binary) Coefficient statistics:
      Matrix range     [1e+00, 1e+00] QMatrix range    [1e+00, 1e+00] QLMatrix
      range   [1e+00, 2e+00] Objective range  [0e+00, 0e+00] QObjective range
      [2e+00, 2e+00] Bounds range     [1e+00, 1e+00] RHS range        [1e+00,
      1e+00]
    Presolve removed 1 rows and 1 columns Presolve time: 0.00s

    Explored 0 nodes (0 simplex iterations) in 0.00 seconds Thread count was 1
    (of 8 available processors)

    Solution count 0

    Best objective -, best bound -, g

ApplicationError: Solver (gurobi) did not exit normally