Added Gibbs sampling (EXPERIMENTAL), added tests, other minor changes
zgornel committed Nov 24, 2017
1 parent b7a93c0 commit e685b90
Showing 6 changed files with 127 additions and 13 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -33,6 +33,7 @@ Two scenarios for network learning can be distinguished:
- **Collective inference**
- relaxation labeling
- collective classification
- Gibbs sampling (EXPERIMENTAL, slow)

- **Adjacency structures**
- matrices
@@ -292,8 +293,8 @@ The output of the above code is:

## Planned features

- Gibbs sampling collective inference
- Support for both column-major and row-major datasets; so far, only column-major datasets are supported (i.e. observations correspond to columns in the input/output matrices)
- Implement optional parallelism (where feasible)


## Documentation
110 changes: 104 additions & 6 deletions src/cinference.jl
@@ -62,8 +62,7 @@ function transform!(Xo::T, Ci::RelaxationLabelingInferer, Mr::M, fr_exec::E, RL:
f_targets = Ci.tf # function used to obtain targets
size_out = size(Xo,1) # output size (corresponds to the number of classes)
Xl = copy(Xo) # local estimates
ŷₗ = f_targets(Xo) # Obtain first the labels corresponding to the local model
ŷ = ŷₗ # and initialize the current estimates
ŷ = f_targets(Xo) # Obtain first the labels corresponding to the local model
ŷₒ = similar(ŷ) # and the 'previous' iteration estimates
AV = adjacency_matrix.(Adj) # Pre-calculate adjacency matrices
Xrᵢ = zeros(size_out,n) # Initialize temporary storage
@@ -120,8 +119,7 @@ function transform!(Xo::T, Ci::IterativeClassificationInferer, Mr::M, fr_exec::E
f_targets = Ci.tf # function used to obtain targets
size_out = size(Xo,1) # output size (corresponds to the number of classes)
Xl = copy(Xo) # local estimates
ŷₗ = f_targets(Xo) # Obtain first the labels corresponding to the local model
ŷ = ŷₗ # and initialize the current estimates
ŷ = f_targets(Xo) # Obtain first the labels corresponding to the local model
ŷₒ = similar(ŷ) # and the 'previous' iteration estimates
AV = adjacency_matrix.(Adj) # Pre-calculate adjacency matrices
Xrᵢⱼ = zeros(size_out,1) # Initialize temporary storage
@@ -173,11 +171,111 @@ function transform!(Xo::T, Ci::IterativeClassificationInferer, Mr::M, fr_exec::E
return Xo
end

function transform!(Xo::T, Ci::GibbsSamplingInferer, Mr::M, fr_exec::E, RL::R, Adj::A, offset::Int, Xr::S) where {
# Version of Gibbs sampling (experimental) similar to iterative classification (i.e. no sampling) from:
# P. Sen, G. Namata, M. Bilgic, L. Getoor, B. Gallagher, T. Eliassi-Rad "Collective classification in network data", AI Magazine 29(3), 2008

# Another (slower) alternative would be to assign to each observation a class sampled
# according to the class-wise probabilities of its neighbourhood. This, however,
# implies (1) sampling, which is slow, and (2) that it would work only for relational
# learners that make use of the neighbourhood class estimates (i.e. :rn and :bayesrn only).
function transform!(Xo::T, Ci::GibbsSamplingInferer, Mr::M, fr_exec::E, RL::R, Adj::A, offset::Int, Xr::S,
update::BitVector=trues(nobs(Xo))) where {
M, E,
T<:AbstractMatrix, R<:Vector{<:AbstractRelationalLearner},
A<:Vector{<:AbstractAdjacency}, S<:AbstractMatrix}

warn("Gibbs sampling not implemented, returning input (local model) estimates.")
# Initializations
n = nobs(Xr) # number of observations
updateable = find(update)
ordering = [i:i for i in updateable] # observation estimation order
maxiter = Ci.maxiter # maximum number of iterations
burniter = Ci.burniter # number of burn-in iterations
tol = Ci.tol # maximum error
f_targets = Ci.tf # function used to obtain targets
size_out = size(Xo,1) # output size (corresponds to the number of classes)
Xl = copy(Xo) # local estimates
ŷ = f_targets(Xo) # Obtain first the labels corresponding to the local model
ŷₒ = similar(ŷ) # and the 'previous' iteration estimates
AV = adjacency_matrix.(Adj) # Pre-calculate adjacency matrices
Xrᵢⱼ = zeros(size_out,1) # Initialize temporary storage

# Burn-in
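# (Burn-in sweeps update the estimates exactly like the main loop further below,
#  but no class counts are recorded, so counting starts from a roughly
#  stabilized label configuration.)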
@print_verbose 2 "\tRunning $burniter burn-in iterations ..."
for it in 1:burniter
shuffle!(ordering) # Randomize observation order

# Loop over observations and obtain individual estimates
for rⱼ in ordering

# Get data subsets pertinent to the current observation
Xrⱼ = datasubset(Xr, rⱼ)
Xoⱼ = datasubset(Xo, rⱼ)
ŷⱼ = datasubset(ŷ, rⱼ)

# Obtain relational data for the current observation
@inbounds for (i,(RLᵢ,Aᵢ)) in enumerate(zip(RL,AV))

# Apply relational learner
transform!(Xrᵢⱼ, RLᵢ, Aᵢ[:,rⱼ], Xo, ŷ) # TODO: Find a better compromise for adjacency access; views - slow for sparse matrices
# slicing - increases the number of allocations.
# Update relational data output for the current sample
Xrⱼ[offset+(i-1)*size_out+1 : offset+i*size_out,:] = Xrᵢⱼ
end

# Update estimates
Xoⱼ[:] = fr_exec(Mr, Xrⱼ)
ŷⱼ[:] = f_targets(Xoⱼ)
end
end

# Initialize class-counting structure
class_counts = zeros(size_out, n)

# Small helper that makes the class count work (even though it is not meaningful)
# for cases outside classification (i.e. when the input labels are floats)
_idx_(x::AbstractVector{Int}) = x
_idx_(x::AbstractVector) = 1
# Iterate
@print_verbose 2 "\tRunning $maxiter iterations ..."
for it in 1:maxiter
shuffle!(ordering) # Randomize observation order
copy!(ŷₒ, ŷ); # Update 'previous iteration' estimates

# Loop over observations and obtain individual estimates
for rⱼ in ordering

# Get data subsets pertinent to the current observation
Xrⱼ = datasubset(Xr, rⱼ)
Xoⱼ = datasubset(Xo, rⱼ)
ŷⱼ = datasubset(ŷ, rⱼ)

# Obtain relational data for the current observation
@inbounds for (i,(RLᵢ,Aᵢ)) in enumerate(zip(RL,AV))

# Apply relational learner
transform!(Xrᵢⱼ, RLᵢ, Aᵢ[:,rⱼ], Xo, ŷ) # TODO: Find a better compromise for adjacency access; views - slow for sparse matrices
# slicing - increases the number of allocations.
# Update relational data output for the current sample
Xrⱼ[offset+(i-1)*size_out+1 : offset+i*size_out,:] = Xrᵢⱼ
end

# Update estimates
Xoⱼ[:] = fr_exec(Mr, Xrⱼ)
ŷⱼ[:] = f_targets(Xoⱼ)
class_counts[_idx_(ŷⱼ),rⱼ].+=1.0
end

# Convergence check
if isequal(ŷ,ŷₒ) || mean(abs.(ŷ-ŷₒ))<=tol
@print_verbose 1 "Convergence reached at iteration $it."
break
else
@print_verbose 2 "\tIteration $it: $(sum(ŷ.!= ŷₒ)) estimates changed"
end
end

# Assign new estimates
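# (Each column of class_counts tallies how often each class was assigned to the
#  corresponding observation across the post-burn-in iterations; dividing by the
#  column sums turns the tallies into empirical per-class probabilities.)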
Xo[:, updateable] = (class_counts./sum(class_counts,1))[:,updateable]

return Xo
end
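For context, a minimal editor's sketch (not part of this commit) of the "slower alternative" described in the comments above: assigning each observation a class sampled according to the class-wise probabilities of its neighbourhood. The function name and the dense-adjacency assumption are hypothetical, and the code follows the Julia 0.6-era style of the surrounding sources:

```julia
using StatsBase   # provides sample() and Weights

# Hypothetical helper: one Gibbs sweep that *samples* labels rather than
# taking a deterministic update. A is a dense n×n adjacency matrix and
# Xo a k×n matrix of class probabilities (classes × observations).
function sample_labels_once(A::AbstractMatrix, Xo::AbstractMatrix)
    k, n = size(Xo)
    ŷ = Vector{Int}(n)                    # Julia 0.6-style allocation
    for j in 1:n
        w = zeros(k)
        for i in 1:n                      # weight neighbour class probabilities by edge strength
            A[i,j] > 0 && (w .+= A[i,j] .* Xo[:,i])
        end
        s = sum(w)
        p = s > 0 ? w./s : fill(1/k, k)   # uniform fallback for isolated observations
        ŷ[j] = sample(1:k, Weights(p))    # draw a class instead of taking an argmax
    end
    return ŷ
end
```

Sampling the label, rather than overwriting it with the most probable class, is what would make the chain a proper Gibbs sampler, at the cost of the slowdown the comment warns about.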
21 changes: 18 additions & 3 deletions src/entlearning.jl
@@ -7,6 +7,11 @@
# data is used as well


"""
Entity-based network learning model state. It consists of an `Array` with estimates and an update
mask in the form of a `BitVector` indicating which observation estimates are to be updated (the
ones that are not updated are considered training/stable observations).
"""
mutable struct NetworkLearnerState{T<:AbstractArray}
ê::T # estimates
update::BitVector # which estimates to update
@@ -21,6 +26,9 @@ Base.show(io::IO, m::NetworkLearnerState) = print(io, "NetworkLearner state: $(s



"""
Entity-based network learning model type.
"""
mutable struct NetworkLearnerEnt{S,V,
NS<:NetworkLearnerState,
R<:Vector{<:AbstractRelationalLearner},
@@ -52,6 +60,9 @@ end
####################
# Training methods #
####################
"""
Training method for the network learning framework.
"""
function fit(::Type{NetworkLearnerEnt}, Xo::AbstractMatrix, update::BitVector, Adj::A where A<:Vector{<:AbstractAdjacency},
fr_train, fr_exec;
priors::Vector{Float64}=1/size(Xo,1).*ones(size(Xo,1)), learner::Symbol=:wvrn, inference::Symbol=:rl,
@@ -98,8 +109,11 @@ end



"""
Training method for the network learning framework. This method should not be called directly.
"""
function fit(::Type{NetworkLearnerEnt}, Xo::T, update::BitVector, Adj::A, Rl::R, Ci::C, fr_train::U, fr_exec::U2;
priors::Vector{Float64}=1/size(Xo,1).*ones(size(Xo,1)), normalize::Bool=true, use_local_data::Bool=true) where {
priors::Vector{Float64}=1/size(Xo,1).*ones(size(Xo,1)), normalize::Bool=true) where {
T<:AbstractMatrix,
A<:Vector{<:AbstractAdjacency},
R<:Type{<:AbstractRelationalLearner},
@@ -151,8 +165,9 @@ end



# Function that calls collective inference using the information contained in the
# entity-based network learner
"""
Function that calls collective inference using the information contained in the entity-based network learner.
"""
function infer!(model::T) where T<:NetworkLearnerEnt
p = size(model.state.ê,1) # number of estimates/entity
m = length(model.Adj) * p # number of relational variables
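For orientation, a hedged usage sketch of the entity-based API matching the `fit`/`infer!` signatures shown above; `my_adjacency`, `fr_train`, and `fr_exec` are hypothetical placeholders, and `:gs` is assumed to be the symbol selecting the new Gibbs sampler (as the test changes below suggest):

```julia
# Editor's sketch, not from this commit; placeholder names are hypothetical.
n  = 100
Xo = rand(2, n)                      # local estimates: classes × entities
update = trues(n)                    # mask of entities whose estimates may change
update[1:50] = false                 # keep the first 50 fixed (training/stable)

Adj = [my_adjacency() for _ in 1:2]  # hypothetical Vector{<:AbstractAdjacency}

model = fit(NetworkLearnerEnt, Xo, update, Adj, fr_train, fr_exec;
            learner=:wvrn, inference=:gs)  # :gs assumed to select Gibbs sampling
infer!(model)                              # run collective inference over the network
model.state.ê                              # inferred estimates (see NetworkLearnerState)
```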
2 changes: 1 addition & 1 deletion src/obslearning.jl
@@ -2,7 +2,7 @@
# Observation-based learning #
##############################
"""
Observation-based netwok learning model type.
Observation-based network learning model type.
"""
mutable struct NetworkLearnerObs{T,U,S,V,
R<:Vector{<:AbstractRelationalLearner},
2 changes: 1 addition & 1 deletion test/t_entity_networklearner.jl
@@ -5,7 +5,7 @@ function t_entity_networklearner()
# Test the entity-based NetworkLearner #
#########################################
N= 100 # Number of entities
inferences = [:ic, :rl] # Collective inferences
inferences = [:ic, :rl, :gs] # Collective inferences
rlearners = [:rn, :wrn, :bayesrn] # Relational learners
nAdj = 2 # Number of adjacencies to generate
X = rand(1,N); # Training data
2 changes: 1 addition & 1 deletion test/t_observation_networklearner.jl
@@ -6,7 +6,7 @@ function t_observation_networklearner()
#############################################
Ntrain = 100 # Number of training observations
Ntest = 10 # Number of testing observations
inferences = [:ic, :rl] # Collective inferences
inferences = [:ic, :rl, :gs] # Collective inferences
rlearners = [:rn, :wrn, :bayesrn, :cdrn] # Relational learners
nAdj = 2 # Number of adjacencies to generate
X = rand(1,Ntrain); # Training data
