# Notebook for testing code

In [2]:
import numpy as np
import pandas as pd
import scipy

In [None]:
def distance_matrix(A, B, squared=False):
    """
    Compute all pairwise Euclidean distances between vectors in A and B.

    The squared distances are obtained from the expansion
    ``||a - b||^2 = a.a + b.b - 2 a.b`` so that a single matrix product
    yields the whole (M, N) table.

    NOTE: a later cell in this notebook runs
    ``from scipy.spatial import distance_matrix``, which rebinds this name
    to the SciPy function once that cell has been executed.

    Parameters
    ----------
    A : array_like
        shape should be (M, K)
    B : array_like
        shape should be (N, K)
    squared : bool, optional
        If True, return the squared distances; otherwise (default) return
        the distances themselves.

    Returns
    -------
    D : np.ndarray
        A float matrix D of shape (M, N).  Each entry D[i, j] represents the
        (squared, if requested) distance between row i in A and row j in B.

    Raises
    ------
    AssertionError
        If the rows of A and B do not have the same number of components.
    """
    # Accept lists / integer arrays too; the result is always floating point.
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)

    assert A.shape[1] == B.shape[1], (
        f"The number of components for vectors in A {A.shape[1]} "
        f"does not match that of B {B.shape[1]}!"
    )

    # Row-wise squared norms, shaped so that broadcasting builds the (M, N) grid.
    A_dots = (A * A).sum(axis=1)[:, np.newaxis]
    B_dots = (B * B).sum(axis=1)[np.newaxis, :]
    D_squared = A_dots + B_dots - 2.0 * A.dot(B.T)

    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is ~0; clamp them for both return modes.
    np.maximum(D_squared, 0.0, out=D_squared)

    if squared:
        return D_squared
    return np.sqrt(D_squared)

In [3]:
from scipy.spatial import distance_matrix

# Reference R implementation of the Yinyang data generator.
#
# The returned point cloud is the row-wise concatenation of:
#   * two moons produced by clusterSim::shapes.two.moon(n_m)
#     (labels taken from its `clusters` component),
#   * two Gaussian blobs of n_c points each, centred at (0.5, -1.5) and
#     (-1, 0.5) with standard deviation sd_c (labels 3 and 4),
#   * a ring of n_r points of radius 2.5 centred at (-0.25, -0.5), jittered
#     with Gaussian noise of standard deviation sd_r (label 5).
# When d > 2, d - 2 extra columns of independent Gaussian noise with
# standard deviation sd_high are appended.
#
# Returns list(data = <matrix>, clus = <vector of labels>).
Yinyang_data = function(n_m=400,n_c=200,n_r=2000,sd_c=0.1,sd_r=0.1, d=2, sd_high=0.1){
  # Without this guard X0 is never assigned for d < 2 and the function dies
  # later with the unhelpful "object 'X0' not found".
  if(d < 2){
    stop("d must be at least 2")
  }

  X_m = clusterSim::shapes.two.moon(n_m)

  # Two small circular blobs.
  x1 = cbind(stats::rnorm(n_c, mean= 0.5, sd=sd_c), stats::rnorm(n_c, mean= -1.5, sd=sd_c))
  x2 = cbind(stats::rnorm(n_c, mean= -1, sd=sd_c), stats::rnorm(n_c, mean= 0.5, sd=sd_c))

  # Outer ring: points on a circle plus isotropic Gaussian jitter.
  th = stats::runif(n_r,0,2*pi)
  x  = 2.5*cos(th) - 0.25
  y  = 2.5*sin(th) -0.5
  x3  = cbind(x,y) + matrix(stats::rnorm(2*length(x), sd=sd_r), ncol=2)

  X = rbind(X_m$data, x1, x2, x3)

  if(d==2){
    X0 = X
  } else {
    # Pad with pure-noise coordinates up to d dimensions.
    n = nrow(X)
    d_add = d-2
    X0 = cbind(X, matrix(stats::rnorm(d_add*(n),  sd = sd_high), nrow = n))
  }
  Y0 = c(X_m$clusters, rep(3,n_c), rep(4,n_c), rep(5,n_r))
  return(list(data = X0, clus = Y0))
}

In [None]:
    """
    Generating Yinyang data

    Parameters
    ----------
    n_m number of points in each of the two moon shapes. Positive integer value or vector with length=2
    n_c number of points in each of the two small circles
    n_r number of points in the outer ring
    sd_c the standard deviation of shifts for points in the two small circles
    sd_r the standard deviation of shifts for points in the outer ring
    d a number indicating the dimension of the generated data. d>2 dimensions are filled with independent Gaussian noises
    sd_high a number indicating the standard deviation for Gaussian noises in the higher dimensions
#' @
#' \itemize{
#'   \item 
#'   \item 
#' }
    Returns
    -------
    return A list of generated data points and the true labels
    
    data: The generated data matrix
    clus: The vector of true cluster labels for each data point
    """
def yinyang_data(n_m=400,n_c=200,n_r=2000,sd_c=0.1,sd_r=0.1, d=2, sd_high=0.1):
    

In [None]:
def twoMoon(numObjects = 180, shape1a = -0.4, shape2b = 1, shape1rFrom = 0.8,
    shape1rTo = 1.2, shape2rFrom = 0.8, shape2rTo = 1.2, outputCsv = "",
    outputCsv2 = "", outputColNames = True, outputRowNames = True, rng = None):
    """
    Generate a two-moon data set (Python port of the R two-moon generator).

    Every point is drawn at a uniformly random angle and a uniformly random
    radius.  Points of the first moon are folded onto the right half-plane
    and shifted by ``shape1a`` along x; points of the second moon are folded
    onto the left half-plane and shifted down by ``shape2b`` along y.

    Parameters
    ----------
    numObjects : int or sequence of two ints
        Number of points in each moon.  A single value is used for both.
    shape1a : float
        Horizontal shift applied to the first moon.
    shape2b : float
        Amount subtracted from the y coordinate of the second moon.
    shape1rFrom, shape1rTo : float
        Range of the uniformly drawn radius for the first moon.
    shape2rFrom, shape2rTo : float
        Range of the uniformly drawn radius for the second moon.
    outputCsv : str
        If non-empty, path of a comma-separated file to write.
    outputCsv2 : str
        If non-empty, path of a semicolon-separated file (decimal comma)
        to write.
    outputColNames : bool
        Whether the written files include a header row.
    outputRowNames : bool
        Whether the written files include the row index.
    rng : None, int or numpy.random.Generator, optional
        Seed or generator passed to ``numpy.random.default_rng``; pass a
        fixed value to make the output reproducible.

    Returns
    -------
    dict
        ``data`` : float np.ndarray of shape (n, 2) with the coordinates.
        ``clusters`` : integer np.ndarray of length n holding 1 for the
        first moon and 2 for the second.
    """
    rng = np.random.default_rng(rng)

    # Per-moon sizes; a scalar is broadcast to both moons.
    lo = np.broadcast_to(np.asarray(numObjects, dtype=int), (2,))
    n_first, n_second = int(lo[0]), int(lo[1])

    alpha = rng.uniform(0.0, 2.0 * np.pi, n_first + n_second)
    r = np.concatenate([
        rng.uniform(shape1rFrom, shape1rTo, n_first),
        rng.uniform(shape2rFrom, shape2rTo, n_second),
    ])
    x = r * np.cos(alpha)
    y = r * np.sin(alpha)

    # First moon: right half-ring, shifted horizontally.
    x[:n_first] = shape1a + np.abs(x[:n_first])
    # Second moon: left half-ring, shifted downwards.
    x[n_first:] = -np.abs(x[n_first:])
    y[n_first:] = y[n_first:] - shape2b

    data = np.column_stack([x, y])
    labels = np.repeat([1, 2], [n_first, n_second])

    # NOTE(review): the R helper `.toCsv` is not visible in this notebook, so
    # the column layout written here (cluster, x, y) is an assumption —
    # TODO confirm against the R package before relying on the files.
    for path, sep, decimal in ((outputCsv, ",", "."), (outputCsv2, ";", ",")):
        if path:
            frame = pd.DataFrame({"cluster": labels, "x": x, "y": y})
            frame.to_csv(path, sep=sep, decimal=decimal,
                         header=bool(outputColNames), index=bool(outputRowNames))

    return {"data": data, "clusters": labels}

In [7]:
# Same value as the default `numObjects` argument of `twoMoon`; presumably a
# scratch variable for trying out that function's body interactively — TODO confirm.
numObjects = 180


array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 