-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathprincipal.py
86 lines (71 loc) · 2.45 KB
/
principal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
## \file
## \ingroup tutorial_math
## \notebook
## Principal Components Analysis (PCA) example.
##
## Example of using TPrincipal as a stand alone class.
##
## I create n-dimensional data points, where c = trunc(n / 5) + 1
## are correlated with the rest n - c randomly distributed variables.
##
## Based on principal.C by Rene Brun and Christian Holm Christensen
##
## \macro_image
## \macro_output
## \macro_code
##
## \authors Juan Fernando, Jaramillo Botero
from ROOT import TPrincipal, gRandom, TBrowser, vector

# n  : dimensionality of each data point
# m  : number of data points
# c  : number of correlated (dependent) variables, c = trunc(n / 5) + 1
n = 10
m = 10000
c = n // 5 + 1

print("""*************************************************
* Principal Component Analysis *
* *
* Number of variables: {0:4d} *
* Number of data points: {1:8d} *
* Number of dependent variables: {2:4d} *
* *
*************************************************""".format(n, m, c))

# Initialise the TPrincipal object. Use the empty string for the
# final argument if you don't want the covariance
# matrix. Normalising the covariance matrix ("N") is a good idea if your
# variables have different orders of magnitude; "D" stores the data.
principal = TPrincipal(n, "ND")

# Use ROOT's global pseudo-random number generator.
randomNum = gRandom

# Reusable buffer holding one n-dimensional data point; filled and
# cleared once per iteration of the loop below.
data = vector('double')()

# Make the m data points.
for i in range(m):
    # First create the n - c un-correlated, random variables, cycling
    # through three different distributions.
    for j in range(n - c):
        if j % 3 == 0:
            data.push_back(randomNum.Gaus(5, 1))
        elif j % 3 == 1:
            data.push_back(randomNum.Poisson(8))
        else:
            data.push_back(randomNum.Exp(2))

    # Then create the c correlated variables as partial sums of the
    # un-correlated ones (variable j sums the first n - c - j of them).
    for j in range(c):
        data.push_back(0)
        for k in range(n - c - j):
            data[n - c + j] += data[k]

    # Finally we're ready to add this data point to the PCA.
    principal.AddRow(data.data())
    data.clear()

# Do the actual analysis.
principal.MakePrincipals()

# Print out the result.
principal.Print()

# Test the PCA.
principal.Test()

# Make some histograms of the original, principal, residue, etc. data.
principal.MakeHistograms()

# Make two functions to map between feature and pattern space.
principal.MakeCode()

# Start a browser, so that we may browse the histograms generated above.
b = TBrowser("principalBrowser", principal)