From 9f570b21bfad23be226fc7d10ec1ac9d4a77a633 Mon Sep 17 00:00:00 2001
From: Jaremy Creechley
Date: Thu, 2 Jan 2020 23:28:28 -0700
Subject: [PATCH 1/4] add debug logging to fmincg iteration2

---
 lib/matrex/algorithms.ex | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/matrex/algorithms.ex b/lib/matrex/algorithms.ex
index f94f324..7122088 100644
--- a/lib/matrex/algorithms.ex
+++ b/lib/matrex/algorithms.ex
@@ -270,24 +270,33 @@ defmodule Matrex.Algorithms do
   defp iteration2(%FMinCG{} = data) do
     data = tighten(data)
 
+    IO.puts("\n\n\n\n### Iter ###")
+
+    IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}")
+    IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}")
+    IO.puts("iter2 third: #{inspect {data.m, :=, 0}}")
+    IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}")
+
     cond do
       data.f2 in [:nan, :inf, :neg_inf] ->
+        IO.puts("iter2: inf ")
         {false, data}
 
       data.f2 > data.f1 + data.z1 * @rho * data.d1 or data.d2 > -@sig * data.d1 ->
+        IO.puts("iter2: first ")
         {false, data}
 
       data.d2 > @sig * data.d1 ->
-        # IO.puts("second #{data.d1}, #{data.d2}")
+        IO.puts("iter2: second #{data.d1}, #{data.d2}")
         {true, data}
 
       data.m == 0 ->
-        # IO.puts("third")
+        IO.puts("iter2: third")
         {false, data}
 
       true ->
         z2 = z2(data, data.limit)
-        # IO.puts("none, z2=#{z2}")
+        IO.puts("iter2: none, z2=#{z2}")
 
         data = %{
           data

From d07e8acf3507ecd8dcfb8e4571506260c9ff879 Mon Sep 17 00:00:00 2001
From: Jaremy Creechley
Date: Sat, 4 Jan 2020 00:11:05 -0700
Subject: [PATCH 2/4] add test for linear cost func

---
 lib/matrex/algorithms.ex | 97 ++++++++++++++++++++++++++++++++++++++-
 test/algorithms_test.exs | 27 +++++++++++
 test/rand_array.mtx      | Bin 0 -> 336 bytes
 test/rand_array_2.mtx    | Bin 0 -> 336 bytes
 4 files changed, 122 insertions(+), 2 deletions(-)
 create mode 100644 test/rand_array.mtx
 create mode 100644 test/rand_array_2.mtx

diff --git a/lib/matrex/algorithms.ex b/lib/matrex/algorithms.ex
index 7122088..440b858 100644
--- a/lib/matrex/algorithms.ex
+++ b/lib/matrex/algorithms.ex
@@ -410,12 +410,12 @@ defmodule Matrex.Algorithms do
   end
 
   @doc """
-  Linear regression cost and gradient function with regularization from Andrew Ng's course (ex3).
+  Logistic regression cost and gradient function with regularization from Andrew Ng's course (ex3).
 
   Computes the cost of using `theta` as the parameter for regularized logistic regression and the
   gradient of the cost w.r.t. to the parameters.
 
-  Compatible with `fmincg/4` algorithm from thise module.
+  Compatible with `fmincg/4` algorithm from this module.
 
   `theta` — parameters, to compute cost for
@@ -592,6 +592,99 @@ defmodule Matrex.Algorithms do
     accuracy
   end
 
+  @doc """
+  Linear regression cost and gradient function, with no normalization.
+
+  Computes the cost of using `theta` as the parameter for regularized linear regression and the
+  gradient of the cost w.r.t. the parameters.
+
+  Compatible with `fmincg/4` algorithm from this module.
+
+  `theta` — parameters, to compute cost for
+
+  `X` — training data input.
+
+  `y` — training data output.
+
+  `lambda` — regularization parameter.
+
+  """
+  @spec linear_cost_fun(Matrex.t(), {Matrex.t(), Matrex.t(), number}, non_neg_integer) ::
+          {float, Matrex.t()}
+  def linear_cost_fun(
+        %Matrex{} = theta,
+        {%Matrex{} = x, %Matrex{} = y, lambda} = _params,
+        _iteration \\ 0
+      ) when is_number(lambda) do
+    n = Enum.count(y)
+    h = x |> Matrex.dot(theta)
+
+    h_sub_y = Matrex.subtract(h, y)
+
+    j = 1.0 / (2 * n) *
+      (Matrex.dot(h_sub_y |> Matrex.transpose(), h_sub_y) |> Matrex.scalar())
+
+    grad = x |> Matrex.transpose() |> Matrex.dot(h_sub_y) |> Matrex.apply(&(&1 * lambda / n))
+
+    {j, grad}
+  end
+
+  @doc """
+  Fits a polynomial of the given degree to `x` and `y` using gradient descent (`fmincg/4`)
+  with `linear_cost_fun/3` as the cost function.
+
+  This approach produces decent results for many datasets. It isn't as efficient as using
+  least squares, but is still useful and provides a good example of how to optimize general
+  functions.
+
+  Note that gradient descent won't always converge well for polynomials with a linear cost function.
+  If this happens for your dataset, try adjusting the `opts` parameters. Uses the `fmincg/4` algorithm
+  from this module.
+
+  `x` — training data input.
+
+  `y` — training data output.
+
+  `opts` — algorithm parameters
+    `lambda` — regularization parameter.
+    `iterations` — maximum number of iterations.
+
+  """
+  @spec fit_poly(Matrex.t(), Matrex.t(), pos_integer, keyword()) ::
+          %{coefs: keyword(), error: float(), fun: (Matrex.t() -> Matrex.t())}
+  def fit_poly(x, y, degree, opts \\ []) do
+    iterations = Keyword.get(opts, :iterations, 100)
+    lambda = Keyword.get(opts, :lambda, 1.0)
+
+    {m, n} = Matrex.size(y)
+    unless m >= n, do: raise %ArgumentError{message: "y shape (m,n) must have m >= n"}
+
+    xx =
+      for i <- 0..degree, into: [] do
+        x |> Matrex.apply(&:math.pow(&1, i))
+      end |> Matrex.concat()
+
+    theta = Matrex.zeros(degree + 1, 1)
+
+    {sX, fX, _i} = Matrex.Algorithms.fmincg(&LRTest.linear_cost_fun/3, theta, {xx, y, lambda}, iterations)
+
+    coefs = sX |> Enum.to_list() |> Enum.with_index(0) |> Enum.map(fn {x,y} -> {y,x} end)
+    %{coefs: coefs, fun: &poly_func(&1, coefs), error: fX |> Enum.at(-1)}
+  end
+
+  def fit_linear(x, y, opts \\ []) do
+    fit_poly(x, y, 1, opts)
+  end
+
+  defp poly_func(x, coefs) when is_list(coefs) do
+    # coefs are {power, coefficient} pairs produced by fit_poly/4
+    x |> Enum.map(fn xi ->
+      Enum.reduce(coefs, 0.0, fn {i, f}, acc ->
+        acc + f * :math.pow(xi, i)
+      end)
+    end)
+  end
+
   @doc """
   Function of a surface with two hills.
""" diff --git a/test/algorithms_test.exs b/test/algorithms_test.exs index 3a48a4d..aef435c 100644 --- a/test/algorithms_test.exs +++ b/test/algorithms_test.exs @@ -115,4 +115,31 @@ defmodule AlgorithmsTest do {x_train, y_train, x_test, y_test} end + + + test "#linear_cost_fun computes cost" do + m = Matrex.load("test/rand_array.mtx") + y_t = m |> Matrex.submatrix(1..41, 2..2) + + # for linear func, must add `ones` for the offset constant + x = m |> Matrex.submatrix(1..41, 1..1) + x_t = Matrex.concat(Matrex.ones(Matrex.size(x)), x) + + lambda_t = 0.01 + theta_t = Matrex.zeros(2, 1) + + expected_j = 5238.50381097561 + + expected_grad = + Matrex.new( + " -0.91246 ; -2.41489 " + ) + + {j, grad} = Algorithms.linear_cost_fun(theta_t, {x_t, y_t, lambda_t}) + + assert grad |> Matrex.subtract(expected_grad) |> Matrex.sum() < 5.0e-6 + assert j == expected_j + + end + end diff --git a/test/rand_array.mtx b/test/rand_array.mtx new file mode 100644 index 0000000000000000000000000000000000000000..6e7e51934683139449506cf392d4c4244b72899c GIT binary patch literal 336 zcmdO7U|?VZ;-$4+lygX653Gwl*9?mGg-?U?TTcLXZ27rNEzG;5}$ z{jpqjCu3t{dnVyBkb3*(={innX=(Oy&HYXc3=Q@NdRlKGi- zlKX(-Y!3Hs)&bS?IP5kr1Ii0K{94%#a*x9%_wzt?3JxL{X93lzIq-Mh0jkq+*xtDs z7Rh=EFIXjxtxKCh=wx& D8HRrE literal 0 HcmV?d00001 diff --git a/test/rand_array_2.mtx b/test/rand_array_2.mtx new file mode 100644 index 0000000000000000000000000000000000000000..b5a574ed952f6c90f1dfe2891373f9dd2e10d4bf GIT binary patch literal 336 zcmdO7U|?VZ;h(a5{U&$1d`kqtmRJGwm9L<$?S&c04E2oq$U07g{y}#U<@8 zN1Hnt8ynj*d=CMsw_kfa&?zk~&E8K>3#hKa{+#eipm>k{t4E$7bL>4fEe6Ujv(G=i z2&iwfz4M>rKy?S~Z=5~@GT**PWjRpYO?!W~Pe66g>`%M%IU5^)wtv3hE684l|LyY5 zX=!W@YDx^wXV36BZ2Bb+loxjBj&^Vcy2s(+6+>qR1_cM7J?21lY7WV*)M9*t)9(Y-H8{*UeHSR+?7*_%jxzvj CiiXty literal 0 HcmV?d00001 From ed71fa1d81aef6da94d826e6c387c56b25c29b01 Mon Sep 17 00:00:00 2001 From: Jaremy Creechley Date: Sat, 4 Jan 2020 00:25:03 -0700 Subject: [PATCH 3/4] adding test for fit_poly --- lib/matrex/algorithms.ex | 22 +++++++++++----------- test/algorithms_test.exs | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/lib/matrex/algorithms.ex b/lib/matrex/algorithms.ex index 440b858..5b97a64 100644 --- a/lib/matrex/algorithms.ex +++ b/lib/matrex/algorithms.ex @@ -270,33 +270,33 @@ defmodule Matrex.Algorithms do defp iteration2(%FMinCG{} = data) do data = tighten(data) - IO.puts("\n\n\n\n### Iter ###") + # IO.puts("\n\n\n\n### Iter ###") - IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}") - IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}") - IO.puts("iter2 third: #{inspect {data.m, :=, 0}}") - IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}") + # IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}") + # IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}") + # IO.puts("iter2 third: #{inspect {data.m, :=, 0}}") + # IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}") cond do data.f2 in [:nan, :inf, :neg_inf] -> - IO.puts("iter2: inf ") + # IO.puts("iter2: inf ") {false, data} data.f2 > data.f1 + data.z1 * @rho * data.d1 or data.d2 > -@sig * data.d1 -> - IO.puts("iter2: first ") + # IO.puts("iter2: first ") {false, data} data.d2 > @sig * data.d1 -> - IO.puts("iter2: second #{data.d1}, #{data.d2}") + # IO.puts("iter2: second #{data.d1}, #{data.d2}") {true, data} data.m == 0 -> - IO.puts("iter2: third") + # IO.puts("iter2: third") 
From ed71fa1d81aef6da94d826e6c387c56b25c29b01 Mon Sep 17 00:00:00 2001
From: Jaremy Creechley
Date: Sat, 4 Jan 2020 00:25:03 -0700
Subject: [PATCH 3/4] add test for fit_poly

---
 lib/matrex/algorithms.ex | 22 +++++++++++-----------
 test/algorithms_test.exs | 26 ++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/lib/matrex/algorithms.ex b/lib/matrex/algorithms.ex
index 440b858..5b97a64 100644
--- a/lib/matrex/algorithms.ex
+++ b/lib/matrex/algorithms.ex
@@ -270,33 +270,33 @@ defmodule Matrex.Algorithms do
   defp iteration2(%FMinCG{} = data) do
     data = tighten(data)
 
-    IO.puts("\n\n\n\n### Iter ###")
+    # IO.puts("\n\n\n\n### Iter ###")
 
-    IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}")
-    IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}")
-    IO.puts("iter2 third: #{inspect {data.m, :=, 0}}")
-    IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}")
+    # IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}")
+    # IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}")
+    # IO.puts("iter2 third: #{inspect {data.m, :=, 0}}")
+    # IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}")
 
     cond do
       data.f2 in [:nan, :inf, :neg_inf] ->
-        IO.puts("iter2: inf ")
+        # IO.puts("iter2: inf ")
         {false, data}
 
       data.f2 > data.f1 + data.z1 * @rho * data.d1 or data.d2 > -@sig * data.d1 ->
-        IO.puts("iter2: first ")
+        # IO.puts("iter2: first ")
         {false, data}
 
       data.d2 > @sig * data.d1 ->
-        IO.puts("iter2: second #{data.d1}, #{data.d2}")
+        # IO.puts("iter2: second #{data.d1}, #{data.d2}")
         {true, data}
 
       data.m == 0 ->
-        IO.puts("iter2: third")
+        # IO.puts("iter2: third")
         {false, data}
 
       true ->
         z2 = z2(data, data.limit)
-        IO.puts("iter2: none, z2=#{z2}")
+        # IO.puts("iter2: none, z2=#{z2}")
 
         data = %{
           data
@@ -666,7 +666,7 @@ defmodule Matrex.Algorithms do
 
     theta = Matrex.zeros(degree + 1, 1)
 
-    {sX, fX, _i} = Matrex.Algorithms.fmincg(&LRTest.linear_cost_fun/3, theta, {xx, y, lambda}, iterations)
+    {sX, fX, _i} = fmincg(&linear_cost_fun/3, theta, {xx, y, lambda}, iterations)
 
     coefs = sX |> Enum.to_list() |> Enum.with_index(0) |> Enum.map(fn {x,y} -> {y,x} end)
     %{coefs: coefs, fun: &poly_func(&1, coefs), error: fX |> Enum.at(-1)}
diff --git a/test/algorithms_test.exs b/test/algorithms_test.exs
index aef435c..cd1fb69 100644
--- a/test/algorithms_test.exs
+++ b/test/algorithms_test.exs
@@ -142,4 +142,30 @@ defmodule AlgorithmsTest do
 
   end
 
+  test "#fit_poly" do
+    m = Matrex.load("test/rand_array.mtx")
+    y = m |> Matrex.submatrix(1..41, 2..2)
+    x = m |> Matrex.submatrix(1..41, 1..1)
+
+    fit = Algorithms.fit_poly(x, y, 2)
+
+    expected_fit = %{
+      coefs: [
+        {0, 37.48050308227539},
+        {1, 6.260676383972168},
+        {2, 6.991103172302246}
+      ],
+      error: 149.0388957698171
+    }
+
+    # IO.inspect(fit, label: :fit)
+    expected_coefs = expected_fit[:coefs] |> coefs_nums()
+    coefs = fit[:coefs] |> coefs_nums()
+
+    assert coefs |> Matrex.subtract(expected_coefs) |> Matrex.apply(:abs) |> Matrex.sum() < 1.0e-5
+  end
+
+  defp coefs_nums(c) do
+    [c |> Enum.map(&elem(&1, 1))] |> Matrex.new()
+  end
 end

From e51f9d415b02138020ca04b8fe0d8054de77a10d Mon Sep 17 00:00:00 2001
From: Jaremy Creechley
Date: Mon, 6 Jan 2020 13:20:24 -0700
Subject: [PATCH 4/4] remove extra logging

---
 lib/matrex/algorithms.ex | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/lib/matrex/algorithms.ex b/lib/matrex/algorithms.ex
index 5b97a64..631db6d 100644
--- a/lib/matrex/algorithms.ex
+++ b/lib/matrex/algorithms.ex
@@ -270,13 +270,6 @@ defmodule Matrex.Algorithms do
   defp iteration2(%FMinCG{} = data) do
     data = tighten(data)
 
-    # IO.puts("\n\n\n\n### Iter ###")
-
-    # IO.puts("iter2 first: #{inspect {data.f2, :>, data.f1 + data.z1 * @rho * data.d1, data.d2 > -@sig * data.d1}}")
-    # IO.puts("iter2 second: #{inspect {data.d2, :>, @sig * data.d1}}")
-    # IO.puts("iter2 third: #{inspect {data.m, :=, 0}}")
-    # IO.puts("iter2 data: #{inspect data |> Map.drop([:fParams])}")
-
     cond do
       data.f2 in [:nan, :inf, :neg_inf] ->
         # IO.puts("iter2: inf ")