Permalink
Browse files

merge and resolve conflicts

  • Loading branch information...
2 parents 51e5084 + b8766ea commit 258a0174604c6c2801946dfee65eb80ad302bf8e @stross stross committed Jul 23, 2012
Showing with 2,692 additions and 1,125 deletions.
  1. +11 −5 Makefile
  2. +9 −9 Makefile.am
  3. +10 −0 README.hal3
  4. +3 −0 library/Makefile
  5. +131 −0 library/ezexample.h
  6. +2 −2 library/library_example.cc
  7. +75 −0 library/temp2.cc
  8. +12 −0 library/test.cc
  9. +4 −0 library/train
  10. +5 −0 library/train.sh
  11. BIN library/train.w
  12. +21 −21 test/RunTests
  13. +7 −7 test/train-sets/cs_test.ldf
  14. +19 −19 test/train-sets/ref/cs_test.ldf.csoaa.predict
  15. +8 −10 test/train-sets/ref/cs_test.ldf.csoaa.stderr
  16. +19 −19 test/train-sets/ref/cs_test.ldf.wap.predict
  17. +8 −10 test/train-sets/ref/cs_test.ldf.wap.stderr
  18. +9 −9 test/train-sets/ref/rcv1_small.stderr
  19. +9 −9 test/train-sets/ref/wiki1K.stderr
  20. +2 −2 test/train-sets/ref/zero.stderr
  21. +144 −0 utl/vw-regr
  22. +471 −0 utl/vw-varinfo
  23. +1 −1 vowpalwabbit/Makefile
  24. +2 −1 vowpalwabbit/Makefile.am
  25. +232 −0 vowpalwabbit/beam.cc
  26. +51 −0 vowpalwabbit/beam.h
  27. +3 −4 vowpalwabbit/bfgs.cc
  28. +748 −136 vowpalwabbit/csoaa.cc
  29. +22 −28 vowpalwabbit/csoaa.h
  30. +149 −113 vowpalwabbit/ect.cc
  31. +3 −3 vowpalwabbit/gd.cc
  32. +0 −1 vowpalwabbit/gd_mf.cc
  33. +1 −1 vowpalwabbit/io.h
  34. +32 −15 vowpalwabbit/oaa.cc
  35. +18 −2 vowpalwabbit/oaa.h
  36. +5 −4 vowpalwabbit/parse_args.cc
  37. +203 −140 vowpalwabbit/parse_example.cc
  38. +51 −20 vowpalwabbit/parse_primitives.h
  39. +2 −3 vowpalwabbit/parse_regressor.cc
  40. +12 −2 vowpalwabbit/parser.cc
  41. +50 −188 vowpalwabbit/searn.cc
  42. +10 −36 vowpalwabbit/searn.h
  43. +2 −2 vowpalwabbit/searn_sequencetask.cc
  44. +25 −20 vowpalwabbit/sequence.cc
  45. +2 −0 vowpalwabbit/simple_label.h
  46. +5 −0 vowpalwabbit/v_array.h
  47. +68 −11 vowpalwabbit/v_hashmap.h
  48. +1 −5 vowpalwabbit/vw.cc
  49. +9 −0 vowpalwabbit/vw.h
  50. +6 −256 vowpalwabbit/wap.cc
  51. +0 −11 vowpalwabbit/wap.h
View
@@ -1,16 +1,19 @@
COMPILER = g++
UNAME := $(shell uname)
+LIBS = -l boost_program_options -l pthread -l z
+BOOST_INCLUDE = /usr/include
ifeq ($(UNAME), FreeBSD)
LIBS = -l boost_program_options -l pthread -l z -l compat
BOOST_INCLUDE = /usr/local/include
-BOOST_LIBRARY = /usr/local/lib
-else
-LIBS = -l boost_program_options -l pthread -l z
+endif
+ifeq "CYGWIN" "$(findstring CYGWIN,$(UNAME))"
+LIBS = -l boost_program_options-mt -l pthread -l z
BOOST_INCLUDE = /usr/include
-BOOST_LIBRARY = /usr/local/lib
endif
+BOOST_LIBRARY = /usr/local/lib
+
ARCH = $(shell test `g++ -v 2>&1 | tail -1 | cut -d ' ' -f 3 | cut -d '.' -f 1,2` \< 4.3 && echo -march=nocona || echo -march=native)
#LIBS = -l boost_program_options-gcc34 -l pthread -l z
@@ -30,7 +33,10 @@ FLAGS = $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 -I $(BOOST_I
#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -pg -g
# for valgrind
-# FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0
+#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0
+
+# for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes'
+#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O3 -fomit-frame-pointer -ffast-math -fno-strict-aliasing
BINARIES = vw active_interactor
MANPAGES = vw.1
View
@@ -7,17 +7,17 @@ noinst_HEADERS = vowpalwabbit/accumulate.h vowpalwabbit/oaa.h \
vowpalwabbit/parse_example.h vowpalwabbit/cache.h \
vowpalwabbit/parse_primitives.h vowpalwabbit/comp_io.h \
vowpalwabbit/parse_regressor.h vowpalwabbit/constant.h \
- vowpalwabbit/parser.h vowpalwabbit/csoaa.h \
+ vowpalwabbit/parser.h vowpalwabbit/csoaa.h vowpalwabbit/beam.h \
vowpalwabbit/searn.h vowpalwabbit/ect.h vowpalwabbit/cb.h \
vowpalwabbit/searn_sequencetask.h vowpalwabbit/example.h \
- vowpalwabbit/sender.h vowpalwabbit/gd.h \
- vowpalwabbit/sequence.h vowpalwabbit/gd_mf.h \
- vowpalwabbit/simple_label.h vowpalwabbit/global_data.h \
- vowpalwabbit/sparse_dense.h vowpalwabbit/hash.h \
- vowpalwabbit/unique_sort.h vowpalwabbit/io.h \
- vowpalwabbit/v_array.h vowpalwabbit/lda_core.h \
- vowpalwabbit/v_hashmap.h vowpalwabbit/loss_functions.h \
- vowpalwabbit/network.h vowpalwabbit/wap.h vowpalwabbit/noop.h
+ vowpalwabbit/sender.h vowpalwabbit/gd.h vowpalwabbit/sequence.h \
+ vowpalwabbit/gd_mf.h vowpalwabbit/simple_label.h \
+ vowpalwabbit/global_data.h vowpalwabbit/sparse_dense.h \
+ vowpalwabbit/hash.h vowpalwabbit/unique_sort.h \
+ vowpalwabbit/io.h vowpalwabbit/v_array.h \
+ vowpalwabbit/lda_core.h vowpalwabbit/v_hashmap.h \
+ vowpalwabbit/loss_functions.h vowpalwabbit/network.h \
+ vowpalwabbit/wap.h vowpalwabbit/noop.h
ACLOCAL_AMFLAGS = -I acinclude.d
View
@@ -0,0 +1,10 @@
+To get John's changes:
+ git pull git://github.com/JohnLangford/vowpal_wabbit.git master
+
+To enable use of valgrind, do:
+ ./configure --enable-profiling
+
+To do fancy learning, do:
+ --exact_adaptive_norm --power_t 1
+or
+ --exact_adaptive_norm --power_t 0.5
View
@@ -1,2 +1,5 @@
+ezexample: temp2.cc ../vowpalwabbit/libvw.a
+ g++ -g -o $@ $< -L ../vowpalwabbit -l vw -l allreduce -l boost_program_options -l z -l pthread
+
library_example: library_example.cc ../vowpalwabbit/libvw.a
g++ -g -o $@ $< -L ../vowpalwabbit -l vw -l allreduce -l boost_program_options -l z -l pthread
View
@@ -0,0 +1,131 @@
+#ifndef EZEXAMPLE_H
+#define EZEXAMPLE_H
+
+#include <stdio.h>
+#include "../vowpalwabbit/vw.h"
+
+using namespace std;
+typedef uint32_t fid;
+
+// Tag type holding a single namespace character.  ezexample::operator()
+// has an overload taking a vw_namespace that opens a namespace (addns)
+// rather than adding a feature.
+struct vw_namespace {
+  char namespace_letter;
+
+  // Not marked explicit, so a char still converts to vw_namespace implicitly.
+  vw_namespace(const char letter) : namespace_letter(letter) {}
+};
+
+// Incremental builder for a single VW example.
+//
+// Namespaces are opened with addns() / operator()(vw_namespace), features are
+// appended to the most recently opened namespace with addf() / operator(),
+// and predict() / operator()() imports the accumulated data as an example,
+// runs vw_ref->learn on it and returns its final_prediction.
+//
+// Usage:
+//   ezexample ex(&my_vw);
+//   ex(vw_namespace('s'))("w^the")("w^man");
+//   float p = ex();
+//   --ex;   // drop the most recently opened namespace
+class ezexample {
+private:
+  vw* vw_ref;                     // used for hashing and learning; never freed here
+  vector<VW::feature_space> dat;  // one entry per open namespace, oldest first
+  vector<fid> past_seeds;         // seeds saved by addns(), restored by remns()
+  fid current_seed;               // seed used by hash(); 0 until a namespace is opened
+  vector<feature>* current_ns;    // features of dat.back(), or NULL when dat is empty
+  char str[2];                    // one-character C string scratch for VW::hash_space
+  bool pass_empty;                // when true, predict() also learns on an empty example
+  string mylabel;                 // label text parsed onto the example when non-empty
+
+  // Declared but not defined: copying is disabled (current_ns points into
+  // this object's own storage, so a member-wise copy would alias it).
+  ezexample(const ezexample & ex);
+  ezexample & operator=(const ezexample & ex);
+
+public:
+
+  // this_vw: the vw instance all hashing and learning goes through.
+  // pe:      stored as pass_empty (see predict()).
+  ezexample(vw*this_vw, bool pe=false)
+    : vw_ref(this_vw), current_seed(0), current_ns(NULL), pass_empty(pe) {
+    str[0] = ' '; str[1] = 0;
+  }
+
+  // Opens a new namespace named by the character c and makes it current.
+  // The previous seed is saved so remns() can restore it.
+  void addns(char c) {
+    str[0] = c;
+    dat.push_back( VW::feature_space(c, vector<feature>()) );
+    // push_back may reallocate, so always re-take the pointer from the vector.
+    current_ns = &( dat.back().second );
+    past_seeds.push_back(current_seed);
+    current_seed = VW::hash_space(*vw_ref, str);
+  }
+
+  // Removes the most recently opened namespace and restores the seed that
+  // was active before it was opened.  With no namespace open this only
+  // resets current_seed and current_ns.
+  void remns() {
+    if (dat.empty()) {
+      current_seed = 0;
+      current_ns = NULL;
+      return;
+    }
+
+    current_seed = past_seeds.back();
+    past_seeds.pop_back();
+    dat.pop_back();
+
+    // If that was the only namespace there is nothing left to point at;
+    // back() on an empty vector is undefined behaviour, so fall back to NULL
+    // (addf() then ignores features until a namespace is opened again).
+    if (dat.empty())
+      current_ns = NULL;
+    else
+      current_ns = &( dat.back().second );
+  }
+
+  // Hash a feature name using the current namespace's seed.
+  inline fid hash(string fstr) {
+    return VW::hash_feature(*vw_ref, fstr, current_seed);
+  }
+  inline fid hash(char* fstr) {
+    return VW::hash_feature_cstr(*vw_ref, fstr, current_seed);
+  }
+  // Hash a feature name using the seed of namespace c.  Note this overwrites
+  // the str scratch buffer but leaves current_seed untouched.
+  inline fid hash(char c, string fstr) {
+    str[0] = c;
+    return VW::hash_feature(*vw_ref, fstr, VW::hash_space(*vw_ref, str));
+  }
+  inline fid hash(char c, char* fstr) {
+    str[0] = c;
+    return VW::hash_feature_cstr(*vw_ref, fstr, VW::hash_space(*vw_ref, str));
+  }
+
+  // Appends feature (fint, val) to the current namespace and returns fint.
+  // Returns 0 and adds nothing when no namespace is open.
+  inline fid addf(fid fint, float val) {
+    if (!current_ns) return 0;
+    feature f = { val, fint };
+    current_ns->push_back(f);
+    return fint;
+  }
+  inline fid addf(fid fint           ) { return addf(fint      , 1.0); }
+  inline fid addf(string fstr, float val) { return addf(hash(fstr), val); }
+  inline fid addf(string fstr           ) { return addf(hash(fstr), 1.0); }
+
+  // Imports the accumulated namespaces as an example, applies mylabel if one
+  // was set, calls vw_ref->learn on it (and on an empty example afterwards
+  // when pass_empty is set), and returns the example's final_prediction.
+  float predict() {
+    // NOTE(review): function-local static -- created once, on the first
+    // predict() call, with that caller's vw_ref, and then shared by every
+    // ezexample instance (including ones built on a different vw).  It is
+    // also created when pass_empty is false.  Confirm this is intended.
+    static example* empty_example = VW::read_example(*vw_ref, (char*)"| ");
+    example *ec = VW::import_example(*vw_ref, dat);
+
+    if (mylabel.length() > 0)
+      VW::parse_example_label(*vw_ref, *ec, mylabel);
+
+    vw_ref->learn(vw_ref, ec);
+    if (pass_empty)
+      vw_ref->learn(vw_ref, empty_example);
+    // Read the prediction before the example is handed back to VW.
+    float pred = ec->final_prediction;
+    VW::finish_example(*vw_ref, ec);
+    return pred;
+  }
+
+  // Chaining sugar: every overload forwards to the named method above and
+  // returns *this so calls can be strung together.
+  inline ezexample& set_label(string label) { mylabel = label; return *this; }
+  inline ezexample& operator()(fid         fint           ) { addf(fint, 1.0); return *this; }
+  inline ezexample& operator()(string      fstr           ) { addf(fstr, 1.0); return *this; }
+  inline ezexample& operator()(const char* fstr           ) { addf(fstr, 1.0); return *this; }
+  inline ezexample& operator()(fid         fint, float val) { addf(fint, val); return *this; }
+  inline ezexample& operator()(string      fstr, float val) { addf(fstr, val); return *this; }
+  inline ezexample& operator()(const char* fstr, float val) { addf(fstr, val); return *this; }
+  inline ezexample& operator()(const vw_namespace&n) { addns(n.namespace_letter); return *this; }
+  inline ezexample& operator--() { remns(); return *this; }
+  inline float      operator()() { return predict(); }
+
+
+  // Debug dump of every namespace and its (weight_index, x) pairs to stderr.
+  void print() {
+    cerr << "ezexample dat->size=" << dat.size() << ", current_seed=" << current_seed << endl;
+    for (size_t i=0; i<dat.size(); i++) {
+      cerr << "  namespace(" << dat[i].first << "):" << endl;
+      for (size_t j=0; j<dat[i].second.size(); j++) {
+        cerr << "    " << dat[i].second[j].weight_index << "\t: " << dat[i].second[j].x << endl;
+      }
+    }
+  }
+};
+
+
+
+
+#endif
@@ -13,9 +13,9 @@ inline feature vw_feature_from_string(vw& v, string fstr, unsigned long seed, fl
int main(int argc, char *argv[])
{
- vw vw = VW::initialize("--hash all -q st --noconstant");
+ vw vw = VW::initialize("--hash all -q st --noconstant -i train.w");
- example *vec2 = VW::read_example(vw, "|s p^the_man w^the w^man |t p^le_homme w^le w^homme");
+ example *vec2 = VW::read_example(vw, "|s p^the_man w^the w^man |t p^un_homme w^un w^homme");
vw.learn(&vw, vec2);
cerr << "p2 = " << vec2->final_prediction << endl;
VW::finish_example(vw, vec2);
View
@@ -0,0 +1,75 @@
+#include <stdio.h>
+#include "../vowpalwabbit/vw.h"
+#include "ezexample.h"
+
+using namespace std;
+
+// Hashes fstr with the given seed via VW::hash_feature and returns a feature
+// initialised from val and that hash, in that order.
+inline feature vw_feature_from_string(vw& v, string fstr, unsigned long seed, float val)
+{
+  // Stored in a uint32_t first, as the original did, before brace-initialising.
+  const uint32_t hashed_index = VW::hash_feature(v, fstr, seed);
+  feature result = { val, hashed_index };
+  return result;
+}
+
+// Demo driver: initialises VW from train.w, builds two examples with the
+// ezexample interface and prints each prediction to stderr.
+int main(int argc, char *argv[])
+{
+  // INITIALIZE WITH WHATEVER YOU WOULD PUT ON THE VW COMMAND LINE -- THIS READS IN A MODEL FROM train.w
+  vw model = VW::initialize("--hash all -q st --noconstant -i train.w -t --quiet");
+
+  // HAL'S SPIFFY INTERFACE USING C++ CRAZINESS
+  ezexample ex(&model, false);
+
+  // namespace 's'
+  ex(vw_namespace('s'));
+  ex("p^the_man");
+  ex("w^the");
+  ex("w^man");
+
+  // namespace 't', first variant
+  ex(vw_namespace('t'));
+  ex("p^le_homme");
+  ex("w^le");
+  ex("w^homme");
+  cerr << "should be near zero = " << ex() << endl;
+
+  // remove the most recent namespace ('t') and rebuild it with other features
+  --ex;
+  ex(vw_namespace('t'));
+  ex("p^un_homme");
+  ex("w^un");
+  ex("w^homme");
+  cerr << "should be near one = " << ex() << endl;
+
+  // AND FINISH UP
+  model.finish(&model);
+  return 0;
+}
+
+ /*
+
+ */
+
+ /*
+ // JOHN'S CLUNKY INTERFACE USING STRINGS
+ example *vec1 = VW::read_example(vw, (char*)"|s p^the_man w^the w^man |t p^un_homme w^un w^homme");
+ vw.learn(&vw, vec1);
+ cerr << "p1 = " << vec1->final_prediction << endl;
+ VW::finish_example(vw, vec1);
+
+ example *vec2 = VW::read_example(vw, (char*)"|s p^the_man w^the w^man |t p^le_homme w^le w^homme");
+ vw.learn(&vw, vec2);
+ cerr << "p2 = " << vec2->final_prediction << endl;
+ VW::finish_example(vw, vec2);
+
+ // JOHN'S CLUNKY INTERFACE USING VECTORS
+ vector< VW::feature_space > ec_info;
+ vector<feature> s_features, t_features;
+ uint32_t s_hash = VW::hash_space(vw, "s");
+ uint32_t t_hash = VW::hash_space(vw, "t");
+ s_features.push_back( vw_feature_from_string(vw, "p^the_man", s_hash, 1.0) );
+ s_features.push_back( vw_feature_from_string(vw, "w^the", s_hash, 1.0) );
+ s_features.push_back( vw_feature_from_string(vw, "w^man", s_hash, 1.0) );
+ t_features.push_back( vw_feature_from_string(vw, "p^le_homme", t_hash, 1.0) );
+ t_features.push_back( vw_feature_from_string(vw, "w^le", t_hash, 1.0) );
+ t_features.push_back( vw_feature_from_string(vw, "w^homme", t_hash, 1.0) );
+ ec_info.push_back( VW::feature_space('s', s_features) );
+ ec_info.push_back( VW::feature_space('t', t_features) );
+ example* vec3 = VW::import_example(vw, ec_info);
+ vw.learn(&vw, vec3);
+ cerr << "p3 = " << vec3->final_prediction << endl;
+ VW::finish_example(vw, vec3);
+*/
View
@@ -0,0 +1,12 @@
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+// Scratch check: a vector nested inside a pair stored in an outer vector is
+// modified in place, and the new size is read back through the outer vector.
+int main(int argc, char**argv) {
+  typedef pair< char, vector<int> > entry_t;
+
+  vector<entry_t> entries;
+  entries.push_back( entry_t('a', vector<int>()) );
+
+  // Refer to the stored inner vector itself, not a copy of it.
+  vector<int>& inner = entries[0].second;
+  inner.push_back(0);
+
+  cout << "i want this to say one: " << entries[0].second.size() << endl;
+}
View
@@ -0,0 +1,4 @@
+1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme
+0 |s p^the_man w^the w^man |t p^le_homme w^le w^homme
+0 |s p^a_man w^a w^man |t p^un_homme w^un w^homme
+1 |s p^a_man w^a w^man |t p^le_homme w^le w^homme
View
@@ -0,0 +1,5 @@
+#!/bin/bash
+rm -f train.cache train.w  # start clean: remove files left by an earlier run (-f: no error if absent)
+../vowpalwabbit/vw -c -d train -f train.w -q st --passes 100 --hash all --noconstant  # training run over ./train; presumably -f writes the model later read with -i -- confirm against vw --help
+../vowpalwabbit/vw -t -d train -i train.w -p train.pred --noconstant  # second run over ./train loading train.w via -i; presumably -t is test-only and -p writes predictions to train.pred -- confirm
+
View
Binary file not shown.
Oops, something went wrong.

0 comments on commit 258a017

Please sign in to comment.