diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..7b48638 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,16 @@ +Cufflinks authors +Primary contact + +Cole Trapnell designed and wrote Cufflinks and Cuffdiff, with substantial technical input +from Geo Pertea, Brian Williams, Ali Mortazavi, Jeltje van Baren, Steven Salzberg, Barbara Wold, +and Lior Pachter. Geo Pertea wrote Cuffcompare. Adam Roberts made substantial improvements to +the abundance estimation model used by Cufflinks and Cuffdiff. + +"Tuxedo" Websites: + Cufflinks: http://cufflinks.cbcb.umd.edu + TopHat: http://tophat.cbcb.umd.edu + Bowtie: http://bowtie-bio.sf.net + +As of version 1.0, Cufflinks depends on and includes LOCFIT, a regression package originally written +by Catherine Loader and Jiayang Sun. Some modifications were made to LOCFIT. Modified source +for LOCFIT lives in src/locfit. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..69f08e2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +Copyright (C) 2003-2009 Cole Trapnell et al + +=========================================================================== +Boost Software License, Version 1.0 +=========================================================================== + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..83cb1b8 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,8 @@ + +ALWAYS_BUILT = src +SUBDIRS = $(ALWAYS_BUILT) +DIST_SUBDIRS = $(ALWAYS_BUILT) + +EXTRA_DIST = LICENSE + +.PHONY: FORCE diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..bc5d3c8 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,612 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = . +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/config.h.in \ + $(top_srcdir)/configure AUTHORS build-aux/config.guess \ + build-aux/config.sub build-aux/depcomp build-aux/install-sh \ + build-aux/missing +subdir = . +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/ax_boost_base.m4 \ + $(top_srcdir)/ax_boost_thread.m4 $(top_srcdir)/ax_bam.m4 \ + $(top_srcdir)/ax_check_zlib.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno configure.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-exec-recursive install-info-recursive \ + install-recursive installcheck-recursive installdirs-recursive \ + pdf-recursive ps-recursive uninstall-info-recursive \ + uninstall-recursive +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + { test ! -d $(distdir) \ + || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -fr $(distdir); }; } +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +distuninstallcheck_listfiles = find . -type f -print +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BAM_CPPFLAGS = @BAM_CPPFLAGS@ +BAM_LDFLAGS = @BAM_LDFLAGS@ +BAM_LIB = @BAM_LIB@ +BOOST_CPPFLAGS = @BOOST_CPPFLAGS@ +BOOST_LDFLAGS = @BOOST_LDFLAGS@ +BOOST_THREAD_LIB = @BOOST_THREAD_LIB@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PYTHON = @PYTHON@ +PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@ +PYTHON_PLATFORM = @PYTHON_PLATFORM@ +PYTHON_PREFIX = @PYTHON_PREFIX@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +ZLIB = @ZLIB@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pkgpyexecdir = @pkgpyexecdir@ +pkgpythondir = @pkgpythondir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +pyexecdir = @pyexecdir@ +pythondir = @pythondir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +ALWAYS_BUILT = src +SUBDIRS = $(ALWAYS_BUILT) +DIST_SUBDIRS = $(ALWAYS_BUILT) +EXTRA_DIST = LICENSE +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +am--refresh: + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \ + cd $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) + +config.h: stamp-h1 + @if test ! -f $@; then \ + rm -f stamp-h1; \ + $(MAKE) stamp-h1; \ + else :; fi + +stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status config.h +$(srcdir)/config.h.in: $(am__configure_deps) + cd $(top_srcdir) && $(AUTOHEADER) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f config.h stamp-h1 +uninstall-info-am: + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. +$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + $(am__remove_distdir) + mkdir $(distdir) + $(mkdir_p) $(distdir)/build-aux + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(mkdir_p) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + distdir) \ + || exit 1; \ + fi; \ + done + -find $(distdir) -type d ! 
-perm -755 -exec chmod a+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 + $(am__remove_distdir) + +dist-tarZ: distdir + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__remove_distdir) + +dist-shar: distdir + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__remove_distdir) + +dist dist-all: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir); chmod a+w $(distdir) + mkdir $(distdir)/_build + mkdir $(distdir)/_inst + chmod a-w $(distdir) + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && cd $(distdir)/_build \ + && ../configure --srcdir=.. --prefix="$$dc_install_base" \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck + $(am__remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e '1{h;s/./=/g;p;x;}' -e '$${p;x;}' +distuninstallcheck: + @cd $(distuninstallcheck_dir) \ + && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile config.h +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-recursive + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-info-am + +uninstall-info: uninstall-info-recursive + +.PHONY: $(RECURSIVE_TARGETS) CTAGS GTAGS all all-am am--refresh check \ + check-am clean clean-generic clean-recursive ctags \ + ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-shar \ + dist-tarZ dist-zip distcheck distclean distclean-generic \ + distclean-hdr distclean-recursive distclean-tags \ + distcleancheck distdir distuninstallcheck dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-exec install-exec-am install-info \ + install-info-am install-man install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic maintainer-clean-recursive \ + mostlyclean mostlyclean-generic mostlyclean-recursive pdf \ + pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \ + uninstall-info-am + + +.PHONY: FORCE +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/README b/README index e69de29..b3cfe8e 100644 --- a/README +++ b/README @@ -0,0 +1,74 @@ +CUFFLINKS +---------------------------- +Cufflinks is a reference-guided assembler for RNA-Seq experiments. It +simultaneously assembles transcripts from reads and estimates their relative +abundances, without using a reference annotation. The software expects as +input RNA-Seq read alignments in SAM format (http://samtools.sourceforge.net). 
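Before handing alignments to the assembler, it can be worth sanity-checking the input SAM file against the two requirements described in the following paragraphs (coordinate sorting, and the XS strand tag on spliced alignments). The shell sketch below is not part of Cufflinks and assumes standard GNU sort/awk; hits.sam is a placeholder file name.

# Check that hits.sam is sorted by reference (column 3) and then position (column 4).
grep -v '^@' hits.sam | sort -c -k 3,3 -k 4,4n && echo "hits.sam is coordinate-sorted"

# List spliced alignments (CIGAR contains an 'N' gap) that lack an XS:A strand tag;
# any output here points to records that may reduce assembly quality.
grep -v '^@' hits.sam | awk '$6 ~ /N/ && $0 !~ /XS:A:[+-]/' | head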
+ +Here's an example spliced read alignment record: +s6.25mer.txt-913508 16 chr1 4482736 255 14M431N11M * 0 0 CAAGATGCTAGGCAAGTCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:- + +This record includes a custom tag used by Cufflinks to determine the strand +of the mRNA from which this read originated. Often, RNA-Seq experiments lose +strand information, and so reads will map to either strand of the genome. +However, strandedness of spliced alignments can often be inferred from the +orientation of consensus splice sites, and Cufflinks requires that spliced +alignments have the custom strand tag XS, which has SAM attribute type "A", and +can take values of "+" or "-". If your RNA-Seq protocol is strand specific, +adding this tag to all alignments, including unspliced alignments, will +improve the assembly quality. + +The SAM records MUST BE SORTED by reference coordinate, like so: + +sort -k 3,3 -k 4,4n hits.sam + +The program is fully threaded, and when running with multiple threads, should +be run on a machine with plenty of RAM. 4 GB per thread is probably reasonable +for most experiments. Since many experiments feature a handful of genes that +are very abundantly transcribed, Cufflinks will spend much of its time +"assembling" a few genes. When using more than one thread, Cufflinks may +appear to "hang" while these genes are being assembled. + +Cufflinks assumes that library fragment lengths are size selected and normally +distributed. When using paired end RNA-Seq reads, you must take care to supply +Cufflinks with the mean and variance of the inner distances between mate +pairs. For the moment, Cufflinks doesn't support assembling mixtures of paired +end reads from different fragment size distributions. Mixtures of single +ended reads (of varying lengths) with paired ends are supported. + +Cufflinks also assumes that the donor does not contain major structural +variations with respect to the reference. Tumor genomes are often highly +rearranged, and while Cufflinks may eventually identify gene fusions and +gracefully handle genome breakpoints, users are encouraged to be careful when +using Cufflinks on tumor RNA-Seq experiments. + +The full manual may be found at http://cufflinks.cbcb.umd.edu + +CUFFCOMPARE +---------------------------- +Please see http://cufflinks.cbcb.umd.edu/manual.html + +REQUIREMENTS +--------------------------- + +Cufflinks is a standalone tool that requires gcc 4.0 or greater, and runs on +Linux and OS X. It depends on Boost (http://www.boost.org) version 1.38 or +higher. + +REFERENCES +--------------------------- +Cufflinks builds on many ideas, including some +proposed in the following papers: + +Ali Mortazavi, Brian A Williams, Kenneth McCue, Lorian Schaeffer and Barbara +Wold, "Mapping and quantifying mammalian transcriptomes by RNA-Seq", Nature +Methods, volume 5, 621 - 628 (2008) + +Hui Jiang and Wing Hung Wong, "Statistical Inferences for isoform expression", +Bioinformatics, 2009 25(8):1026-1032 + +Nicholas Eriksson, Lior Pachter, Yumi Mitsuya, Soo-Yon Rhee, Chunlin Wang, +Baback Gharizadeh, Mostafa Ronaghi, Robert W. Shafer, Niko Beerenwinkel, "Viral +population estimation using pyrosequencing", PLoS Computational Biology, +4(5):e1000074 + diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..7f99a5a --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,1031 @@ +# generated automatically by aclocal 1.9.6 -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, +# 2005 Free Software Foundation, Inc.
+# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +# Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.9"]) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION so it can be traced. +# This function is AC_REQUIREd by AC_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], + [AM_AUTOMAKE_VERSION([1.9.6])]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to +# `$srcdir', `$srcdir/..', or `$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is `.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[dnl Rely on autoconf to set up CDPATH properly. 
+AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 7 + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ(2.52)dnl + ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE]) +AC_SUBST([$1_FALSE]) +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + + +# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 8 + +# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "GCJ", or "OBJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +ifelse([$1], CC, [depcc="$CC" am_compiler_list=], + [$1], CXX, [depcc="$CXX" am_compiler_list=], + [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. + if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. 
+# This macro is AC_REQUIREd in _AM_DEPENDENCIES +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE(dependency-tracking, +[ --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH]) +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +#serial 3 + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[for mf in $CONFIG_FILES; do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named `Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # So let's grep whole file. + if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then + dirpart=`AS_DIRNAME("$mf")` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running `make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n 's/^U = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`AS_DIRNAME(["$file"])` + AS_MKDIR_P([$dirpart/$fdir]) + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done +done +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking +# is enabled. FIXME. This creates each `.P' file that we will +# need in order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) +]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 12 + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.58])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +# test to see if srcdir already configured +if test "`cd $srcdir && pwd`" != "`pwd`" && + test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) + AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version}) +AM_MISSING_PROG(AUTOCONF, autoconf) +AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version}) +AM_MISSING_PROG(AUTOHEADER, autoheader) +AM_MISSING_PROG(MAKEINFO, makeinfo) +AM_PROG_INSTALL_SH +AM_PROG_INSTALL_STRIP +AC_REQUIRE([AM_PROG_MKDIR_P])dnl +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES(CC)], + [define([AC_PROG_CC], + defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES(CXX)], + [define([AC_PROG_CXX], + defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl +]) +]) + + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. 
+ +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $1 | $1:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +install_sh=${install_sh-"$am_aux_dir/install-sh"} +AC_SUBST(install_sh)]) + +# Copyright (C) 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 2 + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 3 + +# AM_MAKE_INCLUDE() +# ----------------- +# Check to see how make treats includes. +AC_DEFUN([AM_MAKE_INCLUDE], +[am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo done +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +AC_MSG_CHECKING([for style of include used by $am_make]) +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# We grep out `Entering directory' and `Leaving directory' +# messages which can occur if `w' ends up in MAKEFLAGS. +# In particular we don't look at `^make:' because GNU make might +# be invoked under some other name (usually "gmake"), in which +# case it prints its new name instead of `make'. +if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then + am__include=include + am__quote= + _am_result=GNU +fi +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then + am__include=.include + am__quote="\"" + _am_result=BSD + fi +fi +AC_SUBST([am__include]) +AC_SUBST([am__quote]) +AC_MSG_RESULT([$_am_result]) +rm -f confinc confmf +]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# serial 4 + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it supports --run. +# If it does, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" +# Use eval to expand $SHELL +if eval "$MISSING --run true"; then + am_missing_run="$MISSING --run " +else + am_missing_run= + AC_MSG_WARN([`missing' script is too old or missing]) +fi +]) + +# Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_MKDIR_P +# --------------- +# Check whether `mkdir -p' is supported, fallback to mkinstalldirs otherwise. +# +# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories +# created by `make install' are always world readable, even if the +# installer happens to have an overly restrictive umask (e.g. 077). +# This was a mistake. There are at least two reasons why we must not +# use `-m 0755': +# - it causes special bits like SGID to be ignored, +# - it may be too restrictive (some setups expect 775 directories). +# +# Do not use -m 0755 and let people choose whatever they expect by +# setting umask. +# +# We cannot accept any implementation of `mkdir' that recognizes `-p'. +# Some implementations (such as Solaris 8's) are not thread-safe: if a +# parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c' +# concurrently, both version can detect that a/ is missing, but only +# one can create it and the other will error out. Consequently we +# restrict ourselves to GNU make (using the --version option ensures +# this.) +AC_DEFUN([AM_PROG_MKDIR_P], +[if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then + # We used to keeping the `.' as first argument, in order to + # allow $(mkdir_p) to be used without argument. As in + # $(mkdir_p) $(somedir) + # where $(somedir) is conditionally defined. However this is wrong + # for two reasons: + # 1. if the package is installed by a user who cannot write `.' + # make install will fail, + # 2. the above comment should most certainly read + # $(mkdir_p) $(DESTDIR)$(somedir) + # so it does not work when $(somedir) is undefined and + # $(DESTDIR) is not. + # To support the latter case, we have to write + # test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir), + # so the `.' trick is pointless. + mkdir_p='mkdir -p --' +else + # On NextStep and OpenStep, the `mkdir' command does not + # recognize any option. It will interpret all options as + # directories to create, and then abort because `.' already + # exists. + for d in ./-p ./--version; + do + test -d $d && rmdir $d + done + # $(mkinstalldirs) is defined by Automake if mkinstalldirs exists. + if test -f "$ac_aux_dir/mkinstalldirs"; then + mkdir_p='$(mkinstalldirs)' + else + mkdir_p='$(install_sh) -d' + fi +fi +AC_SUBST([mkdir_p])]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 3 + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# ------------------------------ +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), 1)]) + +# _AM_SET_OPTIONS(OPTIONS) +# ---------------------------------- +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# --------------------------------------------------------------------------- +# Adds support for distributing Python modules and packages. To +# install modules, copy them to $(pythondir), using the python_PYTHON +# automake variable. To install a package with the same name as the +# automake package, install to $(pkgpythondir), or use the +# pkgpython_PYTHON automake variable. +# +# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as +# locations to install python extension modules (shared libraries). +# Another macro is required to find the appropriate flags to compile +# extension modules. +# +# If your package is configured with a different prefix to python, +# users will have to add the install directory to the PYTHONPATH +# environment variable, or create a .pth file (see the python +# documentation for details). +# +# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will +# cause an error if the version of python installed on the system +# doesn't meet the requirement. MINIMUM-VERSION should consist of +# numbers and dots only. +AC_DEFUN([AM_PATH_PYTHON], + [ + dnl Find a Python interpreter. Python versions prior to 1.5 are not + dnl supported because the default installation locations changed from + dnl $prefix/lib/site-python in 1.4 to $prefix/lib/python1.5/site-packages + dnl in 1.5. + m4_define_default([_AM_PYTHON_INTERPRETER_LIST], + [python python2 python2.5 python2.4 python2.3 python2.2 dnl +python2.1 python2.0 python1.6 python1.5]) + + m4_if([$1],[],[ + dnl No version check is needed. + # Find any Python interpreter. + if test -z "$PYTHON"; then + AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :) + fi + am_display_PYTHON=python + ], [ + dnl A version check is needed. + if test -n "$PYTHON"; then + # If the user set $PYTHON, use it and don't search something else. + AC_MSG_CHECKING([whether $PYTHON version >= $1]) + AM_PYTHON_CHECK_VERSION([$PYTHON], [$1], + [AC_MSG_RESULT(yes)], + [AC_MSG_ERROR(too old)]) + am_display_PYTHON=$PYTHON + else + # Otherwise, try each interpreter until we find one that satisfies + # VERSION. 
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1], + [am_cv_pathless_PYTHON],[ + for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do + test "$am_cv_pathless_PYTHON" = none && break + AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break]) + done]) + # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON. + if test "$am_cv_pathless_PYTHON" = none; then + PYTHON=: + else + AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON]) + fi + am_display_PYTHON=$am_cv_pathless_PYTHON + fi + ]) + + if test "$PYTHON" = :; then + dnl Run any user-specified action, or abort. + m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])]) + else + + dnl Query Python for its version number. Getting [:3] seems to be + dnl the best way to do this; it's what "site.py" does in the standard + dnl library. + + AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version], + [am_cv_python_version=`$PYTHON -c "import sys; print sys.version[[:3]]"`]) + AC_SUBST([PYTHON_VERSION], [$am_cv_python_version]) + + dnl Use the values of $prefix and $exec_prefix for the corresponding + dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made + dnl distinct variables so they can be overridden if need be. However, + dnl general consensus is that you shouldn't need this ability. + + AC_SUBST([PYTHON_PREFIX], ['${prefix}']) + AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}']) + + dnl At times (like when building shared libraries) you may want + dnl to know which OS platform Python thinks this is. + + AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform], + [am_cv_python_platform=`$PYTHON -c "import sys; print sys.platform"`]) + AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform]) + + + dnl Set up 4 directories: + + dnl pythondir -- where to install python scripts. This is the + dnl site-packages directory, not the python standard library + dnl directory like in previous automake betas. This behavior + dnl is more consistent with lispdir.m4 for example. + dnl Query distutils for this directory. distutils does not exist in + dnl Python 1.5, so we fall back to the hardcoded directory if it + dnl doesn't work. + AC_CACHE_CHECK([for $am_display_PYTHON script directory], + [am_cv_python_pythondir], + [am_cv_python_pythondir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(0,0,prefix='$PYTHON_PREFIX')" 2>/dev/null || + echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`]) + AC_SUBST([pythondir], [$am_cv_python_pythondir]) + + dnl pkgpythondir -- $PACKAGE directory under pythondir. Was + dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is + dnl more consistent with the rest of automake. + + AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE]) + + dnl pyexecdir -- directory for installing python extension modules + dnl (shared libraries) + dnl Query distutils for this directory. distutils does not exist in + dnl Python 1.5, so we fall back to the hardcoded directory if it + dnl doesn't work. + AC_CACHE_CHECK([for $am_display_PYTHON extension module directory], + [am_cv_python_pyexecdir], + [am_cv_python_pyexecdir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(1,0,prefix='$PYTHON_EXEC_PREFIX')" 2>/dev/null || + echo "${PYTHON_EXEC_PREFIX}/lib/python${PYTHON_VERSION}/site-packages"`]) + AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir]) + + dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE) + + AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE]) + + dnl Run any user-specified action. 
+ $2 + fi + +]) + + +# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) +# --------------------------------------------------------------------------- +# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION. +# Run ACTION-IF-FALSE otherwise. +# This test uses sys.hexversion instead of the string equivalent (first +# word of sys.version), in order to cope with versions such as 2.2c1. +# hexversion has been introduced in Python 1.5.2; it's probably not +# worth to support older versions (1.5.1 was released on October 31, 1998). +AC_DEFUN([AM_PYTHON_CHECK_VERSION], + [prog="import sys, string +# split strings by '.' and convert to numeric. Append some zeros +# because we need at least 4 digits for the hex conversion. +minver = map(int, string.split('$2', '.')) + [[0, 0, 0]] +minverhex = 0 +for i in xrange(0, 4): minverhex = (minverhex << 8) + minver[[i]] +sys.exit(sys.hexversion < minverhex)" + AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 4 + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Just in case +sleep 1 +echo timestamp > conftest.file +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftest.file` + fi + rm -f conftest.file + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken +alias in your environment]) + fi + + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +AC_MSG_RESULT(yes)]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor `install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in `make install-strip', and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +dnl Don't test for $cross_compiling = yes, because it might be `maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 2 + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of `v7', `ustar', or `pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. +AM_MISSING_PROG([AMTAR], [tar]) +m4_if([$1], [v7], + [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'], + [m4_case([$1], [ustar],, [pax],, + [m4_fatal([Unknown tar format])]) +AC_MSG_CHECKING([how to create a $1 tar archive]) +# Loop over all known methods to create a tar archive until one works. +_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' +_am_tools=${am_cv_prog_tar_$1-$_am_tools} +# Do not fold the above two line into one, because Tru64 sh and +# Solaris sh will not grok spaces in the rhs of `-'. +for _am_tool in $_am_tools +do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; + do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar /dev/null 2>&1 && break + fi +done +rm -rf conftest.dir + +AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) +AC_MSG_RESULT([$am_cv_prog_tar_$1])]) +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + diff --git a/ax_bam.m4 b/ax_bam.m4 new file mode 100644 index 0000000..7d463b7 --- /dev/null +++ b/ax_bam.m4 @@ -0,0 +1,203 @@ +# SYNOPSIS +# +# AX_BAM +# +# DESCRIPTION +# +# Test for the BAM libraries of a particular version (or newer) +# +# If no path to the installed bam library is given the macro searchs +# under /usr, /usr/local, /opt and /opt/local and evaluates the +# $BAM_ROOT environment variable. +# Adapted from AX_BOOST_BASE +# +# This macro calls: +# +# AC_SUBST(BAM_CPPFLAGS) / AC_SUBST(BAM_LDFLAGS) +# +# And sets: +# +# HAVE_BAM +# +# LICENSE +# +# Copyright (c) 2010 Cole Trapnell +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. + +AC_DEFUN([AX_BAM], +[ +AC_ARG_WITH([bam], + AS_HELP_STRING([--with-bam@<:@=DIR@:>@], [use BAM libraries (default is yes) - it is possible to specify the root directory for BAM (optional)]), + [ + if test "$withval" = "no"; then + want_bam="no" + elif test "$withval" = "yes"; then + want_bam="yes" + ac_bam_path="" + else + want_bam="yes" + ac_bam_path="$withval" + fi + ], + [want_bam="yes"]) + + +AC_ARG_WITH([bam-libdir], + AS_HELP_STRING([--with-bam-libdir=LIB_DIR], + [Force given directory for bam libraries. 
Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your bam libraries are located.]), + [ + if test -d $withval + then + ac_bam_lib_path="$withval" + else + AC_MSG_ERROR(--with-bam-libdir expected directory name) + fi + ], + [ac_bam_lib_path=""] +) + +if test "x$want_bam" = "xyes"; then +# bam_lib_version_req=ifelse([$1], ,1.20.0,$1) +# bam_lib_version_req_shorten=`expr $bam_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` +# bam_lib_version_req_major=`expr $bam_lib_version_req : '\([[0-9]]*\)'` +# bam_lib_version_req_minor=`expr $bam_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` +# bam_lib_version_req_sub_minor=`expr $bam_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` +# if test "x$bam_lib_version_req_sub_minor" = "x" ; then +# bam_lib_version_req_sub_minor="0" +# fi +# WANT_BAM_VERSION=`expr $bam_lib_version_req_major \* 100000 \+ $bam_lib_version_req_minor \* 100 \+ $bam_lib_version_req_sub_minor` + AC_MSG_CHECKING(for bamlib) + succeeded=no + + dnl first we check the system location for bam libraries + if test "$ac_bam_path" != ""; then + BAM_LDFLAGS="-L$ac_bam_path/lib" + BAM_CPPFLAGS="-I$ac_bam_path/include" + else + for ac_bam_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_bam_path_tmp/include/bam" && test -r "$ac_bam_path_tmp/include/bam"; then + BAM_LDFLAGS="-L$ac_bam_path_tmp/lib" + BAM_CPPFLAGS="-I$ac_bam_path_tmp/include" + break; + fi + done + fi + + dnl overwrite ld flags if we have required special directory with + dnl --with-bam-libdir parameter + if test "$ac_bam_lib_path" != ""; then + BAM_LDFLAGS="-L$ac_bam_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BAM_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BAM_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + + dnl if we found no bam with system layout we search for bam libraries + dnl built and installed without the --layout=system option or for a staged(not installed) version + if test "x$succeeded" != "xyes"; then + _version=0 + if test "$ac_bam_path" != ""; then + if test -d "$ac_bam_path" && test -r "$ac_bam_path"; then + for i in `ls -d $ac_bam_path/include/bam-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_bam_path##" | sed 's/\/include\/bam-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BAM_CPPFLAGS="-I$ac_bam_path/include/bam-$VERSION_UNDERSCORE" + done + fi + else + for ac_bam_path in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_bam_path" && test -r "$ac_bam_path"; then + for i in `ls -d $ac_bam_path/include/bam-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_bam_path##" | sed 's/\/include\/bam-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_bam_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BAM_CPPFLAGS="-I$best_path/include/bam-$VERSION_UNDERSCORE" + if test "$ac_bam_lib_path" = "" + then + BAM_LDFLAGS="-L$best_path/lib" + fi + + if test "x$BAM_ROOT" != "x"; then + if test -d "$BAM_ROOT" && test -r "$BAM_ROOT" && test -d "$BAM_ROOT/stage/lib" && test -r "$BAM_ROOT/stage/lib"; then + version_dir=`expr 
//$BAM_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/bam_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_bam_lib_path" = "" ; then + AC_MSG_NOTICE(We will use a staged bam library from $BAM_ROOT) + BAM_CPPFLAGS="-I$BAM_ROOT" + BAM_LDFLAGS="-L$BAM_ROOT/stage/lib" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BAM_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BAM_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + fi + + if test "$succeeded" != "yes" ; then + if test "$_version" = "0" ; then + AC_MSG_ERROR([[We could not detect the bam libraries (version $bam_lib_version_req_shorten or higher). If you have a staged bam library (still not installed) please specify \$BAM_ROOT in your environment and do not give a PATH to --with-bam option. If you are sure you have bam installed, then check your version number looking in . See http://randspringer.de/bam for more documentation.]]) + else + AC_MSG_NOTICE([Your bam libraries seem too old (version $_version).]) + fi + else + BAM_LIB="-lbam" + AC_SUBST(BAM_CPPFLAGS) + AC_SUBST(BAM_LDFLAGS) + AC_SUBST(BAM_LIB) + AC_DEFINE(HAVE_BAM,,[define if the BAM library is available]) + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" +fi + +]) diff --git a/ax_boost_base.m4 b/ax_boost_base.m4 new file mode 100644 index 0000000..e1504ba --- /dev/null +++ b/ax_boost_base.m4 @@ -0,0 +1,219 @@ +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_boost_base.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_BASE([MINIMUM-VERSION]) +# +# DESCRIPTION +# +# Test for the Boost C++ libraries of a particular version (or newer) +# +# If no path to the installed boost library is given the macro searchs +# under /usr, /usr/local, /opt and /opt/local and evaluates the +# $BOOST_ROOT environment variable. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) +# +# And sets: +# +# HAVE_BOOST +# +# LICENSE +# +# Copyright (c) 2008 Thomas Porschberg +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. + +AC_DEFUN([AX_BOOST_BASE], +[ +AC_ARG_WITH([boost], + AS_HELP_STRING([--with-boost@<:@=DIR@:>@], [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ac_boost_path="" + else + want_boost="yes" + ac_boost_path="$withval" + fi + ], + [want_boost="yes"]) + + +AC_ARG_WITH([boost-libdir], + AS_HELP_STRING([--with-boost-libdir=LIB_DIR], + [Force given directory for boost libraries. 
Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]), + [ + if test -d $withval + then + ac_boost_lib_path="$withval" + else + AC_MSG_ERROR(--with-boost-libdir expected directory name) + fi + ], + [ac_boost_lib_path=""] +) + +if test "x$want_boost" = "xyes"; then + boost_lib_version_req=ifelse([$1], ,1.20.0,$1) + boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` + boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` + boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` + boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` + if test "x$boost_lib_version_req_sub_minor" = "x" ; then + boost_lib_version_req_sub_minor="0" + fi + WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` + AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req) + succeeded=no + + dnl first we check the system location for boost libraries + dnl this location ist chosen if boost libraries are installed with the --layout=system option + dnl or if you install boost with RPM + if test "$ac_boost_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_path/lib" + BOOST_CPPFLAGS="-I$ac_boost_path/include" + else + for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then + BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib" + BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" + break; + fi + done + fi + + dnl overwrite ld flags if we have required special directory with + dnl --with-boost-libdir parameter + if test "$ac_boost_lib_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + + + + dnl if we found no boost with system layout we search for boost libraries + dnl built and installed without the --layout=system option or for a staged(not installed) version + if test "x$succeeded" != "xyes"; then + _version=0 + if test "$ac_boost_path" != ""; then + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" + done + fi + else + for ac_boost_path in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_boost_path + 
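                        dnl (descriptive note: at this point _version and best_path track the
                        dnl highest-versioned include/boost-X_Y directory seen so far; with a
                        dnl hypothetical /usr/local/include/boost-1_46 layout this would leave
                        dnl _version=1.46 and best_path=/usr/local)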
fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + if test "$ac_boost_lib_path" = "" + then + BOOST_LDFLAGS="-L$best_path/lib" + fi + + if test "x$BOOST_ROOT" != "x"; then + if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then + version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then + AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) + BOOST_CPPFLAGS="-I$BOOST_ROOT" + BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + fi + + if test "$succeeded" != "yes" ; then + if test "$_version" = "0" ; then + AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation.]]) + else + AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) + fi + else + AC_SUBST(BOOST_CPPFLAGS) + AC_SUBST(BOOST_LDFLAGS) + AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" +fi + +]) diff --git a/ax_boost_thread.m4 b/ax_boost_thread.m4 new file mode 100644 index 0000000..d1d42f6 --- /dev/null +++ b/ax_boost_thread.m4 @@ -0,0 +1,143 @@ +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_boost_thread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_THREAD +# +# DESCRIPTION +# +# Test for Thread library from the Boost C++ libraries. The macro requires +# a preceding call to AX_BOOST_BASE. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_THREAD_LIB) +# +# And sets: +# +# HAVE_BOOST_THREAD +# +# LICENSE +# +# Copyright (c) 2009 Thomas Porschberg +# Copyright (c) 2009 Michael Tindal +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. + +AC_DEFUN([AX_BOOST_THREAD], +[ + AC_ARG_WITH([boost-thread], + AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@], + [use the Thread library from boost - it is possible to specify a certain library for the linker + e.g. 
--with-boost-thread=boost_thread-gcc-mt ]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_thread_lib="" + else + want_boost="yes" + echo "using $withval" + ax_boost_user_thread_lib="$withval" + fi + ], + [want_boost="yes"] + ) + + if test "x$want_boost" = "xyes"; then + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([AC_CANONICAL_BUILD]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_CACHE_CHECK(whether the Boost::Thread library is available, + ax_cv_boost_thread, + [AC_LANG_PUSH([C++]) + CXXFLAGS_SAVE=$CXXFLAGS + + if test "x$build_os" = "xsolaris" ; then + CXXFLAGS="-pthreads $CXXFLAGS" + elif test "x$build_os" = "xming32" ; then + CXXFLAGS="-mthreads $CXXFLAGS" + else + CXXFLAGS="-pthread $CXXFLAGS" + fi + AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], + [[boost::thread_group thrds; + return 0;]]), + ax_cv_boost_thread=yes, ax_cv_boost_thread=no) + CXXFLAGS=$CXXFLAGS_SAVE + AC_LANG_POP([C++]) + ]) + if test "x$ax_cv_boost_thread" = "xyes"; then + if test "x$build_os" = "xsolaris" ; then + BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" + elif test "x$build_os" = "xming32" ; then + BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" + else + BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" + fi + + AC_SUBST(BOOST_CPPFLAGS) + + AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + + LDFLAGS_SAVE=$LDFLAGS + case "x$build_os" in + *bsd* ) + LDFLAGS="-pthread $LDFLAGS" + break; + ;; + esac + if test "x$ax_boost_user_thread_lib" = "x"; then + for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], + [link_thread="no"]) + done + if test "x$link_thread" != "xyes"; then + for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], + [link_thread="no"]) + done + fi + + else + BOOST_THREAD_LIB="$ax_boost_user_thread_lib"; + AC_SUBST(BOOST_THREAD_LIB) + link_thread="yes"; + + + fi + if test "x$link_thread" = "xno"; then + AC_MSG_ERROR(Could not link against $ax_lib !) + else + case "x$build_os" in + *bsd* ) + BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS" + break; + ;; + esac + + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi +]) diff --git a/ax_check_zlib.m4 b/ax_check_zlib.m4 new file mode 100644 index 0000000..012023c --- /dev/null +++ b/ax_check_zlib.m4 @@ -0,0 +1,126 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_zlib.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_ZLIB() +# +# DESCRIPTION +# +# This macro searches for an installed zlib library. 
If nothing was +# specified when calling configure, it searches first in /usr/local and +# then in /usr. If the --with-zlib=DIR is specified, it will try to find +# it in DIR/include/zlib.h and DIR/lib/libz.a. If --without-zlib is +# specified, the library is not searched at all. +# +# If either the header file (zlib.h) or the library (libz) is not found, +# the configuration exits on error, asking for a valid zlib installation +# directory or --without-zlib. +# +# The macro defines the symbol HAVE_LIBZ if the library is found. You +# should use autoheader to include a definition for this symbol in a +# config.h file. Sample usage in a C/C++ source is as follows: +# +# #ifdef HAVE_LIBZ +# #include +# #endif /* HAVE_LIBZ */ +# +# LICENSE +# +# Copyright (c) 2008 Loic Dachary +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 7 + +AU_ALIAS([CHECK_ZLIB], [AX_CHECK_ZLIB]) +AC_DEFUN([AX_CHECK_ZLIB], +# +# Handle user hints +# +[AC_MSG_CHECKING(if zlib is wanted) +AC_ARG_WITH(zlib, +[ --with-zlib=DIR root directory path of zlib installation [defaults to + /usr/local or /usr if not found in /usr/local] + --without-zlib to disable zlib usage completely], +[if test "$withval" != no ; then + AC_MSG_RESULT(yes) + if test -d "$withval" + then + ZLIB_HOME="$withval" + else + AC_MSG_WARN([Sorry, $withval does not exist, checking usual places]) + fi +else + AC_MSG_RESULT(no) +fi], +[AC_MSG_RESULT(yes)]) + +ZLIB_HOME=/usr/local +if test ! 
-f "${ZLIB_HOME}/include/zlib.h" +then + ZLIB_HOME=/usr +fi + +# +# Locate zlib, if wanted +# +if test -n "${ZLIB_HOME}" +then + ZLIB_OLD_LDFLAGS=$LDFLAGS + ZLIB_OLD_CPPFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -L${ZLIB_HOME}/lib" + CPPFLAGS="$CPPFLAGS -I${ZLIB_HOME}/include" + AC_LANG_SAVE + AC_LANG_C + AC_CHECK_LIB(z, inflateEnd, [zlib_cv_libz=yes], [zlib_cv_libz=no]) + AC_CHECK_HEADER(zlib.h, [zlib_cv_zlib_h=yes], [zlib_cv_zlib_h=no]) + AC_LANG_RESTORE + if test "$zlib_cv_libz" = "yes" -a "$zlib_cv_zlib_h" = "yes" + then + # + # If both library and header were found, use them + # + AC_CHECK_LIB(z, inflateEnd) + AC_MSG_CHECKING(zlib in ${ZLIB_HOME}) + AC_MSG_RESULT(ok) + ZLIB="-lz" + AC_SUBST(ZLIB) + else + # + # If either header or library was not found, revert and bomb + # + AC_MSG_CHECKING(zlib in ${ZLIB_HOME}) + LDFLAGS="$ZLIB_OLD_LDFLAGS" + CPPFLAGS="$ZLIB_OLD_CPPFLAGS" + AC_MSG_RESULT(failed) + AC_MSG_ERROR(either specify a valid zlib installation with --with-zlib=DIR or disable zlib usage with --without-zlib) + fi +fi + +]) diff --git a/build-aux/config.guess b/build-aux/config.guess new file mode 100755 index 0000000..917bbc5 --- /dev/null +++ b/build-aux/config.guess @@ -0,0 +1,1463 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + +timestamp='2005-07-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner . +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. 
There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". 
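	# Illustration with hypothetical values: an ELF NetBSD 3.0 host whose
	# "sysctl -n hw.machine_arch" reports i386 works out, via the logic
	# below, to the triplet i386-unknown-netbsdelf3.0.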
+ sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerppc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. 
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
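	# Illustration with hypothetical values: a host whose "uname -m" contains
	# "falcon" and whose system is FreeMiNT 1.16 matches the
	# *falcon*:*MiNT:*:* pattern below and is reported as m68k-atari-mint1.16.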
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? ) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 
's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + i*:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + x86:Interix*:[34]*) + echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' + exit ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + 
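    # The Linux arms in this stretch all follow the same shape: map `uname -m`
    # to a <cpu>-unknown-linux-gnu triplet (ppc above gives
    # powerpc-unknown-linux-gnu; x86_64 further below gives
    # x86_64-unknown-linux-gnu).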
ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. + echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #ifdef __INTEL_COMPILER + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` + test x"${LIBC}" != x && { + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit + } + test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386. + echo i386-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
+ echo i586-pc-beos + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + *86) UNAME_PROCESSOR=i686 ;; + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. + if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/build-aux/config.sub b/build-aux/config.sub new file mode 100755 index 0000000..1c366df --- /dev/null +++ b/build-aux/config.sub @@ -0,0 +1,1579 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + +timestamp='2005-07-08' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. 
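As a minimal sketch of what config.sub does with typical inputs — traced by hand through the case tables in this file, not part of the patch, and assuming the script is invoked from the source tree root where the diff places it:

    for alias in i686-linux sun4 amd64; do
        sh build-aux/config.sub "$alias"
    done
    # expected output, per the tables below:
    #   i686-pc-linux-gnu
    #   sparc-sun-sunos4.1.1
    #   x86_64-pc-none
    # An unrecognized name prints an "Invalid configuration" message on
    # stderr and exits with status 1, which is how callers distinguish a
    # meaningless name from a merely unsupported one.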
+ +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ + kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. 
+ ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32r | m32rle | m68000 | m68k | m88k | maxq | mcore \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | ms1 \ + | msp430 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b \ + | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m32c) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. 
+ 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | ms1-* \ + | msp430-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa-* \ + | ymp-* \ + | z8k-*) + ;; + m32c-*) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16c) + basic_machine=cr16c-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + 
basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + 
;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + 
tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. 
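 # A few worked normalizations, read off the arms of this case statement
 # (an illustrative sketch, not an exhaustive list):
 #   -solaris    ->  -solaris2
 #   -sunos5.8   ->  -solaris2.8
 #   -linux      ->  -linux-gnu
 #   -svr4       ->  -sysv4
 #   -unixware   ->  -sysv4.2uw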
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. 
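# For instance, hand-traced through the default-OS table below (a sketch,
# not normative output of any particular release):
#   config.sub decstation  ->  mips-dec-ultrix4.2
#   config.sub vax         ->  vax-dec-ultrix4.2
#   config.sub rs6000      ->  rs6000-ibm-aix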
+ +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. +vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/build-aux/depcomp b/build-aux/depcomp new file mode 100755 index 0000000..04701da --- /dev/null +++ b/build-aux/depcomp @@ -0,0 +1,530 @@ +#! 
/bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2005-07-09.11 + +# Copyright (C) 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try \`$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by `PROGRAMS ARGS'. + object Object file output by `PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputing dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. + "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. 
+## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> $depfile + echo >> $depfile + + # The second pass generates a dummy entry for each header file. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> $depfile + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` + tmpdepfile="$stripped.u" + if test "$libtool" = yes; then + "$@" -Wc,-M + else + "$@" -M + fi + stat=$? 
+ + if test -f "$tmpdepfile"; then : + else + stripped=`echo "$stripped" | sed 's,^.*/,,'` + tmpdepfile="$stripped.u" + fi + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + + if test -f "$tmpdepfile"; then + outname="$stripped.o" + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" + sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # With Tru64 cc, shared objects can also be used to make a + # static library. This mecanism is used in libtool 1.4 series to + # handle both shared and static libraries in a single compilation. + # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. + # + # With libtool 1.5 this exception was removed, and libtool now + # generates 2 separate objects for the 2 libraries. These two + # compilations output dependencies in in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. 
+ tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 + tmpdepfile2=$dir$base.o.d # libtool 1.5 + tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 + tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.o.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + tmpdepfile4=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no + for arg in "$@"; do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? 
+ + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + "$@" || exit $? + IFS=" " + for arg + do + case "$arg" in + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/build-aux/install-sh b/build-aux/install-sh new file mode 100755 index 0000000..4d4a951 --- /dev/null +++ b/build-aux/install-sh @@ -0,0 +1,323 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2005-05-14.22 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +chmodcmd="$chmodprog 0755" +chowncmd= +chgrpcmd= +stripcmd= +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test -n "$1"; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit $?;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 + shift + shift + continue;; + + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit $?;; + + *) # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + test -n "$dir_arg$dstarg" && break + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done + break;; + esac +done + +if test -z "$1"; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +for src +do + # Protect names starting with `-'. 
+ case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + src= + + if test -d "$dst"; then + mkdircmd=: + chmodcmd= + else + mkdircmd=$mkdirprog + fi + else + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dst=$dst/`basename "$src"` + fi + fi + + # This sed command emulates the dirname command. + dstdir=`echo "$dst" | sed -e 's,/*$,,;s,[^/]*$,,;s,/*$,,;s,^$,.,'` + + # Make sure that the destination directory exists. + + # Skip lots of stat calls in the usual case. + if test ! -d "$dstdir"; then + defaultIFS=' + ' + IFS="${IFS-$defaultIFS}" + + oIFS=$IFS + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set x `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` + shift + IFS=$oIFS + + pathcomp= + + while test $# -ne 0 ; do + pathcomp=$pathcomp$1 + shift + if test ! -d "$pathcomp"; then + $mkdirprog "$pathcomp" + # mkdir can fail with a `File exist' error in case several + # install-sh are creating the directory concurrently. This + # is OK. + test -d "$pathcomp" || exit + fi + pathcomp=$pathcomp/ + done + fi + + if test -n "$dir_arg"; then + $doit $mkdircmd "$dst" \ + && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } + + else + dstfile=`basename "$dst"` + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + trap '(exit $?); exit' 1 2 13 15 + + # Copy the file name to the temp name. + $doit $cpprog "$src" "$dsttmp" && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. 
+ { + if test -f "$dstdir/$dstfile"; then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ + || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ + || { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit 1 + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + } + } + fi || { (exit 1); exit 1; } +done + +# The final little trick to "correctly" pass the exit status to the exit trap. +{ + (exit 0); exit 0 +} + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/build-aux/missing b/build-aux/missing new file mode 100755 index 0000000..894e786 --- /dev/null +++ b/build-aux/missing @@ -0,0 +1,360 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. + +scriptversion=2005-06-08.21 + +# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# Originally by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +run=: + +# In the cases where this matters, `missing' is being run in the +# srcdir already. +if test -f configure.ac; then + configure_ac=configure.ac +else + configure_ac=configure.in +fi + +msg="missing on your system" + +case "$1" in +--run) + # Try to run requested program, and just exit if it succeeds. + run= + shift + "$@" && exit 0 + # Exit code 63 means version mismatch. This often happens + # when the user try to use an ancient version of a tool on + # a file that requires a minimum version. In this case we + # we should proceed has if the program had been absent, or + # if --run hadn't been passed. + if test $? = 63; then + run=: + msg="probably too old" + fi + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. 
+ +Options: + -h, --help display this help and exit + -v, --version output version information and exit + --run try to run the given command, and emulate it if it fails + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + help2man touch the output file + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + tar try tar, gnutar, gtar, then tar without non-portable flags + yacc create \`y.tab.[ch]', if possible, from existing .[ch] + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + +esac + +# Now exit if we have it, but it failed. Also exit now if we +# don't have it and --version was passed (most likely to detect +# the program). +case "$1" in + lex|yacc) + # Not GNU programs, they don't have --version. + ;; + + tar) + if test -n "$run"; then + echo 1>&2 "ERROR: \`tar' requires --run" + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + exit 1 + fi + ;; + + *) + if test -z "$run" && ($1 --version) > /dev/null 2>&1; then + # We have it, but it failed. + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + # Could not run --version or --help. This is probably someone + # running `$TOOL --version' or `$TOOL --help' to check whether + # $TOOL exists and not knowing $TOOL uses missing. + exit 1 + fi + ;; +esac + +# If it does not exist, or fails to run (possibly an outdated version), +# try to emulate it. +case "$1" in + aclocal*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acinclude.m4' or \`${configure_ac}'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`${configure_ac}'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acconfig.h' or \`${configure_ac}'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." + files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case "$f" in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + autom4te) + echo 1>&2 "\ +WARNING: \`$1' is needed, but is $msg. + You might have modified some files without having the + proper tools for further handling them. 
+ You can get \`$1' as part of \`Autoconf' from any GNU + archive site." + + file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` + test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo "#! /bin/sh" + echo "# Created by GNU Automake missing as a replacement of" + echo "# $ $@" + echo "exit 0" + chmod +x $file + exit 1 + fi + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' $msg. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if [ ! -f y.tab.h ]; then + echo >y.tab.h + fi + if [ ! -f y.tab.c ]; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if [ ! -f lex.yy.c ]; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + help2man) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a dependency of a manual page. You may need the + \`Help2man' package in order for those modifications to take + effect. You can get \`Help2man' from any GNU archive site." + + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` + fi + if [ -f "$file" ]; then + touch $file + else + test -z "$file" || exec >$file + echo ".ab help2man is required to generate this page" + exit 1 + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." + # The file to touch is that specified with -o ... + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + # ... or it is the one specified with @setfilename ... + infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $infile` + # ... or it is derived from the source name (dir/f.texi becomes f.info) + test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info + fi + # If the file does not exist, the user really needs makeinfo; + # let's fail without touching anything. + test -f $file || exit 1 + touch $file + ;; + + tar) + shift + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. 
+ if (gnutar --version > /dev/null 2>&1); then + gnutar "$@" && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar "$@" && exit 0 + fi + firstarg="$1" + if shift; then + case "$firstarg" in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + case "$firstarg" in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. + You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and is $msg. + You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequisites for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..cf20431 --- /dev/null +++ b/config.h.in @@ -0,0 +1,103 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* define if the BAM library is available */ +#undef HAVE_BAM + +/* define if the Boost library is available */ +#undef HAVE_BOOST + +/* define if the Boost::Thread library is available */ +#undef HAVE_BOOST_THREAD + +/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't. + */ +#undef HAVE_DECL_CTL_HW + +/* Define to 1 if you have the declaration of `HW_PHYSMEM', and to 0 if you + don't. */ +#undef HAVE_DECL_HW_PHYSMEM + +/* Define to 1 if you have the declaration of `sysctl', and to 0 if you don't. + */ +#undef HAVE_DECL_SYSCTL + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `z' library (-lz). */ +#undef HAVE_LIBZ + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if the system has the type `ptrdiff_t'. */ +#undef HAVE_PTRDIFF_T + +/* Define to 1 if stdbool.h conforms to C99. */ +#undef HAVE_STDBOOL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if `totalram' is member of `struct sysinfo'. */ +#undef HAVE_STRUCT_SYSINFO_TOTALRAM + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. 
*/ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* SVN Revision */ +#undef SVN_REVISION + +/* Version number of package */ +#undef VERSION + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to `int' if does not define. */ +#undef pid_t + +/* Define to `unsigned' if does not define. */ +#undef size_t diff --git a/configure b/configure new file mode 100755 index 0000000..7486fbb --- /dev/null +++ b/configure @@ -0,0 +1,8647 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.59 for cufflinks 1.3.0. +# +# Report bugs to . +# +# Copyright (C) 2003 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. 
+ case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. 
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_config_libobj_dir=. +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. +PACKAGE_NAME='cufflinks' +PACKAGE_TARNAME='cufflinks' +PACKAGE_VERSION='1.3.0' +PACKAGE_STRING='cufflinks 1.3.0' +PACKAGE_BUGREPORT='cole@cs.umd.edu' + +ac_unique_file="config.h.in" +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#if HAVE_SYS_TYPES_H +# include +#endif +#if HAVE_SYS_STAT_H +# include +#endif +#if STDC_HEADERS +# include +# include +#else +# if HAVE_STDLIB_H +# include +# endif +#endif +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include +# endif +# include +#endif +#if HAVE_STRINGS_H +# include +#endif +#if HAVE_INTTYPES_H +# include +#else +# if HAVE_STDINT_H +# include +# endif +#endif +#if HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar PYTHON CXX CXXFLAGS LDFLAGS CPPFLAGS ac_ct_CXX EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE CC CFLAGS ac_ct_CC CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE RANLIB ac_ct_RANLIB PYTHON_VERSION PYTHON_PREFIX PYTHON_EXEC_PREFIX PYTHON_PLATFORM pythondir pkgpythondir pyexecdir pkgpyexecdir BOOST_CPPFLAGS BOOST_LDFLAGS BAM_CPPFLAGS BAM_LDFLAGS BAM_LIB build build_cpu build_vendor build_os BOOST_THREAD_LIB CPP EGREP ZLIB host host_cpu host_vendor host_os LIBOBJS LTLIBOBJS' +ac_subst_files='' + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + 
ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. +for ac_var in exec_prefix prefix +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. 
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." 
>&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } + fi +fi +(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || + { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 + { (exit 1); exit 1; }; } +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_PYTHON_set=${PYTHON+set} +ac_env_PYTHON_value=$PYTHON +ac_cv_env_PYTHON_set=${PYTHON+set} +ac_cv_env_PYTHON_value=$PYTHON +ac_env_CXX_set=${CXX+set} +ac_env_CXX_value=$CXX +ac_cv_env_CXX_set=${CXX+set} +ac_cv_env_CXX_value=$CXX +ac_env_CXXFLAGS_set=${CXXFLAGS+set} +ac_env_CXXFLAGS_value=$CXXFLAGS +ac_cv_env_CXXFLAGS_set=${CXXFLAGS+set} +ac_cv_env_CXXFLAGS_value=$CXXFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_CPP_set=${CPP+set} +ac_env_CPP_value=$CPP +ac_cv_env_CPP_set=${CPP+set} +ac_cv_env_CPP_value=$CPP + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures cufflinks 1.3.0 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +_ACEOF + + cat <<_ACEOF +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of cufflinks 1.3.0:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors + --enable-vectorize Enable GCC auto-vectorization + --enable-intel64 optimize for Intel64 CPU such as Xeon and Core2 + --enable-debug enable debugging info (default is no) + --enable-optim[=0|1|2|3] + set optimization level (default is 3) + --enable-profiling enable profiling with google-perftools + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-boost[=DIR] + use boost (default is yes) - it is possible to + specify the root directory for boost (optional) + --with-boost-libdir=LIB_DIR + Force given directory for boost libraries. Note that + this will overwrite library path detection, so use + this parameter only if default library detection + fails and you know exactly where your boost + libraries are located. + --with-bam[=DIR] use BAM libraries (default is yes) - it is possible + to specify the root directory for BAM (optional) + --with-bam-libdir=LIB_DIR + Force given directory for bam libraries. Note that + this will overwrite library path detection, so use + this parameter only if default library detection + fails and you know exactly where your bam libraries + are located. + --with-boost-thread[=special-lib] + use the Thread library from boost - it is possible + to specify a certain library for the linker e.g. + --with-boost-thread=boost_thread-gcc-mt + --with-zlib=DIR root directory path of zlib installation defaults to + /usr/local or /usr if not found in /usr/local + --without-zlib to disable zlib usage completely + +Some influential environment variables: + PYTHON python program + CXX C++ compiler command + CXXFLAGS C++ compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + CPPFLAGS C/C++ preprocessor flags, e.g. 
-I if you have + headers in a nonstandard directory + CC C compiler command + CFLAGS C compiler flags + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + ac_popdir=`pwd` + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d $ac_dir || continue + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. + if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi + cd $ac_popdir + done +fi + +test -n "$ac_init_help" && exit 0 +if $ac_init_version; then + cat <<\_ACEOF +cufflinks configure 1.3.0 +generated by GNU Autoconf 2.59 + +Copyright (C) 2003 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit 0 +fi +exec 5>config.log +cat >&5 <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by cufflinks $as_me 1.3.0, which was +generated by GNU Autoconf 2.59. Invocation command line was + + $ $0 $@ + +_ACEOF +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_sep= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" + # Get rid of the leading space. + ac_sep=" " + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Be sure not to use single quotes in there, as some shells, +# such as our DU 5.0 friend, will then `close' the trap. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. 
## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +{ + (set) 2>&1 | + case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in + *ac_space=\ *) + sed -n \ + "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" + ;; + *) + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------- ## +## Output files. ## +## ------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + sed "/^$/d" confdefs.h | sort + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core && + rm -rf conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status + ' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo >confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . $cache_file;; + *) . ./$cache_file;; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + + + + + + + + + + +cat >>confdefs.h <<\_ACEOF +#define SVN_REVISION "3022" +_ACEOF + + + + ac_config_headers="$ac_config_headers config.h" + +ac_aux_dir= +for ac_dir in build-aux $srcdir/build-aux; do + if test -f $ac_dir/install-sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f $ac_dir/install.sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f $ac_dir/shtool; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in build-aux $srcdir/build-aux" >&5 +echo "$as_me: error: cannot find install-sh or install.sh in build-aux $srcdir/build-aux" >&2;} + { (exit 1); exit 1; }; } +fi +ac_config_guess="$SHELL $ac_aux_dir/config.guess" +ac_config_sub="$SHELL $ac_aux_dir/config.sub" +ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure. + +am__api_version="1.9" +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 +echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6 +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in + ./ | .// | /cC/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + done + done + ;; +esac +done + + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL=$ac_install_sh + fi +fi +echo "$as_me:$LINENO: result: $INSTALL" >&5 +echo "${ECHO_T}$INSTALL" >&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +echo "$as_me:$LINENO: checking whether build environment is sane" >&5 +echo $ECHO_N "checking whether build environment is sane... $ECHO_C" >&6 +# Just in case +sleep 1 +echo timestamp > conftest.file +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftest.file` + fi + rm -f conftest.file + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. 
This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + { { echo "$as_me:$LINENO: error: ls -t appears to fail. Make sure there is not a broken +alias in your environment" >&5 +echo "$as_me: error: ls -t appears to fail. Make sure there is not a broken +alias in your environment" >&2;} + { (exit 1); exit 1; }; } + fi + + test "$2" = conftest.file + ) +then + # Ok. + : +else + { { echo "$as_me:$LINENO: error: newly created file is older than distributed files! +Check your system clock" >&5 +echo "$as_me: error: newly created file is older than distributed files! +Check your system clock" >&2;} + { (exit 1); exit 1; }; } +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +test "$program_prefix" != NONE && + program_transform_name="s,^,$program_prefix,;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s,\$,$program_suffix,;$program_transform_name" +# Double any \ or $. echo might interpret backslashes. +# By default was `s,x,x', remove it if useless. +cat <<\_ACEOF >conftest.sed +s/[\\$]/&&/g;s/;s,x,x,$// +_ACEOF +program_transform_name=`echo $program_transform_name | sed -f conftest.sed` +rm conftest.sed + +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` + +test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" +# Use eval to expand $SHELL +if eval "$MISSING --run true"; then + am_missing_run="$MISSING --run " +else + am_missing_run= + { echo "$as_me:$LINENO: WARNING: \`missing' script is too old or missing" >&5 +echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;} +fi + +if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then + # We used to keeping the `.' as first argument, in order to + # allow $(mkdir_p) to be used without argument. As in + # $(mkdir_p) $(somedir) + # where $(somedir) is conditionally defined. However this is wrong + # for two reasons: + # 1. if the package is installed by a user who cannot write `.' + # make install will fail, + # 2. the above comment should most certainly read + # $(mkdir_p) $(DESTDIR)$(somedir) + # so it does not work when $(somedir) is undefined and + # $(DESTDIR) is not. + # To support the latter case, we have to write + # test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir), + # so the `.' trick is pointless. + mkdir_p='mkdir -p --' +else + # On NextStep and OpenStep, the `mkdir' command does not + # recognize any option. It will interpret all options as + # directories to create, and then abort because `.' already + # exists. + for d in ./-p ./--version; + do + test -d $d && rmdir $d + done + # $(mkinstalldirs) is defined by Automake if mkinstalldirs exists. + if test -f "$ac_aux_dir/mkinstalldirs"; then + mkdir_p='$(mkinstalldirs)' + else + mkdir_p='$(install_sh) -d' + fi +fi + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_AWK+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + echo "$as_me:$LINENO: result: $AWK" >&5 +echo "${ECHO_T}$AWK" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$AWK" && break +done + +echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6 +set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y,:./+-,___p_,'` +if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.make <<\_ACEOF +all: + @echo 'ac_maketemp="$(MAKE)"' +_ACEOF +# GNU make sometimes prints "make[1]: Entering...", which would confuse us. +eval `${MAKE-make} -f conftest.make 2>/dev/null | grep temp=` +if test -n "$ac_maketemp"; then + eval ac_cv_prog_make_${ac_make}_set=yes +else + eval ac_cv_prog_make_${ac_make}_set=no +fi +rm -f conftest.make +fi +if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + SET_MAKE= +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# test to see if srcdir already configured +if test "`cd $srcdir && pwd`" != "`pwd`" && + test -f $srcdir/config.status; then + { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5 +echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;} + { (exit 1); exit 1; }; } +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='cufflinks' + VERSION='1.3.0' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +install_sh=${install_sh-"$am_aux_dir/install-sh"} + +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + echo "$as_me:$LINENO: result: $STRIP" >&5 +echo "${ECHO_T}$STRIP" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_ac_ct_STRIP" && ac_cv_prog_ac_ct_STRIP=":" +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 +echo "${ECHO_T}$ac_ct_STRIP" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + STRIP=$ac_ct_STRIP +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s" + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. + +AMTAR=${AMTAR-"${am_missing_run}tar"} + +am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -' + + + + + + +#AM_PATH_CPPUNIT(1.10.2) + + + + # Make sure CXXFLAGS is defined so that AC_PROG_CXX doesn't set it. +CXXFLAGS="$CXXFLAGS" +CFLAGS="$CFLAGS" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Checks for programs. +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_AWK+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + echo "$as_me:$LINENO: result: $AWK" >&5 +echo "${ECHO_T}$AWK" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$AWK" && break +done + +ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + echo "$as_me:$LINENO: result: $CXX" >&5 +echo "${ECHO_T}$CXX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 +echo "${ECHO_T}$ac_ct_CXX" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CXX" && break +done +test -n "$ac_ct_CXX" || ac_ct_CXX="g++" + + CXX=$ac_ct_CXX +fi + + +# Provide some information about the compiler. +echo "$as_me:$LINENO:" \ + "checking for C++ compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +echo "$as_me:$LINENO: checking for C++ compiler default output file name" >&5 +echo $ECHO_N "checking for C++ compiler default output file name... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. +for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext + break;; + * ) + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C++ compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C++ compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether the C++ compiler works" >&5 +echo $ECHO_N "checking whether the C++ compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C++ compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C++ compiled programs. +If you meant to cross compile, use \`--host'. 
+See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C++ compiler... 
$ECHO_C" >&6 +if test "${ac_cv_cxx_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6 +GXX=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +CXXFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 +echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cxx_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cxx_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6 +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +DEPDIR="${am__leading_dot}deps" + + ac_config_commands="$ac_config_commands depfiles" + + +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo done +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +echo "$as_me:$LINENO: checking for style of include used by $am_make" >&5 +echo $ECHO_N "checking for style of include used by $am_make... $ECHO_C" >&6 +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# We grep out `Entering directory' and `Leaving directory' +# messages which can occur if `w' ends up in MAKEFLAGS. +# In particular we don't look at `^make:' because GNU make might +# be invoked under some other name (usually "gmake"), in which +# case it prints its new name instead of `make'. 
+if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then + am__include=include + am__quote= + _am_result=GNU +fi +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then + am__include=.include + am__quote="\"" + _am_result=BSD + fi +fi + + +echo "$as_me:$LINENO: result: $_am_result" >&5 +echo "${ECHO_T}$_am_result" >&6 +rm -f confinc confmf + +# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then + enableval="$enable_dependency_tracking" + +fi; +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' +fi + + +if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + + +depcc="$CXX" am_compiler_list= + +echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6 +if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. 
+ if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6 +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + + +if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CC" am_compiler_list= + +echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6 +if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. 
+ touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. + if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CC_dependencies_compiler_type" >&6 +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + + +if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + +echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6 +set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y,:./+-,___p_,'` +if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.make <<\_ACEOF +all: + @echo 'ac_maketemp="$(MAKE)"' +_ACEOF +# GNU make sometimes prints "make[1]: Entering...", which would confuse us. +eval `${MAKE-make} -f conftest.make 2>/dev/null | grep temp=` +if test -n "$ac_maketemp"; then + eval ac_cv_prog_make_${ac_make}_set=yes +else + eval ac_cv_prog_make_${ac_make}_set=no +fi +rm -f conftest.make +fi +if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + SET_MAKE= +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + SET_MAKE="MAKE=${MAKE-make}" +fi + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + echo "$as_me:$LINENO: result: $RANLIB" >&5 +echo "${ECHO_T}$RANLIB" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_ac_ct_RANLIB" && ac_cv_prog_ac_ct_RANLIB=":" +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 +echo "${ECHO_T}$ac_ct_RANLIB" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + RANLIB=$ac_ct_RANLIB +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 +echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6 +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in + ./ | .// | /cC/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. 
It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + done + done + ;; +esac +done + + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL=$ac_install_sh + fi +fi +echo "$as_me:$LINENO: result: $INSTALL" >&5 +echo "${ECHO_T}$INSTALL" >&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + + + + + + if test -n "$PYTHON"; then + # If the user set $PYTHON, use it and don't search something else. + echo "$as_me:$LINENO: checking whether $PYTHON version >= 2.4" >&5 +echo $ECHO_N "checking whether $PYTHON version >= 2.4... $ECHO_C" >&6 + prog="import sys, string +# split strings by '.' and convert to numeric. Append some zeros +# because we need at least 4 digits for the hex conversion. +minver = map(int, string.split('2.4', '.')) + [0, 0, 0] +minverhex = 0 +for i in xrange(0, 4): minverhex = (minverhex << 8) + minver[i] +sys.exit(sys.hexversion < minverhex)" + if { echo "$as_me:$LINENO: $PYTHON -c "$prog"" >&5 + ($PYTHON -c "$prog") >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else + { { echo "$as_me:$LINENO: error: too old" >&5 +echo "$as_me: error: too old" >&2;} + { (exit 1); exit 1; }; } +fi + + am_display_PYTHON=$PYTHON + else + # Otherwise, try each interpreter until we find one that satisfies + # VERSION. + echo "$as_me:$LINENO: checking for a Python interpreter with version >= 2.4" >&5 +echo $ECHO_N "checking for a Python interpreter with version >= 2.4... $ECHO_C" >&6 +if test "${am_cv_pathless_PYTHON+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + + for am_cv_pathless_PYTHON in python python2 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 python1.6 python1.5 none; do + test "$am_cv_pathless_PYTHON" = none && break + prog="import sys, string +# split strings by '.' and convert to numeric. Append some zeros +# because we need at least 4 digits for the hex conversion. +minver = map(int, string.split('2.4', '.')) + [0, 0, 0] +minverhex = 0 +for i in xrange(0, 4): minverhex = (minverhex << 8) + minver[i] +sys.exit(sys.hexversion < minverhex)" + if { echo "$as_me:$LINENO: $am_cv_pathless_PYTHON -c "$prog"" >&5 + ($am_cv_pathless_PYTHON -c "$prog") >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + break +fi + + done +fi +echo "$as_me:$LINENO: result: $am_cv_pathless_PYTHON" >&5 +echo "${ECHO_T}$am_cv_pathless_PYTHON" >&6 + # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON. + if test "$am_cv_pathless_PYTHON" = none; then + PYTHON=: + else + # Extract the first word of "$am_cv_pathless_PYTHON", so it can be a program name with args. 
+set dummy $am_cv_pathless_PYTHON; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_path_PYTHON+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $PYTHON in + [\\/]* | ?:[\\/]*) + ac_cv_path_PYTHON="$PYTHON" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PYTHON="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + ;; +esac +fi +PYTHON=$ac_cv_path_PYTHON + +if test -n "$PYTHON"; then + echo "$as_me:$LINENO: result: $PYTHON" >&5 +echo "${ECHO_T}$PYTHON" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + fi + am_display_PYTHON=$am_cv_pathless_PYTHON + fi + + + if test "$PYTHON" = :; then + { { echo "$as_me:$LINENO: error: no suitable Python interpreter found" >&5 +echo "$as_me: error: no suitable Python interpreter found" >&2;} + { (exit 1); exit 1; }; } + else + + + echo "$as_me:$LINENO: checking for $am_display_PYTHON version" >&5 +echo $ECHO_N "checking for $am_display_PYTHON version... $ECHO_C" >&6 +if test "${am_cv_python_version+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_python_version=`$PYTHON -c "import sys; print sys.version[:3]"` +fi +echo "$as_me:$LINENO: result: $am_cv_python_version" >&5 +echo "${ECHO_T}$am_cv_python_version" >&6 + PYTHON_VERSION=$am_cv_python_version + + + + PYTHON_PREFIX='${prefix}' + + PYTHON_EXEC_PREFIX='${exec_prefix}' + + + + echo "$as_me:$LINENO: checking for $am_display_PYTHON platform" >&5 +echo $ECHO_N "checking for $am_display_PYTHON platform... $ECHO_C" >&6 +if test "${am_cv_python_platform+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_python_platform=`$PYTHON -c "import sys; print sys.platform"` +fi +echo "$as_me:$LINENO: result: $am_cv_python_platform" >&5 +echo "${ECHO_T}$am_cv_python_platform" >&6 + PYTHON_PLATFORM=$am_cv_python_platform + + + + + echo "$as_me:$LINENO: checking for $am_display_PYTHON script directory" >&5 +echo $ECHO_N "checking for $am_display_PYTHON script directory... $ECHO_C" >&6 +if test "${am_cv_python_pythondir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_python_pythondir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(0,0,prefix='$PYTHON_PREFIX')" 2>/dev/null || + echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"` +fi +echo "$as_me:$LINENO: result: $am_cv_python_pythondir" >&5 +echo "${ECHO_T}$am_cv_python_pythondir" >&6 + pythondir=$am_cv_python_pythondir + + + + pkgpythondir=\${pythondir}/$PACKAGE + + + echo "$as_me:$LINENO: checking for $am_display_PYTHON extension module directory" >&5 +echo $ECHO_N "checking for $am_display_PYTHON extension module directory... 
$ECHO_C" >&6 +if test "${am_cv_python_pyexecdir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_python_pyexecdir=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_lib(1,0,prefix='$PYTHON_EXEC_PREFIX')" 2>/dev/null || + echo "${PYTHON_EXEC_PREFIX}/lib/python${PYTHON_VERSION}/site-packages"` +fi +echo "$as_me:$LINENO: result: $am_cv_python_pyexecdir" >&5 +echo "${ECHO_T}$am_cv_python_pyexecdir" >&6 + pyexecdir=$am_cv_python_pyexecdir + + + + pkgpyexecdir=\${pyexecdir}/$PACKAGE + + + + fi + + + + + +# Check whether --with-boost or --without-boost was given. +if test "${with_boost+set}" = set; then + withval="$with_boost" + + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ac_boost_path="" + else + want_boost="yes" + ac_boost_path="$withval" + fi + +else + want_boost="yes" +fi; + + + +# Check whether --with-boost-libdir or --without-boost-libdir was given. +if test "${with_boost_libdir+set}" = set; then + withval="$with_boost_libdir" + + if test -d $withval + then + ac_boost_lib_path="$withval" + else + { { echo "$as_me:$LINENO: error: --with-boost-libdir expected directory name" >&5 +echo "$as_me: error: --with-boost-libdir expected directory name" >&2;} + { (exit 1); exit 1; }; } + fi + +else + ac_boost_lib_path="" + +fi; + +if test "x$want_boost" = "xyes"; then + boost_lib_version_req=1.38.0 + boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([0-9]*\.[0-9]*\)'` + boost_lib_version_req_major=`expr $boost_lib_version_req : '\([0-9]*\)'` + boost_lib_version_req_minor=`expr $boost_lib_version_req : '[0-9]*\.\([0-9]*\)'` + boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[0-9]*\.[0-9]*\.\([0-9]*\)'` + if test "x$boost_lib_version_req_sub_minor" = "x" ; then + boost_lib_version_req_sub_minor="0" + fi + WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` + echo "$as_me:$LINENO: checking for boostlib >= $boost_lib_version_req" >&5 +echo $ECHO_N "checking for boostlib >= $boost_lib_version_req... $ECHO_C" >&6 + succeeded=no + + if test "$ac_boost_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_path/lib" + BOOST_CPPFLAGS="-I$ac_boost_path/include" + else + for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then + BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib" + BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" + break; + fi + done + fi + + if test "$ac_boost_lib_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + succeeded=yes + found_system=yes + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + if test "x$succeeded" != "xyes"; then + _version=0 + if test "$ac_boost_path" != ""; then + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" + done + fi + else + for ac_boost_path in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_boost_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + if test "$ac_boost_lib_path" = "" + then + BOOST_LDFLAGS="-L$best_path/lib" + fi + + if test "x$BOOST_ROOT" != "x"; then + if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then + version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([0-9]*\.[0-9]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then + { echo "$as_me:$LINENO: We will use a staged boost library from $BOOST_ROOT" >&5 +echo "$as_me: We will use a staged boost library from $BOOST_ROOT" >&6;} + BOOST_CPPFLAGS="-I$BOOST_ROOT" + BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + + #include + +int +main () +{ + + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + succeeded=yes + found_system=yes + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + if test "$succeeded" != "yes" ; then + if test "$_version" = "0" ; then + { { echo "$as_me:$LINENO: error: We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation." >&5 +echo "$as_me: error: We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation." >&2;} + { (exit 1); exit 1; }; } + else + { echo "$as_me:$LINENO: Your boost libraries seems to old (version $_version)." >&5 +echo "$as_me: Your boost libraries seems to old (version $_version)." >&6;} + fi + else + + + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BOOST +_ACEOF + + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" +fi + + + + +# Check whether --with-bam or --without-bam was given. +if test "${with_bam+set}" = set; then + withval="$with_bam" + + if test "$withval" = "no"; then + want_bam="no" + elif test "$withval" = "yes"; then + want_bam="yes" + ac_bam_path="" + else + want_bam="yes" + ac_bam_path="$withval" + fi + +else + want_bam="yes" +fi; + + + +# Check whether --with-bam-libdir or --without-bam-libdir was given. 
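+# A hypothetical invocation showing how the --with-boost and --with-bam options
+# handled above and below are normally supplied (paths are illustrative only;
+# each prefix is assumed to hold the usual include/ and lib/ subdirectories):
+#   ./configure --with-boost=/opt/boost_1_38_0 \
+#               --with-bam=/opt/samtools --with-bam-libdir=/opt/samtools/lib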
+if test "${with_bam_libdir+set}" = set; then + withval="$with_bam_libdir" + + if test -d $withval + then + ac_bam_lib_path="$withval" + else + { { echo "$as_me:$LINENO: error: --with-bam-libdir expected directory name" >&5 +echo "$as_me: error: --with-bam-libdir expected directory name" >&2;} + { (exit 1); exit 1; }; } + fi + +else + ac_bam_lib_path="" + +fi; + +if test "x$want_bam" = "xyes"; then +# bam_lib_version_req=ifelse([], ,1.20.0,) +# bam_lib_version_req_shorten=`expr $bam_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` +# bam_lib_version_req_major=`expr $bam_lib_version_req : '\([[0-9]]*\)'` +# bam_lib_version_req_minor=`expr $bam_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` +# bam_lib_version_req_sub_minor=`expr $bam_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` +# if test "x$bam_lib_version_req_sub_minor" = "x" ; then +# bam_lib_version_req_sub_minor="0" +# fi +# WANT_BAM_VERSION=`expr $bam_lib_version_req_major \* 100000 \+ $bam_lib_version_req_minor \* 100 \+ $bam_lib_version_req_sub_minor` + echo "$as_me:$LINENO: checking for bamlib" >&5 +echo $ECHO_N "checking for bamlib... $ECHO_C" >&6 + succeeded=no + + if test "$ac_bam_path" != ""; then + BAM_LDFLAGS="-L$ac_bam_path/lib" + BAM_CPPFLAGS="-I$ac_bam_path/include" + else + for ac_bam_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_bam_path_tmp/include/bam" && test -r "$ac_bam_path_tmp/include/bam"; then + BAM_LDFLAGS="-L$ac_bam_path_tmp/lib" + BAM_CPPFLAGS="-I$ac_bam_path_tmp/include" + break; + fi + done + fi + + if test "$ac_bam_lib_path" != ""; then + BAM_LDFLAGS="-L$ac_bam_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BAM_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BAM_LDFLAGS" + export LDFLAGS + + ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + succeeded=yes + found_system=yes + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + if test "x$succeeded" != "xyes"; then + _version=0 + if test "$ac_bam_path" != ""; then + if test -d "$ac_bam_path" && test -r "$ac_bam_path"; then + for i in `ls -d $ac_bam_path/include/bam-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_bam_path##" | sed 's/\/include\/bam-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BAM_CPPFLAGS="-I$ac_bam_path/include/bam-$VERSION_UNDERSCORE" + done + fi + else + for ac_bam_path in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_bam_path" && test -r "$ac_bam_path"; then + for i in `ls -d $ac_bam_path/include/bam-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_bam_path##" | sed 's/\/include\/bam-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_bam_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BAM_CPPFLAGS="-I$best_path/include/bam-$VERSION_UNDERSCORE" + if test "$ac_bam_lib_path" = "" + then + BAM_LDFLAGS="-L$best_path/lib" + fi + + if test "x$BAM_ROOT" != "x"; then + if test -d "$BAM_ROOT" && test -r "$BAM_ROOT" && test -d "$BAM_ROOT/stage/lib" && test -r "$BAM_ROOT/stage/lib"; then + version_dir=`expr //$BAM_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/bam_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([0-9]*\.[0-9]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_bam_lib_path" = "" ; then + { echo "$as_me:$LINENO: We will use a staged bam library from $BAM_ROOT" >&5 +echo "$as_me: We will use a staged bam library from $BAM_ROOT" >&6;} + BAM_CPPFLAGS="-I$BAM_ROOT" + BAM_LDFLAGS="-L$BAM_ROOT/stage/lib" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BAM_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BAM_LDFLAGS" + export LDFLAGS + + ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + +int +main () +{ + + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + succeeded=yes + found_system=yes + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + if test "$succeeded" != "yes" ; then + if test "$_version" = "0" ; then + { { echo "$as_me:$LINENO: error: We could not detect the bam libraries (version $bam_lib_version_req_shorten or higher). If you have a staged bam library (still not installed) please specify \$BAM_ROOT in your environment and do not give a PATH to --with-bam option. If you are sure you have bam installed, then check your version number looking in . See http://randspringer.de/bam for more documentation." >&5 +echo "$as_me: error: We could not detect the bam libraries (version $bam_lib_version_req_shorten or higher). If you have a staged bam library (still not installed) please specify \$BAM_ROOT in your environment and do not give a PATH to --with-bam option. If you are sure you have bam installed, then check your version number looking in . See http://randspringer.de/bam for more documentation." >&2;} + { (exit 1); exit 1; }; } + else + { echo "$as_me:$LINENO: Your bam libraries seem too old (version $_version)." >&5 +echo "$as_me: Your bam libraries seem too old (version $_version)." >&6;} + fi + else + BAM_LIB="-lbam" + + + + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BAM +_ACEOF + + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" +fi + + +# Make sure we can run config.sub. +$ac_config_sub sun4 >/dev/null 2>&1 || + { { echo "$as_me:$LINENO: error: cannot run $ac_config_sub" >&5 +echo "$as_me: error: cannot run $ac_config_sub" >&2;} + { (exit 1); exit 1; }; } + +echo "$as_me:$LINENO: checking build system type" >&5 +echo $ECHO_N "checking build system type... $ECHO_C" >&6 +if test "${ac_cv_build+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_build_alias=$build_alias +test -z "$ac_cv_build_alias" && + ac_cv_build_alias=`$ac_config_guess` +test -z "$ac_cv_build_alias" && + { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 +echo "$as_me: error: cannot guess build type; you must specify one" >&2;} + { (exit 1); exit 1; }; } +ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_build_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_build_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +echo "$as_me:$LINENO: result: $ac_cv_build" >&5 +echo "${ECHO_T}$ac_cv_build" >&6 +build=$ac_cv_build +build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + + + + +# Check whether --with-boost-thread or --without-boost-thread was given. 
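+# If a value is given it is stored in ax_boost_user_thread_lib and later used
+# verbatim as BOOST_THREAD_LIB instead of the auto-detection below; a
+# hypothetical example (the library name is illustrative only):
+#   ./configure --with-boost-thread=boost_thread-mt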
+if test "${with_boost_thread+set}" = set; then + withval="$with_boost_thread" + + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_thread_lib="" + else + want_boost="yes" + echo "using $withval" + ax_boost_user_thread_lib="$withval" + fi + +else + want_boost="yes" + +fi; + + if test "x$want_boost" = "xyes"; then + + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + echo "$as_me:$LINENO: checking whether the Boost::Thread library is available" >&5 +echo $ECHO_N "checking whether the Boost::Thread library is available... $ECHO_C" >&6 +if test "${ax_cv_boost_thread+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cc +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + CXXFLAGS_SAVE=$CXXFLAGS + + if test "x$build_os" = "xsolaris" ; then + CXXFLAGS="-pthreads $CXXFLAGS" + elif test "x$build_os" = "xming32" ; then + CXXFLAGS="-mthreads $CXXFLAGS" + else + CXXFLAGS="-pthread $CXXFLAGS" + fi + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +boost::thread_group thrds; + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_boost_thread=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ax_cv_boost_thread=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$CXXFLAGS_SAVE + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +echo "$as_me:$LINENO: result: $ax_cv_boost_thread" >&5 +echo "${ECHO_T}$ax_cv_boost_thread" >&6 + if test "x$ax_cv_boost_thread" = "xyes"; then + if test "x$build_os" = "xsolaris" ; then + BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" + elif test "x$build_os" = "xming32" ; then + BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" + else + BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" + fi + + + + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BOOST_THREAD +_ACEOF + + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/[^\/]*//'` + + LDFLAGS_SAVE=$LDFLAGS + case "x$build_os" in + *bsd* ) + LDFLAGS="-pthread $LDFLAGS" + break; + ;; + esac + if test "x$ax_boost_user_thread_lib" = "x"; then + for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do + ax_lib=${libextension} + as_ac_Lib=`echo "ac_cv_lib_$ax_lib''_exit" | $as_tr_sh` +echo "$as_me:$LINENO: checking for exit in -l$ax_lib" >&5 +echo $ECHO_N "checking for exit in -l$ax_lib... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Lib+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-l$ax_lib $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char exit (); +int +main () +{ +exit (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Lib=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Lib=no" +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Lib'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Lib'}'`" >&6 +if test `eval echo '${'$as_ac_Lib'}'` = yes; then + BOOST_THREAD_LIB="-l$ax_lib"; link_thread="yes"; break +else + link_thread="no" +fi + + done + if test "x$link_thread" != "xyes"; then + for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do + ax_lib=${libextension} + as_ac_Lib=`echo "ac_cv_lib_$ax_lib''_exit" | $as_tr_sh` +echo "$as_me:$LINENO: checking for exit in -l$ax_lib" >&5 +echo $ECHO_N "checking for exit in -l$ax_lib... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Lib+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-l$ax_lib $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char exit (); +int +main () +{ +exit (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Lib=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Lib=no" +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Lib'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Lib'}'`" >&6 +if test `eval echo '${'$as_ac_Lib'}'` = yes; then + BOOST_THREAD_LIB="-l$ax_lib"; link_thread="yes"; break +else + link_thread="no" +fi + + done + fi + + else + BOOST_THREAD_LIB="$ax_boost_user_thread_lib"; + + link_thread="yes"; + + + fi + if test "x$link_thread" = "xno"; then + { { echo "$as_me:$LINENO: error: Could not link against $ax_lib !" >&5 +echo "$as_me: error: Could not link against $ax_lib !" 
>&2;} + { (exit 1); exit 1; }; } + else + case "x$build_os" in + *bsd* ) + BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS" + break; + ;; + esac + + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
+rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... 
$ECHO_C" >&6 +if test "${ac_cv_prog_egrep+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | (grep -E '(a|b)') >/dev/null 2>&1 + then ac_cv_prog_egrep='grep -E' + else ac_cv_prog_egrep='egrep' + fi +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 +echo "${ECHO_T}$ac_cv_prog_egrep" >&6 + EGREP=$ac_cv_prog_egrep + + +echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6 +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_stdc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_header_stdc=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + exit(2); + exit (0); +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6 +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +# +# Handle user hints +# +echo "$as_me:$LINENO: checking if zlib is wanted" >&5 +echo $ECHO_N "checking if zlib is wanted... $ECHO_C" >&6 + +# Check whether --with-zlib or --without-zlib was given. 
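[Editor's note] The block that follows is autoconf's expansion of a user-option test for zlib. As a rough guide to what it implements, the configure.ac-level input would look something like the sketch below. This is an illustration only, not the project's actual configure.ac (which is not part of this diff): the macro names are standard autoconf, while the exact help string and the wiring of the zlib_cv_* cache variables are assumptions inferred from the expanded shell code underneath.

    dnl Sketch only: a fragment that expands to roughly the --with-zlib /
    dnl -lz / zlib.h probing seen in the generated code below.
    AC_ARG_WITH([zlib],
                [AS_HELP_STRING([--with-zlib=DIR], [root of the zlib installation])],
                [ZLIB_HOME="$withval"])
    AC_CHECK_LIB([z], [inflateEnd], [zlib_cv_libz=yes], [zlib_cv_libz=no])
    AC_CHECK_HEADER([zlib.h], [zlib_cv_zlib_h=yes], [zlib_cv_zlib_h=no])
    AS_IF([test "$zlib_cv_libz" = yes && test "$zlib_cv_zlib_h" = yes],
          [AC_DEFINE([HAVE_LIBZ], [1], [Define if zlib is available])
           LIBS="-lz $LIBS"],
          [AC_MSG_ERROR([zlib not found; use --with-zlib=DIR or --without-zlib])])

The generated option handling resumes immediately below.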
+if test "${with_zlib+set}" = set; then + withval="$with_zlib" + if test "$withval" != no ; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + if test -d "$withval" + then + ZLIB_HOME="$withval" + else + { echo "$as_me:$LINENO: WARNING: Sorry, $withval does not exist, checking usual places" >&5 +echo "$as_me: WARNING: Sorry, $withval does not exist, checking usual places" >&2;} + fi +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +fi; + +ZLIB_HOME=/usr/local +if test ! -f "${ZLIB_HOME}/include/zlib.h" +then + ZLIB_HOME=/usr +fi + +# +# Locate zlib, if wanted +# +if test -n "${ZLIB_HOME}" +then + ZLIB_OLD_LDFLAGS=$LDFLAGS + ZLIB_OLD_CPPFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -L${ZLIB_HOME}/lib" + CPPFLAGS="$CPPFLAGS -I${ZLIB_HOME}/include" + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + echo "$as_me:$LINENO: checking for inflateEnd in -lz" >&5 +echo $ECHO_N "checking for inflateEnd in -lz... $ECHO_C" >&6 +if test "${ac_cv_lib_z_inflateEnd+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char inflateEnd (); +int +main () +{ +inflateEnd (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_z_inflateEnd=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_z_inflateEnd=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_z_inflateEnd" >&5 +echo "${ECHO_T}$ac_cv_lib_z_inflateEnd" >&6 +if test $ac_cv_lib_z_inflateEnd = yes; then + zlib_cv_libz=yes +else + zlib_cv_libz=no +fi + + if test "${ac_cv_header_zlib_h+set}" = set; then + echo "$as_me:$LINENO: checking for zlib.h" >&5 +echo $ECHO_N "checking for zlib.h... $ECHO_C" >&6 +if test "${ac_cv_header_zlib_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_zlib_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking zlib.h usability" >&5 +echo $ECHO_N "checking zlib.h usability... 
$ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking zlib.h presence" >&5 +echo $ECHO_N "checking zlib.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: zlib.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: zlib.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: zlib.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: zlib.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: zlib.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: zlib.h: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: zlib.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: zlib.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: zlib.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: zlib.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: zlib.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------ ## +## Report this to cole@cs.umd.edu ## +## ------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for zlib.h" >&5 +echo $ECHO_N "checking for zlib.h... $ECHO_C" >&6 +if test "${ac_cv_header_zlib_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_zlib_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_zlib_h" >&5 +echo "${ECHO_T}$ac_cv_header_zlib_h" >&6 + +fi +if test $ac_cv_header_zlib_h = yes; then + zlib_cv_zlib_h=yes +else + zlib_cv_zlib_h=no +fi + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "$zlib_cv_libz" = "yes" -a "$zlib_cv_zlib_h" = "yes" + then + # + # If both library and header were found, use them + # + +echo "$as_me:$LINENO: checking for inflateEnd in -lz" >&5 +echo $ECHO_N "checking for inflateEnd in -lz... $ECHO_C" >&6 +if test "${ac_cv_lib_z_inflateEnd+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char inflateEnd (); +int +main () +{ +inflateEnd (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_z_inflateEnd=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_z_inflateEnd=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_z_inflateEnd" >&5 +echo "${ECHO_T}$ac_cv_lib_z_inflateEnd" >&6 +if test $ac_cv_lib_z_inflateEnd = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBZ 1 +_ACEOF + + LIBS="-lz $LIBS" + +fi + + echo "$as_me:$LINENO: checking zlib in ${ZLIB_HOME}" >&5 +echo $ECHO_N "checking zlib in ${ZLIB_HOME}... $ECHO_C" >&6 + echo "$as_me:$LINENO: result: ok" >&5 +echo "${ECHO_T}ok" >&6 + ZLIB="-lz" + + else + # + # If either header or library was not found, revert and bomb + # + echo "$as_me:$LINENO: checking zlib in ${ZLIB_HOME}" >&5 +echo $ECHO_N "checking zlib in ${ZLIB_HOME}... $ECHO_C" >&6 + LDFLAGS="$ZLIB_OLD_LDFLAGS" + CPPFLAGS="$ZLIB_OLD_CPPFLAGS" + echo "$as_me:$LINENO: result: failed" >&5 +echo "${ECHO_T}failed" >&6 + { { echo "$as_me:$LINENO: error: either specify a valid zlib installation with --with-zlib=DIR or disable zlib usage with --without-zlib" >&5 +echo "$as_me: error: either specify a valid zlib installation with --with-zlib=DIR or disable zlib usage with --without-zlib" >&2;} + { (exit 1); exit 1; }; } + fi +fi + + + +# Checks for header files. + + + +for ac_header in stdlib.h string.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------ ## +## Report this to cole@cs.umd.edu ## +## ------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +# Checks for typedefs, structures, and compiler characteristics. +echo "$as_me:$LINENO: checking for stdbool.h that conforms to C99" >&5 +echo $ECHO_N "checking for stdbool.h that conforms to C99... 
$ECHO_C" >&6 +if test "${ac_cv_header_stdbool_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +#include +#ifndef bool +# error bool is not defined +#endif +#ifndef false +# error false is not defined +#endif +#if false +# error false is not 0 +#endif +#ifndef true +# error true is not defined +#endif +#if true != 1 +# error true is not 1 +#endif +#ifndef __bool_true_false_are_defined +# error __bool_true_false_are_defined is not defined +#endif + + struct s { _Bool s: 1; _Bool t; } s; + + char a[true == 1 ? 1 : -1]; + char b[false == 0 ? 1 : -1]; + char c[__bool_true_false_are_defined == 1 ? 1 : -1]; + char d[(bool) -0.5 == true ? 1 : -1]; + bool e = &s; + char f[(_Bool) -0.0 == false ? 1 : -1]; + char g[true]; + char h[sizeof (_Bool)]; + char i[sizeof s.t]; + +int +main () +{ + return !a + !b + !c + !d + !e + !f + !g + !h + !i; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_stdbool_h=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_header_stdbool_h=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdbool_h" >&5 +echo "${ECHO_T}$ac_cv_header_stdbool_h" >&6 +echo "$as_me:$LINENO: checking for _Bool" >&5 +echo $ECHO_N "checking for _Bool... $ECHO_C" >&6 +if test "${ac_cv_type__Bool+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((_Bool *) 0) + return 0; +if (sizeof (_Bool)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type__Bool=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type__Bool=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type__Bool" >&5 +echo "${ECHO_T}$ac_cv_type__Bool" >&6 +if test $ac_cv_type__Bool = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE__BOOL 1 +_ACEOF + + +fi + +if test $ac_cv_header_stdbool_h = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STDBOOL_H 1 +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for inline" >&5 +echo $ECHO_N "checking for inline... $ECHO_C" >&6 +if test "${ac_cv_c_inline+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_c_inline=no +for ac_kw in inline __inline__ __inline; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifndef __cplusplus +typedef int foo_t; +static $ac_kw foo_t static_foo () {return 0; } +$ac_kw foo_t foo () {return 0; } +#endif + +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_inline=$ac_kw; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_inline" >&5 +echo "${ECHO_T}$ac_cv_c_inline" >&6 + + +case $ac_cv_c_inline in + inline | yes) ;; + *) + case $ac_cv_c_inline in + no) ac_val=;; + *) ac_val=$ac_cv_c_inline;; + esac + cat >>confdefs.h <<_ACEOF +#ifndef __cplusplus +#define inline $ac_val +#endif +_ACEOF + ;; +esac + +echo "$as_me:$LINENO: checking for pid_t" >&5 +echo $ECHO_N "checking for pid_t... $ECHO_C" >&6 +if test "${ac_cv_type_pid_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((pid_t *) 0) + return 0; +if (sizeof (pid_t)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_pid_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_pid_t=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_pid_t" >&5 +echo "${ECHO_T}$ac_cv_type_pid_t" >&6 +if test $ac_cv_type_pid_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define pid_t int +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for size_t" >&5 +echo $ECHO_N "checking for size_t... $ECHO_C" >&6 +if test "${ac_cv_type_size_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((size_t *) 0) + return 0; +if (sizeof (size_t)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_size_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_size_t=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5 +echo "${ECHO_T}$ac_cv_type_size_t" >&6 +if test $ac_cv_type_size_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned +_ACEOF + +fi + +echo "$as_me:$LINENO: checking for ptrdiff_t" >&5 +echo $ECHO_N "checking for ptrdiff_t... $ECHO_C" >&6 +if test "${ac_cv_type_ptrdiff_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((ptrdiff_t *) 0) + return 0; +if (sizeof (ptrdiff_t)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_ptrdiff_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_ptrdiff_t=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_ptrdiff_t" >&5 +echo "${ECHO_T}$ac_cv_type_ptrdiff_t" >&6 +if test $ac_cv_type_ptrdiff_t = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_PTRDIFF_T 1 +_ACEOF + + +fi + + +# Checks for library functions. +#AC_FUNC_FORK +#AC_CHECK_FUNCS([floor memmove pow regcomp sqrt strchr strcspn strspn strstr]) + +# check the platform +echo "$as_me:$LINENO: checking host system type" >&5 +echo $ECHO_N "checking host system type... $ECHO_C" >&6 +if test "${ac_cv_host+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_host_alias=$host_alias +test -z "$ac_cv_host_alias" && + ac_cv_host_alias=$ac_cv_build_alias +ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_host_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_host_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +echo "$as_me:$LINENO: result: $ac_cv_host" >&5 +echo "${ECHO_T}$ac_cv_host" >&6 +host=$ac_cv_host +host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + + +# set CFLAGS and CXXFLAGS +user_CFLAGS=${CFLAGS} +generic_CFLAGS="-Wall -Wno-strict-aliasing -g -gdwarf-2 -Wuninitialized" +ext_CFLAGS="" +debug_CFLAGS="" +#echo "${host_cpu}-${host_os}" +case "${host_cpu}-${host_os}" in + i*86-*linux*) + ext_CFLAGS="-march=i686";; + i*86-darwin*) + CFLAGS="-m64" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ext_CFLAGS="-arch x86_64" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext;; + *) + echo "$as_me:$LINENO: checking if gcc accepts -m64" >&5 +echo $ECHO_N "checking if gcc accepts -m64... $ECHO_C" >&6 + CFLAGS="-m64" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ext_CFLAGS="-m64"; echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ext_CFLAGS="-D_FILE_OFFSET_BITS=64"; echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext;; +esac + +# Check whether --enable-vectorize or --disable-vectorize was given. +if test "${enable_vectorize+set}" = set; then + enableval="$enable_vectorize" + ext_CFLAGS="${ext_CFLAGS} -ftree-vectorize -msse3 -ffast-math -ftree-vectorizer-verbose=99" +fi; + +# Check whether --enable-intel64 or --disable-intel64 was given. +if test "${enable_intel64+set}" = set; then + enableval="$enable_intel64" + ext_CFLAGS="${ext_CFLAGS} -march=nocona" +fi; + +# Check whether --enable-debug or --disable-debug was given. +if test "${enable_debug+set}" = set; then + enableval="$enable_debug" + +else + enable_debug=no +fi; + + +# Check whether --enable-optim or --disable-optim was given. +if test "${enable_optim+set}" = set; then + enableval="$enable_optim" + if test "x$enable_optim" = xyes; then enable_optim=3; fi +else + enable_optim=3 +fi; + +if test "x$enable_optim" != xno; then + ext_CFLAGS="$ext_CFLAGS -O$enable_optim" +fi + + +if test "x$enable_debug" = xyes; then + debug_CFLAGS="-DDEBUG" +else + debug_CFLAGS="-DNDEBUG" +fi + + +# Check whether --enable-profiling or --disable-profiling was given. +if test "${enable_profiling+set}" = set; then + enableval="$enable_profiling" + ext_LDFLAGS="-lprofiler -ltcmalloc" +fi; + +CFLAGS="${generic_CFLAGS} ${ext_CFLAGS} ${user_CFLAGS} ${debug_CFLAGS} ${OPENMP_CFLAGS}" +CXXFLAGS="$CFLAGS" +CXXFLAGS="$CXXFLAGS $BOOST_CPPFLAGS $BAM_CPPFLAGS" +LDFLAGS="$ext_LDFLAGS" + +# Checks for structures/functions that can be used to determine system memory +echo "$as_me:$LINENO: checking for struct sysinfo.totalram" >&5 +echo $ECHO_N "checking for struct sysinfo.totalram... $ECHO_C" >&6 +if test "${ac_cv_member_struct_sysinfo_totalram+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +static struct sysinfo ac_aggr; +if (ac_aggr.totalram) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_sysinfo_totalram=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +static struct sysinfo ac_aggr; +if (sizeof ac_aggr.totalram) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_sysinfo_totalram=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_member_struct_sysinfo_totalram=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_member_struct_sysinfo_totalram" >&5 +echo "${ECHO_T}$ac_cv_member_struct_sysinfo_totalram" >&6 +if test $ac_cv_member_struct_sysinfo_totalram = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_SYSINFO_TOTALRAM 1 +_ACEOF + + +fi + +echo "$as_me:$LINENO: checking whether sysctl is declared" >&5 +echo $ECHO_N "checking whether sysctl is declared... $ECHO_C" >&6 +if test "${ac_cv_have_decl_sysctl+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +#ifndef sysctl + char *p = (char *) sysctl; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_sysctl=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_have_decl_sysctl=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_have_decl_sysctl" >&5 +echo "${ECHO_T}$ac_cv_have_decl_sysctl" >&6 +if test $ac_cv_have_decl_sysctl = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SYSCTL 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SYSCTL 0 +_ACEOF + + +fi +echo "$as_me:$LINENO: checking whether CTL_HW is declared" >&5 +echo $ECHO_N "checking whether CTL_HW is declared... $ECHO_C" >&6 +if test "${ac_cv_have_decl_CTL_HW+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +#ifndef CTL_HW + char *p = (char *) CTL_HW; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_CTL_HW=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_have_decl_CTL_HW=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_have_decl_CTL_HW" >&5 +echo "${ECHO_T}$ac_cv_have_decl_CTL_HW" >&6 +if test $ac_cv_have_decl_CTL_HW = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_CTL_HW 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_CTL_HW 0 +_ACEOF + + +fi +echo "$as_me:$LINENO: checking whether HW_PHYSMEM is declared" >&5 +echo $ECHO_N "checking whether HW_PHYSMEM is declared... $ECHO_C" >&6 +if test "${ac_cv_have_decl_HW_PHYSMEM+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +#ifndef HW_PHYSMEM + char *p = (char *) HW_PHYSMEM; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_HW_PHYSMEM=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_have_decl_HW_PHYSMEM=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_have_decl_HW_PHYSMEM" >&5 +echo "${ECHO_T}$ac_cv_have_decl_HW_PHYSMEM" >&6 +if test $ac_cv_have_decl_HW_PHYSMEM = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_HW_PHYSMEM 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_HW_PHYSMEM 0 +_ACEOF + + +fi + + + +# test to see if srcdir already configured +if test "`cd $srcdir && pwd`" != "`pwd`" && + test -f $srcdir/config.status; then + { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5 +echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;} + { (exit 1); exit 1; }; } +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='cufflinks' + VERSION='1.3.0' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +install_sh=${install_sh-"$am_aux_dir/install-sh"} + +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + echo "$as_me:$LINENO: result: $STRIP" >&5 +echo "${ECHO_T}$STRIP" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + + test -z "$ac_cv_prog_ac_ct_STRIP" && ac_cv_prog_ac_ct_STRIP=":" +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 +echo "${ECHO_T}$ac_ct_STRIP" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + STRIP=$ac_ct_STRIP +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s" + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. + +AMTAR=${AMTAR-"${am_missing_run}tar"} + + +echo "$as_me:$LINENO: checking how to create a pax tar archive" >&5 +echo $ECHO_N "checking how to create a pax tar archive... $ECHO_C" >&6 +# Loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' +_am_tools=${am_cv_prog_tar_pax-$_am_tools} +# Do not fold the above two line into one, because Tru64 sh and +# Solaris sh will not grok spaces in the rhs of `-'. +for _am_tool in $_am_tools +do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; + do + { echo "$as_me:$LINENO: $_am_tar --version" >&5 + ($_am_tar --version) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && break + done + am__tar="$_am_tar --format=posix -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=posix -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. + (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x pax -w "$$tardir"' + am__tar_='pax -L -x pax -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H pax -L' + am__tar_='find "$tardir" -print | cpio -o -H pax -L' + am__untar='cpio -i -H pax -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_pax}" && break + + # tar/untar a dummy directory, and stop if the command works + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 + (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + rm -rf conftest.dir + if test -s conftest.tar; then + { echo "$as_me:$LINENO: $am__untar &5 + ($am__untar &5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi +done +rm -rf conftest.dir + +if test "${am_cv_prog_tar_pax+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_prog_tar_pax=$_am_tool +fi + +echo "$as_me:$LINENO: result: $am_cv_prog_tar_pax" >&5 +echo "${ECHO_T}$am_cv_prog_tar_pax" >&6 + + + + +depcc="$CC" am_compiler_list= + +echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6 +if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. + if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. 
+ # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CC_dependencies_compiler_type" >&6 +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + + +if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + +depcc="$CXX" am_compiler_list= + +echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6 +if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. 
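[Editor's note] The probe above (shown in full for CC and repeated here for CXX) tries each depcomp mode until one produces a usable .Po dependency file; on current GCC toolchains it typically settles on the "gcc3" mode, which gets dependencies as a side effect of compilation. The stand-alone sketch below reproduces that idea outside of depcomp; the conftest*/conftst* file names are arbitrary, and this is illustrative rather than part of the generated script.

    # Sketch: side-effect dependency generation as used by the "gcc3" depmode.
    printf '#include "conftst1.h"\nint main (void) { return 0; }\n' > conftest.c
    touch conftst1.h
    # -MD/-MF emit a makefile fragment listing header dependencies while compiling:
    gcc -MT conftest.o -MD -MP -MF conftest.Po -c -o conftest.o conftest.c
    grep conftst1.h conftest.Po   # succeeds when the dependency was recorded
    rm -f conftest.c conftest.o conftest.Po conftst1.h

This style is preferred because it costs no extra compiler invocation; the other modes in the loop are fallbacks for compilers without such a flag. The generated probe continues immediately below.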
+ if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6 +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + + +if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + + + + ac_config_files="$ac_config_files Makefile src/Makefile" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." 
>&2;} + { (exit 1); exit 1; }; } +fi + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. 
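The expr/sed fallback used earlier in this block to compute as_me is a portable substitute for basename on systems where that utility misbehaves. Its effect on a made-up path:

    expr X/"/usr/local/bin/config.status" : '.*/\([^/][^/]*\)/*$'
    # prints: config.status
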
+ if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. 
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by cufflinks $as_me 1.3.0, which was +generated by GNU Autoconf 2.59. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +cufflinks config.status 1.3.0 +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +INSTALL="$INSTALL" +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." 
>&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +# +# INIT-COMMANDS section. +# + +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" + +_ACEOF + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. + "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "src/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. 
+# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t +s,@INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t +s,@INSTALL_DATA@,$INSTALL_DATA,;t t +s,@CYGPATH_W@,$CYGPATH_W,;t t +s,@PACKAGE@,$PACKAGE,;t t +s,@VERSION@,$VERSION,;t t +s,@ACLOCAL@,$ACLOCAL,;t t +s,@AUTOCONF@,$AUTOCONF,;t t +s,@AUTOMAKE@,$AUTOMAKE,;t t +s,@AUTOHEADER@,$AUTOHEADER,;t t +s,@MAKEINFO@,$MAKEINFO,;t t +s,@install_sh@,$install_sh,;t t +s,@STRIP@,$STRIP,;t t +s,@ac_ct_STRIP@,$ac_ct_STRIP,;t t +s,@INSTALL_STRIP_PROGRAM@,$INSTALL_STRIP_PROGRAM,;t t +s,@mkdir_p@,$mkdir_p,;t t +s,@AWK@,$AWK,;t t +s,@SET_MAKE@,$SET_MAKE,;t t +s,@am__leading_dot@,$am__leading_dot,;t t +s,@AMTAR@,$AMTAR,;t t +s,@am__tar@,$am__tar,;t t +s,@am__untar@,$am__untar,;t t +s,@PYTHON@,$PYTHON,;t t +s,@CXX@,$CXX,;t t +s,@CXXFLAGS@,$CXXFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CXX@,$ac_ct_CXX,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@DEPDIR@,$DEPDIR,;t t +s,@am__include@,$am__include,;t t +s,@am__quote@,$am__quote,;t t +s,@AMDEP_TRUE@,$AMDEP_TRUE,;t t +s,@AMDEP_FALSE@,$AMDEP_FALSE,;t t +s,@AMDEPBACKSLASH@,$AMDEPBACKSLASH,;t t +s,@CXXDEPMODE@,$CXXDEPMODE,;t t +s,@am__fastdepCXX_TRUE@,$am__fastdepCXX_TRUE,;t t +s,@am__fastdepCXX_FALSE@,$am__fastdepCXX_FALSE,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@CCDEPMODE@,$CCDEPMODE,;t t +s,@am__fastdepCC_TRUE@,$am__fastdepCC_TRUE,;t t +s,@am__fastdepCC_FALSE@,$am__fastdepCC_FALSE,;t t +s,@RANLIB@,$RANLIB,;t t +s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t +s,@PYTHON_VERSION@,$PYTHON_VERSION,;t t +s,@PYTHON_PREFIX@,$PYTHON_PREFIX,;t t +s,@PYTHON_EXEC_PREFIX@,$PYTHON_EXEC_PREFIX,;t t +s,@PYTHON_PLATFORM@,$PYTHON_PLATFORM,;t t +s,@pythondir@,$pythondir,;t t +s,@pkgpythondir@,$pkgpythondir,;t t +s,@pyexecdir@,$pyexecdir,;t t +s,@pkgpyexecdir@,$pkgpyexecdir,;t t +s,@BOOST_CPPFLAGS@,$BOOST_CPPFLAGS,;t t +s,@BOOST_LDFLAGS@,$BOOST_LDFLAGS,;t t +s,@BAM_CPPFLAGS@,$BAM_CPPFLAGS,;t t +s,@BAM_LDFLAGS@,$BAM_LDFLAGS,;t t +s,@BAM_LIB@,$BAM_LIB,;t t +s,@build@,$build,;t t +s,@build_cpu@,$build_cpu,;t t +s,@build_vendor@,$build_vendor,;t t +s,@build_os@,$build_os,;t t +s,@BOOST_THREAD_LIB@,$BOOST_THREAD_LIB,;t t +s,@CPP@,$CPP,;t t +s,@EGREP@,$EGREP,;t t +s,@ZLIB@,$ZLIB,;t t +s,@host@,$host,;t t 
+s,@host_cpu@,$host_cpu,;t t +s,@host_vendor@,$host_vendor,;t t +s,@host_os@,$host_os,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. + (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) 
# No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_builddir$INSTALL ;; + esac + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. 
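Every template named in CONFIG_FILES is then pushed through $ac_sed_cmds, the chained sed scripts assembled from the s,@NAME@,value,;t t table earlier. Stripped of the :t label and the ;t t branch (which only short-circuit lines with no @...@ tokens left), each entry is a plain substitution; with a made-up value:

    echo 'prefix = @prefix@' | sed 's,@prefix@,/usr/local,'
    # prefix = /usr/local
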
+ ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t +s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +s,@INSTALL@,$ac_INSTALL,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + # Do quote $f, to prevent DOS paths from being IFS'd. + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. 
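The ac_uA...ac_uD pieces above are glued around each macro name and value to build one substitution per symbol; it rewrites an '#undef NAME' template line from config.h.in into the corresponding define. Assembled by hand for an illustrative macro (HAVE_STDLIB_H with value 1):

    echo '#undef HAVE_STDLIB_H' |
      sed 's,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)HAVE_STDLIB_H$,\1#\2define\3HAVE_STDLIB_H 1,;t'
    # #define HAVE_STDLIB_H 1
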
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + +_ACEOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +# Using a here document instead of a string reduces the quoting nightmare. +# Putting comments in sed scripts is not portable. +# +# `end' is used to avoid that the second main sed command (meant for +# 0-ary CPP macros) applies to n-ary macro definitions. +# See the Autoconf documentation for `clear'. +cat >confdef2sed.sed <<\_ACEOF +s/[\\&,]/\\&/g +s,[\\$`],\\&,g +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp +t end +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp +: end +_ACEOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. Nevertheless, we may not want to +# sort them, since we want the *last* AC-DEFINE to be honored. +uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines +sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f confdef2sed.sed + +# This sed command replaces #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >>conftest.undefs <<\_ACEOF +s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, +_ACEOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null +do + # Write a limited-size here document to $tmp/defines.sed. + echo ' cat >$tmp/defines.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#define' lines. + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/defines.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines +echo ' fi # grep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.undefs >/dev/null +do + # Write a limited-size here document to $tmp/undefs.sed. + echo ' cat >$tmp/undefs.sed <>$CONFIG_STATUS + # Speed up: don't consider the non `#undef' + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. 
+ echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/undefs.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs + +cat >>$CONFIG_STATUS <<\_ACEOF + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. */" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in + if test x"$ac_file" != x-; then + if diff $ac_file $tmp/config.h >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + rm -f $ac_file + mv $tmp/config.h $ac_file + fi + else + cat $tmp/config.h + rm -f $tmp/config.h + fi +# Compute $ac_file's index in $config_headers. +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $ac_file | $ac_file:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $ac_file" >`(dirname $ac_file) 2>/dev/null || +$as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X$ac_file : 'X\(//\)[^/]' \| \ + X$ac_file : 'X\(//\)$' \| \ + X$ac_file : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X$ac_file | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'`/stamp-h$_am_stamp_count +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_COMMANDS section. +# +for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue + ac_dest=`echo "$ac_file" | sed 's,:.*,,'` + ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_dir=`(dirname "$ac_dest") 2>/dev/null || +$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_dest" : 'X\(//\)[^/]' \| \ + X"$ac_dest" : 'X\(//\)$' \| \ + X"$ac_dest" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$ac_dest" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + { echo "$as_me:$LINENO: executing $ac_dest commands" >&5 +echo "$as_me: executing $ac_dest commands" >&6;} + case $ac_dest in + depfiles ) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named `Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # So let's grep whole file. 
+ if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then + dirpart=`(dirname "$mf") 2>/dev/null || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running `make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n 's/^U = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`(dirname "$file") 2>/dev/null || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p $dirpart/$fdir + else + as_dir=$dirpart/$fdir + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory $dirpart/$fdir" >&5 +echo "$as_me: error: cannot create directory $dirpart/$fdir" >&2;} + { (exit 1); exit 1; }; }; } + + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done +done + ;; + esac +done +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. 
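The depfiles command above pulls DEPDIR, am__include and am__quote straight out of each automake-generated Makefile and seeds every dependency file it references with a one-line placeholder, so the include statements succeed before anything has been compiled. Replayed by hand for a single, hypothetical object file it amounts to:

    mf=src/Makefile                              # an automake-generated Makefile
    DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`    # conventionally ".deps"
    mkdir -p "src/$DEPDIR"
    echo '# dummy' > "src/$DEPDIR/cufflinks.Po"  # placeholder until make regenerates it
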
+if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + +# dump some configuration confirmations +echo \ +" +-- ${PACKAGE_STRING} Configuration Results -- + C++ compiler: ${CXX} ${CXXFLAGS} ${LDFLAGS}" + +if test x"${GCC}" = x"yes" ; then + gcc_version=`${CC} --version | head -n 1` + echo " GCC version: ${gcc_version}" +else + gcc_version='' +fi + +echo \ +" Host System type: ${host} + Install prefix: ${prefix} + Install eprefix: ${exec_prefix} + + See config.h for further configuration information. + Email <${PACKAGE_BUGREPORT}> with questions and bug reports. +" + +if test x"${PYTHON}" = x":" || ! test -x "${PYTHON}"; then + echo "WARNING! python was not found and is required to run some utility scripts" + echo " We recommend installing python and pointing configure to the installed location" +fi + diff --git a/configure.ac b/configure.ac new file mode 100755 index 0000000..b8a0b9b --- /dev/null +++ b/configure.ac @@ -0,0 +1,142 @@ +m4_include([ax_boost_base.m4]) +m4_include([ax_boost_thread.m4]) +m4_include([ax_bam.m4]) +m4_include([ax_check_zlib.m4]) + +define([svnversion], esyscmd([sh -c "svnversion|tr -d '\n'"]))dnl +AC_INIT([cufflinks], [1.3.0], [cole@cs.umd.edu]) +AC_DEFINE(SVN_REVISION, "svnversion", [SVN Revision]) + +AC_CONFIG_SRCDIR([config.h.in]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_AUX_DIR([build-aux]) +AM_INIT_AUTOMAKE + +#AM_PATH_CPPUNIT(1.10.2) + +AC_ARG_VAR(PYTHON, [python program]) + + # Make sure CXXFLAGS is defined so that AC_PROG_CXX doesn't set it. +CXXFLAGS="$CXXFLAGS" +CFLAGS="$CFLAGS" + +AC_LANG(C) + +# Checks for programs. +AC_PROG_AWK +AC_PROG_CXX +AC_PROG_CC +AC_PROG_MAKE_SET +AC_PROG_RANLIB +AC_PROG_INSTALL +AM_PATH_PYTHON([2.4]) +AX_BOOST_BASE([1.38.0]) +AX_BAM +AX_BOOST_THREAD +AX_CHECK_ZLIB() + +# Checks for header files. +AC_CHECK_HEADERS([stdlib.h string.h unistd.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_HEADER_STDBOOL +AC_C_INLINE +AC_TYPE_PID_T +AC_TYPE_SIZE_T +AC_CHECK_TYPES([ptrdiff_t]) + +# Checks for library functions. 
+#AC_FUNC_FORK +#AC_CHECK_FUNCS([floor memmove pow regcomp sqrt strchr strcspn strspn strstr]) + +# check the platform +AC_CANONICAL_HOST + +# set CFLAGS and CXXFLAGS +user_CFLAGS=${CFLAGS} +generic_CFLAGS="-Wall -Wno-strict-aliasing -g -gdwarf-2 -Wuninitialized" +ext_CFLAGS="" +debug_CFLAGS="" +#echo "${host_cpu}-${host_os}" +case "${host_cpu}-${host_os}" in + i*86-*linux*) + ext_CFLAGS="-march=i686";; + i*86-darwin*) + CFLAGS="-m64" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], [ext_CFLAGS="-arch x86_64"], []);; + *) + AC_MSG_CHECKING([if gcc accepts -m64]) + CFLAGS="-m64" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], [ext_CFLAGS="-m64"; AC_MSG_RESULT([yes])], + [ext_CFLAGS="-D_FILE_OFFSET_BITS=64"; AC_MSG_RESULT([no])]);; +esac + +AC_ARG_ENABLE(vectorize, [ --enable-vectorize Enable GCC auto-vectorization], + [ext_CFLAGS="${ext_CFLAGS} -ftree-vectorize -msse3 -ffast-math -ftree-vectorizer-verbose=99"], []) + +AC_ARG_ENABLE(intel64, [ --enable-intel64 optimize for Intel64 CPU such as Xeon and Core2], + [ext_CFLAGS="${ext_CFLAGS} -march=nocona"], []) + +AC_ARG_ENABLE([debug], + [AS_HELP_STRING([--enable-debug], + [enable debugging info (default is no)])], + [], [enable_debug=no]) + + +AC_ARG_ENABLE([optim], + [AS_HELP_STRING([--enable-optim@<:@=0|1|2|3@:>@], + [set optimization level (default is 3)])], + [if test "x$enable_optim" = xyes; then enable_optim=3; fi], + [enable_optim=3]) + +AS_IF([test "x$enable_optim" != xno], [ext_CFLAGS="$ext_CFLAGS -O$enable_optim"]) + +AS_IF([test "x$enable_debug" = xyes], + [debug_CFLAGS="-DDEBUG"], + [debug_CFLAGS="-DNDEBUG"]) + +AC_ARG_ENABLE(profiling, [ --enable-profiling enable profiling with google-perftools], + [ext_LDFLAGS="-lprofiler -ltcmalloc"], []) + +CFLAGS="${generic_CFLAGS} ${ext_CFLAGS} ${user_CFLAGS} ${debug_CFLAGS} ${OPENMP_CFLAGS}" +CXXFLAGS="$CFLAGS" +CXXFLAGS="$CXXFLAGS $BOOST_CPPFLAGS $BAM_CPPFLAGS" +LDFLAGS="$ext_LDFLAGS" + +# Checks for structures/functions that can be used to determine system memory +AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [#include ]) +AC_CHECK_DECLS([sysctl, CTL_HW, HW_PHYSMEM], [], [], [#include ]) + +AM_INIT_AUTOMAKE([-Wall -Werror tar-pax foreign]) + +AC_CONFIG_FILES([Makefile + src/Makefile]) + +AC_OUTPUT +# dump some configuration confirmations +echo \ +" +-- ${PACKAGE_STRING} Configuration Results -- + C++ compiler: ${CXX} ${CXXFLAGS} ${LDFLAGS}" + +if test x"${GCC}" = x"yes" ; then + gcc_version=`${CC} --version | head -n 1` + echo " GCC version: ${gcc_version}" +else + gcc_version='' +fi + +echo \ +" Host System type: ${host} + Install prefix: ${prefix} + Install eprefix: ${exec_prefix} + + See config.h for further configuration information. + Email <${PACKAGE_BUGREPORT}> with questions and bug reports. +" + +if test x"${PYTHON}" = x":" || ! test -x "${PYTHON}"; then + echo "WARNING! python was not found and is required to run some utility scripts" + echo " We recommend installing python and pointing configure to the installed location" +fi + diff --git a/make_bin.sh b/make_bin.sh new file mode 100755 index 0000000..61ba726 --- /dev/null +++ b/make_bin.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +#simple script to pack up a precompiled binary package, with the boost thread +# library statically linked in. 
+ +echo "packing up $1.tar.gz, using boost in $2, linking against $3 and using BAM in $4" +mkdir $1 +make clean +./configure --enable-intel64 --with-boost=$2 --with-boost-thread=$3 --with-bam=$4 +make +cp src/cufflinks $1 +cp src/cuffcompare $1 +cp src/cuffdiff $1 +cp src/cuffmerge $1/cuffmerge +cp src/gffread $1 +cp src/gtf_to_sam $1 +cp README $1 +cp LICENSE $1 +cp AUTHORS $1 + +tar cvfz $1.tar.gz $1 \ No newline at end of file diff --git a/src/GArgs.cpp b/src/GArgs.cpp new file mode 100644 index 0000000..f3b72b9 --- /dev/null +++ b/src/GArgs.cpp @@ -0,0 +1,376 @@ +#include "GBase.h" +#include "GArgs.h" +#include + +GArgs::GArgs(int argc, char* const argv[], const char* format, bool nodigitopts) { + /* format can be: + {;|=} e.g. disable-test;PID=S= for --disable-test PID=50 (or --PID 50) S=3.5 etc. + [:] e.g. p:hT for -p testing (or -ptesting) -h -T + */ +const char* fstr=format; +fmtcount=0; +count=0; +nonOptCount=0; +nonOptPos=0; +optPos=0; +errarg=0; +err_valmissing=false; +args=NULL; +fmt=NULL; +_argc=argc; +_argv=argv; +int fmtlen=strlen(format); +//---- first parse the format string +while (fstr-format < fmtlen ) { + int l=strcspn(fstr, ";=:"); + if (fstr[l]==0) { //end of string reached + //all previous chars are just switches: + GREALLOC(fmt, (fmtcount+l)*sizeof(fmtdef)); + //store each switch + for (int i=0; i=0) { + GREALLOC(args, (count+1)*sizeof(argdata)); + GCALLOC(args[count].opt, 2); + args[count].opt[0]=c; + args[count].fmti=f; + if (!fmt[f].req_value) {//switch type + GCALLOC(args[count].value,1);//so getOpt() functions would not return NULL + count++; + // only switches can be grouped with some other switches or options + if (_argv[p][cpos+1]!='\0') { + cpos++; + c=_argv[p][cpos]; + goto COLLAPSED; + } + } + else { + //single-dash argument followed by a value + if (_argv[p][cpos+1]=='\0') { + if (p+1<_argc && _argv[p+1][0]!=0) { //value is the whole next argument + p++; + args[count].value=Gstrdup(_argv[p]); + } + else { + errarg=p; + err_valmissing=true; + return errarg; + } + } + else { //value immediately follows the dash-option + args[count].value=Gstrdup(_argv[p]+cpos+1); + } + count++; + } + } //was validShortOpt + else { //option not found in format definition! 
+ errarg=p; + return errarg; + } + } + } //-single-dash + else {//not a single-dash argument + char* ap=_argv[p]; + bool is_longopt=false; + if (*ap=='-' && ap[1]=='-') { + is_longopt=true; + ap+=2; + } + char* e=strchr(ap+1,'='); + while (e!=NULL && *(e-1)=='\\') e=strchr(e,'='); + if (e==NULL && is_longopt) { + e=ap; + while (*e!=0 && *e!=' ') e++; + //e will be on eos or next space + } + if (e!=NULL && e>ap) { + //this must be a long option + //e is on eos, space or '=' + if ((f=validLongOpt(ap,e-1))>=0) { + GREALLOC(args, (count+1)*sizeof(argdata)); + args[count].opt=Gstrdup(ap,e-1); + args[count].fmti=f; + if (fmt[f].req_value) { + if (*e==0) { + //value is the next argument + if (p+1<_argc && _argv[p+1][0]!=0) { + p++; + args[count].value=Gstrdup(_argv[p]); + } + else { + errarg=p; + err_valmissing=true; + return errarg; + } + } + else { //value is in the same argument + //while (*e!=0 && (*e==' ' || *e=='=')) e++; + if (*e=='=') e++; + if (*e==0) { + errarg=p; + err_valmissing=true; + return errarg; + } + args[count].value=Gstrdup(e); + } + } //value required + else { //no value expected + GCALLOC(args[count].value,1); //do not return NULL + } + count++; + } + else { //error - this long argument not recognized + errarg=p; + return errarg; + } + } + else { //just a plain non-option argument + if (e==ap) { //i.e. just "--" + errarg=p; + return errarg; + } + GREALLOC(args, (count+1)*sizeof(argdata)); + args[count].opt=NULL; //it's not an option + args[count].value=Gstrdup(_argv[p]); + args[count].fmti=-1; + count++; + nonOptCount++; + } + } + p++;//check next arg string + } //while arguments + return errarg; +} + +void GArgs::printError(FILE* fout, const char* usage, bool exitProgram) { + if (errarg==0) return; + if (usage) fprintf(fout, "%s\n", usage); + if (err_valmissing) + fprintf(fout, "Error: value required for option '%s'\n", _argv[errarg]); + else + fprintf(fout, "Error: invalid argument '%s'\n", _argv[errarg]); + if (exitProgram) + exit(1); +} + +void GArgs::printError(const char* usage, bool exitProgram) { + printError(stderr, usage, exitProgram); +} + +void GArgs::printCmdLine(FILE* fout) { + if (_argv==NULL) return; + for (int i=0;i<_argc;i++) { + fprintf(fout, "%s%c", _argv[i], (i==_argc-1)?'\n':' '); + } +} + +GArgs::GArgs(int argc, char* const argv[], const GArgsDef fmtrecs[], bool nodigitopts) { + fmtcount=0; + count=0; + nonOptCount=0; + nonOptPos=0; + optPos=0; + errarg=0; + err_valmissing=false; + args=NULL; + fmt=NULL; + _argc=argc; + _argv=argv; + if (fmtrecs==NULL) return; + + const GArgsDef* frec=fmtrecs; + while ((frec->longopt || frec->opt) && fmtcount<255) { + fmtcount++; + frec=&(fmtrecs[fmtcount]); + } + GCALLOC(fmt, fmtcount*sizeof(fmtdef)); + for (int i=0;i=0 && fmt[args[i].fmti].code==c) + return args[i].value; + return NULL; +} + +char* GArgs::getOptName(int c) { + for (int i=0; i=0 && fmt[args[i].fmti].code==c) + return args[i].opt; + return NULL; +} + + +int GArgs::startNonOpt(){ //reset iteration through non-option arguments + //returns the number of non-option arguments +nonOptPos=0; +return nonOptCount; +} + + +char* GArgs::nextNonOpt() { //get the next non-dashed argument + //or NULL if no more +for (int i=nonOptPos;i=0) { + optPos=i+1; + return fmt[args[i].fmti].code; + } +return 0; //must make sure that codes are > 0 for this to work properly +} + diff --git a/src/GArgs.h b/src/GArgs.h new file mode 100644 index 0000000..92f32fb --- /dev/null +++ b/src/GArgs.h @@ -0,0 +1,98 @@ +/* +GArgs is a quick'n'dirty object oriented replacement for the 
standard + getopts library call available on many unix platforms; + it accepts the regular single dash style options + -[ ][] + but also attr=value style options: + = +*/ + +#ifndef G_ARGS_DEFINED +#define G_ARGS_DEFINED + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +struct GArgsDef { + const char* longopt; + char opt; //equivalent one-char option, if any + bool req_value; //true if the string that follows must be a value + int code; //an enum code to be associated with this option +}; + +class GArgs { + //structure for parsing arguments format definition + struct fmtdef { + char* longopt; + char opt; //equivalent one-char option, if any + bool req_value; //true if the string that follows must be a value + int code; //an enum code to be associated with this option + }; + int fmtcount; + fmtdef* fmt; //this will store format definition after parsing it + struct argdata { + char* opt; // this is NULL for non-dashed arguments + // a single character for single dash style arguments + // a string for ARG=VALUE or --long_option style arguments + char* value; // is NULL for switches (dashed flags) + int fmti; //index in fmt table + //int code; // if GArgsDef[] constructor was used, for getOpt + }; + int _argc; + char* const *_argv; //the original main() values + argdata* args; //arguments table after parsing it + int count; //total count of elements in 'args' array + int nonOptCount; //count of non-dashed, non= arguments + int nonOptPos; //current position for nonOpt arguments iterator + int optPos; //current position for options iterator + int errarg; //argv error position after parsing + bool err_valmissing; //if the error is strictly about missing value for errarg option + int parseArgs(bool nodigitopts=false); + //parsing helper functions + int validOpt(int c); + int validShortOpt(char o); + int validLongOpt(char* o, char* to); + public: + + GArgs(int argc, char* const argv[], const char* format, bool nodigitopts=false); + /* format can be: + {;|=} e.g. disable-test;PID=S= for --disable-test PID=50 (or --PID 50) S=3.5 etc. + [:] e.g. 
p:hT for -p testing (or -ptesting) -h -T + This means that the long options, if present, should be given at the beginning + of the format string, before the single-dash, single-char options + */ + GArgs(int argc, char* const argv[], const GArgsDef fmtrecs[], bool nodigitopts=false); + + ~GArgs(); + int isError(); // returns the offending argv position or 0 if no error + int getCount() { return count; } //total number of arguments given + int getFmtCount() { return fmtcount; } //total number of option definitions + int getNonOptCount() { return nonOptCount; } //total number of non-option arguments + char* getOpt(const char* o); /* retrieve the value for option o + returns + NULL if option not given at all + !=NULL if boolean option was given + opt's value if value option was given + */ + char* getOpt(const char o); + char* getOpt(int c); //retrieve value by enum code + char* getOptName(int c); //retrieve name of by enum code + int startOpt(); //init iteration through option arguments + // returns number of option args + + char* nextOpt(); //get next option argument's string + int nextCode(); //get next option argument's code + + int startNonOpt(void); //init iteration through non-option arguments + // returns the number of non-option arguments + void printError(FILE* fout, const char* usage=NULL, + bool exitProgram=false); + void printError(const char* usage=NULL, bool exitProgram=false); + void printCmdLine(FILE* fout); + char* nextNonOpt(); //get the next non-option argument +}; + +#endif diff --git a/src/GBase.cpp b/src/GBase.cpp new file mode 100644 index 0000000..efbebea --- /dev/null +++ b/src/GBase.cpp @@ -0,0 +1,760 @@ +#include "GBase.h" +#include +#include +#include + +#ifndef S_ISDIR +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#endif + +#ifndef S_ISREG +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#endif + +static char msg[4069]; +/* +#ifdef _DEFINE_WIN32_FSEEKO + int fseeko(FILE *stream, off_t offset, int whence) { + + } +#endif + +#ifdef _DEFINE_WIN32_FTELLO + off_t ftello(FILE *stream) { + + } +#endif +*/ + + +int saprintf(char **retp, const char *fmt, ...) { + va_list argp; + int len; + char *buf; + + va_start(argp, fmt); + len = vsnprintf(NULL, 0, fmt, argp); + va_end(argp); + GMALLOC(buf, (len + 1)); + if(buf == NULL) + { + *retp = NULL; + return -1; + } + + va_start(argp, fmt); + vsnprintf(buf, len+1, fmt, argp); + va_end(argp); + + *retp = buf; + return len; +} + +//************************* Debug helpers ************************** +// Assert failed routine +void GAssert(const char* expression, const char* filename, unsigned int lineno){ + sprintf(msg,"%s(%d): ASSERT(%s) failed.\n",filename,lineno,expression); + fprintf(stderr,"%s",msg); + //abort(); + } +// Error routine (prints error message and exits!) 
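+// A minimal usage sketch (illustrative only; 'fh' and 'fname' are hypothetical):
+//   if (fh==NULL) GError("Error: cannot open file '%s'!\n", fname);
+// The format string follows printf conventions; GError() prints to stderr and
+// never returns (the program exits).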
+void GError(const char* format,...){ + #ifdef __WIN32__ + va_list arguments; + va_start(arguments,format); + vsprintf(msg,format,arguments); + va_end(arguments); + OutputDebugString(msg); + fprintf(stderr,"%s",msg); // if a console is available + MessageBox(NULL,msg,NULL,MB_OK|MB_ICONEXCLAMATION|MB_APPLMODAL); + #else + va_list arguments; + va_start(arguments,format); + vfprintf(stderr,format,arguments); + va_end(arguments); + #ifdef DEBUG + // modify here if you want a core dump + abort(); + #endif + #endif + exit(1); + } + +// Warning routine (just print message without exiting) +void GMessage(const char* format,...){ + va_list arguments; + va_start(arguments,format); + vsprintf(msg,format,arguments); + va_end(arguments); + #ifdef __WIN32__ + OutputDebugString(msg); + #endif + fprintf(stderr,"%s",msg);fflush(stderr); + } + +/*************** Memory management routines *****************/ +// Allocate memory +bool GMalloc(pointer* ptr,unsigned long size){ + //GASSERT(ptr); + if (size!=0) *ptr=malloc(size); + return *ptr!=NULL; + } + +// Allocate cleaned memory (0 filled) +bool GCalloc(pointer* ptr,unsigned long size){ + GASSERT(ptr); + *ptr=calloc(size,1); + return *ptr!=NULL; + } + +// Resize memory +bool GRealloc(pointer* ptr,unsigned long size){ + //GASSERT(ptr); + if (size==0) { + GFree(ptr); + return true; + } + if (*ptr==NULL) {//simple malloc + void *p=malloc(size); + if (p != NULL) { + *ptr=p; + return true; + } + else return false; + }//malloc + else {//realloc + void *p=realloc(*ptr,size); + if (p) { + *ptr=p; + return true; + } + return false; + } + } +// Free memory, resets ptr to NULL afterward +void GFree(pointer* ptr){ + GASSERT(ptr); + if (*ptr) free(*ptr); + *ptr=NULL; + } + +char* Gstrdup(const char* str) { + if (str==NULL) return NULL; + char *copy=NULL; + GMALLOC(copy, strlen(str)+1); + strcpy(copy,str); + return copy; + } + +char* newEmptyStr() { + char* zs=NULL; + GMALLOC(zs,1); + zs[0]=0; + return zs; +} + +char* Gstrdup(const char* sfrom, const char* sto) { + if (sfrom==NULL || sto==NULL) return NULL; + char *copy=NULL; + if (sfrom[0]==0) return newEmptyStr(); + GMALLOC(copy, sto-sfrom+2); + strncpy(copy, sfrom, sto-sfrom+1); + copy[sto-sfrom+1]=0; + return copy; + } + +int Gstrcmp(char* a, char* b) { + if (a==NULL || b==NULL) { + return a==NULL ? -1 : 1; + } + else return strcmp(a,b); +} + +int Gstricmp(const char* a, const char* b) { + if (a==NULL || b==NULL) return a==NULL ? -1 : 1; + register int ua, ub; + while ((*a!=0) && (*b!=0)) { + ua=tolower((unsigned char)*a); + ub=tolower((unsigned char)*b); + a++;b++; + if (ua!=ub) return ua < ub ? -1 : 1; + } + return (*a == 0) ? ( (*b == 0) ? 0 : -1 ) : 1 ; +} + +int strsplit(char* str, char** fields, int maxfields, const char* delim) { + //splits by placing 0 where delim chars are found, setting fields[] to the beginning + //of each field (stopping after maxfields); returns number of fields parsed + int tidx=0; + bool afterdelim=true; + int i=0; + while (str[i]!=0 && tidx=str) { + if (*p==ch) return p; + p--; + } + return NULL; + } + + +/* DOS/UNIX safer fgets : reads a text line from a (binary) file and + update the file position accordingly and the buffer capacity accordingly. + The given buf is resized to read the entire line in memory + -- even when it's abnormally long + */ +char* fgetline(char* & buf, int& buf_cap, FILE *stream, off_t* f_pos, int* linelen) { + //reads a char at a time until \n and/or \r are encountered + int i=0; + int c=0; + off_t fpos=(f_pos!=NULL) ? 
*f_pos : 0; + while ((c=getc(stream))!=EOF) { + if (i>=buf_cap-1) { + buf_cap+=1024; + GREALLOC(buf, buf_cap); + } + if (c=='\n' || c=='\r') { + if (c=='\r') { + if ((c=getc(stream))!='\n') ungetc(c,stream); + else fpos++; + } + fpos++; + break; + } + fpos++; + buf[i]=(char)c; + i++; + } //while i=allocated-1) { + allocated+=1024; + GREALLOC(buf, allocated); + } + if (c=='\n' || c=='\r') { + buf[len]='\0'; + if (c=='\r') { //DOS file -- special case + if ((c=getc(stream))!='\n') ungetc(c,stream); + else f_pos++; + } + f_pos++; + lcount++; + return buf; + } + f_pos++; + buf[len]=(char)c; + len++; + } //while i=str) { + for (i=0; i=0 && s[i]==suffix[j]) { i--; j--; } + return (j==-1); + } + + +char* reverseChars(char* str, int slen) { + if (slen==0) slen=strlen(str); + int l=0; + int r=slen-1; + char c; + while (l=lend) { + for (i=0;i>24; + h&=0x0fffffff; + } + GASSERT(h<=0x0fffffff); + return h; + } + +// removes the last part (file or directory name) of a full path +// this is a destructive operation for the given string!!! +// the trailing '/' is guaranteed to be there +void delFileName(char* filepath) { + char *p, *sep; + if (filepath==NULL) return; + for (p=filepath, sep=filepath;*p!='\0';p++) + if (*p=='/' || *p=='\\') sep=p+1; + *sep='\0'; // truncate filepath +} + +// returns a pointer to the last file or directory name in a full path +const char* getFileName(const char* filepath) { + const char *p, *sep; + if (filepath==NULL) return NULL; + for (p=filepath, sep=filepath;*p!='\0';p++) + if (*p=='/' || *p=='\\') sep=p+1; + return sep; +} + +// returns a pointer to the file "extension" part in a filename +const char* getFileExt(const char* filepath) { + const char *p, *dp, *sep; + if (filepath==NULL) return NULL; + for (p=filepath, dp=filepath, sep=filepath;*p!='\0';p++) { + if (*p=='.') dp=p+1; + else if (*p=='/' || *p=='\\') + sep=p+1; + } + return (dp>sep) ? dp : NULL ; +} + +int fileExists(const char* fname) { + struct stat stFileInfo; + int r=0; + // Attempt to get the file attributes + int fs = stat(fname,&stFileInfo); + if (fs == 0) { + r=3; + // We were able to get the file attributes + // so the file obviously exists. + if (S_ISREG (stFileInfo.st_mode)) { + r=2; + } + if (S_ISDIR (stFileInfo.st_mode)) { + r=1; + } + } + return r; +} + +/*bool fileExists(const char* filepath) { + if (filepath==NULL) return false; + FILE* ft=fopen(filepath, "rb"); + if (ft==NULL) return false; + fclose(ft); + return true; +} +*/ +int64 fileSize(const char* fpath) { + struct stat results; + if (stat(fpath, &results) == 0) + // The size of the file in bytes is in + return (int64)results.st_size; + else + // An error occurred + //GMessage("Error at stat(%s)!\n", fpath); + return 0; +} + +bool parseNumber(char* &p, double& v) { + //skip any spaces.. + while (*p==' ' || *p=='\t') p++; + char* start=p; + /*if (*p=='-') p++; + else if (*p=='+') { p++;start++; }*/ + + /* while ((*p>='1' && *p<='9') || *p=='0' || + *p=='.' 
|| *p=='-' || tolower(*p)=='e') p++; */ + int numlen=strspn(start, "0123456789eE.-+"); + p=start+numlen; + //now p is on a non-digit; + if (*start=='-' && p==start+1) return false; + char saved=*p; + *p='\0'; + char* endptr=p; + v=strtod(start,&endptr); + *p=saved; + if (endptr!=p) return false; + return true; +} + + +bool parseDouble(char* &p, double& v) { + return parseNumber(p,v); +} + +bool parseInt(char* &p, int& i) { + while (*p==' ' || *p=='\t') p++; + char* start=p; + if (*p=='-') p++; + else if (*p=='+') { p++;start++; } + while ((*p>='1' && *p<='9') || *p=='0') p++; + //now p is on a non-digit; + if (*start=='-' && p==start+1) return false; + char saved=*p; + *p='\0'; + char* endptr=p; + long l=strtol(start,&endptr,10); + i=(int)l; + *p=saved; + if (endptr!=p || i!=l) return false; + return true; +} + +bool parseUInt(char* &p, uint& i) { + while (*p==' ' || *p=='\t') p++; + char* start=p; + if (*p=='-') return false; + else if (*p=='+') { p++;start++; } + while ((*p>='1' && *p<='9') || *p=='0') p++; + //now p is on a non-digit; + if (*start=='-' && p==start+1) return false; + char saved=*p; + *p='\0'; + char* endptr=p; + unsigned long l=strtoul(start,&endptr,10); + i=(uint) l; + *p=saved; + if (endptr!=p || i!=l) return false; + return true; +} + +bool parseHex(char* &p, uint& i) { + //skip initial spaces/prefix + while (*p==' ' || *p=='\t' || *p=='0' || *p=='x') p++; + char* start=p; + if (*p=='-') return false; + else if (*p=='+') { p++;start++; } + while (isxdigit(*p)) p++; + //now p is on a non-hexdigit; + if (p==start+1) return false; + char saved=*p; + *p='\0'; + char* endptr=p; + unsigned long l=strtoul(start,&endptr,16); + i=(uint) l; + *p=saved; + if (endptr!=p || i!=l) return false; + return true; +} + +//write a formatted fasta record, fasta formatted +void writeFasta(FILE *fw, const char* seqid, const char* descr, + const char* seq, int linelen, int seqlen) { + fflush(fw); + // write header line only if given! 
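+ // (a NULL seqid skips the defline, so a caller may append further sequence
+ // chunks to a record whose header was already written)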
+ if (seqid!=NULL) { + if (descr==NULL || descr[0]==0) + fprintf(fw,">%s\n",seqid); + else fprintf(fw,">%s %s\n",seqid, descr); + } + fflush(fw); + if (seq==NULL || *seq==0) return; //nothing to print + if (linelen==0) { //unlimited line length: write the whole sequence on a line + if (seqlen>0) + fwrite((const void*)seq, 1, seqlen,fw); + else fprintf(fw,"%s",seq); + fprintf(fw,"\n"); + fflush(fw); + return; + } + int ilen=0; + if (seqlen>0) { //seq length given, so we know when to stop + for (int i=0; i < seqlen; i++, ilen++) { + if (ilen == linelen) { + fputc('\n', fw); + ilen = 0; + } + fputc(seq[i], fw); + } + fputc('\n', fw); + } + else { //seq length not given, stop when 0 encountered + for (int i=0; seq[i]!=0; i++, ilen++) { + if (ilen == linelen) { + fputc('\n', fw); + ilen = 0; + } + fputc(seq[i], fw); + } //for + fputc('\n', fw); + } + fflush(fw); + } + +char* commaprint(uint64 n) { + static int comma = '\0'; + static char retbuf[48]; + char *p = &retbuf[sizeof(retbuf)-1]; + int i = 0; + if(comma == '\0') { + /* struct lconv *lcp = localeconv(); + if(lcp != NULL) { + if(lcp->thousands_sep != NULL && + *lcp->thousands_sep != '\0') + comma = *lcp->thousands_sep; + else */ + comma = ','; + // } + } + *p = '\0'; + do { + if(i%3 == 0 && i != 0) + *--p = comma; + *--p = '0' + n % 10; + n /= 10; + i++; + } while(n != 0); + return p; +} diff --git a/src/GBase.h b/src/GBase.h new file mode 100644 index 0000000..5e5dacf --- /dev/null +++ b/src/GBase.h @@ -0,0 +1,489 @@ +#ifndef G_BASE_DEFINED +#define G_BASE_DEFINED +#ifndef _POSIX_SOURCE +//mostly for MinGW +#define _POSIX_SOURCE +#endif +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_ + #ifndef __WIN32__ + #define __WIN32__ + #endif + #include + #include + #define CHPATHSEP '\\' + #undef off_t + #define off_t int64_t + #ifdef _fseeki64 + #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin) + #else + /* + #define _DEFINE_WIN32_FSEEKO + int fseeko(FILE *stream, off_t offset, int whence); + */ + #define fseeko fseek + #endif + #ifdef _ftelli64 + #define ftello(stream) _ftelli64(stream) + #else + /* + #define _DEFINE_WIN32_FTELLO + off_t ftello(FILE *stream); + */ + #define ftello ftell + #endif + #else + #define CHPATHSEP '/' + #include +#endif + +#ifndef fseeko + #define fseeko fseek +#endif +#ifndef ftello + #define ftello ftell +#endif + +#ifdef DEBUG +#undef NDEBUG +#endif + +typedef int32_t int32; +typedef uint32_t uint32; + +typedef unsigned char uchar; +typedef unsigned char byte; + +#ifndef MAXUINT +#define MAXUINT ((unsigned int)-1) +#endif + +#ifndef MAXINT +#define MAXINT INT_MAX +#endif + +#ifndef MAX_UINT +#define MAX_UINT ((unsigned int)-1) +#endif + +#ifndef MAX_INT +#define MAX_INT INT_MAX +#endif + +typedef int64_t int64; +typedef uint64_t uint64; + +/****************************************************************************/ + +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +/****************************************************************************/ +#define ERR_ALLOC "Error allocating memory.\n" + +//------------------- + +// Debug helpers +#ifndef NDEBUG + #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__)) + #ifdef TRACE + #define GTRACE(exp) (GMessage exp) + #else + #define GTRACE(exp) ((void)0) + #endif +#else + #define GASSERT(exp) ((void)0) + 
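// (when NDEBUG is defined -- i.e. release builds -- GASSERT and GTRACE expand to no-ops) + 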
#define GTRACE(exp) ((void)0) +#endif + +#define GERROR(exp) (GError exp) +/********************************** Macros ***********************************/ +// Abolute value +#define GABS(val) (((val)>=0)?(val):-(val)) + +// Min and Max +#define GMAX(a,b) (((a)>(b))?(a):(b)) +#define GMIN(a,b) (((a)>(b))?(b):(a)) + +// Min of three +#define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z)) + +// Max of three +#define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z)) + +// Return minimum and maximum of a, b +#define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a))) + +// Clamp value x to range [lo..hi] +#define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x))) + +typedef void* pointer; +typedef unsigned int uint; + +typedef int GCompareProc(const pointer item1, const pointer item2); +typedef void GFreeProc(pointer item); //usually just delete, + //but may also support structures with embedded dynamic members + +#define GMALLOC(ptr,size) if (!GMalloc((pointer*)(&ptr),size)) \ + GError(ERR_ALLOC) +#define GCALLOC(ptr,size) if (!GCalloc((pointer*)(&ptr),size)) \ + GError(ERR_ALLOC) +#define GREALLOC(ptr,size) if (!GRealloc((pointer*)(&ptr),size)) \ + GError(ERR_ALLOC) +#define GFREE(ptr) GFree((pointer*)(&ptr)) + +inline char* strMin(char *arg1, char *arg2) { + return (strcmp(arg1, arg2) < 0)? arg1 : arg2; +} + +inline char* strMax(char *arg1, char *arg2) { + return (strcmp(arg2, arg1) < 0)? arg1 : arg2; +} + +inline int iround(double x) { + return (int)floor(x + 0.5); +} + +/****************************************************************************/ + +inline int Gintcmp(int a, int b) { + //return (a>b)? 1 : ((a==b)?0:-1); + return a-b; +} + +int Gstrcmp(char* a, char* b); +//same as strcmp but doesn't crash on NULL pointers + +int Gstricmp(const char* a, const char* b); + +inline void swap(int &arg1, int &arg2){ + //arg1 ^= arg2; + //arg2 ^= arg1; + //arg1 ^= arg2; + register int swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(char* &arg1, char* &arg2){ //swap pointers! 
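+ // (only the pointer values are exchanged; the strings they point to are not copied)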
+ register char* swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(uint &arg1, uint &arg2) { + register uint swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(short &arg1, short &arg2) { + register short swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(unsigned short &arg1, unsigned short &arg2) { + register unsigned short swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(long &arg1, long &arg2) { + register long swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(unsigned long &arg1, unsigned long &arg2) { + register unsigned long swp=arg1; + arg1=arg2; arg2=swp; + } + + +inline void swap(char &arg1, char &arg2) { + register char swp=arg1; + arg1=arg2; arg2=swp; + } + +inline void swap(unsigned char &arg1, unsigned char &arg2) { + register unsigned char swp=arg1; + arg1=arg2; arg2=swp; + } + +/**************** Memory management ***************************/ + +bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory +bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory +bool GRealloc(pointer* ptr,unsigned long size); // Resize memory +void GFree(pointer* ptr); // Free memory, resets ptr to NULL + + +int saprintf(char **retp, const char *fmt, ...); + +void GError(const char* format,...); // Error routine (aborts program) +void GMessage(const char* format,...);// Log message to stderr +// Assert failed routine:- usually not called directly but through GASSERT +void GAssert(const char* expression, const char* filename, unsigned int lineno); + +// ****************** string manipulation ************************* +char *Gstrdup(const char* str); +//duplicate a string by allocating a copy for it and returning it +char* Gstrdup(const char* sfrom, const char* sto); +//same as GStrdup, but with an early termination (e.g. 
on delimiter) + +char* Gsubstr(const char* str, char* from, char* to=NULL); +//extracts a substring, allocating it, including boundaries (from/to) + +int strsplit(char* str, char** fields, int maxfields, const char* delim); +int strsplit(char* str, char** fields, int maxfields, const char delim); +int strsplit(char* str, char** fields, int maxfields); //splits by tab or space + +char* replaceStr(char* &str, char* newvalue); + +//conversion: to Lower/Upper case +// creating a new string: +char* upCase(const char* str); +char* loCase(const char* str); +// changing string in place: +char* strlower(char * str); +char* strupper(char * str); + +//strstr but for memory zones: scans a memory region +//for a substring: +void* Gmemscan(void *mem, unsigned int len, + void *part, unsigned int partlen); + +// test if a char is in a string: +bool chrInStr(char c, const char* str); + +char* rstrchr(char* str, char ch); +/* returns a pointer to the rightmost + occurence of ch in str - like rindex for platforms missing it*/ + +char* strchrs(const char* s, const char* chrs); +//strchr but with a set of chars instead of only one + +char* rstrfind(const char* str, const char *substr); +// like rindex() but for strings; right side version of strstr() + +char* reverseChars(char* str, int slen=0); //in place reversal of string + +char* rstrstr(const char* rstart, const char *lend, const char* substr); +/*the reversed, rightside equivalent of strstr: starts searching + from right end (rstart), going back to left end (lend) and returns + a pointer to the last (right) matching character in str */ + +char* strifind(const char* str, const char* substr); +// the case insensitive version of strstr -- finding a string within a strin + + +//Determines if a string begins with a given prefix +//(returns false when any of the params is NULL, +// but true when prefix is '' (empty string)!) +bool startsWith(const char* s, const char* prefix); + +bool endsWith(const char* s, const char* suffix); +//Note: returns true if suffix is empty string, but false if it's NULL + + +// ELF hash function for strings +int strhash(const char* str); + + + +//---- generic base GSeg : genomic segment (interval) -- +// coordinates are considered 1-based (so 0 is invalid) +class GSeg { + public: + uint start; //starte) { start=e;end=s; } + else { start=s;end=e; } + } + //check for overlap with other segment + uint len() { return end-start+1; } + bool overlap(GSeg* d) { + //return startstart ? (d->start<=end) : (start<=d->end); + return (start<=d->end && end>=d->start); + } + + bool overlap(GSeg& d) { + //return start=d.start); + } + + bool overlap(GSeg& d, int fuzz) { + //return start=d.start); + } + + bool overlap(uint s, uint e) { + if (s>e) { swap(s,e); } + //return start=s); + } + + //return the length of overlap between two segments + int overlapLen(GSeg* r) { + if (startstart) { + if (r->start>end) return 0; + return (r->end>end) ? end-r->start+1 : r->end-r->start+1; + } + else { //r->start<=start + if (start>r->end) return 0; + return (r->endend-start+1 : end-start+1; + } + } + int overlapLen(uint rstart, uint rend) { + if (rstart>rend) { swap(rstart,rend); } + if (startend) return 0; + return (rend>end) ? end-rstart+1 : rend-rstart+1; + } + else { //rstart<=start + if (start>rend) return 0; + return (rendstart && end==s->end); + uint sd = (start>s->start) ? start-s->start : s->start-start; + uint ed = (end>s->end) ? 
end-s->end : s->end-end; + return (sd<=fuzz && ed<=fuzz); + } + //comparison operators required for sorting + bool operator==(GSeg& d){ + return (start==d.start && end==d.end); + } + bool operator>(GSeg& d){ + return (start==d.start)?(end>d.end):(start>d.start); + } + bool operator<(GSeg& d){ + return (start==d.start)?(end0) pushed=true; } // "undo" the last getLine request + // so the next call will in fact return the same line + GLineReader(const char* fname) { + FILE* f=fopen(fname, "rb"); + if (f==NULL) GError("Error opening file '%s'!\n",fname); + closeFile=true; + init(f); + } + GLineReader(FILE* stream=NULL, off_t fpos=0) { + closeFile=false; + init(stream,fpos); + } + void init(FILE* stream, off_t fpos=0) { + len=0; + isEOF=false; + allocated=1024; + GMALLOC(buf,allocated); + lcount=0; + buf[0]=0; + file=stream; + filepos=fpos; + pushed=false; + } + ~GLineReader() { + GFREE(buf); + if (closeFile) fclose(file); + } +}; + + +/* extended fgets() - to read one full line from a file and + update the file position correctly ! + buf will be reallocated as necessary, to fit the whole line + */ +char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL); + + +//print int/values nicely formatted in 3-digit groups +char* commaprint(uint64 n); + +/*********************** File management functions *********************/ + +// removes the last part (file or directory name) of a full path +// WARNING: this is a destructive operation for the given string! +void delFileName(char* filepath); + +// returns a pointer to the last file or directory name in a full path +const char* getFileName(const char* filepath); +// returns a pointer to the file "extension" part in a filename +const char* getFileExt(const char* filepath); + + +int fileExists(const char* fname); +//returns 0 if file entry doesn't exist +// 1 if it's a directory +// 2 if it's a regular file +// 3 otherwise (?) + +int64 fileSize(const char* fpath); + +//write a formatted fasta record, fasta formatted +void writeFasta(FILE *fw, const char* seqid, const char* descr, + const char* seq, int linelen=60, int seqlen=0); + +//parses the next number found in a string at the current position +//until a non-digit (and not a '.', 'e','E','-','+') is encountered; +//updates the char* pointer to be after the last digit parsed +bool parseNumber(char* &p, double& v); +bool parseDouble(char* &p, double& v); //just an alias for parseNumber + +bool parseInt(char* &p, int& i); +bool parseUInt(char* &p, uint& i); +bool parseHex(char* &p, uint& i); + +#endif /* G_BASE_DEFINED */ diff --git a/src/GFaSeqGet.cpp b/src/GFaSeqGet.cpp new file mode 100644 index 0000000..6070667 --- /dev/null +++ b/src/GFaSeqGet.cpp @@ -0,0 +1,317 @@ +#include "GFaSeqGet.h" +#include "gdna.h" +#include + +void GSubSeq::setup(uint sstart, int slen, int sovl, int qfrom, int qto, uint maxseqlen) { + if (sovl==0) { + GFREE(sq); + sqstart=sstart; + uint max_len=(maxseqlen>0) ? maxseqlen : MAX_FASUBSEQ; + sqlen = (slen==0 ? 
max_len : slen); + GMALLOC(sq, sqlen); + return; + } + //overlap -- copy the overlapping region + char* newsq=NULL; + GMALLOC(newsq, slen); + memcpy((void*)&newsq[qto], (void*)&sq[qfrom], sovl); + GFREE(sq); + sq=newsq; + sqstart=sstart; + sqlen=slen; +} + +void GFaSeqGet::finit(const char* fn, off_t fofs, bool validate) { + fh=fopen(fn,"rb"); + if (fh==NULL) { + GError("Error (GFaSeqGet) opening file '%s'\n",fn); + } + fname=Gstrdup(fn); + initialParse(fofs, validate); + lastsub=new GSubSeq(); +} + +GFaSeqGet::GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen) { +//for GFastaIndex use mostly -- the important difference is that +//the file offset is to the sequence, not to the defline + fh=fopen(faname,"rb"); + if (fh==NULL) { + GError("Error (GFaSeqGet) opening file '%s'\n",faname); + } + fname=Gstrdup(faname); + line_len=l_len; + line_blen=l_blen; + seq_len=seqlen; + if (line_blen0) { //end of the first "sequence" line + lendlen++; + break; + } + else {// another EoL char at the end of defline + fseqstart++; + continue; + } + }// end-of-line characters + line_len++; + } + //we are at the end of first sequence line + while ((c=getc(fh))!=EOF) { + if (c=='\n' || c=='\r') lendlen++; + else { + ungetc(c,fh); + break; + } + } + line_blen=line_len+lendlen; + if (c==EOF) return; + // -- you don't need to check it all if you're sure it's safe + if (checkall) { //validate the rest of the FASTA record + int llen=0; //last line length + int elen=0; //length of last line ending + bool waseol=true; + while ((c=getc(fh))!=EOF) { + if (c=='>' && waseol) { ungetc(c,fh); break; } + if (c=='\n' || c=='\r') { + // eol char + elen++; + if (waseol) continue; //2nd eol char + waseol=true; + elen=1; + continue; + } + if (c<=32) GError(gfa_ERRPARSE); //invalid character encountered + //--- on a seq char here: + if (waseol) {//beginning of a seq line + if (elen && (llen!=line_len || elen!=lendlen)) + //GError(gfa_ERRPARSE); + GError("Error: invalid FASTA format for GFaSeqGet; make sure that\n\ + the sequence lines have the same length (except for the last line)"); + waseol=false; + llen=0; + elen=0; + } + llen++; + } //while reading chars + }// FASTA checking was requested + fseeko(fh,fseqstart,SEEK_SET); +} + +const char* GFaSeqGet::subseq(uint cstart, int& clen) { + //cstart is 1-based genomic coordinate within current fasta sequence + int maxlen=(seq_len>0)?seq_len : MAX_FASUBSEQ; + //GMessage("--> call: subseq(%u, %d)\n", cstart, clen); + if (clen>maxlen) { + GMessage("Error (GFaSeqGet): subsequence cannot be larger than %d\n", maxlen); + return NULL; + } + if (seq_len>0 && clen+cstart-1>seq_len) { + GMessage("Error (GFaSeqGet): end coordinate (%d) cannot be larger than sequence length %d\n", clen+cstart-1, seq_len); + } + if (lastsub->sq==NULL || lastsub->sqlen==0) { + lastsub->setup(cstart, clen, 0,0,0,seq_len); + loadsubseq(cstart, clen); + lastsub->sqlen=clen; + return (const char*)lastsub->sq; + } + //allow extension up to MAX_FASUBSEQ + uint bstart=lastsub->sqstart; + uint bend=lastsub->sqstart+lastsub->sqlen-1; + uint cend=cstart+clen-1; + int qlen=0; //only the extra len to be allocated/appended/prepended + uint qstart=cstart; //start coordinate of the new seq block of length qlen to be read from file + int newlen=0; //the new total length of the buffered sequence lastsub->sq + int kovl=0; + int czfrom=0;//0-based offsets for copying a previously read sequence chunk + int czto=0; + uint newstart=cstart; + if (cstart>=bstart && cend<=bend) { //new reg contained within 
existing buffer + return (const char*) &(lastsub->sq[cstart-bstart]) ; + } + //extend downward + uint newend=GMAX(cend, bend); + if (cstartMAX_FASUBSEQ) { + newlen=MAX_FASUBSEQ; + newend=cstart+newlen-1; //keep newstart, set newend + } + qlen=bstart-cstart; + if (newend>bstart) { //overlap + if (newend>bend) {// new region is larger & around the old one - so we have two regions to update + kovl=bend-bstart+1; + czfrom=0; + czto=bstart-cstart; + lastsub->setup(newstart, newlen, kovl, czfrom, czto, seq_len); //this should realloc and copy the kovl subseq + qlen=bstart-cstart; + loadsubseq(newstart, qlen); + qlen=newend-bend; + int toread=qlen; + loadsubseq(bend+1, qlen); + clen-=(toread-qlen); + lastsub->sqlen=clen; + return (const char*)lastsub->sq; + } + //newend<=bend + kovl=newend-bstart+1; + } + else { //no overlap with previous buffer + if (newend>bend) kovl=bend-bstart+1; + else kovl=newend-bstart+1; + } + qlen=bstart-cstart; + czfrom=0; + czto=qlen; + } //cstart=bstart, possibly extend upwards + newstart=bstart; + newlen=(newend-newstart+1); + if (newlen>MAX_FASUBSEQ) { + newstart=bstart+(newlen-MAX_FASUBSEQ);//keep newend, assign newstart + newlen=MAX_FASUBSEQ; + if (newstart<=bend) { //overlap with old buffer + kovl=bend-newstart+1; + czfrom=newstart-bstart; + czto=0; + } + else { //not overlapping old buffer + kovl=0; + } + } //newstart reassigned + else { //we can extend the buffer to include the old one + qlen=newend-bend; //how much to read from file + qstart=bend+1; + kovl=bend-bstart+1; + czfrom=0; + czto=0; + } + } + lastsub->setup(newstart, newlen, kovl, czfrom, czto, seq_len); //this should realloc but copy any overlapping region + lastsub->sqlen-=qlen; //appending may result in a premature eof + int toread=qlen; + loadsubseq(qstart, qlen); //read the missing chunk, if any + clen-=(toread-qlen); + lastsub->sqlen+=qlen; + return (const char*)(lastsub->sq+(cstart-newstart)); +} + +char* GFaSeqGet::copyRange(uint cstart, uint cend, bool revCmpl, bool upCase) { + if (cstart>cend) { swap(cstart, cend); } + int clen=cend-cstart+1; + const char* gs=subseq(cstart, clen); + if (gs==NULL) return NULL; + char* r=NULL; + GMALLOC(r,clen+1); + r[clen]=0; + memcpy((void*)r,(void*)gs, clen); + if (revCmpl) reverseComplement(r,clen); + if (upCase) { + for (int i=0;isq space allocated previously + //only loads the requested clen chars from file, at offset &lastsub->sq[cstart-lastsub->sqstart] + int sofs=cstart-lastsub->sqstart; + int lendlen=line_blen-line_len; + char* seqp=lastsub->sq+sofs; + //find the proper file offset and read the appropriate lines + uint seqofs=cstart-1; + uint startlno = seqofs/line_len; + int lineofs = seqofs % line_len; + off_t fstart=fseqstart + (startlno*line_blen); + fstart+=lineofs; + + fseeko(fh, fstart, SEEK_SET); + int toread=clen; + int maxlen=(seq_len>0)? 
seq_len-cstart+1 : MAX_FASUBSEQ ; + if (toread==0) toread=maxlen; //read max allowed, or to the end of file + int actualrlen=0; + int sublen=0; + if (lineofs>0) { //read the partial first line + int reqrlen=line_len-lineofs; + if (reqrlen>toread) reqrlen=toread; //in case we need to read just a few chars + actualrlen=fread((void*)seqp, 1, reqrlen, fh); + if (actualrlen=line_len) { + char* rseqp=&(seqp[sublen]); + actualrlen=fread((void*)rseqp, 1, line_len, fh); + /* + char dbuf[256];dbuf[255]=0; + strncpy(dbuf,rseqp, actualrlen); + dbuf[actualrlen]=0; + GMessage("<<0) { + char* rseqp=&(seqp[sublen]); + actualrlen=fread((void*)rseqp, 1, toread, fh); + if (actualrlensqlen+=sublen; + clen=sublen; + + return (const char*)seqp; + } + + diff --git a/src/GFaSeqGet.h b/src/GFaSeqGet.h new file mode 100644 index 0000000..b9ad618 --- /dev/null +++ b/src/GFaSeqGet.h @@ -0,0 +1,113 @@ +#ifndef GFASEQGET_H +#define GFASEQGET_H + +#include "GList.hh" + +#define MAX_FASUBSEQ 0x20000000 +//max 512MB sequence data held in memory at a time + +class GSubSeq { + public: + uint sqstart; //1-based coord of subseq start on sequence + uint sqlen; //length of subseq loaded + char* sq; //actual subsequence data will be stored here + // (with end-of-line characters removed) + + /*char* xseq; //the exposed pointer to the last requested subsequence start + off_t xstart; //the coordinate start for the last requested subseq + off_t xlen; //the last requested subseq len*/ + GSubSeq() { + sqstart=0; + sqlen=0; + sq=NULL; + /* xseq=NULL; + xstart=0; + xlen=0;*/ + } + ~GSubSeq() { + GFREE(sq); + } + // genomic, 1-based coordinates: + void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0, uint maxseqlen=0); + //check for overlap with previous window and realloc/extend appropriately + //returns offset from seq that corresponds to sstart + // the window will keep extending until MAX_FASUBSEQ is reached +}; + +class GFaSeqGet { + char* fname; + FILE* fh; + //raw offset in the file where the sequence actually starts: + off_t fseqstart; + uint seq_len; //total sequence length, if known (when created from GFastaIndex) + int line_len; //length of each line of text + int line_blen; //binary length of each line + // = line_len + number of EOL character(s) + GSubSeq* lastsub; + void initialParse(off_t fofs=0, bool checkall=true); + const char* loadsubseq(uint cstart, int& clen); + void finit(const char* fn, off_t fofs, bool validate); + public: + GFaSeqGet() { + fh=NULL; + fseqstart=0; + seq_len=0; + line_len=0; + line_blen=0; + fname=NULL; + lastsub=NULL; + } + GFaSeqGet(const char* fn, off_t fofs, bool validate=false) { + seq_len=0; + finit(fn,fofs,validate); + } + GFaSeqGet(const char* fn, bool validate=false) { + seq_len=0; + finit(fn,0,validate); + } + + GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen); + //constructor from GFastaIndex record + + GFaSeqGet(FILE* f, off_t fofs=0, bool validate=false); + + ~GFaSeqGet() { + if (fname!=NULL) { + GFREE(fname); + fclose(fh); + } + delete lastsub; + } + const char* subseq(uint cstart, int& clen); + const char* getRange(uint cstart=1, uint cend=0) { + if (cend==0) cend=(seq_len>0)?seq_len : MAX_FASUBSEQ; + if (cstart>cend) { swap(cstart, cend); } + int clen=cend-cstart+1; + //int rdlen=clen; + return subseq(cstart, clen); + } + + char* copyRange(uint cstart, uint cend, bool revCmpl=false, bool upCase=false); + //caller is responsible for deallocating the return string + + void loadall(uint32 max_len=0) { + //TODO: must read the whole 
sequence differently here - line by line + //so when EOF or another '>' line is found, the reading stops! + int clen=(seq_len>0) ? seq_len : ((max_len>0) ? max_len : MAX_FASUBSEQ); + subseq(1, clen); + } + void load(uint cstart, uint cend) { + //cache as much as possible + if (seq_len>0 && cend>seq_len) cend=seq_len; //correct a bad request + int clen=cend-cstart+1; + subseq(cstart, clen); + } + int getsublen() { return lastsub!=NULL ? lastsub->sqlen : 0 ; } + off_t getseqofs() { return fseqstart; } + int getLineLen() { return line_len; } + int getLineBLen() { return line_blen; } + //reads a subsequence starting at genomic coordinate cstart (1-based) + }; + + +#endif diff --git a/src/GFastaIndex.cpp b/src/GFastaIndex.cpp new file mode 100644 index 0000000..25c52ee --- /dev/null +++ b/src/GFastaIndex.cpp @@ -0,0 +1,170 @@ +/* + * GFaIdx.cpp + * + * Created on: Aug 25, 2010 + * Author: gpertea + */ + +#include "GFastaIndex.h" +#define ERR_FAIDXLINE "Error parsing fasta index line: \n%s\n" +#define ERR_FALINELEN "Error: sequence lines in a FASTA record must have the same length!\n" +void GFastaIndex::addRecord(const char* seqname, uint seqlen, off_t foffs, int llen, int llen_full) { + GFastaRec* farec=records.Find(seqname); + if (farec!=NULL) { + GMessage("Warning: duplicate sequence ID (%s) added to the fasta index! Only last entry data will be kept.\n"); + farec->seqlen=seqlen; + farec->fpos=foffs; + farec->line_len=llen; + farec->line_blen=llen_full; + } + else { + farec=new GFastaRec(seqlen,foffs,llen,llen_full); + records.Add(seqname,farec); + farec->seqname=records.getLastKey(); + } + } + +int GFastaIndex::loadIndex(const char* finame) { //load record info from existing fasta index + if (finame==NULL) finame=fai_name; + if (finame!=fai_name) { + fai_name=Gstrdup(finame); + } + if (fai_name==NULL) GError("Error: GFastaIndex::loadIndex() called with no file name!\n"); + records.Clear(); + haveFai=false; + FILE* fi=fopen(fai_name,"rb"); + if (fi==NULL) { + GMessage("Warning: cannot open fasta index file: %s!\n",fai_name); + return 0; + } + GLineReader fl(fi); + char* s=NULL; + while ((s=fl.nextLine())!=NULL) { + if (*s=='#') continue; + char* p=strchrs(s,"\t "); + if (p==NULL) GError(ERR_FAIDXLINE,s); + *p=0; //s now holds the genomic sequence name + p++; + uint len=0; + int line_len=0, line_blen=0; + #ifdef __WIN32__ + long offset=-1; + sscanf(p, "%d%ld%d%d", &len, &offset, &line_len, &line_blen); + #else + long long offset=-1; + sscanf(p, "%d%lld%d%d", &len, &offset, &line_len, &line_blen); + #endif + if (len==0 || line_len==0 || line_blen==0 || line_blen0); + return records.Count(); + } + +int GFastaIndex::buildIndex() { + //this parses the whole fasta file, so it could be slow + if (fa_name==NULL) + GError("Error: GFastaIndex::buildIndex() called with no fasta file!\n"); + FILE* fa=fopen(fa_name,"rb"); + if (fa==NULL) { + GMessage("Warning: cannot open fasta index file: %s!\n",fa_name); + return 0; + } + records.Clear(); + GLineReader fl(fa); + char* s=NULL; + uint seqlen=0; + int line_len=0,line_blen=0; + bool newSeq=false; //set to true after defline + off_t newSeqOffset=0; + int prevOffset=0; + char* seqname=NULL; + int last_len=0; + bool mustbeLastLine=false; //true if the line length decreases + while ((s=fl.nextLine())!=NULL) { + if (s[0]=='>') { + if (seqname!=NULL) { + if (seqlen==0) + GError("Warning: empty FASTA record skipped (%s)!\n",seqname); + else { //seqlen!=0 + addRecord(seqname, seqlen,newSeqOffset, line_len, line_blen); + } + } + char *p=s; + while (*p > 32) p++; + 
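// keep only the first whitespace-delimited token after '>' as the sequence name + 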
*p=0; + GFREE(seqname); + seqname=Gstrdup(&s[1]); + newSeq=true; + newSeqOffset=fl.getfpos(); + last_len=0; + line_len=0; + line_blen=0; + seqlen=0; + mustbeLastLine=false; + } //defline parsing + else { //sequence line + int llen=fl.length(); + int lblen=fl.getFpos()-prevOffset; + if (newSeq) { //first sequence line after defline + line_len=llen; + line_blen=lblen; + } + else {//next seq lines after first + if (mustbeLastLine || llen>last_len) + GError(ERR_FALINELEN); + if (llen0) + addRecord(seqname, seqlen, newSeqOffset, line_len, line_blen); + GFREE(seqname); + fclose(fa); + return records.Count(); + } + + +int GFastaIndex::storeIndex(const char* finame) { //write the hash to a file + if (records.Count()==0) + GError("Error at GFastaIndex:storeIndex(): no records found!\n"); + FILE* fai=fopen(finame, "w"); + if (fai==NULL) GError("Error creating fasta index file: %s\n",finame); + int rcount=storeIndex(fai); + GFREE(fai_name); + fai_name=Gstrdup(finame); + return rcount; + } + +int GFastaIndex::storeIndex(FILE* fai) { + int rcount=0; + GList reclist(true,false,true); //sorted, don't free members, unique + records.startIterate(); + GFastaRec* rec=NULL; + while ((rec=records.NextData())!=NULL) { + reclist.Add(rec); + } + //reclist has records sorted by file offset + for (int i=0;iseqname,reclist[i]->seqlen,(long)reclist[i]->fpos, + reclist[i]->line_len, reclist[i]->line_blen); +#else + int written=fprintf(fai, "%s\t%d\t%lld\t%d\t%d\n", + reclist[i]->seqname, reclist[i]->seqlen, (long long)(reclist[i]->fpos), + reclist[i]->line_len, reclist[i]->line_blen); +#endif + if (written>0) rcount++; + else break; //couldn't write anymore + } + fclose(fai); + haveFai=(rcount>0); + return rcount; +} diff --git a/src/GFastaIndex.h b/src/GFastaIndex.h new file mode 100644 index 0000000..1509f3a --- /dev/null +++ b/src/GFastaIndex.h @@ -0,0 +1,79 @@ +/* + * GFaIdx.h + * + * Created on: Aug 25, 2010 + * Author: gpertea + */ + +#ifndef GFAIDX_H_ +#define GFAIDX_H_ + +#include "GHash.hh" +#include "GList.hh" + +class GFastaRec { + public: + char* seqname; + uint seqlen; + off_t fpos; + int line_len; //effective line length (without EoL) + int line_blen; //length of line including EoL characters + GFastaRec(uint slen=0, off_t fp=0, int llen=0, int llenb=0) { + seqname=NULL; //only a pointer copy + seqlen=slen; + fpos=fp; + line_len=llen; + line_blen=llenb; + } + bool operator==(GFastaRec& d){ + return (fpos==d.fpos); + } + bool operator>(GFastaRec& d){ + return (fpos>d.fpos); + } + bool operator<(GFastaRec& d){ + return (fpos records; + void addRecord(const char* seqname, uint seqlen, + off_t foffs, int llen, int llen_full); + + GFastaRec* getRecord(const char* seqname) { + return records.Find(seqname); + } + bool hasIndex() { return haveFai; } + int loadIndex(const char* finame); + int buildIndex(); //build index in memory by parsing the whole fasta file + int storeIndex(const char* finame); + int storeIndex(FILE* fai); + int getCount() { return records.Count(); } + GFastaIndex(const char* fname, const char* finame=NULL):records() { + if (fileExists(fname)!=2) GError("Error: fasta file %s not found!\n",fname); + if (fileSize(fname)<=0) GError("Error: invalid fasta file %s !\n",fname); + fa_name=Gstrdup(fname); + fai_name=finame!=NULL ? 
Gstrdup(finame) : NULL; + if (fileSize(fa_name)==0) { + GError("Error creating GFastaIndex(%s): invalid fasta file!\n",fa_name); + } + haveFai=false; + if (fai_name!=NULL && fileSize(fai_name)>0) { + //try to load the index file if it exists + loadIndex(fai_name); + haveFai=(records.Count()>0); + } + } + ~GFastaIndex() { + GFREE(fa_name); + GFREE(fai_name); + } +}; + +#endif /* GFAIDX_H_ */ diff --git a/src/GHash.hh b/src/GHash.hh new file mode 100644 index 0000000..4d7fc51 --- /dev/null +++ b/src/GHash.hh @@ -0,0 +1,560 @@ +/******************************************************************************** +* Hash table class template (char* based) * +*********************************************************************************/ + +#ifndef GHash_HH +#define GHash_HH +#include "GBase.h" +/** +* This class maintains a fast-access hash table of entities +* indexed by a character string (essentially, maps strings to pointers) +*/ + +typedef struct { + char* key; // Key string + bool keyalloc; //shared key flag (to not free the key chars) + int hash; // Hash value of key + pointer data; // Data + bool mark; // Entry is marked + } GHashEntry; + +template class GHash { + protected: + GHashEntry* hash; // Hash + int fCapacity; // table size + int fCount; // number of valid entries + int fCurrentEntry; + char* lastkeyptr; //pointer to last key string added + //---------- Raw data retrieval (including empty entries + // Return key at position pos. + const char* Key(uint pos) const { return hash[pos].key; } + // return data OBJ* at given position + OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; } + // Return mark flag of entry at position pos. + bool Mark(uint pos) const { return hash[pos].mark; } + // Return position of first filled slot, or >= fCapacity + int First() const; + // Return position of last filled slot or -1 + int Last() const; + // Return position of next filled slot in hash table + // or a value greater than or equal to fCapacity if no filled + // slot was found + int Next(int pos) const; + //Return position of previous filled slot in hash table + //or a -1 if no filled slot was found + int Prev(int pos) const; + +private: + GHash(const GHash&); + GHash &operator=(const GHash&); + GFreeProc* fFreeProc; //procedure to free item data +protected: +public: + static void DefaultFreeProc(pointer item) { + delete (OBJ*)item; + item=NULL; + } +public: + GHash(GFreeProc* freeProc); // constructs of an empty hash + GHash(bool doFree=true); // constructs of an empty hash (free the item objects) + void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; } + void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; } + int Capacity() const { return fCapacity; } // table's size, including the empty slots. + void Resize(int m); // Resize the table to the given size. + int Count() const { return fCount; }// the total number of entries in the table. + // Insert a new entry into the table given key and mark. + // If there is already an entry with that key, leave it unchanged, + const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false); + //same as Add, but the key pointer is stored directly, no string duplicate + //is made (shared-key-Add) + const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false); + + // Replace data at key, if the entry's mark is less than + // or equal to the given mark. If there was no existing entry, + // a new entry is inserted with the given mark. 
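+ // (unlike Add(), which leaves an existing key's data untouched, Replace()
+ // overwrites it, subject to the mark check described above)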
+ OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false); + // Remove a given key and its data + OBJ* Remove(const char* ky); + // Find data OBJ* given key. + OBJ* Find(const char* ky, char** keyptr=NULL); + bool hasKey(const char* ky); + char* getLastKey() { return lastkeyptr; } + OBJ* operator[](const char* ky) { return Find(ky); } + void startIterate(); //iterator-like initialization + char* NextKey(); //returns next valid key in the table (NULL if no more) + OBJ* NextData(); //returns next valid hash[].data + OBJ* NextData(char*& nextkey); //returns next valid hash[].data + //or NULL if no more + //nextkey is SET to the corresponding key + GHashEntry* NextEntry(); //returns a pointer to a GHashEntry + + /// Clear all entries + void Clear(); + + /// Destructor + virtual ~GHash(); + }; +// +//======================== method definitions ======================== +// +/* + Notes: + - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY) + GHash::EMPTY and GHash::UNUSED are needed for flag purposes. + - Since the algorithm doubles the table size when exceeding MAX_LOAD, + it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD; + otherwise, the algorithm might hip-hop between halving and doubling, + which would be quite expensive!! + - Not many people seem to know that hash tables don't have to be prime + numbers; in fact, a table size of 2**n and odd probe distance are very + easy to arrange, and this works just as well! + - We store the hash key, so that 99.999% of the time we can compare hash numbers; + only when hash numbers match do we need to compare keys. + Thus, with a good hash function, the fCount of calls to strcmp() should be + roughly the same as the fCount of successful lookups. + - The hash table should NEVER get full, or stuff will loop forever!! 
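+ A minimal usage sketch (illustrative; GFastaRec is the record type stored in
+ a GHash by GFastaIndex elsewhere in this package):
+   GHash<GFastaRec> h;                          // owns/deletes its values by default
+   h.Add("chr1", new GFastaRec(1000,0,60,61));  // the key string is duplicated
+   GFastaRec* r=h.Find("chr1");                 // NULL if the key is absent
+   h.startIterate();
+   while ((r=h.NextData())!=NULL) { ... }       // iterate over stored values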
+*/ + +// Initial table size (MUST be power of 2) +#define DEF_HASH_SIZE 32 +// Maximum hash table load factor (%) +#define MAX_LOAD 80 +// Minimum hash table load factor (%) +#define MIN_LOAD 10 +// Probe Position [0..n-1] +#define HASH1(x,n) (((unsigned int)(x)*13)%(n)) +// Probe Distance [1..n-1] +#define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1))) + +#define FREEDATA (fFreeProc!=NULL) + +/*******************************************************************************/ +// Construct empty hash +template GHash::GHash(GFreeProc* freeProc) { + GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE); + fFreeProc=freeProc; + for (uint i=0; i GHash::GHash(bool doFree) { + GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE); + fFreeProc = (doFree)?&DefaultFreeProc : NULL; + for (uint i=0; i void GHash::Resize(int m){ + register int i,n,p,x,h; + GHashEntry *k; + GASSERT(fCount<=fCapacity); + if(m>2)>m) n>>=1; // Shrink until n/4 <= m + while((n>>1)>1)); + GASSERT(DEF_HASH_SIZE<=n); + if(n!=fCapacity){ + GASSERT(m<=n); + GMALLOC(k, sizeof(GHashEntry)*n); + for(i=0; i const OBJ* GHash::Add(const char* ky, + const OBJ* pdata,bool mrk){ + register int p,i,x,h,n; + if(!ky) GError("GHash::insert: NULL key argument.\n"); + GASSERT(fCount=(MAX_LOAD*fCapacity)) Resize(fCount); + GASSERT(fCount const OBJ* GHash::shkAdd(const char* ky, + const OBJ* pdata,bool mrk){ + register int p,i,x,h,n; + if(!ky) GError("GHash::insert: NULL key argument.\n"); + GASSERT(fCount=(MAX_LOAD*fCapacity)) Resize(fCount); + GASSERT(fCount OBJ* GHash::Replace(const char* ky,const OBJ* pdata, bool mrk){ + register int p,i,x,h,n; + if(!ky){ GError("GHash::replace: NULL key argument.\n"); } + GASSERT(fCount=(MAX_LOAD*fCapacity)) Resize(fCount); + GASSERT(fCount OBJ* GHash::Remove(const char* ky){ + register int p,x,h,n; + if(!ky){ GError("GHash::remove: NULL key argument.\n"); } + if(0 bool GHash::hasKey(const char* ky) { + register int p,x,h,n; + if(!ky){ GError("GHash::find: NULL key argument.\n"); } + if(0 OBJ* GHash::Find(const char* ky, char** keyptr){ + register int p,x,h,n; + if(!ky){ GError("GHash::find: NULL key argument.\n"); } + if(0 void GHash::startIterate() {// initialize a key iterator; call + fCurrentEntry=0; +} + +template char* GHash::NextKey() { + register int pos=fCurrentEntry; + while (pos OBJ* GHash::NextData() { + register int pos=fCurrentEntry; + while (pos OBJ* GHash::NextData(char* &nextkey) { + register int pos=fCurrentEntry; + while (pos GHashEntry* GHash::NextEntry() { + register int pos=fCurrentEntry; + while (pos int GHash::First() const { + register int pos=0; + while(pos int GHash::Last() const { + register int pos=fCapacity-1; + while(0<=pos){ if(0<=hash[pos].hash) break; pos--; } + GASSERT(pos<0 || 0<=hash[pos].hash); + return pos; + } + + +// Find next valid entry +template int GHash::Next(int pos) const { + GASSERT(0<=pos && pos int GHash::Prev(int pos) const { + GASSERT(0<=pos && pos= 0){ if(0<=hash[pos].hash) break; } + GASSERT(pos<0 || 0<=hash[pos].hash); + return pos; + } + + +// Remove all +template void GHash::Clear(){ + register int i; + for(i=0; i=0){ + if (hash[i].keyalloc) GFREE((hash[i].key)); + if (FREEDATA) + (*fFreeProc)(hash[i].data); + } + } + GFREE(hash); + GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE); + //reinitialize it + for (i=0; i=0){ + uint len=strlen(hash[i].key); + store << len; + store << hash[i].mark; + store.save(hash[i].key,len); + } + } + } + + +// Load data +void GHash::Load(Stream& store){ + Object::load(store); + store >> fCapacity; + store >> fCount; + for(int i=0; 
i> hash[i].hash; + if(hash[i].hash>=0){ + uint len; + store >> len; + store >> hash[i].mark; + GMALLOC(hash[i].key,len+1); + store.load(hash[i].key,len); + hash[i].key[len]='\0'; + } + } + } +*/ + +// Destroy table +template GHash::~GHash(){ + register int i; + for(i=0; i=0){ + if (hash[i].keyalloc) GFREE((hash[i].key)); + if (FREEDATA) (*fFreeProc)(hash[i].data); + } + } + GFREE(hash); + } + +#endif diff --git a/src/GList.hh b/src/GList.hh new file mode 100644 index 0000000..2622725 --- /dev/null +++ b/src/GList.hh @@ -0,0 +1,1371 @@ +//--------------------------------------------------------------------------- +/* +Sortable collection of pointers to objects +*/ + +#ifndef GListHH +#define GListHH + +#include "GBase.h" +//#include "assert.h" + +#ifdef __LINE__ +#define SLISTINDEX_ERR "GList error (%s:%d):Invalid list index: %d\n" +#define TEST_INDEX(x) \ + if (x<0 || x>=fCount) GError(SLISTINDEX_ERR, __FILE__,__LINE__, x) +#else +#define SLISTINDEX_ERR "GList error:Invalid list index: %d\n" +#define TEST_INDEX(x) \ + if (x<0 || x>=fCount) GError(SLISTINDEX_ERR, x, __FILE__,__LINE__) +#endif + +#define SLISTCAPACITY_ERR "GList error: invalid capacity: %d\n" +#define SLISTCOUNT_ERR "GList error: invalid count: %d\n" +#define SLISTSORTED_ERR "Operation not allowed on a sorted list!\n" +#define SLISTUNSORTED_ERR "Operation not allowed on an unsorted list!\n" + +// ------ macros: +#define BE_UNSORTED if (fCompareProc!=NULL) { GError(SLISTSORTED_ERR); return; } +#define BE_SORTED if (fCompareProc==NULL) { GError(SLISTUNSORTED_ERR); return; } + +#define MAXLISTSIZE INT_MAX-1 + +#define SORTED (fCompareProc!=NULL) +#define UNSORTED (fCompareProc==NULL) +#define FREEDATA (fFreeProc!=NULL) +/* #define TEST_INDEX(x) assert(x>=0 && x=fCount) GError(SLISTINDEX_ERR, x) */ + + +//template for array of objects; GVec is not sortable, +// so it doesn't require comparison operators defined +template class GVec { + protected: + OBJ* fArray; + int fCount; + int fCapacity; + public: + GVec(int init_capacity=20); + GVec(GVec& array); //copy constructor + const GVec& operator=(GVec& array); //copy operator + virtual ~GVec(); + void idxInsert(int idx, OBJ& item); + void Grow(); + void Grow(int idx, OBJ& item); + void Reverse(); //WARNING: will break the sort order if SORTED! + int Add(OBJ* item); // simply append to the end of fArray, reallocating as needed + int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it + // using OBJ new operator= + void Add(GVec& list); //append copies of all items from another list + OBJ& Get(int idx) { + TEST_INDEX(idx); + return fArray[idx]; + } + OBJ& operator[](int i) { + TEST_INDEX(i); + return fArray[i]; + } + void Clear(); + void Insert(int idx, OBJ* item); + void Delete(int index); + void Replace(int idx, OBJ& item); //Put, use operator= to copy + void Exchange(int idx1, int idx2); + void Swap(int idx1, int idx2) { Exchange(idx1, idx2); } + int Capacity() { return fCapacity; } + //this will reject identical items in sorted lists only! + void setCapacity(int NewCapacity); + int Count() { return fCount; } + void setCount(int NewCount); //?! - will trim or expand the array as needed + void Move(int curidx, int newidx); +}; + +// GArray is the sortable collection, but requires the comparison operators to be defined +template class GArray:public GVec { + protected: + bool fUnique; + static int DefaultCompareProc(OBJ& item1, OBJ& item2) { + //the comparison operators MUST be defined for OBJ class! 
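+ // (only operator> is required of OBJ; items that compare neither way are treated as equal)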
+ if ( item1 > item2) return 1; + else return (item2 > item1) ? -1 : 0 ; + } + public: + typedef int CompareProc(OBJ& item1, OBJ& item2); + protected: + CompareProc* fCompareProc; + void qSort(int L, int R); + public: + GArray(CompareProc* cmpFunc=NULL); + GArray(bool sorted, bool unique=false); + GArray(int init_capacity, bool sorted, bool unique=false); + GArray(GArray& array); //copy constructor + const GArray& operator=(GArray& array); + //~GArray(); + //assignment operator + void setSorted(CompareProc* cmpFunc); + //sort the array if cmpFunc not NULL or changes + int Add(OBJ* item); // specific implementation if sorted + int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it + // using OBJ new operator= + void Add(GArray& list); //add copies of all items from another list + //this will reject identical items in sorted lists only! + void setUnique(bool beUnique) { fUnique = beUnique; }; + void Sort(); //explicit sort may be requested + bool Sorted() { return fCompareProc!=NULL; } + void Replace(int idx, OBJ& item); //Put, use operator= to copy + int Unique() { return fUnique; } + int IndexOf(OBJ& item); + //this needs the == operator to have been defined for OBJ + bool Found(OBJ& item, int& idx); // for sorted arrays only; + //search by content; if found, returns true and idx will be the index + //of the first item found matching for which CompareProc returns 0 + bool Exists(OBJ& item); //same as above without existing index info + //unsorted only, place item at position idx: + void Move(int curidx, int newidx); + void Insert(int idx, OBJ* item); + void Insert(int idx, OBJ& item) { Insert(idx,&item); } +}; + +//------- template for array of pointers to objects --------- +template class GPVec { + protected: + OBJ** fList; //pointer to an array of pointers to objects + int fCount; //total number of entries in list + int fCapacity; //current allocated size + GFreeProc* fFreeProc; //useful for deleting objects + //--- + void Expand(); + void Grow(); + void Grow(int idx, OBJ* newitem); + void setCount(int NewCount); //will trim/expand the array as needed + public: + static void DefaultFreeProc(pointer item) { + delete (OBJ*)item; + } + virtual ~GPVec(); + GPVec(int init_capacity=10, bool free_elements=true); //also the default constructor + GPVec(GPVec& list); //copy constructor? + GPVec(GPVec* list); //kind of a copy constructor + const GPVec& operator=(GPVec& list); + OBJ* Get(int i); + OBJ* operator[](int i) { return this->Get(i); } + void Reverse(); //reverse pointer array; WARNING: will break the sort order if sorted! + void freeItem(int idx); //calls fFreeProc (or DefaultFreeProc) on fList[idx] and sets NULL there, doesn't pack! + //it will free even if fFreeProc is NULL! 
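+ // usage sketch (illustrative): an owning vector of GSeg pointers
+ //   GPVec<GSeg> segs(16, true);   // free_elements=true: the list deletes its elements
+ //   segs.Add(new GSeg(100,250));
+ //   GSeg* s=segs.Pop();           // detaches the last element without freeing it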
+ void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; } + void setFreeItem(bool doFree) { + if (doFree) fFreeProc=DefaultFreeProc; + else fFreeProc=NULL; + } + // -- stack usage: + int Push(OBJ* item) { return Add(item); } + OBJ* Pop();// Stack use; removes and returns last item,but does NOT FREE it + OBJ* Shift(); //Queue use: removes and returns first item, but does NOT FREE it + void deallocate_item(OBJ* item); //forcefully call fFreeProc or delete on item + void Clear(); + void Exchange(int idx1, int idx2); + void Swap(int idx1, int idx2) { Exchange(idx1, idx2); } + OBJ* First() { return (fCount>0)?fList[0]:NULL; } + OBJ* Last() { return (fCount>0)?fList[fCount-1]:NULL;} + bool isEmpty() { return fCount==0; } + bool notEmpty() { return fCount>0; } + int Capacity() { return fCapacity; } + int Count() { return fCount; } + void setCapacity(int NewCapacity); + int Add(OBJ* item); //simply append the pointer copy + void Add(GPVec& list); //add all pointers from another list + void Insert(int idx, OBJ* item); + void Move(int curidx, int newidx); + void Put(int idx, OBJ* item); + void Pack(); + void Delete(int index); //also frees the item if fFreeProc!=NULL, and shifts the successor items + void Forget(int idx); //simply places a NULL at fList[idx], nothing else + int RemovePtr(pointer item); //always use linear search to find the pointer! calls Delete() if found + int IndexOf(pointer item); //a linear search for pointer address only! + }; + +template class GList:public GPVec { + protected: + bool fUnique; + GCompareProc* fCompareProc; //a pointer to a Compare function + static int DefaultCompareProc(const pointer item1, const pointer item2) { + //the comparison operators MUST be defined for OBJ class! + if (*((OBJ*)item1) > *((OBJ*)item2)) return 1; + else if (*((OBJ*)item2) > *((OBJ*)item1)) return -1; + else return 0; + } + void QuickSort(int L, int R); + public: + void sortInsert(int idx, OBJ* item); + GList(GCompareProc* compareProc=NULL); //free by default + GList(GCompareProc* compareProc, //unsorted by default + GFreeProc *freeProc, + bool beUnique=false); + GList(bool sorted, bool free_elements=true, bool beUnique=false); + GList(int init_capacity, bool sorted, bool free_elements=true, bool beUnique=false); + GList(GList& list); //copy constructor? + GList(GList* list); //kind of a copy constructor + const GList& operator=(GList& list); + //void Clear(); + //~GList(); + void setSorted(GCompareProc* compareProc); + //sorted if compareProc not NULL; sort the list if compareProc changes ! + bool Sorted() { return fCompareProc!=NULL; } + void setSorted(bool sorted) { + if (sorted) { + if (fCompareProc!=&DefaultCompareProc) { + fCompareProc=&DefaultCompareProc; + Sort(); + } + } + else fCompareProc=NULL; + } + int Add(OBJ* item); //-- specific implementation if sorted + void Add(GList& list); //add all pointers from another list + + OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=NULL); + // default: delete item if Found() (and pointers are not equal)! + //returns the equal (==) object if it's in the list already + //or the item itself if it is unique and actually added + + int AddedIfNew(OBJ* item); + // if Found(item) (and pointers are not equal) delete item and returns -1 + // if added, returns the new item index + + + int Unique() { return fUnique; } + //this will reject identical items in sorted lists only! 
+ void setUnique(bool beUnique) { fUnique = beUnique; }; + + GCompareProc* GetCompareProc() {return fCompareProc;} + int IndexOf(OBJ* item); //this has a specific implementation for sorted lists + //if list is sorted, item data is located by binary search + //based on the Compare function + //if not, a linear search is performed, but + //this needs the == operator to have been defined for OBJ + + void Put(int idx, OBJ* item, bool re_sort=false); + bool Found(OBJ* item, int & idx); // sorted only; + //search by content; if found, returns true and idx will be the index + //of the first item found matching for which GTCompareProc returns 0 + bool Exists(OBJ* item); //same as above without existing index info + bool Exists(OBJ& item); //same as above without existing index info + void Sort(); //explicit sort may be requested using this function + int Remove(OBJ* item); //search for pointer, using binary search if sorted + void Insert(int idx, OBJ* item); //unsorted only, place item at position idx + void Move(int curidx, int newidx); +}; //GList + + +//basic template for a Stack of pointers (implemented as a linked list) +template class GStack { + protected: + struct StackOBJ { + OBJ* obj; + StackOBJ* prev; + }; + int fCount; //total number of elements in stack + StackOBJ* base; + StackOBJ* top; + public: + GStack(OBJ* po=NULL) { + base=NULL; + top=NULL; + fCount=0; + if (po!=NULL) Push(po); + } + ~GStack() { + while (fCount>0) Pop(); + } + bool isEmpty() { return fCount==0; } + int Size() { return fCount; } + int Count() { return fCount; } + OBJ* Pop() { + if (top==NULL) return NULL; + fCount--; + StackOBJ* ctop=top; + if (top==base) base=NULL; + OBJ* r=top->obj; + top=top->prev; + GFREE(ctop); + return r; + } + OBJ* Push(OBJ* o) { + fCount++; + StackOBJ* ctop=top; //could be NULL + GMALLOC(top, sizeof(StackOBJ)); + top->obj=o; + top->prev=ctop; + if (base==NULL) base=top; + return o; + } + OBJ* Top() { return ((top==NULL)? NULL : top->obj); } + OBJ* Base() { return ((base==NULL)? 
NULL : base->obj); } +}; + +//-------------------- TEMPLATE IMPLEMENTATION------------------------------- + +template GVec::GVec(int init_capacity) { + fCount=0; + fCapacity=0; + fArray=NULL; + setCapacity(init_capacity); +} + +template GVec::GVec(GVec& array) { //copy constructor + this->fCount=array.fCount; + this->fCapacity=array.fCapacity; + this->fArray=NULL; + if (this->fCapacity>0) { + //GMALLOC(fArray, fCapacity*sizeof(OBJ)); + fArray=new OBJ[this->fCapacity]; + } + this->fCount=array.fCount; + // uses OBJ operator= + for (int i=0;ifCount;i++) fArray[i]=array[i]; + } + +template GArray::GArray(GArray& array):GVec(0) { //copy constructor + this->fCount=array.fCount; + this->fCapacity=array.fCapacity; + this->fArray=NULL; + if (this->fCapacity>0) { + //GMALLOC(this->fArray, this->fCapacity*sizeof(OBJ)); + this->fArray=new OBJ[this->fCapacity]; + } + this->fCount=array.fCount; + fUnique=array.fUnique; + fCompareProc=array.fCompareProc; + // uses OBJ operator= + for (int i=0;ifCount;i++) this->fArray[i]=array[i]; + } + +template const GVec& GVec::operator=(GVec& array) { + if (&array==this) return *this; + Clear(); + fCount=array.fCount; + fCapacity=array.fCapacity; + if (fCapacity>0) { + //GMALLOC(fArray, fCapacity*sizeof(OBJ)); + fArray=new OBJ[this->fCapacity]; + } + fCount=array.fCount; + // uses OBJ operator= + for (int i=0;i const GArray& GArray::operator=(GArray& array) { + if (&array==this) return *this; + GVec::Clear(); + this->fCount=array.fCount; + this->fUnique=array.fUnique; + this->fCapacity=array.fCapacity; + if (this->fCapacity>0) { + //GMALLOC(this->fArray, this->fCapacity*sizeof(OBJ)); + this->fArray=new OBJ[this->fCapacity]; + } + this->fCompareProc=array.fCompareProc; + this->fCount=array.fCount; + // uses OBJ operator= + for (int i=0;ifCount;i++) { + this->fArray[i]=array[i]; + } + return *this; +} + +template GArray::GArray(CompareProc* cmpFunc):GVec(0) { + fCompareProc = cmpFunc; + fUnique = false; //only affects sorted lists +} + +template GArray::GArray(bool sorted, bool unique):GVec(0) { + fUnique=unique; + fCompareProc=sorted? &DefaultCompareProc : NULL; +} + +template GArray::GArray(int init_capacity, + bool sorted, bool unique):GVec(init_capacity) { + fUnique=unique; + fCompareProc=sorted? &DefaultCompareProc : NULL; +} + +template GVec::~GVec() { + this->Clear(); +} + + +template void GVec::setCapacity(int NewCapacity) { + if (NewCapacity < fCount || NewCapacity > MAXLISTSIZE) + GError(SLISTCAPACITY_ERR, NewCapacity); + //error: capacity not within range + if (NewCapacity!=fCapacity) { + if (NewCapacity==0) { + delete[] fArray; + } + else { + //GREALLOC(fArray, NewCapacity*sizeof(OBJ)); + OBJ* oldArray=fArray; + fArray=new OBJ[NewCapacity]; + for (int i=0;ifCount;i++) { + fArray[i] = oldArray[i]; + } + delete[] oldArray; + } + fCapacity=NewCapacity; + } +} + +template void GVec::Clear() { + setCount(0); + setCapacity(0); //so the array itself is deallocated too! +} + +/* +template void GArray::Clear() { + CompareProc* fcmp=fCompareProc; + fCompareProc=NULL; + GVec::setCount(0); + GVec::setCapacity(0); //so the array itself is deallocated too! 
+ fCompareProc=fcmp; +} +*/ +template void GArray::setSorted(CompareProc* cmpFunc) { + CompareProc* old_proc=fCompareProc; + fCompareProc=cmpFunc; + if (fCompareProc!=old_proc && fCompareProc!=NULL) + Sort(); //new compare method +} + +template void GVec::Grow() { + int delta; + if (fCapacity > 64) delta = fCapacity/4; + else if (fCapacity > 8) delta = 16; + else delta = 4; + setCapacity(fCapacity + delta); +} + +template void GVec::Reverse() { + int l=0; + int r=fCount-1; + OBJ c; + while (l void GVec::Grow(int idx, OBJ& item) { + int delta; + if (fCapacity > 64) delta = fCapacity/4; + else if (fCapacity > 8) delta = 16; + else delta = 4; + int NewCapacity=fCapacity+delta; + if (NewCapacity <= fCount || NewCapacity >= MAXLISTSIZE) + GError(SLISTCAPACITY_ERR, NewCapacity); + //error: capacity not within range + + if (NewCapacity!=fCapacity) { + if (NewCapacity==0) { + //GFREE(fArray); + delete[] fArray; + fArray=NULL; + } + else { //add the new item + if (idx==fCount) { //append item + //GREALLOC(fArray, NewCapacity*sizeof(OBJ)); + setCapacity(NewCapacity); + fArray[idx]=item; + } + else { //insert item at idx + OBJ* newList; + //GMALLOC(newList, NewCapacity*sizeof(OBJ)); + newList=new OBJ[NewCapacity]; + //copy data before idx + //memmove(&newList[0],&fArray[0], idx*sizeof(OBJ)); + // operator= required! + for (int i=0;i int GArray::IndexOf(OBJ& item) { + int result=0; + if (Found(item, result)) return result; + else return -1; + } + +template bool GArray::Exists(OBJ& item) { + int result=0; + if (Found(item, result)) return true; + else return false; + } + + +template int GVec::Add(OBJ* item) { + if (item==NULL) return -1; + int result=fCount; + if (result==fCapacity) Grow(); + fArray[result] = *item; //OBJ::operator= must copy OBJ properly! + fCount++; + return result; +} + +template int GArray::Add(OBJ* item) { + if (item==NULL) return -1; + int result; + if (SORTED) { + if (Found(*item, result)) + if (fUnique) return -1; //cannot add a duplicate! + //Found sets result to the position where the item should be! + this->idxInsert(result, *item); + } + else { + if (fUnique && Found(*item,result)) return -1; //set behaviour + result = this->fCount; + if (result==this->fCapacity) GVec::Grow(); + this->fArray[result] = *item; //operator=, copies the item + this->fCount++; + } + return result; +} + + +template void GVec::Add(GVec& list) { + if (list.Count()==0) return; + //simply copy + setCapacity(fCapacity+list.fCount); + int s=fCount; + for (int i=0;i void GArray::Add(GArray& list) { + if (list.Count()==0) return; + if (SORTED) { + for (int i=0;isetCapacity(this->fCapacity+list.fCount); + int s=this->fCount; + for (int i=0;ifArray[s+i]=list.fArray[i]; + this->fCount+=list.fCount; + } +} + +template bool GArray::Found(OBJ& item, int& idx) { + //search the list by using CompareProc (if defined) + //or == operator for a non-sortable list + //for sorted lists, even when the result is false, the idx is + //set to the closest matching object! + int i; + idx=-1; + if (this->fCount==0) { idx=0;return false;} + if (SORTED) { //binary search based on CompareProc + //do the simplest tests first: + if ((*fCompareProc)(this->fArray[0],item)>0) { + idx=0; + return false; + } + if ((*fCompareProc)(item, this->fArray[this->fCount-1])>0) { + idx=this->fCount; + return false; + } + + int l=0; + int h = this->fCount - 1; + int c; + while (l <= h) { + i = (l + h) >> 1; + c = (*fCompareProc)(this->fArray[i], item); + if (c < 0) l = i + 1; + else { + h = i - 1; + if (c == 0) { //found! 
+ idx=i; + return true; + } + } + } //while + idx = l; + return false; + } + else {//not sorted: use linear search + // needs == operator to compare user defined objects ! + i=0; + while (ifCount) { + if (this->fArray[i]==item) { //requires operator== + idx=i; + return true; + } + i++; + } + return false; + } +} + +template void GVec::idxInsert(int idx, OBJ& item) { + //idx must be the new position this new item must have + //so the allowed range is [0..fCount] + //the old idx item all the above will be shifted to idx+1 + if (idx<0 || idx>fCount) GError(SLISTINDEX_ERR, idx); + if (fCount==fCapacity) { //need to resize the array + Grow(idx, item); //expand and also copy/move data and insert the new item + return; + } + //move data around to make room for the new item + if (idxidx; i--) { + fArray[i]=fArray[i-1]; + } + } + fArray[idx]=item; + fCount++; +} + +template void GVec::Insert(int idx, OBJ* item) { + //idx can be [0..fCount] so an item can be actually added + idxInsert(idx, item); +} + +template void GArray::Insert(int idx, OBJ* item) { + //idx can be [0..fCount] so an item can be actually added + BE_UNSORTED; //forbid this operation on sorted data + idxInsert(idx, item); +} + + +template void GVec::Move(int curidx, int newidx) { + if (curidx!=newidx || newidx>=fCount) + GError(SLISTINDEX_ERR, newidx); + OBJ tmp=fArray[curidx]; //copy constructor here + fArray[curidx]=fArray[newidx]; + fArray[newidx]=tmp; +} + + +template void GArray::Move(int curidx, int newidx) { + BE_UNSORTED; //cannot do this in a sorted list! + if (curidx!=newidx || newidx>=this->fCount) + GError(SLISTINDEX_ERR, newidx); + + OBJ tmp=this->fArray[curidx]; //copy constructor here + this->fArray[curidx]=this->fArray[newidx]; + this->fArray[newidx]=tmp; +} + +template void GVec::Replace(int idx, OBJ& item) { + TEST_INDEX(idx); + fArray[idx]=item; +} + +template void GVec::Exchange(int idx1, int idx2) { + TEST_INDEX(idx1); + TEST_INDEX(idx2); + OBJ item=fArray[idx1]; + fArray[idx1]=fArray[idx2]; + fArray[idx2]=item; +} + + +template void GArray::Replace(int idx, OBJ& item) { + //TEST_INDEX(idx); + if (idx<0 || idx>=this->fCount) GError(SLISTINDEX_ERR, __FILE__,__LINE__, idx); + this->fArray[idx]=item; + if ( SORTED ) Sort(); //re-sort ! 
this could be very expensive, don't do it +} + + +template void GVec::Delete(int index) { + TEST_INDEX(index); + fCount--; + while (index void GVec::setCount(int NewCount) { + if (NewCount<0 || NewCount > MAXLISTSIZE) + GError(SLISTCOUNT_ERR, NewCount); + if (NewCount > fCapacity) setCapacity(NewCount); + //if (NewCount > fCount) + // memset(&fArray[fCount], 0, (NewCount - fCount) * sizeof(OBJ)); + fCount = NewCount; +} + +template void GArray::qSort(int l, int r) { + int i, j; + OBJ p,t; + do { + i = l; j = r; + p = this->fArray[(l + r) >> 1]; + do { + while (fCompareProc(this->fArray[i], p) < 0) i++; + while (fCompareProc(this->fArray[j], p) > 0) j--; + if (i <= j) { + t = this->fArray[i]; + this->fArray[i] = this->fArray[j]; + this->fArray[j] = t; + i++; j--; + } + } while (i <= j); + if (l < j) qSort(l, j); + l = i; + } while (i < r); +} + +template void GArray::Sort() { + if (this->fArray!=NULL && this->fCount>0 && fCompareProc!=NULL) + qSort(0, this->fCount-1); +} + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +//*=> GPVec and GList implementation -- sortable array of pointers to OBJ + +template GPVec::GPVec(GPVec& list) { //copy constructor + fCount=list.fCount; + fCapacity=list.fCapacity; + if (fCapacity>0) { + GMALLOC(fList, fCapacity*sizeof(OBJ*)); + } + fFreeProc=list.fFreeProc; + fCount=list.fCount; + memcpy(fList, list.fList, fCount*sizeof(OBJ*)); + //for (int i=0;i GPVec::GPVec(GPVec* plist) { //another copy constructor + fCount=0; + fCapacity=plist->fCapacity; + fList=NULL; + if (fCapacity>0) { + GMALLOC(fList, fCapacity*sizeof(OBJ*)); + } + fFreeProc=plist->fFreeProc; + fCount=plist->fCount; + memcpy(fList, plist->fList, fCount*sizeof(OBJ*)); + //for (int i=0;ifCount;i++) Add(plist->Get(i)); +} + +template const GPVec& GPVec::operator=(GPVec& list) { + if (&list!=this) { + Clear(); + fFreeProc=list.fFreeProc; + //Attention: the object *POINTERS* are copied, + // but the actual object content is NOT duplicated + for (int i=0;i void GPVec::Add(GPVec& list) { + if (list.Count()==0) return; + //simply copy the pointers! 
-- the objects will be shared + setCapacity(fCapacity+list.fCount); + memcpy( & (fList[fCount]), list.fList, list.fCount*sizeof(OBJ*)); + fCount+=list.fCount; +} + + +template GList::GList(GList& list):GPVec(list) { //copy constructor + fUnique=list.fUnique; + fCompareProc=list.fCompareProc; +} + +template GList::GList(GList* plist):GPVec(0) { //another copy constructor + this->fCapacity=plist->fCapacity; + this->fList=NULL; + if (this->fCapacity>0) { + GMALLOC(this->fList, this->fCapacity*sizeof(OBJ*)); + } + fUnique=plist->fUnique; + fCompareProc=plist->fCompareProc; + this->fFreeProc=plist->fFreeProc; + this->fCount=plist->fCount; + memcpy(this->fList, plist->fList, this->fCount*sizeof(OBJ*)); + //for (int i=0;ifCount;i++) Add(plist->Get(i)); +} + +template void GList::Add(GList& list) { + if (list.Count()==0) return; + if (SORTED) { + for (int i=0;isetCapacity(this->fCapacity+list.fCount); + memcpy( & (this->fList[this->fCount]), list.fList, list.fCount*sizeof(OBJ*)); + this->fCount+=list.fCount; + } +} + + +template GList::GList(GCompareProc* compareProc, + GFreeProc* freeProc, bool beUnique) { + fCompareProc = compareProc; + this->fFreeProc = freeProc; + fUnique = beUnique; //only affects sorted lists +} + +template GList::GList(GCompareProc* compareProc) { + fCompareProc = compareProc; + this->fFreeProc = GPVec::DefaultFreeProc; + fUnique = false; //only affects sorted lists +} + +template void GPVec::Reverse() { + int l=0; + int r=fCount-1; + OBJ* c; + while (l GList::GList(bool sorted, + bool free_elements, bool beUnique) { + if (sorted) { + if (free_elements) { + fCompareProc=&DefaultCompareProc; + this->fFreeProc = GPVec::DefaultFreeProc; + fUnique=beUnique; + } + else { + fCompareProc=&DefaultCompareProc; + this->fFreeProc=NULL; + fUnique=beUnique; + } + } + else { + if (free_elements) { + fCompareProc=NULL; + this->fFreeProc=GPVec::DefaultFreeProc; + fUnique=beUnique; + } + else { + fCompareProc=NULL; + this->fFreeProc=NULL; + fUnique=beUnique; + } + } +} + +template GPVec::GPVec(int init_capacity, bool free_elements) { + fCount=0; + fCapacity=0; + fList=NULL; + fFreeProc=(free_elements) ? 
DefaultFreeProc : NULL; + setCapacity(init_capacity); +} + + +template GList::GList(int init_capacity, bool sorted, + bool free_elements, bool beUnique):GPVec(init_capacity, free_elements) { + if (sorted) { + fCompareProc=&DefaultCompareProc; + fUnique=beUnique; + } + else { + fCompareProc=NULL; + fUnique=beUnique; + } +} + +template GPVec::~GPVec() { + this->Clear();//this will free the items if fFreeProc is defined +} + +/* +template GList::~GList() { + this->Clear();//this will free the items if fFreeProc is defined +} +*/ + +template void GPVec::setCapacity(int NewCapacity) { + if (NewCapacity < fCount || NewCapacity > MAXLISTSIZE) + GError(SLISTCAPACITY_ERR, NewCapacity); + //error: capacity not within range + if (NewCapacity!=fCapacity) { + if (NewCapacity==0) { + GFREE(fList); + } + else { + GREALLOC(fList, NewCapacity*sizeof(OBJ*)); + } + fCapacity=NewCapacity; + } +} + +template void GPVec::deallocate_item(OBJ* item) { + if (item==NULL) return; + if (FREEDATA) { + (*fFreeProc)(item); + } + else { + delete item; + } +} + +template void GPVec::Clear() { + if (FREEDATA) { + for (int i=0; i void GPVec::Exchange(int idx1, int idx2) { +//Warning: this will BREAK sort order for sorted GList + TEST_INDEX(idx1); + TEST_INDEX(idx2); + OBJ* item=fList[idx1]; + fList[idx1]=fList[idx2]; + fList[idx2]=item; +} + +template void GPVec::Expand() { + if (fCount==fCapacity) Grow(); + //return this; +} + +template OBJ* GPVec::Get(int idx) { + TEST_INDEX(idx); + return fList[idx]; +} + +template const GList& GList::operator=(GList& list) { + if (&list!=this) { + GPVec::Clear(); + fCompareProc=list.fCompareProc; + this->fFreeProc=list.fFreeProc; + //Attention: the object pointers are copied directly, + //but the actual objects are NOT duplicated + for (int i=0;i void GList::setSorted(GCompareProc* compareProc) { + GCompareProc* old_proc=fCompareProc; + fCompareProc=compareProc; + if (fCompareProc!=old_proc && fCompareProc!=NULL) + Sort(); //new compare method +} + +template void GPVec::Grow() { + int delta; + if (fCapacity > 64) delta = fCapacity/4; + else if (fCapacity > 8) delta = 16; + else delta = 4; + setCapacity(fCapacity + delta); +} + +template void GPVec::Grow(int idx, OBJ* newitem) { + int delta; + if (fCapacity > 64) delta = fCapacity/4; + else if (fCapacity > 8) delta = 16; + else delta = 4; + // setCapacity(fCapacity + delta); + int NewCapacity=fCapacity+delta; + if (NewCapacity <= fCount || NewCapacity > MAXLISTSIZE) + GError(SLISTCAPACITY_ERR, NewCapacity); + //error: capacity not within range + if (NewCapacity!=fCapacity) { + if (NewCapacity==0) { + GFREE(fList); + } + else {//add the new item + if (idx==fCount) { + GREALLOC(fList, NewCapacity*sizeof(OBJ*)); + fList[idx]=newitem; + } + else { + OBJ** newList; + GMALLOC(newList, NewCapacity*sizeof(OBJ*)); + //copy data before idx + memmove(&newList[0],&fList[0], idx*sizeof(OBJ*)); + newList[idx]=newitem; + //copy data after idx + memmove(&newList[idx+1],&fList[idx], (fCount-idx)*sizeof(OBJ*)); + memset(&newList[fCount+1], 0, (NewCapacity-fCount-1)*sizeof(OBJ*)); + //data copied: + GFREE(fList); + fList=newList; + } + fCount++; + } + fCapacity=NewCapacity; + } +} + +template int GPVec::IndexOf(pointer item) { + int result=-1; + for (int i=0;i int GList::IndexOf(OBJ* item) { + int result=0; + if (Found(item, result)) return result; + else return -1; + } + +template bool GList::Exists(OBJ& item) { + int result=0; + if (Found(&item, result)) return true; + else return false; + } + +template bool GList::Exists(OBJ* item) { + int result=0; + 
if (Found(item, result)) return true; + else return false; + } + +template int GPVec::Add(OBJ* item) { + int result; + if (item==NULL) return -1; + result = fCount; + if (result==fCapacity) this->Grow(); + fList[result]=item; + fCount++; + return fCount-1; +} + +template int GList::Add(OBJ* item) { + int result; + if (item==NULL) return -1; + if (SORTED) { + if (Found(item, result)) + if (fUnique) return -1; //duplicates forbidden + //Found sets result to the position where the item should be! + sortInsert(result, item); + } + else { + if (fUnique && Found(item,result)) return -1; //set behaviour + result = this->fCount; + if (result==this->fCapacity) GPVec::Grow(); + this->fList[result]=item; + this->fCount++; + } + return result; +} + +//by default, it deletes the item if it has an equal in the list! +//returns the existing equal (==) object if it's in the list already +//or returns the item itself if it's unique (and adds it) +template OBJ* GList::AddIfNew(OBJ* item, + bool deleteIfFound, int* fidx) { + int r; + if (Found(item, r)) { + if (deleteIfFound && (pointer)item != (pointer)(this->fList[r])) { + this->deallocate_item(item); + } + if (fidx!=NULL) *fidx=r; + return this->fList[r]; //found + } + //not found: + if (SORTED) { + //Found() set result to the position where the item should be inserted: + sortInsert(r, item); + } + else { + r = this->fCount; + if (r==this->fCapacity) GPVec::Grow(); + this->fList[r]=item; + this->fCount++; + } + if (fidx!=NULL) *fidx=r; + return item; +} + +//if item is found already in the list DELETE it and return -1 +//otherwise the item is added and its index is returned +template int GList::AddedIfNew(OBJ* item) { + int r; + if (Found(item, r)) { + if ((pointer)item != (pointer)(this->fList[r])) { + this->deallocate_item(item); + } + return -1; + } + //not found: + if (SORTED) { + //Found() set r to the position where the item should be inserted: + sortInsert(r, item); + } + else { + r = this->fCount; + if (r==this->fCapacity) GPVec::Grow(); + this->fList[r]=item; + this->fCount++; + } + return r; +} + + +template bool GList::Found(OBJ* item, int& idx) { + //search the list by using CompareProc (if defined) + //or == operator for a non-sortable list + //for sorted lists, even when the result is false, the idx is + //set to the closest matching object! + int i; + idx=-1; + if (this->fCount==0) { idx=0;return false;} + if (SORTED) { //binary search based on CompareProc + //do the simple test first: + + if ((*fCompareProc)(this->fList[0],item)>0) { + idx=0; + return false; + } + if ((*fCompareProc)(item, this->fList[this->fCount-1])>0) { + idx=this->fCount; + return false; + } + + int l, h, c; + l = 0; + h = this->fCount - 1; + while (l <= h) { + i = (l + h) >> 1; + c = (*fCompareProc)(this->fList[i], item); + if (c < 0) l = i + 1; + else { + h = i - 1; + if (c == 0) { + idx=i; + return true; + } + } + } //while + idx = l; + return false; + } + else {//not sorted: use linear search + // needs == operator to compare user defined objects ! 
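/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_addifnew() wrapper is made up for this example). AddIfNew() uses
   Found() to deduplicate while keeping ownership tidy:

     void demo_addifnew() {
       GList<GStr> seen(true, true, true);   // sorted, owning, unique
       GStr* s = new GStr("exon");
       GStr* kept = seen.AddIfNew(s);        // not in the list yet: kept == s
       GStr* dup  = new GStr("exon");
       GStr* prev = seen.AddIfNew(dup);      // equal item found: dup is deleted,
                                             // prev points at the stored "exon"
     }
*/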
+ i=0; + while (ifCount) { + if (*this->fList[i]==*item) { + idx=i; + return true; + } + i++; + } + return false; + } +} + +template void GList::sortInsert(int idx, OBJ* item) { + //idx must be the new position this new item must have + //so the allowed range is [0..fCount] + //the old idx item all the above will be shifted to idx+1 + if (idx<0 || idx>this->fCount) GError(SLISTINDEX_ERR, idx); + if (this->fCount==this->fCapacity) { + GPVec::Grow(idx, item); + //expand and also copy/move data and insert the new item + return; + } + //room still left, just move data around and insert the new one + if (idxfCount) //copy/move pointers only! + memmove(&(this->fList[idx+1]), &(this->fList[idx]), (this->fCount-idx)*sizeof(OBJ*)); + this->fList[idx]=item; + this->fCount++; +} + +template void GPVec::Insert(int idx, OBJ* item) { + //idx can be [0..fCount] so an item can be actually added + if (idx<0 || idx>fCount) GError(SLISTINDEX_ERR, idx); + if (fCount==fCapacity) { + Grow(idx, item); + return; + } + if (idx void GList::Insert(int idx, OBJ* item) { + //idx can be [0..fCount] so an item can be actually added + BE_UNSORTED; //cannot do that with a sorted list! + GPVec::Insert(idx,item); +} + +template void GPVec::Move(int curidx, int newidx) { + //BE_UNSORTED; //cannot do that in a sorted list! + if (curidx!=newidx || newidx>=fCount) + GError(SLISTINDEX_ERR, newidx); + OBJ* p; + p=Get(curidx); + //this is a delete: + fCount--; + if (curidx void GList::Move(int curidx, int newidx) { + BE_UNSORTED; //cannot do this in a sorted list! + GPVec::Move(curidx,newidx); +} + +template void GPVec::Put(int idx, OBJ* item) { + //WARNING: this will never free the replaced item! + TEST_INDEX(idx); + fList[idx]=item; +} + +template void GList::Put(int idx, OBJ* item, bool re_sort) { + //WARNING: this will never free the replaced item! + // this may BREAK the sort order unless the "re_sort" parameter is given + if (idx<0 || idx>this->fCount) GError(SLISTINDEX_ERR, idx); + this->fList[idx]=item; + if (SORTED && item!=NULL && re_sort) Sort(); //re-sort +} + + +template void GPVec::Forget(int idx) { + TEST_INDEX(idx); + fList[idx]=NULL; //user should free that somewhere else +} + +template void GPVec::freeItem(int idx) { + TEST_INDEX(idx); + if (fFreeProc!=NULL) { + (*fFreeProc)(fList[idx]); + } + else this->DefaultFreeProc(fList[idx]); + fList[idx]=NULL; +} + +template void GPVec::Delete(int index) { + TEST_INDEX(index); + if (fFreeProc!=NULL && fList[index]!=NULL) { + (*fFreeProc)(fList[index]); //freeItem + } + fList[index]=NULL; + fCount--; + if (index OBJ* GPVec::Pop() { + if (fCount<=0) return NULL; + fCount--; + OBJ* o=fList[fCount]; + fList[fCount]=NULL; + return o; +} + +//Queue usage: +template OBJ* GPVec::Shift() { + if (fCount<=0) return NULL; + fCount--; + OBJ* o=fList[0]; + if (fCount>0) + memmove(&fList[0], &fList[1], (fCount)*sizeof(OBJ*)); + fList[fCount]=NULL; //not that it matters.. + return o; +} + +template int GList::Remove(OBJ* item) { +//removes an item if it's in our list + int result=IndexOf(item); + if (result>=0) GPVec::Delete(result); + return result; +} + +//linear search for the pointer address +template int GPVec::RemovePtr(pointer item) { +if (item==NULL) return -1; +for (int i=0;i void GPVec::Pack() {//also frees items! 
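/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_forget_pack() wrapper is made up for this example). Forget() hands
   a pointer back to the caller, and Pack() then squeezes out the NULL slot:

     void demo_forget_pack(GPVec<GStr>& v) {
       if (v.Count() == 0) return;
       GStr* mine = v[0];   // v still owns this pointer here
       v.Forget(0);         // slot 0 set to NULL; v will no longer free it
       v.Pack();            // removes NULL slots, shifting the rest down
       delete mine;         // the caller is now responsible for freeing it
     }
*/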
+ for (int i=fCount-1; i>=0; i--) + if (fList[i]==NULL) Delete(i); //shift rest of fList content accordingly +} + +template void GPVec::setCount(int NewCount) { + if (NewCount<0 || NewCount > MAXLISTSIZE) + GError(SLISTCOUNT_ERR, NewCount); + if (NewCount > fCapacity) setCapacity(NewCount); + if (NewCount > fCount) + memset(fList[fCount], 0, (NewCount - fCount) * sizeof(OBJ*)); + fCount = NewCount; +} + +template void GList::QuickSort(int L, int R) { + int I, J; + OBJ* P; + OBJ* T; + do { + I = L; + J = R; + P = this->fList[(L + R) >> 1]; + do { + while (fCompareProc(this->fList[I], P) < 0) I++; + while (fCompareProc(this->fList[J], P) > 0) J--; + if (I <= J) { + T = this->fList[I]; + this->fList[I] = this->fList[J]; + this->fList[J] = T; + I++; + J--; + } + } + while (I <= J); + if (L < J) QuickSort(L, J); + L = I; + } + while (I < R); + +} + +template void GList::Sort() { + if (this->fList!=NULL && this->fCount>0 && fCompareProc!=NULL) + QuickSort(0, this->fCount-1); +} + +//--------------------------------------------------------------------------- +#endif diff --git a/src/GStr.cpp b/src/GStr.cpp new file mode 100644 index 0000000..f7ab2ec --- /dev/null +++ b/src/GStr.cpp @@ -0,0 +1,1331 @@ +//--------------------------------------------------------------------------- +#include "GStr.h" +#include +#include +#include +#include "GBase.h" +#include +#include + +//--------------------------------------------------------------------------- + +GStr::Data GStr::null_data; + +//========================================= + +GStr::Data * GStr::new_data(int length) { +//static method to return a new Data object (allocate length) +//content is undefined, but it's null terminated + if (length > 0) { + Data* data; + GMALLOC(data, sizeof(Data)+length); + data->ref_count = 0; + data->length = length; + data->chars[length] = '\0'; + return data; + } + else + return &null_data; + } + +GStr::Data* GStr::new_data(const char* str) { +//static method to return a new Data object (allocate length) +//as a copy of a given string + if (str==NULL) return &null_data; + int length=strlen(str); + if (length > 0) { + Data* data; + GMALLOC(data, sizeof(Data)+length); + strcpy(data->chars, str); + data->ref_count = 0; + data->length = length; + data->chars[length] = '\0'; + return data; + } + else + return &null_data; + } + +void GStr::replace_data(int len) { + + if (len == my_data->length && my_data->ref_count <= 1) + return; + + if (my_data != &null_data && --my_data->ref_count == 0) + GFREE(my_data); + + if (len > 0) { + //my_data = (Data *) malloc(sizeof(Data) + len); + GMALLOC(my_data, sizeof(Data) + len); + my_data->ref_count = 1; + my_data->length = len; + my_data->chars[len] = '\0'; + } + else + my_data = &null_data; +} + +void GStr::replace_data(Data *data) { + if (my_data != &null_data && --my_data->ref_count == 0) + GFREE(my_data); + if (data != &null_data) + data->ref_count++; + my_data = data; +} + +void GStr::make_unique() {//make sure it's not a reference to other string + if (my_data->ref_count > 1) { + Data *data = new_data(length()); + ::memcpy(data->chars, chars(), length()); + my_data->ref_count--; + my_data = data; + my_data->ref_count++; + } +} + +bool operator==(const char *s1, const GStr& s2){ + if (s1==NULL) return s2.is_empty(); + return (strcmp(s1, s2.chars()) == 0); + } + +bool operator<(const char *s1, const GStr& s2) { + if (s1==NULL) return !s2.is_empty(); + return (strcmp(s1, s2.chars()) < 0); + } + +bool operator<=(const char *s1, const GStr& s2){ + if (s1==NULL) return true; + return 
(strcmp(s1, s2.chars()) <= 0); + } + +bool operator>(const char *s1, const GStr& s2) { + if (s1==NULL) return false; + return (strcmp(s1, s2.chars()) > 0); + } + + +GStr::GStr():my_data(&null_data) { + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + } + +GStr::GStr(const GStr& s): my_data(&null_data){ + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + replace_data(s.my_data); + } + +GStr::GStr(const char *s): my_data(&null_data) { + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + my_data=new_data(s); + my_data->ref_count = 1; + } + +GStr::GStr(const int i): my_data(&null_data) { + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + char buf[20]; + sprintf(buf,"%d",i); + const int len = ::strlen(buf); + replace_data(len); + ::memcpy(chrs(), buf, len); + } + +GStr::GStr(const double f): my_data(&null_data) { + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + char buf[20]; + sprintf(buf,"%f",f); + const int len = ::strlen(buf); + replace_data(len); + ::memcpy(chrs(), buf, len); + } + +GStr::GStr(char c, int n): my_data(&null_data) { + fTokenDelimiter=NULL; + fLastTokenStart=0; + readbuf=NULL; + replace_data(n); ::memset(chrs(), c, n); + } + +GStr::~GStr() { + if (my_data != &null_data && --my_data->ref_count == 0) + GFREE(my_data); + GFREE(fTokenDelimiter); + GFREE(readbuf); + } + +char& GStr::operator[](int idx){ +//returns reference to char (can be l-value) + if (idx < 0) idx += length(); + if (idx < 0 || idx >= length()) invalid_index_error("operator[]"); + make_unique(); //because the user will probably modify this char! + return chrs()[idx]; + } + +char GStr::operator[](int idx) const { +//returns char copy (cannot be l-value!) + if (idx < 0) idx += length(); + if (idx < 0 || idx >= length()) invalid_index_error("operator[]"); + return chars()[idx]; + } + +GStr& GStr::operator=(const GStr& s) { + make_unique(); //edit operation ahead + replace_data(s.my_data); + return *this; + } + +GStr& GStr::operator=(const char *s) { + make_unique(); //edit operation ahead + if (s==NULL) { + replace_data(0); + return *this; + } + const int len = ::strlen(s); replace_data(len); + ::memcpy(chrs(), s, len); + return *this; + } + +GStr& GStr::operator=(const double f) { + make_unique(); //edit operation ahead + char buf[20]; + sprintf(buf,"%f",f); + const int len = ::strlen(buf); + replace_data(len); + ::memcpy(chrs(), buf, len); + return *this; +} + +GStr& GStr::operator=(const int i) { + make_unique(); //edit operation ahead + char buf[20]; + sprintf(buf,"%d",i); + const int len = ::strlen(buf); + replace_data(len); + ::memcpy(chrs(), buf, len); + return *this; +} + +bool GStr::operator==(const GStr& s) const { + if (s.is_empty()) return is_empty(); + return (length() == s.length()) && + (memcmp(chars(), s.chars(), length()) == 0); + } + +bool GStr::operator==(const char *s) const { + if (s==NULL) return is_empty(); + return (strcmp(chars(), s) == 0); + } + +bool GStr::operator<(const GStr& s) const { + if (s.is_empty()) return false; + return (strcmp(chars(), s.chars()) < 0); + } + +bool GStr::operator<(const char *s) const { + if (s==NULL) return false; + return (strcmp(chars(), s) < 0); + } + +bool GStr::operator<=(const GStr& s) const { + if (s.is_empty()) return is_empty(); + return (strcmp(chars(), s.chars()) <= 0); + } + +bool GStr::operator<=(const char *s) const { + if (s==NULL) return is_empty(); + return (strcmp(chars(), s) <= 0); + } + +bool GStr::operator>(const GStr& s) const { + if (s.is_empty()) return !is_empty(); + return 
(strcmp(chars(), s.chars()) > 0); + } + +bool GStr::operator>(const char *s) const { + if (s==NULL) return !is_empty(); + return (strcmp(chars(), s) > 0); + } + +bool GStr::operator>=(const GStr& s) const { + if (s.is_empty()) return true; + return (strcmp(chars(), s.chars()) >= 0); + } + +bool GStr::operator>=(const char *s) const { + if (s==NULL) return true; + return (strcmp(chars(), s) >= 0); + } + +bool GStr::operator!=(const GStr& s) const { + if (s.is_empty()) return !is_empty(); + return (length() != s.length()) || + (memcmp(chars(), s.chars(), length()) != 0); + } + +bool GStr::operator!=(const char *s) const { + if (s==NULL) return !is_empty(); + return (strcmp(chars(), s) != 0); + } + +GStr& GStr::operator+=(const GStr& s) { + return append((const char *)s); + } + +GStr& GStr::operator+=(const char* s) { + return append(s); + } + +GStr& GStr::operator+=(const char c) { + char buf[4]; + sprintf(buf,"%c",c); + return append(buf); + } + +GStr& GStr::operator+=(const int i) { + char buf[20]; + sprintf(buf,"%d",i); + return append(buf); + } + + +GStr& GStr::operator+=(const double f) { + char buf[30]; + sprintf(buf,"%f",f); + return append(buf); + } + +bool GStr::is_empty() const { + //return my_data == &null_data; + return (length()==0); + } + +GStr GStr::copy() const { + GStr newstring(*this); + return newstring; + } + +GStr& GStr::clear() { + make_unique(); //edit operation ahead + replace_data(0); + return *this; + } + +int GStr::index(const GStr& s, int start_index) const { + return index(s.chars(), start_index); + } + +bool GStr::contains(const GStr& s) const { + return (index(s, 0) >= 0); + } + +bool GStr::contains(const char *s) const { + return (index(s, 0) >= 0); + } + +bool GStr::startsWith(const char *s) const { + //return (index(s, 0) == 0); + return ::startsWith(this->chars(), s); + } + +bool GStr::startsWith(const GStr& s) const { + //return (index(s, 0) == 0); + return ::startsWith(this->chars(), s.chars()); + } + +bool GStr::endsWith(const char *s) const { + //return (index(s, 0) == 0); + return ::endsWith(this->chars(), s); + } + +bool GStr::endsWith(const GStr& s) const { + //return (index(s, 0) == 0); + return ::endsWith(this->chars(), s.chars()); + } + +bool GStr::contains(char c) const { + return (index(c, 0) >= 0); + } + +GStr& GStr::format(const char *fmt,...) { +// Format as in sprintf + make_unique(); //edit operation ahead + char* buf; + GMALLOC(buf, strlen(fmt)+1024); + va_list arguments; + va_start(arguments,fmt); + //+1K buffer, should be enough for common expressions + int len=vsprintf(buf,fmt,arguments); + va_end(arguments); + replace_data(len); //this also adds the '\0' at the end! + //and sets the right len + ::memcpy(chrs(), buf, len); + GFREE(buf); + return *this; + } + +GStr& GStr::appendfmt(const char *fmt,...) 
{ +// Format as in sprintf + make_unique(); //edit operation ahead + char* buf; + GMALLOC(buf, strlen(fmt)+1024); + va_list arguments; + va_start(arguments,fmt); + //+1K buffer, should be enough for common expressions + vsprintf(buf,fmt,arguments); + va_end(arguments); + append(buf); + GFREE(buf); + return *this; + } + +GStr& GStr::trim(char c) { + register int istart; + register int iend; + for (istart=0; istartistart && chars()[iend]==c;iend--) ; + int newlen=iend-istart+1; + if (newlen==length()) //nothing to trim + return *this; + make_unique(); //edit operation ahead + Data *data = new_data(newlen); + ::memcpy(data->chars, &chars()[istart], newlen); + replace_data(data); + return *this; + } + +GStr& GStr::trim(const char* c) { + register int istart; + register int iend; + for (istart=0; istartistart && strchr(c, chars()[iend])!=NULL;iend--) ; + int newlen=iend-istart+1; + if (newlen==length()) //nothing to trim + return *this; + make_unique(); //edit operation ahead + Data *data = new_data(newlen); + ::memcpy(data->chars, &chars()[istart], newlen); + replace_data(data); + return *this; + } + +GStr& GStr::trimR(char c) { + //only trim the right end + //register int istart; + register int iend; + for (iend=length()-1; iend>=0 && chars()[iend]==c;iend--) ; + if (iend==-1) { + replace_data(0); //string was entirely trimmed + return *this; + } + int newlen=iend+1; + if (newlen==length()) //nothing to trim + return *this; + make_unique(); //edit operation ahead + + Data *data = new_data(newlen); + ::memcpy(data->chars, chars(), newlen); + replace_data(data); + return *this; + } + +GStr& GStr::trimR(const char* c) { + register int iend; + for (iend=length()-1; iend>=0 && strchr(c,chars()[iend])!=NULL;iend--) ; + if (iend==-1) { + replace_data(0); //string was entirely trimmed + return *this; + } + int newlen=iend+1; + if (newlen==length()) //nothing to trim + return *this; + make_unique(); //edit operation ahead + Data *data = new_data(newlen); + ::memcpy(data->chars, chars(), newlen); + replace_data(data); + return *this; + } + + +GStr& GStr::chomp(const char* cstr) { + register int iend; + if (cstr==NULL || *cstr==0) return *this; + //check if this ends with cstr + int cend=strlen(cstr)-1; + iend=my_data->length-1; + while (iend>=0 && cend>=0) { + if (my_data->chars[iend]!=cstr[cend]) return *this; + iend--; + cend--; + } + if (iend==-1) { + replace_data(0); //string will be entirely trimmed + return *this; + } + int newlen=iend+1; + make_unique(); //edit operation ahead + Data *data = new_data(newlen); + ::memcpy(data->chars, chars(), newlen); + replace_data(data); + return *this; + } + +GStr& GStr::trimL(char c) { + register int istart; + for (istart=0; istartchars, &chars()[istart], newlen); + replace_data(data); + return *this; + } + +GStr& GStr::trimL(const char* c) { + register int istart; + for (istart=0; istartchars, &chars()[istart], newlen); + replace_data(data); + return *this; + } + +GStr& GStr::padR(int len, char c) { + //actually means align right in len + if (length()>=len) return *this; //no room for padding + make_unique(); //edit operation ahead + Data *data = new_data(len); + ::memset(data->chars,c,len-length()); + ::memcpy(&data->chars[len-length()], chars(), length()); + replace_data(data); + return *this; + } + +GStr& GStr::padL(int len, char c) { //align left the string + if (length()>=len) return *this; //no room for padding + make_unique(); //edit operation ahead + Data *data = new_data(len); + ::memcpy(data->chars, chars(), length()); + 
::memset(&data->chars[length()],c,len-length()); + replace_data(data); + return *this; + } + +GStr& GStr::padC(int len, char c) { + if (length()>=len) return *this; //no room for padding + make_unique(); //edit operation ahead + int istart=(len-length())/2; + Data *data = new_data(len); + if (istart>0) + ::memset(data->chars, c, istart); + ::memcpy(&data->chars[istart], chars(), length()); + int iend=istart+length(); + if (iendchars[iend],c,len-iend); + replace_data(data); + return *this; + } + +GStr operator+(const char *s1, const GStr& s2) { + const int s1_length = ::strlen(s1); + + if (s1_length == 0) + return s2; + else { + GStr newstring; + newstring.replace_data(s1_length + s2.length()); + ::memcpy(newstring.chrs(), s1, s1_length); + ::memcpy(&(newstring.chrs())[s1_length], s2.chars(), s2.length()); + return newstring; + } +} + +//========================================= + +GStr GStr::operator+(const GStr& s) const { + if (length() == 0) + return s; + else if (s.length() == 0) + return *this; + else { + GStr newstring; + newstring.replace_data(length() + s.length()); + ::memcpy(newstring.chrs(), chars(), length()); + ::memcpy(&(newstring.chrs())[length()], s.chars(), s.length()); + return newstring; + } +} + +//========================================= + +GStr GStr::operator+(const char *s) const { + + const int s_length = ::strlen(s); + + if (s_length == 0) + return *this; + else { + GStr newstring; + newstring.replace_data(length() + s_length); + ::memcpy(newstring.chrs(), chars(), length()); + ::memcpy(&(newstring.chrs())[length()], s, s_length); + return newstring; + } +} + +GStr GStr::operator+(const int i) const { + char buf[20]; + sprintf(buf, "%d", i); + const int s_length = ::strlen(buf); + GStr newstring; + newstring.replace_data(length() + s_length); + ::memcpy(newstring.chrs(), chars(), length()); + ::memcpy(&(newstring.chrs())[length()], buf, s_length); + return newstring; +} + +GStr GStr::operator+(const char c) const { + char buf[4]; + sprintf(buf, "%c", c); + const int s_length = ::strlen(buf); + GStr newstring; + newstring.replace_data(length() + s_length); + ::memcpy(newstring.chrs(), chars(), length()); + ::memcpy(&(newstring.chrs())[length()], buf, s_length); + return newstring; +} + +GStr GStr::operator+(const double f) const { + char buf[30]; + sprintf(buf, "%f", f); + const int s_length = ::strlen(buf); + GStr newstring; + newstring.replace_data(length() + s_length); + ::memcpy(newstring.chrs(), chars(), length()); + ::memcpy(&(newstring.chrs())[length()], buf, s_length); + return newstring; +} + + +//========================================= + +bool GStr::is_space() const { + + if (my_data == &null_data) + return false; + + for (register const char *p = chars(); *p; p++) + if (!isspace(*p)) + return false; + + return true; +} + +//========================================= + +GStr GStr::substr(int idx, int len) const { + // A negative idx specifies an idx from the right of the string. + if (idx < 0) + idx += length(); + + // A length of -1 specifies the rest of the string. 
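/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_substr() wrapper is made up for this example):

     void demo_substr() {
       GStr s("transcript_id");
       GStr a = s.substr(0, 10);   // "transcript"
       GStr b = s.substr(-2);      // "id" -- a negative idx counts from the right
     }
*/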
+ if (len < 0 || len>length()-idx) + len = length() - idx; + + if (idx<0 || idx>=length() || len<0 ) + invalid_args_error("substr()"); + + GStr newstring; + newstring.replace_data(len); + ::memcpy(newstring.chrs(), &chars()[idx], len); + return newstring; +} + +GStr& GStr::reverse() { + make_unique(); + int l=0; + int r=my_data->length-1; + char c; + while (lchars[l]; + my_data->chars[l]=my_data->chars[r]; + my_data->chars[r]=c; + l++;r--; + } + return *this; +} + + +//transform: any character from 'from' is replaced with a coresponding +//char from 'to' + +GStr& GStr::tr(const char *rfrom, const char* rto) { + if (length() == 0 || rfrom==NULL || strlen(rfrom)==0) + return *this; + unsigned int l=strlen(rfrom); + if (rto!=NULL && strlen(rto)!=l) + invalid_args_error("tr()"); + make_unique(); //edit operation ahead + Data *data = new_data(length()); + + if (rto==NULL) { //deletion case + char* s = my_data->chars; + char* p; + char* dest = data->chars; + do { + if ((p=strpbrk(s,rfrom))!=NULL) { + memcpy(dest,s,p-s); + dest+=p-s; + s=p+1; + } + else { + strcpy(dest, s); + dest+=strlen(s); + } + } while (p!=NULL); + (*dest)='\0'; + } + else { //char substitution case - easier! + const char* p; + for (int i=0; ichars[i]))!=NULL) + my_data->chars[i]=rto[p-rfrom]; + } + } + data->length=strlen(data->chars); + replace_data(data); + return *this; +} + + +// search and replace all the occurences of a string with another string +// or just remove the given string (if replacement is NULL) +GStr& GStr::replace(const char *rfrom, const char* rto) { + if (length() == 0 || rfrom==NULL || strlen(rfrom)==0) + return *this; + unsigned int l=strlen(rfrom); + unsigned int tl= (rto==NULL)?0:strlen(rto); + make_unique(); //edit operation ahead + char* p; + char* dest; + char* newdest=NULL; + char* s = my_data->chars; + if (tl!=l) { //reallocation + if (tl>l) { //possible enlargement + GMALLOC(newdest, length()*(tl-l+1)+1); + } + else {//delete or replace with a shorter string + GMALLOC(newdest, length() + 1); + } + dest=newdest; + if (tl==0) {//deletion + while ((p=strstr(s,rfrom))!=NULL) { + //rfrom found at position p + memcpy(dest,s,p-s); + dest+=p-s; + s+=p-s+l; //s positioned in string after rfrom + } + //no more occurences, copy the remaining string + strcpy(dest, s); + } + else { //replace with another string + while ((p=strstr(s,rfrom))!=NULL) { + memcpy(dest,s,p-s); //copy up rto the match + dest+=p-s; + memcpy(dest,rto,tl); //put the replacement string + dest+=tl; + s+=p-s+l; + } + //not found any more, copy rto end of string + strcpy(dest, s); + } + Data* data=new_data(newdest); + replace_data(data); + GFREE(newdest); + } + else { //inplace editing: no need rto reallocate + while ((p=strstr(s,rfrom))!=NULL) { + memcpy(p,rto,l); + s+=p-s+l; + } + } + return *this; +} + + + +GStr& GStr::cut(int idx, int len) { + + if (len == 0) + return *this; + make_unique(); //edit operation ahead + + // A negative idx specifies an idx from the right of the string, + // so the left part will be cut out + if (idx < 0) + idx += length(); + + // A length of -1 specifies the rest of the string. 
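/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_edit() wrapper is made up for this example). replace() rewrites all
   occurrences of a substring, cut() removes a range:

     void demo_edit() {
       GStr p("chr1:1000-2000");
       p.replace("-", "..");   // all occurrences: "chr1:1000..2000"
       p.cut(0, 5);            // drop the first 5 chars: "1000..2000"
     }
*/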
+ if (len == -1) + len = length() - idx; + + if (idx<0 || idx>=length() || len<0 || len>length()-idx) + invalid_args_error("cut()"); + + Data *data = new_data(length() - len); + if (idx > 0) + ::memcpy(data->chars, chars(), idx); + ::strcpy(&data->chars[idx], &chars()[idx+len]); + replace_data(data); + + return *this; +} + +//========================================= + +GStr& GStr::paste(const GStr& s, int idx, int len) { + // A negative idx specifies an idx from the right of the string. + if (idx < 0) + idx += length(); + make_unique(); //edit operation ahead + + // A length of -1 specifies the rest of the string. + if (len == -1) + len = length() - idx; + + if (idx<0 || idx>=length() || len<0 || len>length()-idx) + invalid_args_error("replace()"); + + if (len == s.length() && my_data->ref_count == 1) + ::memcpy(&chrs()[idx], s.chars(), len); + else { + Data *data = new_data(length() - len + s.length()); + if (idx > 0) + ::memcpy(data->chars, chars(), idx); + if (s.length() > 0) + ::memcpy(&data->chars[idx], s.chars(), s.length()); + ::strcpy(&data->chars[idx+s.length()], &chars()[idx+len]); + replace_data(data); + } + + return *this; +} + +//========================================= + +GStr& GStr::paste(const char *s, int idx, int len) { + + // A negative idx specifies an idx from the right of the string. + make_unique(); //edit operation ahead + if (idx < 0) + idx += length(); + + // A length of -1 specifies the rest of the string. + if (len == -1) + len = length() - idx; + + if (idx<0 || idx>=length() || len<0 || len>length()-idx) + invalid_args_error("replace()"); + + const int s_length = ::strlen(s); + + if (len == s_length && my_data->ref_count == 1) + ::memcpy(&chrs()[idx], s, len); + else { + Data *data = new_data(length() - len + s_length); + if (idx > 0) + ::memcpy(data->chars, chars(), idx); + if (s_length > 0) + ::memcpy(&data->chars[idx], s, s_length); + ::strcpy(&data->chars[idx+s_length], &chars()[idx+len]); + replace_data(data); + } + + return *this; +} + +//========================================= + +GStr& GStr::insert(const GStr& s, int idx) { + make_unique(); //edit operation ahead + + // A negative idx specifies an idx from the right of the string. + if (idx < 0) + idx += length(); + + if (idx < 0 || idx >= length()) + invalid_index_error("insert()"); + + if (s.length() > 0) { + Data *data = new_data(length() + s.length()); + if (idx > 0) + ::memcpy(data->chars, chars(), idx); + ::memcpy(&data->chars[idx], s.chars(), s.length()); + ::strcpy(&data->chars[idx+s.length()], &chars()[idx]); + replace_data(data); + } + + return *this; +} + +//========================================= + +GStr& GStr::insert(const char *s, int idx) { + // A negative idx specifies an idx from the right of the string. 
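/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_insert() wrapper is made up for this example). insert() splices a
   string in before the given index, paste() overwrites a range:

     void demo_insert() {
       GStr s("chr1");
       s.insert("Hs_", 0);        // "Hs_chr1"
       s.paste("chrom1", 3, 4);   // replaces "chr1" -> "Hs_chrom1"
     }
*/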
+ make_unique(); //edit operation ahead + if (idx < 0) + idx += length(); + + if (idx < 0 || idx >= length()) + invalid_index_error("insert()"); + + const int s_length = ::strlen(s); + + if (s_length > 0) { + Data *data = new_data(length() + s_length); + if (idx > 0) + ::memcpy(data->chars, chars(), idx); + ::memcpy(&data->chars[idx], s, s_length); + ::strcpy(&data->chars[idx+s_length], &chars()[idx]); + replace_data(data); + } + + return *this; +} +//========================================= + +GStr& GStr::append(const char* s) { + make_unique(); //edit operation ahead + int len=::strlen(s); + int newlength=len+my_data->length; + if (newlength<=my_data->length) return *this; + if (my_data->length==0) { + replace_data(len); + ::memcpy(my_data->chars, s, len); + return *this; + } + //faster solution with realloc + GREALLOC(my_data, sizeof(Data)+newlength); + ::strcpy(&my_data->chars[my_data->length], s); + my_data->length=newlength; + my_data->chars[newlength]='\0'; + return *this; +} + +GStr& GStr::append(const GStr& s) { + return append((const char *)s); +} + + +GStr& GStr::upper() { + make_unique(); //edit operation ahead + for (register char *p = chrs(); *p; p++) + *p = (char) toupper(*p); + + return *this; +} + +//========================================= + +GStr& GStr::lower() { + make_unique(); + + for (register char *p = chrs(); *p; p++) + *p = (char) tolower(*p); + + return *this; +} + +//========================================= + +int GStr::index(const char *s, int start_index) const { + // A negative index specifies an index from the right of the string. + if (strlen(s)>(size_t)length()) return -1; + if (start_index < 0) + start_index += length(); + + if (start_index < 0 || start_index >= length()) + invalid_index_error("index()"); + const char* idx = strstr(&chars()[start_index], s); + if (!idx) + return -1; + else + return idx - chars(); +} + +//========================================= + +int GStr::index(char c, int start_index) const { + // A negative index specifies an index from the right of the string. + if (length()==0) return -1; + if (start_index < 0) + start_index += length(); + + if (start_index < 0 || start_index >= length()) + invalid_index_error("index()"); + + + if (c == '\0') + return -1; + const char *idx=(char *) ::memchr(&chars()[start_index], c, + length()-start_index); + if (idx==NULL) + return -1; + else + return idx - chars(); +} + +int GStr::rindex(char c, int end_index) const { + if (c == 0 || length()==0 || end_index>=length()) return -1; + if (end_index<0) end_index=my_data->length-1; + for (int i=end_index;i>=0;i--) { + if (my_data->chars[i]==c) return i; + } + return -1; +} + +int GStr::rindex(const char* str, int end_index) const { + if (str==NULL || *str == '\0' || length()==0 || end_index>=length()) + return -1; + int slen=strlen(str); + if (end_index<0) end_index=my_data->length-1; + //end_index is the index of the right-side boundary + //the scanning starts at the end + if (end_index>=0 && end_index=0;i--) { + if (memcmp((void*)(my_data->chars+i),(void*)str, slen)==0) + return i; + } + return -1; +} + +GStr GStr::split(const char* delim) { + /* splits "this" in two parts, at the first (left) + encounter of delim: + 1st would stay in "this", + 2nd part will be returned + as a new string! 
+ */ + GStr result; + int i=index(delim); + if (i>=0){ + result=substr(i+strlen(delim)); + cut(i); + return result; + } + return result; +} + +GStr GStr::split(char c) { + /* splits "this" in two parts, at the first (left) + encounter of delim: + 1st would stay in "this", + 2nd part will be returned + as a new string! + */ + GStr result; + int i=index(c); + if (i>=0){ + result=substr(i+1); + cut(i); + return result; + } + return result; +} + +GStr GStr::splitr(const char* delim) { + GStr result; + int i=rindex(delim); + if (i>=0){ + result=substr(i+strlen(delim)); + cut(i); + return result; + } + return result; +} + +GStr GStr::splitr(char c) { + GStr result; + int i=rindex(c); + if (i>=0){ + result=substr(i+1); + cut(i); + return result; + } + return result; +} + + +void GStr::startTokenize(const char* delimiter, enTokenizeMode tokenizemode) { + GFREE(fTokenDelimiter); + GMALLOC(fTokenDelimiter,strlen(delimiter)+1); + strcpy(fTokenDelimiter, delimiter); + fLastTokenStart=0; + fTokenizeMode=tokenizemode; +} + +bool GStr::nextToken(GStr& token) { + if (fTokenDelimiter==NULL) { + GError("GStr:: no token delimiter; use StartTokenize first\n"); + } + if (fLastTokenStart>=length()) {//no more + GFREE(fTokenDelimiter); + fLastTokenStart=0; + return false; + } + int dlen=strlen(fTokenDelimiter); + char* delpos=NULL; //delimiter position + int tlen=0; + if (fTokenizeMode==tkFullString) { //exact string as a delimiter + delpos=(char*)strstr(chars()+fLastTokenStart,fTokenDelimiter); + if (delpos==NULL) delpos=(char*)(chars()+length()); + //empty records may be returned + if (chars()+fLastTokenStart == delpos) { //empty token + fLastTokenStart=(delpos-chars())+dlen; + token=""; + return true; + } + else { + tlen=delpos-(chars()+fLastTokenStart); + token.replace_data(tlen); + ::memcpy(token.chrs(), &chars()[fLastTokenStart], tlen); + fLastTokenStart=(delpos-chars())+dlen; + return true; + } + } + else { //tkCharSet - any character is a delimiter + //empty records are never returned ! + if (fLastTokenStart==0) {//skip any starting delimiters + delpos=(char*)chars(); + while (*delpos!='\0' && strchr(fTokenDelimiter, *delpos)!=NULL) + delpos++; + if (*delpos!='\0') + fLastTokenStart = delpos-chars(); + else { //only delimiters here,no tokens + GFREE(fTokenDelimiter); + fLastTokenStart=0; + return false; + } + } + //now fLastTokenStart is on a non-delimiter char + //GMessage("String at fLastTokenStart=%d is %s\n", fLastTokenStart, delpos); + char* token_end=NULL; + delpos=(char*)strpbrk(chars()+fLastTokenStart,fTokenDelimiter); + if (delpos==NULL) delpos=(char*)(chars()+length()); + token_end=delpos-1; + while (*delpos!='\0' && strchr(fTokenDelimiter, *delpos)!=NULL) + delpos++; //skip any other delimiters in the set! 
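/* A minimal usage sketch (illustrative only, not part of the original source;
   the demo_tokens() wrapper is made up for this example). Typical tokenizing
   of a tab- or space-separated line:

     void demo_tokens() {
       GStr line("chr1\t100\t200");
       GStr tok;
       line.startTokenize("\t ");       // tkCharSet (default): split on tab/space
       while (line.nextToken(tok)) {
         // tok is "chr1", then "100", then "200"
       }
     }
*/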
+ //now we know that delpos is on the beginning of next token + tlen=(token_end-chars())-fLastTokenStart+1; + if (tlen==0) { + GFREE(fTokenDelimiter); + fLastTokenStart=0; + return false; + } + token.replace_data(tlen); + ::memcpy(token.chrs(), &chars()[fLastTokenStart], tlen); + fLastTokenStart=delpos-chars(); + return true; + } + //return true; +} + +size_t GStr::read(FILE* stream, const char* delimiter, size_t bufsize) { +//read up to (and including) the given delimiter string + if (readbuf==NULL) { + GMALLOC(readbuf, bufsize); + readbufsize=bufsize; + } + else if (bufsize!=readbufsize) { + GFREE(readbuf); + if (bufsize>0) { + GMALLOC(readbuf, bufsize); + } + readbufsize=bufsize; + } + if (bufsize==0) { + replace_data(0); + return 0; //clear the string and free the buffer + } + size_t numread; + size_t acc_len=0; //accumulated length + int seplen=strlen(delimiter); + void* p=NULL; + Data *data = new_data(0); + do { + numread=fread(readbuf, 1, bufsize, stream); + if (numread) { + p=Gmemscan(readbuf, bufsize, (void*) delimiter, seplen); + if (p!=NULL) {//found the delimiter + //position the stream after it + int l = (char*)p-(char*)readbuf; + fseek(stream, l+seplen-numread, SEEK_CUR); + numread=l+seplen; + } + else {//not found, go back if not eof + if (numread==bufsize) { + fseek(stream, -seplen, SEEK_CUR); //check if this works! + numread-=seplen; + } + } + if (data==&null_data) { + data=new_data(numread); + ::memcpy(data->chars, readbuf, numread); + acc_len+=numread; + } + else { + GREALLOC(data, sizeof(Data)+acc_len+numread); + memcpy(&data->chars[acc_len], readbuf, numread); + acc_len+=numread; + data->length=acc_len; + data->chars[acc_len]='\0'; + } + } //if something read + } while (p==NULL && numread!=0); + replace_data(data); + return acc_len; +} + + +int GStr::asInt(int base /*=10 */) { + return strtol(text(), NULL, base); +} + +bool GStr::asInt(int& r, int base) { + errno=0; + char*endptr; + long val=strtol(text(), &endptr, base); + if (errno!=0) return false; + if (endptr == text()) return false; + /* If we got here, strtol() successfully parsed a number */ + r=val; + return true; +} + +double GStr::asReal() { + return strtod(text(), NULL); +} + +bool GStr::asReal(double& r) { + errno=0; + char* endptr; + double val=strtod(text(), &endptr); + if (errno!=0) return false; + if (endptr == text()) return false; //no digits to parse + r=val; + return true; +} + + +int GStr::peelInt() const { + if (is_empty()) return 0; + char buf[24]; + bool started=false; + int j=0; + int i; + for (i=0;ichars[i])) j++; //set coord + else break; //finished + } + else + if (isdigit(my_data->chars[i])) { + j++; started=true; + } + } + if (j>0) { + strncpy(buf, &my_data->chars[i-j], j); + buf[j]='\0'; + return strtol(buf, NULL, 10); + } + return 0; +} + +int GStr::peelIntR() const { + if (is_empty()) return 0; + char buf[24]; + bool started=false; + int j=0; + int i; + for (i=length()-1;i>=0;i--) { + if (started) { + if (isdigit(my_data->chars[i])) j++; //set length + else break; //finished + } + else + if (isdigit(my_data->chars[i])) { + j++; started=true; + } + } + if (j>0) { + strncpy(buf, &my_data->chars[i+1], j); + buf[j]='\0'; + return strtol(buf, NULL, 10); + } + return 0; +} + +GStr GStr::to(char c) { //return the first part up to first occurence of c + int i=index(c); + if (i>=0) return substr(0,i); + else return (*this); +} + //or whole string if c not found +GStr GStr::from(char c) { //same as to, but starting from the right side + int i=rindex(c); + if (i>=0) return substr(i+1); + else 
return (*this); +} + +int GStr::count(char c){ + //return the number of occurences of char c within the string + int result=0; + for (int i=0;i<length();i++) if (my_data->chars[i]==c) result++; + return result; + } + +//========================================= + +void GStr::invalid_args_error(const char *fname) { + GError("GStr:: %s - invalid arguments\n", fname); +} + +//**************************************************************************** + +void GStr::invalid_index_error(const char *fname) { + GError("GStr:: %s - invalid index\n", fname); +} +//**************************************************************************** + diff --git a/src/GStr.h b/src/GStr.h new file mode 100644 index 0000000..5a6d8fa --- /dev/null +++ b/src/GStr.h @@ -0,0 +1,215 @@ +//--------------------------------------------------------------------------- +#ifndef GSTR_H +#define GSTR_H +//--------------------------------------------------------------------------- +#include "GBase.h" +#include <string.h> +#include <stdarg.h> +#include <stdio.h> + +// This class uses reference counting and copy-on-write semantics + +// All indexes are zero-based. For all functions that accept an index, a +// negative index specifies an index from the right of the string. Also, +// for all functions that accept a length, a length of -1 specifies the rest +// of the string. +enum enTokenizeMode { + tkFullString, + tkCharSet + }; + +class GStr { + friend GStr operator+(const char* s1, const GStr& s2); + friend bool operator==(const char* s1, const GStr& s2); + friend bool operator<(const char* s1, const GStr& s2); + friend bool operator<=(const char* s1, const GStr& s2); + friend bool operator>(const char* s1, const GStr& s2); + friend bool operator>=(const char* s1, const GStr& s2); + friend bool operator!=(const char* s1, const GStr& s2); + friend void swap(GStr& s1, GStr& s2); + public: + GStr(); + GStr(const GStr& s); + GStr(const char* s); + GStr(const int i); + GStr(const double f); + GStr(char c, int n = 1); + ~GStr(); + operator const char* () const { return my_data->chars;} //inline here + char& operator[](int index); + char operator[](int index) const; + GStr& operator=(const GStr& s); + GStr& operator=(const char* s); + GStr& operator=(const int i); + GStr& operator=(const double f); + GStr operator+(const GStr& s) const; + GStr operator+(const char* s) const; + GStr operator+(const char c) const; + GStr operator+(const int i) const; + GStr operator+(const double f) const; + bool operator==(const GStr& s) const; + bool operator==(const char* s) const; + bool operator<(const GStr& s) const; + bool operator<(const char* s) const; + bool operator<=(const GStr& s) const; + bool operator<=(const char* s) const; + bool operator>(const GStr& s) const; + bool operator>(const char* s) const; + bool operator>=(const GStr& s) const; + bool operator>=(const char* s) const; + bool operator!=(const GStr& s) const; + bool operator!=(const char* s) const; + GStr& operator+=(const GStr& s); + GStr& operator+=(const char* s); + GStr& operator+=(const char c); + GStr& operator+=(const int i); + GStr& operator+=(const double f); + //interface: + public: + int length() const; + bool is_empty() const; + bool is_space() const; + GStr substr(int index = 0, int len = -1) const; + GStr to(char c); //return the first part up to first occurence of c + //or whole string if c not found + GStr from(char c); //same as to, but starting from the right side + GStr copy() const; + GStr& format(const char *fmt,...); + GStr& reverse(); + GStr& appendfmt(const char *fmt,...); + GStr& cut(int index =
-1); //delete a specified length + GStr& remove(int from, int to) { + return cut(from, to-from+1); + } + + //paste a string at the specified position + GStr& paste(const GStr& s, int index = 0, int len=-1); + GStr& paste(const char* s, int index = 0, int len = -1); + GStr& replace(const char* from, const char* to=NULL); + GStr& insert(const GStr& s, int index = 0); + GStr& insert(const char* s, int index = 0); + GStr& append(const char* s); + GStr& append(const GStr& s); + GStr& upper(); + GStr& lower(); + GStr& clear();//make empty + //character translation or removal: + GStr& tr(const char* from, const char* to=NULL); + //number of occurences of a char in the string: + int count(char c); + void startTokenize(const char* delimiter, enTokenizeMode tokenizemode=tkCharSet); + bool nextToken(GStr& token); + int asInt(int base=10); + double asReal(); + double asDouble() { return asReal(); } + bool asReal(double& r); + bool asDouble(double& r) { return asReal(r); } + bool asInt(int& r, int base=10); + int index(const GStr& s, int start_index = 0) const; + int index(const char* s, int start_index = 0) const; + int index(char c, int start_index = 0) const; + int rindex(char c, int end_index = -1) const; + int rindex(const char* str, int end_index = -1) const; + bool contains(const GStr& s) const; + bool contains(const char* s) const; + bool contains(char c) const; + bool startsWith(const char* s) const; + bool startsWith(const GStr& s) const; + bool endsWith(const char* s) const; + bool endsWith(const GStr& s) const; + GStr split(const char* delim); + GStr split(char c); + /* splits "this" in two parts, at the first (leftmost) + encounter of delim: + 1st would stay in "this" + (which this way is truncated) + 2nd will go to the returned string + */ + GStr splitr(const char* delim); + GStr splitr(char c); + /* splits "this" in two parts, at the last (rightmost) + encounter of delim: + 1st would stay in "this" + 2nd will be returned + */ + + int peelInt() const; //extract an integer, (left to right), from a + //mixed alphanumeric string, e.g. 'T24HC1234b'=> 2 + int peelIntR() const; //same as above, but starts from the right side + //e.g. 
'T2HC1234b'=> 1234 + GStr& trim(char c); + GStr& trim(const char* c=" \t\n\r"); //trim both ends of characters in given set + GStr& trimR(const char* c=" \t\n\r"); //trim only right end + GStr& trimR(char c=' '); + GStr& chomp(char c='\n') { return trimR(c); } + GStr& chomp(const char* cstr); //like trimR, but given string is taken as a whole + GStr& trimL(const char* c=" \t\n\r"); //trim only left end + GStr& trimL(char c=' '); + GStr& padR(int len, char c=' '); //align it in len spaces to the right + GStr& padL(int len, char c=' '); //align it in len spaces to the left + GStr& padC(int len, char c=' '); //center it + size_t read(FILE* stream, const char* delimiter="\n", size_t bufsize=4096); + //read next token from stream, using the given string as + //a marker where the block should stop + const char* chars() const; + const char* text() const; + protected: + char* fTokenDelimiter; + int fLastTokenStart; + enTokenizeMode fTokenizeMode; + void* readbuf; //file read buffer for the read() function + size_t readbufsize; //last setting for the readbuf + static void invalid_args_error(const char* fname); + static void invalid_index_error(const char* fname); + struct Data {//structure holding actual + //string data and reference count information + Data() { ref_count=0; length=0; chars[0] = '\0'; } + unsigned int ref_count; + int length; + char chars[1]; + }; + static Data* new_data(int length); //alloc a specified length string's Data + static Data* new_data(const char* str); //alloc a copy of a specified string + void replace_data(int length); + void replace_data(Data* data); + void make_unique(); + char* chrs(); // this is dangerous, length should not be affected + static Data null_data; //a null (empty) string Data is available here + Data* my_data; //pointer to a Data object holding actual string data +}; + +/***************************************************************************/ + +inline int GStr::length() const { + return my_data->length; + } + + +inline const char *GStr::chars() const { + return my_data->chars; + } + +inline char *GStr::chrs() { //protected version, allows modification of the chars + return my_data->chars; + } + +inline const char *GStr::text() const { + return my_data->chars; + } + + +inline bool operator>=(const char *s1, const GStr& s2) { + return (strcmp(s1, s2.chars()) >= 0); + } + +inline bool operator!=(const char *s1, const GStr& s2) { + return (strcmp(s1, s2.chars()) != 0); + } + +inline void swap(GStr& s1, GStr& s2) { + GStr::Data *tmp = s1.my_data; s1.my_data = s2.my_data; + s2.my_data = tmp; + } + + +#endif diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..88ded9b --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,229 @@ +#include $(top_srcdir)/build-aux/cufflinks.mk + +AM_CPPFLAGS = -I$(top_srcdir)/src + +EXTRA_DIST = \ + $(top_srcdir)/AUTHORS $(top_srcdir)/make_bin.sh + +AM_CXXFLAGS = + +bin_PROGRAMS = \ + cufflinks \ + cuffcompare \ + cuffdiff \ + gtf_to_sam \ + compress_gtf \ + gffread +# cuffcluster +# gtf_reads + +noinst_HEADERS = \ + update_check.h \ + biascorrection.h \ + codons.h \ + clustering.h \ + differential.h \ + GArgs.h \ + GBase.h \ + gdna.h \ + GFaSeqGet.h \ + GFastaIndex.h \ + gff.h \ + GHash.hh \ + GList.hh \ + GStr.h \ + assemble.h \ + bundles.h \ + filters.h \ + hits.h \ + tokenize.h \ + scaffolds.h \ + abundances.h \ + genes.h \ + transitive_closure.h \ + transitive_reduction.h \ + gff_utils.h \ + gtf_tracking.h \ + progressbar.h \ + bundles.h \ + scaffold_graph.h \ + matching_merge.h \ + 
graph_optimize.h \ + jensen_shannon.h \ + sampling.h \ + lemon/maps.h \ + lemon/bin_heap.h \ + lemon/fib_heap.h \ + lemon/bipartite_matching.h \ + lemon/topology.h \ + lemon/bucket_heap.h \ + lemon/smart_graph.h \ + lemon/list_graph.h \ + lemon/error.h \ + lemon/graph_utils.h \ + lemon/bfs.h \ + lemon/dfs.h \ + lemon/math.h \ + lemon/tolerance.h \ + lemon/graph_adaptor.h \ + lemon/concept_check.h \ + lemon/bits/default_map.h \ + lemon/bits/array_map.h \ + lemon/bits/vector_map.h \ + lemon/bits/debug_map.h \ + lemon/bits/map_extender.h \ + lemon/bits/base_extender.h \ + lemon/bits/graph_adaptor_extender.h \ + lemon/bits/graph_extender.h \ + lemon/bits/invalid.h \ + lemon/bits/path_dump.h \ + lemon/bits/traits.h \ + lemon/bits/utility.h \ + lemon/bits/alteration_notifier.h \ + lemon/bits/variant.h \ + lemon/concepts/graph.h \ + lemon/concepts/bpugraph.h \ + lemon/concepts/graph_components.h \ + lemon/concepts/ugraph.h \ + lemon/concepts/heap.h \ + lemon/concepts/maps.h \ + lemon/concepts/path.h \ + lemon/concepts/matrix_maps.h \ + locfit/design.h \ + locfit/lfcons.h \ + locfit/lfstruc.h \ + locfit/local.h \ + locfit/imatlb.h \ + locfit/lffuns.h \ + locfit/lfwin.h \ + locfit/mutil.h \ + locfit/vari.hpp \ + replicates.h \ + multireads.h \ + common.h + +noinst_LIBRARIES = libcufflinks.a libgc.a + +libcufflinks_a_SOURCES = \ + clustering.cpp \ + differential.cpp \ + common.cpp \ + assemble.cpp \ + tokenize.cpp \ + abundances.cpp \ + scaffolds.cpp \ + hits.cpp \ + genes.cpp \ + bundles.cpp \ + filters.cpp \ + scaffold_graph.cpp \ + matching_merge.cpp \ + graph_optimize.cpp \ + biascorrection.cpp \ + sampling.cpp \ + locfit/adap.c \ + locfit/ar_funs.c \ + locfit/arith.c \ + locfit/band.c \ + locfit/c_args.c \ + locfit/c_plot.c \ + locfit/cmd.c \ + locfit/dens_haz.c \ + locfit/dens_int.c \ + locfit/dens_odi.c \ + locfit/density.c \ + locfit/dist.c \ + locfit/ev_atree.c \ + locfit/ev_interp.c \ + locfit/ev_kdtre.c \ + locfit/ev_main.c \ + locfit/ev_trian.c \ + locfit/family.c \ + locfit/fitted.c \ + locfit/frend.c \ + locfit/help.c \ + locfit/lf_dercor.c \ + locfit/lf_fitfun.c \ + locfit/lf_robust.c \ + locfit/lf_vari.c \ + locfit/lfd.c \ + locfit/lfstr.c \ + locfit/linalg.c \ + locfit/locfit.c \ + locfit/m_chol.c \ + locfit/m_eigen.c \ + locfit/m_jacob.c \ + locfit/m_max.c \ + locfit/makecmd.c \ + locfit/math.c \ + locfit/minmax.c \ + locfit/nbhd.c \ + locfit/pcomp.c \ + locfit/pout.c \ + locfit/preplot.c \ + locfit/random.c \ + locfit/readfile.c \ + locfit/scb.c \ + locfit/scb_cons.c \ + locfit/simul.c \ + locfit/solve.c \ + locfit/startlf.c \ + locfit/strings.c \ + locfit/vari.cpp \ + locfit/wdiag.c \ + locfit/weight.c \ + replicates.cpp \ + multireads.cpp \ + jensen_shannon.cpp + +libgc_a_SOURCES = \ + codons.cpp \ + GArgs.cpp \ + GBase.cpp \ + gdna.cpp \ + GStr.cpp \ + GFaSeqGet.cpp \ + GFastaIndex.cpp \ + gff.cpp \ + gff_utils.cpp \ + gtf_tracking.cpp + +#-- scripts to be installed in $prefix/bin +dist_bin_SCRIPTS = \ + cuffmerge + +CLEANFILES = $(bin_SCRIPTS) + +#SUFFIXES = .py +#.py: +# (echo '#!$(PYTHON)'; sed '/^#!/d' $<) > $@ + +cufflinks_SOURCES = cufflinks.cpp +cufflinks_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +cufflinks_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) #$(ZLIB_LDFLAGS) + +cuffcompare_SOURCES = cuffcompare.cpp +cuffcompare_LDADD = libgc.a + +gffread_SOURCES = gffread.cpp +gffread_LDADD = libgc.a + +cuffdiff_SOURCES = cuffdiff.cpp +cuffdiff_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +cuffdiff_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) + 
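+# Each tool above and below follows the same pattern: its driver .cpp goes in
+# <tool>_SOURCES, the convenience libraries it needs are linked via
+# <tool>_LDADD (libcufflinks.a plus libgc.a for the assembly/quantification
+# tools, libgc.a alone for the pure GFF utilities such as cuffcompare and
+# gffread), and the Boost/BAM linker search paths are passed via
+# <tool>_LDFLAGS. For illustration only -- "gtf_stats" is a hypothetical
+# name, not a target in this tree -- a new tool would be wired in as:
+#gtf_stats_SOURCES = gtf_stats.cpp
+#gtf_stats_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB)
+#gtf_stats_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS)
+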
+gtf_to_sam_SOURCES = gtf_to_sam.cpp +gtf_to_sam_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +gtf_to_sam_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) + +#cuffcluster_SOURCES = cuffcluster.cpp +#cuffcluster_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +#cuffcluster_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) + +compress_gtf_SOURCES = compress_gtf.cpp +compress_gtf_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +compress_gtf_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) + +#gtf_reads_SOURCES = gtf_reads.cpp +#gtf_reads_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +#gtf_reads_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) #$(ZLIB_LDFLAGS) diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..362d5c1 --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,1578 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +#include $(top_srcdir)/build-aux/cufflinks.mk + + + + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = cufflinks$(EXEEXT) cuffcompare$(EXEEXT) \ + cuffdiff$(EXEEXT) gtf_to_sam$(EXEEXT) compress_gtf$(EXEEXT) \ + gffread$(EXEEXT) +subdir = src +DIST_COMMON = $(dist_bin_SCRIPTS) $(noinst_HEADERS) \ + $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/ax_boost_base.m4 \ + $(top_srcdir)/ax_boost_thread.m4 $(top_srcdir)/ax_bam.m4 \ + $(top_srcdir)/ax_check_zlib.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) +AR = ar +ARFLAGS = cru +libcufflinks_a_AR = $(AR) $(ARFLAGS) +libcufflinks_a_LIBADD = +am_libcufflinks_a_OBJECTS = clustering.$(OBJEXT) \ + differential.$(OBJEXT) common.$(OBJEXT) assemble.$(OBJEXT) \ + tokenize.$(OBJEXT) abundances.$(OBJEXT) scaffolds.$(OBJEXT) \ + hits.$(OBJEXT) genes.$(OBJEXT) bundles.$(OBJEXT) \ + filters.$(OBJEXT) scaffold_graph.$(OBJEXT) \ + matching_merge.$(OBJEXT) graph_optimize.$(OBJEXT) \ + biascorrection.$(OBJEXT) sampling.$(OBJEXT) adap.$(OBJEXT) \ + ar_funs.$(OBJEXT) arith.$(OBJEXT) band.$(OBJEXT) \ + c_args.$(OBJEXT) c_plot.$(OBJEXT) cmd.$(OBJEXT) \ + dens_haz.$(OBJEXT) dens_int.$(OBJEXT) dens_odi.$(OBJEXT) \ + density.$(OBJEXT) dist.$(OBJEXT) 
ev_atree.$(OBJEXT) \ + ev_interp.$(OBJEXT) ev_kdtre.$(OBJEXT) ev_main.$(OBJEXT) \ + ev_trian.$(OBJEXT) family.$(OBJEXT) fitted.$(OBJEXT) \ + frend.$(OBJEXT) help.$(OBJEXT) lf_dercor.$(OBJEXT) \ + lf_fitfun.$(OBJEXT) lf_robust.$(OBJEXT) lf_vari.$(OBJEXT) \ + lfd.$(OBJEXT) lfstr.$(OBJEXT) linalg.$(OBJEXT) \ + locfit.$(OBJEXT) m_chol.$(OBJEXT) m_eigen.$(OBJEXT) \ + m_jacob.$(OBJEXT) m_max.$(OBJEXT) makecmd.$(OBJEXT) \ + math.$(OBJEXT) minmax.$(OBJEXT) nbhd.$(OBJEXT) pcomp.$(OBJEXT) \ + pout.$(OBJEXT) preplot.$(OBJEXT) random.$(OBJEXT) \ + readfile.$(OBJEXT) scb.$(OBJEXT) scb_cons.$(OBJEXT) \ + simul.$(OBJEXT) solve.$(OBJEXT) startlf.$(OBJEXT) \ + strings.$(OBJEXT) vari.$(OBJEXT) wdiag.$(OBJEXT) \ + weight.$(OBJEXT) replicates.$(OBJEXT) multireads.$(OBJEXT) \ + jensen_shannon.$(OBJEXT) +libcufflinks_a_OBJECTS = $(am_libcufflinks_a_OBJECTS) +libgc_a_AR = $(AR) $(ARFLAGS) +libgc_a_LIBADD = +am_libgc_a_OBJECTS = codons.$(OBJEXT) GArgs.$(OBJEXT) GBase.$(OBJEXT) \ + gdna.$(OBJEXT) GStr.$(OBJEXT) GFaSeqGet.$(OBJEXT) \ + GFastaIndex.$(OBJEXT) gff.$(OBJEXT) gff_utils.$(OBJEXT) \ + gtf_tracking.$(OBJEXT) +libgc_a_OBJECTS = $(am_libgc_a_OBJECTS) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +PROGRAMS = $(bin_PROGRAMS) +am_compress_gtf_OBJECTS = compress_gtf.$(OBJEXT) +compress_gtf_OBJECTS = $(am_compress_gtf_OBJECTS) +am__DEPENDENCIES_1 = +compress_gtf_DEPENDENCIES = libcufflinks.a libgc.a \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +am_cuffcompare_OBJECTS = cuffcompare.$(OBJEXT) +cuffcompare_OBJECTS = $(am_cuffcompare_OBJECTS) +cuffcompare_DEPENDENCIES = libgc.a +am_cuffdiff_OBJECTS = cuffdiff.$(OBJEXT) +cuffdiff_OBJECTS = $(am_cuffdiff_OBJECTS) +cuffdiff_DEPENDENCIES = libcufflinks.a libgc.a $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +am_cufflinks_OBJECTS = cufflinks.$(OBJEXT) +cufflinks_OBJECTS = $(am_cufflinks_OBJECTS) +cufflinks_DEPENDENCIES = libcufflinks.a libgc.a $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +am_gffread_OBJECTS = gffread.$(OBJEXT) +gffread_OBJECTS = $(am_gffread_OBJECTS) +gffread_DEPENDENCIES = libgc.a +am_gtf_to_sam_OBJECTS = gtf_to_sam.$(OBJEXT) +gtf_to_sam_OBJECTS = $(am_gtf_to_sam_OBJECTS) +gtf_to_sam_DEPENDENCIES = libcufflinks.a libgc.a $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +dist_binSCRIPT_INSTALL = $(INSTALL_SCRIPT) +SCRIPTS = $(dist_bin_SCRIPTS) +DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +SOURCES = $(libcufflinks_a_SOURCES) $(libgc_a_SOURCES) \ + $(compress_gtf_SOURCES) $(cuffcompare_SOURCES) \ + $(cuffdiff_SOURCES) $(cufflinks_SOURCES) $(gffread_SOURCES) \ + $(gtf_to_sam_SOURCES) +DIST_SOURCES = $(libcufflinks_a_SOURCES) $(libgc_a_SOURCES) \ + $(compress_gtf_SOURCES) $(cuffcompare_SOURCES) \ + $(cuffdiff_SOURCES) $(cufflinks_SOURCES) $(gffread_SOURCES) \ + $(gtf_to_sam_SOURCES) +HEADERS = $(noinst_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BAM_CPPFLAGS = @BAM_CPPFLAGS@ +BAM_LDFLAGS = @BAM_LDFLAGS@ +BAM_LIB = @BAM_LIB@ +BOOST_CPPFLAGS = @BOOST_CPPFLAGS@ +BOOST_LDFLAGS = @BOOST_LDFLAGS@ +BOOST_THREAD_LIB = @BOOST_THREAD_LIB@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PYTHON = @PYTHON@ +PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@ +PYTHON_PLATFORM = @PYTHON_PLATFORM@ +PYTHON_PREFIX = @PYTHON_PREFIX@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +ZLIB = @ZLIB@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pkgpyexecdir = @pkgpyexecdir@ +pkgpythondir = @pkgpythondir@ +prefix = 
@prefix@ +program_transform_name = @program_transform_name@ +pyexecdir = @pyexecdir@ +pythondir = @pythondir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +AM_CPPFLAGS = -I$(top_srcdir)/src +EXTRA_DIST = \ + $(top_srcdir)/AUTHORS $(top_srcdir)/make_bin.sh + +AM_CXXFLAGS = +# cuffcluster +# gtf_reads +noinst_HEADERS = \ + update_check.h \ + biascorrection.h \ + codons.h \ + clustering.h \ + differential.h \ + GArgs.h \ + GBase.h \ + gdna.h \ + GFaSeqGet.h \ + GFastaIndex.h \ + gff.h \ + GHash.hh \ + GList.hh \ + GStr.h \ + assemble.h \ + bundles.h \ + filters.h \ + hits.h \ + tokenize.h \ + scaffolds.h \ + abundances.h \ + genes.h \ + transitive_closure.h \ + transitive_reduction.h \ + gff_utils.h \ + gtf_tracking.h \ + progressbar.h \ + bundles.h \ + scaffold_graph.h \ + matching_merge.h \ + graph_optimize.h \ + jensen_shannon.h \ + sampling.h \ + lemon/maps.h \ + lemon/bin_heap.h \ + lemon/fib_heap.h \ + lemon/bipartite_matching.h \ + lemon/topology.h \ + lemon/bucket_heap.h \ + lemon/smart_graph.h \ + lemon/list_graph.h \ + lemon/error.h \ + lemon/graph_utils.h \ + lemon/bfs.h \ + lemon/dfs.h \ + lemon/math.h \ + lemon/tolerance.h \ + lemon/graph_adaptor.h \ + lemon/concept_check.h \ + lemon/bits/default_map.h \ + lemon/bits/array_map.h \ + lemon/bits/vector_map.h \ + lemon/bits/debug_map.h \ + lemon/bits/map_extender.h \ + lemon/bits/base_extender.h \ + lemon/bits/graph_adaptor_extender.h \ + lemon/bits/graph_extender.h \ + lemon/bits/invalid.h \ + lemon/bits/path_dump.h \ + lemon/bits/traits.h \ + lemon/bits/utility.h \ + lemon/bits/alteration_notifier.h \ + lemon/bits/variant.h \ + lemon/concepts/graph.h \ + lemon/concepts/bpugraph.h \ + lemon/concepts/graph_components.h \ + lemon/concepts/ugraph.h \ + lemon/concepts/heap.h \ + lemon/concepts/maps.h \ + lemon/concepts/path.h \ + lemon/concepts/matrix_maps.h \ + locfit/design.h \ + locfit/lfcons.h \ + locfit/lfstruc.h \ + locfit/local.h \ + locfit/imatlb.h \ + locfit/lffuns.h \ + locfit/lfwin.h \ + locfit/mutil.h \ + locfit/vari.hpp \ + replicates.h \ + multireads.h \ + common.h + +noinst_LIBRARIES = libcufflinks.a libgc.a +libcufflinks_a_SOURCES = \ + clustering.cpp \ + differential.cpp \ + common.cpp \ + assemble.cpp \ + tokenize.cpp \ + abundances.cpp \ + scaffolds.cpp \ + hits.cpp \ + genes.cpp \ + bundles.cpp \ + filters.cpp \ + scaffold_graph.cpp \ + matching_merge.cpp \ + graph_optimize.cpp \ + biascorrection.cpp \ + sampling.cpp \ + locfit/adap.c \ + locfit/ar_funs.c \ + locfit/arith.c \ + locfit/band.c \ + locfit/c_args.c \ + locfit/c_plot.c \ + locfit/cmd.c \ + locfit/dens_haz.c \ + locfit/dens_int.c \ + locfit/dens_odi.c \ + locfit/density.c \ + locfit/dist.c \ + locfit/ev_atree.c \ + locfit/ev_interp.c \ + locfit/ev_kdtre.c \ + locfit/ev_main.c \ + locfit/ev_trian.c \ + locfit/family.c \ + locfit/fitted.c \ + locfit/frend.c \ + locfit/help.c \ + locfit/lf_dercor.c \ + locfit/lf_fitfun.c \ + locfit/lf_robust.c \ + locfit/lf_vari.c \ + locfit/lfd.c \ + locfit/lfstr.c \ + locfit/linalg.c \ + locfit/locfit.c \ + locfit/m_chol.c \ + locfit/m_eigen.c \ + locfit/m_jacob.c \ + locfit/m_max.c \ + locfit/makecmd.c \ + locfit/math.c \ + locfit/minmax.c \ + locfit/nbhd.c \ + locfit/pcomp.c \ + locfit/pout.c \ + locfit/preplot.c \ + locfit/random.c \ + locfit/readfile.c \ + locfit/scb.c \ + locfit/scb_cons.c \ + locfit/simul.c \ + locfit/solve.c \ + locfit/startlf.c \ + locfit/strings.c \ + locfit/vari.cpp \ + locfit/wdiag.c \ + locfit/weight.c \ + replicates.cpp \ + 
multireads.cpp \ + jensen_shannon.cpp + +libgc_a_SOURCES = \ + codons.cpp \ + GArgs.cpp \ + GBase.cpp \ + gdna.cpp \ + GStr.cpp \ + GFaSeqGet.cpp \ + GFastaIndex.cpp \ + gff.cpp \ + gff_utils.cpp \ + gtf_tracking.cpp + + +#-- scripts to be installed in $prefix/bin +dist_bin_SCRIPTS = \ + cuffmerge + +CLEANFILES = $(bin_SCRIPTS) + +#SUFFIXES = .py +#.py: +# (echo '#!$(PYTHON)'; sed '/^#!/d' $<) > $@ +cufflinks_SOURCES = cufflinks.cpp +cufflinks_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +cufflinks_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) #$(ZLIB_LDFLAGS) +cuffcompare_SOURCES = cuffcompare.cpp +cuffcompare_LDADD = libgc.a +gffread_SOURCES = gffread.cpp +gffread_LDADD = libgc.a +cuffdiff_SOURCES = cuffdiff.cpp +cuffdiff_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +cuffdiff_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) +gtf_to_sam_SOURCES = gtf_to_sam.cpp +gtf_to_sam_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +gtf_to_sam_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) + +#cuffcluster_SOURCES = cuffcluster.cpp +#cuffcluster_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +#cuffcluster_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) +compress_gtf_SOURCES = compress_gtf.cpp +compress_gtf_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +compress_gtf_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cpp .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) +libcufflinks.a: $(libcufflinks_a_OBJECTS) $(libcufflinks_a_DEPENDENCIES) + -rm -f libcufflinks.a + $(libcufflinks_a_AR) libcufflinks.a $(libcufflinks_a_OBJECTS) $(libcufflinks_a_LIBADD) + $(RANLIB) libcufflinks.a +libgc.a: $(libgc_a_OBJECTS) $(libgc_a_DEPENDENCIES) + -rm -f libgc.a + $(libgc_a_AR) libgc.a $(libgc_a_OBJECTS) $(libgc_a_LIBADD) + $(RANLIB) libgc.a +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +compress_gtf$(EXEEXT): $(compress_gtf_OBJECTS) $(compress_gtf_DEPENDENCIES) + @rm -f compress_gtf$(EXEEXT) + $(CXXLINK) $(compress_gtf_LDFLAGS) $(compress_gtf_OBJECTS) $(compress_gtf_LDADD) $(LIBS) +cuffcompare$(EXEEXT): $(cuffcompare_OBJECTS) $(cuffcompare_DEPENDENCIES) + @rm -f cuffcompare$(EXEEXT) + $(CXXLINK) $(cuffcompare_LDFLAGS) $(cuffcompare_OBJECTS) $(cuffcompare_LDADD) $(LIBS) +cuffdiff$(EXEEXT): $(cuffdiff_OBJECTS) $(cuffdiff_DEPENDENCIES) + @rm -f cuffdiff$(EXEEXT) + $(CXXLINK) $(cuffdiff_LDFLAGS) $(cuffdiff_OBJECTS) $(cuffdiff_LDADD) $(LIBS) +cufflinks$(EXEEXT): $(cufflinks_OBJECTS) $(cufflinks_DEPENDENCIES) + @rm -f cufflinks$(EXEEXT) + $(CXXLINK) $(cufflinks_LDFLAGS) $(cufflinks_OBJECTS) $(cufflinks_LDADD) $(LIBS) +gffread$(EXEEXT): $(gffread_OBJECTS) $(gffread_DEPENDENCIES) + @rm -f gffread$(EXEEXT) + $(CXXLINK) $(gffread_LDFLAGS) $(gffread_OBJECTS) $(gffread_LDADD) $(LIBS) +gtf_to_sam$(EXEEXT): $(gtf_to_sam_OBJECTS) $(gtf_to_sam_DEPENDENCIES) + @rm -f gtf_to_sam$(EXEEXT) + $(CXXLINK) $(gtf_to_sam_LDFLAGS) $(gtf_to_sam_OBJECTS) $(gtf_to_sam_LDADD) $(LIBS) +install-dist_binSCRIPTS: $(dist_bin_SCRIPTS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" + @list='$(dist_bin_SCRIPTS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f $$d$$p; then \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " $(dist_binSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(dist_binSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(bindir)/$$f"; \ + else :; fi; \ + done + +uninstall-dist_binSCRIPTS: + 
@$(NORMAL_UNINSTALL) + @list='$(dist_bin_SCRIPTS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/GArgs.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/GBase.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/GFaSeqGet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/GFastaIndex.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/GStr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/abundances.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ar_funs.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arith.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assemble.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/band.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/biascorrection.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bundles.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/c_args.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/c_plot.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clustering.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codons.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/common.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compress_gtf.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cuffcompare.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cuffdiff.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cufflinks.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dens_haz.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dens_int.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dens_odi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/density.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/differential.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ev_atree.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ev_interp.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ev_kdtre.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ev_main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ev_trian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/family.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filters.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fitted.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/frend.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gdna.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genes.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gff.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gff_utils.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gffread.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/graph_optimize.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gtf_to_sam.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gtf_tracking.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/help.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hits.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jensen_shannon.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lf_dercor.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lf_fitfun.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lf_robust.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lf_vari.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lfd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lfstr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linalg.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/locfit.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/m_chol.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/m_eigen.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/m_jacob.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/m_max.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/makecmd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matching_merge.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/math.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minmax.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multireads.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nbhd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pcomp.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pout.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/preplot.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/random.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readfile.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/replicates.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sampling.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaffold_graph.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaffolds.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scb_cons.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simul.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solve.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/startlf.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strings.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tokenize.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vari.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wdiag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/weight.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ 
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +adap.o: locfit/adap.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT adap.o -MD -MP -MF "$(DEPDIR)/adap.Tpo" -c -o adap.o `test -f 'locfit/adap.c' || echo '$(srcdir)/'`locfit/adap.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/adap.Tpo" "$(DEPDIR)/adap.Po"; else rm -f "$(DEPDIR)/adap.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/adap.c' object='adap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o adap.o `test -f 'locfit/adap.c' || echo '$(srcdir)/'`locfit/adap.c + +adap.obj: locfit/adap.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT adap.obj -MD -MP -MF "$(DEPDIR)/adap.Tpo" -c -o adap.obj `if test -f 'locfit/adap.c'; then $(CYGPATH_W) 'locfit/adap.c'; else $(CYGPATH_W) '$(srcdir)/locfit/adap.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/adap.Tpo" "$(DEPDIR)/adap.Po"; else rm -f "$(DEPDIR)/adap.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/adap.c' object='adap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o adap.obj `if test -f 'locfit/adap.c'; then $(CYGPATH_W) 'locfit/adap.c'; else $(CYGPATH_W) '$(srcdir)/locfit/adap.c'; fi` + +ar_funs.o: locfit/ar_funs.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ar_funs.o -MD -MP -MF "$(DEPDIR)/ar_funs.Tpo" -c -o ar_funs.o `test -f 'locfit/ar_funs.c' || echo '$(srcdir)/'`locfit/ar_funs.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ar_funs.Tpo" "$(DEPDIR)/ar_funs.Po"; else rm -f "$(DEPDIR)/ar_funs.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ar_funs.c' object='ar_funs.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ar_funs.o `test -f 'locfit/ar_funs.c' || echo '$(srcdir)/'`locfit/ar_funs.c + +ar_funs.obj: locfit/ar_funs.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ar_funs.obj -MD -MP -MF "$(DEPDIR)/ar_funs.Tpo" -c -o ar_funs.obj `if test -f 'locfit/ar_funs.c'; then $(CYGPATH_W) 'locfit/ar_funs.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ar_funs.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ar_funs.Tpo" "$(DEPDIR)/ar_funs.Po"; else rm -f "$(DEPDIR)/ar_funs.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ar_funs.c' object='ar_funs.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ar_funs.obj `if test -f 'locfit/ar_funs.c'; then $(CYGPATH_W) 'locfit/ar_funs.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ar_funs.c'; fi` + +arith.o: locfit/arith.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT arith.o -MD -MP -MF "$(DEPDIR)/arith.Tpo" -c -o arith.o `test -f 'locfit/arith.c' || echo '$(srcdir)/'`locfit/arith.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/arith.Tpo" "$(DEPDIR)/arith.Po"; else rm -f "$(DEPDIR)/arith.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/arith.c' object='arith.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o arith.o `test -f 'locfit/arith.c' || echo '$(srcdir)/'`locfit/arith.c + +arith.obj: locfit/arith.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT arith.obj -MD -MP -MF "$(DEPDIR)/arith.Tpo" -c -o arith.obj `if test -f 'locfit/arith.c'; then $(CYGPATH_W) 'locfit/arith.c'; else $(CYGPATH_W) '$(srcdir)/locfit/arith.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/arith.Tpo" "$(DEPDIR)/arith.Po"; else rm -f "$(DEPDIR)/arith.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/arith.c' object='arith.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o arith.obj `if test -f 'locfit/arith.c'; then $(CYGPATH_W) 'locfit/arith.c'; else $(CYGPATH_W) '$(srcdir)/locfit/arith.c'; fi` + +band.o: locfit/band.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT band.o -MD -MP -MF "$(DEPDIR)/band.Tpo" -c -o band.o `test -f 'locfit/band.c' || echo '$(srcdir)/'`locfit/band.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/band.Tpo" "$(DEPDIR)/band.Po"; else rm -f "$(DEPDIR)/band.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/band.c' object='band.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o band.o `test -f 'locfit/band.c' || echo '$(srcdir)/'`locfit/band.c + +band.obj: locfit/band.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT band.obj -MD -MP -MF "$(DEPDIR)/band.Tpo" -c -o band.obj `if test -f 'locfit/band.c'; then $(CYGPATH_W) 'locfit/band.c'; else $(CYGPATH_W) '$(srcdir)/locfit/band.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/band.Tpo" "$(DEPDIR)/band.Po"; else rm -f "$(DEPDIR)/band.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/band.c' object='band.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o band.obj `if test -f 'locfit/band.c'; then $(CYGPATH_W) 'locfit/band.c'; else $(CYGPATH_W) 
'$(srcdir)/locfit/band.c'; fi` + +c_args.o: locfit/c_args.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT c_args.o -MD -MP -MF "$(DEPDIR)/c_args.Tpo" -c -o c_args.o `test -f 'locfit/c_args.c' || echo '$(srcdir)/'`locfit/c_args.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/c_args.Tpo" "$(DEPDIR)/c_args.Po"; else rm -f "$(DEPDIR)/c_args.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/c_args.c' object='c_args.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o c_args.o `test -f 'locfit/c_args.c' || echo '$(srcdir)/'`locfit/c_args.c + +c_args.obj: locfit/c_args.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT c_args.obj -MD -MP -MF "$(DEPDIR)/c_args.Tpo" -c -o c_args.obj `if test -f 'locfit/c_args.c'; then $(CYGPATH_W) 'locfit/c_args.c'; else $(CYGPATH_W) '$(srcdir)/locfit/c_args.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/c_args.Tpo" "$(DEPDIR)/c_args.Po"; else rm -f "$(DEPDIR)/c_args.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/c_args.c' object='c_args.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o c_args.obj `if test -f 'locfit/c_args.c'; then $(CYGPATH_W) 'locfit/c_args.c'; else $(CYGPATH_W) '$(srcdir)/locfit/c_args.c'; fi` + +c_plot.o: locfit/c_plot.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT c_plot.o -MD -MP -MF "$(DEPDIR)/c_plot.Tpo" -c -o c_plot.o `test -f 'locfit/c_plot.c' || echo '$(srcdir)/'`locfit/c_plot.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/c_plot.Tpo" "$(DEPDIR)/c_plot.Po"; else rm -f "$(DEPDIR)/c_plot.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/c_plot.c' object='c_plot.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o c_plot.o `test -f 'locfit/c_plot.c' || echo '$(srcdir)/'`locfit/c_plot.c + +c_plot.obj: locfit/c_plot.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT c_plot.obj -MD -MP -MF "$(DEPDIR)/c_plot.Tpo" -c -o c_plot.obj `if test -f 'locfit/c_plot.c'; then $(CYGPATH_W) 'locfit/c_plot.c'; else $(CYGPATH_W) '$(srcdir)/locfit/c_plot.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/c_plot.Tpo" "$(DEPDIR)/c_plot.Po"; else rm -f "$(DEPDIR)/c_plot.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/c_plot.c' object='c_plot.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o c_plot.obj `if test -f 'locfit/c_plot.c'; then $(CYGPATH_W) 'locfit/c_plot.c'; else $(CYGPATH_W) '$(srcdir)/locfit/c_plot.c'; fi` + +cmd.o: locfit/cmd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cmd.o -MD -MP -MF "$(DEPDIR)/cmd.Tpo" -c -o cmd.o `test -f 'locfit/cmd.c' || echo '$(srcdir)/'`locfit/cmd.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/cmd.Tpo" "$(DEPDIR)/cmd.Po"; else rm -f "$(DEPDIR)/cmd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/cmd.c' object='cmd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmd.o `test -f 'locfit/cmd.c' || echo '$(srcdir)/'`locfit/cmd.c + +cmd.obj: locfit/cmd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cmd.obj -MD -MP -MF "$(DEPDIR)/cmd.Tpo" -c -o cmd.obj `if test -f 'locfit/cmd.c'; then $(CYGPATH_W) 'locfit/cmd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/cmd.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/cmd.Tpo" "$(DEPDIR)/cmd.Po"; else rm -f "$(DEPDIR)/cmd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/cmd.c' object='cmd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmd.obj `if test -f 'locfit/cmd.c'; then $(CYGPATH_W) 'locfit/cmd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/cmd.c'; fi` + +dens_haz.o: locfit/dens_haz.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_haz.o -MD -MP -MF "$(DEPDIR)/dens_haz.Tpo" -c -o dens_haz.o `test -f 'locfit/dens_haz.c' || echo '$(srcdir)/'`locfit/dens_haz.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_haz.Tpo" "$(DEPDIR)/dens_haz.Po"; else rm -f "$(DEPDIR)/dens_haz.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_haz.c' object='dens_haz.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_haz.o `test -f 'locfit/dens_haz.c' || echo '$(srcdir)/'`locfit/dens_haz.c + +dens_haz.obj: locfit/dens_haz.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_haz.obj -MD -MP -MF "$(DEPDIR)/dens_haz.Tpo" -c -o dens_haz.obj `if test -f 'locfit/dens_haz.c'; then $(CYGPATH_W) 'locfit/dens_haz.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_haz.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_haz.Tpo" "$(DEPDIR)/dens_haz.Po"; else rm -f "$(DEPDIR)/dens_haz.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_haz.c' object='dens_haz.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_haz.obj `if test -f 'locfit/dens_haz.c'; then $(CYGPATH_W) 'locfit/dens_haz.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_haz.c'; fi` + +dens_int.o: locfit/dens_int.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_int.o -MD -MP -MF 
"$(DEPDIR)/dens_int.Tpo" -c -o dens_int.o `test -f 'locfit/dens_int.c' || echo '$(srcdir)/'`locfit/dens_int.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_int.Tpo" "$(DEPDIR)/dens_int.Po"; else rm -f "$(DEPDIR)/dens_int.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_int.c' object='dens_int.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_int.o `test -f 'locfit/dens_int.c' || echo '$(srcdir)/'`locfit/dens_int.c + +dens_int.obj: locfit/dens_int.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_int.obj -MD -MP -MF "$(DEPDIR)/dens_int.Tpo" -c -o dens_int.obj `if test -f 'locfit/dens_int.c'; then $(CYGPATH_W) 'locfit/dens_int.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_int.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_int.Tpo" "$(DEPDIR)/dens_int.Po"; else rm -f "$(DEPDIR)/dens_int.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_int.c' object='dens_int.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_int.obj `if test -f 'locfit/dens_int.c'; then $(CYGPATH_W) 'locfit/dens_int.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_int.c'; fi` + +dens_odi.o: locfit/dens_odi.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_odi.o -MD -MP -MF "$(DEPDIR)/dens_odi.Tpo" -c -o dens_odi.o `test -f 'locfit/dens_odi.c' || echo '$(srcdir)/'`locfit/dens_odi.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_odi.Tpo" "$(DEPDIR)/dens_odi.Po"; else rm -f "$(DEPDIR)/dens_odi.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_odi.c' object='dens_odi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_odi.o `test -f 'locfit/dens_odi.c' || echo '$(srcdir)/'`locfit/dens_odi.c + +dens_odi.obj: locfit/dens_odi.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dens_odi.obj -MD -MP -MF "$(DEPDIR)/dens_odi.Tpo" -c -o dens_odi.obj `if test -f 'locfit/dens_odi.c'; then $(CYGPATH_W) 'locfit/dens_odi.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_odi.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dens_odi.Tpo" "$(DEPDIR)/dens_odi.Po"; else rm -f "$(DEPDIR)/dens_odi.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dens_odi.c' object='dens_odi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dens_odi.obj `if test -f 'locfit/dens_odi.c'; then $(CYGPATH_W) 'locfit/dens_odi.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dens_odi.c'; fi` + +density.o: locfit/density.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT 
density.o -MD -MP -MF "$(DEPDIR)/density.Tpo" -c -o density.o `test -f 'locfit/density.c' || echo '$(srcdir)/'`locfit/density.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/density.Tpo" "$(DEPDIR)/density.Po"; else rm -f "$(DEPDIR)/density.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/density.c' object='density.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o density.o `test -f 'locfit/density.c' || echo '$(srcdir)/'`locfit/density.c + +density.obj: locfit/density.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT density.obj -MD -MP -MF "$(DEPDIR)/density.Tpo" -c -o density.obj `if test -f 'locfit/density.c'; then $(CYGPATH_W) 'locfit/density.c'; else $(CYGPATH_W) '$(srcdir)/locfit/density.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/density.Tpo" "$(DEPDIR)/density.Po"; else rm -f "$(DEPDIR)/density.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/density.c' object='density.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o density.obj `if test -f 'locfit/density.c'; then $(CYGPATH_W) 'locfit/density.c'; else $(CYGPATH_W) '$(srcdir)/locfit/density.c'; fi` + +dist.o: locfit/dist.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dist.o -MD -MP -MF "$(DEPDIR)/dist.Tpo" -c -o dist.o `test -f 'locfit/dist.c' || echo '$(srcdir)/'`locfit/dist.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dist.Tpo" "$(DEPDIR)/dist.Po"; else rm -f "$(DEPDIR)/dist.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dist.c' object='dist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dist.o `test -f 'locfit/dist.c' || echo '$(srcdir)/'`locfit/dist.c + +dist.obj: locfit/dist.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT dist.obj -MD -MP -MF "$(DEPDIR)/dist.Tpo" -c -o dist.obj `if test -f 'locfit/dist.c'; then $(CYGPATH_W) 'locfit/dist.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dist.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/dist.Tpo" "$(DEPDIR)/dist.Po"; else rm -f "$(DEPDIR)/dist.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/dist.c' object='dist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o dist.obj `if test -f 'locfit/dist.c'; then $(CYGPATH_W) 'locfit/dist.c'; else $(CYGPATH_W) '$(srcdir)/locfit/dist.c'; fi` + +ev_atree.o: locfit/ev_atree.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_atree.o -MD -MP -MF "$(DEPDIR)/ev_atree.Tpo" -c -o ev_atree.o `test -f 'locfit/ev_atree.c' || echo '$(srcdir)/'`locfit/ev_atree.c; \ 
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_atree.Tpo" "$(DEPDIR)/ev_atree.Po"; else rm -f "$(DEPDIR)/ev_atree.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_atree.c' object='ev_atree.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_atree.o `test -f 'locfit/ev_atree.c' || echo '$(srcdir)/'`locfit/ev_atree.c + +ev_atree.obj: locfit/ev_atree.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_atree.obj -MD -MP -MF "$(DEPDIR)/ev_atree.Tpo" -c -o ev_atree.obj `if test -f 'locfit/ev_atree.c'; then $(CYGPATH_W) 'locfit/ev_atree.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_atree.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_atree.Tpo" "$(DEPDIR)/ev_atree.Po"; else rm -f "$(DEPDIR)/ev_atree.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_atree.c' object='ev_atree.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_atree.obj `if test -f 'locfit/ev_atree.c'; then $(CYGPATH_W) 'locfit/ev_atree.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_atree.c'; fi` + +ev_interp.o: locfit/ev_interp.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_interp.o -MD -MP -MF "$(DEPDIR)/ev_interp.Tpo" -c -o ev_interp.o `test -f 'locfit/ev_interp.c' || echo '$(srcdir)/'`locfit/ev_interp.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_interp.Tpo" "$(DEPDIR)/ev_interp.Po"; else rm -f "$(DEPDIR)/ev_interp.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_interp.c' object='ev_interp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_interp.o `test -f 'locfit/ev_interp.c' || echo '$(srcdir)/'`locfit/ev_interp.c + +ev_interp.obj: locfit/ev_interp.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_interp.obj -MD -MP -MF "$(DEPDIR)/ev_interp.Tpo" -c -o ev_interp.obj `if test -f 'locfit/ev_interp.c'; then $(CYGPATH_W) 'locfit/ev_interp.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_interp.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_interp.Tpo" "$(DEPDIR)/ev_interp.Po"; else rm -f "$(DEPDIR)/ev_interp.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_interp.c' object='ev_interp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_interp.obj `if test -f 'locfit/ev_interp.c'; then $(CYGPATH_W) 'locfit/ev_interp.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_interp.c'; fi` + +ev_kdtre.o: locfit/ev_kdtre.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_kdtre.o -MD -MP -MF "$(DEPDIR)/ev_kdtre.Tpo" -c -o ev_kdtre.o `test -f 
'locfit/ev_kdtre.c' || echo '$(srcdir)/'`locfit/ev_kdtre.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_kdtre.Tpo" "$(DEPDIR)/ev_kdtre.Po"; else rm -f "$(DEPDIR)/ev_kdtre.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_kdtre.c' object='ev_kdtre.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_kdtre.o `test -f 'locfit/ev_kdtre.c' || echo '$(srcdir)/'`locfit/ev_kdtre.c + +ev_kdtre.obj: locfit/ev_kdtre.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_kdtre.obj -MD -MP -MF "$(DEPDIR)/ev_kdtre.Tpo" -c -o ev_kdtre.obj `if test -f 'locfit/ev_kdtre.c'; then $(CYGPATH_W) 'locfit/ev_kdtre.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_kdtre.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_kdtre.Tpo" "$(DEPDIR)/ev_kdtre.Po"; else rm -f "$(DEPDIR)/ev_kdtre.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_kdtre.c' object='ev_kdtre.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_kdtre.obj `if test -f 'locfit/ev_kdtre.c'; then $(CYGPATH_W) 'locfit/ev_kdtre.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_kdtre.c'; fi` + +ev_main.o: locfit/ev_main.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_main.o -MD -MP -MF "$(DEPDIR)/ev_main.Tpo" -c -o ev_main.o `test -f 'locfit/ev_main.c' || echo '$(srcdir)/'`locfit/ev_main.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_main.Tpo" "$(DEPDIR)/ev_main.Po"; else rm -f "$(DEPDIR)/ev_main.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_main.c' object='ev_main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_main.o `test -f 'locfit/ev_main.c' || echo '$(srcdir)/'`locfit/ev_main.c + +ev_main.obj: locfit/ev_main.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_main.obj -MD -MP -MF "$(DEPDIR)/ev_main.Tpo" -c -o ev_main.obj `if test -f 'locfit/ev_main.c'; then $(CYGPATH_W) 'locfit/ev_main.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_main.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_main.Tpo" "$(DEPDIR)/ev_main.Po"; else rm -f "$(DEPDIR)/ev_main.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_main.c' object='ev_main.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_main.obj `if test -f 'locfit/ev_main.c'; then $(CYGPATH_W) 'locfit/ev_main.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_main.c'; fi` + +ev_trian.o: locfit/ev_trian.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_trian.o -MD -MP -MF "$(DEPDIR)/ev_trian.Tpo" -c -o ev_trian.o `test -f 
'locfit/ev_trian.c' || echo '$(srcdir)/'`locfit/ev_trian.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_trian.Tpo" "$(DEPDIR)/ev_trian.Po"; else rm -f "$(DEPDIR)/ev_trian.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_trian.c' object='ev_trian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_trian.o `test -f 'locfit/ev_trian.c' || echo '$(srcdir)/'`locfit/ev_trian.c + +ev_trian.obj: locfit/ev_trian.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ev_trian.obj -MD -MP -MF "$(DEPDIR)/ev_trian.Tpo" -c -o ev_trian.obj `if test -f 'locfit/ev_trian.c'; then $(CYGPATH_W) 'locfit/ev_trian.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_trian.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/ev_trian.Tpo" "$(DEPDIR)/ev_trian.Po"; else rm -f "$(DEPDIR)/ev_trian.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/ev_trian.c' object='ev_trian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ev_trian.obj `if test -f 'locfit/ev_trian.c'; then $(CYGPATH_W) 'locfit/ev_trian.c'; else $(CYGPATH_W) '$(srcdir)/locfit/ev_trian.c'; fi` + +family.o: locfit/family.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT family.o -MD -MP -MF "$(DEPDIR)/family.Tpo" -c -o family.o `test -f 'locfit/family.c' || echo '$(srcdir)/'`locfit/family.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/family.Tpo" "$(DEPDIR)/family.Po"; else rm -f "$(DEPDIR)/family.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/family.c' object='family.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o family.o `test -f 'locfit/family.c' || echo '$(srcdir)/'`locfit/family.c + +family.obj: locfit/family.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT family.obj -MD -MP -MF "$(DEPDIR)/family.Tpo" -c -o family.obj `if test -f 'locfit/family.c'; then $(CYGPATH_W) 'locfit/family.c'; else $(CYGPATH_W) '$(srcdir)/locfit/family.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/family.Tpo" "$(DEPDIR)/family.Po"; else rm -f "$(DEPDIR)/family.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/family.c' object='family.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o family.obj `if test -f 'locfit/family.c'; then $(CYGPATH_W) 'locfit/family.c'; else $(CYGPATH_W) '$(srcdir)/locfit/family.c'; fi` + +fitted.o: locfit/fitted.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT fitted.o -MD -MP -MF "$(DEPDIR)/fitted.Tpo" -c -o fitted.o `test -f 'locfit/fitted.c' || echo 
'$(srcdir)/'`locfit/fitted.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/fitted.Tpo" "$(DEPDIR)/fitted.Po"; else rm -f "$(DEPDIR)/fitted.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/fitted.c' object='fitted.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o fitted.o `test -f 'locfit/fitted.c' || echo '$(srcdir)/'`locfit/fitted.c + +fitted.obj: locfit/fitted.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT fitted.obj -MD -MP -MF "$(DEPDIR)/fitted.Tpo" -c -o fitted.obj `if test -f 'locfit/fitted.c'; then $(CYGPATH_W) 'locfit/fitted.c'; else $(CYGPATH_W) '$(srcdir)/locfit/fitted.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/fitted.Tpo" "$(DEPDIR)/fitted.Po"; else rm -f "$(DEPDIR)/fitted.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/fitted.c' object='fitted.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o fitted.obj `if test -f 'locfit/fitted.c'; then $(CYGPATH_W) 'locfit/fitted.c'; else $(CYGPATH_W) '$(srcdir)/locfit/fitted.c'; fi` + +frend.o: locfit/frend.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT frend.o -MD -MP -MF "$(DEPDIR)/frend.Tpo" -c -o frend.o `test -f 'locfit/frend.c' || echo '$(srcdir)/'`locfit/frend.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/frend.Tpo" "$(DEPDIR)/frend.Po"; else rm -f "$(DEPDIR)/frend.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/frend.c' object='frend.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o frend.o `test -f 'locfit/frend.c' || echo '$(srcdir)/'`locfit/frend.c + +frend.obj: locfit/frend.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT frend.obj -MD -MP -MF "$(DEPDIR)/frend.Tpo" -c -o frend.obj `if test -f 'locfit/frend.c'; then $(CYGPATH_W) 'locfit/frend.c'; else $(CYGPATH_W) '$(srcdir)/locfit/frend.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/frend.Tpo" "$(DEPDIR)/frend.Po"; else rm -f "$(DEPDIR)/frend.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/frend.c' object='frend.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o frend.obj `if test -f 'locfit/frend.c'; then $(CYGPATH_W) 'locfit/frend.c'; else $(CYGPATH_W) '$(srcdir)/locfit/frend.c'; fi` + +help.o: locfit/help.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT help.o -MD -MP -MF "$(DEPDIR)/help.Tpo" -c -o help.o `test -f 'locfit/help.c' || echo '$(srcdir)/'`locfit/help.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/help.Tpo" "$(DEPDIR)/help.Po"; else rm -f "$(DEPDIR)/help.Tpo"; exit 1; fi 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/help.c' object='help.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o help.o `test -f 'locfit/help.c' || echo '$(srcdir)/'`locfit/help.c + +help.obj: locfit/help.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT help.obj -MD -MP -MF "$(DEPDIR)/help.Tpo" -c -o help.obj `if test -f 'locfit/help.c'; then $(CYGPATH_W) 'locfit/help.c'; else $(CYGPATH_W) '$(srcdir)/locfit/help.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/help.Tpo" "$(DEPDIR)/help.Po"; else rm -f "$(DEPDIR)/help.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/help.c' object='help.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o help.obj `if test -f 'locfit/help.c'; then $(CYGPATH_W) 'locfit/help.c'; else $(CYGPATH_W) '$(srcdir)/locfit/help.c'; fi` + +lf_dercor.o: locfit/lf_dercor.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_dercor.o -MD -MP -MF "$(DEPDIR)/lf_dercor.Tpo" -c -o lf_dercor.o `test -f 'locfit/lf_dercor.c' || echo '$(srcdir)/'`locfit/lf_dercor.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_dercor.Tpo" "$(DEPDIR)/lf_dercor.Po"; else rm -f "$(DEPDIR)/lf_dercor.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_dercor.c' object='lf_dercor.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_dercor.o `test -f 'locfit/lf_dercor.c' || echo '$(srcdir)/'`locfit/lf_dercor.c + +lf_dercor.obj: locfit/lf_dercor.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_dercor.obj -MD -MP -MF "$(DEPDIR)/lf_dercor.Tpo" -c -o lf_dercor.obj `if test -f 'locfit/lf_dercor.c'; then $(CYGPATH_W) 'locfit/lf_dercor.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_dercor.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_dercor.Tpo" "$(DEPDIR)/lf_dercor.Po"; else rm -f "$(DEPDIR)/lf_dercor.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_dercor.c' object='lf_dercor.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_dercor.obj `if test -f 'locfit/lf_dercor.c'; then $(CYGPATH_W) 'locfit/lf_dercor.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_dercor.c'; fi` + +lf_fitfun.o: locfit/lf_fitfun.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_fitfun.o -MD -MP -MF "$(DEPDIR)/lf_fitfun.Tpo" -c -o lf_fitfun.o `test -f 'locfit/lf_fitfun.c' || echo '$(srcdir)/'`locfit/lf_fitfun.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_fitfun.Tpo" "$(DEPDIR)/lf_fitfun.Po"; else rm -f "$(DEPDIR)/lf_fitfun.Tpo"; exit 1; fi 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_fitfun.c' object='lf_fitfun.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_fitfun.o `test -f 'locfit/lf_fitfun.c' || echo '$(srcdir)/'`locfit/lf_fitfun.c + +lf_fitfun.obj: locfit/lf_fitfun.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_fitfun.obj -MD -MP -MF "$(DEPDIR)/lf_fitfun.Tpo" -c -o lf_fitfun.obj `if test -f 'locfit/lf_fitfun.c'; then $(CYGPATH_W) 'locfit/lf_fitfun.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_fitfun.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_fitfun.Tpo" "$(DEPDIR)/lf_fitfun.Po"; else rm -f "$(DEPDIR)/lf_fitfun.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_fitfun.c' object='lf_fitfun.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_fitfun.obj `if test -f 'locfit/lf_fitfun.c'; then $(CYGPATH_W) 'locfit/lf_fitfun.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_fitfun.c'; fi` + +lf_robust.o: locfit/lf_robust.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_robust.o -MD -MP -MF "$(DEPDIR)/lf_robust.Tpo" -c -o lf_robust.o `test -f 'locfit/lf_robust.c' || echo '$(srcdir)/'`locfit/lf_robust.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_robust.Tpo" "$(DEPDIR)/lf_robust.Po"; else rm -f "$(DEPDIR)/lf_robust.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_robust.c' object='lf_robust.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_robust.o `test -f 'locfit/lf_robust.c' || echo '$(srcdir)/'`locfit/lf_robust.c + +lf_robust.obj: locfit/lf_robust.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_robust.obj -MD -MP -MF "$(DEPDIR)/lf_robust.Tpo" -c -o lf_robust.obj `if test -f 'locfit/lf_robust.c'; then $(CYGPATH_W) 'locfit/lf_robust.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_robust.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_robust.Tpo" "$(DEPDIR)/lf_robust.Po"; else rm -f "$(DEPDIR)/lf_robust.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_robust.c' object='lf_robust.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_robust.obj `if test -f 'locfit/lf_robust.c'; then $(CYGPATH_W) 'locfit/lf_robust.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_robust.c'; fi` + +lf_vari.o: locfit/lf_vari.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_vari.o -MD -MP -MF "$(DEPDIR)/lf_vari.Tpo" -c -o lf_vari.o `test -f 'locfit/lf_vari.c' || echo '$(srcdir)/'`locfit/lf_vari.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_vari.Tpo" 
"$(DEPDIR)/lf_vari.Po"; else rm -f "$(DEPDIR)/lf_vari.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_vari.c' object='lf_vari.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_vari.o `test -f 'locfit/lf_vari.c' || echo '$(srcdir)/'`locfit/lf_vari.c + +lf_vari.obj: locfit/lf_vari.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lf_vari.obj -MD -MP -MF "$(DEPDIR)/lf_vari.Tpo" -c -o lf_vari.obj `if test -f 'locfit/lf_vari.c'; then $(CYGPATH_W) 'locfit/lf_vari.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_vari.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lf_vari.Tpo" "$(DEPDIR)/lf_vari.Po"; else rm -f "$(DEPDIR)/lf_vari.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lf_vari.c' object='lf_vari.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lf_vari.obj `if test -f 'locfit/lf_vari.c'; then $(CYGPATH_W) 'locfit/lf_vari.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lf_vari.c'; fi` + +lfd.o: locfit/lfd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lfd.o -MD -MP -MF "$(DEPDIR)/lfd.Tpo" -c -o lfd.o `test -f 'locfit/lfd.c' || echo '$(srcdir)/'`locfit/lfd.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lfd.Tpo" "$(DEPDIR)/lfd.Po"; else rm -f "$(DEPDIR)/lfd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lfd.c' object='lfd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lfd.o `test -f 'locfit/lfd.c' || echo '$(srcdir)/'`locfit/lfd.c + +lfd.obj: locfit/lfd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lfd.obj -MD -MP -MF "$(DEPDIR)/lfd.Tpo" -c -o lfd.obj `if test -f 'locfit/lfd.c'; then $(CYGPATH_W) 'locfit/lfd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lfd.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lfd.Tpo" "$(DEPDIR)/lfd.Po"; else rm -f "$(DEPDIR)/lfd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lfd.c' object='lfd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lfd.obj `if test -f 'locfit/lfd.c'; then $(CYGPATH_W) 'locfit/lfd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lfd.c'; fi` + +lfstr.o: locfit/lfstr.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lfstr.o -MD -MP -MF "$(DEPDIR)/lfstr.Tpo" -c -o lfstr.o `test -f 'locfit/lfstr.c' || echo '$(srcdir)/'`locfit/lfstr.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lfstr.Tpo" "$(DEPDIR)/lfstr.Po"; else rm -f "$(DEPDIR)/lfstr.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lfstr.c' object='lfstr.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lfstr.o `test -f 'locfit/lfstr.c' || echo '$(srcdir)/'`locfit/lfstr.c + +lfstr.obj: locfit/lfstr.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lfstr.obj -MD -MP -MF "$(DEPDIR)/lfstr.Tpo" -c -o lfstr.obj `if test -f 'locfit/lfstr.c'; then $(CYGPATH_W) 'locfit/lfstr.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lfstr.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lfstr.Tpo" "$(DEPDIR)/lfstr.Po"; else rm -f "$(DEPDIR)/lfstr.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/lfstr.c' object='lfstr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lfstr.obj `if test -f 'locfit/lfstr.c'; then $(CYGPATH_W) 'locfit/lfstr.c'; else $(CYGPATH_W) '$(srcdir)/locfit/lfstr.c'; fi` + +linalg.o: locfit/linalg.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT linalg.o -MD -MP -MF "$(DEPDIR)/linalg.Tpo" -c -o linalg.o `test -f 'locfit/linalg.c' || echo '$(srcdir)/'`locfit/linalg.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/linalg.Tpo" "$(DEPDIR)/linalg.Po"; else rm -f "$(DEPDIR)/linalg.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/linalg.c' object='linalg.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o linalg.o `test -f 'locfit/linalg.c' || echo '$(srcdir)/'`locfit/linalg.c + +linalg.obj: locfit/linalg.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT linalg.obj -MD -MP -MF "$(DEPDIR)/linalg.Tpo" -c -o linalg.obj `if test -f 'locfit/linalg.c'; then $(CYGPATH_W) 'locfit/linalg.c'; else $(CYGPATH_W) '$(srcdir)/locfit/linalg.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/linalg.Tpo" "$(DEPDIR)/linalg.Po"; else rm -f "$(DEPDIR)/linalg.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/linalg.c' object='linalg.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o linalg.obj `if test -f 'locfit/linalg.c'; then $(CYGPATH_W) 'locfit/linalg.c'; else $(CYGPATH_W) '$(srcdir)/locfit/linalg.c'; fi` + +locfit.o: locfit/locfit.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT locfit.o -MD -MP -MF "$(DEPDIR)/locfit.Tpo" -c -o locfit.o `test -f 'locfit/locfit.c' || echo '$(srcdir)/'`locfit/locfit.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/locfit.Tpo" "$(DEPDIR)/locfit.Po"; else rm -f "$(DEPDIR)/locfit.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/locfit.c' object='locfit.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o locfit.o `test -f 'locfit/locfit.c' || echo '$(srcdir)/'`locfit/locfit.c + +locfit.obj: locfit/locfit.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT locfit.obj -MD -MP -MF "$(DEPDIR)/locfit.Tpo" -c -o locfit.obj `if test -f 'locfit/locfit.c'; then $(CYGPATH_W) 'locfit/locfit.c'; else $(CYGPATH_W) '$(srcdir)/locfit/locfit.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/locfit.Tpo" "$(DEPDIR)/locfit.Po"; else rm -f "$(DEPDIR)/locfit.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/locfit.c' object='locfit.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o locfit.obj `if test -f 'locfit/locfit.c'; then $(CYGPATH_W) 'locfit/locfit.c'; else $(CYGPATH_W) '$(srcdir)/locfit/locfit.c'; fi` + +m_chol.o: locfit/m_chol.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_chol.o -MD -MP -MF "$(DEPDIR)/m_chol.Tpo" -c -o m_chol.o `test -f 'locfit/m_chol.c' || echo '$(srcdir)/'`locfit/m_chol.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_chol.Tpo" "$(DEPDIR)/m_chol.Po"; else rm -f "$(DEPDIR)/m_chol.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_chol.c' object='m_chol.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_chol.o `test -f 'locfit/m_chol.c' || echo '$(srcdir)/'`locfit/m_chol.c + +m_chol.obj: locfit/m_chol.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_chol.obj -MD -MP -MF "$(DEPDIR)/m_chol.Tpo" -c -o m_chol.obj `if test -f 'locfit/m_chol.c'; then $(CYGPATH_W) 'locfit/m_chol.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_chol.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_chol.Tpo" "$(DEPDIR)/m_chol.Po"; else rm -f "$(DEPDIR)/m_chol.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_chol.c' object='m_chol.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_chol.obj `if test -f 'locfit/m_chol.c'; then $(CYGPATH_W) 'locfit/m_chol.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_chol.c'; fi` + +m_eigen.o: locfit/m_eigen.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_eigen.o -MD -MP -MF "$(DEPDIR)/m_eigen.Tpo" -c -o m_eigen.o `test -f 'locfit/m_eigen.c' || echo '$(srcdir)/'`locfit/m_eigen.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_eigen.Tpo" "$(DEPDIR)/m_eigen.Po"; else rm -f "$(DEPDIR)/m_eigen.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_eigen.c' object='m_eigen.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c 
-o m_eigen.o `test -f 'locfit/m_eigen.c' || echo '$(srcdir)/'`locfit/m_eigen.c + +m_eigen.obj: locfit/m_eigen.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_eigen.obj -MD -MP -MF "$(DEPDIR)/m_eigen.Tpo" -c -o m_eigen.obj `if test -f 'locfit/m_eigen.c'; then $(CYGPATH_W) 'locfit/m_eigen.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_eigen.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_eigen.Tpo" "$(DEPDIR)/m_eigen.Po"; else rm -f "$(DEPDIR)/m_eigen.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_eigen.c' object='m_eigen.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_eigen.obj `if test -f 'locfit/m_eigen.c'; then $(CYGPATH_W) 'locfit/m_eigen.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_eigen.c'; fi` + +m_jacob.o: locfit/m_jacob.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_jacob.o -MD -MP -MF "$(DEPDIR)/m_jacob.Tpo" -c -o m_jacob.o `test -f 'locfit/m_jacob.c' || echo '$(srcdir)/'`locfit/m_jacob.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_jacob.Tpo" "$(DEPDIR)/m_jacob.Po"; else rm -f "$(DEPDIR)/m_jacob.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_jacob.c' object='m_jacob.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_jacob.o `test -f 'locfit/m_jacob.c' || echo '$(srcdir)/'`locfit/m_jacob.c + +m_jacob.obj: locfit/m_jacob.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_jacob.obj -MD -MP -MF "$(DEPDIR)/m_jacob.Tpo" -c -o m_jacob.obj `if test -f 'locfit/m_jacob.c'; then $(CYGPATH_W) 'locfit/m_jacob.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_jacob.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_jacob.Tpo" "$(DEPDIR)/m_jacob.Po"; else rm -f "$(DEPDIR)/m_jacob.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_jacob.c' object='m_jacob.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_jacob.obj `if test -f 'locfit/m_jacob.c'; then $(CYGPATH_W) 'locfit/m_jacob.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_jacob.c'; fi` + +m_max.o: locfit/m_max.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_max.o -MD -MP -MF "$(DEPDIR)/m_max.Tpo" -c -o m_max.o `test -f 'locfit/m_max.c' || echo '$(srcdir)/'`locfit/m_max.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_max.Tpo" "$(DEPDIR)/m_max.Po"; else rm -f "$(DEPDIR)/m_max.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_max.c' object='m_max.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_max.o `test -f 'locfit/m_max.c' || echo 
'$(srcdir)/'`locfit/m_max.c + +m_max.obj: locfit/m_max.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT m_max.obj -MD -MP -MF "$(DEPDIR)/m_max.Tpo" -c -o m_max.obj `if test -f 'locfit/m_max.c'; then $(CYGPATH_W) 'locfit/m_max.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_max.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/m_max.Tpo" "$(DEPDIR)/m_max.Po"; else rm -f "$(DEPDIR)/m_max.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/m_max.c' object='m_max.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o m_max.obj `if test -f 'locfit/m_max.c'; then $(CYGPATH_W) 'locfit/m_max.c'; else $(CYGPATH_W) '$(srcdir)/locfit/m_max.c'; fi` + +makecmd.o: locfit/makecmd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT makecmd.o -MD -MP -MF "$(DEPDIR)/makecmd.Tpo" -c -o makecmd.o `test -f 'locfit/makecmd.c' || echo '$(srcdir)/'`locfit/makecmd.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/makecmd.Tpo" "$(DEPDIR)/makecmd.Po"; else rm -f "$(DEPDIR)/makecmd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/makecmd.c' object='makecmd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o makecmd.o `test -f 'locfit/makecmd.c' || echo '$(srcdir)/'`locfit/makecmd.c + +makecmd.obj: locfit/makecmd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT makecmd.obj -MD -MP -MF "$(DEPDIR)/makecmd.Tpo" -c -o makecmd.obj `if test -f 'locfit/makecmd.c'; then $(CYGPATH_W) 'locfit/makecmd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/makecmd.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/makecmd.Tpo" "$(DEPDIR)/makecmd.Po"; else rm -f "$(DEPDIR)/makecmd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/makecmd.c' object='makecmd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o makecmd.obj `if test -f 'locfit/makecmd.c'; then $(CYGPATH_W) 'locfit/makecmd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/makecmd.c'; fi` + +math.o: locfit/math.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT math.o -MD -MP -MF "$(DEPDIR)/math.Tpo" -c -o math.o `test -f 'locfit/math.c' || echo '$(srcdir)/'`locfit/math.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/math.Tpo" "$(DEPDIR)/math.Po"; else rm -f "$(DEPDIR)/math.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/math.c' object='math.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o math.o `test -f 'locfit/math.c' || echo '$(srcdir)/'`locfit/math.c + +math.obj: locfit/math.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT math.obj -MD -MP -MF "$(DEPDIR)/math.Tpo" -c -o math.obj `if test -f 'locfit/math.c'; then $(CYGPATH_W) 'locfit/math.c'; else $(CYGPATH_W) '$(srcdir)/locfit/math.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/math.Tpo" "$(DEPDIR)/math.Po"; else rm -f "$(DEPDIR)/math.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/math.c' object='math.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o math.obj `if test -f 'locfit/math.c'; then $(CYGPATH_W) 'locfit/math.c'; else $(CYGPATH_W) '$(srcdir)/locfit/math.c'; fi` + +minmax.o: locfit/minmax.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minmax.o -MD -MP -MF "$(DEPDIR)/minmax.Tpo" -c -o minmax.o `test -f 'locfit/minmax.c' || echo '$(srcdir)/'`locfit/minmax.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/minmax.Tpo" "$(DEPDIR)/minmax.Po"; else rm -f "$(DEPDIR)/minmax.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/minmax.c' object='minmax.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minmax.o `test -f 'locfit/minmax.c' || echo '$(srcdir)/'`locfit/minmax.c + +minmax.obj: locfit/minmax.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minmax.obj -MD -MP -MF "$(DEPDIR)/minmax.Tpo" -c -o minmax.obj `if test -f 'locfit/minmax.c'; then $(CYGPATH_W) 'locfit/minmax.c'; else $(CYGPATH_W) '$(srcdir)/locfit/minmax.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/minmax.Tpo" "$(DEPDIR)/minmax.Po"; else rm -f "$(DEPDIR)/minmax.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/minmax.c' object='minmax.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minmax.obj `if test -f 'locfit/minmax.c'; then $(CYGPATH_W) 'locfit/minmax.c'; else $(CYGPATH_W) '$(srcdir)/locfit/minmax.c'; fi` + +nbhd.o: locfit/nbhd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nbhd.o -MD -MP -MF "$(DEPDIR)/nbhd.Tpo" -c -o nbhd.o `test -f 'locfit/nbhd.c' || echo '$(srcdir)/'`locfit/nbhd.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/nbhd.Tpo" "$(DEPDIR)/nbhd.Po"; else rm -f "$(DEPDIR)/nbhd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/nbhd.c' object='nbhd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nbhd.o `test -f 'locfit/nbhd.c' || echo '$(srcdir)/'`locfit/nbhd.c + +nbhd.obj: locfit/nbhd.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nbhd.obj -MD -MP -MF "$(DEPDIR)/nbhd.Tpo" -c -o nbhd.obj `if test -f 'locfit/nbhd.c'; then $(CYGPATH_W) 
'locfit/nbhd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/nbhd.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/nbhd.Tpo" "$(DEPDIR)/nbhd.Po"; else rm -f "$(DEPDIR)/nbhd.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/nbhd.c' object='nbhd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nbhd.obj `if test -f 'locfit/nbhd.c'; then $(CYGPATH_W) 'locfit/nbhd.c'; else $(CYGPATH_W) '$(srcdir)/locfit/nbhd.c'; fi` + +pcomp.o: locfit/pcomp.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pcomp.o -MD -MP -MF "$(DEPDIR)/pcomp.Tpo" -c -o pcomp.o `test -f 'locfit/pcomp.c' || echo '$(srcdir)/'`locfit/pcomp.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/pcomp.Tpo" "$(DEPDIR)/pcomp.Po"; else rm -f "$(DEPDIR)/pcomp.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/pcomp.c' object='pcomp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pcomp.o `test -f 'locfit/pcomp.c' || echo '$(srcdir)/'`locfit/pcomp.c + +pcomp.obj: locfit/pcomp.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pcomp.obj -MD -MP -MF "$(DEPDIR)/pcomp.Tpo" -c -o pcomp.obj `if test -f 'locfit/pcomp.c'; then $(CYGPATH_W) 'locfit/pcomp.c'; else $(CYGPATH_W) '$(srcdir)/locfit/pcomp.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/pcomp.Tpo" "$(DEPDIR)/pcomp.Po"; else rm -f "$(DEPDIR)/pcomp.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/pcomp.c' object='pcomp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pcomp.obj `if test -f 'locfit/pcomp.c'; then $(CYGPATH_W) 'locfit/pcomp.c'; else $(CYGPATH_W) '$(srcdir)/locfit/pcomp.c'; fi` + +pout.o: locfit/pout.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pout.o -MD -MP -MF "$(DEPDIR)/pout.Tpo" -c -o pout.o `test -f 'locfit/pout.c' || echo '$(srcdir)/'`locfit/pout.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/pout.Tpo" "$(DEPDIR)/pout.Po"; else rm -f "$(DEPDIR)/pout.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/pout.c' object='pout.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pout.o `test -f 'locfit/pout.c' || echo '$(srcdir)/'`locfit/pout.c + +pout.obj: locfit/pout.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pout.obj -MD -MP -MF "$(DEPDIR)/pout.Tpo" -c -o pout.obj `if test -f 'locfit/pout.c'; then $(CYGPATH_W) 'locfit/pout.c'; else $(CYGPATH_W) '$(srcdir)/locfit/pout.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/pout.Tpo" "$(DEPDIR)/pout.Po"; else rm -f "$(DEPDIR)/pout.Tpo"; exit 1; fi 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/pout.c' object='pout.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pout.obj `if test -f 'locfit/pout.c'; then $(CYGPATH_W) 'locfit/pout.c'; else $(CYGPATH_W) '$(srcdir)/locfit/pout.c'; fi` + +preplot.o: locfit/preplot.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT preplot.o -MD -MP -MF "$(DEPDIR)/preplot.Tpo" -c -o preplot.o `test -f 'locfit/preplot.c' || echo '$(srcdir)/'`locfit/preplot.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/preplot.Tpo" "$(DEPDIR)/preplot.Po"; else rm -f "$(DEPDIR)/preplot.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/preplot.c' object='preplot.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o preplot.o `test -f 'locfit/preplot.c' || echo '$(srcdir)/'`locfit/preplot.c + +preplot.obj: locfit/preplot.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT preplot.obj -MD -MP -MF "$(DEPDIR)/preplot.Tpo" -c -o preplot.obj `if test -f 'locfit/preplot.c'; then $(CYGPATH_W) 'locfit/preplot.c'; else $(CYGPATH_W) '$(srcdir)/locfit/preplot.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/preplot.Tpo" "$(DEPDIR)/preplot.Po"; else rm -f "$(DEPDIR)/preplot.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/preplot.c' object='preplot.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o preplot.obj `if test -f 'locfit/preplot.c'; then $(CYGPATH_W) 'locfit/preplot.c'; else $(CYGPATH_W) '$(srcdir)/locfit/preplot.c'; fi` + +random.o: locfit/random.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT random.o -MD -MP -MF "$(DEPDIR)/random.Tpo" -c -o random.o `test -f 'locfit/random.c' || echo '$(srcdir)/'`locfit/random.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/random.Tpo" "$(DEPDIR)/random.Po"; else rm -f "$(DEPDIR)/random.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/random.c' object='random.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o random.o `test -f 'locfit/random.c' || echo '$(srcdir)/'`locfit/random.c + +random.obj: locfit/random.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT random.obj -MD -MP -MF "$(DEPDIR)/random.Tpo" -c -o random.obj `if test -f 'locfit/random.c'; then $(CYGPATH_W) 'locfit/random.c'; else $(CYGPATH_W) '$(srcdir)/locfit/random.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/random.Tpo" "$(DEPDIR)/random.Po"; else rm -f "$(DEPDIR)/random.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/random.c' object='random.obj' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o random.obj `if test -f 'locfit/random.c'; then $(CYGPATH_W) 'locfit/random.c'; else $(CYGPATH_W) '$(srcdir)/locfit/random.c'; fi` + +readfile.o: locfit/readfile.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT readfile.o -MD -MP -MF "$(DEPDIR)/readfile.Tpo" -c -o readfile.o `test -f 'locfit/readfile.c' || echo '$(srcdir)/'`locfit/readfile.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/readfile.Tpo" "$(DEPDIR)/readfile.Po"; else rm -f "$(DEPDIR)/readfile.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/readfile.c' object='readfile.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o readfile.o `test -f 'locfit/readfile.c' || echo '$(srcdir)/'`locfit/readfile.c + +readfile.obj: locfit/readfile.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT readfile.obj -MD -MP -MF "$(DEPDIR)/readfile.Tpo" -c -o readfile.obj `if test -f 'locfit/readfile.c'; then $(CYGPATH_W) 'locfit/readfile.c'; else $(CYGPATH_W) '$(srcdir)/locfit/readfile.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/readfile.Tpo" "$(DEPDIR)/readfile.Po"; else rm -f "$(DEPDIR)/readfile.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/readfile.c' object='readfile.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o readfile.obj `if test -f 'locfit/readfile.c'; then $(CYGPATH_W) 'locfit/readfile.c'; else $(CYGPATH_W) '$(srcdir)/locfit/readfile.c'; fi` + +scb.o: locfit/scb.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scb.o -MD -MP -MF "$(DEPDIR)/scb.Tpo" -c -o scb.o `test -f 'locfit/scb.c' || echo '$(srcdir)/'`locfit/scb.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/scb.Tpo" "$(DEPDIR)/scb.Po"; else rm -f "$(DEPDIR)/scb.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/scb.c' object='scb.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o scb.o `test -f 'locfit/scb.c' || echo '$(srcdir)/'`locfit/scb.c + +scb.obj: locfit/scb.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scb.obj -MD -MP -MF "$(DEPDIR)/scb.Tpo" -c -o scb.obj `if test -f 'locfit/scb.c'; then $(CYGPATH_W) 'locfit/scb.c'; else $(CYGPATH_W) '$(srcdir)/locfit/scb.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/scb.Tpo" "$(DEPDIR)/scb.Po"; else rm -f "$(DEPDIR)/scb.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/scb.c' object='scb.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o scb.obj `if test -f 'locfit/scb.c'; then $(CYGPATH_W) 'locfit/scb.c'; else $(CYGPATH_W) '$(srcdir)/locfit/scb.c'; fi` + +scb_cons.o: locfit/scb_cons.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scb_cons.o -MD -MP -MF "$(DEPDIR)/scb_cons.Tpo" -c -o scb_cons.o `test -f 'locfit/scb_cons.c' || echo '$(srcdir)/'`locfit/scb_cons.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/scb_cons.Tpo" "$(DEPDIR)/scb_cons.Po"; else rm -f "$(DEPDIR)/scb_cons.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/scb_cons.c' object='scb_cons.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o scb_cons.o `test -f 'locfit/scb_cons.c' || echo '$(srcdir)/'`locfit/scb_cons.c + +scb_cons.obj: locfit/scb_cons.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scb_cons.obj -MD -MP -MF "$(DEPDIR)/scb_cons.Tpo" -c -o scb_cons.obj `if test -f 'locfit/scb_cons.c'; then $(CYGPATH_W) 'locfit/scb_cons.c'; else $(CYGPATH_W) '$(srcdir)/locfit/scb_cons.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/scb_cons.Tpo" "$(DEPDIR)/scb_cons.Po"; else rm -f "$(DEPDIR)/scb_cons.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/scb_cons.c' object='scb_cons.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o scb_cons.obj `if test -f 'locfit/scb_cons.c'; then $(CYGPATH_W) 'locfit/scb_cons.c'; else $(CYGPATH_W) '$(srcdir)/locfit/scb_cons.c'; fi` + +simul.o: locfit/simul.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT simul.o -MD -MP -MF "$(DEPDIR)/simul.Tpo" -c -o simul.o `test -f 'locfit/simul.c' || echo '$(srcdir)/'`locfit/simul.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/simul.Tpo" "$(DEPDIR)/simul.Po"; else rm -f "$(DEPDIR)/simul.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/simul.c' object='simul.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o simul.o `test -f 'locfit/simul.c' || echo '$(srcdir)/'`locfit/simul.c + +simul.obj: locfit/simul.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT simul.obj -MD -MP -MF "$(DEPDIR)/simul.Tpo" -c -o simul.obj `if test -f 'locfit/simul.c'; then $(CYGPATH_W) 'locfit/simul.c'; else $(CYGPATH_W) '$(srcdir)/locfit/simul.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/simul.Tpo" "$(DEPDIR)/simul.Po"; else rm -f "$(DEPDIR)/simul.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/simul.c' object='simul.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o simul.obj `if test -f 'locfit/simul.c'; then $(CYGPATH_W) 'locfit/simul.c'; else $(CYGPATH_W) '$(srcdir)/locfit/simul.c'; fi` + +solve.o: locfit/solve.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT solve.o -MD -MP -MF "$(DEPDIR)/solve.Tpo" -c -o solve.o `test -f 'locfit/solve.c' || echo '$(srcdir)/'`locfit/solve.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/solve.Tpo" "$(DEPDIR)/solve.Po"; else rm -f "$(DEPDIR)/solve.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/solve.c' object='solve.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o solve.o `test -f 'locfit/solve.c' || echo '$(srcdir)/'`locfit/solve.c + +solve.obj: locfit/solve.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT solve.obj -MD -MP -MF "$(DEPDIR)/solve.Tpo" -c -o solve.obj `if test -f 'locfit/solve.c'; then $(CYGPATH_W) 'locfit/solve.c'; else $(CYGPATH_W) '$(srcdir)/locfit/solve.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/solve.Tpo" "$(DEPDIR)/solve.Po"; else rm -f "$(DEPDIR)/solve.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/solve.c' object='solve.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o solve.obj `if test -f 'locfit/solve.c'; then $(CYGPATH_W) 'locfit/solve.c'; else $(CYGPATH_W) '$(srcdir)/locfit/solve.c'; fi` + +startlf.o: locfit/startlf.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT startlf.o -MD -MP -MF "$(DEPDIR)/startlf.Tpo" -c -o startlf.o `test -f 'locfit/startlf.c' || echo '$(srcdir)/'`locfit/startlf.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/startlf.Tpo" "$(DEPDIR)/startlf.Po"; else rm -f "$(DEPDIR)/startlf.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/startlf.c' object='startlf.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o startlf.o `test -f 'locfit/startlf.c' || echo '$(srcdir)/'`locfit/startlf.c + +startlf.obj: locfit/startlf.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT startlf.obj -MD -MP -MF "$(DEPDIR)/startlf.Tpo" -c -o startlf.obj `if test -f 'locfit/startlf.c'; then $(CYGPATH_W) 'locfit/startlf.c'; else $(CYGPATH_W) '$(srcdir)/locfit/startlf.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/startlf.Tpo" "$(DEPDIR)/startlf.Po"; else rm -f "$(DEPDIR)/startlf.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/startlf.c' object='startlf.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o startlf.obj `if test -f 'locfit/startlf.c'; then 
$(CYGPATH_W) 'locfit/startlf.c'; else $(CYGPATH_W) '$(srcdir)/locfit/startlf.c'; fi` + +strings.o: locfit/strings.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT strings.o -MD -MP -MF "$(DEPDIR)/strings.Tpo" -c -o strings.o `test -f 'locfit/strings.c' || echo '$(srcdir)/'`locfit/strings.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/strings.Tpo" "$(DEPDIR)/strings.Po"; else rm -f "$(DEPDIR)/strings.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/strings.c' object='strings.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o strings.o `test -f 'locfit/strings.c' || echo '$(srcdir)/'`locfit/strings.c + +strings.obj: locfit/strings.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT strings.obj -MD -MP -MF "$(DEPDIR)/strings.Tpo" -c -o strings.obj `if test -f 'locfit/strings.c'; then $(CYGPATH_W) 'locfit/strings.c'; else $(CYGPATH_W) '$(srcdir)/locfit/strings.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/strings.Tpo" "$(DEPDIR)/strings.Po"; else rm -f "$(DEPDIR)/strings.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/strings.c' object='strings.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o strings.obj `if test -f 'locfit/strings.c'; then $(CYGPATH_W) 'locfit/strings.c'; else $(CYGPATH_W) '$(srcdir)/locfit/strings.c'; fi` + +wdiag.o: locfit/wdiag.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT wdiag.o -MD -MP -MF "$(DEPDIR)/wdiag.Tpo" -c -o wdiag.o `test -f 'locfit/wdiag.c' || echo '$(srcdir)/'`locfit/wdiag.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/wdiag.Tpo" "$(DEPDIR)/wdiag.Po"; else rm -f "$(DEPDIR)/wdiag.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/wdiag.c' object='wdiag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o wdiag.o `test -f 'locfit/wdiag.c' || echo '$(srcdir)/'`locfit/wdiag.c + +wdiag.obj: locfit/wdiag.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT wdiag.obj -MD -MP -MF "$(DEPDIR)/wdiag.Tpo" -c -o wdiag.obj `if test -f 'locfit/wdiag.c'; then $(CYGPATH_W) 'locfit/wdiag.c'; else $(CYGPATH_W) '$(srcdir)/locfit/wdiag.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/wdiag.Tpo" "$(DEPDIR)/wdiag.Po"; else rm -f "$(DEPDIR)/wdiag.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/wdiag.c' object='wdiag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o wdiag.obj `if test -f 'locfit/wdiag.c'; then $(CYGPATH_W) 'locfit/wdiag.c'; else $(CYGPATH_W) '$(srcdir)/locfit/wdiag.c'; fi` + +weight.o: 
locfit/weight.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT weight.o -MD -MP -MF "$(DEPDIR)/weight.Tpo" -c -o weight.o `test -f 'locfit/weight.c' || echo '$(srcdir)/'`locfit/weight.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/weight.Tpo" "$(DEPDIR)/weight.Po"; else rm -f "$(DEPDIR)/weight.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/weight.c' object='weight.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o weight.o `test -f 'locfit/weight.c' || echo '$(srcdir)/'`locfit/weight.c + +weight.obj: locfit/weight.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT weight.obj -MD -MP -MF "$(DEPDIR)/weight.Tpo" -c -o weight.obj `if test -f 'locfit/weight.c'; then $(CYGPATH_W) 'locfit/weight.c'; else $(CYGPATH_W) '$(srcdir)/locfit/weight.c'; fi`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/weight.Tpo" "$(DEPDIR)/weight.Po"; else rm -f "$(DEPDIR)/weight.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='locfit/weight.c' object='weight.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o weight.obj `if test -f 'locfit/weight.c'; then $(CYGPATH_W) 'locfit/weight.c'; else $(CYGPATH_W) '$(srcdir)/locfit/weight.c'; fi` + +.cpp.o: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +vari.o: locfit/vari.cpp +@am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT vari.o -MD -MP -MF "$(DEPDIR)/vari.Tpo" -c -o vari.o `test -f 'locfit/vari.cpp' || echo '$(srcdir)/'`locfit/vari.cpp; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/vari.Tpo" "$(DEPDIR)/vari.Po"; else rm -f "$(DEPDIR)/vari.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='locfit/vari.cpp' object='vari.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o vari.o `test -f 'locfit/vari.cpp' || echo '$(srcdir)/'`locfit/vari.cpp + +vari.obj: locfit/vari.cpp +@am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT vari.obj -MD -MP -MF "$(DEPDIR)/vari.Tpo" -c -o vari.obj `if test -f 'locfit/vari.cpp'; then $(CYGPATH_W) 'locfit/vari.cpp'; else $(CYGPATH_W) '$(srcdir)/locfit/vari.cpp'; fi`; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/vari.Tpo" "$(DEPDIR)/vari.Po"; else rm -f "$(DEPDIR)/vari.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='locfit/vari.cpp' object='vari.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o vari.obj `if test -f 'locfit/vari.cpp'; then $(CYGPATH_W) 'locfit/vari.cpp'; else $(CYGPATH_W) '$(srcdir)/locfit/vari.cpp'; fi` +uninstall-info-am: + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + $(mkdir_p) $(distdir)/$(top_srcdir) $(distdir)/lemon $(distdir)/lemon/bits $(distdir)/lemon/concepts $(distdir)/locfit + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS) +installdirs: + for dir in 
"$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(mkdir_p) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-noinstLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS \ + uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-noinstLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am \ + install-dist_binSCRIPTS install-exec install-exec-am \ + install-info install-info-am install-man install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-binPROGRAMS uninstall-dist_binSCRIPTS \ + uninstall-info-am + + +#gtf_reads_SOURCES = gtf_reads.cpp +#gtf_reads_LDADD = libcufflinks.a libgc.a $(BOOST_THREAD_LIB) $(BAM_LIB) +#gtf_reads_LDFLAGS = $(BOOST_LDFLAGS) $(BAM_LDFLAGS) #$(ZLIB_LDFLAGS) +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/abundances.cpp b/src/abundances.cpp new file mode 100644 index 0000000..d8f81d0 --- /dev/null +++ b/src/abundances.cpp @@ -0,0 +1,3930 @@ +/* + * abundances.cpp + * cufflinks + * + * Created by Cole Trapnell on 4/27/09. + * Copyright 2009 Cole Trapnell. All rights reserved. 
+ * + * NOTE: some of the code in this file was derived from (Eriksson et al, 2008) + */ + +#include "abundances.h" +#include +#include +#include + +#include +#include +#include +#include + +//#define BOOST_UBLAS_TYPE_CHECK 0 +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "filters.h" +#include "replicates.h" +#include "sampling.h" +#include "jensen_shannon.h" + + + + + +//#define USE_LOG_CACHE + +void compute_compatibilities(vector >& transcripts, + const vector& alignments, + vector >& compatibilities) +{ + int M = alignments.size(); + int N = transcripts.size(); + + vector alignment_scaffs; + + for (size_t i = 0; i < alignments.size(); ++i) + { + const MateHit& hit = alignments[i]; + alignment_scaffs.push_back(Scaffold(hit)); + } + + for (int j = 0; j < N; ++j) + { + shared_ptr transfrag_j = transcripts[j]->transfrag(); + for (int i = 0; i < M; ++i) + { + if (transfrag_j->contains(alignment_scaffs[i]) + && Scaffold::compatible(*transfrag_j, alignment_scaffs[i])) + { + compatibilities[j][i] = 1; + } + } + } +} + +AbundanceGroup::AbundanceGroup(const vector >& abundances, + const ublas::matrix& gamma_covariance, + const ublas::matrix& gamma_bootstrap_covariance, + const ublas::matrix& iterated_exp_count_covariance, + const ublas::matrix& count_covariance, + const ublas::matrix& fpkm_covariance, + const long double max_mass_variance, + const set >& rg_props) : + _abundances(abundances), + _iterated_exp_count_covariance(iterated_exp_count_covariance), + _count_covariance(count_covariance), + _fpkm_covariance(fpkm_covariance), + _gamma_covariance(gamma_covariance), + _gamma_bootstrap_covariance(gamma_bootstrap_covariance), + _max_mass_variance(max_mass_variance), + _salient_frags(0.0), + _total_frags(0.0), + _read_group_props(rg_props) +{ + // Calling calculate_FPKM_covariance() also estimates cross-replicate + // count variances + // calculate_FPKM_covariance(); + double fpkm_var = 0.0; + for (size_t i = 0; i < _fpkm_covariance.size1(); ++i) + { + for (size_t j = 0; j < _fpkm_covariance.size2(); ++j) + { + fpkm_var += _fpkm_covariance(i,j); + } + } + + ublas::matrix test = _count_covariance; + double ret = cholesky_factorize(test); + if (ret != 0) + { + //fprintf(stderr, "Warning: total count covariance is not positive definite!\n"); + for (size_t j = 0; j < _abundances.size(); ++j) + { + _abundances[j]->status(NUMERIC_FAIL); + } + } + + _FPKM_variance = fpkm_var; + + if (final_est_run && library_type != "transfrags") + { + test = _fpkm_covariance; + ret = cholesky_factorize(test); + if (ret != 0 || (_FPKM_variance < 0 && status() == NUMERIC_OK)) + { + //fprintf(stderr, "Warning: total count covariance is not positive definite!\n"); + for (size_t j = 0; j < _abundances.size(); ++j) + { + _abundances[j]->status(NUMERIC_FAIL); + } + } + assert (FPKM() == 0 || fpkm_var > 0 || status() != NUMERIC_OK); + } + + + + calculate_conf_intervals(); + calculate_kappas(); +} + +AbundanceStatus AbundanceGroup::status() const +{ + bool has_lowdata_member = false; + bool has_ok_member = false; + foreach(shared_ptr ab, _abundances) + { + if (ab->status() == NUMERIC_FAIL) + { + return NUMERIC_FAIL; + } + else if (ab->status() == NUMERIC_LOW_DATA) + { + has_lowdata_member = true; + //return NUMERIC_LOW_DATA; + } + else if (ab->status() == NUMERIC_HI_DATA) + { + return NUMERIC_HI_DATA; + } + else if (ab->status() == NUMERIC_OK) + { + has_ok_member = true; + } + } + + if (has_ok_member == false) + return NUMERIC_LOW_DATA; + + + + // check that the 
variance of the group is stable (w.r.t to bootstrap) + double total_cov = 0.0; + double total_gamma = 0.0; + for (size_t i = 0; i < _gamma_covariance.size1(); ++i) + { + for (size_t j = 0; j < _gamma_covariance.size2(); ++j) + { + total_cov += _gamma_covariance(i,j); + //total_bootstrap_cov += _gamma_bootstrap_covariance(i,j); + } + + + total_gamma = _abundances[i]->gamma(); + //total_cov += _gamma_covariance(i,i); + //total_gamma += _gamma_bootstrap_covariance(i,i); + + } +// if (total_cov > 0 && total_gamma > 0) +// { +// double bootstrap_gamma_delta = total_cov/total_gamma; +// //double gap = bootstrap_delta_gap * total_cov; +// if (bootstrap_gamma_delta > bootstrap_delta_gap) +// { +// return NUMERIC_LOW_DATA; +// } +// } + + return NUMERIC_OK; +} + +void TranscriptAbundance::FPKM_variance(double v) +{ + assert (v >= 0); + assert(!isnan(v)); + _FPKM_variance = v; +} + +bool AbundanceGroup::has_member_with_status(AbundanceStatus member_status) +{ + foreach(shared_ptr ab, _abundances) + { + if (ab->status() == member_status) + { + return true; + } + } + return false; +} + +double AbundanceGroup::num_fragments() const +{ + double num_f = 0; + + foreach(shared_ptr ab, _abundances) + { + num_f += ab->num_fragments(); + } + assert (!isnan(num_f)); + return num_f; +} + +double AbundanceGroup::mass_fraction() const +{ + double mass = 0; + + foreach(shared_ptr ab, _abundances) + { + mass += ab->mass_fraction(); + } + return mass; +} + +double AbundanceGroup::mass_variance() const +{ + double mass_var = 0; + + foreach(shared_ptr ab, _abundances) + { + mass_var += ab->mass_variance(); + } + return mass_var; +} + +double AbundanceGroup::FPKM() const +{ + double fpkm = 0; + + foreach(shared_ptr ab, _abundances) + { + fpkm += ab->FPKM(); + } + + return fpkm; +} + +double AbundanceGroup::gamma() const +{ + double gamma = 0; + + foreach(shared_ptr ab, _abundances) + { + gamma += ab->gamma(); + } + + return gamma; +} + +void AbundanceGroup::filter_group(const vector& to_keep, + AbundanceGroup& filtered_group) const +{ + //filtered_group = AbundanceGroup(); + + assert (to_keep.size() == _abundances.size()); + + size_t num_kept = 0; + foreach(bool keeper, to_keep) + { + num_kept += keeper; + } + + ublas::matrix new_cov = ublas::zero_matrix(num_kept,num_kept); + ublas::matrix new_iterated_em_count_cov = ublas::zero_matrix(num_kept,num_kept); + ublas::matrix new_count_cov = ublas::zero_matrix(num_kept,num_kept); + ublas::matrix new_fpkm_cov = ublas::zero_matrix(num_kept,num_kept); + ublas::matrix new_boot_cov = ublas::zero_matrix(num_kept,num_kept); + vector > new_ab; + + // rebuild covariance matrix and abundance vector after filtration + + size_t next_cov_row = 0; + for (size_t i = 0; i < _abundances.size(); ++i) + { + if (to_keep[i]) + { + new_ab.push_back(_abundances[i]); + size_t next_cov_col = 0; + for (size_t j = 0; j < _abundances.size(); ++j) + { + if (to_keep[j]) + { + new_cov(next_cov_row,next_cov_col) = _gamma_covariance(i, j); + new_iterated_em_count_cov(next_cov_row,next_cov_col) = _iterated_exp_count_covariance(i, j); + new_count_cov(next_cov_row,next_cov_col) = _count_covariance(i, j); + new_fpkm_cov(next_cov_row,next_cov_col) = _fpkm_covariance(i, j); + new_boot_cov(next_cov_row,next_cov_col) = _gamma_bootstrap_covariance(i, j); + next_cov_col++; + } + } + next_cov_row++; + } + } + + filtered_group = AbundanceGroup(new_ab, + new_cov, + new_boot_cov, + new_iterated_em_count_cov, + new_count_cov, + new_fpkm_cov, + _max_mass_variance, + _read_group_props); +} + +void 
AbundanceGroup::get_transfrags(vector >& transfrags) const +{ + transfrags.clear(); + foreach(shared_ptr pA, _abundances) + { + shared_ptr pS = pA->transfrag(); + if (pS) + { + transfrags.push_back(pA); + } + } +} + +set AbundanceGroup::gene_id() const +{ + set s; + + foreach (shared_ptr pA, _abundances) + { + set sub = pA->gene_id(); + s.insert(sub.begin(), sub.end()); + } + + return s; +} + +set AbundanceGroup::gene_name() const +{ + set s; + + foreach (shared_ptr pA, _abundances) + { + set sub = pA->gene_name(); + s.insert(sub.begin(), sub.end()); + } + + return s; +} + + +set AbundanceGroup::tss_id() const +{ + set s; + + foreach (shared_ptr pA, _abundances) + { + set sub = pA->tss_id(); + s.insert(sub.begin(), sub.end()); + } + + return s; +} + +set AbundanceGroup::protein_id() const +{ + set s; + + foreach (shared_ptr pA, _abundances) + { + set sub = pA->protein_id(); + s.insert(sub.begin(), sub.end()); + } + + return s; +} + +const string& AbundanceGroup::locus_tag() const +{ + static string default_locus_tag = "-"; + const string* pLast = NULL; + foreach (shared_ptr pA, _abundances) + { + if (pLast) + { + if (pA->locus_tag() != *pLast) + { + assert (false); + return default_locus_tag; + } + } + pLast = &(pA->locus_tag()); + } + if (pLast) + { + return *pLast; + } + assert (false); + return default_locus_tag; +} + +const string& AbundanceGroup::reference_tag() const +{ + static string default_reference_tag = "-"; + const string* pLast = NULL; + foreach (shared_ptr pA, _abundances) + { + if (pLast) + { + if (pA->reference_tag() != *pLast) + { + assert (false); + return default_reference_tag; + } + } + pLast = &(pA->reference_tag()); + } + if (pLast) + { + return *pLast; + } + assert (false); + return default_reference_tag; +} + +double AbundanceGroup::effective_length() const +{ + double eff_len = 0.0; + double group_fpkm = FPKM(); + if (group_fpkm == 0) + return 0; + foreach (shared_ptr ab, _abundances) + { + eff_len += (ab->effective_length() * (ab->FPKM() / group_fpkm)); + } + return eff_len; +} + +//void AbundanceGroup::collect_read_group_props() +//{ +// size_t M = alignments.size(); +// +// for (size_t i = 0; i < M; ++i) +// { +// if (!alignments[i].left_alignment()) +// continue; +// shared_ptr rg_props = alignments[i].read_group_props(); +// +// _read_group_props.insert(rg_props; +// } +//} + +void AbundanceGroup::calculate_locus_scaled_mass_and_variance(const vector& alignments, + const vector >& transcripts) +{ + size_t M = alignments.size(); + size_t N = transcripts.size(); + + if (transcripts.empty()) + return; + + map, double> count_per_replicate; + + for (size_t i = 0; i < M; ++i) + { + if (!alignments[i].left_alignment()) + continue; + + bool mapped = false; + for (size_t j = 0; j < N; ++j) + { + if (_abundances[j]->cond_probs()->at(i) > 0) + { + mapped = true; + break; + } + } + if (mapped) + { + shared_ptr rg_props = alignments[i].read_group_props(); + //assert (parent != NULL); + pair, double>::iterator, bool> inserted; + inserted = count_per_replicate.insert(make_pair(rg_props, 0.0)); + _read_group_props.insert(rg_props); + + double more_mass = alignments[i].collapse_mass(); + inserted.first->second += more_mass; + } + } + + double avg_X_g = 0.0; + double avg_mass_fraction = 0.0; + + // as long as all the read groups share the same dispersion model (currently true) + // then all the variances from each read group will be the same, so this + // averaging step isn't strictly necessary. Computing it this way is simply + // convenient. 
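+    // A rough sketch of what the replicate loop below computes (descriptive
+    // only; the names refer to the locals declared just below): with
+    // scaled_mass the replicate's fragment mass for this locus,
+    // scaled_total_mass its scaled total map mass, and disperser its fitted
+    // dispersion model,
+    //
+    //   avg_mass_variances[j] ~= mean over replicates of
+    //                              disperser->scale_mass_variance(scaled_mass * gamma_j)
+    //   FPKM_j                 = gamma_j * avg_mass_fraction * 10^9 / effective_length_j
+    //
+    // i.e. each isoform's count variance is the dispersion model evaluated at
+    // its share of the replicate's mass, averaged across replicates, and its
+    // FPKM comes from its share of the average mass fraction.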
+ vector avg_mass_variances(N, 0.0); + + double max_mass_var = 0.0; + for (map, double>::iterator itr = count_per_replicate.begin(); + itr != count_per_replicate.end(); + ++itr) + { + shared_ptr rg_props = itr->first; + double scaled_mass = itr->second; //rg_props->scale_mass(itr->second); + double scaled_total_mass = rg_props->scale_mass(rg_props->normalized_map_mass()); + avg_X_g += scaled_mass; + shared_ptr disperser = rg_props->mass_dispersion_model(); + for (size_t j = 0; j < N; ++j) + { + double scaled_variance; + scaled_variance = disperser->scale_mass_variance(scaled_mass * _abundances[j]->gamma()); + avg_mass_variances[j] += scaled_variance; + } + assert (disperser->scale_mass_variance(scaled_mass) != 0 || scaled_mass == 0); + max_mass_var += disperser->scale_mass_variance(scaled_mass); + assert (scaled_total_mass != 0.0); + avg_mass_fraction += (scaled_mass / scaled_total_mass); + } + + // Set the maximum mass variance in case we get an identifiability failure + // and need to bound the group expression. + if (!count_per_replicate.empty()) + max_mass_var /= count_per_replicate.size(); + + + double num_replicates = count_per_replicate.size(); + + if (num_replicates) + { + avg_X_g /= num_replicates; + avg_mass_fraction /= num_replicates; + for (size_t j = 0; j < N; ++j) + { + avg_mass_variances[j] /= num_replicates; + } + } + + assert (max_mass_var != 0 || avg_X_g == 0); + max_mass_variance(max_mass_var); + + for (size_t j = 0; j < _abundances.size(); ++j) + { + _abundances[j]->num_fragments(_abundances[j]->gamma() * avg_X_g); + + double j_avg_mass_fraction = _abundances[j]->gamma() * avg_mass_fraction; + _abundances[j]->mass_fraction(j_avg_mass_fraction); + _abundances[j]->mass_variance(avg_mass_variances[j]); + + if (j_avg_mass_fraction > 0) + { + double FPKM = j_avg_mass_fraction * 1000000000/ _abundances[j]->effective_length(); + _abundances[j]->FPKM(FPKM); + } + else + { + _abundances[j]->FPKM(0); + _abundances[j]->mass_variance(0); + _abundances[j]->mass_fraction(0); + } + } + +} + +int total_cond_prob_calls = 0; +void collapse_equivalent_hits(const vector& alignments, + vector >& transcripts, + vector >& mapped_transcripts, + vector& nr_alignments, + vector& log_conv_factors, + bool require_overlap = true) +{ + int N = transcripts.size(); + int M = alignments.size(); + + nr_alignments.clear(); + + vector > compatibilities(N, vector(M,0)); + compute_compatibilities(transcripts, alignments, compatibilities); + + vector > cached_cond_probs (M, vector()); + + vector replaced(M, false); + int num_replaced = 0; + + vector bchs; + for (size_t j = 0; j < N; ++j) + { + bchs.push_back(BiasCorrectionHelper(transcripts[j]->transfrag())); + } + + for(int i = 0 ; i < M; ++i) + { + vector cond_probs_i(N,0); + if (replaced[i] == true) + continue; + + if (cached_cond_probs[i].empty()) + { + for (int j = 0; j < N; ++j) + { + shared_ptr transfrag = transcripts[j]->transfrag(); + + if (compatibilities[j][i]==1) + { + total_cond_prob_calls++; + cond_probs_i[j] = bchs[j].get_cond_prob(alignments[i]); + } + + } + cached_cond_probs[i] = cond_probs_i; + } + else + { + cond_probs_i = cached_cond_probs[i]; + } + + MateHit* curr_align = NULL; + + nr_alignments.push_back(alignments[i]); + curr_align = &nr_alignments.back(); + log_conv_factors.push_back(0); + + if (alignments[i].is_multi()) // don't reduce other hits into multihits + continue; + + bool seen_olap = false; + + for(int k = i + 1 ; k < M; ++k) + { + if (replaced[k] || alignments[k].is_multi() || alignments[i].read_group_props() != 
alignments[k].read_group_props()) + continue; + if (require_overlap && !::overlap_in_genome(curr_align->left(), curr_align->right(), + alignments[k].left(), alignments[k].right())) + { + if (seen_olap) + break; + else + continue; + } + else + { + seen_olap = true; + } + + vector* cond_probs_k; + double last_cond_prob = -1; + + bool equiv = true; + + if (cached_cond_probs[k].empty()) + { + cached_cond_probs[k] = vector(N, 0.0); + cond_probs_k = &cached_cond_probs[k]; + for (int j = 0; j < N; ++j) + { + shared_ptr transfrag = transcripts[j]->transfrag(); + + if (compatibilities[j][k]==1) + { + total_cond_prob_calls++; + (*cond_probs_k)[j] = bchs[j].get_cond_prob(alignments[k]); + } + } + //cached_cond_probs[k] = cond_probs_k; + } + else + { + cond_probs_k = &cached_cond_probs[k]; + } + + + for (int j = 0; j < N; ++j) + { + if ((*cond_probs_k)[j] != 0 && cond_probs_i[j] != 0) + { + double ratio = (*cond_probs_k)[j] / cond_probs_i[j]; + if (last_cond_prob == -1) + { + //assert(ratio < 5); + last_cond_prob = ratio; + } + else + { + if (last_cond_prob != ratio) + { + equiv = false; + break; + } + } + } + else if ((*cond_probs_k)[j] == 0 && cond_probs_i[j] == 0) + { + // just do nothing in this iter. + // last_cond_prob = 0.0; + } + else + { + equiv = false; + break; + } + } + + // cond_prob_i vector is a scalar multiple of cond_prob_k, so we + // can collapse k into i via the mass. + if (equiv && last_cond_prob > 0.0) + { + assert(curr_align->read_group_props() == alignments[k].read_group_props()); + assert (last_cond_prob > 0); + //double mass_muliplier = sqrt(last_cond_prob); + double mass_multiplier = log(last_cond_prob); + //assert(last_cond_prob < 5); + assert (!isinf(mass_multiplier) && !isnan(mass_multiplier)); + log_conv_factors[log_conv_factors.size() - 1] += mass_multiplier; + replaced[k] = true; + cached_cond_probs[k].clear(); + vector(cached_cond_probs[k]).swap(cached_cond_probs[k]); + num_replaced++; + + //double scale_factor = alignments[k].common_scale_mass(); + //double curr_align_mass = curr_align->collapse_mass(); + + //double more_mass = alignments[k].common_scale_mass() * alignments[k].collapse_mass() ; + double more_mass = alignments[k].collapse_mass(); + curr_align->incr_collapse_mass(more_mass); + } + } + } + + N = transcripts.size(); + //M = nr_alignments.size(); + + for (int j = 0; j < N; ++j) + { + shared_ptr transfrag = transcripts[j]->transfrag(); + vector& cond_probs = *(new vector(nr_alignments.size(),0)); + + BiasCorrectionHelper& bch = bchs[j]; + + size_t last_cond_prob_idx = 0; + for(int i = 0 ; i < M; ++i) + { + if (!cached_cond_probs[i].empty()) + { + if (compatibilities[j][i]==1) + { + assert (cached_cond_probs[i].size() > j); + cond_probs[last_cond_prob_idx] = cached_cond_probs[i][j]; + } + last_cond_prob_idx++; + } + } + + assert (last_cond_prob_idx == nr_alignments.size()); + + transcripts[j]->effective_length(bch.get_effective_length()); + transcripts[j]->cond_probs(&cond_probs); + + if (bch.is_mapped()) + mapped_transcripts.push_back(transcripts[j]); + } + if (nr_alignments.size()) + { + verbose_msg("\nReduced %lu frags to %lu (%lf percent)\n", alignments.size(), nr_alignments.size(), 100.0 * nr_alignments.size()/(double)alignments.size()); + } +} + +void collapse_equivalent_hits_helper(const vector& alignments, + vector >& transcripts, + vector >& mapped_transcripts, + vector& nr_alignments, + vector& log_conv_factors) +{ + int N = transcripts.size(); + int M = alignments.size(); + + // If there's a lot of transcripts, just use the old, overlap 
constrained + // version of the equivalence collapse. + if (N > 24) + { + collapse_equivalent_hits(alignments, + transcripts, + mapped_transcripts, + nr_alignments, + log_conv_factors, + true); + return; + } + + vector > compat_table(1 << N); + vector > compatibilities(N, vector(M,0)); + compute_compatibilities(transcripts, alignments, compatibilities); + + for(int i = 0; i < M; ++i) + { + size_t compat_mask = 0; + for (int j = 0; j < N; ++j) + { + compat_mask |= ((compatibilities[j][i] !=0) << j); + } + assert (compat_mask < compat_table.size()); + compat_table[compat_mask].push_back(&(alignments[i])); + } + + for (size_t i = 0; i < compat_table.size(); ++i) + { + vector tmp_hits; + vector tmp_nr_hits; + vector tmp_log_conv_factors; + vector > tmp_mapped_transcripts; + for (size_t j = 0; j < compat_table[i].size(); ++j) + { + tmp_hits.push_back(*(compat_table[i][j])); + } + if (tmp_hits.empty()) + continue; + collapse_equivalent_hits(tmp_hits, + transcripts, + tmp_mapped_transcripts, + tmp_nr_hits, + tmp_log_conv_factors, + false); + copy(tmp_nr_hits.begin(), tmp_nr_hits.end(), back_inserter(nr_alignments)); + copy(tmp_log_conv_factors.begin(), tmp_log_conv_factors.end(), back_inserter(log_conv_factors)); + } +} + +#define PERFORM_EQUIV_COLLAPSE 1 + +void AbundanceGroup::calculate_abundance(const vector& alignments) +{ + vector > transcripts; + get_transfrags(transcripts); + vector > mapped_transcripts; // This collects the transcripts that have alignments mapping to them + + vector nr_alignments; + + if (cond_prob_collapse) + { + collapse_hits(alignments, nr_alignments); + } + else + { + nr_alignments = alignments; + } + + vector non_equiv_alignments; + vector log_conv_factors; + if (cond_prob_collapse) + { + collapse_equivalent_hits_helper(nr_alignments, transcripts, mapped_transcripts, non_equiv_alignments, log_conv_factors); + assert (non_equiv_alignments.size() == log_conv_factors.size()); + log_conv_factors = vector(nr_alignments.size(), 0); + nr_alignments.clear(); + mapped_transcripts.clear(); + compute_cond_probs_and_effective_lengths(non_equiv_alignments, transcripts, mapped_transcripts); + } + else + { + non_equiv_alignments = nr_alignments; + compute_cond_probs_and_effective_lengths(non_equiv_alignments, transcripts, mapped_transcripts); + } + + calculate_gammas(non_equiv_alignments, log_conv_factors, transcripts, mapped_transcripts); + + //non_equiv_alignments.clear(); + //collapse_hits(alignments, nr_alignments); + //This will also compute the transcript level FPKMs + calculate_locus_scaled_mass_and_variance(non_equiv_alignments, transcripts); + + calculate_iterated_exp_count_covariance(non_equiv_alignments, transcripts); + + // Refresh the variances to match the new gammas computed during iterated + // expectation + calculate_locus_scaled_mass_and_variance(non_equiv_alignments, transcripts); + + + if(corr_multi && !final_est_run) + { + update_multi_reads(non_equiv_alignments, mapped_transcripts); + } + + if (final_est_run) // Only on last estimation run + { + // Calling calculate_FPKM_covariance() also estimates cross-replicate + // count variances + calculate_FPKM_covariance(); + + // Derive confidence intervals from the FPKM variance/covariance matrix + calculate_conf_intervals(); + + // Calculate the inter-group relative abundances and variances + calculate_kappas(); + } + + for (size_t i = 0; i < _abundances.size(); ++i) + { + for (size_t j = 0; j < _abundances.size(); ++j) + { + if (i != j) + { + if (_abundances[i]->transfrag()->contains(*_abundances[j]->transfrag()) 
&& + Scaffold::compatible(*_abundances[i]->transfrag(),*_abundances[j]->transfrag())) + { + _abundances[j]->status(NUMERIC_LOW_DATA); + } + } + } + } + + //fprintf(stderr, "Total calls to get_cond_prob = %d\n", total_cond_prob_calls); +} + +void AbundanceGroup::update_multi_reads(const vector& alignments, vector > transcripts) +{ + size_t M = alignments.size(); + size_t N = transcripts.size(); + + if (transcripts.empty()) + return; + + for (size_t i = 0; i < M; ++i) + { + if (alignments[i].is_multi()) + { + double expr = 0.0; + for (size_t j = 0; j < N; ++j) + { + expr += _abundances[j]->cond_probs()->at(i) * _abundances[j]->FPKM() * _abundances[j]->effective_length(); + } + alignments[i].read_group_props()->multi_read_table()->add_expr(alignments[i], expr); + } + } +} + + +long double solve_beta(long double A, long double B, long double C) +{ + long double a = -C/B; + long double b = (A + 4*A*C/(B*B) - (4*C/B)); + long double c = -A + B - 5*A*A*C/(B*B*B) + 10*A*C/(B*B) - 5*C/B; + long double d = 2*A*A*A*C/(B*B*B*B) - 6*A*A*C/(B*B*B) + 6*A*C/(B*B) - 2*C/B; + complex q((3*a*c - b*b)/(a*a*9.0)); + complex r((9.0*a*c*b - 27.0*a*a*d - 2.0*b*b*b)/(a*a*a*54.0)); + complex s1 = std::pow((r + std::sqrt(q*q*q + r*r)),complex(1/3.0)); + complex s2 = std::pow((r - std::sqrt(q*q*q + r*r)),complex(1/3.0)); + complex R1 = s1 + s2 - complex(b/(a*3.0)); + complex R2 = -(s1+s2)/complex(2.0) - complex(b/(a*3.0)) + (s1-s2) * complex(0, sqrtl(3.0)/2.0); + complex R3 = -(s1+s2)/complex(2.0) - complex(b/(a*3.0)) - (s1-s2) * complex(0, sqrtl(3.0)/2.0); + + vector roots; + if (R1.imag() == 0) + roots.push_back(R1.real()); + if (R2.imag() == 0) + roots.push_back(R2.real()); + if (R3.imag() == 0) + roots.push_back(R3.real()); + sort(roots.begin(), roots.end()); + + if (roots.empty()) + return 0; + + long double root = roots.back(); + return root; +} + + +// This function takes the point estimate of the number of fragments from +// a transcript, the iterated expection count matrix, and the locus level +// cross replicate variance, and calculates the transcript-level cross-replicate +// count variance +bool estimate_count_variance(long double& variance, + double gamma_t, + double psi_t_count_var, + double X_g, + double V_X_g_t, + double l_t, + double M) +{ + if (l_t == 0) + { + return 0; + } + + long double A = X_g * gamma_t; + + long double B = V_X_g_t; + + long double C = psi_t_count_var; + + variance = 0.0; + bool numeric_ok = true; + + long double dispersion = V_X_g_t - (X_g * gamma_t); + + if (psi_t_count_var < 0) + { + //fprintf (stderr, "Warning: psi_t is negative! (psi_t = %lf)\n", psi_t); + psi_t_count_var = 0; + } + assert (psi_t_count_var >= 0); + + // we multiply A with the constants here to make things work out + // at the end of the routine when we multiply by the square of those + // constants + long double poisson_variance = A + psi_t_count_var; + long double alpha = 0.0; + long double beta = 0.0; + long double bnb_mean = 0.0; + long double r = 0.0; + + if (dispersion < -1 || abs(dispersion) < 1) + { + // default to poisson dispersion + variance = poisson_variance; + } + else // there's some detectable overdispersion here, use mixture of negative binomials + { + if (psi_t_count_var < 1) + { + // default to regular negative binomial case. 
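+            // i.e. the cross-replicate variance estimate V_X_g_t is used directly.
+            //
+            // A sketch of how the branches of this routine choose a variance
+            // model, with A = X_g * gamma_t, B = V_X_g_t and C = psi_t_count_var
+            // as set above:
+            //
+            //   B - A < -1 or |B - A| < 1  ->  Poisson-like:       Var = A + C
+            //   C < 1 (this branch)        ->  negative binomial:  Var = B
+            //   otherwise                  ->  beta negative binomial, taking beta
+            //       as the largest real root of the cubic set up in solve_beta(),
+            //       alpha = 1 - (A/(A-B))*beta, r = ceil(A^2/(B-A)), and
+            //       Var = r*(alpha+r-1)*beta*(alpha+beta-1) / ((alpha-2)*(alpha-1)^2),
+            //       falling back to B (or the Poisson value) when the solution
+            //       comes out of range.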
+ variance = V_X_g_t; + } + else + { + r = ceil((A * A) / (B - A)); + + if (r < 0) + { + numeric_ok = false; + } + + // exact cubic + beta = solve_beta(A,B,C); + alpha = 1.0 - (A/(A-B)) * beta; + + if (beta <= 2 || alpha <= 1) + { + //printf ("Warning: beta for is %Lg\n", beta); + numeric_ok = false; + variance = V_X_g_t; + } + else + { + bnb_mean = r * beta / (alpha - 1.0); + variance = r * (alpha + r - 1.0) * beta * (alpha + beta - 1); + variance /= (alpha - 2.0) * (alpha - 1.0) * (alpha - 1.0); + } + if (variance < 0) + { + numeric_ok = false; + variance = V_X_g_t; + } + + if (variance == 0 && A != 0) + { + variance = poisson_variance; + } + + assert (!numeric_ok || variance >= poisson_variance); + assert (!numeric_ok || variance >= V_X_g_t); + + if (variance < poisson_variance) + variance = poisson_variance; + + if (variance < V_X_g_t) + variance = V_X_g_t; + + //assert (abs(FPKM - mean) < 1e-3); + } + } + + if (variance < 0) + variance = 0; + + variance = ceil(variance); + + assert (!numeric_ok || (!isinf(variance) && !isnan(variance))); + assert (!numeric_ok || variance != 0 || A == 0); + return numeric_ok; +} + +//bool estimate_group_count_variance(long double& variance, +// const vector& gammas, +// const ublas::matrix& psis, +// double X_g, +// const vector& V_X_gs, +// const vector& ls, +// double M) +//{ +// size_t len = gammas.size(); +// if (len == 1) +// return estimate_count_variance(variance, gammas.front(), 0.0, X_g, V_X_gs.front(), ls.front(), M); +// +// double total_var = 0.0; +// bool numeric_ok = true; +// for (size_t i = 0; i < len; ++i) +// { +// bool ok = true; +// long double var = 0.0; +// ok = _count_covariance; +// total_var += var; +// } +// +// double cov = 0.0; +// +// for (size_t i = 0; i < len; ++i) +// { +// for (size_t j = 0; j < len; ++j) +// { +// if (ls[i] && ls[j]) +// { +// assert(!isnan(psis(i,j))); +// double L = ls[i] * ls[j]; +// assert(!isnan(L)); +// if (L != 0.0) +// { +// double g = psis(i,j) / L; +// cov += g; +// } +// } +// } +// } +// +// double C = (1000000000.0 / M); +// C *= C; +// cov *= C; +// +// if (cov < 0) +// { +// //fprintf (stderr, "Warning: cov is negative! 
(cov = %lf)\n", cov); +// cov = 0; +// } +// +// assert (!numeric_ok || cov >= 0.0); +// +// variance = total_var + cov; +// assert (!isinf(variance) && !isnan(variance)); +// +// return numeric_ok; +//} + +void AbundanceGroup::estimate_count_covariance() +{ + vector gammas; + vector ls; + vector V_X_gs; + + for (size_t j = 0; j < _abundances.size(); ++j) + { + gammas.push_back(_abundances[j]->gamma()); + ls.push_back(_abundances[j]->effective_length()); + V_X_gs.push_back(_abundances[j]->mass_variance()); + } + + _count_covariance = ublas::zero_matrix(_abundances.size(), _abundances.size()); + + AbundanceStatus group_status = status(); + + if (group_status == NUMERIC_OK || group_status == NUMERIC_LOW_DATA) + { + // This will compute the transcript level cross-replicate counts + for (size_t j = 0; j < _abundances.size(); ++j) + { + if (_abundances[j]->effective_length() > 0.0 && mass_fraction() > 0) + { + assert (!isnan(_gamma_covariance(j,j))); + + long double count_var = 0.0; + + bool numerics_ok = estimate_count_variance(count_var, + _abundances[j]->gamma(), + _iterated_exp_count_covariance(j,j), + num_fragments(), + _abundances[j]->mass_variance(), + _abundances[j]->effective_length(), + num_fragments()/mass_fraction()); + if (numerics_ok == false) + { + _abundances[j]->status(NUMERIC_LOW_DATA); + } + else + { + assert (!isinf(count_var) && !isnan(count_var)); + _count_covariance(j,j) = count_var; + } + } + else + { + // nothing to do here, variances and covariances should be zero. + //assert(false); + } + } + + if (group_status == NUMERIC_LOW_DATA) + { + // if the entire group is unstable, then set LOWDATA on all members of + // it to reduce false positives in differential expression analysis. + foreach(shared_ptr ab, _abundances) + { + ab->status(NUMERIC_LOW_DATA); + } + } + + if (_abundances.size() > 1) + { + for (size_t j = 0; j < _abundances.size(); ++j) + { + double scale_j = 0.0; + double poisson_variance_j = _abundances[j]->num_fragments(); + if (poisson_variance_j == 0) + { + scale_j = 0.0; + } + else + { + + scale_j = _abundances[j]->mass_variance() / poisson_variance_j; +// if (-scale_j * _iterated_exp_count_covariance(i,j) > _abundances[j]->mass_variance()) +// scale_j = -_abundances[j]->mass_variance() / _iterated_exp_count_covariance(i,j); + } + for (size_t i = 0; i < _abundances.size(); ++i) + { + if (i != j) + { + double scale_i = 0.0; + double poisson_variance_i = _abundances[i]->num_fragments(); + if (poisson_variance_i == 0) + { + scale_i = 0.0; + } + else + { + scale_i = _abundances[i]->mass_variance() / poisson_variance_i; + } + if (scale_i != 0 && scale_j != 0) + { + double poisson_scale = sqrt(scale_j) * sqrt(scale_i); + + double before = _iterated_exp_count_covariance(i,j); + + long double scale = poisson_scale; + + assert (!isinf(scale) && !isnan(scale)); + if (scale < 1.0) + scale = 1.0; + + double after = scale * before; + //assert (after <= _abundances[i]->mass_variance() + _abundances[j]->mass_variance()); + + assert (_iterated_exp_count_covariance(i,j) <= 0); + assert (before >= after); + _count_covariance(i,j) = after; + } + else + { + _count_covariance(i,j) = 0; + } + assert (!isinf(_count_covariance(i,j)) && !isnan(_count_covariance(i,j))); + // TODO: attach per-transcript cross-replicate count variance here? + } + } + } + } + } + else + { + // if we get here, there was an EM or IS failure, and the covariances can't be reliably calculated. 
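+        // In that case _count_covariance simply keeps the zero matrix it was
+        // initialized with at the top of this function.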
+ // assert(false); + } + + ublas::matrix test = _count_covariance; + double ret = cholesky_factorize(test); + if (ret != 0) + { + //fprintf(stderr, "Warning: total count covariance is not positive definite!\n"); + for (size_t j = 0; j < _abundances.size(); ++j) + { + _abundances[j]->status(NUMERIC_FAIL); + } + } + +// cerr << "full count: " << endl; +// for (unsigned i = 0; i < _count_covariance.size1 (); ++ i) +// { +// ublas::matrix_row > mr (_count_covariance, i); +// cerr << i << " : " << _abundances[i]->num_fragments() << " : "; +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "======" << endl; + +// cerr << "ITERATED:" << endl; +// cerr <<_iterated_exp_count_covariance << endl; +// +// cerr << "ITERATED:" << endl; +// cerr <<_iterated_exp_count_covariance << endl; +} + +void AbundanceGroup::calculate_FPKM_covariance() +{ + if (mass_fraction() == 0 || effective_length() == 0) + { + _fpkm_covariance = ublas::zero_matrix(_abundances.size(), _abundances.size()); + return; + } + + long double M = num_fragments()/mass_fraction(); + + estimate_count_covariance(); + + long double total_var = 0.0; + long double total_count_var = 0.0; + long double total_iterated = 0.0; + + double dummy_var = 0.0; + + double abundance_weighted_length = 0.0; + double total_abundance = 0.0; + for (size_t j = 0; j < _abundances.size(); ++j) + { + abundance_weighted_length += _abundances[j]->effective_length() * _abundances[j]->FPKM(); + total_abundance += _abundances[j]->FPKM(); + + for (size_t i = 0; i < _abundances.size(); ++i) + { + _fpkm_covariance(i,j) = _count_covariance(i,j); + assert (!isinf(_count_covariance(i,j)) && !isnan(_fpkm_covariance(i,j))); + + long double length_i = _abundances[i]->effective_length(); + long double length_j = _abundances[j]->effective_length(); + assert (!isinf(length_i) && !isnan(length_i)); + assert (!isinf(length_j) && !isnan(length_j)); + if (length_i > 0 && length_j > 0) + { + _fpkm_covariance(i,j) *= + ((1000000000.0 / (length_j *M)))*((1000000000.0 / (length_i *M))); + assert (!isinf(_fpkm_covariance(i,j)) && !isnan(_fpkm_covariance(i,j))); + assert (_fpkm_covariance(i,j) <= _fpkm_covariance(i,i)+_fpkm_covariance(j,j)); + + } + else + { + _fpkm_covariance(i,j) = 0.0; + } + + if (i == j) + { + assert (_abundances[i]->FPKM() == 0 || _fpkm_covariance(i,j) > 0 || _abundances[i]->status() != NUMERIC_OK); + _abundances[i]->FPKM_variance(_fpkm_covariance(i,j)); + dummy_var += _fpkm_covariance(i,i); + } + else + { + dummy_var += _iterated_exp_count_covariance(i,j) * ((1000000000.0 / (length_j *M)))*((1000000000.0 / (length_i *M)));; + } + + total_count_var += _count_covariance(i,j); + total_var += _fpkm_covariance(i,j); + total_iterated += _iterated_exp_count_covariance(i,j); + } + } + + _FPKM_variance = total_var; + if (final_est_run && library_type != "transfrags") + { + ublas::matrix test = _fpkm_covariance; + double ret = cholesky_factorize(test); + if (ret != 0 || (_FPKM_variance < 0 && status() == NUMERIC_OK)) + { + //fprintf(stderr, "Warning: total count covariance is not positive definite!\n"); + for (size_t j = 0; j < _abundances.size(); ++j) + { + _abundances[j]->status(NUMERIC_FAIL); + } + } + + assert (FPKM() == 0 || _FPKM_variance > 0 || status() != NUMERIC_OK); + } + assert (!isinf(_FPKM_variance) && !isnan(_FPKM_variance)); +} + +void AbundanceGroup::calculate_conf_intervals() +{ + // We only really ever call this function for primary abundance groups + // (i.e. 
the transcript groups and read bundles with which we calculate + // transcript MLE expression levels. Genes, TSS groups, etc get broken + // off of primary bundles, so we should not call this function on those + // secondary groups. The group splitting code needs to manage the task + // of splitting up all the variout covariance matrices we're calculating + // here. + if (status() == NUMERIC_OK) + { + // This will compute the transcript level FPKM confidence intervals + for (size_t j = 0; j < _abundances.size(); ++j) + { + if (_abundances[j]->effective_length() > 0.0 && mass_fraction() > 0) + { + assert (!isnan(_gamma_covariance(j,j))); + + long double fpkm_var = _abundances[j]->FPKM_variance(); + double FPKM_hi = 0.0; + double FPKM_lo = 0.0; + if (_abundances[j]->status() != NUMERIC_FAIL) + { + FPKM_hi = _abundances[j]->FPKM() + 2 * sqrt(fpkm_var); + FPKM_lo = max(0.0, (double)(_abundances[j]->FPKM() - 2 * sqrt(fpkm_var))); + if (!(FPKM_lo <= _abundances[j]->FPKM() && _abundances[j]->FPKM() <= FPKM_hi)) + { + //fprintf(stderr, "Error: confidence intervals are illegal! var = %Lg, fpkm = %lg, lo = %lg, hi %lg, status = %d\n", fpkm_var, _abundances[j]->FPKM(), FPKM_lo, FPKM_hi, _abundances[j]->status()); + } + assert (FPKM_lo <= _abundances[j]->FPKM() && _abundances[j]->FPKM() <= FPKM_hi); + ConfidenceInterval conf(FPKM_lo, FPKM_hi); + _abundances[j]->FPKM_conf(conf); + //_abundances[j]->FPKM_variance(fpkm_var); + } + else + { + // we shouldn't be able to get here + assert(false); + // TODO: nothing to do here? + } + } + else + { + _abundances[j]->FPKM_conf(ConfidenceInterval(0.0, 0.0)); + //_abundances[j]->FPKM_variance(0.0); + } + } + + // Now build a confidence interval for the whole abundance group + double group_fpkm = FPKM(); + if (group_fpkm > 0.0) + { + double FPKM_hi = FPKM() + 2 * sqrt(FPKM_variance()); + double FPKM_lo = max(0.0, FPKM() - 2 * sqrt(FPKM_variance())); + ConfidenceInterval conf(FPKM_lo, FPKM_hi); + FPKM_conf(conf); + } + else + { + _FPKM_variance = 0.0; + ConfidenceInterval conf(0.0, 0.0); + FPKM_conf(conf); + } + } + else + { + double sum_transfrag_FPKM_hi = 0; + double max_fpkm = 0.0; + //double min_fpkm = 1e100; + foreach(shared_ptr pA, _abundances) + { + double FPKM_hi; + double FPKM_lo; + if (pA->effective_length() > 0) + { + double norm_frag_density = 1000000000; + norm_frag_density /= pA->effective_length(); + + norm_frag_density *= mass_fraction(); + double fpkm_high = norm_frag_density; + + double var_fpkm = fpkm_high; + + FPKM_hi = fpkm_high + 2 * sqrt(var_fpkm); + FPKM_lo = 0.0; + ConfidenceInterval conf(FPKM_lo, FPKM_hi); + assert (FPKM_lo <= pA->FPKM() && pA->FPKM() <= FPKM_hi); + pA->FPKM_conf(conf); + //pA->FPKM_variance(var_fpkm); + max_fpkm = max(sum_transfrag_FPKM_hi, FPKM_hi); + } + else + { + FPKM_hi = 0.0; + FPKM_lo = 0.0; + ConfidenceInterval conf(0.0, 0.0); + pA->FPKM_conf(conf); + //pA->FPKM_variance(0.0); + } + + } + // In the case of a numeric failure, the groups error bars need to be + // set such that + FPKM_conf(ConfidenceInterval(0.0, max_fpkm + 2 * sqrt(FPKM_variance()))); + } +} + + +//void AbundanceGroup::calculate_conf_intervals() +//{ +// if (status() == NUMERIC_OK) +// { +// // This will compute the transcript level FPKM confidence intervals +// for (size_t j = 0; j < _abundances.size(); ++j) +// { +// //fprintf(stderr, "%s\n", _abundances[j]->description().c_str()); +// if (_abundances[j]->effective_length() > 0.0 && mass_fraction() > 0) +// { +// assert (!isnan(_gamma_covariance(j,j))); +// +// long double fpkm_var = 0.0; +// 
double FPKM_hi = 0.0; +// double FPKM_lo = 0.0; +// +// bool numerics_ok = calculate_fpkm_variance(fpkm_var, +// _abundances[j]->gamma(), +// _iterated_exp_count_covariance(j,j), +// num_fragments(), +// _abundances[j]->mass_variance(), +// _abundances[j]->effective_length(), +// num_fragments()/mass_fraction()); +// if (numerics_ok == false) +// { +// _abundances[j]->status(NUMERIC_LOW_DATA); +// } +// else +// { +// double gamma_cov_j = _gamma_covariance(j,j); +// double bootstrap_j = _gamma_bootstrap_covariance(j,j); +// double bootstrap_gamma_delta = abs(bootstrap_j - gamma_cov_j); +// if (bootstrap_gamma_delta > bootstrap_delta_gap * gamma_cov_j && _abundances.size() > 1) +// { +// _abundances[j]->status(NUMERIC_LOW_DATA); +// } +// } +// +// +// if (fpkm_var < 0) +// { +// //fprintf(stderr, "Warning: FPKM variance < 0 (FPKM = %lf, FPKM variance = %Lf\n", _abundances[j]->FPKM(), fpkm_var); +// } +// +// FPKM_hi = _abundances[j]->FPKM() + 2 * sqrt(fpkm_var); +// FPKM_lo = max(0.0, (double)(_abundances[j]->FPKM() - 2 * sqrt(fpkm_var))); +// assert (!numerics_ok || FPKM_lo <= _abundances[j]->FPKM() && _abundances[j]->FPKM() <= FPKM_hi); +// ConfidenceInterval conf(FPKM_lo, FPKM_hi); +// _abundances[j]->FPKM_conf(conf); +// _abundances[j]->FPKM_variance(fpkm_var); +// } +// else +// { +// _abundances[j]->FPKM_conf(ConfidenceInterval(0.0, 0.0)); +// _abundances[j]->FPKM_variance(0.0); +// } +// } +// +// double group_fpkm = FPKM(); +// if (group_fpkm > 0.0) +// { +// calculate_FPKM_variance(); +// double FPKM_hi = FPKM() + 2 * sqrt(FPKM_variance()); +// double FPKM_lo = max(0.0, FPKM() - 2 * sqrt(FPKM_variance())); +// ConfidenceInterval conf(FPKM_lo, FPKM_hi); +// FPKM_conf(conf); +// } +// else +// { +// _FPKM_variance = 0.0; +// ConfidenceInterval conf(0.0, 0.0); +// FPKM_conf(conf); +// } +// } +// else +// { +// double sum_transfrag_FPKM_hi = 0; +// double max_fpkm = 0.0; +// //double min_fpkm = 1e100; +// foreach(shared_ptr pA, _abundances) +// { +// double FPKM_hi; +// double FPKM_lo; +// if (pA->effective_length() > 0) +// { +// double norm_frag_density = 1000000000; +// norm_frag_density /= pA->effective_length(); +// +// norm_frag_density *= mass_fraction(); +// double fpkm_high = norm_frag_density; +// +// double var_fpkm = fpkm_high; +// +// FPKM_hi = fpkm_high + 2 * sqrt(var_fpkm); +// FPKM_lo = 0.0; +// ConfidenceInterval conf(FPKM_lo, FPKM_hi); +// assert (FPKM_lo <= pA->FPKM() && pA->FPKM() <= FPKM_hi); +// pA->FPKM_conf(conf); +// pA->FPKM_variance(var_fpkm); +// max_fpkm = max(sum_transfrag_FPKM_hi, FPKM_hi); +// } +// else +// { +// FPKM_hi = 0.0; +// FPKM_lo = 0.0; +// ConfidenceInterval conf(0.0, 0.0); +// pA->FPKM_conf(conf); +// pA->FPKM_variance(0.0); +// } +// +// } +// calculate_FPKM_variance(); +// // In the case of a numeric failure, the groups error bars need to be +// // set such that +// FPKM_conf(ConfidenceInterval(0.0, max_fpkm + 2 * sqrt(FPKM_variance()))); +// +// } +//} +// +//void AbundanceGroup::calculate_FPKM_variance() +//{ +// if (mass_fraction() == 0 || effective_length() == 0) +// { +// _FPKM_variance = 0.0; +// return; +// } +// +// vector gammas; +// vector ls; +// vector V_X_gs; +// +// for (size_t j = 0; j < _abundances.size(); ++j) +// { +// gammas.push_back(_abundances[j]->gamma()); +// ls.push_back(_abundances[j]->effective_length()); +// V_X_gs.push_back(_abundances[j]->mass_variance()); +// } +// +// if (status() == NUMERIC_OK) +// { +// long double var = 0.0; +// compute_fpkm_group_variance(var, +// gammas, +// 
_iterated_exp_count_covariance, +// num_fragments(), +// V_X_gs, +// ls, +// num_fragments()/mass_fraction()); +// _FPKM_variance = var; +// } +// else +// { +// long double max_var = 0.0; +// for (size_t i = 0; i < _abundances.size(); ++i) +// { +// bool ok = true; +// long double var = 0.0; +// ok = compute_fpkm_variance(var, 1.0, 0.0, num_fragments(), max_mass_variance(), ls[i], num_fragments()/mass_fraction()); +// max_var = max(max_var,var); +// } +// _FPKM_variance = max_var; +// assert (_FPKM_variance != 0 || FPKM() == 0); +// } +// +// assert (!isinf(_FPKM_variance) && !isnan(_FPKM_variance)); +//} + +void AbundanceGroup::compute_cond_probs_and_effective_lengths(const vector& alignments, + vector >& transcripts, + vector >& mapped_transcripts) +{ + int N = transcripts.size(); + int M = alignments.size(); + + vector > compatibilities(N, vector(M,0)); + compute_compatibilities(transcripts, alignments, compatibilities); + + for (int j = 0; j < N; ++j) + { + shared_ptr transfrag = transcripts[j]->transfrag(); + vector& cond_probs = *(new vector(M,0)); + + BiasCorrectionHelper bch(transfrag); + + for(int i = 0 ; i < M; ++i) + { + if (compatibilities[j][i]==1) + { + total_cond_prob_calls++; + cond_probs[i] = bch.get_cond_prob(alignments[i]); + } + } + + transcripts[j]->effective_length(bch.get_effective_length()); + transcripts[j]->cond_probs(&cond_probs); + + if (bch.is_mapped()) + mapped_transcripts.push_back(transcripts[j]); + } +} + + +double trace(const ublas::matrix& m) +{ + + double t = 0.0; + for (size_t i = 0.0; i < m.size1(); ++i) + { + t += m(i,i); + } + + return t; +} + + +// FIXME: This function doesn't really need to copy the transcripts out of +// the cluster. Needs refactoring +bool AbundanceGroup::calculate_gammas(const vector& nr_alignments, + const vector& log_conv_factors, + const vector >& transcripts, + const vector >& mapped_transcripts) +{ + if (mapped_transcripts.empty()) + { + //gammas = vector(transfrags.size(), 0.0); + foreach (shared_ptr ab, _abundances) + { + ab->gamma(0); + } + _gamma_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _count_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _iterated_exp_count_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _fpkm_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _gamma_bootstrap_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + return true; + } + + vector gammas; + + verbose_msg( "Calculating intial MLE\n"); + + AbundanceStatus mle_success = gamma_mle(mapped_transcripts, + nr_alignments, + log_conv_factors, + gammas); + + verbose_msg( "Tossing likely garbage isoforms\n"); + + for (size_t i = 0; i < gammas.size(); ++i) + { + if (isnan(gammas[i])) + { + verbose_msg("Warning: isoform abundance is NaN!\n"); + } + } + + double locus_mass = 0.0; + + for (size_t i = 0; i < nr_alignments.size(); ++i) + { + const MateHit& alignment = nr_alignments[i]; + locus_mass += alignment.collapse_mass(); + } + + vector > filtered_transcripts = mapped_transcripts; + vector filtered_gammas = gammas; + filter_junk_isoforms(filtered_transcripts, filtered_gammas, mapped_transcripts, locus_mass); + + if (filtered_transcripts.empty()) + { + //gammas = vector(transfrags.size(), 0.0); + foreach (shared_ptr ab, _abundances) + { + ab->gamma(0); + } + _gamma_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _count_covariance = ublas::zero_matrix(transcripts.size(), + 
transcripts.size()); + _iterated_exp_count_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _fpkm_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + _gamma_bootstrap_covariance = ublas::zero_matrix(transcripts.size(), + transcripts.size()); + return true; + } + + if (filtered_transcripts.size() != mapped_transcripts.size()) + { + filtered_gammas.clear(); + + verbose_msg( "Revising MLE\n"); + + mle_success = gamma_mle(filtered_transcripts, + nr_alignments, + log_conv_factors, + filtered_gammas); + } + + for (size_t i = 0; i < filtered_gammas.size(); ++i) + { + if (isnan(filtered_gammas[i])) + { + verbose_msg("Warning: isoform abundance is NaN!\n"); + } + } + + size_t N = transcripts.size(); + + set > rg_props; + for (size_t i = 0; i < nr_alignments.size(); ++i) + { + rg_props.insert(nr_alignments[i].read_group_props()); + } + + AbundanceStatus map_success = NUMERIC_OK; + if (final_est_run) // Only on last estimation run. + { + ublas::vector gamma_mle(filtered_gammas.size()); + std::copy(filtered_gammas.begin(), filtered_gammas.end(), gamma_mle.begin()); + + ublas::vector gamma_map_estimate = ublas::zero_vector(filtered_gammas.size()); + ublas::matrix gamma_map_covariance = ublas::zero_matrix(N,N); + double cross_replicate_js = 0.0; + + ublas::matrix empir_covariance = ublas::zero_matrix(N,N); + + } + + for (size_t i = 0; i < filtered_gammas.size(); ++i) + { + if (isnan(gammas[i])) + { + verbose_msg( "Warning: isoform abundance is NaN!\n"); + map_success = NUMERIC_FAIL; + } + } + + // Now we need to fill in zeros for the isoforms we filtered out of the + // MLE/MAP calculation + vector updated_gammas = vector(N, 0.0); + + + ublas::matrix updated_gamma_cov; + updated_gamma_cov = ublas::zero_matrix(N, N); + ublas::matrix updated_gamma_bootstrap_cov; + updated_gamma_bootstrap_cov = ublas::zero_matrix(N, N); + ublas::matrix updated_count_cov; + updated_count_cov = ublas::zero_matrix(N, N); + ublas::matrix updated_iterated_exp_count_cov; + updated_iterated_exp_count_cov = ublas::zero_matrix(N, N); + ublas::matrix updated_fpkm_cov; + updated_fpkm_cov = ublas::zero_matrix(N, N); + + size_t cfs = 0; + shared_ptr curr_filtered_scaff = filtered_transcripts[cfs]->transfrag(); + StructurallyEqualScaffolds se; + vector scaff_present(N, N); + + for (size_t i = 0; i < N; ++i) + { + shared_ptr scaff_i = transcripts[i]->transfrag(); + if (cfs < filtered_transcripts.size()) + { + curr_filtered_scaff = filtered_transcripts[cfs]->transfrag(); + if (se(scaff_i, curr_filtered_scaff)) + { + scaff_present[i] = cfs; + cfs++; + } + } + } + + for (size_t i = 0; i < N; ++i) + { + if (scaff_present[i] != N) + { + // then scaffolds[i] has a non-zero abundance, we need to fill + // that in along with relevant cells from the covariance matrix + updated_gammas[i] = filtered_gammas[scaff_present[i]]; + //cerr << updated_gammas[i] << ","; + + for (size_t j = 0; j < N; ++j) + { + if (scaff_present[j] != N) + { + updated_gamma_cov(i,j) = _gamma_covariance(scaff_present[i], + scaff_present[j]); + updated_gamma_bootstrap_cov(i,j) = _gamma_bootstrap_covariance(scaff_present[i], + scaff_present[j]); + updated_iterated_exp_count_cov(i,j) = _iterated_exp_count_covariance(scaff_present[i], + scaff_present[j]); + // Should still be empty but let's do these for consistency: + updated_count_cov(i,j) = _count_covariance(scaff_present[i], + scaff_present[j]); + updated_fpkm_cov(i,j) = _fpkm_covariance(scaff_present[i], + scaff_present[j]); + assert (!isinf(updated_gamma_cov(i,j))); + 
assert (!isnan(updated_gamma_cov(i,j))); + } + } + } + } + + //cerr << endl; + + AbundanceStatus numeric_status = NUMERIC_OK; + if (mle_success == NUMERIC_LOW_DATA) + { + numeric_status = NUMERIC_LOW_DATA; + } + else if (mle_success == NUMERIC_FAIL) + { + numeric_status = NUMERIC_FAIL; + } + else + { + assert (mle_success == NUMERIC_OK); + if (map_success == NUMERIC_FAIL) + { + numeric_status = NUMERIC_FAIL; + } + else if (map_success == NUMERIC_LOW_DATA) + { + numeric_status = NUMERIC_LOW_DATA; + } + // otherwise, we're cool. + } + + + + // All scaffolds that go in get abundances, but those that get "filtered" + // from the calculation get zeros. + //gammas = updated_gammas; + for (size_t i = 0; i < _abundances.size(); ++i) + { + _abundances[i]->gamma(updated_gammas[i]); + _abundances[i]->status(numeric_status); + } + _gamma_covariance = updated_gamma_cov; + _count_covariance = updated_count_cov; + _iterated_exp_count_covariance = updated_iterated_exp_count_cov; + _gamma_bootstrap_covariance = updated_gamma_bootstrap_cov; + _fpkm_covariance = updated_fpkm_cov; + + return (status() == NUMERIC_OK); +} + +void AbundanceGroup::calculate_iterated_exp_count_covariance(const vector& nr_alignments, + const vector >& transcripts) +{ + // Now calculate the _iterated_exp_count_covariance matrix via iterated expectation + vector > cond_probs(transcripts.size(), vector()); + for(size_t j = 0; j < transcripts.size(); ++j) + { + cond_probs[j]= *(transcripts[j]->cond_probs()); + } + + vector u(nr_alignments.size()); + for (size_t i = 0; i < nr_alignments.size(); ++i) + { + u[i] = nr_alignments[i].collapse_mass(); + } + + ublas::matrix count_covariance = ublas::zero_matrix(transcripts.size(), transcripts.size()); + + ublas::vector total_cond_prob = ublas::zero_vector(nr_alignments.size()); + + for (size_t i = 0; i < nr_alignments.size(); ++i) + { + // the replicate gamma mles might not be available, if one of the + // replicates returned an error, we'll consider all to be unreliable + for (size_t j = 0; j < cond_probs.size(); ++j) + { + if (cond_probs[j][i] > 0) + { + total_cond_prob(i) += transcripts[j]->gamma() * cond_probs[j][i]; + assert (!isnan(total_cond_prob(i) && ! 
isinf(total_cond_prob(i)))); + } + } + } + + // Compute the marginal conditional probability for each fragment against each isoform + ublas::matrix marg_cond_prob = ublas::zero_matrix(transcripts.size(), nr_alignments.size()); + + for (size_t i = 0; i < nr_alignments.size(); ++i) + { + // the replicate gamma mles might not be available, if one of the + // replicates returned an error, we'll consider all to be unreliable + for (size_t j = 0; j < cond_probs.size(); ++j) + { + if (total_cond_prob(i)) + { + if (cond_probs[j][i] > 0) + { + marg_cond_prob(j,i) = (transcripts[j]->gamma() * cond_probs[j][i])/total_cond_prob(i); + } + } + } + } + + double total_var = 0.0; + + double num_salient_frags = 0.0; + //double num_unsalient_frags = 0.0; + double num_frags = 0.0; + + //iterate over fragments + for (size_t i = 0; i < marg_cond_prob.size2(); ++i) + { + num_frags += u[i]; + //cerr << u[i] << endl; + } + + ublas::vector expected_counts = ublas::zero_vector(cond_probs.size()); + + //iterate over fragments + for (size_t i = 0; i < marg_cond_prob.size2(); ++i) + { + + // iterate over transcripts + for (size_t j = 0; j < marg_cond_prob.size1(); ++j) + { + double c_j_i = marg_cond_prob(j,i); + expected_counts(j) += u[i] * marg_cond_prob(j,i); + + if (c_j_i == 0 || c_j_i == 1.0) + continue; + for (size_t k = 0; k < marg_cond_prob.size1(); ++k) + { + double c_k_i = marg_cond_prob(k,i); + if (c_k_i == 0 || c_k_i == 1.0) + continue; + + if (j == k) + { + double var = u[i] * c_k_i * (1.0 - c_k_i); + count_covariance(k,k) += var; + assert (var >= 0); + assert (!isnan(var) && !isinf(var)); + total_var += var; + } + else + { + double covar = -u[i] * c_k_i * c_j_i; + assert (covar <= 0); + assert (!isnan(covar) && !isinf(covar)); + count_covariance(k,j) += covar; + } + } + } + + } + + double total_counts = accumulate(expected_counts.begin(), expected_counts.end(), 0); + if (total_counts > 0) + { + for (size_t i = 0; i < transcripts.size(); ++i) + { + //_abundances[i]->num_fragments(expected_counts(i)); + _abundances[i]->gamma(expected_counts(i) / total_counts); + } + } + + _iterated_exp_count_covariance = count_covariance; + + // take care of little rounding errors + for (size_t i = 0; i < _iterated_exp_count_covariance.size1(); ++i) + { + for (size_t j = 0; j < _iterated_exp_count_covariance.size2(); ++j) + { + if (i == j) + { + double c = _iterated_exp_count_covariance(i,j); + if (c < 0) + _iterated_exp_count_covariance(i,j) = 0; + //assert(c >= 0); + } + else + { + double c = _iterated_exp_count_covariance(i,j); + if (c > 0) + _iterated_exp_count_covariance(i,j) = 0; + //assert(c <= 0); + } + } + } +} + +void AbundanceGroup::calculate_kappas() +{ + size_t num_members = _abundances.size(); + _kappa_covariance = ublas::matrix(num_members, + num_members); + //cerr << gamma_cov <(isos_in_tss); + + double S_FPKM = 0.0; + double Z_kappa = 0.0; + double X_S = 0.0; + foreach (shared_ptr pA, _abundances) + { + if (pA->effective_length() > 0) + { + S_FPKM += pA->FPKM(); + Z_kappa += pA->num_fragments() / pA->effective_length(); + X_S += pA->num_fragments(); + } + } + + //fprintf (stderr, "*********\n"); + foreach (shared_ptr pA, _abundances) + { + if (S_FPKM > 0) + { + pA->kappa(pA->FPKM() / S_FPKM); + double kappa = pA->kappa(); + //fprintf (stderr, "kappa = %lg\n", kappa); + //if (kappa < 0.05) + // pA->status(NUMERIC_LOW_DATA); + } + else + { + pA->kappa(0); + } + } + + for (size_t k = 0; k < num_members; ++k) + { + for (size_t m = 0; m < num_members; ++m) + { + double L = _abundances[k]->effective_length() * + 
_abundances[m]->effective_length(); + if (L == 0.0) + { + _kappa_covariance(k,m) = 0.0; + } + else if (m == k) + { + // Use the modeled count variance here instead + double l_t = _abundances[k]->effective_length(); + double M = num_fragments()/mass_fraction(); + double den = (1000000000.0 / (l_t * M)); + double counts = num_fragments(); + //double count_var2 = _abundances[k]->FPKM_variance() / (den*den); + double count_var = _count_covariance(k, m); + double kappa = _abundances[k]->kappa(); +// +// double kappa_var = count_var / (L * Z_kappa * Z_kappa); + double kappa_var; + if (S_FPKM) + { + kappa_var = _abundances[k]->FPKM_variance() / (S_FPKM * S_FPKM); + } + else + { + kappa_var = 0.0; + } + + assert (!isnan(kappa_var) && !isinf(kappa_var)); + _kappa_covariance(k,m) = kappa_var; + } + else + { + double kappa_covar; + if (S_FPKM) + { + kappa_covar = _fpkm_covariance(k,m) / (S_FPKM * S_FPKM); + } + else + { + kappa_covar = 0.0; + } + _kappa_covariance(k,m) = kappa_covar; + } + } + } +} + +void get_alignments_from_scaffolds(const vector >& abundances, + vector& alignments) +{ + set hits_in_gene_set; + + foreach(shared_ptr pA, abundances) + { + shared_ptr pS = pA->transfrag(); + assert (pS); + hits_in_gene_set.insert(pS->mate_hits().begin(), + pS->mate_hits().end()); + } + + for(set::iterator itr = hits_in_gene_set.begin(); + itr != hits_in_gene_set.end(); + ++itr) + { + alignments.push_back(**itr); + } + + sort(alignments.begin(), alignments.end(), mate_hit_lt); +} + +void round(vector & p) { + + double KILLP = 0; // kill all probabilities below this + + for (vector::iterator i = p.begin(); i != p.end(); ++i) { + if ((*i) < KILLP) + *i = 0; + } +} + +void Estep (int N, + int M, + vector const & p, + vector >& U, + const vector >& cond_probs, + const vector& u) { + // given p, fills U with expected frequencies + int i,j; + + vector frag_prob_sums(M, 0.0); + + for (j = 0; j < N; ++j) + { + for (i = 0; i < M; ++i) + { + frag_prob_sums [i] += cond_probs[j][i] * p[j]; + } + } + + for (i = 0; i < M; ++i) + { + frag_prob_sums[i] = frag_prob_sums[i] ? (1.0 / frag_prob_sums[i]) : 0.0; + } + + for (j = 0; j < N; ++j) + { + for (i = 0; i < M; ++i) + { + double ProbY = frag_prob_sums[i]; + double exp_i_j = u[i] * cond_probs[j][i] * p[j] * ProbY; + U[j][i] = exp_i_j; + } + } +} + + +void Mstep (int N, int M, vector & p, vector > const & U) { + vector v(N,0); + double m = 0; + int i,j; + + //#pragma omp parallel for + for (j = 0; j < N; ++j) { + //cout << "." 
<< v[j] << ".\n"; + for (i = 0; i < M; ++i) { + // cout << U[i][j] << " \n"; + v[j] += U[j][i]; + } + m += v[j]; + } + + if (m) + { + for (j = 0; j < N; ++j) { + p[j] = v[j] / m; + } + } + else + { + for (j = 0; j < N; ++j) + { + p[j] = 0.0; + } + } +} + + +double logLike (int N, + int M, + vector & p, + const vector >& cond_prob, + const vector& u, + const vector& log_conv_factors) { + int i,j; + + double ell = accumulate(log_conv_factors.begin(), log_conv_factors.end(), 0.0); + double Prob_Y; + for (i= 0; i < M; i++) { + Prob_Y = 0; + for (j= 0; j < N; j++) { + Prob_Y += cond_prob[j][i] * p[j]; + } + if (Prob_Y > 0) { + ell += (u[i] * log(Prob_Y)); + } + } + return ell; +} + +void grad_ascent_step (int N, + int M, + vector const & p, + vector >& U, + const vector >& cond_probs, + const vector& u, + vector& newP, + double& epsilon) +{ + // given p, fills U with expected frequencies + //int i,j; + + vector dLL_dj(N, 0.0); + + for (size_t i = 0; i < M; ++i) + { + double denom = 0.0; + for (size_t j = 0; j < N; ++j) + { + denom += p[j] * cond_probs[j][i]; + } + + for (size_t j = 0; j < N; ++j) + { + if (denom > 0) + { + dLL_dj[j] += u[i] * cond_probs[j][i] / denom; + } + } + } + + for (size_t j = 0; j < N; ++j) + { + newP[j] = p[j] + epsilon * dLL_dj[j]; + } + + double m = accumulate(newP.begin(), newP.end(), 0.0); + if (m > 0) + { + for (int j = 0; j < N; ++j) { + newP[j] = newP[j] / m; + } + } + else + { + return; + } +} + +double grad_ascent (int N, int M, vector & newP, + const vector >& cond_prob, + vector const & u, + vector const & log_conv_factors, + bool& converged) +{ + converged = true; + double sum = 0; + double newEll = 0; + vector p(N,0); + vector > U(N, vector(M,0)); + double ell = 0; + int iter = 0; + int j; + + for (j = 0; j < N; ++j) { + p[j] = drand48(); + sum += p[j]; + } + for (j = 0; j < N; ++j) { + p[j] = p[j] / sum; + } + + ell = logLike(N, M, p, cond_prob, u, log_conv_factors); + + double epsilon = 1e-5; + + static const double ACCURACY = 1e-6; // convergence criteria + + while (iter <= 2 || iter < max_mle_iterations) + { + grad_ascent_step(N, M, p, U, cond_prob, u, newP, epsilon); + + newEll = logLike(N, M, newP, cond_prob,u, log_conv_factors); + + double delta = newEll - ell; + //fprintf (stderr, "%g\n", delta); + if (delta > 0) + { + //round(newP); + p = newP; + ell = newEll; + if (abs(delta) < ACCURACY) + { + break; + } + } + else + { + //verbose_msg("Reducing EPSILON \n"); + epsilon /= 10; + } + iter++; + } + if (iter == max_mle_iterations) + { + verbose_msg("Warning: ITERMAX reached in abundance estimation, estimation hasn't fully converged\n"); + converged = false; + } + verbose_msg("Convergence reached in %d iterations \n", iter); + return newEll; + +} + +double EM (int N, int M, vector & newP, + const vector >& cond_prob, + vector const & u, + vector const & log_conv_factors, + bool& converged, + vector* p_hint) +{ + converged = true; + //double sum = 0; + double newEll = 0; + vector p(N,0); + vector > U(N, vector(M,0)); + double ell = 0; + int iter = 0; + int j; + + if (p_hint == NULL) + { + for (j = 0; j < N; ++j) { + //p[j] = drand48(); + //sum += p[j]; + p[j] = 1.0/(double)N; + } + } + else + { + assert (p_hint->size() == N); + p = *p_hint; + } + +// for (j = 0; j < N; ++j) { +// p[j] = p[j] / sum; +// } + + //#ifdef DEBUG +// for (j = 0; j < N; ++j) { +// cout << p[j] << " "; +// } +// cout << endl; + //#endif + +// static const double ACCURACY = 1e-6; // convergence for EM + static const double ACCURACY = mle_accuracy; // convergence for EM + + while 
(((iter <= 2) || (abs(ell - newEll) > ACCURACY)) && (iter < max_mle_iterations)) { + if (iter > 0) { + round(newP); + p = newP; + ell = newEll; + } + + Estep(N, M, p, U, cond_prob, u); // fills U + Mstep(N, M, newP,U); // fills p + + newEll = logLike(N, M, newP, cond_prob,u, log_conv_factors); + + //fprintf(stderr, "%d\t%lf\n", iter, newEll); + + //printf("%.3f %.3f %.3f ", newP[0], newP[1], newP[2]); + //printf("%.3f %.3f %.3f ", newP[3], newP[4], newP[5]); + //printf("%.3f %.3f %.3f\n", newP[6], newP[7], newP[8]); + iter++; + } + if (iter >= max_mle_iterations) + { + verbose_msg("Warning: ITERMAX reached in abundance estimation, estimation hasn't fully converged\n"); + converged = false; + } + verbose_msg("Convergence reached in %d iterations \n", iter); + return newEll; +} + +void compute_fisher(const vector >& transcripts, + const ublas::vector& abundances, + const vector& alignments, + const vector& u, + boost::numeric::ublas::matrix& fisher) +{ + int M = alignments.size(); + int N = transcripts.size(); + + vector denoms(M, 0.0); + vector > P(M,vector(N,0)); + + for (int j = 0; j < N; ++j) + { + const vector& cond_probs_j = *(transcripts[j]->cond_probs()); + for (int x = 0; x < M; ++x) + { + if (cond_probs_j[x]==0) + continue; + long double alpha = 0.0; + alpha = cond_probs_j[x]; + alpha *= abundances(j); + denoms[x] += alpha; + } + } + + for (int x = 0; x < M; ++x) + denoms[x] *= denoms[x]; + + + for (int j = 0; j < N; ++j) + { + const vector& cond_probs_j = *(transcripts[j]->cond_probs()); + for (int k = 0; k < N; ++k) + { + + const vector& cond_probs_k = *(transcripts[k]->cond_probs()); + + for (int x = 0; x < M; ++x) + { + if (cond_probs_j[x]==0 && cond_probs_k[x]==0) + continue; + + assert(denoms[x] != 0.0); + + double fisher_x_j_k = cond_probs_j[x] * cond_probs_k[x] / denoms[x]; + + fisher(j,k) += u[x] * fisher_x_j_k; + } + } + } +} + +void compute_sample_weights(const ublas::matrix& proposed_cov, + const vector >& cond_probs, + const vector >& samples, + const vector& u, + const vector& log_conv_factors, + double scale, + const ublas::vector& MLE, + vector >& weighted_samples, + vector >& sample_weights) +{ + if (cond_probs.empty()) + return; + + int M = cond_probs.front().size(); + int N = cond_probs.size(); + + //cerr << "Cov^-1"< sample(samples[i].begin(), samples[i].end()); + + //cerr << "s: "< diff = (samples[i] - MLE); + //cerr << "diff: "< diff_transpose = ublas::trans(diff); + //cerr << "diff^T" << diff_transpose << endl; + ublas::vector P = prod(proposed_cov, diff); + //cerr << "Prod: "<< P << endl; + double X = inner_prod(diff_transpose,P); + + //cerr << diff_transpose << " "<< P << " " << X << endl; + + double sample_prob = exp(-0.5 * X) / scale; + + if (sample_prob == 0.0) + { + // fprintf(stderr, "Error: sample_prob == 0, %lf after rounding. 
\n", X); + // cerr << "diff: "< >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_covariance, + std::map, ublas::vector >& mles_for_read_groups) +{ + size_t N = transcripts.size(); + size_t M = nr_alignments.size(); + + set > rg_props; + std::vector > mle_gammas; + for (size_t i = 0; i < M; ++i) + { + rg_props.insert(nr_alignments[i].read_group_props()); + } + + vector rep_hit_counts; + + for(set >::iterator itr = rg_props.begin(); + itr != rg_props.end(); + ++itr) + { + vector rep_hits; + vector rep_log_conv_factors; + rep_hit_counts.push_back(0); + for (size_t i = 0; i < M; ++i) + { + rep_hits.push_back(nr_alignments[i]); + rep_log_conv_factors.push_back(log_conv_factors[i]); + + if (nr_alignments[i].read_group_props() != *itr) + { + rep_hits.back().collapse_mass(0); + rep_log_conv_factors[rep_log_conv_factors.size() - 1] = 0; + } + rep_hit_counts[rep_hit_counts.size() - 1] += rep_hits.back().collapse_mass(); + } + + //fprintf(stderr,"Replicate # %lu has %lu fragments \n", mle_gammas.size(), rep_hits.size()); + vector rep_gammas(0.0, transcripts.size()); + + AbundanceStatus mle_success = gamma_mle(transcripts, + rep_hits, + rep_log_conv_factors, + rep_gammas); + if (mle_success == NUMERIC_OK) + { + ublas::vector mle = ublas::zero_vector(N); + for(size_t i = 0; i < N; ++i) + { + mle(i) = rep_gammas[i]; + } + cerr << mle << endl; + mle_gammas.push_back(mle); + mles_for_read_groups[*itr] = mle; + } + else + { + // if one replicate fails, let's just not trust any of them + mles_for_read_groups.clear(); + return mle_success; + } + } + +// cerr << "***" << endl; + gamma_covariance = ublas::zero_matrix(N,N); + ublas::vector expected_mle_gamma = ublas::zero_vector(N); +// + foreach(ublas::vector& mle, mle_gammas) + { + expected_mle_gamma += mle; + } + expected_mle_gamma /= mle_gammas.size(); +// +// ublas::vector expected_counts = ublas::zero_vector(N); +// +// for (size_t i = 0; i < mle_gammas.size(); ++i) +// { +// ublas::vector& mle = mle_gammas[i]; +// expected_counts += mle * rep_hit_counts[i]; +// } +// expected_counts /= mle_gammas.size(); +// + for (size_t i = 0; i < N; ++i) + { + for (size_t j = 0; j < N; ++j) + { + for (size_t k = 0 ; k < mle_gammas.size(); ++k) + { + double c = (mle_gammas[k](i) - expected_mle_gamma(i)) * (mle_gammas[k](j) - expected_mle_gamma(j)); + gamma_covariance(i,j) += c; + } + } + } + + gamma_covariance /= mle_gammas.size(); +// +// ublas::matrix count_covariance = ublas::zero_matrix(N,N); +// for (size_t k = 0 ; k < mle_gammas.size(); ++k) +// { +// ublas::vector& mle = mle_gammas[k]; +// ublas::vector counts = mle * rep_hit_counts[k]; +// +// for (size_t i = 0; i < N; ++i) +// { +// for (size_t j = 0; j < N; ++j) +// { +// double c = (counts(i) - expected_counts(i)) * (counts(j) - expected_counts(j)); +// count_covariance(i,j) += c; +// } +// } +// } +// +// count_covariance /= mle_gammas.size(); + +// cerr << "count mean: " << endl; +// cerr << expected_counts << endl; +// cerr << "count covariance: " << endl; +// for (unsigned i = 0; i < count_covariance.size1 (); ++ i) +// { +// ublas::matrix_row > mr (count_covariance, i); +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "======" << endl; + + gamma_map_estimate = expected_mle_gamma; + +// cerr << "MLE: " << expected_mle_gamma << endl; +// cerr << "COV:" << endl; +// cerr << gamma_covariance << endl; + //cerr << "*************" << endl; + return NUMERIC_OK; +} + +AbundanceStatus 
calculate_inverse_fisher(const vector >& transcripts, + const vector& alignments, + const ublas::vector& gamma_mean, + ublas::matrix& inverse_fisher) +{ +// size_t N = gamma_covariance.size1(); + +// gamma_map_covariance = ublas::zero_matrix(N); + + typedef ublas::matrix matrix_type; + matrix_type fisher = ublas::zero_matrix(gamma_mean.size(),gamma_mean.size()); + + vector u(alignments.size()); + for (size_t i = 0; i < alignments.size(); ++i) + { + u[i] = alignments[i].collapse_mass(); + } + + compute_fisher(transcripts, + gamma_mean, + alignments, + u, + fisher); + + ublas::matrix epsilon = ublas::zero_matrix(gamma_mean.size(),gamma_mean.size()); + for (size_t i = 0; i < gamma_mean.size(); ++i) + { + epsilon(i,i) = 1e-6; + } + + fisher += epsilon; // modify matrix to avoid problems during inverse + + ublas::matrix fisher_chol = fisher; + + double ch = cholesky_factorize(fisher_chol); + if (ch != 0.0) + { + verbose_msg("Warning: Fisher matrix is not positive definite (bad element: %lg)\n", ch); + return NUMERIC_FAIL; + } + + inverse_fisher = ublas::zero_matrix(gamma_mean.size(),gamma_mean.size()); + bool invertible = chol_invert_matrix(fisher_chol, inverse_fisher); + + ublas::matrix test_fisher = inverse_fisher; + ch = cholesky_factorize(test_fisher); + if (ch != 0.0 || !invertible) + { + verbose_msg("Warning: Fisher matrix is not inverible\n", ch); + return NUMERIC_FAIL; + } + + return NUMERIC_OK; +} + +AbundanceStatus bayesian_gammas(const vector >& transcripts, + const vector& alignments, + const vector& log_conv_factors, + const ublas::vector& gamma_mle, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance) +{ + + ublas::matrix inverse_fisher; + + // Calculate the mean gamma MLE and covariance matrix across replicates, so + // we can use it as the proposal distribution for importance sampling. This will + // make the Bayesian prior more conservative than using the inverse of the + // Fisher Information matrix on the mixed likelihood function. + AbundanceStatus fisher_status = calculate_inverse_fisher(transcripts, + alignments, + gamma_mle, + inverse_fisher); + + + double trace = 0.0; + for (size_t i = 0; i < gamma_mle.size(); ++i) + { + trace += inverse_fisher(i,i); + } + + ublas::matrix proposal = inverse_fisher; + +#if 1 + proposal += ublas::identity_matrix(gamma_mle.size()) * (trace / 10.0); + proposal *= 10.0; +#endif + + if (fisher_status != NUMERIC_OK) + return fisher_status; + + AbundanceStatus map_status = map_estimation(transcripts, + alignments, + log_conv_factors, + gamma_mle, + proposal, + gamma_map_estimate, + gamma_map_covariance); + + return map_status; +} + +AbundanceStatus bayesian_gammas_exact(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + const ublas::vector& gamma_mle, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance) +{ + + ublas::matrix inverse_fisher; + + // Calculate the mean gamma MLE and covariance matrix across replicates, so + // we can use it as the proposal distribution for importance sampling. This will + // make the Bayesian prior more conservative than using the inverse of the + // Fisher Information matrix on the mixed likelihood function. 
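+ // Explanatory note (comment only; the sketch below is illustrative and is not
+ // called anywhere). The call to calculate_inverse_fisher() builds the observed
+ // Fisher information via compute_fisher() above, which for fragment masses u_x
+ // and per-transcript conditional probabilities P_j(x) accumulates
+ //
+ //     F(j,k) = sum_x u_x * P_j(x) * P_k(x) / (sum_l gamma_l * P_l(x))^2
+ //
+ // Its inverse is then widened into the importance-sampling proposal used below,
+ // which appears intended to keep the proposal positive definite and heavier
+ // tailed than the raw asymptotic covariance. Equivalent sketch of that widening:
+ //
+ //     ublas::matrix<double> make_proposal(const ublas::matrix<double>& inv_fisher)
+ //     {
+ //         double tr = 0.0;
+ //         for (size_t i = 0; i < inv_fisher.size1(); ++i)
+ //             tr += inv_fisher(i,i);                  // trace of the inverse Fisher
+ //         ublas::matrix<double> prop = inv_fisher;
+ //         prop += ublas::identity_matrix<double>(inv_fisher.size1()) * (tr / 10.0);
+ //         prop *= 4.0;                                // widen the proposal
+ //         return prop;
+ //     }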
+ AbundanceStatus fisher_status = calculate_inverse_fisher(transcripts, + nr_alignments, + gamma_mle, + inverse_fisher); + + + double trace = 0.0; + for (size_t i = 0; i < gamma_mle.size(); ++i) + { + trace += inverse_fisher(i,i); + } + + ublas::matrix proposal = inverse_fisher; + +#if 1 + proposal += ublas::identity_matrix(gamma_mle.size()) * (trace / 10.0); + proposal *= 4.0; +#endif + + if (fisher_status != NUMERIC_OK) + return fisher_status; + + AbundanceStatus map_status = map_estimation(transcripts, + nr_alignments, + log_conv_factors, + gamma_mle, + proposal, + gamma_map_estimate, + gamma_map_covariance); + return map_status; +} + + +AbundanceStatus bootstrap_gamma_mle(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_covariance, + double& cross_replicate_js) +{ + size_t N = transcripts.size(); + size_t M = nr_alignments.size(); + + if (N == 1) + { + gamma_map_estimate = ublas::vector(1); + gamma_map_estimate(0) = 1.0; + gamma_covariance = ublas::matrix(1,1); + gamma_covariance(0,0) = 0.0; + return NUMERIC_OK; + } + + vector alignments = nr_alignments; + vector scaled_masses; + vector unscaled_masses; + double num_uncollapsed_frags = 0.0; + for (size_t i = 0; i < M; ++i) + { + double uncollapsed_mass = alignments[i].collapse_mass() / alignments[i].common_scale_mass(); + num_uncollapsed_frags += (uncollapsed_mass); + scaled_masses.push_back(alignments[i].collapse_mass()); + unscaled_masses.push_back(uncollapsed_mass); + alignments[i].collapse_mass(uncollapsed_mass); + } + + // FIXME: this has already been computed above, so just pass it in. + vector orig_gammas(0.0, transcripts.size()); + gamma_mle(transcripts, + nr_alignments, + log_conv_factors, + orig_gammas, + false); + + std::vector > mle_gammas; + + boost::uniform_int<> uniform_dist(0,num_uncollapsed_frags-1); + boost::mt19937 rng; + boost::variate_generator > uniform_gen(rng, uniform_dist); + + int num_sample_frags = floor(num_uncollapsed_frags * bootstrap_fraction); + + if (num_sample_frags <= 0) + { + return NUMERIC_FAIL; + } + + for (size_t i = 0; i < num_bootstrap_samples; ++i) + { + vector sample_idxs; + for (size_t j = 0; j < num_sample_frags; ++j) + { + sample_idxs.push_back(uniform_gen()); + } + sort (sample_idxs.begin(), sample_idxs.end()); + assert (sample_idxs.empty() == false); + + size_t curr_sample = 0; + size_t processed_hits = 0; + vector adjusted_masses(alignments.size(), 0); + for (size_t j = 0; j < alignments.size(); ++j) + { + int adjusted_mass = 0.0; + while (curr_sample < sample_idxs.size() && + sample_idxs[curr_sample] >= processed_hits && + sample_idxs[curr_sample] < processed_hits + alignments[j].collapse_mass()) + { + adjusted_mass++; + curr_sample++; + } + processed_hits += alignments[j].collapse_mass(); + alignments[j].collapse_mass(adjusted_mass); + adjusted_masses[j] = adjusted_mass; + } + + for (size_t j = 0; j < alignments.size(); ++j) + { + alignments[j].collapse_mass(alignments[j].collapse_mass() * alignments[j].common_scale_mass()); + } + + vector bs_gammas(0.0, transcripts.size()); + + AbundanceStatus mle_success = gamma_mle(transcripts, + alignments, + log_conv_factors, + bs_gammas, + false, + &orig_gammas); + if (mle_success == NUMERIC_OK) + { + ublas::vector mle = ublas::zero_vector(N); + for(size_t j = 0; j < N; ++j) + { + mle(j) = bs_gammas[j]; + } + mle_gammas.push_back(mle); + } + + + + for (size_t j = 0; j < alignments.size(); ++j) + { + 
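+ // Restore this alignment's original (unscaled) mass so that the next bootstrap
+ // round resamples from the un-resampled fragment counts.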
alignments[j].collapse_mass(unscaled_masses[j]); + } + } + + //fprintf(stderr, "Ran %lu bootstrap samples succesfully\n", mle_gammas.size()); + + if (mle_gammas.empty()) + return NUMERIC_FAIL; + + gamma_covariance = ublas::zero_matrix(N,N); + ublas::vector expected_mle_gamma = ublas::zero_vector(N); + + foreach(ublas::vector& mle, mle_gammas) + { + //cerr << "MLE # "<< MLENUM++ << endl; + //cerr << mle << endl; + expected_mle_gamma += mle; + } + expected_mle_gamma /= mle_gammas.size(); + + for (size_t i = 0; i < N; ++i) + { + for (size_t j = 0; j < N; ++j) + { + for (size_t k = 0 ; k < mle_gammas.size(); ++k) + { + double c = (mle_gammas[k](i) - expected_mle_gamma(i)) * (mle_gammas[k](j) - expected_mle_gamma(j)); + gamma_covariance(i,j) += c; + } + } + } + + gamma_covariance /= mle_gammas.size(); + gamma_map_estimate = expected_mle_gamma; + + //cerr << "MLE: " << expected_mle_gamma << endl; + //cerr << "COV:" << endl; + //cerr << gamma_covariance << endl; + //cerr << "*************" << endl; + return NUMERIC_OK; +} + +AbundanceStatus bootstrap_gammas(const vector >& transcripts, + const vector& alignments, + const vector& log_conv_factors, + ublas::vector& gamma_estimate, + ublas::matrix& gamma_covariance, + double& cross_replicate_js) +{ + ublas::vector empirical_gamma_mle = gamma_estimate; + ublas::matrix empirical_gamma_covariance = gamma_covariance; + + // Calculate the mean gamma MLE and covariance matrix across replicates, so + // we can use it as the proposal distribution for importance sampling. This will + // make the Bayesian prior more conservative than using the inverse of the + // Fisher Information matrix on the mixed likelihood function. + AbundanceStatus empirical_mle_status = bootstrap_gamma_mle(transcripts, + alignments, + log_conv_factors, + empirical_gamma_mle, + empirical_gamma_covariance, + cross_replicate_js); + + if (empirical_mle_status != NUMERIC_OK) + return empirical_mle_status; + + gamma_estimate = empirical_gamma_mle; + gamma_covariance = empirical_gamma_covariance; + + + + + + return NUMERIC_OK; +} + +AbundanceStatus empirical_replicate_gammas(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_estimate, + ublas::matrix& gamma_covariance, + std::map, ublas::vector >& mles_for_read_groups) +{ + ublas::vector empirical_gamma_mle = gamma_estimate; + ublas::matrix empirical_gamma_covariance = gamma_covariance; + + // Calculate the mean gamma MLE and covariance matrix across replicates, so + // we can use it as the proposal distribution for importance sampling. This will + // make the Bayesian prior more conservative than using the inverse of the + // Fisher Information matrix on the mixed likelihood function. 
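+ // Sketch of what the call below assembles (names are local to this comment and
+ // mirror the body of empirical_mean_replicate_gamma_mle(), written here with
+ // outer_prod for brevity): the per-replicate gamma MLEs are averaged, and their
+ // scatter around that mean becomes the cross-replicate covariance.
+ //
+ //     ublas::vector<double> mean = ublas::zero_vector<double>(N);
+ //     foreach (const ublas::vector<double>& mle, mle_gammas)
+ //         mean += mle;
+ //     mean /= mle_gammas.size();
+ //
+ //     ublas::matrix<double> cov = ublas::zero_matrix<double>(N, N);
+ //     foreach (const ublas::vector<double>& mle, mle_gammas)
+ //         cov += outer_prod(mle - mean, mle - mean);
+ //     cov /= mle_gammas.size();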
+ AbundanceStatus empirical_mle_status = empirical_mean_replicate_gamma_mle(transcripts, + nr_alignments, + log_conv_factors, + empirical_gamma_mle, + empirical_gamma_covariance, + mles_for_read_groups); + + + if (empirical_mle_status != NUMERIC_OK) + return empirical_mle_status; + + gamma_estimate = empirical_gamma_mle; + gamma_covariance = empirical_gamma_covariance; + +#if 0 +// // Perform a bayesian estimation to improve the gamma estimate and their covariances +// ublas::matrix epsilon = ublas::zero_matrix(empirical_gamma_mle.size(),empirical_gamma_mle.size()); +// for (size_t i = 0; i < empirical_gamma_mle.size(); ++i) +// { +// epsilon(i,i) = 1e-6; +// } +// +// empirical_gamma_covariance += epsilon; + + AbundanceStatus map_status = map_estimation(transcripts, + nr_alignments, + log_conv_factors, + empirical_gamma_mle, + empirical_gamma_covariance, + gamma_estimate, + gamma_covariance); + if (map_status != NUMERIC_OK) + return map_status; +#endif + return NUMERIC_OK; +} + +AbundanceStatus revise_map_mean_and_cov_estimate(double log_total_weight, + const ublas::vector& expectation, + const vector >& sample_weights, + const vector >& weighted_samples, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance) +{ + int N = expectation.size(); + + // revise gamma by setting it to the posterior expectation computed via the + // importance sampling + gamma_map_estimate = expectation; + + // calculate the sample - mean vectors, store them in log space + vector > sample_expectation_diffs; + + ublas::vector check_expectation = ublas::zero_vector(expectation.size()); + + for (size_t j = 0; j < weighted_samples.size(); ++j) + { + ublas::vector sample = weighted_samples[j]; + double log_sample_weight = sample_weights[j].second; + + for (size_t e = 0; e < expectation.size(); ++e) + { + // sample is already log transformed after it was weighted, so we + // need to divide by the sample weight to recover the original sample + // value, then undo the log transform, then subtract the mean from it + sample(e) = (exp(((long double)sample(e) - log_sample_weight)) - expectation(e)); + //sample(e) *= exp((log_sample_weight - log_total_weight)); + } + //cerr << sample << endl; + sample_expectation_diffs.push_back(sample); + } + + // We want to revise the covariance matrix from the samples, since we'll + // need it later for the CIs. + ublas::matrix revised_cov = ublas::zero_matrix(N,N); + + // accumulate the contributions from the other samples (doing one cell of + // covariance matrix per outer (i x j) loop iteration. 
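+ // In matrix form, the loop below is the usual second-moment identity
+ //
+ //     Cov ~= sum_j w_j * x_j * x_j^T  -  E[x] * E[x]^T
+ //
+ // where x_j is the sample recovered from its log-transformed, weighted form and
+ // w_j = exp(log_weight_j - log_total_weight) is its normalized importance
+ // weight; the E[x] * E[x]^T term is subtracted once after the loop.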
+ + for (size_t j = 0; j < sample_expectation_diffs.size(); ++j) + { + double log_sample_weight = sample_weights[j].second; + double w = exp((log_sample_weight - log_total_weight)); + ublas::vector sample = weighted_samples[j]; + + for (size_t e = 0; e < expectation.size(); ++e) + { + // sample is already log transformed after it was weighted, so we + // need to divide by the sample weight to recover the original sample + // value, then undo the log transform, then subtract the mean from it + sample(e) = exp(sample(e) - log_sample_weight); + //sample(e) *= exp((log_sample_weight - log_total_weight)); + } + + revised_cov += w * (outer_prod(sample,sample)); + } + + revised_cov -= outer_prod(expectation,expectation); + + //cerr << "Revised COV" << endl; + //cerr << revised_cov << endl; + gamma_map_covariance = revised_cov; + + //cerr << "Revised MAP estimate: " << expectation << endl; + //cerr << "Revised Covariance matrix:" << endl; + //cerr << gamma_map_covariance << endl; + //cerr << "*************" << endl; + + return NUMERIC_OK; +} + +AbundanceStatus calc_is_scale_factor(const ublas::matrix& covariance_chol, + double& is_scale_factor) +{ + double det = determinant(covariance_chol); + is_scale_factor = pow(2.0*boost::math::constants::pi(), covariance_chol.size1()/2.0); + double s = sqrt(det); + is_scale_factor *= s; + + //assert (det); + if (s == 0.0) + { + verbose_msg("Error: sqrt(det(cov)) == 0, %lf after rounding. \n", det); + //cerr << covariance << endl; + return NUMERIC_FAIL; + } + assert (s); + assert (is_scale_factor); + return NUMERIC_OK; +} + +AbundanceStatus map_estimation(const vector >& transcripts, + const vector& alignments, + const vector& log_conv_factors, + const ublas::vector& proposal_gamma_mean, + const ublas::matrix& proposal_gamma_covariance, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance) +{ + ublas::matrix covariance_chol = proposal_gamma_covariance; + ublas::matrix inv_cov = covariance_chol; + double ch = cholesky_factorize(covariance_chol); + + if (ch != 0.0) + { + verbose_msg("Warning: Covariance matrix is not positive definite (bad element: %lg)\n", ch); + return NUMERIC_FAIL; + } + + bool invertible = chol_invert_matrix(covariance_chol, inv_cov); + + if (!invertible) + { + verbose_msg("Warning: Covariance matrix is not invertible\n"); + return NUMERIC_FAIL; + } + + //cerr << "Cholesky decomposed proposal covariance" << endl; + //cerr << covariance_chol << endl; + + multinormal_generator generator(proposal_gamma_mean, covariance_chol); + vector > samples; + + generate_importance_samples(generator, samples, num_importance_samples, false); + + if (samples.size() < 100) + { + verbose_msg("Warning: not-enough samples for MAP re-estimation\n"); + return NUMERIC_FAIL; + } + + double is_scale_factor = 0.0; + + // Calculate the scaling factor for correcting the proposal distribution bias + // during importance sampling + AbundanceStatus scale_status = calc_is_scale_factor(covariance_chol, is_scale_factor); + + if (scale_status == NUMERIC_FAIL) + { + return NUMERIC_FAIL; + } + + vector > sample_weights; + + ublas::vector expectation(transcripts.size()); + vector > weighted_samples; + + vector > cond_probs(transcripts.size(), vector()); + for(size_t j = 0; j < transcripts.size(); ++j) + { + cond_probs[j]= *(transcripts[j]->cond_probs()); + } + + vector u(alignments.size()); + for (size_t i = 0; i < alignments.size(); ++i) + { + u[i] = alignments[i].collapse_mass(); + } + + compute_sample_weights(proposal_gamma_covariance, + cond_probs, + 
samples, + u, + log_conv_factors, + is_scale_factor, + proposal_gamma_mean, + weighted_samples, + sample_weights); + + long double log_total_weight = 0.0; + + AbundanceStatus expectation_ok = compute_posterior_expectation(weighted_samples, + sample_weights, + expectation, + log_total_weight); + if (expectation_ok != NUMERIC_OK) + { + return expectation_ok; + } + + revise_map_mean_and_cov_estimate(log_total_weight, + expectation, + sample_weights, + weighted_samples, + gamma_map_estimate, + gamma_map_covariance); + + return NUMERIC_OK; +} + +template +bool is_identifiable(M &m, PM &pm) +{ + using namespace ublas; + typedef M matrix_type; + typedef typename M::size_type size_type; + typedef typename M::value_type value_type; + + int singular = 0; + size_type size1 = m.size1 (); + size_type size2 = m.size2 (); + size_type size = (std::min) (size1, size2); + for (size_type i = 0; i < size; ++ i) { + matrix_column mci (column (m, i)); + matrix_row mri (row (m, i)); + size_type i_norm_inf = i + index_norm_inf (project (mci, range (i, size1))); + if (m (i_norm_inf, i) != value_type/*zero*/()) { + if (i_norm_inf != i) { + pm (i) = i_norm_inf; + row (m, i_norm_inf).swap (mri); + } else { + //BOOST_UBLAS_CHECK (pm (i) == i_norm_inf, external_logic ()); + } + project (mci, range (i + 1, size1)) *= value_type (1) / m (i, i); + } else if (singular == 0) { + singular = i + 1; + } + project (m, range (i + 1, size1), range (i + 1, size2)).minus_assign (outer_prod (project (mci, range (i + 1, size1)), + project (mri, range (i + 1, size2)))); + } + return singular == 0; +} + +AbundanceStatus gamma_mle(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + vector& gammas, + bool check_identifiability, + vector* p_hint) +{ + gammas.clear(); + if (transcripts.empty()) + return NUMERIC_OK; + + //long double bundle_mass_fraction = bundle_mass / (long double) map_mass; + if (transcripts.size() == 1) + { + gammas.push_back(1.0); + return NUMERIC_OK; + } + + size_t M = nr_alignments.size(); + size_t N = transcripts.size(); + + bool converged = true; + bool identifiable = true; + + if (M > 0) + { + + //vector > saliencies (M,vector(N,0)); + + + //compute_saliencies(cond_probs, saliencies, saliency_weight); + + vector prob(N,0); + + double logL; + + + vector > cond_probs(N, vector()); + for (size_t j = 0; j < N; ++j) + { + cond_probs[j] = *(transcripts[j]->cond_probs()); + } + + if (check_identifiability) + { + ublas::matrix compat = ublas::zero_matrix(M,N); + + for (size_t j = 0; j < N; ++j) + { + for (size_t i = 0; i < M; ++i) + { + if (cond_probs[j][i]) + { + //compat(i,j) = cond_probs[j][i]; + compat(i,j) = 1; + } + } + } + + vector transcripts_with_frags; + for (size_t j = 0; j < N; ++j) + { + bool has_fragment = false; + for (size_t i = 0; i < M; ++i) + { + if (compat(i,j)) + { + has_fragment = true; + break; + } + } + if (has_fragment) + transcripts_with_frags.push_back(j); + } + ublas::matrix reduced_compat = ublas::zero_matrix(M,transcripts_with_frags.size()); + for (size_t j = 0; j < transcripts_with_frags.size(); ++j) + { + column(reduced_compat, j) = column(compat, transcripts_with_frags[j]); + } + + + typedef ublas::permutation_matrix pmatrix; + + // create a permutation matrix for the LU-factorization + pmatrix pm(reduced_compat.size1()); + + // cerr << compat.size2() <,pmatrix>(reduced_compat,pm); + } + + vector u(M); + for (size_t i = 0; i < M; ++i) + { + u[i] = nr_alignments[i].collapse_mass(); + } + + if (use_em) + { + logL = EM(N, M, prob, cond_probs, u, 
log_conv_factors, converged, p_hint); + } + else + { + logL = grad_ascent(N, M, prob, cond_probs, u, log_conv_factors, converged); + } + + gammas = prob; + + for (size_t i = 0; i < gammas.size(); ++i) + { + if (isnan(gammas[i]) || isinf(gammas[i])) + { + return NUMERIC_FAIL; + } + } + } + else + { + gammas = vector(N, 0.0); + } + + double round_err = 0.0; + double num_good = 0; + foreach (double& g, gammas) + { + if (g < min_isoform_fraction) + { + round_err += g; + g = 0.0; + } + else + { + num_good += 1; + } + } + foreach (double& g, gammas) + { + if (g != 0) + { + g += (round_err/num_good); + } + } + + if (converged && identifiable) + return NUMERIC_OK; + else + { + if (!identifiable) + //return NUMERIC_LOW_DATA; + return NUMERIC_OK; + else + return NUMERIC_FAIL; + } + + return NUMERIC_OK; +} + +void calc_isoform_fpkm_conf_intervals(double FPKM, + double variance, + ConfidenceInterval& FPKM_conf) +{ + double FPKM_lo = 0.0; + double FPKM_hi = 0.0; + FPKM_hi = FPKM + 2 * sqrt(variance); + FPKM_lo = max(0.0, FPKM - 2 * sqrt(variance)); + FPKM_conf = ConfidenceInterval(FPKM_lo, FPKM_hi); +} + +bool not_intronic(int p, vector& depth_of_coverage, vector& intronic_cov, float min_intra_intron_fraction, + int& intronic_status) { + bool not_an_intron = (intronic_cov[p]==0 || + depth_of_coverage[p]/intronic_cov[p] >= min_intra_intron_fraction); + if (not_an_intron) intronic_status--; + else intronic_status++; + return not_an_intron; +} + + +double compute_doc(int bundle_origin, + const vector& scaffolds, + vector& depth_of_coverage, + map, float>& intron_depth_of_coverage, + bool exclude_intra_intron, + vector* intronic_cov, + vector* scaff_intronic_status) +{ + vector i_status; + if (scaff_intronic_status==NULL) + scaff_intronic_status=&i_status; + *scaff_intronic_status = vector(scaffolds.size(), 0); + vector intronic; + if (intronic_cov==NULL) + intronic_cov=&intronic; + *intronic_cov = vector(depth_of_coverage.size(), 0); + //vector intronic(depth_of_coverage.size(), false); + depth_of_coverage = vector(depth_of_coverage.size(), 0); + + set hits_in_gene_set; + for (size_t i = 0; i < scaffolds.size(); ++i) + { + hits_in_gene_set.insert(scaffolds[i].mate_hits().begin(), + scaffolds[i].mate_hits().end()); + } + + vector hits; + + for(set::iterator itr = hits_in_gene_set.begin(); + itr != hits_in_gene_set.end(); + ++itr) + { + hits.push_back(Scaffold(**itr)); + hits.back().fpkm((**itr).mass()); + } + + /* + //no need for this here, we do it below with depth_of_coverage + for (size_t i = 0; i < hits.size(); ++i) + { + const vector& aug_ops = hits[i].augmented_ops(); + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_INTRON) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + intronic[K - bundle_origin] = true; + } + } + } + } + */ + for (size_t i = 0; i < hits.size(); ++i) + { + const vector& aug_ops = hits[i].augmented_ops(); + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_MATCH) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + depth_of_coverage[K - bundle_origin] += hits[i].fpkm(); + } + } + else if (op.opcode == CUFF_INTRON) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + (*intronic_cov)[K - bundle_origin] += hits[i].fpkm(); + //intronic[K - bundle_origin] = true; + } + + pair,float>::iterator, bool> is = intron_depth_of_coverage.insert(make_pair(make_pair(op.g_left(), op.g_right()), 0)); + is.first->second += hits[i].fpkm(); + } + 
} + } + + vector knockout(depth_of_coverage); + + double total_doc = 0; + int total_len = 0; + float min_intra_intron_fraction = min(pre_mrna_fraction, min_isoform_fraction); + //for (size_t i = 0; i < hits.size(); ++i) + for (size_t i = 0; i < scaffolds.size(); ++i) + { + //const vector& aug_ops = hits[i].augmented_ops(); + const vector& aug_ops = scaffolds[i].augmented_ops(); + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_MATCH) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + //if (!exclude_intra_intron || !intronic[K - bundle_origin]) + if (!exclude_intra_intron || + not_intronic(K-bundle_origin, depth_of_coverage, *intronic_cov, min_intra_intron_fraction, + (*scaff_intronic_status)[i]) ) + { + total_doc += knockout[K - bundle_origin]; + total_len += (knockout[K - bundle_origin] != 0); + knockout[K - bundle_origin] = 0; + } + } + } + } + } + + return total_doc/(double)total_len; +} + +double major_isoform_intron_doc(map, float>& intron_doc) +{ + double major_isoform_intron_doc = 0; + int num_major_introns = 0; + for(map, float>::const_iterator itr = intron_doc.begin(); + itr != intron_doc.end(); + ++itr) + { + bool heaviest = true; + + for (map, float>::const_iterator itr2 = intron_doc.begin(); + itr2 != intron_doc.end(); + ++itr2) + { + if (itr != itr2 && + itr->second < itr2->second && + overlap_in_genome(itr->first.first, + itr->first.second, + itr2->first.first, + itr2->first.second)) + { + heaviest = false; + break; + } + } + + if (heaviest) + { + major_isoform_intron_doc += itr->second; + num_major_introns++; + } + } + if (num_major_introns) + { + return major_isoform_intron_doc / num_major_introns; + } + else + { + return 0.0; + } +} + +void record_min_doc_for_scaffolds(int bundle_origin, + const vector& hits, + const vector& depth_of_coverage, + const map, float>& intron_depth_of_coverage, + vector& scaff_doc) +{ + for (size_t h = 0; h < hits.size(); ++h) + { + double doc = 99999999.0; + if (hits[h].has_intron()) + doc = get_intron_doc(hits[h], intron_depth_of_coverage); + + doc = min(doc, get_scaffold_min_doc(bundle_origin, + hits[h], + depth_of_coverage)); + scaff_doc.push_back(doc); + } +} + +void record_doc_for_scaffolds(int bundle_origin, + const vector& hits, + const vector& depth_of_coverage, + vector& scaff_doc) +{ + for (size_t h = 0; h < hits.size(); ++h) + { + double doc; + doc = get_scaffold_doc(bundle_origin, + hits[h], + depth_of_coverage); + scaff_doc.push_back(doc); + } +} + +void record_doc_for_scaffolds(int bundle_origin, + const vector& hits, + const vector& depth_of_coverage, + const map, float>& intron_depth_of_coverage, + vector& scaff_doc) +{ + for (size_t h = 0; h < hits.size(); ++h) + { + double doc; + if (hits[h].has_intron()) + doc = get_intron_doc(hits[h], intron_depth_of_coverage); + else + doc = get_scaffold_doc(bundle_origin, + hits[h], + depth_of_coverage); + scaff_doc.push_back(doc); + } +} + +double get_intron_doc(const Scaffold& s, + const map, float >& intron_depth_of_coverage) +{ + const vector& aug_ops = s.augmented_ops(); + int num_introns = 0; + double doc = 0; + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_INTRON) + { + num_introns++; + pair op_intron(op.g_left(), op.g_right()); + map, float >::const_iterator itr = intron_depth_of_coverage.find(op_intron); + // assert (itr != intron_depth_of_coverage.end()); + if (itr == intron_depth_of_coverage.end()) + { + map, float >::const_iterator zi; 
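+ // Debugging aid only: dump the introns we do know about. Note that itr is still
+ // end() at this point, so the "doc += itr->second" below relies on the lookup
+ // succeeding for well-formed scaffolds.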
+ for (zi = intron_depth_of_coverage.begin(); + zi != intron_depth_of_coverage.end(); + ++zi) + { + verbose_msg( "Warning: intron not within scaffold ([%d-%d], %d)\n", zi->first.first, zi->first.second, zi->second); + } + } + + doc += itr->second; + } + } + return doc / (double)num_introns; +} + +double get_scaffold_doc(int bundle_origin, + const Scaffold& s, + const vector& depth_of_coverage) +{ + const vector& aug_ops = s.augmented_ops(); + int m_len = 0; + double doc = 0; + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_MATCH) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + m_len++; + doc += depth_of_coverage[K - bundle_origin]; + } + } + } + + return doc/(double)m_len; +} + +double get_scaffold_min_doc(int bundle_origin, + const Scaffold& s, + const vector& depth_of_coverage) +{ + const vector& aug_ops = s.augmented_ops(); + float min_doc = 99999999; + + for (size_t j = 0; j < aug_ops.size(); ++j) + { + const AugmentedCuffOp& op = aug_ops[j]; + if (op.opcode == CUFF_MATCH) + { + for (int K = op.g_left(); K < op.g_right(); ++K) + { + if (min_doc > depth_of_coverage[K - bundle_origin]) + min_doc = depth_of_coverage[K - bundle_origin]; + } + } + } + + return min_doc; +} diff --git a/src/abundances.h b/src/abundances.h new file mode 100644 index 0000000..ff30b77 --- /dev/null +++ b/src/abundances.h @@ -0,0 +1,567 @@ +#ifndef ABUNDANCES_H +#define ABUNDANCES_H +/* + * abundances.h + * cufflinks + * + * Created by Cole Trapnell on 4/27/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include "hits.h" +#include "scaffolds.h" +#include "bundles.h" +#include "biascorrection.h" + +namespace ublas = boost::numeric::ublas; + +struct ConfidenceInterval +{ + ConfidenceInterval(double Low = 0.0, double High = 0.0) + : low(Low), high(High) {} + double low; + double high; +}; + +enum AbundanceStatus { NUMERIC_OK, NUMERIC_FAIL, NUMERIC_LOW_DATA, NUMERIC_HI_DATA }; + +class Abundance +{ +public: + virtual ~Abundance() {} + + // Status of the numerical calculation performed on this object. Safe to + // do testing only if status == NUMERIC_OK + virtual AbundanceStatus status() const = 0; + virtual void status(AbundanceStatus s) = 0; + + // Fragments Per Kilbase of transcript per Million fragments mapped + virtual double FPKM() const = 0; + virtual void FPKM(double fpkm) = 0; + virtual double FPKM_variance() const = 0; + virtual void FPKM_variance(double v) = 0; + + virtual ConfidenceInterval FPKM_conf() const = 0; + virtual void FPKM_conf(const ConfidenceInterval& cf) = 0; + + // gamma is a fixed property of each transcript or transcript group. It's + // the probability that one would draw a fragment from this object, scaled + // to an arbitrary locus' worth of fragments. + virtual double gamma() const = 0; + virtual void gamma(double g) = 0; + + // Kappa is only really meaningful when this Abundance record is part of a + // group - it's the relative abundance of this object within the larger + // group. 
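+ // For example (numbers purely illustrative): if a group's members have FPKMs of
+ // 60, 30 and 10, AbundanceGroup::calculate_kappas() assigns them kappas of 0.6,
+ // 0.3 and 0.1, i.e. kappa_i = FPKM_i / (sum of the group's FPKMs).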
+ virtual double kappa() const = 0; + virtual void kappa(double k) = 0; + + virtual double num_fragments() const = 0; + virtual void num_fragments(double nf) = 0; + + virtual double mass_fraction() const = 0; + virtual void mass_fraction(double mf) = 0; + + virtual double mass_variance() const = 0; + virtual void mass_variance(double mv) = 0; + + virtual double effective_length() const= 0; + virtual void effective_length(double el) = 0; + + virtual const vector* cond_probs() const { return NULL; } + virtual void cond_probs(vector* cp) = 0; + + // The structural information for the object, if defined. + virtual shared_ptr transfrag() const { return shared_ptr(); } + + + virtual set gene_id() const = 0; + virtual set gene_name() const = 0; + virtual set tss_id() const = 0; + virtual set protein_id() const = 0; + + virtual const string& description() const = 0; + virtual void description(const string& d) = 0; + + virtual const string& locus_tag() const = 0; + virtual void locus_tag(const string& L) = 0; + + virtual const string& reference_tag() const = 0; + virtual void reference_tag(const string& r) = 0; +}; + +class TranscriptAbundance : public Abundance +{ +public: + + TranscriptAbundance() : + _status(NUMERIC_OK), + _transfrag(shared_ptr()), + _FPKM(0), + _FPKM_variance(0), + _gamma(0), + _kappa(1.0), + _num_fragments(0), + _eff_len(0), + _cond_probs(NULL), + _sample_mass_fraction(0.0), + _sample_mass_variance(0.0){} + + TranscriptAbundance(const TranscriptAbundance& other) + { + _status = other._status; + _transfrag = other._transfrag; + _FPKM = other._FPKM; + _FPKM_conf = other._FPKM_conf; + _gamma = other._gamma; + _num_fragments = other._num_fragments; + _eff_len = other._eff_len; + _cond_probs = other._cond_probs; + _sample_mass_fraction = other._sample_mass_fraction; + _sample_mass_variance = other._sample_mass_variance; + } + + ~TranscriptAbundance() + { + if (_cond_probs != NULL) + { + delete _cond_probs; + _cond_probs = NULL; + } + } + + AbundanceStatus status() const { return _status; } + void status(AbundanceStatus s) { _status = s; } + + double FPKM() const { return _FPKM; } + void FPKM(double fpkm) + { + _FPKM = fpkm; + _transfrag->fpkm(fpkm); + } + double FPKM_variance() const { return _FPKM_variance; } + void FPKM_variance(double v); + + ConfidenceInterval FPKM_conf() const { return _FPKM_conf; } + void FPKM_conf(const ConfidenceInterval& cf) { _FPKM_conf = cf; } + + double gamma() const { return _gamma; } + void gamma(double g) { assert(!isnan(g)); _gamma = g; }; + + double kappa() const { return _kappa; } + void kappa(double k) { _kappa = k; } + + double num_fragments() const { return _num_fragments; } + void num_fragments(double nf) { assert (!isnan(nf)); _num_fragments = nf; } + + double mass_fraction() const { return _sample_mass_fraction; } + void mass_fraction(double mf) { _sample_mass_fraction = mf; } + + double mass_variance() const { return _sample_mass_variance; } + void mass_variance(double mv) { _sample_mass_variance = mv; } + + void transfrag(shared_ptr tf) { _transfrag = tf; } + shared_ptr transfrag() const { return _transfrag; } + + double effective_length() const { return _eff_len; } + void effective_length(double el) { _eff_len = el; } + + const vector* cond_probs() const { return _cond_probs; } + void cond_probs(vector* cp) + { + if(_cond_probs != NULL) { delete _cond_probs; }; + _cond_probs = cp; + } + + + set gene_id() const + { + if (_transfrag) + { + set s; + s.insert(_transfrag->annotated_gene_id()); + return s; + } + else + { + assert (false); + 
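+ // Should be unreachable: a TranscriptAbundance is expected to have a transfrag
+ // attached before its annotation accessors are queried.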
return set(); + } + } + + set gene_name() const + { + if (_transfrag) + { + set s; + s.insert(_transfrag->annotated_gene_name()); + return s; + } + else + { + assert (false); + return set(); + } + } + + set tss_id() const + { + if (_transfrag) + { + set s; + s.insert(_transfrag->annotated_tss_id()); + return s; + } + else + { + assert (false); + return set(); + } + } + + set protein_id() const + { + if (_transfrag) + { + set s; + s.insert(_transfrag->annotated_protein_id()); + return s; + } + else + { + assert (false); + return set(); + } + } + + virtual const string& description() const { return _description; } + virtual void description(const string& d) { _description = d; } + + virtual const string& locus_tag() const { return _locus_tag; } + virtual void locus_tag(const string& L) { _locus_tag = L; } + + virtual const string& reference_tag() const { return _ref_tag; } + virtual void reference_tag(const string& r) { _ref_tag = r; } + +private: + + void calculate_FPKM_err_bar(double variance); + + AbundanceStatus _status; + shared_ptr _transfrag; + double _FPKM; + double _FPKM_variance; + ConfidenceInterval _FPKM_conf; + double _gamma; + double _kappa; + double _num_fragments; + double _eff_len; + vector* _cond_probs; + + string _description; + string _locus_tag; + string _ref_tag; + + long double _sample_mass_fraction; + long double _sample_mass_variance; +}; + +class AbundanceGroup : public Abundance +{ +public: + AbundanceGroup() : _kappa(1.0), _FPKM_variance(0.0), _max_mass_variance(0.0), _salient_frags(0.0), _total_frags(0.0) {} + + AbundanceGroup(const AbundanceGroup& other) + { + _abundances = other._abundances; + _iterated_exp_count_covariance = other._iterated_exp_count_covariance; + _count_covariance = other._count_covariance; + _fpkm_covariance = other._fpkm_covariance; + _gamma_covariance = other._gamma_covariance; + _gamma_bootstrap_covariance = other._gamma_bootstrap_covariance; + _FPKM_conf = other._FPKM_conf; + _kappa = other._kappa; + _kappa_covariance = other._kappa_covariance; + _FPKM_variance = other._FPKM_variance; + _description = other._description; + _max_mass_variance = other._max_mass_variance; + _salient_frags = other._salient_frags; + _total_frags = other._total_frags; + _read_group_props = other._read_group_props; + } + + AbundanceGroup(const vector >& abundances) : + _abundances(abundances), + _iterated_exp_count_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _count_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _fpkm_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _gamma_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _gamma_bootstrap_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _kappa_covariance(ublas::zero_matrix(abundances.size(), abundances.size())), + _kappa(1.0), + _FPKM_variance(0.0), + _max_mass_variance(0.0), + _salient_frags(0.0), + _total_frags(0.0) {} + + AbundanceGroup(const vector >& abundances, + const ublas::matrix& gamma_covariance, + const ublas::matrix& gamma_bootstrap_covariance, + const ublas::matrix& iterated_exp_count_covariance, + const ublas::matrix& count_covariance, + const ublas::matrix& fpkm_covariance, + const long double max_mass_variance, + const std::set >& rg_props); + + AbundanceStatus status() const; + void status(AbundanceStatus s) { } + bool has_member_with_status(AbundanceStatus member_status); + + double FPKM() const; + void FPKM(double fpkm) { } + + double FPKM_variance() const { return 
_FPKM_variance; } + void FPKM_variance(double v) { } + + ConfidenceInterval FPKM_conf() const { return _FPKM_conf; } + + + double gamma() const; + void gamma(double g) { }; + + double kappa() const { return _kappa; } + void kappa(double k) { _kappa = k; } + + double num_fragments() const; + void num_fragments(double nf) { } + + double mass_fraction() const; + void mass_fraction(double mf) { } + + double mass_variance() const; + void mass_variance(double mf) { } + + set gene_id() const; + set gene_name() const; + set tss_id() const; + set protein_id() const; + + virtual const string& description() const { return _description; } + virtual void description(const string& d) { _description = d; } + + virtual const string& locus_tag() const; + virtual void locus_tag(const string& L) { } + + virtual const string& reference_tag() const; + virtual void reference_tag(const string& r) { } + + double effective_length() const; + + //DUMMY FUNCTIONS + void effective_length(double ef) {} + void cond_probs(vector* cp) {} + + + void filter_group(const vector& to_keep, + AbundanceGroup& filtered_group) const; + + void get_transfrags(vector >& transfrags) const; + + vector >& abundances() { return _abundances; } + const vector >& abundances() const { return _abundances; } + + const ublas::matrix& gamma_cov() const { return _gamma_covariance; } + + const ublas::matrix& gamma_bootstrap_cov() const { return _gamma_bootstrap_covariance; } + + const ublas::matrix& iterated_count_cov() const { return _iterated_exp_count_covariance; } + + const ublas::matrix& count_cov() const { return _count_covariance; } + + const ublas::matrix& kappa_cov() const { return _kappa_covariance; } + + const ublas::matrix& fpkm_cov() const { return _kappa_covariance; } + + + void calculate_abundance(const vector& alignments); + + void max_mass_variance(double mmv) { _max_mass_variance = mmv; } + double max_mass_variance() const { return _max_mass_variance; } + + double salient_frags() const { return _salient_frags; } + void salient_frags(double nf) { _salient_frags = nf; } + + double total_frags() const { return _total_frags; } + void total_frags(double nf) { _total_frags = nf; } + + const std::set >& rg_props() const { return _read_group_props; } + +private: + + void FPKM_conf(const ConfidenceInterval& cf) { _FPKM_conf = cf; } + + bool calculate_gammas(const vector& nr_alignments, + const vector& log_conv_factors, + const vector >& transcripts, + const vector >& mapped_transcripts); + void calculate_FPKM_covariance(); + void estimate_count_covariance(); + void calculate_conf_intervals(); + void calculate_locus_scaled_mass_and_variance(const vector& nr_alignments, + const vector >& transcripts); + void calculate_iterated_exp_count_covariance(const vector& nr_alignments, + const vector >& transcripts); + void calculate_kappas(); + + + void update_multi_reads(const vector& alignments, vector > transcripts); + + + void compute_cond_probs_and_effective_lengths(const vector& alignments, + vector >& transcripts, + vector >& mapped_transcripts); + + void update_transcript_expression(double locus_mass, double locus_mass_fraction); + + + + //void collect_read_group_props(); + + vector > _abundances; + + // _count_covariance is the final count covariance matrix. It's includes our estimates + // of transcript-level biological variability on counts + ublas::matrix _count_covariance; + + // _iterated_exp_count_covariance is the ITERATED EXPECTATION count covariance matrix. It's not the + // estimated count covariance matrix (i.e. 
it doesn't include biological variability from + // the fitted model. + ublas::matrix _iterated_exp_count_covariance; + ublas::matrix _fpkm_covariance; + ublas::matrix _gamma_covariance; + ublas::matrix _gamma_bootstrap_covariance; + + ConfidenceInterval _FPKM_conf; + + ublas::matrix _kappa_covariance; + double _kappa; + double _FPKM_variance; + string _description; + double _max_mass_variance; // upper bound on the count variance that could come from this group. + double _salient_frags; + double _total_frags; + + std::set > _read_group_props; + //std::map, ublas::vector > _mles_for_read_groups; +}; + +void compute_compatibilities(vector >& transcripts, + const vector& alignments, + vector >& compatibilities); + +void get_alignments_from_scaffolds(const vector >& abundances, + vector& alignments); + +AbundanceStatus empirical_mean_replicate_gamma_mle(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_covariance, + std::map, ublas::vector >& mles_for_read_groups); + +AbundanceStatus empirical_replicate_gammas(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance, + std::map, ublas::vector >& mles_for_read_groups); + +AbundanceStatus bootstrap_gamma_mle(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_covariance, + double& cross_replicate_js); + +AbundanceStatus bootstrap_gammas(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance, + double& cross_replicate_js); + +AbundanceStatus bayesian_gammas(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + const ublas::vector& gamma_mle, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance); + +AbundanceStatus map_estimation(const vector >& transcripts, + const vector& alignments, + const vector& log_conv_factors, + const ublas::vector& proposal_gamma_mean, + const ublas::matrix& proposal_gamma_covariance, + ublas::vector& gamma_map_estimate, + ublas::matrix& gamma_map_covariance); + +AbundanceStatus gamma_mle(const vector >& transcripts, + const vector& nr_alignments, + const vector& log_conv_factors, + vector& gammas, + bool check_identifiability = true, + vector* p_hint = NULL); + +double compute_doc(int bundle_origin, + const vector& scaffolds, + vector& depth_of_coverage, + map, float>& intron_depth_of_coverage, + bool exclude_intra_intron=false, + vector* intronic_cov=NULL, + vector* scaff_intronic_status=NULL); + +double major_isoform_intron_doc(map, float>& intron_doc); + +void record_doc_for_scaffolds(int bundle_origin, + const std::vector& hits, + const std::vector& depth_of_coverage, + const std::map, float>& intron_depth_of_coverage, + std::vector& scaff_doc); + +void record_doc_for_scaffolds(int bundle_origin, + const std::vector& hits, + const std::vector& depth_of_coverage, + std::vector& scaff_doc); + +void record_min_doc_for_scaffolds(int bundle_origin, + const std::vector& hits, + const std::vector& depth_of_coverage, + const std::map, float>& intron_depth_of_coverage, + std::vector& scaff_doc); + + +double get_intron_doc(const Scaffold& s, + const map, float>& intron_depth_of_coverage); + +double get_scaffold_doc(int bundle_origin, + 
const Scaffold& s, + const vector& depth_of_coverage); + +double get_scaffold_min_doc(int bundle_origin, + const Scaffold& s, + const vector& depth_of_coverage); + +AbundanceStatus calculate_inverse_fisher(const vector >& transcripts, + const vector& alignments, + const ublas::vector& gamma_mean, + ublas::matrix& inverse_fisher); +#endif diff --git a/src/assemble.cpp b/src/assemble.cpp new file mode 100644 index 0000000..d0e69f3 --- /dev/null +++ b/src/assemble.cpp @@ -0,0 +1,568 @@ +/* + * assemble.cpp + * cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include + +//#include +#include + +#include +// DON'T move this, or mystery compiler errors will result. Affects gcc >= 4.1 +#include + +#include + +#include +//#include + +#include + +#if (BOOST_VERSION < 103800) +#include +#else +#include +#endif + +#include // for normal_distribution +using boost::math::normal; // typedef provides default type is double. + +#include "transitive_closure.h" +#include "transitive_reduction.h" + +#include "common.h" +#include "assemble.h" + +#include "bundles.h" +//#include "filters.h" +//#include "genes.h" +#include "scaffolds.h" +#include "clustering.h" +#include "matching_merge.h" +#include "graph_optimize.h" + +using namespace boost; +using namespace std; + +bool mate_graphs(const HitBundle& bundle, BundleStats* stats); + +typedef lemon::SmartBpUGraph ReachGraph; + +long long weight_of_merge(Scaffold& lhs, + Scaffold& rhs, + double source_psi, + double target_psi) +{ + //double expected_cov_diff = max(1.0, 0.1 * (source_doc + target_doc)); + //normal cov_norm(0, expected_cov_diff); + + normal cov_test_norm(0, 1.0); + + double score = 0.0; + + // HACK: This early breakout prevents spliced reads that cross exactly one + // intron from being matched up if they both cross the same intron. + // Otherwise, we get phasing problems, as introns aren't matched up long + // distance. Ugh.. 
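// ===========================================================================
// [Editor's illustration -- not part of the Cufflinks sources] In the code
// below, weight_of_merge() scores a candidate merge of two scaffolds by how
// similar their "percent spliced in" (psi) values are: score =
// log(1 - |psi_a - psi_b|), which is 0 for identical psi and grows more
// negative as they diverge; the negated, scaled score becomes an integer
// edge weight for the min-cost matching, with a large sentinel for pairs
// that should never merge. A standalone restatement of just that conversion,
// omitting the shared-intron special case; the function name is hypothetical.
#include <cmath>
#include <algorithm>
#include <cstdio>

static long long merge_weight_from_psi(double psi_a, double psi_b)
{
    const long long INCOMPATIBLE = 999999999;        // sentinel meaning "never merge"
    if (psi_a <= 0.0 || psi_b <= 0.0)
        return INCOMPATIBLE;
    double score = std::log(1.0 - std::fabs(psi_a - psi_b)); // <= 0 by construction
    score = std::min(score, -1e-6);                  // keep weights strictly positive
    return static_cast<long long>(score * -1e6);     // larger weight == worse merge
}

int main()
{
    std::printf("%lld %lld\n",
                merge_weight_from_psi(0.50, 0.50),   // identical psi: weight 1
                merge_weight_from_psi(0.80, 0.20));  // large psi gap: much larger weight
    return 0;
}
// ===========================================================================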
+ if (Scaffold::overlap_in_genome(lhs, rhs, 0)) + { + bool lh_intron = lhs.has_intron(); + bool rh_intron = rhs.has_intron(); + + if (lh_intron && rh_intron) + { + vector > lh_gaps = lhs.gaps(); + vector > rh_gaps = rhs.gaps(); + if (lh_gaps.size() == 1 && lh_gaps == rh_gaps) + return 999999999; + } + } + + if (source_psi > 0 && target_psi > 0 ) + { + double test_stat = log(1.0 - abs(source_psi - target_psi)); + score = test_stat; + assert (score <= 0.0); + } + else + { + return 999999999; + } + + assert (score <= 0.0); + if (score >= -1e-6) + score = -1e-6; + int weight = (int)(score * -1e6); + return weight; +} + + +typedef map > DagToBp; +void create_reachability_bp_graph(DAG& dag, + ReachGraph& reach_graph, + vector b_to_a, + DagToBp& dag_to_bp, + const adjacency_list<>& TC, + const vector& scaffold_mask) +{ + //typedef graph_traits::vertex_descriptor Vertex; + HitsForNodeMap hits_for_node = get(vertex_name, dag); + + //fprintf (stdout, "\tclosure edges:\t\t\t\%d\n", num_edges(TC)); + graph_traits < adjacency_list<> >::vertex_iterator v, vend; + + b_to_a.resize(num_vertices(TC)); + + for (tie(v, vend) = vertices(TC); v != vend; ++v) + { + DagToBp::iterator itr = dag_to_bp.find(*v); + if (itr == dag_to_bp.end()) + { + ReachGraph::ANode A = reach_graph.addANode(); + int a = reach_graph.aNodeId(A); + + ReachGraph::BNode B = reach_graph.addBNode(); + int b = reach_graph.bNodeId(B); + b_to_a[b] = A; + dag_to_bp[*v] = make_pair(a, b); + + } + } + + reach_graph.reserveEdge(num_edges(TC)); + reach_graph.reserveANode(num_vertices(TC)); + reach_graph.reserveBNode(num_vertices(TC)); + + graph_traits < adjacency_list<> >::edge_iterator i, end; + for (tie(i, end) = edges(TC); i != end; ++i) + { + int a_id = -1; + int b_id = -1; + DAGNode s = source(*i, TC); + DAGNode t = target(*i, TC); + + DagToBp::iterator itr = dag_to_bp.find(s); + if (itr == dag_to_bp.end()) + { + assert (false); + } + else + { + a_id = itr->second.first; + } + + itr = dag_to_bp.find(t); + + if (itr == dag_to_bp.end()) + { + assert(false); + } + else + { + b_id = itr->second.second; + } + + if (in_degree(s, dag) == 0 /* virtual "source"? 
*/|| + out_degree(t, dag) == 0 /* virtual "sink"?*/) + continue; + + assert (a_id != -1); + assert (b_id != -1); + + ReachGraph::ANode a_node = reach_graph.nodeFromANodeId(a_id); + ReachGraph::BNode b_node = reach_graph.nodeFromBNodeId(b_id); + ReachGraph::ANode a_for_b = b_to_a[b_id]; + + assert (a_for_b != a_node); + + if (scaffold_mask[a_id] && + scaffold_mask[b_id]) + { + reach_graph.addEdge(a_node, + b_node); + } + } +} + +void add_weights_to_reachability_bp_graph(ReachGraph& bp, + const HitsForNodeMap& hits_for_node, + const vector& hits, + const vector& scaffolds, + ReachGraph::UEdgeMap& weights) +{ + // number of reads of reads spliced in to each scaffold + vector spliced_in(scaffolds.size(), 0.0); + + // number spliced in / length of scaffold + vector density(scaffolds.size(), 0.0); + + // number of reads spliced in to scaffold + those that just overlap it + //vector overlapping(scaffolds.size(), 0.0); + + for (size_t i = 0; i < scaffolds.size(); ++i) + { + for (size_t j = 0; j < hits.size(); ++j) + { + if (!hits[j].is_ref() && scaffolds[i].contains(hits[j])) + { + if (Scaffold::compatible(scaffolds[i],hits[j])) + { + spliced_in[i]++; + } + } + } + density[i] = spliced_in[i] / scaffolds[i].length(); + } + + // percent spliced in = density / (total density of overlapping scaffolds) + vector psi(scaffolds.size(), 0.0); + vector local_density(scaffolds.size(), 0.0); + for (size_t i = 0; i < scaffolds.size(); ++i) + { + double total_density = 0.0; + int num_overlaps = 0; + double compatible_density = 0.0; + for (size_t j = 0; j < scaffolds.size(); ++j) + { + if (Scaffold::overlap_in_genome(scaffolds[i],scaffolds[j], 0)) + { + total_density += density[j]; + num_overlaps++; + if (Scaffold::compatible(scaffolds[i], scaffolds[j])) + { + compatible_density += density[j]; + } + } + } + if (total_density) + psi[i] = compatible_density / total_density; + local_density[i] = compatible_density; + } + + for (ReachGraph::UEdgeIt i(bp); i!=lemon::INVALID; ++i) + { + ReachGraph::ANode a = bp.source(i); + ReachGraph::BNode b = bp.target(i); + DAGNode a_dag = bp.aNodeId(a); + int a_id_for_b = bp.aNodeId(b); + ReachGraph::ANode a_for_b = bp.nodeFromANodeId(a_id_for_b); + assert (a_for_b != lemon::INVALID); + DAGNode b_dag = a_id_for_b; + + Scaffold* a_scaff = hits_for_node[a_dag]; + Scaffold* b_scaff = hits_for_node[b_dag]; + + size_t aidx = a_scaff - &scaffolds[0]; + size_t bidx = b_scaff - &scaffolds[0]; + + double a_psi = psi[aidx]; + + double b_psi = psi[bidx]; + + + //assert (a_psi != 1.0); + //assert (b_psi != 1.0); + + long long weight = weight_of_merge(*a_scaff, + *b_scaff, + a_psi, + b_psi); + + if (weight < 0) + weight = 10000000; + + //fprintf(stderr, "Similarity between %d, %d = %.20lf\n", bp.aNodeId(a), a_id_for_b, weight); + weights[i] = weight; + } +} + +void holdout_transitivity_hazards(vector& hits, + vector& hazards) +{ + vector > introns; + for (size_t i = 0; i < hits.size(); ++i) + { + const vector& ops = hits[i].augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + const AugmentedCuffOp& op = ops[j]; + if (op.opcode == CUFF_INTRON) + introns.push_back(make_pair(op.g_left(), op.g_right())); + } + } + + sort(introns.begin(), introns.end()); + vector >::iterator new_end = unique(introns.begin(), + introns.end()); + introns.erase(new_end, introns.end()); + + vector > evil_introns; + for (size_t i = 0; i < introns.size(); ++i) + { + for (size_t j = i + 1; j < introns.size(); ++j) + { + if (overlap_in_genome(introns[i].first, introns[i].second, + introns[j].first, 
introns[j].second)) + { + evil_introns.push_back(introns[i]); + evil_introns.push_back(introns[j]); + } + } + } + + sort(evil_introns.begin(), evil_introns.end()); + new_end = unique(evil_introns.begin(), evil_introns.end()); + evil_introns.erase(new_end, evil_introns.end()); + + vector filtered_hits; + for (size_t i = 0; i < hits.size(); ++i) + { + bool overlaps_evil_intron = false; + const vector& ops = hits[i].augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + const AugmentedCuffOp& op = ops[j]; + if (op.opcode == CUFF_UNKNOWN) + { + for (size_t k = 0; k < evil_introns.size(); ++k) + { + + if (overlap_in_genome(op.g_left(), op.g_right(), + evil_introns[k].first, evil_introns[k].second)) + { + overlaps_evil_intron = true; + } + } + } + } + if (overlaps_evil_intron) + { +// if (hits[i].has_intron()) +// { +// fprintf(stderr, "&&& Holding out intron-containing hazard at %d-%d\n", hits[i].left(), hits[i].right()); +// } + hazards.push_back(hits[i]); + } + else + { + filtered_hits.push_back(hits[i]); + } + } + + verbose_msg( "%s\tHeld out %lu scaffolds as transitivity hazards\n", bundle_label->c_str(), hazards.size()); + + hits = filtered_hits; +} + +bool make_scaffolds(int bundle_left, + int bundle_length, + vector& hits, + vector& scaffolds) +{ + if (hits.empty()) + return true; + + bool intron_hits = false; + for (size_t i = 0; i < hits.size(); ++i) + { + if (hits[i].has_intron()) + { + intron_hits = true; + break; + } + } + + if (!intron_hits) + { + verbose_msg( "%s\tNo introns in bundle, collapsing all hits to single transcript\n", bundle_label->c_str()); + scaffolds.push_back(Scaffold(hits)); + fill_gaps(scaffolds, 2 * olap_radius); + } + else + { + verbose_msg( "%s\tBundle has spliced reads\n", bundle_label->c_str()); + + vector hazards; + holdout_transitivity_hazards(hits, hazards); + + vector split_hazards; + // Cleave the partials at their unknowns to minimize FPKM dilation on + // the low end of the expression profile. 
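// ===========================================================================
// [Editor's illustration -- not part of the Cufflinks sources] The
// transitivity-hazard hold-out above flags every pair of distinct introns
// whose genomic intervals overlap and sets aside any hit whose unknown
// regions touch one of them. The core operation is a plain interval-overlap
// test followed by sort/unique de-duplication; a standalone restatement,
// assuming inclusive coordinates (an assumption made for illustration only).
#include <vector>
#include <utility>
#include <algorithm>
#include <cstdio>

typedef std::pair<int,int> Interval; // (left, right)

static bool intervals_overlap(const Interval& a, const Interval& b)
{
    return a.first <= b.second && b.first <= a.second;
}

// Collect the members of all mutually overlapping pairs, the way the hazard
// filter collects overlapping ("evil") introns.
static std::vector<Interval> overlapping_members(std::vector<Interval> ivs)
{
    std::sort(ivs.begin(), ivs.end());
    std::vector<Interval> evil;
    for (size_t i = 0; i < ivs.size(); ++i)
        for (size_t j = i + 1; j < ivs.size(); ++j)
            if (intervals_overlap(ivs[i], ivs[j]))
            {
                evil.push_back(ivs[i]);
                evil.push_back(ivs[j]);
            }
    std::sort(evil.begin(), evil.end());
    evil.erase(std::unique(evil.begin(), evil.end()), evil.end());
    return evil;
}

int main()
{
    std::vector<Interval> introns;
    introns.push_back(Interval(100, 200));
    introns.push_back(Interval(150, 300)); // overlaps the first
    introns.push_back(Interval(500, 600)); // overlaps nothing
    std::printf("%d overlapping introns\n", (int)overlapping_members(introns).size()); // 2
    return 0;
}
// ===========================================================================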
+ for (size_t i = 0; i < hazards.size(); ++i) + { + vector c; + hazards[i].get_complete_subscaffolds(c); + split_hazards.insert(split_hazards.end(), c.begin(), c.end()); + } + + vector orig_hits = hits; + + hits.insert(hits.end(), split_hazards.begin(), split_hazards.end()); + + + compress_fragments(hits); + + verbose_msg( "%s\tAssembling bundle with %lu hits\n", bundle_label->c_str(), hits.size()); + + vector depth_of_coverage(bundle_length,0); + map, float> intron_depth_of_coverage; + compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_depth_of_coverage, + false); + + normal norm(0, 0.1); + + vector prev_chaff; + while (true) + { + static size_t MAX_BUNDLE_ALIGNMENTS = 0xFFFF; + + sort(hits.begin(), hits.end(), scaff_lt); + + //fprintf(stderr, "\tCurrent bundle has %d non-constitutive fragments\n", hits.size()); + + DAG bundle_dag; + + if (hits.empty()) + return true; + verbose_msg( "%s\tCalculating scaffold densities\n", bundle_label->c_str()); + vector scaff_doc; + record_doc_for_scaffolds(bundle_left, + hits, + depth_of_coverage, + intron_depth_of_coverage, + scaff_doc); + verbose_msg( "%s\tCreating compatibility graph\n", bundle_label->c_str()); + + if (!create_overlap_dag(hits, bundle_dag)) + { + break; + } + + HitsForNodeMap hits_for_node = get(vertex_name, bundle_dag); + + compress_overlap_dag_paths(bundle_dag, hits); + + if (hits.size() >= MAX_BUNDLE_ALIGNMENTS) + { + verbose_msg( "%s\tWarning: bundle too large, skipping assembly\n", bundle_label->c_str()); + return false; + } + + pair terminal = add_terminal_nodes(bundle_dag); + DAGNode source = terminal.first; + DAGNode sink = terminal.second; + + ReachGraph bp; + + verbose_msg( "%s\tConstructing reachability graph\n", bundle_label->c_str()); + + vector b_to_a; + adjacency_list<> TC; + + transitive_closure(bundle_dag, TC); + DagToBp dag_to_bp; + + // TODO: deprecate dependence of create_reachability_bp_graph() on scaffold_mask + vector scaffold_mask(num_vertices(bundle_dag), true); + + create_reachability_bp_graph(bundle_dag, bp, b_to_a, dag_to_bp, TC, scaffold_mask); + + ReachGraph::UEdgeMap cov_weights(bp); + add_weights_to_reachability_bp_graph(bp, hits_for_node, orig_hits, hits, cov_weights); + + verbose_msg( "%s\tPerforming weighted matching\n", bundle_label->c_str()); + + typedef lemon::MinCostMaxBipartiteMatching > Matcher; + Matcher matcher(bp, cov_weights); + matcher.run(); + + vector > chains; + make_chains_from_matching(bp, matcher, chains); + + verbose_msg( "%s\tFound %d distinct chains\n", bundle_label->c_str(), (int)chains.size()); + + vector > paths; + extend_chains_to_paths(bundle_dag, chains, TC, source, sink, paths); + + verbose_msg( "%s\tCreating scaffolds for %d paths\n", bundle_label->c_str(), (int)paths.size()); + + vector new_scaffs; + make_scaffolds_from_paths(bundle_dag, paths, new_scaffs); + + verbose_msg( "%s\tCollapsing scaffolds\n", bundle_label->c_str()); + + collapse_contained_transfrags(new_scaffs); + hits = new_scaffs; + } + + scaffolds = hits; + + // One last collapse attempt... + vector new_scaffs = scaffolds; + + verbose_msg( "%s\tPerforming final collapse round\n", bundle_label->c_str()); + + fill_gaps(new_scaffs, 2 * olap_radius); + + scaffolds = new_scaffs; + + // Cleave the partials at their unknowns to minimize FPKM dilation on + // the low end of the expression profile. 
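// ===========================================================================
// [Editor's illustration -- not part of the Cufflinks sources] The assembly
// loop above reduces "explain every fragment with as few transcripts as
// possible" to a minimum path cover: build the overlap DAG, take its
// transitive closure, turn reachability into a bipartite graph, and run a
// (min-cost) maximum matching; each matched edge chains two fragments into
// the same transcript, so the cover size is #nodes - #matched edges. A tiny
// unweighted restatement using Kuhn's augmenting-path matching, standing in
// for the weighted LEMON matcher used by the real code.
#include <vector>
#include <cstdio>

static bool try_augment(int u,
                        const std::vector<std::vector<int> >& reach, // reach[u] = nodes reachable from u
                        std::vector<int>& match_of_right,
                        std::vector<char>& visited)
{
    for (size_t k = 0; k < reach[u].size(); ++k)
    {
        int v = reach[u][k];
        if (visited[v]) continue;
        visited[v] = 1;
        if (match_of_right[v] == -1 ||
            try_augment(match_of_right[v], reach, match_of_right, visited))
        {
            match_of_right[v] = u;
            return true;
        }
    }
    return false;
}

static int min_path_cover(const std::vector<std::vector<int> >& reach)
{
    int n = (int)reach.size();
    std::vector<int> match_of_right(n, -1);
    int matched = 0;
    for (int u = 0; u < n; ++u)
    {
        std::vector<char> visited(n, 0);
        if (try_augment(u, reach, match_of_right, visited))
            ++matched;
    }
    return n - matched; // minimum number of chains covering all nodes
}

int main()
{
    // Toy reachability relation for 4 overlapping fragments:
    // 0 -> {1,2,3}, 1 -> {3}, 2 -> {3}; two chains suffice (e.g. 0-1-3 and 2).
    std::vector<std::vector<int> > reach(4);
    reach[0].push_back(1); reach[0].push_back(2); reach[0].push_back(3);
    reach[1].push_back(3);
    reach[2].push_back(3);
    std::printf("minimum path cover size = %d\n", min_path_cover(reach)); // 2
    return 0;
}
// ===========================================================================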
+ vector completes; + for (size_t i = 0; i < scaffolds.size(); ++i) + { + vector c; + scaffolds[i].get_complete_subscaffolds(c); + completes.insert(completes.end(), c.begin(), c.end()); + } + + verbose_msg( "Extracted %lu contiguous transfrags from %lu scaffolds\n", completes.size(), scaffolds.size()); + + new_scaffs = completes; + sort(new_scaffs.begin(), new_scaffs.end(), scaff_lt); + + collapse_contained_transfrags(new_scaffs); + sort(new_scaffs.begin(), new_scaffs.end(), scaff_lt); + scaffolds = new_scaffs; + } + + // TODO: refactor into subroutine. This routine shouldn't actually be + // necessary, and should be eliminated after thorough testing that it really + // isn't needed + for (size_t i = 0; i < scaffolds.size(); ++i) + { + //assert(!scaffolds[i].has_unknown()); + + const vector& supporting = scaffolds[i].mate_hits(); + CuffStrand s = CUFF_STRAND_UNKNOWN; + for (size_t j = 0; j < supporting.size(); ++j) + { +// assert (supporting[j]->strand() == CUFF_STRAND_UNKNOWN || +// s == supporting[j]->strand()); + if (supporting[j]->strand() != CUFF_STRAND_UNKNOWN) + s = supporting[j]->strand(); + } + if (scaffolds[i].strand() == CUFF_STRAND_UNKNOWN) + scaffolds[i].strand(s); + } + // end refactor + + return true; +} diff --git a/src/assemble.h b/src/assemble.h new file mode 100644 index 0000000..a3b4852 --- /dev/null +++ b/src/assemble.h @@ -0,0 +1,44 @@ +#ifndef ASSEMBLE_H +#define ASSEMBLE_H + +/* + * assemble.h + * cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + + +#include "bundles.h" +#include "scaffolds.h" + +class BiasLearner; + +bool assemble_hits(BundleFactory& bundle_factory, BiasLearner* bl_ptr); + +//bool intron_compatible(const MateHit& lhs, const MateHit& rhs); +bool read_hits_overlap(const ReadHit* lhs, const ReadHit* rhs); +bool read_hits_intron_agree(const ReadHit* h1, const ReadHit* h2); + +int match_length(const MateHit& m, int left, int right); + + +bool mate_graphs(const HitBundle& bundle, BundleStats* stats); + + + +bool make_scaffolds(int bundle_left, + int bundle_length, + vector& hits, + vector& scaffolds); + +#endif diff --git a/src/biascorrection.cpp b/src/biascorrection.cpp new file mode 100644 index 0000000..0d49851 --- /dev/null +++ b/src/biascorrection.cpp @@ -0,0 +1,830 @@ +/* + * biascorrection.cpp + * cufflinks + * + * Created by Adam Roberts on 5/20/10. + * Copyright 2010 Adam Roberts. All rights reserved. 
+ * + */ + +#include "biascorrection.h" +#include "scaffolds.h" +#include "abundances.h" +#include "progressbar.h" +#include "bundles.h" + +#include +#include + +using namespace std; + +void output_vector(vector& v, char* fname) +{ + ofstream myfile1; + string filename = output_dir + "/" + fname; + myfile1.open (filename.c_str()); + + for (size_t i = 0; i < v.size(); ++i) + myfile1 << v[i] <<","; + myfile1 << endl; + + myfile1.close(); +} + +double colSums(const ublas::matrix& A, vector& sums) +{ + long double total = 0.0; + sums = vector(A.size2(),0.0); + for (size_t i = 0; i < A.size1(); ++i) + for (size_t j = 0; j < A.size2(); ++j) + { + sums[j] += A(i,j); + total += A(i,j); + } + return total; +} + +double fourSums(const ublas::matrix& A, ublas::matrix& sums) +{ + long double total = 0.0; + sums = ublas::zero_matrix(A.size1(), A.size2()/4); + for (size_t i = 0; i < A.size1(); ++i) + for (size_t j = 0; j < A.size2(); ++j) + { + sums(i,j/4) += A(i,j); + total += A(i,j); + } + + return total; +} + +void ones(ublas::matrix& A) +{ + for (size_t i = 0; i < A.size1(); ++i) + for (size_t j = 0; j < A.size2(); ++j) + A(i,j) = 1; +} + +void get_compatibility_list(const vector >& transcripts, + const vector& alignments, + vector >& compatibilities) +{ + int M = alignments.size(); + int N = transcripts.size(); + + vector alignment_scaffs; + + for (size_t i = 0; i < alignments.size(); ++i) + { + const MateHit& hit = alignments[i]; + alignment_scaffs.push_back(Scaffold(hit)); + } + + for (int i = 0; i < M; ++i) + { + for (int j = 0; j < N; ++j) + { + if (transcripts[j]->strand() != CUFF_STRAND_UNKNOWN + && transcripts[j]->contains(alignment_scaffs[i]) + && Scaffold::compatible(*transcripts[j],alignment_scaffs[i])) + { + compatibilities[i].push_back(j); + } + } + } +} + +void learn_bias(BundleFactory& bundle_factory, BiasLearner& bl, bool progress_bar) +{ + HitBundle bundle; + RefSequenceTable& rt = bundle_factory.ref_table(); + + ProgressBar p_bar; + if (progress_bar) + p_bar = ProgressBar("Learning bias parameters.", bundle_factory.read_group_properties()->total_map_mass()); + + while(true) + { + HitBundle* bundle_ptr = new HitBundle(); + + if (!bundle_factory.next_bundle(*bundle_ptr)) + { + delete bundle_ptr; + break; + } + + HitBundle& bundle = *bundle_ptr; + + char bundle_label_buf[2048]; + sprintf(bundle_label_buf, "%s:%d-%d", rt.get_name(bundle.ref_id()), bundle.left(), bundle.right()); + if (progress_bar) + p_bar.update(bundle_label_buf, bundle.raw_mass()); + + if (bundle.non_redundant_hits().size()==0 || bundle.ref_scaffolds().size() != 1) + { + delete bundle_ptr; + continue; + } + + bl.preProcessTranscript(*(bundle.ref_scaffolds()[0])); + + delete bundle_ptr; + } + + if (progress_bar) + p_bar.complete(); + + bl.normalizeParameters(); + + if (output_bias_params) + bl.output(); +} + +const int BiasLearner::pow4[] = {1,4,16,64}; +const int BiasLearner::siteSpec[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; +const int BiasLearner::vlmmSpec[] = {1,1,1,1,1,2,2,2,3,3,3,3,3,3,3,3,2,2,2,1,1}; //Length of connections at each position in the window +const int BiasLearner::MAX_SLICE = 3; // Maximum connection length +const int BiasLearner::CENTER = 8; //Index in paramTypes[] of first element in read +const int BiasLearner::_m = 21; //Number of positions spanned by window +const int BiasLearner::_n = 64; //Length of maximum connection in VLMM +const int BiasLearner::lengthBins[] = {791,1265,1707,2433}; //Quantiles derived from human mRNA length distribution in UCSC genome browser +const double 
BiasLearner::positionBins[] = {.02,.04,.06,.08,.10,.15,.2,.3,.4,.5,.6,.7,.8,.85,.9,.92,.94,.96,.98,1}; + +BiasLearner::BiasLearner(shared_ptr frag_len_dist) +{ + paramTypes = vlmmSpec; + if (bias_mode==SITE || bias_mode==POS_SITE) + { + paramTypes = siteSpec; + } + _frag_len_dist = frag_len_dist; + + _startSeqParams = ublas::zero_matrix(_m,_n); + _startSeqExp = ublas::zero_matrix(_m,_n); + _endSeqParams = ublas::zero_matrix(_m,_n); + _endSeqExp = ublas::zero_matrix(_m,_n); + _startPosParams = ublas::zero_matrix(20,5); + _startPosExp = ublas::zero_matrix(20,5); + _endPosParams = ublas::zero_matrix(20,5); + _endPosExp = ublas::zero_matrix(20,5); +} + + +inline int BiasLearner::seqToInt(const char* seqSlice, int n) const +{ + int c = 0; + for(int i = 0; i < n; i++) + { + if (seqSlice[i] == 4) return -1;//N + c += (seqSlice[i])*pow4[n-i-1]; + } + return c; +} + +inline void BiasLearner::getSlice(const char* seq, char* slice, int start, int end) const// INCLUSIVE! +{ + if (end >= start) + { + for (int i = start; i <= end; ++i) + { + slice[i-start] = seq[i]; + } + } + else + { + for(int i = start; i >= end; --i) + { + slice[start-i] = seq[i]; + } + } +} + +void BiasLearner::preProcessTranscript(const Scaffold& transcript) +{ + if (transcript.strand()==CUFF_STRAND_UNKNOWN || transcript.fpkm() < 1 || transcript.seq()=="") + return; + + vector startHist(transcript.length()+1, 0.0); // +1 catches overhangs + vector endHist(transcript.length()+1, 0.0); + + foreach (const MateHit* hit_p, transcript.mate_hits()) + { + const MateHit& hit = *hit_p; + if (!hit.left_alignment() && !hit.right_alignment()) + continue; + + double mass = hit.mass(); + + int start; + int end; + int frag_len; + + transcript.map_frag(hit, start, end, frag_len); + startHist[start] += mass; + endHist[end] += mass; + } + processTranscript(startHist, endHist, transcript); +} + + +void BiasLearner::processTranscript(const std::vector& startHist, const std::vector& endHist, const Scaffold& transcript) +{ + double fpkm = transcript.fpkm(); + int seqLen = transcript.length(); + + char seq[seqLen]; + char c_seq[seqLen]; + encode_seq(transcript.seq(), seq, c_seq); + + char seqSlice[MAX_SLICE]; + + int lenClass=0; + while (seqLen > lengthBins[lenClass] && lenClass < 4) + { + lenClass++; + } + + // We want to only use the portion of the transcript where fragments can start/end + int min_frag_len = _frag_len_dist->min(); + int currStartBin = 0; + int startBinCutoff = positionBins[currStartBin]*(seqLen - min_frag_len); + int currEndBin = 0; + int endBinCutoff = positionBins[currStartBin]*(seqLen - min_frag_len); + +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_bl_lock); +#endif + + for (int i=0; i < seqLen; i++) + { + + //Position Bias + if (i > startBinCutoff && currStartBin < (int)_startPosParams.size1()-1) + startBinCutoff=positionBins[++currStartBin]*(seqLen - min_frag_len); + if (i - min_frag_len > endBinCutoff) + endBinCutoff = positionBins[++currEndBin]*(seqLen - min_frag_len); + + _startPosParams(currStartBin, lenClass) += startHist[i]/fpkm; + _startPosExp(currStartBin, lenClass) += !(_frag_len_dist->too_short(seqLen-i)); + _endPosParams(currEndBin, lenClass) += endHist[i]/fpkm; + _endPosExp(currEndBin, lenClass) += !(_frag_len_dist->too_short(i+1)); + + + bool start_in_bounds = i-CENTER >= 0 && i+(_m-1)-CENTER < seqLen; + bool end_in_bounds = i+CENTER-(_m-1) >= 0 && i+CENTER < seqLen; + + if (!start_in_bounds && !end_in_bounds) // Make sure we are in bounds of the sequence + continue; + + //Sequence Bias + for(int j=0; j < _m; 
j++) + { + // Start Bias + if (start_in_bounds) // Make sure we are in bounds of the sequence + { + int k = i+j-CENTER; + getSlice(seq, seqSlice, k-(paramTypes[j]-1), k); + int v = seqToInt(seqSlice,paramTypes[j]); + if (v >= 0) + { + _startSeqParams(j,v) += startHist[i]/fpkm; + _startSeqExp(j,v) += !(_frag_len_dist->too_short(seqLen-i)); + } + else // There is an N. Average over all possible values of N + { + list nList(1,0); + genNList(seqSlice, 0, paramTypes[j],nList); + for (list::iterator it=nList.begin(); it!=nList.end(); ++it) + { + _startSeqParams(j,*it) += startHist[i]/(fpkm * (double)nList.size()); + _startSeqExp(j,*it) += !(_frag_len_dist->too_short(seqLen-i))/(double)nList.size(); + } + } + } + // End Bias + if (end_in_bounds) // Make sure we are in bounds of the sequence + { + int k = i+CENTER-j; + getSlice(c_seq, seqSlice, k+(paramTypes[j]-1), k); + int v = seqToInt(seqSlice, paramTypes[j]); + if (v >= 0) + { + _endSeqParams(j,v) += endHist[i]/fpkm; + _endSeqExp(j,v) += !(_frag_len_dist->too_short(seqLen-i)); + } + else // There is an N. Average over all possible values of N + { + list nList(1,0); + genNList(seqSlice, 0, paramTypes[j], nList); + for (list::iterator it=nList.begin(); it!=nList.end(); ++it) + { + _endSeqParams(j,*it) += endHist[i]/(fpkm * (double)nList.size()); + _endSeqExp(j,*it) += !(_frag_len_dist->too_short(seqLen-i))/(double)nList.size(); + } + } + } + } + } +} + +void BiasLearner::getBias(const Scaffold& transcript, vector& startBiases, vector& endBiases) const +{ + if (transcript.seq()=="") + return; + + int seqLen = transcript.length(); + + char seq[seqLen]; + char c_seq[seqLen]; + encode_seq(transcript.seq(), seq, c_seq); + + char seqSlice[MAX_SLICE]; + + int lenClass=0; + while (seqLen > lengthBins[lenClass] && lenClass < 4) + { + lenClass++; + } + + int min_frag_len = _frag_len_dist->min(); + int currStartBin = 0; + int startBinCutoff = positionBins[currStartBin]*(seqLen - min_frag_len); + int currEndBin = 0; + int endBinCutoff = positionBins[currEndBin]*(seqLen - min_frag_len); + + for (int i=0; i < seqLen; i++) + { + //Position Bias + if (i > startBinCutoff && currStartBin < (int)_startPosParams.size1()-1) + startBinCutoff=positionBins[++currStartBin]*(seqLen - min_frag_len); + if (i - min_frag_len > endBinCutoff) + endBinCutoff = positionBins[++currEndBin]*(seqLen - min_frag_len); + + double startBias = _startPosParams(currStartBin, lenClass); + double endBias = _endPosParams(currEndBin,lenClass); + + //Sequence Bias + + bool start_in_bounds = i-CENTER >= 0 && i+(_m-1)-CENTER < seqLen; + bool end_in_bounds = i+CENTER-(_m-1) >= 0 && i+CENTER < seqLen - _frag_len_dist->mean(); // don't count bias near end since we're over-counting these fragments + + if (start_in_bounds || end_in_bounds) // Make sure we are in bounds of the sequence + { + for(int j=0; j < _m; j++) + { + // Start Bias + if (start_in_bounds) // Make sure we are in bounds of the sequence + { + int k = i+j-CENTER; + getSlice(seq, seqSlice, k-(paramTypes[j]-1), k); + int v = seqToInt(seqSlice, paramTypes[j]); + if (v >= 0) + { + startBias *= _startSeqParams(j,v); + } + else // There is an N. 
Average over all possible values of N + { + list nList(1,0); + double tot = 0; + genNList(seqSlice, 0, paramTypes[j],nList); + + for (list::iterator it=nList.begin(); it!=nList.end(); ++it) + { + tot += _startSeqParams(j,*it); + } + startBias *= tot/nList.size(); + } + } + + // End Bias + if (end_in_bounds) // Make sure we are in bounds of the sequence + { + int k = i+CENTER-j; + getSlice(c_seq, seqSlice, k+(paramTypes[j]-1), k); + int v = seqToInt(seqSlice,paramTypes[j]); + if (v >= 0) + { + endBias *= _endSeqParams(j,v); + } + else // There is an N. Average over all possible values of N + { + list nList(1,0); + double tot = 0; + genNList(seqSlice, 0, paramTypes[j],nList); + for (list::iterator it=nList.begin(); it!=nList.end(); ++it) + { + tot += _endSeqParams(j,*it); + } + endBias *= tot/nList.size(); + } + } + } + } + assert(finite(startBias) && finite(endBias)); + startBiases[i] = startBias; + endBiases[i] = endBias; + } +} + +void BiasLearner::genNList(const char* seqSlice, int start, int n, list& nList) const +{ + + if (n > 1) + genNList(seqSlice, start+1, n-1, nList); + + + if (n==1 && seqSlice[start]==4) + { + for (int j=0; j<4; ++j) + nList.push_back(j); + } + else if (n==1) + { + nList.push_back(seqSlice[start]); + } + else if (seqSlice[start]==4) + { + for (int i = nList.size()-1; i>=0; --i) + { + for (int j=0; j<4; ++j) + nList.push_back(nList.front()+j*pow4[n-1]); + nList.pop_front(); + } + } + else + { + for (list::iterator it=nList.begin(); it!=nList.end(); ++it) + (*it)+=seqSlice[start]*pow4[n-1]; + } + +} + +void BiasLearner::normalizeParameters() +{ + double THRESH = 100; + + + //Normalize position parameters + vector startPosParam_sums; + vector startPosExp_sums; + double start_tot = colSums(_startPosParams, startPosParam_sums); // Total starts for each length class + colSums(_startPosExp, startPosExp_sums); // Total FPKM for each length class + + vector endPosParam_sums; + vector endPosExp_sums; + double end_tot = colSums(_endPosParams, endPosParam_sums); // Total starts for each length class + colSums(_endPosExp, endPosExp_sums); // Total FPKM for each length class + + for(size_t i=0; i < _startPosParams.size1(); i++) + { + for(size_t j=0; j < _startPosParams.size2(); j++) + { + if (startPosParam_sums[j] < THRESH) + { + _startPosParams(i,j) = 1; + } + else + { + _startPosParams(i,j) /= startPosParam_sums[j]; + _startPosExp(i,j) /= startPosExp_sums[j]; + if (_startPosExp(i,j) == 0) + _startPosParams(i,j) = numeric_limits::max(); + else + _startPosParams(i,j) /= _startPosExp(i,j); + } + + if (endPosParam_sums[j] < THRESH) + { + _endPosParams(i,j) = 1; + } + else + { + _endPosParams(i,j) /= endPosParam_sums[j]; + _endPosExp(i,j) /= endPosExp_sums[j]; + if (_endPosExp(i,j) == 0) + _endPosParams(i,j) = numeric_limits::max(); + else + _endPosParams(i,j) /= _endPosExp(i,j); + } + } + } + + if (start_tot == 0.0) + ones(_startPosParams); + if (end_tot == 0.0) + ones(_endPosParams); + + ublas::matrix startSeqExp_sums; + ublas::matrix startParam_sums; + start_tot = fourSums(_startSeqParams, startParam_sums); + fourSums(_startSeqExp, startSeqExp_sums); + + ublas::matrix endSeqExp_sums; + ublas::matrix endParam_sums; + end_tot = fourSums(_endSeqParams, endParam_sums); + fourSums(_endSeqExp, endSeqExp_sums); + + //Normalize sequence parameters + for(int i=0; i < _m; i++) + { + for(int j=0; j < pow4[paramTypes[i]]; j++) + { + if (startParam_sums(i,j/4) < THRESH) + { + _startSeqParams(i,j) = 1; + } + else + { + _startSeqParams(i,j) /= startParam_sums(i,j/4); + _startSeqExp(i,j) /= 
startSeqExp_sums(i,j/4); + _startSeqParams(i,j) /= _startSeqExp(i,j); + } + if (endParam_sums(i,j/4) < THRESH) + { + _endSeqParams(i,j) = 1; + } + else + { + _endSeqParams(i,j) /= endParam_sums(i,j/4); + _endSeqExp(i,j) /= endSeqExp_sums(i,j/4); + _endSeqParams(i,j) /= _endSeqExp(i,j); + } + } + } + + if (start_tot==0.0) + ones(_startSeqParams); + if (end_tot==0.0) + ones(_endSeqParams); + + if (bias_mode==VLMM || bias_mode==SITE) + { + ones(_startPosParams); + ones(_endPosParams); + } + else if (bias_mode == POS) + { + ones(_startSeqParams); + ones(_endSeqParams); + } +} + +void BiasLearner::output() +{ + ofstream myfile1; + string filename = output_dir + "/biasParams.csv"; + myfile1.open (filename.c_str()); + + // StartSeq + for (int i = 0; i < _n; ++i) + { + for(int j = 0; j < _m; ++j) + myfile1 << _startSeqParams(j,i) <<","; + myfile1 << endl; + } + myfile1 << endl; + + // EndSeq + for (int i = 0; i < _n; ++i) + { + for(int j = 0; j < _m; ++j) + myfile1 << _endSeqParams(j,i) <<","; + myfile1 << endl; + } + myfile1 << endl; + + // Start Pos + for (size_t i = 0; i < _startPosParams.size2(); ++i) + { + for(size_t j = 0; j < _startPosParams.size1(); ++j) + myfile1 << _startPosParams(j,i) <<","; + myfile1 < rgp) +{ + int trans_len = _transcript->length(); + _rg_index.insert(make_pair(rgp, _size)); + + // Defaults are values for a run not using bias correction + vector start_bias(trans_len+1, 1.0); + vector end_bias(trans_len+1, 1.0); + double eff_len = 0.0; + + shared_ptr fld = rgp->frag_len_dist(); + + vector tot_bias_for_len(trans_len+1, 0); + vector start_bias_for_len(trans_len+1, 0); + vector end_bias_for_len(trans_len+1, 0); + + tot_bias_for_len[trans_len] = trans_len; + start_bias_for_len[trans_len] = trans_len; + end_bias_for_len[trans_len] = trans_len; + + if (final_est_run && corr_bias && _transcript->strand()!=CUFF_STRAND_UNKNOWN) + { + rgp->bias_learner()->getBias(*_transcript, start_bias, end_bias); + + for(int l = fld->min(); l <= trans_len; l++) + { + for(int i = 0; i <= trans_len - l; i++) + { + double tot_bias = start_bias[i]*end_bias[i+l-1]; + tot_bias_for_len[l] += tot_bias; + start_bias_for_len[l] += start_bias[i]; + end_bias_for_len[l] += end_bias[i+l-1]; + + double frag_prob = (bias_mode == POS || bias_mode == POS_VLMM || bias_mode == POS_SITE) ? fld->npdf(l, trans_len-i) : fld->pdf(l); + eff_len += tot_bias * frag_prob; + } + } + } + else + { + for(int l = fld->min(); l <= trans_len; l++) + { + tot_bias_for_len[l] = trans_len - l + 1; + start_bias_for_len[l] = trans_len - l + 1; + end_bias_for_len[l] = trans_len - l + 1; + eff_len += fld->pdf(l) * (trans_len - l + 1); + } + } + + assert(eff_len > 0); + _start_biases.push_back(start_bias); + _end_biases.push_back(end_bias); + _tot_biases_for_len.push_back(tot_bias_for_len); + _eff_lens.push_back(eff_len); + _start_biases_for_len.push_back(start_bias_for_len); + _end_biases_for_len.push_back(end_bias_for_len); + _rg_masses.push_back(0.0); + + return _size++; // Index of new element +} + +int num_adds = 0; +int BiasCorrectionHelper::get_index(shared_ptr rgp) +{ + boost::unordered_map, int>::iterator iter; + iter = _rg_index.find(rgp); + + if (iter==_rg_index.end()) //This rg is not yet in the index, so add it. 
+ { + num_adds++; + return add_read_group(rgp); + } + + return iter->second; +} + +// Hit needs to be from the collapsed (non_redundant) list to match indexing +double BiasCorrectionHelper::get_cond_prob(const MateHit& hit) +{ + shared_ptr rgp = hit.read_group_props(); + + int i = get_index(rgp); + + int start; + int end; + int frag_len; + int trans_len = _transcript->length(); + + _transcript->map_frag(hit, start, end, frag_len); + + shared_ptr fld = rgp->frag_len_dist(); + + double cond_prob = 1.0; + cond_prob *= _start_biases[i][start]; + cond_prob *= _end_biases[i][end]; + double frag_prob = (bias_mode == POS || bias_mode == POS_VLMM || bias_mode == POS_SITE) ? fld->npdf(frag_len, trans_len-start) : fld->pdf(frag_len); + cond_prob *= frag_prob; + + if (cond_prob==0.0) + return 0.0; + + if (hit.is_pair() || hit.read_group_props()->complete_fragments()) + { + if (frag_len >= (int)_tot_biases_for_len[i].size()) + cond_prob = 0.0; + else + cond_prob /= _tot_biases_for_len[i][frag_len]; + } + else if (start!=trans_len && end==trans_len) // The hit is a singleton at the start of a fragment + cond_prob /= _start_biases_for_len[i][frag_len]; + else if (start==trans_len && end!=trans_len) // The hit is a singleton at the end of a fragment + cond_prob /= _end_biases_for_len[i][frag_len]; + else if (frag_len==trans_len) // We don't actually know where we start or end and can't subtract off the frag_len or we'll get inf + cond_prob /= trans_len; + else + { + if (trans_len < frag_len) + { + cond_prob = 0; + } + else + { + // Single-end read w/ library type FF or RR + cond_prob /= trans_len-frag_len; + } + } + + if (cond_prob > 0 && hit.collapse_mass() > 0) + { + _rg_masses[i] += hit.collapse_mass(); + _mapped = true; + } + +#if DEBUG + if (isinf(cond_prob)) + { + double cond_prob = 1.0; + cond_prob *= _start_biases[i][start]; + cond_prob *= _end_biases[i][end]; + double frag_prob = (bias_mode == POS || bias_mode == POS_VLMM || bias_mode == POS_SITE) ? 
fld->npdf(frag_len, trans_len-start) : fld->pdf(frag_len); + cond_prob *= frag_prob; + + if (cond_prob==0.0) + return 0.0; + + if (hit.is_pair()) + { + if (frag_len >= _tot_biases_for_len[i].size()) + cond_prob = 0.0; + else + cond_prob /= _tot_biases_for_len[i][frag_len]; + } + else if (start!=trans_len && end==trans_len) // The hit is a singleton at the start of a fragment + cond_prob /= _start_biases_for_len[i][frag_len]; + else if (start==trans_len && end!=trans_len) // The hit is a singleton at the end of a fragment + cond_prob /= _end_biases_for_len[i][frag_len]; + else if (frag_len==trans_len) // We don't actually know where we start or end and can't subtract off the frag_len or we'll get inf + cond_prob /= trans_len; + else + { + if (trans_len < frag_len) + { + cond_prob = 0; + } + else + { + // Single-end read w/ library type FF or RR + cond_prob /= trans_len-frag_len; + } + } + } +#endif + + assert(!isinf(cond_prob)); + assert(!isnan(cond_prob)); + + if (isinf(cond_prob) || isnan(cond_prob)) + cond_prob = 0.0; + + return cond_prob; +} + +double BiasCorrectionHelper::get_effective_length() +{ + + if (_size==0) + return _transcript->length(); + + + double tot_mass = accumulate( _rg_masses.begin(), _rg_masses.end(), 0.0 ); + double eff_len = 0.0; + + if (tot_mass==0) + return _transcript->length(); + + for (boost::unordered_map, int>::iterator itr = _rg_index.begin(); + itr != _rg_index.end(); + ++itr) + { + int i = itr->second; + double rg_eff_len = _eff_lens[i]; + eff_len += rg_eff_len * (_rg_masses[i]/tot_mass); + } + + assert(eff_len>0); + //assert(eff_len>1); + assert(!isnan(eff_len)); + return eff_len; +} + + + + diff --git a/src/biascorrection.h b/src/biascorrection.h new file mode 100644 index 0000000..a66d643 --- /dev/null +++ b/src/biascorrection.h @@ -0,0 +1,118 @@ +#ifndef BIASCORRECTION_H +#define BIASCORRECTION_H + +/* + * biascorrection.h + * cufflinks + * + * Created by Adam Roberts on 5/20/10. + * Copyright 2010 Adam Roberts. All rights reserved. 
+ * + */ + + +#include +#include +#include +#include +#include +#include +#include "common.h" + +class MateHit; +class Scaffold; +class BundleFactory; +class HitBundle; + +namespace ublas = boost::numeric::ublas; + +void get_compatibility_list(const std::vector& transcripts, + const std::vector& alignments, + std::vector >& compatibilities); + +class BiasLearner{ + static const int pow4[]; + static const int MAX_SLICE; + static const int CENTER; + static const int _m; + static const int _n; + + static const int lengthBins[]; + static const double positionBins[]; + static const int siteSpec[]; + static const int vlmmSpec[]; + + const int* paramTypes; + boost::shared_ptr _frag_len_dist; + ublas::matrix _startSeqParams; + ublas::matrix _startSeqExp; + ublas::matrix _endSeqParams; + ublas::matrix _endSeqExp; + ublas::matrix _startPosParams; + ublas::matrix _startPosExp; + ublas::matrix _endPosParams; + ublas::matrix _endPosExp; + + int seqToInt(const char* seqSlice, int n) const; + void getSlice(const char* seq, char* slice, int start, int end) const; + void genNList(const char* seqSlice, int start, int n, std::list& nList) const; + +#if ENABLE_THREADS + boost::mutex _bl_lock; +#endif + +public: + + BiasLearner(boost::shared_ptr frag_len_dist); + void preProcessTranscript(const Scaffold& transcript); + + void processTranscript(const std::vector& startHist, const std::vector& endHist, const Scaffold& transcript); + void normalizeParameters(); + void output(); + + void getBias(const Scaffold& transcript, std::vector& startBiases, std::vector& endBiases) const; + +}; + +void learn_bias(BundleFactory& bundle_factory, BiasLearner& bl, bool progress_bar = true); +void process_bundle(HitBundle& bundle, BiasLearner& bl); + +// Helps with the complexities of bias correction with replicates in cond_probs and eff_lens +class BiasCorrectionHelper{ + + boost::shared_ptr _transcript; + boost::unordered_map, int> _rg_index; + int _size; + bool _mapped; + + std::vector > _start_biases; + std::vector > _end_biases; + std::vector > _pos_biases; + std::vector > _tot_biases_for_len; + std::vector > _start_biases_for_len; + std::vector > _end_biases_for_len; + + std::vector _eff_lens; + std::vector _rg_masses; + + int add_read_group(boost::shared_ptr rgp); + int get_index(boost::shared_ptr rgp); + +public: + + BiasCorrectionHelper(boost::shared_ptr transcript) + { + _transcript = transcript; + _mapped = false; + _size = 0; + } + + + double get_cond_prob(const MateHit& hit); + + double get_effective_length(); + bool is_mapped() { return _mapped; } + +}; + +#endif diff --git a/src/bundles.cpp b/src/bundles.cpp new file mode 100644 index 0000000..ead07f2 --- /dev/null +++ b/src/bundles.cpp @@ -0,0 +1,1543 @@ +/* + * bundles.cpp + * cufflinks + * + * Created by Cole Trapnell on 9/6/09. + * Copyright 2009 Cole Trapnell. All rights reserved. 
+ * + */ + +#include +#include +#include +#include + +#include "common.h" +#include "bundles.h" +#include "scaffolds.h" + +using namespace std; +using boost::math::binomial; + +//struct ScaffoldSorter +//{ +// ScaffoldSorter(RefSequenceTable& _rt) : rt(_rt) {} +// bool operator()(shared_ptr lhs, shared_ptr rhs) +// { +// assert (lhs); +// assert (rhs); +// const char* lhs_name = rt.get_name(lhs->ref_id()); +// const char* rhs_name = rt.get_name(rhs->ref_id()); +// int c = strcmp(lhs_name, rhs_name); +// if (c != 0) +// { +// return c < 0; +// } +// if (lhs->left() != rhs->left()) +// { +// return lhs->left() < rhs->left(); +// } +// return false; +// } +// +// RefSequenceTable& rt; +//}; + +struct ScaffoldSorter +{ + ScaffoldSorter(RefSequenceTable& _rt) : rt(_rt) {} + bool operator()(shared_ptr lhs, shared_ptr rhs) + { + //assert (lhs); + //assert (rhs); + if (!lhs || !rhs) + return false; + int lhs_order = rt.observation_order(lhs->ref_id()); + assert (lhs_order != -1); + int rhs_order = rt.observation_order(rhs->ref_id()); + assert (rhs_order != -1); + + if (lhs_order != rhs_order) + { + return lhs_order < rhs_order; + } + if (lhs->left() != rhs->left()) + { + return lhs->left() < rhs->left(); + } + return false; + } + + RefSequenceTable& rt; +}; + +//FIXME: needs refactoring +void load_ref_rnas(FILE* ref_mRNA_file, + RefSequenceTable& rt, + vector >& ref_mRNAs, + bool loadSeqs, + bool loadFPKM) +{ + if (loadSeqs) + ProgressBar p_bar("Loading reference annotation and sequence.",0); + else + ProgressBar p_bar("Loading reference annotation.",0); + + GList ref_rnas; + + // If the RefSequenceTable already has entries, we will sort the GTF records + // according to their observation order. Otherwise, we will sort the + // RefSequenceTable's records lexicographically. + bool reorder_GTF_recs_lexicographically = false; + if (rt.size() == 0) + { + reorder_GTF_recs_lexicographically = true; + } + + if (ref_mRNA_file) + { + gtf_tracking_verbose=cuff_verbose; + read_transcripts(ref_mRNA_file, ref_rnas, true); + } + + int last_gseq_id = -1; + GFaSeqGet* faseq = NULL; + GFastaHandler gfasta(fasta_dir.c_str()); + // Geo groups them by chr. 
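// ===========================================================================
// [Editor's illustration -- not part of the Cufflinks sources] In the loop
// below, each reference transcript's exon list (1-based, inclusive GTF
// coordinates) is flattened into alternating match/intron ops: a match op of
// length (end - start + 1) anchored at the 0-based position (start - 1), and
// between consecutive exons an intron op of length (next.start - end - 1)
// anchored at the current exon's end. A standalone sketch of that
// conversion, with a simplified op struct in place of AugmentedCuffOp.
#include <vector>
#include <utility>
#include <cstdio>

struct SimpleOp
{
    enum Kind { MATCH, INTRON } kind;
    int g_off;  // 0-based genomic offset of the op
    int length; // length of the op in genomic bases
};

static std::vector<SimpleOp> exons_to_ops(const std::vector<std::pair<int,int> >& exons)
{
    std::vector<SimpleOp> ops;
    for (size_t e = 0; e < exons.size(); ++e)
    {
        SimpleOp m = { SimpleOp::MATCH, exons[e].first - 1, exons[e].second - exons[e].first + 1 };
        ops.push_back(m);
        if (e + 1 < exons.size())
        {
            SimpleOp i = { SimpleOp::INTRON, exons[e].second, exons[e + 1].first - exons[e].second - 1 };
            ops.push_back(i);
        }
    }
    return ops;
}

int main()
{
    // Two exons 100-200 and 301-400 (1-based, inclusive) give a 101 bp match,
    // a 100 bp intron, and a 100 bp match.
    std::vector<std::pair<int,int> > exons;
    exons.push_back(std::make_pair(100, 200));
    exons.push_back(std::make_pair(301, 400));
    std::vector<SimpleOp> ops = exons_to_ops(exons);
    for (size_t i = 0; i < ops.size(); ++i)
        std::printf("%s at %d, len %d\n",
                    ops[i].kind == SimpleOp::MATCH ? "MATCH" : "INTRON",
                    ops[i].g_off, ops[i].length);
    return 0;
}
// ===========================================================================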
+ if (ref_rnas.Count()>0) //if any ref data was loaded + { + for (int j = 0; j < ref_rnas.Count(); ++j) + { //ref data is grouped by genomic sequence + //const char* name = ref_rnas[j]->gseq_name; + + int f = 0; + int r = 0; + int u = 0; + GffObj* rna_p; + RefID ref_id = rt.get_id(ref_rnas[j]->gseq_name, NULL); + int f_count = ref_rnas[j]->mrnas_f.Count(); + int r_count = ref_rnas[j]->mrnas_r.Count(); + int u_count = ref_rnas[j]->umrnas.Count(); + + while(!(f==f_count && r==r_count && u==u_count)) + { + CuffStrand strand; + + if (f < f_count) + { + rna_p = ref_rnas[j]->mrnas_f[f++]; + strand = CUFF_FWD; + } + else if (r < r_count) + { + rna_p = ref_rnas[j]->mrnas_r[r++]; + strand = CUFF_REV; + } + else + { + rna_p = ref_rnas[j]->umrnas[u++]; + strand = CUFF_STRAND_UNKNOWN; + } + + GffObj& rna = *rna_p; + + if (loadSeqs && rna.gseq_id != last_gseq_id) //next chromosome + { + delete faseq; + faseq = NULL; + last_gseq_id = rna.gseq_id; + faseq = gfasta.fetch(last_gseq_id); + if (faseq==NULL) + { + fprintf(stderr,"This contig will not be bias corrected.\n"); + } + } + + vector ops; + for (int e = 0; e < rna.exons.Count(); ++e) + { + GffExon& ex = *(rna.exons[e]); + ops.push_back(AugmentedCuffOp(CUFF_MATCH, ex.start - 1, ex.end - ex.start + 1)); + + if (e + 1 < rna.exons.Count()) + { + GffExon& next_ex = *(rna.exons[e+1]); + ops.push_back(AugmentedCuffOp(CUFF_INTRON, ex.end, next_ex.start - ex.end - 1)); + } + } + + Scaffold ref_scaff(ref_id, strand, ops, true); + + char* rna_seq = 0; + int seqlen=0; + if (loadSeqs && faseq){ + rna_seq = rna.getSpliced(faseq, false, &seqlen); + } + + if (rna.getID()) + ref_scaff.annotated_trans_id(rna.getID()); + + + if (rna.getGeneID()) + ref_scaff.annotated_gene_id(rna.getGeneID()); + + if (rna.getGeneName()) + ref_scaff.annotated_gene_name(rna.getGeneName()); + + + char* nearest_ref_match = rna.getAttr("nearest_ref"); + char* class_code = rna.getAttr("class_code"); + + if (nearest_ref_match && class_code) + { + ref_scaff.nearest_ref_id(nearest_ref_match); + ref_scaff.nearest_ref_classcode(*class_code); + } + + char* protein_id = rna.getAttr("p_id"); + if (protein_id) + ref_scaff.annotated_protein_id(protein_id); + + + char* tss_id = rna.getAttr("tss_id"); + if (tss_id) + ref_scaff.annotated_tss_id(tss_id); + + + if (loadFPKM) + { + const char* expr = rna.getAttr("FPKM"); + if (expr!=NULL) { + if (expr[0]=='"') expr++; + ref_scaff.fpkm(strtod(expr, NULL)); + } + } + + if (loadSeqs) + { + string rs = (rna_seq) ? 
rna_seq:""; + std::transform(rs.begin(), rs.end(), rs.begin(), (int (*)(int))std::toupper); + ref_scaff.seq(rs); + GFREE(rna_seq); + } + + shared_ptr scaff(new Scaffold()); + *scaff = ref_scaff; + assert (scaff); + ref_mRNAs.push_back(scaff); + } + } + + foreach (shared_ptr s, ref_mRNAs) + { + assert (s); + } + + if (reorder_GTF_recs_lexicographically) + { + rt.order_recs_lexicographically(); + } + + ScaffoldSorter sorter(rt); + sort(ref_mRNAs.begin(), ref_mRNAs.end(), sorter); + + } + delete faseq; +} + + +int HitBundle::_next_id = 0; + +bool HitBundle::add_hit(const MateHit& hit) +{ + if (_final) + { + return false; + } + + // Update the bounds on the span + if (hit.left() < _leftmost) + _leftmost = hit.left(); + if (hit.right() > _rightmost) + _rightmost = hit.right(); + + + _hits.push_back(hit); + return true; +} + +struct HitlessScaffold +{ + bool operator()(shared_ptr x) + { + return x->mate_hits().empty(); + } +}; + +bool unmapped_hit(const MateHit& x) +{ + return !(x.is_mapped()); +} + + +bool HitBundle::add_open_hit(shared_ptr rg_props, + const ReadHit* bh, + bool expand_by_partner) +{ + _leftmost = min(_leftmost, bh->left()); + _ref_id = bh->ref_id(); + + if (bh->is_singleton() || no_read_pairs) + { + _rightmost = max(_rightmost, bh->right()); + MateHit m(rg_props, bh->ref_id(), bh, NULL); + if (m.right() - m.left() > max_gene_length) + { + fprintf(stderr, "Warning: hit is longer than max_gene_length, skipping\n"); + return false; + } + add_hit(m); + } + else + { + if (abs(bh->right() - bh->partner_pos()+1) > max_gene_length) + { + fprintf(stderr, "Warning: hit is longer than max_gene_length, skipping\n"); + return false; + } + if (expand_by_partner) + _rightmost = max(max(_rightmost, bh->right()), bh->partner_pos()+1); + OpenMates::iterator mi = _open_mates.find(bh->left()); + + // Does this read hit close an open mate? + if (mi == _open_mates.end()) + { + // No, so add it to the list of open mates, unless we would + // already have seen it's partner + if(bh->left() <= bh->partner_pos()) + { + MateHit open_hit(rg_props, + bh->ref_id(), + bh, + NULL); + + pair ret; + ret = _open_mates.insert(make_pair(bh->partner_pos(), + list())); + + ret.first->second.push_back(open_hit); + } + else + { + // This should never happen during hit_driven or ref_guided bundling, and in the case of + // ref_driven, this read clearly shouldn't map to any of the transcripts anyways. + // Adding this hit would cause problems with multi-reads that straddle boundaries after assembly. + // add_hit(MateHit(rg_props,bh->ref_id(), bh, NULL)); + return false; + } + } + else + { + + bool found_partner = false; + // Maybe, see if we can find an ID match in the list of + // open mates expecting a partner at this position + for (list::iterator pi = mi->second.begin(); + pi != mi->second.end(); + ++pi) + { + MateHit& pm = *pi; + + if (pm.insert_id() == bh->insert_id()) + { + // Found a partner? 
+ + Scaffold L(MateHit(rg_props, bh->ref_id(), pm.left_alignment(), NULL)); + Scaffold R(MateHit(rg_props, bh->ref_id(), bh, NULL)); + + bool strand_agree = L.strand() == CUFF_STRAND_UNKNOWN || + R.strand() == CUFF_STRAND_UNKNOWN || + L.strand() == R.strand(); + + //bool orientation_agree = pm.left_alignment()->antisense_align() != bh->antisense_align(); + + if (strand_agree && + (!Scaffold::overlap_in_genome(L, R, olap_radius) || + Scaffold::compatible(L,R))) + { + pm.right_alignment(bh); + add_hit(pm); + mi->second.erase(pi); + if (mi->second.empty()) + _open_mates.erase(mi); + + found_partner = true; + break; + } + } + } + + if (!found_partner) + { + // If we got here, couldn't actually close any mates with + // this read hit, so open a new one, unless we can never + // close this one + if(bh->left() <= bh->partner_pos()) + { + MateHit open_hit(rg_props, bh->ref_id(), bh, NULL); + + pair ret; + ret = _open_mates.insert(make_pair(bh->partner_pos(), + list())); + + ret.first->second.push_back(open_hit); + } + else + { + // This should never happen during hit_driven or ref_guided bundling, and in the case of + // ref_driven, this read clearly shouldn't map to any of the transcripts anyways. + // Adding this hit would cause problems with multi-reads that straddle boundaries after assembly. + // add_hit(MateHit(rg_props, bh->ref_id(), bh, NULL)); + return false; + } + } + } + } + return true; +} + +void HitBundle::collapse_hits() +{ + ::collapse_hits(_hits, _non_redundant); +} + +void HitBundle::finalize_open_mates() +{ + // We don't want to split reads accross boundaries since this would only occur + // in ref_driven mode and the read shouldn't map to any of the references in this case. + + for(OpenMates::iterator itr = _open_mates.begin(); itr != _open_mates.end(); ++itr) + { + foreach (MateHit& hit, itr->second) + { + delete hit.left_alignment(); + delete hit.right_alignment(); + } + } + _open_mates.clear(); +} + +void HitBundle::remove_hitless_scaffolds() +{ + vector >::iterator new_end = remove_if(_ref_scaffs.begin(), + _ref_scaffs.end(), + HitlessScaffold()); + _ref_scaffs.erase(new_end, _ref_scaffs.end()); +} + +void HitBundle::remove_unmapped_hits() +{ + + foreach (MateHit& hit, _hits) + { + if (unmapped_hit(hit)) + { + delete hit.left_alignment(); + delete hit.right_alignment(); + } + } + + vector::iterator new_end = remove_if(_hits.begin(), + _hits.end(), + unmapped_hit); + + _hits.erase(new_end, _hits.end()); + + new_end = remove_if(_non_redundant.begin(), + _non_redundant.end(), + unmapped_hit); + _non_redundant.erase(new_end, _non_redundant.end()); + +} + +void HitBundle::combine(const vector& in_bundles, + HitBundle& out_bundle) +{ + out_bundle._hits.clear(); + out_bundle._non_redundant.clear(); + out_bundle._ref_scaffs.clear(); + + for (size_t i = 1; i < in_bundles.size(); ++i) + { + assert(in_bundles[i]->ref_id() == in_bundles[i-1]->ref_id()); + } + + // Merge hits + vector indices(in_bundles.size(),0); + while(true) + { + int next_bundle = -1; + const MateHit* next_hit=NULL; + for(size_t i = 0; i < in_bundles.size(); ++i) + { + const vector& curr_hits = in_bundles[i]->hits(); + + if (indices[i] == curr_hits.size()) + continue; + + const MateHit* curr_hit = &curr_hits[indices[i]]; + + if (next_bundle == -1 || mate_hit_lt(*curr_hit, *next_hit)) + { + next_bundle = i; + next_hit = curr_hit; + } + } + + if(next_bundle==-1) + break; + + out_bundle._hits.push_back(*next_hit); + indices[next_bundle]++; + } + + // Merge collapsed hits + indices = vector(in_bundles.size(), 0); + 
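// ===========================================================================
// [Editor's illustration -- not part of the Cufflinks sources]
// HitBundle::combine() merges the already-sorted lists of several bundles by
// keeping one cursor per input and repeatedly taking the smallest element
// among the cursors -- a k-way merge. A standalone sketch of that pattern on
// plain integers (the real code compares MateHits with mate_hit_lt and
// scaffolds with scaff_lt_rt_oplt).
#include <vector>
#include <cstdio>

static std::vector<int> k_way_merge(const std::vector<std::vector<int> >& inputs)
{
    std::vector<size_t> idx(inputs.size(), 0); // one cursor per sorted input
    std::vector<int> out;
    while (true)
    {
        int best = -1;
        for (size_t i = 0; i < inputs.size(); ++i)
        {
            if (idx[i] == inputs[i].size())
                continue;
            if (best == -1 || inputs[i][idx[i]] < inputs[best][idx[best]])
                best = (int)i;
        }
        if (best == -1)
            break;                             // every cursor exhausted
        out.push_back(inputs[best][idx[best]]);
        ++idx[best];
    }
    return out;
}

int main()
{
    std::vector<std::vector<int> > in(2);
    int a[] = { 1, 4, 9 };
    int b[] = { 2, 3, 10 };
    in[0].assign(a, a + 3);
    in[1].assign(b, b + 3);
    std::vector<int> merged = k_way_merge(in);
    for (size_t i = 0; i < merged.size(); ++i)
        std::printf("%d ", merged[i]);         // 1 2 3 4 9 10
    std::printf("\n");
    return 0;
}
// ===========================================================================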
while(true) + { + int next_bundle = -1; + const MateHit* next_hit = NULL; + for(size_t i = 0; i < in_bundles.size(); ++i) + { + const vector& curr_non_redundant_hits = in_bundles[i]->non_redundant_hits(); + + if (indices[i] == curr_non_redundant_hits.size()) + continue; + + const MateHit* curr_hit = &curr_non_redundant_hits[indices[i]]; + + if (next_bundle == -1 || mate_hit_lt(*curr_hit, *next_hit)) + { + next_bundle = i; + next_hit = curr_hit; + } + } + + if(next_bundle==-1) + break; + + out_bundle._non_redundant.push_back(*next_hit); + indices[next_bundle]++; + } + + for(size_t i = 0; i < in_bundles.size(); ++i) + { + for (size_t j = 0; j < in_bundles[i]->_ref_scaffs.size(); ++j) + { + in_bundles[i]->_ref_scaffs[j]->clear_hits(); + } + } + + // Merge ref scaffolds + indices = vector(in_bundles.size(), 0); + while(true) + { + int next_bundle = -1; + shared_ptr next_scaff; + for(size_t i = 0; i < in_bundles.size(); ++i) + { + const vector >& curr_scaffs = in_bundles[i]->_ref_scaffs; + + if (indices[i] == curr_scaffs.size()) + continue; + + shared_ptr curr_scaff = curr_scaffs[indices[i]]; + + if (next_bundle == -1 || scaff_lt_rt_oplt(*curr_scaff, *next_scaff)) + { + next_bundle = i; + next_scaff = curr_scaff; + } + } + + if(next_bundle==-1) + break; + + if (out_bundle._ref_scaffs.size()==0 || out_bundle._ref_scaffs.back()->annotated_trans_id() != next_scaff->annotated_trans_id()) + out_bundle.add_ref_scaffold(next_scaff); + indices[next_bundle]++; + } + + out_bundle.finalize(true); // true means everything is already sorted, etc. + out_bundle._num_replicates = (int)in_bundles.size(); +} + + +void HitBundle::finalize(bool is_combined) +{ + _final = true; + + if (!is_combined) + { + sort(_hits.begin(), _hits.end(), mate_hit_lt); + if (cond_prob_collapse) + { + collapse_hits(); + } + else + { + foreach (MateHit& hit, _hits) + { + hit.incr_collapse_mass(hit.common_scale_mass()); + } + _non_redundant = _hits; + + } + sort(_ref_scaffs.begin(), _ref_scaffs.end(), scaff_lt_rt_oplt_sp); + vector >::iterator new_end = unique(_ref_scaffs.begin(), + _ref_scaffs.end(), + StructurallyEqualScaffolds()); + _ref_scaffs.erase(new_end, _ref_scaffs.end()); + vector >(_ref_scaffs).swap(_ref_scaffs); + } + + for (size_t j = 0; j < _ref_scaffs.size(); ++j) + { + _ref_scaffs[j]->clear_hits(); + } + + _compatible_mass = 0.0; + + for (size_t i = 0; i < _hits.size(); ++i) + { + MateHit& hit = _hits[i]; + + Scaffold hs(hit); + + if (i >= 1) + { + assert (hit.ref_id() == _hits[i-1].ref_id()); + } + hit.is_mapped(false); + for (size_t j = 0; j < _ref_scaffs.size(); ++j) + { + // add hit only adds if the hit is structurally compatible + if (_ref_scaffs[j]->contains(hs)) + { + bool added = _ref_scaffs[j]->add_hit(&hit); + if (added) + hit.is_mapped(true); + } + } + if (hit.is_mapped()) + { + _compatible_mass += hit.mass(); + } + } + +} + +void print_sort_error(const char* last_chr_name, + int last_chr_pos, + const char* bh_name, + int bh_pos) +{ + fprintf(stderr, "\nError: this SAM file doesn't appear to be correctly sorted!\n"); + fprintf(stderr, "\tcurrent hit is at %s:%d, last one was at %s:%d\n", + bh_name, + bh_pos, + last_chr_name, + last_chr_pos); + fprintf(stderr, "Cufflinks requires that if your file has SQ records in\nthe SAM header that they appear in the same order as the chromosomes names \nin the alignments.\nIf there are no SQ records in the header, or if the header is missing,\nthe alignments must be sorted lexicographically by chromsome\nname and by position.\n \n"); +} + + +double 
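+ // The sort check that triggers print_sort_error() (see next_valid_alignment just below)
+ // compares chromosomes by the order in which they were first observed
+ // (observation_order) and then by position within a chromosome.  Editor's sketch of
+ // that predicate (out_of_order() is a hypothetical name):
+ //
+ //   bool out_of_order(int prev_chr_order, int prev_pos, int curr_chr_order, int curr_pos)
+ //   {
+ //       return prev_chr_order > curr_chr_order ||
+ //              (prev_chr_order == curr_chr_order && prev_pos > curr_pos);
+ //   }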
BundleFactory::next_valid_alignment(const ReadHit*& bh) +{ + const char* hit_buf; + size_t hit_buf_size = 0; + bh = NULL; + + // Keep track of mass of hits we skip + double raw_mass = 0; + + while (true) + { + + if (!_hit_fac->next_record(hit_buf, hit_buf_size)) + break; + + ReadHit tmp; + if (!_hit_fac->get_hit_from_buf(hit_buf, tmp, false)) + continue; + + if (tmp.ref_id() == 12638153115695167477) // corresponds to SAM "*" under FNV hash. unaligned read record + continue; + + raw_mass += tmp.mass(); + + if (_hit_fac->ref_table().get_name(tmp.ref_id())==NULL) // unaligned read record (!?) + continue; + + if (spans_bad_intron(tmp)) + continue; + + int order = _hit_fac->ref_table().observation_order(tmp.ref_id()); + if (_prev_pos != 0) + { + int prev_order = _hit_fac->ref_table().observation_order(_prev_ref_id); + + if (prev_order > order || (prev_order == order && _prev_pos > tmp.left())) + { + const char* bh_chr_name = _hit_fac->ref_table().get_name(tmp.ref_id()); + const char* last_bh_chr_name = _hit_fac->ref_table().get_name(_prev_ref_id); + + print_sort_error(last_bh_chr_name, + _prev_pos, + bh_chr_name, + tmp.left()); + exit(1); + } + } + + _prev_ref_id = tmp.ref_id(); + _prev_pos = tmp.left(); + + bool hit_within_mask = false; + + // We want to skip stuff that overlaps masked GTF records, so + // sync up the masking chromosome + if (!mask_gtf_recs.empty() && + next_mask_scaff != mask_gtf_recs.end() && + (*next_mask_scaff)->ref_id() != tmp.ref_id()) + { + bool found_scaff = false; + vector >::iterator curr_mask_scaff = mask_gtf_recs.begin(); + for (size_t i = 0; i < _mask_scaff_offsets.size(); ++i) + { + if (_mask_scaff_offsets[i].first == tmp.ref_id()) + { + curr_mask_scaff = _mask_scaff_offsets[i].second; + found_scaff = true; + break; + } + } + + next_mask_scaff = curr_mask_scaff; + } + + //check that we aren't sitting in the middle of a masked scaffold + while (next_mask_scaff != mask_gtf_recs.end() && + (*next_mask_scaff)->ref_id() == tmp.ref_id() && + (*next_mask_scaff)->right() <= tmp.left()) + { + if ((*next_mask_scaff)->left() >= tmp.left()) + { + //REMOVE ME: + int a = 4; + break; + } + + next_mask_scaff++; + } + + if (next_mask_scaff != mask_gtf_recs.end() && + (*next_mask_scaff)->ref_id() == tmp.ref_id() && + (*next_mask_scaff)->left() <= tmp.left() && + (*next_mask_scaff)->right() >= tmp.right()) + { + hit_within_mask = true; + } + + if (hit_within_mask) + continue; + + // if the user's asked for read trimming, do it here. + if (trim_read_length > 0) + { + tmp.trim(trim_read_length); + } + + bh = new ReadHit(tmp); + + break; + } + + return raw_mass; +} + +double BundleFactory::rewind_hit(const ReadHit* rh) +{ + double mass = rh->mass(); + delete rh; + _hit_fac->undo_hit(); + return mass; +} + +bool BundleFactory::next_bundle_hit_driven(HitBundle& bundle) +{ + const ReadHit* bh = NULL; + + bool skip_read = false; + + while(bh == NULL) + { + if (!_hit_fac->records_remain()) + { + return false; + } + + // If we are randomly throwing out reads, check to see + // whether this one should be kept. 
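+ // Read subsampling: when read_skip_fraction > 0, each alignment is discarded with that
+ // probability using one uniform [0,1) draw from the factory's _zeroone generator.  Note
+ // that in the test just below, && binds tighter than ||, so it reads
+ // "(subsampling is on AND the draw says skip) OR (the bundle already holds
+ // max_frags_per_bundle hits)".  Editor's sketch of the sampling decision
+ // (skip_this_read() is a hypothetical helper; u stands for one _zeroone() draw):
+ //
+ //   bool skip_this_read(double u, double skip_fraction)
+ //   {
+ //       return skip_fraction > 0.0 && u < skip_fraction;   // Bernoulli(skip_fraction)
+ //   }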
+ if (read_skip_fraction > 0.0 && _zeroone() < read_skip_fraction || + bundle.hits().size() >= max_frags_per_bundle) + { + skip_read = true; + next_valid_alignment(bh); + } + else + { + bundle.add_raw_mass(next_valid_alignment(bh)); + } + } + + if (skip_read || !bundle.add_open_hit(read_group_properties(), bh)) + { + delete bh; + bh = NULL; + } + _expand_by_hits(bundle); + + assert(bundle.left() != -1); + bundle.finalize_open_mates(); + bundle.finalize(); + assert(bundle.right() != -1); + + return true; +} + +bool BundleFactory::next_bundle_ref_driven(HitBundle& bundle) +{ + if (next_ref_scaff == ref_mRNAs.end()) + { + const ReadHit* bh = NULL; + while(_hit_fac->records_remain()) + { + if (read_skip_fraction == 0.0 || _zeroone() >= read_skip_fraction || + bundle.hits().size() >= max_frags_per_bundle) + { + bundle.add_raw_mass(next_valid_alignment(bh)); + } + } + bundle.finalize(); + return false; + } + + bundle.add_ref_scaffold(*next_ref_scaff); + next_ref_scaff++; + + _expand_by_refs(bundle); + + // The most recent RefID and position we've seen in the hit stream + RefID last_hit_ref_id_seen = 0; + int last_hit_pos_seen = 0; + + // include hits that lay within the bundle interval + while(true) + { + const ReadHit* bh = NULL; + + bool skip_read = false; + // If we are randomly throwing out reads, check to see + // whether this one should be kept. + double t = _zeroone(); + if (read_skip_fraction > 0.0 && _zeroone() < read_skip_fraction || + bundle.hits().size() >= max_frags_per_bundle) + { + next_valid_alignment(bh); + skip_read = true; + } + else + { + bundle.add_raw_mass(next_valid_alignment(bh)); + } + + if (bh == NULL) + { + if (_hit_fac->records_remain()) + continue; + else + break; + } + + last_hit_ref_id_seen = bh->ref_id(); + last_hit_pos_seen = bh->left(); + + // test if the hit stream needs to catch up or has gone too far based on ref_id + if (bh->ref_id() != bundle.ref_id()) + { + int bh_chr_order = _hit_fac->ref_table().observation_order(bh->ref_id()); + int bundle_chr_order = _hit_fac->ref_table().observation_order(bundle.ref_id()); + + if (bh_chr_order < bundle_chr_order) // the hit stream has not caught up, skip + { + delete bh; + continue; + } + else // the hit stream has gone too far, rewind and break + { + double mass = rewind_hit(bh); + if (skip_read == false) + { + bundle.rem_raw_mass(mass); + } + break; + } + } + + if (bh->left() >= bundle.left() && bh->right() <= bundle.right()) + { + if (skip_read) + { + delete bh; + bh = NULL; + } + else + { + if (!bundle.add_open_hit(read_group_properties(), bh, false)) + { + delete bh; + bh = NULL; + } + } + } + else if (bh->left() >= bundle.right()) + { + if (!skip_read) + { + bundle.rem_raw_mass(rewind_hit(bh)); + } + break; + } + else + { + // It's not within the bundle bounds, but it's also not past the + // right end, so skip it. + delete bh; + } + } + + assert(bundle.left() != -1); + bundle.finalize_open_mates(); + bundle.finalize(); + assert(bundle.right() != -1); + + return true; +} + +// NOTE: does not support read skipping yet or max hits per bundle yet. 
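+ // The ref-driven and ref-guided modes below repeatedly "peek" at the next alignment;
+ // if it belongs to a later locus they push it back with rewind_hit() (which calls
+ // _hit_fac->undo_hit()) and subtract its mass so it is not double-counted.  Condensed
+ // editor's restatement of that pattern (not a verbatim excerpt; see _expand_by_hits()
+ // and next_bundle_ref_driven() for the real control flow):
+ //
+ //   const ReadHit* bh = NULL;
+ //   bundle.add_raw_mass(next_valid_alignment(bh));   // peek: consumes one record, counts its mass
+ //   if (bh != NULL)
+ //   {
+ //       if (bh->ref_id() == bundle.ref_id() && bh->left() < bundle.right() + olap_radius)
+ //           bundle.add_open_hit(read_group_properties(), bh);  // belongs to the current bundle
+ //       else
+ //           bundle.rem_raw_mass(rewind_hit(bh));     // too far ahead: push back, undo its mass
+ //   }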
+bool BundleFactory::next_bundle_ref_guided(HitBundle& bundle) +{ + + if (next_ref_scaff == ref_mRNAs.end()) + { + return next_bundle_hit_driven(bundle); + } + + const ReadHit* bh = NULL; + while(bh == NULL) + { + if (!_hit_fac->records_remain()) + { + return next_bundle_ref_driven(bundle); + } + bundle.add_raw_mass(next_valid_alignment(bh)); + } + + if (bh->ref_id() != (*next_ref_scaff)->ref_id()) + { + int bh_chr_order = _hit_fac->ref_table().observation_order(bh->ref_id()); + int scaff_chr_order = _hit_fac->ref_table().observation_order((*next_ref_scaff)->ref_id()); + + bundle.rem_raw_mass(rewind_hit(bh)); + + if (bh_chr_order < scaff_chr_order) + { + return next_bundle_hit_driven(bundle); + } + else + { + return next_bundle_ref_driven(bundle); + } + } + + if (bh->left() < (*next_ref_scaff)->left()) + { + if (!bundle.add_open_hit(read_group_properties(), bh)) + { + delete bh; + bh = NULL; + } + } + else + { + bundle.rem_raw_mass(rewind_hit(bh)); + bundle.add_ref_scaffold(*next_ref_scaff); + next_ref_scaff++; + _expand_by_refs(bundle); + } + + while(_expand_by_hits(bundle) || + _expand_by_refs(bundle)) {} + + assert(bundle.left() != -1); + bundle.finalize_open_mates(); + bundle.finalize(); + assert(bundle.right() != -1); + + return true; +} + +// expand the bundle interval as far as needed to include the overlapping +// chain of reference transcripts that also overlap the initial bundle +// interval +bool BundleFactory::_expand_by_refs(HitBundle& bundle) +{ + int initial_right = bundle.right(); + while(next_ref_scaff < ref_mRNAs.end()) + { + assert(bundle.ref_id() != (*next_ref_scaff)->ref_id() || (*next_ref_scaff)->left() >= bundle.left()); + if (bundle.ref_id() == (*next_ref_scaff)->ref_id() + && overlap_in_genome((*next_ref_scaff)->left(),(*next_ref_scaff)->right(),bundle.left(), bundle.right())) + { + bundle.add_ref_scaffold(*next_ref_scaff++); + } + else + { + break; + } + } + + + return (bundle.right() > initial_right); +} + +// expand bundle by chaining overlapping hits +bool BundleFactory::_expand_by_hits(HitBundle& bundle) +{ + int initial_right = bundle.right(); + while(true) + { + bool skip_read = false; + const ReadHit* bh = NULL; + + if (read_skip_fraction > 0.0 && _zeroone() < read_skip_fraction) + { + skip_read = true; + } + else + { + bundle.add_raw_mass(next_valid_alignment(bh)); + } + + if (bh == NULL) + { + if (_hit_fac->records_remain()) + { + continue; + } + else + { + break; + } + } + + if (bh->ref_id() == bundle.ref_id() && bh->left() < bundle.right() + olap_radius) + { + if (skip_read || !bundle.add_open_hit(read_group_properties(), bh)) + { + delete bh; + bh = NULL; + } + } + else + { + bundle.rem_raw_mass(rewind_hit(bh)); + break; + } + } + + return (bundle.right() > initial_right); +} + +bool BundleFactory::next_bundle(HitBundle& bundle) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + switch(_bundle_mode) + { + case HIT_DRIVEN: + _curr_bundle++; + return next_bundle_hit_driven(bundle); + break; + case REF_DRIVEN: + _curr_bundle++; + return next_bundle_ref_driven(bundle); + break; + case REF_GUIDED: + _curr_bundle++; + return next_bundle_ref_guided(bundle); + break; + } + return false; +} + + +struct IntronSpanCounter +{ + IntronSpanCounter() : left_reads(0), little_reads(0), total_reads(0), multimap_reads(0), fwd_strand_frags(0) {} + size_t left_reads; + size_t little_reads; // small span overhang + size_t total_reads; + size_t multimap_reads; + size_t fwd_strand_frags; + vector hist; +}; + +typedef map IntronCountTable; + 
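+ // count_introns_in_read() below walks the CIGAR string of an alignment, advancing a
+ // genomic cursor, and records one IntronCountTable entry per REF_SKIP (intron)
+ // operation; reads whose junction overhang is shorter than
+ // floor(read_len * small_anchor_fraction) are additionally counted as "little" reads.
+ // Editor's sketch of the cursor arithmetic (record_intron() is a hypothetical helper;
+ // the real function also adjusts the cursor for clipping and indel opcodes):
+ //
+ //   int g = read.left();                              // genomic cursor
+ //   for (size_t i = 0; i < cig.size(); ++i)
+ //   {
+ //       if (cig[i].opcode == REF_SKIP)
+ //           record_intron(g, cig[i].length);          // keyed by (start, length)
+ //       if (cig[i].opcode == MATCH || cig[i].opcode == REF_SKIP || cig[i].opcode == DEL)
+ //           g += cig[i].length;                       // these ops consume reference bases
+ //   }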
+void count_introns_in_read(const ReadHit& read, + IntronCountTable& intron_counts) +{ + const vector& cig = read.cigar(); + + int read_len = read.read_len(); + int small_anchor = (int)floor(read_len * small_anchor_fraction); + + int r_left = 0; + int g_left = read.left(); + + for (size_t i = 0; i < cig.size(); ++i) + { + assert(cig[i].length >= 0); + switch(cig[i].opcode) + { + case MATCH: + //ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, cig[i].length)); + g_left += cig[i].length; + r_left += cig[i].length; + break; + + case REF_SKIP: + { + AugmentedCuffOp intron(CUFF_INTRON, g_left, cig[i].length); + pair ins_itr; + ins_itr = intron_counts.insert(make_pair(intron, IntronSpanCounter())); + IntronCountTable::iterator itr = ins_itr.first; + itr->second.total_reads++; + + if (read.num_hits() > 10) + { + itr->second.multimap_reads++; + } + + if ( r_left <= small_anchor || (read_len - r_left) < small_anchor) + { + itr->second.little_reads++; + } + + if (read.source_strand() == CUFF_FWD) + { + //itr->second.fwd_strand_frags; + } + else + { + assert(read.source_strand() == CUFF_REV); + } + + + vector& hist = itr->second.hist; + if (hist.size() < (size_t)read_len) + { + size_t num_new_bins = read_len - hist.size(); + size_t new_left_bins = (size_t)floor(num_new_bins / 2.0); + size_t new_right_bins = (size_t)ceil(num_new_bins / 2.0); + hist.insert(hist.begin(), new_left_bins, 0); + hist.insert(hist.end(), new_right_bins, 0); + } + + assert (r_left < hist.size()); + hist[r_left]++; + //ops.push_back(AugmentedCuffOp(CUFF_INTRON, g_left, cig[i].length)); + g_left += cig[i].length; + break; + } + + case SOFT_CLIP: + g_left += cig[i].length; + break; + case HARD_CLIP: + break; + case INS: + g_left -= cig[i].length; + break; + case DEL: + g_left += cig[i].length; + break; + default: + assert(false); + break; + } + } +} + +void minor_introns(int bundle_length, + int bundle_left, + const IntronCountTable& intron_counts, + vector& bad_introns, + double fraction) + +{ + for(IntronCountTable::const_iterator itr = intron_counts.begin(); + itr != intron_counts.end(); + ++itr) + { + pair itr_cnt_pair = *itr; + const IntronSpanCounter itr_spans = itr_cnt_pair.second; + + double doc = itr_spans.total_reads; + + for (IntronCountTable::const_iterator itr2 = intron_counts.begin(); + itr2 != intron_counts.end(); + ++itr2) + { + if (itr == itr2 || + !AugmentedCuffOp::overlap_in_genome(itr->first, itr2->first)) + { + continue; + } + + pair itr2_cnt_pair = *itr2; + const IntronSpanCounter itr2_spans = itr2_cnt_pair.second; + + double thresh = itr2_spans.total_reads * fraction; + if (doc < thresh) + { + //#if verbose_msg + // fprintf(stderr, "\t Filtering intron (due to overlap) %d - %d: %f thresh %f\n", itr->first.first, itr->first.second, doc, bundle_avg_thresh); + //#endif + bool exists = binary_search(bad_introns.begin(), + bad_introns.end(), + itr->first); + if (!exists) + { + verbose_msg("Filtering intron %d-%d spanned by %lu reads based on overlap with much more abundant intron: %d-%d spanned by %lu reads\n", + itr->first.g_left(), + itr->first.g_right(), + itr->second.total_reads, + itr2->first.g_left(), + itr2->first.g_right(), + itr2->second.total_reads); + + bad_introns.push_back(itr->first); + sort(bad_introns.begin(), bad_introns.end()); + } + } + +// if ((itr->second.fwd_strand_frags == 0 && +// itr2->second.fwd_strand_frags != 0) || +// (itr2->second.fwd_strand_frags == 0 && +// itr->second.fwd_strand_frags != 0)) +// { +// int itr1_L = itr->first.g_left(); +// int itr1_R = itr->first.g_right(); +// 
int itr2_L = itr2->first.g_left(); +// int itr2_R = itr2->first.g_right(); +// +// if (abs(itr1_L - itr2_L) < 25 && abs(itr1_R - itr2_R) < 25) +// { +// int a = 3; +// } +// } + } + } +} + +void multimapping_introns(int bundle_length, + int bundle_left, + const IntronCountTable& intron_counts, + vector& bad_introns, + double fraction) + +{ + for(IntronCountTable::const_iterator itr = intron_counts.begin(); + itr != intron_counts.end(); + ++itr) + { + pair itr_cnt_pair = *itr; + const IntronSpanCounter itr_spans = itr_cnt_pair.second; + + double doc = itr_spans.total_reads; + double multi = itr_spans.multimap_reads; + + double multi_fraction = multi / doc; + + if (multi_fraction > fraction) + { + bool exists = binary_search(bad_introns.begin(), + bad_introns.end(), + itr->first); + if (!exists) + { + verbose_msg("Filtering intron %d-%d spanned by %lu reads because %lg percent are multireads.\n", + itr->first.g_left(), + itr->first.g_right(), + itr->second.total_reads, + multi_fraction * 100); + + bad_introns.push_back(itr->first); + sort(bad_introns.begin(), bad_introns.end()); + } + } + } +} + + +void identify_bad_splices(const HitBundle& bundle, + BadIntronTable& bad_splice_ops) +{ + // Tracks, for each intron, how many reads + IntronCountTable intron_counts; + + RefID ref_id = bundle.ref_id(); + + pair ins_itr; + ins_itr = bad_splice_ops.insert(make_pair(ref_id, vector())); + vector& bad_introns = ins_itr.first->second; + + foreach (const MateHit& hit, bundle.hits()) + { + if (hit.left_alignment()) + { + count_introns_in_read(*hit.left_alignment(), intron_counts); + } + if (hit.right_alignment()) + { + count_introns_in_read(*hit.right_alignment(), intron_counts); + } + } + + minor_introns(bundle.length(), bundle.left(), intron_counts, bad_introns, min_isoform_fraction); + // [Geo] disable filtering of multi-mapped introns: + // multimapping_introns(bundle.length(), bundle.left(), intron_counts, bad_introns, 0.5); + for (IntronCountTable::iterator itr = intron_counts.begin(); + itr != intron_counts.end(); + ++itr) + { + if (binary_search(bad_introns.begin(), + bad_introns.end(), + itr->first)) + { + continue; + } + pair cnt_pair = *itr; + try + { + const IntronSpanCounter spans = cnt_pair.second; + + // binomial read_half_dist(spans.total_reads, success_fraction); + // double left_side_p = cdf(read_half_dist, spans.total_reads - spans.left_reads); + // double right_side_p = cdf(complement(read_half_dist, spans.left_reads)); + + + double success = 2 * small_anchor_fraction; + + binomial read_half_dist(spans.total_reads, success); + double right_side_p; + + // right_side_p describes the chance that we'd observe at least + // this many small overhang reads by chance with an unbiased + // distribution over a normal (e.g. non-artifact) junction + if (spans.little_reads > 0) + { + right_side_p = 1.0 - cdf(read_half_dist, spans.little_reads - 1); + } + else + { + right_side_p = 1.0; + } + + double left_side_p = 0; + double expected = success * spans.total_reads; + + //double excess = spans.little_reads - expected; + + // left_side_p describes the chance that we'd observe this few or + // fewer small overhang reads by chance with an unbiased + // distribution over a normal (e.g. 
non-artifact) junction + if (spans.little_reads > 0) + { + left_side_p = cdf(read_half_dist, spans.little_reads); + } + else + { + left_side_p = cdf(read_half_dist, 0); + } + + //double alpha = 0.05; + //double right_side_p = 0; + + // Two-tailed binomial test: +// if (left_side_p < (binomial_junc_filter_alpha / 2.0) || +// right_side_p < (binomial_junc_filter_alpha / 2.0)) + // One-tailed binomial test + + bool filtered = false; + + const IntronSpanCounter& counter = itr->second; + + if (right_side_p < (binomial_junc_filter_alpha)) + { + double overhang_ratio = counter.little_reads / (double) counter.total_reads; + if (counter.total_reads < 100 || overhang_ratio >= 0.50) + { + verbose_msg("Filtering intron %d-%d spanned by %lu reads (%lu low overhang, %lg expected) left P = %lg, right P = %lg\n", + itr->first.g_left(), + itr->first.g_right(), + itr->second.total_reads, + itr->second.little_reads, + expected, + left_side_p, + right_side_p); + filtered = true; + + bool exists = binary_search(bad_introns.begin(), + bad_introns.end(), + itr->first); + if (!exists) + { + bad_introns.push_back(itr->first); + sort(bad_introns.begin(), bad_introns.end()); + } + } + } + + vector hist = itr->second.hist; + if (itr->second.total_reads > 1000) + { + sort(hist.begin(), hist.end()); + size_t median = (size_t)floor(hist.size() / 2); + if (median <= hist.size() && hist[median] == 0) + { + verbose_msg("Filtering intron %d-%d spanned by %lu reads (%lu low overhang, %lg expected) left P = %lg, right P = %lg\n", + itr->first.g_left(), + itr->first.g_right(), + itr->second.total_reads, + itr->second.little_reads, + expected, + left_side_p, + right_side_p); + + filtered = true; + + bool exists = binary_search(bad_introns.begin(), + bad_introns.end(), + itr->first); + if (!exists) + { + bad_introns.push_back(itr->first); + sort(bad_introns.begin(), bad_introns.end()); + } + } + } + + if (!filtered) + { + verbose_msg("Accepting intron %d-%d spanned by %lu reads (%lu low overhang, %lg expected) left P = %lg, right P = %lg\n", + itr->first.g_left(), + itr->first.g_right(), + itr->second.total_reads, + itr->second.little_reads, + expected, + left_side_p, + right_side_p); + + } + } + + + catch(const std::exception& e) + { + // + /*` + [#coinflip_eg_catch] + It is always essential to include try & catch blocks because + default policies are to throw exceptions on arguments that + are out of domain or cause errors like numeric-overflow. + + Lacking try & catch blocks, the program will abort, whereas the + message below from the thrown exception will give some helpful + clues as to the cause of the problem. 
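+ // Editor's sketch of the one-tailed overhang test computed above: under an unbiased
+ // placement over a genuine junction, each spanning read falls in the "small anchor"
+ // zone with probability about 2 * small_anchor_fraction, so right_side_p is
+ // P[X >= little_reads] for X ~ Binomial(total_reads, p), evaluated with boost::math
+ // as in the surrounding code (overhang_right_tail_p() is a hypothetical name):
+ //
+ //   #include <boost/math/distributions/binomial.hpp>
+ //   double overhang_right_tail_p(int total_reads, int little_reads, double anchor_frac)
+ //   {
+ //       using boost::math::binomial; using boost::math::cdf;
+ //       double p = 2.0 * anchor_frac;                 // chance of a small-anchor placement
+ //       binomial dist(total_reads, p);
+ //       if (little_reads <= 0) return 1.0;
+ //       return 1.0 - cdf(dist, little_reads - 1);     // P[X >= little_reads]
+ //   }
+ //
+ // The junction is filtered when this p-value falls below binomial_junc_filter_alpha
+ // and the overhang ratio or read count also looks suspicious, as in the code above.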
+ */ + std::cout << + "\n""Message from thrown exception was:\n " << e.what() << std::endl; + } + + } +} + +bool BundleFactory::spans_bad_intron(const ReadHit& read) +{ + + const vector& cig = read.cigar(); + + size_t g_left = read.left(); + BadIntronTable::const_iterator itr = _bad_introns.find(read.ref_id()); + if (itr == _bad_introns.end()) + return false; + + const vector& bi = itr->second; + for (size_t i = 0; i < cig.size(); ++i) + { + assert(cig[i].length >= 0); + switch(cig[i].opcode) + { + case MATCH: + //ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, cig[i].length)); + g_left += cig[i].length; + break; + + case REF_SKIP: + { + AugmentedCuffOp intron(CUFF_INTRON, g_left, cig[i].length); + if (binary_search(bi.begin(), bi.end(), intron)) + { + return true; + } + + //ops.push_back(AugmentedCuffOp(CUFF_INTRON, g_left, cig[i].length)); + g_left += cig[i].length; + break; + } + + case SOFT_CLIP: + g_left += cig[i].length; + break; + + case HARD_CLIP: + break; + case INS: + g_left -= cig[i].length; + break; + case DEL: + g_left += cig[i].length; + break; + default: + assert(false); + break; + } + } + + return false; +} diff --git a/src/bundles.h b/src/bundles.h new file mode 100644 index 0000000..15f51ee --- /dev/null +++ b/src/bundles.h @@ -0,0 +1,753 @@ +#ifndef BUNDLES_H +#define BUNDLES_H +/* + * bundles.h + * cufflinks + * + * Created by Cole Trapnell on 9/6/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include +#include +#include "common.h" +#include "hits.h" +#include "scaffolds.h" +#include "gtf_tracking.h" +#include "progressbar.h" + +struct BundleStats +{ + BundleStats() : + compatible(0), + uncollapsible(0), + closure_edges(0), + matched_edges(0) + {} + + int compatible; + int uncollapsible; + int closure_edges; + int matched_edges; +}; + +typedef map > BadIntronTable; + + +/******************************************************************************* + HitBundle is a set of MateHit objects that, were you to look at the interval + graph of their spanning intervals in genomic coordinates, you'd see a single + connected component. Note that bundles do not correspond to single transcripts, + or even single genes + *******************************************************************************/ +class HitBundle +{ +private: + HitBundle(const HitBundle& rhs) {} +public: + HitBundle() + : _leftmost(INT_MAX), _rightmost(-1), _final(false), _id(++_next_id), _ref_id(0), _raw_mass(0.0), _num_replicates(1), _compatible_mass(0.0) {} + + ~HitBundle() + { + vector >& bundle_ref_scaffs = ref_scaffolds(); + foreach(shared_ptr& ref_scaff, bundle_ref_scaffs) + { + // This bundle and the factory that actually owns the ref_mRNAs + // are the only objects that should have access to these scaffolds + // so if the use count is 2, we can clear these guys. + // Updated to 3 since the bias learner now uses them. + if (ref_scaff.use_count() <= 3) + { + ref_scaff->clear_hits(); + } + else if (ref_scaff->mate_hits().size() > 0) + { + fprintf(stderr, "Warning: bundle %d-%d shared reference scaffolds with others. 
Possible soft memory leak.\n", left(), right()); + } + } + + foreach (MateHit& hit, _hits) + { + delete hit.left_alignment(); + delete hit.right_alignment(); + } + + for(OpenMates::iterator itr = _open_mates.begin(); itr != _open_mates.end(); ++itr) + { + foreach (MateHit& hit, itr->second) + { + delete hit.left_alignment(); + delete hit.right_alignment(); + } + } + + } + int left() const { return _leftmost; } + int right() const { return _rightmost; } + int length() const { return _rightmost - _leftmost; } + + // Returns true if the hit was added successfully. + bool add_hit(const MateHit& hit); + + // This is to keep track of mass of all hits, including + // thosethat are not added to any bundle + // but are skipped during the creation of this bundle + void add_raw_mass(double rm) { _raw_mass += rm; } + void rem_raw_mass(double rm) { _raw_mass -= rm; } + double raw_mass() { return _raw_mass; } + + double compatible_mass() const + { + return _compatible_mass; + } + + void clear_hits() + { + _hits.clear(); + _non_redundant.clear(); + vector >& bundle_ref_scaffs = ref_scaffolds(); + foreach(shared_ptr& ref_scaff, bundle_ref_scaffs) + { + if (ref_scaff.use_count() <= 3) + { + ref_scaff->clear_hits(); + } + else + { + fprintf(stderr, "Warning: bundle %d-%d shared reference scaffolds with others. Possible soft memory leak.\n", left(), right()); + } + } + } + + const std::vector& hits() const { return _hits; } + const std::vector& non_redundant_hits() const { return _non_redundant; } + + RefID ref_id() const {return _ref_id; } + + int id() const { return _id; } + + void add_ref_scaffold(shared_ptr scaff) + { + if (scaff->left() < _leftmost) + _leftmost = scaff->left(); + if (scaff->right() > _rightmost) + _rightmost = scaff->right(); + _ref_scaffs.push_back(scaff); + _ref_id = scaff->ref_id(); + } + + vector >& ref_scaffolds() { return _ref_scaffs; } + + // Adds a Bowtie hit to the open hits buffer. 
The Bundle will handle turning + // the Bowtie hit into a properly mated Cufflinks hit record + bool add_open_hit(shared_ptr rg_props, + const ReadHit* bh, + bool expand_by = true); + + // Commits any mates still open as singleton hits + void finalize_open_mates(); + + // Sorts the hits, and performs other functions needed to prepare this + // bundle for assembly and quantitation + void finalize(bool is_combined=false); + + void remove_hitless_scaffolds(); + void remove_unmapped_hits(); + void collapse_hits(); + + int num_replicates() const { return _num_replicates; } + + double mass() const + { + double mass = 0; + for(size_t i = 0; i < _non_redundant.size(); i++) + { + mass += _non_redundant[i].collapse_mass(); + } + return mass; + } + + static void combine(const vector& in_bundles, + HitBundle& out_bundle); + +private: + int _leftmost; + int _rightmost; + std::vector _hits; + std::vector _non_redundant; + std::vector > _ref_scaffs; // user-supplied reference annotations overlapping the bundle + bool _final; + int _id; + RefID _ref_id; + double _raw_mass; + + + static int _next_id; + + typedef map > OpenMates; + OpenMates _open_mates; + int _num_replicates; + double _compatible_mass; +}; + +void load_ref_rnas(FILE* ref_mRNA_file, + RefSequenceTable& rt, + vector >& ref_mRNAs, + bool loadSeqs=false, + bool loadFPKM=false); + +class BundleFactory +{ + boost::mt19937 rng; + boost::uniform_01 _zeroone; + +public: + + BundleFactory(shared_ptr fac, BundleMode bm) + : _hit_fac(fac), _bundle_mode(bm), _prev_pos(0), _prev_ref_id(0), _curr_bundle(0), _zeroone(rng) + { + _rg_props = shared_ptr(new ReadGroupProperties(fac->read_group_properties())); + + + + } + + bool bundles_remain() + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + return _curr_bundle < num_bundles(); + } + + bool next_bundle(HitBundle& bundle_out); + bool next_bundle_hit_driven(HitBundle& bundle_out); + bool next_bundle_ref_driven(HitBundle& bundle_out); + bool next_bundle_ref_guided(HitBundle& bundle_out); + + + RefSequenceTable& ref_table() { return _hit_fac->ref_table(); } + + // Not available until after inspect_bundle + int num_bundles() const { return _num_bundles; } + void num_bundles(int n) { _num_bundles = n; } + + void reset() + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + _curr_bundle = 0; + //rewind(hit_file); + _hit_fac->reset(); + next_ref_scaff = ref_mRNAs.begin(); + next_mask_scaff = mask_gtf_recs.begin(); + + foreach(shared_ptr ref_scaff, ref_mRNAs) + { + ref_scaff->clear_hits(); + } + + _prev_pos = 0; + _prev_ref_id = 0; + } + + // This function NEEDS to deep copy the ref_mRNAs, otherwise cuffdiff'd + // samples will clobber each other + void set_ref_rnas(const vector >& mRNAs) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + ref_mRNAs.clear(); + for (vector >::const_iterator i = mRNAs.begin(); i < mRNAs.end(); ++i) + { + ref_mRNAs.push_back(shared_ptr(new Scaffold(**i))); + } + + RefID last_id = 0; + for (vector >::iterator i = ref_mRNAs.begin(); i < ref_mRNAs.end(); ++i) + { + if ((*i)->ref_id() != last_id) + { + _ref_scaff_offsets.push_back(make_pair((*i)->ref_id(), i)); + } + last_id = (*i)->ref_id(); + } + + next_ref_scaff = ref_mRNAs.begin(); + } + + void set_mask_rnas(const vector >& masks) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + mask_gtf_recs = masks; + RefID last_id = 0; + for (vector >::iterator i = mask_gtf_recs.begin(); i < mask_gtf_recs.end(); ++i) + { + 
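+ // set_ref_rnas()/set_mask_rnas() build a small per-chromosome index: for each RefID
+ // they record an iterator to the first scaffold with that RefID, so that
+ // next_valid_alignment() can jump straight to the right chromosome instead of scanning
+ // the whole list.  Editor's sketch of building and using such an index
+ // (`scaffs`, `offsets` and `hit_ref_id` are illustrative names):
+ //
+ //   typedef vector<shared_ptr<Scaffold> >::iterator ScaffIter;
+ //   vector<pair<RefID, ScaffIter> > offsets;
+ //   RefID last = 0;
+ //   for (ScaffIter i = scaffs.begin(); i != scaffs.end(); ++i) {
+ //       if ((*i)->ref_id() != last)
+ //           offsets.push_back(make_pair((*i)->ref_id(), i));  // first entry for this chromosome
+ //       last = (*i)->ref_id();
+ //   }
+ //   // Lookup side: a linear scan is fine, since there is one entry per chromosome.
+ //   ScaffIter pos = scaffs.begin();
+ //   for (size_t i = 0; i < offsets.size(); ++i)
+ //       if (offsets[i].first == hit_ref_id) { pos = offsets[i].second; break; }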
if ((*i)->ref_id() != last_id) + { + _mask_scaff_offsets.push_back(make_pair((*i)->ref_id(), i)); + } + last_id = (*i)->ref_id(); + } + + next_mask_scaff = mask_gtf_recs.begin(); + } + + void bad_intron_table(const BadIntronTable& bad_introns) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + _bad_introns = bad_introns; + } + + void read_group_properties(shared_ptr rg) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_factory_lock); +#endif + _rg_props = rg; + } + + shared_ptr read_group_properties() + { + return _rg_props; + } + + bool spans_bad_intron(const ReadHit& read); + +private: + + bool _expand_by_hits(HitBundle& bundle); + bool _expand_by_refs(HitBundle& bundle); + + shared_ptr _hit_fac; + + vector > ref_mRNAs; + //FILE* ref_mRNA_file; + vector >::iterator> > _ref_scaff_offsets; + vector >::iterator next_ref_scaff; + + vector > mask_gtf_recs; + //FILE* mask_file; + vector >::iterator> > _mask_scaff_offsets; + vector >::iterator next_mask_scaff; + + BadIntronTable _bad_introns; + + shared_ptr _rg_props; + + // Sets nva to point to the next valid alignment + // Returns the mass of any alignments that are seen, valid or not + double next_valid_alignment(const ReadHit*& nva); + + // Backs up the factory to before the last valid alignment + // and returns the mass of that alignment (rh) + double rewind_hit(const ReadHit* rh); + + BundleMode _bundle_mode; + int _prev_pos; + RefID _prev_ref_id; + int _num_bundles; + int _curr_bundle; +#if ENABLE_THREADS + boost::mutex _factory_lock; +#endif +}; + +void identify_bad_splices(const HitBundle& bundle, + BadIntronTable& bad_splice_ops); + +template +void inspect_map(BundleFactoryType& bundle_factory, + BadIntronTable* bad_introns, + vector& count_table, + bool progress_bar = true) +{ + + ProgressBar p_bar; + if (progress_bar) + p_bar = ProgressBar("Inspecting reads and determining fragment length distribution.",bundle_factory.ref_table().size()); + RefID last_chrom = 0; + + long double map_mass = 0.0; + long double norm_map_mass = 0.0; + + int min_len = numeric_limits::max(); + int max_len = def_max_frag_len; + vector frag_len_hist(def_max_frag_len+1,0); + bool has_pairs = false; + + int num_bundles = 0; + size_t total_hits = 0; + size_t total_non_redundant_hits = 0; + + //To be used for quartile normalization + vector mass_dist; + + // Store the maximum read length for "first" and "second" reads to report to user. 
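+ // When use_quartile_norm is set, the per-bundle masses collected in mass_dist are
+ // reduced to their upper quartile at the end of inspect_map(), and that value replaces
+ // the total map mass as the normalization constant.  Editor's sketch of that reduction
+ // (upper_quartile() is a hypothetical helper; it uses the same 0.75 index rule as the
+ // code further below):
+ //
+ //   double upper_quartile(vector<double> masses)       // by value: we sort a copy
+ //   {
+ //       if (masses.empty()) return 0.0;
+ //       sort(masses.begin(), masses.end());
+ //       size_t idx = (size_t)(masses.size() * 0.75);
+ //       if (idx >= masses.size()) idx = masses.size() - 1;
+ //       return masses[idx];
+ //   }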
+ int max_1 = 0; + int max_2 = 0; + + shared_ptr mrt(new MultiReadTable()); + + while(true) + { + HitBundle* bundle_ptr = new HitBundle(); + + bool valid_bundle = bundle_factory.next_bundle(*bundle_ptr); + HitBundle& bundle = *bundle_ptr; + + if (use_compat_mass) //only count hits that are compatible with ref transcripts + { + // Take raw mass even if bundle is "empty", since we could be out of refs + // with remaining hits + map_mass += bundle.compatible_mass(); + if (use_quartile_norm && bundle.compatible_mass() > 0) + { + mass_dist.push_back(bundle.compatible_mass()); + } + } + else if (use_total_mass) //use all raw mass + { + + // Take raw mass even if bundle is "empty", since we could be out of refs + // with remaining hits + map_mass += bundle.raw_mass(); + if (use_quartile_norm && bundle.raw_mass() > 0) + { + mass_dist.push_back(bundle.raw_mass()); + } + } + else + { + fprintf(stderr, "Error: hit counting scheme for normalization is not set!\n"); + assert(false); + exit(1); + } + + const RefSequenceTable& rt = bundle_factory.ref_table(); + const char* chrom = rt.get_name(bundle.ref_id()); + char bundle_label_buf[2048]; + if (chrom) + { + sprintf(bundle_label_buf, "%s:%d-%d", chrom, bundle.left(), bundle.right()); + verbose_msg("Inspecting bundle %s with %lu reads\n", bundle_label_buf, bundle.hits().size()); + count_table.push_back(LocusCount(bundle_label_buf, bundle.raw_mass(), bundle.ref_scaffolds().size())); + } + + if (!valid_bundle) + { + delete bundle_ptr; + break; + } + num_bundles++; + + if (progress_bar) + { + double inc_amt = last_chrom == bundle.ref_id() ? 0.0 : 1.0; + p_bar.update(bundle_label_buf, inc_amt); + last_chrom = bundle.ref_id(); + } + + if (bad_introns != NULL) + { + identify_bad_splices(bundle, *bad_introns); + } + + const vector& hits = bundle.non_redundant_hits(); + if (hits.empty()) + { + delete bundle_ptr; + continue; + } + + list > open_ranges; + int curr_range_start = hits[0].left(); + int curr_range_end = numeric_limits::max(); + int next_range_start = -1; + + total_non_redundant_hits += bundle.non_redundant_hits().size(); + total_hits += bundle.hits().size(); + + // This first loop calclates the map mass and finds ranges with no introns + // Note that we are actually looking at non-redundant hits, which is why we use collapse_mass + // This loop will also add multi-reads to the MultiReads table + for (size_t i = 0; i < hits.size(); ++i) + { + assert(hits[i].left_alignment()); + + // Add to table if multi-read + if (hits[i].is_multi()) + { + mrt->add_hit(hits[i]); + } + + // Find left length + int left_len = hits[i].left_alignment()->right()-hits[i].left_alignment()->left(); + min_len = min(min_len, left_len); + if (!hits[i].left_alignment()->contains_splice()) + { + if (hits[i].left_alignment()->is_first()) + max_1 = max(max_1, left_len); + else + max_2 = max(max_2, left_len); + } + + // Find right length + if (hits[i].right_alignment()) + { + int right_len = hits[i].right_alignment()->right()-hits[i].right_alignment()->left(); + min_len = min(min_len, right_len); + if (!hits[i].right_alignment()->contains_splice()) + { + if (hits[i].right_alignment()->is_first()) + max_1 = max(max_1, right_len); + else + max_2 = max(max_2, right_len); + } + has_pairs = true; + } + + // Find fragment length + if (bundle.ref_scaffolds().size()==1 && hits[i].is_pair()) + // Annotation provided and single isoform gene + { + int start, end, mate_length; + shared_ptr scaff = bundle.ref_scaffolds()[0]; + if (scaff->map_frag(hits[i], start, end, mate_length)) + { + if 
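+ // Each properly paired hit on a single-isoform reference scaffold (or within an
+ // intron-free open range) contributes its implied fragment length, weighted by
+ // collapse_mass(), to frag_len_hist.  After the scan the histogram is converted into a
+ // pdf/cdf with mode, mean and standard deviation (see the code near the end of
+ // inspect_map()).  Condensed editor's sketch of that conversion over a generic `hist`:
+ //
+ //   double tot = accumulate(hist.begin(), hist.end(), 0.0);
+ //   vector<double> pdf(hist.size(), 0.0), cum(hist.size(), 0.0);
+ //   double mean = 0.0; int mode = 0;
+ //   for (size_t i = 1; i < hist.size(); ++i) {
+ //       pdf[i] = hist[i] / tot;
+ //       cum[i] = cum[i - 1] + pdf[i];
+ //       if (pdf[i] > pdf[mode]) mode = (int)i;
+ //       mean += i * pdf[i];
+ //   }
+ //   double var = 0.0;
+ //   for (size_t i = 1; i < hist.size(); ++i) var += pdf[i] * (i - mean) * (i - mean);
+ //   double std_dev = sqrt(var);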
(mate_length >= min_len && mate_length <= max_len) + frag_len_hist[mate_length] += hits[i].collapse_mass(); + } + } + else if (bundle.ref_scaffolds().empty()) + // No annotation provided. Look for ranges. + { + if (hits[i].left() > curr_range_end) + { + if (curr_range_end - curr_range_start > max_len) + open_ranges.push_back(make_pair(curr_range_start, curr_range_end)); + curr_range_start = next_range_start; + curr_range_end = numeric_limits::max(); + } + if (hits[i].left_alignment()->contains_splice()) + { + if (hits[i].left() - curr_range_start > max_len) + open_ranges.push_back(make_pair(curr_range_start, hits[i].left()-1)); + curr_range_start = max(next_range_start, hits[i].left_alignment()->right()); + } + if (hits[i].right_alignment() && hits[i].right_alignment()->contains_splice()) + { + assert(hits[i].right_alignment()->left() >= hits[i].left()); + curr_range_end = min(curr_range_end, hits[i].right_alignment()->left()-1); + next_range_start = max(next_range_start, hits[i].right()); + } + } + } + + if (bundle.ref_scaffolds().empty() && has_pairs) // No annotation provided + { + pair curr_range(-1,-1); + + // This second loop uses the ranges found above to find the estimated frag length distribution + // It also finds the minimum read length to use in the linear interpolation + for (size_t i = 0; i < hits.size(); ++i) + { + if (hits[i].left() > curr_range.second && open_ranges.empty()) + break; + + if (hits[i].left() > curr_range.second) + { + curr_range = open_ranges.front(); + open_ranges.pop_front(); + } + + if (hits[i].left() >= curr_range.first && hits[i].right() <= curr_range.second && hits[i].is_pair()) + { + int mate_len = hits[i].right()-hits[i].left(); + if (mate_len <= max_len) + frag_len_hist[mate_len] += hits[i].collapse_mass(); + } + } + } + + open_ranges.clear(); + delete bundle_ptr; + } + + norm_map_mass = map_mass; + + if (use_quartile_norm && mass_dist.size() > 0) + { + sort(mass_dist.begin(),mass_dist.end()); + int upper_quart_index = mass_dist.size() * 0.75; + norm_map_mass = mass_dist[upper_quart_index]; + } + + if (bad_introns != NULL) + { + size_t alloced = 0; + size_t used = 0; + size_t num_introns = 0; + for (BadIntronTable::const_iterator itr = bad_introns->begin(); + itr != bad_introns->end(); + ++itr) + { + alloced += itr->second.capacity() * sizeof(AugmentedCuffOp); + used += itr->second.size() * sizeof(AugmentedCuffOp); + num_introns += itr->second.size(); + } + + verbose_msg( "Bad intron table has %lu introns: (%lu alloc'd, %lu used)\n", num_introns, alloced, used); + verbose_msg( "Map has %lu hits, %lu are non-redundant\n", total_hits, total_non_redundant_hits); + } + + if (progress_bar) + p_bar.complete(); + + vector frag_len_pdf(max_len+1, 0.0); + vector frag_len_cdf(max_len+1, 0.0); + long double tot_count = accumulate(frag_len_hist.begin(), frag_len_hist.end(), 0.0 ); + bool empirical = false; + + if (user_provided_fld && has_pairs && tot_count >= 10000) + { + fprintf(stderr, "Warning: Overriding empirical fragment length distribution with user-specified parameters is not recommended.\n"); + } + + if (!has_pairs || tot_count < 10000) + { + if (has_pairs && !user_provided_fld) + { + fprintf(stderr, "Warning: Using default Gaussian distribution due to insufficient paired-end reads in open ranges. 
It is recommended that correct parameters (--frag-len-mean and --frag-len-std-dev) be provided.\n"); + } + tot_count = 0; + normal frag_len_norm(def_frag_len_mean, def_frag_len_std_dev); + max_len = def_frag_len_mean + 3*def_frag_len_std_dev; + for(int i = min_len; i <= max_len; i++) + { + frag_len_hist[i] = cdf(frag_len_norm, i+0.5)-cdf(frag_len_norm, i-0.5); + tot_count += frag_len_hist[i]; + } + } + else + // Calculate the max frag length and interpolate all zeros between min read len and max frag len + { + empirical = true; + double curr_total = 0; + size_t last_nonzero = min_len-1; + for(size_t i = last_nonzero+1; i < frag_len_hist.size(); i++) + { + if (frag_len_hist[i] > 0) + { + if (last_nonzero != i-1) + { + double b = frag_len_hist[last_nonzero]; + double m = (frag_len_hist[i] - b)/(i-last_nonzero); + for (size_t x = 1; x < i - last_nonzero; x++) + { + frag_len_hist[last_nonzero+x] = m * x + b; + tot_count += frag_len_hist[last_nonzero+x]; + curr_total += frag_len_hist[last_nonzero+x]; + } + } + last_nonzero = i; + } + + curr_total += frag_len_hist[i]; + + if (curr_total/tot_count > 0.9999) + { + max_len = i; + tot_count = curr_total; + break; + } + } + } + + double mean = 0.0; + + if (output_fld) + { + FILE* fhist = fopen(string(output_dir + "/frag_len_hist.csv").c_str(),"w"); + fprintf(fhist, "Length,Count\n"); + for(size_t i = 1; i < frag_len_hist.size(); i++) + { + fprintf(fhist, "%zu,%f\n", i, frag_len_hist[i]); + } + fclose(fhist); + } + + // Convert histogram to pdf and cdf, calculate mean + int frag_len_mode = 0; + for(size_t i = min_len; i <= (size_t)max_len; i++) + { + frag_len_pdf[i] = frag_len_hist[i]/tot_count; + frag_len_cdf[i] = frag_len_cdf[i-1] + frag_len_pdf[i]; + + if (frag_len_pdf[i] > frag_len_pdf[frag_len_mode]) + frag_len_mode = i; + mean += frag_len_pdf[i] * i; + } + + double std_dev = 0.0; + for(size_t i = 1; i < frag_len_hist.size(); i++) + { + std_dev += frag_len_pdf[i] * ((i - mean) * (i - mean)); + } + + std_dev = sqrt(std_dev); + + shared_ptr rg_props = bundle_factory.read_group_properties(); + shared_ptr fld(new EmpDist(frag_len_pdf, frag_len_cdf, frag_len_mode, mean, std_dev, min_len, max_len)); + rg_props->multi_read_table(mrt); + rg_props->frag_len_dist(fld); + rg_props->normalized_map_mass(norm_map_mass); + rg_props->total_map_mass(map_mass); + + fprintf(stderr, "> Map Properties:\n"); + if (use_quartile_norm) + fprintf(stderr, ">\tUpper Quartile: %.2Lf\n", norm_map_mass); + else + fprintf(stderr, ">\tTotal Map Mass: %.2Lf\n", norm_map_mass); + if (corr_multi) + fprintf(stderr,">\tNumber of Multi-Reads: %zu (with %zu total hits)\n", mrt->num_multireads(), mrt->num_multihits()); +// if (has_pairs) +// fprintf(stderr, ">\tRead Type: %dbp x %dbp\n", max_1, max_2); +// else +// fprintf(stderr, ">\tRead Type: %dbp single-end\n", max(max_1,max_2)); + + if (empirical) + { + fprintf(stderr, ">\tFragment Length Distribution: Empirical (learned)\n"); + fprintf(stderr, ">\t Estimated Mean: %.2f\n", mean); + fprintf(stderr, ">\t Estimated Std Dev: %.2f\n", std_dev); + } + else + { + if (user_provided_fld) + fprintf(stderr, ">\tFragment Length Distribution: Truncated Gaussian (user-specified)\n"); + else + fprintf(stderr, ">\tFragment Length Distribution: Truncated Gaussian (default)\n"); + fprintf(stderr, ">\t Default Mean: %d\n", def_frag_len_mean); + fprintf(stderr, ">\t Default Std Dev: %d\n", def_frag_len_std_dev); + } + + bundle_factory.num_bundles(num_bundles); + bundle_factory.reset(); + return; +} + +#endif diff --git a/src/clustering.cpp 
b/src/clustering.cpp new file mode 100644 index 0000000..73d1558 --- /dev/null +++ b/src/clustering.cpp @@ -0,0 +1,130 @@ +/* + * clustering.cpp + * cufflinks + * + * Created by Cole Trapnell on 3/15/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include "clustering.h" + +void ConnectByExonOverlap::operator()(const AbundanceGroup& cluster, + AbundanceGraph& G) +{ + const vector >& abundances = cluster.abundances(); + for (size_t i = 0; i < abundances.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < abundances.size(); ++i) + { + shared_ptr scaff_i = abundances[i]->transfrag(); + assert (scaff_i); + + for (size_t j = i + 1; j < abundances.size(); ++j) + { + shared_ptr scaff_j = abundances[j]->transfrag(); + assert (scaff_j); + + if (Scaffold::exons_overlap(*scaff_i, *scaff_j)) + add_edge(i, j, G); + } + } +} + +void ConnectByAnnotatedGeneId::operator()(const AbundanceGroup& cluster, + AbundanceGraph& G) +{ + const vector >& abundances = cluster.abundances(); + for (size_t i = 0; i < abundances.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < abundances.size(); ++i) + { + set i_gene_id = abundances[i]->gene_id(); + for (size_t j = i + 1; j < abundances.size(); ++j) + { + set j_gene_id = abundances[j]->gene_id(); + if (i_gene_id == j_gene_id) + { + add_edge(i, j, G); + } + } + } +} + +void ConnectByAnnotatedTssId::operator()(const AbundanceGroup& cluster, + AbundanceGraph& G) +{ + const vector >& abundances = cluster.abundances(); + for (size_t i = 0; i < abundances.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < abundances.size(); ++i) + { + set i_tss_id = abundances[i]->tss_id(); + for (size_t j = i + 1; j < abundances.size(); ++j) + { + set j_tss_id = abundances[j]->tss_id(); + if (i_tss_id == j_tss_id) + { + add_edge(i, j, G); + } + } + } +} + +void ConnectByAnnotatedProteinId::operator()(const AbundanceGroup& cluster, + AbundanceGraph& G) +{ + const vector >& abundances = cluster.abundances(); + for (size_t i = 0; i < abundances.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < abundances.size(); ++i) + { + set i_p_id = abundances[i]->protein_id(); + for (size_t j = i + 1; j < abundances.size(); ++j) + { + set j_p_id = abundances[j]->protein_id(); + if (i_p_id == j_p_id) + { + add_edge(i, j, G); + } + } + } +} + +void ConnectByStrand::operator()(const AbundanceGroup& cluster, + AbundanceGraph& G) +{ + const vector >& abundances = cluster.abundances(); + for (size_t i = 0; i < abundances.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < abundances.size(); ++i) + { + shared_ptr scaff_i = abundances[i]->transfrag(); + assert (scaff_i); + + for (size_t j = i + 1; j < abundances.size(); ++j) + { + shared_ptr scaff_j = abundances[j]->transfrag(); + assert (scaff_j); + if (scaff_i->strand() == scaff_j->strand()) + { + add_edge(i, j, G); + } + } + } +} diff --git a/src/clustering.h b/src/clustering.h new file mode 100644 index 0000000..7a9a1f8 --- /dev/null +++ b/src/clustering.h @@ -0,0 +1,170 @@ +#ifndef CLUSTERING_H +#define CLUSTERING_H +/* + * abundances.h + * cufflinks + * + * Created by Cole Trapnell on 4/27/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +// DON'T move this, or mystery compiler errors will result. 
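+ // Each ConnectBy* policy above adds one vertex per transcript and an edge whenever two
+ // transcripts share the chosen property (overlapping exons, gene_id, tss_id, protein_id
+ // or strand); cluster_transcripts() then partitions the graph with
+ // boost::connected_components.  Editor's sketch of the Boost.Graph usage, under the
+ // same vecS/undirectedS graph type as this header (components_of() is hypothetical):
+ //
+ //   #include <boost/graph/adjacency_list.hpp>
+ //   #include <boost/graph/connected_components.hpp>
+ //   typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS> Graph;
+ //   std::vector<int> components_of(Graph& g)
+ //   {
+ //       std::vector<int> comp(boost::num_vertices(g));
+ //       boost::connected_components(g, &comp[0]);   // comp[v] = cluster index of vertex v
+ //       return comp;
+ //   }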
Affects gcc >= 4.1 +#include + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#include +#endif + +#include + +#if (BOOST_VERSION < 103800) +#include +#else +#include +#endif + + +#include "abundances.h" + +using namespace boost; + +typedef adjacency_list AbundanceGraph; + +struct ConnectByExonOverlap +{ + void operator()(const AbundanceGroup& cluster, + AbundanceGraph& G); +}; + +struct ConnectByAnnotatedGeneId +{ + void operator()(const AbundanceGroup& cluster, + AbundanceGraph& G); +}; + +struct ConnectByAnnotatedTssId +{ + void operator()(const AbundanceGroup& cluster, + AbundanceGraph& G); +}; + +struct ConnectByAnnotatedProteinId +{ + void operator()(const AbundanceGroup& cluster, + AbundanceGraph& G); +}; + +struct ConnectByStrand +{ + void operator()(const AbundanceGroup& cluster, + AbundanceGraph& G); +}; + +// A "transcript cluster is a set of transcripts whose projections into the +// genome overlap on the same strand. They may thus share fragment alignments, +// and so they need to be quantitated together. After quantitation, they +// can be picked apart. +template +void cluster_transcripts(const AbundanceGroup& transfrags, + vector& transfrags_by_cluster, + ublas::matrix* new_gamma = NULL, + ublas::matrix* new_iterated_count = NULL, + ublas::matrix* new_count = NULL, + ublas::matrix* new_fpkm = NULL, + ublas::matrix* new_gamma_bootstrap = NULL) +{ + adjacency_list G; + + transfrags_by_cluster.clear(); + + cluster_policy cp; + + cp(transfrags, G); + + std::vector component(num_vertices(G)); + connected_components(G, &component[0]); + + vector > clusters(transfrags.abundances().size(), + vector(transfrags.abundances().size(), false)); + + vector > cluster_indices(transfrags.abundances().size()); + for (size_t i = 0; i < transfrags.abundances().size(); ++i) + { + clusters[component[i]][i] = true; + cluster_indices[component[i]].push_back(i); + } + for (size_t i = 0; i < cluster_indices.size(); ++i) + { + if (cluster_indices[i].empty()) + { + cluster_indices.resize(i); + break; + } + } + for (size_t i = 0; i < clusters.size(); ++i) + { + AbundanceGroup cluster; + transfrags.filter_group(clusters[i], cluster); + if (!cluster.abundances().empty()) + transfrags_by_cluster.push_back(cluster); + } + + if (new_gamma != NULL) + { + const ublas::matrix& trans_gamma_cov = transfrags.gamma_cov(); + const ublas::matrix& trans_gamma_bootstrap_cov = transfrags.gamma_bootstrap_cov(); + const ublas::matrix& trans_iterated_count_cov = transfrags.iterated_count_cov(); + const ublas::matrix& trans_count_cov = transfrags.count_cov(); + const ublas::matrix& trans_fpkm_cov = transfrags.fpkm_cov(); + + ublas::matrix& cov = *new_gamma; + ublas::matrix& boot_cov = *new_gamma_bootstrap; + ublas::matrix& iterated_count_cov = *new_iterated_count; + ublas::matrix& count_cov = *new_count; + ublas::matrix& fpkm_cov = *new_fpkm; + + // number of primary transcripts for this gene + size_t num_pt = cluster_indices.size(); + cov = ublas::zero_matrix(num_pt, num_pt); + boot_cov = ublas::zero_matrix(num_pt, num_pt); + count_cov = ublas::zero_matrix(num_pt, num_pt); + iterated_count_cov = ublas::zero_matrix(num_pt, num_pt); + fpkm_cov = ublas::zero_matrix(num_pt, num_pt); + //cerr << "combined " << combined << endl; + + //cerr << "locus isoform gamma cov" << gamma_cov << endl; + for (size_t L = 0; L < cluster_indices.size(); ++L) + { + const vector& L_isos = cluster_indices[L]; + for (size_t K = 0; K < cluster_indices.size(); ++K) + { + const vector& K_isos = cluster_indices[K]; + for (size_t l = 
0; l < L_isos.size(); ++l) + { + for (size_t k = 0; k < K_isos.size(); ++k) + { + cov(L,K) += trans_gamma_cov(L_isos[l],K_isos[k]); + boot_cov(L,K) += trans_gamma_bootstrap_cov(L_isos[l],K_isos[k]); + count_cov(L,K) += trans_count_cov(L_isos[l],K_isos[k]); + iterated_count_cov(L,K) += trans_iterated_count_cov(L_isos[l],K_isos[k]); + fpkm_cov(L,K) += trans_fpkm_cov(L_isos[l],K_isos[k]); + } + } + } + } + } +} + +#endif + diff --git a/src/codons.cpp b/src/codons.cpp new file mode 100644 index 0000000..a459250 --- /dev/null +++ b/src/codons.cpp @@ -0,0 +1,90 @@ +#include "codons.h" + +static char codonTable[32768]; //32K table for fasta codon decoding + // codons are encoded as triplets of 5-bit-encoded nucleotides + // (so any codon can be encoded/decoded as a unique 15-bit value) + +static char codonData[]={ //long list of 3+1 characters (codon+translation) +'A','A','A','K', 'A','A','C','N', 'A','A','G','K', 'A','A','R','K', 'A','A','T','N', +'A','A','Y','N', 'A','C','A','T', 'A','C','B','T', 'A','C','C','T', 'A','C','D','T', +'A','C','G','T', 'A','C','H','T', 'A','C','K','T', 'A','C','M','T', 'A','C','N','T', +'A','C','R','T', 'A','C','S','T', 'A','C','T','T', 'A','C','V','T', 'A','C','W','T', +'A','C','Y','T', 'A','G','A','R', 'A','G','C','S', 'A','G','G','R', 'A','G','R','R', +'A','G','T','S', 'A','G','Y','S', 'A','T','A','I', 'A','T','C','I', 'A','T','G','M', +'A','T','H','I', 'A','T','M','I', 'A','T','T','I', 'A','T','W','I', 'A','T','Y','I', +'C','A','A','Q', 'C','A','C','H', 'C','A','G','Q', 'C','A','R','Q', 'C','A','T','H', +'C','A','Y','H', 'C','C','A','P', 'C','C','B','P', 'C','C','C','P', 'C','C','D','P', +'C','C','G','P', 'C','C','H','P', 'C','C','K','P', 'C','C','M','P', 'C','C','N','P', +'C','C','R','P', 'C','C','S','P', 'C','C','T','P', 'C','C','V','P', 'C','C','W','P', +'C','C','Y','P', 'C','G','A','R', 'C','G','B','R', 'C','G','C','R', 'C','G','D','R', +'C','G','G','R', 'C','G','H','R', 'C','G','K','R', 'C','G','M','R', 'C','G','N','R', +'C','G','R','R', 'C','G','S','R', 'C','G','T','R', 'C','G','V','R', 'C','G','W','R', +'C','G','Y','R', 'C','T','A','L', 'C','T','B','L', 'C','T','C','L', 'C','T','D','L', +'C','T','G','L', 'C','T','H','L', 'C','T','K','L', 'C','T','M','L', 'C','T','N','L', +'C','T','R','L', 'C','T','S','L', 'C','T','T','L', 'C','T','V','L', 'C','T','W','L', +'C','T','Y','L', 'G','A','A','E', 'G','A','C','D', 'G','A','G','E', 'G','A','R','E', +'G','A','T','D', 'G','A','Y','D', 'G','C','A','A', 'G','C','B','A', 'G','C','C','A', +'G','C','D','A', 'G','C','G','A', 'G','C','H','A', 'G','C','K','A', 'G','C','M','A', +'G','C','N','A', 'G','C','R','A', 'G','C','S','A', 'G','C','T','A', 'G','C','V','A', +'G','C','W','A', 'G','C','Y','A', 'G','G','A','G', 'G','G','B','G', 'G','G','C','G', +'G','G','D','G', 'G','G','G','G', 'G','G','H','G', 'G','G','K','G', 'G','G','M','G', +'G','G','N','G', 'G','G','R','G', 'G','G','S','G', 'G','G','T','G', 'G','G','V','G', +'G','G','W','G', 'G','G','Y','G', 'G','T','A','V', 'G','T','B','V', 'G','T','C','V', +'G','T','D','V', 'G','T','G','V', 'G','T','H','V', 'G','T','K','V', 'G','T','M','V', +'G','T','N','V', 'G','T','R','V', 'G','T','S','V', 'G','T','T','V', 'G','T','V','V', +'G','T','W','V', 'G','T','Y','V', 'M','G','A','R', 'M','G','G','R', 'M','G','R','R', +'N','N','N','X', 'R','A','Y','B', 'S','A','R','Z', 'T','A','A','.', 'T','A','C','Y', +'T','A','G','.', 'T','A','R','.', 'T','A','T','Y', 'T','A','Y','Y', 'T','C','A','S', +'T','C','B','S', 'T','C','C','S', 'T','C','D','S', 'T','C','G','S', 'T','C','H','S', +'T','C','K','S', 
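+ // packCodon() (defined after this table) reduces each nucleotide letter to 5 bits
+ // (letter - 'A' < 32, assuming uppercase input) and packs the three values into a
+ // 15-bit index into the 32K codonTable, which is filled from codonData above.
+ // Editor's illustration of the equivalent, un-bit-twiddled arithmetic
+ // (pack_codon_plain() is a hypothetical name):
+ //
+ //   unsigned short pack_codon_plain(char n1, char n2, char n3)
+ //   {
+ //       return (unsigned short)((n1 - 'A') + 32 * (n2 - 'A') + 1024 * (n3 - 'A'));
+ //   }
+ //   // e.g. codonTable[pack_codon_plain('A','T','G')] yields 'M' (Met, the start codon)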
'T','C','M','S', 'T','C','N','S', 'T','C','R','S', 'T','C','S','S', +'T','C','T','S', 'T','C','V','S', 'T','C','W','S', 'T','C','Y','S', 'T','G','A','.', +'T','G','C','C', 'T','G','G','W', 'T','G','T','C', 'T','G','Y','C', 'T','R','A','.', +'T','T','A','L', 'T','T','C','F', 'T','T','G','L', 'T','T','R','L', 'T','T','T','F', +'T','T','Y','F', 'X','X','X','X', 'Y','T','A','L', 'Y','T','G','L', 'Y','T','R','L' +}; + + +static bool isCodonTableReady=codonTableInit(); + +unsigned short packCodon(char n1, char n2, char n3) { + //assumes they are uppercase already! + byte b1=n1-'A'; + byte b2=n2-'A'; + byte b3=n3-'A'; + b1 |= (b2 << 5); + b2 = (b2 >> 3) | (b3 << 2); + return ( ((unsigned short)b2) << 8) + b1; + } + +bool codonTableInit() { + memset((void*)codonTable, 'X', 32768); + int cdsize=sizeof(codonData); + for (int i=0;i + +unsigned short packCodon(char n1, char n2, char n3); +//assumes n1,n2,n3 are UPPERCASE! + +struct Codon { + char nuc[3]; + Codon(char* str=NULL) { + if (str==NULL) { + nuc[0]='N'; + nuc[1]='N'; + nuc[2]='N'; + } + else { + nuc[0]=toupper(str[0]); + nuc[1]=toupper(str[1]); + nuc[2]=toupper(str[2]); + } + } + + Codon(char s1, char s2, char s3) { + nuc[0]=toupper(s1); + nuc[1]=toupper(s2); + nuc[2]=toupper(s3); + } + + + char& operator[](int idx) { + if (idx<0 || idx>2) + GError("Error: Codon index out of bounds!\n"); + return nuc[idx]; + } + + char operator[](int idx) const { + if (idx<0 || idx>2) + GError("Error: Codon index out of bounds!\n"); + return nuc[idx]; + } + + char translate(); + }; + +//simple 1st frame forward translation of a given DNA string +//will allocated memory for the translation -- the caller is +// responsible for freeing the returned string! +char* translateDNA(const char* dnastr, int& aalen, int dnalen=0); + + +bool codonTableInit(); + +#endif diff --git a/src/common.cpp b/src/common.cpp new file mode 100644 index 0000000..01131a4 --- /dev/null +++ b/src/common.cpp @@ -0,0 +1,376 @@ +/* + * common.cpp + * cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2008 Cole Trapnell. All rights reserved. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "getopt.h" +#include "common.h" +#include "replicates.h" + +using namespace std; + + +// Non-option globals +bool final_est_run = true; +bool allow_junk_filtering = true; +bool user_provided_fld = false; + +// Behavior options +int num_threads = 1; +bool no_update_check = false; +bool cuff_quiet = false; +#if ASM_VERBOSE +bool cuff_verbose = true; +#else +bool cuff_verbose = false; +#endif +bool output_fld = false; +bool output_bias_params = false; + +// General options +BundleMode bundle_mode = HIT_DRIVEN; +BundleMode init_bundle_mode = HIT_DRIVEN; +int max_partner_dist = 50000; +uint32_t max_gene_length = 3500000; +std::string ref_gtf_filename = ""; +std::string mask_gtf_filename = ""; +std::string output_dir = "./"; +std::string fasta_dir; +string default_library_type = "fr-unstranded"; +string library_type = default_library_type; + + +// Abundance estimation options +bool corr_bias = false; +bool corr_multi = false; +bool use_quartile_norm = false; +bool poisson_dispersion = false; +BiasMode bias_mode = POS_VLMM; +int def_frag_len_mean = 200; +int def_frag_len_std_dev = 80; +int def_max_frag_len = 800; +int max_frag_len = 800; +int min_frag_len = 1; +float min_isoform_fraction = 0.1; +int max_mle_iterations = 5000; +int num_importance_samples = 10000; +bool use_compat_mass = false; +bool use_total_mass = false; + + +// Ref-guided assembly options +int overhang_3 = 600; +int ref_merge_overhang_tolerance = 30; +int tile_len = 405; +int tile_off = 15; +bool enable_faux_reads = true; +bool enable_5_extend = true; + +// Assembly options +uint32_t min_intron_length = 50; +uint32_t max_intron_length = 300000; +int olap_radius = 50; +int bowtie_overhang_tolerance = 8; // Typically don't need to change this, except in special cases, such as meta-assembly. +int min_frags_per_transfrag = 10; +int microexon_length = 25; +float pre_mrna_fraction = 0.15; +float high_phred_err_prob = 0.50; // about MAPQ = 3 +double small_anchor_fraction = 7 / 75.0; +double binomial_junc_filter_alpha = 0.001; +double trim_3_dropoff_frac = .1; +double trim_3_avgcov_thresh = 10.0; +std::string user_label = "CUFF"; + +bool use_em = true; +bool cond_prob_collapse = true; + +bool emit_count_tables = false; +bool use_fisher_covariance = true; +bool split_variance = false; +bool bootstrap = true; +int num_bootstrap_samples = 20; +double bootstrap_fraction = 1.0; +double bootstrap_delta_gap = 0.001; +int max_frags_per_bundle = 1000000; + +// SECRET OPTIONS: +// These options are just for instrumentation and benchmarking code + +float read_skip_fraction = 0.0; +bool no_read_pairs = false; +int trim_read_length = -1; +double mle_accuracy = 1e-6; + +// END SECRET OPTIONS + + +map library_type_table; +const ReadGroupProperties* global_read_properties = NULL; + +#if ENABLE_THREADS +boost::thread_specific_ptr bundle_label; +#else +boost::shared_ptr bundle_label; +#endif + +long random_seed = 0; + +extern void print_usage(); + +bool gaurd_assembly() +{ + return ref_gtf_filename == ""; +} + +void asm_verbose(const char* fmt,...) +{ +#if !ASM_VERBOSE + return; +#endif + va_list argp; + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); +} + +void verbose_msg(const char* fmt,...) 
{ + + if (!cuff_verbose) + return; + + va_list argp; + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); +} + + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', than output the given error message and + * exit with an error and a usage message. + */ + + +int parseInt(int lower, const char *errmsg, void (*print_usage)()) { + long l; + char *endPtr= NULL; + l = strtol(optarg, &endPtr, 10); + if (endPtr != NULL) { + if (l < lower) { + cerr << errmsg << endl; + print_usage(); + exit(1); + } + return (int32_t)l; + } + cerr << errmsg << endl; + print_usage(); + exit(1); + return -1; +} + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', than output the given error message and + * exit with an error and a usage message. + */ +float parseFloat(float lower, float upper, const char *errmsg, void (*print_usage)()) { + float l; + l = (float)atof(optarg); + + if (l < lower) { + cerr << errmsg << endl; + print_usage(); + exit(1); + } + + if (l > upper) + { + cerr << errmsg << endl; + print_usage(); + exit(1); + } + + return l; + + cerr << errmsg << endl; + print_usage(); + exit(1); + return -1; +} + +/* Function with behaviour like `mkdir -p' */ +/* found at: http://niallohiggins.com/2009/01/08/mkpath-mkdir-p-alike-in-c-for-unix/ */ + +int mkpath(const char *s, mode_t mode) +{ + char *q, *r = NULL, *path = NULL, *up = NULL; + int rv; + + rv = -1; + if (strcmp(s, ".") == 0 || strcmp(s, "/") == 0) + return (0); + + if ((path = strdup(s)) == NULL) + exit(1); + + if ((q = strdup(s)) == NULL) + exit(1); + + if ((r = dirname(q)) == NULL) + goto out; + + if ((up = strdup(r)) == NULL) + exit(1); + + if ((mkpath(up, mode) == -1) && (errno != EEXIST)) + goto out; + + if ((mkdir(path, mode) == -1) && (errno != EEXIST)) + rv = -1; + else + rv = 0; + +out: + if (up != NULL) + free(up); + free(q); + free(path); + return (rv); +} + +void init_library_table() +{ + ReadGroupProperties fr_unstranded; + fr_unstranded.platform(UNKNOWN_PLATFORM); + fr_unstranded.mate_strand_mapping(FR); + fr_unstranded.std_mate_orientation(MATES_POINT_TOWARD); + fr_unstranded.strandedness(UNSTRANDED_PROTOCOL); + + library_type_table["fr-unstranded"] = fr_unstranded; + + ReadGroupProperties fr_firststrand; + fr_firststrand.platform(UNKNOWN_PLATFORM); + fr_firststrand.mate_strand_mapping(RF); + fr_firststrand.std_mate_orientation(MATES_POINT_TOWARD); + fr_firststrand.strandedness(STRANDED_PROTOCOL); + + library_type_table["fr-firststrand"] = fr_firststrand; + + ReadGroupProperties fr_secondstrand; + fr_secondstrand.platform(UNKNOWN_PLATFORM); + fr_secondstrand.mate_strand_mapping(FR); + fr_secondstrand.std_mate_orientation(MATES_POINT_TOWARD); + fr_secondstrand.strandedness(STRANDED_PROTOCOL); + + library_type_table["fr-secondstrand"] = fr_secondstrand; + + ReadGroupProperties ff_unstranded; + ff_unstranded.platform(UNKNOWN_PLATFORM); + ff_unstranded.mate_strand_mapping(FF); + ff_unstranded.std_mate_orientation(MATES_POINT_TOWARD); + ff_unstranded.strandedness(UNSTRANDED_PROTOCOL); + + library_type_table["ff-unstranded"] = ff_unstranded; + + ReadGroupProperties ff_firststrand; + ff_firststrand.platform(UNKNOWN_PLATFORM); + ff_firststrand.mate_strand_mapping(FF); + ff_firststrand.std_mate_orientation(MATES_POINT_TOWARD); + ff_firststrand.strandedness(STRANDED_PROTOCOL); + + library_type_table["ff-firststrand"] = ff_firststrand; + + ReadGroupProperties ff_secondstrand; + 
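// The remaining entries (ff-secondstrand and transfrags) follow the same pattern:
+ // each ReadGroupProperties value bundles the platform, strandedness and
+ // mate-orientation settings for one supported library type. A sketch of the intended
+ // lookup, mirroring the commented-out assignment at the end of this function (the
+ // template arguments are reconstructed here, not taken from this copy):
+ //   map<string, ReadGroupProperties>::iterator lt = library_type_table.find(library_type);
+ //   if (lt != library_type_table.end()) global_read_properties = &lt->second;
+ 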
ff_secondstrand.platform(UNKNOWN_PLATFORM); + ff_secondstrand.mate_strand_mapping(RR); + ff_secondstrand.std_mate_orientation(MATES_POINT_TOWARD); + ff_secondstrand.strandedness(STRANDED_PROTOCOL); + + library_type_table["ff-secondstrand"] = ff_secondstrand; + + ReadGroupProperties transfrags; + transfrags.platform(UNKNOWN_PLATFORM); + transfrags.mate_strand_mapping(FR); + transfrags.std_mate_orientation(MATES_POINT_TOWARD); + transfrags.strandedness(UNSTRANDED_PROTOCOL); + transfrags.complete_fragments(true); + + library_type_table["transfrags"] = transfrags; + + //global_read_properties = &(library_type_table.find(default_library_type)->second); +} + +void print_library_table() +{ + fprintf (stderr, "\nSupported library types:\n"); + for (map::const_iterator itr = library_type_table.begin(); + itr != library_type_table.end(); + ++itr) + { + if (itr->first == default_library_type) + { + fprintf(stderr, "\t%s (default)\n", itr->first.c_str()); + } + else + { + fprintf(stderr, "\t%s\n", itr->first.c_str()); + } + } +} + + +// c_seq is complement, *NOT* REVERSE complement +void encode_seq(const string seqStr, char* seq, char* c_seq) +{ + + for (size_t i = 0; i < seqStr.length(); ++i) + { + switch(seqStr[i]) + { + case 'A' : + case 'a' : seq[i] = 0; c_seq[i] = 3; break; + case 'c' : + case 'C' : seq[i] = 1; c_seq[i] = 2; break; + case 'G' : + case 'g' : seq[i] = 2; c_seq[i] = 1; break; + case 'T' : + case 't' : seq[i] = 3; c_seq[i] = 0; break; + default : seq[i] = 4; c_seq[i] = 4; break; // N + } + } +} + + +ReadGroupProperties::ReadGroupProperties() : + _strandedness(UNKNOWN_STRANDEDNESS), + _std_mate_orient(UNKNOWN_MATE_ORIENTATION), + _platform(UNKNOWN_PLATFORM), + _total_map_mass(0.0), + _norm_map_mass(0.0), + _mass_scaling_factor(1.0), + _complete_fragments(false) +{ + _mass_dispersion_model = boost::shared_ptr(new PoissonDispersionModel); +} diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..b715e9c --- /dev/null +++ b/src/common.h @@ -0,0 +1,436 @@ +#ifndef COMMON_H +#define COMMON_H +/* + * common.h + * Cufflinks + * + * Created by Cole Trapnell on 11/26/08. + * Copyright 2008 Cole Trapnell. All rights reserved. 
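+ * 
+ * common.h declares the option globals defined in common.cpp together with the 
+ * shared types used across the tools: the EmpDist fragment-length distribution, 
+ * ReadGroupProperties (per-library strandedness, mate orientation and mass scaling), 
+ * the library-type table and the OPT_* long-option codes. 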
+ * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include +using boost::math::normal; + +#include +#define foreach BOOST_FOREACH +#define reverse_foreach BOOST_REVERSE_FOREACH + +#include +#include + +// Non-option globals +extern bool final_est_run; +extern bool allow_junk_filtering; +extern bool user_provided_fld; +extern int def_max_frag_len; +extern int max_frag_len; +extern int min_frag_len; + +// Behavior options +extern int num_threads; +extern bool no_update_check; +extern bool cuff_quiet; +extern bool cuff_verbose; +extern bool output_fld; +extern bool output_bias_params; + +// General options +extern int max_partner_dist; +extern uint32_t max_gene_length; +extern std::string ref_gtf_filename; +extern std::string mask_gtf_filename; +extern std::string output_dir; +extern std::string fasta_dir; +extern std::string library_type; + +// Abundance estimation options +extern bool corr_bias; +extern bool corr_multi; +extern bool use_quartile_norm; +extern bool poisson_dispersion; +extern int def_frag_len_mean; +extern int def_frag_len_std_dev; +extern int max_mle_iterations; +extern int num_importance_samples; +extern float min_isoform_fraction; +extern bool use_em; +extern bool cond_prob_collapse; +extern bool use_compat_mass; +extern bool use_total_mass; + +// Ref-guided assembly options +extern int overhang_3; +extern int ref_merge_overhang_tolerance; +extern int tile_len; +extern int tile_off; +extern bool enable_faux_reads; +extern bool enable_5_extend; + +// Assembly options +extern uint32_t min_intron_length; +extern uint32_t max_intron_length; +extern int olap_radius; +extern int bowtie_overhang_tolerance; +extern int min_frags_per_transfrag; +extern int microexon_length; +extern float pre_mrna_fraction; +extern float high_phred_err_prob; +extern double trim_3_dropoff_frac; +extern double trim_3_avgcov_thresh; +extern double small_anchor_fraction; +extern double binomial_junc_filter_alpha; +extern std::string user_label; +extern long random_seed; +extern bool emit_count_tables; +extern bool use_fisher_covariance; +extern bool split_variance; +extern bool bootstrap; +extern int num_bootstrap_samples; +extern double bootstrap_fraction; +extern double bootstrap_delta_gap; +extern int max_frags_per_bundle; + +// SECRET OPTIONS: +// These options are just for instrumentation and benchmarking code + +extern bool no_read_pairs; +extern float read_skip_fraction; +extern int trim_read_length; +extern double mle_accuracy; + +// END SECRET OPTIONS + +#define ASM_VERBOSE 0 +#define ENABLE_THREADS 1 + +#if ENABLE_THREADS +extern boost::thread_specific_ptr bundle_label; // for consistent, traceable logging +#else +extern boost::shared_ptr bundle_label; +#endif + + +bool gaurd_assembly(); + +void asm_verbose(const char* fmt,...); +void verbose_msg(const char* fmt,...); + +int parseInt(int lower, + const char *errmsg, + void (*print_usage)()); + +float parseFloat(float lower, + float upper, + const char *errmsg, + void (*print_usage)()); + +void encode_seq(const std::string seqStr, char* seq, char* c_seq); +int mkpath(const char *s, mode_t mode); + + +template +OutputIterator copy_if(InputIterator begin, + InputIterator end, + OutputIterator destBegin, + Predicate p) +{ + while (begin != end) + { + if (p(*begin)) *destBegin++ = *begin; + ++begin; + } + return destBegin; +} + +enum BundleMode +{ + HIT_DRIVEN, + REF_DRIVEN, + REF_GUIDED +}; +extern BundleMode bundle_mode; +extern BundleMode init_bundle_mode; + +enum BiasMode +{ + SITE, + VLMM, + POS, 
+ POS_VLMM, + POS_SITE +}; +extern BiasMode bias_mode; + +enum Strandedness +{ + UNKNOWN_STRANDEDNESS, + STRANDED_PROTOCOL, + UNSTRANDED_PROTOCOL +}; + +enum StandardMateOrientation +{ + UNKNOWN_MATE_ORIENTATION, + MATES_POINT_TOWARD, + MATES_POINT_SAME, + MATES_POINT_AWAY, + UNPAIRED, +}; + +enum MateStrandMapping +{ + FF, + FR, + RF, // This is really FR with first-strandedness + RR // This is really FF with first-strandedness +}; + +enum Platform +{ + UNKNOWN_PLATFORM, + ILLUMINA, + SOLID +}; + +class EmpDist +{ + //Vectors only valid between min and max! + std::vector _pdf; + std::vector _cdf; + int _mode; + double _mean; + double _std_dev; + int _min; + int _max; + +public: + EmpDist(std::vector& pdf, std::vector& cdf, int mode, double mean, double std_dev, int min, int max) + : _pdf(pdf), _cdf(cdf), _mode(mode), _mean(mean), _std_dev(std_dev), _min(min), _max(max) {} + + void pdf(std::vector& pdf) { _pdf = pdf; } + double pdf(int l) const + { + if (!valid_len(l)) + return 0.0; + return _pdf[l]; + } + + // pdf renomalized over the lengths <= r + double npdf(int l, int r) const + { + if (!valid_len(l)) + return 0.0; + + if (r > _max || r == 0) + return pdf(l); + + return pdf(l)/cdf(r); + } + + void cdf(std::vector& cdf) { _cdf = cdf; } + double cdf(int l) const + { + if (l > _max) + return 1.0; + if (l < 0) + return 0.0; + return _cdf[l]; + } + + bool valid_len(int l) const { return (l >= _min && l <= _max); } + bool too_short(int l) const { return (l < _min); } + + void mode(int mode) { _mode = mode; } + int mode() const { return _mode; } + + void max(int max) { _max = max; } + int max() const { return _max; } + + void min(int min) { _min = min; } + int min() const { return _min; } + + void mean(double mean) { _mean = mean; } + double mean() const { return _mean; } + + void std_dev(double std_dev) { _std_dev = std_dev; } + double std_dev() const { return _std_dev; } +}; + +class BiasLearner; +class MultiReadTable; + +class MassDispersionModel; + +struct LocusCount +{ + LocusCount(std::string ld, double c, int nt) : + locus_desc(ld), count(c), num_transcripts(nt) {} + std::string locus_desc; + double count; + int num_transcripts; +}; + +class ReadGroupProperties +{ +public: + + ReadGroupProperties(); + + Strandedness strandedness() const { return _strandedness; } + void strandedness(Strandedness s) { _strandedness = s; } + + StandardMateOrientation std_mate_orientation() const { return _std_mate_orient; } + void std_mate_orientation(StandardMateOrientation so) { _std_mate_orient = so; } + + MateStrandMapping mate_strand_mapping() const { return _mate_strand_mapping; } + void mate_strand_mapping(MateStrandMapping msm) { _mate_strand_mapping = msm; } + + Platform platform() const { return _platform; } + void platform(Platform p) { _platform = p; } + + long double total_map_mass() const { return _total_map_mass; } + void total_map_mass(long double p) { _total_map_mass = p; } + + long double normalized_map_mass() const { return _norm_map_mass; } + void normalized_map_mass(long double p) { _norm_map_mass = p; } + + boost::shared_ptr frag_len_dist() const { return _frag_len_dist; } + void frag_len_dist(boost::shared_ptr p) { _frag_len_dist = p; } + + boost::shared_ptr bias_learner() const { return _bias_learner; } + void bias_learner(boost::shared_ptr bl) { _bias_learner = bl; } + + void mass_scale_factor(double sf) { _mass_scaling_factor = sf; } + double mass_scale_factor() const { return _mass_scaling_factor; } + + void complete_fragments(bool c) { _complete_fragments = c; } + bool 
complete_fragments() const { return _complete_fragments; } + + double scale_mass(double unscaled_mass) const + { + if (_mass_scaling_factor == 0) + return unscaled_mass; + + return unscaled_mass * (1.0 / _mass_scaling_factor); + } + + boost::shared_ptr mass_dispersion_model() const + { + return _mass_dispersion_model; + }; + + void mass_dispersion_model(boost::shared_ptr nm) + { + _mass_dispersion_model = nm; + } + + const std::vector& common_scale_counts() { return _common_scale_counts; } + void common_scale_counts(const std::vector& counts) { _common_scale_counts = counts; } + + boost::shared_ptr multi_read_table() const {return _multi_read_table; } + void multi_read_table(boost::shared_ptr mrt) { _multi_read_table = mrt; } + +private: + + Strandedness _strandedness; + StandardMateOrientation _std_mate_orient; + MateStrandMapping _mate_strand_mapping; + Platform _platform; + long double _total_map_mass; + long double _norm_map_mass; + boost::shared_ptr _frag_len_dist; + boost::shared_ptr _bias_learner; + boost::shared_ptr _multi_read_table; + + double _mass_scaling_factor; + boost::shared_ptr _mass_dispersion_model; + std::vector _common_scale_counts; + + bool _complete_fragments; +}; + +extern std::map library_type_table; + +extern const ReadGroupProperties* global_read_properties; + +void print_library_table(); +void init_library_table(); + + +template +std::string cat_strings(const T& container, const char* delimiter=",") +{ + std::string cat; + if (container.empty()) + { + cat = ""; + } + else + { + typename T::const_iterator itr = container.begin(); + //cat = *(itr); + for (; itr != container.end(); itr++) + { + if (!(*itr).empty()) { + if (!cat.empty()) cat += delimiter; + cat += *itr; + } + } + } + + return cat; +} + +#define OPT_NUM_IMP_SAMPLES 260 +#define OPT_MLE_MAX_ITER 261 +#define OPT_FDR 262 +#define OPT_LIBRARY_TYPE 263 +#define OPT_OVERHANG_TOLERANCE 264 +#define OPT_MAX_BUNDLE_LENGTH 265 +#define OPT_MIN_FRAGS_PER_TRANSFRAG 266 +#define OPT_BIAS_MODE 267 +#define OPT_MIN_INTRON_LENGTH 268 +#define OPT_3_PRIME_AVGCOV_THRESH 269 +#define OPT_3_PRIME_DROPOFF_FRAC 270 +#define OPT_POISSON_DISPERSION 271 +#define OPT_NO_UPDATE_CHECK 272 +#define OPT_OUTPUT_FLD 273 +#define OPT_OUTPUT_BIAS_PARAMS 274 +#define OPT_USE_EM 275 +#define OPT_COLLAPSE_COND_PROB 276 +#define OPT_RANDOM_SEED 277 +#define OPT_NO_FAUX_READS 278 +#define OPT_3_OVERHANG_TOLERANCE 279 +#define OPT_INTRON_OVERHANG_TOLERANCE 280 +#define OPT_EMIT_COUNT_TABLES 281 +#define OPT_USE_COMPAT_MASS 282 +#define OPT_USE_TOTAL_MASS 283 +#define OPT_USE_FISHER_COVARIANCE 284 +#define OPT_USE_EMPIRICAL_COVARIANCE 285 +#define OPT_SPLIT_MASS 286 +#define OPT_SPLIT_VARIANCE 287 +#define OPT_BOOTSTRAP 288 +#define OPT_NUM_BOOTSTRAP_SAMPLES 289 +#define OPT_BOOTSTRAP_FRACTION 290 +#define OPT_TILE_LEN 291 +#define OPT_TILE_SEP 292 +#define OPT_NO_5_EXTEND 293 +#define OPT_MAX_FRAGS_PER_BUNDLE 294 +#define OPT_READ_SKIP_FRACTION 295 +#define OPT_NO_READ_PAIRS 296 +#define OPT_TRIM_READ_LENGTH 297 +#define OPT_MAX_DELTA_GAP 298 +#define OPT_MLE_MIN_ACC 299 +#endif diff --git a/src/compress_gtf.cpp b/src/compress_gtf.cpp new file mode 100644 index 0000000..a2cd10a --- /dev/null +++ b/src/compress_gtf.cpp @@ -0,0 +1,427 @@ +/* + * gtf_to_sam.cpp + * Cufflinks + * + * Created by Cole Trapnell on 8/1/10. + * Copyright 2009 Cole Trapnell. All rights reserved. 
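+ * 
+ * (Despite the gtf_to_sam.cpp name in the header above, which looks like a leftover 
+ * from a copied template, this file builds the compress_gtf utility.) compress_gtf 
+ * loads one or more reference GTFs and, for each annotated gene, either re-emits its 
+ * transcripts grouped under that gene or collapses them into a single record: their 
+ * projective union (-U) or their constitutive/projective intersection (-I), as 
+ * described in print_usage() below. 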
+ * + */ + +#ifdef HAVE_CONFIG_H +#include +#else +#define PACKAGE_VERSION "INTERNAL" +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "hits.h" +#include "bundles.h" + +#include "gtf_tracking.h" +#include "scaffolds.h" +#include "tokenize.h" +#include "genes.h" + +using namespace boost; +using namespace std; + +#if ENABLE_THREADS +const char *short_options = "r:F"; +#else +const char *short_options = "r:F"; +#endif + +bool raw_fpkm = false; +bool proj_union = false; +bool proj_intersection = false; + +static struct option long_options[] = { +{"reference-seq", required_argument, 0, 'r'}, +{"raw-fpkm", no_argument, 0, 'F'}, +{"union", no_argument, 0, 'U'}, +{"intersection", no_argument, 0, 'I'}, + + +{0, 0, 0, 0} // terminator +}; + +void print_usage() +{ + //NOTE: SPACES ONLY, bozo + fprintf(stderr, "compress_gtf v%s\n", PACKAGE_VERSION); + fprintf(stderr, "linked against Boost version %d\n", BOOST_VERSION); + fprintf(stderr, "-----------------------------\n"); + fprintf(stderr, "Usage: compress_gtf [options] \n"); + fprintf(stderr, "Options:\n\n"); + fprintf(stderr, "-r/--reference-seq reference fasta file [ default: NULL ]\n"); + fprintf(stderr, "-F/--raw-fpkm use FPKM instead of isoform fraction \n"); + fprintf(stderr, "-U/--union report projective union [ default: OFF ]\n"); + fprintf(stderr, "-I/--intersection report projective intersection [ default: ON ]\n"); +} + +int parse_options(int argc, char** argv) +{ + int option_index = 0; + int next_option; + do { + next_option = getopt_long(argc, argv, short_options, long_options, &option_index); + switch (next_option) { + case -1: /* Done with options. */ + break; + case 'r': + { + fasta_dir = optarg; + break; + } + case 'F': + { + raw_fpkm = true; + break; + } + case 'U': + { + proj_union = true; + break; + } + case 'I': + { + proj_intersection = true; + break; + } + default: + print_usage(); + return 1; + } + } while(next_option != -1); + + if (proj_union && proj_intersection) + { + fprintf (stderr, "Error: please specify only one of --union and --intersection"); + exit(1); + } + +// if (!proj_union && !proj_intersection) +// proj_intersection = true; + return 0; +} + +void compress_genes(FILE* ftranscripts, + RefSequenceTable& rt, + vector >& ref_mRNAs) +{ + adjacency_list G; + + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + shared_ptr scaff_i = ref_mRNAs[i]; + for (size_t j = 0; j < ref_mRNAs.size(); ++j) + { + shared_ptr scaff_j = ref_mRNAs[j]; + if (scaff_i->annotated_gene_id() == scaff_j->annotated_gene_id()) + add_edge(i, j, G); + } + } + + std::vector component(num_vertices(G)); + connected_components(G, &component[0]); + + vector > clusters(ref_mRNAs.size(), + vector(ref_mRNAs.size(), false)); + + //vector > cluster_indices(three_prime_ends.size()); + + vector > > grouped_scaffolds(ref_mRNAs.size()); + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + clusters[component[i]][i] = true; + grouped_scaffolds[component[i]].push_back(ref_mRNAs[i]); + } + + for (size_t i = 0; i < grouped_scaffolds.size(); ++i) + { + vector >& gene = grouped_scaffolds[i]; + vector gene_scaffs; + string gene_id; + foreach (shared_ptr s, gene) + { + if (gene_id == "") + gene_id = s->annotated_gene_id(); + + gene_scaffs.push_back(*s); + } + + if (gene_scaffs.empty()) + continue; + + next_gene_id++; + + Scaffold smashed_gene; + if (!proj_intersection && !proj_union) + { + foreach 
(shared_ptr s, gene) + { + /* + *transfrag, + gene_id, + (int)isoforms.size() + 1, + FPKM, + iso_ab->effective_length(), + iso_ab->gamma(), + iso_ab->FPKM_conf(), + density_per_bp, + estimated_count, + density_score, + iso_ab->status(), + ref_gene_id)*/ + + Isoform iso(*s, + -1, + 1, + 0.0, + s->length(), + 0.0, + ConfidenceInterval(0.0,0.0), + 0, + 0, + 0, + NUMERIC_OK, + gene_id); + vector isoform_exon_recs; + + iso.get_gtf(isoform_exon_recs, rt); + + for (size_t g = 0; g < isoform_exon_recs.size(); ++g) + { + fprintf(ftranscripts, "%s", isoform_exon_recs[g].c_str()); + } + } + } + else + { + if (proj_union) + Scaffold::merge(gene_scaffs, smashed_gene, false); + else if (proj_intersection) + { + vector iso_ops; + + int gmax = -1; + int gmin = numeric_limits::max(); + + foreach (shared_ptr s, gene) + { + //iso_ops.push_back(s->augmented_ops()); + //sort (iso_ops.back().begin(), iso_ops.back().end()); + if (s->left() < gmin) + gmin = s->left(); + if (s->right() > gmax) + gmax = s->right(); + } + + foreach (shared_ptr s, gene) + { + if (s->left() > gmin) + { + iso_ops.push_back(AugmentedCuffOp(CUFF_INTRON, gmin, s->left() - gmin)); + } + if (s->right() < gmax) + { + iso_ops.push_back(AugmentedCuffOp(CUFF_INTRON, s->right(), gmax - s->right())); + } + iso_ops.insert(iso_ops.end(), s->augmented_ops().begin(), s->augmented_ops().end()); + } +// vector intersect = iso_ops.front(); +// for (size_t j = 1; j < iso_ops.size(); ++j) +// { +// vector tmp; +// const vector& iso_ops_j = iso_ops[j]; +// //set_intersection(intersect.begin(), intersect.end(), iso_ops_j.begin(), iso_ops_j.end(), back_inserter(tmp)); +// intersect.insert(intersect.end(), iso_ops_j.begin(), iso_ops_j.end()); +// +// intersect.push_back( +// assert (tmp.size() <= intersect.size()); +// //intersect = tmp; +// //sort(intersect.begin(), intersect.end()); +// } +// + sort(iso_ops.begin(), iso_ops.end(), AugmentedCuffOp::g_left_lt); +// +// while (!intersect.empty() && intersect.front().opcode != CUFF_MATCH) +// { +// intersect.erase(intersect.begin()); +// } +// +// while (!intersect.empty() && intersect.back().opcode != CUFF_MATCH) +// { +// intersect.pop_back(); +// } +// +// if (intersect.empty()) +// continue; + + vector merged_ops; + AugmentedCuffOp::merge_ops(iso_ops, merged_ops, true, true); + vector::iterator first_match = merged_ops.begin(); + vector::iterator last_match = merged_ops.end(); + last_match--; + while(first_match < merged_ops.end()) + { + if (first_match->opcode == CUFF_MATCH) + break; + first_match++; + } + while(last_match >= merged_ops.begin() && last_match< merged_ops.end()) + { + if (last_match->opcode == CUFF_MATCH) + break; + last_match--; + } + + vector internal_matches; + if (last_match >= first_match && last_match < merged_ops.end()) + { + last_match++; + + internal_matches.insert(internal_matches.end(), first_match, last_match); + smashed_gene = Scaffold(gene.front()->ref_id(), gene.front()->strand(), internal_matches); + } + else + { + + fprintf(stderr, "Could not find consitutive region for %s\n", gene_id.c_str()); + continue; + } + + } + else + assert(false); + assert (smashed_gene.ref_id()); + + Isoform iso(smashed_gene, + -1, + 1, + 0.0, + smashed_gene.length(), + 0.0, + ConfidenceInterval(0.0,0.0), + 0, + 0, + 0, + NUMERIC_OK, + gene_id); + vector isoform_exon_recs; + + iso.get_gtf(isoform_exon_recs, rt); + + for (size_t g = 0; g < isoform_exon_recs.size(); ++g) + { + fprintf(ftranscripts, "%s", isoform_exon_recs[g].c_str()); + } + } + + fflush(ftranscripts); + } +} + +void driver(vector 
ref_gtf_files, FILE* gtf_out) +{ + ReadTable it; + RefSequenceTable rt(true, false); + + vector > > ref_mRNA_table; + vector > > sample_count_table; + + foreach (FILE* ref_gtf, ref_gtf_files) + { + vector > ref_mRNAs; + ::load_ref_rnas(ref_gtf, rt, ref_mRNAs, false, true); + ref_mRNA_table.push_back(ref_mRNAs); + } + + for (size_t j = 0; j < ref_mRNA_table.size(); ++j) + { + vector > ref_mRNAs = ref_mRNA_table[j]; + + if (!raw_fpkm) + compress_genes(gtf_out, rt, ref_mRNAs); + } +} + +int main(int argc, char** argv) +{ + init_library_table(); + + int parse_ret = parse_options(argc,argv); + if (parse_ret) + return parse_ret; + + + if(optind >= argc) + { + print_usage(); + return 1; + } + + string ref_gtf_in_filenames = argv[optind++]; + + if(optind >= argc) + { + print_usage(); + return 1; + } + + string gtf_out_filename = argv[optind++]; + + vector ref_gtf_filenames; + tokenize(ref_gtf_in_filenames, ",", ref_gtf_filenames); + + vector ref_gtf_files; + + foreach (const string& ref_gtf_in_filename, ref_gtf_filenames) + { + FILE* ref_gtf = NULL; + if (ref_gtf_in_filename != "") + { + ref_gtf = fopen(ref_gtf_in_filename.c_str(), "r"); + if (!ref_gtf) + { + fprintf(stderr, "Error: cannot open GTF file %s for reading\n", + ref_gtf_in_filename.c_str()); + exit(1); + } + ref_gtf_files.push_back(ref_gtf); + } + } + + FILE* gtf_out = NULL; + if (gtf_out_filename != "") + { + gtf_out = fopen(gtf_out_filename.c_str(), "w"); + if (!gtf_out) + { + fprintf(stderr, "Error: cannot open GTF file %s for writing\n", + gtf_out_filename.c_str()); + exit(1); + } + } + + driver(ref_gtf_files, gtf_out); + + return 0; +} diff --git a/src/cuffcompare.cpp b/src/cuffcompare.cpp new file mode 100644 index 0000000..07257bb --- /dev/null +++ b/src/cuffcompare.cpp @@ -0,0 +1,2646 @@ +#ifdef HAVE_CONFIG_H +#include +#else +#define PACKAGE_VERSION "INTERNAL" +#define SVN_REVISION "SVN" +#endif + +#include "GArgs.h" +#include +#include +#include "gtf_tracking.h" + +#ifdef HAVE_CONFIG_H +#include "update_check.h" +#endif + +#define USAGE "Usage:\n\ +cuffcompare [-r ] [-R] [-T] [-V] [-s ] \n\ + [-o ] [-p ] \n\ + {-i | [ .. 
]}\n\ +\n\ + Cuffcompare provides classification, reference annotation mapping and various\n\ + statistics for Cufflinks transfrags.\n\ + Cuffcompare clusters and tracks transfrags across multiple samples, writing\n\ + matching transcripts (intron chains) into .tracking, and a GTF\n\ + file .combined.gtf containing a nonredundant set of transcripts \n\ + across all input files (with a single representative transfrag chosen\n\ + for each clique of matching transfrags across samples).\n\ +\n\ +Options:\n\ +-i provide a text file with a list of Cufflinks GTF files to process instead\n\ + of expecting them as command line arguments (useful when a large number\n\ + of GTF files should be processed)\n\ +\n\ +-r a set of known mRNAs to use as a reference for assessing \n\ + the accuracy of mRNAs or gene models given in \n\ +\n\ +-R for -r option, reduce the set of reference transcripts to \n\ + only those found to overlap any of the input loci\n\ +-M discard (ignore) single-exon transfrags and reference transcripts\n\ +-N discard (ignore) single-exon reference transcripts\n\ +\n\ +-s can be a multi-fasta file with all the genomic sequences or \n\ + a directory containing multiple single-fasta files (one file per contig);\n\ + lower case bases will be used to classify input transcripts as repeats\n\ +\n\ +-d max distance (range) for grouping transcript start sites (100)\n\ +-p the name prefix to use for consensus transcripts in the \n\ + .combined.gtf file (default: 'TCONS')\n\ +-C include the \"contained\" transcripts in the .combined.gtf file\n\ +-G generic GFF input file(s) (do not assume Cufflinks GTF)\n\ +-T do not generate .tmap and .refmap files for each input file\n\ +-V verbose processing mode (showing all GFF parsing warnings)\n\ +" +bool debug=false; +bool perContigStats=false; // -S to enable stats for every single contig +bool generic_GFF=false; //-G, don't assume Cufflinks GTF as input +bool showContained=false; // -C +bool reduceRefs=false; +bool checkFasta=false; +bool tmapFiles=true; +bool only_spliced_refs=false; +int debugCounter=0; + +int polyrun_range=2000; //polymerase run range 2KB +double scoreThreshold=0; +double exprThreshold=0; +char* cprefix=NULL; +FILE* ffasta=NULL; //genomic seq file +FILE *f_ref=NULL; //reference mRNA GFF, if provided +FILE* f_in=NULL; //sequentially, each input GFF file +FILE* f_out=NULL; //stdout if not provided +GFastaHandler gfasta; +int xlocnum=0; +int tsscl_num=0; //for tss cluster IDs +int protcl_num=0; //for "unique" protein IDs within TSS clusters +int tssDist=100; +//int total_tcons=0; +int total_xloci_alt=0; + +void openfwrite(FILE* &f, GArgs& args, char opt) { + GStr s=args.getOpt(opt); + if (!s.is_empty()) { + if (s=='-') + f=stdout; + else { + f=fopen(s,"w"); + if (f==NULL) GError("Error creating file: %s\n", s.chars()); + } + } +} + +//-- structure to keep track of data from multiple qry input files for a single genomic seq +class GSeqTrack { + int gseq_id; + public: + const char* gseq_name; + GList* rloci_f; //reference loci for this genomic sequence + GList* rloci_r; + GList xloci_f; // extended super-loci across all qry datasets + GList xloci_r; // extended super-loci across all qry datasets + GList xloci_u; // extended super-loci across all qry datasets + GSeqData* qdata[MAX_QFILES]; //fixed order array with GSeqData for each qry input + //element in array is NULL if a qry file has no transcripts on this genomic sequence + int get_gseqid() { return gseq_id; } + GSeqTrack(int gid=-1):xloci_f(true,true,false), + 
xloci_r(true,true,false), xloci_u(true,true,false) { + gseq_id=gid; + if (gseq_id>=0) { + gseq_name=GffObj::names->gseqs.getName(gseq_id); + } + rloci_f=NULL; + rloci_r=NULL; + for (int i=0;i(GSeqTrack& d){ + return (gseq_id>d.gseq_id); + } + bool operator<(GSeqTrack& d){ + return (gseq_id ref_data(true,true,true); //list of reference mRNAs and loci data for each genomic seq + //each locus will keep track of any superloci which includes it, formed during the analysis + +void processLoci(GSeqData& seqdata, GSeqData* refdata=NULL, int qfidx=0); + +void reportStats(FILE* fout, const char* setname, GSuperLocus& stotal, + GSeqData* seqdata=NULL, GSeqData* refdata=NULL); + +GSeqData* getQryData(int gid, GList& qdata); +void trackGData(int qcount, GList& gtracks, GStr& fbasename, FILE** ftr, FILE** frs); + +#define FWCLOSE(fh) if (fh!=NULL && fh!=stdout) fclose(fh) +#define FRCLOSE(fh) if (fh!=NULL && fh!=stdin) fclose(fh) + +FILE* f_mintr=NULL; //missed ref introns + +bool multiexon_only=false; +bool multiexonrefs_only=false; + +GHash refdescr; +void loadRefDescr(const char* fname); + +GList qryfiles(false,true,false); + +//list of GSeqTrack data, sorted by gseq_id +GList gseqtracks(true,true,true); +GSeqTrack* findGSeqTrack(int gsid); + + +int cmpGTrackByName(const pointer p1, const pointer p2) { + return strcmp(((GSeqTrack*)p1)->gseq_name, ((GSeqTrack*)p2)->gseq_name); +} + + +void show_usage() { + GMessage("cuffcompare v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); + GMessage( "-----------------------------\n"); + GMessage("%s\n", USAGE); + } + +int main(int argc, char * const argv[]) { + GArgs args(argc, argv, "XDTMNVGSCKRLhp:c:d:s:i:n:r:o:"); + int e; + if ((e=args.isError())>0) { + show_usage(); + GMessage("Invalid argument: %s\n", argv[e]); + exit(1); + } + if (args.getOpt('h')!=NULL){ + show_usage(); + exit(1); + } + showContained=(args.getOpt('C')!=NULL); + debug=(args.getOpt('D')!=NULL); + tmapFiles=(args.getOpt('T')==NULL); + multiexon_only=(args.getOpt('M')!=NULL); + multiexonrefs_only=(args.getOpt('N')!=NULL); + perContigStats=(args.getOpt('S')!=NULL); + checkFasta=(args.getOpt('K')!=NULL); + gtf_tracking_verbose=((args.getOpt('V')!=NULL) || debug); + FILE* finlst=NULL; + GStr s=args.getOpt('i'); + if (!s.is_empty()) { + if (s=='-') + finlst=stdin; + else { + finlst=fopen(s,"r"); + if (finlst==NULL) GError("Error opening file: %s\n", s.chars()); + } + } + int numqryfiles=0; + if (finlst) { + GLineReader* lr=new GLineReader(finlst); + char* l=NULL; + while ((l=lr->getLine())!=NULL) { + if (strlen(l)<2 || startsWith(l,"# ") || isspace(*l)) continue; + if (!fileExists(l)) GError("Error: cannot locate input file: %s\n", l); + qryfiles.Add(new GStr(l)); + } + delete lr; + //if (qryfiles.Count()>10) + gtf_tracking_largeScale=true; + } + else { + numqryfiles=args.startNonOpt(); + char *infile=NULL; + if (numqryfiles>0) { + while ((infile=args.nextNonOpt())!=NULL) { + if (!fileExists(infile)) GError("Error: cannot locate input file: %s\n", infile); + qryfiles.Add(new GStr(infile)); + } //for each argument + } + } + numqryfiles=qryfiles.Count(); + if (numqryfiles==0) { + show_usage(); + exit(1); + } + if (numqryfiles>MAX_QFILES) { + GMessage("Error: too many input files (limit set to %d at compile time)\n",MAX_QFILES); + GMessage("(if you need to raise this limit set a new value for\nMAX_QFILES in gtf_tracking.h and recompile)\n"); + exit(0x5000); + } + #ifdef HAVE_CONFIG_H + check_version(PACKAGE_VERSION); + #endif + gfasta.init(args.getOpt('s')); + // determine if -s points to a multi-fasta 
file or a directory + s=args.getOpt('c'); + if (!s.is_empty()) scoreThreshold=s.asReal(); + s=args.getOpt('p'); + if (!s.is_empty()) cprefix=Gstrdup(s.chars()); + else cprefix=Gstrdup("TCONS"); + s=args.getOpt('e'); + if (!s.is_empty()) exprThreshold=s.asReal(); + s=args.getOpt('d'); + if (!s.is_empty()) { + tssDist=s.asInt(); + } + + s=args.getOpt('n'); + if (!s.is_empty()) loadRefDescr(s.chars()); + s=args.getOpt('r'); + if (!s.is_empty()) { + f_ref=fopen(s,"r"); + if (f_ref==NULL) GError("Error opening reference gff: %s\n",s.chars()); + haveRefs=true; + if (gtf_tracking_verbose) GMessage("Loading reference transcripts..\n"); + read_mRNAs(f_ref, ref_data, &ref_data, true, -1, s.chars(), (multiexonrefs_only || multiexon_only)); + haveRefs=(ref_data.Count()>0); + reduceRefs=(args.getOpt('R')!=NULL); + if (gtf_tracking_verbose) GMessage("..reference annotation loaded\n"); + } + bool discard_redundant=true; //discard redundant input transfrags + generic_GFF=args.getOpt('G'); + if (generic_GFF) discard_redundant=false; //generic GTF, don't try to discard "redundant" transcripts + //if a full pathname is given + //the other common output files will still be created in the current directory: + // .loci, .tracking, .stats + GStr outbasename; //include path, if provided + GStr outprefix; //without path and/or extension + GStr outstats=args.getOpt('o'); + if (outstats.is_empty() || outstats=="-") { + outstats="cuffcmp"; + } + outbasename=outstats; + GStr outext(getFileExt(outstats.chars())); + if (outext.is_empty()) { + outext="stats"; + outstats.append(".stats"); + outbasename=outstats; + } + else outext.lower(); + if (outext=="txt" || outext=="out" || outext=="stats" || outext=="summary") { + outbasename.cut(outbasename.length()-outext.length()-1); + } + + outprefix=outbasename; + int di=outprefix.rindex(CHPATHSEP); + if (di>=0) outprefix.cut(0,di+1); + + if (debug) { //create a few more files potentially useful for debugging + s=outbasename; + s.append(".missed_introns.gtf"); + f_mintr=fopen(s.chars(),"w"); + if (f_mintr==NULL) GError("Error creating file %s!\n",s.chars()); + /* + s=outbasename; + s.append(".noTP_introns.gtf"); + f_nintr=fopen(s.chars(),"w"); + s=outbasename; + s.append(".wrong_Qintrons.gtf"); + f_qintr=fopen(s.chars(),"w"); + */ + } + + f_out=fopen(outstats, "w"); + if (f_out==NULL) GError("Error creating output file %s!\n", outstats.chars()); + if (gtf_tracking_verbose) GMessage("Prefix for output files: %s\n", outprefix.chars()); + fprintf(f_out, "# Cuffcompare v%s | Command line was:\n#", PACKAGE_VERSION); + for (int i=0;i** qrysdata=NULL; + FILE** tfiles=NULL; + FILE** rtfiles=NULL; + GMALLOC(qrysdata, numqryfiles*sizeof(GList*)); + if (tmapFiles) { + GMALLOC(tfiles, numqryfiles*sizeof(FILE*)); + if (haveRefs) { + GMALLOC(rtfiles, numqryfiles*sizeof(FILE*)); + } + } + for (int fi=0;fichars()); + GStr infname(getFileName(qryfiles[fi]->chars())); //file name only + GStr indir(qryfiles[fi]->chars()); + di=indir.rindex(CHPATHSEP); + if (di>=0) indir.cut(di+1); //directory path for this input file + else indir=""; //current directory + + if (debug || (gtf_tracking_verbose && !gtf_tracking_largeScale)) + GMessage("Processing qfile #%d: %s\n",fi+1, in_file.chars()); + if (in_file=="-") { f_in=stdin; in_file="stdin"; } + else { + f_in=fopen(in_file.chars(),"r"); + if (f_in==NULL) + GError("Cannot open input file %s!\n",in_file.chars()); + } + //f_in is the query gff file to process + + GStr sbase(indir); + sbase.append(outprefix); + sbase.append("."); + sbase.append(infname); + if 
(tmapFiles) { + //-- we should keep the infname path, otherwise the remaining file names + // may be the same and clobber each other + s=sbase; + s.append(".tmap"); + tfiles[fi]=fopen(s.chars(),"w"); + if (tfiles[fi]==NULL) + GError("Error creating file '%s'!\n",s.chars()); + fprintf(tfiles[fi],"ref_gene_id\tref_id\tclass_code\tcuff_gene_id\tcuff_id\tFMI\tFPKM\tFPKM_conf_lo\tFPKM_conf_hi\tcov\tlen\tmajor_iso_id\tref_match_len\n"); + if (haveRefs) { + s=sbase; + s.append(".refmap"); + rtfiles[fi]=fopen(s.chars(),"w"); + if (rtfiles[fi]==NULL) + GError("Error creating file '%s'!\n",s.chars()); + fprintf(rtfiles[fi],"ref_gene_id\tref_id\tclass_code\tcuff_id_list\n"); + } + } + + GList* pdata=new GList(true,true,true); + qrysdata[fi]=pdata; + if (gtf_tracking_verbose) GMessage("Loading transcripts from %s..\n",in_file.chars()); + read_mRNAs(f_in, *pdata, &ref_data, discard_redundant, fi, in_file.chars(), multiexon_only); + GSuperLocus gstats; + GFaSeqGet *faseq=NULL; + for (int g=0;gCount();g++) { //for each seqdata related to a genomic sequence + int gsid=pdata->Get(g)->get_gseqid(); + GSeqData* refdata=getRefData(gsid, ref_data);//ref data for this contig + if (!gtf_tracking_largeScale) + processLoci(*(pdata->Get(g)), refdata, fi); + GSeqTrack* seqtrack=findGSeqTrack(gsid); //this will add a gseqtrack if it doesn't exist + // for gsid + if (refdata!=NULL) { + seqtrack->rloci_f=&(refdata->loci_f); + seqtrack->rloci_r=&(refdata->loci_r); + } + seqtrack->qdata[fi]=pdata->Get(g); + //will only gather data into stats if perContig==false + if (!gtf_tracking_largeScale) reportStats(f_out, getGSeqName(gsid), gstats, + pdata->Get(g), refdata); + if (faseq!=NULL) delete faseq; + } //for each genomic sequence data + //there could be genomic sequences with no qry transcripts + //but with reference transcripts + if (haveRefs && !reduceRefs && !gtf_tracking_largeScale) { + for (int r=0;rget_gseqid(); + if (getQryData(gsid, *pdata)==NULL) { + reportStats(f_out, getGSeqName(gsid), gstats, NULL, refdata); + }//completely missed all refdata on this contig + } + } + //now report the summary: + if (!gtf_tracking_largeScale) reportStats(f_out, in_file.chars(), gstats); + if (f_in!=stdin) fclose(f_in); + //qfileno++; + }//for each input file + if (f_mintr!=NULL) fclose(f_mintr); + gseqtracks.setSorted(&cmpGTrackByName); + if (gtf_tracking_verbose) GMessage("Tracking transcripts across %d query files..\n", numqryfiles); + trackGData(numqryfiles, gseqtracks, outbasename, tfiles, rtfiles); + fprintf(f_out, "\n Total union super-loci across all input datasets: %d \n", xlocnum); + if (numqryfiles>1) { + fprintf(f_out, " (%d multi-transcript, ~%.1f transcripts per locus)\n", + total_xloci_alt, ((double)(GXConsensus::count))/xlocnum); + } + if (gtf_tracking_verbose) GMessage("Cleaning up..\n"); + GFREE(cprefix); + // clean up + for (int i=0;istart, m.exons[i]->end); + else fprintf(f,"%d-%d,",m.exons[i]->start, m.exons[i]->end); + } +} + +bool ichainMatch(GffObj* t, GffObj* r, bool& exonMatch, int fuzz=0) { + //t's intron chain is considered matching to reference r + //if r chain is the same or a subset of t's chain + exonMatch=false; + int imax=r->exons.Count()-1; + int jmax=t->exons.Count()-1; + if (imax==0 || jmax==0) { //single-exon mRNAs + if (imax!=jmax) return false; + exonMatch=r->exons[0]->coordMatch(t->exons[0],fuzz); + /*if (exonMatch) return true; + else return (r->exons[0]->start>=t->exons[0]->start && + r->exons[0]->end<=t->exons[0]->end);*/ + return exonMatch; + } + + if (r->exons[imax]->startexons[0]->end 
|| + t->exons[jmax]->startexons[0]->end ) //intron chains do not overlap at all + { + return false; + } + //check intron overlaps + int i=1; + int j=1; + bool exmism=false; //any mismatch + while (i<=imax && j<=jmax) { + uint rstart=r->exons[i-1]->end; + uint rend=r->exons[i]->start; + uint tstart=t->exons[j-1]->end; + uint tend=t->exons[j]->start; + if (tend1 || i>imax || j>jmax) { + return false; //no intron overlaps found at all + //or first intron of ref not overlapping + } + //from now on we expect intron matches up to imax + if (i!=j || imax!=jmax) { exmism=true; if (fuzz==0) return false; } + for (;i<=imax && j<=jmax;i++,j++) { + if (abs((int)(r->exons[i-1]->end-t->exons[j-1]->end))>fuzz || + abs((int)(r->exons[i]->start-t->exons[j]->start))>fuzz) { + return false; //just run away + } + } + //if we made it here, we have matching intron chains up to MIN(imax,jmax) + if (imax!=jmax) { + exmism=true; + if (jmaxexons[0]->start-t->exons[0]->start))<=fuzz && + abs((int)(r->exons[imax]->end-t->exons[jmax]->end))<=fuzz ); + return true; +} + + +void compareLoci2R(GList& loci, GList& cmpdata, + GList& refloci, int qfidx) { + cmpdata.Clear();//a new list of superloci will be built + if (refloci.Count()==0 || loci.Count()==0) return; + //reset cmpovl and stats + for (int i=0;icreset(); + //find loci with overlapping refloci + //and store cmpovl links both ways for ALL loci and refloci on this strand + for (int l=0;lcreset(); + for (int j=0;jstart>locus->end) break; + if (refloci[j]->start>locus->end) { + if (refloci[j]->start-locus->end > GFF_MAX_LOCUS) break; + continue; + } + if (locus->start>refloci[j]->end) continue; + // then we must have overlap here: + //if (locus->overlap(refloci[j]->start, refloci[j]->end)) { + locus->cmpovl.Add(refloci[j]); + refloci[j]->cmpovl.Add(locus); + //} + }//for each reflocus + } //for each locus + + //create corresponding "superloci" from transitive overlapping between loci and ref + for (int l=0;lv!=0) continue; //skip, already processed + GSuperLocus* super=new GSuperLocus(); + super->qfidx=qfidx; + //try to find all other loci connected to this locus loci[l] + GList lstack(false,false,false); //traversal stack + lstack.Push(loci[l]); + while (lstack.Count()>0) { + GLocus* locus=lstack.Pop(); + if (locus->v!=0 || locus->cmpovl.Count()==0) continue; + super->addQlocus(*locus); + locus->v=1; + for (int r=0;rcmpovl.Count();r++) { + GLocus* rloc=locus->cmpovl[r]; + if (rloc->v==0) { + super->addRlocus(*rloc); + rloc->v=1; + for (int ll=0;llcmpovl.Count();ll++) { + if (rloc->cmpovl[ll]->v==0) lstack.Push(rloc->cmpovl[ll]); + } + } + } //for each overlapping reflocus + } //while linking + + if (super->qloci.Count()==0) { + delete super; + continue; //try next query loci + } + //--here we have a "superlocus" region data on both qry and ref + // -- analyze mexons matching (base level metrics) + cmpdata.Add(super); + //make each ref locus keep track of all superloci containing it + for (int rl=0;rlrloci.Count();rl++) { + super->rloci[rl]->superlst->Add(super); + } + for (int x=0;xrmexons.Count();x++) { + super->rbases_all += super->rmexons[x].end-super->rmexons[x].start+1; + } + for (int x=0;xqmexons.Count();x++) { + super->qbases_all += super->qmexons[x].end-super->qmexons[x].start+1; + } + int i=0; //locus mexons + int j=0; //refmexons + while (iqmexons.Count() && jrmexons.Count()) { + uint istart=super->qmexons[i].start; + uint iend=super->qmexons[i].end; + uint jstart=super->rmexons[j].start; + uint jend=super->rmexons[j].end; + if (iendistart? 
jstart : istart; + uint ovlend = iendbaseTP+=ovlen; //qbases_cov + if (iendbaseFP=super->qbases_all-super->baseTP; + super->baseFN=super->rbases_all-super->baseTP; + } + */ + // -- exon level comparison: + int* qexovl; //flags for qry exons with ref overlap + GCALLOC(qexovl,super->quexons.Count()*sizeof(int)); + int* rexovl; //flags for ref exons with qry overlap + GCALLOC(rexovl,super->ruexons.Count()*sizeof(int)); + for (int i=0;iquexons.Count();i++) { + uint istart=super->quexons[i].start; + uint iend=super->quexons[i].end; + for (int j=0;jruexons.Count();j++) { + uint jstart=super->ruexons[j].start; + uint jend=super->ruexons[j].end; + if (iendquexons[i].coordMatch(&super->ruexons[j],5)) { + super->exonATP++; + if (super->quexons[i].coordMatch(&super->ruexons[j])) { + super->exonTP++; + } //exact match + } //fuzzy match + } //ref uexon loop + } //qry uexon loop + super->m_exons=0; //ref exons with no query overlap + super->w_exons=0; //qry exons with no ref overlap + for (int x=0;xquexons.Count();x++) + if (qexovl[x]==0) super->w_exons++; + for (int x=0;xruexons.Count();x++) + if (rexovl[x]==0) super->m_exons++; + GFREE(rexovl); + GFREE(qexovl); + + //-- intron level stats: + //query: + int* qinovl=NULL; //flags for qry introns with at least some ref overlap + int* qtpinovl=NULL; //flags for qry introns with perfect ref overlap + if (super->qintrons.Count()>0) { + GCALLOC(qinovl,super->qintrons.Count()*sizeof(int)); + GCALLOC(qtpinovl,super->qintrons.Count()*sizeof(int)); + } + //-- reference: + int* rinovl=NULL; //flags for ref introns with qry overlap + int* rtpinovl=NULL; //ref introns with perfect qry intron overlap + if (super->rintrons.Count()>0) { + GCALLOC(rinovl,super->rintrons.Count()*sizeof(int)); + GCALLOC(rtpinovl,super->rintrons.Count()*sizeof(int)); + } + for (int i=0;iqintrons.Count();i++) { + uint istart=super->qintrons[i].start; + uint iend=super->qintrons[i].end; + for (int j=0;jrintrons.Count();j++) { + uint jstart=super->rintrons[j].start; + uint jend=super->rintrons[j].end; + if (iendqintrons[i].coordMatch(&super->rintrons[j],5)) { + super->intronATP++; + if (super->qintrons[i].coordMatch(&super->rintrons[j])) { + super->intronTP++; + qtpinovl[i]++; + rtpinovl[j]++; + } //exact match + } //fuzzy match + } //ref intron loop + } //qry intron loop + super->m_introns=0; //ref introns with no query overlap + super->w_introns=0; //qry introns with no ref overlap + for (int x=0;xqintrons.Count();x++) { + if (qinovl[x]==0) { super->w_introns++; + //qry introns with no ref intron overlap AT ALL + super->i_qwrong.Add(super->qintrons[x]); + } + else + if (qtpinovl[x]==0) { + super->i_qnotp.Add(super->qintrons[x]); + } + } + for (int x=0;xrintrons.Count();x++) { + if (rinovl[x]==0) { //no intron overlap at all + super->m_introns++; + super->i_missed.Add(super->rintrons[x]); + } + else if (rtpinovl[x]==0) { //no perfect intron match + super->i_notp.Add(super->rintrons[x]); + } + } + GFREE(rinovl); + GFREE(rtpinovl); + GFREE(qinovl); + GFREE(qtpinovl); + + // ---- now intron-chain and transcript comparison + for (int i=0;iqmrnas.Count();i++) { + uint istart=super->qmrnas[i]->exons.First()->start; + uint iend=super->qmrnas[i]->exons.Last()->end; + for (int j=0;jrmrnas.Count();j++) { + uint jstart=super->rmrnas[j]->exons.First()->start; + uint jend=super->rmrnas[j]->exons.Last()->end; + if (iendqmrnas[i]->udata & 3) > 1) continue; //already counted a ichainTP for this qry + if (ichainMatch(super->qmrnas[i],super->rmrnas[j],exonMatch, 5)) { //fuzzy match + GLocus* 
qlocus=((CTData*)super->qmrnas[i]->uptr)->locus; + GLocus* rlocus=((CTData*)super->rmrnas[j]->uptr)->locus; + if (super->qmrnas[i]->exons.Count()>1) { + super->ichainATP++; + qlocus->ichainATP++; + rlocus->ichainATP++; + } + if (exonMatch) { + super->mrnaATP++; + qlocus->mrnaATP++; + rlocus->mrnaATP++; + } + if (ichainMatch(super->qmrnas[i],super->rmrnas[j],exonMatch)) { //exact match + if (super->qmrnas[i]->exons.Count()>1) { + super->qmrnas[i]->udata|=1; + super->ichainTP++; + qlocus->ichainTP++; + rlocus->ichainTP++; + } + if (exonMatch) { + super->qmrnas[i]->udata|=2; + super->mrnaTP++; + qlocus->mrnaTP++; + rlocus->mrnaTP++; + } + } //exact match + } //fuzzy match + } //ref mrna loop + } //qry mrna loop + for (int ql=0;qlqloci.Count();ql++) { + if (super->qloci[ql]->ichainTP+super->qloci[ql]->mrnaTP >0 ) + super->locusQTP++; + if (super->qloci[ql]->ichainATP+super->qloci[ql]->mrnaATP>0) + super->locusAQTP++; + } + for (int rl=0;rlrloci.Count();rl++) { + if (super->rloci[rl]->ichainTP+super->rloci[rl]->mrnaTP >0 ) + super->locusTP++; + if (super->rloci[rl]->ichainATP+super->rloci[rl]->mrnaATP>0) + super->locusATP++; + } + + }//for each unlinked locus + +} + +//look for qry data for a specific genomic sequence +GSeqData* getQryData(int gid, GList& qdata) { + int qi=-1; + GSeqData f(gid); + GSeqData* q=NULL; + if (qdata.Found(&f,qi)) + q=qdata[qi]; + return q; +} + +const char* findDescr(GffObj* gfobj) { + if (refdescr.Count()==0) return NULL; + GStr* s=refdescr.Find(gfobj->getID()); + if (s==NULL) { + s=refdescr.Find(gfobj->getGeneName()); + if (s==NULL) s=refdescr.Find(gfobj->getGeneID()); + } + if (s!=NULL) + return s->chars(); + return NULL; +} + +const char* getGeneID(GffObj* gfobj) { + //returns anything that might resemble a gene identifier for this transcript + //or, if everything fails, returns the transcript ID + const char* s=gfobj->getGeneName(); + if (s) return s; + if ((s=gfobj->getGeneID())!=NULL) return s; + if ((s=gfobj->getAttr("Name"))!=NULL) return s; + return gfobj->getID(); +} + +const char* getGeneID(GffObj& gfobj) { + return getGeneID(&gfobj); +} + +void writeLoci(FILE* f, GList & loci) { + for (int l=0;lgetID(), + loc.mrna_maxcov->getGSeqName(), + loc.mrna_maxcov->strand, loc.start,loc.end); + //now print all transcripts in this locus, comma delimited + int printfd=0; + for (int i=0;igetID()); + else fprintf(f,",%s",loc.mrnas[i]->getID()); + printfd++; + } + const char* rdescr=findDescr(loc.mrna_maxcov); + if (rdescr==NULL) fprintf(f,"\t\n"); + else fprintf(f,"\t%s\n",rdescr); + } +} + +void printXQ1(FILE* f, int qidx, GList& qloci) { + int printfd=0; + //print + for (int i=0;iqfidx!=qidx) continue; + for (int j=0;jmrnas.Count();j++) { + if (printfd==0) fprintf(f,"%s",qloci[i]->mrnas[j]->getID()); + else fprintf(f,",%s",qloci[i]->mrnas[j]->getID()); + printfd++; + } + } + if (printfd==0) fprintf(f,"-"); + } + +void numXLoci(GList& xloci, int& last_id) { + for (int l=0;lqloci.Count()==0) continue; //we never print ref-only xloci + last_id++; + xloci[l]->id=last_id; + } +} + + +class GProtCl { + public: + GList protcl; + GProtCl(GXConsensus* c=NULL):protcl(true,false,false) { + if (c!=NULL) + protcl.Add(c); + } + bool add_Pcons(GXConsensus* c) { + if (c==NULL || c->aalen==0) return false; + if (protcl.Count()==0) { + protcl.Add(c); + return true; + } + if (protcl[0]->aalen!=c->aalen) return false; + if (strcmp(protcl[0]->aa,c->aa)!=0) return false; + protcl.Add(c); + return true; + } + + void addMerge(GProtCl& pcl, GXConsensus* pclnk) { + for (int i=0;iaalen; + } + 
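// Note: GProtCl groups GXConsensus transcripts whose predicted CDS translations are
+ // identical (see add_Pcons() above, which compares aalen and the aa strings);
+ // protCluster() later gives every member of such a group the same p_id.
+ // The comparison operators below only compare object addresses, so the relative
+ // ordering of protein clusters carries no meaning.
+ 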
bool operator==(GProtCl& cl) { + return this==&cl; + } + bool operator>(GProtCl& cl) { + return (this>&cl); + } + bool operator<(GProtCl& cl) { + return (this<&cl); + } +}; + +class GTssCl:public GSeg { //experiment cluster of ref loci (isoforms) + public: + uint fstart; //lowest coordinate of the first exon + uint fend; //highest coordinate of the first exon + GList tsscl; + GTssCl(GXConsensus* c=NULL):tsscl(true,false,false) { + start=0; + end=0; + fstart=0; + fend=0; + if (c!=NULL) addFirst(c); + } + + void addFirst(GXConsensus* c) { + tsscl.Add(c); + start=c->start; + end=c->end; + GffExon* fexon=(c->tcons->strand=='-') ? c->tcons->exons.Last() : + c->tcons->exons.First(); + fstart=fexon->start; + fend=fexon->end; + } + bool add_Xcons(GXConsensus* c) { + if (tsscl.Count()==0) { + addFirst(c); + return true; + } + //check if it can be added to existing xconsensi + uint nfend=0; + uint nfstart=0; + /* + if (tsscl.Get(0)->tcons->getGeneID()!=NULL && + c->tcons->getGeneID()!=NULL && + strcmp(tsscl.Get(0)->tcons->getGeneID(), c->tcons->getGeneID())) + //don't tss cluster if they don't have the same GeneID (?) + //FIXME: we might not want this if input files are not from Cufflinks + // and they could simply lack proper GeneID + return false; + */ + if (c->tcons->strand=='-') { + //no, the first exons don't have to overlap + //if (!c->tcons->exons.Last()->overlap(fstart,fend)) return false; + nfstart=c->tcons->exons.Last()->start; + nfend=c->tcons->exons.Last()->end; + //proximity check for the transcript start: + if (nfend>fend+tssDist || fend>nfend+tssDist) + return false; + } + else { + //if (!c->tcons->exons.First()->overlap(fstart,fend)) return false; + nfstart=c->tcons->exons.First()->start; + nfend=c->tcons->exons.First()->end; + if (nfstart>fstart+tssDist || fstart>nfstart+tssDist) + return false; + } + // -- if we are here, we can add to tss cluster + + tsscl.Add(c); + if (fstart>nfstart) fstart=nfstart; + if (fendc->start) start=c->start; + if (endend) end=c->end; + return true; + } + + void addMerge(GTssCl& cl, GXConsensus* clnk) { + for (int i=0;icl.fstart) fstart=cl.fstart; + if (fendcl.start) start=cl.start; + if (endaalen-c2->aalen); + if (diflen>=6) return diflen; + //obvious case: same CDS + if (diflen==0 && strcmp(c1->aa, c2->aa)==0) return 0; + //simple edit distance calculation + IntArray dist(c1->aalen+1, c2->aalen+1); + for (int i=0;i<=c1->aalen;i++) { + dist.data(i,0) = i; + } + for (int j = 0; j <= c2->aalen; j++) { + dist.data(0,j) = j; + } + for (int i = 1; i <= c1->aalen; i++) + for (int j = 1; j <= c2->aalen; j++) { + dist.data(i,j) = GMIN3( dist.data(i-1,j)+1, + dist.data(i,j-1)+1, + dist.data(i-1,j-1)+((c1->aa[i-1] == c2->aa[j-1]) ? 
0 : 1) ); + } + int r=dist.data(c1->aalen,c2->aalen); + return r; +} +*/ +void printConsGTF(FILE* fc, GXConsensus* xc, int xlocnum) { + for (int i=0;itcons->exons.Count();i++) { + fprintf(fc, + "%s\t%s\texon\t%d\t%d\t.\t%c\t.\tgene_id \"XLOC_%06d\"; transcript_id \"%s_%08d\"; exon_number \"%d\";", + xc->tcons->getGSeqName(),xc->tcons->getTrackName(),xc->tcons->exons[i]->start, xc->tcons->exons[i]->end, xc->tcons->strand, + xlocnum, cprefix, xc->id, i+1); + //if (i==0) { + const char* gene_name=NULL; + if (xc->ref) { + gene_name=xc->ref->getGeneName(); + if (gene_name==NULL) gene_name=xc->ref->getGeneID(); + if (gene_name) { + fprintf (fc, " gene_name \"%s\";", gene_name); + } + } + if (!haveRefs) { + if (gene_name==NULL && xc->tcons->getGeneName()) + fprintf (fc, " gene_name \"%s\";", xc->tcons->getGeneName()); + char* s=xc->tcons->getAttr("nearest_ref", true); + if (s) fprintf(fc, " nearest_ref \"%s\";",s); + s=xc->tcons->getAttr("class_code", true); + if (s) fprintf(fc, " class_code \"%s\";", s); + } + fprintf(fc, " oId \"%s\";",xc->tcons->getID()); + if (xc->contained) { + fprintf(fc, " contained_in \"%s_%08d\";", cprefix, xc->contained->id); + } + if (haveRefs) { + if (xc->ref!=NULL) + fprintf(fc, " nearest_ref \"%s\";",xc->ref->getID()); + fprintf(fc, " class_code \"%c\";",xc->refcode ? xc->refcode : '.'); + } + if (xc->tss_id>0) fprintf(fc, " tss_id \"TSS%d\";",xc->tss_id); + if (xc->p_id>0) fprintf(fc, " p_id \"P%d\";",xc->p_id); + // } + fprintf(fc,"\n"); + } +} + +void tssCluster(GXLocus& xloc) +{ + GList xpcls(true,true,false); + for (int i=0;itcons->exons.Count()<2) continue; //should we skip single-exon transcripts ?? + GArray mrgloci(true); + int lfound=0; + for (int l=0;lendtcons->exons.First()->start) continue; + if (xpcls[l]->start>c->tcons->exons.Last()->end) break; + if (xpcls[l]->add_Xcons(c)) + { + lfound++; + mrgloci.Add(l); + + } + + } // for each xpcluster + if (lfound==0) + { + //create a xpcl with only this xconsensus + xpcls.Add(new GTssCl(c)); + + } + else if (lfound>1) + { + for (int l=1;laddMerge(*xpcls[mlidx], c); + xpcls.Delete(mlidx); + } + } + + }//for each xconsensus in this xlocus + for (int l=0;ltsscl.Count()<2) continue; + tsscl_num++; + for (int i=0;itsscl.Count();i++) + xpcls[l]->tsscl[i]->tss_id=tsscl_num; + //processTssCl(xcds_num, xpcls[l], faseq); + } +} + +void protCluster(GXLocus& xloc, GFaSeqGet *faseq) { + if (!faseq) + return; + GList xpcls(true,true,false); + for (int i=0;iref==NULL || c->ref->CDstart==0) continue; //no ref or CDS available + if (c->refcode!='=') continue; + //get the CDS translation here + if (c->aa==NULL) { + c->aa=c->ref->getSplicedTr(faseq, true, &c->aalen); + if (c->aalen>0 && c->aa[c->aalen-1]=='.') { + //discard the final stop codon + c->aalen--; + c->aa[c->aalen]=0; + } + } + GArray mrgloci(true); + int lfound=0; + for (int l=0;laalen()!=c->aalen) continue; + if (xpcls[l]->add_Pcons(c)) { + lfound++; + mrgloci.Add(l); + } + } // for each xpcluster + if (lfound==0) { + //create a xpcl with only this xconsensus + xpcls.Add(new GProtCl(c)); + } + else if (lfound>1) { + for (int l=1;laddMerge(*xpcls[mlidx], c); + xpcls.Delete(mlidx); + } + } + }//for each xconsensus in this xlocus + for (int l=0;lprotcl.Count();i++) + xpcls[l]->protcl[i]->p_id=protcl_num; + } + for (int i=0;iaa!=NULL) { GFREE(c->aa); } + } +} + +void printXLoci(FILE* f, FILE* fc, int qcount, GList& xloci, GFaSeqGet *faseq) { + for (int l=0;lqloci.Count()==0) continue; + GXLocus& xloc=*(xloci[l]); + xloc.checkContainment(); + tssCluster(xloc);//cluster 
and assign tss_id and cds_id to each xconsensus in xloc + protCluster(xloc,faseq); + for (int c=0;ccontained==NULL) + printConsGTF(fc,xloc.tcons[c],xloc.id); + } + fprintf(f,"XLOC_%06d\t%s[%c]%d-%d\t", xloc.id, + xloc.qloci[0]->mrna_maxcov->getGSeqName(), + xloc.strand, xloc.start,xloc.end); + //now print all transcripts in this locus, comma delimited + //first, ref loci, if any + int printfd=0; + if (xloc.rloci.Count()>0) { + for (int i=0;imrnas.Count();j++) { + if (printfd==0) fprintf(f,"%s|%s",getGeneID(xloc.rloci[i]->mrnas[j]), + xloc.rloci[i]->mrnas[j]->getID()); + else fprintf(f,",%s|%s",getGeneID(xloc.rloci[i]->mrnas[j]), + xloc.rloci[i]->mrnas[j]->getID()); + printfd++; + } + } + } + else { + fprintf(f,"-"); + } + //second, all the cufflinks transcripts + for (int qi=0;qi& mrnas, bool wrong=false) { +//find a ref mrna having this intron + GffObj* rm=NULL; + for (int i=0;istart>iseg.end) break; + if (m->endexons.Count();j++) { + if (iseg.start==m->exons[j-1]->end+1 && + iseg.end==m->exons[j]->start-1) { rm=m; break; } //match found + }//for each intron + if (rm!=NULL) break; + } //for each ref mrna in this locus + if (rm==NULL) GError("Error: couldn't find ref mrna for intron %d-%d! (BUG)\n", + iseg.start,iseg.end); + int ilen=iseg.end-iseg.start+1; + fprintf(f,"%s\t%s\tintron\t%d\t%d\t.\t%c\t.\t", + rm->getGSeqName(),rm->getTrackName(),iseg.start,iseg.end,strand); + if (faseq!=NULL) { + const char* gseq=faseq->subseq(iseg.start, ilen); + char* cseq=Gstrdup(gseq, gseq+ilen-1); + if (strand=='-') reverseComplement(cseq, ilen); + fprintf(f,"spl=\"%c%c..%c%c\"; ", toupper(cseq[0]),toupper(cseq[1]), + toupper(cseq[ilen-2]),toupper(cseq[ilen-1])); + GFREE(cseq); + } + fprintf(f,"transcript_id \"%s\";", rm->getID()); + if (wrong) fprintf(f," noOvl=1;"); + fprintf(f,"\n"); + +} + +void reportMIntrons(FILE* fm, FILE* fn, FILE* fq, char strand, + GList& cmpdata) { + if (fm==NULL) return; + for (int l=0;li_missed.Count();i++) + writeIntron(fm, strand, NULL, sl->i_missed[i], sl->rmrnas); + if (fn!=NULL) { + for (int i=0;ii_notp.Count();i++) + writeIntron(fn, strand, NULL, sl->i_notp[i], sl->rmrnas); + } + if (fq!=NULL) { + for (int i=0;ii_qwrong.Count();i++) { + writeIntron(fq, strand, NULL, sl->i_qwrong[i], sl->qmrnas, true); + } + for (int i=0;ii_qnotp.Count();i++) { + writeIntron(fq, strand, NULL, sl->i_qnotp[i], sl->qmrnas); + } + } + } +} + + +void processLoci(GSeqData& seqdata, GSeqData* refdata, int qfidx) { + //GList& glstloci, GList& cmpdata) + + if (refdata!=NULL) { + //if (gtf_tracking_verbose) GMessage(" ..comparing to reference loci..\n") ; + compareLoci2R(seqdata.loci_f, seqdata.gstats_f, refdata->loci_f, qfidx); + compareLoci2R(seqdata.loci_r, seqdata.gstats_r, refdata->loci_r, qfidx); + // -- report + + if (f_mintr!=NULL) { + GMessage(" ..reporting missed ref introns..\n"); + //reportIntrons(f_mintr, f_nintr, f_qintr, faseq, '+', seqdata.gstats_f); + //reportIntrons(f_mintr, f_nintr, f_qintr, faseq, '-', seqdata.gstats_r); + reportMIntrons(f_mintr, NULL, NULL, '+', seqdata.gstats_f); + reportMIntrons(f_mintr, NULL, NULL, '-', seqdata.gstats_r); + } + } +} + +//adjust stats for a list of unoverlapped (completely missed) ref loci +void collectRLocData(GSuperLocus& stats, GLocus& loc) { +stats.total_rmrnas+=loc.mrnas.Count(); +stats.total_rexons+=loc.uexons.Count(); +stats.total_rintrons+=loc.introns.Count(); +stats.total_rmexons+=loc.mexons.Count(); +stats.total_richains+=loc.ichains; +stats.m_exons+=loc.uexons.Count(); +stats.m_introns+=loc.introns.Count(); +stats.total_rloci++; 
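+// Note: for a reference locus that no query locus overlaps, all of its transcripts,
+// exons and introns are added to the missed (m_*) counts and to the reference totals
+// (total_r*) consumed by reportStats(); nothing is added on the query side.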
+for (int e=0;e& loci) { + for (int l=0;l0 && loc.mrnas.Count()>1) + stats.total_qloci_alt++; + stats.w_exons+=loc.uexons.Count(); + stats.w_introns+=loc.introns.Count(); + for (int e=0;e& loci, GList& nloci) { + for (int l=0;l& loci, GList& nloci) { + for (int l=0;lcmpovl.Count()==0) {//locus with no ref loci overlaps + stats.w_loci++; //novel/wrong loci + nloci.Add(loci[l]); + collectQLocData(stats,*loci[l]); + } + } +} + +void collectQU(GSuperLocus& stats, GList& nloci) { + for (int l=0;lprintGtf(f); + } +} + +void collectRNOvl(GSuperLocus& stats, GList& loci) { //, const char* gseqname) { + for (int l=0;lcmpovl.Count()==0) { + stats.m_loci++; //missed ref loci + //if (f_mloci!=NULL) + // printLocus(f_mloci,*loci[l], gseqname); + collectRLocData(stats,*loci[l]); + } + } +} + + +void collectCmpData(GSuperLocus& stats, GList& cmpdata) { //, const char* gseqname) { + for (int c=0;clocusTP==0 && cmpdata[c]->rloci.Count()>0) { + fprintf(f_nloci, "# Superlocus %s:%d-%d\n",gseqname, cmpdata[c]->start, cmpdata[c]->end); + for (int l=0;lrloci.Count();l++) { + printLocus(f_nloci,*cmpdata[c]->rloci[l], gseqname); + } + } + */ + } +} + +void collectStats(GSuperLocus& stats, GSeqData* seqdata, GSeqData* refdata) { + //collect all stats for a single genomic sequence into stats + if (seqdata==NULL) { + if (reduceRefs || refdata==NULL) return; + //special case with completely missed all refs on a contig/chromosome + collectRData(stats, refdata->loci_f); + collectRData(stats, refdata->loci_r); + return; + } + if (refdata==NULL) {//reference data missing on this contig + collectQData(stats, seqdata->loci_f, seqdata->nloci_f); + collectQData(stats, seqdata->loci_r, seqdata->nloci_r); + collectQU(stats, seqdata->nloci_u); + return; + } + + /*stats.total_qloci+=seqdata->loci_f.Count(); + stats.total_qloci+=seqdata->loci_r.Count(); + if (reduceRefs) { //only collect ref loci from superloci + + } + else { + stats.total_rloci+=refdata->loci_f.Count(); + stats.total_rloci+=refdata->loci_r.Count(); + } + */ + //collect data for overlapping superloci (already in seqdata->gstats_f/_r) + //char* gseqname=getGSeqName(seqdata->gseq_id); + collectCmpData(stats, seqdata->gstats_f); + collectCmpData(stats, seqdata->gstats_r); + //for non-overlapping qry loci, always add them as false positives FP + collectQNOvl(stats, seqdata->loci_f, seqdata->nloci_f); + collectQNOvl(stats, seqdata->loci_r, seqdata->nloci_r); + collectQU(stats, seqdata->nloci_u); + if (!reduceRefs) { //find ref loci with empty cmpovl and add them + collectRNOvl(stats, refdata->loci_f); + collectRNOvl(stats, refdata->loci_r); + } +} + +void reportStats(FILE* fout, const char* setname, GSuperLocus& stotal, + GSeqData* seqdata, GSeqData* refdata) { + GSuperLocus stats; + bool finalSummary=(seqdata==NULL && refdata==NULL); + GSuperLocus *ps=(finalSummary ? 
&stotal : &stats ); + if (!finalSummary) { //collecting contig stats + //gather statistics for all loci/superloci here + collectStats(stats, seqdata, refdata); + stotal.addStats(stats); + if (!perContigStats) return; + } + ps->calcF(); + if (seqdata!=NULL) fprintf(fout, "#> Genomic sequence: %s \n", setname); + else fprintf(fout, "\n#= Summary for dataset: %s :\n", setname); + + fprintf(fout, "# Query mRNAs : %7d in %7d loci (%d multi-exon transcripts)\n", + ps->total_qmrnas, ps->total_qloci, ps->total_qichains); + fprintf(fout, "# (%d multi-transcript loci, ~%.1f transcripts per locus)\n", + ps->total_qloci_alt, ((double)ps->total_qmrnas/ps->total_qloci)); + + if (haveRefs) { + fprintf(fout, "# Reference mRNAs : %7d in %7d loci (%d multi-exon)\n", + ps->total_rmrnas, ps->total_rloci, ps->total_richains); + if (ps->baseTP+ps->baseFP==0 || ps->baseTP+ps->baseFN==0) return; + fprintf(fout, "# Corresponding super-loci: %7d\n",ps->total_superloci); + + /*if (seqdata!=NULL) { + fprintf(fout, " ( %d/%d on forward/reverse strand)\n", + seqdata->gstats_f.Count(),seqdata->gstats_r.Count()); + }*/ + fprintf(fout, "#--------------------| Sn | Sp | fSn | fSp \n"); + double sp=(100.0*(double)ps->baseTP)/(ps->baseTP+ps->baseFP); + double sn=(100.0*(double)ps->baseTP)/(ps->baseTP+ps->baseFN); + fprintf(fout, " Base level: \t%5.1f\t%5.1f\t - \t - \n",sn, sp); + sp=(100.0*(double)ps->exonTP)/(ps->exonTP+ps->exonFP); + sn=(100.0*(double)ps->exonTP)/(ps->exonTP+ps->exonFN); + double fsp=(100.0*(double)ps->exonATP)/(ps->exonATP+ps->exonAFP); + double fsn=(100.0*(double)ps->exonATP)/(ps->exonATP+ps->exonAFN); + if (fsp>100.0) fsp=100.0; + if (fsn>100.0) fsn=100.0; + fprintf(fout, " Exon level: \t%5.1f\t%5.1f\t%5.1f\t%5.1f\n",sn, sp, fsn, fsp); + if (ps->total_rintrons>0) { + //intron level + sp=(100.0*(double)ps->intronTP)/(ps->intronTP+ps->intronFP); + sn=(100.0*(double)ps->intronTP)/(ps->intronTP+ps->intronFN); + fsp=(100.0*(double)ps->intronATP)/(ps->intronATP+ps->intronAFP); + fsn=(100.0*(double)ps->intronATP)/(ps->intronATP+ps->intronAFN); + if (fsp>100.0) fsp=100.0; + if (fsn>100.0) fsn=100.0; + fprintf(fout, " Intron level: \t%5.1f\t%5.1f\t%5.1f\t%5.1f\n",sn, sp, fsn, fsp); + //intron chains: + sp=(100.0*(double)ps->ichainTP)/(ps->ichainTP+ps->ichainFP); + sn=(100.0*(double)ps->ichainTP)/(ps->ichainTP+ps->ichainFN); + if (sp>100.0) sp=100.0; + if (sn>100.0) sn=100.0; + fsp=(100.0*(double)ps->ichainATP)/(ps->ichainATP+ps->ichainAFP); + fsn=(100.0*(double)ps->ichainATP)/(ps->ichainATP+ps->ichainAFN); + if (fsp>100.0) fsp=100.0; + if (fsn>100.0) fsn=100.0; + fprintf(fout, "Intron chain level: \t%5.1f\t%5.1f\t%5.1f\t%5.1f\n",sn, sp, fsn, fsp); + } + else { + fprintf(fout, " Intron level: \t - \t - \t - \t - \n"); + fprintf(fout, "Intron chain level: \t - \t - \t - \t - \n"); + } + sp=(100.0*(double)ps->mrnaTP)/(ps->mrnaTP+ps->mrnaFP); + sn=(100.0*(double)ps->mrnaTP)/(ps->mrnaTP+ps->mrnaFN); + fsp=(100.0*(double)ps->mrnaATP)/(ps->mrnaATP+ps->mrnaAFP); + fsn=(100.0*(double)ps->mrnaATP)/(ps->mrnaATP+ps->mrnaAFN); + if (fsp>100.0) fsp=100.0; + if (fsn>100.0) fsn=100.0; + fprintf(fout, " Transcript level: \t%5.1f\t%5.1f\t%5.1f\t%5.1f\n",sn, sp, fsn, fsp); + //sp=(100.0*(double)ps->locusTP)/(ps->locusTP+ps->locusFP); + sp=(100.0*(double)ps->locusQTP)/ps->total_qloci; + sn=(100.0*(double)ps->locusTP)/ps->total_rloci; //(ps->locusTP+ps->locusFN); + fsp=(100.0*(double)ps->locusAQTP)/ps->total_qloci; //(ps->locusATP+ps->locusAFP); + fsn=(100.0*(double)ps->locusATP)/ps->total_rloci; //(ps->locusATP+ps->locusAFN); + 
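+  // Throughout this report Sn = 100*TP/(TP+FN) and Sp = 100*TP/(TP+FP); the
+  // "fuzzy" fSn/fSp columns use the *ATP/*AFP/*AFN counters, which tolerate
+  // small boundary mismatches. Purely illustrative numbers: exonTP=800,
+  // exonFN=200, exonFP=100 would print Sn=80.0 and Sp=88.9 on the exon row.
+  // Note that locus-level Sp above is computed against total_qloci rather than
+  // locusTP+locusFP, per the commented-out expressions.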
fprintf(fout, " Locus level: \t%5.1f\t%5.1f\t%5.1f\t%5.1f\n",sn, sp, fsn, fsp); + //fprintf(fout, " (locus TP=%d, total ref loci=%d)\n",ps->locusTP, ps->total_rloci); + fprintf(fout, "\nMatching intron chains: %7d\n",ps->ichainTP); + fprintf(fout, " Matching loci: %7d\n",ps->locusTP); + fprintf(fout, "\n"); + sn=(100.0*(double)ps->m_exons)/(ps->total_rexons); + fprintf(fout, " Missed exons: %7d/%d\t(%5.1f%%)\n",ps->m_exons, ps->total_rexons, sn); + sn=(100.0*(double)ps->w_exons)/(ps->total_qexons); + fprintf(fout, " Novel exons: %7d/%d\t(%5.1f%%)\n",ps->w_exons, ps->total_qexons,sn); + if (ps->total_rintrons>0) { + sn=(100.0*(double)ps->m_introns)/(ps->total_rintrons); + fprintf(fout, " Missed introns: %7d/%d\t(%5.1f%%)\n",ps->m_introns, ps->total_rintrons, sn); + } + if (ps->total_qintrons>0) { + sn=(100.0*(double)ps->w_introns)/(ps->total_qintrons); + fprintf(fout, " Novel introns: %7d/%d\t(%5.1f%%)\n",ps->w_introns, ps->total_qintrons,sn); + } + if (ps->total_rloci>0) { + sn=(100.0*(double)ps->m_loci)/(ps->total_rloci); + fprintf(fout, " Missed loci: %7d/%d\t(%5.1f%%)\n",ps->m_loci, ps->total_rloci, sn); + } + if (ps->total_qloci>0) { + sn=(100.0*(double)ps->w_loci)/(ps->total_qloci); + fprintf(fout, " Novel loci: %7d/%d\t(%5.1f%%)\n",ps->w_loci, ps->total_qloci,sn); + } + + } +} + +int inbuf_len=1024; //starting inbuf capacity +char* inbuf=NULL; // incoming buffer for sequence lines. + +void loadRefDescr(const char* fname) { + if (inbuf==NULL) { GMALLOC(inbuf, inbuf_len); } + FILE *f=fopen(fname, "rb"); + if (f==NULL) GError("Error opening exon file: %s\n",fname); + char* line; + int llen=0; + off_t fpos; + while ((line=fgetline(inbuf, inbuf_len, f, &fpos, &llen))!=NULL) { + if (strlen(line)<=2) continue; + int idlen=strcspn(line,"\t "); + char* p=line+idlen; + if (idlen0) { + *p=0; + p++; + refdescr.Add(line, new GStr(p)); + } + } +} + +GSeqTrack* findGSeqTrack(int gsid) { + GSeqTrack f(gsid); + int fidx=-1; + if (gseqtracks.Found(&f,fidx)) + return gseqtracks[fidx]; + fidx=gseqtracks.Add(new GSeqTrack(gsid)); + return gseqtracks[fidx]; +} + + + +GffObj* findRefMatch(GffObj& m, GLocus& rloc, int& ovlen) { + ovlen=0; + CTData* mdata=((CTData*)m.uptr); + if (mdata->eqref!=NULL && ((CTData*)(mdata->eqref->uptr))->locus==&rloc) { + mdata->eqref=mdata->ovls.First()->mrna; //this should be unnecessary + //check it? 
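+// Class codes assigned to a transfrag relative to a reference transcript, as
+// implemented in getOvlCode()/getRefOvl()/reclass_* below (informal summary;
+// see the individual branches for the exact thresholds):
+//   '='  intron chain match (or, for single-exon pairs, overlap >= 80% of the longer one)
+//   'c'  contained in the reference
+//   'j'  at least one junction shared with the reference
+//   'e'  single-exon transfrag overlapping a reference intron (possible pre-mRNA)
+//   'i'  fully contained within a reference intron
+//   'o'  other exonic overlap with the reference
+//   'p'  no overlap, but within polyrun_range of a reference locus (possible run-on)
+//   'r'  mostly lower-case (repeat-masked) sequence
+//   's'  intron match on the opposite strand (likely mapping artifact)
+//   'x'  exonic overlap on the opposite strand
+//   'u'  unknown / intergenic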
+ return mdata->ovls.First()->mrna; + } + //if (rloc==NULL|| m==NULL) return NULL; + GffObj* ret=NULL; + for (int r=0;rmrnas[r]; + /* + if (ovlenaddOvl('=',rloc.mrnas[r], olen); + ret=mdata->ovls.First()->mrna; + //this must be called only for the head of an equivalency chain + CTData* rdata=(CTData*)rloc.mrnas[r]->uptr; + rdata->addOvl('=',&m,olen); + //if (rdata->eqnext==NULL) rdata->eqnext=&m; + } + } + if (ret!=NULL) + mdata->eqref=ret; + return ret; + } + + +void addXCons(GXLocus* xloc, GffObj* ref, char ovlcode, GffObj* tcons, CEqList* ts) { + GXConsensus* c=new GXConsensus(tcons, ts, ref, ovlcode); + //xloc->tcons.Add(c); + //this will also check c against the other tcons for containment: + xloc->addXCons(c); +} + + +const uint pre_mrna_threshold = 100; + +char getOvlCode(GffObj& m, GffObj& r, int& ovlen) { + ovlen=0; + if (!m.overlap(r.start,r.end)) return 'u'; + int jmax=r.exons.Count()-1; + + if (m.exons.Count()==1) { //single-exon transfrag + GSeg mseg(m.start, m.end); + if (jmax==0) { //also single-exon ref + ovlen=mseg.overlapLen(r.start,r.end); + int lmax=GMAX(r.covlen, m.covlen); + if (ovlen >= lmax*0.8) return '='; //fuzz matching for single-exon transcripts: 80% of the longer one + //if (m.covlen<=ovlen+12 && m.covlen= m.covlen*0.8) return 'c'; + return 'o'; //just plain overlapping + } + //single-exon qry overlaping multi-exon ref + for (int j=0;j<=jmax;j++) { + //check if it's contained by an exon + if (m.start>r.exons[j]->start-8 && m.endend+8) + return 'c'; + if (j==jmax) break; + //check if it's contained by an intron + if (m.endstart && m.start>r.exons[j]->end) + return 'i'; + // check if it's a potential pre-mRNA transcript + // (if overlaps an intron at least 10 bases) + uint iovlen=mseg.overlapLen(r.exons[j]->end+1, r.exons[j+1]->start-1); + if (iovlen>=10 && mseg.len()>iovlen+10) return 'e'; + } + return 'o'; //plain overlap, uncategorized + } //single-exon transfrag + //-- from here on we have a multi-exon transfrag -- + // * check if contained by a ref intron + for (int j=0;jstart && m.start>r.exons[j]->end) + return 'i'; + } + //> check if m's intron chain is a subset of r's intron chain + int imax=m.exons.Count()-1;// imax>0 here + if (m.exons[imax]->startend || + r.exons[jmax]->startend ) //intron chains do not overlap at all + return 'o'; //but terminal exons do, otherwise we wouldn't be here + int i=1; //index of exon to the right of current qry intron + int j=1; //index of exon to the right of current ref intron + //find first intron overlap + while (i<=imax && j<=jmax) { + if (r.exons[j]->startend) { j++; continue; } + if (m.exons[i]->startend) { i++; continue; } + break; //here we have an intron overlap + } + if (i>imax || j>jmax) + return 'o'; //no initial intron overlap found + //from here on we check all qry introns against ref introns + bool jmatch=false; //true if at least a junction match is found + bool icmatch=(i==1); //intron chain match - it will be updated as introns are checked + //bool exovli=false; // if any terminal exon of qry extends into a ref intron + int jmstart=j; //index of first intron overlap of reference + int jmend=0; //index of last intron overlap of reference + int imend=0; //index of last intron overlap of query + //check for intron matches + while (i<=imax && j<=jmax) { + uint mstart=m.exons[i-1]->end; + uint mend=m.exons[i]->start; + uint rstart=r.exons[j-1]->end; + uint rend=r.exons[j]->start; + if (rendrend) j++; else i++; + } + } //while checking intron overlaps + + if (icmatch && imend==imax) { // qry intron chain 
match + if (jmstart==1 && jmend==jmax) return '='; //identical intron chains + // -- qry intron chain is shorter than ref intron chain -- + int l_iovh=0; // overhang of leftmost q exon left boundary beyond the end of ref intron to the left + int r_iovh=0; // same type of overhang through the ref intron on the right + if (jmstart>1 && r.exons[jmstart-1]->start>m.start) + l_iovh = r.exons[jmstart-1]->start - m.start; + if (jmend r.exons[jmend]->end) + r_iovh = m.end - r.exons[jmend]->end; + if (l_iovh<4 && r_iovh<4) return 'c'; + //TODO? check if any x_iovl>10 and return 'e' to signal an "unspliced intron" ? + // or we can check if any of them are >= the length of the corresponding ref intron on that side + return 'j'; + } + /* + if (icmatch && (jmax>=imax)) { //all qry introns match + //but they may overlap + // if ((lbound && lbound > m.exons[0]->start+10) || + // (j<=jmax && m.exons[i-1]->end > r.exons[j-1]->end+10)) return 'j'; + // return 'c'; + // } + int code = 'c'; + if (lbound) + { + uint ref_boundary = lbound; + uint cuff_boundary = m.exons[0]->start; + if (ref_boundary > (cuff_boundary + pre_mrna_threshold)) // cuff extends a lot + { + code = 'j'; + } + if (ref_boundary > cuff_boundary) // cuff extends just a bit into a ref intron + { + code = 'e'; + } + } + if (j <= jmax) + { + uint ref_boundary = r.exons[j-1]->end; + uint cuff_boundary = m.exons[i-1]->end; + if (cuff_boundary > (ref_boundary + pre_mrna_threshold)) // cuff extends a lot + { + code = 'j'; + } + if (cuff_boundary > ref_boundary) // cuff extends just a bit into a ref intron + { + code = 'e'; + } + } + //if ((lbound && lbound > m.exons[0]->start+10) || + // (j<=jmax && m.exons[i-1]->end > r.exons[j-1]->end+10)) return 'j'; + return code; + } + +// if (!ichain) // first and last exons more or less match, but there's a different intron somewhere +// { +// +// } + + */ + return jmatch ? 'j':'o'; +} + +char getRefOvl(GffObj& m, GLocus& rloc, GffObj*& rovl, int& ovlen) { + rovl=NULL; + ovlen=0; + if (m.start>rloc.end || m.end=rloc.start-polyrun_range)) { + rovl=rloc.mrna_maxcov; + ((CTData*)m.uptr)->addOvl('p',rloc.mrna_maxcov); + return 'p'; + } + */ + return 0; //unknown -> intergenic space + } + for (int i=0;iaddOvl(ovlcode,r,olen); + if (olen>ovlen) ovlen=olen; + if (ovlcode=='c' || ovlcode=='=') //keep match/containment for each reference transcript + ((CTData*)r->uptr)->addOvl(ovlcode,&m,olen); + } + }//for each ref in rloc + // i,j,o + return ((CTData*)m.uptr)->getBestCode(); +} + +/* +void findTMatches(GTrackLocus& loctrack, int qcount) { + //perform an all vs. 
all ichain-match for all transcripts across all loctrack[i]->qloci +for (int q=0;qCount();qi++) { // for each transcript in q dataset + GffObj* qi_t=loctrack[q]->Get(qi); + CTData* qi_d=(CTData*)qi_t->uptr; + if ((qi_d->eqdata & EQHEAD_TAG) !=0) { //this is set as an EQ chain head already + //if (qi_t->exons.Count()>1) + continue; + } + for (int n=q+1;nCount();ni++) { + GffObj* ni_t=loctrack[n]->Get(ni); + CTData* ni_d=(CTData*)ni_t->uptr; + //if (ni_d->eqdata!=0) continue; //already part of an EQ chain + //single exon transfrags have special treatment: + bool s_match=(ni_t->exons.Count()==1 && qi_t->exons.Count()==1); + if (ni_d->eqdata!=0 && !s_match) continue; //already part of an EQ chain + if (ni_d->eqnext!=NULL) continue; + int ovlen=0; + + if (qi_d->eqnext!=NULL) { + if (!s_match) continue; + //test all in the EQ list for a match + bool matchFound=false; + CTData* next_eq_d=qi_d; + if (tMatch(*qi_t, *ni_t, ovlen, true)) { + matchFound=true; + } + else { + while (next_eq_d->eqnext!=NULL) { + if (tMatch(*(next_eq_d->eqnext), *ni_t, ovlen, true)) { + matchFound=true; + break; + } + next_eq_d=(CTData*)(next_eq_d->eqnext->uptr); + } //last in the chain + } + if (matchFound) { + //add this to the end of the EQ chain instead + next_eq_d=(CTData*)(qi_d->eqnext->uptr); + while (next_eq_d->eqnext!=NULL) { + next_eq_d=(CTData*)(next_eq_d->eqnext->uptr); + } //last in the chain + next_eq_d->eqnext=ni_t; + ni_d->eqdata=n+1; + ni_d->eqnext=NULL; ///TEST + } + } + else { + if (tMatch(*qi_t,*ni_t, ovlen, true)) { + qi_d->eqnext=ni_t; + ni_d->eqnext=NULL; ///TEST + if (qi_d->eqdata == 0) {//only start of chain is tagged + qi_d->eqdata = ((q+1) | EQHEAD_TAG); + } + ni_d->eqdata=n+1; //EQ chain member only marked with qry# (1-based) + } + } //multi-exon case + } //for each transfrag in the next qry dataset + + if (qi_d->eqnext!=NULL && qi_t->exons.Count()>1) break; + //part of a chain already, skip other datasets + } // for each successor dataset + } //for each transcript in qry dataset + } //for each qry dataset +} +*/ + +void findTMatches(GTrackLocus& loctrack, int qcount) { + //perform an all vs. 
all ichain-match for all transcripts across all loctrack[i]->qloci +for (int q=0;qCount();qi++) { // for each transcript in q dataset + GffObj* qi_t=loctrack[q]->Get(qi); + CTData* qi_d=(CTData*)qi_t->uptr; + if (qi_d->eqlist!=NULL && qi_t->exons.Count()>1) { + continue; //this is part of an EQ chain already + } + for (int n=q+1;nCount();ni++) { + GffObj* ni_t=loctrack[n]->Get(ni); + CTData* ni_d=(CTData*)ni_t->uptr; + bool singleExon=(ni_t->exons.Count()==1 && qi_t->exons.Count()==1); + if (ni_d->eqlist!=NULL && + (ni_d->eqlist==qi_d->eqlist || !singleExon)) continue; + int ovlen=0; + if ((ni_d->eqlist==qi_d->eqlist && qi_d->eqlist!=NULL) || + tMatch(*qi_t,*ni_t, ovlen, singleExon)) { + qi_d->joinEqList(ni_t); + } + } + } // for each successor dataset + } //for each transcript in qry dataset + } //for each qry dataset +} + + +int cmpTData_qset(const pointer* p1, const pointer* p2) { + CTData* d1=(CTData*)(((GffObj*)p1)->uptr); + CTData* d2=(CTData*)(((GffObj*)p2)->uptr); + return (d1->qset - d2->qset); + } + +void printITrack(FILE* ft, GList& mrnas, int qcount, int& cnum) { + for (int i=0;iqset; + char ovlcode=qtdata->classcode; + //GList eqchain(false,false,false); + CEqList* eqchain=qtdata->eqlist; + GffObj* ref=NULL; //related ref -- it doesn't have to be fully matching + GffObj* eqref=NULL; //fully ichain-matching ref + GffObj* tcons=NULL; //"consensus" (largest) transcript for a clique + int tmaxcov=0; + //eqchain.Add(&qt); + eqref=qtdata->eqref; + if (qtdata->ovls.Count()>0 && qtdata->ovls[0]->mrna!=NULL) { + //if it has ovlcode with a ref + ref=qtdata->ovls[0]->mrna; + //consistency check: qtdata->ovls[0]->code==ovlcode + // -- let tcons be a transfrag, not a ref transcript + //tcons=eqref; + //if (tcons!=NULL) tmaxcov=tcons->covlen; + } + //chain pre-check + if (tcons==NULL || mrnas[i]->covlen>tmaxcov) { + tcons=mrnas[i]; + tmaxcov=tcons->covlen; + } + if (qtdata->isEqHead()) {//head of a equivalency chain + //check if all transcripts in this chain have the same ovlcode + for (int k=0;keqlist->Count();k++) { + GffObj* m=qtdata->eqlist->Get(k); + if (m->covlen>tmaxcov) { + tmaxcov=m->covlen; + tcons=m; + } + if (ovlcode!='=' && ovlcode!='.' && ((CTData*)m->uptr)->getBestCode()!=ovlcode) { + ovlcode='.'; //non-uniform ovlcode + } + } + /* + GffObj* m=mrnas[i]; + while (((CTData*)m->uptr)->eqnext!=NULL) { + m=((CTData*)m->uptr)->eqnext; + eqchain.Add(m); + if (m->covlen>tmaxcov) { + tmaxcov=m->covlen; + tcons=m; + } + if (ovlcode!='=' && ovlcode!='.' && ((CTData*)m->uptr)->getBestCode()!=ovlcode) { + ovlcode='.'; //non-uniform ovlcode + //break; + } + } //while elements in chain + */ + + }//chain check + //if (ovlcode=='p') ref=NULL; //ignore polymerase runs? + if (ovlcode==0 || ovlcode=='-') ovlcode = (ref==NULL) ? 'u' : '.'; + //-- print columns 1 and 2 as LOCUS_ID and TCONS_ID + //bool chainHead=(qtdata->eqnext!=NULL && ((qtdata->eqdata & EQHEAD_TAG)!=0)); + bool chainHead=qtdata->isEqHead(); + //bool noChain=((qtdata->eqdata & EQCHAIN_TAGMASK)==0); + bool noChain=(eqchain==NULL); + if (chainHead || noChain) { + cnum++; + if (ft!=NULL) fprintf(ft,"%s_%08d\t",cprefix,cnum); + GXLocus* xloc=qtdata->locus->xlocus; + if (xloc!=NULL) { + if (ft!=NULL) fprintf(ft, "XLOC_%06d\t",xloc->id); + if (tcons->exons.Count()>1) { + //! only multi-exon mRNAs are counted for multi-transcript xloci ! + xloc->num_mtcons++; + if (xloc->num_mtcons==2) + total_xloci_alt++; + } + } + else { + //should NEVER happen! 
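+          // (Every query locus is expected to have been attached to a
+          //  superlocus by buildXLoci()/xclusterLoci() before printing, so a
+          //  NULL xlocus here points at a skipped clustering step or a bug --
+          //  hence the hard error below.)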
+ int fidx=qtdata->qset; + GError("Error: no XLocus created for transcript %s (file %s) [%d, %d], on %s%c:%d-%d\n", qt.getID(), + qryfiles[qtdata->locus->qfidx]->chars(), qtdata->locus->qfidx, fidx, qt.getGSeqName(), qt.strand, qt.start, qt.end); + } + addXCons(xloc, ref, ovlcode, tcons, eqchain); + } // if chain head or uniq entry (not part of a chain) + if (ft==NULL) continue; + if (chainHead) { + //this is the start of a equivalence class as a printing chain + if (ref!=NULL) fprintf(ft,"%s|%s\t%c", getGeneID(ref),ref->getID(), ovlcode); + else fprintf(ft,"-\t%c", ovlcode); + GffObj* m=mrnas[i]; + CTData* mdata=(CTData*)m->uptr; + + int lastpq=-1; + /* + for (int ptab=mdata->qset-lastpq; ptab>0;ptab--) + if (ptab>1) fprintf(ft,"\t-"); + else fprintf(ft,"\t"); + lastpq=mdata->qset; + fprintf(ft,"q%d:%s|%s|%d|%8.6f|%8.6f|%8.6f|%8.6f|%d", lastpq+1, getGeneID(m), m->getID(), + iround(m->gscore/10), mdata->FPKM, mdata->conf_lo, mdata->conf_hi, mdata->cov, m->covlen); + //traverse linked list of matching transcripts + while (mdata->eqnext!=NULL) { + m=mdata->eqnext; + mdata=(CTData*)m->uptr; + for (int ptab=mdata->qset-lastpq;ptab>0;ptab--) + if (ptab>1) fprintf(ft,"\t-"); + else fprintf(ft,"\t"); + lastpq = mdata->qset; + fprintf(ft,"q%d:%s|%s|%d|%8.6f|%8.6f|%8.6f|%8.6f|%d", lastpq+1, getGeneID(m), m->getID(), + iround(m->gscore/10), mdata->FPKM,mdata->conf_lo,mdata->conf_hi,mdata->cov, m->covlen); + } //traverse and print row + */ + eqchain->setUnique(false); + eqchain->setSorted((GCompareProc*) cmpTData_qset); + + for (int k=0;kCount();k++) { + m=eqchain->Get(k); + mdata=(CTData*)m->uptr; + if (mdata->qset==lastpq) { + //shouldn't happen, unless this input set is messed up (has duplicates/redundant transfrags) + fprintf(ft,",%s|%s|%d|%8.6f|%8.6f|%8.6f|%8.6f|%d", getGeneID(m), m->getID(), + iround(m->gscore/10), mdata->FPKM,mdata->conf_lo,mdata->conf_hi,mdata->cov, m->covlen); + continue; + } + for (int ptab=mdata->qset-lastpq;ptab>0;ptab--) + if (ptab>1) fprintf(ft,"\t-"); + else fprintf(ft,"\t"); + lastpq = mdata->qset; + fprintf(ft,"q%d:%s|%s|%d|%8.6f|%8.6f|%8.6f|%8.6f|%d", lastpq+1, getGeneID(m), m->getID(), + iround(m->gscore/10), mdata->FPKM,mdata->conf_lo,mdata->conf_hi,mdata->cov, m->covlen); + } + for (int ptab=qcount-lastpq-1;ptab>0;ptab--) + fprintf(ft,"\t-"); + fprintf(ft,"\n"); + continue; + } //start of eq class (printing chain) + + if (eqchain!=NULL) continue; //part of a matching chain, dealt with previously + + //--------- not in an ichain-matching class, print as singleton + + if (ref!=NULL) fprintf(ft,"%s|%s\t%c",getGeneID(ref), ref->getID(), ovlcode); + else fprintf(ft,"-\t%c",ovlcode); + for (int ptab=qfidx;ptab>=0;ptab--) + if (ptab>0) fprintf(ft,"\t-"); + else fprintf(ft,"\t"); + fprintf(ft,"q%d:%s|%s|%d|%8.6f|%8.6f|%8.6f|%8.6f|-",qfidx+1, getGeneID(qt), qt.getID(),iround(qt.gscore/10), + qtdata->FPKM, qtdata->conf_lo,qtdata->conf_hi,qtdata->cov); + for (int ptab=qcount-qfidx-1;ptab>0;ptab--) + fprintf(ft,"\t-"); + fprintf(ft,"\n"); + } //for each transcript +} + + +void findTRMatch(GTrackLocus& loctrack, int qcount, GLocus& rloc) { + //requires loctrack to be already populated with overlapping qloci by findTMatches() + // which also found (and tagged) all matching qry transcripts + for (int q=0;qCount();qi++) { // for each transcript in q dataset + //if (loctrack[q]->cl[qi]->exons.Count()<2) continue; //skip single-exon transcripts + GffObj& qt=*(loctrack[q]->Get(qi)); + CTData* qtdata=(CTData*)qt.uptr; + GffObj* rmatch=NULL; //== ref match for this row + int 
rovlen=0; + //if (qtdata->eqnext!=NULL && ((qtdata->eqdata & EQHEAD_TAG)!=0)) { + if (qtdata->isEqHead()) { + //EQ chain head -- transfrag equivalency list starts here + if (qtdata->eqref==NULL) { //find rloc overlap + if (qt.overlap(rloc.start, rloc.end)) { + rmatch=findRefMatch(qt, rloc, rovlen); + } + } else rmatch=qtdata->eqref; + if (rmatch!=NULL) { + /* + GffObj* m=loctrack[q]->Get(qi); + //traverse linked list of matching transcripts + while (((CTData*)m->uptr)->eqnext!=NULL) { + m=((CTData*)m->uptr)->eqnext; + if (rmatch!=NULL) { + ((CTData*)m->uptr)->addOvl('=',rmatch,rovlen); + } + } //traverse qry data sets + continue; + } + */ + for (int k=0;keqlist->Count();k++) { + GffObj* m=qtdata->eqlist->Get(k); + ((CTData*)m->uptr)->addOvl('=',rmatch,rovlen); + continue; + } + } + //if (rmatch!=NULL) continue; + } //equivalence class (chain of intron-matching) + //if ((qtdata->eqdata & EQCHAIN_TAGMASK)!=0) continue; //part of a matching chain, dealt with previously + + //--------- qry mrna not in a '=' matching clique + if (qtdata->eqref==NULL) { //find any rloc overlap + if (qt.overlap(rloc.start, rloc.end)) { + rmatch=findRefMatch(qt, rloc, rovlen); + if (rmatch==NULL) { + //not an ichain match, look for other codes + GffObj* rovl=NULL; + int rovlen=0; + //char ovlcode= + getRefOvl(qt, rloc,rovl,rovlen); + } + } + } + else rmatch=qtdata->eqref; + } //for each qry transcript + }//for each qry dataset +} + + +bool inPolyRun(char strand, GffObj& m, GList* rloci, int& rlocidx) { + //we are only here if there is no actual overlap between m and any locus in rloci + if (rloci==NULL || rloci->Count()==0) return false; // || m.exons.Count()>1 + if (strand=='-') { + rlocidx=qsearch_loci(m.end, *rloci); + //returns index of locus starting just ABOVE m.end + // or -1 if last locus start <= m.end + GLocus* rloc=NULL; + if (rlocidx<0) return false; + while (rlocidxCount()) { + rloc=rloci->Get(rlocidx); + if (rloc->start>m.end+polyrun_range) break; + if (rloc->start+6>m.end) return true; + rlocidx++; + } + } + else { // strand == '+' (or '.' ?) 
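+    // Same window test as the '-' branch above, mirrored for '+' (and
+    // unstranded) transfrags: the query must fall within polyrun_range of the
+    // reference locus boundary without actually overlapping it. Callers use a
+    // hit here to assign the 'p' (possible polymerase run-on) class code to
+    // single-exon transfrags.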
+ rlocidx=qsearch_loci(m.end, *rloci); + GLocus* rloc=NULL; + //returns index of closest locus starting ABOVE m.end + // or -1 if last locus start <= m.end + if (rlocidx<0) rlocidx=rloci->Count(); //this may actually start below m.end + while ((--rlocidx)>=0) { + rloc=rloci->Get(rlocidx); + if (m.start>rloc->start+GFF_MAX_LOCUS) break; + if (m.start+6>rloc->end && m.startend+polyrun_range) return true; + } + } + return false; +} + +CTData* getBestOvl(GffObj& m) { + //CTData* mdata=(CTData*)m.uptr; + //return mdata->getBestCode(); + if ( ((CTData*)m.uptr)->ovls.Count()>0) + return (CTData*)m.uptr; + return NULL; +} + +void reclass_XStrand(GList& mrnas, GList* rloci) { + //checking for relationship with ref transcripts on opposite strand + if (rloci==NULL || rloci->Count()<1) return; + int j=0;//current rloci index + for (int i=0;igetBestCode(); + if (ovlcode>47 && strchr("=cjeo",ovlcode)!=NULL) continue; + GLocus* rloc=rloci->Get(j); + if (rloc->start>m.end) continue; //check next transfrag + while (m.start>rloc->end && j+1Count()) { + j++; + rloc=rloci->Get(j); + } + if (rloc->start>m.end) continue; //check next transfrag + //m overlaps rloc: + //check if m has a fuzzy intron overlap -> 's' (shadow, mapped on the wrong strand) + // then if m is contained within an intron -> 'i' + // otherwise it's just a plain cross-strand overlap: 'x' + int jm=0; + do { //while rloci overlap this transfrag (m) + rloc=rloci->Get(j+jm); + bool is_shadow=false; + GffObj* sovl=NULL; + bool is_intraintron=false; + GffObj* iovl=NULL; + if (rloc->introns.Count()>0) { + for (int n=0;nintrons.Count();n++) { + GISeg& rintron=rloc->introns[n]; + if (rintron.start>m.end) break; + if (m.start>rintron.end) continue; + //overlap between m and intron + if (m.end<=rintron.end && m.start>=rintron.start) { + is_intraintron=true; + if (iovl==NULL || iovl->covlencovlen) iovl=rintron.t; + continue; + } + //check if any intron of m has a fuzz-match with rintron + for (int e=1;eend+1,m.exons[e]->start-1); + if (rintron.coordMatch(&mintron,10)) { + is_shadow=true; + if (sovl==NULL || sovl->covlencovlen) sovl=rintron.t; + break; + } + } //for each m intron + } //for each intron of rloc + }//rloc has introns + bool xcode=true; + if (is_shadow) { ((CTData*)m.uptr)->addOvl('s', sovl); xcode=false; } + // else + if (ovlcode!='i' && is_intraintron) { ((CTData*)m.uptr)->addOvl('i', iovl); xcode=false; } + if (xcode) { + // just plain overlap, find the overlapping mrna in rloc + GffObj* maxovl=NULL; + int ovlen=0; + GffObj* max_lovl=NULL; //max len ref transcript + // having no exon overlap but simply range overlap (interleaved exons) + for (int ri=0;rimrnas.Count();ri++) { + if (!m.overlap(*(rloc->mrnas[ri]))) continue; + int o=m.exonOverlapLen(*(rloc->mrnas[ri])); + if (o>0) { + if (o>ovlen) { + ovlen=o; + maxovl=rloc->mrnas[ri]; + } + } + else { //no exon overlap, but still overlapping (interleaved exons) + if (max_lovl==NULL || max_lovl->covlenmrnas[ri]->covlen) + max_lovl=rloc->mrnas[ri]; + } + } + if (maxovl) ((CTData*)m.uptr)->addOvl('x',maxovl); + else if (max_lovl) ((CTData*)m.uptr)->addOvl('x',max_lovl); + } //'x' + jm++; + } while (j+jmCount() && rloci->Get(j+jm)->overlap(m)); + } //for each transfrag +} + +void reclass_mRNAs(char strand, GList& mrnas, GList* rloci, GFaSeqGet *faseq) { + int rlocidx=-1; + for (int i=0;igetBestCode(); + //if (ovlcode=='u' || ovlcode=='i' || ovlcode==0) { + if (ovlcode=='u' || ovlcode<47) { + //check for overlaps with ref transcripts on the other strand + if (m.exons.Count()==1 && 
inPolyRun(strand, m, rloci, rlocidx)) { + ((CTData*)m.uptr)->addOvl('p',rloci->Get(rlocidx)->mrna_maxcov); + } + else { //check for repeat content + if (faseq!=NULL) { + int seqlen; + char* seq=m.getSpliced(faseq, false, &seqlen); + //get percentage of lowercase + int numlc=0; + for (int c=0;c='a') numlc++; + if (numlc > seqlen/2) + ((CTData*)m.uptr)->addOvl('r'); + GFREE(seq); + } + } + } //for unassigned class + }//for each mrna + +} + +void reclassLoci(char strand, GList& qloci, GList* rloci, GFaSeqGet *faseq) { + for (int ql=0;qlmrnas, rloci, faseq); + //find closest upstream ref locus for this q locus + } //for each locus +} + +//for a single genomic sequence, all qry data and ref data is stored in gtrack +//check for all 'u' transfrags if they are repeat ('r') or polymerase run 'p' or anything else +void umrnaReclass(int qcount, GSeqTrack& gtrack, FILE** ftr, GFaSeqGet* faseq=NULL) { + for (int q=0;qloci_f, gtrack.rloci_f, faseq); + reclassLoci('-', gtrack.qdata[q]->loci_r, gtrack.rloci_r, faseq); + reclass_mRNAs('+', gtrack.qdata[q]->umrnas, gtrack.rloci_f, faseq); + reclass_mRNAs('-', gtrack.qdata[q]->umrnas, gtrack.rloci_r, faseq); + //and also check for special cases with cross-strand overlaps: + reclass_XStrand(gtrack.qdata[q]->mrnas_f, gtrack.rloci_r); + reclass_XStrand(gtrack.qdata[q]->mrnas_r, gtrack.rloci_f); + // print all tmap data here here: + for (int i=0;itdata.Count();i++) { + CTData* mdata=gtrack.qdata[q]->tdata[i]; + if (mdata->mrna==NULL) continue; //invalidated -- removed earlier + //GLocus* rlocus=NULL; + mdata->classcode='u'; + GffObj* ref=NULL; + if (mdata->ovls.Count()>0) { + mdata->classcode=mdata->ovls[0]->code; + ref=mdata->ovls[0]->mrna; + } + //if (mdata->classcode<33) mdata->classcode='u'; + if (mdata->classcode<47) mdata->classcode='u'; // if 0, '-' or '.' + if (tmapFiles) { + char ref_match_len[2048]; + if (ref!=NULL) { + sprintf(ref_match_len, "%d",ref->covlen); + fprintf(ftr[q],"%s\t%s\t",getGeneID(ref),ref->getID()); + //rlocus=((CTData*)(ref->uptr))->locus; + } + else { + fprintf(ftr[q],"-\t-\t"); + strcpy(ref_match_len, "-"); + } + //fprintf(ftr[q],"%c\t%s\t%d\t%8.6f\t%8.6f\t%d\n", ovlcode, mdata->mrna->getID(), + // iround(mdata->mrna->gscore/10), mdata->FPKM, mdata->cov, mdata->mrna->covlen); + const char* mlocname = (mdata->locus!=NULL) ? 
mdata->locus->mrna_maxcov->getID() : mdata->mrna->getID(); + fprintf(ftr[q],"%c\t%s\t%s\t%d\t%8.6f\t%8.6f\t%8.6f\t%8.6f\t%d\t%s\t%s\n", mdata->classcode, getGeneID(mdata->mrna), mdata->mrna->getID(), + iround(mdata->mrna->gscore/10), mdata->FPKM, mdata->conf_lo,mdata->conf_hi, mdata->cov, mdata->mrna->covlen, mlocname, ref_match_len); + } + } //for each tdata + } //for each qdata +} + +void buildXLoci(GTrackLocus& loctrack, int qcount, GSeqTrack& gtrack, char strand, + GList* retxloci=NULL) { + GList* dest_xloci=NULL; + GList tmpxloci(true,false,true); //local set of newly created xloci + GList* xloci=&tmpxloci; + if (strand=='+') { + dest_xloci=& gtrack.xloci_f; + } + else if (strand=='-') { + dest_xloci = & gtrack.xloci_r; + } + else dest_xloci= & gtrack.xloci_u; + + if (retxloci==NULL) { + //if no return set of build xloci was given + //take it as a directive to work directly on the global xloci + xloci=dest_xloci; + dest_xloci=NULL; + } + for (int q=-1;q* wrkloci=NULL; + if (q<0) { + if (loctrack.rloci.Count()==0) continue; + //loci=new GList(true,false,false); + //loci->Add(loctrack.rloc); + wrkloci = &(loctrack.rloci); + } + else { + if (loctrack[q]==NULL) continue; + wrkloci = &(loctrack[q]->qloci); + } + + for (int t=0;tCount();t++) { + GLocus* loc=wrkloci->Get(t); + int xfound=0; //count of parent xloci + if (loc->xlocus!=NULL) continue; //already assigned a superlocus + GArray mrgxloci(true); + for (int xl=0;xlCount();xl++) { + GXLocus& xloc=*(xloci->Get(xl)); + if (xloc.start>loc->end) { + if (xloc.start-loc->end > GFF_MAX_LOCUS) break; + continue; + } + if (loc->start>xloc.end) continue; + if (xloc.add_Locus(loc)) { + xfound++; + mrgxloci.Add(xl); + } + } //for each existing Xlocus + if (xfound==0) { + xloci->Add(new GXLocus(loc)); + } + else { + int il=mrgxloci[0]; + GXLocus& xloc=*(xloci->Get(il)); + if (xfound>1) { + for (int l=1;lGet(mlidx))); + GXLocus* ldel=xloci->Get(mlidx); + xloci->Delete(mlidx); + if (retxloci!=NULL) + delete ldel; + } + } + //in case xloc.start was decreased, bubble-down until it's in the proper order + while (il>0 && xloc<*(xloci->Get(il-1))) { + il--; + xloci->Swap(il,il+1); + } + } //at least one locus is being merged + }//for each locus + }//for each set of loci in the region (refs and each qry set) + //-- add xloci to the global set of xloci unless retxloci was given, + if (retxloci!=NULL) retxloci->Add(*xloci); + else dest_xloci->Add(*xloci); +} + +void singleQData(GList& qloci, GList& loctracks) { + for (int i=0;it_ptr==NULL) { + GTrackLocus* tloc=new GTrackLocus(); + tloc->addQLocus(qloci[i],0); + loctracks.Add(tloc); + } + } +} + +void recheckUmrnas(GSeqData* gseqdata, GList& mrnas, + GList& loci, GList& nloci, GList& oloci) { + GList reassignedLocs(false,false); + for (int u=0;uumrnas.Count();u++) { + for (int l=0;lumrnas[u]==NULL) break; + if (gseqdata->umrnas[u]->endstart) break; //try next umrna + if (oloci[l]->endumrnas[u]->start) continue; //try next locus + if (gseqdata->umrnas[u]->strand=='+' || gseqdata->umrnas[u]->strand=='-') { + gseqdata->umrnas.Forget(u); + continue; //already reassigned earlier + } + //umrna overlaps locus region + GffObj* umrna=gseqdata->umrnas[u]; + for (int m=0;mmrnas.Count();m++) { + if (oloci[l]->mrnas[m]->exonOverlap(umrna)) { + gseqdata->umrnas.Forget(u); + CTData* umdata=((CTData*)umrna->uptr); + //must be in a Loci anyway + if (umdata==NULL || umdata->locus==NULL) + GError("Error: no locus pointer for umrna %s!\n",umrna->getID()); + for (int i=0;ilocus->mrnas.Count();i++) { + GffObj* 
um=umdata->locus->mrnas[i]; + um->strand=oloci[l]->mrnas[m]->strand; + } + reassignedLocs.Add(umdata->locus); + break; + } + } //for each mrna in locus + } //for each locus + } //for each umrna + if (reassignedLocs.Count()>0) { + gseqdata->umrnas.Pack(); + gseqdata->nloci_u.setFreeItem(false); + for (int i=0;imrnas.Count();m++) { + mrnas.Add(loc->mrnas[m]); + } + loci.Add(loc); + nloci.Add(loc); + gseqdata->nloci_u.Remove(loc); + } + gseqdata->nloci_u.setFreeItem(true); + } +} + +void umrnasXStrand(GList& xloci, GSeqTrack& gtrack) { + //try to determine the strand of unoriented transfrags based on possible overlaps + //with other, oriented transfrags + for (int x=0;xstrand=='.') continue; + if (xloci[x]->qloci.Count()==0) continue; + //go through all qloci in this xlocus + for (int l = 0; l < xloci[x]->qloci.Count(); l++) { + char locstrand=xloci[x]->qloci[l]->mrna_maxcov->strand; + if (locstrand=='.') { + //this is a umrna cluster + GLocus* qloc=xloci[x]->qloci[l]; + //we don't really need to update loci lists (loci_f, nloci_f etc.) + /* + if (xloci[x]->strand=='+') { + } + else { // - strand + } + */ + for (int i=0;imrnas.Count();i++) { + qloc->mrnas[i]->strand=xloci[x]->strand; + int uidx=gtrack.qdata[qloc->qfidx]->umrnas.IndexOf(qloc->mrnas[i]); + if (uidx>=0) { + gtrack.qdata[qloc->qfidx]->umrnas.Forget(uidx); + gtrack.qdata[qloc->qfidx]->umrnas.Delete(uidx); + if (xloci[x]->strand=='+') + gtrack.qdata[qloc->qfidx]->mrnas_f.Add(qloc->mrnas[i]); + else + gtrack.qdata[qloc->qfidx]->mrnas_r.Add(qloc->mrnas[i]); + } + } + } //unknown strand + } //for each xloci[x].qloci (l) + + } //for each xloci (x) +} + +//cluster loci across all datasets +void xclusterLoci(int qcount, char strand, GSeqTrack& gtrack) { + //gtrack holds data for all input qry datasets for a chromosome/contig + //cluster QLoci + GList loctracks(true,true,false); + //all vs all clustering across all qry data sets + ref + //one-strand set of loci from all datasets + ref loci + GList* wrkloci=NULL; + //build xloci without references first + //then add references only if they overlap an existing xloci + + int nq=0; + for (int q=0;q<=qcount+1;q++) { + bool refcheck=false; + if (q==qcount) { // check the unoriented loci for each query file + while (nqnloci_u.Count()==0)) + nq++; //skip query files with no unoriented loci + if (nqnloci_u); + nq++; + if (nqloci_f); + else wrkloci=&(gtrack.qdata[q]->loci_r); + } + // now do the all-vs-all clustering thing: + for (int t=0;tCount();t++) { + GLocus* loc=wrkloci->Get(t); + int xfound=0; //count of parent loctracks + if (loc->t_ptr!=NULL) continue; //already assigned a loctrack + GArray mrgloctracks(true); + for (int xl=0;xlloc->end) break; + if (loc->start>trackloc.end) continue; + if (trackloc.add_Locus(loc)) { + xfound++; + mrgloctracks.Add(xl); + } + } //for each existing Xlocus + if (xfound==0) { + if (!refcheck) //we really don't care about ref-only clusters + loctracks.Add(new GTrackLocus(loc)); + } + else { + int il=mrgloctracks[0]; + GTrackLocus& tloc=*(loctracks.Get(il)); + if (xfound>1) { + for (int l=1;l0 && tloc<*(loctracks[il-1])) { + il--; + loctracks.Swap(il,il+1); + } + } //at least one locus found + }//for each wrklocus + } //for each set of loci (q) + //loctracks is now set with all x-clusters on this strand + for (int i=0;ihasQloci) continue; //we really don't care here about reference-only clusters + GTrackLocus& loctrack=*loctracks[i]; + findTMatches(loctrack, qcount); //find matching transfrags in this xcluster + for (int rl=0; rl < loctrack.rloci.Count(); rl++) 
{ + findTRMatch(loctrack, qcount, *(loctrack.rloci[rl])); + //find matching reference annotation for this xcluster and assign class codes to transfrags + } + GList xloci(false,false,false); + buildXLoci(loctrack, qcount, gtrack, strand, &xloci); + //the newly created xloci are in xloci + umrnasXStrand(xloci, gtrack); + //also merge these xloci into the global list of xloci + for (int l=0; l < xloci.Count(); l++) { + if (xloci[l]->strand=='+') { + gtrack.xloci_f.Add(xloci[l]); + } + else if (xloci[l]->strand=='-') { + gtrack.xloci_r.Add(xloci[l]); + } + else gtrack.xloci_u.Add(xloci[l]); + } + }//for each xcluster +} + + +void printRefMap(FILE** frs, int qcount, GList* rloci) { + if (rloci==NULL) return; + + for (int l=0;lCount(); l++) { + for (int r=0;rGet(l)->mrnas.Count(); r++) { + GffObj& ref = *(rloci->Get(l)->mrnas[r]); + CTData* refdata = ((CTData*)ref.uptr); + GStr* clist = new GStr[qcount]; + GStr* eqlist = new GStr[qcount]; + for (int i = 0; iovls.Count(); i++) { + GffObj* m=refdata->ovls[i]->mrna; + char ovlcode=refdata->ovls[i]->code; + if (m==NULL) { + GMessage("Warning: NULL mRNA found for ref %s with ovlcode '%c'\n", + ref.getID(), refdata->ovls[i]->code); + continue; + } + int qfidx = ((CTData*)m->uptr)->qset; + if (ovlcode == '=') { + eqlist[qfidx].append(getGeneID(m)); + eqlist[qfidx].append('|'); + eqlist[qfidx].append(m->getID()); + eqlist[qfidx].append(','); + } + else if (ovlcode == 'c') { + clist[qfidx].append(getGeneID(m)); + clist[qfidx].append('|'); + clist[qfidx].append(m->getID()); + clist[qfidx].append(','); + } + }//for each reference overlap + for (int q=0;q& gtracks, GStr& fbasename, FILE** ftr, FILE** frs) { + FILE* f_ltrack=NULL; + FILE* f_itrack=NULL; + FILE* f_ctrack=NULL; + FILE* f_xloci=NULL; + int cnum=0; //consensus numbering for printITrack() + GStr s=fbasename; + //if (qcount>1 || generic_GFF) { //doesn't make much sense for only 1 query file + s.append(".tracking"); + f_itrack=fopen(s.chars(),"w"); + if (f_itrack==NULL) GError("Error creating file %s !\n",s.chars()); + // } + s=fbasename; + s.append(".combined.gtf"); + f_ctrack=fopen(s.chars(),"w"); + if (f_ctrack==NULL) GError("Error creating file %s !\n",s.chars()); + + s=fbasename; + s.append(".loci"); + f_xloci=fopen(s.chars(),"w"); + if (f_xloci==NULL) GError("Error creating file %s !\n",s.chars()); + for (int g=0;g1) + for (int q=0;qmrnas_f, qcount, cnum); + printITrack(f_itrack, gseqtrack.qdata[q]->mrnas_r, qcount, cnum); + //just for the sake of completion: + printITrack(f_itrack, gseqtrack.qdata[q]->umrnas, qcount, cnum); + } + //print XLoci and XConsensi within each xlocus + //also TSS clustering and protein ID assignment for XConsensi + printXLoci(f_xloci, f_ctrack, qcount, gseqtrack.xloci_f, faseq); + printXLoci(f_xloci, f_ctrack, qcount, gseqtrack.xloci_r, faseq); + printXLoci(f_xloci, f_ctrack, qcount, gseqtrack.xloci_u, faseq); + if (tmapFiles && haveRefs) { + printRefMap(frs, qcount, gseqtrack.rloci_f); + printRefMap(frs, qcount, gseqtrack.rloci_r); + } + delete faseq; + } + if (tmapFiles) { + for (int q=0;q +#else +#define PACKAGE_VERSION "INTERNAL" +#define SVN_REVISION "XXX" +#endif + + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "hits.h" +#include "bundles.h" +#include "abundances.h" +#include "tokenize.h" +#include "biascorrection.h" +#include "update_check.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "differential.h" + +// Need at least this many reads in a locus to do any 
testing on it + +vector sample_labels; + +double FDR = 0.05; +bool samples_are_time_series = false; +using namespace std; +using namespace boost; + +// We leave out the short codes for options that don't take an argument +#if ENABLE_THREADS +const char *short_options = "m:p:s:c:I:j:L:M:o:b:TNqvuF:"; +#else +const char *short_options = "m:s:c:I:j:L:M:o:b:TNqvuF:"; +#endif + + + +static struct option long_options[] = { +{"frag-len-mean", required_argument, 0, 'm'}, +{"frag-len-std-dev", required_argument, 0, 's'}, +{"transcript-score-thresh", required_argument, 0, 't'}, +{"pre-mrna-fraction", required_argument, 0, 'j'}, +{"max-intron-length", required_argument, 0, 'I'}, +{"labels", required_argument, 0, 'L'}, +{"min-alignment-count", required_argument, 0, 'c'}, +{"FDR", required_argument, 0, OPT_FDR}, +{"seed", required_argument, 0, OPT_RANDOM_SEED}, +{"mask-gtf", required_argument, 0, 'M'}, +{"output-dir", required_argument, 0, 'o'}, +{"verbose", no_argument, 0, 'v'}, +{"quiet", no_argument, 0, 'q'}, +{"frag-bias-correct", required_argument, 0, 'b'}, +{"multi-read-correct", no_argument, 0, 'u'}, +{"time-series", no_argument, 0, 'T'}, +{"upper-quartile-norm", no_argument, 0, 'N'}, +{"min-isoform-fraction", required_argument, 0, 'F'}, +#if ENABLE_THREADS +{"num-threads", required_argument, 0, 'p'}, +#endif +{"library-type", required_argument, 0, OPT_LIBRARY_TYPE}, +{"seed", required_argument, 0, OPT_RANDOM_SEED}, +{"no-collapse-cond-prob", no_argument, 0, OPT_COLLAPSE_COND_PROB}, +{"num-importance-samples", required_argument, 0, OPT_NUM_IMP_SAMPLES}, +{"max-mle-iterations", required_argument, 0, OPT_MLE_MAX_ITER}, +{"min-mle-accuracy", required_argument, 0, OPT_MLE_MIN_ACC}, +{"poisson-dispersion", no_argument, 0, OPT_POISSON_DISPERSION}, +{"bias-mode", required_argument, 0, OPT_BIAS_MODE}, +{"no-update-check", no_argument, 0, OPT_NO_UPDATE_CHECK}, +{"emit-count-tables", no_argument, 0, OPT_EMIT_COUNT_TABLES}, +{"compatible-hits-norm", no_argument, 0, OPT_USE_COMPAT_MASS}, +{"total-hits-norm", no_argument, 0, OPT_USE_TOTAL_MASS}, + +// Some options for testing different stats policies +{"fisher-covariance", no_argument, 0, OPT_USE_FISHER_COVARIANCE}, +{"empirical-covariance", no_argument, 0, OPT_USE_EMPIRICAL_COVARIANCE}, +{"split-mass", no_argument, 0, OPT_SPLIT_MASS}, +{"split-variance", no_argument, 0, OPT_SPLIT_VARIANCE}, +{"num-bootstrap-samples", required_argument, 0, OPT_NUM_BOOTSTRAP_SAMPLES}, +{"bootstrap-fraction", required_argument, 0, OPT_BOOTSTRAP_FRACTION}, +{"max-bundle-frags", required_argument, 0, OPT_MAX_FRAGS_PER_BUNDLE}, +{"read-skip-fraction", required_argument, 0, OPT_READ_SKIP_FRACTION}, +{"no-read-pairs", no_argument, 0, OPT_NO_READ_PAIRS}, +{"trim-read-length", required_argument, 0, OPT_TRIM_READ_LENGTH}, +{"cov-delta", required_argument, 0, OPT_MAX_DELTA_GAP}, +{0, 0, 0, 0} // terminator +}; + +void print_usage() +{ + fprintf(stderr, "cuffdiff v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); + fprintf(stderr, "-----------------------------\n"); + + //NOTE: SPACES ONLY, bozo + fprintf(stderr, "Usage: cuffdiff [options] [... 
sampleN_hits.sam]\n"); + fprintf(stderr, " Supply replicate SAMs as comma separated lists for each condition: sample1_rep1.sam,sample1_rep2.sam,...sample1_repM.sam\n"); + fprintf(stderr, "General Options:\n"); + fprintf(stderr, " -o/--output-dir write all output files to this directory [ default: ./ ]\n"); + fprintf(stderr, " --seed value of random number generator seed [ default: 0 ]\n"); + fprintf(stderr, " -T/--time-series treat samples as a time-series [ default: FALSE ]\n"); + fprintf(stderr, " -c/--min-alignment-count minimum number of alignments in a locus for testing [ default: 10 ]\n"); + fprintf(stderr, " --FDR False discovery rate used in testing [ default: 0.05 ]\n"); + fprintf(stderr, " -M/--mask-file ignore all alignment within transcripts in this file [ default: NULL ]\n"); + fprintf(stderr, " -b/--frag-bias-correct use bias correction - reference fasta required [ default: NULL ]\n"); + fprintf(stderr, " -u/--multi-read-correct use 'rescue method' for multi-reads (more accurate) [ default: FALSE ]\n"); + fprintf(stderr, " -N/--upper-quartile-norm use upper-quartile normalization [ default: FALSE ]\n"); + fprintf(stderr, " -L/--labels comma-separated list of condition labels\n"); +#if ENABLE_THREADS + fprintf(stderr, " -p/--num-threads number of threads used during quantification [ default: 1 ]\n"); +#endif + fprintf(stderr, "\nAdvanced Options:\n"); + fprintf(stderr, " --library-type Library prep used for input reads [ default: below ]\n"); + fprintf(stderr, " -m/--frag-len-mean average fragment length (unpaired reads only) [ default: 200 ]\n"); + fprintf(stderr, " -s/--frag-len-std-dev fragment length std deviation (unpaired reads only) [ default: 80 ]\n"); + fprintf(stderr, " --num-importance-samples number of importance samples for MAP restimation [ default: 1000 ]\n"); + fprintf(stderr, " --num-bootstrap-samples Number of bootstrap replications [ default: 20 ]\n"); + fprintf(stderr, " --bootstrap-fraction Fraction of fragments in each bootstrap sample [ default: 1.0 ]\n"); + fprintf(stderr, " --max-mle-iterations maximum iterations allowed for MLE calculation [ default: 5000 ]\n"); + fprintf(stderr, " --compatible-hits-norm count hits compatible with reference RNAs only [ default: TRUE ]\n"); + fprintf(stderr, " --total-hits-norm count all hits for normalization [ default: FALSE ]\n"); + fprintf(stderr, " --poisson-dispersion Don't fit fragment counts for overdispersion [ default: FALSE ]\n"); + fprintf(stderr, " -v/--verbose log-friendly verbose processing (no progress bar) [ default: FALSE ]\n"); + fprintf(stderr, " -q/--quiet log-friendly quiet processing (no progress bar) [ default: FALSE ]\n"); + fprintf(stderr, " --no-update-check do not contact server to check for update availability[ default: FALSE ]\n"); + fprintf(stderr, " --emit-count-tables print count tables used to fit overdispersion [ default: FALSE ]\n"); + fprintf(stderr, " --max-bundle-frags maximum fragments allowed in a bundle before skipping [ default: 500000 ]\n"); + fprintf(stderr, "\nDebugging use only:\n"); + fprintf(stderr, " --read-skip-fraction Skip a random subset of reads this size [ default: 0.0 ]\n"); + fprintf(stderr, " --no-read-pairs Break all read pairs [ default: FALSE ]\n"); + fprintf(stderr, " --trim-read-length Trim reads to be this long (keep 5' end) [ default: none ]\n"); + fprintf(stderr, " --cov-delta Maximum gap between bootstrap and IS [ default: 2.0 ]\n"); + print_library_table(); +} + +int parse_options(int argc, char** argv) +{ + int option_index = 0; + int next_option; + 
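+    // Illustrative invocation consistent with the options handled below (file
+    // names and labels are made up; the GTF and per-condition SAM/BAM lists
+    // are positional arguments consumed after getopt_long_only() finishes):
+    //   cuffdiff -o diff_out -L wt,mut -p 4 --FDR 0.05 merged.gtf \
+    //       wt_rep1.bam,wt_rep2.bam mut_rep1.bam,mut_rep2.bam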
string sample_label_list; + + do { + next_option = getopt_long_only(argc, argv, short_options, long_options, &option_index); + if (next_option == -1) /* Done with options. */ + break; + switch (next_option) { + case 0: + /* If this option set a flag, do nothing else now. */ + if (long_options[option_index].flag != 0) + break; + break; + + case 'm': + user_provided_fld = true; + def_frag_len_mean = (uint32_t)parseInt(0, "-m/--frag-len-mean arg must be at least 0", print_usage); + break; + case 'c': + min_read_count = (uint32_t)parseInt(0, "-c/--min-alignment-count arg must be at least 0", print_usage); + break; + case 's': + user_provided_fld = true; + def_frag_len_std_dev = (uint32_t)parseInt(0, "-s/--frag-len-std-dev arg must be at least 0", print_usage); + break; + case 'p': + num_threads = (uint32_t)parseInt(1, "-p/--num-threads arg must be at least 1", print_usage); + break; + case 'F': + min_isoform_fraction = parseFloat(0, 1.0, "-F/--min-isoform-fraction must be between 0 and 1.0", print_usage); + break; + case 'L': + sample_label_list = optarg; + break; + case OPT_FDR: + FDR = (double)parseFloat(0.00, 1.00, "--FDR arg must be between 0 and 1", print_usage); + break; + case OPT_NUM_IMP_SAMPLES: + num_importance_samples = parseInt(1, "--num-importance-samples must be at least 1", print_usage); + break; + case OPT_MLE_MAX_ITER: + max_mle_iterations = parseInt(1, "--max-mle-iterations must be at least 1", print_usage); + break; + case OPT_BIAS_MODE: + if (!strcmp(optarg, "site")) + bias_mode = SITE; + else if (!strcmp(optarg, "pos")) + bias_mode = POS; + else if (!strcmp(optarg, "pos_vlmm")) + bias_mode = POS_VLMM; + else if (!strcmp(optarg, "vlmm")) + bias_mode = VLMM; + else if (!strcmp(optarg, "pos_site")) + bias_mode = POS_SITE; + else + { + fprintf(stderr, "Unknown bias mode.\n"); + exit(1); + } + break; + case 'M': + { + mask_gtf_filename = optarg; + break; + } + case 'v': + { + if (cuff_quiet) + { + fprintf(stderr, "Warning: Can't be both verbose and quiet! Setting verbose only.\n"); + } + cuff_quiet = false; + cuff_verbose = true; + break; + } + case 'q': + { + if (cuff_verbose) + { + fprintf(stderr, "Warning: Can't be both verbose and quiet! 
Setting quiet only.\n"); + } + cuff_verbose = false; + cuff_quiet = true; + break; + } + case 'o': + { + output_dir = optarg; + break; + } + case 'b': + { + fasta_dir = optarg; + corr_bias = true; + break; + } + + case 'T': + { + samples_are_time_series = true; + break; + } + case 'N': + { + use_quartile_norm = true; + break; + } + case 'u': + { + corr_multi = true; + break; + } + case OPT_LIBRARY_TYPE: + { + library_type = optarg; + break; + } + case OPT_POISSON_DISPERSION: + { + poisson_dispersion = true; + break; + } + case OPT_NO_UPDATE_CHECK: + { + no_update_check = true; + break; + } + case OPT_RANDOM_SEED: + { + random_seed = parseInt(0, "--seed must be at least 0", print_usage); + break; + } + case OPT_EMIT_COUNT_TABLES: + { + emit_count_tables = true; + break; + } + case OPT_COLLAPSE_COND_PROB: + { + cond_prob_collapse = false; + break; + } + case OPT_USE_COMPAT_MASS: + { + use_compat_mass = true; + break; + } + case OPT_USE_TOTAL_MASS: + { + use_total_mass = true; + break; + } + case OPT_USE_FISHER_COVARIANCE: + { + use_fisher_covariance = true; + break; + } + case OPT_USE_EMPIRICAL_COVARIANCE: + { + use_fisher_covariance = false; + break; + } + case OPT_SPLIT_MASS: + { + split_variance = false; + break; + } + case OPT_SPLIT_VARIANCE: + { + split_variance = true; + break; + } + case OPT_NUM_BOOTSTRAP_SAMPLES: + { + num_bootstrap_samples = parseInt(0, "--num-bootstrap-samples must be at least 1", print_usage); + break; + } + case OPT_BOOTSTRAP_FRACTION: + { + bootstrap_fraction = parseFloat(0, 1.0, "--bootstrap-fraction must be between 0 and 1.0", print_usage); + break; + } + case OPT_MAX_FRAGS_PER_BUNDLE: + { + max_frags_per_bundle = parseInt(0, "--max-bundle-frags must be at least 0", print_usage); + break; + } + case OPT_READ_SKIP_FRACTION: + { + read_skip_fraction = parseFloat(0, 1.0, "--read-skip-fraction must be between 0 and 1.0", print_usage); + break; + } + case OPT_NO_READ_PAIRS: + { + no_read_pairs = true; + break; + } + case OPT_TRIM_READ_LENGTH: + { + trim_read_length = parseInt(0, "--trim-read-length must be at least 1", print_usage); + break; + } + case OPT_MAX_DELTA_GAP: + { + bootstrap_delta_gap = parseFloat(0, 10000000.0, "--read-skip-fraction must be between 0 and 10000000.0", print_usage); + break; + } + case OPT_MLE_MIN_ACC: + { + bootstrap_delta_gap = parseFloat(0, 10000000.0, "--read-skip-fraction must be between 0 and 10000000.0", print_usage); + break; + } + default: + print_usage(); + return 1; + } + } while(next_option != -1); + + if (library_type != "") + { + map::iterator lib_itr = + library_type_table.find(library_type); + if (lib_itr == library_type_table.end()) + { + fprintf(stderr, "Error: Library type %s not supported\n", library_type.c_str()); + exit(1); + } + else + { + if (library_type == "transfrags") + { + allow_junk_filtering = false; + } + global_read_properties = &lib_itr->second; + } + } + + if (use_total_mass && use_compat_mass) + { + fprintf (stderr, "Error: please supply only one of --compatibile-hits-norm and --total-hits-norm\n"); + exit(1); + } + + tokenize(sample_label_list, ",", sample_labels); + + allow_junk_filtering = false; + + return 0; +} + +void print_tests(FILE* fout, + const char* sample_1_label, + const char* sample_2_label, + const SampleDiffs& de_tests) +{ + for (SampleDiffs::const_iterator itr = de_tests.begin(); + itr != de_tests.end(); + ++itr) + { + const SampleDifference& test = itr->second; + + string all_gene_ids = cat_strings(test.meta_data->gene_ids); + if (all_gene_ids == "") + all_gene_ids = "-"; + + string 
all_gene_names = cat_strings(test.meta_data->gene_names); + if (all_gene_names == "") + all_gene_names = "-"; + + string all_protein_ids = cat_strings(test.meta_data->protein_ids); + if (all_protein_ids == "") + all_protein_ids = "-"; + + fprintf(fout, "%s\t%s\t%s\t%s\t%s\t%s", + itr->first.c_str(), + all_gene_ids.c_str(), + all_gene_names.c_str(), + test.meta_data->locus_desc.c_str(), + sample_1_label, + sample_2_label); + + double t = test.test_stat; + double r1 = test.value_1; + double r2 = test.value_2; + double d = test.differential; + double p = test.p_value; + double q = test.corrected_p; + const char* sig; + if (test.significant && test.test_status == OK) + sig = "yes"; + else + sig = "no"; + + const char* status; + if (test.test_status == OK) + status = "OK"; + else if (test.test_status == LOWDATA) + status = "LOWDATA"; + else if (test.test_status == HIDATA) + status = "HIDATA"; + else if (test.test_status == NOTEST) + status = "NOTEST"; + else + status = "FAIL"; + + fprintf(fout, "\t%s\t%lg\t%lg\t%lg\t%lg\t%lg\t%lg\t%s", status, r1, r2, d, t, p, q, sig); + fprintf(fout, "\n"); + } +} + +void print_FPKM_tracking(FILE* fout, + const FPKMTrackingTable& tracking) +{ + fprintf(fout,"tracking_id\tclass_code\tnearest_ref_id\tgene_id\tgene_short_name\ttss_id\tlocus\tlength\tcoverage"); + FPKMTrackingTable::const_iterator first_itr = tracking.begin(); + if (first_itr != tracking.end()) + { + const FPKMTracking& track = first_itr->second; + const vector& fpkms = track.fpkm_series; + for (size_t i = 0; i < fpkms.size(); ++i) + { + fprintf(fout, "\t%s_FPKM\t%s_conf_lo\t%s_conf_hi\t%s_status", sample_labels[i].c_str(), sample_labels[i].c_str(), sample_labels[i].c_str(), sample_labels[i].c_str()); + } + } + fprintf(fout, "\n"); + for (FPKMTrackingTable::const_iterator itr = tracking.begin(); itr != tracking.end(); ++itr) + { + const string& description = itr->first; + const FPKMTracking& track = itr->second; + const vector& fpkms = track.fpkm_series; + + AbundanceStatus status = NUMERIC_OK; + foreach (const FPKMContext& c, fpkms) + { + if (c.status == NUMERIC_FAIL) + status = NUMERIC_FAIL; + } + + string all_gene_ids = cat_strings(track.gene_ids); + if (all_gene_ids == "") + all_gene_ids = "-"; + + string all_gene_names = cat_strings(track.gene_names); + if (all_gene_names == "") + all_gene_names = "-"; + + string all_tss_ids = cat_strings(track.tss_ids); + if (all_tss_ids == "") + all_tss_ids = "-"; + + char length_buff[33] = "-"; + if (track.length) + sprintf(length_buff, "%d", track.length); + + fprintf(fout, "%s\t%c\t%s\t%s\t%s\t%s\t%s\t%s\t%s", + description.c_str(), + track.classcode ? 
track.classcode : '-', + track.ref_match.c_str(), + all_gene_ids.c_str(), + all_gene_names.c_str(), + all_tss_ids.c_str(), + track.locus_tag.c_str(), + length_buff, + "-"); + + for (size_t i = 0; i < fpkms.size(); ++i) + { + double fpkm = fpkms[i].FPKM; + double std_dev = sqrt(fpkms[i].FPKM_variance); + double fpkm_conf_hi = fpkm + 2.0 * std_dev; + double fpkm_conf_lo = max(0.0, fpkm - 2.0 * std_dev); + const char* status_str = "OK"; + + if (fpkms[i].status == NUMERIC_OK) + { + status_str = "OK"; + } + else if (fpkms[i].status == NUMERIC_FAIL) + { + status_str = "FAIL"; + } + else if (fpkms[i].status == NUMERIC_LOW_DATA) + { + status_str = "LOWDATA"; + } + else if (fpkms[i].status == NUMERIC_HI_DATA) + { + status_str = "HIDATA"; + } + else + { + assert(false); + } + + fprintf(fout, "\t%lg\t%lg\t%lg\t%s", fpkm, fpkm_conf_lo, fpkm_conf_hi, status_str); + } + + fprintf(fout, "\n"); + } +} + +bool p_value_lt(const SampleDifference* lhs, const SampleDifference* rhs) +{ + return lhs->p_value < rhs->p_value; +} + +//// Benjamini-Hochberg procedure +//int fdr_significance(double fdr, +// vector& tests) +//{ +// sort(tests.begin(), tests.end(), p_value_lt); +// vector passing; +// +// for (int k = 0; k < (int)tests.size(); ++k) +// { +// if (tests[k]->test_status == OK) +// { +// passing.push_back(tests[k]); +// } +// else +// { +// tests[k]->significant = false; +// } +// } +// int significant = 0; +// for (int k = 0; k < (int)passing.size(); ++k) +// { +// double r = (double)passing.size() / ((double) k + 1); +// double corrected_p = passing[k]->p_value * r; +// passing[k]->corrected_p = corrected_p; +// passing[k]->significant = (corrected_p <= fdr); +// significant += passing[k]->significant; +// } +// +// return passing.size(); +//} + +// Benjamini-Hochberg procedure +int fdr_significance(double fdr, + vector& tests) +{ + sort(tests.begin(), tests.end(), p_value_lt); + vector passing; + + for (int k = 0; k < (int)tests.size(); ++k) + { + if (tests[k]->test_status == OK) + { + passing.push_back(tests[k]); + } + else + { + tests[k]->significant = false; + } + } + int significant = 0; + float pmin=1; + int n = (int) passing.size(); + //use the same procedure as p.adjust(...,"BH") in R + for (int k = n-1; k >= 0; k--) + { + double corrected_p = (double) passing[k]->p_value * ((double) n/(double) (k+1)); + //make sure that no entry with lower p-value will get higher q-value than any entry with higher p-value + if (corrected_p < pmin) + { + pmin = corrected_p; + } + else + { + corrected_p = pmin; + } + // make sure that the q-value is always <= 1 + passing[k]->corrected_p = (corrected_p < 1 ? 
corrected_p : 1); + passing[k]->significant = (corrected_p <= fdr); + significant += passing[k]->significant; + } + + return passing.size(); +} + + +void extract_sample_diffs(SampleDiffs& diff_map, + vector& diffs) +{ + for (SampleDiffs::iterator itr = diff_map.begin(); + itr != diff_map.end(); + ++itr) + { + diffs.push_back(&(itr->second)); + } +} + +#if ENABLE_THREADS +boost::mutex inspect_lock; +#endif + +void inspect_map_worker(ReplicatedBundleFactory& fac, + int& tmp_min_frag_len, + int& tmp_max_frag_len) +{ +#if ENABLE_THREADS + boost::this_thread::at_thread_exit(decr_pool_count); +#endif + + int min_f = std::numeric_limits::max(); + int max_f = 0; + + fac.inspect_replicate_maps(min_f, max_f); + +#if ENABLE_THREADS + inspect_lock.lock(); +#endif + tmp_min_frag_len = min(min_f, tmp_min_frag_len); + tmp_max_frag_len = max(max_f, tmp_max_frag_len); +#if ENABLE_THREADS + inspect_lock.unlock(); +#endif +} + +void learn_bias_worker(shared_ptr fac) +{ +#if ENABLE_THREADS + boost::this_thread::at_thread_exit(decr_pool_count); +#endif + shared_ptr rg_props = fac->read_group_properties(); + BiasLearner* bl = new BiasLearner(rg_props->frag_len_dist()); + learn_bias(*fac, *bl, false); + rg_props->bias_learner(shared_ptr(bl)); +} + + +shared_ptr test_launcher; + +bool quantitate_next_locus(const RefSequenceTable& rt, + vector >& bundle_factories, + shared_ptr launcher) +{ + for (size_t i = 0; i < bundle_factories.size(); ++i) + { + shared_ptr s_ab = shared_ptr(new SampleAbundances); + +#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads < locus_num_threads) + { + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + + } + + locus_curr_threads++; + locus_thread_pool_lock.unlock(); + + thread quantitate(sample_worker, + boost::ref(rt), + boost::ref(*(bundle_factories[i])), + s_ab, + i, + launcher); +#else + sample_worker(boost::ref(rt), + boost::ref(*(bundle_factories[i])), + s_ab, + i, + launcher); +#endif + } + return true; +} + +void driver(FILE* ref_gtf, FILE* mask_gtf, vector& sam_hit_filename_lists, Outfiles& outfiles) +{ + + ReadTable it; + RefSequenceTable rt(true, false); + + vector > ref_mRNAs; + + vector > bundle_factories; + vector > all_read_groups; + vector > all_hit_factories; + + for (size_t i = 0; i < sam_hit_filename_lists.size(); ++i) + { + vector sam_hit_filenames; + tokenize(sam_hit_filename_lists[i], ",", sam_hit_filenames); + + vector > replicate_factories; + for (size_t j = 0; j < sam_hit_filenames.size(); ++j) + { + shared_ptr hs; + try + { + hs = shared_ptr(new BAMHitFactory(sam_hit_filenames[j], it, rt)); + } + catch (std::runtime_error& e) + { + try + { + fprintf(stderr, "File %s doesn't appear to be a valid BAM file, trying SAM...\n", + sam_hit_filename_lists[i].c_str()); + hs = shared_ptr(new SAMHitFactory(sam_hit_filenames[j], it, rt)); + } + catch (std::runtime_error& e) + { + fprintf(stderr, "Error: cannot open alignment file %s for reading\n", + sam_hit_filenames[j].c_str()); + exit(1); + } + } + + all_hit_factories.push_back(hs); + + shared_ptr hf(new BundleFactory(hs, REF_DRIVEN)); + shared_ptr rg_props(new ReadGroupProperties); + + if (global_read_properties) + { + *rg_props = *global_read_properties; + } + else + { + *rg_props = hs->read_group_properties(); + } + + all_read_groups.push_back(rg_props); + + hf->read_group_properties(rg_props); + + replicate_factories.push_back(hf); + //replicate_factories.back()->set_ref_rnas(ref_mRNAs); + } + + string 
condition_name = sample_labels[i]; + bundle_factories.push_back(shared_ptr(new ReplicatedBundleFactory(replicate_factories, condition_name))); + } + + ::load_ref_rnas(ref_gtf, rt, ref_mRNAs, corr_bias, false); + if (ref_mRNAs.empty()) + return; + + vector > mask_rnas; + if (mask_gtf) + { + ::load_ref_rnas(mask_gtf, rt, mask_rnas, false, false); + } + + foreach (shared_ptr fac, bundle_factories) + { + fac->set_ref_rnas(ref_mRNAs); + if (mask_gtf) + fac->set_mask_rnas(mask_rnas); + } + +#if ENABLE_THREADS + locus_num_threads = num_threads; +#endif + + int tmp_min_frag_len = numeric_limits::max(); + int tmp_max_frag_len = 0; + + ProgressBar p_bar("Inspecting maps and determining fragment length distributions.",0); + foreach (shared_ptr fac, bundle_factories) + { +#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads < locus_num_threads) + { + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + } + + locus_curr_threads++; + locus_thread_pool_lock.unlock(); + + thread inspect(inspect_map_worker, + boost::ref(*fac), + boost::ref(tmp_min_frag_len), + boost::ref(tmp_max_frag_len)); +#else + inspect_map_worker(boost::ref(*fac), + boost::ref(tmp_min_frag_len), + boost::ref(tmp_max_frag_len)); +#endif + } + + // wait for the workers to finish up before reporting everthing. +#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads == 0) + { + locus_thread_pool_lock.unlock(); + break; + } + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + } +#endif + + + if (use_quartile_norm) + { + long double total_mass = 0.0; + long double total_norm_mass = 0.0; + foreach (shared_ptr rg, all_read_groups) + { + total_mass += rg->total_map_mass(); + total_norm_mass += rg->normalized_map_mass(); + } + + if (total_mass > 0) + { + double scaling_factor = total_mass / total_norm_mass; + foreach (shared_ptr rg, all_read_groups) + { + double scaled_mass = scaling_factor * rg->normalized_map_mass(); + + rg->normalized_map_mass(scaled_mass); + } + } + } + + int most_reps = -1; + int most_reps_idx = 0; + + bool single_replicate_fac = false; + + for (size_t i = 0; i < bundle_factories.size(); ++i) + { + ReplicatedBundleFactory& fac = *(bundle_factories[i]); + if (fac.num_replicates() > most_reps) + { + most_reps = fac.num_replicates(); + most_reps_idx = i; + } + if (most_reps == 1) + { + single_replicate_fac = true; + } + } + + if (most_reps != 1 && poisson_dispersion == false) + { + foreach (shared_ptr fac, bundle_factories) + { + if (fac->num_replicates() == 1) + { + fac->mass_dispersion_model(bundle_factories[most_reps_idx]->mass_dispersion_model()); + } + } + } + + if (most_reps == 1 && poisson_dispersion == false) + { + vector sample_count_table; + for (size_t i = 0; i < all_read_groups.size(); ++i) + { + shared_ptr rg_props = all_read_groups[i]; + const vector& common_count_table = rg_props->common_scale_counts(); + double unscaling_factor = 1.0 / rg_props->mass_scale_factor(); + for (size_t j = 0; j < common_count_table.size(); ++j) + { + if (sample_count_table.size() == j) + { + const string& locus_id = common_count_table[j].locus_desc; + int num_transcripts = common_count_table[j].num_transcripts; + sample_count_table.push_back(LocusCountList(locus_id,all_read_groups.size(), num_transcripts)); + } + double scaled = common_count_table[j].count; + sample_count_table[j].counts[i] = scaled * unscaling_factor; + 
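+ // note: multiplying by 1.0 / mass_scale_factor() divides out the replicate's current mass scale factor; calc_scaling_factors() below then recomputes fresh scale factors from these unscaled counts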
assert(sample_count_table[j].counts[i] >= 0 && !isinf(sample_count_table[j].counts[i])); + } + } + + vector scale_factors(all_read_groups.size(), 0.0); + + // TODO: needs to be refactored - similar code exists in replicates.cpp + calc_scaling_factors(sample_count_table, scale_factors); + + for (size_t i = 0; i < all_read_groups.size(); ++i) + { + shared_ptr rg_props = all_read_groups[i]; + rg_props->mass_scale_factor(scale_factors[i]); + } + + // Transform raw counts to the common scale + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + LocusCountList& p = sample_count_table[i]; + for (size_t j = 0; j < p.counts.size(); ++j) + { + assert (scale_factors.size() > j); + p.counts[j] *= (1.0 / scale_factors[j]); + } + } + + for (size_t i = 0; i < all_read_groups.size(); ++i) + { + shared_ptr rg_props = all_read_groups[i]; + vector scaled_counts; + for (size_t j = 0; j < sample_count_table.size(); ++j) + { + string& locus_id = sample_count_table[j].locus_desc; + double count = sample_count_table[j].counts[i]; + int num_transcripts = sample_count_table[j].num_transcripts; + LocusCount locus_count(locus_id, count, num_transcripts); + scaled_counts.push_back(locus_count); + } + rg_props->common_scale_counts(scaled_counts); + // revert each read group back to native scaling to avoid a systematic fold change toward the mean. + + rg_props->mass_scale_factor(1.0); + } + + shared_ptr disperser; + disperser = fit_dispersion_model("pooled", scale_factors, sample_count_table); + + foreach (shared_ptr rg_props, all_read_groups) + { + rg_props->mass_dispersion_model(disperser); + } + + } + + long double total_norm_mass = 0.0; + long double total_mass = 0.0; + foreach (shared_ptr rg_props, all_read_groups) + { + total_norm_mass += rg_props->normalized_map_mass(); + total_mass += rg_props->total_map_mass(); + } + + // scale the normalized masses so that both quantile total count normalization + // are roughly on the same numerical scale + foreach (shared_ptr rg_props, all_read_groups) + { + long double new_norm = rg_props->normalized_map_mass() * (total_mass / total_norm_mass); + rg_props->normalized_map_mass(new_norm); + } + + min_frag_len = tmp_min_frag_len; + max_frag_len = tmp_max_frag_len; + + final_est_run = false; + + double num_bundles = (double)bundle_factories[0]->num_bundles(); + + //test_launcher = shared_ptr(new TestLauncher(bundle_factories.size(), &tests, &tracking, samples_are_time_series, p_bar) + + if (corr_bias || corr_multi) // Only run initial estimation if correcting bias or multi-reads + { + if (corr_bias && corr_multi) + p_bar = ProgressBar("Calculating initial abundance estimates for bias and multi-read correction.", num_bundles); + else if (corr_bias) + p_bar = ProgressBar("Calculating initial abundance estimates for bias correction.", num_bundles); + else if (corr_multi) + p_bar = ProgressBar("Calculating initial abundance estimates for multi-read correction.", num_bundles); + + while (1) + { + //p_bar.update("",1); + test_launcher = shared_ptr(new TestLauncher((int)bundle_factories.size(), NULL, NULL, samples_are_time_series, &p_bar)); + + shared_ptr > > abundances(new vector >()); + quantitate_next_locus(rt, bundle_factories, test_launcher); + bool more_loci_remain = false; + foreach (shared_ptr rep_fac, bundle_factories) + { + if (rep_fac->bundles_remain()) + { + more_loci_remain = true; + break; + } + } + + if (!more_loci_remain) + { + // wait for the workers to finish up before breaking out. 
+#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads == 0) + { + locus_thread_pool_lock.unlock(); + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + + } +#endif + break; + } + } + + foreach (shared_ptr rep_fac, bundle_factories) + { + rep_fac->reset(); + } + + p_bar.complete(); + } + if (corr_bias) + { + p_bar = ProgressBar("Learning bias parameters.", 0); + foreach (shared_ptr rep_fac, bundle_factories) + { + foreach (shared_ptr fac, rep_fac->factories()) + { +#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads < locus_num_threads) + { + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + } + locus_curr_threads++; + locus_thread_pool_lock.unlock(); + + thread bias(learn_bias_worker, fac); +#else + learn_bias_worker(fac); +#endif + } + } + + // wait for the workers to finish up before reporting everthing. +#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads == 0) + { + locus_thread_pool_lock.unlock(); + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + } +#endif + foreach (shared_ptr rep_fac, bundle_factories) + { + rep_fac->reset(); + } + } + + + + Tests tests; + + int N = (int)sam_hit_filename_lists.size(); + + tests.isoform_de_tests = vector >(N); + tests.tss_group_de_tests = vector >(N); + tests.gene_de_tests = vector >(N); + tests.cds_de_tests = vector >(N); + tests.diff_splicing_tests = vector >(N); + tests.diff_promoter_tests = vector >(N); + tests.diff_cds_tests = vector >(N); + + for (int i = 1; i < N; ++i) + { + tests.isoform_de_tests[i] = vector(i); + tests.tss_group_de_tests[i] = vector(i); + tests.gene_de_tests[i] = vector(i); + tests.cds_de_tests[i] = vector(i); + tests.diff_splicing_tests[i] = vector(i); + tests.diff_promoter_tests[i] = vector(i); + tests.diff_cds_tests[i] = vector(i); + } + + Tracking tracking; + + final_est_run = true; + p_bar = ProgressBar("Testing for differential expression and regulation in locus.", num_bundles); + + test_launcher = shared_ptr(new TestLauncher(bundle_factories.size(), &tests, &tracking, samples_are_time_series, &p_bar)); + + while (true) + { + //shared_ptr > > abundances(new vector >()); + quantitate_next_locus(rt, bundle_factories, test_launcher); + bool more_loci_remain = false; + foreach (shared_ptr rep_fac, bundle_factories) + { + if (rep_fac->bundles_remain()) + { + more_loci_remain = true; + break; + } + } + if (!more_loci_remain) + { + // wait for the workers to finish up before doing the cross-sample testing. 
+#if ENABLE_THREADS + while(1) + { + locus_thread_pool_lock.lock(); + if (locus_curr_threads == 0) + { + locus_thread_pool_lock.unlock(); + break; + } + + locus_thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + + } +#endif + break; + } + } + + p_bar.complete(); + + //double FDR = 0.05; + int total_iso_de_tests = 0; + + vector isoform_exp_diffs; + for (size_t i = 1; i < tests.isoform_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + total_iso_de_tests += tests.isoform_de_tests[i][j].size(); + extract_sample_diffs(tests.isoform_de_tests[i][j], isoform_exp_diffs); + } + } + int iso_exp_tests = fdr_significance(FDR, isoform_exp_diffs); + fprintf(stderr, "Performed %d isoform-level transcription difference tests\n", iso_exp_tests); + fprintf(outfiles.isoform_de_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tlog2(fold_change)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.isoform_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.isoform_de_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.isoform_de_tests[i][j]); + } + } + + int total_group_de_tests = 0; + vector tss_group_exp_diffs; + for (size_t i = 1; i < tests.tss_group_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + extract_sample_diffs(tests.tss_group_de_tests[i][j], tss_group_exp_diffs); + total_group_de_tests += tests.tss_group_de_tests[i][j].size(); + } + } + + int tss_group_exp_tests = fdr_significance(FDR, tss_group_exp_diffs); + fprintf(stderr, "Performed %d tss-level transcription difference tests\n", tss_group_exp_tests); + fprintf(outfiles.group_de_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tlog2(fold_change)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.tss_group_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.group_de_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.tss_group_de_tests[i][j]); + } + } + + int total_gene_de_tests = 0; + vector gene_exp_diffs; + for (size_t i = 1; i < tests.gene_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + total_gene_de_tests += tests.gene_de_tests[i][j].size(); + extract_sample_diffs(tests.gene_de_tests[i][j], gene_exp_diffs); + } + } + + //fprintf(stderr, "***There are %lu difference records in gene_exp_diffs\n", gene_exp_diffs.size()); + + int gene_exp_tests = fdr_significance(FDR, gene_exp_diffs); + fprintf(stderr, "Performed %d gene-level transcription difference tests\n", gene_exp_tests); + fprintf(outfiles.gene_de_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tlog2(fold_change)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.gene_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.gene_de_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.gene_de_tests[i][j]); + } + } + + int total_cds_de_tests = 0; + vector cds_exp_diffs; + for (size_t i = 1; i < tests.cds_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + total_cds_de_tests += tests.cds_de_tests[i][j].size(); + extract_sample_diffs(tests.cds_de_tests[i][j], cds_exp_diffs); + } + } + int cds_exp_tests = fdr_significance(FDR, cds_exp_diffs); + fprintf(stderr, "Performed %d CDS-level transcription difference tests\n", cds_exp_tests); + 
fprintf(outfiles.cds_de_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tlog2(fold_change)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.cds_de_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.cds_de_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.cds_de_tests[i][j]); + } + } + + int total_diff_splice_tests = 0; + vector splicing_diffs; + for (size_t i = 1; i < tests.diff_splicing_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + total_diff_splice_tests += tests.diff_splicing_tests[i][j].size(); + extract_sample_diffs(tests.diff_splicing_tests[i][j], splicing_diffs); + } + } + + int splicing_tests = fdr_significance(FDR, splicing_diffs); + fprintf(stderr, "Performed %d splicing tests\n", splicing_tests); + fprintf(outfiles.diff_splicing_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tsqrt(JS)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.diff_splicing_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + const SampleDiffs& diffs = tests.diff_splicing_tests[i][j]; + print_tests(outfiles.diff_splicing_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), diffs); + } + } + + int total_diff_promoter_tests = 0; + vector promoter_diffs; + for (size_t i = 1; i < tests.diff_splicing_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + total_diff_promoter_tests += tests.diff_promoter_tests[i][j].size(); + extract_sample_diffs(tests.diff_promoter_tests[i][j], promoter_diffs); + } + } + int promoter_tests = fdr_significance(FDR, promoter_diffs); + fprintf(stderr, "Performed %d promoter preference tests\n", promoter_tests); + fprintf(outfiles.diff_promoter_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tsqrt(JS)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.diff_promoter_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.diff_promoter_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.diff_promoter_tests[i][j]); + } + } + + int total_diff_cds_tests = 0; + vector cds_use_diffs; + for (size_t i = 1; i < tests.diff_cds_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + extract_sample_diffs(tests.diff_cds_tests[i][j], cds_use_diffs); + total_diff_cds_tests += tests.diff_cds_tests[i][j].size(); + } + } + int cds_use_tests = fdr_significance(FDR, cds_use_diffs); + fprintf(stderr, "Performing %d relative CDS output tests\n", cds_use_tests); + fprintf(outfiles.diff_cds_outfile, "test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tsqrt(JS)\ttest_stat\tp_value\tq_value\tsignificant\n"); + for (size_t i = 1; i < tests.diff_cds_tests.size(); ++i) + { + for (size_t j = 0; j < i; ++j) + { + print_tests(outfiles.diff_cds_outfile, sample_labels[j].c_str(), sample_labels[i].c_str(), tests.diff_cds_tests[i][j]); + } + } + + FILE* fiso_fpkm_tracking = outfiles.isoform_fpkm_tracking_out; + fprintf(stderr, "Writing isoform-level FPKM tracking\n"); + print_FPKM_tracking(fiso_fpkm_tracking,tracking.isoform_fpkm_tracking); + + FILE* ftss_fpkm_tracking = outfiles.tss_group_fpkm_tracking_out; + fprintf(stderr, "Writing TSS group-level FPKM tracking\n"); + print_FPKM_tracking(ftss_fpkm_tracking,tracking.tss_group_fpkm_tracking); + + FILE* fgene_fpkm_tracking = outfiles.gene_fpkm_tracking_out; + fprintf(stderr, "Writing gene-level FPKM tracking\n"); + 
print_FPKM_tracking(fgene_fpkm_tracking,tracking.gene_fpkm_tracking); + + FILE* fcds_fpkm_tracking = outfiles.cds_fpkm_tracking_out; + fprintf(stderr, "Writing CDS-level FPKM tracking\n"); + print_FPKM_tracking(fcds_fpkm_tracking,tracking.cds_fpkm_tracking); +} + +int main(int argc, char** argv) +{ + init_library_table(); + + min_isoform_fraction = 1e-5; + + int parse_ret = parse_options(argc,argv); + if (parse_ret) + return parse_ret; + + if (!use_total_mass && !use_compat_mass) + { + use_total_mass = false; + use_compat_mass = true; + } + + if(optind >= argc) + { + print_usage(); + return 1; + } + + if (!no_update_check) + check_version(PACKAGE_VERSION); + + + string ref_gtf_filename = argv[optind++]; + + vector sam_hit_filenames; + while(optind < argc) + { + string sam_hits_file_name = argv[optind++]; + sam_hit_filenames.push_back(sam_hits_file_name); + } + + if (sam_hit_filenames.size() < 2) + { + fprintf(stderr, "Error: cuffdiff requires at least 2 SAM files\n"); + exit(1); + } + + if (sample_labels.size() == 0) + { + for (size_t i = 1; i < sam_hit_filenames.size() + 1; ++i) + { + char buf[256]; + sprintf(buf, "q%lu", i); + sample_labels.push_back(buf); + } + } + + if (sam_hit_filenames.size() != sample_labels.size()) + { + fprintf(stderr, "Error: number of labels must match number of conditions\n"); + exit(1); + } + + if (random_seed == -1) + random_seed = time(NULL); + + // seed the random number generator - we'll need it for the importance + // sampling during MAP estimation of the gammas + srand48(random_seed); + + FILE* ref_gtf = NULL; + if (ref_gtf_filename != "") + { + ref_gtf = fopen(ref_gtf_filename.c_str(), "r"); + if (!ref_gtf) + { + fprintf(stderr, "Error: cannot open reference GTF file %s for reading\n", + ref_gtf_filename.c_str()); + exit(1); + } + } + + FILE* mask_gtf = NULL; + if (mask_gtf_filename != "") + { + mask_gtf = fopen(mask_gtf_filename.c_str(), "r"); + if (!mask_gtf) + { + fprintf(stderr, "Error: cannot open mask GTF file %s for reading\n", + mask_gtf_filename.c_str()); + exit(1); + } + } + + + + // Note: we don't want the assembly filters interfering with calculations + // here + + pre_mrna_fraction = 0.0; + olap_radius = 0; + + Outfiles outfiles; + + if (output_dir != "") + { + int retcode = mkpath(output_dir.c_str(), 0777); + if (retcode == -1) + { + if (errno != EEXIST) + { + fprintf (stderr, + "Error: cannot create directory %s\n", + output_dir.c_str()); + exit(1); + } + } + } + + static const int filename_buf_size = 2048; + + char out_file_prefix[filename_buf_size]; + sprintf(out_file_prefix, "%s/", output_dir.c_str()); + char iso_out_file_name[filename_buf_size]; + sprintf(iso_out_file_name, "%sisoform_exp.diff", out_file_prefix); + FILE* iso_out = fopen(iso_out_file_name, "w"); + if (!iso_out) + { + fprintf(stderr, "Error: cannot open differential isoform transcription file %s for writing\n", + iso_out_file_name); + exit(1); + } + + char group_out_file_name[filename_buf_size]; + sprintf(group_out_file_name, "%stss_group_exp.diff", out_file_prefix); + FILE* group_out = fopen(group_out_file_name, "w"); + if (!group_out) + { + fprintf(stderr, "Error: cannot open differential TSS group transcription file %s for writing\n", + group_out_file_name); + exit(1); + } + + char gene_out_file_name[filename_buf_size]; + sprintf(gene_out_file_name, "%sgene_exp.diff", out_file_prefix); + FILE* gene_out = fopen(gene_out_file_name, "w"); + if (!gene_out) + { + fprintf(stderr, "Error: cannot open gene expression file %s for writing\n", + gene_out_file_name); + 
exit(1); + } + + char cds_out_file_name[filename_buf_size]; + sprintf(cds_out_file_name, "%scds_exp.diff", out_file_prefix); + FILE* cds_out = fopen(cds_out_file_name, "w"); + if (!cds_out) + { + fprintf(stderr, "Error: cannot open cds expression file %s for writing\n", + cds_out_file_name); + exit(1); + } + + char diff_splicing_out_file_name[filename_buf_size]; + sprintf(diff_splicing_out_file_name, "%ssplicing.diff", out_file_prefix); + FILE* diff_splicing_out = fopen(diff_splicing_out_file_name, "w"); + if (!diff_splicing_out) + { + fprintf(stderr, "Error: cannot open differential splicing file %s for writing\n", + diff_splicing_out_file_name); + exit(1); + } + + char diff_promoter_out_file_name[filename_buf_size]; + sprintf(diff_promoter_out_file_name, "%spromoters.diff", out_file_prefix); + FILE* diff_promoter_out = fopen(diff_promoter_out_file_name, "w"); + if (!diff_promoter_out) + { + fprintf(stderr, "Error: cannot open differential transcription start file %s for writing\n", + diff_promoter_out_file_name); + exit(1); + } + + char diff_cds_out_file_name[filename_buf_size]; + sprintf(diff_cds_out_file_name, "%scds.diff", out_file_prefix); + FILE* diff_cds_out = fopen(diff_cds_out_file_name, "w"); + if (!diff_cds_out) + { + fprintf(stderr, "Error: cannot open differential relative CDS file %s for writing\n", + diff_cds_out_file_name); + exit(1); + } + + outfiles.isoform_de_outfile = iso_out; + outfiles.group_de_outfile = group_out; + outfiles.gene_de_outfile = gene_out; + outfiles.cds_de_outfile = cds_out; + outfiles.diff_splicing_outfile = diff_splicing_out; + outfiles.diff_promoter_outfile = diff_promoter_out; + outfiles.diff_cds_outfile = diff_cds_out; + + char isoform_fpkm_tracking_name[filename_buf_size]; + sprintf(isoform_fpkm_tracking_name, "%s/isoforms.fpkm_tracking", output_dir.c_str()); + FILE* isoform_fpkm_out = fopen(isoform_fpkm_tracking_name, "w"); + if (!isoform_fpkm_out) + { + fprintf(stderr, "Error: cannot open isoform-level FPKM tracking file %s for writing\n", + isoform_fpkm_tracking_name); + exit(1); + } + outfiles.isoform_fpkm_tracking_out = isoform_fpkm_out; + + char tss_group_fpkm_tracking_name[filename_buf_size]; + sprintf(tss_group_fpkm_tracking_name, "%s/tss_groups.fpkm_tracking", output_dir.c_str()); + FILE* tss_group_fpkm_out = fopen(tss_group_fpkm_tracking_name, "w"); + if (!tss_group_fpkm_out) + { + fprintf(stderr, "Error: cannot open TSS group-level FPKM tracking file %s for writing\n", + tss_group_fpkm_tracking_name); + exit(1); + } + outfiles.tss_group_fpkm_tracking_out = tss_group_fpkm_out; + + char cds_fpkm_tracking_name[filename_buf_size]; + sprintf(cds_fpkm_tracking_name, "%s/cds.fpkm_tracking", output_dir.c_str()); + FILE* cds_fpkm_out = fopen(cds_fpkm_tracking_name, "w"); + if (!cds_fpkm_out) + { + fprintf(stderr, "Error: cannot open CDS level FPKM tracking file %s for writing\n", + cds_fpkm_tracking_name); + exit(1); + } + outfiles.cds_fpkm_tracking_out = cds_fpkm_out; + + char gene_fpkm_tracking_name[filename_buf_size]; + sprintf(gene_fpkm_tracking_name, "%s/genes.fpkm_tracking", output_dir.c_str()); + FILE* gene_fpkm_out = fopen(gene_fpkm_tracking_name, "w"); + if (!gene_fpkm_out) + { + fprintf(stderr, "Error: cannot open gene-level FPKM tracking file %s for writing\n", + gene_fpkm_tracking_name); + exit(1); + } + outfiles.gene_fpkm_tracking_out = gene_fpkm_out; + + driver(ref_gtf, mask_gtf, sam_hit_filenames, outfiles); + +#if 0 + if (emit_count_tables) + { + dump_locus_variance_info(output_dir + string("/locus_var.txt")); + } +#endif + + 
return 0; +} + diff --git a/src/cufflinks.cpp b/src/cufflinks.cpp new file mode 100644 index 0000000..796af98 --- /dev/null +++ b/src/cufflinks.cpp @@ -0,0 +1,1711 @@ +/* + * cufflinks.cpp + * Cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#else +#define PACKAGE_VERSION "INTERNAL" +#endif + +#include +#include +#include + +#include "common.h" +#include "hits.h" + +#include + +#include "update_check.h" +#include "clustering.h" +#include "abundances.h" +#include "bundles.h" +#include "filters.h" +#include "genes.h" +#include "assemble.h" +#include "biascorrection.h" +#include "multireads.h" + +using namespace std; + +#if ENABLE_THREADS +const char *short_options = "m:p:s:F:I:j:Q:L:G:g:o:M:b:a:A:Nqvu"; +#else +const char *short_options = "m:s:F:I:j:Q:L:G:g:o:M:b:a:A:Nqvu"; +#endif + +static struct option long_options[] = { +// general options +{"GTF", required_argument, 0, 'G'}, +{"GTF-guide", required_argument, 0, 'g'}, +{"mask-gtf", required_argument, 0, 'M'}, +{"library-type", required_argument, 0, OPT_LIBRARY_TYPE}, +{"seed", required_argument, 0, OPT_RANDOM_SEED}, + +// program behavior +{"output-dir", required_argument, 0, 'o'}, +{"verbose", no_argument, 0, 'v'}, +{"quiet", no_argument, 0, 'q'}, +{"no-update-check", no_argument, 0, OPT_NO_UPDATE_CHECK}, +#if ENABLE_THREADS + {"num-threads", required_argument, 0, 'p'}, +#endif +{"output-fld", no_argument, 0, OPT_OUTPUT_FLD}, +{"output-bias-params", no_argument, 0, OPT_OUTPUT_BIAS_PARAMS}, + +// abundance estimation +{"frag-len-mean", required_argument, 0, 'm'}, +{"frag-len-std-dev", required_argument, 0, 's'}, +{"min-isoform-fraction", required_argument, 0, 'F'}, +{"upper-quartile-normalization", no_argument, 0, 'N'}, +{"frag-bias-correct", required_argument, 0, 'b'}, +{"multi-read-correct", no_argument, 0, 'u'}, + +{"num-importance-samples", required_argument, 0, OPT_NUM_IMP_SAMPLES}, +{"max-mle-iterations", required_argument, 0, OPT_MLE_MAX_ITER}, +{"bias-mode", required_argument, 0, OPT_BIAS_MODE}, +{"use-grad-ascent", no_argument, 0, OPT_USE_EM}, +{"no-collapse-cond-prob", no_argument, 0, OPT_COLLAPSE_COND_PROB}, +{"compatible-hits-norm", no_argument, 0, OPT_USE_COMPAT_MASS}, +{"total-hits-norm", no_argument, 0, OPT_USE_TOTAL_MASS}, + +// assembly +{"pre-mrna-fraction", required_argument, 0, 'j'}, +{"junc-alpha", required_argument, 0, 'a'}, +{"small-anchor-fraction", required_argument, 0, 'A'}, +{"max-intron-length", required_argument, 0, 'I'}, +{"label", required_argument, 0, 'L'}, +{"overhang-tolerance", required_argument, 0, OPT_OVERHANG_TOLERANCE}, +{"min-frags-per-transfrag",required_argument, 0, OPT_MIN_FRAGS_PER_TRANSFRAG}, +{"min-intron-length", required_argument, 0, OPT_MIN_INTRON_LENGTH}, +{"max-bundle-length", required_argument, 0, OPT_MAX_BUNDLE_LENGTH}, +{"trim-3-dropoff-frac", required_argument, 0, OPT_3_PRIME_DROPOFF_FRAC}, +{"trim-3-avgcov-thresh", required_argument, 0, OPT_3_PRIME_AVGCOV_THRESH}, + +{"3-overhang-tolerance", required_argument, 0, OPT_3_OVERHANG_TOLERANCE}, +{"intron-overhang-tolerance", required_argument, 0, OPT_INTRON_OVERHANG_TOLERANCE}, +{"no-faux-reads", no_argument, 0, OPT_NO_FAUX_READS}, +{"no-5-extend", no_argument, 0, OPT_NO_5_EXTEND}, +{"tile-read-len", required_argument, 0, OPT_TILE_LEN}, +{"tile-read-sep", required_argument, 0, OPT_TILE_SEP}, + +{"max-bundle-frags", required_argument, 0, OPT_MAX_FRAGS_PER_BUNDLE}, +{0, 0, 0, 0} // terminator +}; + +void print_usage() +{ + //NOTE: 
SPACES ONLY, bozo + fprintf(stderr, "cufflinks v%s\n", PACKAGE_VERSION); + fprintf(stderr, "linked against Boost version %d\n", BOOST_VERSION); + fprintf(stderr, "-----------------------------\n"); + fprintf(stderr, "Usage: cufflinks [options] \n"); + fprintf(stderr, "General Options:\n"); + fprintf(stderr, " -o/--output-dir write all output files to this directory [ default: ./ ]\n"); +#if ENABLE_THREADS + fprintf(stderr, " -p/--num-threads number of threads used during analysis [ default: 1 ]\n"); +#endif + fprintf(stderr, " --seed value of random number generator seed [ default: 0 ]\n"); + fprintf(stderr, " -G/--GTF quantitate against reference transcript annotations \n"); + fprintf(stderr, " -g/--GTF-guide use reference transcript annotation to guide assembly \n"); + fprintf(stderr, " -M/--mask-gtf ignore all alignments within transcripts in this file \n"); + fprintf(stderr, " -b/--frag-bias-correct use bias correction - reference fasta required [ default: NULL ]\n"); + fprintf(stderr, " -u/--multi-read-correct use 'rescue method' for multi-reads (more accurate) [ default: FALSE ]\n"); + fprintf(stderr, " --library-type library prep used for input reads [ default: below ]\n"); + + fprintf(stderr, "\nAdvanced Abundance Estimation Options:\n"); + fprintf(stderr, " -m/--frag-len-mean average fragment length (unpaired reads only) [ default: 200 ]\n"); + fprintf(stderr, " -s/--frag-len-std-dev fragment length std deviation (unpaired reads only) [ default: 80 ]\n"); + fprintf(stderr, " --upper-quartile-norm use upper-quartile normalization [ default: FALSE ]\n"); + fprintf(stderr, " --max-mle-iterations maximum iterations allowed for MLE calculation [ default: 5000 ]\n"); + fprintf(stderr, " --num-importance-samples number of importance samples for MAP estimation [ default: 1000 ]\n"); + fprintf(stderr, " --compatible-hits-norm count hits compatible with reference RNAs only [ default: FALSE ]\n"); + fprintf(stderr, " --total-hits-norm count all hits for normalization [ default: TRUE ]\n"); + + fprintf(stderr, "\nAdvanced Assembly Options:\n"); + fprintf(stderr, " -L/--label assembled transcripts have this ID prefix [ default: CUFF ]\n"); + fprintf(stderr, " -F/--min-isoform-fraction suppress transcripts below this abundance level [ default: 0.10 ]\n"); + fprintf(stderr, " -j/--pre-mrna-fraction suppress intra-intronic transcripts below this level [ default: 0.15 ]\n"); + fprintf(stderr, " -I/--max-intron-length ignore alignments with gaps longer than this [ default: 300000 ]\n"); + fprintf(stderr, " -a/--junc-alpha alpha for junction binomial test filter [ default: 0.001 ]\n"); + fprintf(stderr, " -A/--small-anchor-fraction percent read overhang taken as 'suspiciously small' [ default: 0.09 ]\n"); + fprintf(stderr, " --min-frags-per-transfrag minimum number of fragments needed for new transfrags [ default: 10 ]\n"); + fprintf(stderr, " --overhang-tolerance number of terminal exon bp to tolerate in introns [ default: 8 ]\n"); + fprintf(stderr, " --max-bundle-length maximum genomic length allowed for a given bundle [ default: 3500000 ]\n"); + fprintf(stderr, " --max-bundle-frags maximum fragments allowed in a bundle before skipping [ default: 500000 ]\n"); + fprintf(stderr, " --min-intron-length minimum intron size allowed in genome [ default: 50 ]\n"); + fprintf(stderr, " --trim-3-avgcov-thresh minimum avg coverage required to attempt 3' trimming [ default: 10 ]\n"); + fprintf(stderr, " --trim-3-dropoff-frac fraction of avg coverage below which to trim 3' end [ default: 0.1 ]\n"); + + 
fprintf(stderr, "\nAdvanced Reference Annotation Guided Assembly Options:\n"); +// fprintf(stderr, " --tile-read-len length of faux-reads [ default: 405 ]\n"); +// fprintf(stderr, " --tile-read-sep distance between faux-reads [ default: 15 ]\n"); + fprintf(stderr, " --no-faux-reads disable tiling by faux reads [ default: FALSE ]\n"); + fprintf(stderr, " --3-overhang-tolerance overhang allowed on 3' end when merging with reference[ default: 600 ]\n"); + fprintf(stderr, " --intron-overhang-tolerance overhang allowed inside reference intron when merging [ default: 30 ]\n"); + + fprintf(stderr, "\nAdvanced Program Behavior Options:\n"); + fprintf(stderr, " -v/--verbose log-friendly verbose processing (no progress bar) [ default: FALSE ]\n"); + fprintf(stderr, " -q/--quiet log-friendly quiet processing (no progress bar) [ default: FALSE ]\n"); + fprintf(stderr, " --no-update-check do not contact server to check for update availability[ default: FALSE ]\n"); + print_library_table(); +} + +int parse_options(int argc, char** argv) +{ + int option_index = 0; + int next_option; + bool F_set = false; + + do { + next_option = getopt_long(argc, argv, short_options, long_options, &option_index); + switch (next_option) { + case -1: /* Done with options. */ + break; + case 'm': + user_provided_fld = true; + def_frag_len_mean = (uint32_t)parseInt(0, "-m/--frag-len-mean arg must be at least 0", print_usage); + break; + case 's': + user_provided_fld = true; + def_frag_len_std_dev = (uint32_t)parseInt(0, "-s/--frag-len-std-dev arg must be at least 0", print_usage); + break; + case 'p': + num_threads = (uint32_t)parseInt(1, "-p/--num-threads arg must be at least 1", print_usage); + break; + case 'F': + min_isoform_fraction = parseFloat(0, 1.0, "-F/--min-isoform-fraction must be between 0 and 1.0", print_usage); + F_set = true; + break; + case 'I': + max_intron_length = parseInt(1, "-I/--max-intron-length must be at least 1", print_usage); + break; + case 'j': + pre_mrna_fraction = parseFloat(0, 1.0, "-j/--pre-mrna-fraction must be between 0 and 1.0", print_usage); + break; + + case 'a': + binomial_junc_filter_alpha = parseFloat(0, 1.0, "-a/--junc-alpha must be between 0 and 1.0", print_usage); + break; + case 'A': + small_anchor_fraction = parseFloat(0, 1.0, "-A/--small-anchor-fraction must be between 0 and 1.0", print_usage); + break; + case OPT_OVERHANG_TOLERANCE: + bowtie_overhang_tolerance = parseInt(0, "--overhang-tolerance must be at least 0", print_usage); + break; + case OPT_NUM_IMP_SAMPLES: + num_importance_samples = parseInt(1, "--num-importance-samples must be at least 1", print_usage); + break; + case OPT_MLE_MAX_ITER: + max_mle_iterations = parseInt(1, "--max-mle-iterations must be at least 1", print_usage); + break; + case OPT_BIAS_MODE: + if (!strcmp(optarg, "site")) + bias_mode = SITE; + else if (!strcmp(optarg, "pos")) + bias_mode = POS; + else if (!strcmp(optarg, "pos_vlmm")) + bias_mode = POS_VLMM; + else if (!strcmp(optarg, "vlmm")) + bias_mode = VLMM; + else if (!strcmp(optarg, "pos_site")) + bias_mode = POS_SITE; + else + { + fprintf(stderr, "Unknown bias mode.\n"); + exit(1); + } + break; + case 'L': + { + user_label = optarg; + break; + } + case 'G': + { + ref_gtf_filename = optarg; + bundle_mode = REF_DRIVEN; + init_bundle_mode = REF_DRIVEN; + break; + } + case 'g': + { + ref_gtf_filename = optarg; + bundle_mode = REF_GUIDED; + init_bundle_mode = REF_GUIDED; + break; + } + case 'M': + { + mask_gtf_filename = optarg; + break; + } + case 'v': + { + if (cuff_quiet) + { + fprintf(stderr, 
"Warning: Can't be both verbose and quiet! Setting verbose only.\n"); + } + cuff_quiet = false; + cuff_verbose = true; + break; + } + case 'q': + { + if (cuff_verbose) + { + fprintf(stderr, "Warning: Can't be both verbose and quiet! Setting quiet only.\n"); + } + cuff_verbose = false; + cuff_quiet = true; + break; + } + case 'N': + { + use_quartile_norm = true; + break; + } + + case 'o': + { + output_dir = optarg; + break; + } + case 'b': + { + fasta_dir = optarg; + corr_bias = true; + break; + } + case 'u': + { + corr_multi = true; + break; + } + case OPT_LIBRARY_TYPE: + { + library_type = optarg; + break; + } + case OPT_MAX_BUNDLE_LENGTH: + { + max_gene_length = parseInt(1, "--max-bundle-length must be at least 1", print_usage); + break; + } + case OPT_MIN_FRAGS_PER_TRANSFRAG: + { + min_frags_per_transfrag = parseInt(0, "--min-frags-per-transfrag must be at least 0", print_usage); + break; + } + case OPT_MIN_INTRON_LENGTH: + { + min_intron_length = parseInt(0, "--min-intron-length must be at least 0", print_usage); + break; + } + case OPT_3_PRIME_AVGCOV_THRESH: + { + trim_3_avgcov_thresh = parseFloat(0, 9999999, "--trim-3-avgcov-thresh must be at least 0", print_usage); + break; + } + case OPT_3_PRIME_DROPOFF_FRAC: + { + trim_3_dropoff_frac = parseFloat(0, 1.0, "--trim-3-dropoff-frac must be between 0 and 1.0", print_usage); + break; + } + case OPT_NO_UPDATE_CHECK: + { + no_update_check = true; + break; + } + case OPT_OUTPUT_FLD: + { + output_fld = true; + break; + } + case OPT_OUTPUT_BIAS_PARAMS: + { + output_bias_params = true; + break; + } + case OPT_USE_EM: + { + use_em = false; + break; + } + case OPT_COLLAPSE_COND_PROB: + { + cond_prob_collapse = false; + break; + } + case OPT_NO_FAUX_READS: + { + enable_faux_reads = false; + break; + } + case OPT_NO_5_EXTEND: + { + enable_5_extend = false; + break; + } + case OPT_3_OVERHANG_TOLERANCE: + { + overhang_3 = parseInt(0, "--3-overhang-tolerance must be at least 0", print_usage); + break; + } + case OPT_TILE_LEN: + { + tile_len = parseInt(0, "--tile-read-len must be at least 0", print_usage); + break; + } + case OPT_TILE_SEP: + { + tile_off = parseInt(0, "--tile-read-sep must be at least 0", print_usage); + break; + } + case OPT_INTRON_OVERHANG_TOLERANCE: + { + ref_merge_overhang_tolerance = parseInt(0, "--intron-overhang-tolerance must be at least 0", print_usage); + break; + } + case OPT_RANDOM_SEED: + { + random_seed = parseInt(0, "--seed must be at least 0", print_usage); + break; + } + case OPT_USE_COMPAT_MASS: + { + use_compat_mass = true; + break; + } + case OPT_USE_TOTAL_MASS: + { + use_total_mass = true; + break; + } + case OPT_MAX_FRAGS_PER_BUNDLE: + { + max_frags_per_bundle = parseInt(0, "--max-bundle-frags must be at least 0", print_usage); + break; + } + default: + print_usage(); + return 1; + } + } while(next_option != -1); + + + if (bundle_mode == REF_DRIVEN) + { + if (!F_set) + { + min_isoform_fraction = 0.0; + } + } + + if (bundle_mode == REF_DRIVEN) + { + allow_junk_filtering = false; + } + + if (library_type != "") + { + map::iterator lib_itr = + library_type_table.find(library_type); + if (lib_itr == library_type_table.end()) + { + fprintf(stderr, "Error: Library type %s not supported\n", library_type.c_str()); + exit(1); + } + else + { +// if (library_type == "transfrags") +// { +// allow_junk_filtering = false; +// } + global_read_properties = &lib_itr->second; + } + } + + if (use_total_mass && use_compat_mass) + { + fprintf (stderr, "Error: please supply only one of --compatible-hits-norm and 
--total-hits-norm\n"); + exit(1); + } + if (use_compat_mass && bundle_mode != REF_DRIVEN) + { + fprintf (stderr, "Error: cannot use --compatible-hits-norm without --GTF\n"); + exit(1); + } + + return 0; +} + +void combine_strand_assemblies(vector& lhs, + vector& rhs, + vector& scaffolds, + vector >* ref_scaffs) +{ + // first check for strand support + for (size_t l = 0; l < lhs.size(); ++l) + { + if (!lhs[l].has_strand_support(ref_scaffs)) + lhs[l].strand(CUFF_STRAND_UNKNOWN); + } + for (size_t r = 0; r < rhs.size(); ++r) + { + if (!rhs[r].has_strand_support(ref_scaffs)) + rhs[r].strand(CUFF_STRAND_UNKNOWN); + } + + vector kept_lhs(lhs.size(), true); + vector kept_rhs(rhs.size(), true); + + // next filter both lists based on reference transcripts (if available) + if (ref_scaffs != NULL) + { + for(size_t l = 0; l < lhs.size(); ++l) + { + foreach(shared_ptr ref_scaff, *ref_scaffs) + { + // if we're past all the overlaps, just stop + if (ref_scaff->left() >= lhs[l].right() + overhang_3) + { + //break; + } + // don't emit assembled transfrags that are contained within reference ones + else if (ref_scaff->contains(lhs[l], 0, overhang_3) && Scaffold::compatible(*ref_scaff, lhs[l], ref_merge_overhang_tolerance)) + { + kept_lhs[l] = false; + } + // if they're compatible but not equal, let's check a few more criteria before + // we decide to emit the assembled guy + else if (ref_scaff->overlapped_3(lhs[l], 0, overhang_3) && Scaffold::compatible(*ref_scaff, lhs[l], ref_merge_overhang_tolerance)) + { + if (ref_scaff->gaps() == lhs[l].gaps()) + { + kept_lhs[l] = false; + } + else + { +// if (enable_5_extend) +// { +// ref_scaff->extend_5(lhs[l]); +// kept_lhs[l] = false; +// } + } + } + } + } + for(size_t r = 0; r < rhs.size(); ++r) + { + foreach(shared_ptr ref_scaff, *ref_scaffs) + { + if (ref_scaff->left() >= rhs[r].right() + overhang_3) + { + //break; + } + else if (ref_scaff->contains(rhs[r], 0, overhang_3) && Scaffold::compatible(*ref_scaff, rhs[r], ref_merge_overhang_tolerance)) + { + kept_rhs[r] = false; + } + else if (ref_scaff->overlapped_3(rhs[r], 0, overhang_3) && Scaffold::compatible(*ref_scaff, rhs[r], ref_merge_overhang_tolerance)) + { + if (ref_scaff->gaps() == rhs[r].gaps()) + { + kept_rhs[r] = false; + } + else + { +// if (enable_5_extend) +// { +// ref_scaff->extend_5(rhs[r]); +// kept_rhs[r] = false; +// } + } + } + } + } + } + + // We want to keep all fwd, all reverse, and only the non-redundant unknowns + // if two unknown strand frags olap, merge them. 
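+ // (concretely, in the pass below: overlapping, compatible transfrags that both lack strand support are merged into a single unknown-strand scaffold; if exactly one of the pair has strand support, the unsupported one is dropped)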
+ for (size_t l = 0; l < lhs.size(); ++l) + { + if (!kept_lhs[l]) + continue; + bool lhs_support = (lhs[l].strand() != CUFF_STRAND_UNKNOWN); + + for (size_t r = 0; r < rhs.size(); ++r) + { + if (!kept_rhs[r]) + continue; + if (Scaffold::overlap_in_genome(lhs[l], rhs[r], 0)) + { + if (Scaffold::compatible(lhs[l], rhs[r])) + { + bool rhs_support = (rhs[r].strand() != CUFF_STRAND_UNKNOWN); + if (!lhs_support && !rhs_support) + { + Scaffold merged; + Scaffold::merge(lhs[l],rhs[r],merged, true); + scaffolds.push_back(merged); + kept_lhs[l] = false; + kept_rhs[r] = false; + break; + } + else if (lhs_support && !rhs_support) + { + kept_rhs[r] = false; + } + else if (!lhs_support && rhs_support) + { + kept_lhs[l] = false; + break; + } + } + } + } + } + + // first trim off any polymerase run-ons, and make 3' ends consistent + clip_by_3_prime_dropoff(lhs); + clip_by_3_prime_dropoff(rhs); + + for (size_t i = 0; i < lhs.size(); ++i) + { + if (kept_lhs[i]) + scaffolds.push_back(lhs[i]); + } + + for (size_t i = 0; i < rhs.size(); ++i) + { + if (kept_rhs[i]) + scaffolds.push_back(rhs[i]); + } +} + +void guess_strand(int bundle_origin, + const vector& hits, + vector& strand_guess) +{ + + for (size_t i = 0; i < hits.size(); ++i) + { + if (hits[i].strand() == CUFF_STRAND_UNKNOWN) + continue; + + for (int K = hits[i].left(); K < hits[i].right(); ++K) + strand_guess[K - bundle_origin] |= hits[i].strand(); + + } +} + +CuffStrand guess_strand_for_interval(const vector& strand_guess, + int left, + int right) +{ + uint8_t guess = CUFF_STRAND_UNKNOWN; + + for (int i = left; i < right; ++i) + { + if (guess == CUFF_BOTH) + return (CuffStrand)guess; + guess |= strand_guess[i]; + } + return (CuffStrand)guess; +} + + +bool scaffolds_for_bundle(const HitBundle& bundle, + vector >& scaffolds, + vector >* ref_scaffs = NULL, + BundleStats* stats = NULL) +{ + if (bundle.hits().size() >= max_frags_per_bundle) + return false; + + bool ref_guided = (ref_scaffs != NULL); + + vector hits; + vector tmp_scaffs; + + for (size_t i = 0; i < bundle.hits().size(); ++i) + { + const MateHit& hit = bundle.hits()[i]; + hits.push_back(Scaffold(hit)); + } + + vector depth_of_coverage(bundle.length(),0); + vector scaff_doc; + map, float> intron_doc; + + // Make sure the avg only uses stuff we're sure isn't pre-mrna fragments + double bundle_avg_doc = compute_doc(bundle.left(), + hits, + depth_of_coverage, + intron_doc, + true); + + if (bundle_avg_doc > 3000) + { + filter_introns(bundle.length(), + bundle.left(), + hits, + min_isoform_fraction, + false, + true); + } + + if (ref_guided && enable_faux_reads && !hits.empty()) + { + vector pseudohits; + foreach(shared_ptr ref_scaff, *ref_scaffs) + { + ref_scaff->tile_with_scaffs(pseudohits, tile_len, tile_off); + } + hits.insert(hits.end(), + pseudohits.begin(), + pseudohits.end()); + inplace_merge(hits.begin(),hits.end()-pseudohits.size(), hits.end(), scaff_lt); + } + + vector strand_guess(bundle.length(), CUFF_STRAND_UNKNOWN); + guess_strand(bundle.left(), + hits, + strand_guess); + + for (size_t i = 0; i < hits.size(); ++i) + { + if (hits[i].strand() == CUFF_STRAND_UNKNOWN) + { + assert (!hits[i].has_intron()); + uint8_t guess = CUFF_STRAND_UNKNOWN; + Scaffold& hit = hits[i]; + const vector& ops = hit.augmented_ops(); + + for (size_t j = 0; j < ops.size(); ++j) + { + const AugmentedCuffOp& op = ops[j]; + if (op.opcode == CUFF_UNKNOWN && op.genomic_length > (int)min_intron_length) + { + guess |= guess_strand_for_interval(strand_guess, + hit.left() - bundle.left(), + hit.right() - bundle.left()); 
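+ // a single sufficiently long unknown gap is enough to attempt a strand guess for this hit, so stop scanning the remaining ops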
+ + break; + } + } + + + if (guess != CUFF_BOTH && guess != CUFF_STRAND_UNKNOWN) + hits[i].strand((CuffStrand)guess); + //else + // fprintf(stderr, "Unknown strand for pair [%d-%d]\n", hit.left(), hit.right()); + } + } + + bool saw_fwd = false; + bool saw_rev = false; + + for (size_t i = 0; i < hits.size(); ++i) + { + const Scaffold& hit = hits[i]; + CuffStrand hs = hit.strand(); + + if (hs == CUFF_FWD) + saw_fwd = true; + if (hs == CUFF_REV) + saw_rev = true; + +// if (hs != CUFF_REV) +// fwd_hits.push_back(hit); +// if (hs != CUFF_FWD) +// rev_hits.push_back(hit); + } + + + vector fwd_scaffolds; + vector rev_scaffolds; + + bool assembled_successfully = false; + + if (saw_fwd && saw_rev) + { + // Forward strand hits + { + vector fwd_hits; + for (size_t i = 0; i < hits.size(); ++i) + { + const Scaffold& hit = hits[i]; + CuffStrand hs = hit.strand(); + if (hs != CUFF_REV) + fwd_hits.push_back(hit); + } + + verbose_msg ("%s\tFiltering forward strand\n", bundle_label->c_str()); + filter_hits(bundle.length(), bundle.left(), fwd_hits); + assembled_successfully |= make_scaffolds(bundle.left(), + bundle.length(), + fwd_hits, + fwd_scaffolds); + } + + // Reverse strand hits + { + vector rev_hits; + for (size_t i = 0; i < hits.size(); ++i) + { + const Scaffold& hit = hits[i]; + CuffStrand hs = hit.strand(); + if (hs != CUFF_FWD) + rev_hits.push_back(hit); + } + + verbose_msg ("%s\tFiltering reverse strand\n", bundle_label->c_str()); + filter_hits(bundle.length(), bundle.left(), rev_hits); + assembled_successfully |= make_scaffolds(bundle.left(), + bundle.length(), + rev_hits, + rev_scaffolds); + } + } + else + { + if (saw_fwd || (!saw_fwd && !saw_rev)) + { + // Forward strand hits + { + vector fwd_hits; + for (size_t i = 0; i < hits.size(); ++i) + { + const Scaffold& hit = hits[i]; + CuffStrand hs = hit.strand(); + if (hs != CUFF_REV) + fwd_hits.push_back(hit); + } + + verbose_msg ("%s\tFiltering forward strand\n", bundle_label->c_str()); + filter_hits(bundle.length(), bundle.left(), fwd_hits); + assembled_successfully |= make_scaffolds(bundle.left(), + bundle.length(), + fwd_hits, + fwd_scaffolds); + + } + } + else + { + // Reverse strand hits + { + vector rev_hits; + for (size_t i = 0; i < hits.size(); ++i) + { + const Scaffold& hit = hits[i]; + CuffStrand hs = hit.strand(); + if (hs != CUFF_FWD) + rev_hits.push_back(hit); + } + + verbose_msg ("%s\tFiltering reverse strand\n", bundle_label->c_str()); + filter_hits(bundle.length(), bundle.left(), rev_hits); + assembled_successfully |= make_scaffolds(bundle.left(), + bundle.length(), + rev_hits, + rev_scaffolds); + } + } + } + + combine_strand_assemblies(fwd_scaffolds, rev_scaffolds, tmp_scaffs, ref_scaffs); + + + // Make sure all the reads are accounted for, including the redundant ones... 
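+ // (each assembled scaffold's hit list is cleared and then repopulated with every hit in the bundle)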
+ for (size_t i = 0; i < tmp_scaffs.size(); ++i) + { + tmp_scaffs[i].clear_hits(); + for (size_t j = 0; j < bundle.hits().size(); ++j) + { + const MateHit& h = bundle.hits()[j]; + tmp_scaffs[i].add_hit(&h); + } + } + + if (ref_guided) + { + scaffolds = *ref_scaffs; + } + if (assembled_successfully) + { + foreach(Scaffold& scaff, tmp_scaffs) + { + scaffolds.push_back(shared_ptr(new Scaffold(scaff))); + } + } + sort(scaffolds.begin(), scaffolds.end(), scaff_lt_sp); + + return assembled_successfully; +} + +//static long double min_abundance = 0.000001; + +#if ENABLE_THREADS +boost::mutex out_file_lock; +boost::mutex thread_pool_lock; +int curr_threads = 0; + +void decr_pool_count() +{ + thread_pool_lock.lock(); + curr_threads--; + thread_pool_lock.unlock(); +} +#endif + +void quantitate_transcript_cluster(AbundanceGroup& transfrag_cluster, + //const RefSequenceTable& rt, + double total_map_mass, + vector& genes, + bool bundle_too_large) +{ + if (transfrag_cluster.abundances().empty()) + return; + + vector gammas; + + vector hits_in_cluster; + + get_alignments_from_scaffolds(transfrag_cluster.abundances(), + hits_in_cluster); + + + // need the avg read length for depth of coverage calculation + double avg_read_length = 0; + foreach (MateHit& hit, hits_in_cluster) + { + if (hit.left_alignment()) + avg_read_length += hit.left_alignment()->read_len(); + if (hit.right_alignment()) + avg_read_length += hit.right_alignment()->read_len(); + } + + if (hits_in_cluster.size()) + avg_read_length /= hits_in_cluster.size(); + + if (library_type != "transfrags") + { + if (bundle_too_large == false) + { + transfrag_cluster.calculate_abundance(hits_in_cluster); + } + else + { + foreach(shared_ptr ab, transfrag_cluster.abundances()) + { + ab->status(NUMERIC_HI_DATA); + } + } + } + else + { + vector >& abundances = transfrag_cluster.abundances(); + + int N = abundances.size(); + double total_fpkm = 0.0; + vector gammas; + for (size_t j = 0; j < N; ++j) + { + double FPKM = abundances[j]->transfrag()->fpkm(); + abundances[j]->FPKM(FPKM); + total_fpkm += FPKM; + gammas.push_back(FPKM); + } + + for (size_t j = 0; j < N; ++j) + { + if (total_fpkm) + gammas[j] /= total_fpkm; + } + + vector > filtered_transcripts = abundances; + filter_junk_isoforms(filtered_transcripts, gammas, abundances, 0); + vector to_keep (abundances.size(), false); + for(size_t i = 0; i < abundances.size(); ++i) + { + shared_ptr ab_i = abundances[i]; + bool found = false; + foreach (shared_ptr ab_j, filtered_transcripts) + { + if (ab_i == ab_j) + { + found = true; + break; + } + } + if (found) + to_keep[i] = true; + } + + AbundanceGroup kept; + transfrag_cluster.filter_group(to_keep, kept); + transfrag_cluster = kept; + } + + vector transfrags_by_strand; + cluster_transcripts(transfrag_cluster, + transfrags_by_strand); + + + foreach (const AbundanceGroup& strand_group, transfrags_by_strand) + { + vector transfrags_by_gene; + + if (bundle_mode == REF_DRIVEN) + { + cluster_transcripts(strand_group, transfrags_by_gene); + } + else + { + cluster_transcripts(strand_group, transfrags_by_gene); + } + + foreach(const AbundanceGroup& gene, transfrags_by_gene) + { + const vector >& iso_abundances = gene.abundances(); + vector isoforms; + + int gene_id = -1; + int num_ref_gene_ids = 0; + bool has_novel_isoform = false; + string ref_gene_id = ""; + + double major_isoform_FPKM = 0; + foreach (shared_ptr iso_ab, iso_abundances) + { + if (iso_ab->transfrag()->is_ref()) + { + if (iso_ab->transfrag()->annotated_gene_id() != ref_gene_id) + { + ref_gene_id = 
iso_ab->transfrag()->annotated_gene_id(); + num_ref_gene_ids++; + } + } + else + { + has_novel_isoform = true; + } + major_isoform_FPKM = max(iso_ab->FPKM(), major_isoform_FPKM); + } + + foreach (shared_ptr iso_ab, iso_abundances) + { + // Calculate transcript depth of coverage and FMI from FPKM + double FPKM = iso_ab->FPKM(); + double density_score = major_isoform_FPKM ? (FPKM / major_isoform_FPKM) : 0; + double density_per_bp = FPKM; + + shared_ptr transfrag = iso_ab->transfrag(); + assert(transfrag); + + double s_len = transfrag->length(); + + density_per_bp *= (total_map_mass / 1000000.0); // yields (mass/(length/1000)) + density_per_bp *= (s_len/ 1000.0); + double estimated_count = density_per_bp; + density_per_bp /= s_len; + density_per_bp *= avg_read_length; + //double density_per_bp = (FPKM * (map_mass / 1000000.0) * 1000.0); + + if (!allow_junk_filtering || transfrag->is_ref() || density_score > min_isoform_fraction) + { + if (gene_id == -1 && (has_novel_isoform || num_ref_gene_ids > 1)) + gene_id = get_next_gene_id(); + + isoforms.push_back(Isoform(*transfrag, + gene_id, + (int)isoforms.size() + 1, + FPKM, + iso_ab->effective_length(), + iso_ab->gamma(), + iso_ab->FPKM_conf(), + density_per_bp, + estimated_count, + density_score, + iso_ab->status(), + ref_gene_id)); + } + } + + if (!isoforms.empty()) + { + Gene g(isoforms, gene.FPKM(), gene.FPKM_conf(), gene.status()); + genes.push_back(g); + } + } + + } + +} + +void quantitate_transcript_clusters(vector >& scaffolds, + long double total_map_mass, + vector& genes, + bool bundle_too_large) +{ + //vector > partials; + //vector > completes; + + vector > split_partials; + // Cleave the partials at their unknowns to minimize FPKM dilation on + // the low end of the expression profile. + for (size_t i = 0; i < scaffolds.size(); ++i) + { + vector c; + scaffolds[i]->get_complete_subscaffolds(c); + foreach (Scaffold& s, c) + { + split_partials.push_back(shared_ptr(new Scaffold(s))); + } + } + + scaffolds = split_partials; + + vector > abundances; + foreach(shared_ptr s, scaffolds) + { + TranscriptAbundance* pT = new TranscriptAbundance; + pT->transfrag(s); + shared_ptr ab(pT); + abundances.push_back(ab); + } + + AbundanceGroup transfrags = AbundanceGroup(abundances); + + vector transfrags_by_cluster; + + cluster_transcripts(transfrags, + transfrags_by_cluster); + + foreach(AbundanceGroup& cluster, transfrags_by_cluster) + { + quantitate_transcript_cluster(cluster, total_map_mass, genes, bundle_too_large); + } + verbose_msg( "%s\tBundle quantitation complete\n", bundle_label->c_str()); +} + +void assemble_bundle(const RefSequenceTable& rt, + HitBundle* bundle_ptr, + BiasLearner* bl_ptr, + long double map_mass, + FILE* ftranscripts, + FILE* fgene_abundances, + FILE* ftrans_abundances, + FILE* fskipped) +{ + HitBundle& bundle = *bundle_ptr; + + char bundle_label_buf[2048]; + sprintf(bundle_label_buf, + "%s:%d-%d", + rt.get_name(bundle.ref_id()), + bundle.left(), + bundle.right()); + +#if ENABLE_THREADS + bundle_label.reset(new string(bundle_label_buf)); +#else + bundle_label = shared_ptr(new string(bundle_label_buf)); +#endif + + verbose_msg( "%s\tProcessing new bundle with %d alignments\n", + bundle_label->c_str(), + (int)bundle.hits().size()); + +#if ENABLE_THREADS + boost::this_thread::at_thread_exit(decr_pool_count); +#endif + + vector > scaffolds; + + bool successfully_assembled = true; + + switch(bundle_mode) + { + case REF_DRIVEN: + scaffolds = bundle.ref_scaffolds(); + if (!final_est_run && scaffolds.size() != 1) // Only learn bias 
on single isoforms + { + delete bundle_ptr; + return; + } + break; + case REF_GUIDED: + successfully_assembled = scaffolds_for_bundle(bundle, scaffolds, &bundle.ref_scaffolds()); + break; + case HIT_DRIVEN: + successfully_assembled = scaffolds_for_bundle(bundle, scaffolds); + break; + default: + assert(false); + } + + if (successfully_assembled == false) + { + +#if ENABLE_THREADS + out_file_lock.lock(); +#endif + + int mask_region_id = get_next_skipped_region_id(); + fprintf(fskipped, + "%s\tCufflinks\texon\t%d\t%d\t%d\t%s\t.\tgene_id \"mask_%d\"; transcript_id \"mask_id%d/+\";\n", + rt.get_name(bundle.ref_id()), + bundle.left() + 1, + bundle.right(), // GTF intervals are inclusive on both ends, but ours are half-open + 0, + "+", + mask_region_id, + mask_region_id); + + fprintf(fskipped, + "%s\tCufflinks\texon\t%d\t%d\t%d\t%s\t.\tgene_id \"mask_%d\"; transcript_id \"mask_id%d/-\";\n", + rt.get_name(bundle.ref_id()), + bundle.left() + 1, + bundle.right(), // GTF intervals are inclusive on both ends, but ours are half-open + 0, + "-", + mask_region_id, + mask_region_id); + + +#if ENABLE_THREADS + out_file_lock.unlock(); +#endif + delete bundle_ptr; + return; + } + + if (scaffolds.empty()) + { + delete bundle_ptr; + return; + } + + vector genes; + + bool bundle_too_large = bundle_ptr->hits().size() >= max_frags_per_bundle; + + // FIXME: this routine does more than just quantitation, and should be + // renamed or refactored. + quantitate_transcript_clusters(scaffolds, + map_mass, + genes, + bundle_too_large); + + verbose_msg( "%s\tFiltering bundle assembly\n", bundle_label->c_str()); + + if (allow_junk_filtering) + filter_junk_genes(genes); + + + if (!final_est_run && bundle_mode==REF_DRIVEN) // Bias needs to be learned + { + for (size_t i = 0; i < genes.size(); ++i) + { + if (genes[i].isoforms().size() == 1) + { + bl_ptr -> preProcessTranscript(genes[i].isoforms()[0].scaffold()); + } + } + } + +#if ENABLE_THREADS + out_file_lock.lock(); +#endif + + // Get hit_introns for full_read_support test if ref-guided + set* hit_introns = NULL; + if (init_bundle_mode == REF_GUIDED) + { + hit_introns = new set(); + foreach(const MateHit& h, bundle.non_redundant_hits()) + { + Scaffold s(h); + foreach (AugmentedCuffOp a, s.augmented_ops()) + { + if (a.opcode == CUFF_INTRON) + { + hit_introns->insert(a); + } + } + } + } + + + size_t num_scaffs_reported = 0; + for (size_t i = 0; i < genes.size(); ++i) + { + const Gene& gene = genes[i]; + const vector& isoforms = gene.isoforms(); + set annotated_gene_names; + set annotated_tss_ids; + for (size_t j = 0; j < isoforms.size(); ++j) + { + const Isoform& iso = isoforms[j]; + + vector H(iso.scaffold().mate_hits().size(), 0); + copy(iso.scaffold().mate_hits().begin(), + iso.scaffold().mate_hits().end(), + H.begin()); + + vector isoform_exon_recs; + + iso.get_gtf(isoform_exon_recs, rt, hit_introns); + + for (size_t g = 0; g < isoform_exon_recs.size(); ++g) + { + fprintf(ftranscripts, "%s", isoform_exon_recs[g].c_str()); + } + + fflush(ftranscripts); + + const char* status; + if (iso.status()==NUMERIC_OK) + status = "OK"; + else if (iso.status() == NUMERIC_LOW_DATA) + status = "LOWDATA"; + else if (iso.status() == NUMERIC_HI_DATA) + status = "HIDATA"; + else if (iso.status() == NUMERIC_FAIL) + status = "FAIL"; + else + assert (false); + + fprintf(ftrans_abundances,"%s\t%c\t%s\t%s\t%s\t%s\t%s:%d-%d\t%d\t%lg\t%lg\t%lg\t%lg\t%s\n", + iso.trans_id().c_str(), + (iso.scaffold().nearest_ref_classcode() == 0 ? 
'-' : iso.scaffold().nearest_ref_classcode()), + (iso.scaffold().nearest_ref_id() == "" ? "-" : iso.scaffold().nearest_ref_id().c_str()), + gene.gene_id().c_str(), + (iso.scaffold().annotated_gene_name() == "" ? "-" : iso.scaffold().annotated_gene_name().c_str()), + (iso.scaffold().annotated_tss_id() == "" ? "-" : iso.scaffold().annotated_tss_id().c_str()), + rt.get_name(bundle.ref_id()), + iso.scaffold().left(), + iso.scaffold().right(), + iso.scaffold().length(), + iso.coverage(), + iso.FPKM(), + iso.confidence().low, + iso.confidence().high, + status); + fflush(ftrans_abundances); + + annotated_gene_names.insert(iso.scaffold().annotated_gene_name()); + annotated_tss_ids.insert(iso.scaffold().annotated_tss_id()); + + num_scaffs_reported++; + } + + const char* status = "OK"; + if (gene.status()==NUMERIC_OK) + status = "OK"; + else if (gene.status() == NUMERIC_LOW_DATA) + status = "LOWDATA"; + else if (gene.status() == NUMERIC_HI_DATA) + status = "HIDATA"; + else if (gene.status() == NUMERIC_FAIL) + status = "FAIL"; + else + assert (false); + + string gene_names = cat_strings(annotated_gene_names); + if (gene_names == "") gene_names = "-"; + string tss_ids = cat_strings(annotated_tss_ids); + if (tss_ids == "") tss_ids = "-"; + + fprintf(fgene_abundances,"%s\t%c\t%s\t%s\t%s\t%s\t%s:%d-%d\t%s\t%s\t%lg\t%lg\t%lg\t%s\n", + gene.gene_id().c_str(), + '-', + "-", + gene.gene_id().c_str(), + gene_names.c_str(), + tss_ids.c_str(), + rt.get_name(bundle.ref_id()), + gene.left(), + gene.right(), + "-", + "-", + gene.FPKM(), + gene.confidence().low, + gene.confidence().high, + status); + fflush(fgene_abundances); + } + delete hit_introns; + //fprintf(fbundle_tracking, "CLOSE %d\n", bundle.id()); + + if (bundle_mode==REF_DRIVEN && num_scaffs_reported > bundle.ref_scaffolds().size()) + { + fprintf(stderr, "Error: reported more isoforms than in reference!\n"); + exit(1); + } + + verbose_msg( "%s\tBundle complete\n", bundle_label->c_str()); + +#if ENABLE_THREADS + out_file_lock.unlock(); +#endif + + genes.clear(); + scaffolds.clear(); + delete bundle_ptr; +} + +bool assemble_hits(BundleFactory& bundle_factory, BiasLearner* bl_ptr) +{ + //srand(time(0)); + + RefSequenceTable& rt = bundle_factory.ref_table(); + + //FILE* fbundle_tracking = fopen("open_bundles", "w"); + + //FILE* fstats = fopen("bundles.stats", "w"); + FILE* ftrans_abundances = fopen(string(output_dir + "/" + "isoforms.fpkm_tracking").c_str(), "w"); + //fprintf(ftrans_abundances,"trans_id\tbundle_id\tchr\tleft\tright\tFPKM\tFMI\tfrac\tFPKM_conf_lo\tFPKM_conf_hi\tcoverage\tlength\teffective_length\tstatus\n"); + fprintf(ftrans_abundances,"tracking_id\tclass_code\tnearest_ref_id\tgene_id\tgene_short_name\ttss_id\tlocus\tlength\tcoverage\tFPKM\tFPKM_conf_lo\tFPKM_conf_hi\tFPKM_status\n"); + FILE* fgene_abundances = fopen(string(output_dir + "/" + "genes.fpkm_tracking").c_str(), "w"); + //fprintf(fgene_abundances,"gene_id\tbundle_id\tchr\tleft\tright\tFPKM\tFPKM_conf_lo\tFPKM_conf_hi\tstatus\n"); + fprintf(fgene_abundances,"tracking_id\tclass_code\tnearest_ref_id\tgene_id\tgene_short_name\ttss_id\tlocus\tlength\tcoverage\tFPKM\tFPKM_conf_lo\tFPKM_conf_hi\tFPKM_status\n"); + + FILE* ftranscripts = fopen(string(output_dir + "/" + "transcripts.gtf").c_str(), "w"); + FILE* fskipped = fopen(string(output_dir + "/" + "skipped.gtf").c_str(), "w"); + + string process; + if (corr_bias && corr_multi && final_est_run) + process = "Re-estimating abundances with bias and multi-read correction."; + else if (corr_multi && final_est_run) + process = 
"Re-estimating abundances with multi-read correction."; + else if (corr_bias && final_est_run) + process = "Re-estimating abundances with bias correction."; + else if (bundle_mode==REF_DRIVEN && final_est_run) + process = "Estimating transcript abundances."; + else if (bundle_mode==REF_DRIVEN && corr_bias) + process = "Learning bias parameters."; + else if (bundle_mode==REF_DRIVEN && corr_multi) + process = "Initializing transcript abundances for multi-read correction."; + else if (corr_multi) + process = "Assembling transcripts and initializing abundances for multi-read correction."; + else + process = "Assembling transcripts and estimating abundances."; + + ProgressBar p_bar(process, bundle_factory.read_group_properties()->total_map_mass()); + + while(true) + { + HitBundle* bundle_ptr = new HitBundle(); + + if (!bundle_factory.next_bundle(*bundle_ptr)) + { + delete bundle_ptr; + break; + } + + HitBundle& bundle = *bundle_ptr; + + char bundle_label_buf[2048]; + sprintf(bundle_label_buf, + "%s:%d-%d", + rt.get_name(bundle.ref_id()), + bundle.left(), + bundle.right()); + + if (bundle.right() - bundle.left() > max_gene_length) + { + fprintf(stderr, "\n%s\tWarning: Skipping large bundle.\n", bundle_label_buf); + delete bundle_ptr; + continue; + } + + BundleStats stats; +#if ENABLE_THREADS + while(1) + { + thread_pool_lock.lock(); + if (curr_threads < num_threads) + { + thread_pool_lock.unlock(); + break; + } + + thread_pool_lock.unlock(); + + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + + } +#endif + p_bar.update(bundle_label_buf, bundle.raw_mass()); + +#if ENABLE_THREADS + thread_pool_lock.lock(); + curr_threads++; + thread_pool_lock.unlock(); + + thread asmbl(assemble_bundle, + boost::cref(rt), + bundle_ptr, + bl_ptr, + bundle_factory.read_group_properties()->normalized_map_mass(), + ftranscripts, + fgene_abundances, + ftrans_abundances, + fskipped); +#else + assemble_bundle(boost::cref(rt), + bundle_ptr, + bl_ptr, + bundle_factory.read_group_properties()->normalized_map_mass(), + ftranscripts, + fgene_abundances, + ftrans_abundances, + fskipped); +#endif + + } + +#if ENABLE_THREADS + while(1) + { + thread_pool_lock.lock(); + if (curr_threads == 0) + { + thread_pool_lock.unlock(); + break; + } + p_bar.remaining(curr_threads); + + thread_pool_lock.unlock(); + //fprintf(stderr, "waiting to exit\n"); + boost::this_thread::sleep(boost::posix_time::milliseconds(5)); + } +#endif + + p_bar.complete(); + + if(!final_est_run && bundle_mode==REF_DRIVEN) // We are learning bias + { + bl_ptr->normalizeParameters(); + if (output_bias_params) + bl_ptr->output(); + } + + fclose(ftranscripts); + fclose(ftrans_abundances); + fclose(fgene_abundances); + fclose(fskipped); + return true; +} + +void driver(const string& hit_file_name, FILE* ref_gtf, FILE* mask_gtf) +{ + ReadTable it; + RefSequenceTable rt(true, false); + + shared_ptr hit_factory; + + try + { + hit_factory = shared_ptr(new BAMHitFactory(hit_file_name, it, rt)); + } + catch (std::runtime_error& e) + { + fprintf(stderr, "File %s doesn't appear to be a valid BAM file, trying SAM...\n", + hit_file_name.c_str()); + + try + { + hit_factory = shared_ptr(new SAMHitFactory(hit_file_name, it, rt)); + } + catch (std::runtime_error& e) + { + fprintf(stderr, "Error: cannot open alignment file %s for reading\n", + hit_file_name.c_str()); + exit(1); + } + } + + BundleFactory& bundle_factory = *(new BundleFactory(hit_factory, bundle_mode)); + shared_ptr rg_props =bundle_factory.read_group_properties(); + BadIntronTable bad_introns; + + 
rt.print_rec_ordering(); + + vector > ref_mRNAs; + if (ref_gtf) + { + ::load_ref_rnas(ref_gtf, bundle_factory.ref_table(), ref_mRNAs, corr_bias && bundle_mode == REF_DRIVEN, false); + bundle_factory.set_ref_rnas(ref_mRNAs); + } + rt.print_rec_ordering(); + vector > mask_rnas; + if (mask_gtf) + { + ::load_ref_rnas(mask_gtf, bundle_factory.ref_table(), mask_rnas, false, false); + bundle_factory.set_mask_rnas(mask_rnas); + } + + vector count_table; + if (bundle_mode != HIT_DRIVEN) + inspect_map(bundle_factory, NULL, count_table); + else + inspect_map(bundle_factory, &bad_introns, count_table); + + + verbose_msg("%d ReadHits still live\n", num_deleted); + verbose_msg("Found %lu reference contigs\n", rt.size()); + + foreach(shared_ptr ref_scaff, ref_mRNAs) + { + ref_scaff->clear_hits(); + } + + //fprintf(stderr, "ReadHit delete count is %d\n", num_deleted); + + BiasLearner* bl_ptr = new BiasLearner(rg_props->frag_len_dist()); + bundle_factory.read_group_properties(rg_props); + + //if (ref_gtf) -- why? bad introns are bad + bundle_factory.bad_intron_table(bad_introns); + + max_frag_len = rg_props->frag_len_dist()->max(); + min_frag_len = rg_props->frag_len_dist()->min(); + verbose_msg("\tTotal map density: %Lf\n", rg_props->total_map_mass()); + + if (corr_bias || corr_multi) final_est_run = false; + + assemble_hits(bundle_factory, bl_ptr); + + if (final_est_run) + { + ref_mRNAs.clear(); + return; + } + + hit_factory->reset(); + delete &bundle_factory; + BundleFactory bundle_factory2(hit_factory, REF_DRIVEN); + rg_props->bias_learner(shared_ptr(bl_ptr)); + rg_props->multi_read_table()->valid_mass(true); + bundle_factory2.read_group_properties(rg_props); + + if (bundle_mode==HIT_DRIVEN || bundle_mode==REF_GUIDED) + { + ref_gtf = fopen(string(output_dir + "/transcripts.gtf").c_str(), "r"); + ref_mRNAs.clear(); + ::load_ref_rnas(ref_gtf, bundle_factory2.ref_table(), ref_mRNAs, corr_bias, true); + } + bundle_factory2.set_ref_rnas(ref_mRNAs); + if (mask_gtf) + { + mask_rnas.clear(); + ::load_ref_rnas(mask_gtf, bundle_factory2.ref_table(), mask_rnas, false, false); + bundle_factory2.set_mask_rnas(mask_rnas); + } + bundle_factory2.reset(); + + if(corr_bias && (bundle_mode==HIT_DRIVEN || bundle_mode==REF_GUIDED)) + { + // We still need to learn the bias since we didn't have the sequences before assembly + learn_bias(bundle_factory2, *bl_ptr); + bundle_factory2.reset(); + } + + bundle_mode = REF_DRIVEN; + final_est_run = true; + assemble_hits(bundle_factory2, bl_ptr); + ref_mRNAs.clear(); +} + +int main(int argc, char** argv) +{ + init_library_table(); + string cmdline; + for (int i=0;i= argc) + { + print_usage(); + return 1; + } + + if (!no_update_check) + check_version(PACKAGE_VERSION); + + if (cuff_quiet || cuff_verbose) + fprintf(stderr, "Command line:\n%s\n", cmdline.c_str()); + string sam_hits_file_name = argv[optind++]; + + + if (random_seed == -1) + random_seed = time(NULL); + + srand48(random_seed); + + FILE* ref_gtf = NULL; + if (ref_gtf_filename != "") + { + ref_gtf = fopen(ref_gtf_filename.c_str(), "r"); + if (!ref_gtf) + { + fprintf(stderr, "Error: cannot open reference GTF file %s for reading\n", + ref_gtf_filename.c_str()); + exit(1); + } + } + + FILE* mask_gtf = NULL; + if (mask_gtf_filename != "") + { + mask_gtf = fopen(mask_gtf_filename.c_str(), "r"); + if (!mask_gtf) + { + fprintf(stderr, "Error: cannot open mask GTF file %s for reading\n", + mask_gtf_filename.c_str()); + exit(1); + } + } + + if (output_dir != "") + { + int retcode = mkpath(output_dir.c_str(), 0777); + if (retcode == 
-1) + { + if (errno != EEXIST) + { + fprintf (stderr, + "Error: cannot create directory %s\n", + output_dir.c_str()); + exit(1); + } + } + } + + driver(sam_hits_file_name, ref_gtf, mask_gtf); + + return 0; +} diff --git a/src/cuffmerge b/src/cuffmerge new file mode 100755 index 0000000..a2b6f4d --- /dev/null +++ b/src/cuffmerge @@ -0,0 +1,576 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +cuffmerge.py + +Created by Cole Trapnell on 2011-03-17. +Copyright (c) 2011 Cole Trapnell. All rights reserved. +""" + +import sys +import getopt +from datetime import datetime, date, time +import shutil +import subprocess +import errno +import os +import tempfile +import warnings +import types + +help_message = ''' +cuffmerge takes two or more Cufflinks GTF files and merges them into a +single unified transcript catalog. Optionally, you can provide the script +with a reference GTF, and the script will use it to attach gene names and other +metadata to the merged catalog. + +Usage: + cuffmerge [Options] + +Options: + -h/--help Prints the help message and exits + -o Directory where merged assembly will be written [ default: ./merged_asm ] + -g/--ref-gtf An optional "reference" annotation GTF. + -s/--ref-sequence / Genomic DNA sequences for the reference. + --min-isoform-fraction <0-1.0> Discard isoforms with abundance below this [ default: 0.05 ] + -p/--num-threads Use this many threads to merge assemblies. [ default: 1 ] + --keep-tmp Keep all intermediate files during merge +''' + +output_dir = "./merged_asm/" +logging_dir = output_dir + "logs/" +run_log = None +run_cmd = None +tmp_dir = output_dir + "/meta_asm_tmp/" +bin_dir = sys.path[0] + "/" +run_meta_assembly = True +fail_str = "\t[FAILED]\n" +params = None + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + +class TestParams: + + class SystemParams: + def __init__(self, + threads, + keep_tmp): + self.threads = threads + self.keep_tmp = keep_tmp + + def parse_options(self, opts): + for option, value in opts: + if option in ("-p", "--num-threads"): + self.threads = int(value) + if option in ("--keep-tmp"): + self.keep_tmp = True + + def check(self): + pass + + def __init__(self): + self.system_params = self.SystemParams(1, # threads + False) # keep_tmp + self.ref_gtf = None + self.fasta = None + self.min_isoform_frac = 0.05 + + def check(self): + self.system_params.check() + + def parse_options(self, argv): + try: + opts, args = getopt.getopt(argv[1:], + "hvp:o:g:M:s:q:F:", + ["version", + "help", + "ref-sequence=", + "ref-gtf=", + "output-dir=", + "num-threads=", + "keep-tmp", + "min-isoform-fraction="]) + except getopt.error, msg: + raise Usage(msg) + + self.system_params.parse_options(opts) + + global output_dir + global logging_dir + global tmp_dir + + # option processing + for option, value in opts: + if option in ("-v", "--version"): + print "merge_cuff_asms v%s" % (get_version()) + exit(0) + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-g", "--ref-gtf"): + self.ref_gtf = value + if option in ("-s", "--ref-sequence"): + self.fasta = value + if option in ("-F", "--min-isoform-fraction"): + self.min_isoform_frac = float(value) + if option in ("-o", "--output-dir"): + output_dir = value + "/" + logging_dir = output_dir + "logs/" + tmp_dir = output_dir + "tmp/" + + return args + + +def right_now(): + curr_time = datetime.now() + return curr_time.strftime("%c") + +def prepare_output_dir(): + + print >> sys.stderr, "[%s] Preparing output location %s" % (right_now(), output_dir) + if 
os.path.exists(output_dir): + pass + else: + os.makedirs(output_dir) + + #print >> sys.stderr, "Checking for %s", logging_dir + if os.path.exists(logging_dir): + pass + else: + #print >> sys.stderr, "Creating %s", logging_dir + os.makedirs(logging_dir) + + if os.path.exists(tmp_dir): + pass + else: + os.makedirs(tmp_dir) + +def formatTD(td): + hours = td.seconds // 3600 + minutes = (td.seconds % 3600) // 60 + seconds = td.seconds % 60 + return '%02d:%02d:%02d' % (hours, minutes, seconds) + +def tmp_name(prefix): + tmp_root = output_dir + "tmp/" + if os.path.exists(tmp_root): + pass + else: + os.mkdir(tmp_root) + return tmp_root + prefix + os.tmpnam().split('/')[-1] + +def cufflinks(out_dir, + sam_file, + min_isoform_frac, + gtf_file=None, + extra_opts=["-q", "--overhang-tolerance", "200", "--library-type=transfrags", "-A","0.0", "--min-frags-per-transfrag", "0", "--no-5-extend"], + lsf=False, + curr_queue=None): + if gtf_file != None: + print >> sys.stderr, "[%s] Quantitating transcripts" % (right_now()) + else: + print >> sys.stderr, "[%s] Assembling transcripts" % (right_now()) + + cmd = ["cufflinks"] + + if out_dir != None and out_dir != "": + cmd.extend(["-o", out_dir]) + + cmd.extend(["-F", str(min_isoform_frac)]) + + if gtf_file != None: + cmd.extend(["-g", gtf_file]) + + if extra_opts != None: + cmd.extend(extra_opts) + global params + # Run Cufflinks with more than one thread? + cmd.extend(["-p", str(params.system_params.threads)]) + + cmd.append(sam_file) + + try: + print >> run_log, " ".join(cmd) + ret = subprocess.call(cmd) + if ret != 0: + print >> sys.stderr, fail_str, "Error: could not execute cufflinks" + exit(1) + # cufflinks not found + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?" + exit(1) + +def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf): + + print >> sys.stderr, "[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf) + cmd = ["cuffcompare"] + + if prefix != None: + cmd.extend(["-o", prefix]) + if ref_gtf != None: + cmd.extend(["-r", ref_gtf]) + if fasta != None: + cmd.extend(["-s", fasta]) + if type(cuff_gtf) == types.ListType: + for g in cuff_gtf: + cmd.extend([g]) + else: + cmd.extend([cuff_gtf]) + + try: + print >> run_log, " ".join(cmd) + ret = subprocess.call(cmd) + if ret != 0: + print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + exit(1) + # cuffcompare not found + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" + exit(1) + +def gtf_to_sam(gtf_filename): + + sam_out = tmp_name("gtf2sam_") + + cmd = ["gtf_to_sam"] + cmd.append("-F") + cmd.append(gtf_filename) + cmd.append(sam_out) + try: + print >> run_log, " ".join(cmd) + ret = subprocess.call(cmd) + if ret != 0: + print >> sys.stderr, fail_str, "Error: could not execute gtf_to_sam" + exit(1) + # gtf_to_sam not found + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?" 
+ exit(1) + return sam_out + +def test_input_files(filename_list): + """This function takes a file that contains a list of GTF files, + tests accessibility of each, and returns the list of filenames""" + + OK = True + input_files = [] + for line in filename_list: + line = line.strip() + + # Skip comment line + if len(line) == 0 or line[0] == "#": + continue + try: + g = open(line,"r") + input_files.append(line) + + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: could not open %s" % line + OK = False + if not OK: + sys.exit(1) + return input_files + +def convert_gtf_to_sam(gtf_filename_list): + """This function takes a list of GTF files, converts them all to + temporary SAM files, and returns the list of temporary file names.""" + print >> sys.stderr, "[%s] Converting GTF files to SAM" % (right_now()) + OK = True + sam_input_filenames = [] + for line in gtf_filename_list: + try: + sam_out = gtf_to_sam(line) + sam_input_filenames.append(sam_out) + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: could not open %s" % line + OK = False + if not OK: + sys.exit(1) + return sam_input_filenames + +def merge_sam_inputs(sam_input_list, header): + sorted_map_name = tmp_name( "mergeSam_") + + sorted_map = open(sorted_map_name, "w") + + #print header + + # The header was built from a dict keyed by chrom, so + # the records will be lexicographically ordered and + # should match the BAM after the sort below. + print >> sorted_map, header, + + sorted_map.close() + sort_cmd =["sort", + "-k", + "3,3", + "-k", + "4,4n", + "--temporary-directory="+tmp_dir] + sort_cmd.extend(sam_input_list) + + print >> run_log, " ".join(sort_cmd), ">", sorted_map_name + subprocess.call(sort_cmd, + stdout=open(sorted_map_name, "a")) + return sorted_map_name + +def compare_to_reference(meta_asm_gtf, ref_gtf, fasta): + print >> sys.stderr, "[%s] Comparing against reference file %s" % (right_now(), ref_gtf) + ref_str = "" + if ref_gtf != None: + ref_str = " -r %s " % ref_gtf + + if fasta != None: + comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s -s %s %s''' % (ref_str, fasta, meta_asm_gtf) + else: + comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s %s''' % (ref_str, meta_asm_gtf) + + #cmd = bsub_cmd(comp_cmd, "/gencode_cmp", True, job_mem=8) + cmd = comp_cmd + + try: + print >> run_log, cmd + ret = subprocess.call(cmd,shell=True) + if ret != 0: + print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + exit(1) + #tmap_out = meta_asm_gtf.split("/")[-1] + ".tmap" + tfpath, tfname = os.path.split(meta_asm_gtf) + if tfpath: tfpath+='/' + tmap_out = tfpath+'tmp_meta_asm.'+tfname+".tmap" + return tmap_out + # cuffcompare not found + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" 
+ exit(1) + +def select_gtf(gtf_in_filename, ids, gtf_out_filename): + f_gtf = open(gtf_in_filename) + #print >> sys.stderr, "Select GTF: Ids are: " + #print >> sys.stderr, ids + #print >> sys.stderr, "reference gtf file name:" + #print >> sys.stderr, gtf_in_filename + out_gtf = open(gtf_out_filename, "w") + for line in f_gtf: + line = line.strip() + cols = line.split('\t') + if len(cols) < 9: + continue + attrs = cols[8] + attr_cols = attrs.split(';') + for col in attr_cols: + if col.find("transcript_id") != -1: + first_quote = col.find('"') + last_quote = col.find('"', first_quote + 1) + transcript = col[first_quote + 1:last_quote] + #print >> sys.stderr, transcript + if transcript in ids: + print >> out_gtf, line + + +def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None): + print >> sys.stderr, "[%s] Merging linc gtf files with cuffcompare" % (right_now()) + cmd = ["cuffcompare"] + + cmd.extend(["-o", merged_gtf]) + if ref_gtf != None: + cmd.extend(["-r", ref_gtf]) + + cmd.extend(gtf_filenames) + cmd = " ".join(cmd) + #cmd = bsub_cmd(cmd, "/merge_gtf", True, job_mem=8) + + try: + print >> run_log, cmd + ret = subprocess.call(cmd, shell=True) + if ret != 0: + print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + exit(1) + return merged_gtf + ".combined.gtf" + # cuffcompare not found + except OSError, o: + if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: + print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" + exit(1) + +def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_codes=["c", "i", "r", "p", "e"]): + #print >> sys.stderr, "Cuffcmpare all assemblies GTFs" + + tmap = compare_to_reference(gtf_input_file, ref_gtf, fasta_file) + + #print >> sys.stderr, "Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes) + selected_ids= set([]) + f_tmap = open(tmap) + #out = open("tmp_meta_asm_selectedIds.txt", "w") + for line in f_tmap: + line = line.strip() + cols = line.split('\t') + if len(cols) < 5: + continue + class_code = cols[2] + name = cols[4] + if class_code not in class_codes: + selected_ids.add(name) + + global output_dir + asm_dir = output_dir + + if os.path.exists(asm_dir): + pass + else: + os.mkdir(asm_dir) + current_asm_gtf = output_dir +"transcripts.gtf" + select_gtf(current_asm_gtf, selected_ids, output_dir + "/merged.gtf") + mtmap = compare_to_reference(output_dir + "/merged.gtf", ref_gtf, fasta_file) + if os.path.exists(mtmap): + os.remove(mtmap) + if os.path.exists(mtmap.split(".tmap")[0]+".refmap"): + os.remove(mtmap.split(".tmap")[0]+".refmap") + + shutil.move("tmp_meta_asm.combined.gtf", output_dir + "/merged.gtf") + +# os.remove("tmp_meta_asm.combined.gtf") + if os.path.exists("tmp_meta_asm.loci"): + os.remove("tmp_meta_asm.loci") + if os.path.exists("tmp_meta_asm.tracking"): + os.remove("tmp_meta_asm.tracking") + if os.path.exists("tmp_meta_asm.stats"): + os.remove("tmp_meta_asm.stats") + if os.path.exists(tmap): + os.remove(tmap) + if os.path.exists(tmap.split(".tmap")[0]+".refmap"): + os.remove(tmap.split(".tmap")[0]+".refmap") + #tmp_dir = asm_dir + #tmp_files = os.listdir(tmp_dir) + #for t in tmp_files: + # os.remove(tmp_dir+t) + #os.rmdir(tmp_dir) + +#os.remove("tmp_meta_asm.tmap") + +def get_version(): + return "1.0.0" + +def get_gtf_chrom_info(gtf_filename, known_chrom_info=None): + gtf_file = open(gtf_filename) + if known_chrom_info == None: + chroms = {} + else: + chroms = known_chrom_info + for line in gtf_file: + line = line.strip() 
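+        # Note (added for clarity): each record folds its chromosome's start/end
+        # coordinates into a running [min, max] span; the max later becomes the LN
+        # field of the @SQ lines built by header_for_chrom_info().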
+ + if len(line) == 0 or line[0] == "#": + continue + cols = line.split('\t') + if len(cols) < 8: + continue + chrom = cols[0] + left = int(cols[3]) + right = int(cols[4]) + bounds = chroms.setdefault(chrom, [9999999999,-1]) + if bounds[0] > left: + bounds[0] = left + if bounds[1] < right: + bounds[1] = right + return chroms + +def header_for_chrom_info(chrom_info): + header_strs = ["""@HD\tVN:1.0\tSO:coordinate"""] + chrom_list = [(chrom, limits) for chrom, limits in chrom_info.iteritems()] + chrom_list.sort(lambda x,y: cmp(x[0],y[0])) + #print chrom_list + for chrom, limits in chrom_list: + line = "@SQ\tSN:%s\tLN:\t%d" % (chrom, limits[1]) + header_strs.append(line) + header_strs.append("@PG\tID:cuffmerge\tVN:1.0.0\n") + header = "\n".join(header_strs) + return header + +def main(argv=None): + + # Set this so sort is consistent across platforms and python versions + # (and always character-lexicographic) + os.environ['LC_ALL']='C' + warnings.filterwarnings("ignore", "tmpnam is a potential security risk") + global params + params = TestParams() + + try: + if argv is None: + argv = sys.argv + args = params.parse_options(argv) + params.check() + + if len(args) < 1: + raise(Usage(help_message)) + + global run_log + global run_cmd + + print >> sys.stderr + print >> sys.stderr, "[%s] Beginning transcriptome assembly merge" % (right_now()) + print >> sys.stderr, "-------------------------------------------" + print >> sys.stderr + + start_time = datetime.now() + prepare_output_dir() + + run_log = open(logging_dir + "run.log", "w", 0) + run_cmd = " ".join(argv) + print >> run_log, run_cmd + + transfrag_list_file = open(args[0], "r") + + if params.ref_gtf != None: + test_input_files([params.ref_gtf]) + else: + print >> sys.stderr, "Warning: no reference GTF provided!" + + # Check that all the primary assemblies are accessible before starting the time consuming stuff + gtf_input_files = test_input_files(transfrag_list_file) + + all_gtfs = [] + all_gtfs.extend(gtf_input_files) + if params.ref_gtf != None: + all_gtfs.append(params.ref_gtf) + chrom_info = {} + for gtf in all_gtfs: + chrom_info = get_gtf_chrom_info(gtf, chrom_info) + + header = header_for_chrom_info(chrom_info) + + #Meta assembly option: + global run_meta_assembly + if run_meta_assembly: + # Convert the primary assemblies to SAM format + sam_input_files = convert_gtf_to_sam(gtf_input_files) + # Merge the primary assembly SAMs into a single input SAM file + merged_sam_filename = merge_sam_inputs(sam_input_files, header) + # Run cufflinks on the primary assembly transfrags to generate a meta-assembly + cufflinks(output_dir, merged_sam_filename, params.min_isoform_frac, params.ref_gtf) + compare_meta_asm_against_ref(params.ref_gtf, params.fasta, output_dir+"/transcripts.gtf") + #Meta Cuffcompare option: + else: + cuffcompare_all_assemblies(gtf_input_files) #FIXME: where is this function ? + + + if not params.system_params.keep_tmp: + tmp_files = os.listdir(tmp_dir) + for t in tmp_files: + os.remove(tmp_dir+t) + os.rmdir(tmp_dir) + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + return 2 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/differential.cpp b/src/differential.cpp new file mode 100644 index 0000000..3e5cff0 --- /dev/null +++ b/src/differential.cpp @@ -0,0 +1,1791 @@ +/* + * differential.cpp + * cufflinks + * + * Created by Cole Trapnell on 3/15/10. + * Copyright 2010 Cole Trapnell. All rights reserved. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "abundances.h" +#include "differential.h" +#include "clustering.h" +#include "differential.h" +#include "sampling.h" + +using namespace std; + +double min_read_count = 10; + +#if ENABLE_THREADS +mutex _launcher_lock; +mutex locus_thread_pool_lock; +int locus_curr_threads = 0; +int locus_num_threads = 0; + +void decr_pool_count() +{ + locus_thread_pool_lock.lock(); + locus_curr_threads--; + locus_thread_pool_lock.unlock(); +} +#endif + +TestLauncher::launcher_sample_table::iterator TestLauncher::find_locus(const string& locus_id) +{ + launcher_sample_table::iterator itr = _samples.begin(); + for(; itr != _samples.end(); ++itr) + { + if (itr->first == locus_id) + return itr; + } + return _samples.end(); +} + +void TestLauncher::register_locus(const string& locus_id) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_launcher_lock); +#endif + + launcher_sample_table::iterator itr = find_locus(locus_id); + if (itr == _samples.end()) + { + pair p; + vector >abs(_orig_workers); + _samples.push_back(make_pair(locus_id, abs)); + } +} + +void TestLauncher::abundance_avail(const string& locus_id, + shared_ptr ab, + size_t factory_id) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_launcher_lock); +#endif + launcher_sample_table::iterator itr = find_locus(locus_id); + if (itr == _samples.end()) + { + assert(false); + } + itr->second[factory_id] = ab; + //itr->second(factory_id] = ab; +} + +// Note: this routine should be called under lock - it doesn't +// acquire the lock itself. +bool TestLauncher::all_samples_reported_in(vector >& abundances) +{ + foreach (shared_ptr ab, abundances) + { + if (!ab) + { + return false; + } + } + return true; +} + +// Note: this routine should be called under lock - it doesn't +// acquire the lock itself. +void TestLauncher::perform_testing(vector >& abundances) +{ + assert (abundances.size() == _orig_workers); + + // Just verify that all the loci from each factory match up. + for (size_t i = 1; i < abundances.size(); ++i) + { + const SampleAbundances& curr = *(abundances[i]); + const SampleAbundances& prev = *(abundances[i-1]); + + assert (curr.locus_tag == prev.locus_tag); + + const AbundanceGroup& s1 = curr.transcripts; + const AbundanceGroup& s2 = prev.transcripts; + + assert (s1.abundances().size() == s2.abundances().size()); + + for (size_t j = 0; j < s1.abundances().size(); ++j) + { + assert (s1.abundances()[j]->description() == s2.abundances()[j]->description()); + } + } + + test_differential(abundances.front()->locus_tag, abundances, *_tests, *_tracking, _samples_are_time_series); +} + +void TestLauncher::test_finished_loci() +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_launcher_lock); +#endif + + launcher_sample_table::iterator itr = _samples.begin(); + while(itr != _samples.end()) + { + if (all_samples_reported_in(itr->second)) + { + // In some abundance runs, we don't actually want to perform testing + // (eg initial quantification before bias correction). + // _tests and _tracking will be NULL in these cases. 
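+                // Note (added for clarity): a locus only reaches this point once
+                // all_samples_reported_in() has seen a non-NULL SampleAbundances from
+                // every factory; after testing (or skipping it), the entry is erased
+                // from _samples below.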
+ if (_tests != NULL && _tracking != NULL) + { + if (_p_bar) + { + verbose_msg("Testing for differential expression and regulation in locus [%s]\n", itr->second.front()->locus_tag.c_str()); + _p_bar->update(itr->second.front()->locus_tag.c_str(), 1); + } + perform_testing(itr->second); + } + else + { + if (_p_bar) + { + //verbose_msg("Testing for differential expression and regulation in locus [%s]\n", abundances.front()->locus_tag.c_str()); + _p_bar->update(itr->second.front()->locus_tag.c_str(), 1); + } + } + itr = _samples.erase(itr); + } + else + { + + ++itr; + } + } +} + +// This performs a between-group test on an isoform or TSS grouping, on two +// different samples. +bool test_diffexp(const FPKMContext& curr, + const FPKMContext& prev, + SampleDifference& test) +{ + bool performed_test = false; + if (curr.FPKM > 0.0 && prev.FPKM > 0.0) + { + //assert (curr.FPKM_variance > 0.0 && prev.FPKM_variance > 0.0); +// double log_curr = log(curr.counts); +// double log_prev = log(prev.counts); + + double stat = 0.0; + double p_value = 1.0; + + if (curr.FPKM_variance > 0.0 || prev.FPKM_variance > 0.0) + { + double curr_log_fpkm_var = (curr.FPKM_variance) / (curr.FPKM * curr.FPKM); + double prev_log_fpkm_var = (prev.FPKM_variance) / (prev.FPKM * prev.FPKM); + + double numerator = log(prev.FPKM / curr.FPKM); + + double denominator = sqrt(prev_log_fpkm_var + curr_log_fpkm_var); + stat = numerator / denominator; + + + normal norm; + double t1, t2; + if (stat > 0.0) + { + t1 = stat; + t2 = -stat; + } + else + { + t1 = -stat; + t2 = stat; + } + + if (isnan(t1) || isinf(t1) || isnan(t2) || isnan(t2)) + { + + //fprintf(stderr, "Warning: test statistic is NaN! %s (samples %lu and %lu)\n", test.locus_desc.c_str(), test.sample_1, test.sample_2); + p_value = 1.0; + } + else + { + double tail_1 = cdf(norm, t1); + double tail_2 = cdf(norm, t2); + p_value = 1.0 - (tail_1 - tail_2); + } + } + + double differential = log2(curr.FPKM) - log2(prev.FPKM); + + //test = SampleDifference(sample1, sample2, prev.FPKM, curr.FPKM, stat, p_value, transcript_group_id); + test.p_value = p_value; + test.differential = differential; + test.test_stat = stat; + test.value_1 = prev.FPKM; + test.value_2 = curr.FPKM; + + performed_test = true; + } + else + { + if (curr.FPKM > 0.0) + { + if (curr.status != NUMERIC_LOW_DATA && curr.FPKM_variance > 0.0) + { + normal norm(curr.FPKM, sqrt(curr.FPKM_variance)); + test.p_value = cdf(norm, 0); + performed_test = true; + test.differential = numeric_limits::max();; + test.test_stat = numeric_limits::max(); + test.value_1 = 0; + test.value_2 = curr.FPKM; + } + else + { + test.differential = -numeric_limits::max(); + test.test_stat = -numeric_limits::max(); + test.value_1 = prev.FPKM; + test.value_2 = 0; + test.p_value = 1; + performed_test = false; + } + } + else if (prev.FPKM > 0.0) + { + if (prev.status != NUMERIC_LOW_DATA && prev.FPKM_variance > 0.0) + { + normal norm(prev.FPKM, sqrt(prev.FPKM_variance)); + test.p_value = cdf(norm, 0); + performed_test = true; + + test.differential = -numeric_limits::max(); + test.test_stat = -numeric_limits::max(); + test.value_1 = prev.FPKM; + test.value_2 = 0; + } + else + { + test.differential = -numeric_limits::max(); + test.test_stat = -numeric_limits::max(); + test.value_1 = prev.FPKM; + test.value_2 = 0; + test.p_value = 1; + performed_test = false; + } + } + else + { + assert (prev.FPKM == 0.0 && curr.FPKM == 0.0); + performed_test = false; + } + + } + + test.test_status = performed_test ? 
OK : NOTEST; + return performed_test; +} + +SampleDiffMetaDataTable meta_data_table; +#if ENABLE_THREADS +boost::mutex meta_data_lock; +#endif + +shared_ptr get_metadata(const string description) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(meta_data_lock); +#endif + pair p; + p = meta_data_table.insert(make_pair(description, new SampleDifferenceMetaData())); + return p.first->second; +} + +// This performs between-group tests on isoforms or TSS groupings in a single +// locus, on two different samples. +pair get_de_tests(const string& description, + const FPKMContext& prev_abundance, + const FPKMContext& curr_abundance, + SampleDiffs& de_tests, + bool enough_reads) +{ + int total_iso_de_tests = 0; + + SampleDifference test; + + pair inserted; +// inserted = de_tests.insert(make_pair(curr_abundance.description(), +// SampleDifference())); + inserted = de_tests.insert(make_pair(description, + SampleDifference())); + + const FPKMContext& r1 = curr_abundance; + const FPKMContext& r2 = prev_abundance; + + if (curr_abundance.status == NUMERIC_FAIL || + prev_abundance.status == NUMERIC_FAIL || + prev_abundance.status == NUMERIC_HI_DATA || + curr_abundance.status == NUMERIC_HI_DATA) + { + test_diffexp(r1, r2, test); + test.test_stat = 0; + test.p_value = 1.0; + test.differential = 0.0; + if (curr_abundance.status == NUMERIC_FAIL || + prev_abundance.status == NUMERIC_FAIL) + { + test.test_status = FAIL; + } + else if (prev_abundance.status == NUMERIC_HI_DATA || + curr_abundance.status == NUMERIC_HI_DATA) + { + test.test_status = HIDATA; + } + } + else if (curr_abundance.status == NUMERIC_LOW_DATA || + prev_abundance.status == NUMERIC_LOW_DATA) + { + // perform the test, but mark it as not significant and don't add it to the + // pile. This way we don't penalize for multiple testing, but users can still + // see the fold change. 
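+        // Note (added for clarity): test_diffexp() still fills in test.differential
+        // (the log2 fold change) here; only the statistic and p-value are neutralized
+        // before the status is set to LOWDATA, so the entry is reported but kept out
+        // of the multiple-testing correction.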
+ test_diffexp(r1, r2, test); + test.test_stat = 0; + test.p_value = 1.0; + //test.differential = 0.0; + + test.test_status = LOWDATA; + } + else // at least one is OK, the other might be LOW_DATA + { + test.test_status = FAIL; + + if (test_diffexp(r1, r2, test)) + { + total_iso_de_tests++; + } + else + { + test.test_stat = 0; + test.p_value = 1.0; + test.differential = 0.0; + } + if (enough_reads) + test.test_status = OK; + else + test.test_status = NOTEST; + + } + + + inserted.first->second = test; + + return make_pair(total_iso_de_tests, inserted.first); +} + + +//bool generate_js_samples(const AbundanceGroup& prev_abundance, +// const AbundanceGroup& curr_abundance, +// size_t num_js_samples, +// vector& js_samples) +//{ +// ublas::vector prev_kappa_mean(curr_abundance.abundances().size()); +// for (size_t i = 0; i < prev_abundance.abundances().size(); ++i) +// { +// prev_kappa_mean(i) = prev_abundance.abundances()[i]->kappa(); +// } +// +// ublas::vector curr_kappa_mean(curr_abundance.abundances().size()); +// for (size_t i = 0; i < curr_abundance.abundances().size(); ++i) +// { +// curr_kappa_mean(i) = curr_abundance.abundances()[i]->kappa(); +// } +// +// ublas::matrix prev_kappa_cov = prev_abundance.kappa_cov(); +// double prev_ret = cholesky_factorize(prev_kappa_cov); +// if (prev_ret != 0) +// return false; +// +// ublas::matrix curr_kappa_cov = curr_abundance.kappa_cov(); +// double curr_ret = cholesky_factorize(curr_kappa_cov); +// if (curr_ret != 0) +// return false; +// +// vector > samples; +// +// multinormal_generator generator(prev_kappa_mean, prev_kappa_cov); +// //vector > prev_samples; +// generate_importance_samples(generator, samples, num_js_samples / 2, true); +// +// // It's a little silly that we have to do this, but since we always initialize +// // the random number generators to random_seed, instead of time(NULL), simply +// // creating a new generator (rather than re-using it) +// generator.set_parameters(curr_kappa_mean, curr_kappa_cov); +// +// //multinormal_generator curr_generator(curr_kappa_mean, curr_kappa_cov); +// //vector > curr_samples; +// generate_importance_samples(generator, samples, num_js_samples / 2, true); +// +// // We want to revise the covariance matrix from the samples, since we'll +// // need it later for the CIs. 
+// ublas::matrix null_kappa_cov; +// null_kappa_cov = ublas::zero_matrix(curr_kappa_cov.size1(), +// curr_kappa_cov.size2()); +// +// +// ublas::vector null_kappa_mean; +// null_kappa_mean = ublas::zero_vector(curr_kappa_cov.size1()); +// +// foreach(ublas::vector& sample, samples) +// { +// null_kappa_mean += sample; +// } +// null_kappa_mean /= samples.size(); +// +// for (size_t i = 0; i < null_kappa_cov.size1(); ++i) +// { +// for (size_t j = 0; j < null_kappa_cov.size2(); ++j) +// { +// for (size_t k = 0 ; k < samples.size(); ++k) +// { +// double c = (samples[k](i) - null_kappa_mean(i)) * (samples[k](j) - null_kappa_mean(j)); +// null_kappa_cov(i,j) += c; +// } +// } +// } +// +// null_kappa_cov /= samples.size(); +// +// static const double epsilon = 1e-6; +// +// null_kappa_cov += (epsilon * ublas::identity_matrix(null_kappa_cov.size1())); +// +// double null_ret = cholesky_factorize(null_kappa_cov); +// if (null_ret != 0) +// return false; +// +// generator.set_parameters(null_kappa_mean, null_kappa_cov); +// samples.clear(); +// generate_importance_samples(generator, samples, num_js_samples, true); +// +// cerr << "prev: " << endl; +// cerr << prev_kappa_mean << endl; +// for (unsigned i = 0; i < prev_kappa_cov.size1 (); ++ i) +// { +// ublas::matrix_row > mr (prev_kappa_cov, i); +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "curr: " << endl; +// cerr << curr_kappa_mean << endl; +// for (unsigned i = 0; i < curr_kappa_cov.size1 (); ++ i) +// { +// ublas::matrix_row > mr (curr_kappa_cov, i); +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "null: " << endl; +// cerr << null_kappa_mean << endl; +// for (unsigned i = 0; i < null_kappa_cov.size1 (); ++ i) +// { +// ublas::matrix_row > mr (null_kappa_cov, i); +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "======" << endl; +// +// js_samples.clear(); +// +// +// //size_t num_samples = std::min(prev_samples.size(), curr_samples.size()); +// size_t num_samples = num_js_samples; +// vector > sample_kappas(2); +// +// boost::uniform_int<> uniform_dist(0,samples.size()-1); +// boost::mt19937 rng; +// boost::variate_generator > uniform_gen(rng, uniform_dist); +// +// for (size_t i = 0; i < num_samples; ++i) +// { +// sample_kappas[0] = samples[uniform_gen()]; +// sample_kappas[1] = samples[uniform_gen()]; +// +// double js = jensen_shannon_distance(sample_kappas); +// cerr << sample_kappas[0] << " vs. 
" << sample_kappas[1] << " = " << js << endl; +// js_samples.push_back(js); +// } +// +// sort(js_samples.begin(), js_samples.end()); +// +//// for (size_t i = 0; i < 100; ++i) +//// { +//// fprintf(stderr, "%lg\n", js_samples[i]); +//// } +// return true; +//} + +bool generate_null_js_samples(const AbundanceGroup& null_abundance, + size_t num_js_samples, + multinormal_generator& generator, + vector& js_samples) +{ + ublas::vector null_kappa_mean(null_abundance.abundances().size()); + for (size_t i = 0; i < null_abundance.abundances().size(); ++i) + { + null_kappa_mean(i) = null_abundance.abundances()[i]->kappa(); + } + + ublas::matrix null_kappa_cov = null_abundance.kappa_cov(); + + double prev_ret = cholesky_factorize(null_kappa_cov); + if (prev_ret != 0) + return false; + +// cerr << endl << null_kappa_mean << endl; +// for (unsigned i = 0; i < null_kappa_cov.size1 (); ++ i) +// { +// ublas::matrix_row > mr (null_kappa_cov, i); +// std::cerr << i << " : " << mr << std::endl; +// } +// cerr << "======" << endl; + + vector > null_samples; + + // It's a little silly that we have to do this, but since we always initialize + // the random number generators to random_seed, instead of time(NULL), simply + // creating a new generator (rather than re-using it) + generator.set_parameters(null_kappa_mean, null_kappa_cov); + + generate_importance_samples(generator, null_samples, num_js_samples, false); + if (null_samples.size() == 0) + return false; + + js_samples.clear(); + + //size_t num_samples = std::min(prev_samples.size(), curr_samples.size()); + size_t num_samples = num_js_samples; + vector > sample_kappas(2); + + boost::uniform_int<> null_uniform_dist(0,null_samples.size()-1); + boost::mt19937 null_rng; + boost::variate_generator > null_uniform_gen(null_rng, null_uniform_dist); + + for (size_t i = 0; i < num_samples; ++i) + { + sample_kappas[0] = null_samples[null_uniform_gen()]; + sample_kappas[1] = null_samples[null_uniform_gen()]; + + double js = jensen_shannon_distance(sample_kappas); + //cerr << sample_kappas[0] << " vs. " << sample_kappas[1] << " = " << js << endl; + js_samples.push_back(js); + } + + sort(js_samples.begin(), js_samples.end()); + + // for (size_t i = 0; i < 100; ++i) + // { + // fprintf(stderr, "%lg\n", js_samples[i]); + // } + return true; +} + +// Calculates the probability that drawing two samples from the provided +// relative abundance distribution would have produced a value at least as +// extreme as the given js value. 
+bool one_sided_js_test(const AbundanceGroup& null_abundances, + size_t num_samples, + multinormal_generator& generator, + double js, + double& p_val) +{ + vector js_samples; + + + bool success = generate_null_js_samples(null_abundances, num_samples, generator, js_samples); + if (success == false) + return false; + + vector::iterator lb = lower_bound(js_samples.begin(), js_samples.end(), js); + if (lb != js_samples.end()) + { + size_t num_less_extreme_samples = lb - js_samples.begin(); + p_val = 1.0 - ((double)num_less_extreme_samples/js_samples.size()); + } + else if (num_samples) + { + p_val = 1.0/num_samples; + } + else + { + p_val = 1.0; + return false; + } + return true; +} + +bool test_js(const AbundanceGroup& prev_abundance, + const AbundanceGroup& curr_abundance, + double& js, + double& p_val) +{ + vector > sample_kappas; + ublas::vector curr_kappas(curr_abundance.abundances().size()); + for (size_t i = 0; i < curr_abundance.abundances().size(); ++i) + { + curr_kappas(i) = curr_abundance.abundances()[i]->kappa(); + } + + ublas::vector prev_kappas(prev_abundance.abundances().size()); + for (size_t i = 0; i < prev_abundance.abundances().size(); ++i) + { + prev_kappas(i) = prev_abundance.abundances()[i]->kappa(); + } + + sample_kappas.push_back(prev_kappas); + sample_kappas.push_back(curr_kappas); + + js = jensen_shannon_distance(sample_kappas); + + if (isinf(js) || isnan(js)) + return false; + + static const int num_samples = 100000; + +// bool success = generate_js_samples(prev_abundance, curr_abundance, num_samples, js_samples); +// if (success == false) +// return false; + + multinormal_generator gen(ublas::zero_vector(prev_abundance.kappa_cov().size1()), + ublas::zero_matrix(prev_abundance.kappa_cov().size1(), + prev_abundance.kappa_cov().size2())); + + double prev_p_val = 1.0; + bool prev_succ = one_sided_js_test(prev_abundance, num_samples, gen, js, prev_p_val); + + double curr_p_val = 1.0; + bool curr_succ = one_sided_js_test(curr_abundance, num_samples, gen, js, curr_p_val); + + if (!curr_succ || !prev_succ) + return false; + + p_val = (prev_p_val + curr_p_val)/2; +// double mean_js = accumulate(js_samples.begin(), js_samples.end(), 0.0); +// if (js_samples.size() == 0) +// return false; +// mean_js /= js_samples.size(); +// +// double var_js = 0.0; +// for (size_t i = 0; i < js_samples.size(); ++i) +// { +// double s = js_samples[i] - mean_js; +// s *= s; +// var_js += s; +// } +// var_js /= js_samples.size(); +// +// if (var_js > 0.0) +// { +// // We're dealing with a standard normal that's been truncated below zero +// // so pdf(js) is twice the standard normal, and cdf is 0.5 * (cdf of normal - 1) +// +// normal test_dist(0,1.0); +// //double denom = sqrt(js_var); +// double se_js = sqrt(var_js); +// double p = mean_js/se_js; +// p_val = 1.0 - ((cdf(test_dist, p) - 0.5) / 0.5); +// } +// else +// { +// return false; +// } + + return true; +} + +// This performs within-group tests on a set of isoforms or a set of TSS groups. +// This is a way of looking for meaningful differential splicing or differential +// promoter use. 
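+// Note (added for clarity; formula illustrative): the measure used below is based on
+// the Jensen-Shannon divergence between the kappa (relative isoform abundance)
+// vectors p and q of the two conditions, with m = (p + q) / 2:
+//
+//     JSD(p, q) = 0.5 * KL(p || m) + 0.5 * KL(q || m)
+//
+// jensen_shannon_distance() returns the corresponding distance, and test_js() attaches
+// an empirical p-value to it via one_sided_js_test() on each condition's abundances.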
+void get_ds_tests(const AbundanceGroup& prev_abundance, + const AbundanceGroup& curr_abundance, + SampleDiffs& diff_tests, + bool enough_reads) +{ + const string& name = curr_abundance.description(); + + pair inserted; + inserted = diff_tests.insert(make_pair(name,SampleDifference())); + SampleDifference test; + + shared_ptr meta_data = get_metadata(name); + + meta_data->gene_ids = curr_abundance.gene_id(); + meta_data->gene_names = curr_abundance.gene_name(); + meta_data->protein_ids = curr_abundance.protein_id(); + meta_data->locus_desc = curr_abundance.locus_tag(); + meta_data->description = curr_abundance.description(); + + test.meta_data = meta_data; + + test.test_status = NOTEST; + + AbundanceStatus prev_status = curr_abundance.status(); + AbundanceStatus curr_status = prev_abundance.status(); + + vector to_keep(curr_abundance.abundances().size(), false); + //vector to_keep(curr_abundance.abundances().size(), false); + + for (size_t k = 0; k < prev_abundance.abundances().size(); ++k) + { + // assert (false); + + bool prev_enough_reads = false; + // do both curr and prev so that groups have the same number of slices. + if (prev_abundance.abundances()[k]->num_fragments() && prev_abundance.abundances()[k]->effective_length()) + { + double frags_per_kb = prev_abundance.abundances()[k]->num_fragments() / (prev_abundance.abundances()[k]->effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + prev_enough_reads = true; + } + + bool curr_enough_reads = false; + if (curr_abundance.abundances()[k]->num_fragments() && curr_abundance.abundances()[k]->effective_length()) + { + double frags_per_kb = curr_abundance.abundances()[k]->num_fragments() / (curr_abundance.abundances()[k]->effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + curr_enough_reads = true; + } + + if (curr_enough_reads || prev_enough_reads) + to_keep[k] = true; + } + + AbundanceGroup filtered_prev; + prev_abundance.filter_group(to_keep, filtered_prev); + + AbundanceGroup filtered_curr; + curr_abundance.filter_group(to_keep, filtered_curr); + + if (filtered_prev.abundances().size() > 1 && + /*filtered_prev.has_member_with_status(NUMERIC_LOW_DATA) == false && + filtered_curr.has_member_with_status(NUMERIC_LOW_DATA) == false &&*/ + prev_status == NUMERIC_OK && filtered_prev.num_fragments() > 0 && + curr_status == NUMERIC_OK && filtered_curr.num_fragments() > 0) + { + vector > sample_kappas; + ublas::vector curr_kappas(filtered_curr.abundances().size()); + for (size_t i = 0; i < filtered_curr.abundances().size(); ++i) + { + curr_kappas(i) = filtered_curr.abundances()[i]->kappa(); + } + + ublas::vector prev_kappas(filtered_prev.abundances().size()); + for (size_t i = 0; i < filtered_prev.abundances().size(); ++i) + { + prev_kappas(i) = filtered_prev.abundances()[i]->kappa(); + } + + sample_kappas.push_back(prev_kappas); + sample_kappas.push_back(curr_kappas); + + double js = 0.0; + double p_val = 1.0; + + bool success = test_js(filtered_prev, filtered_curr, js, p_val); + if (js == 0.0 || success == false) + { + test.test_stat = 0; + test.p_value = 1.0; + test.value_1 = 0; + test.value_2 = 0; + test.differential = 0; + test.test_status = NOTEST; + } + else + { + test.test_stat = 0; + test.p_value = p_val; + test.value_1 = 0; + test.value_2 = 0; + test.differential = js; + test.test_status = enough_reads ? 
OK : NOTEST; + + /////////////////// +#if 1 + ublas::vector js_gradient; + jensen_shannon_gradient(sample_kappas, js, js_gradient); + + vector > covariances; + + covariances.push_back(filtered_prev.kappa_cov()); + covariances.push_back(filtered_curr.kappa_cov()); + + ublas::matrix js_covariance; + assert (covariances.size() > 0); + for (size_t i = 0; i < covariances.size(); ++i) + { + assert (covariances[i].size1() > 0 && covariances[i].size2() > 0); + } + make_js_covariance_matrix(covariances,js_covariance); + assert (js_covariance.size1() > 0 && js_covariance.size2() > 0); + + double js_var = inner_prod(js_gradient, + prod(js_covariance, js_gradient)); + assert (!isinf(js_var) && !isnan(js_var)); + + if (js_var > 0.0) + { + // We're dealing with a standard normal that's been truncated below zero + // so pdf(js) is twice the standard normal, and cdf is 0.5 * (cdf of normal - 1) + + normal test_dist(0,1.0); + //double denom = sqrt(js_var); + double p = js/sqrt(js_var); + //test.test_stat = 2 * pdf(test_dist, p); + // analytic p_value: + test.test_stat = 1.0 - ((cdf(test_dist, p) - 0.5) / 0.5); + } + +#endif + /////////////////// + + + +// ublas::vector js_gradient; +// jensen_shannon_gradient(sample_kappas, js, js_gradient); +// +// vector > covariances; +// +// covariances.push_back(filtered_prev.kappa_cov()); +// covariances.push_back(filtered_curr.kappa_cov()); +// +// ublas::matrix js_covariance; +// assert (covariances.size() > 0); +// for (size_t i = 0; i < covariances.size(); ++i) +// { +// assert (covariances[i].size1() > 0 && covariances[i].size2() > 0); +// } +// make_js_covariance_matrix(covariances,js_covariance); +// assert (js_covariance.size1() > 0 && js_covariance.size2() > 0); +// +// double js_var = inner_prod(js_gradient, +// prod(js_covariance, js_gradient)); +// assert (!isinf(js_var) && !isnan(js_var)); +// +//#ifdef DEBUG +// if (isinf(js_var) || isnan(js_var)) +// { +// cerr << "grad: " << js_gradient << endl; +// cerr << "js_cov: " << js_covariance << endl; +// cerr << prod(js_covariance, js_gradient) << endl; +// } +//#endif +// if (js_var <= 0.0) +// { +// +// test.test_stat = 0; +// test.p_value = 1.0; +// test.value_1 = 0; +// test.value_2 = 0; +// test.differential = 0; +// test.test_status = NOTEST; +// } +// else +// { +// // We're dealing with a standard normal that's been truncated below zero +// // so pdf(js) is twice the standard normal, and cdf is 0.5 * (cdf of normal - 1) +// +// normal test_dist(0,1.0); +// //double denom = sqrt(js_var); +// double p = js/sqrt(js_var); +// test.test_stat = 2 * pdf(test_dist, p); +// test.p_value = 1.0 - ((cdf(test_dist, p) - 0.5) / 0.5); +// test.value_1 = 0; +// test.value_2 = 0; +// test.differential = js; +// test.test_status = enough_reads ? OK : NOTEST; +// } +// if (isinf(test.test_stat) || isnan(test.test_stat)) +// { +// fprintf(stderr, "Warning: test stat is invalid!\n"); +// exit(1); +// } + } + + inserted.first->second = test; + } + else // we won't even bother with the JS-based testing in LOWDATA cases. 
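+    // Note (added for clarity): no JS test is run in this branch; the statistic,
+    // p-value, and differential are zeroed, and only the status distinguishes the
+    // outcome: FAIL if either condition failed numerically, NOTEST if both were OK
+    // with no LOW_DATA members, and LOWDATA otherwise.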
+ { + if (prev_status == NUMERIC_OK && curr_status == NUMERIC_OK && + filtered_prev.has_member_with_status(NUMERIC_LOW_DATA) == false && + filtered_curr.has_member_with_status(NUMERIC_LOW_DATA) == false) + test.test_status = NOTEST; + else if (prev_status == NUMERIC_FAIL || curr_status == NUMERIC_FAIL) + test.test_status = FAIL; + else + test.test_status = LOWDATA; + + test.test_stat = 0; + test.p_value = 0.0; + test.differential = 0.0; + inserted.first->second = test; + } +} + +string make_ref_tag(const string& ref, char classcode) +{ + char tag_buf[1024]; + + sprintf(tag_buf, + "%s(%c)", + ref.c_str(), + classcode); + + return string(tag_buf); +} +void add_to_tracking_table(size_t sample_index, + Abundance& ab, + FPKMTrackingTable& track) + +{ + pair inserted; + pair p; + p = make_pair(ab.description(), FPKMTracking()); + inserted = track.insert(p); + + FPKMTracking& fpkm_track = inserted.first->second; + + set tss = ab.tss_id(); + set gene_ids = ab.gene_id(); + set genes = ab.gene_name(); + set proteins = ab.protein_id(); + + fpkm_track.tss_ids.insert(tss.begin(), tss.end()); + fpkm_track.gene_ids.insert(gene_ids.begin(), gene_ids.end()); + fpkm_track.gene_names.insert(genes.begin(), genes.end()); + fpkm_track.protein_ids.insert(proteins.begin(), proteins.end()); + + if (inserted.second) + { + fpkm_track.locus_tag = ab.locus_tag(); + fpkm_track.description = ab.description(); + shared_ptr transfrag = ab.transfrag(); + if (transfrag && transfrag->nearest_ref_id() != "") + { + fpkm_track.classcode = transfrag->nearest_ref_classcode(); + fpkm_track.ref_match = transfrag->nearest_ref_id(); + } + else + { + fpkm_track.classcode = 0; + fpkm_track.ref_match = "-"; + } + if (transfrag) + { + fpkm_track.length = transfrag->length(); + } + else + { + fpkm_track.length = 0; + } + } + + FPKMContext r1 = FPKMContext(ab.num_fragments(), + ab.FPKM(), + ab.FPKM_variance(), + ab.status()); + + + + vector& fpkms = inserted.first->second.fpkm_series; + if (sample_index < fpkms.size()) + { + // if the fpkm series already has an entry matching this description + // for this sample index, then we are dealing with a group of transcripts + // that occupies multiple (genomically disjoint) bundles. We need + // to add this bundle's contribution to the FPKM, fragments, and variance + // to whatever's already there. + + // NOTE: we can simply sum the FKPM_variances, because we are currently + // assuming that transcripts in disjoint bundles share no alignments and + // thus have FPKM covariance == 0; This assumption will no longer be + // true if we decide to do multireads the right way. 
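+        // Illustrative arithmetic (added for clarity): with zero covariance,
+        // Var(F1 + F2) = Var(F1) + Var(F2), so the straight sums below are valid.
+        // For example, disjoint bundles contributing FPKM 3.0 (variance 0.4) and
+        // FPKM 2.0 (variance 0.1) track as FPKM 5.0 with variance 0.5.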
+ + FPKMContext& existing = fpkms[sample_index]; + existing.FPKM += r1.FPKM; + existing.counts += r1.counts; + existing.FPKM_variance += r1.FPKM_variance; + if (existing.status == NUMERIC_FAIL || r1.status == NUMERIC_FAIL) + { + existing.status = NUMERIC_FAIL; + } + else + { + existing.status = NUMERIC_OK; + } + + } + else + { + fpkms.push_back(r1); + } +} + +string bundle_locus_tag(const RefSequenceTable& rt, + const HitBundle& bundle) +{ + char locus_buf[1024]; + RefID bundle_chr_id = bundle.ref_id(); + assert (bundle_chr_id != 0); + const char* chr_name = rt.get_name(bundle_chr_id); + + sprintf(locus_buf, + "%s:%d-%d", + chr_name, + bundle.left(), + bundle.right()); + + return string(locus_buf); +} + +#if ENABLE_THREADS +mutex test_storage_lock; // don't modify the above struct without locking here +#endif + +void sample_abundance_worker(const string& locus_tag, + SampleAbundances& sample, + HitBundle* sample_bundle, + bool perform_cds_analysis, + bool perform_tss_analysis) +{ + vector > abundances; + + foreach(shared_ptr s, sample_bundle->ref_scaffolds()) + { + TranscriptAbundance* pT = new TranscriptAbundance; + pT->transfrag(s); + shared_ptr ab(pT); + ab->description(s->annotated_trans_id()); + ab->locus_tag(locus_tag); + abundances.push_back(ab); + } + + sample.transcripts = AbundanceGroup(abundances); + + vector hits_in_cluster; + + if (sample_bundle->hits().size() < max_frags_per_bundle) + { + get_alignments_from_scaffolds(sample.transcripts.abundances(), + hits_in_cluster); + + // Compute the individual transcript FPKMs via each sample's + // AbundanceGroup for this locus. + + sample.transcripts.calculate_abundance(hits_in_cluster); + } + else + { + foreach(shared_ptr ab, abundances) + { + ab->status(NUMERIC_HI_DATA); + } + } + + // Cluster transcripts by gene_id + vector transcripts_by_gene_id; + cluster_transcripts(sample.transcripts, + transcripts_by_gene_id); + + foreach(AbundanceGroup& ab_group, transcripts_by_gene_id) + { + ab_group.locus_tag(locus_tag); + set gene_ids = ab_group.gene_id(); + assert (gene_ids.size() == 1); + ab_group.description(*(gene_ids.begin())); + } + + sample.genes = transcripts_by_gene_id; + + if (perform_cds_analysis) + { + // Cluster transcripts by CDS + vector transcripts_by_cds; + ublas::matrix cds_gamma_cov; + ublas::matrix cds_gamma_boot_cov; + ublas::matrix cds_count_cov; + ublas::matrix cds_iterated_exp_count_cov; + ublas::matrix cds_fpkm_cov; + cluster_transcripts(sample.transcripts, + transcripts_by_cds, + &cds_gamma_cov, + &cds_iterated_exp_count_cov, + &cds_count_cov, + &cds_fpkm_cov, + &cds_gamma_boot_cov); + foreach(AbundanceGroup& ab_group, transcripts_by_cds) + { + ab_group.locus_tag(locus_tag); + set protein_ids = ab_group.protein_id(); + assert (protein_ids.size() == 1); + string desc = *(protein_ids.begin()); + assert (desc != ""); + ab_group.description(*(protein_ids.begin())); + } + + sample.cds = transcripts_by_cds; + + // Group the CDS clusters by gene + vector > cds_abundances; + double max_cds_mass_variance = 0.0; + set > rg_props; + foreach (AbundanceGroup& ab_group, sample.cds) + { + cds_abundances.push_back(shared_ptr(new AbundanceGroup(ab_group))); + max_cds_mass_variance = max(ab_group.max_mass_variance(), max_cds_mass_variance); + rg_props.insert(ab_group.rg_props().begin(), ab_group.rg_props().end()); + } + AbundanceGroup cds(cds_abundances, + cds_gamma_cov, + cds_gamma_boot_cov, + cds_iterated_exp_count_cov, + cds_count_cov, + cds_fpkm_cov, + max_cds_mass_variance, + rg_props); + + vector cds_by_gene; + + 
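+        // The aggregate AbundanceGroup 'cds' built above carries the CDS-level
+        // gamma, count, and FPKM covariance matrices; the call below re-clusters
+        // it by gene (mirroring transcripts_by_gene_id above), so the gene-level
+        // CDS groups are derived from those same estimates.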
cluster_transcripts(cds, + cds_by_gene); + + foreach(AbundanceGroup& ab_group, cds_by_gene) + { + ab_group.locus_tag(locus_tag); + set gene_ids = ab_group.gene_id(); + assert (gene_ids.size() == 1); + ab_group.description(*(gene_ids.begin())); + } + + sample.gene_cds = cds_by_gene; + } + + if (perform_tss_analysis) + { + // Cluster transcripts by start site (TSS) + vector transcripts_by_tss; + + ublas::matrix tss_gamma_cov; + ublas::matrix tss_gamma_boot_cov; + ublas::matrix tss_count_cov; + ublas::matrix tss_iterated_exp_count_cov; + ublas::matrix tss_fpkm_cov; + cluster_transcripts(sample.transcripts, + transcripts_by_tss, + &tss_gamma_cov, + &tss_iterated_exp_count_cov, + &tss_count_cov, + &tss_fpkm_cov, + &tss_gamma_boot_cov); + + + foreach(AbundanceGroup& ab_group, transcripts_by_tss) + { + ab_group.locus_tag(locus_tag); + set tss_ids = ab_group.tss_id(); + assert (tss_ids.size() == 1); + string desc = *(tss_ids.begin()); + assert (desc != ""); + ab_group.description(*(tss_ids.begin())); + + } + + sample.primary_transcripts = transcripts_by_tss; + double max_tss_mass_variance = 0.0; + + // Group TSS clusters by gene + vector > primary_transcript_abundances; + set > rg_props; + foreach (AbundanceGroup& ab_group, sample.primary_transcripts) + { + primary_transcript_abundances.push_back(shared_ptr(new AbundanceGroup(ab_group))); + max_tss_mass_variance = max(max_tss_mass_variance, ab_group.max_mass_variance()); + rg_props.insert(ab_group.rg_props().begin(), ab_group.rg_props().end()); + } + + AbundanceGroup primary_transcripts(primary_transcript_abundances, + tss_gamma_cov, + tss_gamma_boot_cov, + tss_iterated_exp_count_cov, + tss_count_cov, + tss_fpkm_cov, + max_tss_mass_variance, + rg_props); + + vector primary_transcripts_by_gene; + + cluster_transcripts(primary_transcripts, + primary_transcripts_by_gene); + + foreach(AbundanceGroup& ab_group, primary_transcripts_by_gene) + { + ab_group.locus_tag(locus_tag); + set gene_ids = ab_group.gene_id(); +// if (gene_ids.size() > 1) +// { +// foreach (string st, gene_ids) +// { +// fprintf(stderr, "%s\n", st.c_str()); +// } +// ab_group.gene_id(); +// } + assert (gene_ids.size() == 1); + ab_group.description(*(gene_ids.begin())); + } + + sample.gene_primary_transcripts = primary_transcripts_by_gene; + } +} + +struct LocusVarianceInfo +{ + int factory_id; + double mean_count; + double count_empir_var; + double locus_count_fitted_var; + double isoform_fitted_var_sum; + double cross_replicate_js; + int num_transcripts; + double bayes_gamma_trace; + double empir_gamma_trace; + vector gamma; + vector gamma_var; + vector gamma_bootstrap_var; + vector transcript_ids; + vector count_sharing; + double locus_salient_frags; + double locus_total_frags; + +}; + +#if ENABLE_THREADS +mutex variance_info_lock; // don't modify the above struct without locking here +#endif + +vector locus_variance_info_table; + + +void sample_worker(const RefSequenceTable& rt, + ReplicatedBundleFactory& sample_factory, + shared_ptr abundance, + size_t factory_id, + shared_ptr launcher) +{ +#if ENABLE_THREADS + boost::this_thread::at_thread_exit(decr_pool_count); +#endif + + HitBundle bundle; + bool non_empty = sample_factory.next_bundle(bundle); + + if (!non_empty || (!corr_multi && !final_est_run && bundle.ref_scaffolds().size() != 1)) // Only learn on single isoforms + { +#if !ENABLE_THREADS + // If Cuffdiff was built without threads, we need to manually invoke + // the testing functor, which will check to see if all the workers + // are done, and if so, perform the cross 
sample testing. + launcher->abundance_avail(locus_tag, abundance, factory_id); + launcher->test_finished_loci(); + //launcher(); +#endif + return; + } + + abundance->cluster_mass = bundle.mass(); + + char bundle_label_buf[2048]; + sprintf(bundle_label_buf, + "%s:%d-%d", + rt.get_name(bundle.ref_id()), + bundle.left(), + bundle.right()); + string locus_tag = bundle_label_buf; + + launcher->register_locus(locus_tag); + + abundance->locus_tag = locus_tag; + + bool perform_cds_analysis = final_est_run; + bool perform_tss_analysis = final_est_run; + + foreach(shared_ptr s, bundle.ref_scaffolds()) + { + if (s->annotated_tss_id() == "") + { + perform_tss_analysis = false; + } + if (s->annotated_protein_id() == "") + { + perform_cds_analysis = false; + } + } + + sample_abundance_worker(boost::cref(locus_tag), + boost::ref(*abundance), + &bundle, + perform_cds_analysis, + perform_tss_analysis); + +#if ENABLE_THREADS + variance_info_lock.lock(); +#endif + + /////////////////////////////////////////////// + shared_ptr disperser = sample_factory.mass_dispersion_model(); + pair locus_mv = disperser->get_raw_mean_and_var(locus_tag); + if (locus_mv.first != 0 && locus_mv.second != 0) + { + LocusVarianceInfo info; + info.factory_id = factory_id; + info.mean_count = locus_mv.first; + info.count_empir_var = locus_mv.second; + info.locus_count_fitted_var = disperser->scale_mass_variance(info.mean_count); + + double total_iso_scaled_var = 0.0; + + const AbundanceGroup& ab_group = abundance->transcripts; + info.locus_total_frags = ab_group.total_frags(); + info.locus_salient_frags = ab_group.salient_frags(); + //double group_counts = ab_group.total_frags(); + ublas::matrix cov = ab_group.iterated_count_cov(); + if (ab_group.num_fragments()) + cov /= ab_group.num_fragments(); + + double total_length = 0.0; + for (unsigned i = 0; i < ab_group.abundances().size(); ++ i) + { + total_length += ab_group.abundances()[i]->effective_length(); + } + +// if (total_length) +// { +// for (unsigned i = 0; i < ab_group.abundances().size(); ++ i) +// { +// fprintf(stderr, +// "%lg, %lg, %lg\n", +// _abundances[i]->gamma(), +// _abundances[i]->effective_length()/total_length, +// log2(_abundances[i]->gamma()/(_abundances[i]->effective_length()/total_length))); +// } +// } + + for (size_t i = 0; i < ab_group.abundances().size(); ++i) + { + +// double count_var = cov(i,i); +// double max_count_covar = 0.0; +// size_t max_covar_idx = 0.0; +// for (size_t j = 0; j < cov.size1(); ++j) +// { +// if (j != i && abs(cov(i,j)) > max_count_covar) +// { +// max_count_covar = abs(cov(i,j)); +// max_covar_idx = j; +// } +// } + double count_sharing = 0.0; +// if (cov(i,i) != 0 && cov(max_covar_idx,max_covar_idx) != 0) +// count_sharing = -1.0 * cov(i,max_covar_idx) / sqrt(cov(i,i) * cov(max_covar_idx,max_covar_idx)); + + + if (total_length) + count_sharing = log2(ab_group.abundances()[i]->gamma()/(ab_group.abundances()[i]->effective_length()/total_length)); + + shared_ptr ab = ab_group.abundances()[i]; + double scaled_var = disperser->scale_mass_variance(ab->num_fragments()); + total_iso_scaled_var += scaled_var; + info.gamma.push_back(ab->gamma()); + info.gamma_var.push_back(ab_group.gamma_cov()(i,i)); + info.gamma_bootstrap_var.push_back(ab_group.gamma_bootstrap_cov()(i,i)); + info.count_sharing.push_back(count_sharing); + info.transcript_ids.push_back(ab->description()); + } + + const ublas::matrix& bs_gamma_cov = ab_group.gamma_bootstrap_cov(); + const ublas::matrix& gamma_cov = ab_group.gamma_cov(); + info.bayes_gamma_trace = 0; + 
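+        // The loop below accumulates the traces (sums of the diagonal entries)
+        // of the Bayesian gamma covariance and of the bootstrap ("empirical")
+        // gamma covariance, as scalar summaries of isoform-assignment
+        // uncertainty at this locus.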
info.empir_gamma_trace = 0; + for (size_t i = 0; i < ab_group.abundances().size(); ++i) + { + //for (size_t j = 0; j < ab_group.abundances().size(); ++j) + { + info.bayes_gamma_trace += gamma_cov(i,i); + info.empir_gamma_trace += bs_gamma_cov(i,i); + } + } + + + info.cross_replicate_js = 30; + //assert (abundance->cluster_mass == locus_mv.first); + //assert (total_iso_scaled_var >= info.mean_count); + + info.isoform_fitted_var_sum = total_iso_scaled_var; + info.num_transcripts = ab_group.abundances().size(); +// info.bayes_gamma_trace = 0; +// info.empir_gamma_trace = 0; + locus_variance_info_table.push_back(info); + } + +#if ENABLE_THREADS + variance_info_lock.unlock(); +#endif + + /////////////////////////////////////////////// + + + foreach(shared_ptr ref_scaff, bundle.ref_scaffolds()) + { + ref_scaff->clear_hits(); + } + + launcher->abundance_avail(locus_tag, abundance, factory_id); + launcher->test_finished_loci(); + +#if !ENABLE_THREADS + // If Cuffdiff was built without threads, we need to manually invoke + // the testing functor, which will check to see if all the workers + // are done, and if so, perform the cross sample testing. + //launcher->test_finished_loci(); +#endif +} + +void dump_locus_variance_info(const string& filename) +{ +#if ENABLE_THREADS + variance_info_lock.lock(); +#endif + + FILE* fdump = fopen(filename.c_str(), "w"); + + fprintf(fdump, + "condition\tdescription\tlocus_counts\tempir_var\tlocus_fit_var\tsum_iso_fit_var\tcross_replicate_js\tnum_transcripts\tbayes_gamma_trace\tempir_gamma_trace\tcount_mean\tgamma_var\tgamma_bootstrap_var\tlocus_salient_frags\tlocus_total_frags\tcount_sharing\n"); + foreach (LocusVarianceInfo& L, locus_variance_info_table) + { + for (size_t i = 0; i < L.gamma.size(); ++i) + { + fprintf(fdump, "%d\t%s\t%lg\t%lg\t%lg\t%lg\t%lg\t%d\t%lg\t%lg\t%lg\t%lg\t%lg\t%lg\t%lg\t%lg\n", L.factory_id, L.transcript_ids[i].c_str(), L.mean_count, L.count_empir_var, L.locus_count_fitted_var, L.isoform_fitted_var_sum, L.cross_replicate_js, L.num_transcripts, L.bayes_gamma_trace, L.empir_gamma_trace,L.gamma[i],L.gamma_var[i],L.gamma_bootstrap_var[i], L.locus_salient_frags, L.locus_total_frags, L.count_sharing[i]); + } + + } + +#if ENABLE_THREADS + variance_info_lock.unlock(); +#endif +} + +int total_tests = 0; +void test_differential(const string& locus_tag, + const vector >& samples, + Tests& tests, + Tracking& tracking, + bool samples_are_time_series) +{ + if (samples.empty()) + return; + +#if ENABLE_THREADS + test_storage_lock.lock(); + total_tests++; +#endif + + //fprintf(stderr, "\nTesting in %s (%d total tests)\n", locus_tag.c_str(), total_tests); + + // Add all the transcripts, CDS groups, TSS groups, and genes to their + // respective FPKM tracking table. 
Whether this is a time series or an + // all pairs comparison, we should be calculating and reporting FPKMs for + // all objects in all samples + for (size_t i = 0; i < samples.size(); ++i) + { + const AbundanceGroup& ab_group = samples[i]->transcripts; + foreach (shared_ptr ab, ab_group.abundances()) + { + add_to_tracking_table(i, *ab, tracking.isoform_fpkm_tracking); + } + + foreach (AbundanceGroup& ab, samples[i]->cds) + { + add_to_tracking_table(i, ab, tracking.cds_fpkm_tracking); + } + + foreach (AbundanceGroup& ab, samples[i]->primary_transcripts) + { + add_to_tracking_table(i, ab, tracking.tss_group_fpkm_tracking); + } + + foreach (AbundanceGroup& ab, samples[i]->genes) + { + add_to_tracking_table(i, ab, tracking.gene_fpkm_tracking); + } + } + + // Perform pairwise significance testing between samples. If this is a + // time series, only test between successive pairs of samples, as supplied + // by the user. + for (size_t i = 1; i < samples.size(); ++i) + { + //bool multi_transcript_locus = samples[i]->transcripts.abundances().size() > 1; + + int sample_to_start_test_against = 0; + if (samples_are_time_series) + sample_to_start_test_against = i - 1; + + for (size_t j = sample_to_start_test_against; j < i; ++j) + { +// bool enough_reads = (samples[i]->cluster_mass >= min_read_count || +// samples[j]->cluster_mass >= min_read_count); + assert (samples[i]->transcripts.abundances().size() == + samples[j]->transcripts.abundances().size()); + for (size_t k = 0; k < samples[i]->transcripts.abundances().size(); ++k) + { + const Abundance& curr_abundance = *(samples[j]->transcripts.abundances()[k]); + const Abundance& prev_abundance = *(samples[i]->transcripts.abundances()[k]); + const string& desc = curr_abundance.description(); + FPKMTrackingTable::iterator itr = tracking.isoform_fpkm_tracking.find(desc); + assert (itr != tracking.isoform_fpkm_tracking.end()); + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + pair result; + result = get_de_tests(desc, + itr->second.fpkm_series[j], + itr->second.fpkm_series[i], + tests.isoform_de_tests[i][j], + enough_reads); + + shared_ptr meta_data = get_metadata(desc); + + meta_data->gene_ids = curr_abundance.gene_id(); + meta_data->gene_names = curr_abundance.gene_name(); + meta_data->protein_ids = curr_abundance.protein_id(); + meta_data->locus_desc = curr_abundance.locus_tag(); + meta_data->description = curr_abundance.description(); + result.second->second.meta_data = meta_data; + } + + for (size_t k = 0; k < samples[i]->cds.size(); ++k) + { + const Abundance& curr_abundance = samples[j]->cds[k]; + const Abundance& prev_abundance = samples[i]->cds[k]; + + const string& desc = curr_abundance.description(); + FPKMTrackingTable::iterator itr = tracking.cds_fpkm_tracking.find(desc); + assert (itr != tracking.cds_fpkm_tracking.end()); + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + 
enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + pair result; + result = get_de_tests(desc, + itr->second.fpkm_series[j], + itr->second.fpkm_series[i], + tests.cds_de_tests[i][j], + enough_reads); + + shared_ptr meta_data = get_metadata(desc); + + meta_data->gene_ids = curr_abundance.gene_id(); + meta_data->gene_names = curr_abundance.gene_name(); + meta_data->protein_ids = curr_abundance.protein_id(); + meta_data->locus_desc = curr_abundance.locus_tag(); + meta_data->description = curr_abundance.description(); + result.second->second.meta_data = meta_data; + } + + for (size_t k = 0; k < samples[i]->primary_transcripts.size(); ++k) + { + const Abundance& curr_abundance = samples[j]->primary_transcripts[k]; + const Abundance& prev_abundance = samples[i]->primary_transcripts[k]; + + const string& desc = curr_abundance.description(); + FPKMTrackingTable::iterator itr = tracking.tss_group_fpkm_tracking.find(desc); + assert (itr != tracking.tss_group_fpkm_tracking.end()); + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + pair result; + result = get_de_tests(desc, + itr->second.fpkm_series[j], + itr->second.fpkm_series[i], + tests.tss_group_de_tests[i][j], + enough_reads); + + shared_ptr meta_data = get_metadata(desc); + + meta_data->gene_ids = curr_abundance.gene_id(); + meta_data->gene_names = curr_abundance.gene_name(); + meta_data->protein_ids = curr_abundance.protein_id(); + meta_data->locus_desc = curr_abundance.locus_tag(); + meta_data->description = curr_abundance.description(); + result.second->second.meta_data = meta_data; + } + + for (size_t k = 0; k < samples[i]->genes.size(); ++k) + { + const Abundance& curr_abundance = samples[j]->genes[k]; + const Abundance& prev_abundance = samples[i]->genes[k]; + const string& desc = curr_abundance.description(); + FPKMTrackingTable::iterator itr = tracking.gene_fpkm_tracking.find(desc); + assert (itr != tracking.gene_fpkm_tracking.end()); + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + pair result; + result = get_de_tests(desc, + itr->second.fpkm_series[j], + itr->second.fpkm_series[i], + tests.gene_de_tests[i][j], + enough_reads); + + shared_ptr meta_data = get_metadata(desc); + + meta_data->gene_ids = curr_abundance.gene_id(); + meta_data->gene_names = curr_abundance.gene_name(); + meta_data->protein_ids = curr_abundance.protein_id(); + meta_data->locus_desc = 
curr_abundance.locus_tag(); + meta_data->description = curr_abundance.description(); + result.second->second.meta_data = meta_data; + } + + // FIXME: the code below will not properly test for differential + // splicing/promoter use when a gene (e.g.) occupies two + // disjoint bundles. We need to store the covariance matrices (etc) + // in the FPKMContexts to handle that case properly. + + // Differential promoter use + for (size_t k = 0; k < samples[i]->gene_primary_transcripts.size(); ++k) + { + const Abundance& curr_abundance = samples[j]->gene_primary_transcripts[k]; + const Abundance& prev_abundance = samples[j]->gene_primary_transcripts[k]; + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + get_ds_tests(samples[j]->gene_primary_transcripts[k], + samples[i]->gene_primary_transcripts[k], + tests.diff_promoter_tests[i][j], + enough_reads); + } + + // Differential coding sequence output + for (size_t k = 0; k < samples[i]->gene_cds.size(); ++k) + { + const Abundance& curr_abundance = samples[j]->gene_cds[k]; + const Abundance& prev_abundance = samples[j]->gene_cds[k]; + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + get_ds_tests(samples[j]->gene_cds[k], + samples[i]->gene_cds[k], + tests.diff_cds_tests[i][j], + enough_reads); + } + + // Differential splicing of primary transcripts + for (size_t k = 0; k < samples[i]->primary_transcripts.size(); ++k) + { + + const Abundance& curr_abundance = samples[j]->primary_transcripts[k]; + const Abundance& prev_abundance = samples[j]->primary_transcripts[k]; + + bool enough_reads = false; + if (curr_abundance.num_fragments() && curr_abundance.effective_length()) + { + double frags_per_kb = curr_abundance.num_fragments() / (curr_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + if (prev_abundance.num_fragments() && prev_abundance.effective_length()) + { + double frags_per_kb = prev_abundance.num_fragments() / (prev_abundance.effective_length() / 1000.0); + if (frags_per_kb >= min_read_count) + enough_reads = true; + } + + get_ds_tests(samples[j]->primary_transcripts[k], + samples[i]->primary_transcripts[k], + tests.diff_splicing_tests[i][j], + enough_reads); + } + } + } + +#if ENABLE_THREADS + test_storage_lock.unlock(); +#endif +} diff --git a/src/differential.h b/src/differential.h new file mode 100644 index 0000000..9b2b246 --- /dev/null +++ b/src/differential.h @@ -0,0 +1,238 @@ +#ifndef DIFFERENTIAL_H +#define DIFFERENTIAL_H +/* + * differential.h + * cufflinks + * + * Created by Cole Trapnell on 3/15/10. + * Copyright 2009 Cole Trapnell. 
All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "abundances.h" +#include "jensen_shannon.h" +#include "replicates.h" + +using namespace std; + +enum TestStatus { + NOTEST, // successful calculation, test not performed + LOWDATA, // unsuccessful calculation due to low data, test not performed + HIDATA, // skipped calculation due to too many reads data, test not performed + OK, // successful numerical calc, test performed + FAIL // numerical exception, test not performed +}; + +struct SampleDifferenceMetaData +{ + string locus_desc; + set gene_ids; + set gene_names; + set protein_ids; + string description; // isoforms or tss groups (e.g.) involved in this test +}; + +// Stores the differential expression of an isoform or set of isoforms in two +// different samples, along with a significance test statistic for the difference. +struct SampleDifference +{ + SampleDifference() : + sample_1(-1), + sample_2(-1), + value_1(0.0), + value_2(0.0), + test_stat(0.0), + p_value(1.0), + corrected_p(1.0), + tested_group_id(-1), + test_status(NOTEST), + significant(false){} + + size_t sample_1; + size_t sample_2; + + double value_1; + double value_2; + double differential; + double test_stat; + double p_value; + double corrected_p; + + size_t tested_group_id; // which scaffolds' FPKMs contribute + + shared_ptr meta_data; + + TestStatus test_status; + bool significant; +}; + +typedef map SampleDiffs; +typedef map > SampleDiffMetaDataTable; + +struct Outfiles +{ + FILE* isoform_de_outfile; + FILE* group_de_outfile; + FILE* gene_de_outfile; + FILE* cds_de_outfile; + + FILE* diff_splicing_outfile; + FILE* diff_promoter_outfile; + FILE* diff_cds_outfile; + + FILE* isoform_fpkm_tracking_out; + FILE* tss_group_fpkm_tracking_out; + FILE* gene_fpkm_tracking_out; + FILE* cds_fpkm_tracking_out; +}; + +struct Tests +{ + vector > isoform_de_tests; + vector > tss_group_de_tests; + vector > gene_de_tests; + vector > cds_de_tests; + + vector > diff_splicing_tests; // to be performed on the isoforms of a single tss group + vector > diff_promoter_tests; // to be performed on the tss groups of a single gene + vector > diff_cds_tests; // to be performed on the cds groups of a single gene +}; + +struct FPKMContext +{ + FPKMContext(double c, double r, double v, AbundanceStatus s) + : counts(c), FPKM(r), FPKM_variance(v), status(s) {} + double counts; + double FPKM; + double FPKM_variance; + AbundanceStatus status; +}; + +struct FPKMTracking +{ + string locus_tag; + char classcode; + set tss_ids; // for individual isoforms only + set gene_ids; + set gene_names; + set protein_ids; + string description; // isoforms or tss groups (e.g.) 
involved in this test + string ref_match; + int length; + + TestStatus test_status; + + vector fpkm_series; +}; + +typedef map FPKMTrackingTable; + +struct Tracking +{ + FPKMTrackingTable isoform_fpkm_tracking; + FPKMTrackingTable tss_group_fpkm_tracking; + FPKMTrackingTable gene_fpkm_tracking; + FPKMTrackingTable cds_fpkm_tracking; +}; + +struct SampleAbundances +{ + string locus_tag; + AbundanceGroup transcripts; + vector primary_transcripts; + vector gene_primary_transcripts; + vector cds; + vector gene_cds; + vector genes; + double cluster_mass; +}; + +#if ENABLE_THREADS + extern boost::mutex _launcher_lock; +#endif + +struct TestLauncher +{ +private: + TestLauncher(TestLauncher& rhs) {} + +public: + TestLauncher(int num_samples, + Tests* tests, + Tracking* tracking, + bool ts, + ProgressBar* p_bar) + : + _orig_workers(num_samples), + _tests(tests), + _tracking(tracking), + _samples_are_time_series(ts), + _p_bar(p_bar) + { + } + + void operator()(); + + void register_locus(const string& locus_id); + void abundance_avail(const string& locus_id, + shared_ptr ab, + size_t factory_id); + void test_finished_loci(); + void perform_testing(vector >& abundances); + bool all_samples_reported_in(vector >& abundances); + bool all_samples_reported_in(const string& locus_id); + + typedef list > > > launcher_sample_table; + +private: + + launcher_sample_table::iterator find_locus(const string& locus_id); + + int _orig_workers; + launcher_sample_table _samples; + Tests* _tests; + Tracking* _tracking; + bool _samples_are_time_series; + ProgressBar* _p_bar; + +}; + +extern double min_read_count; + +void sample_worker(const RefSequenceTable& rt, + ReplicatedBundleFactory& sample_factory, + shared_ptr abundance, + size_t factory_id, + shared_ptr launcher); + +void test_differential(const string& locus_tag, + const vector >& samples, + Tests& tests, + Tracking& tracking, + bool samples_are_time_series); + +void dump_locus_variance_info(const string& filename); + +#if ENABLE_THREADS +void decr_pool_count(); +extern boost::mutex locus_thread_pool_lock; +extern int locus_curr_threads; +extern int locus_num_threads; +#endif + +#endif diff --git a/src/filters.cpp b/src/filters.cpp new file mode 100644 index 0000000..0a10b50 --- /dev/null +++ b/src/filters.cpp @@ -0,0 +1,1161 @@ +/* + * filters.cpp + * cufflinks + * + * Created by Cole Trapnell on 10/27/09. + * Copyright 2009 Cole Trapnell. All rights reserved. 
+ * + */ + +#include "filters.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace boost; + +void filter_introns(int bundle_length, + int bundle_left, + vector& hits, + double fraction, + bool filter_on_intron_overlap, + bool filter_with_intron_doc) +{ + vector depth_of_coverage(bundle_length,0); + vector scaff_doc; + map, float> intron_doc; + vector filtered_hits; + vector toss(hits.size(), false); + + double bundle_avg_doc = compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_doc, + false); + + double bundle_avg_thresh = bundle_avg_doc * fraction; + + if (filter_with_intron_doc && !intron_doc.empty()) + { + bundle_avg_doc = major_isoform_intron_doc(intron_doc); + bundle_avg_thresh = fraction * bundle_avg_doc; + verbose_msg("\tFiltering bundle introns, avg (intron) doc = %lf, thresh = %f\n", bundle_avg_doc, bundle_avg_thresh); + } + else + { + verbose_msg("\tFiltering bundle introns, avg bundle doc = %lf, thresh = %f\n", bundle_avg_doc, bundle_avg_thresh); + } + + for(map, float>::const_iterator itr = intron_doc.begin(); + itr != intron_doc.end(); + ++itr) + { + for (size_t j = 0; j < hits.size(); ++j) + { + //fprintf(stderr, "considering read [%d-%d] with min doc = %lf contained in intron with doc = %lf\n", hits[j].left(), hits[j].right(), doc, idoc); + const vector& ops = hits[j].augmented_ops(); + + for (size_t i = 0; i < ops.size(); ++i) + { + if (ops[i].opcode == CUFF_INTRON) + { + map, float>::const_iterator itr; + itr = intron_doc.find(make_pair(ops[i].g_left(), ops[i].g_right())); + + double doc = itr->second; + if (doc < bundle_avg_thresh) + { + toss[j] = true; + verbose_msg("\t Filtering intron %d - %d: %f thresh %f\n", itr->first.first, itr->first.second, doc, bundle_avg_thresh); + continue; + } + + if (!filter_on_intron_overlap) + continue; + + for (map, float>::const_iterator itr2 = intron_doc.begin(); + itr2 != intron_doc.end(); + ++itr2) + { + if (itr == itr2 || + !overlap_in_genome(itr->first.first, + itr->first.second, + itr2->first.first, + itr2->first.second)) + continue; + + double thresh = itr2->second * fraction; + if (doc < thresh) + { + verbose_msg("\t Filtering intron (due to overlap) %d - %d: %f thresh %f\n", itr->first.first, itr->first.second, doc, bundle_avg_thresh); + toss[j] = true; + } + } + } + } + } + } + + for (size_t j = 0; j < hits.size(); ++j) + { + if (!toss[j]) + { + filtered_hits.push_back(hits[j]); +//#if verbose_msg +// if (hits[j].has_intron()) +// { +// fprintf(stderr, "KEEPING intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); +// } +//#endif + } + else + { + if (hits[j].has_intron()) + { + + verbose_msg("\tFiltering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); + } + } + } + + + verbose_msg("\tIntron filtering pass finished: excluded %d fragments\n", (int)hits.size() - (int)filtered_hits.size()); + hits = filtered_hits; +} + +double background_rate(const vector depth_of_coverage, + int left, + int right) +{ + vector tmp; + + size_t r_bound = (size_t)min(right, (int) depth_of_coverage.size()); + size_t l_bound = (size_t)max(left, 0); + + tmp.insert(tmp.end(), + depth_of_coverage.begin() + l_bound, + depth_of_coverage.begin() + r_bound); + + if (tmp.empty()) + return 0; + + vector::iterator new_end = remove(tmp.begin(), tmp.end(), 0); + tmp.erase(new_end, tmp.end()); + sort(tmp.begin(), tmp.end()); + + size_t median = (size_t)floor(tmp.size() / 2); + double median_doc = tmp[median]; + return median_doc; +} + +void pre_mrna_filter(int bundle_length, + 
int bundle_left, + vector& hits) +{ + vector depth_of_coverage(bundle_length,0); + vector scaff_doc; + map, float> intron_doc; + vector filtered_hits; + vector toss(hits.size(), false); + vector through_introns; //for each location, how many introns pass through + + vector scaff_intron_status; + // Make sure the avg only uses stuff we're sure isn't pre-mrna fragments + double bundle_avg_doc = compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_doc, + true, + &through_introns, + &scaff_intron_status); + verbose_msg("Pre-mRNA flt: bundle average doc = %lf\n", bundle_avg_doc); + /* + //2nd call not needed, the vectors won't change, only the return value + compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_doc, + false); + */ + record_doc_for_scaffolds(bundle_left, + hits, + depth_of_coverage, + intron_doc, + scaff_doc); + + for(map, float >::const_iterator itr = intron_doc.begin(); + itr != intron_doc.end(); + ++itr) + { + int i_left = itr->first.first; + int i_right = itr->first.second; + int i_doc = itr->second; + double intron_background = background_rate(depth_of_coverage, + i_left - bundle_left, + i_right - bundle_left); + double cumul_cov = 0; + for (int i = 0; i < i_right - i_left; ++i) + { + size_t pos = (i_left - bundle_left) + i; + cumul_cov += depth_of_coverage[pos]; + } + cumul_cov /= i_right - i_left; + verbose_msg("Pre-mRNA flt: intron %d-%d : background: %lf, inner coverage: %lf, junction coverage: %f\n", + i_left, i_right, intron_background, cumul_cov, i_doc); + if (cumul_cov / bundle_avg_doc >= pre_mrna_fraction) + { + //fprintf(stderr, "\tskipping\n"); + continue; + } + + ////double thresh = (1.0/pre_mrna_fraction) * intron_background; + double thresh = pre_mrna_fraction * intron_background; + float min_flt_fraction = min(pre_mrna_fraction, min_isoform_fraction); + //double thresh = min_flt_fraction * i_doc; + + for (size_t j = 0; j < hits.size(); ++j) + { + if (hits[j].left()>i_right) break; + if (hits[j].is_ref()) + continue; + if (toss[j]) + continue; + //find maximum intron support in the hit region + + int len = 0; + double doc = 0.0; + size_t curr_op = 0; + const vector& ops = hits[j].augmented_ops(); + while (curr_op != ops.size()) + { + const AugmentedCuffOp& op = ops[curr_op]; + if (op.opcode == CUFF_MATCH) + { + int op_len = 0; + double op_doc = 0.0; + int left_off = op.g_left(); + if (left_off + op.genomic_length > i_left && left_off < i_right) + { + if (left_off > i_left) + { + if (left_off + op.genomic_length <= i_right + 1) + { + op_len += op.genomic_length; + int L = left_off - bundle_left; + int R = L + op.genomic_length; + op_doc += accumulate(depth_of_coverage.begin() + L, depth_of_coverage.begin() + R, 0); + } + else + { + op_len += i_right - left_off; + int L = left_off - bundle_left; + int R = L + (i_right - left_off); + op_doc += accumulate(depth_of_coverage.begin() + L, depth_of_coverage.begin() + R, 0); + } + } + else + { + if (left_off + op.genomic_length <= i_right + 1) + { + op_len += (left_off + op.genomic_length - i_left); + int L = left_off - bundle_left; + int R = L + (left_off + op.genomic_length - i_left); + op_doc += accumulate(depth_of_coverage.begin() + L, depth_of_coverage.begin() + R, 0); + } + else + { + op_len = i_right - i_left; + int L = left_off - bundle_left; + int R = L + (i_right - i_left); + op_doc = accumulate(depth_of_coverage.begin() + L, depth_of_coverage.begin() + R, 0); + } + } + } + + len += op_len; + doc += op_doc; + } + + if (op.g_left() >= i_right) + break; + ++curr_op; + } + + if (len) + { + 
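+                // len is the number of exonic bases of this hit that fall inside
+                // the intron, and doc is the summed per-base depth over those
+                // bases, so doc / len is the hit's mean coverage within the
+                // intron; hits falling below thresh (pre_mrna_fraction times the
+                // intron's background rate) are tossed as likely pre-mRNA below.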
double hit_doc_in_region = doc / len; + if (hit_doc_in_region < thresh) + { + toss[j] = true; + if (hits[j].has_intron()) + { + // fprintf(stderr, "\t$$$ Filtering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); + + verbose_msg("\t@@@ Filtering intron scaff [%d-%d] (scaff_doc=%lf, doc_in_region=%lf)\n", + hits[j].left(), hits[j].right(), scaff_doc[j], hit_doc_in_region); + } + } + } + } //for each scaffold + } //for each intron + for (size_t j = 0; j < hits.size(); ++j) + { + if (!toss[j]) + { + filtered_hits.push_back(hits[j]); + } + /*else + { + if (hits[j].has_intron()) + { + + verbose_msg( "\t@@@ Filtering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); + } + } + */ + } + + if (cuff_verbose && hits.size()>filtered_hits.size()) + verbose_msg("\tPre-mRNA flt tossed %lu fragments\n", hits.size() - filtered_hits.size()); + + hits = filtered_hits; +} + +void filter_hits(int bundle_length, + int bundle_left, + vector& hits) +{ + + pre_mrna_filter(bundle_length, bundle_left, hits); + + vector depth_of_coverage(bundle_length+1,0); + vector scaff_doc; + map, float> intron_doc; + vector filtered_hits; + vector toss(hits.size(), false); + + // Make sure the avg only uses stuff we're sure isn't pre-mrna fragments + double bundle_avg_doc = compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_doc, + true); + + // recompute the real DoCs + /* not needed, vectors are not changed + compute_doc(bundle_left, + hits, + depth_of_coverage, + intron_doc, + false); + */ + + record_min_doc_for_scaffolds(bundle_left, + hits, + depth_of_coverage, + intron_doc, + scaff_doc); + + //double bundle_avg_thresh = min_isoform_fraction * bundle_avg_doc; + + if (!intron_doc.empty()) + { + double intron_avg_doc = major_isoform_intron_doc(intron_doc); + double intron_multiplier = intron_avg_doc / bundle_avg_doc; + + // we don't want this to be more than 1.0 ... 
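+        // Given the comment above, the intent appears to be to cap the ratio
+        // itself, i.e. min(intron_multiplier, 1.0); as written, the next
+        // statement replaces the multiplier with min(intron_avg_doc, 1.0).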
+ intron_multiplier = min(intron_avg_doc, 1.0); + //bundle_avg_thresh = min_isoform_fraction * bundle_avg_doc; + + set > tossed_introns; + for(map, float>::const_iterator itr = intron_doc.begin(); + itr != intron_doc.end(); + ++itr) + { + for (size_t j = 0; j < hits.size(); ++j) + { + if (hits[j].is_ref()) + { + continue; + } + int i_left = itr->first.first; + int i_right = itr->first.second; + int j_match_len = hits[j].match_length(i_left, i_right); + if (j_match_len > 0) + { + double idoc = itr->second; + double doc = scaff_doc[j]; + + if (!hits[j].has_intron() && + doc < pre_mrna_fraction * (idoc * intron_multiplier)) + { + toss[j] = true; + } + + const vector& ops = hits[j].augmented_ops(); + + unsigned int num_mismatches = 0; + assert (hits[j].mate_hits().size() == 1); + const MateHit& hit = **(hits[j].mate_hits().begin()); + num_mismatches = hit.edit_dist(); + + double percent_mismatches = num_mismatches / (double)hits[j].length(); + + bool intron_pokin_read = false; + + const AugmentedCuffOp& first = ops.front(); + // intron ========================= + // hit ****************** + if (first.g_left() < i_left && first.g_right() > i_left && first.g_right() < i_right) + { + intron_pokin_read = true; + } + + // intron ========================= + // hit ****************** + if (first.g_left() < i_right && first.g_right() > i_right && first.g_left() > i_left) + { + intron_pokin_read = true; + } + + const AugmentedCuffOp& last = ops.back(); + // intron ========================= + // hit ****************** + if (last.g_left() < i_left && last.g_right() > i_left && last.g_right() < i_right) + { + intron_pokin_read = true; + } + + // intron ========================= + // hit ****************** + if (last.g_left() < i_right && last.g_right() > i_right && last.g_left() > i_left) + { + intron_pokin_read = true; + } + + if (intron_pokin_read) + { + double fraction; +// if (!hits[j].has_intron()) +// { +// fraction = (3 * pre_mrna_fraction) + percent_mismatches; +// } +// else + { + fraction = pre_mrna_fraction + percent_mismatches; + } + double thresh = fraction * (intron_avg_doc * intron_multiplier); + if (doc < thresh) + { + toss[j] = true; +// if (hits[j].has_intron()) +// { +// fprintf(stderr, "\t^^^Filtering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); +// } + } + } + } + } + } + } + + for (size_t j = 0; j < hits.size(); ++j) + { + if (!toss[j]) + { + filtered_hits.push_back(hits[j]); +//#if verbose_msg +// if (hits[j].has_intron()) +// { +// +// fprintf(stderr, "KEEPING intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); +// } +//#endif + } + else + { + if (hits[j].has_intron()) + { + + verbose_msg("\t!!!Filtering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); + } + } + } + +//#if verbose_msg +// fprintf(stderr, "\tInitial filter pass complete\n"); +//#endif + + hits = filtered_hits; + + scaff_doc.clear(); + filtered_hits.clear(); + + toss = vector(hits.size(), false); + + map, float> dummy; + bundle_avg_doc = compute_doc(bundle_left, + hits, + depth_of_coverage, + dummy, + false); + +//#if verbose_msg +// fprintf(stderr, "\tUpdated avg bundle doc = %lf\n", bundle_avg_doc); +//#endif + + record_doc_for_scaffolds(bundle_left, + hits, + depth_of_coverage, + intron_doc, + scaff_doc); + + + +//#if verbose_msg +// double bundle_thresh = pre_mrna_fraction * bundle_avg_doc; +// fprintf(stderr, "\tthreshold is = %lf\n", bundle_thresh); +//#endif + + if (!intron_doc.empty()) + { +// filter_introns(bundle_length, +// bundle_left, +// hits, +// min_isoform_fraction, +// 
true, +// true); + if (bundle_avg_doc > 3000) + { + filter_introns(bundle_length, + bundle_left, + hits, + min_isoform_fraction, + true, + false); + } + } + + for (size_t j = 0; j < hits.size(); ++j) + { + if (!toss[j]) + { + filtered_hits.push_back(hits[j]); +//#if verbose_msg +// if (hits[j].has_intron()) +// { +// +// fprintf(stderr, "KEEPING intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); +// } +//#endif + } + else + { + if (hits[j].has_intron()) + { + verbose_msg("\t***Filtering intron scaff [%d-%d]\n", hits[j].left(), hits[j].right()); + } + } + } + + //fprintf(stderr, "\tTossed %d hits as noise\n", (int)hits.size() - filtered_hits.size()); + + hits = filtered_hits; +} + + +void filter_junk_isoforms(vector >& transcripts, + vector& abundances, + const vector >& mapped_transcripts, + double locus_mass) +{ + // vector::iterator max_ab = std::max_element(abundances.begin(), + // abundances.end()); + double max_fwd_ab = -1.0; + double max_rev_ab = -1.0; + + for (size_t t = 0; t < transcripts.size(); ++t) + { + shared_ptr scaff = transcripts[t]->transfrag(); + if (scaff->strand() == CUFF_FWD || scaff->strand() == CUFF_STRAND_UNKNOWN) + { + if (abundances[t] > max_fwd_ab) + max_fwd_ab = abundances[t]; + } + if (scaff->strand() == CUFF_REV || scaff->strand() == CUFF_STRAND_UNKNOWN) + { + if (abundances[t] > max_rev_ab) + max_rev_ab = abundances[t]; + } + } + + // Try to categorize the crap transcripts for suppression + vector pre_mrna_junk(transcripts.size(), false); //intra-intron, much lower abundance than container + vector chaff(transcripts.size(), false); // only a single MateHit, impossible to reliably quantitate + vector repeats(transcripts.size(), false); // too many low-quality hits + vector too_rare(transcripts.size(), false); // too rare to be reliably quantitated, could be error + + //cerr << "Chucked : "; + for (size_t t = 0; t < transcripts.size(); ++t) + { + shared_ptr scaff = transcripts[t]->transfrag(); + + if (!(scaff->is_ref()) && allow_junk_filtering) + { + const vector hits = scaff->mate_hits(); + + const vector& ops = scaff->augmented_ops(); + + if (ops.size() == 1 && ops[0].opcode == CUFF_MATCH) + { + for (size_t j = 0; j < transcripts.size(); ++j) + { + const vector& j_ops = scaff->augmented_ops(); + for (size_t L = 0; L < j_ops.size(); L++) + { + if (AugmentedCuffOp::overlap_in_genome(ops[0], j_ops[L]) && + j_ops[L].opcode == CUFF_INTRON) + { + pre_mrna_junk[t] = true; + } + } + } + } + + if (library_type != "transfrags") + { + double low_qual_hits = 0.0; + static const double low_qual_err_prob = high_phred_err_prob; // hits with error_prob() above this are low quality; + static const double low_qual_thresh = 0.75; // hits with more than this fraction of low qual hits are repeats + for (vector::const_iterator itr = hits.begin(); + itr != hits.end(); + ++itr) + { + double e = 1-(*itr)->mass(); + if (e >= low_qual_err_prob) + low_qual_hits += 1.0; + } + + double low_qual_frac = low_qual_hits / (double)hits.size(); + if (low_qual_frac > low_qual_thresh) + repeats[t] = true; + } + if (scaff->strand() == CUFF_FWD && + (abundances[t] / max_fwd_ab) < min_isoform_fraction) + too_rare[t] = true; + if ((scaff->strand() == CUFF_REV || scaff->strand() == CUFF_STRAND_UNKNOWN) && + (abundances[t] / max_rev_ab) < min_isoform_fraction) + too_rare[t] = true; + + const vector* cond_probs = (mapped_transcripts[t]->cond_probs()); + if (cond_probs) + { + assert (library_type != "transfrags"); + double supporting_hits = abundances[t] * locus_mass; + if (supporting_hits < 
min_frags_per_transfrag) + chaff[t] = true; + } + } +// else // we should still filter things that are zero to improve robustness of MAP estimation +// { +// if (abundances[t] == 0.0) +// too_rare[t] = true; +// } + } + + vector > non_junk_transcripts; + vector non_junk_abundances; + for (size_t t = 0; t < transcripts.size(); ++t) + { + if (!repeats[t] && !pre_mrna_junk[t] && !too_rare[t] && !chaff[t]) + { + non_junk_transcripts.push_back(transcripts[t]); + non_junk_abundances.push_back(abundances[t]); + } + else + { + verbose_msg( "Filtering isoform %d-%d\n", transcripts[t]->transfrag()->left(), transcripts[t]->transfrag()->right()); + } + } + + transcripts = non_junk_transcripts; + abundances = non_junk_abundances; +} + +// Designed to strip out remaining pre-mrna genes, assembled repeats, and +// fragments from isoforms too short to be reliably quantitated. +void filter_junk_genes(vector& genes) +{ + vector good_genes; + vector all_isoforms; + for (size_t i = 0; i < genes.size(); ++i) + { + all_isoforms.insert(all_isoforms.end(), + genes[i].isoforms().begin(), + genes[i].isoforms().end()); + } + + for (size_t i = 0; i < genes.size(); ++i) + { + const Gene& g = genes[i]; + + if(g.has_ref_trans()) + { + good_genes.push_back(g); + continue; + } + + bool good_gene = true; + for (size_t j = 0; j < all_isoforms.size(); ++j) + { + vector > introns = all_isoforms[j].scaffold().gaps(); + + //assert (!allow_junk_filtering || all_isoforms[j].scaffold().mate_hits().size() >= min_frags_per_transfrag); + for (size_t k = 0; k < introns.size(); ++k) + { + if (g.left() > introns[k].first && g.right() < introns[k].second && + g.FPKM() / all_isoforms[j].FPKM() < pre_mrna_fraction) + { + good_gene = false; + } + } + } + if (allow_junk_filtering) + { + if (g.FPKM() == 0) + { + good_gene = false; + } + } + if (good_gene) + { + good_genes.push_back(g); + } + else + { + verbose_msg("Filtering transfrags from gene %d-%d\n", g.left(), g.right()); + } + } + + genes = good_genes; + +} + +void clip_by_3_prime_dropoff(vector& scaffolds) +{ + vector > three_prime_ends; + + if (library_type != "transfrags") + { + foreach (Scaffold& scaff, scaffolds) + { + if (!(scaff.strand() == CUFF_FWD || scaff.strand() == CUFF_REV)) + continue; + + int scaff_len = scaff.length(); + vector coverage(scaff_len, 0.0); + + double total = 0; + foreach(const MateHit* hit, scaff.mate_hits()) + { + int start, end, frag_len; + if (!scaff.map_frag(*hit, start, end, frag_len)) continue; + + if (scaff.strand() == CUFF_REV) + { + start = scaff_len - 1 - start; + end = scaff_len - 1 - end; + swap(start, end); + } + + for(int i = start; i <= end; ++i) + { + coverage[i] += hit->mass(); + total += hit->mass(); + } + } + double avg_cov = total/scaff_len; + // if (avg_cov < trim_3_avgcov_thresh) + // continue; + + const AugmentedCuffOp* exon_3 = NULL; + int mult; + int offset; + + if (scaff.strand() == CUFF_REV) + { + mult = 1; + offset = 0; + exon_3 = &scaff.augmented_ops().front(); + } + else if (scaff.strand() == CUFF_FWD) + { + mult = -1; + offset = scaff_len - 1; + exon_3 = &scaff.augmented_ops().back(); + } + else + { + continue; + } + + int to_remove; + double min_cost = numeric_limits::max(); + double mean_to_keep = 0.0; + double mean_to_trim = 0.0; + double tmp_mean_to_trim = 0.0; + double tmp_mean_to_keep = 0.0; + double tmp_mean_3prime = 0.0; + for (int i = 0; i < exon_3->genomic_length; i++) + { + tmp_mean_3prime += coverage[offset + mult*i]; + } + tmp_mean_3prime /= exon_3->genomic_length; + + double base_cost = 0.0; + for (int i = 0; i < 
exon_3->genomic_length; i++) + { + double d = (coverage[offset + mult*i] - tmp_mean_3prime); + d *= d; + base_cost += d; + } + base_cost /= exon_3->genomic_length; + + size_t min_cost_x = -1; + for (to_remove = 1; to_remove < exon_3->genomic_length - 1; to_remove++) + { + tmp_mean_to_trim = 0.0; + tmp_mean_to_keep = 0.0; + for (size_t i = 0; i < exon_3->genomic_length; i++) + { + if (i <= to_remove) + { + tmp_mean_to_trim += coverage[offset + mult*i]; + } + else + { + tmp_mean_to_keep += coverage[offset + mult*i]; + } + } + + tmp_mean_to_trim /= to_remove; + tmp_mean_to_keep /= (exon_3->genomic_length - to_remove); + + double tmp_mean_trim_cost = 0.0; + double tmp_mean_keep_cost = 0.0; + for (int i = 0; i < exon_3->genomic_length; i++) + { + if (i <= to_remove) + { + double d = (coverage[offset + mult*i] - tmp_mean_to_trim); + d *= d; + tmp_mean_trim_cost += d; + } + else + { + double d = (coverage[offset + mult*i] - tmp_mean_to_keep); + d *= d; + tmp_mean_keep_cost += d; + } + } + + tmp_mean_trim_cost /= to_remove; + tmp_mean_keep_cost /= (exon_3->genomic_length - to_remove); + + double new_cost = tmp_mean_trim_cost + tmp_mean_keep_cost; + + if (new_cost < min_cost && trim_3_dropoff_frac * tmp_mean_to_keep > tmp_mean_to_trim && new_cost < base_cost && to_remove > scaff_len * 0.05) + { + min_cost = tmp_mean_trim_cost + tmp_mean_keep_cost; + min_cost_x = to_remove; + mean_to_keep = tmp_mean_to_keep; + mean_to_trim = tmp_mean_to_trim; + } + } + + // If trimming reduces the overall mean squared error of the coverage + // do it + if (avg_cov >= trim_3_avgcov_thresh && min_cost_x < exon_3->genomic_length) + { + scaff.trim_3(min_cost_x); + } + + // store the mean squared error for this exon + tmp_mean_3prime = 0.0; + for (int i = 0; i < exon_3->genomic_length; i++) + { + tmp_mean_3prime += coverage[offset + mult*i]; + } + tmp_mean_3prime /= exon_3->genomic_length; + + base_cost = 0.0; + for (int i = 0; i < exon_3->genomic_length; i++) + { + double d = (coverage[offset + mult*i] - tmp_mean_3prime); + d *= d; + base_cost += d; + } + base_cost /= exon_3->genomic_length; + three_prime_ends.push_back(make_pair(base_cost, &scaff)); + } + } + else + { + foreach (Scaffold& scaff, scaffolds) + { + if (!(scaff.strand() == CUFF_FWD || scaff.strand() == CUFF_REV)) + continue; + + int scaff_len = scaff.length(); + vector coverage(scaff_len, 0.0); + + double total = 0; + foreach(const MateHit* hit, scaff.mate_hits()) + { + int start, end, frag_len; + if (!scaff.map_frag(*hit, start, end, frag_len)) continue; + + if (scaff.strand() == CUFF_REV) + { + start = scaff_len - 1 - start; + end = scaff_len - 1 - end; + swap(start, end); + } + + for(int i = start; i <= end; ++i) + { + coverage[i] += hit->mass(); + total += hit->mass(); + } + } + double avg_cov = total/scaff_len; + // if (avg_cov < trim_3_avgcov_thresh) + // continue; + + const AugmentedCuffOp* exon_3 = NULL; + int mult; + int offset; + + if (scaff.strand() == CUFF_REV) + { + mult = 1; + offset = 0; + exon_3 = &scaff.augmented_ops().front(); + } + else if (scaff.strand() == CUFF_FWD) + { + mult = -1; + offset = scaff_len - 1; + exon_3 = &scaff.augmented_ops().back(); + } + else + { + continue; + } + + three_prime_ends.push_back(make_pair(scaff.fpkm(), &scaff)); + } + + } + + adjacency_list G; + + for (size_t i = 0; i < three_prime_ends.size(); ++i) + { + add_vertex(G); + } + + for (size_t i = 0; i < three_prime_ends.size(); ++i) + { + Scaffold* scaff_i = three_prime_ends[i].second; + //assert (scaff_i); + + const AugmentedCuffOp* scaff_i_exon_3 = NULL; 
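+        // Scaffolds i and j receive an edge in G when their 3'-terminal exons
+        // overlap in the genome and are compatible; the connected components
+        // computed below then define groups whose members are trimmed (or
+        // extended) so they share the 3' end of a chosen group leader.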
+ + if (scaff_i->strand() == CUFF_REV) + { + scaff_i_exon_3 = &(scaff_i->augmented_ops().front()); + } + else if (scaff_i->strand() == CUFF_FWD) + { + scaff_i_exon_3 = &(scaff_i->augmented_ops().back()); + } + + for (size_t j = i + 1; j < three_prime_ends.size(); ++j) + { + Scaffold* scaff_j = three_prime_ends[j].second; + + if (scaff_i->strand() != scaff_j->strand()) + continue; + + const AugmentedCuffOp* scaff_j_exon_3 = NULL; + + if (scaff_j->strand() == CUFF_REV) + { + scaff_j_exon_3 = &(scaff_j->augmented_ops().front()); + } + else if (scaff_j->strand() == CUFF_FWD) + { + scaff_j_exon_3 = &(scaff_j->augmented_ops().back()); + } + + if (AugmentedCuffOp::overlap_in_genome(*scaff_j_exon_3, *scaff_i_exon_3) && + AugmentedCuffOp::compatible(*scaff_j_exon_3, *scaff_i_exon_3, 0)) + add_edge(i, j, G); + } + } + + std::vector component(num_vertices(G)); + connected_components(G, &component[0]); + + vector > clusters(three_prime_ends.size(), + vector(three_prime_ends.size(), false)); + + //vector > cluster_indices(three_prime_ends.size()); + + vector > > grouped_scaffolds(three_prime_ends.size()); + for (size_t i = 0; i < three_prime_ends.size(); ++i) + { + clusters[component[i]][i] = true; + grouped_scaffolds[component[i]].push_back(three_prime_ends[i]); + } + + for (size_t i = 0; i < grouped_scaffolds.size(); ++i) + { + vector >& group = grouped_scaffolds[i]; + sort(group.begin(), group.end()); + if (group.empty()) + continue; + + Scaffold* group_leader = NULL; + int trim_point = -1; + + const AugmentedCuffOp* group_exon_3 = NULL; + vector >::iterator l_itr = group.begin(); + while (l_itr != group.end()) + { + Scaffold* possible_leader = l_itr->second; + bool ok_clip_leader = true; + vector >::iterator g_itr = group.begin(); + const AugmentedCuffOp* l_exon_3 = NULL; + CuffStrand s = possible_leader->strand(); + + if (s != CUFF_STRAND_UNKNOWN) + { + if (s == CUFF_REV) + l_exon_3 = &(possible_leader->augmented_ops().front()); + else + l_exon_3 = &(possible_leader->augmented_ops().back()); + for (; g_itr != group.end(); ++g_itr) + { + const AugmentedCuffOp* g_exon_3 = NULL; + if (s == CUFF_REV) + { + // bad: + // leader + // follower + g_exon_3 = &(g_itr->second->augmented_ops().front()); + if (g_exon_3->g_right() <= l_exon_3->g_left()) + ok_clip_leader = false; + + // for meta-assembly libraries, don't ever allow clipping, just extension + // bad: + // leader + // follower + if (library_type == "transfrags" && + g_exon_3->g_left() < l_exon_3->g_left()) + ok_clip_leader = false; + } + else + { + // bad: + // follower + // leader + g_exon_3 = &(g_itr->second->augmented_ops().back()); + if (g_exon_3->g_left() >= l_exon_3->g_right()) + ok_clip_leader = false; + + // for meta-assembly libraries, don't ever allow clipping, just extension + // bad: + // leader + // follower + if (library_type == "transfrags" && + g_exon_3->g_right() > l_exon_3->g_right()) + ok_clip_leader = false; + } + } + } + else + { + ok_clip_leader = false; + } + + if (ok_clip_leader) + { + if (s == CUFF_REV) + { + if (trim_point == -1) + trim_point = l_exon_3->g_left(); + else if (l_exon_3->g_left() < trim_point) + ok_clip_leader = false; + } + else + { + if (trim_point == -1) + trim_point = l_exon_3->g_right(); + else if (l_exon_3->g_right() > trim_point) + ok_clip_leader = false; + } + } + + if (ok_clip_leader) + { + group_leader = possible_leader; + group_exon_3 = l_exon_3; + break; + } + ++l_itr; + } + + if (!group_leader || !group_exon_3) + continue; + + for (size_t j = 0; j < group.size(); ++j) + { + const AugmentedCuffOp* 
exon_3 = NULL; + int end_diff = 0; + if (group_leader->strand() == CUFF_REV) + { + exon_3 = &(group[j].second->augmented_ops().front()); + end_diff = group_exon_3->g_left() - exon_3->g_left(); + } + else + { + exon_3 = &(group[j].second->augmented_ops().back()); + end_diff = exon_3->g_right() - group_exon_3->g_right(); + } + + if (end_diff > 0) + { + // leader + // follower + group[j].second->trim_3(end_diff); + } + else if (end_diff < 0) + { + // leader + // follower + group[j].second->extend_3(-end_diff); + } + } + } + + + return; + +} diff --git a/src/filters.h b/src/filters.h new file mode 100644 index 0000000..ed04859 --- /dev/null +++ b/src/filters.h @@ -0,0 +1,42 @@ +#ifndef FILTERS_H +#define FILTERS_H + +/* + * filters.h + * cufflinks + * + * Created by Cole Trapnell on 10/27/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "scaffolds.h" +#include "genes.h" + +void filter_junk_isoforms(vector >& transcripts, + vector& abundances, + const vector >& mapped_transcripts, + double locus_mass); + + +void filter_introns(int bundle_length, + int bundle_left, + vector& hits, + double fraction, + bool filter_on_intron_overlap, + bool filter_with_intron_doc); + +// Designed to strip out remaining pre-mrna genes, assembled repeats, and +// fragments from isoforms too short to be reliably quantitated. +void filter_junk_genes(vector& genes); + + +void filter_hits(int bundle_length, int bundle_left, vector& hits); + +void clip_by_3_prime_dropoff(vector& scaff); + +#endif diff --git a/src/gdna.cpp b/src/gdna.cpp new file mode 100644 index 0000000..6b250d0 --- /dev/null +++ b/src/gdna.cpp @@ -0,0 +1,53 @@ +#include "gdna.h" +#include + +#define IUPAC_DEFS "AaCcTtGgUuMmRrWwSsYyKkVvHhDdBbNnXx-*" +#define IUPAC_COMP "TtGgAaCcAaKkYyWwSsRrMmBbDdHhVvNnXx-*" + +unsigned char ntCompTable[256]; + +static bool gdna_ntCompTableReady=ntCompTableInit(); + +char ntComplement(char c) { + return ntCompTable[(int)c]; + } + +//in place reverse complement of nucleotide (sub)sequence +char* reverseComplement(char* seq, int slen) { + if (slen==0) slen=strlen(seq); + //reverseChars(seq,len); + int l=0; + int r=slen-1; + register char c; + while (l +#include "genes.h" + +using namespace boost; + +#if ENABLE_THREADS +mutex gene_id_lock; +#endif + +int next_isoform_id = 1; + +int get_next_isoform_id() +{ +#if ENABLE_THREADS + gene_id_lock.lock(); +#endif + int next = next_isoform_id++; +#if ENABLE_THREADS + gene_id_lock.unlock(); +#endif + return next; +} + +void Isoform::get_gtf(vector& gff_recs, + const RefSequenceTable& rt, + set* hit_introns) const +{ + const char* ref_name = rt.get_name(_scaffold.ref_id()); + + assert (ref_name != NULL); + + const char* strand_str = NULL; + if (_scaffold.strand() == CUFF_STRAND_UNKNOWN) + strand_str = "."; + else if (_scaffold.strand() == CUFF_FWD) + strand_str = "+"; + else + strand_str = "-"; + + int score = (int)(_FMI * 1000); + score = min(1000, score); + if (score == 0) + score = 1; + + char buf[2048]; + + if (hit_introns != NULL) + { + sprintf(buf, + "%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\"; full_read_support \"%s\";\n", + ref_name, + _scaffold.left() + 1, + _scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open + score, + strand_str, + gene_id().c_str(), + trans_id().c_str(), + _FPKM, + _fraction, + _confidence.low, + _confidence.high, + _coverage, + 
(_scaffold.has_struct_support(*hit_introns)) ? "yes":"no"); + } + else + { + sprintf(buf, + "%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n", + ref_name, + _scaffold.left() + 1, + _scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open + score, + strand_str, + gene_id().c_str(), + trans_id().c_str(), + _FPKM, + _fraction, + _confidence.low, + _confidence.high, + _coverage); + } + + + gff_recs.push_back(buf); + + int exon_num = 1; + for (size_t op_id = 0; op_id < _scaffold.augmented_ops().size(); ++op_id) + { + const AugmentedCuffOp& op = _scaffold.augmented_ops()[op_id]; + if (op.opcode == CUFF_MATCH || op.opcode == CUFF_UNKNOWN) + { + const char* type = op.opcode == CUFF_MATCH ? "exon" : "missing_data"; + + sprintf(buf, + "%s\tCufflinks\t\%s\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; exon_number \"%d\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n", + ref_name, + type, + op.g_left() + 1, + op.g_right(), // GTF intervals are inclusive on both ends, but ours are half-open + score, + strand_str, + gene_id().c_str(), + trans_id().c_str(), + exon_num, + _FPKM, + _fraction, + _confidence.low, + _confidence.high, + _coverage); + gff_recs.push_back(buf); + + exon_num++; + } + //gff_recs.push_back(buf); + } + +} + + +int next_gene_id = 1; + +int get_next_gene_id() +{ +#if ENABLE_THREADS + gene_id_lock.lock(); +#endif + int next = next_gene_id++; +#if ENABLE_THREADS + gene_id_lock.unlock(); +#endif + return next; +} + + +int next_skipped_region_id = 1; + +int get_next_skipped_region_id() +{ +#if ENABLE_THREADS + gene_id_lock.lock(); +#endif + int next = next_skipped_region_id++; +#if ENABLE_THREADS + gene_id_lock.unlock(); +#endif + return next; +} diff --git a/src/genes.h b/src/genes.h new file mode 100644 index 0000000..4dfa996 --- /dev/null +++ b/src/genes.h @@ -0,0 +1,217 @@ +#ifndef ISOFORM_H +#define ISOFORM_H + +/* + * genes.h + * cufflinks + * + * Created by Cole Trapnell on 8/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "scaffolds.h" +#include "abundances.h" +#include "common.h" + +extern int next_isoform_id; + +int get_next_isoform_id(); + +extern int next_gene_id; + +int get_next_gene_id(); + +extern int next_skipped_region_id; + +int get_next_skipped_region_id(); + +class Isoform +{ +public: + Isoform(const Scaffold& s, + int gid, + int tid, + double FPKM = 0.0, + double eff_len = 0.0, + double fraction = 0.0, + ConfidenceInterval ci = ConfidenceInterval(), + double cov = 0.0, + double est_frag_count = 0.0, + double fmi = 0.0, + AbundanceStatus status = NUMERIC_FAIL, + string ref_gene_id = "") : + _scaffold(s), + _FPKM(FPKM), + _eff_len(eff_len), + _fraction(fraction), + _confidence(ci), + _coverage(cov), + _estimated_count(est_frag_count), + _FMI(fmi), + _status(status) + { + _id = get_next_isoform_id(); + + char trans_id_str[256]; + if (_scaffold.annotated_trans_id() != "") + strncpy(trans_id_str, _scaffold.annotated_trans_id().c_str(), 255); + else if (gid == -1) + sprintf(trans_id_str, "%s.%s.%d", user_label.c_str(), ref_gene_id.c_str(), tid); + else + sprintf(trans_id_str, "%s.%d.%d", user_label.c_str(), gid, tid); + + _trans_id = trans_id_str; + + char gene_id_str[256]; + if(gid == -1) + strncpy(gene_id_str, ref_gene_id.c_str(), 255); + else + sprintf(gene_id_str, "%s.%d", user_label.c_str(), gid); + _gene_id = gene_id_str; + } + + const Scaffold& scaffold() const { return _scaffold; } + double FPKM() const { return _FPKM; } + void FPKM(double fpkm) { _FPKM = fpkm; } + + double effective_length() const { return _eff_len; } + void effective_length(double eff_len) { _eff_len = eff_len; } + + AbundanceStatus status() const { return _status; } + void status(AbundanceStatus status) { _status = status; } + + double fraction() const {return _fraction; } + void fraction(double f) { _fraction = f; } + + ConfidenceInterval confidence() const { return _confidence; } + void confidence(ConfidenceInterval c) { _confidence = c; } + + double coverage() const { return _coverage; } + void coverage(double cov) { _coverage = cov; } + + // fraction of major isoform expression + double FMI() const { return _FMI; } + void FMI(double fmi) { _FMI = fmi; } + + int ID() const { return _id; } + + void get_gtf(vector& gtf_recs, + const RefSequenceTable& rt, + set* hit_introns=NULL) const; + + void gene_id(string& gid) { _gene_id = gid; } + const string& gene_id() const { return _gene_id; } + const string& trans_id() const {return _trans_id; } + + bool is_ref_trans() const { return _scaffold.is_ref(); } + + double estimated_count() const { return _estimated_count; } + void estimated_count(double est) { _estimated_count = est; } +private: + + Scaffold _scaffold; + double _FPKM; + double _eff_len; + double _fraction; + ConfidenceInterval _confidence; + double _coverage; + double _estimated_count; + double _FMI; + int _id; + string _gene_id; + string _trans_id; + AbundanceStatus _status; +}; + +class Gene +{ +public: + Gene(const vector& isoforms, + double FPKM = 0.0, + const ConfidenceInterval& ci = ConfidenceInterval(), + AbundanceStatus status=NUMERIC_FAIL) : + _isoforms(isoforms), + _FPKM(FPKM), + _confidence(ci), + _status(status) + { + vector scaffolds; + for (size_t i = 0; i < isoforms.size(); ++i) + scaffolds.push_back(isoforms[i].scaffold()); + + // Now compute FPKM for the whole gene + Scaffold smashed_gene; + Scaffold::merge(scaffolds, smashed_gene, false); + _left = smashed_gene.left(); + _right = smashed_gene.right(); + + _gene_id = 
_isoforms.front().gene_id(); + } + + const vector& isoforms() const { return _isoforms; } + double FPKM() const { return _FPKM; } + + ConfidenceInterval confidence() const { return _confidence; } + void confidence(ConfidenceInterval c) { _confidence = c; } + + AbundanceStatus status() const { return _status; } + void status(AbundanceStatus status) { _status = status; } + + int left() const { return _left; } + int right() const { return _right; } + + const string& gene_id() const { return _gene_id; } + + bool has_ref_trans() const + { + foreach (const Isoform& iso, _isoforms) + { + if (iso.is_ref_trans()) + return true; + } + return false; + } + + double estimated_count() const + { + double est = 0.0; + foreach (const Isoform& iso, _isoforms) + { + est += iso.estimated_count(); + } + return est; + } + + double effective_length() const + { + double eff = 0.0; + double total_fpkm = 0; + foreach (const Isoform& iso, _isoforms) + { + eff += iso.FPKM() * iso.effective_length(); + total_fpkm += iso.FPKM(); + } + if (total_fpkm) + return eff / total_fpkm; + else + return 0; + } + +private: + + vector _isoforms; + double _FPKM; + ConfidenceInterval _confidence; + int _id; + int _left; + int _right; + string _gene_id; + AbundanceStatus _status; +}; + +#endif diff --git a/src/gff.cpp b/src/gff.cpp new file mode 100644 index 0000000..c950f89 --- /dev/null +++ b/src/gff.cpp @@ -0,0 +1,1866 @@ +#include "gff.h" + +//GffNames* GffReader::names=NULL; +GffNames* GffObj::names=NULL; +//global set of feature names, attribute names etc. +// -- common for all GffObjs in current application! + +const uint GFF_MAX_LOCUS = 7000000; //longest known gene in human is ~2.2M, UCSC claims a gene for mouse of ~ 3.1 M +const uint GFF_MAX_EXON = 30000; //longest known exon in human is ~11K +const uint GFF_MAX_INTRON= 6000000; //Ensembl shows a >5MB human intron +bool gff_show_warnings = false; //global setting, set by GffReader->showWarnings() +const int gff_fid_mRNA=0; +const int gff_fid_transcript=1; +const int gff_fid_exon=2; +const int gff_fid_CDS=3; //never really used in GffObj ftype_id or subftype_id +const uint gfo_flag_HAS_ERRORS = 0x00000001; +const uint gfo_flag_CHILDREN_PROMOTED= 0x00000002; +const uint gfo_flag_IS_GENE = 0x00000004; +const uint gfo_flag_IS_TRANSCRIPT = 0x00000008; +const uint gfo_flag_FROM_GFF3 = 0x00000010; +const uint gfo_flag_BY_EXON = 0x00000020; //created by subfeature (exon) directly +const uint gfo_flag_DISCARDED = 0x00000100; +const uint gfo_flag_LST_KEEP = 0x00000200; +const uint gfo_flag_LEVEL_MSK = 0x00FF0000; +const byte gfo_flagShift_LEVEL = 16; + +void gffnames_ref(GffNames* &n) { + if (n==NULL) n=new GffNames(); + n->numrefs++; +} + +void gffnames_unref(GffNames* &n) { + if (n==NULL) GError("Error: attempt to remove reference to null GffNames object!\n"); + n->numrefs--; + if (n->numrefs==0) { delete n; n=NULL; } +} + +int gfo_cmpByLoc(const pointer p1, const pointer p2) { + + GffObj& g1=*((GffObj*)p1); + GffObj& g2=*((GffObj*)p2); + if (g1.gseq_id==g2.gseq_id) { + if (g1.start!=g2.start) + return (int)(g1.start-g2.start); + else if (g1.getLevel()!=g2.getLevel()) + return (int)(g1.getLevel()-g2.getLevel()); + else + if (g1.end!=g2.end) + return (int)(g1.end-g2.end); + else return strcmp(g1.getID(), g2.getID()); + } + else return (int)(g1.gseq_id-g2.gseq_id); +} + +char* GffLine::extractAttr(const char* pre, bool caseStrict, bool enforce_GTF2) { + //parse a key attribute and remove it from the info string + //(only works for attributes that have values following them after ' ' 
or '=') + static const char GTF2_ERR[]="Error parsing attribute %s ('\"' required) at GTF line:\n%s\n"; + int lpre=strlen(pre); + char cend=pre[lpre-1]; + char* pos = (caseStrict) ? strstr(info, pre) : strifind(info, pre); + if (pos==NULL) return NULL; + char* findstart=info; + //require word boundary on the left: + while (pos!=NULL && pos!=info && *(pos-1)!=';' && *(pos-1)!=' ') { + findstart=pos+lpre; + pos = (caseStrict) ? strstr(findstart, pre) : strifind(findstart, pre); + } + if (pos==NULL) return NULL; + if (cend!=' ' && cend!='=') { + //require word boundary on the right: + while (pos!=NULL && *(pos+lpre)!=' ' && *(pos+lpre)!='=') { + findstart=pos+lpre; + pos = (caseStrict) ? strstr(findstart, pre) : strifind(findstart, pre); + } + } + if (pos==NULL) return NULL; + char* vp=pos+lpre; + while (*vp==' ') vp++; + if (*vp==';' || *vp==0) + GError("Error parsing value of GFF attribute \"%s\", line:\n%s\n", pre, dupline); + bool dq_enclosed=false; //value string enclosed by double quotes + if (*vp=='"') { + dq_enclosed=true; + vp++; + } + if (enforce_GTF2 && !dq_enclosed) + GError(GTF2_ERR,pre, dupline); + char* vend=vp; + if (dq_enclosed) { + while (*vend!='"' && *vend!=';' && *vend!=0) vend++; + } + else { + while (*vend!=';' && *vend!=0) vend++; + } + if (enforce_GTF2 && *vend!='"') + GError(GTF2_ERR, pre, dupline); + char *r=Gstrdup(vp, vend-1); + //-- now remove this attribute from the info string + while (*vend!=0 && (*vend=='"' || *vend==';' || *vend==' ')) vend++; + if (*vend==0) vend--; + for (char *src=vend, *dest=pos;;src++,dest++) { + *dest=*src; + if (*src==0) break; + } + return r; +} + +static char fnamelc[128]; + +GffLine::GffLine(GffReader* reader, const char* l) { + llen=strlen(l); + GMALLOC(line,llen+1); + memcpy(line, l, llen+1); + GMALLOC(dupline, llen+1); + memcpy(dupline, l, llen+1); + skip=true; + gseqname=NULL; + track=NULL; + ftype=NULL; + info=NULL; + _parents=NULL; + _parents_len=0; + num_parents=0; + parents=NULL; + is_gff3=false; + is_cds=false; + is_transcript=false; + is_exon=false; + is_gene=false; + exontype=0; + gene_id=NULL; + gene_name=NULL; + qstart=0; + qend=0; + qlen=0; + ID=NULL; + char* t[9]; + int i=0; + int tidx=1; + t[0]=line; + + while (line[i]!=0) { + if (line[i]=='\t') { + line[i]=0; + t[tidx]=line+i+1; + tidx++; + if (tidx>8) break; + } + i++; + } + + if (tidx<8) { // ignore non-GFF lines + // GMessage("Warning: error parsing GFF/GTF line:\n%s\n", l); + return; + } + gseqname=t[0]; + track=t[1]; + ftype=t[2]; + info=t[8]; + char* p=t[3]; + if (!parseUInt(p,fstart)) { + //FIXME: chromosome_band entries in Flybase + GMessage("Warning: invalid start coordinate at line:\n%s\n",l); + return; + } + p=t[4]; + if (!parseUInt(p,fend)) { + GMessage("Warning: invalid end coordinate at line:\n%s\n",l); + return; + } + if (fend=fend, always + p=t[5]; + if (p[0]=='.' 
&& p[1]==0) { + score=0; + } + else { + if (!parseDouble(p,score)) + GError("Error parsing feature score from GFF line:\n%s\n",l); + } + strand=*t[6]; + if (strand!='+' && strand!='-' && strand!='.') + GError("Error parsing strand (%c) from GFF line:\n%s\n",strand,l); + phase=*t[7]; // must be '.', '0', '1' or '2' + ID=NULL; + // exon/CDS/mrna filter + strncpy(fnamelc, ftype, 127); + fnamelc[127]=0; + strlower(fnamelc); //convert to lower case + bool is_t_data=false; + if (strstr(fnamelc, "utr")!=NULL) { + exontype=exgffUTR; + is_exon=true; + is_t_data=true; + } + else if (endsWith(fnamelc, "exon")) { + exontype=exgffExon; + is_exon=true; + is_t_data=true; + } + else if (strstr(fnamelc, "stop") && + (strstr(fnamelc, "codon") || strstr(fnamelc, "cds"))){ + exontype=exgffStop; + is_cds=true; //though some place it outside the last CDS segment + is_t_data=true; + } + else if (strstr(fnamelc, "start") && + ((strstr(fnamelc, "codon")!=NULL) || strstr(fnamelc, "cds")!=NULL)){ + exontype=exgffStart; + is_cds=true; + is_t_data=true; + } + else if (strcmp(fnamelc, "cds")==0) { + exontype=exgffCDS; + is_cds=true; + is_t_data=true; + } + else if (endsWith(fnamelc, "gene") || startsWith(fnamelc, "gene")) { + is_gene=true; + is_t_data=true; //because its name will be attached to parented transcripts + } + else if (endsWith(fnamelc,"rna") || endsWith(fnamelc,"transcript")) { + is_transcript=true; + is_t_data=true; + } + +if (reader->transcriptsOnly && !is_t_data) { + char* id=extractAttr("ID="); + if (id==NULL) id=extractAttr("transcript_id"); + //GMessage("Discarding non-transcript line:\n%s\n",l); + if (id!=NULL) { + reader->discarded_ids.Add(id, new int(1)); + GFREE(id); + } + return; //skip this line, unwanted feature name + } + ID=extractAttr("ID="); + char* Parent=extractAttr("Parent="); + is_gff3=(ID!=NULL || Parent!=NULL); + if (is_gff3) { + //parse as GFF3 + if (ID!=NULL) { + //has ID attr so it's likely to be a parent feature + //look for explicit gene name + gene_name=extractAttr("gene_name=",false); + if (gene_name==NULL) { + gene_name=extractAttr("geneName=",false); + if (gene_name==NULL) { + gene_name=extractAttr("gene_sym=",false); + if (gene_name==NULL) { + gene_name=extractAttr("gene=",false); + } + } + } + gene_id=extractAttr("geneID=",false); + if (gene_id==NULL) { + gene_id=extractAttr("gene_id=",false); + } + if (is_gene) { + //special case: keep the Name and ID attributes of the gene feature + if (gene_name==NULL) + gene_name=extractAttr("Name="); + if (gene_id==NULL) //the ID is also gene_id in this case + gene_id=Gstrdup(ID); + //skip=false; + //return; + GFREE(Parent); //TMI, we really don't care about gene Parents? 
+ } //gene feature + }// has GFF3 ID + if (Parent!=NULL) { + //keep Parent attr + //parse multiple parents + num_parents=1; + p=Parent; + int last_delim_pos=-1; + while (*p!=';' && *p!=0) { + if (*p==',' && *(p+1)!=0 && *(p+1)!=';') { + num_parents++; + last_delim_pos=(p-Parent); + } + p++; + } + _parents_len=p-Parent+1; + _parents=Parent; + GMALLOC(parents, num_parents*sizeof(char*)); + parents[0]=_parents; + int i=1; + if (last_delim_pos>0) { + for (p=_parents+1;p<=_parents+last_delim_pos;p++) { + if (*p==',') { + char* ep=p-1; + while (*ep==' ' && ep>_parents) ep--; + *(ep+1)=0; //end the string there + parents[i]=p+1; + i++; + } + } + } + } //has Parent field + } //GFF3 + else { // GTF-like expected + Parent=extractAttr("transcript_id"); + if (Parent!=NULL) { //GTF2 format detected + if (is_transcript) { + // atypical GTF with a parent transcript line declared + ID=Parent; + Parent=NULL; + } + gene_id=extractAttr("gene_id"); // for GTF this is the only attribute accepted as geneID + gene_name=extractAttr("gene_name"); + if (gene_name==NULL) { + gene_name=extractAttr("gene_sym"); + if (gene_name==NULL) + gene_name=extractAttr("gene"); + } + //prepare for parseAttr by adding '=' character instead of spaces for all attributes + //after the attribute name + p=info; + bool noed=true; //not edited after the last delim + bool nsp=false; //non-space found after last delim + while (*p!=0) { + if (*p==' ') { + if (nsp && noed) { + *p='='; + noed=false; + p++; + continue; + } + } + else nsp=true; //non-space + if (*p==';') { noed=true; nsp=false; } + p++; + } + } //GTF2 detected (no parent line) + else {// Parent is NULL, check for jigsaw format or other pre-GTF2 format + //char* fexon=strstr(fnamelc, "exon"); + //if (fexon!=NULL) { + if (exontype==exgffExon) { + if (startsWith(track,"jigsaw")) { + is_cds=true; + strcpy(track,"jigsaw"); + p=strchr(info,';'); + if (p==NULL) { Parent=Gstrdup(info); info=NULL; } + else { Parent=Gstrdup(info,p-1); + info=p+1; + } + } + } //exon feature? + if (Parent==NULL && exontype>=exgffCDS && + (i=strcspn(info,"; \t\n\r"))<=(int)(strlen(info)+1)) { + //one word ID ? 
really desperate attempt to parse it here + Parent=Gstrdup(info,info+i-1); + info=NULL; //discard anything else on the line + } + } + if (Parent!=NULL) { //GTF transcript_id for exon/CDS feature + _parents=Parent; + GMALLOC(parents,sizeof(char*)); + num_parents=1; + parents[0]=_parents; + } + } //GTF-like + + //parse other potentially useful features + if (is_gff3) { + if ((p=strstr(info,"Target="))!=NULL) { //has Target attr + p+=7; + while (*p!=';' && *p!=0 && *p!=' ') p++; + if (*p!=' ') { + GError("Error parsing target coordinates from GFF line:\n%s\n",l); + } + if (!parseUInt(p,qstart)) + GError("Error parsing target start coordinate from GFF line:\n%s\n",l); + if (*p!=' ') { + GError("Error parsing next target coordinate from GFF line:\n%s\n",l); + } + p++; + if (!parseUInt(p,qend)) + GError("Error parsing target end coordinate from GFF line:\n%s\n",l); + } + if ((p=strifind(info,"Qreg="))!=NULL) { //has Qreg attr + p+=5; + if (!parseUInt(p,qstart)) + GError("Error parsing target start coordinate from GFF line:\n%s\n",l); + if (*p!='-') { + GError("Error parsing next target coordinate from GFF line:\n%s\n",l); + } + p++; + if (!parseUInt(p,qend)) + GError("Error parsing target end coordinate from GFF line:\n%s\n",l); + if (*p=='|' || *p==':') { + p++; + if (!parseUInt(p,qlen)) + GError("Error parsing target length from GFF Qreg|: \n%s\n",l); + } + }//has Qreg attr + if (qlen==0 && (p=strifind(info,"Qlen="))!=NULL) { + p+=5; + if (!parseUInt(p,qlen)) + GError("Error parsing target length from GFF Qlen:\n%s\n",l); + } + }//parsing some useful attributes in GFF3 records + if (ID==NULL && parents==NULL) { + if (reader->gff_warns) + GMessage("Warning: could not parse ID or Parent from GFF line:\n%s\n",dupline); + return; //skip + } + skip=false; +} + + +void GffObj::addCDS(uint cd_start, uint cd_end, char phase) { + if (cd_start>=this->start) { + this->CDstart=cd_start; + if (strand=='+') this->CDphase=phase; + } + else this->CDstart=this->start; + if (cd_end<=this->end) { + this->CDend=cd_end; + if (strand=='-') this->CDphase=phase; + } + else this->CDend=this->end; + isTranscript(true); + exon_ftype_id=gff_fid_exon; + if (monoFeature()) { + if (exons.Count()==0) addExon(this->start, this->end,0,'.',0,0,false,exgffExon); + else exons[0]->exontype=exgffExon; + } +} + +int GffObj::addExon(GffReader* reader, GffLine* gl, bool keepAttr, bool noExonAttr) { + //this will make sure we have the right subftype_id! + //int subf_id=-1; + if (!isTranscript() && gl->is_cds) { + isTranscript(true); + exon_ftype_id=gff_fid_exon; + if (exons.Count()==1) exons[0]->exontype=exgffExon; + } + if (isTranscript()) { + if (exon_ftype_id<0) {//exon_ftype_id=gff_fid_exon; + if (gl->exontype>0) exon_ftype_id=gff_fid_exon; + else exon_ftype_id=names->feats.addName(gl->ftype); + } + //any recognized mRNA segment gets the generic "exon" type (also applies to CDS) + if (gl->exontype==0 && !gl->is_transcript) { + //extraneous mRNA feature, discard + if (reader->gff_warns) + GMessage("Warning: discarding unrecognized transcript subfeature %s of %s\n", + gl->ftype, gffID); + return -1; + } + } + else { //non-mRNA parent feature, check this subf type + int subf_id=names->feats.addName(gl->ftype); + if (exon_ftype_id<0 || exons.Count()==0) //never assigned a subfeature type before (e.g. 
first exon being added) + exon_ftype_id=subf_id; + else { + if (exon_ftype_id!=subf_id) { + // + if (exon_ftype_id==ftype_id && exons.Count()==1 && exons[0]->start==start && exons[0]->end==end) { + //the existing exon was just a dummy one created by default, discard it + exons.Clear(); + covlen=0; + exon_ftype_id=subf_id; //allow the new subfeature to completely takeover + } + else { //multiple subfeatures, prefer those with + if (reader->gff_warns) + GMessage("GFF Warning: multiple subfeatures (%s and %s) found for %s, discarding ", + names->feats.getName(subf_id), names->feats.getName(exon_ftype_id),gffID); + if (gl->exontype!=0) { //new feature is an exon, discard previously parsed subfeatures + if (reader->gff_warns) GMessage("%s.\n", names->feats.getName(exon_ftype_id)); + exon_ftype_id=subf_id; + exons.Clear(); + covlen=0; + } + else { //discard new feature + if (reader->gff_warns) GMessage("%s.\n", names->feats.getName(subf_id)); + return -1; //skip this 2nd subfeature type for this parent! + } + } + } //incoming subfeature is of different type + } //new subfeature type + } //non-mRNA parent + int eidx=addExon(gl->fstart, gl->fend, gl->score, gl->phase, + gl->qstart,gl->qend, gl->is_cds, gl->exontype); + if (eidx<0) return eidx; //this should never happen + if (keepAttr) { + if (noExonAttr) { + if (attrs==NULL) //place the parsed attributes directly at transcript level + parseAttrs(attrs, gl->info); + } + else { //need all exon-level attributes + parseAttrs(exons[eidx]->attrs, gl->info, true); + } + } + return eidx; +} + + +int GffObj::addExon(uint segstart, uint segend, double sc, char fr, int qs, int qe, bool iscds, char exontype) { + if (exons.Count()==0) { + if (iscds) isCDS=true; //for now, assume CDS only if first "exon" given is a CDS + if (exon_ftype_id<0) { + exon_ftype_id = isTranscript() ? gff_fid_exon : ftype_id; + } + } + //special treatment of start/stop codon features, they might be broken/split between exons + //and in that case some providers will still give the wrong end coordinate as start+2 (e.g. UCSC) + //so we should not trust the end coordinate for such features + if (exontype==exgffStart || exontype==exgffStop) { + if (strand=='-') segstart=segend; + else segend=segstart; + if (exontype==exgffStart) { + if (CDstart==0 || segstartCDend) CDend=segstart; + } + } + else if (iscds) { //update CDS anchors: + if (CDstart==0 || segstartCDend) { + if (exontype==exgffCDS && strand=='-') CDphase=fr; + CDend=segend; + } + } + else { // not a CDS/start/stop + isCDS=false; + } + if (qs || qe) { + if (qs>qe) swap(qs,qe); + if (qs==0) qs=1; + } + int ovlen=0; + if (exontype>0) { //check for overlaps between exon-type segments + int oi=exonOverlapIdx(segstart, segend, &ovlen); + if (oi>=0) { //overlap existing segment + if (ovlen==0) { + //adjacent segments will be merged + //e.g. CDS to (UTR|exon) + if ((exons[oi]->exontype>=exgffUTR && exontype==exgffCDS) || + (exons[oi]->exontype==exgffCDS && exontype>=exgffUTR)) { + expandExon(oi, segstart, segend, exgffCDSUTR, sc, fr, qs, qe); + return oi; + } + //CDS adjacent to stop_codon: UCSC does (did?) 
this + if ((exons[oi]->exontype==exgffStop && exontype==exgffCDS) || + (exons[oi]->exontype==exgffCDS && exontype==exgffStop)) { + expandExon(oi, segstart, segend, exgffCDS, sc, fr, qs, qe); + return oi; + } + } + //only allow this for CDS within exon, stop_codon within (CDS|UTR|exon), + // start_codon within (CDS|exon) + if (exons[oi]->exontype>exontype && + exons[oi]->start<=segstart && exons[oi]->end>=segend && + !(exons[oi]->exontype==exgffUTR && exontype==exgffCDS)) { + //larger segment given first, now the smaller included one is redundant + return oi; //only used to store attributes from current GffLine + } + if (exontype>exons[oi]->exontype && + segstart<=exons[oi]->start && segend>=exons[oi]->end && + !(exontype==exgffUTR && exons[oi]->exontype==exgffCDS)) { + //smaller segment given first, so we have to enlarge it + expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe); + //this should also check for overlapping next exon (oi+1) ? + return oi; + } + //there is also the special case of "ribosomal slippage exception" (programmed frameshift) + //where two CDS segments may actually overlap for 1 or 2 bases, but there should be only one encompassing exon + //if (ovlen>2 || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) { + // had to relax this because of some weird UCSC annotations with exons partially overlapping the CDS segments + /* + if (ovlen>2 && exons[oi]->exontype!=exgffUTR && exontype!=exgffUTR) { + if (gff_show_warnings) + GMessage("GFF Warning: discarding overlapping feature segment (%d-%d) (vs %d-%d (%s)) for GFF ID %s on %s\n", + segstart, segend, exons[oi]->start, exons[oi]->end, getSubfName(), gffID, getGSeqName()); + hasErrors(true); + return -1; //segment NOT added + } + */ + + if ((ovlen>2 || ovlen==0) || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) { + if (gff_show_warnings) + GMessage("GFF Warning: merging overlapping/adjacent feature segment (%d-%d) into (%d-%d) (%s) for GFF ID %s on %s\n", + segstart, segend, exons[oi]->start, exons[oi]->end, getSubfName(), gffID, getGSeqName()); + expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe); + return oi; + } + // else add the segment if the overlap is small and between two CDS segments + //TODO: we might want to add an attribute here with the slippage coordinate and size? 
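+  // Note (descriptive comment, inferred from the checks above): reaching this
+  // point means the 1-2 base overlap is between two CDS segments (the
+  // "ribosomal slippage" case tolerated above); only the covered length is
+  // corrected here, and the incoming segment is then appended below as a
+  // separate GffExon.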
+ covlen-=ovlen; + }//overlap or adjacent to existing segment + } //check for overlap + // --- no overlap, or accepted micro-overlap (ribosomal slippage) + // create & add the new segment + /* + if (start>0 && exontype==exgffCDS && exons.Count()==0) { + //adding a CDS directly as the first subfeature of a declared parent + segstart=start; + segend=end; + } + */ + GffExon* enew=new GffExon(segstart, segend, sc, fr, qs, qe, exontype); + int eidx=exons.Add(enew); + if (eidx<0) { + //this would actually be acceptable if the object is a "Gene" and "exons" are in fact isoforms + if (gff_show_warnings) + GMessage("GFF Warning: failed adding segment %d-%d for %s (discarded)!\n", + segstart, segend, gffID); + delete enew; + hasErrors(true); + return -1; + } + covlen+=(int)(exons[eidx]->end-exons[eidx]->start)+1; + //adjust parent feature coordinates to contain this exon + if (start==0 || start>exons.First()->start) { + start=exons.First()->start; + } + if (endend) end=exons.Last()->end; + + if (uptr!=NULL) { //collect stats about the underlying genomic sequence + GSeqStat* gsd=(GSeqStat*)uptr; + if (startmincoord) gsd->mincoord=start; + if (end>gsd->maxcoord) gsd->maxcoord=end; + if (this->len()>gsd->maxfeat_len) { + gsd->maxfeat_len=this->len(); + gsd->maxfeat=this; + } + } + return eidx; +} + +void GffObj::expandExon(int oi, uint segstart, uint segend, char exontype, double sc, char fr, int qs, int qe) { + //oi is the index of the *first* overlapping segment found that must be enlarged + covlen-=exons[oi]->len(); + if (segstartstart) + exons[oi]->start=segstart; + if (qs && qsqstart) exons[oi]->qstart=qs; + if (segend>exons[oi]->end) + exons[oi]->end=segend; + if (qe && qe>exons[oi]->qend) exons[oi]->qend=qe; + //warning: score cannot be properly adjusted! e.g. if it's a p-value it's just going to get worse + if (sc!=0) exons[oi]->score=sc; + covlen+=exons[oi]->len(); + //if (exons[oi]->exontype< exontype) -- always true + exons[oi]->exontype = exontype; + if (exontype==exgffCDS) exons[oi]->phase=fr; + //we must check if any more exons are also overlapping this + int ni=oi+1; //next exon index after oi + while (ni=exons[ni]->start) { // next segment overlaps new enlarged segment + //only allow this if next segment is fully included, and a subordinate + if (exons[ni]->exontypeend<=segend) { +/* I guess we have to relax this due to stupid UCSC hg18 files having a start_codon sticking out +chr1 hg18_knownGene start_codon 69806911 69806913 0.000000 + . +chr1 hg18_knownGene CDS 69806911 69806912 0.000000 + 0 +chr1 hg18_knownGene exon 69805456 69806912 0.000000 + . 
+*/ + if (exons[ni]->qstartqstart) exons[oi]->qstart=exons[ni]->qstart; + if (exons[ni]->qend>exons[oi]->qend) exons[oi]->qend=exons[ni]->qend; + exons.Delete(ni); + } + else { + if (gff_show_warnings) GMessage("GFF Warning: overlapping existing exon(%d-%d) while expanding to %d-%d for GFF ID %s\n", + exons[ni]->start, exons[ni]->end, segstart, segend, gffID); + //hasErrors(true); + break; + } + } + // -- make sure any other related boundaries are updated: + start=exons.First()->start; + end=exons.Last()->end; + if (uptr!=NULL) { //collect stats about the underlying genomic sequence + GSeqStat* gsd=(GSeqStat*)uptr; + if (startmincoord) gsd->mincoord=start; + if (end>gsd->maxcoord) gsd->maxcoord=end; + if (this->len()>gsd->maxfeat_len) { + gsd->maxfeat_len=this->len(); + gsd->maxfeat=this; + } + } +} + +void GffObj::removeExon(int idx) { + /* + if (idx==0 && segs[0].start==gstart) + gstart=segs[1].start; + if (idx==segcount && segs[segcount].end==gend) + gend=segs[segcount-1].end; + */ + if (idx<0 || idx>=exons.Count()) return; + int segstart=exons[idx]->start; + int segend=exons[idx]->end; + exons.Delete(idx); + covlen -= (int)(segend-segstart)+1; + start=exons.First()->start; + end=exons.Last()->end; + if (isCDS) { CDstart=start; CDend=end; } +} + +void GffObj::removeExon(GffExon* p) { + for (int idx=0;idxstart; + int segend=exons[idx]->end; + exons.Delete(idx); + covlen -= (int)(segend-segstart)+1; + start=exons.First()->start; + end=exons.Last()->end; + if (isCDS) { CDstart=start; CDend=end; } + return; + } + } +} + + + +GffObj::GffObj(GffReader *gfrd, GffLine* gffline, bool keepAttr, bool noExonAttr): + GSeg(0,0), exons(true,true,false), children(1,false) { + xstart=0; + xend=0; + xstatus=0; + partial=false; + isCDS=false; + uptr=NULL; + ulink=NULL; + parent=NULL; + udata=0; + flags=0; + CDstart=0; + CDend=0; + CDphase=0; + geneID=NULL; + gene_name=NULL; + attrs=NULL; + gffID=NULL; + track_id=-1; + gseq_id=-1; + ftype_id=-1; + exon_ftype_id=-1; + strand='.'; + if (gfrd==NULL) + GError("Cannot use this GffObj constructor with a NULL GffReader!\n"); + gffnames_ref(names); + if (gfrd->names==NULL) gfrd->names=names; + //qlen=0;qstart=0;qend=0; + gscore=0; + uscore=0; + covlen=0; + qcov=0; + start=gffline->fstart; + end=gffline->fend; + gseq_id=names->gseqs.addName(gffline->gseqname); + track_id=names->tracks.addName(gffline->track); + strand=gffline->strand; + qlen=gffline->qlen; + qstart=gffline->qstart; + qend=gffline->qend; + //setup flags from gffline + isCDS=gffline->is_cds; //for now + isGene(gffline->is_gene); + isTranscript(gffline->is_transcript || gffline->exontype!=0); + fromGff3(gffline->is_gff3); + + if (gffline->parents!=NULL) { + //GTF style -- create a GffObj directly by subfeature + //(also possible orphan GFF3 exon line, or an exon given before its parent (chado)) + if (gffline->exontype!=0) { //recognized exon-like feature + ftype_id=gff_fid_transcript; //so this is some sort of transcript + exon_ftype_id=gff_fid_exon; //subfeatures MUST be exons + } + else {//unrecognized subfeatures + //make this GffObj of the same feature type + ftype_id=names->feats.addName(gffline->ftype); + } + if (gffline->ID==NULL) { //typical GTF + gffID=Gstrdup(gffline->parents[0]); + this->createdByExon(true); + //this is likely the first exon/segment of the feature + addExon(gfrd, gffline, keepAttr, noExonAttr); + } + else { //a parented feature with an ID -- probably an orphan GFF3 line + if (gffline->is_gff3 && gffline->exontype!=0) { + //premature exon given before its parent transcript + 
//create the transcript entry here + gffID=Gstrdup(gffline->parents[0]); + this->createdByExon(true); + //this is the first exon/segment of the transcript + addExon(gfrd, gffline, keepAttr, noExonAttr); + } + else { //unrecognized non-exon feature ? use the ID instead + gffID=Gstrdup(gffline->ID); + if (keepAttr) this->parseAttrs(attrs, gffline->info); + } + } + } //subfeature given directly + else { //gffline->parents==NULL + //create a parent feature in its own right + gscore=gffline->score; + if (gffline->ID==NULL || gffline->ID[0]==0) + GError("Error: no ID found for GFF record start\n"); + gffID=Gstrdup(gffline->ID); //there must be an ID here + //if (gffline->is_transcript) ftype_id=gff_fid_mRNA; + //else + ftype_id=names->feats.addName(gffline->ftype); + if (gffline->is_transcript) + exon_ftype_id=gff_fid_exon; + + if (keepAttr) this->parseAttrs(attrs, gffline->info); + }//no parent + + if (gffline->gene_name!=NULL) { + gene_name=Gstrdup(gffline->gene_name); + } + if (gffline->gene_id!=NULL) { + geneID=Gstrdup(gffline->gene_id); + } + + GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true); + uptr=gsd; + if (startmincoord) gsd->mincoord=start; + if (end>gsd->maxcoord) gsd->maxcoord=end; + if (this->len()>gsd->maxfeat_len) { + gsd->maxfeat_len=this->len(); + gsd->maxfeat=this; + } +} + +GffLine* GffReader::nextGffLine() { + if (gffline!=NULL) return gffline; //caller should free gffline after processing + while (gffline==NULL) { + int llen=0; + buflen=GFF_LINELEN-1; + char* l=fgetline(linebuf, buflen, fh, &fpos, &llen); + if (l==NULL) { + return NULL; //end of file + } + int ns=0; //first nonspace position + while (l[ns]!=0 && isspace(l[ns])) ns++; + if (l[ns]=='#' || llen<10) continue; + gffline=new GffLine(this, l); + if (gffline->skip) { + delete gffline; + gffline=NULL; + continue; + } + if (gffline->ID==NULL && gffline->parents==NULL) { //it must have an ID + //this might not be needed, already checked in the GffLine constructor + if (gff_warns) + GMessage("Warning: malformed GFF line, no parent or record Id (kipping\n"); + delete gffline; + gffline=NULL; + //continue; + } + } +return gffline; +} + +char* GffReader::gfoBuildId(const char* id, const char* ctg) { +//caller must free the returned pointer + char* buf=NULL; + int idlen=strlen(id); + GMALLOC(buf, idlen+strlen(ctg)+2); + strcpy(buf, id); + buf[idlen]='~'; + strcpy(buf+idlen+1, ctg); + return buf; +} + +void GffReader::gfoRemove(const char* id, const char* ctg) { + char* buf=gfoBuildId(id,ctg); + phash.Remove(buf); + GFREE(buf); +} + +//Warning: if gflst gets altered, idx becomes obsolete +GfoHolder* GffReader::gfoAdd(const char* id, const char* ctg, GffObj* gfo, int idx) { + char* buf=gfoBuildId(id,ctg); + GfoHolder* r=new GfoHolder(gfo,idx); + phash.Add(buf, r); + GFREE(buf); + return r; +} + +GfoHolder* GffReader::gfoFind(const char* id, const char* ctg) { + char* buf=gfoBuildId(id,ctg); + GfoHolder* r=phash.Find(buf); + GFREE(buf); + return r; +} + +GfoHolder* GffReader::replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx) { + GffObj* newgfo=new GffObj(this, gffline, keepAttr, noExonAttr); + GfoHolder* r=NULL; + if (replaceidx>=0) { + gflst.Put(replaceidx,newgfo); + r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, replaceidx); + } + else { + int gfoidx=gflst.Add(newgfo); + r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, gfoidx); + } + if (gff_warns) { + int* pcount=tids.Find(newgfo->gffID); + if (pcount!=NULL) { + if (gff_warns) 
GMessage("Warning: duplicate GFF ID: %s\n", newgfo->gffID); + (*pcount)++; + } + else { + tids.Add(newgfo->gffID,new int(1)); + } + } + return r; +} + +GfoHolder* GffReader::updateParent(GfoHolder* newgfh, GffObj* parent) { + //assert(parent); + //assert(newgfo); + parent->children.Add(newgfh->gffobj); + if (newgfh->gffobj->parent==NULL) newgfh->gffobj->parent=parent; + newgfh->gffobj->setLevel(parent->getLevel()+1); + if (parent->isGene()) { + if (parent->gene_name!=NULL && newgfh->gffobj->gene_name==NULL) + newgfh->gffobj->gene_name=Gstrdup(parent->gene_name); + if (parent->geneID!=NULL && newgfh->gffobj->geneID==NULL) + newgfh->gffobj->geneID=Gstrdup(parent->geneID); + } + + return newgfh; +} + +GfoHolder* GffReader::newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, + GffObj* parent, GffExon* pexon) { + GffObj* newgfo=new GffObj(this, gffline, keepAttr, noExonAttr); + GfoHolder* r=NULL; + int gfoidx=gflst.Add(newgfo); + r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, gfoidx); + if (parent!=NULL) { + updateParent(r, parent); + if (pexon!=NULL) parent->removeExon(pexon); + } + if (gff_warns) { + int* pcount=tids.Find(newgfo->gffID); + if (pcount!=NULL) { + if (gff_warns) GMessage("Warning: duplicate GFF ID: %s\n", newgfo->gffID); + (*pcount)++; + } + else { + tids.Add(newgfo->gffID,new int(1)); + } + } + return r; +} + +GfoHolder* GffReader::updateGffRec(GfoHolder* prevgfo, GffLine* gffline, + bool keepAttr) { + if (prevgfo==NULL) return NULL; + prevgfo->gffobj->createdByExon(false); + prevgfo->gffobj->ftype_id=prevgfo->gffobj->names->feats.addName(gffline->ftype); + prevgfo->gffobj->start=gffline->fstart; + prevgfo->gffobj->end=gffline->fend; + prevgfo->gffobj->isGene(gffline->is_gene); + prevgfo->gffobj->isTranscript(gffline->is_transcript || gffline->exontype!=0); + prevgfo->gffobj->fromGff3(gffline->is_gff3); + if (keepAttr) { + if (prevgfo->gffobj->attrs!=NULL) prevgfo->gffobj->attrs->Clear(); + prevgfo->gffobj->parseAttrs(prevgfo->gffobj->attrs, gffline->info); + } + return prevgfo; +} + + +bool GffReader::addExonFeature(GfoHolder* prevgfo, GffLine* gffline, GHash& pex, bool noExonAttr) { + bool r=true; + if (gffline->strand!=prevgfo->gffobj->strand) { + //TODO: add support for trans-splicing and even inter-chromosomal fusions + if (prevgfo->gffobj->strand=='.') { + prevgfo->gffobj->strand=gffline->strand; + } + else { + GMessage("GFF Error at %s (%c): exon %d-%d (%c) found on different strand; discarded.\n", + prevgfo->gffobj->gffID, prevgfo->gffobj->strand, + gffline->fstart, gffline->fend, gffline->strand, prevgfo->gffobj->getGSeqName()); + //r=false; + return true; //FIXME: split trans-spliced mRNAs by strand + } + } + int gdist=(gffline->fstart>prevgfo->gffobj->end) ? gffline->fstart-prevgfo->gffobj->end : + ((gffline->fendgffobj->start)? 
prevgfo->gffobj->start-gffline->fend : + 0 ); + if (gdist>(int)GFF_MAX_LOCUS) { //too far apart, most likely this is a duplicate ID + GMessage("Error: duplicate GFF ID '%s' (or exons too far apart)!\n",prevgfo->gffobj->gffID); + //validation_errors = true; + r=false; + if (!gff_warns) exit(1); + } + int eidx=prevgfo->gffobj->addExon(this, gffline, !noExonAttr, noExonAttr); + if (eidx>=0 && gffline->ID!=NULL && gffline->exontype==0) + subfPoolAdd(pex, prevgfo); + return r; +} + +CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash& pex, char*& subp_name) { + CNonExon* subp=NULL; + subp_name=NULL; + for (int i=0;inum_parents;i++) { + if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL) + continue; + subp_name=gfoBuildId(gffline->parents[i], gffline->gseqname); //e.g. mRNA name + subp=pex.Find(subp_name); + if (subp!=NULL) + return subp; + GFREE(subp_name); + } + return NULL; +} + +void GffReader::subfPoolAdd(GHash& pex, GfoHolder* newgfo) { +//this might become a parent feature later +if (newgfo->gffobj->exons.Count()>0) { + char* xbuf=gfoBuildId(gffline->ID, gffline->gseqname); + pex.Add(xbuf, new CNonExon(newgfo->idx, newgfo->gffobj, + newgfo->gffobj->exons[0], gffline)); + GFREE(xbuf); + } +} + +GfoHolder* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash& pex, + bool keepAttr, bool noExonAttr) { + GffObj* prevp=subp->parent; //grandparent of gffline (e.g. gene) + if (prevp!=gflst[subp->idx]) + GError("Error promoting subfeature %s, gflst index mismatch?!\n", subp->gffline->ID); + subp->gffline->discardParent(); + GfoHolder* gfoh=newGffRec(subp->gffline, keepAttr, noExonAttr, prevp, subp->exon); + pex.Remove(subp_name); //no longer a potential parent, moved it to phash already + prevp->promotedChildren(true); + return gfoh; //returns the holder of newly promoted feature +} + +//have to parse the whole file because exons can be scattered all over +void GffReader::readAll(bool keepAttr, bool mergeCloseExons, bool noExonAttr) { + bool validation_errors = false; + //loc_debug=false; + GHash pex; //keep track of any "exon"-like features that have an ID + //and thus could become promoted to parent features + while (nextGffLine()!=NULL) { + //seen this gff ID before? 
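+    // Note (descriptive comment): previously-seen records are looked up through
+    // the key produced by gfoBuildId() (record ID + '~' + contig name), so the
+    // duplicate-ID handling below only applies to records on the same contig.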
+ GfoHolder* prevseen=NULL; + if (gffline->ID) //GFF3 + prevseen=gfoFind(gffline->ID, gffline->gseqname); + if (prevseen!=NULL) { + if (prevseen->gffobj->createdByExon()) { + updateGffRec(prevseen, gffline, keepAttr); + } + else { + GMessage("Error: duplicate GFF ID '%s' encountered!\n",gffline->ID); + validation_errors = true; + if (gff_warns) { + delete gffline; gffline=NULL; continue; + } + else exit(1); + } + } + if (gffline->parents==NULL) {//start GFF3-like record with no parent (mRNA, gene) + if (!prevseen) newGffRec(gffline, keepAttr, noExonAttr); + } + else { //--- it's a parented feature (could still be a mRNA) + bool found_parent=false; + GfoHolder* newgfo=prevseen; + for (int i=0;inum_parents;i++) { + if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL) + continue; //skipping discarded parent feature + GfoHolder* parentgfo=gfoFind(gffline->parents[i], gffline->gseqname); + if (parentgfo!=NULL) { //parent GffObj parsed earlier + found_parent=true; + if (parentgfo->gffobj->isGene() && gffline->is_transcript + && gffline->exontype==0) { + //not an exon, but a transcript parented by a gene + if (newgfo) { + updateParent(newgfo, parentgfo->gffobj); + } + else { + newgfo=newGffRec(gffline, keepAttr, noExonAttr, parentgfo->gffobj); + } + } + else { //potential exon subfeature + if (!addExonFeature(parentgfo, gffline, pex, noExonAttr)) + validation_errors=true; + } + } + } //for each parsed parent Id + if (!found_parent) { //new GTF-like record starting here with a subfeature directly + //or it could be some chado GFF3 barf with exons declared BEFORE their parent :( + //check if this feature isn't parented by a previously stored "exon" subfeature + char* subp_name=NULL; + CNonExon* subp=subfPoolCheck(gffline, pex, subp_name); + if (subp!=NULL) { //found a subfeature that is the parent of this gffline + //promote that subfeature to a full GffObj + GfoHolder* gfoh=promoteFeature(subp, subp_name, pex, keepAttr, noExonAttr); + //add current gffline as an exon of the newly promoted subfeature + if (!addExonFeature(gfoh, gffline, pex, noExonAttr)) + validation_errors=true; + } + else { //no parent seen before, create one directly with this exon + //loc_debug=true; + GfoHolder* newgfo=prevseen ? prevseen : newGffRec(gffline, keepAttr, noExonAttr); + if (gffline->ID!=NULL && gffline->exontype==0) + subfPoolAdd(pex, newgfo); + //even those with errors will be added here! + } + GFREE(subp_name); + } //no previous parent found + } //parented feature + //-- + delete gffline; + gffline=NULL; + }//while gff lines + gflst.finalize(this, mergeCloseExons, keepAttr, noExonAttr); //force sorting by locus if so constructed + // all gff records are now loaded in GList gflst + // so we can free the hash + phash.Clear(); + tids.Clear(); + if (validation_errors) { + exit(1); + } +} + +GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, bool noExonAttr) { + //merge + //always merge adjacent or overlapping segments + //but if mergeCloseExons then merge even when distance is up to 5 bases + udata=0; + uptr=NULL; + if (gfr->transcriptsOnly && !(isTranscript() || (isGene() && children.Count()==0))) { + isDiscarded(true); + } + if (ftype_id==gff_fid_transcript && CDstart>0) { + ftype_id=gff_fid_mRNA; + //exon_ftype_id=gff_fid_exon; + } + //if (ftype_id==gff_fid_mRNA || exon_ftype_id==gff_fid_exon || mergeCloseExons) { + if (isTranscript() || exon_ftype_id==gff_fid_exon || mergeCloseExons) { + int mindist=mergeCloseExons ? 
5:1; + for (int i=0;iend; + while (nistart-mend); + if (dist>mindist) break; //no merging with next segment + if (gfr!=NULL && gfr->gff_warns && dist!=0 && (exons[ni]->exontype!=exgffUTR && exons[i]->exontype!=exgffUTR)) { + GMessage("GFF warning: merging adjacent/overlapping segments of %s on %s (%d-%d, %d-%d)\n", + gffID, getGSeqName(), exons[i]->start, exons[i]->end,exons[ni]->start, exons[ni]->end); + } + mend=exons[ni]->end; + covlen-=exons[i]->len(); + exons[i]->end=mend; + covlen+=exons[i]->len(); + covlen-=exons[ni]->len(); + if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL || + exons[i]->attrs->Count()attrs->Count())) { + //use the other exon attributes, if more + delete(exons[i]->attrs); + exons[i]->attrs=exons[ni]->attrs; + exons[ni]->attrs=NULL; + } + exons.Delete(ni); + } //check for merge with next exon + } //for each exon + } + //attribute reduction for GTF records + if (keepAttrs && !noExonAttr && !fromGff3() + && exons.Count()>0 && exons[0]->attrs!=NULL) { + bool attrs_discarded=false; + for (int a=0;aattrs->Count();a++) { + int attr_name_id=exons[0]->attrs->Get(a)->attr_id; + char* attr_name=names->attrs.getName(attr_name_id); + char* attr_val =exons[0]->attrs->Get(a)->attr_val; + bool sameExonAttr=true; + for (int i=1;igetAttr(attr_name_id); + if (ov==NULL || (strcmp(ov,attr_val)!=0)) { + sameExonAttr=false; + break; + } + } + if (sameExonAttr) { + //delete this attribute from exons level + attrs_discarded=true; + this->addAttr(attr_name, attr_val); + for (int i=1;iattrs->freeItem(a); + } + } + if (attrs_discarded) exons[0]->attrs->Pack(); + } + return this; +} + +void GffObj::parseAttrs(GffAttrs*& atrlist, char* info, bool isExon) { + if (names==NULL) + GError(ERR_NULL_GFNAMES, "parseAttrs()"); + if (atrlist==NULL) + atrlist=new GffAttrs(); + char* endinfo=info+strlen(info); + char* start=info; + char* pch=start; + while (startaddAttr(start, ech); + start=pch; + continue; + } + atrlist->add_or_update(names, start, ech); + } + /* + else { //not an attr=value format + atrlist->Add(new GffAttr(names->attrs.addName(start),"1")); + } + */ + start=pch; + } + if (atrlist->Count()==0) { delete atrlist; atrlist=NULL; } +} + +void GffObj::addAttr(const char* attrname, const char* attrvalue) { + if (this->attrs==NULL) + this->attrs=new GffAttrs(); + //this->attrs->Add(new GffAttr(names->attrs.addName(attrname),attrvalue)); + this->attrs->add_or_update(names, attrname, attrvalue); +} + + +void GffObj::setFeatureName(const char* feature) { + //change the feature name/type for a transcript + int fid=names->feats.addName(feature); + if (monoFeature() && exons.Count()>0) + this->exon_ftype_id=fid; + this->ftype_id=fid; +} + +void GffObj::setRefName(const char* newname) { + //change the feature name/type for a transcript + int rid=names->gseqs.addName(newname); + this->gseq_id=rid; +} + + + +int GffObj::removeAttr(const char* attrname, const char* attrval) { + if (this->attrs==NULL || attrname==NULL || attrname[0]==0) return 0; + int aid=this->names->attrs.getId(attrname); + if (aid<0) return 0; + int delcount=0; //could be more than one ? + for (int i=0;iattrs->Count();i++) { + if (aid==this->attrs->Get(i)->attr_id) { + if (attrval==NULL || + strcmp(attrval, this->attrs->Get(i)->attr_val)==0) { + delcount++; + this->attrs->freeItem(i); + } + } + } + if (delcount>0) this->attrs->Pack(); + return delcount; +} + +int GffObj::removeAttr(int aid, const char* attrval) { + if (this->attrs==NULL || aid<0) return 0; + int delcount=0; //could be more than one ? 
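+  // scan the whole attribute list: more than one stored attribute can carry
+  // this id, and each match (optionally filtered by attrval) is freed before
+  // the list is compacted with Pack() below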
+ for (int i=0;iattrs->Count();i++) { + if (aid==this->attrs->Get(i)->attr_id) { + if (attrval==NULL || + strcmp(attrval, this->attrs->Get(i)->attr_val)==0) { + delcount++; + this->attrs->freeItem(i); + } + } + } + if (delcount>0) this->attrs->Pack(); + return delcount; +} + + +int GffObj::removeExonAttr(GffExon& exon, const char* attrname, const char* attrval) { + if (exon.attrs==NULL || attrname==NULL || attrname[0]==0) return 0; + int aid=this->names->attrs.getId(attrname); + if (aid<0) return 0; + int delcount=0; //could be more than one + for (int i=0;iCount();i++) { + if (aid==exon.attrs->Get(i)->attr_id) { + if (attrval==NULL || + strcmp(attrval, exon.attrs->Get(i)->attr_val)==0) { + delcount++; + exon.attrs->freeItem(i); + } + } + } + if (delcount>0) exon.attrs->Pack(); + return delcount; +} + +int GffObj::removeExonAttr(GffExon& exon, int aid, const char* attrval) { + if (exon.attrs==NULL || aid<0) return 0; + int delcount=0; //could be more than one + for (int i=0;iCount();i++) { + if (aid==exon.attrs->Get(i)->attr_id) { + if (attrval==NULL || + strcmp(attrval, exon.attrs->Get(i)->attr_val)==0) { + delcount++; + exon.attrs->freeItem(i); + } + } + } + if (delcount>0) exon.attrs->Pack(); + return delcount; +} + + +void GffObj::getCDS_ends(uint& cds_start, uint& cds_end) { + cds_start=0; + cds_end=0; + if (CDstart==0 || CDend==0) return; //no CDS info + int cdsadj=0; + if (CDphase=='1' || CDphase=='2') { + cdsadj=CDphase-'0'; + } + cds_start=CDstart; + cds_end=CDend; + if (strand=='-') cds_end-=cdsadj; + else cds_start+=cdsadj; + } + +void GffObj::mRNA_CDS_coords(uint& cds_mstart, uint& cds_mend) { + //sets cds_start and cds_end to the CDS start,end coordinates on the spliced mRNA transcript + cds_mstart=0; + cds_mend=0; + if (CDstart==0 || CDend==0) return; //no CDS info + //restore normal coordinates, just in case + unxcoord(); + int cdsadj=0; + if (CDphase=='1' || CDphase=='2') { + cdsadj=CDphase-'0'; + } + /* + uint seqstart=CDstart; + uint seqend=CDend; + */ + uint seqstart=exons.First()->start; + uint seqend=exons.Last()->end; + int s=0; //resulting nucleotide counter + if (strand=='-') { + for (int x=exons.Count()-1;x>=0;x--) { + uint sgstart=exons[x]->start; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) + sgend=seqend; //seqend within this segment + s+=(int)(sgend-sgstart)+1; + if (CDstart>=sgstart && CDstart<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + cds_mend=s-(int)(CDstart-sgstart); + } + if (CDend>=sgstart && CDend<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + cds_mstart=s-(int)(CDend-cdsadj-sgstart); + } + } //for each exon + } // - strand + else { // + strand + for (int x=0;xstart; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) + sgend=seqend; //seqend within this segment + s+=(int)(sgend-sgstart)+1; + /* for (uint i=sgstart;i<=sgend;i++) { + spliced[s]=gsubseq[i-gstart]; + s++; + }//for each nt + */ + if (CDstart>=sgstart && CDstart<=sgend) { + //CDstart in this segment + cds_mstart=s-(int)(sgend-CDstart-cdsadj); + } + if (CDend>=sgstart && CDend<=sgend) { + //CDend in this segment + cds_mend=s-(int)(sgend-CDend); + } + } //for each exon + } // + strand + //spliced[s]=0; + //if (rlen!=NULL) *rlen=s; + 
//return spliced; +} + +char* GffObj::getUnspliced(GFaSeqGet* faseq, int* rlen, GList* seglst) +{ + if (faseq==NULL) { GMessage("Warning: getUnspliced(NULL,.. ) called!\n"); + return NULL; + } + //restore normal coordinates: + unxcoord(); + if (exons.Count()==0) return NULL; + int fspan=end-start+1; + const char* gsubseq=faseq->subseq(start, fspan); + if (gsubseq==NULL) { + GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end); + } + char* unspliced=NULL; + + int seqstart=exons.First()->start; + int seqend=exons.Last()->end; + + int unsplicedlen = 0; + + unsplicedlen += seqend - seqstart + 1; + + GMALLOC(unspliced, unsplicedlen+1); //allocate more here + //uint seqstart, seqend; + + int s = 0; //resulting nucleotide counter + if (strand=='-') + { + if (seglst!=NULL) + seglst->Add(new GSeg(s+1,s+1+seqend-seqstart)); + for (int i=seqend;i>=seqstart;i--) + { + unspliced[s] = ntComplement(gsubseq[i-start]); + s++; + }//for each nt + } // - strand + else + { // + strand + if (seglst!=NULL) + seglst->Add(new GSeg(s+1,s+1+seqend-seqstart)); + for (int i=seqstart;i<=seqend;i++) + { + unspliced[s]=gsubseq[i-start]; + s++; + }//for each nt + } // + strand + //assert(s <= unsplicedlen); + unspliced[s]=0; + if (rlen!=NULL) *rlen=s; + return unspliced; +} + +char* GffObj::getSpliced(GFaSeqGet* faseq, bool CDSonly, int* rlen, uint* cds_start, uint* cds_end, + GList* seglst) { + if (CDSonly && CDstart==0) return NULL; + if (faseq==NULL) { GMessage("Warning: getSpliced(NULL,.. ) called!\n"); + return NULL; + } + //restore normal coordinates: + unxcoord(); + if (exons.Count()==0) return NULL; + int fspan=end-start+1; + const char* gsubseq=faseq->subseq(start, fspan); + if (gsubseq==NULL) { + GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end); + } + if (fspan<(int)(end-start+1)) { //special case: stop coordinate was extended past the gseq length, must adjust + int endadj=end-start+1-fspan; + uint prevend=end; + end-=endadj; + if (CDend>end) CDend=end; + if (exons.Last()->end>end) { + exons.Last()->end=end; //this could get us into trouble if exon start is also > end + if (exons.Last()->start>exons.Last()->end) { + GError("GffObj::getSpliced() error: improper genomic coordinate %d on %s for %s\n", + prevend,getGSeqName(), getID()); + } + covlen-=endadj; + } + } + char* spliced=NULL; + GMALLOC(spliced, covlen+1); //allocate more here + uint seqstart, seqend; + int cdsadj=0; + if (CDphase=='1' || CDphase=='2') { + cdsadj=CDphase-'0'; + } + if (CDSonly) { + seqstart=CDstart; + seqend=CDend; + if (strand=='-') seqend-=cdsadj; + else seqstart+=cdsadj; + } + else { + seqstart=exons.First()->start; + seqend=exons.Last()->end; + } + int s=0; //resulting nucleotide counter + if (strand=='-') { + for (int x=exons.Count()-1;x>=0;x--) { + uint sgstart=exons[x]->start; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) + sgend=seqend; //seqend within this segment + if (seglst!=NULL) + seglst->Add(new GSeg(s+1,s+1+sgend-sgstart)); + for (uint i=sgend;i>=sgstart;i--) { + spliced[s] = ntComplement(gsubseq[i-start]); + s++; + }//for each nt + + if (!CDSonly && cds_start!=NULL && CDstart>0) { + if (CDstart>=sgstart && CDstart<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + *cds_end=s-(CDstart-sgstart); + } + if (CDend>=sgstart && CDend<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + 
*cds_start=s-(CDend-cdsadj-sgstart); + } + }//update local CDS coordinates + } //for each exon + } // - strand + else { // + strand + for (int x=0;xstart; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) + sgend=seqend; //seqend within this segment + if (seglst!=NULL) + seglst->Add(new GSeg(s+1,s+1+sgend-sgstart)); + for (uint i=sgstart;i<=sgend;i++) { + spliced[s]=gsubseq[i-start]; + s++; + }//for each nt + if (!CDSonly && cds_start!=NULL && CDstart>0) { + if (CDstart>=sgstart && CDstart<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + *cds_start=s-(sgend-CDstart-cdsadj); + } + if (CDend>=sgstart && CDend<=sgend) { + //CDstart in this segment + //and we are getting the whole transcript + *cds_end=s-(sgend-CDend); + } + }//update local CDS coordinates + } //for each exon + } // + strand + spliced[s]=0; + if (rlen!=NULL) *rlen=s; + return spliced; +} + +char* GffObj::getSplicedTr(GFaSeqGet* faseq, bool CDSonly, int* rlen) { + if (CDSonly && CDstart==0) return NULL; + //restore normal coordinates: + unxcoord(); + if (exons.Count()==0) return NULL; + int fspan=end-start+1; + const char* gsubseq=faseq->subseq(start, fspan); + if (gsubseq==NULL) { + GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end); + } + + char* translation=NULL; + GMALLOC(translation, (int)(covlen/3)+1); + uint seqstart, seqend; + int cdsadj=0; + if (CDphase=='1' || CDphase=='2') { + cdsadj=CDphase-'0'; + } + if (CDSonly) { + seqstart=CDstart; + seqend=CDend; + if (strand=='-') seqend-=cdsadj; + else seqstart+=cdsadj; + } + else { + seqstart=exons.First()->start; + seqend=exons.Last()->end; + } + Codon codon; + int nt=0; //codon nucleotide counter (0..2) + int aa=0; //aminoacid count + if (strand=='-') { + for (int x=exons.Count()-1;x>=0;x--) { + uint sgstart=exons[x]->start; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) { + sgend=seqend; //seqend within this segment + } + for (uint i=sgend;i>=sgstart;i--) { + codon.nuc[nt]=ntComplement(gsubseq[i-start]); + nt++; + if (nt==3) { + nt=0; + translation[aa]=codon.translate(); + aa++; + } + }//for each nt + } //for each exon + } // - strand + else { // + strand + for (int x=0;xstart; + uint sgend=exons[x]->end; + if (seqendsgend) continue; + if (seqstart>=sgstart && seqstart<=sgend) + sgstart=seqstart; //seqstart within this segment + if (seqend>=sgstart && seqend<=sgend) + sgend=seqend; //seqend within this segment + for (uint i=sgstart;i<=sgend;i++) { + codon.nuc[nt]=gsubseq[i-start]; + nt++; + if (nt==3) { + nt=0; + translation[aa]=codon.translate(); + aa++; + } + }//for each nt + } //for each exon + } // + strand + translation[aa]=0; + if (rlen!=NULL) *rlen=aa; + return translation; +} + +void GffObj::printSummary(FILE* fout) { + if (fout==NULL) fout=stdout; + fprintf(fout, "%s\t%c\t%d\t%d\t%4.2f\t%4.1f\n", gffID, + strand, start, end, gscore, (float)qcov/10.0); +} + +void GffObj::printGxfLine(FILE* fout, const char* tlabel, const char* gseqname, bool iscds, + uint segstart, uint segend, int exidx, char phase, bool gff3) { + static char scorestr[14]; + strcpy(scorestr,"."); + GffAttrs* xattrs=NULL; + if (exidx>=0) { + if (exons[exidx]->score) sprintf(scorestr,"%.2f", exons[exidx]->score); + xattrs=exons[exidx]->attrs; + } + if (phase==0 || 
!iscds) phase='.'; + const char* ftype=iscds ? "CDS" : getSubfName(); + if (gff3) { + fprintf(fout, + "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\tParent=%s", + gseqname, tlabel, ftype, segstart, segend, scorestr, strand, + phase, gffID); + if (xattrs!=NULL) { + for (int i=0;iCount();i++) + fprintf(fout, ";%s=%s",names->attrs.getName(xattrs->Get(i)->attr_id), + xattrs->Get(i)->attr_val); + } + fprintf(fout, "\n"); + } //GFF + else {//for GTF -- we print only transcripts + //if (isValidTranscript()) + fprintf(fout, "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\ttranscript_id \"%s\";", + gseqname, tlabel, ftype, segstart, segend, scorestr, strand, phase, gffID); + //char* geneid=(geneID!=NULL)? geneID : gffID; + if (geneID) + fprintf(fout," gene_id \"%s\";",geneID); + if (gene_name!=NULL) { + //fprintf(fout, " gene_name "); + //if (gene_name[0]=='"') fprintf (fout, "%s;",gene_name); + // else fprintf(fout, "\"%s\";",gene_name); + fprintf(fout," gene_name \"%s\";",gene_name); + } + if (xattrs!=NULL) { + for (int i=0;iCount();i++) { + if (xattrs->Get(i)->attr_val==NULL) continue; + const char* attrname=names->attrs.getName(xattrs->Get(i)->attr_id); + fprintf(fout, " %s ",attrname); + if (xattrs->Get(i)->attr_val[0]=='"') + fprintf(fout, "%s;",xattrs->Get(i)->attr_val); + else fprintf(fout, "\"%s\";",xattrs->Get(i)->attr_val); + } + } + //for GTF, also append the GffObj attributes to each exon line + if ((xattrs=this->attrs)!=NULL) { + for (int i=0;iCount();i++) { + if (xattrs->Get(i)->attr_val==NULL) continue; + const char* attrname=names->attrs.getName(xattrs->Get(i)->attr_id); + fprintf(fout, " %s ",attrname); + if (xattrs->Get(i)->attr_val[0]=='"') + fprintf(fout, "%s;",xattrs->Get(i)->attr_val); + else fprintf(fout, "\"%s\";",xattrs->Get(i)->attr_val); + } + } + fprintf(fout, "\n"); + }//GTF +} + +void GffObj::printGxf(FILE* fout, GffPrintMode gffp, + const char* tlabel, const char* gfparent) { + static char tmpstr[255]; + if (tlabel==NULL) { + tlabel=track_id>=0 ? names->tracks.Get(track_id)->name : + (char*)"gffobj" ; + } + unxcoord(); + //if (exons.Count()==0) return; + const char* gseqname=names->gseqs.Get(gseq_id)->name; + bool gff3 = (gffp>=pgffAny); + bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth); + bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth); + if (gff3) { + //print GFF3 mRNA line: + if (gscore>0.0) sprintf(tmpstr,"%.2f", gscore); + else strcpy(tmpstr,"."); + uint pstart, pend; + if (gffp==pgffCDS) { + pstart=CDstart; + pend=CDend; + } + else { pstart=start;pend=end; } + //const char* ftype=isTranscript() ? 
"mRNA" : getFeatureName(); + const char* ftype=getFeatureName(); + fprintf(fout, + "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tID=%s", + gseqname, tlabel, ftype, pstart, pend, tmpstr, strand, gffID); + if (CDstart>0 && !showCDS && !isCDS) fprintf(fout,";CDS=%d-%d",CDstart,CDend); + if (gfparent!=NULL) { + //parent override + fprintf(fout, ";Parent=%s",gfparent); + } + else { + if (parent!=NULL && !parent->isDiscarded()) + fprintf(fout, ";Parent=%s",parent->getID()); + } + if (geneID!=NULL) + fprintf(fout, ";geneID=%s",geneID); + if (gene_name!=NULL) + fprintf(fout, ";gene_name=%s",gene_name); + if (attrs!=NULL) { + for (int i=0;iCount();i++) { + const char* attrname=names->attrs.getName(attrs->Get(i)->attr_id); + fprintf(fout,";%s=%s", attrname, + attrs->Get(i)->attr_val); + } + } + fprintf(fout,"\n"); + }// gff3 mRNA line + if (showExon) { + //print exons + if (isCDS && exons.Count()>0 && + ((strand=='-' && exons.Last()->phase<'0') || (strand=='+' && exons.Last()->phase<'0'))) + updateExonPhase(); + + for (int i=0;istart, exons[i]->end, i, exons[i]->phase, gff3); + } + }//printing exons + if (showCDS && !isCDS && CDstart>0) { + GArray cds(true,true); + getCDSegs(cds); + for (int i=0;i=0;i--) { + exons[i]->phase='0'+ (3-cdsacc%3)%3; + cdsacc+=exons[i]->end-exons[i]->start+1; + } + } + else { //forward strand + for (int i=0;iphase='0'+ (3-cdsacc%3)%3; + cdsacc+=exons[i]->end-exons[i]->start+1; + } + } +} + + +void GffObj::getCDSegs(GArray& cds) { + GffCDSeg cdseg; + int cdsacc=0; + if (CDphase=='1' || CDphase=='2') { + cdsacc+= 3-(CDphase-'0'); + } + if (strand=='-') { + for (int x=exons.Count()-1;x>=0;x--) { + uint sgstart=exons[x]->start; + uint sgend=exons[x]->end; + if (CDendsgend) continue; + if (CDstart>=sgstart && CDstart<=sgend) + sgstart=CDstart; //cdstart within this segment + if (CDend>=sgstart && CDend<=sgend) + sgend=CDend; //cdend within this segment + cdseg.start=sgstart; + cdseg.end=sgend; + cdseg.exonidx=x; + //cdseg.phase='0'+(cdsacc>0 ? (3-cdsacc%3)%3 : 0); + cdseg.phase='0'+ (3-cdsacc%3)%3; + cdsacc+=sgend-sgstart+1; + cds.Add(cdseg); + } //for each exon + } // - strand + else { // + strand + for (int x=0;xstart; + uint sgend=exons[x]->end; + if (CDendsgend) continue; + if (CDstart>=sgstart && CDstart<=sgend) + sgstart=CDstart; //seqstart within this segment + if (CDend>=sgstart && CDend<=sgend) + sgend=CDend; //seqend within this segment + cdseg.start=sgstart; + cdseg.end=sgend; + cdseg.exonidx=x; + //cdseg.phase='0'+(cdsacc>0 ? 
(3-cdsacc%3)%3 : 0); + cdseg.phase='0' + (3-cdsacc%3)%3 ; + cdsacc+=sgend-sgstart+1; + cds.Add(cdseg); + } //for each exon + } // + strand +} diff --git a/src/gff.h b/src/gff.h new file mode 100644 index 0000000..ee3eadb --- /dev/null +++ b/src/gff.h @@ -0,0 +1,1099 @@ +#ifndef GFF_H +#define GFF_H + +#include "GBase.h" +#include "gdna.h" +#include "codons.h" +#include "GFaSeqGet.h" +#include "GList.hh" +#include "GHash.hh" + +/* +const byte exMskMajSpliceL = 0x01; +const byte exMskMajSpliceR = 0x02; +const byte exMskMinSpliceL = 0x04; +const byte exMskMinSpliceR = 0x08; +const byte exMskTag = 0x80; +*/ + +//reserved Gffnames::feats entries -- basic feature types +extern const int gff_fid_mRNA; // "mRNA" feature name +extern const int gff_fid_transcript; // *RNA, *transcript feature name +extern const int gff_fid_exon; +extern const int gff_fid_CDS; //never really used, except for display only + //use gff_fid_exon instead +extern const uint GFF_MAX_LOCUS; +extern const uint GFF_MAX_EXON; +extern const uint GFF_MAX_INTRON; + +extern const uint gfo_flag_CHILDREN_PROMOTED; +extern const uint gfo_flag_HAS_ERRORS; +extern const uint gfo_flag_IS_GENE; +extern const uint gfo_flag_FROM_GFF3; //parsed from GFF3 formatted record +extern const uint gfo_flag_BY_EXON; //created by subfeature (exon) directly + //(GTF2 and some chado gff3 dumps with exons given before their mRNA) +extern const uint gfo_flag_IS_TRANSCRIPT; //recognized as '*RNA' or '*transcript' +extern const uint gfo_flag_DISCARDED; //should not be printed under the "transcriptsOnly" directive +extern const uint gfo_flag_LST_KEEP; //GffObj from GffReader::gflst is to be kept (not deallocated) + //when GffReader is destroyed +extern const uint gfo_flag_LEVEL_MSK; //hierarchical level: 0 = no parent +extern const byte gfo_flagShift_LEVEL; + +extern bool gff_show_warnings; + +#define GFF_LINELEN 2048 +#define ERR_NULL_GFNAMES "Error: GffObj::%s requires a non-null GffNames* names!\n" + + +enum GffExonType { + exgffNone=0, //not a recognizable exon or CDS segment + exgffStart, //from "start_codon" feature (within CDS) + exgffStop, //from "stop_codon" feature (may be outside CDS) + exgffCDS, //from "CDS" feature + exgffUTR, //from "UTR" feature + exgffCDSUTR, //from a merge of UTR and CDS feature + exgffExon, //from "exon" feature +}; + +class GffReader; + +class GffLine { + char* _parents; //stores a copy of the Parent attribute value, + //with commas replaced by \0 + int _parents_len; + public: + char* dupline; //duplicate of original line + char* line; //this will have tabs replaced by \0 + int llen; + char* gseqname; + char* track; + char* ftype; //feature name: mRNA/gene/exon/CDS + char* info; //the last, attributes' field, unparsed + uint fstart; + uint fend; + uint qstart; //overlap coords on query, if available + uint qend; + uint qlen; //query len, if given + double score; + char strand; + bool skip; + bool is_gff3; //if the line appears to be in GFF3 format + bool is_cds; //"cds" and "stop_codon" features + bool is_exon; //"exon" and "utr" features + char exontype; // gffExonType + bool is_transcript; //if current feature is *RNA or *transcript + bool is_gene; //if current feature is *gene + char phase; // '.' 
, '0', '1' or '2' + // -- allocated strings: + char* gene_name; //value of gene_name attribute (GTF) if present or Name attribute of a gene feature (GFF3) + char* gene_id; //value of gene_id attribute (GTF) if present or ID attribute of a gene feature (GFF3) + // + char** parents; //for GTF only parents[0] is used + int num_parents; + char* ID; // if a ID=.. attribute was parsed, or a GTF with 'transcript' line (transcript_id) + GffLine(GffReader* reader, const char* l); //parse the line accordingly + void discardParent() { + GFREE(_parents); + _parents_len=0; + num_parents=0; + parents=NULL; + } + char* extractAttr(const char* pre, bool caseStrict=true, bool enforce_GTF2=false); + GffLine(GffLine* l) { //a copy constructor + memcpy((void*)this, (void*)l, sizeof(GffLine)); + line=NULL; + GMALLOC(line, llen+1); + memcpy(line, l->line, llen+1); + GMALLOC(dupline, llen+1); + memcpy(dupline, l->dupline, llen+1); + //--offsets within line[] + gseqname=line+(l->gseqname-l->line); + track=line+(l->track-l->line); + ftype=line+(l->ftype-l->line); + info=line+(l->info-l->line); + //Parent=Gstrdup(l->Parent); + if (l->_parents_len>0) { + _parents_len=l->_parents_len; + GMALLOC(_parents, _parents_len); + memcpy(_parents, l->_parents, _parents_len); + num_parents=l->num_parents; + for (int i=0;iparents[i] - l->_parents); + } + } + //-- allocated string copies: + ID=Gstrdup(l->ID); + if (l->gene_name!=NULL) + gene_name=Gstrdup(l->gene_name); + if (l->gene_id!=NULL) + gene_id=Gstrdup(l->gene_id); + } + GffLine() { + line=NULL; + dupline=NULL; + gseqname=NULL; + track=NULL; + ftype=NULL; + fstart=0; + fend=0; + info=NULL; + _parents=NULL; + _parents_len=0; + parents=NULL; + num_parents=0; + ID=NULL; + gene_name=NULL; + gene_id=NULL; + skip=true; + qstart=0; + qend=0; + qlen=0; + exontype=0; + is_cds=false; + is_gff3=false; + is_transcript=false; + is_gene=false; + is_exon=false; + } + ~GffLine() { + GFREE(dupline); + GFREE(line); + GFREE(_parents); + GFREE(parents); + GFREE(ID); + GFREE(gene_name); + GFREE(gene_id); + } +}; + +class GffAttr { + public: + int attr_id; + char* attr_val; + GffAttr(int an_id, const char* av=NULL) { + attr_id=an_id; + attr_val=NULL; + setValue(av); + } + ~GffAttr() { + GFREE(attr_val); + } + void setValue(const char* av) { + if (attr_val!=NULL) { + GFREE(attr_val); + } + if (av==NULL || av[0]==0) return; + //trim spaces + const char* vstart=av; + while (*vstart==' ') av++; + const char* vend=vstart; + bool keep_dq=false; + while (vend[1]!=0) { + if (*vend==' ' && vend[1]!=' ') keep_dq=true; + else if (*vend==';') keep_dq=true; + vend++; + } + //remove spaces at the end: + while (*vend==' ' && vend!=vstart) vend--; + //practical clean-up: if it doesn't have any internal spaces just strip those useless double quotes + if (!keep_dq && *vstart=='"' && *vend=='"') { + vend--; + vstart++; + } + attr_val=Gstrdup(vstart, vend); + } + bool operator==(GffAttr& d){ + return (this==&d); + } + bool operator>(GffAttr& d){ + return (this>&d); + } + bool operator<(GffAttr& d){ + return (this<&d); + } + + }; + +class GffNameList; +class GffNames; + +class GffNameInfo { + friend class GffNameList; +protected: + int idx; + public: + char* name; + GffNameInfo() { name=NULL; idx=-1; } + GffNameInfo(const char* n) { + name=Gstrdup(n); + } + + ~GffNameInfo() { + GFREE(name); + } + + bool operator==(GffNameInfo& d){ + return (strcmp(this->name, d.name)==0); + } + bool operator>(GffNameInfo& d){ + return (strcmp(this->name, d.name)>0); + } + bool operator<(GffNameInfo& d){ + return 
(strcmp(this->name, d.name)<0); + } +}; + +class GffNameList:public GList { + friend class GffNameInfo; + friend class GffNames; +protected: + GHash byName;//hash with shared keys + int idlast; //fList index of last added/reused name + void addStatic(const char* tname) {// fast add + GffNameInfo* f=new GffNameInfo(tname); + idlast=this->Add(f); + f->idx=idlast; + byName.shkAdd(f->name,f); + } +public: + GffNameList():GList(false,true,true), byName(false) { + idlast=-1; + } + char* lastNameUsed() { return idlast<0 ? NULL : Get(idlast)->name; } + int lastNameId() { return idlast; } + char* getName(int nid) { //retrieve name by its ID + if (nid<0 || nid>=fCount) + GError("GffNameList Error: invalid index (%d)\n",nid); + return fList[nid]->name; + } + + int addName(const char* tname) {//returns or create an id for the given name + //check idlast first, chances are it's the same feature name checked + if (idlast>=0 && strcmp(fList[idlast]->name,tname)==0) + return idlast; + GffNameInfo* f=byName.Find(tname); + int fidx=-1; + if (f!=NULL) fidx=f->idx; + else {//add new entry + f=new GffNameInfo(tname); + fidx=this->Add(f); + f->idx=fidx; + byName.shkAdd(f->name,f); + } + idlast=fidx; + return fidx; + } + + int addNewName(const char* tname) { + GffNameInfo* f=new GffNameInfo(tname); + int fidx=this->Add(f); + f->idx=fidx; + byName.shkAdd(f->name,f); + return fidx; + } + + int getId(const char* tname) { //only returns a name id# if found + GffNameInfo* f=byName.Find(tname); + if (f==NULL) return -1; + return f->idx; + } + int removeName() { + GError("Error: removing names from GffNameList not allowed!\n"); + return -1; + } +}; + +class GffNames { + public: + int numrefs; + GffNameList tracks; + GffNameList gseqs; + GffNameList attrs; + GffNameList feats; //feature names: 'mRNA', 'exon', 'CDS' etc. + GffNames():tracks(),gseqs(),attrs(), feats() { + numrefs=0; + //the order below is critical! + //has to match: gff_fid_mRNA, gff_fid_exon, gff_fid_CDS + feats.addStatic("mRNA");//index 0=gff_fid_mRNA + feats.addStatic("transcript");//index 1=gff_fid_transcript + feats.addStatic("exon");//index 1=gff_fid_exon + feats.addStatic("CDS"); //index 2=gff_fid_CDS + } +}; + +void gffnames_ref(GffNames* &n); +void gffnames_unref(GffNames* &n); + +enum GffPrintMode { + pgtfAny, //print record as read + pgtfExon, + pgtfCDS, + pgffAny, //print record as read + pgffExon, + pgffCDS, + pgffBoth, +}; + + +class GffAttrs:public GList { + public: + GffAttrs():GList(false,true,false) { } + void add_or_update(GffNames* names, const char* attrname, const char* val) { + int aid=names->attrs.getId(attrname); + if (aid>=0) { + //attribute found in the dictionary + for (int i=0;iattr_id) { + //update the value + Get(i)->setValue(val); + return; + } + } + } + else { + aid=names->attrs.addNewName(attrname); + } + this->Add(new GffAttr(aid, val)); + } + + char* getAttr(GffNames* names, const char* attrname) { + int aid=names->attrs.getId(attrname); + if (aid>=0) + for (int i=0;iattr_id) return Get(i)->attr_val; + return NULL; + } + char* getAttr(int aid) { + if (aid>=0) + for (int i=0;iattr_id) return Get(i)->attr_val; + return NULL; + } +}; + + +class GffExon : public GSeg { + public: + void* uptr; //for later extensions + GffAttrs* attrs; //other attributes kept for this exon + double score; // gff score column + char phase; //GFF phase column - for CDS segments only + // '.' 
= undefined (UTR), '0','1','2' for CDS exons + char exontype; // 1="exon" 2="cds" 3="utr" 4="stop_codon" + int qstart; // for mRNA/protein exon mappings: coordinates on query + int qend; + GffExon(int s=0, int e=0, double sc=0, char fr=0, int qs=0, int qe=0, char et=0) { + uptr=NULL; + attrs=NULL; + if (sgetAttr(names, atrname); + } + + char* getAttr(int aid) { + if (attrs==NULL) return NULL; + return attrs->getAttr(aid); + } + + ~GffExon() { //destructor + if (attrs!=NULL) delete attrs; + } +}; + + +class GffCDSeg:public GSeg { + public: + char phase; + int exonidx; +}; +//one GFF mRNA object -- e.g. a mRNA with its exons and/or CDS segments +class GffObj:public GSeg { + //utility segment-merging function for addExon() + void expandExon(int xovl, uint segstart, uint segend, + char exontype, double sc, char fr, int qs, int qe); + protected: + //coordinate transformation data: + uint xstart; //absolute genomic coordinates of reference region + uint xend; + char xstatus; //coordinate transform status: + //0 : (start,end) coordinates are absolute + //'+' : (start,end) coords are relative to xstart..xend region + //'-' : (start,end) are relative to the reverse complement of xstart..xend region + //-- + char* gffID; // ID name for mRNA (parent) feature + char* gene_name; //value of gene_name attribute (GTF) if present or Name attribute of the parent gene feature (GFF3) + char* geneID; //value of gene_id attribute (GTF) if present or ID attribute of a parent gene feature (GFF3) + unsigned int flags; + //-- friends: + friend class GffReader; + friend class GffExon; +public: + static GffNames* names; // dictionary storage that holds the various attribute names etc. + int track_id; // index of track name in names->tracks + int gseq_id; // index of genomic sequence name in names->gseqs + int ftype_id; // index of this record's feature name in names->feats, or the special gff_fid_mRNA value + int exon_ftype_id; //index of child subfeature name in names->feats (that subfeature stored in "exons") + //if ftype_id==gff_fid_mRNA then this value is ignored + GList exons; //for non-mRNA entries, these can be any subfeature of type subftype_id + GPVec children; + GffObj* parent; + int udata; //user data, flags etc. + void* uptr; //user pointer (to a parent object, cluster, locus etc.) 
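+  // (udata and uptr above, and ulink just below, are application-side fields:
+  //  the GffObj constructor only zeroes them; gff_utils, for instance, keeps a
+  //  GTData* in uptr)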
+ GffObj* ulink; //link to another GffObj (user controlled field) + // mRNA specific fields: + bool isCDS; //just a CDS, no UTRs + bool partial; //partial CDS + uint CDstart; //CDS start coord + uint CDend; //CDS end coord + char CDphase; //initial phase for CDS start + bool hasErrors() { return ((flags & gfo_flag_HAS_ERRORS)!=0); } + void hasErrors(bool v) { + if (v) flags |= gfo_flag_HAS_ERRORS; + else flags &= ~gfo_flag_HAS_ERRORS; + } + bool fromGff3() { return ((flags & gfo_flag_FROM_GFF3)!=0); } + void fromGff3(bool v) { + if (v) flags |= gfo_flag_FROM_GFF3; + else flags &= ~gfo_flag_FROM_GFF3; + } + bool createdByExon() { return ((flags & gfo_flag_BY_EXON)!=0); } + void createdByExon(bool v) { + if (v) flags |= gfo_flag_BY_EXON; + else flags &= ~gfo_flag_BY_EXON; + } + bool isGene() { return ((flags & gfo_flag_IS_GENE)!=0); } + void isGene(bool v) { + if (v) flags |= gfo_flag_IS_GENE; + else flags &= ~gfo_flag_IS_GENE; + } + bool isDiscarded() { return ((flags & gfo_flag_DISCARDED)!=0); } + void isDiscarded(bool v) { + if (v) flags |= gfo_flag_DISCARDED; + else flags &= ~gfo_flag_DISCARDED; + } + + bool isUsed() { return ((flags & gfo_flag_LST_KEEP)!=0); } + void isUsed(bool v) { + if (v) flags |= gfo_flag_LST_KEEP; + else flags &= ~gfo_flag_LST_KEEP; + } + bool isTranscript() { return ((flags & gfo_flag_IS_TRANSCRIPT)!=0); } + void isTranscript(bool v) { + if (v) flags |= gfo_flag_IS_TRANSCRIPT; + else flags &= ~gfo_flag_IS_TRANSCRIPT; + } + bool promotedChildren() { return ((flags & gfo_flag_CHILDREN_PROMOTED)!=0); } + void promotedChildren(bool v) { + if (v) flags |= gfo_flag_CHILDREN_PROMOTED; + else flags &= ~gfo_flag_CHILDREN_PROMOTED; + } + void setLevel(byte v) { + if (v==0) flags &= ~gfo_flag_LEVEL_MSK; + else flags &= ~(((uint)v) << gfo_flagShift_LEVEL); + } + byte incLevel() { + uint v=((flags & gfo_flag_LEVEL_MSK) >> gfo_flagShift_LEVEL); + v++; + flags &= ~(v << gfo_flagShift_LEVEL); + return v; + } + byte getLevel() { + return ((byte)((flags & gfo_flag_LEVEL_MSK) >> gfo_flagShift_LEVEL)); + } + + bool isValidTranscript() { + //return (ftype_id==gff_fid_mRNA && exons.Count()>0); + return (isTranscript() && exons.Count()>0); + } + + + int addExon(uint segstart, uint segend, double sc=0, char fr='.', + int qs=0, int qe=0, bool iscds=false, char exontype=0); + + int addExon(GffReader* reader, GffLine* gl, bool keepAttr=false, bool noExonAttr=true); + + void removeExon(int idx); + void removeExon(GffExon* p); + char strand; //true if features are on the reverse complement strand + double gscore; + double uscore; //custom, user-computed score, if needed + int covlen; //total coverage of reference genomic sequence (sum of maxcf segment lengths) + + //--------- optional data: + int qlen; //query length, start, end - if available + int qstart; + int qend; + int qcov; //query coverage - percent + GffAttrs* attrs; //other gff3 attributes found for the main mRNA feature + //constructor by gff line parsing: + GffObj(GffReader* gfrd, GffLine* gffline, bool keepAttrs=false, bool noExonAttr=true); + //if gfline->Parent!=NULL then this will also add the first sub-feature + // otherwise, only the main feature is created + void clearAttrs() { + if (attrs!=NULL) { + bool sharedattrs=(exons.Count()>0 && exons[0]->attrs==attrs); + delete attrs; attrs=NULL; + if (sharedattrs) exons[0]->attrs=NULL; + } + } + GffObj(char* anid=NULL):GSeg(0,0), exons(true,true,false), children(1,false) { + //exons: sorted, free, non-unique + gffID=NULL; + uptr=NULL; + ulink=NULL; + flags=0; + udata=0; + 
parent=NULL; + ftype_id=-1; + exon_ftype_id=-1; + if (anid!=NULL) gffID=Gstrdup(anid); + gffnames_ref(names); + qlen=0; + qstart=0; + qend=0; + qcov=0; + partial=true; + isCDS=false; + CDstart=0; // hasCDS <=> CDstart>0 + CDend=0; + CDphase=0; + gseq_id=-1; + track_id=-1; + xstart=0; + xend=0; + xstatus=0; + strand='.'; + gscore=0; + uscore=0; + attrs=NULL; + covlen=0; + gene_name=NULL; + geneID=NULL; + } + ~GffObj() { + GFREE(gffID); + GFREE(gene_name); + GFREE(geneID); + clearAttrs(); + gffnames_unref(names); + } + //-------------- + GffObj* finalize(GffReader* gfr, bool mergeCloseExons=false, + bool keepAttrs=false, bool noExonAttr=true); + //complete parsing: must be called in order to merge adjacent/close proximity subfeatures + void parseAttrs(GffAttrs*& atrlist, char* info, bool isExon=false); + const char* getSubfName() { //returns the generic feature type of the entries in exons array + int sid=exon_ftype_id; + if (sid==gff_fid_exon && isCDS) sid=gff_fid_CDS; + return names->feats.getName(sid); + } + void addCDS(uint cd_start, uint cd_end, char phase=0); + + bool monoFeature() { + return (exons.Count()==0 || + (exons.Count()==1 && //exon_ftype_id==ftype_id && + exons[0]->end==this->end && exons[0]->start==this->start)); + } + + bool hasCDS() { return (CDstart>0); } + + const char* getFeatureName() { + return names->feats.getName(ftype_id); + } + void setFeatureName(const char* feature); + + void addAttr(const char* attrname, const char* attrvalue); + int removeAttr(const char* attrname, const char* attrval=NULL); + int removeAttr(int aid, const char* attrval=NULL); + int removeExonAttr(GffExon& exon, const char* attrname, const char* attrval=NULL); + int removeExonAttr(GffExon& exon, int aid, const char* attrval=NULL); + const char* getAttrName(int i) { + if (attrs==NULL) return NULL; + return names->attrs.getName(attrs->Get(i)->attr_id); + } + char* getAttr(const char* attrname, bool checkFirstExon=false) { + if (names==NULL || attrname==NULL) return NULL; + char* r=NULL; + if (attrs==NULL) { + if (!checkFirstExon) return NULL; + } + else r=attrs->getAttr(names, attrname); + if (r!=NULL) return r; + if (checkFirstExon && exons.Count()>0) { + r=exons[0]->getAttr(names, attrname); + } + return r; + } + + char* getExonAttr(GffExon* exon, const char* attrname) { + if (exon==NULL || attrname==NULL) return NULL; + return exon->getAttr(names, attrname); + } + + char* getExonAttr(int exonidx, const char* attrname) { + if (exonidx<0 || exonidx>=exons.Count() || attrname==NULL) return NULL; + return exons[exonidx]->getAttr(names, attrname); + } + + char* getAttrValue(int i) { + if (attrs==NULL) return NULL; + return attrs->Get(i)->attr_val; + } + const char* getGSeqName() { + return names->gseqs.getName(gseq_id); + } + + const char* getRefName() { + return names->gseqs.getName(gseq_id); + } + void setRefName(const char* newname); + + const char* getTrackName() { + return names->tracks.getName(track_id); + } + bool exonOverlap(uint s, uint e) {//check if ANY exon overlaps given segment + //ignores strand! 
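+     // (s and e may be passed in either order -- they are swapped below if needed;
+     //  a sketch with placeholder names: t.exonOverlap(1200,1350) is true when any
+     //  exon of GffObj t intersects the 1200..1350 interval, on either strand)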
+ if (s>e) swap(s,e); + for (int i=0;ioverlap(s,e)) return true; + } + return false; + } + bool exonOverlap(GffObj& m) {//check if ANY exon overlaps given segment + //if (gseq_id!=m.gseq_id) return false; + // ignores strand and gseq_id, must check in advance + for (int i=0;istart>m.exons[j]->end) continue; + if (m.exons[j]->start>exons[i]->end) break; + //-- overlap if we are here: + return true; + } + } + return false; + } + + int exonOverlapIdx(uint s, uint e, int* ovlen=NULL) { + //return the exons' index for the overlapping OR ADJACENT exon + //ovlen, if given, will return the overlap length + if (s>e) swap(s,e); + s--;e++; //to also catch adjacent exons + for (int i=0;istart>e) break; + if (s>exons[i]->end) continue; + //-- overlap if we are here: + if (ovlen!=NULL) { + s++;e--; + int ovlend= (exons[i]->end>e) ? e : exons[i]->end; + *ovlen= ovlend - ((s>exons[i]->start)? s : exons[i]->start)+1; + } + return i; + } //for each exon + *ovlen=0; + return -1; + } + + int exonOverlapLen(GffObj& m) { + if (start>m.end || m.start>end) return 0; + int i=0; + int j=0; + int ovlen=0; + while (istart; + uint iend=exons[i]->end; + uint jstart=m.exons[j]->start; + uint jend=m.exons[j]->end; + if (istart>jend) { j++; continue; } + if (jstart>iend) { i++; continue; } + //exon overlap + uint ovstart=GMAX(istart,jstart); + if (iend0) xcoordseg(CDstart, CDend); + for (int i=0;istart, exons[i]->end); + } + if (xstatus=='-') { + exons.Reverse(); + int flen=end-start; + start=xend-end+1; + end=start+flen; + } + else { + start=start-xstart+1; + end=end-xstart+1; + } + } + + //transform an arbitrary segment based on current xstatus/xstart-xend + void xcoordseg(uint& segstart, uint &segend) { + if (xstatus==0) return; + if (xstatus=='-') { + int flen=segend-segstart; + segstart=xend-segend+1; + segend=segstart+flen; + return; + } + else { + segstart=segstart-xstart+1; + segend=segend-xstart+1; + } + } + + void unxcoord() { //revert back to absolute genomic/gff coordinates if xstatus==true + if (xstatus==0) return; //nothing to do, no transformation appplied + if (CDstart>0) unxcoordseg(CDstart, CDend); + //restore all GffExon intervals too + for (int i=0;istart, exons[i]->end); + } + if (xstatus=='-') { + exons.Reverse(); + int flen=end-start; + start=xend-end+1; + end=start+flen; + } + else { + start=start+xstart-1; + end=end+xstart-1; + } + xstatus=0; + } + void unxcoordseg(uint& astart, uint &aend) { + //restore an arbitrary interval -- does NOT change the transform state! 
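+      // (reverse of xcoordseg(): for a '-' transform the interval is reflected back
+      //  around xend, for a '+' transform it is shifted back by xstart-1)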
+ if (xstatus==0) return; + if (xstatus=='-') { + int flen=aend-astart; + astart=xend-aend+1; + aend=astart+flen; + } + else { + astart=astart+xstart-1; + aend=aend+xstart-1; + } + } + //--------------------- + bool operator==(GffObj& d){ + return (gseq_id==d.gseq_id && start==d.start && end==d.end && strcmp(gffID, d.gffID)==0); + } + bool operator>(GffObj& d){ + if (gseq_id!=d.gseq_id) return (gseq_id>d.gseq_id); + if (start==d.start) { + if (getLevel()==d.getLevel()) { + if (end==d.end) return (strcmp(gffID, d.gffID)>0); + else return (end>d.end); + } else return (getLevel()>d.getLevel()); + } else return (start>d.start); + } + bool operator<(GffObj& d){ + if (gseq_id!=d.gseq_id) return (gseq_id& cds); + + void updateExonPhase(); //for CDS-only features, updates GExon::phase + + void printGxfLine(FILE* fout, const char* tlabel, const char* gseqname, + bool iscds, uint segstart, uint segend, int exidx, char phase, bool gff3); + void printGxf(FILE* fout, GffPrintMode gffp=pgffExon, + const char* tlabel=NULL, const char* gfparent=NULL); + void printGtf(FILE* fout, const char* tlabel=NULL) { + printGxf(fout, pgtfAny, tlabel); + } + void printGff(FILE* fout, const char* tlabel=NULL, + const char* gfparent=NULL) { + printGxf(fout, pgffAny, tlabel, gfparent); + } + void printTranscriptGff(FILE* fout, char* tlabel=NULL, + bool showCDS=false, const char* gfparent=NULL) { + if (isValidTranscript()) + printGxf(fout, showCDS ? pgffBoth : pgffExon, tlabel, gfparent); + } + void printSummary(FILE* fout=NULL); + void getCDS_ends(uint& cds_start, uint& cds_end); + void mRNA_CDS_coords(uint& cds_start, uint& cds_end); + char* getSpliced(GFaSeqGet* faseq, bool CDSonly=false, int* rlen=NULL, + uint* cds_start=NULL, uint* cds_end=NULL, GList* seglst=NULL); + char* getUnspliced(GFaSeqGet* faseq, int* rlen, GList* seglst); + char* getSplicedTr(GFaSeqGet* faseq, bool CDSonly=true, int* rlen=NULL); + //bool validCDS(GFaSeqGet* faseq); //has In-Frame Stop Codon ? 
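+   // usage sketch, with placeholder locals 'm' (a GffObj*) and 'faseq' (a GFaSeqGet*):
+   //    int seqlen=0;
+   //    char* cdna=m->getSpliced(faseq, false, &seqlen); //spliced transcript sequence
+   //    //... use cdna (seqlen bases) ...
+   //    GFREE(cdna); //the returned string is heap-allocated by getSpliced()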
+ bool empty() { return (start==0); } +}; + +typedef bool GffRecFunc(GffObj* gobj, void* usrptr1, void* usrptr2); +//user callback after parsing a mapping object: +// Returns: "done with it" status: +// TRUE if gobj is no longer needed so it's FREEd upon return +// FALSE if the user needs the gobj pointer and is responsible for +// collecting and freeing all GffObj objects + + +//GSeqStat: collect basic stats about a common underlying genomic sequence +// for multiple GffObj +class GSeqStat { + public: + int gseqid; //gseq id in the global static pool of gseqs + char* gseqname; //just a pointer to the name of gseq + //int fcount;//number of features on this gseq + uint mincoord; + uint maxcoord; + uint maxfeat_len; //maximum feature length on this genomic sequence + GffObj* maxfeat; + GSeqStat(int id=-1, char* name=NULL) { + gseqid=id; + gseqname=name; + mincoord=MAXUINT; + maxcoord=0; + maxfeat_len=0; + maxfeat=NULL; + } + bool operator>(GSeqStat& g) { + return (gseqid>g.gseqid); + } + bool operator<(GSeqStat& g) { + return (gseqid { + //just adding the option to sort by genomic sequence and coordinate + bool mustSort; + public: + GfList(bool sortbyloc=false):GList(false,false,false) { + //GffObjs in this list are NOT deleted when the list is cleared + //-- for deallocation of these objects, call freeAll() or freeUnused() as needed + mustSort=sortbyloc; + } + void sortedByLoc(bool v=true) { + bool prev=mustSort; + mustSort=v; + if (fCount>0 && mustSort && !prev) { + this->setSorted((GCompareProc*)gfo_cmpByLoc); + } + } + void finalize(GffReader* gfr, bool mergeCloseExons, + bool keepAttrs=false, bool noExonAttr=true) { //if set, enforce sort by locus + if (mustSort) { //force (re-)sorting + this->setSorted(false); + this->setSorted((GCompareProc*)gfo_cmpByLoc); + } + int delcount=0; + for (int i=0;ifinalize(gfr, mergeCloseExons, keepAttrs, noExonAttr); + } + if (delcount>0) this->Pack(); + } + void freeAll() { + for (int i=0;iisUsed()) continue; + //inform the children + for (int c=0;cchildren.Count();c++) { + fList[i]->children[c]->parent=NULL; + } + delete fList[i]; + fList[i]=NULL; + } + Clear(); + } + +}; + +class GfoHolder { + public: + int idx; //position in GffReader::gflst array + GffObj* gffobj; + GfoHolder(GffObj* gfo=NULL, int i=0) { + idx=i; + gffobj=gfo; + } +}; + +class CNonExon { //utility class used in subfeature promotion + public: + int idx; + GffObj* parent; + GffExon* exon; + GffLine* gffline; + CNonExon(int i, GffObj* p, GffExon* e, GffLine* gl) { + parent=p; + exon=e; + idx=i; + gffline=new GffLine(gl); + } + ~CNonExon() { + delete gffline; + } + }; + + +class GffReader { + friend class GffObj; + friend class GffLine; + char* linebuf; + off_t fpos; + int buflen; + protected: + bool gff_warns; //warn about duplicate IDs, etc. 
even when they are on different chromosomes + FILE* fh; + char* fname; //optional fasta file with the underlying genomic sequence to be attached to this reader + GffNames* names; //just a pointer to the global static Gff names repository in GffObj + GffLine* gffline; + bool transcriptsOnly; //keep only transcripts w/ their exon/CDS features + GHash discarded_ids; //for transcriptsOnly mode, keep track + // of discarded parent IDs + GHash phash; //transcript_id+contig (Parent~Contig) => [gflst index, GffObj] + GHash tids; //transcript_id uniqueness + char* gfoBuildId(const char* id, const char* ctg); + void gfoRemove(const char* id, const char* ctg); + GfoHolder* gfoAdd(const char* id, const char* ctg, GffObj* gfo, int idx); + GfoHolder* gfoFind(const char* id, const char* ctg); + CNonExon* subfPoolCheck(GffLine* gffline, GHash& pex, char*& subp_name); + void subfPoolAdd(GHash& pex, GfoHolder* newgfo); + GfoHolder* promoteFeature(CNonExon* subp, char*& subp_name, GHash& pex, + bool keepAttr, bool noExonAttr); + public: + GfList gflst; //accumulate GffObjs being read + GfoHolder* newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, + GffObj* parent=NULL, GffExon* pexon=NULL); + GfoHolder* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx); + GfoHolder* updateGffRec(GfoHolder* prevgfo, GffLine* gffline, + bool keepAttr); + GfoHolder* updateParent(GfoHolder* newgfh, GffObj* parent); + bool addExonFeature(GfoHolder* prevgfo, GffLine* gffline, GHash& pex, bool noExonAttr); + GList gseqstats; //list of all genomic sequences seen by this reader, accumulates stats + GffReader(FILE* f=NULL, bool t_only=false, bool sortbyloc=false):discarded_ids(true), + phash(true), tids(true), gflst(sortbyloc), gseqstats(true,true,true) { + gff_warns=gff_show_warnings; + names=NULL; + gffline=NULL; + transcriptsOnly=t_only; + fpos=0; + fname=NULL; + fh=f; + GMALLOC(linebuf, GFF_LINELEN); + buflen=GFF_LINELEN-1; + } + void init(FILE *f, bool t_only=false, bool sortbyloc=false) { + fname=NULL; + fh=f; + if (fh!=NULL) rewind(fh); + fpos=0; + transcriptsOnly=t_only; + gflst.sortedByLoc(sortbyloc); + } + GffReader(char* fn, bool t_only=false, bool sort=false):discarded_ids(true), phash(true), + tids(true),gflst(sort),gseqstats(true,true,true) { + gff_warns=gff_show_warnings; + names=NULL; + fname=Gstrdup(fn); + transcriptsOnly=t_only; + fh=fopen(fname, "rb"); + fpos=0; + gffline=NULL; + GMALLOC(linebuf, GFF_LINELEN); + buflen=GFF_LINELEN-1; + } + + ~GffReader() { + delete gffline; + gffline=NULL; + fpos=0; + gflst.freeUnused(); + gflst.Clear(); + discarded_ids.Clear(); + phash.Clear(); + gseqstats.Clear(); + GFREE(fname); + GFREE(linebuf); + } + + void showWarnings(bool v=true) { + gff_warns=v; + gff_show_warnings=v; + } + + GffLine* nextGffLine(); + + // load all subfeatures, re-group them: + void readAll(bool keepAttr=false, bool mergeCloseExons=false, bool noExonAttr=true); + +}; // end of GffReader + +#endif diff --git a/src/gff_utils.cpp b/src/gff_utils.cpp new file mode 100644 index 0000000..1a36394 --- /dev/null +++ b/src/gff_utils.cpp @@ -0,0 +1,579 @@ +#include "gff_utils.h" + +extern bool verbose; +extern bool debugMode; + +//bool debugState=false; + +void printFasta(FILE* f, GStr& defline, char* seq, int seqlen) { + if (seq==NULL) return; + int len=(seqlen>0)?seqlen:strlen(seq); + if (len<=0) return; + if (!defline.is_empty()) + fprintf(f, ">%s\n",defline.chars()); + int ilen=0; + for (int i=0; i < len; i++, ilen++) { + if (ilen == 70) { + fputc('\n', f); + ilen = 0; + } + 
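+    // (the check above wraps the FASTA output at 70 nucleotides per line)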
putc(seq[i], f); + } //for + fputc('\n', f); +} + +int qsearch_gloci(uint x, GList& loci) { + //binary search + //do the simplest tests first: + if (loci[0]->start>x) return 0; + if (loci.Last()->start>1; + istart=loci[i]->start; + if (istart < x) l = i + 1; + else { + if (istart == x) { //found matching coordinate here + idx=i; + while (idx<=maxh && loci[idx]->start==x) { + idx++; + } + return (idx>maxh) ? -1 : idx; + } + h = i - 1; + } + } //while + idx = l; + while (idx<=maxh && loci[idx]->start<=x) { + idx++; + } + return (idx>maxh) ? -1 : idx; +} + +int qsearch_rnas(uint x, GList& rnas) { + //binary search + //do the simplest tests first: + if (rnas[0]->start>x) return 0; + if (rnas.Last()->start>1; + istart=rnas[i]->start; + if (istart < x) l = i + 1; + else { + if (istart == x) { //found matching coordinate here + idx=i; + while (idx<=maxh && rnas[idx]->start==x) { + idx++; + } + return (idx>maxh) ? -1 : idx; + } + h = i - 1; + } + } //while + idx = l; + while (idx<=maxh && rnas[idx]->start<=x) { + idx++; + } + return (idx>maxh) ? -1 : idx; +} + +int cmpRedundant(GffObj& a, GffObj& b) { + if (a.exons.Count()==b.exons.Count()) { + if (a.covlen==b.covlen) { + return strcmp(a.getID(), b.getID()); + } + else return (a.covlen>b.covlen)? 1 : -1; + } + else return (a.exons.Count()>b.exons.Count())? 1: -1; +} + + +bool tMatch(GffObj& a, GffObj& b) { + //strict intron chain match, or single-exon perfect match + int imax=a.exons.Count()-1; + int jmax=b.exons.Count()-1; + int ovlen=0; + if (imax!=jmax) return false; //different number of introns + + if (imax==0) { //single-exon mRNAs + //if (equnspl) { + //fuzz match for single-exon transfrags: + // it's a match if they overlap at least 80% of max len + ovlen=a.exons[0]->overlapLen(b.exons[0]); + int maxlen=GMAX(a.covlen,b.covlen); + return (ovlen>=maxlen*0.8); + /*} + else { + //only exact match + ovlen=a.covlen; + return (a.exons[0]->start==b.exons[0]->start && + a.exons[0]->end==b.exons[0]->end); + + }*/ + } + //check intron overlaps + ovlen=a.exons[0]->end-(GMAX(a.start,b.start))+1; + ovlen+=(GMIN(a.end,b.end))-a.exons.Last()->start; + for (int i=1;i<=imax;i++) { + if (ilen(); + if ((a.exons[i-1]->end!=b.exons[i-1]->end) || + (a.exons[i]->start!=b.exons[i]->start)) { + return false; //intron mismatch + } + } + return true; +} + + +bool unsplContained(GffObj& ti, GffObj& tj, bool fuzzSpan) { + //returns true only if ti (which MUST be single-exon) is "almost" contained in any of tj's exons + //but it does not cross any intron-exon boundary of tj + int imax=ti.exons.Count()-1; + int jmax=tj.exons.Count()-1; + if (imax>0) GError("Error: bad unsplContained() call, 1st param must be single-exon transcript!\n"); + int minovl = (int)(0.8 * ti.len()); //minimum overlap for fuzzSpan + if (fuzzSpan) { + for (int j=0;j<=jmax;j++) { + //must NOT overlap the introns + if ((j>0 && ti.startstart) + || (jtj.exons[j]->end)) + return false; + if (ti.exons[0]->overlapLen(tj.exons[j])>=minovl) + return true; + } + } else { + for (int j=0;j<=jmax;j++) { + //must NOT overlap the introns + if ((j>0 && ti.startstart) + || (jtj.exons[j]->end)) + return false; + //strict containment + if (ti.end<=tj.exons[j]->end && ti.start>=tj.exons[j]->start) + return true; + } + } + return false; +} + +GffObj* redundantTranscripts(GffObj& ti, GffObj& tj, bool matchAllIntrons, bool fuzzSpan) { + // matchAllIntrons==true: transcripts are considered "redundant" only if + // they have the exact same number of introns and same splice sites (or none) + // (single-exon transcripts can 
be also fully contained to be considered matching) + // matchAllIntrons==false: an intron chain could be a subset of a "container" chain, + // as long as no intron-exon boundaries are violated; also, a single-exon + // transcript will be collapsed if it's contained in one of the exons of the other + // fuzzSpan==false: the genomic span of one transcript must be contained in or equal with the genomic + // span of the other + // + // fuzzSpan==true: then genomic spans of transcripts are no longer required to be fully contained + // (i.e. they may extend each-other in opposite directions) + + //if redundancy is found, the "bigger" transcript is returned (otherwise NULL is returned) + if (ti.start>=tj.end || tj.start>=ti.end || tj.strand!=ti.strand) return NULL; //no span overlap at all + int imax=ti.exons.Count()-1; + int jmax=tj.exons.Count()-1; + GffObj* bigger=NULL; + GffObj* smaller=NULL; + if (matchAllIntrons) { + if (imax!=jmax) return false; + if (ti.covlen>tj.covlen) { + bigger=&ti; + if (!fuzzSpan && (ti.start>tj.start || ti.endti.start || tj.endend!=tj.exons[i]->end || + ti.exons[i+1]->start!=tj.exons[i+1]->start) return NULL; + } + return bigger; + } + //--- matchAllIntrons==false: intron-chain containment is also considered redundancy + //int maxlen=0; + int minlen=0; + if (ti.covlen>tj.covlen) { + if (tj.exons.Count()>ti.exons.Count()) { + //exon count override + bigger=&tj; + smaller=&ti; + } + else { + bigger=&ti; + smaller=&tj; + } + //maxlen=ti.covlen; + minlen=tj.covlen; + } + else { //tj has more bases + if (ti.exons.Count()>tj.exons.Count()) { + //exon count override + bigger=&ti; + smaller=&tj; + } + else { + bigger=&tj; + smaller=&ti; + } + //maxlen=tj.covlen; + minlen=ti.covlen; + } + if (imax==0 && jmax==0) { + //single-exon transcripts: if fuzzSpan, at least 80% of the shortest one must be overlapped by the other + if (fuzzSpan) { + return (ti.exons[0]->overlapLen(tj.exons[0])>=minlen*0.8) ? bigger : NULL; + } + else { + return (smaller->start>=bigger->start && smaller->end<=bigger->end) ? bigger : NULL; + } + } + //containment is also considered redundancy + if (smaller->exons.Count()==1) { + //check if this single exon is contained in any of tj exons + //without violating any intron-exon boundaries + return (unsplContained(*smaller, *bigger, fuzzSpan) ? 
bigger : NULL); + } + + //--from here on: both are multi-exon transcripts, imax>0 && jmax>0 + if (ti.exons[imax]->startend || + tj.exons[jmax]->startend ) + return NULL; //intron chains do not overlap at all + + + //checking full intron chain containment + uint eistart=0, eiend=0, ejstart=0, ejend=0; //exon boundaries + int i=1; //exon idx to the right of the current intron of ti + int j=1; //exon idx to the right of the current intron of tj + //find the first intron overlap: + while (i<=imax && j<=jmax) { + eistart=ti.exons[i-1]->end; + eiend=ti.exons[i]->start; + ejstart=tj.exons[j-1]->end; + ejend=tj.exons[j]->start; + if (ejendstart>smaller->start || bigger->end < smaller->end)) return NULL; + if ((i>1 && j>1) || i>imax || j>jmax) { + return NULL; //either no intron overlaps found at all + //or it's not the first intron for at least one of the transcripts + } + if (eistart!=ejstart || eiend!=ejend) return NULL; //not an exact intron match + if (j>i) { + //i==1, ti's start must not conflict with the previous intron of tj + if (ti.startstart) return NULL; + //so i's first intron starts AFTER j's first intron + // then j must contain i, so i's last intron must end with or before j's last intron + if (ti.exons[imax]->start>tj.exons[jmax]->start) return NULL; + //comment out the line above if you just want "intron compatibility" (i.e. extension of intron chains ) + } + else if (i>j) { + //j==1, tj's start must not conflict with the previous intron of ti + if (tj.startstart) return NULL; + //so j's intron chain starts AFTER i's + // then i must contain j, so j's last intron must end with or before j's last intron + if (tj.exons[jmax]->start>ti.exons[imax]->start) return NULL; + //comment out the line above for just "intronCompatible()" check (allowing extension of intron chain) + } + //now check if the rest of the introns overlap, in the same sequence + i++; + j++; + while (i<=imax && j<=jmax) { + if (ti.exons[i-1]->end!=tj.exons[j-1]->end || + ti.exons[i]->start!=tj.exons[j]->start) return NULL; + i++; + j++; + } + i--; + j--; + if (i==imax && jtj.exons[j]->end) return NULL; + } + else if (j==jmax && iti.exons[i]->end) return NULL; + } + return bigger; +} + + +int gseqCmpName(const pointer p1, const pointer p2) { + return strcmp(((GenomicSeqData*)p1)->gseq_name, ((GenomicSeqData*)p2)->gseq_name); +} + + +void printLocus(GffLocus* loc, const char* pre) { + if (pre!=NULL) fprintf(stderr, "%s", pre); + GMessage(" [%d-%d] : ", loc->start, loc->end); + GMessage("%s",loc->rnas[0]->getID()); + for (int i=1;irnas.Count();i++) { + GMessage(",%s",loc->rnas[i]->getID()); + } + GMessage("\n"); +} + +void preserveContainedCDS(GffObj* t, GffObj* tfrom) { + //transfer CDS info to the container t if it's a larger protein + if (tfrom->CDstart==0) return; + if (t->CDstart) { + if (tfrom->CDstartCDstart && tfrom->CDstart>=t->start) + t->CDstart=tfrom->CDstart; + if (tfrom->CDend>t->CDend && tfrom->CDend<=t->end) + t->CDend=tfrom->CDend; + } + else { //no CDS info on container, just copy it from the contained + t->addCDS(tfrom->CDstart, tfrom->CDend, tfrom->CDphase); + } +} + +void placeGf(GffObj* t, GenomicSeqData* gdata, bool doCluster, bool collapseRedundant, + bool matchAllIntrons, bool fuzzSpan) { + GTData* tdata=new GTData(t); + gdata->tdata.Add(tdata); + int tidx=-1; + /* + if (debug) { + GMessage(">>Placing transcript %s\n", t->getID()); + debugState=true; + } + else debugState=false; + */ + if (t->exons.Count()>0) + tidx=gdata->rnas.Add(t); //added it in sorted order + else { + gdata->gfs.Add(t); + 
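+      // exon-less records (typically gene or other non-transcript features) are only
+      // collected in gdata->gfs and never reach the locus clustering below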
return; //nothing to do with these non-transcript objects + } + if (!doCluster) return; + if (gdata->loci.Count()==0) { + gdata->loci.Add(new GffLocus(t)); + //GMessage(" <start, t->end); + return; + } + /* + //DEBUG: show available loci: + if (debug) { + GMessage(" [%d loci already:\n", gdata->loci.Count()); + for (int l=0;lloci.Count();l++) { + printLocus(gdata->loci[l]); + } + } + */ + int nidx=qsearch_gloci(t->end, gdata->loci); //get index of nearest locus starting just ABOVE t->end + //GMessage("\tlooking up end coord %d in gdata->loci.. (qsearch got nidx=%d)\n", t->end, nidx); + if (nidx==0) { + //cannot have any overlapping loci + //if (debug) GMessage(" <start, t->end); + gdata->loci.Add(new GffLocus(t)); + return; + } + if (nidx==-1) nidx=gdata->loci.Count();//all loci start below t->end + int lfound=0; //count of parent loci + GArray mrgloci(false); + GList tloci(true); //candidate parent loci to adopt this + //if (debug) GMessage("\tchecking all loci from %d to 0\n",nidx-1); + for (int l=nidx-1;l>=0;l--) { + GffLocus& loc=*(gdata->loci[l]); + if (loc.strand!='.' && t->strand!='.'&& loc.strand!=t->strand) continue; + if (t->start>loc.end) { + if (t->start-loc.start>GFF_MAX_LOCUS) break; //give up already + continue; + } + if (loc.start>t->end) { + //this should never be the case if nidx was found correctly + GMessage("Warning: qsearch_gloci found loc.start>t.end!(t=%s)\n", t->getID()); + continue; + } + /* + if (debug) { + GMessage(" !range overlap found with locus "); + printLocus(&loc); + } + */ + if (loc.add_RNA(t)) { + //will add this transcript to loc + lfound++; + mrgloci.Add(l); + if (collapseRedundant) { + //compare to every single transcript in this locus + for (int ti=0;tiuptr); + //GMessage(" ..redundant check vs overlapping transcript %s\n",loc.rnas[ti]->getID()); + GffObj* container=NULL; + if (odata->replaced_by==NULL && + (container=redundantTranscripts(*t, *(loc.rnas[ti]), matchAllIntrons, fuzzSpan))!=NULL) { + if (container==t) { + odata->replaced_by=t; + preserveContainedCDS(t, loc.rnas[ti]); + } + else { + tdata->replaced_by=loc.rnas[ti]; + preserveContainedCDS(loc.rnas[ti], t); + } + } + }//for each transcript in the exon-overlapping locus + } //if doCollapseRedundant + } //overlapping locus + } //for each existing locus + if (lfound==0) { + //overlapping loci not found, create a locus with only this mRNA + /* if (debug) { + GMessage(" overlapping locus not found, create locus %d-%d \n",t->start, t->end); + } + */ + int addidx=gdata->loci.Add(new GffLocus(t)); + if (addidx<0) { + //should never be the case! 
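+      // defensive check: a negative index from loci.Add() means the new locus
+      // could not be inserted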
+ GMessage(" WARNING: new GffLocus(%s:%d-%d) not added!\n",t->getID(), t->start, t->end); + } + } + else { //found at least one overlapping locus + lfound--; + int locidx=mrgloci[lfound]; + GffLocus& loc=*(gdata->loci[locidx]); + //last locus index found is also the smallest index + if (lfound>0) { + //more than one loci found parenting this mRNA, merge loci + /* if (debug) + GMessage(" merging %d loci \n",lfound); + */ + for (int l=0;lloci[mlidx]), t); + gdata->loci.Delete(mlidx); //highest indices first, so it's safe to remove + } + } + int i=locidx; + while (i>0 && loc<*(gdata->loci[i-1])) { + //bubble down until it's in the proper order + i--; + gdata->loci.Swap(i,i+1); + } + }//found at least one overlapping locus +} + +void collectLocusData(GList& ref_data) { + int locus_num=0; + for (int g=0;gloci.Count();l++) { + GffLocus& loc=*(gdata->loci[l]); + GHash gnames(true); //gene names in this locus + GHash geneids(true); //Entrez GeneID: numbers + for (int i=0;i0) { //collect all gene names associated to this locus + gnames.startIterate(); + int* gfreq=NULL; + char* key=NULL; + while ((gfreq=gnames.NextData(key))!=NULL) { + loc.gene_names.AddIfNew(new CGeneSym(key,*gfreq)); + } + } //added collected gene_names + if (loc.gene_ids.Count()>0) { //collect all GeneIDs names associated to this locus + geneids.startIterate(); + int* gfreq=NULL; + char* key=NULL; + while ((gfreq=geneids.NextData(key))!=NULL) { + loc.gene_ids.AddIfNew(new CGeneSym(key,*gfreq)); + } + } + } //for each locus + }//for each genomic sequence +} + + +void GffLoader::load(GList& seqdata, GFValidateFunc* gf_validate, + bool doCluster, bool doCollapseRedundant, + bool matchAllIntrons, bool fuzzSpan, bool forceExons) { + GffReader* gffr=new GffReader(f, this->transcriptsOnly, false); //not only mRNA features, not sorted + gffr->showWarnings(this->showWarnings); + // keepAttrs mergeCloseExons noExonAttr + gffr->readAll(this->fullAttributes, this->mergeCloseExons, this->noExonAttrs); + //int redundant=0; //redundant annotation discarded + if (verbose) GMessage(" .. 
loaded %d genomic features from %s\n", gffr->gflst.Count(), fname.chars()); + //int rna_deleted=0; + //add to GenomicSeqData, adding to existing loci and identifying intron-chain duplicates + for (int k=0;kgflst.Count();k++) { + GffObj* m=gffr->gflst[k]; + if (strcmp(m->getFeatureName(), "locus")==0 && + m->getAttr("transcripts")!=NULL) { + continue; //discard locus meta-features + } + + char* rloc=m->getAttr("locus"); + if (rloc!=NULL && startsWith(rloc, "RLOC_")) { + m->removeAttr("locus", rloc); + } + if (m->exons.Count()==0 && m->children.Count()==0) { + //a non-mRNA feature with no subfeatures + //add a dummy exon just to have the generic exon checking work + m->addExon(m->start,m->end); + } + if (forceExons && m->children.Count()==0) { + m->exon_ftype_id=gff_fid_exon; + } + GList gfadd(false,false); + if (gf_validate!=NULL && !(*gf_validate)(m, &gfadd)) { + continue; + } + m->isUsed(true); //so the gffreader won't destroy it + int i=-1; + GenomicSeqData f(m->gseq_id); + GenomicSeqData* gdata=NULL; + + if (seqdata.Found(&f,i)) gdata=seqdata[i]; + else { //entry not created yet for this genomic seq + gdata=new GenomicSeqData(m->gseq_id); + seqdata.Add(gdata); + } + for (int k=0;kgflst.Count(), fname.chars()); + if (f!=stdin) { fclose(f); f=NULL; } + delete gffr; +} diff --git a/src/gff_utils.h b/src/gff_utils.h new file mode 100644 index 0000000..4d9b934 --- /dev/null +++ b/src/gff_utils.h @@ -0,0 +1,623 @@ +#ifndef GFF_UTILS_H +#define GFF_UTILS_H +#include "gff.h" +#include "GStr.h" +#include "GFastaIndex.h" +#include "GFaSeqGet.h" + +typedef bool GFValidateFunc(GffObj* gf, GList* gfadd); + +class GeneInfo { //for Ensembl GTF conversion + public: + int flag; + GffObj* gf; + GList gene_names; + GList transcripts; //list of transcript IDs + GeneInfo():gene_names(true, true, true), transcripts(true,true,true) { + gf=NULL; + flag=0; + } + GeneInfo(GffObj* gfrec, bool ensembl_convert=false):gene_names(true, true, true), + transcripts(true,true,true) { + flag=0; + if (gfrec->getGeneName()) + gene_names.Add(new GStr(gfrec->getGeneName())); + transcripts.Add(new GStr(gfrec->getID())); + create_gf(gfrec, ensembl_convert); + } + + void create_gf(GffObj* gfrec, bool ensembl_convert) { + gf=new GffObj(gfrec->getGeneID()); + gf->gseq_id=gfrec->gseq_id; + gf->track_id=gfrec->track_id; + gf->start=gfrec->start; + gf->end=gfrec->end; + gf->strand=gfrec->strand; + gf->setFeatureName("gene"); + gf->isGene(true); + gf->isUsed(true); + gf->uptr=this; + gfrec->incLevel(); + gfrec->parent=gf; + gf->children.Add(gfrec); + if (ensembl_convert) { + //gf->addAttr("type", gf->getTrackName()); + const char* biotype=gfrec->getAttr("type"); + if (biotype) gf->addAttr("type", biotype); + } + //gf->children.Add(gfrec); + } + //~GeneInfo() { + // } + void update(GffObj* gfrec) { + if (transcripts.AddedIfNew(new GStr(gfrec->getID()))<0) + return; + gene_names.AddedIfNew(new GStr(gfrec->getGeneName())); + if (gf==NULL) { + GError("GeneInfo::update() called on uninitialized gf!\n"); + //create_gf(gfrec); + //return; + } + gfrec->parent=gf; + gf->children.Add(gfrec); + gfrec->incLevel(); + if (gf->start>gfrec->start) + gf->start=gfrec->start; + if (gf->endend) + gf->end=gfrec->end; + } + void finalize() { + //prepare attributes for printing + //must be called right before printing + if (gf==NULL || transcripts.Count()==0) return; + if (gene_names.Count()>0) { + gf->addAttr("Name", gene_names[0]->chars()); + /* + GStr s(gene_names[0]->chars()); + for (int i=1;ichars()); + } + gf->addAttr("genes", s.chars()); + */ + } 
//has gene names + GStr t(transcripts[0]->chars()); + for (int i=1;ichars()); + } + gf->addAttr("transcripts", t.chars()); + } +}; + +//genomic fasta sequence handling +class GFastaDb { + public: + char* fastaPath; + GFastaIndex* faIdx; //could be a cdb .cidx file + int last_fetchid; + GFaSeqGet* faseq; + //GCdbYank* gcdb; + char* getFastaFile(int gseq_id) { + if (fastaPath==NULL) return NULL; + GStr s(fastaPath); + s.trimR('/'); + s.appendfmt("/%s",GffObj::names->gseqs.getName(gseq_id)); + GStr sbase(s); + if (!fileExists(s.chars())) s.append(".fa"); + if (!fileExists(s.chars())) s.append("sta"); + if (fileExists(s.chars())) return Gstrdup(s.chars()); + else { + GMessage("Warning: cannot find genomic sequence file %s{.fa,.fasta}\n",sbase.chars()); + return NULL; + } + } + + GFastaDb(const char* fpath=NULL) { + //gcdb=NULL; + fastaPath=NULL; + faseq=NULL; + faIdx=NULL; + init(fpath); + } + + void init(const char* fpath) { + if (fpath==NULL || fpath[0]==0) return; + last_fetchid=-1; + if (!fileExists(fpath)) + GError("Error: file/directory %s does not exist!\n",fpath); + fastaPath=Gstrdup(fpath); + GStr gseqpath(fpath); + if (fileExists(fastaPath)>1) { //exists and it's not a directory + GStr fainame(fastaPath); + if (fainame.rindex(".fai")==fainame.length()-4) { + //.fai index file given directly + fastaPath[fainame.length()-4]=0; + if (!fileExists(fastaPath)) + GError("Error: cannot find fasta file for index %s !\n", fastaPath); + } + else fainame.append(".fai"); + //GMessage("creating GFastaIndex with fastaPath=%s, fainame=%s\n", fastaPath, fainame.chars()); + faIdx=new GFastaIndex(fastaPath,fainame.chars()); + GStr fainamecwd(fainame); + int ip=-1; + if ((ip=fainamecwd.rindex(CHPATHSEP))>=0) + fainamecwd.cut(0,ip+1); + if (!faIdx->hasIndex()) { //could not load index + //try current directory + if (fainame!=fainamecwd) { + if (fileExists(fainamecwd.chars())>1) { + faIdx->loadIndex(fainamecwd.chars()); + } + } + } //tried to load index + if (!faIdx->hasIndex()) { + GMessage("No fasta index found for %s. Rebuilding, please wait..\n",fastaPath); + faIdx->buildIndex(); + if (faIdx->getCount()==0) GError("Error: no fasta records found!\n"); + GMessage("Fasta index rebuilt.\n"); + FILE* fcreate=fopen(fainame.chars(), "w"); + if (fcreate==NULL) { + GMessage("Warning: cannot create fasta index %s! 
(permissions?)\n", fainame.chars()); + if (fainame!=fainamecwd) fcreate=fopen(fainamecwd.chars(), "w"); + if (fcreate==NULL) + GError("Error: cannot create fasta index %s!\n", fainamecwd.chars()); + } + if (faIdx->storeIndex(fcreate)getCount()) + GMessage("Warning: error writing the index file!\n"); + } //index created and attempted to store it + } //multi-fasta + } + GFaSeqGet* fetch(int gseq_id, bool checkFasta=false) { + if (fastaPath==NULL) return NULL; + if (gseq_id==last_fetchid && faseq!=NULL) return faseq; + delete faseq; + faseq=NULL; + last_fetchid=-1; + char* gseqname=GffObj::names->gseqs.getName(gseq_id); + if (faIdx!=NULL) { //fastaPath was the multi-fasta file name + GFastaRec* farec=faIdx->getRecord(gseqname); + if (farec!=NULL) { + faseq=new GFaSeqGet(fastaPath,farec->seqlen, farec->fpos, + farec->line_len, farec->line_blen); + faseq->loadall(); //just cache the whole sequence, it's faster + last_fetchid=gseq_id; + } + else { + GMessage("Warning: couldn't find fasta record for '%s'!\n",gseqname); + return NULL; + } + } + else { + char* sfile=getFastaFile(gseq_id); + if (sfile!=NULL) { + faseq=new GFaSeqGet(sfile,checkFasta); + faseq->loadall(); + last_fetchid=gseq_id; + GFREE(sfile); + } + } //one fasta file per contig + return faseq; + } + + ~GFastaDb() { + GFREE(fastaPath); + //delete gcdb; + delete faIdx; + delete faseq; + } +}; + +class GffLocus; + +class GTData { //transcript associated data + public: + GffObj* rna; + GffLocus* locus; + GffObj* replaced_by; + GeneInfo* geneinfo; + int flag; + GTData(GffObj* t=NULL) { + rna=t; + flag=0; + locus=NULL; + replaced_by=NULL; + geneinfo=NULL; + if (rna!=NULL) { + geneinfo=(GeneInfo*)rna->uptr; //take over geneinfo, if there + rna->uptr=this; + } + } + bool operator>(GTData& b) { return (rna > b.rna); } + bool operator<(GTData& b) { return (rna < b.rna); } + bool operator==(GTData& b) { return (rna==b.rna); } +}; + +class CGeneSym { + public: + GStr name; + int freq; + CGeneSym(const char* n=NULL, int f=0):name(n) { + freq=f; + } + bool operator>(CGeneSym& b) { + return (freq==b.freq) ? ( (name.length()==b.name.length()) ? 
(name>b.name) : + (name.length()>b.name.length()) ) : ( freqb.freq ); + } + bool operator==(CGeneSym& b) { return name==b.name; } +}; + +const char* getGeneDescr(const char* gsym); + +void printLocus(GffLocus* loc, const char* pre=NULL); + +class GffLocus:public GSeg { +public: + int gseq_id; //id of underlying genomic sequence + int locus_num; + bool is_mrna; + char strand; + GffObj* t_maxcov; //transcript with maximum coverage (for main "ref" transcript) + GList rnas; //list of transcripts (isoforms) for this locus + GArray mexons; //list of merged exons in this region + GList gene_names; + GList gene_ids; + int v; //user flag/data + /* + bool operator==(GffLocus& d){ + return (gseq_id==d.gseq_id && strand==d.strand && start==d.start && end==d.end); + } + bool operator>(GffLocus& d){ + if (gseq_id!=d.gseq_id) return (gseq_id>d.gseq_id); + if (start==d.start) { + if (end==d.end) return (strand>d.strand); + else return (end>d.end); + } else return (start>d.start); + } + bool operator<(GffLocus& d){ + if (gseq_id!=d.gseq_id) return (gseq_idname.chars(); + } + const char* get_tmax_id() { + return t_maxcov->getID(); + } + const char* get_descr() { + if (gene_names.Count()>0) { + for (int i=0;iname.chars()); + if (gn!=NULL) return gn; + } + } + char* s=t_maxcov->getAttr("product"); + if (s!=NULL) return s; + s=t_maxcov->getAttr("descr"); + if (s!=NULL) return s; + s=t_maxcov->getAttr("description"); + if (s!=NULL) return s; + s=t_maxcov->getAttr("info"); + if (s!=NULL) return s; + return NULL; + } + + GffLocus(GffObj* t=NULL):rnas(true,false,false),mexons(true,true), + gene_names(true,true,false), gene_ids(true,true,false) { + //this will NOT free rnas! + t_maxcov=NULL; + gseq_id=-1; + v=0; + locus_num=0; + start=0; + end=0; + strand=0; + is_mrna=false; + if (t!=NULL) { + start=t->exons.First()->start; + end=t->exons.Last()->end;; + gseq_id=t->gseq_id; + GSeg seg; + for (int i=0;iexons.Count();i++) { + seg.start=t->exons[i]->start; + seg.end=t->exons[i]->end; + mexons.Add(seg); + } + rnas.Add(t); + ((GTData*)(t->uptr))->locus=this; + t_maxcov=t; + strand=t->strand; + if (t->ftype_id==gff_fid_mRNA) { + is_mrna=true; + } + } + } + + void addMerge(GffLocus& locus, GffObj* lnkrna) { + //add all the elements of the other locus (merging) + //-- merge mexons + GArray ovlexons(true,true); //list of locus.mexons indexes overlapping existing mexons + int i=0; //index of first mexons with a merge + int j=0; //index current mrna exon + while (iiend) { //mexons[i] end extend + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } // mexons[i] end extend + j++; //check the next locus.mexon + } + //-- add the rest of the non-overlapping mexons: + GSeg seg; + for (int i=0;iuptr))->locus=this; + if (locus.rnas[i]!=lnkrna) rnas.Add(locus.rnas[i]); + } + // -- adjust start/end as needed + if (start>locus.start) start=locus.start; + if (endcovlencovlen) + t_maxcov=locus.t_maxcov; + } + + bool exonOverlap(GffLocus& loc) { + //check if any mexons overlap! 
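+        // Sketch of the check below (illustrative note, not in the original):
+        // both this->mexons and loc.mexons are coordinate-sorted lists of
+        // already-merged, non-overlapping exon ranges, so a two-pointer sweep
+        // is enough -- advance whichever range ends first and report an overlap
+        // as soon as the current pair intersects. E.g. {[100-200],[300-400]}
+        // vs {[250-320]} overlaps via [300-320].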
+ if (strand!=loc.strand || loc.start>end || start>loc.end) return false; + int i=0; + int j=0; + while (igseq_id!=gseq_id || t->strand!=strand || t->start>end || start>t->end) + return false; //rna must be on the same genomic seq + //check for exon overlap with existing mexons + //also update mexons accordingly if t is to be added + bool hasovl=false; + int i=0; //index of first mexons with a merge + int j=0; //index current t exon + GArray ovlexons(true,true); //list of mrna exon indexes overlapping mexons + while (iexons.Count()) { + uint istart=mexons[i].start; + uint iend=mexons[i].end; + uint jstart=t->exons[j]->start; + uint jend=t->exons[j]->end; + if (iendiend) { //mexon stretch up + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } //possible mexons merge + + j++; //check the next t exon + }//all vs all exon check loop + if (hasovl) { + GSeg seg; + //add the rest of the non-overlapping exons + for (int i=0;iexons.Count();i++) { + seg.start=t->exons[i]->start; + seg.end=t->exons[i]->end; + if (!ovlexons.Exists(i)) mexons.Add(seg); + } + rnas_add(t); + // add to rnas + ((GTData*)t->uptr)->locus=this; + gseq_id=t->gseq_id; + } + return hasovl; + } + + //simpler,basic adding of a mrna + void rnas_add(GffObj* t) { + rnas.Add(t); + // adjust start/end + //if (start==0 || start>t->start) start=t->start; + if (start==0) start=t->start; + else if (start>t->start) { + start=t->start; + } + if (endend) end=t->end; + if (t_maxcov->covlencovlen) t_maxcov=t; + if (strand==0) strand=t->strand; + if (t->ftype_id==gff_fid_mRNA) is_mrna=true; + } +}; + +class GenomicSeqData { + int gseq_id; + public: + const char* gseq_name; + GList gfs; //all non-transcript features -> usually gene features + GList rnas; //all transcripts on this genomic sequence + GList loci; //all loci clusters + GList tdata; //transcript data (uptr holder for all rnas loaded here) + //GenomicSeqData(int gid=-1):rnas(true,true,false),loci(true,true,true), + GenomicSeqData(int gid=-1):gfs(true, true, false),rnas((GCompareProc*)gfo_cmpByLoc),loci(true,true,false), + tdata(false,true,false) { + gseq_id=gid; + if (gseq_id>=0) + gseq_name=GffObj::names->gseqs.getName(gseq_id); + + } + bool operator==(GenomicSeqData& d){ + return gseq_id==d.gseq_id; + } + bool operator>(GenomicSeqData& d){ + return (gseq_id>d.gseq_id); + } + bool operator<(GenomicSeqData& d){ + return (gseq_idnt[0]==nt[0] && c->nt[1]==nt[1]); + } + bool operator==(const char* c) { + //return (nt[0]==toupper(c[0]) && nt[1]==toupper(c[1])); + //assumes given const nucleotides are uppercase already! + return (nt[0]==c[0] && nt[1]==c[1]); + } + bool operator!=(const char* c) { + //assumes given const nucleotides are uppercase already! 
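+      // Note (descriptive, not in the original): GSpliceSite holds a two-letter
+      // splice-site dinucleotide, and these comparisons back the -N splice
+      // consensus check in process_transcript (GT-AG, GC-AG, AT-AC), with
+      // uppercase literals expected on the right-hand side (e.g. site=="AG").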
+ return (nt[0]!=c[0] || nt[1]!=c[1]); + } +}; + +struct GffLoader { + GStr fname; + FILE* f; + bool transcriptsOnly; + bool fullAttributes; + bool noExonAttrs; + bool mergeCloseExons; + bool showWarnings; + void load(GList&seqdata, GFValidateFunc* gf_validate=NULL, + bool doCluster=true, bool doCollapseRedundant=true, + bool matchAllIntrons=true, bool fuzzSpan=false, bool forceExons=false); + GffLoader(const char* filename):fname(filename) { + f=NULL; + transcriptsOnly=true; + fullAttributes=false; + noExonAttrs=false; + mergeCloseExons=false; + showWarnings=false; + if (fname=="-") { + f=stdin; + fname="stdin"; + } + else { + if ((f=fopen(fname.chars(), "r"))==NULL) { + GError("Error: cannot open file %s!\n",fname.chars()); + } + } + } + ~GffLoader() { + if (f!=NULL && f!=stdin) fclose(f); + } +}; + +void printFasta(FILE* f, GStr& defline, char* seq, int seqlen=-1); + +//"position" a given coordinate x within a list of transcripts sorted by their start (lowest) +//coordinate, using quick-search; the returned int is the list index of the closest *higher* +//GffObj - i.e. starting right *ABOVE* the given coordinate +//Convention: returns -1 if there is no such GffObj (i.e. last GffObj starts below x) +int qsearch_rnas(uint x, GList& rnas); +int qsearch_gloci(uint x, GList& loci); + +GffObj* redundantTranscripts(GffObj& ti, GffObj& tj, bool matchAllIntrons=true, bool fuzzSpan=false); + +void placeGf(GffObj* t, GenomicSeqData* gdata, bool doCluster=true, bool collapseRedundant=true, + bool matchAllIntrons=true, bool fuzzSpan=false); +//void loadGFF(FILE* f, GList& seqdata, const char* fname); + +void collectLocusData(GList& ref_data); + +#endif diff --git a/src/gffread.cpp b/src/gffread.cpp new file mode 100644 index 0000000..b3c9a58 --- /dev/null +++ b/src/gffread.cpp @@ -0,0 +1,1016 @@ +#include "gff_utils.h" +#include "GArgs.h" +#include +// don't care about cdb compression +//#ifdef ENABLE_COMPRESSION +//#undef ENABLE_COMPRESSION +//#endif +//#include "GCdbYank.h" + +#define USAGE "Usage:\n\ +gffread [-g | ][-s ] \n\ + [-o ] [-t ] [-r [[]:].. 
[-R]]\n\
+ [-CTVNJMKQAFGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]\n\
+ [-i <maxintron>] \n\
+ Filters and/or converts GFF3/GTF2 records.\n\
+ <input_gff> is a GFF file, use '-' if the GFF records will be given at stdin\n\
+ \n\
+ Options:\n\
+  -g  full path to a multi-fasta file with the genomic sequences\n\
+      for all input mappings, OR a directory with single-fasta files\n\
+      (one per genomic sequence, with file names matching sequence names)\n\
+  -s  <seq_info.fsize> is a tab-delimited file providing this info\n\
+      for each of the mapped sequences:\n\
+      <seq-name> <seq-length> <seq-description>\n\
+      (useful for -A option with mRNA/EST/protein mappings)\n\
+  -i  discard transcripts having an intron larger than <maxintron>\n\
+  -r  only show transcripts overlapping coordinate range <start>..<end>\n\
+      (on chromosome/contig <chr>, strand <strand> if provided)\n\
+  -R  for -r option, discard all transcripts that are not fully \n\
+      contained within the given range\n\
+  -U  discard single-exon transcripts\n\
+  -C  coding only: discard mRNAs that have no CDS feature\n\
+  -F  full GFF attribute preservation (all attributes are shown)\n\
+  -G  only parse additional exon attributes from the first exon\n\
+      and move them to the mRNA level (useful for GTF input)\n\
+  -A  use the description field from <seq_info.fsize> and add it\n\
+      as the value for a 'descr' attribute to the GFF record\n\
+  \n\
+  -O  process also non-transcript GFF records (by default non-transcript\n\
+      records are ignored)\n\
+  -V  discard any mRNAs with CDS having in-frame stop codons\n\
+  -H  for -V option, check and adjust the starting CDS phase\n\
+      if the original phase leads to a translation with an \n\
+      in-frame stop codon\n\
+  -B  for -V option, single-exon transcripts are also checked on the\n\
+      opposite strand\n\
+  -N  discard multi-exon mRNAs that have any intron with a non-canonical\n\
+      splice site consensus (i.e. not GT-AG, GC-AG or AT-AC)\n\
+  -J  discard any mRNAs that either lack initial START codon\n\
+      or the terminal STOP codon, or have an in-frame stop codon\n\
+      (only print mRNAs with a full, valid CDS)\n\
+  \n\
+  -M/--merge : cluster the input transcripts into loci, collapsing matching\n\
+      transcripts (those with the same exact introns and fully contained)\n\
+  -d <dupinfo> : for -M option, write collapsing info to file <dupinfo>\n\
+  --cluster-only: same as --merge but without collapsing matching transcripts\n\
+  -K  for -M option: also collapse shorter, fully contained transcripts\n\
+      with fewer introns than the container\n\
+  -Q  for -M option, remove the containment restriction:\n\
+      (multi-exon transcripts will be collapsed if just their introns match,\n\
+      while single-exon transcripts can partially overlap (80%))\n\
+  \n\
+  -E  expose (warn about) duplicate transcript IDs and other potential \n\
+      problems with the given GFF/GTF records\n\
+  -Z  merge close exons into a single exon (for intron size<4)\n\
+  -w  write a fasta file with spliced exons for each GFF transcript\n\
+  -x  write a fasta file with spliced CDS for each GFF transcript\n\
+  -W  for -w and -x options, also write for each fasta record the exon\n\
+      coordinates projected onto the spliced sequence\n\
+  -y  write a protein fasta file with the translation of CDS for each record\n\
+  -L  Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)\n\
+  -m  <chr_replace> is a reference (genomic) sequence replacement table with\n\
+      this format:\n\
+      <original_ref_ID> <new_ref_ID>\n\
+      GFF records on reference sequences that are not found among the\n\
+      <original_ref_ID> entries in this file will be filtered out\n\
+  -o  the "filtered" GFF records will be written to <outfile.gff>\n\
+      (use -o- for printing to stdout)\n\
+  -t  use <trackname> in the second column of each GFF output line\n\
+  -T  -o
option will output GTF format instead of GFF3\n\ + " + + +class SeqInfo { //populated from the -s option of gffread + public: + int len; + char* descr; + SeqInfo( int l, char* s) { + len=l; + if (s==NULL) { + descr=NULL; + } else { + descr=Gstrdup(s); + } + } + ~SeqInfo() { + GFREE(descr); + } +}; + +class RefTran { + public: + char* new_name; + RefTran(char *ns) { + new_name=NULL; + if (ns!=NULL) + new_name=Gstrdup(ns); + } + ~RefTran() { + GFREE(new_name); + } +}; + +FILE* ffasta=NULL; +FILE* f_in=NULL; +FILE* f_out=NULL; +FILE* f_w=NULL; //fasta with spliced exons (transcripts) +FILE* f_x=NULL; //fasta with spliced CDS +FILE* f_y=NULL; //fasta with translated CDS +bool wCDSonly=false; + +bool validCDSonly=false; // translation with no in-frame STOP +bool bothStrands=false; //for single-exon mRNA validation, check the other strand too +bool altPhases=false; //if original phase fails translation validation, + //try the other 2 phases until one makes it +bool mRNAOnly=true; +bool spliceCheck=false; //only known splice-sites + +bool fullCDSonly=false; // starts with START, ends with STOP codon +bool fullattr=false; +//bool sortByLoc=false; // if the GFF output should be sorted by location +bool ensembl_convert=false; //-L, assist in converting Ensembl GTF to GFF3 + + +//GStr gseqpath; +//GStr gcdbfa; +//bool multiGSeq=false; //if a directory or a .cidx file was given to -g option +//GFaSeqGet* faseq=NULL; +//GCdbYank* gcdb=NULL; +//int gseq_id=-1; //current genome sequence ID -- the current GffObj::gseq_id +bool fmtGTF=false; +bool addDescr=false; +//bool protmap=false; +bool multiExon=false; +bool writeExonSegs=false; +char* tracklabel=NULL; +int maxintron=999000000; +bool mergeCloseExons=false; +//range filter: +char* rfltGSeq=NULL; +char rfltStrand=0; +uint rfltStart=0; +uint rfltEnd=MAX_UINT; +bool rfltWithin=false; //check for full containment within given range +bool noExonAttr=false; + +bool doCluster=false; +bool doCollapseRedundant=false; + +GList g_data(true,true,true); //list of GFF records by genomic seq + +//hash with sequence info +GHash seqinfo; +GHash isoCounter; //counts the valid isoforms +GHash reftbl; +GHash gene_ids; + //min-max gene span associated to chr|gene_id (mostly for Ensembl conversion) + +bool debugMode=false; +bool verbose=false; + +void loadSeqInfo(FILE* f, GHash &si) { + GLineReader fr(f); + while (!fr.isEof()) { + char* line=fr.getLine(); + if (line==NULL) break; + char* id=line; + char* lenstr=NULL; + char* text=NULL; + char* p=line; + while (*p!=0 && !isspace(*p)) p++; + if (*p==0) continue; + *p=0;p++; + while (*p==' ' || *p=='\t') p++; + if (*p==0) continue; + lenstr=p; + while (*p!=0 && !isspace(*p)) p++; + if (*p!=0) { *p=0;p++; } + while (*p==' ' || *p=='\t') p++; + if (*p!=0) text=p; //else text remains NULL + int len=0; + if (!parseInt(lenstr,len)) { + GMessage("Warning: could not parse sequence length: %s %s\n", + id, lenstr); + continue; + } + // --- here we have finished parsing the line + si.Add(id, new SeqInfo(len,text)); + } //while lines +} + +void loadRefTable(FILE* f, GHash& rt) { + GLineReader fr(f); + char* line=NULL; + while ((line=fr.getLine())) { + char* orig_id=line; + char* p=line; + while (*p!=0 && !isspace(*p)) p++; + if (*p==0) continue; + *p=0;p++;//split the line here + while (*p==' ' || *p=='\t') p++; + if (*p==0) continue; + rt.Add(orig_id, new RefTran(p)); + } //while lines +} + +char* getSeqDescr(char* seqid) { + static char charbuf[128]; + if (seqinfo.Count()==0) return NULL; + char* suf=rstrchr(seqid, '.'); + if 
(suf!=NULL) *suf=0; + SeqInfo* seqd=seqinfo.Find(seqid); + if (suf!=NULL) *suf='.'; + if (seqd!=NULL) { + GStr s(seqd->descr); + //cleanup some Uniref gunk + if (s[0]=='[') { + int r=s.index(']'); + if (r>=0 && r<8 && isdigit(s[1])) + s.remove(0,r+1); + } + if (s.length()>80) { + int r=s.index(';'); + if (r>5) s.cut(r); + } + if (s.length()>127) { + s.cut(127); + int r=s.rindex(' '); + if (r>0) s.cut(r); + } + strcpy(charbuf, s.chars()); + return charbuf; + } + else return NULL; +} + +char* getSeqName(char* seqid) { + static char charbuf[128]; + char* suf=rstrchr(seqid, '.'); + if (suf!=NULL) *suf=0; + strcpy(charbuf, seqid); + if (suf!=NULL) *suf='.'; + return charbuf; +} + +GFaSeqGet* fastaSeqGet(GFastaDb& gfasta, GffObj& gffrec) { + if (gfasta.fastaPath==NULL) return NULL; + return gfasta.fetch(gffrec.gseq_id); +} + + +int adjust_stopcodon(GffObj& gffrec, int adj, GList* seglst=NULL) { + //adj>0 => extedn CDS, adj<0 => shrink CDS + //when CDS is expanded, exons have to be checked too and + // expanded accordingly if they had the same boundary + int realadj=0; + if (gffrec.strand=='-') { + if ((int)gffrec.CDstart>adj) { + + gffrec.CDstart-=adj; + realadj=adj; + if (adj<0) { //restore + if (gffrec.exons.First()->start==gffrec.CDstart+adj) { + gffrec.exons.First()->start-=adj; + gffrec.start=gffrec.exons.First()->start; + gffrec.covlen+=adj; + } + } + else if (gffrec.exons.First()->start>=gffrec.CDstart) { + gffrec.exons.First()->start-=adj; + gffrec.start=gffrec.exons.First()->start; + gffrec.covlen+=adj; + } + } + } + else { + realadj=adj; + gffrec.CDend+=adj; + if (adj<0) {//restore + if (gffrec.exons.Last()->end==gffrec.CDend-adj) { + gffrec.exons.Last()->end+=adj; + gffrec.end=gffrec.exons.Last()->end; + gffrec.covlen+=adj; + } + } + else if (gffrec.exons.Last()->end<=gffrec.CDend) { + gffrec.exons.Last()->end+=adj; + gffrec.end=gffrec.exons.Last()->end; + gffrec.covlen+=adj; + } + } + if (seglst!=NULL) seglst->Last()->end+=adj; + return realadj; + } + +bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) { + //returns true if the transcript passed the filter + char* gname=gffrec.getGeneName(); + if (gname==NULL) gname=gffrec.getGeneID(); + GStr defline(gffrec.getID()); + if (f_out && !fmtGTF) { + const char* tname=NULL; + if ((tname=gffrec.getAttr("transcript_name"))!=NULL) { + gffrec.addAttr("Name", tname); + gffrec.removeAttr("transcript_name"); + } + } + if (ensembl_convert && startsWith(gffrec.getID(), "ENS")) { + const char* biotype=gffrec.getAttr("gene_biotype"); + if (biotype) { + gffrec.addAttr("type", biotype); + gffrec.removeAttr("gene_biotype"); + } + else { //old Ensembl files lacking gene_biotype + gffrec.addAttr("type", gffrec.getTrackName()); + } + + //bool is_gene=false; + bool is_pseudo=false; + if (strcmp(biotype, "protein_coding")==0 || gffrec.hasCDS()) + gffrec.setFeatureName("mRNA"); + else { + if (strcmp(biotype, "processed_transcript")==0) + gffrec.setFeatureName("proc_RNA"); + else { + //is_gene=endsWith(biotype, "gene"); + is_pseudo=strifind(biotype, "pseudo"); + if (is_pseudo) { + gffrec.setFeatureName("pseudo_RNA"); + } + else if (endsWith(biotype, "RNA")) { + gffrec.setFeatureName(biotype); + } else gffrec.setFeatureName("misc_RNA"); + } + } + } + if (gname && strcmp(gname, gffrec.getID())!=0) { + int* isonum=isoCounter.Find(gname); + if (isonum==NULL) { + isonum=new int(1); + isoCounter.Add(gname,isonum); + } + else (*isonum)++; + defline.appendfmt(" gene=%s", gname); + } + int seqlen=0; + + const char* tlabel=tracklabel; + if (tlabel==NULL) 
tlabel=gffrec.getTrackName(); + //defline.appendfmt(" track:%s",tlabel); + char* cdsnt = NULL; + char* cdsaa = NULL; + int aalen=0; + for (int i=1;istart-gffrec.exons[i-1]->end-1; + if (ilen>4000000) + GMessage("Warning: very large intron (%d) for transcript %s\n", + ilen, gffrec.getID()); + if (ilen>maxintron) { + return false; + } + } + GList seglst(false,true); + GFaSeqGet* faseq=fastaSeqGet(gfasta, gffrec); + if (spliceCheck && gffrec.exons.Count()>1) { + //check introns for splice site consensi ( GT-AG, GC-AG or AT-AC ) + if (faseq==NULL) GError("Error: no genomic sequence available!\n"); + int glen=gffrec.end-gffrec.start+1; + const char* gseq=faseq->subseq(gffrec.start, glen); + bool revcompl=(gffrec.strand=='-'); + bool ssValid=true; + for (int e=1;eend+1-gffrec.start; + int intronlen=gffrec.exons[e]->start-gffrec.exons[e-1]->end-1; + GSpliceSite acceptorSite(intron,intronlen,true, revcompl); + GSpliceSite donorSite(intron,intronlen, false, revcompl); + //GMessage("%c intron %d-%d : %s .. %s\n", + // gffrec.strand, istart, iend, donorSite.nt, acceptorSite.nt); + if (acceptorSite=="AG") { // GT-AG or GC-AG + if (!donorSite.canonicalDonor()) { + ssValid=false;break; + } + } + else if (acceptorSite=="AC") { // + if (donorSite!="AT") { ssValid=false; break; } + } + else { ssValid=false; break; } + } + //GFREE(gseq); + if (!ssValid) { + if (verbose) + GMessage("Invalid splice sites found for '%s'\n",gffrec.getID()); + return false; //don't print this one! + } + } + + bool trprint=true; + int stopCodonAdjust=0; + int mCDphase=0; + bool hasStop=false; + if (gffrec.CDphase=='1' || gffrec.CDphase=='2') + mCDphase = gffrec.CDphase-'0'; + if (f_y!=NULL || f_x!=NULL || validCDSonly) { + if (faseq==NULL) GError("Error: no genomic sequence provided!\n"); + //if (protmap && fullCDSonly) { + //if (protmap && (fullCDSonly || (gffrec.qlen>0 && gffrec.qend==gffrec.qlen))) { + + if (validCDSonly) { //make sure the stop codon is always included + //adjust_stopcodon(gffrec,3); + stopCodonAdjust=adjust_stopcodon(gffrec,3); + } + int strandNum=0; + int phaseNum=0; + CDS_CHECK: + cdsnt=gffrec.getSpliced(faseq, true, &seqlen,NULL,NULL,&seglst); + if (cdsnt==NULL) trprint=false; + if (validCDSonly) { + cdsaa=translateDNA(cdsnt, aalen, seqlen); + char* p=strchr(cdsaa,'.'); + hasStop=false; + if (p!=NULL) { + if (p-cdsaa>=aalen-2) { //stop found as the last codon + *p='0';//remove it + hasStop=true; + if (aalen-2==p-cdsaa) { + //previous to last codon is the stop codon + //so correct the CDS stop accordingly + adjust_stopcodon(gffrec,-3, &seglst); + stopCodonAdjust=0; //clear artificial stop adjustment + seqlen-=3; + cdsnt[seqlen]=0; + } + aalen=p-cdsaa; + } + else {//stop found before the last codon + trprint=false; + } + }//stop codon found + if (trprint==false) { //failed CDS validity check + //in-frame stop codon found + if (altPhases && phaseNum<3) { + phaseNum++; + gffrec.CDphase = '0'+((mCDphase+phaseNum)%3); + GFREE(cdsaa); + goto CDS_CHECK; + } + if (gffrec.exons.Count()==1 && bothStrands) { + strandNum++; + phaseNum=0; + if (strandNum<2) { + GFREE(cdsaa); + gffrec.strand = (gffrec.strand=='-') ? 
'+':'-'; + goto CDS_CHECK; //repeat the CDS check for a different frame + } + } + if (verbose) GMessage("In-frame STOP found for '%s'\n",gffrec.getID()); + } //has in-frame STOP + if (fullCDSonly) { + if (!hasStop || cdsaa[0]!='M') trprint=false; + } + } // CDS check requested + } //translation or codon check/output was requested + if (!trprint) { + GFREE(cdsnt); + GFREE(cdsaa); + return false; + } + if (stopCodonAdjust>0 && !hasStop) { + //restore stop codon location + adjust_stopcodon(gffrec, -stopCodonAdjust, &seglst); + if (cdsnt!=NULL && seqlen>0) { + seqlen-=stopCodonAdjust; + cdsnt[seqlen]=0; + } + if (cdsaa!=NULL) aalen--; + } + + if (f_y!=NULL) { //CDS translation fasta output requested + //char* + if (cdsaa==NULL) { //translate now if not done before + cdsaa=translateDNA(cdsnt, aalen, seqlen); + } + if (fullattr && gffrec.attrs!=NULL) { + //append all attributes found for each transcripts + for (int i=0;iCount();i++) { + defline.append(" "); + defline.append(gffrec.getAttrName(i)); + defline.append("="); + defline.append(gffrec.getAttrValue(i)); + } + } + printFasta(f_y, defline, cdsaa, aalen); + } + if (f_x!=NULL) { //CDS only + if (writeExonSegs) { + defline.append(" loc:"); + defline.append(gffrec.getGSeqName()); + defline.appendfmt("(%c)",gffrec.strand); + //warning: not CDS coordinates are written here, but the exon ones + defline+=(int)gffrec.start; + defline+=(char)'-'; + defline+=(int)gffrec.end; + // -- here these are CDS substring coordinates on the spliced sequence: + defline.append(" segs:"); + for (int i=0;i0) defline.append(","); + defline+=(int)seglst[i]->start; + defline.append("-"); + defline+=(int)seglst[i]->end; + } + } + if (fullattr && gffrec.attrs!=NULL) { + //append all attributes found for each transcript + for (int i=0;iCount();i++) { + defline.append(" "); + defline.append(gffrec.getAttrName(i)); + defline.append("="); + defline.append(gffrec.getAttrValue(i)); + } + } + printFasta(f_x, defline, cdsnt, seqlen); + } + GFREE(cdsnt); + GFREE(cdsaa); + if (f_w!=NULL) { //write spliced exons + uint cds_start=0; + uint cds_end=0; + seglst.Clear(); + char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst); + if (exont!=NULL) { + if (gffrec.CDstart>0) { + defline.appendfmt(" CDS=%d-%d", cds_start, cds_end); + } + if (writeExonSegs) { + defline.append(" loc:"); + defline.append(gffrec.getGSeqName()); + defline+=(char)'|'; + defline+=(int)gffrec.start; + defline+=(char)'-'; + defline+=(int)gffrec.end; + defline+=(char)'|'; + defline+=(char)gffrec.strand; + defline.append(" exons:"); + for (int i=0;i0) defline.append(","); + defline+=(int)gffrec.exons[i]->start; + defline.append("-"); + defline+=(int)gffrec.exons[i]->end; + } + defline.append(" segs:"); + for (int i=0;i0) defline.append(","); + defline+=(int)seglst[i]->start; + defline.append("-"); + defline+=(int)seglst[i]->end; + } + } + if (fullattr && gffrec.attrs!=NULL) { + //append all attributes found for each transcripts + for (int i=0;iCount();i++) { + defline.append(" "); + defline.append(gffrec.getAttrName(i)); + defline.append("="); + defline.append(gffrec.getAttrValue(i)); + } + } + printFasta(f_w, defline, exont, seqlen); + GFREE(exont); + } + } //writing f_w (spliced exons) + return true; +} + +void openfw(FILE* &f, GArgs& args, char opt) { + GStr s=args.getOpt(opt); + if (!s.is_empty()) { + if (s=='-') + f=stdout; + else { + f=fopen(s,"w"); + if (f==NULL) GError("Error creating file: %s\n", s.chars()); + } + } +} + +#define FWCLOSE(fh) if (fh!=NULL && fh!=stdout) 
fclose(fh) +#define FRCLOSE(fh) if (fh!=NULL && fh!=stdin) fclose(fh) + +void printGff3Header(FILE* f, GArgs& args) { + fprintf(f, "# "); + args.printCmdLine(f); + fprintf(f, "##gff-version 3\n"); + //for (int i=0;i* gfnew) { + if (reftbl.Count()>0) { + GStr refname(gffrec->getRefName()); + RefTran* rt=reftbl.Find(refname.chars()); + if (rt==NULL && refname.length()>2 && refname[-2]=='.' && isdigit(refname[-1])) { + //try removing the version suffix + refname.cut(-2); + //GMessage("[DEBUG] Trying ref name '%s'...\n", refname.chars()); + rt=reftbl.Find(refname.chars()); + } + if (rt) { + gffrec->setRefName(rt->new_name); + } + else return false; //discard, ref seq not in the given translation table + } + if (mRNAOnly && gffrec->isDiscarded()) { + //discard generic "gene" or "locus" features with no other detailed subfeatures + //GMessage("Warning: discarding %s GFF generic gene/locus container %s\n",m->getID()); + return false; + } + /* + if (gffrec->exons.Count()==0 && gffrec->children.Count()==0)) { + //a non-mRNA feature with no subfeatures + //just so we get some sequence functions working, add a dummy "exon"-like subfeature here + //--this could be a single "pseudogene" entry or another genomic region without exons + // + gffrec->addExon(gffrec->start,gffrec->end); + } + */ + if (rfltGSeq!=NULL) { //filter by gseqName + if (strcmp(gffrec->getGSeqName(),rfltGSeq)!=0) { + return false; + } + } + if (rfltStrand>0 && gffrec->strand !=rfltStrand) { + return false; + } + //check coordinates + if (rfltStart!=0 || rfltEnd!=MAX_UINT) { + if (rfltWithin) { + if (gffrec->startend>rfltEnd) { + return false; //not within query range + } + } + else { + if (gffrec->start>rfltEnd || gffrec->endexons.Count()<=1) { + return false; + } + if (wCDSonly && gffrec->CDstart==0) { + return false; + } + if (ensembl_convert && startsWith(gffrec->getID(), "ENS")) { + //keep track of chr|gene_id data -- coordinate range + char* geneid=gffrec->getGeneID(); + if (geneid!=NULL) { + GeneInfo* ginfo=gene_ids.Find(geneid); + if (ginfo==NULL) {//first time seeing this gene ID + GeneInfo* geneinfo=new GeneInfo(gffrec, ensembl_convert); + gene_ids.Add(geneid, geneinfo); + if (gfnew!=NULL) gfnew->Add(geneinfo->gf); + } + else ginfo->update(gffrec); + } + } + return true; +} + + +int main(int argc, char * const argv[]) { + GArgs args(argc, argv, + "debug;merge;cluster-only;help;force-exons;MINCOV=MINPID=hvOUNHWCVJMKQNSXTDAPRZFGLEm:g:i:r:s:t:a:b:o:w:x:y:d:"); + args.printError(USAGE, true); + if (args.getOpt('h') || args.getOpt("help")) { + GMessage("%s",USAGE); + exit(1); + } + debugMode=(args.getOpt("debug")!=NULL); + bool forceExons=(args.getOpt("force-exons")!=NULL); + mRNAOnly=(args.getOpt('O')==NULL); + //sortByLoc=(args.getOpt('S')!=NULL); + addDescr=(args.getOpt('A')!=NULL); + verbose=(args.getOpt('v')!=NULL); + wCDSonly=(args.getOpt('C')!=NULL); + validCDSonly=(args.getOpt('V')!=NULL); + altPhases=(args.getOpt('H')!=NULL); + fmtGTF=(args.getOpt('T')!=NULL); //switch output format to GTF + bothStrands=(args.getOpt('B')!=NULL); + fullCDSonly=(args.getOpt('J')!=NULL); + spliceCheck=(args.getOpt('N')!=NULL); + bool matchAllIntrons=(args.getOpt('K')==NULL); + bool fuzzSpan=(args.getOpt('Q')!=NULL); + if (args.getOpt('M') || args.getOpt("merge")) { + doCluster=true; + doCollapseRedundant=true; + } + else { + if (!matchAllIntrons || fuzzSpan) { + GMessage("%s",USAGE); + GMessage("Error: -K or -Q options require -M/--merge option!\n"); + exit(1); + } + } + if (args.getOpt("cluster-only")) { + doCluster=true; + 
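+     // --cluster-only groups transcripts into loci just like -M/--merge,
+     // but leaves every transcript in place instead of collapsing matching
+     // (redundant) ones, hence doCollapseRedundant stays false: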
doCollapseRedundant=false; + if (!matchAllIntrons || fuzzSpan) { + GMessage("%s",USAGE); + GMessage("Error: -K or -Q options have no effect with --cluster-only.\n"); + exit(1); + } + } + if (fullCDSonly) validCDSonly=true; + if (verbose) { + fprintf(stderr, "Command line was:\n"); + args.printCmdLine(stderr); + } + + fullattr=(args.getOpt('F')!=NULL); + if (args.getOpt('G')==NULL) + noExonAttr=!fullattr; + else { + noExonAttr=true; + fullattr=true; + } + ensembl_convert=(args.getOpt('L')!=NULL); + if (ensembl_convert) { + fullattr=true; + noExonAttr=false; + //sortByLoc=true; + } + + mergeCloseExons=(args.getOpt('Z')!=NULL); + multiExon=(args.getOpt('U')!=NULL); + writeExonSegs=(args.getOpt('W')!=NULL); + tracklabel=args.getOpt('t'); + GFastaDb gfasta(args.getOpt('g')); + //if (gfasta.fastaPath!=NULL) + // sortByLoc=true; //enforce sorting by chromosome/contig + GStr s=args.getOpt('i'); + if (!s.is_empty()) maxintron=s.asInt(); + + FILE* f_repl=NULL; + s=args.getOpt('d'); + if (!s.is_empty()) { + if (s=="-") f_repl=stdout; + else { + f_repl=fopen(s.chars(), "w"); + if (f_repl==NULL) GError("Error creating file %s\n", s.chars()); + } + } + + rfltWithin=(args.getOpt('R')!=NULL); + s=args.getOpt('r'); + if (!s.is_empty()) { + s.trim(); + if (s[0]=='+' || s[0]=='-') { + rfltStrand=s[0]; + s.cut(0,1); + } + int isep=s.index(':'); + if (isep>0) { //gseq name given + if (rfltStrand==0 && (s[isep-1]=='+' || s[isep-1]=='-')) { + isep--; + rfltStrand=s[isep]; + s.cut(isep,1); + } + if (isep>0) + rfltGSeq=Gstrdup((s.substr(0,isep)).chars()); + s.cut(0,isep+1); + } + GStr gsend; + char slast=s[s.length()-1]; + if (rfltStrand==0 && (slast=='+' || slast=='-')) { + s.chomp(slast); + rfltStrand=slast; + } + if (s.index("..")>=0) gsend=s.split(".."); + else gsend=s.split('-'); + if (!s.is_empty()) rfltStart=(uint)s.asInt(); + if (!gsend.is_empty()) { + rfltEnd=(uint)gsend.asInt(); + if (rfltEnd==0) rfltEnd=MAX_UINT; + } + } //gseq/range filtering + else { + if (rfltWithin) + GError("Error: option -R requires -r!\n"); + //if (rfltWholeTranscript) + // GError("Error: option -P requires -r!\n"); + } + s=args.getOpt('m'); + if (!s.is_empty()) { + FILE* ft=fopen(s,"r"); + if (ft==NULL) GError("Error opening reference table: %s\n",s.chars()); + loadRefTable(ft, reftbl); + fclose(ft); + } + s=args.getOpt('s'); + if (!s.is_empty()) { + FILE* fsize=fopen(s,"r"); + if (fsize==NULL) GError("Error opening info file: %s\n",s.chars()); + loadSeqInfo(fsize, seqinfo); + fclose(fsize); + } + + openfw(f_out, args, 'o'); + //if (f_out==NULL) f_out=stdout; + if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL)) + GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n"); + + openfw(f_w, args, 'w'); + openfw(f_x, args, 'x'); + openfw(f_y, args, 'y'); + if (f_y!=NULL || f_x!=NULL) wCDSonly=true; + //useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL); + + int numfiles = args.startNonOpt(); + //GList gfkept(false,true); //unsorted, free items on delete + int out_counter=0; //number of records printed + while (true) { + GStr infile; + if (numfiles) { + infile=args.nextNonOpt(); + if (infile.is_empty()) break; + if (infile=="-") { f_in=stdin; infile="stdin"; } + else + if ((f_in=fopen(infile, "r"))==NULL) + GError("Error: cannot open input file %s!\n",infile.chars()); + } + else + infile="-"; + GffLoader gffloader(infile.chars()); + gffloader.transcriptsOnly=mRNAOnly; + 
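+      // Pass the command-line switches through to the GFF loader:
+      // -O controls transcriptsOnly, -F/-G the attribute handling,
+      // -Z close-exon merging and -E the extra warnings.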
gffloader.fullAttributes=fullattr; + gffloader.noExonAttrs=noExonAttr; + gffloader.mergeCloseExons=mergeCloseExons; + gffloader.showWarnings=(args.getOpt('E')!=NULL); + gffloader.load(g_data, &validateGffRec, doCluster, doCollapseRedundant, + matchAllIntrons, fuzzSpan, forceExons); + if (doCluster) + collectLocusData(g_data); + if (numfiles==0) break; + } + + GStr loctrack("gffcl"); + if (tracklabel) loctrack=tracklabel; + g_data.setSorted(&gseqCmpName); + if (doCluster) { + //grouped in loci + for (int g=0;gloci.Count();l++) { + GffLocus& loc=*(gdata->loci[l]); + //check all non-replaced transcripts in this locus: + int numvalid=0; + int idxfirstvalid=-1; + for (int i=0;ireplaced_by!=NULL) { + if (f_repl && (t.udata & 8)==0) { + //t.udata|=8; + fprintf(f_repl, "%s", t.getID()); + GTData* rby=tdata; + while (rby->replaced_by!=NULL) { + fprintf(f_repl," => %s", rby->replaced_by->getID()); + rby->rna->udata|=8; + rby=(GTData*)(rby->replaced_by->uptr); + } + fprintf(f_repl, "\n"); + } + continue; + } + if (process_transcript(gfasta, t)) { + t.udata|=4; //tag it as valid + numvalid++; + if (idxfirstvalid<0) idxfirstvalid=i; + } + } + + if (f_out && numvalid>0) { + GStr locname("RLOC_"); + locname.appendfmt("%08d",loc.locus_num); + if (!fmtGTF) { + if (out_counter==0) + printGff3Header(f_out, args); + fprintf(f_out,"%s\t%s\tlocus\t%d\t%d\t.\t%c\t.\tID=%s;locus=%s", + loc.rnas[0]->getGSeqName(), loctrack.chars(), loc.start, loc.end, loc.strand, + locname.chars(), locname.chars()); + //const char* loc_gname=loc.getGeneName(); + if (loc.gene_names.Count()>0) { //print all gene names associated to this locus + fprintf(f_out, ";genes=%s",loc.gene_names.First()->name.chars()); + for (int i=1;iname.chars()); + } + } + if (loc.gene_ids.Count()>0) { //print all GeneIDs names associated to this locus + fprintf(f_out, ";geneIDs=%s",loc.gene_ids.First()->name.chars()); + for (int i=1;iname.chars()); + } + } + fprintf(f_out, ";transcripts=%s",loc.rnas[idxfirstvalid]->getID()); + for (int i=idxfirstvalid+1;igetID()); + } + fprintf(f_out, "\n"); + } + //now print all valid, non-replaced transcripts in this locus: + for (int i=0;ireplaced_by!=NULL || ((t.udata & 4)==0)) continue; + t.addAttr("locus", locname.chars()); + out_counter++; + if (fmtGTF) t.printGtf(f_out, tracklabel); + else { + //print the parent first, if any + if (t.parent!=NULL && ((t.parent->udata & 4)==0)) { + GTData* pdata=(GTData*)(t.parent->uptr); + if (pdata->geneinfo!=NULL) + pdata->geneinfo->finalize(); + t.parent->addAttr("locus", locname.chars()); + t.parent->printGff(f_out, tracklabel); + t.parent->udata|=4; + } + t.printGff(f_out, tracklabel); + } + } + } //have valid transcripts to print + }//for each locus + if (f_out && !mRNAOnly) { + //final pass through the non-transcripts, in case any of them were not printed + //TODO: order broken, these should be interspersed among the rnas in the correct order! 
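+        //(records already printed carry the 4 flag in udata; anything left
+        // unmarked here is a non-transcript feature kept by -O that was not
+        // emitted alongside a transcript, so it is flushed now, albeit out of
+        // order -- see the TODO above)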
+ for (int m=0;mgfs.Count();m++) { + GffObj& t=*(gdata->gfs[m]); + if ((t.udata&4)==0) { //never printed + t.udata|=4; + if (fmtGTF) t.printGtf(f_out, tracklabel); + else t.printGff(f_out, tracklabel); + } + } //for each non-transcript + } + } //for each genomic sequence + } + else { + //not grouped into loci, print the rnas with their parents, if any + int numvalid=0; + for (int g=0;grnas.Count();m++) { + GffObj& t=*(gdata->rnas[m]); + GTData* tdata=(GTData*)(t.uptr); + if (tdata->replaced_by!=NULL) continue; + if (process_transcript(gfasta, t)) { + t.udata|=4; //tag it as valid + numvalid++; + if (f_out) { + if (tdata->geneinfo) tdata->geneinfo->finalize(); + out_counter++; + if (fmtGTF) t.printGtf(f_out, tracklabel); + else { + if (out_counter==1) + printGff3Header(f_out, args); + //print the parent first, if any + if (t.parent!=NULL && ((t.parent->udata & 4)==0)) { + GTData* pdata=(GTData*)(t.parent->uptr); + if (pdata->geneinfo!=NULL) + pdata->geneinfo->finalize(); + t.parent->printGff(f_out, tracklabel); + t.parent->udata|=4; + } + t.printGff(f_out, tracklabel); + } + }//GFF/GTF output requested + } //valid transcript + } //for each rna + if (f_out && !mRNAOnly) { + //final pass through the non-transcripts, in case any of them were not printed + //TODO: order broken, these should be interspersed among the rnas in the correct order! + for (int m=0;mgfs.Count();m++) { + GffObj& t=*(gdata->gfs[m]); + if ((t.udata&4)==0) { //never printed + t.udata|=4; + if (fmtGTF) t.printGtf(f_out, tracklabel); + else t.printGff(f_out, tracklabel); + } + } //for each non-transcript + } + } //for each genomic seq + } + if (f_repl && f_repl!=stdout) fclose(f_repl); + seqinfo.Clear(); + //if (faseq!=NULL) delete faseq; + //if (gcdb!=NULL) delete gcdb; + GFREE(rfltGSeq); + FRCLOSE(f_in); + FWCLOSE(f_out); + FWCLOSE(f_w); + FWCLOSE(f_x); + FWCLOSE(f_y); + } + + diff --git a/src/graph_optimize.cpp b/src/graph_optimize.cpp new file mode 100644 index 0000000..e877774 --- /dev/null +++ b/src/graph_optimize.cpp @@ -0,0 +1,751 @@ +/* + * graph_optimize.cpp + * cufflinks + * + * Created by Cole Trapnell on 6/1/10. + * Copyright 2010 Cole Trapnell. All rights reserved. 
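+ *
+ * (Overview, for orientation: the routines below perform bundle-level graph
+ * simplifications -- gap filling, constitutive-region compression, containment
+ * collapsing via a maximum bipartite matching, equivalent-transfrag collapsing,
+ * and overlap-DAG path compression.)
+ *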
+ * + */ + +#include +#include + +#include "graph_optimize.h" +// for graph optimization only + +#include +#include +#include + +#include "scaffold_graph.h" +#include "scaffolds.h" +#include "filters.h" +#include "matching_merge.h" + +using namespace std; +using namespace boost; + +namespace ublas = boost::numeric::ublas; + +void fill_gaps(vector& scaffolds, int fill_size) +{ + for (size_t i = 0; i < scaffolds.size(); ++i) + scaffolds[i].fill_gaps(fill_size); +} + +enum ConflictState { UNKNOWN_CONFLICTS = 0, SAME_CONFLICTS, DIFF_CONFLICTS }; + +bool scaff_left_lt_right_gt(const Scaffold& lhs, const Scaffold& rhs) +{ + if (lhs.left() != rhs.left()) + return lhs.left() < rhs.left(); + return lhs.right() > rhs.right(); +} + +bool op_left_lt_right_lt(const AugmentedCuffOp& lhs, const AugmentedCuffOp& rhs) +{ + if (lhs.genomic_offset != rhs.genomic_offset) + { + return lhs.genomic_offset < rhs.genomic_offset; + } + if (lhs.genomic_length != rhs.genomic_length) + { + return lhs.genomic_length < rhs.genomic_length; + } + return false; +} + +void extract_conflicting_ops(const vector& ops, + vector& conflict_ops) +{ + for (size_t i = 0; i < ops.size(); ++i) + { + for (size_t j = i+1; j < ops.size(); ++j) + { + if (AugmentedCuffOp::overlap_in_genome(ops[i], ops[j])) + { + if (!AugmentedCuffOp::compatible(ops[i], ops[j])) + { + if (!binary_search(conflict_ops.begin(), conflict_ops.end(), ops[i])) + { + conflict_ops.push_back(ops[i]); + sort(conflict_ops.begin(), conflict_ops.end()); + } + + if (!binary_search(conflict_ops.begin(), conflict_ops.end(), ops[j])) + { + conflict_ops.push_back(ops[j]); + sort(conflict_ops.begin(), conflict_ops.end()); + } + } + } + else + { + break; + } + + } + } +} + +void collect_non_redundant_ops(const vector& scaffolds, + vector& ops) +{ + + for (size_t i = 0; i < scaffolds.size(); ++i) + { + ops.insert(ops.end(), + scaffolds[i].augmented_ops().begin(), + scaffolds[i].augmented_ops().end()); + } + sort(ops.begin(), ops.end()); + vector::iterator new_end = unique(ops.begin(), ops.end()); + ops.erase(new_end, ops.end()); + + sort (ops.begin(), ops.end(), op_left_lt_right_lt); +} + +void fill_unambiguous_unknowns(vector& to_fill, + const vector& constitutive) +{ +// vector conflict_ops; +// vector ops; + + for (size_t i = 0; i < to_fill.size(); ++i) + { + if (to_fill[i].has_unknown()) + { + for( size_t j = 0; j < constitutive.size(); ++j) + { + const Scaffold& cons = constitutive[j]; + if (Scaffold::overlap_in_genome(to_fill[i], cons, 0) && + Scaffold::compatible(to_fill[i], cons)) + { + if (cons.strand() != CUFF_STRAND_UNKNOWN) + to_fill[i].strand(cons.strand()); + + to_fill[i].fill_gaps(cons.augmented_ops()); + if (!to_fill[i].has_unknown()) + { + break; + } + } + } + } + } +} + +// WARNING: scaffolds MUST be sorted by scaff_lt_rt() in order for this routine +// to work correctly. +void add_non_constitutive_to_scaffold_mask(const vector& scaffolds, + vector& scaffold_mask) +{ + + // First, we filter out all fragments that are entirely contained in a + // constitutive exon of the "gene". If such fragments were non-constitutive, + // neither would that exon. 
Also, we can examine all fragments at most
+    // once here, because if we don't look at them here, we'll look hard in the
+    // next loop
+    Scaffold smashed_gene;
+    
+    // setting introns_overwrite_matches in a gene smash takes only its
+    // constitutive regions
+    Scaffold::merge(scaffolds, smashed_gene, true);
+    
+    vector<bool> smash_filter(scaffolds.size(), false);
+    
+    const vector<AugmentedCuffOp>& cig = smashed_gene.augmented_ops();
+    size_t next_frag = 0;
+    
+    size_t num_filtered = 0;
+    for (size_t j = 0; j < cig.size(); ++j)
+    {
+        if (cig[j].opcode == CUFF_MATCH)
+        {
+            for (;next_frag < scaffolds.size(); ++next_frag)
+            {
+                const Scaffold& frag = scaffolds[next_frag];
+                
+                if (frag.left() >= cig[j].g_left() && 
+                    frag.right() <= cig[j].g_right())
+                {
+                    smash_filter[next_frag] = true;
+                    //scaffold_mask[next_frag] = true;
+                    num_filtered++;
+                }
+                if (frag.left() >= cig[j].g_right())
+                {
+                    break;
+                }
+            }
+        }
+    }
+    
+    verbose_msg("%lu constitutive reads of %lu smash-filtered from further consideration\n", num_filtered, smash_filter.size());
+    
+    vector<AugmentedCuffOp> ops;
+    collect_non_redundant_ops(scaffolds, ops);
+    
+    vector<AugmentedCuffOp> conflict_ops;
+    extract_conflicting_ops(ops, conflict_ops);
+    
+    for (size_t i = 0; i < scaffolds.size(); ++i)
+    {
+        if (smash_filter[i])
+            continue;
+        const vector<AugmentedCuffOp>& s_ops = scaffolds[i].augmented_ops();
+        for (size_t j = 0; j < s_ops.size(); ++j)
+        {
+            if (binary_search(conflict_ops.begin(), conflict_ops.end(), s_ops[j]))
+            {
+                scaffold_mask[i] = true;
+                break;
+            }
+        }
+    }
+}
+
+
+
+
+
+bool collapse_contained_transfrags(vector<Scaffold>& scaffolds, 
+                                   uint32_t max_rounds)
+{
+    // The containment graph is a bipartite graph with an edge (u,v) when
+    // u is (not necessarily properly) contained in v and the two are
+    // compatible.
+    typedef lemon::SmartBpUGraph ContainmentGraph;
+    normal norm(0, 0.1);
+    bool performed_collapse = false;
+    
+    while (max_rounds--)
+    {
+        
+        verbose_msg("%s\tStarting new collapse round\n", bundle_label->c_str());
+        
+        ContainmentGraph containment;
+        
+        
+        typedef pair<ContainmentGraph::ANode, ContainmentGraph::BNode> NodePair;
+        vector<NodePair> node_ids;
+        vector<size_t> A_to_scaff(scaffolds.size());
+        vector<size_t> B_to_scaff(scaffolds.size());
+        
+        for (size_t n = 0; n < scaffolds.size(); ++n)
+        {
+            NodePair p = make_pair(containment.addANode(),
+                                   containment.addBNode());
+            node_ids.push_back(p);
+            A_to_scaff[containment.aNodeId(p.first)] = n;
+            B_to_scaff[containment.bNodeId(p.second)] = n;
+        }
+        
+        bool will_perform_collapse = false;
+        for (size_t i = 0; i < scaffolds.size(); ++i)
+        {
+            for (size_t j = 0; j < scaffolds.size(); ++j)
+            {
+                if (i == j)
+                    continue;
+                
+                if (scaffolds[i].contains(scaffolds[j]) && 
+                    Scaffold::compatible(scaffolds[i], scaffolds[j]))
+                {
+                    // To guard against the identity collapse, which won't 
+                    // necessarily reduce the total number of scaffolds.
+                    if (scaffolds[j].contains(scaffolds[i]) && i < j)
+                        continue;
+                    const NodePair& nj = node_ids[j];
+                    const NodePair& ni = node_ids[i];
+                    assert (nj.first != ni.second);
+                    
+                    will_perform_collapse = true;
+                    ContainmentGraph::UEdge e = containment.addEdge(nj.first,
+                                                                    ni.second);
+                    
+                }
+            }
+        }
+        
+        if (will_perform_collapse == false)
+            return performed_collapse;
+        
+        lemon::MaxBipartiteMatching<ContainmentGraph> matcher(containment);
+        
+        verbose_msg("%s\tContainment graph has %d nodes, %d edges\n", bundle_label->c_str(), containment.aNodeNum(), containment.uEdgeNum());
+        verbose_msg("%s\tFinding a maximum matching to collapse scaffolds\n", bundle_label->c_str());
+        
+        matcher.run();
+        
+        verbose_msg( "%s\tWill collapse %d scaffolds\n", bundle_label->c_str(), matcher.matchingSize());
+        
+        ContainmentGraph::UEdgeMap<bool> matched_edges(containment);
+        
+        matcher.matching(matched_edges);
+        
+        merge_from_matching(containment, matcher, scaffolds);
+        
+        performed_collapse = true;
+    }
+    return performed_collapse;
+}
+
+bool scaff_smaller_lt_rt(const Scaffold& lhs, const Scaffold& rhs)
+{
+    size_t lhs_len = lhs.right() - lhs.left();
+    size_t rhs_len = rhs.right() - rhs.left();
+    
+    if (lhs_len != rhs_len)
+    {
+        return lhs_len < rhs_len;
+    }
+    else
+    {
+        return scaff_lt_rt(lhs, rhs);
+    }
+    return false;
+}
+
+struct FragIndexSortSmallerLR
+{
+    FragIndexSortSmallerLR(const vector<Scaffold>& frags) : fragments(frags) {}
+    
+    const vector<Scaffold>& fragments;
+    
+    bool operator()(size_t lhs_frag_idx, size_t rhs_frag_idx)
+    {
+        const Scaffold& lhs = fragments[lhs_frag_idx];
+        const Scaffold& rhs = fragments[rhs_frag_idx];
+        
+        size_t lhs_len = lhs.right() - lhs.left();
+        size_t rhs_len = rhs.right() - rhs.left();
+        
+        if (lhs_len != rhs_len)
+        {
+            return lhs_len > rhs_len;
+        }
+        else
+        {
+            return scaff_lt_rt(lhs, rhs);
+        }
+        return false;
+    }
+};
+
+bool collapse_equivalent_transfrags(vector<Scaffold>& fragments, 
+                                    uint32_t max_rounds)
+{
+    // The containment graph is a bipartite graph with an edge (u,v) when
+    // u is (not necessarily properly) contained in v and the two are
+    // compatible.
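+    //
+    // Rough summary of the loop below (descriptive note, not in the original):
+    // fragments are scanned repeatedly; for each surviving fragment we build its
+    // "conflict set" -- the fragments that overlap it in the genome but are not
+    // compatible with it. A shorter fragment that is contained in and compatible
+    // with a longer one, and whose conflicts are exactly the longer one's,
+    // carries no extra information and is merged into the container
+    // (replacements[] records the merge). The frag_len_std_dev cutoff is a
+    // heuristic to avoid comparing fragments of very different lengths.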
+ typedef lemon::SmartBpUGraph ContainmentGraph; + normal norm(0, 0.1); + bool performed_collapse = false; + + //double last_size = -1; + //long leftmost = 9999999999; + long leftmost = LONG_MAX-1; + long rightmost = -1; + + for (size_t i = 0; i < fragments.size(); ++i) + { + leftmost = std::min((long)fragments[i].left(), leftmost); + rightmost = std::max((long)fragments[i].right(), rightmost); + } + + //long bundle_length = rightmost - leftmost; + + while (max_rounds--) + { + + sort (fragments.begin(), fragments.end(), scaff_lt_rt); + + + vector smaller_idx_array; + for (size_t i = 0; i < fragments.size(); ++i) + { + smaller_idx_array.push_back(i); + } + + sort(smaller_idx_array.begin(), + smaller_idx_array.end(), + FragIndexSortSmallerLR(fragments)); + + verbose_msg("%s\tStarting new collapse round\n", bundle_label->c_str()); + verbose_msg("%s\tFinding fragment-level conflicts\n", bundle_label->c_str()); + + bool will_perform_collapse = false; + + verbose_msg( "%s\tAssessing overlaps between %lu fragments for identical conflict sets\n", + bundle_label->c_str(), + fragments.size()); + vector replacements; + for (size_t i = 0; i < fragments.size(); ++i) + { + replacements.push_back(i); + } + + size_t curr_frag = 0; + vector curr_conflicts; + +// for (int i = 0; i < fragments.size(); ++i) +// { +// if (Scaffold::overlap_in_genome(fragments[0], fragments[i], 0)) +// { +// if (!Scaffold::compatible(fragments[0], fragments[i])) +// { +// curr_conflicts.push_back(i); +// } +// } +// } + + double mean_length = 0; + for (size_t i = 0; i < fragments.size(); ++i) + { + mean_length += fragments[i].length(); + } + + mean_length /= fragments.size(); + + double variance = 0.0; + for (size_t i = 0; i < fragments.size(); ++i) + { + double v = fragments[i].length() - mean_length; + v *= v; + variance += v; + } + + variance /= fragments.size(); + double frag_len_std_dev = sqrt(variance); + + int num_merges = 0; + + while (curr_frag < smaller_idx_array.size()) + { + size_t curr_frag_native_idx = smaller_idx_array[curr_frag]; + if (replacements[curr_frag_native_idx] == curr_frag_native_idx) + { + size_t lhs = curr_frag; + + size_t lhs_native_idx = smaller_idx_array[lhs]; + + const Scaffold& lhs_scaff = fragments[lhs_native_idx]; + curr_conflicts.clear(); + + double lhs_len = lhs_scaff.right() - lhs_scaff.left(); + + for (size_t i = 0; i < smaller_idx_array.size(); ++i) + { + size_t j_scaff_idx = smaller_idx_array[i]; + if (replacements[j_scaff_idx] == j_scaff_idx) + { + if (Scaffold::overlap_in_genome(lhs_scaff, fragments[j_scaff_idx], 0)) + { + if (!Scaffold::compatible(lhs_scaff, fragments[j_scaff_idx])) + { + curr_conflicts.push_back(j_scaff_idx); + } + } + } + } + sort(curr_conflicts.begin(), curr_conflicts.end()); + + //bool advanced_curr = false; + for (size_t c = lhs + 1; c < smaller_idx_array.size(); ++c) + { + size_t c_native_idx = smaller_idx_array[c]; + const Scaffold& c_scaff = fragments[c_native_idx]; + if (replacements[c_native_idx] == c_native_idx && + lhs_scaff.contains(c_scaff)) + { + double c_len = c_scaff.right() - c_scaff.left(); + + if (lhs_len - c_len > frag_len_std_dev) + break; + + if (c_scaff.augmented_ops() == lhs_scaff.augmented_ops()) + { + if (num_merges % 100 == 0) + { + verbose_msg("%s\tCollapsing frag # %d\n", + bundle_label->c_str(), + num_merges); + } + vector s; + s.push_back(c_scaff); + s.push_back(lhs_scaff); + fragments[lhs_native_idx] = Scaffold(s); + replacements[c_native_idx] = lhs_native_idx; + //fragments[c_native_idx] = Scaffold(); + //curr_conflicts = c_conflicts; 
+ fragments[c_native_idx].clear_hits(); + ///lhs = c; + //advanced_curr = true; + will_perform_collapse = true; + num_merges++; + continue; + } + + if (!Scaffold::compatible(lhs_scaff, c_scaff)) + continue; + vector c_conflicts; + // Find c's conflicts + + // If c fails to overlap lhs's conflicts, or if it's + // compatible with any of them, they aren't equivalent + bool not_equivalent = false; + for (size_t j = 0; j < curr_conflicts.size(); ++j) + { + if (!Scaffold::overlap_in_genome(fragments[curr_conflicts[j]], c_scaff, 0) || + Scaffold::compatible(fragments[curr_conflicts[j]], c_scaff)) + { + not_equivalent = true; + break; + } + } + + if (not_equivalent) + continue; + + // If we get here, then c disagrees with at least all + // the guys lhs does. + + // Now check that c doesn't have any additional conflicts + // of it's own + for (size_t i = lhs_native_idx + 1; i < fragments.size(); ++i) + { + if (replacements[i] == i) + { + if (Scaffold::overlap_in_genome(fragments[i], lhs_scaff, 0)) + { + if (Scaffold::overlap_in_genome(fragments[i], c_scaff, 0)) + { + if (!Scaffold::compatible(fragments[i], c_scaff)) + { + //c_conflicts.push_back(i); + if (!binary_search(curr_conflicts.begin(), curr_conflicts.end(), i)) + { + not_equivalent = true; + break; + } + } + } + } + else + { + break; + } + } + } + + + if (not_equivalent) + continue; + + // merge + if (num_merges % 100 == 0) + { + verbose_msg("%s\tCollapsing frag # %d\n", + bundle_label->c_str(), + num_merges); + } + + vector s; + s.push_back(c_scaff); + s.push_back(lhs_scaff); + fragments[lhs_native_idx] = Scaffold(s); + replacements[c_native_idx] = lhs_native_idx; + //fragments[c_native_idx] = Scaffold(); + fragments[c_native_idx].clear_hits(); + //curr_conflicts = c_conflicts; + //advanced_curr = true; + will_perform_collapse = true; + num_merges++; + //break; + } + else + { + continue; + } + + } + } + + //if (!advanced_curr) + { + ++curr_frag; + } + } + + if (will_perform_collapse == false) + return performed_collapse; + + + vector replaced; + for (size_t i = 0; i < fragments.size(); ++i) + { + if (replacements[i] == i) + { + replaced.push_back(fragments[i]); + } + } + + fragments = replaced; + sort(fragments.begin(), fragments.end(), scaff_lt_rt); + performed_collapse = true; + } + return performed_collapse; +} + +void compress_consitutive(vector& hits) +{ + vector scaffold_mask; + + verbose_msg("%s\tBuilding constitutivity mask\n", bundle_label->c_str()); + + scaffold_mask = vector(hits.size(), false); + add_non_constitutive_to_scaffold_mask(hits, scaffold_mask); + + vector constitutive; + vector non_constitutive; + + for (size_t i = 0; i < scaffold_mask.size(); ++i) + { + if (!scaffold_mask[i]) + constitutive.push_back(hits[i]); + else + non_constitutive.push_back(hits[i]); + } + + size_t pre_compress = hits.size(); + hits.clear(); + if (!constitutive.empty()) + { + Scaffold compressed = Scaffold(constitutive); + vector completes; + compressed.fill_gaps(2 * olap_radius); + compressed.get_complete_subscaffolds(completes); + + hits.insert(hits.end(), completes.begin(), completes.end()); + } + + fill_unambiguous_unknowns(non_constitutive, hits); + + hits.insert(hits.end(), non_constitutive.begin(), non_constitutive.end()); + sort(hits.begin(), hits.end(), scaff_lt); + + + size_t post_compress = hits.size(); + size_t delta = pre_compress - post_compress; + double collapse_ratio = delta / (double) pre_compress; + verbose_msg("%s\tCompressed %lu of %lu constitutive fragments (%lf percent)\n", + bundle_label->c_str(), + delta, + 
pre_compress, + collapse_ratio); +} + + +void compress_redundant(vector& fragments) +{ + double last_size = -1; + //long leftmost = 9999999999; + long leftmost = LONG_MAX-1; + long rightmost = -1; + + for (size_t i = 0; i < fragments.size(); ++i) + { + leftmost = std::min((long)fragments[i].left(), leftmost); + rightmost = std::max((long)fragments[i].right(), rightmost); + } + + size_t pre_compress = fragments.size(); + + while (true) + { + if (last_size == -1 || 0.9 * last_size > fragments.size()) + { + last_size = fragments.size(); + if (!collapse_equivalent_transfrags(fragments, 1)) + { + break; + } + } + else + { + break; + } + } + + size_t post_compress = fragments.size(); + size_t delta = pre_compress - post_compress; + double collapse_ratio = delta / (double) pre_compress; + verbose_msg("%s\tCompressed %lu of %lu redundant fragments (%lf percent)\n", + bundle_label->c_str(), + delta, + pre_compress, + collapse_ratio); +} + +void compress_fragments(vector& fragments) +{ + verbose_msg("%s\tPerforming preliminary containment collapse on %lu fragments\n", bundle_label->c_str(), fragments.size()); + size_t pre_hit_collapse_size = fragments.size(); + sort(fragments.begin(), fragments.end(), scaff_lt_rt); + + compress_consitutive(fragments); + + compress_redundant(fragments); + + size_t post_hit_collapse_size = fragments.size(); + verbose_msg("%s\tIgnoring %lu strictly contained fragments\n", bundle_label->c_str(), pre_hit_collapse_size - post_hit_collapse_size); +} + +void compress_overlap_dag_paths(DAG& bundle_dag, + vector& hits) +{ + HitsForNodeMap hits_for_node = get(vertex_name, bundle_dag); + + vector_property_map path_for_scaff; + path_compress_visitor, + vector_property_map > vis(path_for_scaff); + depth_first_search(bundle_dag, + visitor(vis)); + + vector > compressed_paths(hits.size()+1); + + vector new_scaffs; + + for (size_t i = 0; i < num_vertices(bundle_dag); ++i) + { + size_t path_id = path_for_scaff[i]; + assert (path_id < compressed_paths.size()); + const Scaffold* h = hits_for_node[i]; + if (h) + { + compressed_paths[path_id].push_back(*h); + } + } + for (size_t i = 0; i < compressed_paths.size(); ++i) + { + if (!compressed_paths[i].empty()) + { + Scaffold s(compressed_paths[i]); + verbose_msg("Path over %d-%d has %lu fragments in it\n", s.left(), s.right(), compressed_paths[i].size()); + new_scaffs.push_back(s); + } + } + //hits = new_scaffs; + + verbose_msg("%s\tCompressed overlap graph from %lu to %lu fragments (%f percent)\n", + bundle_label->c_str(), + hits.size(), + new_scaffs.size(), + (hits.size() - new_scaffs.size())/(double)hits.size()); + + hits = new_scaffs; + sort(hits.begin(), hits.end(), scaff_lt); + create_overlap_dag(hits, bundle_dag); +} + diff --git a/src/graph_optimize.h b/src/graph_optimize.h new file mode 100644 index 0000000..dc4d743 --- /dev/null +++ b/src/graph_optimize.h @@ -0,0 +1,94 @@ +#ifndef GRAPH_OPTIMIZE_H +#define GRAPH_OPTIMIZE_H +/* + * graph_optimize.h + * cufflinks + * + * Created by Cole Trapnell on 6/1/10. + * Copyright 2010 Cole Trapnell. All rights reserved. 
+ * + */ + +#include + +#include +#include + +#include "bundles.h" +#include "scaffold_graph.h" +#include "scaffolds.h" + +using namespace std; + +using namespace boost; + +template < typename PredecessorMap, + typename PathIDMap > +class path_compress_visitor : public default_dfs_visitor +{ +public: + path_compress_visitor(PathIDMap pm) : curr_path_id(0), path_map(pm) {} + + template < typename Vertex, typename Graph > + void initialize_vertex(Vertex u, const Graph & g) const + { + put(predecessor, u, u); + put(path_map, u, u); + } + + template < typename Vertex, typename Graph > + void discover_vertex(Vertex u, const Graph & g) + { + //fprintf(stderr, "node %d has indegree %d, outdegree %d\n",u,in_degree(u, g),out_degree(u, g)); + if (in_degree(u, g) == 1) + { + + Vertex v = get(predecessor, u); + + assert(v != u); + + if (out_degree(v, g) == 1) + { + // compress into predecessor's path + typename PathIDMap::value_type path = get(path_map, v); + put(path_map, u, path); + //fprintf(stderr, "\told path for node %d = %d\n", u, path); + + return; + } + } + // start a new path + curr_path_id++; + put(path_map, u, curr_path_id); + //fprintf(stderr, "\tnew path for node %d = %d\n", u, curr_path_id); + + } + + template < typename Edge, typename Graph > + void tree_edge(Edge e, const Graph & g) const + { + put(predecessor, target(e, g), source(e, g)); + } + + size_t last_path_id() const { return curr_path_id; } + + PredecessorMap predecessor; + + size_t curr_path_id; + PathIDMap path_map; +}; + +void fill_gaps(vector& scaffolds, int fill_size); + +void compress_fragments(vector& hits); + +bool collapse_equivalent_transfrags(vector& scaffolds, + uint32_t max_rounds = 0xFFFFFFFF); + +bool collapse_contained_transfrags(vector& scaffolds, + uint32_t max_rounds = 0xFFFFFFFF); + +void compress_overlap_dag_paths(DAG& bundle_dag, + vector& hits); + +#endif diff --git a/src/gtf_to_sam.cpp b/src/gtf_to_sam.cpp new file mode 100644 index 0000000..12f70c1 --- /dev/null +++ b/src/gtf_to_sam.cpp @@ -0,0 +1,348 @@ +/* + * gtf_to_sam.cpp + * Cufflinks + * + * Created by Cole Trapnell on 8/1/10. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#else +#define PACKAGE_VERSION "INTERNAL" +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "hits.h" +#include "bundles.h" + +#include "scaffolds.h" +#include "tokenize.h" + +using namespace boost; +using namespace std; + +#if ENABLE_THREADS +const char *short_options = "r:F"; +#else +const char *short_options = "r:F"; +#endif + +bool raw_fpkm = false; + +static struct option long_options[] = { +{"reference-seq", required_argument, 0, 'r'}, +{"raw-fpkm", no_argument, 0, 'F'}, +{0, 0, 0, 0} // terminator +}; + +void print_usage() +{ + //NOTE: SPACES ONLY, bozo + fprintf(stderr, "gtf_to_sam v%s\n", PACKAGE_VERSION); + fprintf(stderr, "linked against Boost version %d\n", BOOST_VERSION); + fprintf(stderr, "-----------------------------\n"); + fprintf(stderr, "Usage: cufflinks [options] \n"); + fprintf(stderr, "Options:\n\n"); + fprintf(stderr, "-r/--reference-seq reference fasta file [ default: NULL ]\n"); + fprintf(stderr, "-F/--raw-fpkm use FPKM instead of isoform fraction \n"); +} + +int parse_options(int argc, char** argv) +{ + int option_index = 0; + int next_option; + do { + next_option = getopt_long(argc, argv, short_options, long_options, &option_index); + switch (next_option) { + case -1: /* Done with options. 
*/ + break; + case 'r': + { + fasta_dir = optarg; + break; + } + case 'F': + { + raw_fpkm = true; + break; + } + default: + print_usage(); + return 1; + } + } while(next_option != -1); + + return 0; +} + +void print_scaff_as_sam(FILE* sam_out, + const RefSequenceTable& rt, + const Scaffold& scaff) +{ + string seq; + string quals; + + seq = "*"; + quals = "*"; + + uint32_t sam_flag = 0; + if (scaff.strand() == CUFF_REV) + { + sam_flag |= 0x0010; // BAM_FREVERSE +// if (sequence) +// { +// reverse_complement(seq); +// reverse(quals.begin(), quals.end()); +// } + } + + uint32_t sam_pos = scaff.left() + 1; + uint32_t map_quality = 255; + char cigar[8192]; + cigar[0] = 0; + string mate_ref_name = "*"; + uint32_t mate_pos = 0; + uint32_t insert_size = 0; + + const vector& ops = scaff.augmented_ops(); + for (size_t c = 0; c < ops.size(); ++c) + { + char ibuf[64]; + sprintf(ibuf, "%d", ops[c].genomic_length); + switch(ops[c].opcode) + { + case CUFF_MATCH: + strcat(cigar, ibuf); + strcat(cigar, "M"); + break; + case CUFF_INTRON: + strcat(cigar, ibuf); + strcat(cigar, "N"); + break; + default: + fprintf(stderr, "Warning: Transcript %s contains an unconvertible alignment operator, skipping\n", scaff.annotated_trans_id().c_str()); + return; + break; + } + } + + //string q = string(bh.read_len(), '!'); + //string s = string(bh.read_len(), 'N'); + + const char* ref_name = rt.get_name(scaff.ref_id()); + if (!ref_name) + { + fprintf(stderr, "Warning: Could not find contig name for ID %d, skipping\n", scaff.ref_id()); + return; + } + + if (scaff.annotated_trans_id() == "") + { + fprintf(stderr, "Warning: transcript_id attribute is empty, skipping\n"); + return; + } + + fprintf(sam_out, + "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s", + scaff.annotated_trans_id().c_str(), + sam_flag, + ref_name, + sam_pos, + map_quality, + cigar, + mate_ref_name.c_str(), + mate_pos, + insert_size, + seq.c_str(), + quals.c_str()); + + if (scaff.strand() != CUFF_STRAND_UNKNOWN) + { + fprintf(sam_out, + "\tXS:A:%c", + scaff.strand() == CUFF_REV ? 
'-' : '+'); + } + + if (scaff.fpkm() != 0) + { + fprintf(sam_out, + "\tZF:f:%f", + scaff.fpkm()); + } + + fprintf(sam_out, "\n"); + +} + +void set_relative_fpkms(vector >& ref_mRNAs) +{ + adjacency_list G; + + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + add_vertex(G); + } + + map > gene_id_idxs; + + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + pair >::iterator, bool> inserted; + inserted = gene_id_idxs.insert(make_pair(ref_mRNAs[i]->annotated_gene_id(), vector())); + inserted.first->second.push_back(i); + } + + for (map >::iterator itr = gene_id_idxs.begin(); + itr != gene_id_idxs.end(); + ++itr) + { + vector& gene = itr->second; + for (size_t i = 0; i < gene.size(); ++i) + { + for (size_t j = 0; j < gene.size(); ++j) + { + { + add_edge(gene[i], gene[j], G); + } + } + } + } + + std::vector component(num_vertices(G)); + connected_components(G, &component[0]); + + vector > clusters(ref_mRNAs.size(), + vector(ref_mRNAs.size(), false)); + + //vector > cluster_indices(three_prime_ends.size()); + + vector > > grouped_scaffolds(ref_mRNAs.size()); + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + clusters[component[i]][i] = true; + grouped_scaffolds[component[i]].push_back(ref_mRNAs[i]); + } + + for (size_t i = 0; i < grouped_scaffolds.size(); ++i) + { + vector >& gene = grouped_scaffolds[i]; + + double total_fpkm = 0.0; + foreach(shared_ptr scaff, gene) + { + total_fpkm += scaff->fpkm(); + } + if (total_fpkm > 0) + { + foreach (shared_ptr scaff, gene) + { + scaff->fpkm(scaff->fpkm() / total_fpkm); + } + } + } +} + +void driver(vector ref_gtf_files, FILE* sam_out) +{ + ReadTable it; + RefSequenceTable rt(true, false); + + vector > > ref_mRNA_table; + vector > > sample_count_table; + + foreach (FILE* ref_gtf, ref_gtf_files) + { + vector > ref_mRNAs; + ::load_ref_rnas(ref_gtf, rt, ref_mRNAs, false, true); + ref_mRNA_table.push_back(ref_mRNAs); + } + + for (size_t j = 0; j < ref_mRNA_table.size(); ++j) + { + vector > ref_mRNAs = ref_mRNA_table[j]; + + if (!raw_fpkm) + set_relative_fpkms(ref_mRNAs); + + for (size_t i = 0; i < ref_mRNAs.size(); ++i) + { + print_scaff_as_sam(sam_out, rt, *ref_mRNA_table[j][i]); + } + } +} + +int main(int argc, char** argv) +{ + init_library_table(); + + int parse_ret = parse_options(argc,argv); + if (parse_ret) + return parse_ret; + + + if(optind >= argc) + { + print_usage(); + return 1; + } + + string ref_gtf_in_filenames = argv[optind++]; + + if(optind >= argc) + { + print_usage(); + return 1; + } + + string sam_out_filename = argv[optind++]; + + vector ref_gtf_filenames; + tokenize(ref_gtf_in_filenames, ",", ref_gtf_filenames); + + vector ref_gtf_files; + + foreach (const string& ref_gtf_in_filename, ref_gtf_filenames) + { + FILE* ref_gtf = NULL; + if (ref_gtf_in_filename != "") + { + ref_gtf = fopen(ref_gtf_in_filename.c_str(), "r"); + if (!ref_gtf) + { + fprintf(stderr, "Error: cannot open GTF file %s for reading\n", + ref_gtf_in_filename.c_str()); + exit(1); + } + ref_gtf_files.push_back(ref_gtf); + } + } + + FILE* sam_out = NULL; + if (sam_out_filename != "") + { + sam_out = fopen(sam_out_filename.c_str(), "w"); + if (!sam_out) + { + fprintf(stderr, "Error: cannot open SAM file %s for writing\n", + sam_out_filename.c_str()); + exit(1); + } + } + + driver(ref_gtf_files, sam_out); + + return 0; +} diff --git a/src/gtf_tracking.cpp b/src/gtf_tracking.cpp new file mode 100644 index 0000000..213f60f --- /dev/null +++ b/src/gtf_tracking.cpp @@ -0,0 +1,698 @@ +/* + * gtf_tracking.cpp + * cufflinks + * + * Created by Cole Trapnell on 9/5/09. 
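+ *
+ * GTF/GFF transcript-tracking helpers used by cuffcompare: loading and
+ * de-duplicating reference transcripts and Cufflinks transfrags (parse_mRNAs,
+ * read_mRNAs), strict intron-chain matching (tMatch), and clustering of
+ * transcripts into loci (cluster_mRNAs).
+ *
+ * Rough usage sketch for loading a reference annotation (assumed; the GList
+ * constructor flags and file name are illustrative only):
+ *
+ *   GList<GSeqData> ref_data(true, true, true);   // sorted, free, unique
+ *   FILE* f = fopen("ref.gtf", "r");
+ *   read_mRNAs(f, ref_data, NULL, true, -1, "ref.gtf", false);
+ *   sort_GSeqs_byName(ref_data);
+ *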
+ * Copyright 2009 Geo Pertea. All rights reserved. + * + */ + +#include "gtf_tracking.h" + +bool gtf_tracking_verbose = false; +bool gtf_tracking_largeScale=false; //many input Cufflinks files processed at once by cuffcompare, discard exon attributes + +int GXConsensus::count=0; + +char* getGSeqName(int gseq_id) { + return GffObj::names->gseqs.getName(gseq_id); +} + +int cmpByPtr(const pointer p1, const pointer p2) { + return (p1>p2) ? 1: ((p1==p2)? 0 : -1); + } + +bool betterRef(GffObj* a, GffObj* b) { + if (a==NULL || b==NULL) return (a!=NULL); + if (a->exons.Count()!=b->exons.Count()) return (a->exons.Count()>b->exons.Count()); + if (a->hasCDS() && !b->hasCDS()) + return true; + else { + if (b->hasCDS() && !a->hasCDS()) return false; + return (a->covlen>b->covlen); + } + } + +GffObj* is_RefDup(GffObj* m, GList& mrnas, int& dupidx) { + //mrnas MUST be sorted by start coordinate + int ovlen=0; + dupidx=-1; + if (mrnas.Count()==0) return NULL; + int nidx=qsearch_mrnas(m->end, mrnas); + if (nidx==0) return NULL; + if (nidx==-1) nidx=mrnas.Count();//all can overlap + for (int i=nidx-1;i>=0;i--) { + GffObj& omrna=*mrnas[i]; + if (m->start>omrna.end) { + if (m->start-omrna.start>GFF_MAX_EXON) break; //give up already + continue; + } + if (omrna.start>m->end) continue; //this should never be the case if nidx was found correctly + //locus overlap here: + if (tMatch(*m, omrna, ovlen, false, true)) { + dupidx=i; + return mrnas[i]; + } + } + return NULL; +} + + +bool intronRedundant(GffObj& ti, GffObj& tj) { + //two transcripts are "intron redundant" iff one transcript's intron chain + // is a sub-chain of the other's + int imax=ti.exons.Count()-1; + int jmax=tj.exons.Count()-1; + if (imax==0 || jmax==0) return false; //don't deal with single-exon transcripts here + if (ti.exons[imax]->startend || + tj.exons[jmax]->startend ) + return false; //intron chains do not overlap at all + + uint eistart=0, eiend=0, ejstart=0, ejend=0; //exon boundaries + int i=1; //exon idx to the right of the current intron of ti + int j=1; //exon idx to the right of the current intron of tj + //find the first intron overlap: + while (i<=imax && j<=jmax) { + eistart=ti.exons[i-1]->end; + eiend=ti.exons[i]->start; + ejstart=tj.exons[j-1]->end; + ejend=tj.exons[j]->start; + if (ejend1 && j>1) || i>imax || j>jmax) { + return false; //either no intron overlaps found at all + //or it's not the first intron for at least one of the transcripts + } + if (eistart!=ejstart || eiend!=ejend) return false; //not an exact intron match + //we have the first matching intron on the left + if (j>i) { + //i==1, ti's start must not conflict with the previous intron of tj + if (ti.startstart) return false; + //so i's first intron starts AFTER j's first intron + // then j must contain i, so i's last intron must end with or before j's last intron + if (ti.exons[imax]->start>tj.exons[jmax]->start) return false; + //comment out the line above if you just want "intron compatibility" (i.e. 
extension of intron chains ) + } + else if (i>j) { + //j==1, tj's start must not conflict with the previous intron of ti + if (tj.startstart) return false; + //so j's intron chain starts AFTER i's + // then i must contain j, so j's last intron must end with or before j's last intron + if (tj.exons[jmax]->start>ti.exons[imax]->start) return false; + //comment out the line above for just "intronCompatible()" check + } + //now check if the rest of the introns overlap, in the same sequence + i++; + j++; + while (i<=imax && j<=jmax) { + if (ti.exons[i-1]->end!=tj.exons[j-1]->end || + ti.exons[i]->start!=tj.exons[j]->start) return false; + i++; + j++; + } + i--; + j--; + if (i==imax && jtj.exons[j]->end) return false; + } + else if (j==jmax && iti.exons[i]->end) return false; + } + return true; +} + +bool t_contains(GffObj& a, GffObj& b) { + //returns true if b's intron chain (or single exon) is included in a + if (b.exons.Count()>=a.exons.Count()) return false; + if (b.exons.Count()==1) { + //check if b is contained in any of a's exons: + for (int i=0;i=a.exons[i]->start && b.end<=a.exons[i]->end) return true; + } + return false; + } + if (intronRedundant(a,b)) { + //intronRedudant allows b's initial/terminal exons to extend beyond a's boundaries + //but we don't allow this kind of behavior here + return (b.start>=a.start && b.end<=a.end); + } + else return false; + } + +int is_Redundant(GffObj*m, GList* mrnas) { + //first locate the list index of the mrna starting just ABOVE + //the end of this mrna + if (mrnas->Count()==0) return -1; + int nidx=qsearch_mrnas(m->end, *mrnas); + if (nidx==0) return -1; + if (nidx==-1) nidx=mrnas->Count();//all can overlap + for (int i=nidx-1;i>=0;i--) { + GffObj& omrna=*mrnas->Get(i); + if (m->start>omrna.end) { + if (m->start-omrna.start>GFF_MAX_EXON) break; //give up already + continue; + } + if (omrna.start>m->end) continue; //this should never be the case if nidx was found correctly + + if (intronRedundant(*m, omrna)) return i; + } + return -1; +} + +bool t_dominates(GffObj* a, GffObj* b) { + // for redundant / intron compatible transfrags: + // returns true if a "dominates" b, i.e. a has more exons or is longer + if (a->exons.Count()==b->exons.Count()) + return (a->covlen>b->covlen); + else return (a->exons.Count()>b->exons.Count()); +} + +bool betterDupRef(GffObj* a, GffObj* b) { + if (a->exons.Count()!=b->exons.Count()) + return (a->exons.Count()>b->exons.Count()); + if (a->hasCDS()!=b->hasCDS()) + return (a->hasCDS()>b->hasCDS()); + //for annotation purposes, it's more important to keep the + //longer transcript, instead of the one that was loaded first + if (a->covlen != b->covlen) + return (a->covlen > b->covlen); + else return (a->track_id < b->track_id); +} + +int parse_mRNAs(GfList& mrnas, + GList& glstdata, + bool is_ref_set, + bool check_for_dups, + int qfidx, bool only_multiexon) { + int refdiscarded=0; //ref duplicates discarded + int tredundant=0; //cufflinks redundant transcripts discarded + for (int k=0;kgseq_id); + GSeqData* gdata=NULL; + uint tlen=m->len(); + if (m->hasErrors() || (tlen+500>GFF_MAX_LOCUS)) { //should probably report these in a file too.. 
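+       // (for now such transcripts are only reported through GMessage when
+       //  gtf_tracking_verbose is set, and are then skipped entirely)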
+ if (gtf_tracking_verbose) + GMessage("Warning: transcript %s discarded (structural errors found, length=%d).\n", m->getID(), tlen); + continue; + } + if (only_multiexon && m->exons.Count()<2) { + continue; + } + //GStr feature(m->getFeatureName()); + //feature.lower(); + //bool gene_or_locus=(feature.endsWith("gene") ||feature.index("loc")>=0); + //if (m->exons.Count()==0 && gene_or_locus) { + if (m->isDiscarded()) { + //discard generic "gene" or "locus" features with no other detailed subfeatures + //if (gtf_tracking_verbose) + // GMessage("Warning: discarding GFF generic gene/locus container %s\n",m->getID()); + continue; + } + if (m->exons.Count()==0) { + //if (gtf_tracking_verbose) + // GMessage("Warning: %s %s found without exon segments (adding default exon).\n",m->getFeatureName(), m->getID()); + m->addExon(m->start,m->end); + } + if (glstdata.Found(&f,i)) gdata=glstdata[i]; + else { + gdata=new GSeqData(m->gseq_id); + glstdata.Add(gdata); + } + + double fpkm=0; + double cov=0; + double conf_hi=0; + double conf_lo=0; + + GList* target_mrnas=NULL; + if (is_ref_set) { //-- ref transcripts + if (m->strand=='.') { + //unknown strand - discard from reference set (!) + continue; + } + target_mrnas=(m->strand=='+') ? &(gdata->mrnas_f) : &(gdata->mrnas_r); + if (check_for_dups) { + //check all gdata->mrnas_r (ref_data) for duplicate ref transcripts + int rpidx=-1; + GffObj* rp= is_RefDup(m, *target_mrnas, rpidx); + if (rp!=NULL) { //duplicate found + //discard one of them + //but let's keep the gene_name if present + //DEBUG: + //GMessage("Ref duplicates: %s = %s\n", rp->getID(), m->getID()); + refdiscarded++; + if (betterDupRef(rp, m)) { + if (rp->getGeneName()==NULL && m->getGeneName()!=NULL) { + rp->setGeneName(m->getGeneName()); + } + continue; + } + else { + if (m->getGeneName()==NULL && rp->getGeneName()!=NULL) { + m->setGeneName(rp->getGeneName()); + } + ((CTData*)(rp->uptr))->mrna=NULL; + rp->isUsed(false); + target_mrnas->Forget(rpidx); + target_mrnas->Delete(rpidx); + } + } + } //check for duplicate ref transcripts + } //ref transcripts + else { //-- transfrags + if (m->strand=='+') { target_mrnas = &(gdata->mrnas_f); } + else if (m->strand=='-') { target_mrnas=&(gdata->mrnas_r); } + else { m->strand='.'; target_mrnas=&(gdata->umrnas); } + if (check_for_dups) { //check for redundancy + // check if there is a redundancy between this and another already loaded Cufflinks transcript + int cidx = is_Redundant(m, target_mrnas); + if (cidx>=0) { + //always discard the redundant transcript with the fewer exons OR shorter + if (t_dominates(target_mrnas->Get(cidx),m)) { + //new transcript is shorter, discard it + continue; + } + else { + //discard the older transfrag + ((CTData*)(target_mrnas->Get(cidx)->uptr))->mrna=NULL; + target_mrnas->Get(cidx)->isUsed(false); + target_mrnas->Forget(cidx); + target_mrnas->Delete(cidx); + //the uptr (CTData) pointer will still be kept in gdata->ctdata and deallocated eventually + } + tredundant++; + } + }// redundant transfrag check + if (m->gscore==0.0) + m->gscore=m->exons[0]->score; //Cufflinks exon score = isoform abundance + //const char* expr = (gtf_tracking_largeScale) ? m->getAttr("FPKM") : m->exons[0]->getAttr(m->names,"FPKM"); + const char* expr = m->getAttr("FPKM"); + if (expr!=NULL) { + if (expr[0]=='"') expr++; + fpkm=strtod(expr, NULL); + } else { //backward compatibility: read RPKM if FPKM not found + //expr=(gtf_tracking_largeScale) ? 
m->getAttr("RPKM") : m->exons[0]->getAttr(m->names,"RPKM"); + expr=m->getAttr("RPKM"); + if (expr!=NULL) { + if (expr[0]=='"') expr++; + fpkm=strtod(expr, NULL); + } + } + //const char* scov=(gtf_tracking_largeScale) ? m->getAttr("cov") : m->exons[0]->getAttr(m->names,"cov"); + const char* scov=m->getAttr("cov"); + if (scov!=NULL) { + if (scov[0]=='"') scov++; + cov=strtod(scov, NULL); + } + //const char* sconf_hi=(gtf_tracking_largeScale) ? m->getAttr("conf_hi") : m->exons[0]->getAttr(m->names,"conf_hi"); + const char* sconf_hi=m->getAttr("conf_hi"); + if (sconf_hi!=NULL){ + if (sconf_hi[0]=='"') sconf_hi++; + conf_hi=strtod(sconf_hi, NULL); + } + //const char* sconf_lo=(gtf_tracking_largeScale) ? m->getAttr("conf_lo") : m->exons[0]->getAttr(m->names,"conf_lo"); + const char* sconf_lo=m->getAttr("conf_lo"); + if (sconf_lo!=NULL) { + if (sconf_lo[0]=='"') sconf_lo++; + conf_lo=strtod(sconf_lo, NULL); + } + } //Cufflinks transfrags + target_mrnas->Add(m); + m->isUsed(true); + CTData* mdata=new CTData(m); + mdata->qset=qfidx; + gdata->tdata.Add(mdata); + if (!is_ref_set) { + // Cufflinks - attributes parsing + mdata->FPKM=fpkm; + mdata->cov=cov; + mdata->conf_hi=conf_hi; + mdata->conf_lo=conf_lo; + } + }//for each mrna read + //if (mrna_deleted>0) + // mrnas.Pack(); + + return (is_ref_set ? refdiscarded : tredundant); +} + +bool tMatch(GffObj& a, GffObj& b, int& ovlen, bool fuzzunspl, bool contain_only) { + //strict intron chain match, or single-exon perfect match + int imax=a.exons.Count()-1; + int jmax=b.exons.Count()-1; + ovlen=0; + if (imax!=jmax) return false; //different number of introns + if (imax==0) { //single-exon mRNAs + if (contain_only) { + return ((a.start>=b.start && a.end<=b.end) || + (b.start>=a.start && b.end<=a.end)); + } + if (fuzzunspl) { + //fuzz match for single-exon transfrags: + // it's a match if they overlap at least 80% of shortest one + ovlen=a.exons[0]->overlapLen(b.exons[0]); + int maxlen=GMAX(a.covlen,b.covlen); + return (ovlen>=maxlen*0.8); + } + else { + //only exact match, or strictly contained + ovlen=a.covlen; + return (a.exons[0]->start==b.exons[0]->start && + a.exons[0]->end==b.exons[0]->end); + } + } + if ( a.exons[imax]->startend || + b.exons[jmax]->startend ) + return false; //intron chains do not overlap at all + //check intron overlaps + ovlen=a.exons[0]->end-(GMAX(a.start,b.start))+1; + ovlen+=(GMIN(a.end,b.end))-a.exons.Last()->start; + for (int i=1;i<=imax;i++) { + if (ilen(); + if ((a.exons[i-1]->end!=b.exons[i-1]->end) || + (a.exons[i]->start!=b.exons[i]->start)) { + return false; //intron mismatch + } + } + if (contain_only) + return ((a.start>=b.start && a.end<=b.end) || + (b.start>=a.start && b.end<=a.end)); + else return true; +} + + +void cluster_mRNAs(GList & mrnas, GList & loci, int qfidx) { + //mrnas sorted by start coordinate + //and so are the loci + //int rdisc=0; + for (int t=0;t mrgloci(false); + GffObj* mrna=mrnas[t]; + int lfound=0; //count of parent loci + /*for (int l=0;lendexons.First()->start) continue; + if (loci[l]->start>mrna->exons.Last()->end) break; */ + for (int l=loci.Count()-1;l>=0;l--) { + if (loci[l]->endexons.First()->start) { + if (mrna->exons.First()->start-loci[l]->start > GFF_MAX_LOCUS) break; + continue; + } + if (loci[l]->start>mrna->exons.Last()->end) continue; + //here we have mrna overlapping loci[l] + if (loci[l]->add_mRNA(mrna)) { + //a parent locus was found + lfound++; + mrgloci.Add(l); //locus indices added here, in decreasing order + } + }//loci loop + //if (lfound<0) continue; //mrna was a ref 
duplicate, skip it + if (lfound==0) { + //create a locus with only this mRNA + loci.Add(new GLocus(mrna, qfidx)); + } + else if (lfound>1) { + //more than one locus found parenting this mRNA, merge loci + lfound--; + for (int l=0;laddMerge(*loci[mlidx], mrna); + loci.Delete(mlidx); + } + } + }//mrnas loop + //if (rdisc>0) mrnas.Pack(); + //return rdisc; +} + +int fix_umrnas(GSeqData& seqdata, GSeqData* rdata, FILE* fdis=NULL) { + //attempt to find the strand for seqdata.umrnas + //based on a) overlaps with oriented reference mRNAs if present + // b) overlaps with oriented mRNAs from the same input set + if (rdata!=NULL) { //we have reference mrnas + for (int i=0;imrnas_f.Count();i++) { + for (int j=0;jmrnas_f[i]->gseq_id!=seqdata.umrnas[j]->gseq_id) continue; + if (seqdata.umrnas[j]->strand!='.') continue; + uint ustart=seqdata.umrnas[j]->exons.First()->start; + uint uend=seqdata.umrnas[j]->exons.Last()->end; + uint rstart=rdata->mrnas_f[i]->exons.First()->start; + uint rend=rdata->mrnas_f[i]->exons.Last()->end; + if (ustart>rend) break; + if (rstart>uend) continue; + if (rdata->mrnas_f[i]->exonOverlap(ustart,uend)) { + seqdata.umrnas[j]->strand='+'; + } + else { //within intron + //if (seqdata.umrnas[j]->ulink==NULL || + // seqdata.umrnas[j]->ulink->covlenmrnas_f[i]->covlen) { + CTData* mdata=(CTData*)seqdata.umrnas[j]->uptr; + mdata->addOvl('i',rdata->mrnas_f[i]); + } + } + } + for (int i=0;imrnas_r.Count();i++) { + for (int j=0;jstrand!='.') continue; + uint ustart=seqdata.umrnas[j]->exons.First()->start; + uint uend=seqdata.umrnas[j]->exons.Last()->end; + uint rstart=rdata->mrnas_r[i]->exons.First()->start; + uint rend=rdata->mrnas_r[i]->exons.Last()->end; + if (ustart>rend) break; + if (rstart>uend) continue; + if (rdata->mrnas_r[i]->exonOverlap(ustart,uend)) { + seqdata.umrnas[j]->strand='-'; + } + else { //within intron + CTData* mdata=(CTData*)seqdata.umrnas[j]->uptr; + mdata->addOvl('i',rdata->mrnas_r[i]); + } + + } + } + }//we have reference transcripts + //---- now compare to other transcripts + for (int i=0;istrand!='.') continue; + uint ustart=seqdata.umrnas[j]->exons.First()->start; + uint uend=seqdata.umrnas[j]->exons.Last()->end; + uint rstart=seqdata.mrnas_f[i]->exons.First()->start; + uint rend=seqdata.mrnas_f[i]->exons.Last()->end; + if (ustart>rend) break; + if (rstart>uend) continue; + if (seqdata.mrnas_f[i]->exonOverlap(ustart,uend)) { + seqdata.umrnas[j]->strand='+'; + } + } + } + for (int i=0;istrand!='.') continue; + uint ustart=seqdata.umrnas[j]->exons.First()->start; + uint uend=seqdata.umrnas[j]->exons.Last()->end; + uint rstart=seqdata.mrnas_r[i]->exons.First()->start; + uint rend=seqdata.mrnas_r[i]->exons.Last()->end; + if (ustart>rend) break; + if (rstart>uend) continue; + //overlap + if (seqdata.mrnas_r[i]->exonOverlap(ustart,uend)) { + seqdata.umrnas[j]->strand='-'; + } + } + } + int fcount=0; + for (int i=0;istrand=='+') { + seqdata.mrnas_f.Add(seqdata.umrnas[i]); + seqdata.umrnas.Forget(i); + } + else if (seqdata.umrnas[i]->strand=='-') { + seqdata.mrnas_r.Add(seqdata.umrnas[i]); + seqdata.umrnas.Forget(i); + } + else { //discard mRNAs not settled + seqdata.umrnas[i]->strand='.'; + if (fdis!=NULL) { + seqdata.umrnas[i]->printGtf(fdis); + } + fcount++; + } + } + seqdata.umrnas.Pack(); + return fcount; +} + +//retrieve ref_data for a specific genomic sequence +GSeqData* getRefData(int gid, GList& ref_data) { + int ri=-1; + GSeqData f(gid); + GSeqData* r=NULL; + if (ref_data.Found(&f,ri)) + r=ref_data[ri]; + return r; +} + +void read_transcripts(FILE* f, 
GList& seqdata, bool keepAttrs) { + rewind(f); + GffReader gffr(f, true); //loading only recognizable transcript features + gffr.showWarnings(gtf_tracking_verbose); + + // keepAttrs mergeCloseExons noExonAttrs + gffr.readAll(keepAttrs, true, true); + + // is_ref? check_for_dups, + parse_mRNAs(gffr.gflst, seqdata, false, false); +} + +int cmpGSeqByName(const pointer p1, const pointer p2) { + return strcmp(((GSeqData*)p1)->gseq_name, ((GSeqData*)p2)->gseq_name); +} + +void sort_GSeqs_byName(GList& seqdata) { + seqdata.setSorted(&cmpGSeqByName); +} + +void read_mRNAs(FILE* f, GList& seqdata, GList* ref_data, + bool check_for_dups, int qfidx, const char* fname, bool only_multiexon) { + //>>>>> read all transcripts/features from a GTF/GFF3 file + //int imrna_counter=0; + int loci_counter=0; + if (ref_data==NULL) ref_data=&seqdata; + bool isRefData=(&seqdata==ref_data); + //(f, transcripts_only) + GffReader* gffr=new GffReader(f, true); //load only transcript annotations + gffr->showWarnings(gtf_tracking_verbose); + // keepAttrs mergeCloseExons noExonAttrs + gffr->readAll(!isRefData, true, isRefData || gtf_tracking_largeScale); + //so it will read exon attributes only for low number of Cufflinks files + + int d=parse_mRNAs(gffr->gflst, seqdata, isRefData, check_for_dups, qfidx,only_multiexon); + if (gtf_tracking_verbose && d>0) { + if (isRefData) GMessage(" %d duplicate reference transcripts discarded.\n",d); + else GMessage(" %d redundant cufflinks transfrags discarded.\n",d); + } + //imrna_counter=gffr->mrnas.Count(); + delete gffr; //free the extra memory and unused GffObjs + + //for each genomic sequence, cluster transcripts + int discarded=0; + GStr bname(fname); + GStr s; + if (!bname.is_empty()) { + int di=bname.rindex('.'); + if (di>0) bname.cut(di); + int p=bname.rindex('/'); + if (p<0) p=bname.rindex('\\'); + if (p>=0) bname.remove(0,p); + } + FILE* fdis=NULL; + FILE* frloci=NULL; + + for (int g=0;gget_gseqid(); + if (!isRefData) { //cufflinks data, find corresponding ref data + GSeqData* rdata=getRefData(gseq_id, *ref_data); + if (rdata!=NULL && seqdata[g]->umrnas.Count()>0) { + discarded+=fix_umrnas(*seqdata[g], rdata, fdis); + } + } + //>>>>> group mRNAs into locus-clusters (based on exon overlap) + cluster_mRNAs(seqdata[g]->mrnas_f, seqdata[g]->loci_f, qfidx); + cluster_mRNAs(seqdata[g]->mrnas_r, seqdata[g]->loci_r, qfidx); + if (!isRefData) { + cluster_mRNAs(seqdata[g]->umrnas, seqdata[g]->nloci_u, qfidx); + } + loci_counter+=seqdata[g]->loci_f.Count(); + loci_counter+=seqdata[g]->loci_r.Count(); +// if (refData) { +// if (frloci==NULL) { +// s=bname; +// s.append(".loci.lst"); +// frloci=fopen(s.chars(), "w"); +// } +// writeLoci(frloci, seqdata[g]->loci_f); +// writeLoci(frloci, seqdata[g]->loci_r); +// }//write ref loci + }//for each genomic sequence + if (fdis!=NULL) fclose(fdis); + if (frloci!=NULL) fclose(frloci); + if (discarded>0) { + if (gtf_tracking_verbose) GMessage("Found %d transcripts with undetermined strand.\n", discarded); + } + else { if (fdis!=NULL) remove(s.chars()); } +} + +int qsearch_mrnas(uint x, GList& mrnas) { + //binary search + //do the simplest tests first: + if (mrnas[0]->start>x) return 0; + if (mrnas.Last()->start>1; + istart=mrnas[i]->start; + if (istart < x) l = i + 1; + else { + if (istart == x) { //found matching coordinate here + idx=i; + while (idx<=maxh && mrnas[idx]->start==x) { + idx++; + } + return (idx>maxh) ? 
-1 : idx; + } + h = i - 1; + } + } //while + idx = l; + while (idx<=maxh && mrnas[idx]->start<=x) { + idx++; + } + return (idx>maxh) ? -1 : idx; +} + +int qsearch_loci(uint x, GList& loci) { + // same as above, but for GSeg lists + //binary search + //do the simplest tests first: + if (loci[0]->start>x) return 0; + if (loci.Last()->start> 1; + istart=loci[i]->start; + if (istart < x) l=i+1; + else { + if (istart == x) { //found matching coordinate here + idx=i; + while (idx<=maxh && loci[idx]->start==x) { + idx++; + } + return (idx>maxh) ? -1 : idx; + } + h=i-1; + } + } //while + idx = l; + while (idx<=maxh && loci[idx]->start<=x) { + idx++; + } + return (idx>maxh) ? -1 : idx; +} + diff --git a/src/gtf_tracking.h b/src/gtf_tracking.h new file mode 100644 index 0000000..6629d52 --- /dev/null +++ b/src/gtf_tracking.h @@ -0,0 +1,1315 @@ +#ifndef GTF_TRACKING_H +#define GTF_TRACKING_H +/* + * gtf_tracking.h + * cufflinks + * + * Created by Cole Trapnell on 9/5/09. + * Copyright 2009 Geo Pertea. All rights reserved. + * + */ + +#include "gff.h" +#include "GFaSeqGet.h" +#include "GFastaIndex.h" +#include "GStr.h" + + +#define MAX_QFILES 500 + +extern bool gtf_tracking_verbose; + +extern bool gtf_tracking_largeScale; +//many input files, no accuracy stats are generated, no *.tmap +// and exon attributes are discarded + +int cmpByPtr(const pointer p1, const pointer p2); + +bool t_contains(GffObj& a, GffObj& b); +//returns true only IF b has fewer exons than a AND a "contains" b + +char* getGSeqName(int gseq_id); + +//genomic fasta sequence handling +class GFastaHandler { + public: + char* fastaPath; + GFastaIndex* faIdx; + char* getFastaFile(int gseq_id) { + if (fastaPath==NULL) return NULL; + GStr s(fastaPath); + s.trimR('/'); + s.appendfmt("/%s",getGSeqName(gseq_id)); + GStr sbase(s); + if (!fileExists(s.chars())) s.append(".fa"); + if (!fileExists(s.chars())) s.append("sta"); + if (fileExists(s.chars())) return Gstrdup(s.chars()); + else { + GMessage("Warning: cannot find genomic sequence file %s{.fa,.fasta}\n",sbase.chars()); + return NULL; + } + } + + GFastaHandler(const char* fpath=NULL) { + fastaPath=NULL; + faIdx=NULL; + if (fpath!=NULL && fpath[0]!=0) init(fpath); + } + + void init(const char* fpath) { + if (fpath==NULL || fpath[0]==0) return; + if (!fileExists(fpath)) + GError("Error: file/directory %s does not exist!\n",fpath); + fastaPath=Gstrdup(fpath); + if (fastaPath!=NULL) { + if (fileExists(fastaPath)>1) { //exists and it's not a directory + GStr fainame(fastaPath); + //the .fai name might have been given directly + if (fainame.rindex(".fai")==fainame.length()-4) { + //.fai index file given directly + fastaPath[fainame.length()-4]=0; + if (!fileExists(fastaPath)) + GError("Error: cannot find fasta file for index %s !\n", fastaPath); + } + else fainame.append(".fai"); + //fainame.append(".fai"); + faIdx=new GFastaIndex(fastaPath,fainame.chars()); + GStr fainamecwd(fainame); + int ip=-1; + if ((ip=fainamecwd.rindex('/'))>=0) + fainamecwd.cut(0,ip+1); + if (!faIdx->hasIndex()) { //could not load index + //try current directory + if (fainame!=fainamecwd) { + if (fileExists(fainamecwd.chars())>1) { + faIdx->loadIndex(fainamecwd.chars()); + } + } + } //tried to load index + if (!faIdx->hasIndex()) { + GMessage("No fasta index found for %s. 
Rebuilding, please wait..\n",fastaPath); + faIdx->buildIndex(); + if (faIdx->getCount()==0) GError("Error: no fasta records found!\n"); + GMessage("Fasta index rebuilt.\n"); + FILE* fcreate=fopen(fainame.chars(), "w"); + if (fcreate==NULL) { + GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fainame.chars()); + if (fainame!=fainamecwd) fcreate=fopen(fainamecwd.chars(), "w"); + if (fcreate==NULL) + GError("Error: cannot create fasta index %s!\n", fainamecwd.chars()); + } + if (faIdx->storeIndex(fcreate)getCount()) + GMessage("Warning: error writing the index file!\n"); + } //index created and attempted to store it + } //multi-fasta + } //genomic sequence given + } + GFaSeqGet* fetch(int gseq_id, bool checkFasta=false) { + if (fastaPath==NULL) return NULL; + //genomic sequence given + GFaSeqGet* faseq=NULL; + if (faIdx!=NULL) { //fastaPath was the multi-fasta file name + char* gseqname=getGSeqName(gseq_id); + GFastaRec* farec=faIdx->getRecord(gseqname); + if (farec!=NULL) { + faseq=new GFaSeqGet(fastaPath,farec->seqlen, farec->fpos, + farec->line_len, farec->line_blen); + faseq->loadall(); //just cache the whole sequence, it's faster + } + else { + GMessage("Warning: couldn't find fasta record for '%s'!\n",gseqname); + return NULL; + } + } + else //if (fileExists(fastaPath)==1) + { + char* sfile=getFastaFile(gseq_id); + if (sfile!=NULL) { + //if (gtf_tracking_verbose) + // GMessage("Processing sequence from fasta file '%s'\n",sfile); + faseq=new GFaSeqGet(sfile,checkFasta); + faseq->loadall(); + GFREE(sfile); + } + } //one fasta file per contig + return faseq; + } + + ~GFastaHandler() { + GFREE(fastaPath); + delete faIdx; + } +}; + + + +bool betterRef(GffObj* a, GffObj* b); //for better CovLink reference ranking + +class GLocus; + +class COvLink { +public: + static int coderank(char c) { + switch (c) { + case '=': return 0; //ichain match + case 'c': return 2; //containment (ichain fragment) + case 'j': return 4; // overlap with at least a junction match + case 'e': return 6; // single exon transfrag overlapping an intron of reference (possible pre-mRNA) + case 'o': return 8; // generic exon overlap + case 's': return 16; //"shadow" - an intron overlaps with a ref intron on the opposite strand + case 'x': return 18; // exon overlap on opposite strand (usually wrong strand mapping) + case 'i': return 20; // intra-intron + case 'p': return 90; //polymerase run + case 'r': return 92; //repeats + case 'u': return 94; //intergenic + case 0 : return 100; + default: return 96; + } + } + char code; + int rank; + GffObj* mrna; + int ovlen; + COvLink(char c=0,GffObj* m=NULL, int ovl=0) { + code=c; + mrna=m; + ovlen=ovl; + rank=coderank(c); + } + bool operator<(COvLink& b) { + if (rank==b.rank) + return (ovlen==b.ovlen)? betterRef(mrna, b.mrna) : (ovlen>b.ovlen); + else return rank(COvLink& b) { + if (rank==b.rank) + return (ovlen==b.ovlen)? 
betterRef(b.mrna, mrna) : (ovlenb.rank; + } + bool operator==(COvLink& b) { + return (rank==b.rank && mrna==b.mrna); + } +}; + +class GISeg: public GSeg { + public: + GffObj* t; //pointer to the largest transcript with a segment this exact exon coordinates + GISeg(uint s=0,uint e=0, GffObj* ot=NULL):GSeg(s,e) { t=ot; } +}; + +class GIArray:public GArray { + public: + GIArray(bool uniq=true):GArray(true,uniq) { } + int IAdd(GISeg* item) { + if (item==NULL) return -1; + int result=-1; + if (Found(*item, result)) { + if (fUnique) { + //cannot add a duplicate, return index of existing item + if (item->t!=NULL && fArray[result].t!=NULL && + item->t->covlen>fArray[result].t->covlen) + fArray[result].t=item->t; + return result; + } + } + //Found sets result to the position where the item should be + idxInsert(result, *item); + return result; + } + +}; + +class CEqList: public GList { + public: + GffObj* head; + CEqList():GList((GCompareProc*)cmpByPtr, (GFreeProc*)NULL, true) { + head=NULL; + } +}; + +class CTData { //transcript associated data +public: + GffObj* mrna; //owner transcript + GLocus* locus; + GList ovls; //overlaps with other transcripts (ref vs query) + //-- just for ichain match tracking: + GffObj* eqref; //ref transcript having an ichain match + int qset; //qry set index (qfidx), -1 means reference dataset + //GffObj* eqnext; //next GffObj in the linked list of matching transfrags + CEqList* eqlist; //keep track of matching transfrags + //int eqdata; // flags for EQ list (is it a list head?) + // Cufflinks specific data: + double FPKM; + double conf_hi; + double conf_lo; + double cov; + char classcode; //the best/final classcode + CTData(GffObj* m=NULL, GLocus* l=NULL):ovls(true,true,true) { + mrna=m; + if (mrna!=NULL) mrna->uptr=this; + locus=l; + classcode=0; + eqref=NULL; + //eqnext=NULL; + eqlist=NULL; + //eqdata=0; + qset=-2; + FPKM=0; + conf_lo=0; + conf_hi=0; + cov=0; + } + + ~CTData() { + ovls.Clear(); + //if ((eqdata & EQHEAD_TAG)!=0) delete eqlist; + if (isEqHead()) delete eqlist; + } + + //inline bool eqHead() { return ((eqdata & EQHEAD_TAG)!=0); } + bool isEqHead() { + if (eqlist==NULL) return false; + return (eqlist->head==this->mrna); + } + + void joinEqList(GffObj* m) { //add list from m + //list head is set to the transfrag with the lower qset# + CTData* md=(CTData*)(m->uptr); + //ASSERT(md); + if (eqlist==NULL) { + if (md->eqlist!=NULL) { + eqlist=md->eqlist; + eqlist->Add(this->mrna); + CTData* md_head_d=(CTData*)(md->eqlist->head->uptr); + if (this->qset < md_head_d->qset) + eqlist->head=this->mrna; + } + else { //m was not in an EQ list + //eqlist=new GList((GCompareProc*)cmpByPtr, (GFreeProc*)NULL, true); + eqlist=new CEqList(); + eqlist->Add(this->mrna); + eqlist->Add(m); + md->eqlist=eqlist; + if (qsetqset) eqlist->head=this->mrna; + else eqlist->head=m; + } + }//no eqlist before + else { //merge two eqlists + if (eqlist==md->eqlist) //already in the same eqlist, nothing to do + return; + if (md->eqlist!=NULL) { //copy elements of m's eqlist + //copy the smaller list into the larger one + CEqList* srclst, *destlst; + if (md->eqlist->Count()Count()) { + srclst=md->eqlist; + destlst=eqlist; + } + else { + srclst=eqlist; + destlst=md->eqlist; + } + for (int i=0;iCount();i++) { + destlst->Add(srclst->Get(i)); + CTData* od=(CTData*)((*srclst)[i]->uptr); + od->eqlist=destlst; + //od->eqdata=od->qset+1; + } + this->eqlist=destlst; + CTData* s_head_d=(CTData*)(srclst->head->uptr); + CTData* d_head_d=(CTData*)(destlst->head->uptr); + if (s_head_d->qset < 
d_head_d->qset ) + this->eqlist->head=srclst->head; + delete srclst; + } + else { //md->eqlist==NULL + eqlist->Add(m); + md->eqlist=eqlist; + CTData* head_d=(CTData*)(eqlist->head->uptr); + if (md->qsetqset) + eqlist->head=m; + } + } + } + + void addOvl(char code,GffObj* target=NULL, int ovlen=0) { + ovls.AddIfNew(new COvLink(code, target, ovlen)); + } + char getBestCode() { + return (ovls.Count()>0) ? ovls[0]->code : 0 ; + } + bool operator>(CTData& b) { return (mrna > b.mrna); } + bool operator<(CTData& b) { return (mrna < b.mrna); } + bool operator==(CTData& b) { return (mrna==b.mrna); } +}; + +class GSuperLocus; +class GTrackLocus; +class GXLocus; + +//Data structure holding a query locus data (overlapping mRNAs on the same strand) +// and also the accuracy data of all mRNAs of a query locus +// (against all reference loci overlapping the same region) +class GLocus:public GSeg { +public: + int gseq_id; //id of underlying genomic sequence + int qfidx; // for locus tracking + GTrackLocus* t_ptr; //for locus tracking cluster + GffObj* mrna_maxcov; //transcript with maximum coverage (for main "ref" transcript) + GffObj* mrna_maxscore; //transcript with maximum gscore (for major isoform) + GList mrnas; //list of transcripts (isoforms) for this locus + GArray uexons; //list of unique exons (covered segments) in this region + GArray mexons; //list of merged exons in this region + GIArray introns; + GList cmpovl; //temp list of overlapping qry/ref loci to compare to (while forming superloci) + + //only for reference loci --> keep track of all superloci found for each qry dataset + // which contain this reference locus + GList* superlst; + GXLocus* xlocus; //superlocus formed by exon overlaps across all qry datasets + // -- if genomic sequence was given: + int spl_major; // number of GT-AG splice site consensi + int spl_rare; // number of GC-AG, AT-AC and other rare splice site consensi + int spl_wrong; //number of "wrong" (unrecognized) splice site consensi + int ichains; //number of multi-exon mrnas + int ichainTP; + int ichainATP; + int mrnaTP; + int mrnaATP; + int v; //user flag/data + GLocus(GffObj* mrna=NULL, int qidx=-1):mrnas(true,false,false),uexons(true,true),mexons(true,true), + introns(), cmpovl(true,false,true) { + //this will NOT free mrnas! 
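+       // (GList flags here are (sorted, free_elements, unique); mrnas is kept sorted
+       //  but does not own the GffObj pointers -- they are assumed to be owned
+       //  elsewhere, e.g. by the per-sequence GSeqData containers)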
+ ichains=0; + gseq_id=-1; + qfidx=qidx; + t_ptr=NULL; + creset(); + xlocus=NULL; + mrna_maxcov=NULL; + mrna_maxscore=NULL; + superlst=new GList(true,false,false); + if (mrna!=NULL) { + start=mrna->exons.First()->start; + end=mrna->exons.Last()->end;; + gseq_id=mrna->gseq_id; + GISeg seg; + for (int i=0;iexons.Count();i++) { + seg.start=mrna->exons[i]->start; + seg.end=mrna->exons[i]->end; + uexons.Add(seg); + mexons.Add(seg); + if (i>0) { + seg.start=mrna->exons[i-1]->end+1; + seg.end=mrna->exons[i]->start-1; + seg.t=mrna; + introns.Add(seg); + } + } + mrnas.Add(mrna); + if (mrna->exons.Count()>1) ichains++; + ((CTData*)(mrna->uptr))->locus=this; + mrna_maxscore=mrna; + mrna_maxcov=mrna; + } + } + ~GLocus() { + delete superlst; + } + void creset() { + spl_major=0;spl_rare=0;spl_wrong=0; + v=0; //visited/other data + ichainTP=0; + ichainATP=0; + mrnaTP=0; + mrnaATP=0; + cmpovl.Clear(); + } + + void addMerge(GLocus& locus, GffObj* lnkmrna) { + //add all the elements of the other locus (merging) + //-- merge mexons + GArray ovlexons(true,true); //list of locus.mexons indexes overlapping existing mexons + int i=0; //index of first mexons with a merge + int j=0; //index current mrna exon + while (iiend) { //mexons[i] end extend + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } // mexons[i] end extend + // } //exon overlap + j++; //check the next locus.mexon + } + //-- add the rest of the non-overlapping mexons: + GSeg seg; + for (int i=0;iuptr))->locus=this; + if (locus.mrnas[i]!=lnkmrna) { + mrnas.Add(locus.mrnas[i]); + if (locus.mrnas[i]->exons.Count()>1) ichains++; + } + } + // -- adjust start/end as needed + if (start>locus.start) start=locus.start; + if (endcovlencovlen) + mrna_maxcov=locus.mrna_maxcov; + if (mrna_maxscore->gscoregscore) + mrna_maxscore=locus.mrna_maxscore; + } + + + bool exonOverlap(GLocus& loc) { + //check if any mexons overlap! + int i=0; + int j=0; + while (i0 && mrna->gseq_id!=gseq_id) return false; //mrna must be on the same genomic seq + //check for exon overlap with existing mexons + //also update uexons and mexons accordingly, if mrna is added + uint mrna_start=mrna->exons.First()->start; + uint mrna_end=mrna->exons.Last()->end; + if (mrna_start>end || start>mrna_end) return false; + bool hasovl=false; + int i=0; //index of first mexons with a merge + int j=0; //index current mrna exon + GArray ovlexons(true,true); //list of mrna exon indexes overlapping mexons + while (iexons.Count()) { + uint istart=mexons[i].start; + uint iend=mexons[i].end; + uint jstart=mrna->exons[j]->start; + uint jend=mrna->exons[j]->end; + if (iendiend) { //mexon stretch up + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } //possible mexons merge + + j++; //check the next mrna exon + }//all vs all exon check loop + if (hasovl) { + GSeg seg; + //add the rest of the non-overlapping exons, + // and also to uexons etc. 
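+       // (only exons that did not overlap an existing merged exon are appended to
+       //  mexons; uexons and introns always record every exon/intron of this mRNA)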
+ for (int i=0;iexons.Count();i++) { + seg.start=mrna->exons[i]->start; + seg.end=mrna->exons[i]->end; + if (!ovlexons.Exists(i)) mexons.Add(seg); + uexons.Add(seg); + GISeg iseg; + if (i>0) { + iseg.start=mrna->exons[i-1]->end+1; + iseg.end=mrna->exons[i]->start-1; + iseg.t=mrna; + introns.Add(iseg); + } + } + + mrnas_add(mrna); + // add to mrnas + ((CTData*)mrna->uptr)->locus=this; + gseq_id=mrna->gseq_id; + if (mrna->exons.Count()>1) ichains++; + } + return hasovl; + } + + //simpler,basic adding of a mrna + void mrnas_add(GffObj* mrna) { + mrnas.Add(mrna); + // adjust start/end + if (start>mrna->start) start=mrna->start; + if (endend) end=mrna->end; + if (mrna_maxcov->covlencovlen) mrna_maxcov=mrna; + if (mrna_maxscore->gscoregscore) mrna_maxscore=mrna; + } +}; + +class GSuperLocus; +class GTrackLocus; + +class GSuperLocus : public GSeg { +public: + int qfidx; //index of query dataset/file for which this superlocus was created + GList qloci; + GList rloci; + GList qmrnas; //list of transcripts (isoforms) for this locus + GArray qmexons; //list of merged exons in this region + GArray quexons; //list of unique exons (covered segments) in this region + GIArray qintrons; //list of unique exons (covered segments) in this region + //same lists for reference: + GList rmrnas; //list of transcripts (isoforms) for this locus + GArray rmexons; //list of merged exons in this region + GArray ruexons; //list of unique exons (covered segments) in this region + GArray rintrons; //list of unique exons (covered segments) in this region + // store problematic introns for printing: + GIArray i_missed; //missed reference introns (not overlapped by any qry intron) + GIArray i_notp; //wrong ref introns (one or both ends not matching any qry intron) + // + GIArray i_qwrong; //totally wrong qry introns (not overlapped by any ref intron) + GIArray i_qnotp; //imperfect qry introns (may overlap but has no "perfect" match) + + + int qbases_all; + int rbases_all; //in fact, it's all ref bases overlapping any query loci + int in_rmrnas; //count of ALL ref mrnas and loci given for this region + int in_rloci; //not just those overlapping qry data + // this will keep track of total qry loci, mrnas and exons in an area + int total_superloci; + int total_qloci; + int total_qloci_alt; //total qloci with multiple transcripts + + int total_qmrnas; + int total_qichains; //multi exon mrnas + int total_qexons; //unique exons + int total_qmexons; + int total_qintrons; //unique introns + // these ref totals are in fact only limited to data from + // loci overlapping any of qry loci + int total_rmexons; + int total_richains; //multi exon mrnas + int total_rloci; + int total_rmrnas; + int total_rexons; + int total_rintrons; //unique introns + + //--- accuracy data after compared to ref loci: + int locusQTP; + int locusTP; + int locusAQTP; + int locusATP; // 1 if ichainATP + mrnaATP > 0 + int locusFP; + int locusAFP; + int locusAFN; + int locusFN; + //---transcript level accuracy -- all exon coordinates should match (most stringent) + int mrnaTP; // number of qry mRNAs with perfect match with ref transcripts + int mrnaFP; // number of qry mRNAs with no perfect match with a ref transcript + int mrnaFN; // number of ref mRNAs in this region having no perfect match with a qry transcript + int mrnaATP; + int mrnaAFN; + int mrnaAFP; + //---intron level accuracy (comparing the ordered set of splice sites): + int ichainTP; // number of qry intron chains covering a reference intron chain + // (covering meaning that the ordered set of 
reference splice sites + // is the same with a ordered subset of the query splice sites) + int ichainFP; // number of qry intron chains not covering a reference intron chain + int ichainFN; // number of ref intron chains in this region not being covered by a reference intron chain + // same as above, but approximate -- allowing a 10bp distance error for splice sites + int ichainATP; + int ichainAFP; + int ichainAFN; + //---projected features --- + //---exon level accuracy: + int exonTP; //number of perfectly overlapping exons (true positives) + int exonFP; //number of exons of query with no perfect match with a reference exon + int exonFN; //number of exons of reference with no perfect match with a query exon + // same as the above but with acceptable approximation (10bp error window): + int exonATP; + int exonAFP; + int exonAFN; + + int intronTP; //number of perfectly overlapping introns (true positives) + int intronFP; //number of introns of query with no perfect match with a reference intron + int intronFN; //number of introns of reference with no perfect match with a query intron + // same as the above but with acceptable approximation (10bp error window): + int intronATP; + int intronAFP; + int intronAFN; + + //-- EGASP added these too: + int m_exons; //number of exons totally missed (not overlapped *at all* by any query exon) + int w_exons; //numer of totally wrong exons (query exons not overlapping *at all* any reference exon) + int m_introns; //number of introns totally missed (not overlapped *at all* by any query intron) + int w_introns; //numer of totally wrong introns (query introns not overlapping *at all* any reference intron) + int m_loci; //missed loci + int w_loci; //novel/wrong loci + //---base level accuracy + int baseTP; //number of overlapping bases + int baseFP; //number of qry bases not overlapping reference + int baseFN; //number of ref bases not overlapping qry + // sorted,free,unique sorted,unique + GSuperLocus(uint lstart=0,uint lend=0):qloci(true,false,false),rloci(true,false,false), + qmrnas(true,false,false), qmexons(true,false), quexons(true,false), qintrons(false), + rmrnas(true,false,false), rmexons(true,false), ruexons(true,false), rintrons(false), + i_missed(false),i_notp(false), i_qwrong(false), i_qnotp(false){ + qfidx=-1; + start=lstart; + end=lend; + qbases_all=0; + rbases_all=0; + baseTP=0;baseFP=0;baseFN=0; + locusTP=0;locusQTP=0; locusAQTP=0; locusATP=0; + locusFP=0;locusAFP=0;locusAFN=0; + locusFN=0; + in_rmrnas=0; + in_rloci=0; + w_loci=0; + m_loci=0; + total_superloci=0; + mrnaTP=0;mrnaFP=0;mrnaFN=0; + mrnaATP=0;mrnaAFP=0;mrnaAFN=0; + ichainTP=0;ichainFP=0;ichainFN=0; + ichainATP=0;ichainAFP=0;ichainAFN=0; + exonTP=0;exonFP=0;exonFN=0; + exonATP=0;exonAFP=0;exonAFN=0; + intronTP=0;intronFP=0;intronFN=0; + intronATP=0;intronAFP=0;intronAFN=0; + total_rmexons=0; + total_qmexons=0; + total_qexons=0;total_qloci=0;total_qmrnas=0; + total_qloci_alt=0; + total_qintrons=0;total_qichains=0; + total_rexons=0;total_rloci=0;total_rmrnas=0; + total_rintrons=0;total_richains=0; + w_exons=0; + m_exons=0; + w_introns=0; + m_introns=0; + } + void addQlocus(GLocus& loc) { + if (start==0 || start>loc.start) start=loc.start; + if (end0 && loc.mrnas.Count()>1) + total_qloci_alt++; + qmrnas.Add(loc.mrnas); + total_qmrnas+=loc.mrnas.Count(); + total_qichains+=loc.ichains; + qmexons.Add(loc.mexons); + total_qmexons+=loc.mexons.Count(); + quexons.Add(loc.uexons); + total_qexons+=loc.uexons.Count(); + qintrons.Add(loc.introns); + total_qintrons+=loc.introns.Count(); + } + 
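+       // addRlocus mirrors addQlocus for reference loci: it widens the superlocus
+       // coordinate range and accumulates the reference locus/mRNA/exon/intron
+       // totals consumed by the accuracy counters declared above.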
void addRlocus(GLocus& loc) { + if (start==0 || start>loc.start) start=loc.start; + if (end0) s.total_superloci=1; + total_superloci+=s.total_superloci; + qbases_all+=s.qbases_all; + rbases_all+=s.rbases_all; + m_loci+=s.m_loci; + w_loci+=s.w_loci; + total_qexons+=s.total_qexons; + total_qintrons+=s.total_qintrons; + total_qmexons+=s.total_qmexons; + total_rexons+=s.total_rexons; + total_rintrons+=s.total_rintrons; + total_rmexons+=s.total_rmexons; + total_qmrnas+=s.total_qmrnas; + total_qichains+=s.total_qichains; + total_rmrnas+=s.total_rmrnas; + total_richains+=s.total_richains; + total_qloci+=s.total_qloci; + total_qloci_alt+=s.total_qloci_alt; + total_rloci+=s.total_rloci; + } +}; + +class GSeqData { + int gseq_id; +public: + const char* gseq_name; + GList refs_f; //forward strand mRNAs + GList refs_r; //reverse strand mRNAs + GList mrnas_f; //forward strand mRNAs + GList mrnas_r; //reverse strand mRNAs + GList loci_f; //forward strand loci + GList loci_r; //reverse strand loci + //--> the fields below are not used by reference data -- + GList gstats_f; //stats for forward strand superloci + GList gstats_r; //stats for reverse strand superloci + GList nloci_f; //"novel" loci on forward strand + GList nloci_r; //"novel" loci on reverse strand + GList umrnas; //unknown orientation mrnas + GList nloci_u; //"novel" loci with no orientation found + + GList tdata; //transcript data (uptr holder for all mrnas here) + + int get_gseqid() { return gseq_id; } + + //--< + GSeqData(int gid=-1):mrnas_f(true,true,false),mrnas_r(true,true,false), + loci_f(true,true,true),loci_r(true,true,true), + gstats_f(true,true,false),gstats_r(true,true,false), + nloci_f(true,false,true), nloci_r(true,false,true), + umrnas(true,true,false), nloci_u(true,true,true), tdata(false,true,false) { + gseq_id=gid; + if (gseq_id>=0) + gseq_name=GffObj::names->gseqs.getName(gseq_id); + } + bool operator==(GSeqData& d){ + return (gseq_id==d.gseq_id); + } + bool operator>(GSeqData& d){ + return (gseq_id>d.gseq_id); + } + bool operator<(GSeqData& d){ + return (gseq_id { + public: + GffObj* mrna_maxcov; //transcript with maximum coverage (for largest transcript) + GffObj* mrna_maxscore; //transcript with maximum gscore ( = major isoform for Cufflinks) + uint start; + uint end; + GList qloci; + //GCluster cl; //just a more compact way of keeping all transcripts in these loci + GQCluster(GList* loci=NULL):GList(true,false,false), + qloci(true,false,false) { + mrna_maxcov=NULL; + mrna_maxscore=NULL; + start=0; + end=0; + if (loci!=NULL) { + qloci.Add(*loci); + for (int i=0;iCount();i++) { + addLocus(loci->Get(i),false); + } + } + } + void addLocus(GLocus* loc, bool toLoci=true) { + //check so we don't add locus duplicates + if (toLoci) { + for (int i=0;imrnas.Count();m++) { + GffObj* mrna=loc->mrnas[m]; + Add(mrna); + if (start==0 || start>mrna->start) start=mrna->start; + if (endend) end=mrna->end; + if (mrna_maxcov==NULL || mrna_maxcov->covlencovlen) mrna_maxcov=mrna; + if (mrna_maxscore==NULL || mrna_maxscore->gscoregscore) mrna_maxscore=mrna; + } + } +}; + +//track a set of clustered qloci across multiple qry datasets +// the qloci in qcls[] overlap but not necessarily at exon level +// (so there can be multiple genes here in fact) +class GTrackLocus:public GSeg { + public: + char strand; + bool hasQloci; + //GLocus* rloc; //corresponding reference locus, if available + GList rloci; //ref loci found overlapping this region + GQCluster* qcls[MAX_QFILES]; //all qloci for this superlocus, grouped by dataset + GTrackLocus(GLocus* 
qloc=NULL, int q=-1):GSeg(0,0),rloci(true,false,true) { + strand='.'; + for (int i=0;iqfidx>=0) + GError("Error: GTrackLocus::addRLocus called with a query locus (set# %d)\n", + rl->qfidx+1); + if (strand=='.') strand=rl->mrna_maxcov->strand; + if (start==0 || start>rl->start) start=rl->start; + if (end==0 || endend) end=rl->end; + rl->t_ptr=this; + rloci.Add(rl); + } + + void addQLocus(GLocus* loc, int q=-1) { //adding qry locus + if (loc==NULL) return; + if (strand=='.' && loc->mrna_maxcov->strand!='.') + strand=loc->mrna_maxcov->strand; + if (loc->qfidx<0 && q<0) + GError("Error at GTrackLocus::addQLocus(): locus.qfidx not set and index not given!\n"); + if (q>=0) loc->qfidx=q; + else q=loc->qfidx; + if (start==0 || start>loc->start) start=loc->start; + if (end==0 || endend) end=loc->end; + if (qcls[q]==NULL) qcls[q]=new GQCluster(); + hasQloci=true; + loc->t_ptr = this; + qcls[q]->addLocus(loc); + } + + bool add_Locus(GLocus* loc) { + if (start==0 || overlap(*loc)) { //simple range overlap, not exon overlap + if (loc->qfidx<0) addRLocus(loc); + else addQLocus(loc); + return true; + } + return false; + } + + + void addQCl(int q, GQCluster* qcl, GLocus* lnkloc) { + for (int i=0;iqloci.Count();i++) { + GLocus* loc=qcl->qloci[i]; + if (loc==lnkloc || loc->t_ptr==this) continue; + hasQloci=true; + loc->t_ptr=this; + qcls[q]->addLocus(loc); + } + } + + void addMerge(GTrackLocus* loctrack, int qcount, GLocus* lnkloc) { + if (loctrack==NULL) return; + //merge qloci + for (int q=0; q < qcount; q++) { + if (qcls[q]==NULL) { + if (loctrack->qcls[q]!=NULL) { + qcls[q]=loctrack->qcls[q]; + loctrack->qcls[q]=NULL; //just move pointer here + //set all t_ptr pointers for moved loci + for (int ql = 0; ql < qcls[q]->qloci.Count(); ql++) { + qcls[q]->qloci[ql]->t_ptr=this; + } + hasQloci=true; + } + } + else //existing qloci at q + if (loctrack->qcls[q]!=NULL) { //merge elements + addQCl(q, loctrack->qcls[q], lnkloc); + } + }//for each qset + //merge rloci, if any + if (loctrack->rloci.Count()>0) { + for (int l=0;lrloci.Count();l++) { + if (loctrack->rloci[l]!=lnkloc && loctrack->rloci[l]->t_ptr!=this) { + rloci.Add(loctrack->rloci[l]); + loctrack->rloci[l]->t_ptr=this; + } + } + } + if (loctrack->startstart; + if (loctrack->end>end) end=loctrack->end; + if (strand=='.' && loctrack->strand!='.') strand=loctrack->strand; + } + + /* + void add_QLoci(GList* loci, int q, GLocus& r) { + // only add loci overlapping given refloc + //rloc=&r; + if (loci==NULL) return; + for (int i=0;iCount();i++) { + GLocus* loc=loci->Get(i); + // if (!loc->exonOverlap(r)) continue; //do we really needed exon overlap? 
+ if (!loc->overlap(r)) continue; + if (start==0 || start>loc->start) start=loc->start; + if (end==0 || endend) end=loc->end; + loc->t_ptr=this; + loc->qfidx=q; + if (qcls[q]==NULL) qcls[q]=new GQCluster(); + qcls[q]->addLocus(loc); + } + strand=r.mrnas[0]->strand; + } + */ + ~GTrackLocus() { + for (int q=0;q=MAX_QFILES) + GError("Error: qfidx index out of bounds (%d) for GTrackLocus!\n",q); + return qcls[q]; + } +}; + +class GXConsensus:public GSeg { + public: + static int count; + int id; //XConsensus ID + int tss_id; //group id for those xconsensi with shared first exon + int p_id; //group id for those xconsensi with "similar" protein + GffObj* tcons; //longest transcript to represent the combined "consensus" structure + GffObj* ref; //overlapping reference transcript + char refcode; // the code for ref relationship (like in the tracking file) + char* aa; + int aalen; + GXConsensus* contained; //if contained into another GXConsensus + //list of ichain-matching query (cufflinks) transcripts that contributed to this consensus + GList qchain; + GXConsensus(GffObj* c, CEqList* qlst, GffObj* r=NULL, char rcode=0) + :qchain(false,false,false) { + ref=r; + refcode=rcode; + tcons=c; + if (qlst!=NULL) qchain.Add(*((GList*)qlst)); + else qchain.Add(c); + count++; + tss_id=0; + p_id=0; + aalen=0; + id=count; + aa=NULL; + start=tcons->start; + end=tcons->end; + contained=NULL; + } + ~GXConsensus() { + if (aa!=NULL) GFREE(aa); + } +}; + +class GXLocus:public GSeg { + public: + int id; + int num_mtcons; //number of multi-exon "consensus" transcripts in this locus + char strand; + GList rloci; //list of ref loci overlapping any of the mexons + GList qloci; //loci from all qry datasets that have overlapping exons with this region + GArray mexons; //list of merged exonic regions for this locus + GList tcons; + GXLocus(GLocus* lfirst=NULL):GSeg(0,0), + rloci((GCompareProc*)cmpByPtr, (GFreeProc*)NULL, true), + qloci((GCompareProc*)cmpByPtr, (GFreeProc*)NULL, true), + mexons(true,true), tcons(true,true,false) { + strand='.'; + num_mtcons=0; + if (lfirst!=NULL) { + add_Locus(lfirst); + } + id=0; + } + + bool add_Locus(GLocus* loc) { + if (mexons.Count()>0 && (endstart || start > loc->end)) + return false; //no chance for overlapping exons + if (mexons.Count()==0) { + mexons.Add(loc->mexons); + start=loc->start; + end=loc->end; + if (loc->qfidx<0) rloci.Add(loc); + else qloci.Add(loc); + strand=loc->mrna_maxcov->strand; + loc->xlocus=this; + return true; + } + int f=0; + if (loc->qfidx<0) { + if (rloci.Found(loc,f)) return false; + } + else if (qloci.Found(loc,f)) return false; + + // -- merge mexons + GArray ovlexons(true,true); //list of locus.mexons indexes overlapping existing mexons + int i=0; //index of first mexons with a merge + int j=0; //index current mrna exon + while (imexons.Count()) { + uint istart=mexons[i].start; + uint iend=mexons[i].end; + uint jstart=loc->mexons[j].start; + uint jend=loc->mexons[j].end; + if (iendiend) { //mexons[i] end extend + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } // mexons[i] end extend + // } //exon overlap + j++; //check the next locus.mexon + }//while mexons + if (ovlexons.Count()==0) return false; + if (strand=='.' 
&& loc->mrna_maxcov->strand!='.') + strand=loc->mrna_maxcov->strand; + //have exon overlap: + //-- add the rest of the non-overlapping mexons: + GSeg seg; + for (int i=0;imexons.Count();i++) { + seg.start=loc->mexons[i].start; + seg.end=loc->mexons[i].end; + if (!ovlexons.Exists(i)) mexons.Add(seg); + } + // -- adjust start/end as needed + if (start>loc->start) start=loc->start; + if (endend) end=loc->end; + loc->xlocus=this; + if (loc->qfidx<0) rloci.Add(loc); + else qloci.Add(loc); + return true; + } + + void addMerge(GXLocus& oxloc) { + GArray ovlexons(true,true); //list of oxloc.mexons indexes overlapping existing mexons + int i=0; //index of first mexons with a merge + int j=0; //index current mrna exon + while (iiend) { //mexons[i] end extend + mexons[i].end=jend; + //now this could overlap the next mexon(s), so we have to merge them all + while (imexons[i+1].start) { + uint nextend=mexons[i+1].end; + mexons.Delete(i+1); + if (nextend>mexons[i].end) { + mexons[i].end=nextend; + break; //no need to check next mexons + } + } //while next mexons merge + } // mexons[i] end extend + // } //exon overlap + j++; //check the next oxloc.mexon + } + if (ovlexons.Count()==0) { + GError("Error: attempt to merge GXLoci with non-overlapping exons!\n"); + } + //-- add the rest of the non-overlapping mexons: + GSeg seg; + for (int i=0;ioxloc.start) start=oxloc.start; + if (endxlocus==this) continue; + qloci.Add(oxloc.qloci[i]); + oxloc.qloci[i]->xlocus=this; + } + for (int i=0;ixlocus==this) continue; + rloci.Add(oxloc.rloci[i]); + oxloc.rloci[i]->xlocus=this; + } + } //::addMerge() + + + void checkContainment() { + //checking containment + for (int j=0;jcontained!=NULL && t->tcons->exons.Count()>1) continue; //will check the container later anyway + int c_status=checkXConsContain(t->tcons, tcons[i]->tcons); + if (c_status==0) continue; //no containment relationship between t and tcons[i] + if (c_status>0) { //t is a container for tcons[i] + tcons[i]->contained=t; + } + else { //contained into exising XCons + t->contained=tcons[i]; + break; + } + } + } + } + + int checkXConsContain(GffObj* a, GffObj* b) { + // returns 1 if a is the container of b + // -1 if a is contained in b + // 0 if no + if (a->endstart || b->endstart) return 0; + if (a->exons.Count()==b->exons.Count()) { + if (a->exons.Count()>1) return 0; //same number of exons - no containment possible + //because equivalence was already tested + else { //single exon containment testing + //this is fuzzy and messy (end result may vary depending on the testing order) + int ovlen=a->exons[0]->overlapLen(b->exons[0]); + int minlen=GMIN(a->covlen, b->covlen); + if (ovlen>=minlen*0.8) { //if at least 80% of the shorter one is covered, it is contained + return ((a->covlen>b->covlen) ? 1 : -1); + } + else return 0; + //if (a->start<=b->start+10 && a->end+10>=b->end) return 1; + // else { if (b->start<=a->start+10 && b->end+10>=a->end) return -1; + // else return 0; + //} + } + } + //different number of exons: + if (a->exons.Count()>b->exons.Count()) return t_contains(*a, *b) ? 1:0; + else return t_contains(*b, *a) ? 
-1 : 0; + } + + void addXCons(GXConsensus* t) { + tcons.Add(t); + } + +}; //GXLocus + + + +int parse_mRNAs(GfList& mrnas, + GList& glstdata, + bool is_ref_set=true, + bool check_for_dups=false, + int qfidx=-1, bool only_multiexon=false); + +//reading a mRNAs from a gff file and grouping them into loci +void read_mRNAs(FILE* f, GList& seqdata, GList* ref_data=NULL, + bool check_for_dups=false, int qfidx=-1, const char* fname=NULL, + bool only_multiexon=false); + +void read_transcripts(FILE* f, GList& seqdata, bool keepAttrs=true); +void sort_GSeqs_byName(GList& seqdata); + + +bool tMatch(GffObj& a, GffObj& b, int& ovlen, bool fuzzunspl=false, bool contain_only=false); + +//use qsearch to "position" a given coordinate x within a list of transcripts sorted +//by their start (lowest) coordinate; the returned int is the list index of the +//closest GffObj starting just *ABOVE* coordinate x +//Convention: returns -1 if there is no such GffObj (i.e. last GffObj start <= x) +int qsearch_mrnas(uint x, GList& mrnas); +int qsearch_loci(uint x, GList& segs); // same as above, but for GSeg lists + +GSeqData* getRefData(int gid, GList& ref_data); //returns reference GSeqData for a specific genomic sequence + +#endif diff --git a/src/hits.cpp b/src/hits.cpp new file mode 100644 index 0000000..910ba0f --- /dev/null +++ b/src/hits.cpp @@ -0,0 +1,1097 @@ +/* + * hits.cpp + * Cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "hits.h" +#include "tokenize.h" + +using namespace std; + +#if ENABLE_THREADS +boost::mutex RefSequenceTable::table_lock; +#endif + +int num_deleted = 0; + +void ReadHit::trim(int trimmed_length) +{ + bool antisense_aln = _sam_flag & 0x10; + + vector new_cigar; + int new_left = 0; + + if (!antisense_aln) + { + int pos = _left; + new_left = _left; + int length = 0; + for (vector::iterator i = _cigar.begin(); i < _cigar.end(); ++i) + { + const CigarOp& op = *i; + + if (length < trimmed_length) + { + switch(op.opcode) + { + case REF_SKIP: + //gaps_out.push_back(make_pair(pos, pos + op.length - 1)); + pos += op.length; + new_cigar.push_back(op); + break; + case SOFT_CLIP: + assert(false); // not sure if this case is right + pos += op.length; + length += op.length; + new_cigar.push_back(op); + break; + case HARD_CLIP: + new_cigar.push_back(op); + break; + case MATCH: + if (length + op.length < trimmed_length) + { + pos += op.length; + length += op.length; + new_cigar.push_back(op); + } + else + { + new_cigar.push_back(CigarOp(MATCH, trimmed_length - length)); + pos += trimmed_length - length; + length += trimmed_length - length; + } + break; + case INS: + assert(false); // not sure if this case is right + pos -= op.length; + length -= op.length; + new_cigar.push_back(op); + break; + case DEL: + assert(false); // not sure if this case is right + pos += op.length; + length += op.length; + new_cigar.push_back(op); + break; + default: + break; + } + } + } + } + else + { + int pos = _right; + int length = 0; + for (vector::reverse_iterator i = _cigar.rbegin(); i < _cigar.rend(); ++i) + { + const CigarOp& op = *i; + + if (length < trimmed_length) + { + switch(op.opcode) + { + case REF_SKIP: + //gaps_out.push_back(make_pair(pos, pos + op.length - 1)); + pos -= op.length; + new_cigar.push_back(op); + break; + case SOFT_CLIP: + assert(false); // not sure if this case is right + pos -= op.length; + 
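+ // (antisense alignment: the CIGAR is walked from its right end here, so pos moves leftward through the reference)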
length += op.length; + new_cigar.push_back(op); + break; + case HARD_CLIP: + new_cigar.push_back(op); + break; + case MATCH: + if (length + op.length < trimmed_length) + { + pos -= op.length; + length += op.length; + new_cigar.push_back(op); + } + else + { + new_cigar.push_back(CigarOp(MATCH, trimmed_length - length)); + pos -= trimmed_length - length; + length += trimmed_length - length; + } + break; + case INS: + assert(false); // not sure if this case is right + pos += op.length; + length -= op.length; + new_cigar.push_back(op); + break; + case DEL: + assert(false); // not sure if this case is right + pos -= op.length; + length += op.length; + new_cigar.push_back(op); + break; + default: + break; + } + } + } + _left = pos; + } + _cigar = new_cigar; + _right = get_right(); + assert (trimmed_length == read_len()); +} + +//static const int max_read_length = 1024; + +bool hit_insert_id_lt(const ReadHit& h1, const ReadHit& h2) +{ + return h1.insert_id() < h2.insert_id(); +} + +bool hits_eq_mod_id(const ReadHit& lhs, const ReadHit& rhs) +{ + return (lhs.ref_id() == rhs.ref_id() && + lhs.antisense_align() == rhs.antisense_align() && + lhs.left() == rhs.left() && + lhs.source_strand() == rhs.source_strand() && + lhs.cigar() == rhs.cigar()); +} + +// Compares for structural equality, but won't declare multihits equal to one another +bool hits_eq_non_multi(const MateHit& lhs, const MateHit& rhs) +{ + if ((lhs.is_multi() || rhs.is_multi() ) && lhs.insert_id() != rhs.insert_id()) + return false; + return hits_equals(lhs, rhs); +} + +// Compares for structural equality, but won't declare multihits equal to one another +// and won't return true for hits from different read groups (e.g. replicate samples) +bool hits_eq_non_multi_non_replicate(const MateHit& lhs, const MateHit& rhs) +{ + if ((lhs.is_multi() || rhs.is_multi() || lhs.read_group_props() != rhs.read_group_props()) && lhs.insert_id() != rhs.insert_id()) + return false; + return hits_equals(lhs, rhs); +} + +// Does NOT care about the read group this hit came from. 
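+// Two MateHits compare as structurally equal when they lie on the same reference,
+// have the same combination of left/right mates present, and each present mate
+// agrees on position, orientation, strand and CIGAR (see hits_eq_mod_id above).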
+bool hits_equals(const MateHit& lhs, const MateHit& rhs) +{ + if (lhs.ref_id() != rhs.ref_id()) + return false; + + if ((lhs.left_alignment() == NULL) != (rhs.left_alignment() == NULL)) + return false; + if ((lhs.right_alignment() == NULL) != (rhs.right_alignment() == NULL)) + return false; + if (lhs.left_alignment()) + { + if (!(hits_eq_mod_id(*lhs.left_alignment(),*(rhs.left_alignment())))) + return false; + } + if (lhs.right_alignment()) + { + if (!(hits_eq_mod_id(*lhs.right_alignment(),*(rhs.right_alignment())))) + return false; + } + return true; +} + +bool has_no_collapse_mass(const MateHit& hit) +{ + return hit.collapse_mass() == 0; +} + +// Assumes hits are sorted by mate_hit_lt +// Does not collapse hits that are multi-reads +void collapse_hits(const vector& hits, + vector& non_redundant) +{ + copy(hits.begin(), hits.end(), back_inserter(non_redundant)); + vector::iterator new_end = unique(non_redundant.begin(), + non_redundant.end(), + hits_eq_non_multi_non_replicate); + non_redundant.erase(new_end, non_redundant.end()); + non_redundant.resize(non_redundant.size()); + + foreach(MateHit& hit, non_redundant) + hit.collapse_mass(0); + + size_t curr_aln = 0; + size_t curr_unique_aln = 0; + while (curr_aln < hits.size()) + { + if (hits_eq_non_multi_non_replicate(non_redundant[curr_unique_aln], hits[curr_aln]) || hits_eq_non_multi_non_replicate(non_redundant[++curr_unique_aln], hits[curr_aln])) + { + double more_mass = hits[curr_aln].common_scale_mass(); + //assert(non_redundant[curr_unique_aln].collapse_mass() == 0 || !non_redundant[curr_unique_aln].is_multi()); + non_redundant[curr_unique_aln].incr_collapse_mass(more_mass); + } + else + assert(false); + + ++curr_aln; + } + + //foreach(MateHit& hit, non_redundant) + //assert(hit.collapse_mass() <= 1 || !hit.is_multi()); + + //non_redundant.erase(remove_if(non_redundant.begin(),non_redundant.end(),has_no_collapse_mass), non_redundant.end()); + +} + +// Places multi-reads to the right of reads they match +bool mate_hit_lt(const MateHit& lhs, const MateHit& rhs) +{ + if (lhs.left() != rhs.left()) + return lhs.left() < rhs.left(); + if (lhs.right() != rhs.right()) + return lhs.right() > rhs.right(); + + if ((lhs.left_alignment() == NULL) != (rhs.left_alignment() == NULL)) + return (lhs.left_alignment() == NULL) < (rhs.left_alignment() == NULL); + + if ((lhs.right_alignment() == NULL) != (rhs.right_alignment() == NULL)) + return (lhs.right_alignment() == NULL) < (rhs.right_alignment() == NULL); + + assert ((lhs.right_alignment() == NULL) == (rhs.right_alignment() == NULL)); + assert ((lhs.left_alignment() == NULL) == (rhs.left_alignment() == NULL)); + + const ReadHit* lhs_l = lhs.left_alignment(); + const ReadHit* lhs_r = lhs.right_alignment(); + + const ReadHit* rhs_l = rhs.left_alignment(); + const ReadHit* rhs_r = rhs.right_alignment(); + + if (lhs_l && rhs_l) + { + if (lhs_l->cigar().size() != rhs_l->cigar().size()) + return lhs_l->cigar().size() < rhs_l->cigar().size(); + for (size_t i = 0; i < lhs_l->cigar().size(); ++i) + { + if (lhs_l->cigar()[i].opcode != rhs_l->cigar()[i].opcode) + return lhs_l->cigar()[i].opcode < rhs_l->cigar()[i].opcode; + if (lhs_l->cigar()[i].length != rhs_l->cigar()[i].length) + return lhs_l->cigar()[i].length < rhs_l->cigar()[i].length; + } + } + + if (lhs_r && rhs_r) + { + if (lhs_r->cigar().size() != rhs_r->cigar().size()) + return lhs_r->cigar().size() < rhs_r->cigar().size(); + for (size_t i = 0; i < lhs_r->cigar().size(); ++i) + { + if (lhs_r->cigar()[i].opcode != rhs_r->cigar()[i].opcode) + return 
lhs_r->cigar()[i].opcode < rhs_r->cigar()[i].opcode; + if (lhs_r->cigar()[i].length != rhs_r->cigar()[i].length) + return lhs_r->cigar()[i].length < rhs_r->cigar()[i].length; + } + } + + if (lhs.is_multi() != rhs.is_multi()) + { + return rhs.is_multi(); + } + + return false; +} + + +ReadHit HitFactory::create_hit(const string& insert_name, + const string& ref_name, + int left, + const vector& cigar, + CuffStrand source_strand, + const string& partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag) +{ + InsertID insert_id = _insert_table.get_id(insert_name); + RefID reference_id = _ref_table.get_id(ref_name, NULL); + RefID partner_ref_id = _ref_table.get_id(partner_ref, NULL); + + return ReadHit(reference_id, + insert_id, + left, + cigar, + source_strand, + partner_ref_id, + partner_pos, + edit_dist, + num_hits, + base_mass, + sam_flag); +} + +ReadHit HitFactory::create_hit(const string& insert_name, + const string& ref_name, + uint32_t left, + uint32_t read_len, + CuffStrand source_strand, + const string& partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag) +{ + InsertID insert_id = _insert_table.get_id(insert_name); + RefID reference_id = _ref_table.get_id(ref_name, NULL); + RefID partner_ref_id = _ref_table.get_id(partner_ref, NULL); + + return ReadHit(reference_id, + insert_id, + left, + read_len, + source_strand, + partner_ref_id, + partner_pos, + edit_dist, + num_hits, + base_mass, + sam_flag); +} + +// populate a bam_t This will +bool BAMHitFactory::next_record(const char*& buf, size_t& buf_size) +{ + if (_next_hit.data) + { + free(_next_hit.data); + _next_hit.data = NULL; + } + + if (records_remain() == false) + return false; + + mark_curr_pos(); + + memset(&_next_hit, 0, sizeof(_next_hit)); + + int bytes_read = samread(_hit_file, &_next_hit); + if (bytes_read < 0) + { + _eof_encountered = true; + return false; + } + buf = (const char*)&_next_hit; + buf_size = bytes_read; + + return true; +} + +CuffStrand use_stranded_protocol(uint32_t sam_flag, MateStrandMapping msm) +{ + bool antisense_aln = sam_flag & 0x10; + if (((sam_flag & BAM_FPAIRED) && (sam_flag & BAM_FREAD1)) || !(sam_flag & BAM_FPAIRED)) // first-in-pair or single-end + { + switch(msm) + { + case FF: + case FR: + return (antisense_aln) ? CUFF_REV : CUFF_FWD; + break; + case RF: + case RR: + return (antisense_aln) ? CUFF_FWD : CUFF_REV; + break; + } + } + else // second-in-pair read + { + switch (msm) + { + case FF: + case RF: + return (antisense_aln) ? CUFF_REV : CUFF_FWD; + break; + case FR: + case RR: + return (antisense_aln) ? 
CUFF_FWD : CUFF_REV; + break; + } + } + assert(false); + return CUFF_STRAND_UNKNOWN; +} + + +bool BAMHitFactory::get_hit_from_buf(const char* orig_bwt_buf, + ReadHit& bh, + bool strip_slash, + char* name_out, + char* name_tags) +{ + const bam1_t* hit_buf = (const bam1_t*)orig_bwt_buf; + + uint32_t sam_flag = hit_buf->core.flag; + + int text_offset = hit_buf->core.pos; + int text_mate_pos = hit_buf->core.mpos; + int target_id = hit_buf->core.tid; + int mate_target_id = hit_buf->core.mtid; + + vector cigar; + bool spliced_alignment = false; + int num_hits = 1; + + //header->target_name[c->tid] + + if (sam_flag & 0x4 || target_id < 0) + { + //assert(cigar.size() == 1 && cigar[0].opcode == MATCH); + bh = create_hit(bam1_qname(hit_buf), + "*", + 0, // SAM files are 1-indexed + 0, + CUFF_STRAND_UNKNOWN, + "*", + 0, + 0, + 1, + 1.0, + sam_flag); + return true; + } + if (target_id >= _hit_file->header->n_targets) + { + fprintf (stderr, "BAM error: file contains hits to sequences not in header SQ records (%s)\n", bam1_qname(hit_buf)); + return false; + } + + string text_name = _hit_file->header->target_name[target_id]; + + for (int i = 0; i < hit_buf->core.n_cigar; ++i) + { + //char* t; + + int length = bam1_cigar(hit_buf)[i] >> BAM_CIGAR_SHIFT; + if (length <= 0) + { + fprintf (stderr, "BAM error: CIGAR op has zero length (%s)\n", bam1_qname(hit_buf)); + return false; + } + + CigarOpCode opcode; + switch(bam1_cigar(hit_buf)[i] & BAM_CIGAR_MASK) + { + case BAM_CMATCH: opcode = MATCH; break; + case BAM_CINS: opcode = INS; break; + case BAM_CDEL: opcode = DEL; break; + case BAM_CSOFT_CLIP: opcode = SOFT_CLIP; break; + case BAM_CHARD_CLIP: opcode = HARD_CLIP; break; + case BAM_CPAD: opcode = PAD; break; + case BAM_CREF_SKIP: + opcode = REF_SKIP; + spliced_alignment = true; + if (length > (int)max_intron_length) + { + //fprintf(stderr, "Encounter REF_SKIP > max_gene_length, skipping\n"); + return false; + } + break; + default: + //fprintf (stderr, "SAM error on line %d: invalid CIGAR operation\n", _line_num); + return false; + } + if (opcode != HARD_CLIP) + cigar.push_back(CigarOp(opcode, length)); + } + + string mrnm; + if (mate_target_id >= 0) + { + if (mate_target_id == target_id) + { + mrnm = _hit_file->header->target_name[mate_target_id]; +// if (abs((int)text_mate_pos - (int)text_offset) > (int)max_intron_length) +// { +// //fprintf (stderr, "Mates are too distant, skipping\n"); +// return false; +// } + } + else + { + //fprintf(stderr, "Trans-spliced mates are not currently supported, skipping\n"); + return false; + } + } + else + { + text_mate_pos = 0; + } + + CuffStrand source_strand = CUFF_STRAND_UNKNOWN; + unsigned char num_mismatches = 0; + + uint8_t* ptr = bam_aux_get(hit_buf, "XS"); + if (ptr) + { + char src_strand_char = bam_aux2A(ptr); + if (src_strand_char == '-') + source_strand = CUFF_REV; + else if (src_strand_char == '+') + source_strand = CUFF_FWD; + } + + ptr = bam_aux_get(hit_buf, "NM"); + if (ptr) + { + num_mismatches = bam_aux2i(ptr); + } + + ptr = bam_aux_get(hit_buf, "NH"); + if (ptr) + { + num_hits = bam_aux2i(ptr); + } + + double mass = 1.0; + ptr = bam_aux_get(hit_buf, "ZF"); + if (ptr) + { + mass = bam_aux2i(ptr); + if (mass <= 0.0) + mass = 1.0; + } + + if (_rg_props.strandedness() == STRANDED_PROTOCOL && source_strand == CUFF_STRAND_UNKNOWN) + source_strand = use_stranded_protocol(sam_flag, _rg_props.mate_strand_mapping()); + + if (!spliced_alignment) + { + //assert(_rg_props.strandedness() == STRANDED_PROTOCOL || source_strand == CUFF_STRAND_UNKNOWN); + + 
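+ // An unspliced alignment may legitimately carry an unknown strand; only a
+ // spliced alignment without an XS attribute triggers the warning in the else branch below.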
//assert(cigar.size() == 1 && cigar[0].opcode == MATCH); + bh = create_hit(bam1_qname(hit_buf), + text_name, + text_offset, // BAM files are 0-indexed + cigar, + source_strand, + mrnm, + text_mate_pos, + num_mismatches, + num_hits, + mass, + sam_flag); + return true; + + } + else + { + if (source_strand == CUFF_STRAND_UNKNOWN) + { + fprintf(stderr, "BAM record error: found spliced alignment without XS attribute\n"); + } + + bh = create_hit(bam1_qname(hit_buf), + text_name, + text_offset, // BAM files are 0-indexed + cigar, + source_strand, + mrnm, + text_mate_pos, + num_mismatches, + num_hits, + mass, + sam_flag); + return true; + } + + + return true; +} + + + +Platform str_to_platform(const string pl_str) +{ + if (pl_str == "SOLiD") + { + return SOLID; + } + else if (pl_str == "Illumina") + { + return ILLUMINA; + } + else + { + return UNKNOWN_PLATFORM; + } +} + +// Parses the header to determine platform and other properties +bool HitFactory::parse_header_string(const string& header_rec, + ReadGroupProperties& rg_props) +{ + vector columns; + tokenize(header_rec, "\t", columns); + + if (columns[0] == "@RG") + { + for (size_t i = 1; i < columns.size(); ++i) + { + vector fields; + tokenize(columns[i], ":", fields); + if (fields[0] == "PL") + { + if (rg_props.platform() == UNKNOWN_PLATFORM) + { + Platform p = str_to_platform(fields[1]); + rg_props.platform(p); + } + else + { + Platform p = str_to_platform(fields[1]); + if (p != rg_props.platform()) + { + fprintf(stderr, "Error: Processing reads from different platforms is not currently supported\n"); + return false; + } + } + + } + } + } + else if (columns[0] == "@SQ") + { + _num_seq_header_recs++; + for (size_t i = 1; i < columns.size(); ++i) + { + vector fields; + tokenize(columns[i], ":", fields); + if (fields[0] == "SN") + { + // Populate the RefSequenceTable with the sequence dictionary, + // to ensure that (for example) downstream GTF files are sorted + // in an order consistent with the header, and to enforce that + // BAM records appear in the order implied by the header + RefID _id = _ref_table.get_id(fields[1], NULL); + + const RefSequenceTable::SequenceInfo* info = _ref_table.get_info(_id); + + if (info->observation_order != _num_seq_header_recs) + { + if (info->name != fields[1]) + { + fprintf(stderr, "Error: Hash collision between references '%s' and '%s'.\n", info->name, fields[1].c_str()); + } + else + { + fprintf(stderr, "Error: sort order of reads in BAMs must be the same\n"); + } + exit(1); + } + } + } + } + + return true; +} + +void HitFactory::finalize_rg_props() +{ + if (_rg_props.platform() == SOLID) + { + _rg_props.strandedness(STRANDED_PROTOCOL); + _rg_props.std_mate_orientation(MATES_POINT_TOWARD); + } + else + { + // Default to Illumina's unstranded protocol params for strandedness and + // mate orientation + _rg_props.strandedness(UNSTRANDED_PROTOCOL); + _rg_props.std_mate_orientation(MATES_POINT_TOWARD); + } +} + +static const unsigned MAX_HEADER_LEN = 4 * 1024 * 1024; // 4 MB + +bool BAMHitFactory::inspect_header() +{ + bam_header_t* header = _hit_file->header; + + if (header == NULL) + { + fprintf(stderr, "Warning: No BAM header\n"); + return false; + } + + if (header->l_text >= MAX_HEADER_LEN) + { + fprintf(stderr, "Warning: BAM header too large\n"); + return false; + } + + if (header->l_text == 0) + { + fprintf(stderr, "Warning: BAM header has 0 length or is corrupted. 
Try using 'samtools reheader'.\n"); + return false; + } + + + if (header->text != NULL) + { + char* h_text = strdup(header->text); + char* pBuf = h_text; + while(pBuf - h_text < header->l_text) + { + char* nl = strchr(pBuf, '\n'); + if (nl) + { + *nl = 0; + parse_header_string(pBuf, _rg_props); + pBuf = ++nl; + } + else + { + pBuf = h_text + header->l_text; + } + } + + free(h_text); + } + + finalize_rg_props(); + return true; +} + + +bool SAMHitFactory::next_record(const char*& buf, size_t& buf_size) +{ + mark_curr_pos(); + + bool new_rec = fgets(_hit_buf, _hit_buf_max_sz - 1, _hit_file); + if (!new_rec) + return false; + ++_line_num; + char* nl = strrchr(_hit_buf, '\n'); + if (nl) *nl = 0; + buf = _hit_buf; + buf_size = _hit_buf_max_sz - 1; + return true; +} + +bool SAMHitFactory::get_hit_from_buf(const char* orig_bwt_buf, + ReadHit& bh, + bool strip_slash, + char* name_out, + char* name_tags) +{ + char bwt_buf[10*2048]; + + strcpy(bwt_buf, orig_bwt_buf); + // Are we still in the header region? + if (bwt_buf[0] == '@') + return false; + + const char* buf = bwt_buf; + const char* _name = strsep((char**)&buf,"\t"); + if (!_name) + return false; + char name[2048]; + strncpy(name, _name, 2047); + + const char* sam_flag_str = strsep((char**)&buf,"\t"); + if (!sam_flag_str) + return false; + + const char* text_name = strsep((char**)&buf,"\t"); + if (!text_name) + return false; + + const char* text_offset_str = strsep((char**)&buf,"\t"); + if (!text_offset_str) + return false; + + const char* map_qual_str = strsep((char**)&buf,"\t"); + if (!map_qual_str) + return false; + + const char* cigar_str = strsep((char**)&buf,"\t"); + if (!cigar_str) + return false; + + const char* mate_ref_name = strsep((char**)&buf,"\t"); + if (!mate_ref_name) + return false; + + const char* mate_pos_str = strsep((char**)&buf,"\t"); + if (!mate_pos_str) + return false; + + const char* inferred_insert_sz_str = strsep((char**)&buf,"\t"); + if (!inferred_insert_sz_str) + return false; + + const char* seq_str = strsep((char**)&buf,"\t"); + if (!seq_str) + return false; + + const char* qual_str = strsep((char**)&buf,"\t"); + if (!qual_str) + return false; + + + int sam_flag = atoi(sam_flag_str); + int text_offset = atoi(text_offset_str); + int text_mate_pos = atoi(mate_pos_str); + + // Copy the tag out of the name field before we might wipe it out + char* pipe = strrchr(name, '|'); + if (pipe) + { + if (name_tags) + strcpy(name_tags, pipe); + *pipe = 0; + } + // Stripping the slash and number following it gives the insert name + char* slash = strrchr(name, '/'); + if (strip_slash && slash) + *slash = 0; + + const char* p_cig = cigar_str; + //int len = strlen(sequence); + vector cigar; + bool spliced_alignment = false; + int num_hits = 1; + + if ((sam_flag & 0x4) ||!strcmp(text_name, "*")) + { + //assert(cigar.size() == 1 && cigar[0].opcode == MATCH); + bh = create_hit(name, + "*", + 0, // SAM files are 1-indexed + 0, + CUFF_STRAND_UNKNOWN, + "*", + 0, + 0, + 1, + 1.0, + sam_flag); + return true; + } + // Mostly pilfered direct from the SAM tools: + while (*p_cig) + { + char* t; + int length = (int)strtol(p_cig, &t, 10); + if (length <= 0) + { + fprintf (stderr, "SAM error on line %d: CIGAR op has zero length\n", _line_num); + return false; + } + char op_char = toupper(*t); + CigarOpCode opcode; + if (op_char == 'M') + { + /*if (length > max_read_length) + { + fprintf(stderr, "SAM error on line %d: %s: MATCH op has length > %d\n", line_num, name, max_read_length); + return false; + }*/ + opcode = MATCH; + } + else if 
(op_char == 'I') opcode = INS; + else if (op_char == 'D') + { + opcode = DEL; + } + else if (op_char == 'N') + { + opcode = REF_SKIP; + spliced_alignment = true; + if (length > (int)max_intron_length) + { + //fprintf(stderr, "Encounter REF_SKIP > max_gene_length, skipping\n"); + return false; + } + } + else if (op_char == 'S') opcode = SOFT_CLIP; + else if (op_char == 'H') opcode = HARD_CLIP; + else if (op_char == 'P') opcode = PAD; + else + { + fprintf (stderr, "SAM error on line %d: invalid CIGAR operation\n", _line_num); + return false; + } + p_cig = t + 1; + //i += length; + if (opcode != HARD_CLIP) + cigar.push_back(CigarOp(opcode, length)); + } + if (*p_cig) + { + fprintf (stderr, "SAM error on line %d: unmatched CIGAR operation\n", _line_num); + return false; + } + + string mrnm; + if (strcmp(mate_ref_name, "*")) + { + if (!strcmp(mate_ref_name, "=") || !strcmp(mate_ref_name, text_name)) + { + mrnm = text_name; +// if (abs((int)text_mate_pos - (int)text_offset) > (int)max_intron_length) +// { +// //fprintf (stderr, "Mates are too distant, skipping\n"); +// return false; +// } + } + else + { + //fprintf(stderr, "Trans-spliced mates are not currently supported, skipping\n"); + return false; + } + } + else + { + text_mate_pos = 0; + } + + CuffStrand source_strand = CUFF_STRAND_UNKNOWN; + unsigned char num_mismatches = 0; + + const char* tag_buf = buf; + + double mass = 1.0; + + while((tag_buf = strsep((char**)&buf,"\t"))) + { + + char* first_colon = (char*)strchr(tag_buf, ':'); + if (first_colon) + { + *first_colon = 0; + ++first_colon; + char* second_colon = strchr(first_colon, ':'); + if (second_colon) + { + *second_colon = 0; + ++second_colon; + const char* first_token = tag_buf; + //const char* second_token = first_colon; + const char* third_token = second_colon; + if (!strcmp(first_token, "XS")) + { + if (*third_token == '-') + source_strand = CUFF_REV; + else if (*third_token == '+') + source_strand = CUFF_FWD; + } + else if (!strcmp(first_token, "NM")) + { + num_mismatches = atoi(third_token); + } + else if (!strcmp(first_token, "NH")) + { + num_hits = atoi(third_token); + } + else if (!strcmp(first_token, "ZF")) + { + mass = atof(third_token); + if (mass <= 0.0) + mass = 1.0; + } + else + { + + } + } + } + } + + // Don't let the protocol setting override explicit XS tags + if (_rg_props.strandedness() == STRANDED_PROTOCOL && source_strand == CUFF_STRAND_UNKNOWN) + source_strand = use_stranded_protocol(sam_flag, _rg_props.mate_strand_mapping()); + + if (!spliced_alignment) + { + //assert(cigar.size() == 1 && cigar[0].opcode == MATCH); + bh = create_hit(name, + text_name, + text_offset - 1, + cigar, + source_strand, + mrnm, + text_mate_pos - 1, + num_mismatches, + num_hits, + mass, + sam_flag); + return true; + + } + else + { + if (source_strand == CUFF_STRAND_UNKNOWN) + { + fprintf(stderr, "SAM error on line %d: found spliced alignment without XS attribute\n", _line_num); + } + + bh = create_hit(name, + text_name, + text_offset - 1, + cigar, + source_strand, + mrnm, + text_mate_pos - 1, + num_mismatches, + num_hits, + mass, + sam_flag); + return true; + } + return false; +} + +bool SAMHitFactory::inspect_header() +{ + char pBuf[10 * 1024]; + + off_t curr_pos = ftello(_hit_file); + rewind(_hit_file); + + while (fgets(pBuf, 10*1024, _hit_file)) + { + if (pBuf[0] != '@') + { + break; // done with the header. 
+ } + char* nl = strchr(pBuf, '\n'); + if (nl) + { + *nl = 0; + parse_header_string(pBuf, _rg_props); + } + } + + fseek(_hit_file, curr_pos, SEEK_SET); + + finalize_rg_props(); + return true; +} diff --git a/src/hits.h b/src/hits.h new file mode 100644 index 0000000..ee1331e --- /dev/null +++ b/src/hits.h @@ -0,0 +1,1050 @@ +#ifndef BWT_MAP_H +#define BWT_MAP_H + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "common.h" +#include "multireads.h" + +using namespace std; +using boost::shared_ptr; + +/* + * hits.h + * Cufflinks + * + * Created by Cole Trapnell on 3/23/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +enum CuffStrand { CUFF_STRAND_UNKNOWN = 0, CUFF_FWD = 1, CUFF_REV = 2, CUFF_BOTH = 3 }; + + +enum CigarOpCode +{ + MATCH = BAM_CMATCH, + INS = BAM_CINS, + DEL = BAM_CDEL, + REF_SKIP = BAM_CREF_SKIP, + SOFT_CLIP = BAM_CSOFT_CLIP, + HARD_CLIP = BAM_CHARD_CLIP, + PAD = BAM_CPAD +}; + +struct CigarOp +{ + CigarOp(CigarOpCode o, uint32_t l) : opcode(o), length(l) {} + CigarOpCode opcode : 3; + uint32_t length : 29; + + bool operator==(const CigarOp& rhs) const { return opcode == rhs.opcode && length == rhs.length; } + +}; + +typedef uint64_t InsertID; +typedef uint64_t RefID; + +extern int num_deleted; + +/* Stores the information from a single record of the bowtie map. A given read + may have many of these. +*/ +struct ReadHit +{ + ReadHit() : + _ref_id(0), + _insert_id(0), + _base_mass(1.0), + _edit_dist(0xFFFFFFFF), + _num_hits(1) + { + num_deleted++; + } + + ReadHit(RefID ref_id, + InsertID insert_id, + int left, + int read_len, + CuffStrand source_strand, + RefID partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag) : + _ref_id(ref_id), + _insert_id(insert_id), + _left(left), + _partner_ref_id(partner_ref), + _partner_pos(partner_pos), + _cigar(vector(1,CigarOp(MATCH,read_len))), + _source_strand(source_strand), + _base_mass(base_mass), + _edit_dist(edit_dist), + _num_hits(num_hits), + _sam_flag(sam_flag) + { + assert(_cigar.capacity() == _cigar.size()); + _right = get_right(); + num_deleted++; + } + + ReadHit(RefID ref_id, + InsertID insert_id, + int left, + const vector& cigar, + CuffStrand source_strand, + RefID partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag) : + _ref_id(ref_id), + _insert_id(insert_id), + _left(left), + _partner_ref_id(partner_ref), + _partner_pos(partner_pos), + _cigar(cigar), + _source_strand(source_strand), + _base_mass(base_mass), + _edit_dist(edit_dist), + _num_hits(num_hits), + _sam_flag(sam_flag) + { + assert(_cigar.capacity() == _cigar.size()); + _right = get_right(); + num_deleted++; + } + + ReadHit(const ReadHit& other) + { + _ref_id = other._ref_id; + _insert_id = other._insert_id; + _left = other._left; + _partner_ref_id = other._partner_ref_id; + _partner_pos = other._partner_pos; + _cigar = other._cigar; + _source_strand = other._source_strand; + _num_hits = other._num_hits; + _base_mass = other._base_mass; + _edit_dist = other._edit_dist; + _right = get_right(); + _sam_flag = other._sam_flag; + num_deleted++; + } + + ~ReadHit() + { + --num_deleted; + } + + int read_len() const + { + int len = 0; + for (size_t i = 0; i < _cigar.size(); ++i) + { + const CigarOp& op = _cigar[i]; + switch(op.opcode) + { + case MATCH: + case INS: + case SOFT_CLIP: + len += op.length; + break; + default: + break; + 
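+ // (only ops that consume read bases, i.e. MATCH, INS and SOFT_CLIP, add to the
+ // length; the remaining ops such as DEL, REF_SKIP and HARD_CLIP consume no read bases)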
} + } + + return len; + } + + bool contains_splice() const + { + for (size_t i = 0; i < _cigar.size(); ++i) + { + if (_cigar[i].opcode == REF_SKIP) + return true; + } + return false; + } + + bool is_singleton() const + { + return (partner_ref_id() == 0 || + partner_ref_id() != ref_id() || + abs(partner_pos() - left()) > max_partner_dist); + } + + bool operator==(const ReadHit& rhs) const + { + return (_insert_id == rhs._insert_id && + _ref_id == rhs._ref_id && + antisense_align() == rhs.antisense_align() && + _left == rhs._left && + _source_strand == rhs._source_strand && + /* DO NOT USE ACCEPTED IN COMPARISON */ + _cigar == rhs._cigar); + } + + RefID ref_id() const { return _ref_id; } + InsertID insert_id() const { return _insert_id; } + + RefID partner_ref_id() const { return _partner_ref_id; } + int partner_pos() const { return _partner_pos; } + + int left() const { return _left; } + int right() const { return _right; } + CuffStrand source_strand() const { return _source_strand; } + bool antisense_align() const { return _sam_flag & 0x10; } + bool is_first() const { return _sam_flag & 0x40; } + + // Number of multi-hits for this read + int num_hits() const { return _num_hits; } + + // We are ignoring the _base_mass and re-calculating based on multi-hits + double mass() const + { + if (is_singleton()) + return 1.0/_num_hits; + return 0.5 / _num_hits; + } + + // For convenience, if you just want a copy of the gap intervals + // for this hit. + void gaps(vector >& gaps_out) const + { + gaps_out.clear(); + int pos = _left; + for (size_t i = 0; i < _cigar.size(); ++i) + { + const CigarOp& op = _cigar[i]; + + switch(op.opcode) + { + case REF_SKIP: + gaps_out.push_back(make_pair(pos, pos + op.length - 1)); + pos += op.length; + break; + case SOFT_CLIP: + pos += op.length; + break; + case HARD_CLIP: + break; + case MATCH: + pos += op.length; + break; + case INS: + pos -= op.length; + break; + case DEL: + pos += op.length; + break; + default: + break; + } + } + } + + const vector& cigar() const { return _cigar; } + + bool contiguous() const + { + return _cigar.size() == 1 && _cigar[0].opcode == MATCH; + } + + unsigned int edit_dist() const { return _edit_dist; } + + void trim(int trimmed_length); + + //const string& hitfile_rec() const { return _hitfile_rec; } + //void hitfile_rec(const string& rec) { _hitfile_rec = rec; } + +private: + + int get_right() const + { + int r = _left; + for (size_t i = 0; i < _cigar.size(); ++i) + { + const CigarOp& op = _cigar[i]; + + switch(op.opcode) + { + case MATCH: + case REF_SKIP: + case SOFT_CLIP: + case DEL: + r += op.length; + break; + case INS: + case HARD_CLIP: + default: + break; + } + } + return r; + } + + RefID _ref_id; + InsertID _insert_id; // Id of the sequencing insert + int _left; // Position in the reference of the left side of the alignment + int _right; + + RefID _partner_ref_id; // Reference contig on which we expect the mate + int _partner_pos; // Position at which we expect the mate of this hit + + + vector _cigar; + + CuffStrand _source_strand; // Which strand the read really came from, if known + float _base_mass; + unsigned int _edit_dist; // Number of mismatches + int _num_hits; // Number of multi-hits (1 by default) + uint32_t _sam_flag; + //string _hitfile_rec; // Points to the buffer for the record from which this hit came +}; + +class ReadTable +{ +public: + + ReadTable() {} + + // This function should NEVER return zero + InsertID get_id(const string& name) + { + uint64_t _id = hash_string(name.c_str()); + assert(_id); + return _id; 
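+ // (insert IDs are the 64-bit FNV-1 hash of the read name, computed by
+ // hash_string() below, so identical names always map to the same ID
+ // without storing any strings; e.g. repeated calls like
+ // get_id("HWI-EAS12:3:1:0:1") with the same name return the same id)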
+ } + + // Calculate checksum + InsertID get_cs(const string& name) + { + return string_checksum(name.c_str()); + } + +private: + + // This is FNV-1, see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash + inline uint64_t hash_string(const char* __s) + { + uint64_t hash = 0xcbf29ce484222325ull; + for ( ; *__s; ++__s) + { + hash *= 1099511628211ull; + hash ^= *__s; + } + return hash; + } + + + inline uint64_t string_checksum(const char * s) + { + uint64_t c = 0; + for ( ; *s; ++s) + { + c += *s; + } + return c; + } +}; + +class RefSequenceTable +{ +public: + + typedef std::string Sequence; + + struct SequenceInfo + { + SequenceInfo(uint32_t _order, + char* _name, + Sequence* _seq) : + observation_order(_order), + name(_name), + seq(_seq) {} + uint32_t observation_order; + char* name; + Sequence* seq; + }; + + typedef map IDTable; + typedef map InvertedIDTable; + typedef InvertedIDTable::iterator iterator; + typedef InvertedIDTable::const_iterator const_iterator; + + RefSequenceTable(bool keep_names, bool keep_seqs = false) : + _next_id(1), + _keep_names(keep_names) {} + + ~RefSequenceTable() + { + for (InvertedIDTable::iterator itr = _by_id.begin(); + itr != _by_id.end(); + ++itr) + { + free(itr->second.name); + } + } + + RefID get_id(const string& name, + Sequence* seq) + { + if (name.empty()) + return 0; +#if ENABLE_THREADS + table_lock.lock(); +#endif + uint64_t _id = hash_string(name.c_str()); + pair ret = + _by_id.insert(make_pair(_id, SequenceInfo(_next_id, NULL, NULL))); + if (ret.second == true) + { + char* _name = NULL; + if (_keep_names) + _name = strdup(name.c_str()); + ret.first->second.name = _name; + ret.first->second.seq = seq; + ++_next_id; + } + assert (_id); +#if ENABLE_THREADS + table_lock.unlock(); +#endif + return _id; + } + + // You must call invert() before using this function + const char* get_name(RefID ID) const + { + InvertedIDTable::const_iterator itr = _by_id.find(ID); + if (itr != _by_id.end()) + { + //const SequenceInfo& info = itr->second; + return itr->second.name; + } + else + { + return NULL; + } + } + + Sequence* get_seq(RefID ID) const + { + InvertedIDTable::const_iterator itr = _by_id.find(ID); + if (itr != _by_id.end()) + return itr->second.seq; + else + return NULL; + } + + const SequenceInfo* get_info(RefID ID) const + { + + InvertedIDTable::const_iterator itr = _by_id.find(ID); + if (itr != _by_id.end()) + { + return &(itr->second); + } + else + return NULL; + } + + int observation_order(RefID ID) const + { + InvertedIDTable::const_iterator itr = _by_id.find(ID); + if (itr != _by_id.end()) + { + return itr->second.observation_order; + } + else + return -1; + } + + void order_recs_lexicographically() + { + map str_to_id; + + for (InvertedIDTable::iterator i = _by_id.begin(); i != _by_id.end(); ++i) + { + str_to_id[i->second.name] = i->first; + //fprintf(stderr, "%d: %s\n", i->second.observation_order, i->second.name); + } + + size_t new_order = 1; + for (map::iterator i = str_to_id.begin(); i != str_to_id.end(); ++i, ++new_order) + { + _by_id.find(get_id(i->first, NULL))->second.observation_order = new_order; + verbose_msg( "%lu: %s\n", new_order, i->first.c_str()); + } + } + + void print_rec_ordering() + { + for (InvertedIDTable::iterator i = _by_id.begin(); i != _by_id.end(); ++i) + { + verbose_msg( "%lu: %s\n", i->second.observation_order, i->second.name); + } + } + + iterator begin() { return _by_id.begin(); } + iterator end() { return _by_id.end(); } + + const_iterator begin() const { return _by_id.begin(); } + const_iterator end() const { return 
_by_id.end(); } + + size_t size() const { return _by_id.size(); } + + void clear() + { + //_by_name.clear(); + _by_id.clear(); + } + +private: + + // This is FNV-1, see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash + inline uint64_t hash_string(const char* __s) + { + uint64_t hash = 0xcbf29ce484222325ull; + for ( ; *__s; ++__s) + { + hash *= 1099511628211ull; + hash ^= *__s; + } + return hash; + } + + //IDTable _by_name; + RefID _next_id; + bool _keep_names; + InvertedIDTable _by_id; +#if ENABLE_THREADS + static boost::mutex table_lock; +#endif +}; + + +bool hit_insert_id_lt(const ReadHit& h1, const ReadHit& h2); + +/****************************************************************************** + The HitFactory abstract class is responsible for returning a single ReadHit + from an alignment file. The only class that actually implements this interface + right now in Cufflinks is SAMHitFactory +*******************************************************************************/ +class HitFactory +{ +public: + + HitFactory(ReadTable& insert_table, + RefSequenceTable& reference_table) : + _insert_table(insert_table), + _ref_table(reference_table), + _num_seq_header_recs(0) {} + + HitFactory& operator=(const HitFactory& rhs) + { + if (this != &rhs) + { + //_hit_file = rhs._hit_file; + _insert_table = rhs._insert_table; + _ref_table = rhs._ref_table; + } + return *this; + } + virtual ~HitFactory() {} + + ReadHit create_hit(const string& insert_name, + const string& ref_name, + int left, + const vector& cigar, + CuffStrand source_strand, + const string& partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag); + + ReadHit create_hit(const string& insert_name, + const string& ref_name, + uint32_t left, + uint32_t read_len, + CuffStrand source_strand, + const string& partner_ref, + int partner_pos, + unsigned int edit_dist, + int num_hits, + float base_mass, + uint32_t sam_flag); + + virtual void reset() = 0; + + virtual void undo_hit() = 0; + + // next_record() should always set _curr_pos before reading the + // next_record so undo_hit() will work properly. + virtual bool next_record(const char*& buf, size_t& buf_size) = 0; + + virtual bool records_remain() const = 0; + + virtual bool get_hit_from_buf(const char* bwt_buf, + ReadHit& bh, + bool strip_slash, + char* name_out = NULL, + char* name_tags = NULL) = 0; + + RefSequenceTable& ref_table() { return _ref_table; } + + //FILE* hit_file() { return _hit_file; } + + virtual bool inspect_header() = 0; + + const ReadGroupProperties& read_group_properties() + { + return _rg_props; + } + +protected: + + bool parse_header_string(const string& header_rec, + ReadGroupProperties& rg_props); + + void finalize_rg_props(); + + // TODO: We want to keep a collection of these, indexed by RG ID. 
See #180 + ReadGroupProperties _rg_props; + +private: + + + ReadTable& _insert_table; + RefSequenceTable& _ref_table; + uint32_t _num_seq_header_recs; +}; + +/****************************************************************************** + SAMHitFactory turns SAM alignments into ReadHits +*******************************************************************************/ +class SAMHitFactory : public HitFactory +{ +public: + SAMHitFactory(const string& hit_file_name, + ReadTable& insert_table, + RefSequenceTable& reference_table) : + HitFactory(insert_table, reference_table), + _line_num(0), + _curr_pos(0) + { + _hit_file = fopen(hit_file_name.c_str(), "r"); + if (_hit_file == NULL) + { + throw std::runtime_error("Error: could not open file for reading"); + } + + if (inspect_header() == false) + { + throw std::runtime_error("Error: could not parse SAM header"); + } + + // Override header-inferred read group properities with whatever + // the user supplied. + if (global_read_properties != NULL) + { + _rg_props = *global_read_properties; + } + } + + ~SAMHitFactory() + { + if (_hit_file) + { + fclose(_hit_file); + } + } + + virtual void undo_hit() + { + fseeko(_hit_file, _curr_pos, SEEK_SET); + --_line_num; + } + + void reset() { rewind(_hit_file); } + + void mark_curr_pos() { _curr_pos = ftell(_hit_file); } + bool records_remain() const { return !feof(_hit_file); } + + bool next_record(const char*& buf, size_t& buf_size); + + bool get_hit_from_buf(const char* bwt_buf, + ReadHit& bh, + bool strip_slash, + char* name_out = NULL, + char* name_tags = NULL); + + bool inspect_header(); + +private: + static const size_t _hit_buf_max_sz = 10 * 1024; + char _hit_buf[_hit_buf_max_sz]; + int _line_num; + + FILE* _hit_file; + off_t _curr_pos; +}; + +/****************************************************************************** + BAMHitFactory turns SAM alignments into ReadHits + *******************************************************************************/ +class BAMHitFactory : public HitFactory +{ +public: + BAMHitFactory(const string& hit_file_name, + ReadTable& insert_table, + RefSequenceTable& reference_table) : + HitFactory(insert_table, reference_table) + { + _hit_file = samopen(hit_file_name.c_str(), "rb", 0); + + memset(&_next_hit, 0, sizeof(_next_hit)); + + if (_hit_file == NULL || _hit_file->header == NULL) + { + throw std::runtime_error("Fail to open BAM file"); + } + + _beginning = bgzf_tell(_hit_file->x.bam); + _eof_encountered = false; + + if (inspect_header() == false) + { + throw std::runtime_error("Error: could not parse BAM header"); + } + + // Override header-inferred read group properities with whatever + // the user supplied. 
+ if (global_read_properties != NULL) + { + _rg_props = *global_read_properties; + } + } + + ~BAMHitFactory() + { + if (_hit_file) + { + samclose(_hit_file); + } + } + + void mark_curr_pos() + { + _curr_pos = bgzf_tell(_hit_file->x.bam); + } + + + void undo_hit() + { + bgzf_seek(_hit_file->x.bam, _curr_pos, SEEK_SET); + //--_line_num; + } + + bool records_remain() const + { + return !_eof_encountered; + } + + void reset() + { + if (_hit_file && _hit_file->x.bam) + { + bgzf_seek(_hit_file->x.bam, _beginning, SEEK_SET); + _eof_encountered = false; + } + } + + + bool next_record(const char*& buf, size_t& buf_size); + + bool get_hit_from_buf(const char* bwt_buf, + ReadHit& bh, + bool strip_slash, + char* name_out = NULL, + char* name_tags = NULL); + + bool inspect_header(); + +private: + samfile_t* _hit_file; + int64_t _curr_pos; + int64_t _beginning; + + bam1_t _next_hit; + bool _eof_encountered; +}; + +// Forward declaration of BundleFactory, because MateHit will need a pointer +// back to the Factory that created. Ultimately, we should replace this +// with a pointer back to the ReadGroupProperty object corresponding to each +// MateHit. That, however, requires that we link fragment length distributions +// and bias models, etc, with each read group, and that's more than we can +// afford to implement right now. + +/******************************************************************************* + MateHit is a class that encapsulates a paired-end alignment as a single object. + MateHits can be "open" when one hit has been read from a stream of individual + read alignments, but the other hasn't. A "closed" MateHit is one where either + both read alignments have been installed in the MateHit, or one read hit has, + but the other will never come (i.e. singletons) +*******************************************************************************/ +class MateHit +{ +public: + MateHit() : + _refid(0), + _left_alignment(NULL), + _right_alignment(NULL), + _collapse_mass(0.0), + _is_mapped(false){} + + MateHit(shared_ptr rg_props, + RefID refid, + const ReadHit* left_alignment, + const ReadHit* right_alignment) : + _rg_props(rg_props), + _refid(refid), + _left_alignment(left_alignment), + _right_alignment(right_alignment), + _collapse_mass(0.0), + _is_mapped(false) + { + //_expected_inner_dist = min(genomic_inner_dist(), _expected_inner_dist); + } + ~MateHit() + { + //fprintf(stderr, "Killing hit %lx\n",this); + } + + //bool closed() {return _closed;} + + shared_ptr read_group_props() const { return _rg_props; } + + const ReadHit* left_alignment() const {return _left_alignment;} + void left_alignment(const ReadHit* left_alignment) + { + _left_alignment = left_alignment; + } + + const ReadHit* right_alignment() const {return _right_alignment;} + void right_alignment(const ReadHit* right_alignment) + { + _right_alignment = right_alignment; + } + + bool is_mapped() const {return _is_mapped;} + void is_mapped(bool mapped) + { + _is_mapped = mapped; + } + + int num_hits() const + { + assert(_left_alignment); + return _left_alignment->num_hits(); + } + + bool is_multi() const + { + return num_hits() > 1; + } + + bool is_pair() const + { + return (_left_alignment && _right_alignment); + } + + int left() const + { + if (_right_alignment && _left_alignment) + { + return min(_right_alignment->left(),_left_alignment->left()); + } + if (_left_alignment) + return _left_alignment->left(); + else if (_right_alignment) + return _right_alignment->left(); + return -1; + } + + int right() const + { + if (_right_alignment 
&& _left_alignment) + { + return max(_right_alignment->right(),_left_alignment->right()); + } + if (_right_alignment) + return _right_alignment->right(); + else if (_left_alignment) + return _left_alignment->right(); + return -1; + } + + CuffStrand strand() const + { + CuffStrand left_strand = CUFF_STRAND_UNKNOWN; + CuffStrand right_strand = CUFF_STRAND_UNKNOWN; + if (_left_alignment) + { + left_strand = _left_alignment->source_strand(); + } + if (_right_alignment) + { + right_strand = _right_alignment->source_strand(); + //assert ( s != CUFF_STRAND_UNKNOWN ? s == r : true); + } + assert (left_strand == right_strand || + left_strand == CUFF_STRAND_UNKNOWN || + right_strand == CUFF_STRAND_UNKNOWN); + + return max(left_strand, right_strand); + } + + + bool contains_splice() const + { + if (_right_alignment) + return (_left_alignment->contains_splice() || _right_alignment->contains_splice()); + return (_left_alignment->contains_splice()); + } + + InsertID insert_id() const + { + if (_left_alignment) return _left_alignment->insert_id(); + if (_right_alignment) return _right_alignment->insert_id(); + return 0; + } + + RefID ref_id() const { return _refid; } + + int genomic_inner_dist() const + { + if (_left_alignment && _right_alignment) + { + return _right_alignment->left() - _left_alignment->right(); + } + else + { + return -1; + } + return -1; + } + + pair genomic_outer_span() const + { + if (_left_alignment && _right_alignment) + { + return make_pair(left(), + right() - 1); + } + + return make_pair(-1,-1); + } + + pair genomic_inner_span() const + { + if (_left_alignment && _right_alignment) + { + return make_pair(_left_alignment->right(), + _right_alignment->left() - 1); + } + + return make_pair(-1,-1); + } + + // MRT is incorrect and not added to rg_props until after inspect_map + // We are ignoring the mass reported by the ReadHits and re-calculating based on multi-hits + double mass() const + { + double base_mass = 1.0; + + if (is_multi()) + { + shared_ptr mrt = _rg_props->multi_read_table(); + if (mrt) + return mrt->get_mass(*this); + else + return base_mass/num_hits(); + } + return base_mass; + } + + double common_scale_mass() const + { + double m = mass(); + m *= _rg_props->mass_scale_factor(); + + return m; + } + + unsigned int edit_dist() const + { + unsigned int edits = 0; + if (_left_alignment) + edits += _left_alignment->edit_dist(); + if (_right_alignment) + edits += _right_alignment->edit_dist(); + return edits; + } + + double collapse_mass() const { return _collapse_mass; } + void collapse_mass(double m) { _collapse_mass = m; } + void incr_collapse_mass(double incr) { _collapse_mass += incr; } + +private: + + shared_ptr _rg_props; + RefID _refid; + const ReadHit* _left_alignment; + const ReadHit* _right_alignment; + double _collapse_mass; + bool _is_mapped; + //bool _closed; +}; + +bool mate_hit_lt(const MateHit& lhs, const MateHit& rhs); + +bool hits_eq_mod_id(const ReadHit& lhs, const ReadHit& rhs); + +bool hits_eq_non_multi(const MateHit& lhs, const MateHit& rhs); +bool hits_eq_non_multi_non_replicate(const MateHit& lhs, const MateHit& rhs); + +bool hits_equals(const MateHit& lhs, const MateHit& rhs); + +bool has_no_collapse_mass(const MateHit& hit); + +// Assumes hits are sorted by mate_hit_lt +void collapse_hits(const vector& hits, + vector& non_redundant); + + + +#endif diff --git a/src/jensen_shannon.cpp b/src/jensen_shannon.cpp new file mode 100644 index 0000000..e3e669f --- /dev/null +++ b/src/jensen_shannon.cpp @@ -0,0 +1,277 @@ +/* + * jensen_shannon.cpp + * 
cufflinks + * + * Created by Cole Trapnell on 8/30/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include + +#include "jensen_shannon.h" + +using namespace std; +using namespace boost; + + +double entropy(const ublas::vector& p) +{ + double e = 0; + for (size_t i = 0; i < p.size(); ++i) + { + double P = p[i]; + if (P != 0.0) + { + e -= (P * log(P)); + } + } + return e; +} + +double jensen_shannon_distance(std::vector >& sample_kappas) +{ + assert (sample_kappas.size() > 1); + + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + //cerr << sample_kappas[i] << endl; + double kappa_sum = accumulate(sample_kappas[i].begin(), + sample_kappas[i].end(), 0.0); + if (abs(kappa_sum - 1.0) > 1e-10) + { + //cerr << kappa_sum << " " << sample_kappas[i] << endl; + } + assert (abs(kappa_sum - 1.0) < 1e-10); + } + + size_t kappa_length = 0; + for (size_t i = 1; i < sample_kappas.size(); ++i) + { + assert (sample_kappas[i].size() == sample_kappas[i-1].size()); + kappa_length = sample_kappas[i].size(); + } + + ublas::vector avg_kappas = ublas::zero_vector(kappa_length); + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + //cout << "kappa " << i<< " "<< sample_kappas[i] << endl; + avg_kappas += sample_kappas[i]; + } + avg_kappas /= sample_kappas.size(); + //cout << avg_kappas << endl; + + double avg_entropy = 0.0; + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + avg_entropy += entropy(sample_kappas[i]); + } + avg_entropy /= sample_kappas.size(); + //cout << avg_entropy << endl; + + double entropy_avg = entropy(avg_kappas); + + double js = entropy_avg - avg_entropy; + + return sqrt(js); +} + +//void alt_jensen_shannon_gradient(vector >& sample_kappas, +// double js, +// ublas::vector& gradient) +//{ +// assert (sample_kappas.size() > 1); +// size_t kappa_length = sample_kappas.front().size(); +// for (size_t i = 1; i < sample_kappas.size(); ++i) +// { +// assert (sample_kappas[i].size() == sample_kappas[i-1].size()); +// kappa_length = sample_kappas[i].size(); +// } +// +// if (kappa_length == 0) +// return; +// +// gradient = ublas::zero_vector(sample_kappas.size() * kappa_length); +// for (size_t i = 0; i < sample_kappas.size(); ++i) +// { +// for (size_t k = 0; k < kappa_length; ++k) +// { +// gradient(i*kappa_length + k) = sample_kappas[i](k); +// } +// } +// +// //cout << "t1: " << gradient<< endl; +// +// ublas::vector p_bar = ublas::zero_vector(kappa_length); +// for (size_t i = 0; i < sample_kappas.size(); ++i) +// { +// p_bar += sample_kappas[i]; +// } +// p_bar /= sample_kappas.size(); +// +// +// //cout << "t2 " << denoms << endl; +// +// for (size_t i = 0; i < sample_kappas.size(); ++i) +// { +// for (size_t k = 0; k < kappa_length; ++k) +// { +// if (p_bar(k) == 0.0 || gradient(i*kappa_length + k) == 0.0) +// { +// gradient(i*kappa_length + k) = 0.0; +// } +// else +// { +//#ifdef DEBUG +// ublas::vector& grad_tmp = gradient; +//#endif +//// double alt_grad = 0.0; +//// double m = 2.0; +//// alt_grad = js / (2.0 * m); +//// double A = log(gradient(i*kappa_length + k)) + (1.0 / gradient(i*kappa_length + k)); +//// double B = log(p_bar[k]) + (1.0 / p_bar[k]); +//// alt_grad *= (A - B); +// +// double alt_grad = 0.0; +// +// alt_grad /= p_bar(k); +// alt_grad = log(gradient(i*kappa_length + k)); +// alt_grad /= sample_kappas.size(); // m in paper notation +// alt_grad *= (1.0/(2.0 * js)); // This is supposed to use the square root of the distance (it's not a typo) +// +// +// +// gradient(i*kappa_length + k) /= p_bar(k); +// gradient(i*kappa_length + k) = 
log(gradient(i*kappa_length + k)); +// gradient(i*kappa_length + k) /= sample_kappas.size(); // m in paper notation +// gradient(i*kappa_length + k) *= (1.0/(2.0 * js)); // This is supposed to use the square root of the distance (it's not a typo) +// +// double curr_grad = gradient(i*kappa_length + k); +// +// gradient(i*kappa_length + k) = alt_grad; +// //fprintf(stderr, "Curr gradient: %lg, alternate gradient %lg\n", curr_grad, alt_grad); +//#ifdef DEBUG +// if(isinf(gradient(i*kappa_length + k))) +// { +// cerr << grad_tmp << endl; +// cerr << sample_kappas[i] << endl; +// assert (false); +// } +//#endif +// +// } +// } +// } +//} + + +void jensen_shannon_gradient(vector >& sample_kappas, + double js, + ublas::vector& gradient) +{ + assert (sample_kappas.size() > 1); + size_t kappa_length = sample_kappas.front().size(); + for (size_t i = 1; i < sample_kappas.size(); ++i) + { + assert (sample_kappas[i].size() == sample_kappas[i-1].size()); + kappa_length = sample_kappas[i].size(); + } + + if (kappa_length == 0) + return; + + gradient = ublas::zero_vector(sample_kappas.size() * kappa_length); + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + for (size_t k = 0; k < kappa_length; ++k) + { + assert (!isinf(sample_kappas[i](k)) && !isnan(sample_kappas[i](k))); + gradient(i*kappa_length + k) = sample_kappas[i](k); + } + } + + //cout << "t1: " << gradient<< endl; + + ublas::vector p_bar = ublas::zero_vector(kappa_length); + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + p_bar += sample_kappas[i]; + } + p_bar /= sample_kappas.size(); + + + //cout << "t2 " << denoms << endl; + + for (size_t i = 0; i < sample_kappas.size(); ++i) + { + for (size_t k = 0; k < kappa_length; ++k) + { + if (p_bar(k) == 0.0 || gradient(i*kappa_length + k) == 0.0 || js == 0.0) + { + gradient(i*kappa_length + k) = 0.0; + } + else + { +#ifdef DEBUG + ublas::vector& grad_tmp = gradient; +#endif + double alt_grad = 0.0; + double m = 2.0; + alt_grad = js / (2.0 * m); + double A = log(gradient(i*kappa_length + k)) + (1.0 / gradient(i*kappa_length + k)); + double B = log(p_bar[k]) + (1.0 / p_bar[k]); + alt_grad *= (A - B); + + gradient(i*kappa_length + k) /= p_bar(k); + gradient(i*kappa_length + k) = log(gradient(i*kappa_length + k)); + gradient(i*kappa_length + k) /= sample_kappas.size(); // m in paper notation + gradient(i*kappa_length + k) *= (1.0/(2.0 * js)); // This is supposed to use the square root of the distance (it's not a typo) + + double curr_grad = gradient(i*kappa_length + k); + + assert (!isinf(curr_grad) && !isnan(curr_grad)); + //fprintf(stderr, "Curr gradient: %lg, alternate gradient %lg\n", curr_grad, alt_grad); +#if 0 + if(isinf(gradient(i*kappa_length + k))) + { + cerr << grad_tmp << endl; + cerr << sample_kappas[i] << endl; + assert (false); + } +#endif + + } + } + } +} + +void make_js_covariance_matrix(vector >& kappa_covariances, + ublas::matrix& js_covariance) +{ + size_t kappa_length = 0; + for (size_t i = 1; i < kappa_covariances.size(); ++i) + { + assert (kappa_covariances[i].size1() == kappa_covariances[i-1].size1()); + assert (kappa_covariances[i].size2() == kappa_covariances[i-1].size2()); + + kappa_length = kappa_covariances[i].size1(); + } + + if (kappa_length == 0) + return; + + js_covariance = ublas::zero_matrix(kappa_covariances.size() * kappa_length, + kappa_covariances.size() * kappa_length); + for (size_t i = 0; i < kappa_covariances.size(); ++i) + { + for (size_t j = 0; j < kappa_length; ++j) + { + for (size_t k = 0; k < kappa_length; ++k) + { + 
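+ // copy sample i's kappa covariance into its own diagonal block; the
+ // cross-sample blocks of js_covariance stay zero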
js_covariance(i*kappa_length + j, i*kappa_length + k) = + kappa_covariances[i](j,k); + assert (!isinf(js_covariance(i*kappa_length + j, i*kappa_length + k) && ! isnan(js_covariance(i*kappa_length + j, i*kappa_length + k)))); + } + } + } +} diff --git a/src/jensen_shannon.h b/src/jensen_shannon.h new file mode 100644 index 0000000..cb89f0a --- /dev/null +++ b/src/jensen_shannon.h @@ -0,0 +1,33 @@ +/* + * jensen_shannon.h + * cufflinks + * + * Created by Cole Trapnell on 8/30/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include +#include +#include +#include +#include + +#include + +namespace ublas = boost::numeric::ublas; + +double entropy(const ublas::vector& p); + +double jensen_shannon_distance(std::vector >& sample_kappas); + +//void alt_jensen_shannon_gradient(std::vector >& sample_kappas, +// double js, +// ublas::vector& gradient); + +void jensen_shannon_gradient(std::vector >& sample_kappas, + double js, + ublas::vector& gradient); + +void make_js_covariance_matrix(std::vector >& kappa_covariances, + ublas::matrix& js_covariance); diff --git a/src/lemon/bfs.h b/src/lemon/bfs.h new file mode 100644 index 0000000..8fda28b --- /dev/null +++ b/src/lemon/bfs.h @@ -0,0 +1,1597 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BFS_H +#define LEMON_BFS_H + +///\ingroup search +///\file +///\brief Bfs algorithm. + +#include +#include +#include +#include +#include +#include + +namespace lemon { + + + + ///Default traits class of Bfs class. + + ///Default traits class of Bfs class. + ///\param GR Graph type. + template + struct BfsDefaultTraits + { + ///The graph type the algorithm runs on. + typedef GR Graph; + ///\brief The type of the map that stores the last + ///edges of the shortest paths. + /// + ///The type of the map that stores the last + ///edges of the shortest paths. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef typename Graph::template NodeMap PredMap; + ///Instantiates a PredMap. + + ///This function instantiates a \ref PredMap. + ///\param G is the graph, to which we would like to define the PredMap. + ///\todo The graph alone may be insufficient to initialize + static PredMap *createPredMap(const GR &G) + { + return new PredMap(G); + } + ///The type of the map that indicates which nodes are processed. + + ///The type of the map that indicates which nodes are processed. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef NullMap ProcessedMap; + ///Instantiates a ProcessedMap. + + ///This function instantiates a \ref ProcessedMap. 
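// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of the original patch.]
// A minimal example of calling the helpers declared in src/jensen_shannon.h
// above. The two "kappa" vectors are hypothetical isoform-abundance profiles;
// each must sum to 1, as the asserts in jensen_shannon_distance() require.
// The boost include is listed explicitly only for self-containment.
#include <iostream>
#include <vector>
#include <boost/numeric/ublas/vector.hpp>
#include "jensen_shannon.h"

int js_example()
{
    ublas::vector<double> p(3), q(3);
    p[0] = 0.2; p[1] = 0.3; p[2] = 0.5;   // hypothetical profile, condition A
    q[0] = 0.5; q[1] = 0.3; q[2] = 0.2;   // hypothetical profile, condition B

    std::vector<ublas::vector<double> > kappas;
    kappas.push_back(p);
    kappas.push_back(q);

    // Square root of the Jensen-Shannon divergence (natural log), so for two
    // samples the result lies in [0, sqrt(log 2)] (roughly [0, 0.83]).
    double js = jensen_shannon_distance(kappas);
    std::cout << "JS distance = " << js << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------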
+ ///\param g is the graph, to which + ///we would like to define the \ref ProcessedMap +#ifdef DOXYGEN + static ProcessedMap *createProcessedMap(const GR &g) +#else + static ProcessedMap *createProcessedMap(const GR &) +#endif + { + return new ProcessedMap(); + } + ///The type of the map that indicates which nodes are reached. + + ///The type of the map that indicates which nodes are reached. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + ///Instantiates a ReachedMap. + + ///This function instantiates a \ref ReachedMap. + ///\param G is the graph, to which + ///we would like to define the \ref ReachedMap. + static ReachedMap *createReachedMap(const GR &G) + { + return new ReachedMap(G); + } + ///The type of the map that stores the dists of the nodes. + + ///The type of the map that stores the dists of the nodes. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef typename Graph::template NodeMap DistMap; + ///Instantiates a DistMap. + + ///This function instantiates a \ref DistMap. + ///\param G is the graph, to which we would like to define the \ref DistMap + static DistMap *createDistMap(const GR &G) + { + return new DistMap(G); + } + }; + + ///%BFS algorithm class. + + ///\ingroup search + ///This class provides an efficient implementation of the %BFS algorithm. + /// + ///\param GR The graph type the algorithm runs on. The default value is + ///\ref ListGraph. The value of GR is not used directly by Bfs, it + ///is only passed to \ref BfsDefaultTraits. + ///\param TR Traits class to set various data types used by the algorithm. + ///The default traits class is + ///\ref BfsDefaultTraits "BfsDefaultTraits". + ///See \ref BfsDefaultTraits for the documentation of + ///a Bfs traits class. + /// + ///\author Alpar Juttner + +#ifdef DOXYGEN + template +#else + template > +#endif + class Bfs { + public: + /** + * \brief \ref Exception for uninitialized parameters. + * + * This error represents problems in the initialization + * of the parameters of the algorithms. + */ + class UninitializedParameter : public lemon::UninitializedParameter { + public: + virtual const char* what() const throw() { + return "lemon::Bfs::UninitializedParameter"; + } + }; + + typedef TR Traits; + ///The type of the underlying graph. + typedef typename TR::Graph Graph; + + ///\brief The type of the map that stores the last + ///edges of the shortest paths. + typedef typename TR::PredMap PredMap; + ///The type of the map indicating which nodes are reached. + typedef typename TR::ReachedMap ReachedMap; + ///The type of the map indicating which nodes are processed. + typedef typename TR::ProcessedMap ProcessedMap; + ///The type of the map that stores the dists of the nodes. + typedef typename TR::DistMap DistMap; + private: + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + typedef typename Graph::OutEdgeIt OutEdgeIt; + + /// Pointer to the underlying graph. + const Graph *G; + ///Pointer to the map of predecessors edges. + PredMap *_pred; + ///Indicates if \ref _pred is locally allocated (\c true) or not. + bool local_pred; + ///Pointer to the map of distances. + DistMap *_dist; + ///Indicates if \ref _dist is locally allocated (\c true) or not. + bool local_dist; + ///Pointer to the map of reached status of the nodes. 
+ ReachedMap *_reached; + ///Indicates if \ref _reached is locally allocated (\c true) or not. + bool local_reached; + ///Pointer to the map of processed status of the nodes. + ProcessedMap *_processed; + ///Indicates if \ref _processed is locally allocated (\c true) or not. + bool local_processed; + + std::vector _queue; + int _queue_head,_queue_tail,_queue_next_dist; + int _curr_dist; + + ///Creates the maps if necessary. + + ///\todo Better memory allocation (instead of new). + void create_maps() + { + if(!_pred) { + local_pred = true; + _pred = Traits::createPredMap(*G); + } + if(!_dist) { + local_dist = true; + _dist = Traits::createDistMap(*G); + } + if(!_reached) { + local_reached = true; + _reached = Traits::createReachedMap(*G); + } + if(!_processed) { + local_processed = true; + _processed = Traits::createProcessedMap(*G); + } + } + + protected: + + Bfs() {} + + public: + + typedef Bfs Create; + + ///\name Named template parameters + + ///@{ + + template + struct DefPredMapTraits : public Traits { + typedef T PredMap; + static PredMap *createPredMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///PredMap type + /// + ///\ref named-templ-param "Named parameter" for setting PredMap type + /// + template + struct DefPredMap : public Bfs< Graph, DefPredMapTraits > { + typedef Bfs< Graph, DefPredMapTraits > Create; + }; + + template + struct DefDistMapTraits : public Traits { + typedef T DistMap; + static DistMap *createDistMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///DistMap type + /// + ///\ref named-templ-param "Named parameter" for setting DistMap type + /// + template + struct DefDistMap : public Bfs< Graph, DefDistMapTraits > { + typedef Bfs< Graph, DefDistMapTraits > Create; + }; + + template + struct DefReachedMapTraits : public Traits { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///ReachedMap type + /// + ///\ref named-templ-param "Named parameter" for setting ReachedMap type + /// + template + struct DefReachedMap : public Bfs< Graph, DefReachedMapTraits > { + typedef Bfs< Graph, DefReachedMapTraits > Create; + }; + + template + struct DefProcessedMapTraits : public Traits { + typedef T ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///ProcessedMap type + /// + ///\ref named-templ-param "Named parameter" for setting ProcessedMap type + /// + template + struct DefProcessedMap : public Bfs< Graph, DefProcessedMapTraits > { + typedef Bfs< Graph, DefProcessedMapTraits > Create; + }; + + struct DefGraphProcessedMapTraits : public Traits { + typedef typename Graph::template NodeMap ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &G) + { + return new ProcessedMap(G); + } + }; + ///\brief \ref named-templ-param "Named parameter" + ///for setting the ProcessedMap type to be Graph::NodeMap. + /// + ///\ref named-templ-param "Named parameter" + ///for setting the ProcessedMap type to be Graph::NodeMap. + ///If you don't set it explicitly, it will be automatically allocated. 
+ template + struct DefProcessedMapToBeDefaultMap : + public Bfs< Graph, DefGraphProcessedMapTraits> { + typedef Bfs< Graph, DefGraphProcessedMapTraits> Create; + }; + + ///@} + + public: + + ///Constructor. + + ///\param _G the graph the algorithm will run on. + /// + Bfs(const Graph& _G) : + G(&_G), + _pred(NULL), local_pred(false), + _dist(NULL), local_dist(false), + _reached(NULL), local_reached(false), + _processed(NULL), local_processed(false) + { } + + ///Destructor. + ~Bfs() + { + if(local_pred) delete _pred; + if(local_dist) delete _dist; + if(local_reached) delete _reached; + if(local_processed) delete _processed; + } + + ///Sets the map storing the predecessor edges. + + ///Sets the map storing the predecessor edges. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destructor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Bfs &predMap(PredMap &m) + { + if(local_pred) { + delete _pred; + local_pred=false; + } + _pred = &m; + return *this; + } + + ///Sets the map indicating the reached nodes. + + ///Sets the map indicating the reached nodes. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destructor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Bfs &reachedMap(ReachedMap &m) + { + if(local_reached) { + delete _reached; + local_reached=false; + } + _reached = &m; + return *this; + } + + ///Sets the map indicating the processed nodes. + + ///Sets the map indicating the processed nodes. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destructor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Bfs &processedMap(ProcessedMap &m) + { + if(local_processed) { + delete _processed; + local_processed=false; + } + _processed = &m; + return *this; + } + + ///Sets the map storing the distances calculated by the algorithm. + + ///Sets the map storing the distances calculated by the algorithm. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destructor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Bfs &distMap(DistMap &m) + { + if(local_dist) { + delete _dist; + local_dist=false; + } + _dist = &m; + return *this; + } + + public: + ///\name Execution control + ///The simplest way to execute the algorithm is to use + ///one of the member functions called \c run(...). + ///\n + ///If you need more control on the execution, + ///first you must call \ref init(), then you can add several source nodes + ///with \ref addSource(). + ///Finally \ref start() will perform the actual path + ///computation. + + ///@{ + + ///\brief Initializes the internal data structures. + /// + ///Initializes the internal data structures. + /// + void init() + { + create_maps(); + _queue.resize(countNodes(*G)); + _queue_head=_queue_tail=0; + _curr_dist=1; + for ( NodeIt u(*G) ; u!=INVALID ; ++u ) { + _pred->set(u,INVALID); + _reached->set(u,false); + _processed->set(u,false); + } + } + + ///Adds a new source node. + + ///Adds a new source node to the set of nodes to be processed. + /// + void addSource(Node s) + { + if(!(*_reached)[s]) + { + _reached->set(s,true); + _pred->set(s,INVALID); + _dist->set(s,0); + _queue[_queue_head++]=s; + _queue_next_dist=_queue_head; + } + } + + ///Processes the next node. + + ///Processes the next node. + /// + ///\return The processed node. 
+ /// + ///\warning The queue must not be empty! + Node processNextNode() + { + if(_queue_tail==_queue_next_dist) { + _curr_dist++; + _queue_next_dist=_queue_head; + } + Node n=_queue[_queue_tail++]; + _processed->set(n,true); + Node m; + for(OutEdgeIt e(*G,n);e!=INVALID;++e) + if(!(*_reached)[m=G->target(e)]) { + _queue[_queue_head++]=m; + _reached->set(m,true); + _pred->set(m,e); + _dist->set(m,_curr_dist); + } + return n; + } + + ///Processes the next node. + + ///Processes the next node. And checks that the given target node + ///is reached. If the target node is reachable from the processed + ///node then the reached parameter will be set true. The reached + ///parameter should be initially false. + /// + ///\param target The target node. + ///\retval reach Indicates that the target node is reached. + ///\return The processed node. + /// + ///\warning The queue must not be empty! + Node processNextNode(Node target, bool& reach) + { + if(_queue_tail==_queue_next_dist) { + _curr_dist++; + _queue_next_dist=_queue_head; + } + Node n=_queue[_queue_tail++]; + _processed->set(n,true); + Node m; + for(OutEdgeIt e(*G,n);e!=INVALID;++e) + if(!(*_reached)[m=G->target(e)]) { + _queue[_queue_head++]=m; + _reached->set(m,true); + _pred->set(m,e); + _dist->set(m,_curr_dist); + reach = reach || (target == m); + } + return n; + } + + ///Processes the next node. + + ///Processes the next node. And checks that at least one of + ///reached node has true value in the \c nm node map. If one node + ///with true value is reachable from the processed node then the + ///rnode parameter will be set to the first of such nodes. + /// + ///\param nm The node map of possible targets. + ///\retval rnode The reached target node. + ///\return The processed node. + /// + ///\warning The queue must not be empty! + template + Node processNextNode(const NM& nm, Node& rnode) + { + if(_queue_tail==_queue_next_dist) { + _curr_dist++; + _queue_next_dist=_queue_head; + } + Node n=_queue[_queue_tail++]; + _processed->set(n,true); + Node m; + for(OutEdgeIt e(*G,n);e!=INVALID;++e) + if(!(*_reached)[m=G->target(e)]) { + _queue[_queue_head++]=m; + _reached->set(m,true); + _pred->set(m,e); + _dist->set(m,_curr_dist); + if (nm[m] && rnode == INVALID) rnode = m; + } + return n; + } + + ///Next node to be processed. + + ///Next node to be processed. + /// + ///\return The next node to be processed or INVALID if the queue is + /// empty. + Node nextNode() + { + return _queue_tail<_queue_head?_queue[_queue_tail]:INVALID; + } + + ///\brief Returns \c false if there are nodes + ///to be processed in the queue + /// + ///Returns \c false if there are nodes + ///to be processed in the queue + bool emptyQueue() { return _queue_tail==_queue_head; } + ///Returns the number of the nodes to be processed. + + ///Returns the number of the nodes to be processed in the queue. + int queueSize() { return _queue_head-_queue_tail; } + + ///Executes the algorithm. + + ///Executes the algorithm. + /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///This method runs the %BFS algorithm from the root node(s) + ///in order to + ///compute the + ///shortest path to each node. The algorithm computes + ///- The shortest path tree. + ///- The distance of each node from the root(s). + void start() + { + while ( !emptyQueue() ) processNextNode(); + } + + ///Executes the algorithm until \c dest is reached. + + ///Executes the algorithm until \c dest is reached. 
+ /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///This method runs the %BFS algorithm from the root node(s) + ///in order to compute the shortest path to \c dest. + ///The algorithm computes + ///- The shortest path to \c dest. + ///- The distance of \c dest from the root(s). + void start(Node dest) + { + bool reach = false; + while ( !emptyQueue() && !reach ) processNextNode(dest, reach); + } + + ///Executes the algorithm until a condition is met. + + ///Executes the algorithm until a condition is met. + /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///\param nm must be a bool (or convertible) node map. The + ///algorithm will stop when it reaches a node \c v with + /// nm[v] true. + /// + ///\return The reached node \c v with nm[v] true or + ///\c INVALID if no such node was found. + template + Node start(const NM &nm) + { + Node rnode = INVALID; + while ( !emptyQueue() && rnode == INVALID ) { + processNextNode(nm, rnode); + } + return rnode; + } + + ///Runs %BFS algorithm from node \c s. + + ///This method runs the %BFS algorithm from a root node \c s + ///in order to + ///compute the + ///shortest path to each node. The algorithm computes + ///- The shortest path tree. + ///- The distance of each node from the root. + /// + ///\note b.run(s) is just a shortcut of the following code. + ///\code + /// b.init(); + /// b.addSource(s); + /// b.start(); + ///\endcode + void run(Node s) { + init(); + addSource(s); + start(); + } + + ///Finds the shortest path between \c s and \c t. + + ///Finds the shortest path between \c s and \c t. + /// + ///\return The length of the shortest s---t path if there exists one, + ///0 otherwise. + ///\note Apart from the return value, b.run(s) is + ///just a shortcut of the following code. + ///\code + /// b.init(); + /// b.addSource(s); + /// b.start(t); + ///\endcode + int run(Node s,Node t) { + init(); + addSource(s); + start(t); + return reached(t) ? _curr_dist : 0; + } + + ///@} + + ///\name Query Functions + ///The result of the %BFS algorithm can be obtained using these + ///functions.\n + ///Before the use of these functions, + ///either run() or start() must be calleb. + + ///@{ + + typedef PredMapPath Path; + + ///Gives back the shortest path. + + ///Gives back the shortest path. + ///\pre The \c t should be reachable from the source. + Path path(Node t) + { + return Path(*G, *_pred, t); + } + + ///The distance of a node from the root(s). + + ///Returns the distance of a node from the root(s). + ///\pre \ref run() must be called before using this function. + ///\warning If node \c v in unreachable from the root(s) the return value + ///of this function is undefined. + int dist(Node v) const { return (*_dist)[v]; } + + ///Returns the 'previous edge' of the shortest path tree. + + ///For a node \c v it returns the 'previous edge' + ///of the shortest path tree, + ///i.e. it returns the last edge of a shortest path from the root(s) to \c + ///v. It is \ref INVALID + ///if \c v is unreachable from the root(s) or \c v is a root. The + ///shortest path tree used here is equal to the shortest path tree used in + ///\ref predNode(). + ///\pre Either \ref run() or \ref start() must be called before using + ///this function. + Edge predEdge(Node v) const { return (*_pred)[v];} + + ///Returns the 'previous node' of the shortest path tree. 
+ + ///For a node \c v it returns the 'previous node' + ///of the shortest path tree, + ///i.e. it returns the last but one node from a shortest path from the + ///root(a) to \c /v. + ///It is INVALID if \c v is unreachable from the root(s) or + ///if \c v itself a root. + ///The shortest path tree used here is equal to the shortest path + ///tree used in \ref predEdge(). + ///\pre Either \ref run() or \ref start() must be called before + ///using this function. + Node predNode(Node v) const { return (*_pred)[v]==INVALID ? INVALID: + G->source((*_pred)[v]); } + + ///Returns a reference to the NodeMap of distances. + + ///Returns a reference to the NodeMap of distances. + ///\pre Either \ref run() or \ref init() must + ///be called before using this function. + const DistMap &distMap() const { return *_dist;} + + ///Returns a reference to the shortest path tree map. + + ///Returns a reference to the NodeMap of the edges of the + ///shortest path tree. + ///\pre Either \ref run() or \ref init() + ///must be called before using this function. + const PredMap &predMap() const { return *_pred;} + + ///Checks if a node is reachable from the root. + + ///Returns \c true if \c v is reachable from the root. + ///\warning The source nodes are indicated as unreached. + ///\pre Either \ref run() or \ref start() + ///must be called before using this function. + /// + bool reached(Node v) { return (*_reached)[v]; } + + ///@} + }; + + ///Default traits class of Bfs function. + + ///Default traits class of Bfs function. + ///\param GR Graph type. + template + struct BfsWizardDefaultTraits + { + ///The graph type the algorithm runs on. + typedef GR Graph; + ///\brief The type of the map that stores the last + ///edges of the shortest paths. + /// + ///The type of the map that stores the last + ///edges of the shortest paths. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef NullMap PredMap; + ///Instantiates a PredMap. + + ///This function instantiates a \ref PredMap. + ///\param g is the graph, to which we would like to define the PredMap. + ///\todo The graph alone may be insufficient to initialize +#ifdef DOXYGEN + static PredMap *createPredMap(const GR &g) +#else + static PredMap *createPredMap(const GR &) +#endif + { + return new PredMap(); + } + + ///The type of the map that indicates which nodes are processed. + + ///The type of the map that indicates which nodes are processed. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef NullMap ProcessedMap; + ///Instantiates a ProcessedMap. + + ///This function instantiates a \ref ProcessedMap. + ///\param g is the graph, to which + ///we would like to define the \ref ProcessedMap +#ifdef DOXYGEN + static ProcessedMap *createProcessedMap(const GR &g) +#else + static ProcessedMap *createProcessedMap(const GR &) +#endif + { + return new ProcessedMap(); + } + ///The type of the map that indicates which nodes are reached. + + ///The type of the map that indicates which nodes are reached. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + ///Instantiates a ReachedMap. + + ///This function instantiates a \ref ReachedMap. + ///\param G is the graph, to which + ///we would like to define the \ref ReachedMap. 
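// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of the original patch.]
// How the Bfs class documented above is typically driven, first with the
// one-shot run() and then with the step-by-step init()/addSource()/start()
// style execution control. The tiny graph is hypothetical, and the include
// paths assume the vendored src/lemon directory is on the include path.
#include <iostream>
#include <lemon/list_graph.h>
#include <lemon/bfs.h>

int bfs_example()
{
    lemon::ListGraph g;
    lemon::ListGraph::Node a = g.addNode();
    lemon::ListGraph::Node b = g.addNode();
    lemon::ListGraph::Node c = g.addNode();
    g.addEdge(a, b);
    g.addEdge(b, c);

    // One-shot interface: shortest-path tree and distances from a.
    lemon::Bfs<lemon::ListGraph> bfs(g);
    bfs.run(a);
    std::cout << "dist(c) = " << bfs.dist(c) << std::endl;             // 2
    std::cout << "pred of c is b? " << (bfs.predNode(c) == b) << std::endl;

    // Step-by-step interface, as described in the execution control section.
    lemon::Bfs<lemon::ListGraph> bfs2(g);
    bfs2.init();
    bfs2.addSource(a);
    while (!bfs2.emptyQueue())
        bfs2.processNextNode();
    return bfs2.reached(c) ? 0 : 1;
}
// ---------------------------------------------------------------------------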
+ static ReachedMap *createReachedMap(const GR &G) + { + return new ReachedMap(G); + } + ///The type of the map that stores the dists of the nodes. + + ///The type of the map that stores the dists of the nodes. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef NullMap DistMap; + ///Instantiates a DistMap. + + ///This function instantiates a \ref DistMap. + ///\param g is the graph, to which we would like to define the \ref DistMap +#ifdef DOXYGEN + static DistMap *createDistMap(const GR &g) +#else + static DistMap *createDistMap(const GR &) +#endif + { + return new DistMap(); + } + }; + + /// Default traits used by \ref BfsWizard + + /// To make it easier to use Bfs algorithm + ///we have created a wizard class. + /// This \ref BfsWizard class needs default traits, + ///as well as the \ref Bfs class. + /// The \ref BfsWizardBase is a class to be the default traits of the + /// \ref BfsWizard class. + template + class BfsWizardBase : public BfsWizardDefaultTraits + { + + typedef BfsWizardDefaultTraits Base; + protected: + /// Type of the nodes in the graph. + typedef typename Base::Graph::Node Node; + + /// Pointer to the underlying graph. + void *_g; + ///Pointer to the map of reached nodes. + void *_reached; + ///Pointer to the map of processed nodes. + void *_processed; + ///Pointer to the map of predecessors edges. + void *_pred; + ///Pointer to the map of distances. + void *_dist; + ///Pointer to the source node. + Node _source; + + public: + /// Constructor. + + /// This constructor does not require parameters, therefore it initiates + /// all of the attributes to default values (0, INVALID). + BfsWizardBase() : _g(0), _reached(0), _processed(0), _pred(0), + _dist(0), _source(INVALID) {} + + /// Constructor. + + /// This constructor requires some parameters, + /// listed in the parameters list. + /// Others are initiated to 0. + /// \param g is the initial value of \ref _g + /// \param s is the initial value of \ref _source + BfsWizardBase(const GR &g, Node s=INVALID) : + _g(reinterpret_cast(const_cast(&g))), + _reached(0), _processed(0), _pred(0), _dist(0), _source(s) {} + + }; + + /// A class to make the usage of Bfs algorithm easier + + /// This class is created to make it easier to use Bfs algorithm. + /// It uses the functions and features of the plain \ref Bfs, + /// but it is much simpler to use it. + /// + /// Simplicity means that the way to change the types defined + /// in the traits class is based on functions that returns the new class + /// and not on templatable built-in classes. + /// When using the plain \ref Bfs + /// the new class with the modified type comes from + /// the original class by using the :: + /// operator. In the case of \ref BfsWizard only + /// a function have to be called and it will + /// return the needed class. + /// + /// It does not have own \ref run method. When its \ref run method is called + /// it initiates a plain \ref Bfs class, and calls the \ref Bfs::run + /// method of it. + template + class BfsWizard : public TR + { + typedef TR Base; + + ///The type of the underlying graph. 
+ typedef typename TR::Graph Graph; + //\e + typedef typename Graph::Node Node; + //\e + typedef typename Graph::NodeIt NodeIt; + //\e + typedef typename Graph::Edge Edge; + //\e + typedef typename Graph::OutEdgeIt OutEdgeIt; + + ///\brief The type of the map that stores + ///the reached nodes + typedef typename TR::ReachedMap ReachedMap; + ///\brief The type of the map that stores + ///the processed nodes + typedef typename TR::ProcessedMap ProcessedMap; + ///\brief The type of the map that stores the last + ///edges of the shortest paths. + typedef typename TR::PredMap PredMap; + ///The type of the map that stores the dists of the nodes. + typedef typename TR::DistMap DistMap; + + public: + /// Constructor. + BfsWizard() : TR() {} + + /// Constructor that requires parameters. + + /// Constructor that requires parameters. + /// These parameters will be the default values for the traits class. + BfsWizard(const Graph &g, Node s=INVALID) : + TR(g,s) {} + + ///Copy constructor + BfsWizard(const TR &b) : TR(b) {} + + ~BfsWizard() {} + + ///Runs Bfs algorithm from a given node. + + ///Runs Bfs algorithm from a given node. + ///The node can be given by the \ref source function. + void run() + { + if(Base::_source==INVALID) throw UninitializedParameter(); + Bfs alg(*reinterpret_cast(Base::_g)); + if(Base::_reached) + alg.reachedMap(*reinterpret_cast(Base::_reached)); + if(Base::_processed) + alg.processedMap(*reinterpret_cast(Base::_processed)); + if(Base::_pred) + alg.predMap(*reinterpret_cast(Base::_pred)); + if(Base::_dist) + alg.distMap(*reinterpret_cast(Base::_dist)); + alg.run(Base::_source); + } + + ///Runs Bfs algorithm from the given node. + + ///Runs Bfs algorithm from the given node. + ///\param s is the given source. + void run(Node s) + { + Base::_source=s; + run(); + } + + template + struct DefPredMapBase : public Base { + typedef T PredMap; + static PredMap *createPredMap(const Graph &) { return 0; }; + DefPredMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting PredMap + /// + /// \ref named-templ-param "Named parameter" + ///function for setting PredMap + /// + template + BfsWizard > predMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return BfsWizard >(*this); + } + + + template + struct DefReachedMapBase : public Base { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &) { return 0; }; + DefReachedMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting ReachedMap + /// + /// \ref named-templ-param "Named parameter" + ///function for setting ReachedMap + /// + template + BfsWizard > reachedMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return BfsWizard >(*this); + } + + + template + struct DefProcessedMapBase : public Base { + typedef T ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &) { return 0; }; + DefProcessedMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting ProcessedMap + /// + /// \ref named-templ-param "Named parameter" + ///function for setting ProcessedMap + /// + template + BfsWizard > processedMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return BfsWizard >(*this); + } + + + template + struct DefDistMapBase : public Base { + typedef T DistMap; + static DistMap *createDistMap(const Graph &) { return 0; }; + DefDistMapBase(const TR &b) : TR(b) {} + }; + + 
///\brief \ref named-templ-param "Named parameter" + ///function for setting DistMap type + /// + /// \ref named-templ-param "Named parameter" + ///function for setting DistMap type + /// + template + BfsWizard > distMap(const T &t) + { + Base::_dist=reinterpret_cast(const_cast(&t)); + return BfsWizard >(*this); + } + + /// Sets the source node, from which the Bfs algorithm runs. + + /// Sets the source node, from which the Bfs algorithm runs. + /// \param s is the source node. + BfsWizard &source(Node s) + { + Base::_source=s; + return *this; + } + + }; + + ///Function type interface for Bfs algorithm. + + /// \ingroup search + ///Function type interface for Bfs algorithm. + /// + ///This function also has several + ///\ref named-templ-func-param "named parameters", + ///they are declared as the members of class \ref BfsWizard. + ///The following + ///example shows how to use these parameters. + ///\code + /// bfs(g,source).predMap(preds).run(); + ///\endcode + ///\warning Don't forget to put the \ref BfsWizard::run() "run()" + ///to the end of the parameter list. + ///\sa BfsWizard + ///\sa Bfs + template + BfsWizard > + bfs(const GR &g,typename GR::Node s=INVALID) + { + return BfsWizard >(g,s); + } + +#ifdef DOXYGEN + /// \brief Visitor class for bfs. + /// + /// This class defines the interface of the BfsVisit events, and + /// it could be the base of a real Visitor class. + template + struct BfsVisitor { + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + /// \brief Called when the edge reach a node. + /// + /// It is called when the bfs find an edge which target is not + /// reached yet. + void discover(const Edge& edge) {} + /// \brief Called when the node reached first time. + /// + /// It is Called when the node reached first time. + void reach(const Node& node) {} + /// \brief Called when the edge examined but target of the edge + /// already discovered. + /// + /// It called when the edge examined but the target of the edge + /// already discovered. + void examine(const Edge& edge) {} + /// \brief Called for the source node of the bfs. + /// + /// It is called for the source node of the bfs. + void start(const Node& node) {} + /// \brief Called when the node processed. + /// + /// It is Called when the node processed. + void process(const Node& node) {} + }; +#else + template + struct BfsVisitor { + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + void discover(const Edge&) {} + void reach(const Node&) {} + void examine(const Edge&) {} + void start(const Node&) {} + void process(const Node&) {} + + template + struct Constraints { + void constraints() { + Edge edge; + Node node; + visitor.discover(edge); + visitor.reach(node); + visitor.examine(edge); + visitor.start(node); + visitor.process(node); + } + _Visitor& visitor; + }; + }; +#endif + + /// \brief Default traits class of BfsVisit class. + /// + /// Default traits class of BfsVisit class. + /// \param _Graph Graph type. + template + struct BfsVisitDefaultTraits { + + /// \brief The graph type the algorithm runs on. + typedef _Graph Graph; + + /// \brief The type of the map that indicates which nodes are reached. + /// + /// The type of the map that indicates which nodes are reached. + /// It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// \todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + + /// \brief Instantiates a ReachedMap. 
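// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of the original patch.]
// The function-type bfs() interface declared above, with a named parameter
// supplied through a chained call. The graph and the dist map are
// hypothetical; note the trailing run(), as the warning above requires.
#include <lemon/list_graph.h>
#include <lemon/bfs.h>

int bfs_wizard_example()
{
    lemon::ListGraph g;
    lemon::ListGraph::Node s = g.addNode();
    lemon::ListGraph::Node t = g.addNode();
    g.addEdge(s, t);

    // Distances are written into an externally owned node map.
    lemon::ListGraph::NodeMap<int> dist(g);
    lemon::bfs(g, s).distMap(dist).run();
    return dist[t];   // 1
}
// ---------------------------------------------------------------------------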
+ /// + /// This function instantiates a \ref ReachedMap. + /// \param graph is the graph, to which + /// we would like to define the \ref ReachedMap. + static ReachedMap *createReachedMap(const Graph &graph) { + return new ReachedMap(graph); + } + + }; + + /// \ingroup search + /// + /// \brief %BFS Visit algorithm class. + /// + /// This class provides an efficient implementation of the %BFS algorithm + /// with visitor interface. + /// + /// The %BfsVisit class provides an alternative interface to the Bfs + /// class. It works with callback mechanism, the BfsVisit object calls + /// on every bfs event the \c Visitor class member functions. + /// + /// \param _Graph The graph type the algorithm runs on. The default value is + /// \ref ListGraph. The value of _Graph is not used directly by Bfs, it + /// is only passed to \ref BfsDefaultTraits. + /// \param _Visitor The Visitor object for the algorithm. The + /// \ref BfsVisitor "BfsVisitor<_Graph>" is an empty Visitor which + /// does not observe the Bfs events. If you want to observe the bfs + /// events you should implement your own Visitor class. + /// \param _Traits Traits class to set various data types used by the + /// algorithm. The default traits class is + /// \ref BfsVisitDefaultTraits "BfsVisitDefaultTraits<_Graph>". + /// See \ref BfsVisitDefaultTraits for the documentation of + /// a Bfs visit traits class. + /// + /// \author Jacint Szabo, Alpar Juttner and Balazs Dezso +#ifdef DOXYGEN + template +#else + template , + typename _Traits = BfsDefaultTraits<_Graph> > +#endif + class BfsVisit { + public: + + /// \brief \ref Exception for uninitialized parameters. + /// + /// This error represents problems in the initialization + /// of the parameters of the algorithms. + class UninitializedParameter : public lemon::UninitializedParameter { + public: + virtual const char* what() const throw() + { + return "lemon::BfsVisit::UninitializedParameter"; + } + }; + + typedef _Traits Traits; + + typedef typename Traits::Graph Graph; + + typedef _Visitor Visitor; + + ///The type of the map indicating which nodes are reached. + typedef typename Traits::ReachedMap ReachedMap; + + private: + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + typedef typename Graph::OutEdgeIt OutEdgeIt; + + /// Pointer to the underlying graph. + const Graph *_graph; + /// Pointer to the visitor object. + Visitor *_visitor; + ///Pointer to the map of reached status of the nodes. + ReachedMap *_reached; + ///Indicates if \ref _reached is locally allocated (\c true) or not. + bool local_reached; + + std::vector _list; + int _list_front, _list_back; + + /// \brief Creates the maps if necessary. + /// + /// Creates the maps if necessary. 
+ void create_maps() { + if(!_reached) { + local_reached = true; + _reached = Traits::createReachedMap(*_graph); + } + } + + protected: + + BfsVisit() {} + + public: + + typedef BfsVisit Create; + + /// \name Named template parameters + + ///@{ + template + struct DefReachedMapTraits : public Traits { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &graph) { + throw UninitializedParameter(); + } + }; + /// \brief \ref named-templ-param "Named parameter" for setting + /// ReachedMap type + /// + /// \ref named-templ-param "Named parameter" for setting ReachedMap type + template + struct DefReachedMap : public BfsVisit< Graph, Visitor, + DefReachedMapTraits > { + typedef BfsVisit< Graph, Visitor, DefReachedMapTraits > Create; + }; + ///@} + + public: + + /// \brief Constructor. + /// + /// Constructor. + /// + /// \param graph the graph the algorithm will run on. + /// \param visitor The visitor of the algorithm. + /// + BfsVisit(const Graph& graph, Visitor& visitor) + : _graph(&graph), _visitor(&visitor), + _reached(0), local_reached(false) {} + + /// \brief Destructor. + /// + /// Destructor. + ~BfsVisit() { + if(local_reached) delete _reached; + } + + /// \brief Sets the map indicating if a node is reached. + /// + /// Sets the map indicating if a node is reached. + /// If you don't use this function before calling \ref run(), + /// it will allocate one. The destuctor deallocates this + /// automatically allocated map, of course. + /// \return (*this) + BfsVisit &reachedMap(ReachedMap &m) { + if(local_reached) { + delete _reached; + local_reached = false; + } + _reached = &m; + return *this; + } + + public: + /// \name Execution control + /// The simplest way to execute the algorithm is to use + /// one of the member functions called \c run(...). + /// \n + /// If you need more control on the execution, + /// first you must call \ref init(), then you can adda source node + /// with \ref addSource(). + /// Finally \ref start() will perform the actual path + /// computation. + + /// @{ + /// \brief Initializes the internal data structures. + /// + /// Initializes the internal data structures. + /// + void init() { + create_maps(); + _list.resize(countNodes(*_graph)); + _list_front = _list_back = -1; + for (NodeIt u(*_graph) ; u != INVALID ; ++u) { + _reached->set(u, false); + } + } + + /// \brief Adds a new source node. + /// + /// Adds a new source node to the set of nodes to be processed. + void addSource(Node s) { + if(!(*_reached)[s]) { + _reached->set(s,true); + _visitor->start(s); + _visitor->reach(s); + _list[++_list_back] = s; + } + } + + /// \brief Processes the next node. + /// + /// Processes the next node. + /// + /// \return The processed node. + /// + /// \pre The queue must not be empty! + Node processNextNode() { + Node n = _list[++_list_front]; + _visitor->process(n); + Edge e; + for (_graph->firstOut(e, n); e != INVALID; _graph->nextOut(e)) { + Node m = _graph->target(e); + if (!(*_reached)[m]) { + _visitor->discover(e); + _visitor->reach(m); + _reached->set(m, true); + _list[++_list_back] = m; + } else { + _visitor->examine(e); + } + } + return n; + } + + /// \brief Processes the next node. + /// + /// Processes the next node. And checks that the given target node + /// is reached. If the target node is reachable from the processed + /// node then the reached parameter will be set true. The reached + /// parameter should be initially false. + /// + /// \param target The target node. + /// \retval reach Indicates that the target node is reached. 
+ /// \return The processed node. + /// + /// \warning The queue must not be empty! + Node processNextNode(Node target, bool& reach) { + Node n = _list[++_list_front]; + _visitor->process(n); + Edge e; + for (_graph->firstOut(e, n); e != INVALID; _graph->nextOut(e)) { + Node m = _graph->target(e); + if (!(*_reached)[m]) { + _visitor->discover(e); + _visitor->reach(m); + _reached->set(m, true); + _list[++_list_back] = m; + reach = reach || (target == m); + } else { + _visitor->examine(e); + } + } + return n; + } + + /// \brief Processes the next node. + /// + /// Processes the next node. And checks that at least one of + /// reached node has true value in the \c nm node map. If one node + /// with true value is reachable from the processed node then the + /// rnode parameter will be set to the first of such nodes. + /// + /// \param nm The node map of possible targets. + /// \retval rnode The reached target node. + /// \return The processed node. + /// + /// \warning The queue must not be empty! + template + Node processNextNode(const NM& nm, Node& rnode) { + Node n = _list[++_list_front]; + _visitor->process(n); + Edge e; + for (_graph->firstOut(e, n); e != INVALID; _graph->nextOut(e)) { + Node m = _graph->target(e); + if (!(*_reached)[m]) { + _visitor->discover(e); + _visitor->reach(m); + _reached->set(m, true); + _list[++_list_back] = m; + if (nm[m] && rnode == INVALID) rnode = m; + } else { + _visitor->examine(e); + } + } + return n; + } + + /// \brief Next node to be processed. + /// + /// Next node to be processed. + /// + /// \return The next node to be processed or INVALID if the stack is + /// empty. + Node nextNode() { + return _list_front != _list_back ? _list[_list_front + 1] : INVALID; + } + + /// \brief Returns \c false if there are nodes + /// to be processed in the queue + /// + /// Returns \c false if there are nodes + /// to be processed in the queue + bool emptyQueue() { return _list_front == _list_back; } + + /// \brief Returns the number of the nodes to be processed. + /// + /// Returns the number of the nodes to be processed in the queue. + int queueSize() { return _list_back - _list_front; } + + /// \brief Executes the algorithm. + /// + /// Executes the algorithm. + /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + void start() { + while ( !emptyQueue() ) processNextNode(); + } + + /// \brief Executes the algorithm until \c dest is reached. + /// + /// Executes the algorithm until \c dest is reached. + /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + void start(Node dest) { + bool reach = false; + while ( !emptyQueue() && !reach ) processNextNode(dest, reach); + } + + /// \brief Executes the algorithm until a condition is met. + /// + /// Executes the algorithm until a condition is met. + /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + /// + ///\param nm must be a bool (or convertible) node map. The + ///algorithm will stop when it reaches a node \c v with + /// nm[v] true. + /// + ///\return The reached node \c v with nm[v] true or + ///\c INVALID if no such node was found. + template + Node start(const NM &nm) { + Node rnode = INVALID; + while ( !emptyQueue() && rnode == INVALID ) { + processNextNode(nm, rnode); + } + return rnode; + } + + /// \brief Runs %BFSVisit algorithm from node \c s. 
+ /// + /// This method runs the %BFS algorithm from a root node \c s. + /// \note b.run(s) is just a shortcut of the following code. + ///\code + /// b.init(); + /// b.addSource(s); + /// b.start(); + ///\endcode + void run(Node s) { + init(); + addSource(s); + start(); + } + + /// \brief Runs %BFSVisit algorithm to visit all nodes in the graph. + /// + /// This method runs the %BFS algorithm in order to + /// compute the %BFS path to each node. The algorithm computes + /// - The %BFS tree. + /// - The distance of each node from the root in the %BFS tree. + /// + ///\note b.run() is just a shortcut of the following code. + ///\code + /// b.init(); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!b.reached(it)) { + /// b.addSource(it); + /// b.start(); + /// } + /// } + ///\endcode + void run() { + init(); + for (NodeIt it(*_graph); it != INVALID; ++it) { + if (!reached(it)) { + addSource(it); + start(); + } + } + } + ///@} + + /// \name Query Functions + /// The result of the %BFS algorithm can be obtained using these + /// functions.\n + /// Before the use of these functions, + /// either run() or start() must be called. + ///@{ + + /// \brief Checks if a node is reachable from the root. + /// + /// Returns \c true if \c v is reachable from the root(s). + /// \warning The source nodes are inditated as unreachable. + /// \pre Either \ref run() or \ref start() + /// must be called before using this function. + /// + bool reached(Node v) { return (*_reached)[v]; } + ///@} + }; + +} //END OF NAMESPACE LEMON + +#endif + diff --git a/src/lemon/bin_heap.h b/src/lemon/bin_heap.h new file mode 100644 index 0000000..63e0c08 --- /dev/null +++ b/src/lemon/bin_heap.h @@ -0,0 +1,346 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BIN_HEAP_H +#define LEMON_BIN_HEAP_H + +///\ingroup auxdat +///\file +///\brief Binary Heap implementation. + +#include +#include +#include + +namespace lemon { + + ///\ingroup auxdat + /// + ///\brief A Binary Heap implementation. + /// + ///This class implements the \e binary \e heap data structure. A \e heap + ///is a data structure for storing items with specified values called \e + ///priorities in such a way that finding the item with minimum priority is + ///efficient. \c Compare specifies the ordering of the priorities. In a heap + ///one can change the priority of an item, add or erase an item, etc. + /// + ///\param _Prio Type of the priority of the items. + ///\param _ItemIntMap A read and writable Item int map, used internally + ///to handle the cross references. + ///\param _Compare A class for the ordering of the priorities. The + ///default is \c std::less<_Prio>. 
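// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of the original patch.]
// A minimal visitor for the BfsVisit class documented above: it counts the
// tree edges discovered during the traversal. The visitor and graph below
// are hypothetical; the callback overrides the no-op BfsVisitor default.
#include <iostream>
#include <lemon/list_graph.h>
#include <lemon/bfs.h>

struct CountingVisitor : public lemon::BfsVisitor<lemon::ListGraph> {
    int tree_edges;
    CountingVisitor() : tree_edges(0) {}
    // Called whenever BFS follows an edge to a not-yet-reached node.
    void discover(const lemon::ListGraph::Edge&) { ++tree_edges; }
};

int bfs_visit_example()
{
    lemon::ListGraph g;
    lemon::ListGraph::Node a = g.addNode();
    lemon::ListGraph::Node b = g.addNode();
    lemon::ListGraph::Node c = g.addNode();
    g.addEdge(a, b);
    g.addEdge(a, c);

    CountingVisitor vis;
    lemon::BfsVisit<lemon::ListGraph, CountingVisitor> bfs_visit(g, vis);
    bfs_visit.run(a);
    std::cout << "BFS tree edges: " << vis.tree_edges << std::endl;   // 2
    return 0;
}
// ---------------------------------------------------------------------------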
+ /// + ///\sa FibHeap + ///\sa Dijkstra + template > + class BinHeap { + + public: + ///\e + typedef _ItemIntMap ItemIntMap; + ///\e + typedef _Prio Prio; + ///\e + typedef typename ItemIntMap::Key Item; + ///\e + typedef std::pair Pair; + ///\e + typedef _Compare Compare; + + /// \brief Type to represent the items states. + /// + /// Each Item element have a state associated to it. It may be "in heap", + /// "pre heap" or "post heap". The latter two are indifferent from the + /// heap's point of view, but may be useful to the user. + /// + /// The ItemIntMap \e should be initialized in such way that it maps + /// PRE_HEAP (-1) to any element to be put in the heap... + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + private: + std::vector data; + Compare comp; + ItemIntMap &iim; + + public: + /// \brief The constructor. + /// + /// The constructor. + /// \param _iim should be given to the constructor, since it is used + /// internally to handle the cross references. The value of the map + /// should be PRE_HEAP (-1) for each element. + explicit BinHeap(ItemIntMap &_iim) : iim(_iim) {} + + /// \brief The constructor. + /// + /// The constructor. + /// \param _iim should be given to the constructor, since it is used + /// internally to handle the cross references. The value of the map + /// should be PRE_HEAP (-1) for each element. + /// + /// \param _comp The comparator function object. + BinHeap(ItemIntMap &_iim, const Compare &_comp) + : iim(_iim), comp(_comp) {} + + + /// The number of items stored in the heap. + /// + /// \brief Returns the number of items stored in the heap. + int size() const { return data.size(); } + + /// \brief Checks if the heap stores no items. + /// + /// Returns \c true if and only if the heap stores no items. + bool empty() const { return data.empty(); } + + /// \brief Make empty this heap. + /// + /// Make empty this heap. It does not change the cross reference map. + /// If you want to reuse what is not surely empty you should first clear + /// the heap and after that you should set the cross reference map for + /// each item to \c PRE_HEAP. + void clear() { + data.clear(); + } + + private: + static int parent(int i) { return (i-1)/2; } + + static int second_child(int i) { return 2*i+2; } + bool less(const Pair &p1, const Pair &p2) const { + return comp(p1.second, p2.second); + } + + int bubble_up(int hole, Pair p) { + int par = parent(hole); + while( hole>0 && less(p,data[par]) ) { + move(data[par],hole); + hole = par; + par = parent(hole); + } + move(p, hole); + return hole; + } + + int bubble_down(int hole, Pair p, int length) { + int child = second_child(hole); + while(child < length) { + if( less(data[child-1], data[child]) ) { + --child; + } + if( !less(data[child], p) ) + goto ok; + move(data[child], hole); + hole = child; + child = second_child(hole); + } + child--; + if( child 0) { + bubble_down(0, data[n], n); + } + data.pop_back(); + } + + /// \brief Deletes \c i from the heap. + /// + /// This method deletes item \c i from the heap. + /// \param i The item to erase. + /// \pre The item should be in the heap. + void erase(const Item &i) { + int h = iim[i]; + int n = data.size()-1; + iim.set(data[h].first, POST_HEAP); + if( h < n ) { + if ( bubble_up(h, data[n]) == h) { + bubble_down(h, data[n], n); + } + } + data.pop_back(); + } + + + /// \brief Returns the priority of \c i. + /// + /// This function returns the priority of item \c i. + /// \pre \c i must be in the heap. + /// \param i The item. 
+ Prio operator[](const Item &i) const { + int idx = iim[i]; + return data[idx].second; + } + + /// \brief \c i gets to the heap with priority \c p independently + /// if \c i was already there. + /// + /// This method calls \ref push(\c i, \c p) if \c i is not stored + /// in the heap and sets the priority of \c i to \c p otherwise. + /// \param i The item. + /// \param p The priority. + void set(const Item &i, const Prio &p) { + int idx = iim[i]; + if( idx < 0 ) { + push(i,p); + } + else if( comp(p, data[idx].second) ) { + bubble_up(idx, Pair(i,p)); + } + else { + bubble_down(idx, Pair(i,p), data.size()); + } + } + + /// \brief Decreases the priority of \c i to \c p. + /// + /// This method decreases the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at least \c + /// p relative to \c Compare. + /// \param i The item. + /// \param p The priority. + void decrease(const Item &i, const Prio &p) { + int idx = iim[i]; + bubble_up(idx, Pair(i,p)); + } + + /// \brief Increases the priority of \c i to \c p. + /// + /// This method sets the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at most \c + /// p relative to \c Compare. + /// \param i The item. + /// \param p The priority. + void increase(const Item &i, const Prio &p) { + int idx = iim[i]; + bubble_down(idx, Pair(i,p), data.size()); + } + + /// \brief Returns if \c item is in, has already been in, or has + /// never been in the heap. + /// + /// This method returns PRE_HEAP if \c item has never been in the + /// heap, IN_HEAP if it is in the heap at the moment, and POST_HEAP + /// otherwise. In the latter case it is possible that \c item will + /// get back to the heap again. + /// \param i The item. + State state(const Item &i) const { + int s = iim[i]; + if( s>=0 ) + s=0; + return State(s); + } + + /// \brief Sets the state of the \c item in the heap. + /// + /// Sets the state of the \c item in the heap. It can be used to + /// manually clear the heap when it is important to achive the + /// better time complexity. + /// \param i The item. + /// \param st The state. It should not be \c IN_HEAP. + void state(const Item& i, State st) { + switch (st) { + case POST_HEAP: + case PRE_HEAP: + if (state(i) == IN_HEAP) { + erase(i); + } + iim[i] = st; + break; + case IN_HEAP: + break; + } + } + + /// \brief Replaces an item in the heap. + /// + /// The \c i item is replaced with \c j item. The \c i item should + /// be in the heap, while the \c j should be out of the heap. The + /// \c i item will out of the heap and \c j will be in the heap + /// with the same prioriority as prevoiusly the \c i item. + void replace(const Item& i, const Item& j) { + int idx = iim[i]; + iim.set(i, iim[j]); + iim.set(j, idx); + data[idx].first = j; + } + + }; // class BinHeap + +} // namespace lemon + +#endif // LEMON_BIN_HEAP_H diff --git a/src/lemon/bipartite_matching.h b/src/lemon/bipartite_matching.h new file mode 100644 index 0000000..ce3cb4f --- /dev/null +++ b/src/lemon/bipartite_matching.h @@ -0,0 +1,1732 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. 
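// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of the original patch.]
// Using BinHeap as a priority queue over integer items 0..n-1. IntCrossRef is
// a hypothetical minimal "ItemIntMap": it maps each item to its heap position
// and is pre-initialized to PRE_HEAP (-1), as the documentation above asks.
// top() and prio() belong to the standard BinHeap interface, although they
// fall outside the excerpt shown here.
#include <iostream>
#include <vector>
#include <lemon/bin_heap.h>

struct IntCrossRef {
    typedef int Key;
    typedef int Value;
    std::vector<int> pos;
    explicit IntCrossRef(int n) : pos(n, -1) {}     // -1 == PRE_HEAP
    int operator[](int k) const { return pos[k]; }
    void set(int k, int v) { pos[k] = v; }
};

int bin_heap_example()
{
    IntCrossRef xref(3);
    lemon::BinHeap<double, IntCrossRef> heap(xref);

    heap.push(0, 2.5);
    heap.push(1, 0.5);
    heap.push(2, 1.0);

    std::cout << "min item " << heap.top()
              << " with priority " << heap.prio() << std::endl;   // item 1, 0.5
    heap.decrease(2, 0.1);   // item 2 moves to the top
    heap.pop();
    return heap.size();      // 2
}
// ---------------------------------------------------------------------------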
+ * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BIPARTITE_MATCHING +#define LEMON_BIPARTITE_MATCHING + +#include + +#include +#include +#include + +#include + +///\ingroup matching +///\file +///\brief Maximum matching algorithms in bipartite graphs. +/// +///\note The pr_bipartite_matching.h file also contains algorithms to +///solve maximum cardinality bipartite matching problems. + +namespace lemon { + + /// \ingroup matching + /// + /// \brief Bipartite Max Cardinality Matching algorithm + /// + /// Bipartite Max Cardinality Matching algorithm. This class implements + /// the Hopcroft-Karp algorithm which has \f$ O(e\sqrt{n}) \f$ time + /// complexity. + /// + /// \note In several cases the push-relabel based algorithms have + /// better runtime performance than the augmenting path based ones. + /// + /// \see PrBipartiteMatching + template + class MaxBipartiteMatching { + protected: + + typedef BpUGraph Graph; + + typedef typename Graph::Node Node; + typedef typename Graph::ANodeIt ANodeIt; + typedef typename Graph::BNodeIt BNodeIt; + typedef typename Graph::UEdge UEdge; + typedef typename Graph::UEdgeIt UEdgeIt; + typedef typename Graph::IncEdgeIt IncEdgeIt; + + typedef typename BpUGraph::template ANodeMap ANodeMatchingMap; + typedef typename BpUGraph::template BNodeMap BNodeMatchingMap; + + + public: + + /// \brief Constructor. + /// + /// Constructor of the algorithm. + MaxBipartiteMatching(const BpUGraph& graph) + : _matching(graph), _rmatching(graph), _reached(graph), _graph(&graph) {} + + /// \name Execution control + /// The simplest way to execute the algorithm is to use + /// one of the member functions called \c run(). + /// \n + /// If you need more control on the execution, + /// first you must call \ref init() or one alternative for it. + /// Finally \ref start() will perform the matching computation or + /// with step-by-step execution you can augment the solution. + + /// @{ + + /// \brief Initalize the data structures. + /// + /// It initalizes the data structures and creates an empty matching. + void init() { + for (ANodeIt it(*_graph); it != INVALID; ++it) { + _matching.set(it, INVALID); + } + for (BNodeIt it(*_graph); it != INVALID; ++it) { + _rmatching.set(it, INVALID); + _reached.set(it, -1); + } + _size = 0; + _phase = -1; + } + + /// \brief Initalize the data structures. + /// + /// It initalizes the data structures and creates a greedy + /// matching. From this matching sometimes it is faster to get + /// the matching than from the initial empty matching. + void greedyInit() { + _size = 0; + for (BNodeIt it(*_graph); it != INVALID; ++it) { + _rmatching.set(it, INVALID); + _reached.set(it, 0); + } + for (ANodeIt it(*_graph); it != INVALID; ++it) { + _matching[it] = INVALID; + for (IncEdgeIt jt(*_graph, it); jt != INVALID; ++jt) { + if (_rmatching[_graph->bNode(jt)] == INVALID) { + _matching.set(it, jt); + _rmatching.set(_graph->bNode(jt), jt); + _reached.set(_graph->bNode(jt), -1); + ++_size; + break; + } + } + } + _phase = 0; + } + + /// \brief Initalize the data structures with an initial matching. + /// + /// It initalizes the data structures with an initial matching. 
+ template + void matchingInit(const MatchingMap& mm) { + for (ANodeIt it(*_graph); it != INVALID; ++it) { + _matching.set(it, INVALID); + } + for (BNodeIt it(*_graph); it != INVALID; ++it) { + _rmatching.set(it, INVALID); + _reached.set(it, 0); + } + _size = 0; + for (UEdgeIt it(*_graph); it != INVALID; ++it) { + if (mm[it]) { + ++_size; + _matching.set(_graph->aNode(it), it); + _rmatching.set(_graph->bNode(it), it); + _reached.set(_graph->bNode(it), 0); + } + } + _phase = 0; + } + + /// \brief Initalize the data structures with an initial matching. + /// + /// It initalizes the data structures with an initial matching. + /// \return %True when the given map contains really a matching. + template + bool checkedMatchingInit(const MatchingMap& mm) { + for (ANodeIt it(*_graph); it != INVALID; ++it) { + _matching.set(it, INVALID); + } + for (BNodeIt it(*_graph); it != INVALID; ++it) { + _rmatching.set(it, INVALID); + _reached.set(it, 0); + } + _size = 0; + for (UEdgeIt it(*_graph); it != INVALID; ++it) { + if (mm[it]) { + ++_size; + if (_matching[_graph->aNode(it)] != INVALID) { + return false; + } + _matching.set(_graph->aNode(it), it); + if (_matching[_graph->bNode(it)] != INVALID) { + return false; + } + _matching.set(_graph->bNode(it), it); + _reached.set(_graph->bNode(it), -1); + } + } + _phase = 0; + return true; + } + + private: + + bool _find_path(Node anode, int maxlevel, + typename Graph::template BNodeMap& level) { + for (IncEdgeIt it(*_graph, anode); it != INVALID; ++it) { + Node bnode = _graph->bNode(it); + if (level[bnode] == maxlevel) { + level.set(bnode, -1); + if (maxlevel == 0) { + _matching.set(anode, it); + _rmatching.set(bnode, it); + return true; + } else { + Node nnode = _graph->aNode(_rmatching[bnode]); + if (_find_path(nnode, maxlevel - 1, level)) { + _matching.set(anode, it); + _rmatching.set(bnode, it); + return true; + } + } + } + } + return false; + } + + public: + + /// \brief An augmenting phase of the Hopcroft-Karp algorithm + /// + /// It runs an augmenting phase of the Hopcroft-Karp + /// algorithm. This phase finds maximal edge disjoint augmenting + /// paths and augments on these paths. The algorithm consists at + /// most of \f$ O(\sqrt{n}) \f$ phase and one phase is \f$ O(e) + /// \f$ long. 
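+    ///
+    /// An editorial usage sketch (not part of the original sources): the
+    /// helper \c countPhases below is illustrative only and uses members
+    /// declared in this class.
+    /// \code
+    ///   template <typename BpUGraph>
+    ///   int countPhases(const BpUGraph& g) {
+    ///     MaxBipartiteMatching<BpUGraph> matcher(g);
+    ///     matcher.greedyInit();              // start from a greedy matching
+    ///     int phases = 0;
+    ///     while (matcher.augment()) {        // one Hopcroft-Karp phase per call
+    ///       ++phases;                        // at most O(sqrt(n)) iterations
+    ///     }
+    ///     return phases;
+    ///   }
+    /// \endcode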
+ bool augment() { + + ++_phase; + + typename Graph::template BNodeMap _level(*_graph, -1); + //typename Graph::template ANodeMap _found(*_graph, false); + typename Graph::template ANodeMap _found(*_graph, false); + std::vector queue, aqueue; + for (BNodeIt it(*_graph); it != INVALID; ++it) { + if (_rmatching[it] == INVALID) { + queue.push_back(it); + _reached.set(it, _phase); + _level.set(it, 0); + } + } + + bool success = false; + + int level = 0; + //std::vector nqueue; + while (!success && !queue.empty()) { + //nqueue.clear(); + std::vector nqueue; + for (int i = 0; i < int(queue.size()); ++i) { + Node bnode = queue[i]; + for (IncEdgeIt jt(*_graph, bnode); jt != INVALID; ++jt) { + Node anode = _graph->aNode(jt); + if (_matching[anode] == INVALID) { + + if (!_found[anode]) { + if (_find_path(anode, level, _level)) { + ++_size; + } + _found.set(anode, true); + } + success = true; + } else { + Node nnode = _graph->bNode(_matching[anode]); + if (_reached[nnode] != _phase) { + _reached.set(nnode, _phase); + nqueue.push_back(nnode); + _level.set(nnode, level + 1); + } + } + } + } + ++level; + queue.swap(nqueue); + } + + return success; + } + private: + + void _find_path_bfs(Node anode, + typename Graph::template ANodeMap& pred) { + while (true) { + UEdge uedge = pred[anode]; + Node bnode = _graph->bNode(uedge); + + UEdge nedge = _rmatching[bnode]; + + _matching.set(anode, uedge); + _rmatching.set(bnode, uedge); + + if (nedge == INVALID) break; + anode = _graph->aNode(nedge); + } + } + + public: + + /// \brief An augmenting phase with single path augementing + /// + /// This phase finds only one augmenting paths and augments on + /// these paths. The algorithm consists at most of \f$ O(n) \f$ + /// phase and one phase is \f$ O(e) \f$ long. + bool simpleAugment() { + ++_phase; + + typename Graph::template ANodeMap _pred(*_graph); + + std::vector queue, aqueue; + for (BNodeIt it(*_graph); it != INVALID; ++it) { + if (_rmatching[it] == INVALID) { + queue.push_back(it); + _reached.set(it, _phase); + } + } + + bool success = false; + + int level = 0; + while (!success && !queue.empty()) { + std::vector nqueue; + for (int i = 0; i < int(queue.size()); ++i) { + Node bnode = queue[i]; + for (IncEdgeIt jt(*_graph, bnode); jt != INVALID; ++jt) { + Node anode = _graph->aNode(jt); + if (_matching[anode] == INVALID) { + _pred.set(anode, jt); + _find_path_bfs(anode, _pred); + ++_size; + return true; + } else { + Node nnode = _graph->bNode(_matching[anode]); + if (_reached[nnode] != _phase) { + _pred.set(anode, jt); + _reached.set(nnode, _phase); + nqueue.push_back(nnode); + } + } + } + } + ++level; + queue.swap(nqueue); + } + + return success; + } + + + + /// \brief Starts the algorithm. + /// + /// Starts the algorithm. It runs augmenting phases until the optimal + /// solution reached. + void start() { + while (augment()) {} + } + + /// \brief Runs the algorithm. + /// + /// It just initalize the algorithm and then start it. + void run() { + greedyInit(); + start(); + } + + /// @} + + /// \name Query Functions + /// The result of the %Matching algorithm can be obtained using these + /// functions.\n + /// Before the use of these functions, + /// either run() or start() must be called. + + ///@{ + + /// \brief Return true if the given uedge is in the matching. + /// + /// It returns true if the given uedge is in the matching. + bool matchingEdge(const UEdge& edge) const { + return _matching[_graph->aNode(edge)] == edge; + } + + /// \brief Returns the matching edge from the node. 
+ /// + /// Returns the matching edge from the node. If there is not such + /// edge it gives back \c INVALID. + /// \note If the parameter node is a B-node then the running time is + /// propotional to the degree of the node. + UEdge matchingEdge(const Node& node) const { + if (_graph->aNode(node)) { + return _matching[node]; + } else { + return _rmatching[node]; + } + } + + /// \brief Set true all matching uedge in the map. + /// + /// Set true all matching uedge in the map. It does not change the + /// value mapped to the other uedges. + /// \return The number of the matching edges. + template + int quickMatching(MatchingMap& mm) const { + for (ANodeIt it(*_graph); it != INVALID; ++it) { + if (_matching[it] != INVALID) { + mm.set(_matching[it], true); + } + } + return _size; + } + + /// \brief Set true all matching uedge in the map and the others to false. + /// + /// Set true all matching uedge in the map and the others to false. + /// \return The number of the matching edges. + template + int matching(MatchingMap& mm) const { + for (UEdgeIt it(*_graph); it != INVALID; ++it) { + mm.set(it, it == _matching[_graph->aNode(it)]); + } + return _size; + } + + ///Gives back the matching in an ANodeMap. + + ///Gives back the matching in an ANodeMap. The parameter should + ///be a write ANodeMap of UEdge values. + ///\return The number of the matching edges. + template + int aMatching(MatchingMap& mm) const { + for (ANodeIt it(*_graph); it != INVALID; ++it) { + mm.set(it, _matching[it]); + } + return _size; + } + + ///Gives back the matching in a BNodeMap. + + ///Gives back the matching in a BNodeMap. The parameter should + ///be a write BNodeMap of UEdge values. + ///\return The number of the matching edges. + template + int bMatching(MatchingMap& mm) const { + for (BNodeIt it(*_graph); it != INVALID; ++it) { + mm.set(it, _rmatching[it]); + } + return _size; + } + + /// \brief Returns a minimum covering of the nodes. + /// + /// The minimum covering set problem is the dual solution of the + /// maximum bipartite matching. It provides a solution for this + /// problem what is proof of the optimality of the matching. + /// \return The size of the cover set. + template + int coverSet(CoverMap& covering) const { + + int size = 0; + for (ANodeIt it(*_graph); it != INVALID; ++it) { + bool cn = _matching[it] != INVALID && + _reached[_graph->bNode(_matching[it])] == _phase; + covering.set(it, cn); + if (cn) ++size; + } + for (BNodeIt it(*_graph); it != INVALID; ++it) { + bool cn = _reached[it] != _phase; + covering.set(it, cn); + if (cn) ++size; + } + return size; + } + + /// \brief Gives back a barrier on the A-nodes + /// + /// The barrier is s subset of the nodes on the same side of the + /// graph, which size minus its neighbours is exactly the + /// unmatched nodes on the A-side. + /// \retval barrier A WriteMap on the ANodes with bool value. + template + void aBarrier(BarrierMap& barrier) const { + + for (ANodeIt it(*_graph); it != INVALID; ++it) { + barrier.set(it, _matching[it] == INVALID || + _reached[_graph->bNode(_matching[it])] != _phase); + } + } + + /// \brief Gives back a barrier on the B-nodes + /// + /// The barrier is s subset of the nodes on the same side of the + /// graph, which size minus its neighbours is exactly the + /// unmatched nodes on the B-side. + /// \retval barrier A WriteMap on the BNodes with bool value. 
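+    ///
+    /// An editorial sketch (not part of the original sources) checking
+    /// Koenig's theorem with the query functions above; it assumes the graph
+    /// type provides a \c NodeMap covering both node sets, and the helper
+    /// name is illustrative.
+    /// \code
+    ///   template <typename BpUGraph>
+    ///   bool coverSizeEqualsMatchingSize(const BpUGraph& g) {
+    ///     MaxBipartiteMatching<BpUGraph> matcher(g);
+    ///     matcher.run();
+    ///     typename BpUGraph::template NodeMap<bool> cover(g, false);
+    ///     // By Koenig's theorem the minimum node cover and the maximum
+    ///     // matching have the same size in a bipartite graph.
+    ///     return matcher.coverSet(cover) == matcher.matchingSize();
+    ///   }
+    /// \endcode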
+ template + void bBarrier(BarrierMap& barrier) const { + + for (BNodeIt it(*_graph); it != INVALID; ++it) { + barrier.set(it, _reached[it] == _phase); + } + } + + /// \brief Gives back the number of the matching edges. + /// + /// Gives back the number of the matching edges. + int matchingSize() const { + return _size; + } + + /// @} + + private: + + typename BpUGraph::template ANodeMap _matching; + typename BpUGraph::template BNodeMap _rmatching; + + typename BpUGraph::template BNodeMap _reached; + + int _phase; + const Graph *_graph; + + int _size; + + }; + + /// \ingroup matching + /// + /// \brief Maximum cardinality bipartite matching + /// + /// This function calculates the maximum cardinality matching + /// in a bipartite graph. It gives back the matching in an undirected + /// edge map. + /// + /// \param graph The bipartite graph. + /// \return The size of the matching. + template + int maxBipartiteMatching(const BpUGraph& graph) { + MaxBipartiteMatching bpmatching(graph); + bpmatching.run(); + return bpmatching.matchingSize(); + } + + /// \ingroup matching + /// + /// \brief Maximum cardinality bipartite matching + /// + /// This function calculates the maximum cardinality matching + /// in a bipartite graph. It gives back the matching in an undirected + /// edge map. + /// + /// \param graph The bipartite graph. + /// \retval matching The ANodeMap of UEdges which will be set to covered + /// matching undirected edge. + /// \return The size of the matching. + template + int maxBipartiteMatching(const BpUGraph& graph, MatchingMap& matching) { + MaxBipartiteMatching bpmatching(graph); + bpmatching.run(); + bpmatching.aMatching(matching); + return bpmatching.matchingSize(); + } + + /// \ingroup matching + /// + /// \brief Maximum cardinality bipartite matching + /// + /// This function calculates the maximum cardinality matching + /// in a bipartite graph. It gives back the matching in an undirected + /// edge map. + /// + /// \param graph The bipartite graph. + /// \retval matching The ANodeMap of UEdges which will be set to covered + /// matching undirected edge. + /// \retval barrier The BNodeMap of bools which will be set to a barrier + /// of the BNode-set. + /// \return The size of the matching. + template + int maxBipartiteMatching(const BpUGraph& graph, + MatchingMap& matching, BarrierMap& barrier) { + MaxBipartiteMatching bpmatching(graph); + bpmatching.run(); + bpmatching.aMatching(matching); + bpmatching.bBarrier(barrier); + return bpmatching.matchingSize(); + } + + /// \brief Default traits class for weighted bipartite matching algoritms. + /// + /// Default traits class for weighted bipartite matching algoritms. + /// \param _BpUGraph The bipartite undirected graph type. + /// \param _WeightMap Type of weight map. + template + struct MaxWeightedBipartiteMatchingDefaultTraits { + /// \brief The type of the weight of the undirected edges. + typedef typename _WeightMap::Value Value; + + /// The undirected bipartite graph type the algorithm runs on. + typedef _BpUGraph BpUGraph; + + /// The map of the edges weights + typedef _WeightMap WeightMap; + + /// \brief The cross reference type used by heap. + /// + /// The cross reference type used by heap. + /// Usually it is \c Graph::ANodeMap. + typedef typename BpUGraph::template ANodeMap HeapCrossRef; + + /// \brief Instantiates a HeapCrossRef. + /// + /// This function instantiates a \ref HeapCrossRef. + /// \param graph is the graph, to which we would like to define the + /// HeapCrossRef. 
+ static HeapCrossRef *createHeapCrossRef(const BpUGraph &graph) { + return new HeapCrossRef(graph); + } + + /// \brief The heap type used by weighted matching algorithms. + /// + /// The heap type used by weighted matching algorithms. It should + /// minimize the priorities and the heap's key type is the graph's + /// anode graph's node. + /// + /// \sa BinHeap + //typedef BinHeap Heap; + typedef FibHeap Heap; + + /// \brief Instantiates a Heap. + /// + /// This function instantiates a \ref Heap. + /// \param crossref The cross reference of the heap. + static Heap *createHeap(HeapCrossRef& crossref) { + return new Heap(crossref); + } + + }; + + + /// \ingroup matching + /// + /// \brief Bipartite Max Weighted Matching algorithm + /// + /// This class implements the bipartite Max Weighted Matching + /// algorithm. It uses the successive shortest path algorithm to + /// calculate the maximum weighted matching in the bipartite + /// graph. The algorithm can be used also to calculate the maximum + /// cardinality maximum weighted matching. The time complexity + /// of the algorithm is \f$ O(ne\log(n)) \f$ with the default binary + /// heap implementation but this can be improved to + /// \f$ O(n^2\log(n)+ne) \f$ if we use fibonacci heaps. + /// + /// The algorithm also provides a potential function on the nodes + /// which a dual solution of the matching algorithm and it can be + /// used to proof the optimality of the given pimal solution. +#ifdef DOXYGEN + template +#else + template , + typename _Traits = + MaxWeightedBipartiteMatchingDefaultTraits<_BpUGraph, _WeightMap> > +#endif + class MaxWeightedBipartiteMatching { +public: + + typedef _Traits Traits; + typedef typename Traits::BpUGraph BpUGraph; + typedef typename Traits::WeightMap WeightMap; + typedef typename Traits::Value Value; + +protected: + + typedef typename Traits::HeapCrossRef HeapCrossRef; + typedef typename Traits::Heap Heap; + + + typedef typename BpUGraph::Node Node; + typedef typename BpUGraph::ANodeIt ANodeIt; + typedef typename BpUGraph::BNodeIt BNodeIt; + typedef typename BpUGraph::UEdge UEdge; + typedef typename BpUGraph::UEdgeIt UEdgeIt; + typedef typename BpUGraph::IncEdgeIt IncEdgeIt; + + typedef typename BpUGraph::template ANodeMap ANodeMatchingMap; + typedef typename BpUGraph::template BNodeMap BNodeMatchingMap; + + typedef typename BpUGraph::template ANodeMap ANodePotentialMap; + typedef typename BpUGraph::template BNodeMap BNodePotentialMap; + + +public: + + /// \brief \ref Exception for uninitialized parameters. + /// + /// This error represents problems in the initialization + /// of the parameters of the algorithms. 
+ class UninitializedParameter : public lemon::UninitializedParameter { + public: + virtual const char* what() const throw() { + return "lemon::MaxWeightedBipartiteMatching::UninitializedParameter"; + } + }; + + ///\name Named template parameters + + ///@{ + + template + struct DefHeapTraits : public Traits { + typedef CR HeapCrossRef; + typedef H Heap; + static HeapCrossRef *createHeapCrossRef(const BpUGraph &) { + throw UninitializedParameter(); + } + static Heap *createHeap(HeapCrossRef &) { + throw UninitializedParameter(); + } + }; + + /// \brief \ref named-templ-param "Named parameter" for setting heap + /// and cross reference type + /// + /// \ref named-templ-param "Named parameter" for setting heap and cross + /// reference type + template > + struct DefHeap + : public MaxWeightedBipartiteMatching > { + typedef MaxWeightedBipartiteMatching > Create; +}; + +template +struct DefStandardHeapTraits : public Traits { + typedef CR HeapCrossRef; + typedef H Heap; + static HeapCrossRef *createHeapCrossRef(const BpUGraph &graph) { + return new HeapCrossRef(graph); + } + static Heap *createHeap(HeapCrossRef &crossref) { + return new Heap(crossref); + } +}; + +/// \brief \ref named-templ-param "Named parameter" for setting heap and +/// cross reference type with automatic allocation +/// +/// \ref named-templ-param "Named parameter" for setting heap and cross +/// reference type. It can allocate the heap and the cross reference +/// object if the cross reference's constructor waits for the graph as +/// parameter and the heap's constructor waits for the cross reference. +template > +struct DefStandardHeap +: public MaxWeightedBipartiteMatching > { +typedef MaxWeightedBipartiteMatching > +Create; +}; + +///@} + + +/// \brief Constructor. +/// +/// Constructor of the algorithm. +MaxWeightedBipartiteMatching(const BpUGraph& _graph, + const WeightMap& _weight) +: graph(&_graph), weight(&_weight), +anode_matching(_graph), bnode_matching(_graph), +anode_potential(_graph), bnode_potential(_graph), +_heap_cross_ref(0), local_heap_cross_ref(false), +_heap(0), local_heap(0) {} + +/// \brief Destructor. +/// +/// Destructor of the algorithm. +~MaxWeightedBipartiteMatching() { + destroyStructures(); +} + +/// \brief Sets the heap and the cross reference used by algorithm. +/// +/// Sets the heap and the cross reference used by algorithm. +/// If you don't use this function before calling \ref run(), +/// it will allocate one. The destuctor deallocates this +/// automatically allocated map, of course. +/// \return \c (*this) +MaxWeightedBipartiteMatching& heap(Heap& hp, HeapCrossRef &cr) { + if(local_heap_cross_ref) { + delete _heap_cross_ref; + local_heap_cross_ref = false; + } + _heap_cross_ref = &cr; + if(local_heap) { + delete _heap; + local_heap = false; + } + _heap = &hp; + return *this; +} + +/// \name Execution control +/// The simplest way to execute the algorithm is to use +/// one of the member functions called \c run(). +/// \n +/// If you need more control on the execution, +/// first you must call \ref init() or one alternative for it. +/// Finally \ref start() will perform the matching computation or +/// with step-by-step execution you can augment the solution. + +/// @{ + +/// \brief Initalize the data structures. +/// +/// It initalizes the data structures and creates an empty matching. 
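+///
+/// An editorial sketch (not part of the original sources): step-by-step
+/// execution that allows weight-decreasing phases, which yields the maximum
+/// weighted matching among the maximum cardinality ones. The helper name is
+/// illustrative.
+/// \code
+///   template <typename BpUGraph, typename WeightMap>
+///   typename WeightMap::Value
+///   maxCardinalityMatchingWeight(const BpUGraph& g, const WeightMap& w) {
+///     MaxWeightedBipartiteMatching<BpUGraph, WeightMap> matcher(g, w);
+///     matcher.init();
+///     while (matcher.augment(true)) {}   // equivalent to start(true)
+///     return matcher.matchingValue();
+///   }
+/// \endcode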
+void init() { + initStructures(); + for (ANodeIt it(*graph); it != INVALID; ++it) { + anode_matching[it] = INVALID; + anode_potential[it] = 0; + } + for (BNodeIt it(*graph); it != INVALID; ++it) { + bnode_matching[it] = INVALID; + bnode_potential[it] = 0; + for (IncEdgeIt jt(*graph, it); jt != INVALID; ++jt) { + if ((*weight)[jt] > bnode_potential[it]) { + bnode_potential[it] = (*weight)[jt]; + } + } + } + matching_value = 0; + matching_size = 0; +} + + +/// \brief An augmenting phase of the weighted matching algorithm +/// +/// It runs an augmenting phase of the weighted matching +/// algorithm. This phase finds the best augmenting path and +/// augments only on this paths. +/// +/// The algorithm consists at most +/// of \f$ O(n) \f$ phase and one phase is \f$ O(n\log(n)+e) \f$ +/// long with Fibonacci heap or \f$ O((n+e)\log(n)) \f$ long +/// with binary heap. +/// \param decrease If the given parameter true the matching value +/// can be decreased in the augmenting phase. If we would like +/// to calculate the maximum cardinality maximum weighted matching +/// then we should let the algorithm to decrease the matching +/// value in order to increase the number of the matching edges. +bool augment(bool decrease = false) { + + typename BpUGraph::template BNodeMap bdist(*graph); + typename BpUGraph::template BNodeMap bpred(*graph, INVALID); + + Node bestNode = INVALID; + Value bestValue = 0; + + _heap->clear(); + for (ANodeIt it(*graph); it != INVALID; ++it) { + (*_heap_cross_ref)[it] = Heap::PRE_HEAP; + } + + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] == INVALID) { + _heap->push(it, 0); + } + } + + Value bdistMax = 0; + while (!_heap->empty()) { + Node anode = _heap->top(); + Value avalue = _heap->prio(); + _heap->pop(); + for (IncEdgeIt jt(*graph, anode); jt != INVALID; ++jt) { + if (jt == anode_matching[anode]) continue; + Node bnode = graph->bNode(jt); + Value bvalue = avalue - (*weight)[jt] + + anode_potential[anode] + bnode_potential[bnode]; + if (bvalue > bdistMax) { + bdistMax = bvalue; + } + if (bpred[bnode] == INVALID || bvalue < bdist[bnode]) { + bdist[bnode] = bvalue; + bpred[bnode] = jt; + } else continue; + if (bnode_matching[bnode] != INVALID) { + Node newanode = graph->aNode(bnode_matching[bnode]); + switch (_heap->state(newanode)) { + case Heap::PRE_HEAP: + _heap->push(newanode, bvalue); + break; + case Heap::IN_HEAP: + if (bvalue < (*_heap)[newanode]) { + _heap->decrease(newanode, bvalue); + } + break; + case Heap::POST_HEAP: + break; + } + } else { + if (bestNode == INVALID || + bnode_potential[bnode] - bvalue > bestValue) { + bestValue = bnode_potential[bnode] - bvalue; + bestNode = bnode; + } + } + } + } + + if (bestNode == INVALID || (!decrease && bestValue < 0)) { + return false; + } + + matching_value += bestValue; + ++matching_size; + + for (BNodeIt it(*graph); it != INVALID; ++it) { + if (bpred[it] != INVALID) { + bnode_potential[it] -= bdist[it]; + } else { + bnode_potential[it] -= bdistMax; + } + } + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] != INVALID) { + Node bnode = graph->bNode(anode_matching[it]); + if (bpred[bnode] != INVALID) { + anode_potential[it] += bdist[bnode]; + } else { + anode_potential[it] += bdistMax; + } + } + } + + while (bestNode != INVALID) { + UEdge uedge = bpred[bestNode]; + Node anode = graph->aNode(uedge); + + bnode_matching[bestNode] = uedge; + if (anode_matching[anode] != INVALID) { + bestNode = graph->bNode(anode_matching[anode]); + } else { + bestNode = INVALID; + } + 
anode_matching[anode] = uedge; + } + + + return true; +} + +/// \brief Starts the algorithm. +/// +/// Starts the algorithm. It runs augmenting phases until the +/// optimal solution reached. +/// +/// \param maxCardinality If the given value is true it will +/// calculate the maximum cardinality maximum matching instead of +/// the maximum matching. +void start(bool maxCardinality = false) { + while (augment(maxCardinality)) {} +} + +/// \brief Runs the algorithm. +/// +/// It just initalize the algorithm and then start it. +/// +/// \param maxCardinality If the given value is true it will +/// calculate the maximum cardinality maximum matching instead of +/// the maximum matching. +void run(bool maxCardinality = false) { + init(); + start(maxCardinality); +} + +/// @} + +/// \name Query Functions +/// The result of the %Matching algorithm can be obtained using these +/// functions.\n +/// Before the use of these functions, +/// either run() or start() must be called. + +///@{ + +/// \brief Gives back the potential in the NodeMap +/// +/// Gives back the potential in the NodeMap. The matching is optimal +/// with the current number of edges if \f$ \pi(a) + \pi(b) - w(ab) = 0 \f$ +/// for each matching edges and \f$ \pi(a) + \pi(b) - w(ab) \ge 0 \f$ +/// for each edges. +template +void potential(PotentialMap& pt) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + pt.set(it, anode_potential[it]); + } + for (BNodeIt it(*graph); it != INVALID; ++it) { + pt.set(it, bnode_potential[it]); + } +} + +/// \brief Set true all matching uedge in the map. +/// +/// Set true all matching uedge in the map. It does not change the +/// value mapped to the other uedges. +/// \return The number of the matching edges. +template +int quickMatching(MatchingMap& mm) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] != INVALID) { + mm.set(anode_matching[it], true); + } + } + return matching_size; +} + +/// \brief Set true all matching uedge in the map and the others to false. +/// +/// Set true all matching uedge in the map and the others to false. +/// \return The number of the matching edges. +template +int matching(MatchingMap& mm) const { + for (UEdgeIt it(*graph); it != INVALID; ++it) { + mm.set(it, it == anode_matching[graph->aNode(it)]); + } + return matching_size; +} + +///Gives back the matching in an ANodeMap. + +///Gives back the matching in an ANodeMap. The parameter should +///be a write ANodeMap of UEdge values. +///\return The number of the matching edges. +template +int aMatching(MatchingMap& mm) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + mm.set(it, anode_matching[it]); + } + return matching_size; +} + +///Gives back the matching in a BNodeMap. + +///Gives back the matching in a BNodeMap. The parameter should +///be a write BNodeMap of UEdge values. +///\return The number of the matching edges. +template +int bMatching(MatchingMap& mm) const { + for (BNodeIt it(*graph); it != INVALID; ++it) { + mm.set(it, bnode_matching[it]); + } + return matching_size; +} + + +/// \brief Return true if the given uedge is in the matching. +/// +/// It returns true if the given uedge is in the matching. +bool matchingEdge(const UEdge& edge) const { + return anode_matching[graph->aNode(edge)] == edge; +} + +/// \brief Returns the matching edge from the node. +/// +/// Returns the matching edge from the node. If there is not such +/// edge it gives back \c INVALID. 
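+///
+/// An editorial sketch (not part of the original sources) that checks the
+/// optimality conditions stated for potential() above. It assumes the graph
+/// type provides a \c NodeMap over both node sets, that \c Value is an exact
+/// (e.g. integral) type, and that the algorithm has already been run.
+/// \code
+///   template <typename BpUGraph, typename WeightMap>
+///   bool dualFeasible(const BpUGraph& g, const WeightMap& w,
+///                     const MaxWeightedBipartiteMatching<BpUGraph, WeightMap>& m) {
+///     typename BpUGraph::template NodeMap<typename WeightMap::Value> pi(g);
+///     m.potential(pi);
+///     for (typename BpUGraph::UEdgeIt e(g); e != INVALID; ++e) {
+///       typename WeightMap::Value slack = pi[g.aNode(e)] + pi[g.bNode(e)] - w[e];
+///       if (slack < 0) return false;                        // potential infeasible
+///       if (m.matchingEdge(e) && slack != 0) return false;  // matching edge not tight
+///     }
+///     return true;
+///   }
+/// \endcode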
+UEdge matchingEdge(const Node& node) const { + if (graph->aNode(node)) { + return anode_matching[node]; + } else { + return bnode_matching[node]; + } +} + +/// \brief Gives back the sum of weights of the matching edges. +/// +/// Gives back the sum of weights of the matching edges. +Value matchingValue() const { + return matching_value; +} + +/// \brief Gives back the number of the matching edges. +/// +/// Gives back the number of the matching edges. +int matchingSize() const { + return matching_size; +} + +/// @} + +private: + +void initStructures() { + if (!_heap_cross_ref) { + local_heap_cross_ref = true; + _heap_cross_ref = Traits::createHeapCrossRef(*graph); + } + if (!_heap) { + local_heap = true; + _heap = Traits::createHeap(*_heap_cross_ref); + } +} + +void destroyStructures() { + if (local_heap_cross_ref) delete _heap_cross_ref; + if (local_heap) delete _heap; +} + + +private: + +const BpUGraph *graph; +const WeightMap* weight; + +ANodeMatchingMap anode_matching; +BNodeMatchingMap bnode_matching; + +ANodePotentialMap anode_potential; +BNodePotentialMap bnode_potential; + +Value matching_value; +int matching_size; + +HeapCrossRef *_heap_cross_ref; +bool local_heap_cross_ref; + +Heap *_heap; +bool local_heap; + +}; + +/// \ingroup matching +/// +/// \brief Maximum weighted bipartite matching +/// +/// This function calculates the maximum weighted matching +/// in a bipartite graph. It gives back the matching in an undirected +/// edge map. +/// +/// \param graph The bipartite graph. +/// \param weight The undirected edge map which contains the weights. +/// \retval matching The undirected edge map which will be set to +/// the matching. +/// \return The value of the matching. +template +typename WeightMap::Value +maxWeightedBipartiteMatching(const BpUGraph& graph, const WeightMap& weight, + MatchingMap& matching) { + MaxWeightedBipartiteMatching + bpmatching(graph, weight); + bpmatching.run(); + bpmatching.matching(matching); + return bpmatching.matchingValue(); +} + +/// \ingroup matching +/// +/// \brief Maximum weighted maximum cardinality bipartite matching +/// +/// This function calculates the maximum weighted of the maximum cardinality +/// matchings of a bipartite graph. It gives back the matching in an +/// undirected edge map. +/// +/// \param graph The bipartite graph. +/// \param weight The undirected edge map which contains the weights. +/// \retval matching The undirected edge map which will be set to +/// the matching. +/// \return The value of the matching. +template +typename WeightMap::Value +maxWeightedMaxBipartiteMatching(const BpUGraph& graph, + const WeightMap& weight, + MatchingMap& matching) { + MaxWeightedBipartiteMatching + bpmatching(graph, weight); + bpmatching.run(true); + bpmatching.matching(matching); + return bpmatching.matchingValue(); +} + +/// \brief Default traits class for minimum cost bipartite matching +/// algoritms. +/// +/// Default traits class for minimum cost bipartite matching +/// algoritms. +/// +/// \param _BpUGraph The bipartite undirected graph +/// type. +/// +/// \param _CostMap Type of cost map. +template +struct MinCostMaxBipartiteMatchingDefaultTraits { + /// \brief The type of the cost of the undirected edges. + typedef typename _CostMap::Value Value; + + /// The undirected bipartite graph type the algorithm runs on. + typedef _BpUGraph BpUGraph; + + /// The map of the edges costs + typedef _CostMap CostMap; + + /// \brief The cross reference type used by heap. + /// + /// The cross reference type used by heap. 
+ /// Usually it is \c Graph::NodeMap. + typedef typename BpUGraph::template NodeMap HeapCrossRef; + + /// \brief Instantiates a HeapCrossRef. + /// + /// This function instantiates a \ref HeapCrossRef. + /// \param graph is the graph, to which we would like to define the + /// HeapCrossRef. + static HeapCrossRef *createHeapCrossRef(const BpUGraph &graph) { + return new HeapCrossRef(graph); + } + + /// \brief The heap type used by costed matching algorithms. + /// + /// The heap type used by costed matching algorithms. It should + /// minimize the priorities and the heap's key type is the graph's + /// anode graph's node. + /// + /// \sa BinHeap + //typedef BinHeap Heap; + typedef FibHeap Heap; + + /// \brief Instantiates a Heap. + /// + /// This function instantiates a \ref Heap. + /// \param crossref The cross reference of the heap. + static Heap *createHeap(HeapCrossRef& crossref) { + return new Heap(crossref); + } + +}; + + +/// \ingroup matching +/// +/// \brief Bipartite Min Cost Matching algorithm +/// +/// This class implements the bipartite Min Cost Matching algorithm. +/// It uses the successive shortest path algorithm to calculate the +/// minimum cost maximum matching in the bipartite graph. The time +/// complexity of the algorithm is \f$ O(ne\log(n)) \f$ with the +/// default binary heap implementation but this can be improved to +/// \f$ O(n^2\log(n)+ne) \f$ if we use fibonacci heaps. +/// +/// The algorithm also provides a potential function on the nodes +/// which a dual solution of the matching algorithm and it can be +/// used to proof the optimality of the given pimal solution. +#ifdef DOXYGEN +template +#else +template , +typename _Traits = +MinCostMaxBipartiteMatchingDefaultTraits<_BpUGraph, _CostMap> > +#endif +class MinCostMaxBipartiteMatching { +public: + +typedef _Traits Traits; +typedef typename Traits::BpUGraph BpUGraph; +typedef typename Traits::CostMap CostMap; +typedef typename Traits::Value Value; + +protected: + +typedef typename Traits::HeapCrossRef HeapCrossRef; +typedef typename Traits::Heap Heap; + + +typedef typename BpUGraph::Node Node; +typedef typename BpUGraph::ANodeIt ANodeIt; +typedef typename BpUGraph::BNodeIt BNodeIt; +typedef typename BpUGraph::UEdge UEdge; +typedef typename BpUGraph::UEdgeIt UEdgeIt; +typedef typename BpUGraph::IncEdgeIt IncEdgeIt; + +typedef typename BpUGraph::template ANodeMap ANodeMatchingMap; +typedef typename BpUGraph::template BNodeMap BNodeMatchingMap; + +typedef typename BpUGraph::template ANodeMap ANodePotentialMap; +typedef typename BpUGraph::template BNodeMap BNodePotentialMap; + + +public: + +/// \brief \ref Exception for uninitialized parameters. +/// +/// This error represents problems in the initialization +/// of the parameters of the algorithms. 
+class UninitializedParameter : public lemon::UninitializedParameter { +public: + virtual const char* what() const throw() { + return "lemon::MinCostMaxBipartiteMatching::UninitializedParameter"; + } +}; + +///\name Named template parameters + +///@{ + +template +struct DefHeapTraits : public Traits { + typedef CR HeapCrossRef; + typedef H Heap; + static HeapCrossRef *createHeapCrossRef(const BpUGraph &) { + throw UninitializedParameter(); + } + static Heap *createHeap(HeapCrossRef &) { + throw UninitializedParameter(); + } +}; + +/// \brief \ref named-templ-param "Named parameter" for setting heap +/// and cross reference type +/// +/// \ref named-templ-param "Named parameter" for setting heap and cross +/// reference type +template > +struct DefHeap +: public MinCostMaxBipartiteMatching > { +typedef MinCostMaxBipartiteMatching > Create; +}; + +template +struct DefStandardHeapTraits : public Traits { + typedef CR HeapCrossRef; + typedef H Heap; + static HeapCrossRef *createHeapCrossRef(const BpUGraph &graph) { + return new HeapCrossRef(graph); + } + static Heap *createHeap(HeapCrossRef &crossref) { + return new Heap(crossref); + } +}; + +/// \brief \ref named-templ-param "Named parameter" for setting heap and +/// cross reference type with automatic allocation +/// +/// \ref named-templ-param "Named parameter" for setting heap and cross +/// reference type. It can allocate the heap and the cross reference +/// object if the cross reference's constructor waits for the graph as +/// parameter and the heap's constructor waits for the cross reference. +template > +struct DefStandardHeap +: public MinCostMaxBipartiteMatching > { +typedef MinCostMaxBipartiteMatching > +Create; +}; + +///@} + + +/// \brief Constructor. +/// +/// Constructor of the algorithm. +MinCostMaxBipartiteMatching(const BpUGraph& _graph, + const CostMap& _cost) +: graph(&_graph), cost(&_cost), +anode_matching(_graph), bnode_matching(_graph), +anode_potential(_graph), bnode_potential(_graph), +_heap_cross_ref(0), local_heap_cross_ref(false), +_heap(0), local_heap(0) {} + +/// \brief Destructor. +/// +/// Destructor of the algorithm. +~MinCostMaxBipartiteMatching() { + destroyStructures(); +} + +/// \brief Sets the heap and the cross reference used by algorithm. +/// +/// Sets the heap and the cross reference used by algorithm. +/// If you don't use this function before calling \ref run(), +/// it will allocate one. The destuctor deallocates this +/// automatically allocated map, of course. +/// \return \c (*this) +MinCostMaxBipartiteMatching& heap(Heap& hp, HeapCrossRef &cr) { + if(local_heap_cross_ref) { + delete _heap_cross_ref; + local_heap_cross_ref = false; + } + _heap_cross_ref = &cr; + if(local_heap) { + delete _heap; + local_heap = false; + } + _heap = &hp; + return *this; +} + +/// \name Execution control +/// The simplest way to execute the algorithm is to use +/// one of the member functions called \c run(). +/// \n +/// If you need more control on the execution, +/// first you must call \ref init() or one alternative for it. +/// Finally \ref start() will perform the matching computation or +/// with step-by-step execution you can augment the solution. + +/// @{ + +/// \brief Initalize the data structures. +/// +/// It initalizes the data structures and creates an empty matching. 
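+///
+/// An editorial sketch (not part of the original sources) of the typical
+/// call sequence; \c cheapestMaximumMatching is an illustrative helper name.
+/// \code
+///   template <typename BpUGraph, typename CostMap>
+///   typename CostMap::Value
+///   cheapestMaximumMatching(const BpUGraph& g, const CostMap& c) {
+///     MinCostMaxBipartiteMatching<BpUGraph, CostMap> matcher(g, c);
+///     matcher.run();                  // init() and successive shortest paths
+///     return matcher.matchingCost();  // total cost of the matching found
+///   }
+/// \endcode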
+void init() { + initStructures(); + for (ANodeIt it(*graph); it != INVALID; ++it) { + anode_matching[it] = INVALID; + anode_potential[it] = 0; + } + for (BNodeIt it(*graph); it != INVALID; ++it) { + bnode_matching[it] = INVALID; + bnode_potential[it] = 0; + } + matching_cost = 0; + matching_size = 0; +} + + +/// \brief An augmenting phase of the costed matching algorithm +/// +/// It runs an augmenting phase of the matching algorithm. The +/// phase finds the best augmenting path and augments only on this +/// paths. +/// +/// The algorithm consists at most +/// of \f$ O(n) \f$ phase and one phase is \f$ O(n\log(n)+e) \f$ +/// long with Fibonacci heap or \f$ O((n+e)\log(n)) \f$ long +/// with binary heap. +bool augment() { + + typename BpUGraph::template BNodeMap bdist(*graph); + typename BpUGraph::template BNodeMap bpred(*graph, INVALID); + + Node bestNode = INVALID; + Value bestValue = 0; + + _heap->clear(); + for (ANodeIt it(*graph); it != INVALID; ++it) { + (*_heap_cross_ref)[it] = Heap::PRE_HEAP; + } + + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] == INVALID) { + _heap->push(it, 0); + } + } + Value bdistMax = 0; + + while (!_heap->empty()) { + Node anode = _heap->top(); + Value avalue = _heap->prio(); + _heap->pop(); + for (IncEdgeIt jt(*graph, anode); jt != INVALID; ++jt) { + if (jt == anode_matching[anode]) continue; + Node bnode = graph->bNode(jt); + Value bvalue = avalue + (*cost)[jt] + + anode_potential[anode] - bnode_potential[bnode]; + if (bvalue > bdistMax) { + bdistMax = bvalue; + } + if (bpred[bnode] == INVALID || bvalue < bdist[bnode]) { + bdist[bnode] = bvalue; + bpred[bnode] = jt; + } else continue; + if (bnode_matching[bnode] != INVALID) { + Node newanode = graph->aNode(bnode_matching[bnode]); + switch (_heap->state(newanode)) { + case Heap::PRE_HEAP: + _heap->push(newanode, bvalue); + break; + case Heap::IN_HEAP: + if (bvalue < (*_heap)[newanode]) { + _heap->decrease(newanode, bvalue); + } + break; + case Heap::POST_HEAP: + break; + } + } else { + if (bestNode == INVALID || + bvalue + bnode_potential[bnode] < bestValue) { + bestValue = bvalue + bnode_potential[bnode]; + bestNode = bnode; + } + } + } + } + + if (bestNode == INVALID) { + return false; + } + + matching_cost += bestValue; + ++matching_size; + + for (BNodeIt it(*graph); it != INVALID; ++it) { + if (bpred[it] != INVALID) { + bnode_potential[it] += bdist[it]; + } else { + bnode_potential[it] += bdistMax; + } + } + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] != INVALID) { + Node bnode = graph->bNode(anode_matching[it]); + if (bpred[bnode] != INVALID) { + anode_potential[it] += bdist[bnode]; + } else { + anode_potential[it] += bdistMax; + } + } + } + + while (bestNode != INVALID) { + UEdge uedge = bpred[bestNode]; + Node anode = graph->aNode(uedge); + + bnode_matching[bestNode] = uedge; + if (anode_matching[anode] != INVALID) { + bestNode = graph->bNode(anode_matching[anode]); + } else { + bestNode = INVALID; + } + anode_matching[anode] = uedge; + } + + + return true; +} + +/// \brief Starts the algorithm. +/// +/// Starts the algorithm. It runs augmenting phases until the +/// optimal solution reached. +void start() { + while (augment()) {} +} + +/// \brief Runs the algorithm. +/// +/// It just initalize the algorithm and then start it. 
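+///
+/// An editorial sketch (not part of the original sources): extracting the
+/// computed matching into an \c ANodeMap of undirected edges; the helper
+/// name is illustrative.
+/// \code
+///   template <typename BpUGraph, typename CostMap>
+///   int collectMatching(const BpUGraph& g, const CostMap& c,
+///                       typename BpUGraph::template ANodeMap<typename BpUGraph::UEdge>& mate) {
+///     MinCostMaxBipartiteMatching<BpUGraph, CostMap> matcher(g, c);
+///     matcher.run();
+///     return matcher.aMatching(mate);   // number of matching edges
+///   }
+/// \endcode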
+void run() { + init(); + start(); +} + +/// @} + +/// \name Query Functions +/// The result of the %Matching algorithm can be obtained using these +/// functions.\n +/// Before the use of these functions, +/// either run() or start() must be called. + +///@{ + +/// \brief Gives back the potential in the NodeMap +/// +/// Gives back the potential in the NodeMap. The matching is optimal +/// with the current number of edges if \f$ \pi(a) + \pi(b) - w(ab) = 0 \f$ +/// for each matching edges and \f$ \pi(a) + \pi(b) - w(ab) \ge 0 \f$ +/// for each edges. +template +void potential(PotentialMap& pt) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + pt.set(it, anode_potential[it]); + } + for (BNodeIt it(*graph); it != INVALID; ++it) { + pt.set(it, bnode_potential[it]); + } +} + +/// \brief Set true all matching uedge in the map. +/// +/// Set true all matching uedge in the map. It does not change the +/// value mapped to the other uedges. +/// \return The number of the matching edges. +template +int quickMatching(MatchingMap& mm) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + if (anode_matching[it] != INVALID) { + mm.set(anode_matching[it], true); + } + } + return matching_size; +} + +/// \brief Set true all matching uedge in the map and the others to false. +/// +/// Set true all matching uedge in the map and the others to false. +/// \return The number of the matching edges. +template +int matching(MatchingMap& mm) const { + for (UEdgeIt it(*graph); it != INVALID; ++it) { + mm.set(it, it == anode_matching[graph->aNode(it)]); + } + return matching_size; +} + +/// \brief Gives back the matching in an ANodeMap. +/// +/// Gives back the matching in an ANodeMap. The parameter should +/// be a write ANodeMap of UEdge values. +/// \return The number of the matching edges. +template +int aMatching(MatchingMap& mm) const { + for (ANodeIt it(*graph); it != INVALID; ++it) { + mm.set(it, anode_matching[it]); + } + return matching_size; +} + +/// \brief Gives back the matching in a BNodeMap. +/// +/// Gives back the matching in a BNodeMap. The parameter should +/// be a write BNodeMap of UEdge values. +/// \return The number of the matching edges. +template +int bMatching(MatchingMap& mm) const { + for (BNodeIt it(*graph); it != INVALID; ++it) { + mm.set(it, bnode_matching[it]); + } + return matching_size; +} + +/// \brief Return true if the given uedge is in the matching. +/// +/// It returns true if the given uedge is in the matching. +bool matchingEdge(const UEdge& edge) const { + return anode_matching[graph->aNode(edge)] == edge; +} + +/// \brief Returns the matching edge from the node. +/// +/// Returns the matching edge from the node. If there is not such +/// edge it gives back \c INVALID. +UEdge matchingEdge(const Node& node) const { + if (graph->aNode(node)) { + return anode_matching[node]; + } else { + return bnode_matching[node]; + } +} + +/// \brief Gives back the sum of costs of the matching edges. +/// +/// Gives back the sum of costs of the matching edges. +Value matchingCost() const { + return matching_cost; +} + +/// \brief Gives back the number of the matching edges. +/// +/// Gives back the number of the matching edges. 
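+///
+/// An editorial sketch (not part of the original sources): deciding whether
+/// every A-node is matched in the computed matching; the helper name is
+/// illustrative.
+/// \code
+///   template <typename BpUGraph, typename CostMap>
+///   bool hasPerfectAMatching(const BpUGraph& g, const CostMap& c) {
+///     MinCostMaxBipartiteMatching<BpUGraph, CostMap> matcher(g, c);
+///     matcher.run();
+///     int anodes = 0;
+///     for (typename BpUGraph::ANodeIt it(g); it != INVALID; ++it) ++anodes;
+///     return matcher.matchingSize() == anodes;
+///   }
+/// \endcode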
+int matchingSize() const { + return matching_size; +} + +/// @} + +private: + +void initStructures() { + if (!_heap_cross_ref) { + local_heap_cross_ref = true; + _heap_cross_ref = Traits::createHeapCrossRef(*graph); + } + if (!_heap) { + local_heap = true; + _heap = Traits::createHeap(*_heap_cross_ref); + } +} + +void destroyStructures() { + if (local_heap_cross_ref) delete _heap_cross_ref; + if (local_heap) delete _heap; +} + + +private: + +const BpUGraph *graph; +const CostMap* cost; + +ANodeMatchingMap anode_matching; +BNodeMatchingMap bnode_matching; + +ANodePotentialMap anode_potential; +BNodePotentialMap bnode_potential; + +Value matching_cost; +int matching_size; + +HeapCrossRef *_heap_cross_ref; +bool local_heap_cross_ref; + +Heap *_heap; +bool local_heap; + +}; + +/// \ingroup matching +/// +/// \brief Minimum cost maximum cardinality bipartite matching +/// +/// This function calculates the maximum cardinality matching with +/// minimum cost of a bipartite graph. It gives back the matching in +/// an undirected edge map. +/// +/// \param graph The bipartite graph. +/// \param cost The undirected edge map which contains the costs. +/// \retval matching The undirected edge map which will be set to +/// the matching. +/// \return The cost of the matching. +template +typename CostMap::Value +minCostMaxBipartiteMatching(const BpUGraph& graph, + const CostMap& cost, + MatchingMap& matching) { + MinCostMaxBipartiteMatching + bpmatching(graph, cost); + bpmatching.run(); + bpmatching.matching(matching); + return bpmatching.matchingCost(); +} + +} + +#endif diff --git a/src/lemon/bits/alteration_notifier.h b/src/lemon/bits/alteration_notifier.h new file mode 100644 index 0000000..9a164bf --- /dev/null +++ b/src/lemon/bits/alteration_notifier.h @@ -0,0 +1,485 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_ALTERATION_NOTIFIER_H +#define LEMON_BITS_ALTERATION_NOTIFIER_H + +#include +#include + +#include + +///\ingroup graphbits +///\file +///\brief Observer notifier for graph alteration observers. + +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Notifier class to notify observes about alterations in + /// a container. + /// + /// The simple graph's can be refered as two containers, one node container + /// and one edge container. But they are not standard containers they + /// does not store values directly they are just key continars for more + /// value containers which are the node and edge maps. + /// + /// The graph's node and edge sets can be changed as we add or erase + /// nodes and edges in the graph. Lemon would like to handle easily + /// that the node and edge maps should contain values for all nodes or + /// edges. If we want to check on every indicing if the map contains + /// the current indicing key that cause a drawback in the performance + /// in the library. 
Instead, we notify all maps about
+  /// each alteration of the graph, so the extra cost is paid only when the
+  /// graph is actually altered.
+  ///
+  /// This class provides an interface to the container. The \e first() and \e
+  /// next() member functions make it possible to iterate over the keys of the
+  /// container. The \e id() function returns an integer id for each key, and
+  /// the \e maxId() function gives back an upper bound on the ids.
+  ///
+  /// For the proper functioning of this class, it must be notified about each
+  /// alteration in the container. There are four kinds of alteration signals:
+  /// \e add(), \e erase(), \e build() and \e clear(). The \e add() and
+  /// \e erase() signals indicate that one or a few items have been added to
+  /// or erased from the graph. If all items are erased from the graph, or a
+  /// new graph is built from an empty one, this is signaled with the clear()
+  /// and build() members. An important rule: when items are erased from the
+  /// graph, the alteration must be signaled first and the items removed from
+  /// the container only afterwards; conversely, on item addition the
+  /// container must be extended first and the alteration signaled just after
+  /// that.
+  ///
+  /// Alterations can be observed with a class inherited from the
+  /// \e ObserverBase nested class. The signals are handled by overriding the
+  /// virtual functions defined in the base class. An observer can be attached
+  /// to the notifier with the \e attach() member and detached with the
+  /// detach() function. The alteration handlers must not call any function
+  /// which signals another alteration on the same notifier and must not
+  /// detach any observer from the notifier.
+  ///
+  /// Alteration observers try to be exception safe. If an \e add() or a
+  /// \e build() handler throws an exception, then the remaining observers
+  /// are not notified and the already performed additions are rolled back by
+  /// calling the \e erase() or \e clear() handlers. Hence \e erase() and
+  /// \e clear() should not throw exceptions; the only exception they may
+  /// throw is the \ref AlterationNotifier::ImmediateDetach ImmediateDetach
+  /// exception, which detaches the observer from the notifier.
+  ///
+  /// There are some cases in which alteration observing is not completely
+  /// reliable. If we maintain the node degrees of the graph, as in
+  /// \ref InDegMap, and use reverseEdge(), the result becomes unreliable:
+  /// since only additions and erasures are signaled, but not reversals, the
+  /// stored degrees go stale. The subgraph adaptors cannot signal the
+  /// alterations either, because a mere change in the filter map can modify
+  /// the graph, and this cannot be observed in any way.
+  ///
+  /// \param _Container The container which is observed.
+  /// \param _Item The item type which is observed.
+  ///
+  /// \author Balazs Dezso
+
+  template <typename _Container, typename _Item>
+  class AlterationNotifier {
+  public:
+
+    typedef True Notifier;
+
+    typedef _Container Container;
+    typedef _Item Item;
+
+    /// \brief Exception which can be thrown from \e clear() and
+    /// \e erase().
+    ///
+    /// Only this exception is allowed to be thrown from the \e clear()
+    /// and \e erase() functions. The exception immediately detaches the
+    /// current observer from the notifier. Because \e clear() and
+    /// \e erase() must not throw other exceptions, it can be used to
+    /// invalidate the observer.
+    struct ImmediateDetach {};
+
+    /// \brief ObserverBase is the base class for the observers.
+ /// + /// ObserverBase is the abstract base class for the observers. + /// It will be notified about an item was inserted into or + /// erased from the graph. + /// + /// The observer interface contains some pure virtual functions + /// to override. The add() and erase() functions are + /// to notify the oberver when one item is added or + /// erased. + /// + /// The build() and clear() members are to notify the observer + /// about the container is built from an empty container or + /// is cleared to an empty container. + /// + /// \author Balazs Dezso + + class ObserverBase { + protected: + typedef AlterationNotifier Notifier; + + friend class AlterationNotifier; + + /// \brief Default constructor. + /// + /// Default constructor for ObserverBase. + /// + ObserverBase() : _notifier(0) {} + + /// \brief Constructor which attach the observer into notifier. + /// + /// Constructor which attach the observer into notifier. + ObserverBase(AlterationNotifier& nf) { + attach(nf); + } + + /// \brief Constructor which attach the obserever to the same notifier. + /// + /// Constructor which attach the obserever to the same notifier as + /// the other observer is attached to. + ObserverBase(const ObserverBase& copy) { + if (copy.attached()) { + attach(*copy.notifier()); + } + } + + /// \brief Destructor + virtual ~ObserverBase() { + if (attached()) { + detach(); + } + } + + /// \brief Attaches the observer into an AlterationNotifier. + /// + /// This member attaches the observer into an AlterationNotifier. + /// + void attach(AlterationNotifier& nf) { + nf.attach(*this); + } + + /// \brief Detaches the observer into an AlterationNotifier. + /// + /// This member detaches the observer from an AlterationNotifier. + /// + void detach() { + _notifier->detach(*this); + } + + /// \brief Gives back a pointer to the notifier which the map + /// attached into. + /// + /// This function gives back a pointer to the notifier which the map + /// attached into. + /// + Notifier* notifier() const { return const_cast(_notifier); } + + /// Gives back true when the observer is attached into a notifier. + bool attached() const { return _notifier != 0; } + + private: + + ObserverBase& operator=(const ObserverBase& copy); + + protected: + + Notifier* _notifier; + typename std::list::iterator _index; + + /// \brief The member function to notificate the observer about an + /// item is added to the container. + /// + /// The add() member function notificates the observer about an item + /// is added to the container. It have to be overrided in the + /// subclasses. + virtual void add(const Item&) = 0; + + /// \brief The member function to notificate the observer about + /// more item is added to the container. + /// + /// The add() member function notificates the observer about more item + /// is added to the container. It have to be overrided in the + /// subclasses. + virtual void add(const std::vector& items) = 0; + + /// \brief The member function to notificate the observer about an + /// item is erased from the container. + /// + /// The erase() member function notificates the observer about an + /// item is erased from the container. It have to be overrided in + /// the subclasses. + virtual void erase(const Item&) = 0; + + /// \brief The member function to notificate the observer about + /// more item is erased from the container. + /// + /// The erase() member function notificates the observer about more item + /// is erased from the container. It have to be overrided in the + /// subclasses. 
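+      ///
+      /// An editorial sketch (not part of the original sources) of a minimal
+      /// observer; \c ItemCounter is an illustrative name and only members
+      /// declared in this interface are used.
+      /// \code
+      ///   template <typename Container, typename Item>
+      ///   class ItemCounter
+      ///     : public AlterationNotifier<Container, Item>::ObserverBase {
+      ///   public:
+      ///     ItemCounter(AlterationNotifier<Container, Item>& nf) : _count(0) {
+      ///       this->attach(nf);
+      ///     }
+      ///     int count() const { return _count; }
+      ///   protected:
+      ///     virtual void add(const Item&) { ++_count; }
+      ///     virtual void add(const std::vector<Item>& items) {
+      ///       _count += int(items.size());
+      ///     }
+      ///     virtual void erase(const Item&) { --_count; }
+      ///     virtual void erase(const std::vector<Item>& items) {
+      ///       _count -= int(items.size());
+      ///     }
+      ///     virtual void build() {
+      ///       _count = 0;
+      ///       Item it;
+      ///       for (this->notifier()->first(it); it != INVALID;
+      ///            this->notifier()->next(it)) ++_count;
+      ///     }
+      ///     virtual void clear() { _count = 0; }
+      ///   private:
+      ///     int _count;
+      ///   };
+      /// \endcode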
+ virtual void erase(const std::vector& items) = 0; + + /// \brief The member function to notificate the observer about the + /// container is built. + /// + /// The build() member function notificates the observer about the + /// container is built from an empty container. It have to be + /// overrided in the subclasses. + + virtual void build() = 0; + + /// \brief The member function to notificate the observer about all + /// items are erased from the container. + /// + /// The clear() member function notificates the observer about all + /// items are erased from the container. It have to be overrided in + /// the subclasses. + virtual void clear() = 0; + + }; + + protected: + + const Container* container; + + typedef std::list Observers; + Observers _observers; + + + public: + + /// \brief Default constructor. + /// + /// The default constructor of the AlterationNotifier. + /// It creates an empty notifier. + AlterationNotifier() + : container(0) {} + + /// \brief Constructor. + /// + /// Constructor with the observed container parameter. + AlterationNotifier(const Container& _container) + : container(&_container) {} + + /// \brief Copy Constructor of the AlterationNotifier. + /// + /// Copy constructor of the AlterationNotifier. + /// It creates only an empty notifier because the copiable + /// notifier's observers have to be registered still into that notifier. + AlterationNotifier(const AlterationNotifier& _notifier) + : container(_notifier.container) {} + + /// \brief Destructor. + /// + /// Destructor of the AlterationNotifier. + /// + ~AlterationNotifier() { + typename Observers::iterator it; + for (it = _observers.begin(); it != _observers.end(); ++it) { + (*it)->_notifier = 0; + } + } + + /// \brief Sets the container. + /// + /// Sets the container. + void setContainer(const Container& _container) { + container = &_container; + } + + protected: + + AlterationNotifier& operator=(const AlterationNotifier&); + + public: + + + + /// \brief First item in the container. + /// + /// Returns the first item in the container. It is + /// for start the iteration on the container. + void first(Item& item) const { + container->first(item); + } + + /// \brief Next item in the container. + /// + /// Returns the next item in the container. It is + /// for iterate on the container. + void next(Item& item) const { + container->next(item); + } + + /// \brief Returns the id of the item. + /// + /// Returns the id of the item provided by the container. + int id(const Item& item) const { + return container->id(item); + } + + /// \brief Returns the maximum id of the container. + /// + /// Returns the maximum id of the container. + int maxId() const { + return container->maxId(Item()); + } + + protected: + + void attach(ObserverBase& observer) { + observer._index = _observers.insert(_observers.begin(), &observer); + observer._notifier = this; + } + + void detach(ObserverBase& observer) { + _observers.erase(observer._index); + observer._index = _observers.end(); + observer._notifier = 0; + } + + public: + + /// \brief Notifies all the registed observers about an item added to + /// the container. + /// + /// It notifies all the registed observers about an item added to + /// the container. + /// + void add(const Item& item) { + typename Observers::reverse_iterator it; + try { + for (it = _observers.rbegin(); it != _observers.rend(); ++it) { + (*it)->add(item); + } + } catch (...) 
{ + typename Observers::iterator jt; + for (jt = it.base(); jt != _observers.end(); ++jt) { + (*jt)->erase(item); + } + throw; + } + } + + /// \brief Notifies all the registed observers about more item added to + /// the container. + /// + /// It notifies all the registed observers about more item added to + /// the container. + /// + void add(const std::vector& items) { + typename Observers::reverse_iterator it; + try { + for (it = _observers.rbegin(); it != _observers.rend(); ++it) { + (*it)->add(items); + } + } catch (...) { + typename Observers::iterator jt; + for (jt = it.base(); jt != _observers.end(); ++jt) { + (*jt)->erase(items); + } + throw; + } + } + + /// \brief Notifies all the registed observers about an item erased from + /// the container. + /// + /// It notifies all the registed observers about an item erased from + /// the container. + /// + void erase(const Item& item) throw() { + typename Observers::iterator it = _observers.begin(); + while (it != _observers.end()) { + try { + (*it)->erase(item); + ++it; + } catch (const ImmediateDetach&) { + it = _observers.erase(it); + (*it)->_index = _observers.end(); + (*it)->_notifier = 0; + } + } + } + + /// \brief Notifies all the registed observers about more item erased + /// from the container. + /// + /// It notifies all the registed observers about more item erased from + /// the container. + /// + void erase(const std::vector& items) { + typename Observers::iterator it = _observers.begin(); + while (it != _observers.end()) { + try { + (*it)->erase(items); + ++it; + } catch (const ImmediateDetach&) { + it = _observers.erase(it); + (*it)->_index = _observers.end(); + (*it)->_notifier = 0; + } + } + } + + /// \brief Notifies all the registed observers about the container is + /// built. + /// + /// Notifies all the registed observers about the container is built + /// from an empty container. + void build() { + typename Observers::reverse_iterator it; + try { + for (it = _observers.rbegin(); it != _observers.rend(); ++it) { + (*it)->build(); + } + } catch (...) { + typename Observers::iterator jt; + for (jt = it.base(); jt != _observers.end(); ++jt) { + (*jt)->clear(); + } + throw; + } + } + + /// \brief Notifies all the registed observers about all items are + /// erased. + /// + /// Notifies all the registed observers about all items are erased + /// from the container. + void clear() { + typename Observers::iterator it = _observers.begin(); + while (it != _observers.end()) { + try { + (*it)->clear(); + ++it; + } catch (const ImmediateDetach&) { + it = _observers.erase(it); + (*it)->_index = _observers.end(); + (*it)->_notifier = 0; + } + } + } + }; + +} + +#endif diff --git a/src/lemon/bits/array_map.h b/src/lemon/bits/array_map.h new file mode 100644 index 0000000..08ab218 --- /dev/null +++ b/src/lemon/bits/array_map.h @@ -0,0 +1,346 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. 
+ * + */ + +#ifndef LEMON_BITS_ARRAY_MAP_H +#define LEMON_BITS_ARRAY_MAP_H + +#include + +#include +#include +#include +#include + +/// \ingroup graphbits +/// \file +/// \brief Graph map based on the array storage. + +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Graph map based on the array storage. + /// + /// The ArrayMap template class is graph map structure what + /// automatically updates the map when a key is added to or erased from + /// the map. This map uses the allocators to implement + /// the container functionality. + /// + /// The template parameters are the Graph the current Item type and + /// the Value type of the map. + template + class ArrayMap + : public ItemSetTraits<_Graph, _Item>::ItemNotifier::ObserverBase { + public: + /// The graph type of the maps. + typedef _Graph Graph; + /// The item type of the map. + typedef _Item Item; + /// The reference map tag. + typedef True ReferenceMapTag; + + /// The key type of the maps. + typedef _Item Key; + /// The value type of the map. + typedef _Value Value; + + /// The const reference type of the map. + typedef const _Value& ConstReference; + /// The reference type of the map. + typedef _Value& Reference; + + /// The notifier type. + typedef typename ItemSetTraits<_Graph, _Item>::ItemNotifier Notifier; + + /// The MapBase of the Map which imlements the core regisitry function. + typedef typename Notifier::ObserverBase Parent; + + private: + typedef std::allocator Allocator; + + public: + + /// \brief Graph initialized map constructor. + /// + /// Graph initialized map constructor. + explicit ArrayMap(const Graph& graph) { + Parent::attach(graph.notifier(Item())); + allocate_memory(); + Notifier* nf = Parent::notifier(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it);; + allocator.construct(&(values[id]), Value()); + } + } + + /// \brief Constructor to use default value to initialize the map. + /// + /// It constructs a map and initialize all of the the map. + ArrayMap(const Graph& graph, const Value& value) { + Parent::attach(graph.notifier(Item())); + allocate_memory(); + Notifier* nf = Parent::notifier(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it);; + allocator.construct(&(values[id]), value); + } + } + + /// \brief Constructor to copy a map of the same map type. + /// + /// Constructor to copy a map of the same map type. + ArrayMap(const ArrayMap& copy) : Parent() { + if (copy.attached()) { + attach(*copy.notifier()); + } + capacity = copy.capacity; + if (capacity == 0) return; + values = allocator.allocate(capacity); + Notifier* nf = Parent::notifier(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it);; + allocator.construct(&(values[id]), copy.values[id]); + } + } + + /// \brief Assign operator. + /// + /// This operator assigns for each item in the map the + /// value mapped to the same item in the copied map. + /// The parameter map should be indiced with the same + /// itemset because this assign operator does not change + /// the container of the map. + ArrayMap& operator=(const ArrayMap& cmap) { + return operator=(cmap); + } + + + /// \brief Template assign operator. + /// + /// The given parameter should be conform to the ReadMap + /// concecpt and could be indiced by the current item set of + /// the NodeMap. In this case the value for each item + /// is assigned by the value of the given ReadMap. 
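// Editorial illustration, not part of the patch: a minimal standalone sketch
// of the assignment semantics documented above -- for every key currently in
// the map, copy the value of an arbitrary read-only map; the key set itself
// is left untouched. SimpleArrayMap and assignFrom are hypothetical names,
// not LEMON API.
#include <cassert>
#include <vector>

template <typename Value>
struct SimpleArrayMap {
    std::vector<Value> values;                    // indexed by item id

    Value& operator[](int id) { return values[id]; }
    const Value& operator[](int id) const { return values[id]; }

    // Item-by-item assignment from anything offering operator[](int).
    template <typename ReadMap>
    SimpleArrayMap& assignFrom(const ReadMap& readMap) {
        for (int id = 0; id < int(values.size()); ++id)
            values[id] = readMap[id];
        return *this;
    }
};

int main() {
    SimpleArrayMap<int> m;
    m.values.assign(4, 0);                        // keys 0..3 already exist
    std::vector<int> source(4);
    source[0] = 10; source[1] = 20; source[2] = 30; source[3] = 40;
    m.assignFrom(source);                         // copies values, keeps keys
    assert(m[2] == 30);
    return 0;
}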
+ template + ArrayMap& operator=(const CMap& cmap) { + checkConcept, CMap>(); + const typename Parent::Notifier* nf = Parent::notifier(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + set(it, cmap[it]); + } + return *this; + } + + /// \brief The destructor of the map. + /// + /// The destructor of the map. + virtual ~ArrayMap() { + if (attached()) { + clear(); + detach(); + } + } + + protected: + + using Parent::attach; + using Parent::detach; + using Parent::attached; + + public: + + /// \brief The subscript operator. + /// + /// The subscript operator. The map can be subscripted by the + /// actual keys of the graph. + Value& operator[](const Key& key) { + int id = Parent::notifier()->id(key); + return values[id]; + } + + /// \brief The const subscript operator. + /// + /// The const subscript operator. The map can be subscripted by the + /// actual keys of the graph. + const Value& operator[](const Key& key) const { + int id = Parent::notifier()->id(key); + return values[id]; + } + + /// \brief Setter function of the map. + /// + /// Setter function of the map. Equivalent with map[key] = val. + /// This is a compatibility feature with the not dereferable maps. + void set(const Key& key, const Value& val) { + (*this)[key] = val; + } + + protected: + + /// \brief Adds a new key to the map. + /// + /// It adds a new key to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const Key& key) { + Notifier* nf = Parent::notifier(); + int id = nf->id(key); + if (id >= capacity) { + int new_capacity = (capacity == 0 ? 1 : capacity); + while (new_capacity <= id) { + new_capacity <<= 1; + } + Value* new_values = allocator.allocate(new_capacity); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int jd = nf->id(it);; + if (id != jd) { + allocator.construct(&(new_values[jd]), values[jd]); + allocator.destroy(&(values[jd])); + } + } + if (capacity != 0) allocator.deallocate(values, capacity); + values = new_values; + capacity = new_capacity; + } + allocator.construct(&(values[id]), Value()); + } + + /// \brief Adds more new keys to the map. + /// + /// It adds more new keys to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const std::vector& keys) { + Notifier* nf = Parent::notifier(); + int max_id = -1; + for (int i = 0; i < int(keys.size()); ++i) { + int id = nf->id(keys[i]); + if (id > max_id) { + max_id = id; + } + } + if (max_id >= capacity) { + int new_capacity = (capacity == 0 ? 1 : capacity); + while (new_capacity <= max_id) { + new_capacity <<= 1; + } + Value* new_values = allocator.allocate(new_capacity); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it); + bool found = false; + for (int i = 0; i < int(keys.size()); ++i) { + int jd = nf->id(keys[i]); + if (id == jd) { + found = true; + break; + } + } + if (found) continue; + allocator.construct(&(new_values[id]), values[id]); + allocator.destroy(&(values[id])); + } + if (capacity != 0) allocator.deallocate(values, capacity); + values = new_values; + capacity = new_capacity; + } + for (int i = 0; i < int(keys.size()); ++i) { + int id = nf->id(keys[i]); + allocator.construct(&(values[id]), Value()); + } + } + + /// \brief Erase a key from the map. + /// + /// Erase a key from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. 
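// Editorial illustration, not part of the patch: the capacity policy used by
// ArrayMap::add() above. When a new item id does not fit, the backing array
// grows to the next power of two above that id, so repeated additions stay
// amortized O(1). nextCapacity is a hypothetical helper name.
#include <cassert>

int nextCapacity(int capacity, int id) {
    int new_capacity = (capacity == 0 ? 1 : capacity);
    while (new_capacity <= id)
        new_capacity <<= 1;              // keep doubling until the id fits
    return new_capacity;
}

int main() {
    assert(nextCapacity(0, 0) == 1);     // first item: allocate one slot
    assert(nextCapacity(1, 1) == 2);
    assert(nextCapacity(4, 9) == 16);    // id 9 forces 4 -> 8 -> 16
    assert(nextCapacity(8, 7) == 8);     // already big enough: unchanged
    return 0;
}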
+ virtual void erase(const Key& key) { + int id = Parent::notifier()->id(key); + allocator.destroy(&(values[id])); + } + + /// \brief Erase more keys from the map. + /// + /// Erase more keys from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. + virtual void erase(const std::vector& keys) { + for (int i = 0; i < int(keys.size()); ++i) { + int id = Parent::notifier()->id(keys[i]); + allocator.destroy(&(values[id])); + } + } + + /// \brief Buildes the map. + /// + /// It buildes the map. It called by the observer notifier + /// and it overrides the build() member function of the observer base. + virtual void build() { + Notifier* nf = Parent::notifier(); + allocate_memory(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it);; + allocator.construct(&(values[id]), Value()); + } + } + + /// \brief Clear the map. + /// + /// It erase all items from the map. It called by the observer notifier + /// and it overrides the clear() member function of the observer base. + virtual void clear() { + Notifier* nf = Parent::notifier(); + if (capacity != 0) { + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + int id = nf->id(it); + allocator.destroy(&(values[id])); + } + allocator.deallocate(values, capacity); + capacity = 0; + } + } + + private: + + void allocate_memory() { + int max_id = Parent::notifier()->maxId(); + if (max_id == -1) { + capacity = 0; + values = 0; + return; + } + capacity = 1; + while (capacity <= max_id) { + capacity <<= 1; + } + values = allocator.allocate(capacity); + } + + int capacity; + Value* values; + Allocator allocator; + + }; + +} + +#endif diff --git a/src/lemon/bits/base_extender.h b/src/lemon/bits/base_extender.h new file mode 100644 index 0000000..84bb242 --- /dev/null +++ b/src/lemon/bits/base_extender.h @@ -0,0 +1,495 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. 
+ * + */ + +#ifndef LEMON_BITS_BASE_EXTENDER_H +#define LEMON_BITS_BASE_EXTENDER_H + +#include +#include + +#include +#include + +#include +#include + +///\ingroup graphbits +///\file +///\brief Extenders for the graph types +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief BaseGraph to BaseUGraph extender + template + class UndirGraphExtender : public Base { + + public: + + typedef Base Parent; + typedef typename Parent::Edge UEdge; + typedef typename Parent::Node Node; + + typedef True UndirectedTag; + + class Edge : public UEdge { + friend class UndirGraphExtender; + + protected: + bool forward; + + Edge(const UEdge &ue, bool _forward) : + UEdge(ue), forward(_forward) {} + + public: + Edge() {} + + /// Invalid edge constructor + Edge(Invalid i) : UEdge(i), forward(true) {} + + bool operator==(const Edge &that) const { + return forward==that.forward && UEdge(*this)==UEdge(that); + } + bool operator!=(const Edge &that) const { + return forward!=that.forward || UEdge(*this)!=UEdge(that); + } + bool operator<(const Edge &that) const { + return forward> 1), bool(ix & 1)); + } + + UEdge uEdgeFromId(int ix) const { + return Parent::edgeFromId(ix); + } + + int id(const Node &n) const { + return Parent::id(n); + } + + int id(const UEdge &e) const { + return Parent::id(e); + } + + int id(const Edge &e) const { + return 2 * Parent::id(e) + int(e.forward); + } + + int maxNodeId() const { + return Parent::maxNodeId(); + } + + int maxEdgeId() const { + return 2 * Parent::maxEdgeId() + 1; + } + + int maxUEdgeId() const { + return Parent::maxEdgeId(); + } + + + int edgeNum() const { + return 2 * Parent::edgeNum(); + } + + int uEdgeNum() const { + return Parent::edgeNum(); + } + + Edge findEdge(Node s, Node t, Edge p = INVALID) const { + if (p == INVALID) { + UEdge edge = Parent::findEdge(s, t); + if (edge != INVALID) return direct(edge, true); + edge = Parent::findEdge(t, s); + if (edge != INVALID) return direct(edge, false); + } else if (direction(p)) { + UEdge edge = Parent::findEdge(s, t, p); + if (edge != INVALID) return direct(edge, true); + edge = Parent::findEdge(t, s); + if (edge != INVALID) return direct(edge, false); + } else { + UEdge edge = Parent::findEdge(t, s, p); + if (edge != INVALID) return direct(edge, false); + } + return INVALID; + } + + UEdge findUEdge(Node s, Node t, UEdge p = INVALID) const { + if (s != t) { + if (p == INVALID) { + UEdge edge = Parent::findEdge(s, t); + if (edge != INVALID) return edge; + edge = Parent::findEdge(t, s); + if (edge != INVALID) return edge; + } else if (Parent::s(p) == s) { + UEdge edge = Parent::findEdge(s, t, p); + if (edge != INVALID) return edge; + edge = Parent::findEdge(t, s); + if (edge != INVALID) return edge; + } else { + UEdge edge = Parent::findEdge(t, s, p); + if (edge != INVALID) return edge; + } + } else { + return Parent::findEdge(s, t, p); + } + return INVALID; + } + }; + + template + class BidirBpUGraphExtender : public Base { + public: + typedef Base Parent; + typedef BidirBpUGraphExtender Graph; + + typedef typename Parent::Node Node; + typedef typename Parent::UEdge UEdge; + + + using Parent::first; + using Parent::next; + + using Parent::id; + + class ANode : public Node { + friend class BidirBpUGraphExtender; + public: + ANode() {} + ANode(const Node& node) : Node(node) { + LEMON_ASSERT(Parent::aNode(node) || node == INVALID, + typename Parent::NodeSetError()); + } + ANode& operator=(const Node& node) { + LEMON_ASSERT(Parent::aNode(node) || node == INVALID, + typename Parent::NodeSetError()); + 
Node::operator=(node); + return *this; + } + ANode(Invalid) : Node(INVALID) {} + ANode& operator=(Invalid) { + Node::operator=(INVALID); + return *this; + } + }; + + void first(ANode& node) const { + Parent::firstANode(static_cast(node)); + } + void next(ANode& node) const { + Parent::nextANode(static_cast(node)); + } + + int id(const ANode& node) const { + return Parent::aNodeId(node); + } + + class BNode : public Node { + friend class BidirBpUGraphExtender; + public: + BNode() {} + BNode(const Node& node) : Node(node) { + LEMON_ASSERT(Parent::bNode(node) || node == INVALID, + typename Parent::NodeSetError()); + } + BNode& operator=(const Node& node) { + LEMON_ASSERT(Parent::bNode(node) || node == INVALID, + typename Parent::NodeSetError()); + Node::operator=(node); + return *this; + } + BNode(Invalid) : Node(INVALID) {} + BNode& operator=(Invalid) { + Node::operator=(INVALID); + return *this; + } + }; + + void first(BNode& node) const { + Parent::firstBNode(static_cast(node)); + } + void next(BNode& node) const { + Parent::nextBNode(static_cast(node)); + } + + int id(const BNode& node) const { + return Parent::aNodeId(node); + } + + Node source(const UEdge& edge) const { + return aNode(edge); + } + Node target(const UEdge& edge) const { + return bNode(edge); + } + + void firstInc(UEdge& edge, bool& dir, const Node& node) const { + if (Parent::aNode(node)) { + Parent::firstFromANode(edge, node); + dir = true; + } else { + Parent::firstFromBNode(edge, node); + dir = static_cast(edge) == INVALID; + } + } + void nextInc(UEdge& edge, bool& dir) const { + if (dir) { + Parent::nextFromANode(edge); + } else { + Parent::nextFromBNode(edge); + if (edge == INVALID) dir = true; + } + } + + class Edge : public UEdge { + friend class BidirBpUGraphExtender; + protected: + bool forward; + + Edge(const UEdge& edge, bool _forward) + : UEdge(edge), forward(_forward) {} + + public: + Edge() {} + Edge (Invalid) : UEdge(INVALID), forward(true) {} + bool operator==(const Edge& i) const { + return UEdge::operator==(i) && forward == i.forward; + } + bool operator!=(const Edge& i) const { + return UEdge::operator!=(i) || forward != i.forward; + } + bool operator<(const Edge& i) const { + return UEdge::operator<(i) || + (!(i.forward(edge)); + edge.forward = true; + } + + void next(Edge& edge) const { + if (!edge.forward) { + Parent::next(static_cast(edge)); + } + edge.forward = !edge.forward; + } + + void firstOut(Edge& edge, const Node& node) const { + if (Parent::aNode(node)) { + Parent::firstFromANode(edge, node); + edge.forward = true; + } else { + Parent::firstFromBNode(edge, node); + edge.forward = static_cast(edge) == INVALID; + } + } + void nextOut(Edge& edge) const { + if (edge.forward) { + Parent::nextFromANode(edge); + } else { + Parent::nextFromBNode(edge); + edge.forward = static_cast(edge) == INVALID; + } + } + + void firstIn(Edge& edge, const Node& node) const { + if (Parent::bNode(node)) { + Parent::firstFromBNode(edge, node); + edge.forward = true; + } else { + Parent::firstFromANode(edge, node); + edge.forward = static_cast(edge) == INVALID; + } + } + void nextIn(Edge& edge) const { + if (edge.forward) { + Parent::nextFromBNode(edge); + } else { + Parent::nextFromANode(edge); + edge.forward = static_cast(edge) == INVALID; + } + } + + Node source(const Edge& edge) const { + return edge.forward ? Parent::aNode(edge) : Parent::bNode(edge); + } + Node target(const Edge& edge) const { + return edge.forward ? 
Parent::bNode(edge) : Parent::aNode(edge); + } + + int id(const Edge& edge) const { + return (Parent::id(static_cast(edge)) << 1) + + (edge.forward ? 0 : 1); + } + Edge edgeFromId(int ix) const { + return Edge(Parent::fromUEdgeId(ix >> 1), (ix & 1) == 0); + } + int maxEdgeId() const { + return (Parent::maxUEdgeId() << 1) + 1; + } + + bool direction(const Edge& edge) const { + return edge.forward; + } + + Edge direct(const UEdge& edge, bool dir) const { + return Edge(edge, dir); + } + + int edgeNum() const { + return 2 * Parent::uEdgeNum(); + } + + int uEdgeNum() const { + return Parent::uEdgeNum(); + } + + + }; +} + +#endif diff --git a/src/lemon/bits/debug_map.h b/src/lemon/bits/debug_map.h new file mode 100644 index 0000000..9447822 --- /dev/null +++ b/src/lemon/bits/debug_map.h @@ -0,0 +1,382 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_DEBUG_MAP_H +#define LEMON_BITS_DEBUG_MAP_H + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +///\ingroup graphbits +/// +///\file +///\brief Vector based graph maps for debugging. +namespace lemon { + +#ifndef LEMON_STRICT_DEBUG_MAP +#define LEMON_STRICT_DEBUG_MAP false +#endif + + /// \ingroup graphbits + /// + /// \brief Graph map based on the std::vector storage. + /// + /// The DebugMap template class is graph map structure what + /// automatically updates the map when a key is added to or erased from + /// the map. This map also checks some programming failures by example + /// multiple addition of items, erasing of not existing item or + /// not erased items at the destruction of the map. It helps the + /// programmer to avoid segmentation faults and memory leaks. + /// + /// \param Notifier The AlterationNotifier that will notify this map. + /// \param Item The item type of the graph items. + /// \param Value The value type of the map. + /// + /// \author Balazs Dezso + template + class DebugMap + : public ItemSetTraits<_Graph, _Item>::ItemNotifier::ObserverBase { + private: + + /// The container type of the map. + typedef std::vector<_Value> Container; + + /// The container type of the debug flags. + typedef std::vector Flag; + + public: + + static const bool strictCheck = LEMON_STRICT_DEBUG_MAP; + + struct MapError { + public: + virtual ~MapError() {} + virtual const char* what() const throw() { + return "lemon::DebugMap::MapError"; + } + }; + + /// The graph type of the map. + typedef _Graph Graph; + /// The item type of the map. + typedef _Item Item; + /// The reference map tag. + typedef True ReferenceMapTag; + + /// The key type of the map. + typedef _Item Key; + /// The value type of the map. + typedef _Value Value; + + /// The notifier type. + typedef typename ItemSetTraits<_Graph, _Item>::ItemNotifier Notifier; + + /// The map type. + typedef DebugMap Map; + /// The base class of the map. 
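// Editorial illustration, not part of the patch: the direction-bit id scheme
// used by UndirGraphExtender (and, with the complementary bit value, by
// BidirBpUGraphExtender) earlier in this patch. Every undirected edge with id
// k is exposed as two directed edges whose ids are 2*k and 2*k+1, so the two
// id spaces convert with a shift and a mask.
#include <cassert>

int directedId(int uEdgeId, bool forward) {
    return 2 * uEdgeId + (forward ? 1 : 0);   // low bit stores the direction
}
int uEdgeIdOf(int edgeId)    { return edgeId >> 1; }
bool directionOf(int edgeId) { return (edgeId & 1) != 0; }

int main() {
    int f = directedId(7, true);              // forward copy of undirected edge 7
    int b = directedId(7, false);             // backward copy
    assert(uEdgeIdOf(f) == 7 && directionOf(f));
    assert(uEdgeIdOf(b) == 7 && !directionOf(b));
    assert(f != b);                           // distinct ids, shared undirected edge
    return 0;
}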
+ typedef typename Notifier::ObserverBase Parent; + + /// The reference type of the map; + typedef typename Container::reference Reference; + /// The const reference type of the map; + typedef typename Container::const_reference ConstReference; + + + /// \brief Constructor to attach the new map into the notifier. + /// + /// It constructs a map and attachs it into the notifier. + /// It adds all the items of the graph to the map. + DebugMap(const Graph& graph) { + Parent::attach(graph.notifier(Item())); + container.resize(Parent::notifier()->maxId() + 1); + flag.resize(Parent::notifier()->maxId() + 1, false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + flag[Parent::notifier()->id(it)] = true; + } + } + + /// \brief Constructor uses given value to initialize the map. + /// + /// It constructs a map uses a given value to initialize the map. + /// It adds all the items of the graph to the map. + DebugMap(const Graph& graph, const Value& value) { + Parent::attach(graph.notifier(Item())); + container.resize(Parent::notifier()->maxId() + 1, value); + flag.resize(Parent::notifier()->maxId() + 1, false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + flag[Parent::notifier()->id(it)] = true; + } + } + + /// \brief Copy constructor + /// + /// Copy constructor. + DebugMap(const DebugMap& _copy) : Parent() { + if (_copy.attached()) { + Parent::attach(*_copy.notifier()); + container = _copy.container; + } + flag.resize(Parent::notifier()->maxId() + 1, false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + flag[Parent::notifier()->id(it)] = true; + LEMON_ASSERT(_copy.flag[Parent::notifier()->id(it)], MapError()); + } + } + + /// \brief Destructor + /// + /// Destructor. + ~DebugMap() { + const typename Parent::Notifier* notifier = Parent::notifier(); + if (notifier != 0) { + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + LEMON_ASSERT(flag[Parent::notifier()->id(it)], MapError()); + flag[Parent::notifier()->id(it)] = false; + } + } + for (int i = 0; i < int(flag.size()); ++i) { + LEMON_ASSERT(!flag[i], MapError()); + } + } + + /// \brief Assign operator. + /// + /// This operator assigns for each item in the map the + /// value mapped to the same item in the copied map. + /// The parameter map should be indiced with the same + /// itemset because this assign operator does not change + /// the container of the map. + DebugMap& operator=(const DebugMap& cmap) { + return operator=(cmap); + } + + + /// \brief Template assign operator. + /// + /// The given parameter should be conform to the ReadMap + /// concecpt and could be indiced by the current item set of + /// the NodeMap. In this case the value for each item + /// is assigned by the value of the given ReadMap. + template + DebugMap& operator=(const CMap& cmap) { + checkConcept, CMap>(); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + set(it, cmap[it]); + } + return *this; + } + + public: + + /// \brief The subcript operator. + /// + /// The subscript operator. The map can be subscripted by the + /// actual items of the graph. 
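// Editorial illustration, not part of the patch: the checking idea behind the
// subscript operators that follow -- keep one bool flag per id and assert it
// on every access, so that reading an id that was never added, or was already
// erased, fails loudly instead of returning stale data. CheckedMap is a
// hypothetical stand-in for DebugMap, with plain ints as keys.
#include <cassert>
#include <vector>

template <typename Value>
class CheckedMap {
    std::vector<Value> container;
    std::vector<bool>  flag;          // flag[id] == true  <=>  id is live
public:
    void add(int id) {
        if (id >= int(container.size())) {
            container.resize(id + 1);
            flag.resize(id + 1, false);
        }
        assert(!flag[id] && "id added twice");
        flag[id] = true;
    }
    void erase(int id) {
        assert(flag[id] && "erasing an id that is not in the map");
        container[id] = Value();
        flag[id] = false;
    }
    Value& operator[](int id) {
        assert(flag[id] && "access to an id that is not in the map");
        return container[id];
    }
};

int main() {
    CheckedMap<double> lengths;
    lengths.add(3);
    lengths[3] = 2.5;
    lengths.erase(3);
    // lengths[3];   // would now trigger the assertion: id 3 is no longer live
    return 0;
}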
+ Reference operator[](const Key& key) { + LEMON_ASSERT(flag[Parent::notifier()->id(key)], MapError()); + return container[Parent::notifier()->id(key)]; + } + + /// \brief The const subcript operator. + /// + /// The const subscript operator. The map can be subscripted by the + /// actual items of the graph. + ConstReference operator[](const Key& key) const { + LEMON_ASSERT(flag[Parent::notifier()->id(key)], MapError()); + return container[Parent::notifier()->id(key)]; + } + + + /// \brief The setter function of the map. + /// + /// It the same as operator[](key) = value expression. + void set(const Key& key, const Value& value) { + (*this)[key] = value; + } + + protected: + + /// \brief Adds a new key to the map. + /// + /// It adds a new key to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const Key& key) { + int id = Parent::notifier()->id(key); + if (id >= int(container.size())) { + container.resize(id + 1); + flag.resize(id + 1, false); + } + LEMON_ASSERT(!flag[Parent::notifier()->id(key)], MapError()); + flag[Parent::notifier()->id(key)] = true; + if (strictCheck) { + std::vector fl(flag.size(), false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int jd = Parent::notifier()->id(it); + fl[jd] = true; + } + LEMON_ASSERT(fl == flag, MapError()); + } + } + + /// \brief Adds more new keys to the map. + /// + /// It adds more new keys to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const std::vector& keys) { + int max = container.size() - 1; + for (int i = 0; i < int(keys.size()); ++i) { + int id = Parent::notifier()->id(keys[i]); + if (id >= max) { + max = id; + } + } + container.resize(max + 1); + flag.resize(max + 1, false); + for (int i = 0; i < int(keys.size()); ++i) { + LEMON_ASSERT(!flag[Parent::notifier()->id(keys[i])], MapError()); + flag[Parent::notifier()->id(keys[i])] = true; + } + if (strictCheck) { + std::vector fl(flag.size(), false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int id = Parent::notifier()->id(it); + fl[id] = true; + } + LEMON_ASSERT(fl == flag, MapError()); + } + } + + /// \brief Erase a key from the map. + /// + /// Erase a key from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. + virtual void erase(const Key& key) { + if (strictCheck) { + std::vector fl(flag.size(), false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int id = Parent::notifier()->id(it); + fl[id] = true; + } + LEMON_ASSERT(fl == flag, MapError()); + } + container[Parent::notifier()->id(key)] = Value(); + LEMON_ASSERT(flag[Parent::notifier()->id(key)], MapError()); + flag[Parent::notifier()->id(key)] = false; + } + + /// \brief Erase more keys from the map. + /// + /// Erase more keys from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. 
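// Editorial illustration, not part of the patch: the strictCheck pass used by
// add() and erase() in DebugMap -- recompute which ids ought to be live from
// the authoritative item list and compare with the flags the map has been
// maintaining incrementally; any mismatch means the map missed a notification.
// verifyFlags and the plain std::vector "liveIds" stand in for the notifier.
#include <cassert>
#include <vector>

bool verifyFlags(const std::vector<int>& liveIds, const std::vector<bool>& flag) {
    std::vector<bool> expected(flag.size(), false);
    for (int i = 0; i < int(liveIds.size()); ++i)
        expected[liveIds[i]] = true;   // mark every id the container reports
    return expected == flag;           // must match the incrementally kept flags
}

int main() {
    std::vector<bool> flag(4, false);
    flag[1] = true; flag[3] = true;

    std::vector<int> live;
    live.push_back(1); live.push_back(3);
    assert(verifyFlags(live, flag));   // consistent

    live.push_back(2);                 // container changed, map was not told
    assert(!verifyFlags(live, flag));  // the check catches the stale map
    return 0;
}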
+ virtual void erase(const std::vector& keys) { + if (strictCheck) { + std::vector fl(flag.size(), false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int id = Parent::notifier()->id(it); + fl[id] = true; + } + LEMON_ASSERT(fl == flag, MapError()); + } + for (int i = 0; i < int(keys.size()); ++i) { + container[Parent::notifier()->id(keys[i])] = Value(); + LEMON_ASSERT(flag[Parent::notifier()->id(keys[i])], MapError()); + flag[Parent::notifier()->id(keys[i])] = false; + } + } + + /// \brief Buildes the map. + /// + /// It buildes the map. It called by the observer notifier + /// and it overrides the build() member function of the observer base. + virtual void build() { + if (strictCheck) { + for (int i = 0; i < int(flag.size()); ++i) { + LEMON_ASSERT(flag[i], MapError()); + } + } + int size = Parent::notifier()->maxId() + 1; + container.reserve(size); + container.resize(size); + flag.reserve(size); + flag.resize(size, false); + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int id = Parent::notifier()->id(it); + LEMON_ASSERT(!flag[id], MapError()); + flag[id] = true; + } + } + + /// \brief Clear the map. + /// + /// It erase all items from the map. It called by the observer notifier + /// and it overrides the clear() member function of the observer base. + virtual void clear() { + const typename Parent::Notifier* notifier = Parent::notifier(); + Item it; + for (notifier->first(it); it != INVALID; notifier->next(it)) { + int id = Parent::notifier()->id(it); + LEMON_ASSERT(flag[id], MapError()); + flag[id] = false; + } + if (strictCheck) { + for (int i = 0; i < int(flag.size()); ++i) { + LEMON_ASSERT(!flag[i], MapError()); + } + } + container.clear(); + flag.clear(); + } + + private: + + Container container; + Flag flag; + + }; + +} + +#endif diff --git a/src/lemon/bits/default_map.h b/src/lemon/bits/default_map.h new file mode 100644 index 0000000..2a8689d --- /dev/null +++ b/src/lemon/bits/default_map.h @@ -0,0 +1,181 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_DEFAULT_MAP_H +#define LEMON_BITS_DEFAULT_MAP_H + + +#include +#include +#include + +///\ingroup graphbits +///\file +///\brief Graph maps that construct and destruct their elements dynamically. 
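// Editorial illustration, not part of the patch: the compile-time selection
// pattern implemented by DefaultMapSelector just below -- a primary template
// names one storage type and per-value-type specializations override it, the
// way bool/char/int/... are routed to VectorMap while everything else falls
// back to ArrayMap. StorageSelector, PackedStorage and GenericStorage are
// hypothetical names.
#include <list>
#include <vector>

template <typename Value> struct GenericStorage { std::list<Value>   data; };
template <typename Value> struct PackedStorage  { std::vector<Value> data; };

// Primary template: the general-purpose choice.
template <typename Value>
struct StorageSelector { typedef GenericStorage<Value> Type; };

// Specializations for value types known to pack well into a vector.
template <> struct StorageSelector<bool>   { typedef PackedStorage<bool>   Type; };
template <> struct StorageSelector<int>    { typedef PackedStorage<int>    Type; };
template <> struct StorageSelector<double> { typedef PackedStorage<double> Type; };

int main() {
    StorageSelector<int>::Type ints;                 // resolves to PackedStorage<int>
    StorageSelector<std::list<int> >::Type lists;    // falls back to GenericStorage
    ints.data.push_back(42);
    lists.data.push_back(std::list<int>());
    return 0;
}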
+ +namespace lemon { + + +#ifndef LEMON_USE_DEBUG_MAP + + template + struct DefaultMapSelector { + typedef ArrayMap<_Graph, _Item, _Value> Map; + }; + + // bool + template + struct DefaultMapSelector<_Graph, _Item, bool> { + typedef VectorMap<_Graph, _Item, bool> Map; + }; + + // char + template + struct DefaultMapSelector<_Graph, _Item, char> { + typedef VectorMap<_Graph, _Item, char> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, signed char> { + typedef VectorMap<_Graph, _Item, signed char> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, unsigned char> { + typedef VectorMap<_Graph, _Item, unsigned char> Map; + }; + + + // int + template + struct DefaultMapSelector<_Graph, _Item, signed int> { + typedef VectorMap<_Graph, _Item, signed int> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, unsigned int> { + typedef VectorMap<_Graph, _Item, unsigned int> Map; + }; + + + // short + template + struct DefaultMapSelector<_Graph, _Item, signed short> { + typedef VectorMap<_Graph, _Item, signed short> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, unsigned short> { + typedef VectorMap<_Graph, _Item, unsigned short> Map; + }; + + + // long + template + struct DefaultMapSelector<_Graph, _Item, signed long> { + typedef VectorMap<_Graph, _Item, signed long> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, unsigned long> { + typedef VectorMap<_Graph, _Item, unsigned long> Map; + }; + + +#if defined __GNUC__ && !defined __STRICT_ANSI__ + + // long long + template + struct DefaultMapSelector<_Graph, _Item, signed long long> { + typedef VectorMap<_Graph, _Item, signed long long> Map; + }; + + template + struct DefaultMapSelector<_Graph, _Item, unsigned long long> { + typedef VectorMap<_Graph, _Item, unsigned long long> Map; + }; + +#endif + + + // float + template + struct DefaultMapSelector<_Graph, _Item, float> { + typedef VectorMap<_Graph, _Item, float> Map; + }; + + + // double + template + struct DefaultMapSelector<_Graph, _Item, double> { + typedef VectorMap<_Graph, _Item, double> Map; + }; + + + // long double + template + struct DefaultMapSelector<_Graph, _Item, long double> { + typedef VectorMap<_Graph, _Item, long double> Map; + }; + + + // pointer + template + struct DefaultMapSelector<_Graph, _Item, _Ptr*> { + typedef VectorMap<_Graph, _Item, _Ptr*> Map; + }; + +#else + + template + struct DefaultMapSelector { + typedef DebugMap<_Graph, _Item, _Value> Map; + }; + +#endif + + /// \e + template + class DefaultMap + : public DefaultMapSelector<_Graph, _Item, _Value>::Map { + public: + typedef typename DefaultMapSelector<_Graph, _Item, _Value>::Map Parent; + typedef DefaultMap<_Graph, _Item, _Value> Map; + + typedef typename Parent::Graph Graph; + typedef typename Parent::Value Value; + + explicit DefaultMap(const Graph& graph) : Parent(graph) {} + DefaultMap(const Graph& graph, const Value& value) + : Parent(graph, value) {} + + DefaultMap& operator=(const DefaultMap& cmap) { + return operator=(cmap); + } + + template + DefaultMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + +} + +#endif diff --git a/src/lemon/bits/graph_adaptor_extender.h b/src/lemon/bits/graph_adaptor_extender.h new file mode 100644 index 0000000..3619264 --- /dev/null +++ b/src/lemon/bits/graph_adaptor_extender.h @@ -0,0 +1,742 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi 
Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_GRAPH_ADAPTOR_EXTENDER_H +#define LEMON_BITS_GRAPH_ADAPTOR_EXTENDER_H + +#include +#include + +#include + + +///\ingroup graphbits +///\file +///\brief Extenders for the graph adaptor types +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Extender for the GraphAdaptors + template + class GraphAdaptorExtender : public _Graph { + public: + + typedef _Graph Parent; + typedef _Graph Graph; + typedef GraphAdaptorExtender Adaptor; + + // Base extensions + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + int maxId(Node) const { + return Parent::maxNodeId(); + } + + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + + Node oppositeNode(const Node &n, const Edge &e) const { + if (n == Parent::source(e)) + return Parent::target(e); + else if(n==Parent::target(e)) + return Parent::source(e); + else + return INVALID; + } + + class NodeIt : public Node { + const Adaptor* graph; + public: + + NodeIt() {} + + NodeIt(Invalid i) : Node(i) { } + + explicit NodeIt(const Adaptor& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + NodeIt(const Adaptor& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class EdgeIt : public Edge { + const Adaptor* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(i) { } + + explicit EdgeIt(const Adaptor& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + EdgeIt(const Adaptor& _graph, const Edge& e) : + Edge(e), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class OutEdgeIt : public Edge { + const Adaptor* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Adaptor& _graph, const Node& node) + : graph(&_graph) { + _graph.firstOut(*this, node); + } + + OutEdgeIt(const Adaptor& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + const Adaptor* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Adaptor& _graph, const Node& node) + : graph(&_graph) { + _graph.firstIn(*this, node); + } + + InEdgeIt(const Adaptor& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. 
the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(e); + } + + }; + + + /// \ingroup graphbits + /// + /// \brief Extender for the UGraphAdaptors + template + class UGraphAdaptorExtender : public _UGraph { + public: + + typedef _UGraph Parent; + typedef _UGraph UGraph; + typedef UGraphAdaptorExtender Adaptor; + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + typedef typename Parent::UEdge UEdge; + + // UGraph extension + + int maxId(Node) const { + return Parent::maxNodeId(); + } + + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + + int maxId(UEdge) const { + return Parent::maxUEdgeId(); + } + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + + UEdge fromId(int id, UEdge) const { + return Parent::uEdgeFromId(id); + } + + Node oppositeNode(const Node &n, const UEdge &e) const { + if( n == Parent::source(e)) + return Parent::target(e); + else if( n == Parent::target(e)) + return Parent::source(e); + else + return INVALID; + } + + Edge oppositeEdge(const Edge &e) const { + return Parent::direct(e, !Parent::direction(e)); + } + + using Parent::direct; + Edge direct(const UEdge &ue, const Node &s) const { + return Parent::direct(ue, Parent::source(ue) == s); + } + + + class NodeIt : public Node { + const Adaptor* graph; + public: + + NodeIt() {} + + NodeIt(Invalid i) : Node(i) { } + + explicit NodeIt(const Adaptor& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + NodeIt(const Adaptor& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class EdgeIt : public Edge { + const Adaptor* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(i) { } + + explicit EdgeIt(const Adaptor& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + EdgeIt(const Adaptor& _graph, const Edge& e) : + Edge(e), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class OutEdgeIt : public Edge { + const Adaptor* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Adaptor& _graph, const Node& node) + : graph(&_graph) { + _graph.firstOut(*this, node); + } + + OutEdgeIt(const Adaptor& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + const Adaptor* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Adaptor& _graph, const Node& node) + : graph(&_graph) { + _graph.firstIn(*this, node); + } + + InEdgeIt(const Adaptor& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + class UEdgeIt : public Parent::UEdge { + const Adaptor* graph; + public: + + UEdgeIt() { } + + UEdgeIt(Invalid i) : UEdge(i) { } + + explicit UEdgeIt(const Adaptor& _graph) : 
graph(&_graph) { + _graph.first(static_cast(*this)); + } + + UEdgeIt(const Adaptor& _graph, const UEdge& e) : + UEdge(e), graph(&_graph) { } + + UEdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class IncEdgeIt : public Parent::UEdge { + friend class UGraphAdaptorExtender; + const Adaptor* graph; + bool direction; + public: + + IncEdgeIt() { } + + IncEdgeIt(Invalid i) : UEdge(i), direction(false) { } + + IncEdgeIt(const Adaptor& _graph, const Node &n) : graph(&_graph) { + _graph.firstInc(static_cast(*this), direction, n); + } + + IncEdgeIt(const Adaptor& _graph, const UEdge &ue, const Node &n) + : graph(&_graph), UEdge(ue) { + direction = (_graph.source(ue) == n); + } + + IncEdgeIt& operator++() { + graph->nextInc(*this, direction); + return *this; + } + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + + /// Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(const IncEdgeIt &e) const { + return e.direction ? source(e) : target(e); + } + /// Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(const IncEdgeIt &e) const { + return e.direction ? 
target(e) : source(e); + } + + }; + + /// \ingroup graphbits + /// + /// \brief Extender for the BpUGraphAdaptors + template + class BpUGraphAdaptorExtender : public Base { + public: + typedef Base Parent; + typedef BpUGraphAdaptorExtender Graph; + + typedef typename Parent::Node Node; + typedef typename Parent::BNode BNode; + typedef typename Parent::ANode ANode; + typedef typename Parent::Edge Edge; + typedef typename Parent::UEdge UEdge; + + + int maxId(Node) const { + return Parent::maxNodeId(); + } + int maxId(BNode) const { + return Parent::maxBNodeId(); + } + int maxId(ANode) const { + return Parent::maxANodeId(); + } + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + int maxId(UEdge) const { + return Parent::maxUEdgeId(); + } + + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + ANode fromId(int id, ANode) const { + return Parent::nodeFromANodeId(id); + } + BNode fromId(int id, BNode) const { + return Parent::nodeFromBNodeId(id); + } + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + UEdge fromId(int id, UEdge) const { + return Parent::uEdgeFromId(id); + } + + class NodeIt : public Node { + const Graph* graph; + public: + + NodeIt() { } + + NodeIt(Invalid i) : Node(INVALID) { } + + explicit NodeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + NodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) { } + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class ANodeIt : public Node { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + ANodeIt() { } + + ANodeIt(Invalid i) : Node(INVALID) { } + + explicit ANodeIt(const Graph& _graph) : graph(&_graph) { + graph->firstANode(static_cast(*this)); + } + + ANodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + ANodeIt& operator++() { + graph->nextANode(*this); + return *this; + } + }; + + class BNodeIt : public Node { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + BNodeIt() { } + + BNodeIt(Invalid i) : Node(INVALID) { } + + explicit BNodeIt(const Graph& _graph) : graph(&_graph) { + graph->firstBNode(static_cast(*this)); + } + + BNodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + BNodeIt& operator++() { + graph->nextBNode(*this); + return *this; + } + }; + + class EdgeIt : public Edge { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(INVALID) { } + + explicit EdgeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + EdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class UEdgeIt : public UEdge { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + UEdgeIt() { } + + UEdgeIt(Invalid i) : UEdge(INVALID) { } + + explicit UEdgeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + UEdgeIt(const Graph& _graph, const UEdge& edge) + : UEdge(edge), graph(&_graph) { } + + UEdgeIt& operator++() { + graph->next(*this); + return *this; + } + }; + + class OutEdgeIt : public Edge { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + graph->firstOut(*this, node); + } + + 
OutEdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + graph->firstIn(*this, node); + } + + InEdgeIt(const Graph& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + + class IncEdgeIt : public Parent::UEdge { + friend class BpUGraphAdaptorExtender; + const Graph* graph; + bool direction; + public: + + IncEdgeIt() { } + + IncEdgeIt(Invalid i) : UEdge(i), direction(true) { } + + IncEdgeIt(const Graph& _graph, const Node &n) : graph(&_graph) { + graph->firstInc(*this, direction, n); + } + + IncEdgeIt(const Graph& _graph, const UEdge &ue, const Node &n) + : graph(&_graph), UEdge(ue) { + direction = (graph->source(ue) == n); + } + + IncEdgeIt& operator++() { + graph->nextInc(*this, direction); + return *this; + } + }; + + + /// Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(const IncEdgeIt &e) const { + return e.direction ? source(e) : target(e); + } + + /// Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(const IncEdgeIt &e) const { + return e.direction ? target(e) : source(e); + } + + Node oppositeNode(const Node &n, const UEdge &e) const { + if( n == Parent::source(e)) + return Parent::target(e); + else if( n == Parent::target(e)) + return Parent::source(e); + else + return INVALID; + } + + Edge oppositeEdge(const Edge &e) const { + return Parent::direct(e, !Parent::direction(e)); + } + + using Parent::direct; + Edge direct(const UEdge &ue, const Node &s) const { + return Parent::direct(ue, Parent::source(ue) == s); + } + + }; + + +} + + +#endif diff --git a/src/lemon/bits/graph_extender.h b/src/lemon/bits/graph_extender.h new file mode 100644 index 0000000..464594d --- /dev/null +++ b/src/lemon/bits/graph_extender.h @@ -0,0 +1,1397 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. 
+ * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_GRAPH_EXTENDER_H +#define LEMON_BITS_GRAPH_EXTENDER_H + +#include +#include +#include + +#include +#include + +#include +#include + +///\ingroup graphbits +///\file +///\brief Extenders for the graph types +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Extender for the Graphs + template + class GraphExtender : public Base { + public: + + typedef Base Parent; + typedef GraphExtender Graph; + + // Base extensions + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + int maxId(Node) const { + return Parent::maxNodeId(); + } + + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + + Node oppositeNode(const Node &n, const Edge &e) const { + if (n == Parent::source(e)) + return Parent::target(e); + else if(n==Parent::target(e)) + return Parent::source(e); + else + return INVALID; + } + + // Alterable extension + + typedef AlterationNotifier NodeNotifier; + typedef AlterationNotifier EdgeNotifier; + + + protected: + + mutable NodeNotifier node_notifier; + mutable EdgeNotifier edge_notifier; + + public: + + NodeNotifier& notifier(Node) const { + return node_notifier; + } + + EdgeNotifier& notifier(Edge) const { + return edge_notifier; + } + + class NodeIt : public Node { + const Graph* graph; + public: + + NodeIt() {} + + NodeIt(Invalid i) : Node(i) { } + + explicit NodeIt(const Graph& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + NodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class EdgeIt : public Edge { + const Graph* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(i) { } + + explicit EdgeIt(const Graph& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + EdgeIt(const Graph& _graph, const Edge& e) : + Edge(e), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class OutEdgeIt : public Edge { + const Graph* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + _graph.firstOut(*this, node); + } + + OutEdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + const Graph* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + _graph.firstIn(*this, node); + } + + InEdgeIt(const Graph& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (i.e. the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (i.e. 
the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (i.e. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (i.e. the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(e); + } + + + template + class NodeMap + : public MapExtender > { + public: + typedef GraphExtender Graph; + typedef MapExtender > Parent; + + explicit NodeMap(const Graph& graph) + : Parent(graph) {} + NodeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + template + class EdgeMap + : public MapExtender > { + public: + typedef GraphExtender Graph; + typedef MapExtender > Parent; + + explicit EdgeMap(const Graph& graph) + : Parent(graph) {} + EdgeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + + Node addNode() { + Node node = Parent::addNode(); + notifier(Node()).add(node); + return node; + } + + Edge addEdge(const Node& from, const Node& to) { + Edge edge = Parent::addEdge(from, to); + notifier(Edge()).add(edge); + return edge; + } + + void clear() { + notifier(Edge()).clear(); + notifier(Node()).clear(); + Parent::clear(); + } + + template + void build(const Graph& graph, NodeRefMap& nodeRef, EdgeRefMap& edgeRef) { + Parent::build(graph, nodeRef, edgeRef); + notifier(Node()).build(); + notifier(Edge()).build(); + } + + void erase(const Node& node) { + Edge edge; + Parent::firstOut(edge, node); + while (edge != INVALID ) { + erase(edge); + Parent::firstOut(edge, node); + } + + Parent::firstIn(edge, node); + while (edge != INVALID ) { + erase(edge); + Parent::firstIn(edge, node); + } + + notifier(Node()).erase(node); + Parent::erase(node); + } + + void erase(const Edge& edge) { + notifier(Edge()).erase(edge); + Parent::erase(edge); + } + + GraphExtender() { + node_notifier.setContainer(*this); + edge_notifier.setContainer(*this); + } + + + ~GraphExtender() { + edge_notifier.clear(); + node_notifier.clear(); + } + }; + + /// \ingroup graphbits + /// + /// \brief Extender for the UGraphs + template + class UGraphExtender : public Base { + public: + + typedef Base Parent; + typedef UGraphExtender Graph; + + typedef True UndirectedTag; + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + typedef typename Parent::UEdge UEdge; + + // UGraph extension + + int maxId(Node) const { + return Parent::maxNodeId(); + } + + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + + int maxId(UEdge) const { + return Parent::maxUEdgeId(); + } + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + + UEdge fromId(int id, UEdge) const { + return Parent::uEdgeFromId(id); + } + + Node oppositeNode(const Node &n, const UEdge &e) const { + if( n == Parent::source(e)) + return Parent::target(e); + else if( n == Parent::target(e)) + return Parent::source(e); + 
else + return INVALID; + } + + Edge oppositeEdge(const Edge &e) const { + return Parent::direct(e, !Parent::direction(e)); + } + + using Parent::direct; + Edge direct(const UEdge &ue, const Node &s) const { + return Parent::direct(ue, Parent::source(ue) == s); + } + + // Alterable extension + + typedef AlterationNotifier NodeNotifier; + typedef AlterationNotifier EdgeNotifier; + typedef AlterationNotifier UEdgeNotifier; + + + protected: + + mutable NodeNotifier node_notifier; + mutable EdgeNotifier edge_notifier; + mutable UEdgeNotifier uedge_notifier; + + public: + + NodeNotifier& notifier(Node) const { + return node_notifier; + } + + EdgeNotifier& notifier(Edge) const { + return edge_notifier; + } + + UEdgeNotifier& notifier(UEdge) const { + return uedge_notifier; + } + + + + class NodeIt : public Node { + const Graph* graph; + public: + + NodeIt() {} + + NodeIt(Invalid i) : Node(i) { } + + explicit NodeIt(const Graph& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + NodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class EdgeIt : public Edge { + const Graph* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(i) { } + + explicit EdgeIt(const Graph& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + EdgeIt(const Graph& _graph, const Edge& e) : + Edge(e), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + + class OutEdgeIt : public Edge { + const Graph* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + _graph.firstOut(*this, node); + } + + OutEdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + const Graph* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + _graph.firstIn(*this, node); + } + + InEdgeIt(const Graph& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + + class UEdgeIt : public Parent::UEdge { + const Graph* graph; + public: + + UEdgeIt() { } + + UEdgeIt(Invalid i) : UEdge(i) { } + + explicit UEdgeIt(const Graph& _graph) : graph(&_graph) { + _graph.first(static_cast(*this)); + } + + UEdgeIt(const Graph& _graph, const UEdge& e) : + UEdge(e), graph(&_graph) { } + + UEdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class IncEdgeIt : public Parent::UEdge { + friend class UGraphExtender; + const Graph* graph; + bool direction; + public: + + IncEdgeIt() { } + + IncEdgeIt(Invalid i) : UEdge(i), direction(false) { } + + IncEdgeIt(const Graph& _graph, const Node &n) : graph(&_graph) { + _graph.firstInc(*this, direction, n); + } + + IncEdgeIt(const Graph& _graph, const UEdge &ue, const Node &n) + : graph(&_graph), UEdge(ue) { + direction = (_graph.source(ue) == n); + } + + IncEdgeIt& operator++() { + graph->nextInc(*this, direction); + return *this; + } + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. 
the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + + /// Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(const IncEdgeIt &e) const { + return e.direction ? source(e) : target(e); + } + /// Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(const IncEdgeIt &e) const { + return e.direction ? target(e) : source(e); + } + + // Mappable extension + + template + class NodeMap + : public MapExtender > { + public: + typedef UGraphExtender Graph; + typedef MapExtender > Parent; + + NodeMap(const Graph& graph) + : Parent(graph) {} + NodeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + template + class EdgeMap + : public MapExtender > { + public: + typedef UGraphExtender Graph; + typedef MapExtender > Parent; + + EdgeMap(const Graph& graph) + : Parent(graph) {} + EdgeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + + template + class UEdgeMap + : public MapExtender > { + public: + typedef UGraphExtender Graph; + typedef MapExtender > Parent; + + UEdgeMap(const Graph& graph) + : Parent(graph) {} + + UEdgeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + UEdgeMap& operator=(const UEdgeMap& cmap) { + return operator=(cmap); + } + + template + UEdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + // Alteration extension + + Node addNode() { + Node node = Parent::addNode(); + notifier(Node()).add(node); + return node; + } + + UEdge addEdge(const Node& from, const Node& to) { + UEdge uedge = Parent::addEdge(from, to); + notifier(UEdge()).add(uedge); + std::vector ev; + ev.push_back(Parent::direct(uedge, true)); + ev.push_back(Parent::direct(uedge, false)); + notifier(Edge()).add(ev); + return uedge; + } + + void clear() { + notifier(Edge()).clear(); + notifier(UEdge()).clear(); + notifier(Node()).clear(); + Parent::clear(); + } + + template + void build(const Graph& graph, NodeRefMap& nodeRef, + UEdgeRefMap& uEdgeRef) { + Parent::build(graph, nodeRef, uEdgeRef); + notifier(Node()).build(); + notifier(UEdge()).build(); + notifier(Edge()).build(); + } + + void erase(const Node& node) { + Edge edge; + Parent::firstOut(edge, node); + while (edge != INVALID ) { + erase(edge); + Parent::firstOut(edge, node); + } + + Parent::firstIn(edge, node); + while (edge != INVALID ) { + erase(edge); + Parent::firstIn(edge, 
node); + } + + notifier(Node()).erase(node); + Parent::erase(node); + } + + void erase(const UEdge& uedge) { + std::vector ev; + ev.push_back(Parent::direct(uedge, true)); + ev.push_back(Parent::direct(uedge, false)); + notifier(Edge()).erase(ev); + notifier(UEdge()).erase(uedge); + Parent::erase(uedge); + } + + UGraphExtender() { + node_notifier.setContainer(*this); + edge_notifier.setContainer(*this); + uedge_notifier.setContainer(*this); + } + + ~UGraphExtender() { + uedge_notifier.clear(); + edge_notifier.clear(); + node_notifier.clear(); + } + + }; + + /// \ingroup graphbits + /// + /// \brief Extender for the BpUGraphs + template + class BpUGraphExtender : public Base { + public: + + typedef Base Parent; + typedef BpUGraphExtender Graph; + + typedef True UndirectedTag; + + typedef typename Parent::Node Node; + typedef typename Parent::ANode ANode; + typedef typename Parent::BNode BNode; + typedef typename Parent::Edge Edge; + typedef typename Parent::UEdge UEdge; + + + Node oppositeNode(const Node& node, const UEdge& edge) const { + return Parent::aNode(edge) == node ? + Parent::bNode(edge) : Parent::aNode(edge); + } + + using Parent::direct; + Edge direct(const UEdge& edge, const Node& node) const { + return Parent::direct(edge, node == Parent::source(edge)); + } + + Edge oppositeEdge(const Edge& edge) const { + return direct(edge, !Parent::direction(edge)); + } + + int maxId(Node) const { + return Parent::maxNodeId(); + } + int maxId(BNode) const { + return Parent::maxBNodeId(); + } + int maxId(ANode) const { + return Parent::maxANodeId(); + } + int maxId(Edge) const { + return Parent::maxEdgeId(); + } + int maxId(UEdge) const { + return Parent::maxUEdgeId(); + } + + + Node fromId(int id, Node) const { + return Parent::nodeFromId(id); + } + ANode fromId(int id, ANode) const { + return Parent::nodeFromANodeId(id); + } + BNode fromId(int id, BNode) const { + return Parent::nodeFromBNodeId(id); + } + Edge fromId(int id, Edge) const { + return Parent::edgeFromId(id); + } + UEdge fromId(int id, UEdge) const { + return Parent::uEdgeFromId(id); + } + + typedef AlterationNotifier ANodeNotifier; + typedef AlterationNotifier BNodeNotifier; + typedef AlterationNotifier NodeNotifier; + typedef AlterationNotifier EdgeNotifier; + typedef AlterationNotifier UEdgeNotifier; + + protected: + + mutable ANodeNotifier anode_notifier; + mutable BNodeNotifier bnode_notifier; + mutable NodeNotifier node_notifier; + mutable EdgeNotifier edge_notifier; + mutable UEdgeNotifier uedge_notifier; + + public: + + NodeNotifier& notifier(Node) const { + return node_notifier; + } + + ANodeNotifier& notifier(ANode) const { + return anode_notifier; + } + + BNodeNotifier& notifier(BNode) const { + return bnode_notifier; + } + + EdgeNotifier& notifier(Edge) const { + return edge_notifier; + } + + UEdgeNotifier& notifier(UEdge) const { + return uedge_notifier; + } + + class NodeIt : public Node { + const Graph* graph; + public: + + NodeIt() { } + + NodeIt(Invalid i) : Node(INVALID) { } + + explicit NodeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + NodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) { } + + NodeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class ANodeIt : public Node { + friend class BpUGraphExtender; + const Graph* graph; + public: + + ANodeIt() { } + + ANodeIt(Invalid i) : Node(INVALID) { } + + explicit ANodeIt(const Graph& _graph) : graph(&_graph) { + graph->firstANode(static_cast(*this)); + } + + ANodeIt(const 
Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + ANodeIt& operator++() { + graph->nextANode(*this); + return *this; + } + }; + + class BNodeIt : public Node { + friend class BpUGraphExtender; + const Graph* graph; + public: + + BNodeIt() { } + + BNodeIt(Invalid i) : Node(INVALID) { } + + explicit BNodeIt(const Graph& _graph) : graph(&_graph) { + graph->firstBNode(static_cast(*this)); + } + + BNodeIt(const Graph& _graph, const Node& node) + : Node(node), graph(&_graph) {} + + BNodeIt& operator++() { + graph->nextBNode(*this); + return *this; + } + }; + + class EdgeIt : public Edge { + friend class BpUGraphExtender; + const Graph* graph; + public: + + EdgeIt() { } + + EdgeIt(Invalid i) : Edge(INVALID) { } + + explicit EdgeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + EdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) { } + + EdgeIt& operator++() { + graph->next(*this); + return *this; + } + + }; + + class UEdgeIt : public UEdge { + friend class BpUGraphExtender; + const Graph* graph; + public: + + UEdgeIt() { } + + UEdgeIt(Invalid i) : UEdge(INVALID) { } + + explicit UEdgeIt(const Graph& _graph) : graph(&_graph) { + graph->first(static_cast(*this)); + } + + UEdgeIt(const Graph& _graph, const UEdge& edge) + : UEdge(edge), graph(&_graph) { } + + UEdgeIt& operator++() { + graph->next(*this); + return *this; + } + }; + + class OutEdgeIt : public Edge { + friend class BpUGraphExtender; + const Graph* graph; + public: + + OutEdgeIt() { } + + OutEdgeIt(Invalid i) : Edge(i) { } + + OutEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + graph->firstOut(*this, node); + } + + OutEdgeIt(const Graph& _graph, const Edge& edge) + : Edge(edge), graph(&_graph) {} + + OutEdgeIt& operator++() { + graph->nextOut(*this); + return *this; + } + + }; + + + class InEdgeIt : public Edge { + friend class BpUGraphExtender; + const Graph* graph; + public: + + InEdgeIt() { } + + InEdgeIt(Invalid i) : Edge(i) { } + + InEdgeIt(const Graph& _graph, const Node& node) + : graph(&_graph) { + graph->firstIn(*this, node); + } + + InEdgeIt(const Graph& _graph, const Edge& edge) : + Edge(edge), graph(&_graph) {} + + InEdgeIt& operator++() { + graph->nextIn(*this); + return *this; + } + + }; + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the source in this case) of the iterator + Node baseNode(const OutEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. the target in this case) of the + /// iterator + Node runningNode(const OutEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (ie. the target in this case) of the iterator + Node baseNode(const InEdgeIt &e) const { + return Parent::target(static_cast(e)); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (ie. 
the source in this case) of the + /// iterator + Node runningNode(const InEdgeIt &e) const { + return Parent::source(static_cast(e)); + } + + class IncEdgeIt : public Parent::UEdge { + friend class BpUGraphExtender; + const Graph* graph; + bool direction; + public: + + IncEdgeIt() { } + + IncEdgeIt(Invalid i) : UEdge(i), direction(true) { } + + IncEdgeIt(const Graph& _graph, const Node &n) : graph(&_graph) { + graph->firstInc(*this, direction, n); + } + + IncEdgeIt(const Graph& _graph, const UEdge &ue, const Node &n) + : graph(&_graph), UEdge(ue) { + direction = (graph->source(ue) == n); + } + + IncEdgeIt& operator++() { + graph->nextInc(*this, direction); + return *this; + } + }; + + + /// Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(const IncEdgeIt &e) const { + return e.direction ? source(e) : target(e); + } + + /// Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(const IncEdgeIt &e) const { + return e.direction ? target(e) : source(e); + } + + template + class ANodeMap + : public MapExtender > { + public: + typedef BpUGraphExtender Graph; + typedef MapExtender > Parent; + + ANodeMap(const Graph& graph) + : Parent(graph) {} + ANodeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + ANodeMap& operator=(const ANodeMap& cmap) { + return operator=(cmap); + } + + template + ANodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + template + class BNodeMap + : public MapExtender > { + public: + typedef BpUGraphExtender Graph; + typedef MapExtender > Parent; + + BNodeMap(const Graph& graph) + : Parent(graph) {} + BNodeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + BNodeMap& operator=(const BNodeMap& cmap) { + return operator=(cmap); + } + + template + BNodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + public: + + template + class NodeMap { + public: + typedef BpUGraphExtender Graph; + + typedef Node Key; + typedef _Value Value; + + /// The reference type of the map; + typedef typename ANodeMap<_Value>::Reference Reference; + /// The const reference type of the map; + typedef typename ANodeMap<_Value>::ConstReference ConstReference; + + typedef True ReferenceMapTag; + + NodeMap(const Graph& _graph) + : graph(_graph), aNodeMap(_graph), bNodeMap(_graph) {} + NodeMap(const Graph& _graph, const _Value& _value) + : graph(_graph), aNodeMap(_graph, _value), bNodeMap(_graph, _value) {} + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + checkConcept, CMap>(); + aNodeMap = cmap; + bNodeMap = cmap; + return *this; + } + + ConstReference operator[](const Key& node) const { + if (Parent::aNode(node)) { + return aNodeMap[node]; + } else { + return bNodeMap[node]; + } + } + + Reference operator[](const Key& node) { + if (Parent::aNode(node)) { + return aNodeMap[node]; + } else { + return bNodeMap[node]; + } + } + + void set(const Key& node, const Value& value) { + if (Parent::aNode(node)) { + aNodeMap.set(node, value); + } else { + bNodeMap.set(node, value); + } + } + + class MapIt : public NodeIt { + public: + + typedef NodeIt Parent; + + explicit MapIt(NodeMap& _map) + : Parent(_map.graph), map(_map) {} + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + typename MapTraits::ReturnValue operator*() { + return map[*this]; + } + + void set(const Value& 
value) { + map.set(*this, value); + } + + private: + NodeMap& map; + }; + + class ConstMapIt : public NodeIt { + public: + + typedef NodeIt Parent; + + explicit ConstMapIt(const NodeMap& _map) + : Parent(_map.graph), map(_map) {} + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + private: + const NodeMap& map; + }; + + class ItemIt : public NodeIt { + public: + + typedef NodeIt Parent; + + explicit ItemIt(const NodeMap& _map) + : Parent(_map.graph) {} + + }; + + private: + const Graph& graph; + ANodeMap<_Value> aNodeMap; + BNodeMap<_Value> bNodeMap; + }; + + + template + class EdgeMap + : public MapExtender > { + public: + typedef BpUGraphExtender Graph; + typedef MapExtender > Parent; + + EdgeMap(const Graph& graph) + : Parent(graph) {} + EdgeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + template + class UEdgeMap + : public MapExtender > { + public: + typedef BpUGraphExtender Graph; + typedef MapExtender > Parent; + + UEdgeMap(const Graph& graph) + : Parent(graph) {} + UEdgeMap(const Graph& graph, const _Value& value) + : Parent(graph, value) {} + + UEdgeMap& operator=(const UEdgeMap& cmap) { + return operator=(cmap); + } + + template + UEdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + + Node addANode() { + Node node = Parent::addANode(); + notifier(ANode()).add(node); + notifier(Node()).add(node); + return node; + } + + Node addBNode() { + Node node = Parent::addBNode(); + notifier(BNode()).add(node); + notifier(Node()).add(node); + return node; + } + + UEdge addEdge(const Node& s, const Node& t) { + UEdge uedge = Parent::addEdge(s, t); + notifier(UEdge()).add(uedge); + + std::vector ev; + ev.push_back(Parent::direct(uedge, true)); + ev.push_back(Parent::direct(uedge, false)); + notifier(Edge()).add(ev); + + return uedge; + } + + void clear() { + notifier(Edge()).clear(); + notifier(UEdge()).clear(); + notifier(Node()).clear(); + notifier(BNode()).clear(); + notifier(ANode()).clear(); + Parent::clear(); + } + + template + void build(const Graph& graph, ANodeRefMap& aNodeRef, + BNodeRefMap& bNodeRef, UEdgeRefMap& uEdgeRef) { + Parent::build(graph, aNodeRef, bNodeRef, uEdgeRef); + notifier(ANode()).build(); + notifier(BNode()).build(); + notifier(Node()).build(); + notifier(UEdge()).build(); + notifier(Edge()).build(); + } + + void erase(const Node& node) { + UEdge uedge; + if (Parent::aNode(node)) { + Parent::firstFromANode(uedge, node); + while (uedge != INVALID) { + erase(uedge); + Parent::firstFromANode(uedge, node); + } + notifier(ANode()).erase(node); + } else { + Parent::firstFromBNode(uedge, node); + while (uedge != INVALID) { + erase(uedge); + Parent::firstFromBNode(uedge, node); + } + notifier(BNode()).erase(node); + } + + notifier(Node()).erase(node); + Parent::erase(node); + } + + void erase(const UEdge& uedge) { + std::vector ev; + ev.push_back(Parent::direct(uedge, true)); + ev.push_back(Parent::direct(uedge, false)); + notifier(Edge()).erase(ev); + notifier(UEdge()).erase(uedge); + Parent::erase(uedge); + } + + + BpUGraphExtender() { + anode_notifier.setContainer(*this); + bnode_notifier.setContainer(*this); + node_notifier.setContainer(*this); + edge_notifier.setContainer(*this); + uedge_notifier.setContainer(*this); + } + + ~BpUGraphExtender() { + uedge_notifier.clear(); + 
edge_notifier.clear(); + node_notifier.clear(); + anode_notifier.clear(); + bnode_notifier.clear(); + } + + Edge findEdge(Node u, Node v, Edge prev = INVALID) const { + UEdge uedge = Parent::findUEdge(u, v, prev); + if (uedge != INVALID) { + return Parent::direct(uedge, Parent::aNode(u)); + } else { + return INVALID; + } + } + + }; + +} + +#endif diff --git a/src/lemon/bits/invalid.h b/src/lemon/bits/invalid.h new file mode 100644 index 0000000..164e5c3 --- /dev/null +++ b/src/lemon/bits/invalid.h @@ -0,0 +1,54 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_INVALID_H +#define LEMON_BITS_INVALID_H + +///\file +///\brief Definition of INVALID. + +namespace lemon { + + /// \brief Dummy type to make it easier to make invalid iterators. + /// + /// See \ref INVALID for the usage. + struct Invalid { + public: + bool operator==(Invalid) { return true; } + bool operator!=(Invalid) { return false; } + bool operator< (Invalid) { return false; } + }; + + /// Invalid iterators. + + /// \ref Invalid is a global type that converts to each iterator + /// in such a way that the value of the target iterator will be invalid. + + //Some people didn't like this: + //const Invalid &INVALID = *(Invalid *)0; + +#ifdef LEMON_ONLY_TEMPLATES + const Invalid INVALID = Invalid(); +#else + extern const Invalid INVALID; +#endif + +} //namespace lemon + +#endif + diff --git a/src/lemon/bits/map_extender.h b/src/lemon/bits/map_extender.h new file mode 100644 index 0000000..68e8608 --- /dev/null +++ b/src/lemon/bits/map_extender.h @@ -0,0 +1,321 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_MAP_EXTENDER_H +#define LEMON_BITS_MAP_EXTENDER_H + +#include + +#include + +#include +#include + +///\file +///\brief Extenders for iterable maps. 
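+///
+/// For illustration, a possible way to use the iteration facilities these
+/// extenders provide, through a graph map built on MapExtender (ListGraph
+/// and its NodeMap are assumed here; they are not defined in this file):
+///\code
+///   ListGraph g;
+///   g.addNode(); g.addNode();
+///   ListGraph::NodeMap<int> level(g, 0);
+///   typedef ListGraph::NodeMap<int>::MapIt MapIt;
+///   for (MapIt it(level); it != INVALID; ++it) {
+///     it.set(*it + 1);   // read the stored value and write it back
+///   }
+///\endcode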
+ +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Extender for maps + template + class MapExtender : public _Map { + public: + + typedef _Map Parent; + typedef MapExtender Map; + + + typedef typename Parent::Graph Graph; + typedef typename Parent::Key Item; + + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + class MapIt; + class ConstMapIt; + + friend class MapIt; + friend class ConstMapIt; + + public: + + MapExtender(const Graph& graph) + : Parent(graph) {} + + MapExtender(const Graph& graph, const Value& value) + : Parent(graph, value) {} + + MapExtender& operator=(const MapExtender& cmap) { + return operator=(cmap); + } + + template + MapExtender& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + class MapIt : public Item { + public: + + typedef Item Parent; + typedef typename Map::Value Value; + + MapIt() {} + + MapIt(Invalid i) : Parent(i) { } + + explicit MapIt(Map& _map) : map(_map) { + map.notifier()->first(*this); + } + + MapIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + MapIt& operator++() { + map.notifier()->next(*this); + return *this; + } + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + typename MapTraits::ReturnValue operator*() { + return map[*this]; + } + + void set(const Value& value) { + map.set(*this, value); + } + + protected: + Map& map; + + }; + + class ConstMapIt : public Item { + public: + + typedef Item Parent; + + typedef typename Map::Value Value; + + ConstMapIt() {} + + ConstMapIt(Invalid i) : Parent(i) { } + + explicit ConstMapIt(Map& _map) : map(_map) { + map.notifier()->first(*this); + } + + ConstMapIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + ConstMapIt& operator++() { + map.notifier()->next(*this); + return *this; + } + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + protected: + const Map& map; + }; + + class ItemIt : public Item { + public: + + typedef Item Parent; + + ItemIt() {} + + ItemIt(Invalid i) : Parent(i) { } + + explicit ItemIt(Map& _map) : map(_map) { + map.notifier()->first(*this); + } + + ItemIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + ItemIt& operator++() { + map.notifier()->next(*this); + return *this; + } + + protected: + const Map& map; + + }; + }; + + /// \ingroup graphbits + /// + /// \brief Extender for maps which use a subset of the items. 
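+  ///
+  /// Unlike MapExtender above, which enumerates the items through the
+  /// map's notifier, this extender keeps a reference to the graph and
+  /// enumerates the items with the graph's own first()/next() functions,
+  /// so its iterators and its template assignment operator only touch the
+  /// items the graph currently reports.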
+ template + class SubMapExtender : public _Map { + public: + + typedef _Map Parent; + typedef SubMapExtender Map; + + typedef _Graph Graph; + + typedef typename Parent::Key Item; + + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + class MapIt; + class ConstMapIt; + + friend class MapIt; + friend class ConstMapIt; + + public: + + SubMapExtender(const Graph& _graph) + : Parent(_graph), graph(_graph) {} + + SubMapExtender(const Graph& _graph, const Value& _value) + : Parent(_graph, _value), graph(_graph) {} + + SubMapExtender& operator=(const SubMapExtender& cmap) { + return operator=(cmap); + } + + template + SubMapExtender& operator=(const CMap& cmap) { + checkConcept, CMap>(); + Item it; + for (graph.first(it); it != INVALID; graph.next(it)) { + Parent::set(it, cmap[it]); + } + return *this; + } + + class MapIt : public Item { + public: + + typedef Item Parent; + typedef typename Map::Value Value; + + MapIt() {} + + MapIt(Invalid i) : Parent(i) { } + + explicit MapIt(Map& _map) : map(_map) { + map.graph.first(*this); + } + + MapIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + MapIt& operator++() { + map.graph.next(*this); + return *this; + } + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + typename MapTraits::ReturnValue operator*() { + return map[*this]; + } + + void set(const Value& value) { + map.set(*this, value); + } + + protected: + Map& map; + + }; + + class ConstMapIt : public Item { + public: + + typedef Item Parent; + + typedef typename Map::Value Value; + + ConstMapIt() {} + + ConstMapIt(Invalid i) : Parent(i) { } + + explicit ConstMapIt(Map& _map) : map(_map) { + map.graph.first(*this); + } + + ConstMapIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + ConstMapIt& operator++() { + map.graph.next(*this); + return *this; + } + + typename MapTraits::ConstReturnValue operator*() const { + return map[*this]; + } + + protected: + const Map& map; + }; + + class ItemIt : public Item { + public: + + typedef Item Parent; + + ItemIt() {} + + ItemIt(Invalid i) : Parent(i) { } + + explicit ItemIt(Map& _map) : map(_map) { + map.graph.first(*this); + } + + ItemIt(const Map& _map, const Item& item) + : Parent(item), map(_map) {} + + ItemIt& operator++() { + map.graph.next(*this); + return *this; + } + + protected: + const Map& map; + + }; + + private: + + const Graph& graph; + + }; + +} + +#endif diff --git a/src/lemon/bits/path_dump.h b/src/lemon/bits/path_dump.h new file mode 100644 index 0000000..ccef70c --- /dev/null +++ b/src/lemon/bits/path_dump.h @@ -0,0 +1,174 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. 
+ * + */ + +#ifndef LEMON_BITS_PRED_MAP_PATH_H +#define LEMON_BITS_PRED_MAP_PATH_H + +namespace lemon { + + template + class PredMapPath { + public: + typedef True RevPathTag; + + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef _PredMap PredMap; + + PredMapPath(const Graph& _graph, const PredMap& _predMap, + typename Graph::Node _target) + : graph(_graph), predMap(_predMap), target(_target) {} + + int length() const { + int len = 0; + typename Graph::Node node = target; + typename Graph::Edge edge; + while ((edge = predMap[node]) != INVALID) { + node = graph.source(edge); + ++len; + } + return len; + } + + bool empty() const { + return predMap[target] != INVALID; + } + + class RevEdgeIt { + public: + RevEdgeIt() {} + RevEdgeIt(Invalid) : path(0), current(INVALID) {} + RevEdgeIt(const PredMapPath& _path) + : path(&_path), current(_path.target) { + if (path->predMap[current] == INVALID) current = INVALID; + } + + operator const typename Graph::Edge() const { + return path->predMap[current]; + } + + RevEdgeIt& operator++() { + current = path->graph.source(path->predMap[current]); + if (path->predMap[current] == INVALID) current = INVALID; + return *this; + } + + bool operator==(const RevEdgeIt& e) const { + return current == e.current; + } + + bool operator!=(const RevEdgeIt& e) const { + return current != e.current; + } + + bool operator<(const RevEdgeIt& e) const { + return current < e.current; + } + + private: + const PredMapPath* path; + typename Graph::Node current; + }; + + private: + const Graph& graph; + const PredMap& predMap; + typename Graph::Node target; + }; + + + template + class PredMatrixMapPath { + public: + typedef True RevPathTag; + + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef _PredMatrixMap PredMatrixMap; + + PredMatrixMapPath(const Graph& _graph, + const PredMatrixMap& _predMatrixMap, + typename Graph::Node _source, + typename Graph::Node _target) + : graph(_graph), predMatrixMap(_predMatrixMap), + source(_source), target(_target) {} + + int length() const { + int len = 0; + typename Graph::Node node = target; + typename Graph::Edge edge; + while ((edge = predMatrixMap(source, node)) != INVALID) { + node = graph.source(edge); + ++len; + } + return len; + } + + bool empty() const { + return source != target; + } + + class RevEdgeIt { + public: + RevEdgeIt() {} + RevEdgeIt(Invalid) : path(0), current(INVALID) {} + RevEdgeIt(const PredMatrixMapPath& _path) + : path(&_path), current(_path.target) { + if (path->predMatrixMap(path->source, current) == INVALID) + current = INVALID; + } + + operator const typename Graph::Edge() const { + return path->predMatrixMap(path->source, current); + } + + RevEdgeIt& operator++() { + current = + path->graph.source(path->predMatrixMap(path->source, current)); + if (path->predMatrixMap(path->source, current) == INVALID) + current = INVALID; + return *this; + } + + bool operator==(const RevEdgeIt& e) const { + return current == e.current; + } + + bool operator!=(const RevEdgeIt& e) const { + return current != e.current; + } + + bool operator<(const RevEdgeIt& e) const { + return current < e.current; + } + + private: + const PredMatrixMapPath* path; + typename Graph::Node current; + }; + + private: + const Graph& graph; + const PredMatrixMap& predMatrixMap; + typename Graph::Node source; + typename Graph::Node target; + }; + +} + +#endif diff --git a/src/lemon/bits/traits.h b/src/lemon/bits/traits.h new file mode 100644 index 0000000..6eb5b75 --- /dev/null +++ b/src/lemon/bits/traits.h @@ -0,0 
+1,346 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_TRAITS_H +#define LEMON_BITS_TRAITS_H + +#include + +///\file +///\brief Traits for graphs and maps +/// + +namespace lemon { + template + class ItemSetTraits {}; + + + template + struct NodeNotifierIndicator { + typedef InvalidType Type; + }; + template + struct NodeNotifierIndicator< + Graph, + typename enable_if::type + > { + typedef typename Graph::NodeNotifier Type; + }; + + template + class ItemSetTraits<_Graph, typename _Graph::Node> { + public: + + typedef _Graph Graph; + + typedef typename Graph::Node Item; + typedef typename Graph::NodeIt ItemIt; + + typedef typename NodeNotifierIndicator::Type ItemNotifier; + + template + class Map : public Graph::template NodeMap<_Value> { + public: + typedef typename Graph::template NodeMap<_Value> Parent; + typedef typename Graph::template NodeMap<_Value> Type; + typedef typename Parent::Value Value; + + Map(const Graph& _graph) : Parent(_graph) {} + Map(const Graph& _graph, const Value& _value) + : Parent(_graph, _value) {} + + }; + + }; + + template + struct EdgeNotifierIndicator { + typedef InvalidType Type; + }; + template + struct EdgeNotifierIndicator< + Graph, + typename enable_if::type + > { + typedef typename Graph::EdgeNotifier Type; + }; + + template + class ItemSetTraits<_Graph, typename _Graph::Edge> { + public: + + typedef _Graph Graph; + + typedef typename Graph::Edge Item; + typedef typename Graph::EdgeIt ItemIt; + + typedef typename EdgeNotifierIndicator::Type ItemNotifier; + + template + class Map : public Graph::template EdgeMap<_Value> { + public: + typedef typename Graph::template EdgeMap<_Value> Parent; + typedef typename Graph::template EdgeMap<_Value> Type; + typedef typename Parent::Value Value; + + Map(const Graph& _graph) : Parent(_graph) {} + Map(const Graph& _graph, const Value& _value) + : Parent(_graph, _value) {} + }; + + }; + + template + struct UEdgeNotifierIndicator { + typedef InvalidType Type; + }; + template + struct UEdgeNotifierIndicator< + Graph, + typename enable_if::type + > { + typedef typename Graph::UEdgeNotifier Type; + }; + + template + class ItemSetTraits<_Graph, typename _Graph::UEdge> { + public: + + typedef _Graph Graph; + + typedef typename Graph::UEdge Item; + typedef typename Graph::UEdgeIt ItemIt; + + typedef typename UEdgeNotifierIndicator::Type ItemNotifier; + + template + class Map : public Graph::template UEdgeMap<_Value> { + public: + typedef typename Graph::template UEdgeMap<_Value> Parent; + typedef typename Graph::template UEdgeMap<_Value> Type; + typedef typename Parent::Value Value; + + Map(const Graph& _graph) : Parent(_graph) {} + Map(const Graph& _graph, const Value& _value) + : Parent(_graph, _value) {} + }; + + }; + + template + struct ANodeNotifierIndicator { + typedef InvalidType Type; + }; + template + struct ANodeNotifierIndicator< + Graph, + typename enable_if::type + > { + typedef typename Graph::ANodeNotifier Type; + }; + + template + 
class ItemSetTraits<_Graph, typename _Graph::ANode> { + public: + + typedef _Graph Graph; + + typedef typename Graph::ANode Item; + typedef typename Graph::ANodeIt ItemIt; + + typedef typename ANodeNotifierIndicator::Type ItemNotifier; + + template + class Map : public Graph::template ANodeMap<_Value> { + public: + typedef typename Graph::template ANodeMap<_Value> Parent; + typedef typename Graph::template ANodeMap<_Value> Type; + typedef typename Parent::Value Value; + + Map(const Graph& _graph) : Parent(_graph) {} + Map(const Graph& _graph, const Value& _value) + : Parent(_graph, _value) {} + }; + + }; + + template + struct BNodeNotifierIndicator { + typedef InvalidType Type; + }; + template + struct BNodeNotifierIndicator< + Graph, + typename enable_if::type + > { + typedef typename Graph::BNodeNotifier Type; + }; + + template + class ItemSetTraits<_Graph, typename _Graph::BNode> { + public: + + typedef _Graph Graph; + + typedef typename Graph::BNode Item; + typedef typename Graph::BNodeIt ItemIt; + + typedef typename BNodeNotifierIndicator::Type ItemNotifier; + + template + class Map : public Graph::template BNodeMap<_Value> { + public: + typedef typename Graph::template BNodeMap<_Value> Parent; + typedef typename Graph::template BNodeMap<_Value> Type; + typedef typename Parent::Value Value; + + Map(const Graph& _graph) : Parent(_graph) {} + Map(const Graph& _graph, const Value& _value) + : Parent(_graph, _value) {} + }; + + }; + + + template + struct MapTraits { + typedef False ReferenceMapTag; + + typedef typename Map::Key Key; + typedef typename Map::Value Value; + + typedef const Value ConstReturnValue; + typedef const Value ReturnValue; + }; + + template + struct MapTraits< + Map, typename enable_if::type > + { + typedef True ReferenceMapTag; + + typedef typename Map::Key Key; + typedef typename Map::Value Value; + + typedef typename Map::ConstReference ConstReturnValue; + typedef typename Map::Reference ReturnValue; + + typedef typename Map::ConstReference ConstReference; + typedef typename Map::Reference Reference; + }; + + template + struct MatrixMapTraits { + typedef False ReferenceMapTag; + + typedef typename MatrixMap::FirstKey FirstKey; + typedef typename MatrixMap::SecondKey SecondKey; + typedef typename MatrixMap::Value Value; + + typedef const Value ConstReturnValue; + typedef const Value ReturnValue; + }; + + template + struct MatrixMapTraits< + MatrixMap, typename enable_if::type > + { + typedef True ReferenceMapTag; + + typedef typename MatrixMap::FirstKey FirstKey; + typedef typename MatrixMap::SecondKey SecondKey; + typedef typename MatrixMap::Value Value; + + typedef typename MatrixMap::ConstReference ConstReturnValue; + typedef typename MatrixMap::Reference ReturnValue; + + typedef typename MatrixMap::ConstReference ConstReference; + typedef typename MatrixMap::Reference Reference; + }; + + // Indicators for the tags + + template + struct NodeNumTagIndicator { + static const bool value = false; + }; + + template + struct NodeNumTagIndicator< + Graph, + typename enable_if::type + > { + static const bool value = true; + }; + + template + struct EdgeNumTagIndicator { + static const bool value = false; + }; + + template + struct EdgeNumTagIndicator< + Graph, + typename enable_if::type + > { + static const bool value = true; + }; + + template + struct FindEdgeTagIndicator { + static const bool value = false; + }; + + template + struct FindEdgeTagIndicator< + Graph, + typename enable_if::type + > { + static const bool value = true; + }; + + template + struct 
UndirectedTagIndicator { + static const bool value = false; + }; + + template + struct UndirectedTagIndicator< + Graph, + typename enable_if::type + > { + static const bool value = true; + }; + + template + struct BuildTagIndicator { + static const bool value = false; + }; + + template + struct BuildTagIndicator< + Graph, + typename enable_if::type + > { + static const bool value = true; + }; + +} + +#endif diff --git a/src/lemon/bits/utility.h b/src/lemon/bits/utility.h new file mode 100644 index 0000000..34fa159 --- /dev/null +++ b/src/lemon/bits/utility.h @@ -0,0 +1,140 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +// This file contains a modified version of the enable_if library from BOOST. +// See the appropriate copyright notice below. + +// Boost enable_if library + +// Copyright 2003 © The Trustees of Indiana University. + +// Use, modification, and distribution is subject to the Boost Software +// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +// Authors: Jaakko Järvi (jajarvi at osl.iu.edu) +// Jeremiah Willcock (jewillco at osl.iu.edu) +// Andrew Lumsdaine (lums at osl.iu.edu) + + +#ifndef LEMON_BITS_UTILITY_H +#define LEMON_BITS_UTILITY_H + +///\file +///\brief Miscellaneous basic utilities +/// +///\todo Please rethink the organisation of the basic files like this. +///E.g. this file might be merged with invalid.h. + + +namespace lemon +{ + + /// Basic type for defining "tags". A "YES" condition for \c enable_if. + + /// Basic type for defining "tags". A "YES" condition for \c enable_if. + /// + ///\sa False + /// + /// \todo This should go to a separate "basic_types.h" (or something) + /// file. + struct True { + ///\e + static const bool value = true; + }; + + /// Basic type for defining "tags". A "NO" condition for \c enable_if. + + /// Basic type for defining "tags". A "NO" condition for \c enable_if. 
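+  ///
+  /// For illustration only, a sketch of how these tag types are combined
+  /// with \c enable_if elsewhere in the library (this mirrors the
+  /// indicator structs of bits/traits.h; the graph type \c GR and its
+  /// \c UndirectedTag member are assumptions here):
+  ///\code
+  ///   template <typename GR, typename Enable = void>
+  ///   struct IsUndirected { static const bool value = false; };
+  ///
+  ///   template <typename GR>
+  ///   struct IsUndirected<
+  ///     GR, typename enable_if<typename GR::UndirectedTag, void>::type
+  ///   > { static const bool value = true; };
+  ///\endcode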
+ /// + ///\sa True + struct False { + ///\e + static const bool value = false; + }; + + + struct InvalidType { + }; + + template + struct Wrap { + const T &value; + Wrap(const T &t) : value(t) {} + }; + + /**************** dummy class to avoid ambiguity ****************/ + + template struct dummy { dummy(int) {} }; + + /**************** enable_if from BOOST ****************/ + + template + struct exists { + typedef T type; + }; + + + template + struct enable_if_c { + typedef T type; + }; + + template + struct enable_if_c {}; + + template + struct enable_if : public enable_if_c {}; + + template + struct lazy_enable_if_c { + typedef typename T::type type; + }; + + template + struct lazy_enable_if_c {}; + + template + struct lazy_enable_if : public lazy_enable_if_c {}; + + + template + struct disable_if_c { + typedef T type; + }; + + template + struct disable_if_c {}; + + template + struct disable_if : public disable_if_c {}; + + template + struct lazy_disable_if_c { + typedef typename T::type type; + }; + + template + struct lazy_disable_if_c {}; + + template + struct lazy_disable_if : public lazy_disable_if_c {}; + +} // namespace lemon + +#endif diff --git a/src/lemon/bits/variant.h b/src/lemon/bits/variant.h new file mode 100644 index 0000000..1f71484 --- /dev/null +++ b/src/lemon/bits/variant.h @@ -0,0 +1,508 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_VARIANT_H +#define LEMON_BITS_VARIANT_H + +#include + +/// \file +/// \brief Variant types + +namespace lemon { + + namespace _variant_bits { + + template + struct CTMax { + static const int value = left < right ? right : left; + }; + + } + + + /// \brief Simple Variant type for two types + /// + /// Simple Variant type for two types. The Variant type is a type + /// safe union. The C++ has strong limitations for using unions, by + /// example we can not store type with non default constructor or + /// destructor in an union. This class always knowns the current + /// state of the variant and it cares for the proper construction + /// and destruction. + template + class BiVariant { + public: + + /// \brief The \c First type. + typedef _First First; + /// \brief The \c Second type. + typedef _Second Second; + + struct WrongStateError : public lemon::LogicError { + public: + virtual const char* what() const throw() { + return "lemon::BiVariant::WrongStateError"; + } + }; + + /// \brief Constructor + /// + /// This constructor initalizes to the default value of the \c First + /// type. + BiVariant() { + flag = true; + new(reinterpret_cast(data)) First(); + } + + /// \brief Constructor + /// + /// This constructor initalizes to the given value of the \c First + /// type. + BiVariant(const First& f) { + flag = true; + new(reinterpret_cast(data)) First(f); + } + + /// \brief Constructor + /// + /// This constructor initalizes to the given value of the \c + /// Second type. 
+ BiVariant(const Second& s) { + flag = false; + new(reinterpret_cast(data)) Second(s); + } + + /// \brief Copy constructor + /// + /// Copy constructor + BiVariant(const BiVariant& bivariant) { + flag = bivariant.flag; + if (flag) { + new(reinterpret_cast(data)) First(bivariant.first()); + } else { + new(reinterpret_cast(data)) Second(bivariant.second()); + } + } + + /// \brief Destrcutor + /// + /// Destructor + ~BiVariant() { + destroy(); + } + + /// \brief Set to the default value of the \c First type. + /// + /// This function sets the variant to the default value of the \c + /// First type. + BiVariant& setFirst() { + destroy(); + flag = true; + new(reinterpret_cast(data)) First(); + return *this; + } + + /// \brief Set to the given value of the \c First type. + /// + /// This function sets the variant to the given value of the \c + /// First type. + BiVariant& setFirst(const First& f) { + destroy(); + flag = true; + new(reinterpret_cast(data)) First(f); + return *this; + } + + /// \brief Set to the default value of the \c Second type. + /// + /// This function sets the variant to the default value of the \c + /// Second type. + BiVariant& setSecond() { + destroy(); + flag = false; + new(reinterpret_cast(data)) Second(); + return *this; + } + + /// \brief Set to the given value of the \c Second type. + /// + /// This function sets the variant to the given value of the \c + /// Second type. + BiVariant& setSecond(const Second& s) { + destroy(); + flag = false; + new(reinterpret_cast(data)) Second(s); + return *this; + } + + /// \brief Operator form of the \c setFirst() + BiVariant& operator=(const First& f) { + return setFirst(f); + } + + /// \brief Operator form of the \c setSecond() + BiVariant& operator=(const Second& s) { + return setSecond(s); + } + + /// \brief Assign operator + BiVariant& operator=(const BiVariant& bivariant) { + if (this == &bivariant) return *this; + destroy(); + flag = bivariant.flag; + if (flag) { + new(reinterpret_cast(data)) First(bivariant.first()); + } else { + new(reinterpret_cast(data)) Second(bivariant.second()); + } + return *this; + } + + /// \brief Reference to the value + /// + /// Reference to the value of the \c First type. + /// \pre The BiVariant should store value of \c First type. + First& first() { + LEMON_ASSERT(flag, WrongStateError()); + return *reinterpret_cast(data); + } + + /// \brief Const reference to the value + /// + /// Const reference to the value of the \c First type. + /// \pre The BiVariant should store value of \c First type. + const First& first() const { + LEMON_ASSERT(flag, WrongStateError()); + return *reinterpret_cast(data); + } + + /// \brief Operator form of the \c first() + operator First&() { return first(); } + /// \brief Operator form of the const \c first() + operator const First&() const { return first(); } + + /// \brief Reference to the value + /// + /// Reference to the value of the \c Second type. + /// \pre The BiVariant should store value of \c Second type. + Second& second() { + LEMON_ASSERT(!flag, WrongStateError()); + return *reinterpret_cast(data); + } + + /// \brief Const reference to the value + /// + /// Const reference to the value of the \c Second type. + /// \pre The BiVariant should store value of \c Second type. 
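+    ///
+    /// For illustration, a small usage sketch of the class as a whole
+    /// (the concrete types chosen here are arbitrary):
+    ///\code
+    ///   BiVariant<int, std::string> v;      // starts in the First state
+    ///   v.setFirst(42);
+    ///   int i = v.first();                  // i == 42
+    ///   v.setSecond(std::string("lemon"));
+    ///   bool in_second = v.secondState();   // in_second == true
+    ///\endcode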
+ const Second& second() const { + LEMON_ASSERT(!flag, WrongStateError()); + return *reinterpret_cast(data); + } + + /// \brief Operator form of the \c second() + operator Second&() { return second(); } + /// \brief Operator form of the const \c second() + operator const Second&() const { return second(); } + + /// \brief %True when the variant is in the first state + /// + /// %True when the variant stores value of the \c First type. + bool firstState() const { return flag; } + + /// \brief %True when the variant is in the second state + /// + /// %True when the variant stores value of the \c Second type. + bool secondState() const { return !flag; } + + private: + + void destroy() { + if (flag) { + reinterpret_cast(data)->~First(); + } else { + reinterpret_cast(data)->~Second(); + } + } + + char data[_variant_bits::CTMax::value]; + bool flag; + }; + + namespace _variant_bits { + + template + struct Memory { + + typedef typename _TypeMap::template Map<_idx>::Type Current; + + static void destroy(int index, char* place) { + if (index == _idx) { + reinterpret_cast(place)->~Current(); + } else { + Memory<_idx - 1, _TypeMap>::destroy(index, place); + } + } + + static void copy(int index, char* to, const char* from) { + if (index == _idx) { + new (reinterpret_cast(to)) + Current(reinterpret_cast(from)); + } else { + Memory<_idx - 1, _TypeMap>::copy(index, to, from); + } + } + + }; + + template + struct Memory<-1, _TypeMap> { + + static void destroy(int, char*) { + LEMON_ASSERT(false, "Wrong Variant Index."); + } + + static void copy(int, char*, const char*) { + LEMON_ASSERT(false, "Wrong Variant Index."); + } + }; + + template + struct Size { + static const int value = + CTMax::Type), + Size<_idx - 1, _TypeMap>::value>::value; + }; + + template + struct Size<0, _TypeMap> { + static const int value = + sizeof(typename _TypeMap::template Map<0>::Type); + }; + + } + + /// \brief Variant type + /// + /// Simple Variant type. The Variant type is a type safe union. The + /// C++ has strong limitations for using unions, by example we + /// cannot store type with non default constructor or destructor in + /// a union. This class always knowns the current state of the + /// variant and it cares for the proper construction and + /// destruction. + /// + /// \param _num The number of the types which can be stored in the + /// variant type. + /// \param _TypeMap This class describes the types of the Variant. The + /// _TypeMap::Map::Type should be a valid type for each index + /// in the range {0, 1, ..., _num - 1}. The \c VariantTypeMap is helper + /// class to define such type mappings up to 10 types. + /// + /// And the usage of the class: + ///\code + /// typedef Variant<3, VariantTypeMap > MyVariant; + /// MyVariant var; + /// var.set<0>(12); + /// std::cout << var.get<0>() << std::endl; + /// var.set<1>("alpha"); + /// std::cout << var.get<1>() << std::endl; + /// var.set<2>(0.75); + /// std::cout << var.get<2>() << std::endl; + ///\endcode + /// + /// The result of course: + ///\code + /// 12 + /// alpha + /// 0.75 + ///\endcode + template + class Variant { + public: + + static const int num = _num; + + typedef _TypeMap TypeMap; + + struct WrongStateError : public lemon::LogicError { + public: + virtual const char* what() const throw() { + return "lemon::Variant::WrongStateError"; + } + }; + + /// \brief Constructor + /// + /// This constructor initalizes to the default value of the \c type + /// with 0 index. 
+ Variant() { + flag = 0; + new(reinterpret_cast::Type*>(data)) + typename TypeMap::template Map<0>::Type(); + } + + + /// \brief Copy constructor + /// + /// Copy constructor + Variant(const Variant& variant) { + flag = variant.flag; + _variant_bits::Memory::copy(flag, data, variant.data); + } + + /// \brief Assign operator + /// + /// Assign operator + Variant& operator=(const Variant& variant) { + if (this == &variant) return *this; + _variant_bits::Memory:: + destroy(flag, data); + flag = variant.flag; + _variant_bits::Memory:: + copy(flag, data, variant.data); + return *this; + } + + /// \brief Destrcutor + /// + /// Destructor + ~Variant() { + _variant_bits::Memory::destroy(flag, data); + } + + /// \brief Set to the default value of the type with \c _idx index. + /// + /// This function sets the variant to the default value of the + /// type with \c _idx index. + template + Variant& set() { + _variant_bits::Memory::destroy(flag, data); + flag = _idx; + new(reinterpret_cast::Type*>(data)) + typename TypeMap::template Map<_idx>::Type(); + return *this; + } + + /// \brief Set to the given value of the type with \c _idx index. + /// + /// This function sets the variant to the given value of the type + /// with \c _idx index. + template + Variant& set(const typename _TypeMap::template Map<_idx>::Type& init) { + _variant_bits::Memory::destroy(flag, data); + flag = _idx; + new(reinterpret_cast::Type*>(data)) + typename TypeMap::template Map<_idx>::Type(init); + return *this; + } + + /// \brief Gets the current value of the type with \c _idx index. + /// + /// Gets the current value of the type with \c _idx index. + template + const typename TypeMap::template Map<_idx>::Type& get() const { + LEMON_ASSERT(_idx == flag, "Wrong Variant Index."); + return *reinterpret_cast::Type*>(data); + } + + /// \brief Gets the current value of the type with \c _idx index. + /// + /// Gets the current value of the type with \c _idx index. + template + typename _TypeMap::template Map<_idx>::Type& get() { + LEMON_ASSERT(_idx == flag, "Wrong Variant Index."); + return *reinterpret_cast::Type*> + (data); + } + + /// \brief Returns the current state of the variant. + /// + /// Returns the current state of the variant. + int state() const { + return flag; + } + + private: + + char data[_variant_bits::Size::value]; + int flag; + }; + + namespace _variant_bits { + + template + struct Get { + typedef typename Get<_index - 1, typename _List::Next>::Type Type; + }; + + template + struct Get<0, _List> { + typedef typename _List::Type Type; + }; + + struct List {}; + + template + struct Insert { + typedef _List Next; + typedef _Type Type; + }; + + template + struct Mapper { + typedef List L10; + typedef Insert<_T9, L10> L9; + typedef Insert<_T8, L9> L8; + typedef Insert<_T7, L8> L7; + typedef Insert<_T6, L7> L6; + typedef Insert<_T5, L6> L5; + typedef Insert<_T4, L5> L4; + typedef Insert<_T3, L4> L3; + typedef Insert<_T2, L3> L2; + typedef Insert<_T1, L2> L1; + typedef Insert<_T0, L1> L0; + typedef typename Get<_idx, L0>::Type Type; + }; + + } + + /// \brief Helper class for Variant + /// + /// Helper class to define type mappings for Variant. This class + /// converts the template parameters to be mappable by integer. 
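+  ///
+  /// For instance, with the illustrative instantiation
+  /// \c VariantTypeMap<int,std::string,double>, \c Map<0>::Type is
+  /// \c int, \c Map<1>::Type is \c std::string and \c Map<2>::Type is
+  /// \c double; the remaining type slots default to \c void.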
+ /// \see Variant + template < + typename _T0, + typename _T1 = void, typename _T2 = void, typename _T3 = void, + typename _T5 = void, typename _T4 = void, typename _T6 = void, + typename _T7 = void, typename _T8 = void, typename _T9 = void> + struct VariantTypeMap { + template + struct Map { + typedef typename _variant_bits:: + Mapper<_idx, _T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9>::Type + Type; + }; + }; + +} + + +#endif diff --git a/src/lemon/bits/vector_map.h b/src/lemon/bits/vector_map.h new file mode 100644 index 0000000..9299671 --- /dev/null +++ b/src/lemon/bits/vector_map.h @@ -0,0 +1,243 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BITS_VECTOR_MAP_H +#define LEMON_BITS_VECTOR_MAP_H + +#include +#include + +#include +#include + +#include + +#include +#include + +///\ingroup graphbits +/// +///\file +///\brief Vector based graph maps. +namespace lemon { + + /// \ingroup graphbits + /// + /// \brief Graph map based on the std::vector storage. + /// + /// The VectorMap template class is graph map structure what + /// automatically updates the map when a key is added to or erased from + /// the map. This map type uses the std::vector to store the values. + /// + /// \param Notifier The AlterationNotifier that will notify this map. + /// \param Item The item type of the graph items. + /// \param Value The value type of the map. + /// + /// \author Balazs Dezso + template + class VectorMap + : public ItemSetTraits<_Graph, _Item>::ItemNotifier::ObserverBase { + private: + + /// The container type of the map. + typedef std::vector<_Value> Container; + + public: + + /// The graph type of the map. + typedef _Graph Graph; + /// The item type of the map. + typedef _Item Item; + /// The reference map tag. + typedef True ReferenceMapTag; + + /// The key type of the map. + typedef _Item Key; + /// The value type of the map. + typedef _Value Value; + + /// The notifier type. + typedef typename ItemSetTraits<_Graph, _Item>::ItemNotifier Notifier; + + /// The map type. + typedef VectorMap Map; + /// The base class of the map. + typedef typename Notifier::ObserverBase Parent; + + /// The reference type of the map; + typedef typename Container::reference Reference; + /// The const reference type of the map; + typedef typename Container::const_reference ConstReference; + + + /// \brief Constructor to attach the new map into the notifier. + /// + /// It constructs a map and attachs it into the notifier. + /// It adds all the items of the graph to the map. + VectorMap(const Graph& graph) { + Parent::attach(graph.notifier(Item())); + container.resize(Parent::notifier()->maxId() + 1); + } + + /// \brief Constructor uses given value to initialize the map. + /// + /// It constructs a map uses a given value to initialize the map. + /// It adds all the items of the graph to the map. 
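+    ///
+    /// For illustration, a possible direct instantiation (graph maps such
+    /// as Graph::NodeMap are normally used instead; ListGraph and the
+    /// <_Graph, _Item, _Value> argument order are assumptions here):
+    ///\code
+    ///   ListGraph g;
+    ///   ListGraph::Node n = g.addNode();
+    ///   VectorMap<ListGraph, ListGraph::Node, int> degree(g, 0);
+    ///   degree.set(n, 3);
+    ///   int d = degree[n];                  // d == 3
+    ///\endcode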
+ VectorMap(const Graph& graph, const Value& value) { + Parent::attach(graph.notifier(Item())); + container.resize(Parent::notifier()->maxId() + 1, value); + } + + /// \brief Copy constructor + /// + /// Copy constructor. + VectorMap(const VectorMap& _copy) : Parent() { + if (_copy.attached()) { + Parent::attach(*_copy.notifier()); + container = _copy.container; + } + } + + /// \brief Assign operator. + /// + /// This operator assigns for each item in the map the + /// value mapped to the same item in the copied map. + /// The parameter map should be indiced with the same + /// itemset because this assign operator does not change + /// the container of the map. + VectorMap& operator=(const VectorMap& cmap) { + return operator=(cmap); + } + + + /// \brief Template assign operator. + /// + /// The given parameter should be conform to the ReadMap + /// concecpt and could be indiced by the current item set of + /// the NodeMap. In this case the value for each item + /// is assigned by the value of the given ReadMap. + template + VectorMap& operator=(const CMap& cmap) { + checkConcept, CMap>(); + const typename Parent::Notifier* nf = Parent::notifier(); + Item it; + for (nf->first(it); it != INVALID; nf->next(it)) { + set(it, cmap[it]); + } + return *this; + } + + public: + + /// \brief The subcript operator. + /// + /// The subscript operator. The map can be subscripted by the + /// actual items of the graph. + Reference operator[](const Key& key) { + return container[Parent::notifier()->id(key)]; + } + + /// \brief The const subcript operator. + /// + /// The const subscript operator. The map can be subscripted by the + /// actual items of the graph. + ConstReference operator[](const Key& key) const { + return container[Parent::notifier()->id(key)]; + } + + + /// \brief The setter function of the map. + /// + /// It the same as operator[](key) = value expression. + void set(const Key& key, const Value& value) { + (*this)[key] = value; + } + + protected: + + /// \brief Adds a new key to the map. + /// + /// It adds a new key to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const Key& key) { + int id = Parent::notifier()->id(key); + if (id >= int(container.size())) { + container.resize(id + 1); + } + } + + /// \brief Adds more new keys to the map. + /// + /// It adds more new keys to the map. It called by the observer notifier + /// and it overrides the add() member function of the observer base. + virtual void add(const std::vector& keys) { + int max = container.size() - 1; + for (int i = 0; i < int(keys.size()); ++i) { + int id = Parent::notifier()->id(keys[i]); + if (id >= max) { + max = id; + } + } + container.resize(max + 1); + } + + /// \brief Erase a key from the map. + /// + /// Erase a key from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. + virtual void erase(const Key& key) { + container[Parent::notifier()->id(key)] = Value(); + } + + /// \brief Erase more keys from the map. + /// + /// Erase more keys from the map. It called by the observer notifier + /// and it overrides the erase() member function of the observer base. + virtual void erase(const std::vector& keys) { + for (int i = 0; i < int(keys.size()); ++i) { + container[Parent::notifier()->id(keys[i])] = Value(); + } + } + + /// \brief Buildes the map. + /// + /// It buildes the map. 
It called by the observer notifier + /// and it overrides the build() member function of the observer base. + virtual void build() { + int size = Parent::notifier()->maxId() + 1; + container.reserve(size); + container.resize(size); + } + + /// \brief Clear the map. + /// + /// It erase all items from the map. It called by the observer notifier + /// and it overrides the clear() member function of the observer base. + virtual void clear() { + container.clear(); + } + + private: + + Container container; + + }; + +} + +#endif diff --git a/src/lemon/bucket_heap.h b/src/lemon/bucket_heap.h new file mode 100644 index 0000000..f8b57e1 --- /dev/null +++ b/src/lemon/bucket_heap.h @@ -0,0 +1,831 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_BUCKET_HEAP_H +#define LEMON_BUCKET_HEAP_H + +///\ingroup auxdat +///\file +///\brief Bucket Heap implementation. + +#include +#include +#include + +namespace lemon { + + /// \ingroup auxdat + /// + /// \brief A Bucket Heap implementation. + /// + /// This class implements the \e bucket \e heap data structure. A \e heap + /// is a data structure for storing items with specified values called \e + /// priorities in such a way that finding the item with minimum priority is + /// efficient. The bucket heap is very simple implementation, it can store + /// only integer priorities and it stores for each priority in the + /// \f$ [0..C) \f$ range a list of items. So it should be used only when + /// the priorities are small. It is not intended to use as dijkstra heap. + /// + /// \param _ItemIntMap A read and writable Item int map, used internally + /// to handle the cross references. + /// \param minimize If the given parameter is true then the heap gives back + /// the lowest priority. + template + class BucketHeap { + + public: + /// \e + typedef typename _ItemIntMap::Key Item; + /// \e + typedef int Prio; + /// \e + typedef std::pair Pair; + /// \e + typedef _ItemIntMap ItemIntMap; + + /// \brief Type to represent the items states. + /// + /// Each Item element have a state associated to it. It may be "in heap", + /// "pre heap" or "post heap". The latter two are indifferent from the + /// heap's point of view, but may be useful to the user. + /// + /// The ItemIntMap \e should be initialized in such way that it maps + /// PRE_HEAP (-1) to any element to be put in the heap... + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + public: + /// \brief The constructor. + /// + /// The constructor. + /// \param _index should be given to the constructor, since it is used + /// internally to handle the cross references. The value of the map + /// should be PRE_HEAP (-1) for each element. + explicit BucketHeap(ItemIntMap &_index) : index(_index), minimal(0) {} + + /// The number of items stored in the heap. + /// + /// \brief Returns the number of items stored in the heap. + int size() const { return data.size(); } + + /// \brief Checks if the heap stores no items. 
+ /// + /// Returns \c true if and only if the heap stores no items. + bool empty() const { return data.empty(); } + + /// \brief Make empty this heap. + /// + /// Make empty this heap. It does not change the cross reference + /// map. If you want to reuse a heap what is not surely empty you + /// should first clear the heap and after that you should set the + /// cross reference map for each item to \c PRE_HEAP. + void clear() { + data.clear(); first.clear(); minimal = 0; + } + + private: + + void relocate_last(int idx) { + if (idx + 1 < int(data.size())) { + data[idx] = data.back(); + if (data[idx].prev != -1) { + data[data[idx].prev].next = idx; + } else { + first[data[idx].value] = idx; + } + if (data[idx].next != -1) { + data[data[idx].next].prev = idx; + } + index[data[idx].item] = idx; + } + data.pop_back(); + } + + void unlace(int idx) { + if (data[idx].prev != -1) { + data[data[idx].prev].next = data[idx].next; + } else { + first[data[idx].value] = data[idx].next; + } + if (data[idx].next != -1) { + data[data[idx].next].prev = data[idx].prev; + } + } + + void lace(int idx) { + if (int(first.size()) <= data[idx].value) { + first.resize(data[idx].value + 1, -1); + } + data[idx].next = first[data[idx].value]; + if (data[idx].next != -1) { + data[data[idx].next].prev = idx; + } + first[data[idx].value] = idx; + data[idx].prev = -1; + } + + public: + /// \brief Insert a pair of item and priority into the heap. + /// + /// Adds \c p.first to the heap with priority \c p.second. + /// \param p The pair to insert. + void push(const Pair& p) { + push(p.first, p.second); + } + + /// \brief Insert an item into the heap with the given priority. + /// + /// Adds \c i to the heap with priority \c p. + /// \param i The item to insert. + /// \param p The priority of the item. + void push(const Item &i, const Prio &p) { + int idx = data.size(); + index[i] = idx; + data.push_back(BucketItem(i, p)); + lace(idx); + if (p < minimal) { + minimal = p; + } + } + + /// \brief Returns the item with minimum priority. + /// + /// This method returns the item with minimum priority. + /// \pre The heap must be nonempty. + Item top() const { + while (first[minimal] == -1) { + ++minimal; + } + return data[first[minimal]].item; + } + + /// \brief Returns the minimum priority. + /// + /// It returns the minimum priority. + /// \pre The heap must be nonempty. + Prio prio() const { + while (first[minimal] == -1) { + ++minimal; + } + return minimal; + } + + /// \brief Deletes the item with minimum priority. + /// + /// This method deletes the item with minimum priority from the heap. + /// \pre The heap must be non-empty. + void pop() { + while (first[minimal] == -1) { + ++minimal; + } + int idx = first[minimal]; + index[data[idx].item] = -2; + unlace(idx); + relocate_last(idx); + } + + /// \brief Deletes \c i from the heap. + /// + /// This method deletes item \c i from the heap, if \c i was + /// already stored in the heap. + /// \param i The item to erase. + void erase(const Item &i) { + int idx = index[i]; + index[data[idx].item] = -2; + unlace(idx); + relocate_last(idx); + } + + + /// \brief Returns the priority of \c i. + /// + /// This function returns the priority of item \c i. + /// \pre \c i must be in the heap. + /// \param i The item. + Prio operator[](const Item &i) const { + int idx = index[i]; + return data[idx].value; + } + + /// \brief \c i gets to the heap with priority \c p independently + /// if \c i was already there. 
+ /// + /// This method calls \ref push(\c i, \c p) if \c i is not stored + /// in the heap and sets the priority of \c i to \c p otherwise. + /// \param i The item. + /// \param p The priority. + void set(const Item &i, const Prio &p) { + int idx = index[i]; + if (idx < 0) { + push(i,p); + } else if (p > data[idx].value) { + increase(i, p); + } else { + decrease(i, p); + } + } + + /// \brief Decreases the priority of \c i to \c p. + /// + /// This method decreases the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at least \c + /// p relative to \c Compare. + /// \param i The item. + /// \param p The priority. + void decrease(const Item &i, const Prio &p) { + int idx = index[i]; + unlace(idx); + data[idx].value = p; + if (p < minimal) { + minimal = p; + } + lace(idx); + } + + /// \brief Increases the priority of \c i to \c p. + /// + /// This method sets the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at most \c + /// p relative to \c Compare. + /// \param i The item. + /// \param p The priority. + void increase(const Item &i, const Prio &p) { + int idx = index[i]; + unlace(idx); + data[idx].value = p; + lace(idx); + } + + /// \brief Returns if \c item is in, has already been in, or has + /// never been in the heap. + /// + /// This method returns PRE_HEAP if \c item has never been in the + /// heap, IN_HEAP if it is in the heap at the moment, and POST_HEAP + /// otherwise. In the latter case it is possible that \c item will + /// get back to the heap again. + /// \param i The item. + State state(const Item &i) const { + int idx = index[i]; + if (idx >= 0) idx = 0; + return State(idx); + } + + /// \brief Sets the state of the \c item in the heap. + /// + /// Sets the state of the \c item in the heap. It can be used to + /// manually clear the heap when it is important to achive the + /// better time complexity. + /// \param i The item. + /// \param st The state. It should not be \c IN_HEAP. 
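// ---------------------------------------------------------------------------
// Editorial sketch: typical use of the interface documented above.  BucketHeap
// keeps no bookkeeping of its own; the caller supplies a writable item-to-int
// cross-reference map whose entries start out as PRE_HEAP (-1).  CrossRef is a
// hypothetical stand-in for such a map (graph code would normally use e.g. a
// NodeMap<int>), the <lemon/...> include path is assumed, and the primary
// template is taken to be BucketHeap<ItemIntMap, bool minimize = true>, as the
// specialization further down suggests.
#include <lemon/bucket_heap.h>
#include <iostream>
#include <map>
#include <string>

struct CrossRef {                      // minimal _ItemIntMap stand-in
  typedef std::string Key;             // the heap derives its Item type from this
  int& operator[](const Key& k) {
    std::map<std::string, int>::iterator it = store_.find(k);
    if (it == store_.end())
      it = store_.insert(std::make_pair(k, -1)).first;   // -1 == PRE_HEAP
    return it->second;
  }
  std::map<std::string, int> store_;
};

int main() {
  CrossRef xref;
  lemon::BucketHeap<CrossRef> heap(xref);   // min-heap over small integer priorities
  heap.push("low", 1);
  heap.push("mid", 4);
  heap.push("high", 9);
  while (!heap.empty()) {                   // items come out in order of rising priority
    std::cout << heap.prio() << '\t' << heap.top() << '\n';
    heap.pop();
  }
  return 0;
}
// ---------------------------------------------------------------------------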
+ void state(const Item& i, State st) { + switch (st) { + case POST_HEAP: + case PRE_HEAP: + if (state(i) == IN_HEAP) { + erase(i); + } + index[i] = st; + break; + case IN_HEAP: + break; + } + } + + private: + + struct BucketItem { + BucketItem(const Item& _item, int _value) + : item(_item), value(_value) {} + + Item item; + int value; + + int prev, next; + }; + + ItemIntMap& index; + std::vector first; + std::vector data; + mutable int minimal; + + }; // class BucketHeap + + + template + class BucketHeap<_ItemIntMap, false> { + + public: + typedef typename _ItemIntMap::Key Item; + typedef int Prio; + typedef std::pair Pair; + typedef _ItemIntMap ItemIntMap; + + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + public: + + explicit BucketHeap(ItemIntMap &_index) : index(_index), maximal(-1) {} + + int size() const { return data.size(); } + bool empty() const { return data.empty(); } + + void clear() { + data.clear(); first.clear(); maximal = -1; + } + + private: + + void relocate_last(int idx) { + if (idx + 1 != int(data.size())) { + data[idx] = data.back(); + if (data[idx].prev != -1) { + data[data[idx].prev].next = idx; + } else { + first[data[idx].value] = idx; + } + if (data[idx].next != -1) { + data[data[idx].next].prev = idx; + } + index[data[idx].item] = idx; + } + data.pop_back(); + } + + void unlace(int idx) { + if (data[idx].prev != -1) { + data[data[idx].prev].next = data[idx].next; + } else { + first[data[idx].value] = data[idx].next; + } + if (data[idx].next != -1) { + data[data[idx].next].prev = data[idx].prev; + } + } + + void lace(int idx) { + if (int(first.size()) <= data[idx].value) { + first.resize(data[idx].value + 1, -1); + } + data[idx].next = first[data[idx].value]; + if (data[idx].next != -1) { + data[data[idx].next].prev = idx; + } + first[data[idx].value] = idx; + data[idx].prev = -1; + } + + public: + + void push(const Pair& p) { + push(p.first, p.second); + } + + void push(const Item &i, const Prio &p) { + int idx = data.size(); + index[i] = idx; + data.push_back(BucketItem(i, p)); + lace(idx); + if (data[idx].value > maximal) { + maximal = data[idx].value; + } + } + + Item top() const { + while (first[maximal] == -1) { + --maximal; + } + return data[first[maximal]].item; + } + + Prio prio() const { + while (first[maximal] == -1) { + --maximal; + } + return maximal; + } + + void pop() { + while (first[maximal] == -1) { + --maximal; + } + int idx = first[maximal]; + index[data[idx].item] = -2; + unlace(idx); + relocate_last(idx); + } + + void erase(const Item &i) { + int idx = index[i]; + index[data[idx].item] = -2; + unlace(idx); + relocate_last(idx); + } + + Prio operator[](const Item &i) const { + int idx = index[i]; + return data[idx].value; + } + + void set(const Item &i, const Prio &p) { + int idx = index[i]; + if (idx < 0) { + push(i,p); + } else if (p > data[idx].value) { + decrease(i, p); + } else { + increase(i, p); + } + } + + void decrease(const Item &i, const Prio &p) { + int idx = index[i]; + unlace(idx); + data[idx].value = p; + if (p > maximal) { + maximal = p; + } + lace(idx); + } + + void increase(const Item &i, const Prio &p) { + int idx = index[i]; + unlace(idx); + data[idx].value = p; + lace(idx); + } + + State state(const Item &i) const { + int idx = index[i]; + if (idx >= 0) idx = 0; + return State(idx); + } + + void state(const Item& i, State st) { + switch (st) { + case POST_HEAP: + case PRE_HEAP: + if (state(i) == IN_HEAP) { + erase(i); + } + index[i] = st; + break; + case IN_HEAP: + break; + } + } + + private: + + 
struct BucketItem { + BucketItem(const Item& _item, int _value) + : item(_item), value(_value) {} + + Item item; + int value; + + int prev, next; + }; + + ItemIntMap& index; + std::vector first; + std::vector data; + mutable int maximal; + + }; // class BucketHeap + + /// \ingroup auxdat + /// + /// \brief A Simplified Bucket Heap implementation. + /// + /// This class implements a simplified \e bucket \e heap data + /// structure. It does not provide some functionality but it faster + /// and simplier data structure than the BucketHeap. The main + /// difference is that the BucketHeap stores for every key a double + /// linked list while this class stores just simple lists. In the + /// other way it does not supports erasing each elements just the + /// minimal and it does not supports key increasing, decreasing. + /// + /// \param _ItemIntMap A read and writable Item int map, used internally + /// to handle the cross references. + /// \param minimize If the given parameter is true then the heap gives back + /// the lowest priority. + /// + /// \sa BucketHeap + template + class SimpleBucketHeap { + + public: + typedef typename _ItemIntMap::Key Item; + typedef int Prio; + typedef std::pair Pair; + typedef _ItemIntMap ItemIntMap; + + /// \brief Type to represent the items states. + /// + /// Each Item element have a state associated to it. It may be "in heap", + /// "pre heap" or "post heap". The latter two are indifferent from the + /// heap's point of view, but may be useful to the user. + /// + /// The ItemIntMap \e should be initialized in such way that it maps + /// PRE_HEAP (-1) to any element to be put in the heap... + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + public: + + /// \brief The constructor. + /// + /// The constructor. + /// \param _index should be given to the constructor, since it is used + /// internally to handle the cross references. The value of the map + /// should be PRE_HEAP (-1) for each element. + explicit SimpleBucketHeap(ItemIntMap &_index) + : index(_index), free(-1), num(0), minimal(0) {} + + /// \brief Returns the number of items stored in the heap. + /// + /// The number of items stored in the heap. + int size() const { return num; } + + /// \brief Checks if the heap stores no items. + /// + /// Returns \c true if and only if the heap stores no items. + bool empty() const { return num == 0; } + + /// \brief Make empty this heap. + /// + /// Make empty this heap. It does not change the cross reference + /// map. If you want to reuse a heap what is not surely empty you + /// should first clear the heap and after that you should set the + /// cross reference map for each item to \c PRE_HEAP. + void clear() { + data.clear(); first.clear(); free = -1; num = 0; minimal = 0; + } + + /// \brief Insert a pair of item and priority into the heap. + /// + /// Adds \c p.first to the heap with priority \c p.second. + /// \param p The pair to insert. + void push(const Pair& p) { + push(p.first, p.second); + } + + /// \brief Insert an item into the heap with the given priority. + /// + /// Adds \c i to the heap with priority \c p. + /// \param i The item to insert. + /// \param p The priority of the item. 
+ void push(const Item &i, const Prio &p) { + int idx; + if (free == -1) { + idx = data.size(); + data.push_back(BucketItem(i)); + } else { + idx = free; + free = data[idx].next; + data[idx].item = i; + } + index[i] = idx; + if (p >= int(first.size())) first.resize(p + 1, -1); + data[idx].next = first[p]; + first[p] = idx; + if (p < minimal) { + minimal = p; + } + ++num; + } + + /// \brief Returns the item with minimum priority. + /// + /// This method returns the item with minimum priority. + /// \pre The heap must be nonempty. + Item top() const { + while (first[minimal] == -1) { + ++minimal; + } + return data[first[minimal]].item; + } + + /// \brief Returns the minimum priority. + /// + /// It returns the minimum priority. + /// \pre The heap must be nonempty. + Prio prio() const { + while (first[minimal] == -1) { + ++minimal; + } + return minimal; + } + + /// \brief Deletes the item with minimum priority. + /// + /// This method deletes the item with minimum priority from the heap. + /// \pre The heap must be non-empty. + void pop() { + while (first[minimal] == -1) { + ++minimal; + } + int idx = first[minimal]; + index[data[idx].item] = -2; + first[minimal] = data[idx].next; + data[idx].next = free; + free = idx; + --num; + } + + /// \brief Returns the priority of \c i. + /// + /// This function returns the priority of item \c i. + /// \warning This operator is not a constant time function + /// because it scans the whole data structure to find the proper + /// value. + /// \pre \c i must be in the heap. + /// \param i The item. + Prio operator[](const Item &i) const { + for (int k = 0; k < first.size(); ++k) { + int idx = first[k]; + while (idx != -1) { + if (data[idx].item == i) { + return k; + } + idx = data[idx].next; + } + } + return -1; + } + + /// \brief Returns if \c item is in, has already been in, or has + /// never been in the heap. + /// + /// This method returns PRE_HEAP if \c item has never been in the + /// heap, IN_HEAP if it is in the heap at the moment, and POST_HEAP + /// otherwise. In the latter case it is possible that \c item will + /// get back to the heap again. + /// \param i The item. 
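// ---------------------------------------------------------------------------
// Editorial sketch: push() above recycles vector slots that pop() has released
// by threading them onto the `free` list through the `next` field.  The toy
// pool below isolates just that slot-recycling pattern; the names are made up
// and nothing here is LEMON API.
#include <vector>

struct ToySlotPool {
  struct Slot { int payload; int next; };   // `next` doubles as the free-list link
  std::vector<Slot> slots;
  int free_head;                            // -1 means "no reusable slot"

  ToySlotPool() : free_head(-1) {}

  int allocate(int payload) {
    int idx;
    if (free_head == -1) {                  // no hole to reuse: grow the vector
      idx = static_cast<int>(slots.size());
      slots.push_back(Slot());
    } else {                                // reuse the most recently freed slot
      idx = free_head;
      free_head = slots[idx].next;
    }
    slots[idx].payload = payload;
    return idx;
  }

  void release(int idx) {                   // put the slot back on the free list
    slots[idx].next = free_head;
    free_head = idx;
  }
};
// ---------------------------------------------------------------------------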
+ State state(const Item &i) const { + int idx = index[i]; + if (idx >= 0) idx = 0; + return State(idx); + } + + private: + + struct BucketItem { + BucketItem(const Item& _item) + : item(_item) {} + + Item item; + int next; + }; + + ItemIntMap& index; + std::vector first; + std::vector data; + int free, num; + mutable int minimal; + + }; // class SimpleBucketHeap + + template + class SimpleBucketHeap<_ItemIntMap, false> { + + public: + typedef typename _ItemIntMap::Key Item; + typedef int Prio; + typedef std::pair Pair; + typedef _ItemIntMap ItemIntMap; + + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + public: + + explicit SimpleBucketHeap(ItemIntMap &_index) + : index(_index), free(-1), num(0), maximal(0) {} + + int size() const { return num; } + + bool empty() const { return num == 0; } + + void clear() { + data.clear(); first.clear(); free = -1; num = 0; maximal = 0; + } + + void push(const Pair& p) { + push(p.first, p.second); + } + + void push(const Item &i, const Prio &p) { + int idx; + if (free == -1) { + idx = data.size(); + data.push_back(BucketItem(i)); + } else { + idx = free; + free = data[idx].next; + data[idx].item = i; + } + index[i] = idx; + if (p >= int(first.size())) first.resize(p + 1, -1); + data[idx].next = first[p]; + first[p] = idx; + if (p > maximal) { + maximal = p; + } + ++num; + } + + Item top() const { + while (first[maximal] == -1) { + --maximal; + } + return data[first[maximal]].item; + } + + Prio prio() const { + while (first[maximal] == -1) { + --maximal; + } + return maximal; + } + + void pop() { + while (first[maximal] == -1) { + --maximal; + } + int idx = first[maximal]; + index[data[idx].item] = -2; + first[maximal] = data[idx].next; + data[idx].next = free; + free = idx; + --num; + } + + Prio operator[](const Item &i) const { + for (int k = 0; k < first.size(); ++k) { + int idx = first[k]; + while (idx != -1) { + if (data[idx].item == i) { + return k; + } + idx = data[idx].next; + } + } + return -1; + } + + State state(const Item &i) const { + int idx = index[i]; + if (idx >= 0) idx = 0; + return State(idx); + } + + private: + + struct BucketItem { + BucketItem(const Item& _item) : item(_item) {} + + Item item; + + int next; + }; + + ItemIntMap& index; + std::vector first; + std::vector data; + int free, num; + mutable int maximal; + + }; + +} + +#endif diff --git a/src/lemon/concept_check.h b/src/lemon/concept_check.h new file mode 100644 index 0000000..59fa93d --- /dev/null +++ b/src/lemon/concept_check.h @@ -0,0 +1,105 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +// Modified for use in LEMON. +// We should really consider using Boost... + +// +// (C) Copyright Jeremy Siek 2000. +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// Revision History: +// 05 May 2001: Workarounds for HP aCC from Thomas Matelich. 
(Jeremy Siek) +// 02 April 2001: Removed limits header altogether. (Jeremy Siek) +// 01 April 2001: Modified to use new header. (JMaddock) +// + +// See http://www.boost.org/libs/concept_check for documentation. + +#ifndef LEMON_BOOST_CONCEPT_CHECKS_HPP +#define LEMON_BOOST_CONCEPT_CHECKS_HPP + +namespace lemon { + + /* + "inline" is used for ignore_unused_variable_warning() + and function_requires() to make sure there is no + overtarget with g++. + */ + + template inline void ignore_unused_variable_warning(const T&) { } + + template + inline void function_requires() + { +#if !defined(NDEBUG) + void (Concept::*x)() = & Concept::constraints; + ignore_unused_variable_warning(x); +#endif + } + + template + inline void checkConcept() { +#if !defined(NDEBUG) + typedef typename Concept::template Constraints ConceptCheck; + void (ConceptCheck::*x)() = & ConceptCheck::constraints; + ignore_unused_variable_warning(x); +#endif + } +#if 0 +#define BOOST_CLASS_REQUIRE(type_var, ns, concept) \ + typedef void (ns::concept ::* func##type_var##concept)(); \ + template \ + struct concept_checking_##type_var##concept { }; \ + typedef concept_checking_##type_var##concept< \ + BOOST_FPTR ns::concept::constraints> \ + concept_checking_typedef_##type_var##concept + +#define BOOST_CLASS_REQUIRE2(type_var1, type_var2, ns, concept) \ + typedef void (ns::concept ::* \ + func##type_var1##type_var2##concept)(); \ + template \ + struct concept_checking_##type_var1##type_var2##concept { }; \ + typedef concept_checking_##type_var1##type_var2##concept< \ + BOOST_FPTR ns::concept::constraints> \ + concept_checking_typedef_##type_var1##type_var2##concept + +#define BOOST_CLASS_REQUIRE3(tv1, tv2, tv3, ns, concept) \ + typedef void (ns::concept ::* \ + func##tv1##tv2##tv3##concept)(); \ + template \ + struct concept_checking_##tv1##tv2##tv3##concept { }; \ + typedef concept_checking_##tv1##tv2##tv3##concept< \ + BOOST_FPTR ns::concept::constraints> \ + concept_checking_typedef_##tv1##tv2##tv3##concept + +#define BOOST_CLASS_REQUIRE4(tv1, tv2, tv3, tv4, ns, concept) \ + typedef void (ns::concept ::* \ + func##tv1##tv2##tv3##tv4##concept)(); \ + template \ + struct concept_checking_##tv1##tv2##tv3##tv4##concept { }; \ + typedef concept_checking_##tv1##tv2##tv3##tv4##concept< \ + BOOST_FPTR ns::concept::constraints> \ + concept_checking_typedef_##tv1##tv2##tv3##tv4##concept +#endif + +} // namespace lemon + +#endif // LEMON_BOOST_CONCEPT_CHECKS_HPP diff --git a/src/lemon/concepts/bpugraph.h b/src/lemon/concepts/bpugraph.h new file mode 100644 index 0000000..8b1867d --- /dev/null +++ b/src/lemon/concepts/bpugraph.h @@ -0,0 +1,1004 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +/// \ingroup graph_concepts +/// \file +/// \brief The concept of Bipartite Undirected Graphs. 
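// ---------------------------------------------------------------------------
// Editorial sketch: how checkConcept() from concept_check.h above is meant to
// be used.  A concept class exposes a nested Constraints<T> template whose
// constraints() member exercises the required interface; checkConcept<Concept,
// Type>() only takes the address of that member, so the whole check happens at
// compile time (and is skipped entirely when NDEBUG is defined).  The
// Incrementable concept is a made-up example, not something defined by LEMON,
// and the <lemon/...> include path is assumed.
#include <lemon/concept_check.h>

struct Incrementable {                       // hypothetical concept
  template <typename T>
  struct Constraints {
    void constraints() {
      ++value;                               // T must support pre-increment
      lemon::ignore_unused_variable_warning(value);
    }
    T value;
  };
};

void compile_time_checks() {
  lemon::checkConcept<Incrementable, int>();       // fine: int is incrementable
  // lemon::checkConcept<Incrementable, void*>();  // would not compile: no ++ for void*
}
// ---------------------------------------------------------------------------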
+ +#ifndef LEMON_CONCEPT_BPUGRAPH_H +#define LEMON_CONCEPT_BPUGRAPH_H + +#include + +#include +#include + +#include + +namespace lemon { + namespace concepts { + + /// \ingroup graph_concepts + /// + /// \brief Class describing the concept of Bipartite Undirected Graphs. + /// + /// This class describes the common interface of all + /// Undirected Bipartite Graphs. + /// + /// As all concept describing classes it provides only interface + /// without any sensible implementation. So any algorithm for + /// bipartite undirected graph should compile with this class, but it + /// will not run properly, of course. + /// + /// In LEMON bipartite undirected graphs also fulfill the concept of + /// the undirected graphs (\ref lemon::concepts::UGraph "UGraph Concept"). + /// + /// You can assume that all undirected bipartite graph can be handled + /// as an undirected graph and consequently as a static graph. + /// + /// The bipartite graph stores two types of nodes which are named + /// ANode and BNode. The graph type contains two types ANode and + /// BNode which are inherited from Node type. Moreover they have + /// constructor which converts Node to either ANode or BNode when + /// it is possible. Therefor everywhere the Node type can be used + /// instead of ANode and BNode. So the usage of the ANode and + /// BNode is not suggested. + /// + /// The iteration on the partition can be done with the ANodeIt and + /// BNodeIt classes. The node map can be used to map values to the nodes + /// and similarly we can use to map values for just the ANodes and + /// BNodes the ANodeMap and BNodeMap template classes. + + class BpUGraph { + public: + /// \brief The undirected graph should be tagged by the + /// UndirectedTag. + /// + /// The undirected graph should be tagged by the UndirectedTag. This + /// tag helps the enable_if technics to make compile time + /// specializations for undirected graphs. + typedef True UndirectedTag; + + /// \brief The base type of node iterators, + /// or in other words, the trivial node iterator. + /// + /// This is the base type of each node iterator, + /// thus each kind of node iterator converts to this. + /// More precisely each kind of node iterator should be inherited + /// from the trivial node iterator. The Node class represents + /// both of two types of nodes. + class Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Node() { } + /// Copy constructor. + + /// Copy constructor. + /// + Node(const Node&) { } + + /// Invalid constructor \& conversion. + + /// This constructor initializes the iterator to be invalid. + /// \sa Invalid for more details. + Node(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Node) const { return true; } + + /// Inequality operator + + /// \sa operator==(Node n) + /// + bool operator!=(Node) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Node) const { return false; } + + }; + + /// \brief Helper class for ANodes. + /// + /// This class is just a helper class for ANodes, it is not + /// suggested to use it directly. 
It can be converted easily to + /// node and vice versa. The usage of this class is limited + /// to use just as template parameters for special map types. + class ANode : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + ANode() : Node() { } + /// Copy constructor. + + /// Copy constructor. + /// + ANode(const ANode&) : Node() { } + + /// Construct the same node as ANode. + + /// Construct the same node as ANode. It may throws assertion + /// when the given node is from the BNode set. + ANode(const Node&) : Node() { } + + /// Assign node to A-node. + + /// Besides the core graph item functionality each node should + /// be convertible to the represented A-node if it is it possible. + ANode& operator=(const Node&) { return *this; } + + /// Invalid constructor \& conversion. + + /// This constructor initializes the iterator to be invalid. + /// \sa Invalid for more details. + ANode(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(ANode) const { return true; } + + /// Inequality operator + + /// \sa operator==(ANode n) + /// + bool operator!=(ANode) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(ANode) const { return false; } + + }; + + /// \brief Helper class for BNodes. + /// + /// This class is just a helper class for BNodes, it is not + /// suggested to use it directly. It can be converted easily to + /// node and vice versa. The usage of this class is limited + /// to use just as template parameters for special map types. + class BNode : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + BNode() : Node() { } + /// Copy constructor. + + /// Copy constructor. + /// + BNode(const BNode&) : Node() { } + + /// Construct the same node as BNode. + + /// Construct the same node as BNode. It may throws assertion + /// when the given node is from the ANode set. + BNode(const Node&) : Node() { } + + /// Assign node to B-node. + + /// Besides the core graph item functionality each node should + /// be convertible to the represented B-node if it is it possible. + BNode& operator=(const Node&) { return *this; } + + /// Invalid constructor \& conversion. + + /// This constructor initializes the iterator to be invalid. + /// \sa Invalid for more details. + BNode(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(BNode) const { return true; } + + /// Inequality operator + + /// \sa operator==(BNode n) + /// + bool operator!=(BNode) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(BNode) const { return false; } + + }; + + /// This iterator goes through each node. 
+ + /// This iterator goes through each node. + /// Its usage is quite simple, for example you can count the number + /// of nodes in graph \c g of type \c Graph like this: + ///\code + /// int count=0; + /// for (Graph::NodeIt n(g); n!=INVALID; ++n) ++count; + ///\endcode + class NodeIt : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + NodeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + NodeIt(const NodeIt& n) : Node(n) { } + /// Invalid constructor \& conversion. + + /// Initialize the iterator to be invalid. + /// \sa Invalid for more details. + NodeIt(Invalid) { } + /// Sets the iterator to the first node. + + /// Sets the iterator to the first node of \c g. + /// + NodeIt(const BpUGraph&) { } + /// Node -> NodeIt conversion. + + /// Sets the iterator to the node of \c the graph pointed by + /// the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + NodeIt(const BpUGraph&, const Node&) { } + /// Next node. + + /// Assign the iterator to the next node. + /// + NodeIt& operator++() { return *this; } + }; + + /// This iterator goes through each ANode. + + /// This iterator goes through each ANode. + /// Its usage is quite simple, for example you can count the number + /// of nodes in graph \c g of type \c Graph like this: + ///\code + /// int count=0; + /// for (Graph::ANodeIt n(g); n!=INVALID; ++n) ++count; + ///\endcode + class ANodeIt : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + ANodeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + ANodeIt(const ANodeIt& n) : Node(n) { } + /// Invalid constructor \& conversion. + + /// Initialize the iterator to be invalid. + /// \sa Invalid for more details. + ANodeIt(Invalid) { } + /// Sets the iterator to the first node. + + /// Sets the iterator to the first node of \c g. + /// + ANodeIt(const BpUGraph&) { } + /// Node -> ANodeIt conversion. + + /// Sets the iterator to the node of \c the graph pointed by + /// the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + ANodeIt(const BpUGraph&, const Node&) { } + /// Next node. + + /// Assign the iterator to the next node. + /// + ANodeIt& operator++() { return *this; } + }; + + /// This iterator goes through each BNode. + + /// This iterator goes through each BNode. + /// Its usage is quite simple, for example you can count the number + /// of nodes in graph \c g of type \c Graph like this: + ///\code + /// int count=0; + /// for (Graph::BNodeIt n(g); n!=INVALID; ++n) ++count; + ///\endcode + class BNodeIt : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + BNodeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + BNodeIt(const BNodeIt& n) : Node(n) { } + /// Invalid constructor \& conversion. + + /// Initialize the iterator to be invalid. + /// \sa Invalid for more details. + BNodeIt(Invalid) { } + /// Sets the iterator to the first node. + + /// Sets the iterator to the first node of \c g. + /// + BNodeIt(const BpUGraph&) { } + /// Node -> BNodeIt conversion. + + /// Sets the iterator to the node of \c the graph pointed by + /// the trivial iterator. 
+ /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + BNodeIt(const BpUGraph&, const Node&) { } + /// Next node. + + /// Assign the iterator to the next node. + /// + BNodeIt& operator++() { return *this; } + }; + + + /// The base type of the undirected edge iterators. + + /// The base type of the undirected edge iterators. + /// + class UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + UEdge() { } + /// Copy constructor. + + /// Copy constructor. + /// + UEdge(const UEdge&) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + UEdge(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(UEdge) const { return true; } + /// Inequality operator + + /// \sa operator==(UEdge n) + /// + bool operator!=(UEdge) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(UEdge) const { return false; } + }; + + /// This iterator goes through each undirected edge. + + /// This iterator goes through each undirected edge of a graph. + /// Its usage is quite simple, for example you can count the number + /// of undirected edges in a graph \c g of type \c Graph as follows: + ///\code + /// int count=0; + /// for(Graph::UEdgeIt e(g); e!=INVALID; ++e) ++count; + ///\endcode + class UEdgeIt : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + UEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + UEdgeIt(const UEdgeIt& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + UEdgeIt(Invalid) { } + /// This constructor sets the iterator to the first undirected edge. + + /// This constructor sets the iterator to the first undirected edge. + UEdgeIt(const BpUGraph&) { } + /// UEdge -> UEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator. + /// This feature necessitates that each time we + /// iterate the undirected edge-set, the iteration order is the + /// same. + UEdgeIt(const BpUGraph&, const UEdge&) { } + /// Next undirected edge + + /// Assign the iterator to the next undirected edge. + UEdgeIt& operator++() { return *this; } + }; + + /// \brief This iterator goes trough the incident undirected + /// edges of a node. + /// + /// This iterator goes trough the incident undirected edges + /// of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can compute the + /// degree (i.e. count the number + /// of incident edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for(Graph::IncEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + class IncEdgeIt : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + IncEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. 
+ /// + IncEdgeIt(const IncEdgeIt& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + IncEdgeIt(Invalid) { } + /// This constructor sets the iterator to first incident edge. + + /// This constructor set the iterator to the first incident edge of + /// the node. + IncEdgeIt(const BpUGraph&, const Node&) { } + /// UEdge -> IncEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + IncEdgeIt(const BpUGraph&, const UEdge&) { } + /// Next incident edge + + /// Assign the iterator to the next incident edge + /// of the corresponding node. + IncEdgeIt& operator++() { return *this; } + }; + + /// The directed edge type. + + /// The directed edge type. It can be converted to the + /// undirected edge. + class Edge : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Edge() { } + /// Copy constructor. + + /// Copy constructor. + /// + Edge(const Edge& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + Edge(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Edge) const { return true; } + /// Inequality operator + + /// \sa operator==(Edge n) + /// + bool operator!=(Edge) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Edge) const { return false; } + + }; + /// This iterator goes through each directed edge. + + /// This iterator goes through each edge of a graph. + /// Its usage is quite simple, for example you can count the number + /// of edges in a graph \c g of type \c Graph as follows: + ///\code + /// int count=0; + /// for(Graph::EdgeIt e(g); e!=INVALID; ++e) ++count; + ///\endcode + class EdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + EdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + EdgeIt(const EdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + EdgeIt(Invalid) { } + /// This constructor sets the iterator to the first edge. + + /// This constructor sets the iterator to the first edge of \c g. + ///@param g the graph + EdgeIt(const BpUGraph &g) { ignore_unused_variable_warning(g); } + /// Edge -> EdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + EdgeIt(const BpUGraph&, const Edge&) { } + ///Next edge + + /// Assign the iterator to the next edge. + EdgeIt& operator++() { return *this; } + }; + + /// This iterator goes trough the outgoing directed edges of a node. + + /// This iterator goes trough the \e outgoing edges of a certain node + /// of a graph. 
+ /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for (Graph::OutEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class OutEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + OutEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + OutEdgeIt(const OutEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + OutEdgeIt(Invalid) { } + /// This constructor sets the iterator to the first outgoing edge. + + /// This constructor sets the iterator to the first outgoing edge of + /// the node. + ///@param n the node + ///@param g the graph + OutEdgeIt(const BpUGraph& n, const Node& g) { + ignore_unused_variable_warning(n); + ignore_unused_variable_warning(g); + } + /// Edge -> OutEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + OutEdgeIt(const BpUGraph&, const Edge&) { } + ///Next outgoing edge + + /// Assign the iterator to the next + /// outgoing edge of the corresponding node. + OutEdgeIt& operator++() { return *this; } + }; + + /// This iterator goes trough the incoming directed edges of a node. + + /// This iterator goes trough the \e incoming edges of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for(Graph::InEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class InEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + InEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + InEdgeIt(const InEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + InEdgeIt(Invalid) { } + /// This constructor sets the iterator to first incoming edge. + + /// This constructor set the iterator to the first incoming edge of + /// the node. + ///@param n the node + ///@param g the graph + InEdgeIt(const BpUGraph& g, const Node& n) { + ignore_unused_variable_warning(n); + ignore_unused_variable_warning(g); + } + /// Edge -> InEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + InEdgeIt(const BpUGraph&, const Edge&) { } + /// Next incoming edge + + /// Assign the iterator to the next inedge of the corresponding node. + /// + InEdgeIt& operator++() { return *this; } + }; + + /// \brief Read write map of the nodes to type \c T. + /// + /// ReadWrite map of the nodes to type \c T. 
+ /// \sa Reference + /// \todo Wrong documentation + template + class NodeMap : public ReadWriteMap< Node, T > + { + public: + + ///\e + NodeMap(const BpUGraph&) { } + ///\e + NodeMap(const BpUGraph&, T) { } + + ///Copy constructor + NodeMap(const NodeMap& nm) : ReadWriteMap< Node, T >(nm) { } + ///Assignment operator + NodeMap& operator=(const NodeMap&) { return *this; } + ///Assignment operator + template + NodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Read write map of the ANodes to type \c T. + /// + /// ReadWrite map of the ANodes to type \c T. + /// \sa Reference + /// \todo Wrong documentation + template + class ANodeMap : public ReadWriteMap< Node, T > + { + public: + + ///\e + ANodeMap(const BpUGraph&) { } + ///\e + ANodeMap(const BpUGraph&, T) { } + + ///Copy constructor + ANodeMap(const ANodeMap& nm) : ReadWriteMap< Node, T >(nm) { } + ///Assignment operator + ANodeMap& operator=(const ANodeMap&) { return *this; } + ///Assignment operator + template + ANodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Read write map of the BNodes to type \c T. + /// + /// ReadWrite map of the BNodes to type \c T. + /// \sa Reference + /// \todo Wrong documentation + template + class BNodeMap : public ReadWriteMap< Node, T > + { + public: + + ///\e + BNodeMap(const BpUGraph&) { } + ///\e + BNodeMap(const BpUGraph&, T) { } + + ///Copy constructor + BNodeMap(const BNodeMap& nm) : ReadWriteMap< Node, T >(nm) { } + ///Assignment operator + BNodeMap& operator=(const BNodeMap&) { return *this; } + ///Assignment operator + template + BNodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Read write map of the directed edges to type \c T. + /// + /// Reference map of the directed edges to type \c T. + /// \sa Reference + /// \todo Wrong documentation + template + class EdgeMap : public ReadWriteMap + { + public: + + ///\e + EdgeMap(const BpUGraph&) { } + ///\e + EdgeMap(const BpUGraph&, T) { } + ///Copy constructor + EdgeMap(const EdgeMap& em) : ReadWriteMap(em) { } + ///Assignment operator + EdgeMap& operator=(const EdgeMap&) { return *this; } + ///Assignment operator + template + EdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// Read write map of the undirected edges to type \c T. + + /// Reference map of the edges to type \c T. + /// \sa Reference + /// \todo Wrong documentation + template + class UEdgeMap : public ReadWriteMap + { + public: + + ///\e + UEdgeMap(const BpUGraph&) { } + ///\e + UEdgeMap(const BpUGraph&, T) { } + ///Copy constructor + UEdgeMap(const UEdgeMap& em) : ReadWriteMap(em) {} + ///Assignment operator + UEdgeMap &operator=(const UEdgeMap&) { return *this; } + ///Assignment operator + template + UEdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Direct the given undirected edge. + /// + /// Direct the given undirected edge. The returned edge source + /// will be the given node. + Edge direct(const UEdge&, const Node&) const { + return INVALID; + } + + /// \brief Direct the given undirected edge. + /// + /// Direct the given undirected edge. The returned edge + /// represents the given undirected edge and the direction comes + /// from the given bool. The source of the undirected edge and + /// the directed edge is the same when the given bool is true. 
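// ---------------------------------------------------------------------------
// Editorial sketch: the map classes above are how per-item data is attached to
// a graph in generic code -- an algorithm takes the graph type as a template
// parameter and instantiates GR::template NodeMap<V> (or ANodeMap, BNodeMap,
// ...) on it, writing through set() as the ReadWriteMap interface requires.
// GR stands for any concrete model of this concept; label_sides is a made-up
// helper, and lemon::INVALID is assumed to be in scope via GR's own header.
template <typename GR>
void label_sides(const GR& g, typename GR::template NodeMap<int>& side) {
  for (typename GR::ANodeIt n(g); n != lemon::INVALID; ++n) side.set(n, 0);
  for (typename GR::BNodeIt n(g); n != lemon::INVALID; ++n) side.set(n, 1);
}
// Typical call site, for some concrete bipartite graph type BpGraphType:
//   BpGraphType::NodeMap<int> side(g);
//   label_sides(g, side);
// ---------------------------------------------------------------------------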
+ Edge direct(const UEdge&, bool) const { + return INVALID; + } + + /// \brief Returns true when the given node is an ANode. + /// + /// Returns true when the given node is an ANode. + bool aNode(Node) const { return true;} + + /// \brief Returns true when the given node is an BNode. + /// + /// Returns true when the given node is an BNode. + bool bNode(Node) const { return true;} + + /// \brief Returns the edge's end node which is in the ANode set. + /// + /// Returns the edge's end node which is in the ANode set. + Node aNode(UEdge) const { return INVALID;} + + /// \brief Returns the edge's end node which is in the BNode set. + /// + /// Returns the edge's end node which is in the BNode set. + Node bNode(UEdge) const { return INVALID;} + + /// \brief Returns true if the edge has default orientation. + /// + /// Returns whether the given directed edge is same orientation as + /// the corresponding undirected edge's default orientation. + bool direction(Edge) const { return true; } + + /// \brief Returns the opposite directed edge. + /// + /// Returns the opposite directed edge. + Edge oppositeEdge(Edge) const { return INVALID; } + + /// \brief Opposite node on an edge + /// + /// \return the opposite of the given Node on the given UEdge + Node oppositeNode(Node, UEdge) const { return INVALID; } + + /// \brief First node of the undirected edge. + /// + /// \return the first node of the given UEdge. + /// + /// Naturally undirected edges don't have direction and thus + /// don't have source and target node. But we use these two methods + /// to query the two endnodes of the edge. The direction of the edge + /// which arises this way is called the inherent direction of the + /// undirected edge, and is used to define the "default" direction + /// of the directed versions of the edges. + /// \sa direction + Node source(UEdge) const { return INVALID; } + + /// \brief Second node of the undirected edge. + Node target(UEdge) const { return INVALID; } + + /// \brief Source node of the directed edge. + Node source(Edge) const { return INVALID; } + + /// \brief Target node of the directed edge. 
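// ---------------------------------------------------------------------------
// Editorial sketch: how the direct()/direction()/oppositeEdge() trio above is
// used from generic code.  BpGraph stands for any real model of this concept
// (the concept class itself only returns INVALID, so this is illustrative, not
// something to run against concepts::BpUGraph), and inspect_orientations is a
// made-up name.
template <typename BpGraph>
void inspect_orientations(const BpGraph& g, typename BpGraph::UEdge ue) {
  typedef typename BpGraph::Edge Edge;

  Edge forward  = g.direct(ue, true);    // oriented along the inherent direction
  Edge backward = g.direct(ue, false);   // the opposite orientation

  // The two directed views of one undirected edge are each other's opposite,
  // and direction() reports whether a view follows the default orientation.
  bool same_edge   = (g.oppositeEdge(forward) == backward);
  bool fwd_default = g.direction(forward);     // true, by the contract of direct()
  bool bwd_default = g.direction(backward);    // false for the reversed view

  (void)same_edge; (void)fwd_default; (void)bwd_default;   // silence warnings
}
// ---------------------------------------------------------------------------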
+ Node target(Edge) const { return INVALID; } + + /// \brief Base node of the iterator + /// + /// Returns the base node (the source in this case) of the iterator + Node baseNode(OutEdgeIt e) const { + return source(e); + } + + /// \brief Running node of the iterator + /// + /// Returns the running node (the target in this case) of the + /// iterator + Node runningNode(OutEdgeIt e) const { + return target(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (the target in this case) of the iterator + Node baseNode(InEdgeIt e) const { + return target(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (the source in this case) of the + /// iterator + Node runningNode(InEdgeIt e) const { + return source(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(IncEdgeIt) const { + return INVALID; + } + + /// \brief Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(IncEdgeIt) const { + return INVALID; + } + + void first(Node&) const {} + void next(Node&) const {} + + void first(Edge&) const {} + void next(Edge&) const {} + + void first(UEdge&) const {} + void next(UEdge&) const {} + + void firstANode(Node&) const {} + void nextANode(Node&) const {} + + void firstBNode(Node&) const {} + void nextBNode(Node&) const {} + + void firstIn(Edge&, const Node&) const {} + void nextIn(Edge&) const {} + + void firstOut(Edge&, const Node&) const {} + void nextOut(Edge&) const {} + + void firstInc(UEdge &, bool &, const Node &) const {} + void nextInc(UEdge &, bool &) const {} + + void firstFromANode(UEdge&, const Node&) const {} + void nextFromANode(UEdge&) const {} + + void firstFromBNode(UEdge&, const Node&) const {} + void nextFromBNode(UEdge&) const {} + + template + struct Constraints { + void constraints() { + checkConcept, Graph>(); + checkConcept, Graph>(); + } + }; + + }; + + } + +} + +#endif diff --git a/src/lemon/concepts/graph.h b/src/lemon/concepts/graph.h new file mode 100644 index 0000000..8189803 --- /dev/null +++ b/src/lemon/concepts/graph.h @@ -0,0 +1,453 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_CONCEPT_GRAPH_H +#define LEMON_CONCEPT_GRAPH_H + +///\ingroup graph_concepts +///\file +///\brief The concept of Directed Graphs. + +#include +#include +#include +#include +#include + +namespace lemon { + namespace concepts { + + /// \ingroup graph_concepts + /// + /// \brief Class describing the concept of Directed Graphs. + /// + /// This class describes the \ref concept "concept" of the + /// immutable directed graphs. + /// + /// Note that actual graph implementation like @ref ListGraph or + /// @ref SmartGraph may have several additional functionality. + /// + /// \sa concept + class Graph { + private: + ///Graphs are \e not copy constructible. Use GraphCopy() instead. + + ///Graphs are \e not copy constructible. Use GraphCopy() instead. 
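// ---------------------------------------------------------------------------
// Editorial sketch: the usual idiom for the baseNode()/runningNode() pair
// documented above.  When walking the edges incident to a fixed node, the base
// node is the node the iterator was started from and the running node is the
// endpoint on the other side, so neighbours can be visited without worrying
// about edge orientation.  UGraph stands for any model of the undirected or
// bipartite graph concepts, for_each_neighbour is a made-up name, and
// lemon::INVALID is assumed to be in scope via the graph's own header.
template <typename UGraph, typename Visitor>
void for_each_neighbour(const UGraph& g, typename UGraph::Node n, Visitor visit) {
  for (typename UGraph::IncEdgeIt e(g, n); e != lemon::INVALID; ++e) {
    // runningNode(e) is the endpoint of `e` opposite to the base node `n`.
    visit(g.runningNode(e));
  }
}
// ---------------------------------------------------------------------------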
+ /// + Graph(const Graph &) {}; + ///\brief Assignment of \ref Graph "Graph"s to another ones are + ///\e not allowed. Use GraphCopy() instead. + + ///Assignment of \ref Graph "Graph"s to another ones are + ///\e not allowed. Use GraphCopy() instead. + + void operator=(const Graph &) {} + public: + ///\e + + /// Defalult constructor. + + /// Defalult constructor. + /// + Graph() { } + /// Class for identifying a node of the graph + + /// This class identifies a node of the graph. It also serves + /// as a base class of the node iterators, + /// thus they will convert to this type. + class Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Node() { } + /// Copy constructor. + + /// Copy constructor. + /// + Node(const Node&) { } + + /// Invalid constructor \& conversion. + + /// This constructor initializes the iterator to be invalid. + /// \sa Invalid for more details. + Node(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Node) const { return true; } + + /// Inequality operator + + /// \sa operator==(Node n) + /// + bool operator!=(Node) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Node) const { return false; } + + }; + + /// This iterator goes through each node. + + /// This iterator goes through each node. + /// Its usage is quite simple, for example you can count the number + /// of nodes in graph \c g of type \c Graph like this: + ///\code + /// int count=0; + /// for (Graph::NodeIt n(g); n!=INVALID; ++n) ++count; + ///\endcode + class NodeIt : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + NodeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + NodeIt(const NodeIt& n) : Node(n) { } + /// Invalid constructor \& conversion. + + /// Initialize the iterator to be invalid. + /// \sa Invalid for more details. + NodeIt(Invalid) { } + /// Sets the iterator to the first node. + + /// Sets the iterator to the first node of \c g. + /// + NodeIt(const Graph&) { } + /// Node -> NodeIt conversion. + + /// Sets the iterator to the node of \c the graph pointed by + /// the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + NodeIt(const Graph&, const Node&) { } + /// Next node. + + /// Assign the iterator to the next node. + /// + NodeIt& operator++() { return *this; } + }; + + + /// Class for identifying an edge of the graph + + /// This class identifies an edge of the graph. It also serves + /// as a base class of the edge iterators, + /// thus they will convert to this type. + class Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Edge() { } + /// Copy constructor. + + /// Copy constructor. + /// + Edge(const Edge&) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. 
+ /// + Edge(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Edge) const { return true; } + /// Inequality operator + + /// \sa operator==(Edge n) + /// + bool operator!=(Edge) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Edge) const { return false; } + }; + + /// This iterator goes trough the outgoing edges of a node. + + /// This iterator goes trough the \e outgoing edges of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for (Graph::OutEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class OutEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + OutEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + OutEdgeIt(const OutEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + OutEdgeIt(Invalid) { } + /// This constructor sets the iterator to the first outgoing edge. + + /// This constructor sets the iterator to the first outgoing edge of + /// the node. + OutEdgeIt(const Graph&, const Node&) { } + /// Edge -> OutEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + OutEdgeIt(const Graph&, const Edge&) { } + ///Next outgoing edge + + /// Assign the iterator to the next + /// outgoing edge of the corresponding node. + OutEdgeIt& operator++() { return *this; } + }; + + /// This iterator goes trough the incoming edges of a node. + + /// This iterator goes trough the \e incoming edges of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for(Graph::InEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class InEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + InEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + InEdgeIt(const InEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + InEdgeIt(Invalid) { } + /// This constructor sets the iterator to first incoming edge. + + /// This constructor set the iterator to the first incoming edge of + /// the node. + InEdgeIt(const Graph&, const Node&) { } + /// Edge -> InEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + InEdgeIt(const Graph&, const Edge&) { } + /// Next incoming edge + + /// Assign the iterator to the next inedge of the corresponding node. 
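
As a concrete counterpart to the OutEdgeIt/InEdgeIt documentation, here is a sketch that counts the outgoing and incoming edges of one node. ListGraph and its header path are assumptions; the iterator idiom is the one shown in the comments above:

    // Sketch: out-degree and in-degree of a node via OutEdgeIt / InEdgeIt.
    #include <iostream>
    #include <lemon/list_graph.h>   // assumed header for ListGraph

    int main() {
      typedef lemon::ListGraph Graph;
      Graph g;
      Graph::Node a = g.addNode(), b = g.addNode(), c = g.addNode();
      g.addEdge(a, b);
      g.addEdge(a, c);
      g.addEdge(b, a);

      int outdeg = 0, indeg = 0;
      for (Graph::OutEdgeIt e(g, a); e != lemon::INVALID; ++e) ++outdeg;
      for (Graph::InEdgeIt  e(g, a); e != lemon::INVALID; ++e) ++indeg;
      std::cout << "out: " << outdeg << "  in: " << indeg << std::endl;  // out: 2  in: 1
      return 0;
    }
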
+ /// + InEdgeIt& operator++() { return *this; } + }; + /// This iterator goes through each edge. + + /// This iterator goes through each edge of a graph. + /// Its usage is quite simple, for example you can count the number + /// of edges in a graph \c g of type \c Graph as follows: + ///\code + /// int count=0; + /// for(Graph::EdgeIt e(g); e!=INVALID; ++e) ++count; + ///\endcode + class EdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + EdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + EdgeIt(const EdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + EdgeIt(Invalid) { } + /// This constructor sets the iterator to the first edge. + + /// This constructor sets the iterator to the first edge of \c g. + ///@param g the graph + EdgeIt(const Graph& g) { ignore_unused_variable_warning(g); } + /// Edge -> EdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + EdgeIt(const Graph&, const Edge&) { } + ///Next edge + + /// Assign the iterator to the next edge. + EdgeIt& operator++() { return *this; } + }; + ///Gives back the target node of an edge. + + ///Gives back the target node of an edge. + /// + Node target(Edge) const { return INVALID; } + ///Gives back the source node of an edge. + + ///Gives back the source node of an edge. + /// + Node source(Edge) const { return INVALID; } + + void first(Node&) const {} + void next(Node&) const {} + + void first(Edge&) const {} + void next(Edge&) const {} + + + void firstIn(Edge&, const Node&) const {} + void nextIn(Edge&) const {} + + void firstOut(Edge&, const Node&) const {} + void nextOut(Edge&) const {} + + /// \brief The base node of the iterator. + /// + /// Gives back the base node of the iterator. + /// It is always the target of the pointed edge. + Node baseNode(const InEdgeIt&) const { return INVALID; } + + /// \brief The running node of the iterator. + /// + /// Gives back the running node of the iterator. + /// It is always the source of the pointed edge. + Node runningNode(const InEdgeIt&) const { return INVALID; } + + /// \brief The base node of the iterator. + /// + /// Gives back the base node of the iterator. + /// It is always the source of the pointed edge. + Node baseNode(const OutEdgeIt&) const { return INVALID; } + + /// \brief The running node of the iterator. + /// + /// Gives back the running node of the iterator. + /// It is always the target of the pointed edge. + Node runningNode(const OutEdgeIt&) const { return INVALID; } + + /// \brief The opposite node on the given edge. + /// + /// Gives back the opposite node on the given edge. + Node oppositeNode(const Node&, const Edge&) const { return INVALID; } + + /// \brief Read write map of the nodes to type \c T. + /// + /// ReadWrite map of the nodes to type \c T. + /// \sa Reference + template + class NodeMap : public ReadWriteMap< Node, T > { + public: + + ///\e + NodeMap(const Graph&) { } + ///\e + NodeMap(const Graph&, T) { } + + ///Copy constructor + NodeMap(const NodeMap& nm) : ReadWriteMap< Node, T >(nm) { } + ///Assignment operator + template + NodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Read write map of the edges to type \c T. + /// + /// Reference map of the edges to type \c T. 
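
The NodeMap/EdgeMap interface described here can be exercised with any concrete graph. A minimal sketch assuming ListGraph (header path assumed), using only the constructors and the source/target functions documented above:

    // Sketch: attach data to nodes and edges with the graph's own map types.
    #include <iostream>
    #include <string>
    #include <lemon/list_graph.h>   // assumed header for ListGraph

    int main() {
      typedef lemon::ListGraph Graph;
      Graph g;
      Graph::Node u = g.addNode(), v = g.addNode();
      Graph::Edge e = g.addEdge(u, v);

      Graph::NodeMap<std::string> name(g);        // one value per node
      Graph::EdgeMap<double>      weight(g, 1.0); // values default to 1.0
      name[u] = "source";
      name[v] = "sink";
      weight[e] = 2.5;

      std::cout << name[g.source(e)] << " -> " << name[g.target(e)]
                << " : " << weight[e] << std::endl;
      return 0;
    }
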
+ /// \sa Reference + template + class EdgeMap : public ReadWriteMap { + public: + + ///\e + EdgeMap(const Graph&) { } + ///\e + EdgeMap(const Graph&, T) { } + ///Copy constructor + EdgeMap(const EdgeMap& em) : ReadWriteMap(em) { } + ///Assignment operator + template + EdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + template + struct Constraints { + void constraints() { + checkConcept, Graph>(); + checkConcept, Graph>(); + } + }; + + }; + + } //namespace concepts +} //namespace lemon + + + +#endif // LEMON_CONCEPT_GRAPH_H diff --git a/src/lemon/concepts/graph_components.h b/src/lemon/concepts/graph_components.h new file mode 100644 index 0000000..91aaf95 --- /dev/null +++ b/src/lemon/concepts/graph_components.h @@ -0,0 +1,2093 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +///\ingroup graph_concepts +///\file +///\brief The concept of graph components. + + +#ifndef LEMON_CONCEPT_GRAPH_COMPONENTS_H +#define LEMON_CONCEPT_GRAPH_COMPONENTS_H + +#include +#include + +#include + +namespace lemon { + namespace concepts { + + /// \brief Skeleton class for graph Node and Edge types + /// + /// This class describes the interface of Node and Edge (and UEdge + /// in undirected graphs) subtypes of graph types. + /// + /// \note This class is a template class so that we can use it to + /// create graph skeleton classes. The reason for this is than Node + /// and Edge types should \em not derive from the same base class. + /// For Node you should instantiate it with character 'n' and for Edge + /// with 'e'. + +#ifndef DOXYGEN + template +#endif + class GraphItem { + public: + /// \brief Default constructor. + /// + /// \warning The default constructor is not required to set + /// the item to some well-defined value. So you should consider it + /// as uninitialized. + GraphItem() {} + /// \brief Copy constructor. + /// + /// Copy constructor. + /// + GraphItem(const GraphItem &) {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + GraphItem(Invalid) {} + /// \brief Assign operator for nodes. + /// + /// The nodes are assignable. + /// + GraphItem& operator=(GraphItem const&) { return *this; } + /// \brief Equality operator. + /// + /// Two iterators are equal if and only if they represents the + /// same node in the graph or both are invalid. + bool operator==(GraphItem) const { return false; } + /// \brief Inequality operator. + /// + /// \sa operator==(const Node& n) + /// + bool operator!=(GraphItem) const { return false; } + + /// \brief Artificial ordering operator. + /// + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. 
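
The artificial operator< discussed above exists precisely so that graph descriptors can serve as keys of ordered containers. A sketch (ListGraph assumed); for per-item data the graph's own NodeMap is normally the better choice, this only shows that the ordering requirement is usable:

    // Sketch: Node descriptors as std::map keys, relying on the ordering
    // operator required by the GraphItem concept.
    #include <iostream>
    #include <map>
    #include <string>
    #include <lemon/list_graph.h>   // assumed header for ListGraph

    int main() {
      typedef lemon::ListGraph Graph;
      Graph g;
      Graph::Node a = g.addNode(), b = g.addNode();

      std::map<Graph::Node, std::string> label;   // needs Node::operator<
      label[a] = "A";
      label[b] = "B";
      std::cout << label[a] << " " << label[b] << std::endl;
      return 0;
    }
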
+ bool operator<(GraphItem) const { return false; } + + template + struct Constraints { + void constraints() { + _GraphItem i1; + _GraphItem i2 = i1; + _GraphItem i3 = INVALID; + + i1 = i2 = i3; + + bool b; + // b = (ia == ib) && (ia != ib) && (ia < ib); + b = (ia == ib) && (ia != ib); + b = (ia == INVALID) && (ib != INVALID); + b = (ia < ib); + } + + const _GraphItem &ia; + const _GraphItem &ib; + }; + }; + + /// \brief An empty base graph class. + /// + /// This class provides the minimal set of features needed for a graph + /// structure. All graph concepts have to be conform to this base + /// graph. It just provides types for nodes and edges and functions to + /// get the source and the target of the edges. + class BaseGraphComponent { + public: + + typedef BaseGraphComponent Graph; + + /// \brief Node class of the graph. + /// + /// This class represents the Nodes of the graph. + /// + typedef GraphItem<'n'> Node; + + /// \brief Edge class of the graph. + /// + /// This class represents the Edges of the graph. + /// + typedef GraphItem<'e'> Edge; + + /// \brief Gives back the target node of an edge. + /// + /// Gives back the target node of an edge. + /// + Node target(const Edge&) const { return INVALID;} + + /// \brief Gives back the source node of an edge. + /// + /// Gives back the source node of an edge. + /// + Node source(const Edge&) const { return INVALID;} + + /// \brief Gives back the opposite node on the given edge. + /// + /// Gives back the opposite node on the given edge. + Node oppositeNode(const Node&, const Edge&) const { + return INVALID; + } + + template + struct Constraints { + typedef typename _Graph::Node Node; + typedef typename _Graph::Edge Edge; + + void constraints() { + checkConcept, Node>(); + checkConcept, Edge>(); + { + Node n; + Edge e(INVALID); + n = graph.source(e); + n = graph.target(e); + n = graph.oppositeNode(n, e); + } + } + + const _Graph& graph; + }; + }; + + /// \brief An empty base undirected graph class. + /// + /// This class provides the minimal set of features needed for an + /// undirected graph structure. All undirected graph concepts have + /// to be conform to this base graph. It just provides types for + /// nodes, edges and undirected edges and functions to get the + /// source and the target of the edges and undirected edges, + /// conversion from edges to undirected edges and function to get + /// both direction of the undirected edges. + class BaseUGraphComponent : public BaseGraphComponent { + public: + typedef BaseGraphComponent::Node Node; + typedef BaseGraphComponent::Edge Edge; + /// \brief Undirected edge class of the graph. + /// + /// This class represents the undirected edges of the graph. + /// The undirected graphs can be used as a directed graph which + /// for each edge contains the opposite edge too so the graph is + /// bidirected. The undirected edge represents two opposite + /// directed edges. + class UEdge : public GraphItem<'u'> { + public: + typedef GraphItem<'u'> Parent; + /// \brief Default constructor. + /// + /// \warning The default constructor is not required to set + /// the item to some well-defined value. So you should consider it + /// as uninitialized. + UEdge() {} + /// \brief Copy constructor. + /// + /// Copy constructor. + /// + UEdge(const UEdge &) : Parent() {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + UEdge(Invalid) {} + /// \brief Converter from edge to undirected edge. 
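
The nested Constraints structs are consumed by checkConcept(), which instantiates them at compile time; a violation shows up as a compile error, not as runtime behaviour. A minimal sketch of checking a concrete graph type against a concept (checkConcept is the function used throughout these files; the header paths are assumptions):

    // Sketch: compile-time concept check of a concrete graph type.
    // The program does nothing at runtime; it only has to compile.
    #include <lemon/concept_check.h>    // assumed location of checkConcept
    #include <lemon/concepts/graph.h>
    #include <lemon/list_graph.h>       // assumed header for ListGraph

    int main() {
      lemon::checkConcept<lemon::concepts::Graph, lemon::ListGraph>();
      return 0;
    }
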
+ /// + /// Besides the core graph item functionality each edge should + /// be convertible to the represented undirected edge. + UEdge(const Edge&) {} + /// \brief Assign edge to undirected edge. + /// + /// Besides the core graph item functionality each edge should + /// be convertible to the represented undirected edge. + UEdge& operator=(const Edge&) { return *this; } + }; + + /// \brief Returns the direction of the edge. + /// + /// Returns the direction of the edge. Each edge represents an + /// undirected edge with a direction. It gives back the + /// direction. + bool direction(const Edge&) const { return true; } + + /// \brief Returns the directed edge. + /// + /// Returns the directed edge from its direction and the + /// represented undirected edge. + Edge direct(const UEdge&, bool) const { return INVALID;} + + /// \brief Returns the directed edge. + /// + /// Returns the directed edge from its source and the + /// represented undirected edge. + Edge direct(const UEdge&, const Node&) const { return INVALID;} + + /// \brief Returns the opposite edge. + /// + /// Returns the opposite edge. It is the edge representing the + /// same undirected edge and has opposite direction. + Edge oppositeEdge(const Edge&) const { return INVALID;} + + /// \brief Gives back the target node of an undirected edge. + /// + /// Gives back the target node of an undirected edge. The name + /// target is a little confusing because the undirected edge + /// does not have target but it just means that one of the end + /// node. + Node target(const UEdge&) const { return INVALID;} + + /// \brief Gives back the source node of an undirected edge. + /// + /// Gives back the source node of an undirected edge. The name + /// source is a little confusing because the undirected edge + /// does not have source but it just means that one of the end + /// node. + Node source(const UEdge&) const { return INVALID;} + + template + struct Constraints { + typedef typename _Graph::Node Node; + typedef typename _Graph::Edge Edge; + typedef typename _Graph::UEdge UEdge; + + void constraints() { + checkConcept(); + checkConcept, UEdge>(); + { + Node n; + UEdge ue(INVALID); + Edge e; + n = graph.source(ue); + n = graph.target(ue); + e = graph.direct(ue, true); + e = graph.direct(ue, n); + e = graph.oppositeEdge(e); + ue = e; + bool d = graph.direction(e); + ignore_unused_variable_warning(d); + } + } + + const _Graph& graph; + }; + + }; + + /// \brief An empty base bipartite undirected graph class. + /// + /// This class provides the minimal set of features needed for an + /// bipartite undirected graph structure. All bipartite undirected + /// graph concepts have to be conform to this base graph. It just + /// provides types for nodes, A-nodes, B-nodes, edges and + /// undirected edges and functions to get the source and the + /// target of the edges and undirected edges, conversion from + /// edges to undirected edges and function to get both direction + /// of the undirected edges. + class BaseBpUGraphComponent : public BaseUGraphComponent { + public: + typedef BaseUGraphComponent::Node Node; + typedef BaseUGraphComponent::Edge Edge; + typedef BaseUGraphComponent::UEdge UEdge; + + /// \brief Helper class for A-nodes. + /// + /// This class is just a helper class for A-nodes, it is not + /// suggested to use it directly. It can be converted easily to + /// node and vice versa. The usage of this class is limited + /// to use just as template parameters for special map types. 
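
The direct()/direction()/oppositeEdge() trio is easiest to follow on a concrete undirected graph. A sketch, assuming this bundled LEMON ships ListUGraph in the same header as ListGraph and that its addEdge() returns the new UEdge (both assumptions; the member functions themselves are the ones documented above):

    // Sketch: one UEdge viewed as its two opposite directed edges.
    #include <iostream>
    #include <lemon/list_graph.h>   // assumed to also provide ListUGraph

    int main() {
      typedef lemon::ListUGraph Graph;   // assumed undirected implementation
      Graph g;
      Graph::Node u = g.addNode(), v = g.addNode();
      Graph::UEdge ue = g.addEdge(u, v); // assumed to return the UEdge

      Graph::Edge forward  = g.direct(ue, true);      // one direction
      Graph::Edge backward = g.oppositeEdge(forward); // the other one
      std::cout << std::boolalpha
                << g.direction(forward) << " "
                << g.direction(backward) << std::endl; // true false
      return 0;
    }
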
+ class ANode : public Node { + public: + typedef Node Parent; + + /// \brief Default constructor. + /// + /// \warning The default constructor is not required to set + /// the item to some well-defined value. So you should consider it + /// as uninitialized. + ANode() {} + /// \brief Copy constructor. + /// + /// Copy constructor. + /// + ANode(const ANode &) : Parent() {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + ANode(Invalid) {} + /// \brief Converter from node to A-node. + /// + /// Besides the core graph item functionality each node should + /// be convertible to the represented A-node if it is it possible. + ANode(const Node&) {} + /// \brief Assign node to A-node. + /// + /// Besides the core graph item functionality each node should + /// be convertible to the represented A-node if it is it possible. + ANode& operator=(const Node&) { return *this; } + }; + + /// \brief Helper class for B-nodes. + /// + /// This class is just a helper class for B-nodes, it is not + /// suggested to use it directly. It can be converted easily to + /// node and vice versa. The usage of this class is limited + /// to use just as template parameters for special map types. + class BNode : public Node { + public: + typedef Node Parent; + + /// \brief Default constructor. + /// + /// \warning The default constructor is not required to set + /// the item to some well-defined value. So you should consider it + /// as uninitialized. + BNode() {} + /// \brief Copy constructor. + /// + /// Copy constructor. + /// + BNode(const BNode &) : Parent() {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + BNode(Invalid) {} + /// \brief Converter from node to B-node. + /// + /// Besides the core graph item functionality each node should + /// be convertible to the represented B-node if it is it possible. + BNode(const Node&) {} + /// \brief Assign node to B-node. + /// + /// Besides the core graph item functionality each node should + /// be convertible to the represented B-node if it is it possible. + BNode& operator=(const Node&) { return *this; } + }; + + /// \brief Gives back %true when the node is A-node. + /// + /// Gives back %true when the node is A-node. + bool aNode(const Node&) const { return false; } + + /// \brief Gives back %true when the node is B-node. + /// + /// Gives back %true when the node is B-node. + bool bNode(const Node&) const { return false; } + + /// \brief Gives back the A-node of the undirected edge. + /// + /// Gives back the A-node of the undirected edge. + Node aNode(const UEdge&) const { return INVALID; } + + /// \brief Gives back the B-node of the undirected edge. + /// + /// Gives back the B-node of the undirected edge. 
+ Node bNode(const UEdge&) const { return INVALID; } + + template + struct Constraints { + typedef typename _Graph::Node Node; + typedef typename _Graph::ANode ANode; + typedef typename _Graph::BNode BNode; + typedef typename _Graph::Edge Edge; + typedef typename _Graph::UEdge UEdge; + + void constraints() { + checkConcept(); + checkConcept, ANode>(); + checkConcept, BNode>(); + { + Node n; + UEdge ue(INVALID); + bool b; + n = graph.aNode(ue); + n = graph.bNode(ue); + b = graph.aNode(n); + b = graph.bNode(n); + ANode an; + an = n; n = an; + BNode bn; + bn = n; n = bn; + ignore_unused_variable_warning(b); + } + } + + const _Graph& graph; + }; + + }; + + /// \brief An empty idable base graph class. + /// + /// This class provides beside the core graph features + /// core id functions for the graph structure. + /// The most of the base graphs should be conform to this concept. + /// The id's are unique and immutable. + template + class IDableGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + + /// \brief Gives back an unique integer id for the Node. + /// + /// Gives back an unique integer id for the Node. + /// + int id(const Node&) const { return -1;} + + /// \brief Gives back the node by the unique id. + /// + /// Gives back the node by the unique id. + /// If the graph does not contain node with the given id + /// then the result of the function is undetermined. + Node nodeFromId(int) const { return INVALID;} + + /// \brief Gives back an unique integer id for the Edge. + /// + /// Gives back an unique integer id for the Edge. + /// + int id(const Edge&) const { return -1;} + + /// \brief Gives back the edge by the unique id. + /// + /// Gives back the edge by the unique id. + /// If the graph does not contain edge with the given id + /// then the result of the function is undetermined. + Edge edgeFromId(int) const { return INVALID;} + + /// \brief Gives back an integer greater or equal to the maximum + /// Node id. + /// + /// Gives back an integer greater or equal to the maximum Node + /// id. + int maxNodeId() const { return -1;} + + /// \brief Gives back an integer greater or equal to the maximum + /// Edge id. + /// + /// Gives back an integer greater or equal to the maximum Edge + /// id. + int maxEdgeId() const { return -1;} + + template + struct Constraints { + + void constraints() { + checkConcept(); + typename _Graph::Node node; + int nid = graph.id(node); + nid = graph.id(node); + node = graph.nodeFromId(nid); + typename _Graph::Edge edge; + int eid = graph.id(edge); + eid = graph.id(edge); + edge = graph.edgeFromId(eid); + + nid = graph.maxNodeId(); + ignore_unused_variable_warning(nid); + eid = graph.maxEdgeId(); + ignore_unused_variable_warning(eid); + } + + const _Graph& graph; + }; + }; + + /// \brief An empty idable base undirected graph class. + /// + /// This class provides beside the core undirected graph features + /// core id functions for the undirected graph structure. The + /// most of the base undirected graphs should be conform to this + /// concept. The id's are unique and immutable. + template + class IDableUGraphComponent : public IDableGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::UEdge UEdge; + + using IDableGraphComponent<_Base>::id; + + /// \brief Gives back an unique integer id for the UEdge. + /// + /// Gives back an unique integer id for the UEdge. 
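
The id interface sketched by IDableGraphComponent can be tried directly on a concrete graph; a minimal sketch with ListGraph (class and header assumed, as before), using only id(), nodeFromId() and maxNodeId() as documented above:

    // Sketch: round-tripping a node through its unique integer id.
    #include <iostream>
    #include <lemon/list_graph.h>   // assumed header for ListGraph

    int main() {
      typedef lemon::ListGraph Graph;
      Graph g;
      Graph::Node n = g.addNode();

      int nid = g.id(n);                    // unique, immutable id
      Graph::Node back = g.nodeFromId(nid); // id -> node
      std::cout << "id: " << nid
                << "  round-trip ok: " << (back == n)
                << "  maxNodeId: " << g.maxNodeId() << std::endl;
      return 0;
    }
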
+ /// + int id(const UEdge&) const { return -1;} + + /// \brief Gives back the undirected edge by the unique id. + /// + /// Gives back the undirected edge by the unique id. If the + /// graph does not contain edge with the given id then the + /// result of the function is undetermined. + UEdge uEdgeFromId(int) const { return INVALID;} + + /// \brief Gives back an integer greater or equal to the maximum + /// UEdge id. + /// + /// Gives back an integer greater or equal to the maximum UEdge + /// id. + int maxUEdgeId() const { return -1;} + + template + struct Constraints { + + void constraints() { + checkConcept(); + checkConcept, _Graph >(); + typename _Graph::UEdge uedge; + int ueid = graph.id(uedge); + ueid = graph.id(uedge); + uedge = graph.uEdgeFromId(ueid); + ueid = graph.maxUEdgeId(); + ignore_unused_variable_warning(ueid); + } + + const _Graph& graph; + }; + }; + + /// \brief An empty idable base bipartite undirected graph class. + /// + /// This class provides beside the core bipartite undirected graph + /// features core id functions for the bipartite undirected graph + /// structure. The most of the base undirected graphs should be + /// conform to this concept. + template + class IDableBpUGraphComponent : public IDableUGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + + using IDableUGraphComponent<_Base>::id; + + /// \brief Gives back an unique integer id for the ANode. + /// + /// Gives back an unique integer id for the ANode. + /// + int aNodeId(const Node&) const { return -1;} + + /// \brief Gives back the undirected edge by the unique id. + /// + /// Gives back the undirected edge by the unique id. If the + /// graph does not contain edge with the given id then the + /// result of the function is undetermined. + Node nodeFromANodeId(int) const { return INVALID;} + + /// \brief Gives back an integer greater or equal to the maximum + /// ANode id. + /// + /// Gives back an integer greater or equal to the maximum ANode + /// id. + int maxANodeId() const { return -1;} + + /// \brief Gives back an unique integer id for the BNode. + /// + /// Gives back an unique integer id for the BNode. + /// + int bNodeId(const Node&) const { return -1;} + + /// \brief Gives back the undirected edge by the unique id. + /// + /// Gives back the undirected edge by the unique id. If the + /// graph does not contain edge with the given id then the + /// result of the function is undetermined. + Node nodeFromBNodeId(int) const { return INVALID;} + + /// \brief Gives back an integer greater or equal to the maximum + /// BNode id. + /// + /// Gives back an integer greater or equal to the maximum BNode + /// id. + int maxBNodeId() const { return -1;} + + template + struct Constraints { + + void constraints() { + checkConcept(); + checkConcept, _Graph >(); + typename _Graph::Node node(INVALID); + int id; + id = graph.aNodeId(node); + id = graph.bNodeId(node); + node = graph.nodeFromANodeId(id); + node = graph.nodeFromBNodeId(id); + id = graph.maxANodeId(); + id = graph.maxBNodeId(); + } + + const _Graph& graph; + }; + }; + + /// \brief Skeleton class for graph NodeIt and EdgeIt + /// + /// Skeleton class for graph NodeIt and EdgeIt. + /// + template + class GraphItemIt : public _Item { + public: + /// \brief Default constructor. + /// + /// @warning The default constructor sets the iterator + /// to an undefined value. + GraphItemIt() {} + /// \brief Copy constructor. + /// + /// Copy constructor. 
+ /// + GraphItemIt(const GraphItemIt& ) {} + /// \brief Sets the iterator to the first item. + /// + /// Sets the iterator to the first item of \c the graph. + /// + explicit GraphItemIt(const _Graph&) {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + GraphItemIt(Invalid) {} + /// \brief Assign operator for items. + /// + /// The items are assignable. + /// + GraphItemIt& operator=(const GraphItemIt&) { return *this; } + /// \brief Next item. + /// + /// Assign the iterator to the next item. + /// + GraphItemIt& operator++() { return *this; } + /// \brief Equality operator + /// + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(const GraphItemIt&) const { return true;} + /// \brief Inequality operator + /// + /// \sa operator==(Node n) + /// + bool operator!=(const GraphItemIt&) const { return true;} + + template + struct Constraints { + void constraints() { + _GraphItemIt it1(g); + _GraphItemIt it2; + + it2 = ++it1; + ++it2 = it1; + ++(++it1); + + _Item bi = it1; + bi = it2; + } + _Graph& g; + }; + }; + + /// \brief Skeleton class for graph InEdgeIt and OutEdgeIt + /// + /// \note Because InEdgeIt and OutEdgeIt may not inherit from the same + /// base class, the _selector is a additional template parameter. For + /// InEdgeIt you should instantiate it with character 'i' and for + /// OutEdgeIt with 'o'. + template + class GraphIncIt : public _Item { + public: + /// \brief Default constructor. + /// + /// @warning The default constructor sets the iterator + /// to an undefined value. + GraphIncIt() {} + /// \brief Copy constructor. + /// + /// Copy constructor. + /// + GraphIncIt(GraphIncIt const& gi) : _Item(gi) {} + /// \brief Sets the iterator to the first edge incoming into or outgoing + /// from the node. + /// + /// Sets the iterator to the first edge incoming into or outgoing + /// from the node. + /// + explicit GraphIncIt(const _Graph&, const _Base&) {} + /// \brief Invalid constructor \& conversion. + /// + /// This constructor initializes the item to be invalid. + /// \sa Invalid for more details. + GraphIncIt(Invalid) {} + /// \brief Assign operator for iterators. + /// + /// The iterators are assignable. + /// + GraphIncIt& operator=(GraphIncIt const&) { return *this; } + /// \brief Next item. + /// + /// Assign the iterator to the next item. + /// + GraphIncIt& operator++() { return *this; } + + /// \brief Equality operator + /// + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(const GraphIncIt&) const { return true;} + + /// \brief Inequality operator + /// + /// \sa operator==(Node n) + /// + bool operator!=(const GraphIncIt&) const { return true;} + + template + struct Constraints { + void constraints() { + checkConcept, _GraphIncIt>(); + _GraphIncIt it1(graph, node); + _GraphIncIt it2; + + it2 = ++it1; + ++it2 = it1; + ++(++it1); + _Item e = it1; + e = it2; + + } + + _Item edge; + _Base node; + _Graph graph; + _GraphIncIt it; + }; + }; + + + /// \brief An empty iterable graph class. + /// + /// This class provides beside the core graph features + /// iterator based iterable interface for the graph structure. + /// This concept is part of the Graph concept. 
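
Because these interfaces are compile-time concepts rather than base classes, algorithms are written as templates against them and work for any modelling type. A minimal sketch of a generic edge counter (LEMON ships ready-made counters of this kind in its utility headers; the sketch only illustrates the pattern):

    // Sketch: a generic algorithm written against the Graph concept.
    // Any type that models the concept (e.g. ListGraph) can be passed in.
    #include <iostream>
    #include <lemon/list_graph.h>   // assumed header for ListGraph

    template <typename GR>
    int count_edges(const GR& g) {
      int count = 0;
      for (typename GR::EdgeIt e(g); e != lemon::INVALID; ++e) ++count;
      return count;
    }

    int main() {
      lemon::ListGraph g;
      lemon::ListGraph::Node a = g.addNode(), b = g.addNode();
      g.addEdge(a, b);
      g.addEdge(b, a);
      std::cout << count_edges(g) << std::endl;  // prints 2
      return 0;
    }
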
+ template + class IterableGraphComponent : public _Base { + + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + + typedef IterableGraphComponent Graph; + + /// \name Base iteration + /// + /// This interface provides functions for iteration on graph items + /// + /// @{ + + /// \brief Gives back the first node in the iterating order. + /// + /// Gives back the first node in the iterating order. + /// + void first(Node&) const {} + + /// \brief Gives back the next node in the iterating order. + /// + /// Gives back the next node in the iterating order. + /// + void next(Node&) const {} + + /// \brief Gives back the first edge in the iterating order. + /// + /// Gives back the first edge in the iterating order. + /// + void first(Edge&) const {} + + /// \brief Gives back the next edge in the iterating order. + /// + /// Gives back the next edge in the iterating order. + /// + void next(Edge&) const {} + + + /// \brief Gives back the first of the edges point to the given + /// node. + /// + /// Gives back the first of the edges point to the given node. + /// + void firstIn(Edge&, const Node&) const {} + + /// \brief Gives back the next of the edges points to the given + /// node. + /// + /// Gives back the next of the edges points to the given node. + /// + void nextIn(Edge&) const {} + + /// \brief Gives back the first of the edges start from the + /// given node. + /// + /// Gives back the first of the edges start from the given node. + /// + void firstOut(Edge&, const Node&) const {} + + /// \brief Gives back the next of the edges start from the given + /// node. + /// + /// Gives back the next of the edges start from the given node. + /// + void nextOut(Edge&) const {} + + /// @} + + /// \name Class based iteration + /// + /// This interface provides functions for iteration on graph items + /// + /// @{ + + /// \brief This iterator goes through each node. + /// + /// This iterator goes through each node. + /// + typedef GraphItemIt NodeIt; + + /// \brief This iterator goes through each node. + /// + /// This iterator goes through each node. + /// + typedef GraphItemIt EdgeIt; + + /// \brief This iterator goes trough the incoming edges of a node. + /// + /// This iterator goes trough the \e inccoming edges of a certain node + /// of a graph. + typedef GraphIncIt InEdgeIt; + + /// \brief This iterator goes trough the outgoing edges of a node. + /// + /// This iterator goes trough the \e outgoing edges of a certain node + /// of a graph. + typedef GraphIncIt OutEdgeIt; + + /// \brief The base node of the iterator. + /// + /// Gives back the base node of the iterator. + /// It is always the target of the pointed edge. + Node baseNode(const InEdgeIt&) const { return INVALID; } + + /// \brief The running node of the iterator. + /// + /// Gives back the running node of the iterator. + /// It is always the source of the pointed edge. + Node runningNode(const InEdgeIt&) const { return INVALID; } + + /// \brief The base node of the iterator. + /// + /// Gives back the base node of the iterator. + /// It is always the source of the pointed edge. + Node baseNode(const OutEdgeIt&) const { return INVALID; } + + /// \brief The running node of the iterator. + /// + /// Gives back the running node of the iterator. + /// It is always the target of the pointed edge. 
+ Node runningNode(const OutEdgeIt&) const { return INVALID; } + + /// @} + + template + struct Constraints { + void constraints() { + checkConcept(); + + { + typename _Graph::Node node(INVALID); + typename _Graph::Edge edge(INVALID); + { + graph.first(node); + graph.next(node); + } + { + graph.first(edge); + graph.next(edge); + } + { + graph.firstIn(edge, node); + graph.nextIn(edge); + } + { + graph.firstOut(edge, node); + graph.nextOut(edge); + } + } + + { + checkConcept, + typename _Graph::EdgeIt >(); + checkConcept, + typename _Graph::NodeIt >(); + checkConcept, typename _Graph::InEdgeIt>(); + checkConcept, typename _Graph::OutEdgeIt>(); + + typename _Graph::Node n; + typename _Graph::InEdgeIt ieit(INVALID); + typename _Graph::OutEdgeIt oeit(INVALID); + n = graph.baseNode(ieit); + n = graph.runningNode(ieit); + n = graph.baseNode(oeit); + n = graph.runningNode(oeit); + ignore_unused_variable_warning(n); + } + } + + const _Graph& graph; + + }; + }; + + /// \brief An empty iterable undirected graph class. + /// + /// This class provides beside the core graph features iterator + /// based iterable interface for the undirected graph structure. + /// This concept is part of the UGraph concept. + template + class IterableUGraphComponent : public IterableGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + typedef typename Base::UEdge UEdge; + + + typedef IterableUGraphComponent Graph; + + /// \name Base iteration + /// + /// This interface provides functions for iteration on graph items + /// @{ + + using IterableGraphComponent<_Base>::first; + using IterableGraphComponent<_Base>::next; + + /// \brief Gives back the first undirected edge in the iterating + /// order. + /// + /// Gives back the first undirected edge in the iterating order. + /// + void first(UEdge&) const {} + + /// \brief Gives back the next undirected edge in the iterating + /// order. + /// + /// Gives back the next undirected edge in the iterating order. + /// + void next(UEdge&) const {} + + + /// \brief Gives back the first of the undirected edges from the + /// given node. + /// + /// Gives back the first of the undirected edges from the given + /// node. The bool parameter gives back that direction which + /// gives a good direction of the uedge so the source of the + /// directed edge is the given node. + void firstInc(UEdge&, bool&, const Node&) const {} + + /// \brief Gives back the next of the undirected edges from the + /// given node. + /// + /// Gives back the next of the undirected edges from the given + /// node. The bool parameter should be used as the \c firstInc() + /// use it. + void nextInc(UEdge&, bool&) const {} + + using IterableGraphComponent<_Base>::baseNode; + using IterableGraphComponent<_Base>::runningNode; + + /// @} + + /// \name Class based iteration + /// + /// This interface provides functions for iteration on graph items + /// + /// @{ + + /// \brief This iterator goes through each node. + /// + /// This iterator goes through each node. + typedef GraphItemIt UEdgeIt; + /// \brief This iterator goes trough the incident edges of a + /// node. + /// + /// This iterator goes trough the incident edges of a certain + /// node of a graph. + typedef GraphIncIt IncEdgeIt; + /// \brief The base node of the iterator. + /// + /// Gives back the base node of the iterator. + Node baseNode(const IncEdgeIt&) const { return INVALID; } + + /// \brief The running node of the iterator. 
+ /// + /// Gives back the running node of the iterator. + Node runningNode(const IncEdgeIt&) const { return INVALID; } + + /// @} + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + + { + typename _Graph::Node node(INVALID); + typename _Graph::UEdge uedge(INVALID); + bool dir; + { + graph.first(uedge); + graph.next(uedge); + } + { + graph.firstInc(uedge, dir, node); + graph.nextInc(uedge, dir); + } + + } + + { + checkConcept, + typename _Graph::UEdgeIt >(); + checkConcept, typename _Graph::IncEdgeIt>(); + + typename _Graph::Node n; + typename _Graph::IncEdgeIt ueit(INVALID); + n = graph.baseNode(ueit); + n = graph.runningNode(ueit); + } + } + + const _Graph& graph; + + }; + }; + + /// \brief An empty iterable bipartite undirected graph class. + /// + /// This class provides beside the core graph features iterator + /// based iterable interface for the bipartite undirected graph + /// structure. This concept is part of the BpUGraph concept. + template + class IterableBpUGraphComponent : public IterableUGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::UEdge UEdge; + + typedef IterableBpUGraphComponent Graph; + + /// \name Base iteration + /// + /// This interface provides functions for iteration on graph items + /// @{ + + using IterableUGraphComponent<_Base>::first; + using IterableUGraphComponent<_Base>::next; + + /// \brief Gives back the first A-node in the iterating order. + /// + /// Gives back the first undirected A-node in the iterating + /// order. + /// + void firstANode(Node&) const {} + + /// \brief Gives back the next A-node in the iterating order. + /// + /// Gives back the next A-node in the iterating order. + /// + void nextANode(Node&) const {} + + /// \brief Gives back the first B-node in the iterating order. + /// + /// Gives back the first undirected B-node in the iterating + /// order. + /// + void firstBNode(Node&) const {} + + /// \brief Gives back the next B-node in the iterating order. + /// + /// Gives back the next B-node in the iterating order. + /// + void nextBNode(Node&) const {} + + + /// \brief Gives back the first of the undirected edges start + /// from the given A-node. + /// + /// Gives back the first of the undirected edges start from the + /// given A-node. + void firstFromANode(UEdge&, const Node&) const {} + + /// \brief Gives back the next of the undirected edges start + /// from the given A-node. + /// + /// Gives back the next of the undirected edges start from the + /// given A-node. + void nextFromANode(UEdge&) const {} + + /// \brief Gives back the first of the undirected edges start + /// from the given B-node. + /// + /// Gives back the first of the undirected edges start from the + /// given B-node. + void firstFromBNode(UEdge&, const Node&) const {} + + /// \brief Gives back the next of the undirected edges start + /// from the given B-node. + /// + /// Gives back the next of the undirected edges start from the + /// given B-node. + void nextFromBNode(UEdge&) const {} + + + /// @} + + /// \name Class based iteration + /// + /// This interface provides functions for iteration on graph items + /// + /// @{ + + /// \brief This iterator goes through each A-node. + /// + /// This iterator goes through each A-node. + typedef GraphItemIt ANodeIt; + + /// \brief This iterator goes through each B-node. + /// + /// This iterator goes through each B-node. 
+ typedef GraphItemIt BNodeIt; + + /// @} + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + + { + typename _Graph::Node node(INVALID); + typename _Graph::UEdge uedge(INVALID); + graph.firstANode(node); + graph.nextANode(node); + graph.firstBNode(node); + graph.nextBNode(node); + + graph.firstFromANode(uedge, node); + graph.nextFromANode(uedge); + graph.firstFromBNode(uedge, node); + graph.nextFromBNode(uedge); + } + { + checkConcept, + typename _Graph::ANodeIt >(); + checkConcept, + typename _Graph::BNodeIt >(); + } + + } + + const _Graph& graph; + + }; + }; + + /// \brief An empty alteration notifier graph class. + /// + /// This class provides beside the core graph features alteration + /// notifier interface for the graph structure. This implements + /// an observer-notifier pattern for each graph item. More + /// obsevers can be registered into the notifier and whenever an + /// alteration occured in the graph all the observers will + /// notified about it. + template + class AlterableGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + + + /// The node observer registry. + typedef AlterationNotifier + NodeNotifier; + /// The edge observer registry. + typedef AlterationNotifier + EdgeNotifier; + + /// \brief Gives back the node alteration notifier. + /// + /// Gives back the node alteration notifier. + NodeNotifier& notifier(Node) const { + return NodeNotifier(); + } + + /// \brief Gives back the edge alteration notifier. + /// + /// Gives back the edge alteration notifier. + EdgeNotifier& notifier(Edge) const { + return EdgeNotifier(); + } + + template + struct Constraints { + void constraints() { + checkConcept(); + typename _Graph::NodeNotifier& nn + = graph.notifier(typename _Graph::Node()); + + typename _Graph::EdgeNotifier& en + = graph.notifier(typename _Graph::Edge()); + + ignore_unused_variable_warning(nn); + ignore_unused_variable_warning(en); + } + + const _Graph& graph; + + }; + + }; + + /// \brief An empty alteration notifier undirected graph class. + /// + /// This class provides beside the core graph features alteration + /// notifier interface for the graph structure. This implements + /// an observer-notifier pattern for each graph item. More + /// obsevers can be registered into the notifier and whenever an + /// alteration occured in the graph all the observers will + /// notified about it. + template + class AlterableUGraphComponent : public AlterableGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::UEdge UEdge; + + + /// The edge observer registry. + typedef AlterationNotifier + UEdgeNotifier; + + /// \brief Gives back the edge alteration notifier. + /// + /// Gives back the edge alteration notifier. + UEdgeNotifier& notifier(UEdge) const { + return UEdgeNotifier(); + } + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + typename _Graph::UEdgeNotifier& uen + = graph.notifier(typename _Graph::UEdge()); + ignore_unused_variable_warning(uen); + } + + const _Graph& graph; + + }; + + }; + + /// \brief An empty alteration notifier bipartite undirected graph + /// class. + /// + /// This class provides beside the core graph features alteration + /// notifier interface for the graph structure. This implements + /// an observer-notifier pattern for each graph item. 
More + /// obsevers can be registered into the notifier and whenever an + /// alteration occured in the graph all the observers will + /// notified about it. + template + class AlterableBpUGraphComponent : public AlterableUGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::ANode ANode; + typedef typename Base::BNode BNode; + + + /// The A-node observer registry. + typedef AlterationNotifier + ANodeNotifier; + + /// The B-node observer registry. + typedef AlterationNotifier + BNodeNotifier; + + /// \brief Gives back the A-node alteration notifier. + /// + /// Gives back the A-node alteration notifier. + ANodeNotifier& notifier(ANode) const { + return ANodeNotifier(); + } + + /// \brief Gives back the B-node alteration notifier. + /// + /// Gives back the B-node alteration notifier. + BNodeNotifier& notifier(BNode) const { + return BNodeNotifier(); + } + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + typename _Graph::ANodeNotifier& ann + = graph.notifier(typename _Graph::ANode()); + typename _Graph::BNodeNotifier& bnn + = graph.notifier(typename _Graph::BNode()); + ignore_unused_variable_warning(ann); + ignore_unused_variable_warning(bnn); + } + + const _Graph& graph; + + }; + + }; + + + /// \brief Class describing the concept of graph maps + /// + /// This class describes the common interface of the graph maps + /// (NodeMap, EdgeMap), that is \ref maps-page "maps" which can be used to + /// associate data to graph descriptors (nodes or edges). + template + class GraphMap : public ReadWriteMap<_Item, _Value> { + public: + + typedef ReadWriteMap<_Item, _Value> Parent; + + /// The graph type of the map. + typedef _Graph Graph; + /// The key type of the map. + typedef _Item Key; + /// The value type of the map. + typedef _Value Value; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit GraphMap(const Graph&) {} + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + GraphMap(const Graph&, const Value&) {} + /// \brief Copy constructor. + /// + /// Copy Constructor. + GraphMap(const GraphMap&) : Parent() {} + + /// \brief Assign operator. + /// + /// Assign operator. It does not mofify the underlying graph, + /// it just iterates on the current item set and set the map + /// with the value returned by the assigned map. + template + GraphMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + template + struct Constraints { + void constraints() { + checkConcept, _Map >(); + // Construction with a graph parameter + _Map a(g); + // Constructor with a graph and a default value parameter + _Map a2(g,t); + // Copy constructor. + _Map b(c); + + ReadMap cmap; + b = cmap; + + ignore_unused_variable_warning(a2); + ignore_unused_variable_warning(b); + } + + const _Map &c; + const Graph &g; + const typename GraphMap::Value &t; + }; + + }; + + /// \brief An empty mappable graph class. + /// + /// This class provides beside the core graph features + /// map interface for the graph structure. + /// This concept is part of the Graph concept. + template + class MappableGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + + typedef MappableGraphComponent Graph; + + /// \brief ReadWrite map of the nodes. + /// + /// ReadWrite map of the nodes. 
+ /// + template + class NodeMap : public GraphMap { + public: + typedef GraphMap Parent; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit NodeMap(const MappableGraphComponent& graph) + : Parent(graph) {} + + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + NodeMap(const MappableGraphComponent& graph, const _Value& value) + : Parent(graph, value) {} + + /// \brief Copy constructor. + /// + /// Copy Constructor. + NodeMap(const NodeMap& nm) : Parent(nm) {} + + /// \brief Assign operator. + /// + /// Assign operator. + template + NodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + }; + + /// \brief ReadWrite map of the edges. + /// + /// ReadWrite map of the edges. + /// + template + class EdgeMap : public GraphMap { + public: + typedef GraphMap Parent; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit EdgeMap(const MappableGraphComponent& graph) + : Parent(graph) {} + + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + EdgeMap(const MappableGraphComponent& graph, const _Value& value) + : Parent(graph, value) {} + + /// \brief Copy constructor. + /// + /// Copy Constructor. + EdgeMap(const EdgeMap& nm) : Parent(nm) {} + + /// \brief Assign operator. + /// + /// Assign operator. + template + EdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + }; + + + template + struct Constraints { + + struct Dummy { + int value; + Dummy() : value(0) {} + Dummy(int _v) : value(_v) {} + }; + + void constraints() { + checkConcept(); + { // int map test + typedef typename _Graph::template NodeMap IntNodeMap; + checkConcept, + IntNodeMap >(); + } { // bool map test + typedef typename _Graph::template NodeMap BoolNodeMap; + checkConcept, + BoolNodeMap >(); + } { // Dummy map test + typedef typename _Graph::template NodeMap DummyNodeMap; + checkConcept, + DummyNodeMap >(); + } + + { // int map test + typedef typename _Graph::template EdgeMap IntEdgeMap; + checkConcept, + IntEdgeMap >(); + } { // bool map test + typedef typename _Graph::template EdgeMap BoolEdgeMap; + checkConcept, + BoolEdgeMap >(); + } { // Dummy map test + typedef typename _Graph::template EdgeMap DummyEdgeMap; + checkConcept, + DummyEdgeMap >(); + } + } + + _Graph& graph; + }; + }; + + /// \brief An empty mappable base bipartite undirected graph class. + /// + /// This class provides beside the core graph features + /// map interface for the graph structure. + /// This concept is part of the UGraph concept. + template + class MappableUGraphComponent : public MappableGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::UEdge UEdge; + + typedef MappableUGraphComponent Graph; + + /// \brief ReadWrite map of the uedges. + /// + /// ReadWrite map of the uedges. + /// + template + class UEdgeMap : public GraphMap { + public: + typedef GraphMap Parent; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit UEdgeMap(const MappableUGraphComponent& graph) + : Parent(graph) {} + + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + UEdgeMap(const MappableUGraphComponent& graph, const _Value& value) + : Parent(graph, value) {} + + /// \brief Copy constructor. + /// + /// Copy Constructor. 
+ UEdgeMap(const UEdgeMap& nm) : Parent(nm) {} + + /// \brief Assign operator. + /// + /// Assign operator. + template + UEdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + }; + + + template + struct Constraints { + + struct Dummy { + int value; + Dummy() : value(0) {} + Dummy(int _v) : value(_v) {} + }; + + void constraints() { + checkConcept, _Graph>(); + + { // int map test + typedef typename _Graph::template UEdgeMap IntUEdgeMap; + checkConcept, + IntUEdgeMap >(); + } { // bool map test + typedef typename _Graph::template UEdgeMap BoolUEdgeMap; + checkConcept, + BoolUEdgeMap >(); + } { // Dummy map test + typedef typename _Graph::template UEdgeMap DummyUEdgeMap; + checkConcept, + DummyUEdgeMap >(); + } + } + + _Graph& graph; + }; + }; + + /// \brief An empty mappable base bipartite undirected graph + /// class. + /// + /// This class provides beside the core graph features + /// map interface for the graph structure. + /// This concept is part of the BpUGraph concept. + template + class MappableBpUGraphComponent : public MappableUGraphComponent<_Base> { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + + typedef MappableBpUGraphComponent Graph; + + /// \brief ReadWrite map of the A-nodes. + /// + /// ReadWrite map of the A-nodes. + /// + template + class ANodeMap : public GraphMap { + public: + typedef GraphMap Parent; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit ANodeMap(const MappableBpUGraphComponent& graph) + : Parent(graph) {} + + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + ANodeMap(const MappableBpUGraphComponent& graph, const _Value& value) + : Parent(graph, value) {} + + /// \brief Copy constructor. + /// + /// Copy Constructor. + ANodeMap(const ANodeMap& nm) : Parent(nm) {} + + /// \brief Assign operator. + /// + /// Assign operator. + template + ANodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + }; + + /// \brief ReadWrite map of the B-nodes. + /// + /// ReadWrite map of the A-nodes. + /// + template + class BNodeMap : public GraphMap { + public: + typedef GraphMap Parent; + + /// \brief Construct a new map. + /// + /// Construct a new map for the graph. + explicit BNodeMap(const MappableBpUGraphComponent& graph) + : Parent(graph) {} + + /// \brief Construct a new map with default value. + /// + /// Construct a new map for the graph and initalise the values. + BNodeMap(const MappableBpUGraphComponent& graph, const _Value& value) + : Parent(graph, value) {} + + /// \brief Copy constructor. + /// + /// Copy Constructor. + BNodeMap(const BNodeMap& nm) : Parent(nm) {} + + /// \brief Assign operator. + /// + /// Assign operator. + template + BNodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + + }; + + + template + struct Constraints { + + struct Dummy { + int value; + Dummy() : value(0) {} + Dummy(int _v) : value(_v) {} + }; + + void constraints() { + checkConcept, _Graph>(); + + { // int map test + typedef typename _Graph::template ANodeMap IntANodeMap; + checkConcept, + IntANodeMap >(); + } { // bool map test + typedef typename _Graph::template ANodeMap BoolANodeMap; + checkConcept, + BoolANodeMap >(); + } { // Dummy map test + typedef typename _Graph::template ANodeMap DummyANodeMap; + checkConcept, + DummyANodeMap >(); + } + } + + _Graph& graph; + }; + }; + + + /// \brief An empty extendable graph class. 
+ /// + /// This class provides beside the core graph features graph + /// extendable interface for the graph structure. The main + /// difference between the base and this interface is that the + /// graph alterations should handled already on this level. + template + class ExtendableGraphComponent : public _Base { + public: + typedef _Base Base; + + typedef typename _Base::Node Node; + typedef typename _Base::Edge Edge; + + /// \brief Adds a new node to the graph. + /// + /// Adds a new node to the graph. + /// + Node addNode() { + return INVALID; + } + + /// \brief Adds a new edge connects the given two nodes. + /// + /// Adds a new edge connects the the given two nodes. + Edge addEdge(const Node&, const Node&) { + return INVALID; + } + + template + struct Constraints { + void constraints() { + checkConcept(); + typename _Graph::Node node_a, node_b; + node_a = graph.addNode(); + node_b = graph.addNode(); + typename _Graph::Edge edge; + edge = graph.addEdge(node_a, node_b); + } + + _Graph& graph; + }; + }; + + /// \brief An empty extendable base undirected graph class. + /// + /// This class provides beside the core undirected graph features + /// core undircted graph extend interface for the graph structure. + /// The main difference between the base and this interface is + /// that the graph alterations should handled already on this + /// level. + template + class ExtendableUGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename _Base::Node Node; + typedef typename _Base::UEdge UEdge; + + /// \brief Adds a new node to the graph. + /// + /// Adds a new node to the graph. + /// + Node addNode() { + return INVALID; + } + + /// \brief Adds a new edge connects the given two nodes. + /// + /// Adds a new edge connects the the given two nodes. + UEdge addEdge(const Node&, const Node&) { + return INVALID; + } + + template + struct Constraints { + void constraints() { + checkConcept(); + typename _Graph::Node node_a, node_b; + node_a = graph.addNode(); + node_b = graph.addNode(); + typename _Graph::UEdge uedge; + uedge = graph.addUEdge(node_a, node_b); + } + + _Graph& graph; + }; + }; + + /// \brief An empty extendable base undirected graph class. + /// + /// This class provides beside the core bipartite undirected graph + /// features core undircted graph extend interface for the graph + /// structure. The main difference between the base and this + /// interface is that the graph alterations should handled already + /// on this level. + template + class ExtendableBpUGraphComponent + : public ExtendableUGraphComponent<_Base> { + + typedef _Base Base; + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + } + }; + }; + + /// \brief An empty erasable graph class. + /// + /// This class provides beside the core graph features core erase + /// functions for the graph structure. The main difference between + /// the base and this interface is that the graph alterations + /// should handled already on this level. + template + class ErasableGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::Edge Edge; + + /// \brief Erase a node from the graph. + /// + /// Erase a node from the graph. This function should + /// erase all edges connecting to the node. + void erase(const Node&) {} + + /// \brief Erase an edge from the graph. + /// + /// Erase an edge from the graph. 
+ /// + void erase(const Edge&) {} + + template + struct Constraints { + void constraints() { + checkConcept(); + typename _Graph::Node node; + graph.erase(node); + typename _Graph::Edge edge; + graph.erase(edge); + } + + _Graph& graph; + }; + }; + + /// \brief An empty erasable base undirected graph class. + /// + /// This class provides beside the core undirected graph features + /// core erase functions for the undirceted graph structure. The + /// main difference between the base and this interface is that + /// the graph alterations should handled already on this level. + template + class ErasableUGraphComponent : public _Base { + public: + + typedef _Base Base; + typedef typename Base::Node Node; + typedef typename Base::UEdge UEdge; + + /// \brief Erase a node from the graph. + /// + /// Erase a node from the graph. This function should erase + /// edges connecting to the node. + void erase(const Node&) {} + + /// \brief Erase an edge from the graph. + /// + /// Erase an edge from the graph. + /// + void erase(const UEdge&) {} + + template + struct Constraints { + void constraints() { + checkConcept(); + typename _Graph::Node node; + graph.erase(node); + typename _Graph::Edge edge; + graph.erase(edge); + } + + _Graph& graph; + }; + }; + + /// \brief An empty erasable base bipartite undirected graph class. + /// + /// This class provides beside the core bipartite undirected graph + /// features core erase functions for the undirceted graph + /// structure. The main difference between the base and this + /// interface is that the graph alterations should handled already + /// on this level. + template + class ErasableBpUGraphComponent : public ErasableUGraphComponent<_Base> { + public: + + typedef _Base Base; + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + } + }; + }; + + /// \brief An empty clearable base graph class. + /// + /// This class provides beside the core graph features core clear + /// functions for the graph structure. The main difference between + /// the base and this interface is that the graph alterations + /// should handled already on this level. + template + class ClearableGraphComponent : public _Base { + public: + + typedef _Base Base; + + /// \brief Erase all nodes and edges from the graph. + /// + /// Erase all nodes and edges from the graph. + /// + void clear() {} + + template + struct Constraints { + void constraints() { + checkConcept(); + graph.clear(); + } + + _Graph graph; + }; + }; + + /// \brief An empty clearable base undirected graph class. + /// + /// This class provides beside the core undirected graph features + /// core clear functions for the undirected graph structure. The + /// main difference between the base and this interface is that + /// the graph alterations should handled already on this level. + template + class ClearableUGraphComponent : public ClearableGraphComponent<_Base> { + public: + + typedef _Base Base; + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + } + + _Graph graph; + }; + }; + + /// \brief An empty clearable base bipartite undirected graph + /// class. + /// + /// This class provides beside the core bipartite undirected graph + /// features core clear functions for the undirected graph + /// structure. The main difference between the base and this + /// interface is that the graph alterations should handled already + /// on this level. 
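+ ///
+ /// (An illustrative sketch only, not part of the concept: the erase
+ /// and clear interfaces of these alteration components as used on a
+ /// concrete graph type, assuming for example \ref lemon::ListGraph.)
+ ///\code
+ /// ListGraph g;
+ /// ListGraph::Node n = g.addNode();
+ /// g.erase(n);  // removes the node together with its incident edges
+ /// g.clear();   // removes every node and edge
+ ///\endcode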
+ template + class ClearableBpUGraphComponent : public ClearableUGraphComponent<_Base> { + public: + + typedef _Base Base; + + template + struct Constraints { + void constraints() { + checkConcept, _Graph>(); + } + + }; + + }; + + } + +} + +#endif diff --git a/src/lemon/concepts/heap.h b/src/lemon/concepts/heap.h new file mode 100644 index 0000000..90e8cc0 --- /dev/null +++ b/src/lemon/concepts/heap.h @@ -0,0 +1,226 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +///\ingroup concept +///\file +///\brief Classes for representing heaps. +/// + +#ifndef LEMON_CONCEPT_HEAP_H +#define LEMON_CONCEPT_HEAP_H + +#include + +namespace lemon { + namespace concepts { + /// \addtogroup concept + /// @{ + + + /// \brief A concept structure describes the main interface of heaps. + /// + /// A concept structure describes the main interface of heaps. + /// + template + class Heap { + public: + + ///\brief Type of the items stored in the heap. + typedef typename ItemIntMap::Key Item; + + + /// \brief Type to represent the items states. + /// + /// Each Item element have a state associated to it. It may be "in heap", + /// "pre heap" or "post heap". The later two are indifferent from the + /// heap's point of view, but may be useful to the user. + /// + /// The ItemIntMap _should_ be initialized in such way, that it maps + /// PRE_HEAP (-1) to any element to be put in the heap... + enum State { + IN_HEAP = 0, + PRE_HEAP = -1, + POST_HEAP = -2 + }; + + /// \brief The constructor. + /// + /// The constructor. + /// \param _iim should be given to the constructor, since it is used + /// internally to handle the cross references. The value of the map + /// should be PRE_HEAP (-1) for each element. + explicit Heap(ItemIntMap &_iim) {} + + /// \brief The number of items stored in the heap. + /// + /// Returns the number of items stored in the heap. + int size() const { return 0; } + + /// \brief Checks if the heap stores no items. + /// + /// Returns \c true if and only if the heap stores no items. + bool empty() const { return false; } + + /// \brief Makes empty this heap. + /// + /// Makes this heap empty. + void clear(); + + /// \brief Insert an item into the heap with the given heap. + /// + /// Adds \c i to the heap with priority \c p. + /// \param i The item to insert. + /// \param p The priority of the item. + void push(const Item &i, const Prio &p) {} + + /// \brief Returns the item with minimum priority. + /// + /// This method returns the item with minimum priority. + /// \pre The heap must be nonempty. + Item top() const {} + + /// \brief Returns the minimum priority. + /// + /// It returns the minimum priority. + /// \pre The heap must be nonempty. + Prio prio() const {} + + /// \brief Deletes the item with minimum priority. + /// + /// This method deletes the item with minimum priority. + /// \pre The heap must be non-empty. + void pop() {} + + /// \brief Deletes \c i from the heap. 
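+ ///
+ /// (An illustrative sketch of typical use of this heap interface; it
+ /// is not part of the concept. \c MyHeap stands for any type
+ /// modelling the concept, and \c Graph, \c g, \c n are placeholders.
+ /// Note that the cross reference map has to map every item to
+ /// \c PRE_HEAP (-1) before the heap is used.)
+ ///\code
+ /// Graph::NodeMap<int> cross_ref(g, MyHeap::PRE_HEAP);
+ /// MyHeap heap(cross_ref);
+ /// heap.push(n, 5);
+ /// while (!heap.empty()) {
+ ///   Graph::Node item = heap.top();
+ ///   int prio = heap.prio();
+ ///   heap.pop();
+ /// }
+ ///\endcode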
+ /// + /// This method deletes item \c i from the heap, if \c i was + /// already stored in the heap. + /// \param i The item to erase. + void erase(const Item &i) {} + + /// \brief Returns the priority of \c i. + /// + /// This function returns the priority of item \c i. + /// \pre \c i must be in the heap. + /// \param i The item. + Prio operator[](const Item &i) const {} + + /// \brief \c i gets to the heap with priority \c p independently + /// if \c i was already there. + /// + /// This method calls \ref push(\c i, \c p) if \c i is not stored + /// in the heap and sets the priority of \c i to \c p otherwise. + /// It may throw an \e UnderFlowPriorityException. + /// \param i The item. + /// \param p The priority. + void set(const Item &i, const Prio &p) {} + + /// \brief Decreases the priority of \c i to \c p. + /// + /// This method decreases the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at least \c p. + /// \param i The item. + /// \param p The priority. + void decrease(const Item &i, const Prio &p) {} + + /// \brief Increases the priority of \c i to \c p. + /// + /// This method sets the priority of item \c i to \c p. + /// \pre \c i must be stored in the heap with priority at most \c + /// p relative to \c Compare. + /// \param i The item. + /// \param p The priority. + void increase(const Item &i, const Prio &p) {} + + /// \brief Returns if \c item is in, has already been in, or has + /// never been in the heap. + /// + /// This method returns PRE_HEAP if \c item has never been in the + /// heap, IN_HEAP if it is in the heap at the moment, and POST_HEAP + /// otherwise. In the latter case it is possible that \c item will + /// get back to the heap again. + /// \param i The item. + State state(const Item &i) const {} + + /// \brief Sets the state of the \c item in the heap. + /// + /// Sets the state of the \c item in the heap. It can be used to + /// manually clear the heap when it is important to achive the + /// better time complexity. + /// \param i The item. + /// \param st The state. It should not be \c IN_HEAP. + void state(const Item& i, State st) {} + + + template + struct Constraints { + public: + + void constraints() { + Item item; + Prio prio; + + item=Item(); + prio=Prio(); + + ignore_unused_variable_warning(item); + ignore_unused_variable_warning(prio); + + typedef typename _Heap::State State; + State state; + + ignore_unused_variable_warning(state); + + _Heap heap1 = _Heap(map); + + ignore_unused_variable_warning(heap1); + + heap.push(item, prio); + + prio = heap.prio(); + item = heap.top(); + + heap.pop(); + + heap.set(item, prio); + heap.decrease(item, prio); + heap.increase(item, prio); + prio = heap[item]; + + heap.erase(item); + + state = heap.state(item); + + state = _Heap::PRE_HEAP; + state = _Heap::IN_HEAP; + state = _Heap::POST_HEAP; + + heap.clear(); + } + + _Heap& heap; + ItemIntMap& map; + + Constraints() : heap(0), map(0) {} + }; + }; + + /// @} + } // namespace lemon +} +#endif // LEMON_CONCEPT_PATH_H diff --git a/src/lemon/concepts/maps.h b/src/lemon/concepts/maps.h new file mode 100644 index 0000000..6d7ace0 --- /dev/null +++ b/src/lemon/concepts/maps.h @@ -0,0 +1,208 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). 
+ * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_CONCEPT_MAPS_H +#define LEMON_CONCEPT_MAPS_H + +#include +#include + +///\ingroup concept +///\file +///\brief Map concepts checking classes for testing and documenting. + +namespace lemon { + + namespace concepts { + + /// \addtogroup concept + /// @{ + + /// Readable map concept + + /// Readable map concept. + /// + template + class ReadMap + { + public: + /// The key type of the map. + typedef K Key; + /// The value type of the map. (The type of objects associated with the keys). + typedef T Value; + + /// Returns the value associated with a key. + + /// Returns the value associated with a key. + /// \bug Value shouldn't need to be default constructible. + /// + Value operator[](const Key &) const {return Value();} + + template + struct Constraints { + + void constraints() { + Value val = m[key]; + val = m[key]; + typename _ReadMap::Value own_val = m[own_key]; + own_val = m[own_key]; + + ignore_unused_variable_warning(val); + ignore_unused_variable_warning(own_val); + ignore_unused_variable_warning(key); + } + Key& key; + typename _ReadMap::Key& own_key; + _ReadMap& m; + }; + + }; + + + /// Writable map concept + + /// Writable map concept. + /// + template + class WriteMap + { + public: + /// The key type of the map. + typedef K Key; + /// The value type of the map. (The type of objects associated with the keys). + typedef T Value; + + /// Sets the value associated with a key. + void set(const Key &,const Value &) {} + + ///Default constructor + WriteMap() {} + + template + struct Constraints { + void constraints() { + // No constraints for constructor. + m.set(key, val); + m.set(own_key, own_val); + ignore_unused_variable_warning(key); + ignore_unused_variable_warning(val); + ignore_unused_variable_warning(own_key); + ignore_unused_variable_warning(own_val); + } + + Value& val; + typename _WriteMap::Value own_val; + Key& key; + typename _WriteMap::Key& own_key; + _WriteMap& m; + + }; + }; + + /// Read/writable map concept + + /// Read/writable map concept. + /// + template + class ReadWriteMap : public ReadMap, + public WriteMap + { + public: + /// The key type of the map. + typedef K Key; + /// The value type of the map. (The type of objects associated with the keys). + typedef T Value; + + /// Returns the value associated with a key. + Value operator[](const Key &) const {return Value();} + /// Sets the value associated with a key. + void set(const Key & ,const Value &) {} + + template + struct Constraints { + void constraints() { + checkConcept, _ReadWriteMap >(); + checkConcept, _ReadWriteMap >(); + } + }; + }; + + + ///Dereferable map concept + + /// Dereferable map concept. + /// + /// \todo Rethink this concept. + template + class ReferenceMap : public ReadWriteMap + { + public: + /// Tag for reference maps. + typedef True ReferenceMapTag; + /// The key type of the map. + typedef K Key; + /// The value type of the map. (The type of objects associated with the keys). + typedef T Value; + /// The reference type of the map. + typedef R Reference; + /// The const reference type of the map. 
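+ ///
+ /// (An illustrative sketch only, not part of the concept: a user
+ /// defined map modelling the ReadMap concept above needs nothing
+ /// more than the \c Key and \c Value typedefs and a const
+ /// \c operator[]; \c Graph is a placeholder graph type here.)
+ ///\code
+ /// struct ConstDoubleMap {
+ ///   typedef Graph::Edge Key;
+ ///   typedef double Value;
+ ///   Value operator[](const Key&) const { return 1.0; }
+ /// };
+ /// checkConcept<concepts::ReadMap<Graph::Edge, double>, ConstDoubleMap>();
+ ///\endcode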
+ typedef CR ConstReference; + + protected: + Value tmp; + public: + + ///Returns a reference to the value associated with a key. + Reference operator[](const Key &) { return tmp; } + ///Returns a const reference to the value associated with a key. + ConstReference operator[](const Key &) const { return tmp; } + /// Sets the value associated with a key. + void set(const Key &k,const Value &t) { operator[](k)=t; } + + template + struct Constraints { + + void constraints() { + checkConcept, _ReferenceMap >(); + m[key] = val; + val = m[key]; + m[key] = ref; + ref = m[key]; + m[own_key] = own_val; + own_val = m[own_key]; + m[own_key] = own_ref; + own_ref = m[own_key]; + } + + typename _ReferenceMap::Key& own_key; + typename _ReferenceMap::Value& own_val; + typename _ReferenceMap::Reference own_ref; + Key& key; + Value& val; + Reference ref; + _ReferenceMap& m; + }; + }; + + // @} + + } //namespace concepts + +} //namespace lemon + +#endif // LEMON_CONCEPT_MAPS_H diff --git a/src/lemon/concepts/matrix_maps.h b/src/lemon/concepts/matrix_maps.h new file mode 100644 index 0000000..07943e9 --- /dev/null +++ b/src/lemon/concepts/matrix_maps.h @@ -0,0 +1,207 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_CONCEPT_MATRIX_MAPS_H +#define LEMON_CONCEPT_MATRIX_MAPS_H + +#include +#include + +///\ingroup concept +///\file +///\brief MatrixMap concepts checking classes for testing and documenting. + +namespace lemon { + + namespace concepts { + + /// \addtogroup concept + /// @{ + + /// Readable matrix map concept + template + class ReadMatrixMap + { + public: + /// Map's first key type. + typedef K1 FirstKey; + /// Map's second key type. + typedef K2 SecondKey; + /// \brief Map's value type. + /// (The type of objects associated with the pairs of keys). + typedef V Value; + + // \bug Value don't need to be default constructible. + /// Returns the value associated with a key. + Value operator()(const FirstKey&, const SecondKey&) const { + return Value(); + } + + template + struct Constraints { + + void constraints() { + Value val = m(first_key, second_key); + val = m(first_key, second_key); + typename _ReadMatrixMap::Value own_val = + m(own_first_key, own_second_key); + own_val = m(own_first_key, own_second_key); + ignore_unused_variable_warning(val); + ignore_unused_variable_warning(own_val); + } + + FirstKey& first_key; + SecondKey& second_key; + typename _ReadMatrixMap::FirstKey& own_first_key; + typename _ReadMatrixMap::SecondKey& own_second_key; + _ReadMatrixMap& m; + }; + + }; + + + /// Writable map concept + template + class WriteMatrixMap { + public: + /// Map's first key type. + typedef K1 FirstKey; + /// Map's second key type. + typedef K2 SecondKey; + /// \brief Map's value type. + /// (The type of objects associated with the pairs of keys). + typedef V Value; + + /// Sets the value associated with the pair of keys. 
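+ ///
+ /// (An illustrative sketch only: with a map \c mm modelling the
+ /// read/write matrix map concept, keyed by two nodes \c u and \c v
+ /// with a \c double value, access looks like this.)
+ ///\code
+ /// mm.set(u, v, 3.14);
+ /// double d = mm(u, v);
+ ///\endcode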
+ void set(const FirstKey&, const SecondKey& ,const Value&) {} + + template + struct Constraints { + void constraints() { + // No constraints for constructor. + m.set(first_key, second_key, val); + m.set(own_first_key, own_second_key, own_val); + } + + Value& val; + typename _WriteMatrixMap::Value own_val; + FirstKey& first_key; + SecondKey& second_key; + typename _WriteMatrixMap::FirstKey& own_first_key; + typename _WriteMatrixMap::SecondKey& own_second_key; + _WriteMatrixMap& m; + + }; + }; + + ///Read/Writable map concept + template + class ReadWriteMatrixMap + : public ReadMatrixMap, public WriteMatrixMap { + public: + /// Map's first key type. + typedef K1 FirstKey; + /// Map's second key type. + typedef K2 SecondKey; + /// \brief Map's value type. + /// (The type of objects associated with the pairs of keys). + typedef V Value; + + /// Returns the value associated with a pair of keys. + Value operator()(const FirstKey&, const SecondKey&) const { + return Value(); + } + /// Sets the value associated with the pair of keys. + void set(const FirstKey&, const SecondKey& ,const Value&) {} + + template + struct Constraints { + void constraints() { + checkConcept, _ReadWriteMatrixMap >(); + checkConcept, _ReadWriteMatrixMap >(); + } + }; + }; + + + ///Dereferable matrix map concept + template + class ReferenceMatrixMap : public ReadWriteMatrixMap + { + public: + /// Tag for reference maps. + typedef True ReferenceMapTag; + /// Map's first key type. + typedef K1 FirstKey; + /// Map's second key type. + typedef K1 SecondKey; + /// Map's value type. (The type of objects associated with the keys). + typedef V Value; + /// Map's reference type. + typedef R Reference; + /// Map's const reference type. + typedef CR ConstReference; + + protected: + Value tmp; + public: + + ///Returns a reference to the value associated to a pair of keys. + Reference operator()(const FirstKey&, const SecondKey&) { + return tmp; + } + ///Returns a const reference to the value associated to a pair of keys. + ConstReference operator()(const FirstKey&, const SecondKey&) const { + return tmp; + } + /// Sets the value associated with the pair of keys. + void set(const FirstKey&, const SecondKey& ,const Value&) {} + + // \todo rethink this concept + template + struct ReferenceMapConcept { + + void constraints() { + checkConcept(); + m(first_key, second_key) = val; + val = m(first_key, second_key); + m(first_key, second_key) = ref; + ref = m(first_key, second_key); + m(own_first_key, own_second_key) = own_val; + own_val = m(own_first_key, own_second_key); + m(own_first_key, own_second_key) = own_ref; + own_ref = m(own_first_key, own_second_key); + } + + typename _ReferenceMatrixMap::Key& own_first_key; + typename _ReferenceMatrixMap::Key& own_second_key; + typename _ReferenceMatrixMap::Value& own_val; + typename _ReferenceMatrixMap::Reference& own_ref; + FirstKey& first_key; + SecondKey& second_key; + Value& val; + Reference& ref; + _ReferenceMatrixMap& m; + }; + }; + + // @} + + } //namespace concepts +} //namespace lemon +#endif // LEMON_CONCEPT_MATRIX_MAPS_H diff --git a/src/lemon/concepts/path.h b/src/lemon/concepts/path.h new file mode 100644 index 0000000..d881d0a --- /dev/null +++ b/src/lemon/concepts/path.h @@ -0,0 +1,307 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). 
+ * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +///\ingroup concept +///\file +///\brief Classes for representing paths in graphs. +/// +///\todo Iterators have obsolete style + +#ifndef LEMON_CONCEPT_PATH_H +#define LEMON_CONCEPT_PATH_H + +#include +#include +#include + +namespace lemon { + namespace concepts { + + /// \addtogroup concept + /// @{ + + /// \brief A skeleton structure for representing directed paths in + /// a graph. + /// + /// A skeleton structure for representing directed paths in a + /// graph. + /// \param _Graph The graph type in which the path is. + /// + /// In a sense, the path can be treated as a list of edges. The + /// lemon path type stores just this list. As a consequence it + /// cannot enumerate the nodes in the path and the zero length + /// paths cannot store the source. + /// + template + class Path { + public: + + /// Type of the underlying graph. + typedef _Graph Graph; + /// Edge type of the underlying graph. + typedef typename Graph::Edge Edge; + + class EdgeIt; + + /// \brief Default constructor + Path() {} + + /// \brief Template constructor + template + Path(const CPath& cpath) {} + + /// \brief Template assigment + template + Path& operator=(const CPath& cpath) {} + + /// Length of the path ie. the number of edges in the path. + int length() const { return 0;} + + /// Returns whether the path is empty. + bool empty() const { return true;} + + /// Resets the path to an empty path. + void clear() {} + + /// \brief Lemon style iterator for path edges + /// + /// This class is used to iterate on the edges of the paths. 
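+ ///
+ /// (An illustrative sketch only: a typical loop over the edges of a
+ /// path \c p of some type modelling this concept.)
+ ///\code
+ /// for (Path<Graph>::EdgeIt e(p); e != INVALID; ++e) {
+ ///   Graph::Edge edge = e;
+ /// }
+ ///\endcode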
+ class EdgeIt { + public: + /// Default constructor + EdgeIt() {} + /// Invalid constructor + EdgeIt(Invalid) {} + /// Constructor for first edge + EdgeIt(const Path &) {} + + /// Conversion to Edge + operator Edge() const { return INVALID; } + + /// Next edge + EdgeIt& operator++() {return *this;} + + /// Comparison operator + bool operator==(const EdgeIt&) const {return true;} + /// Comparison operator + bool operator!=(const EdgeIt&) const {return true;} + /// Comparison operator + bool operator<(const EdgeIt&) const {return false;} + + }; + + template + struct Constraints { + void constraints() { + Path pc; + _Path p, pp(pc); + int l = p.length(); + int e = p.empty(); + p.clear(); + + p = pc; + + typename _Path::EdgeIt id, ii(INVALID), i(p); + + ++i; + typename Graph::Edge ed = i; + + e = (i == ii); + e = (i != ii); + e = (i < ii); + + ignore_unused_variable_warning(l); + ignore_unused_variable_warning(pp); + ignore_unused_variable_warning(e); + ignore_unused_variable_warning(id); + ignore_unused_variable_warning(ii); + ignore_unused_variable_warning(ed); + } + }; + + }; + + namespace _path_bits { + + template + struct PathDumperConstraints { + void constraints() { + int l = p.length(); + int e = p.empty(); + + typename _Path::EdgeIt id, i(p); + + ++i; + typename _Graph::Edge ed = i; + + e = (i == INVALID); + e = (i != INVALID); + + ignore_unused_variable_warning(l); + ignore_unused_variable_warning(e); + ignore_unused_variable_warning(id); + ignore_unused_variable_warning(ed); + } + _Path& p; + }; + + template + struct PathDumperConstraints< + _Graph, _Path, + typename enable_if::type + > { + void constraints() { + int l = p.length(); + int e = p.empty(); + + typename _Path::RevEdgeIt id, i(p); + + ++i; + typename _Graph::Edge ed = i; + + e = (i == INVALID); + e = (i != INVALID); + + ignore_unused_variable_warning(l); + ignore_unused_variable_warning(e); + ignore_unused_variable_warning(id); + ignore_unused_variable_warning(ed); + } + _Path& p; + }; + + } + + + /// \brief A skeleton structure for path dumpers. + /// + /// A skeleton structure for path dumpers. The path dumpers are + /// the generalization of the paths. The path dumpers can + /// enumerate the edges of the path wheter in forward or in + /// backward order. In most time these classes are not used + /// directly rather it used to assign a dumped class to a real + /// path type. + /// + /// The main purpose of this concept is that the shortest path + /// algorithms can enumerate easily the edges in reverse order. + /// If we would like to give back a real path from these + /// algorithms then we should create a temporarly path object. In + /// Lemon such algorithms gives back a path dumper what can + /// assigned to a real path and the dumpers can be implemented as + /// an adaptor class to the predecessor map. + + /// \param _Graph The graph type in which the path is. + /// + /// The paths can be constructed from any path type by a + /// template constructor or a template assignment operator. + /// + template + class PathDumper { + public: + + /// Type of the underlying graph. + typedef _Graph Graph; + /// Edge type of the underlying graph. + typedef typename Graph::Edge Edge; + + /// Length of the path ie. the number of edges in the path. + int length() const { return 0;} + + /// Returns whether the path is empty. + bool empty() const { return true;} + + /// \brief Forward or reverse dumping + /// + /// If the RevPathTag is defined and true then reverse dumping + /// is provided in the path dumper. 
In this case instead of the + /// EdgeIt the RevEdgeIt iterator should be implemented in the + /// dumper. + typedef False RevPathTag; + + /// \brief Lemon style iterator for path edges + /// + /// This class is used to iterate on the edges of the paths. + class EdgeIt { + public: + /// Default constructor + EdgeIt() {} + /// Invalid constructor + EdgeIt(Invalid) {} + /// Constructor for first edge + EdgeIt(const PathDumper&) {} + + /// Conversion to Edge + operator Edge() const { return INVALID; } + + /// Next edge + EdgeIt& operator++() {return *this;} + + /// Comparison operator + bool operator==(const EdgeIt&) const {return true;} + /// Comparison operator + bool operator!=(const EdgeIt&) const {return true;} + /// Comparison operator + bool operator<(const EdgeIt&) const {return false;} + + }; + + /// \brief Lemon style iterator for path edges + /// + /// This class is used to iterate on the edges of the paths in + /// reverse direction. + class RevEdgeIt { + public: + /// Default constructor + RevEdgeIt() {} + /// Invalid constructor + RevEdgeIt(Invalid) {} + /// Constructor for first edge + RevEdgeIt(const PathDumper &) {} + + /// Conversion to Edge + operator Edge() const { return INVALID; } + + /// Next edge + RevEdgeIt& operator++() {return *this;} + + /// Comparison operator + bool operator==(const RevEdgeIt&) const {return true;} + /// Comparison operator + bool operator!=(const RevEdgeIt&) const {return true;} + /// Comparison operator + bool operator<(const RevEdgeIt&) const {return false;} + + }; + + template + struct Constraints { + void constraints() { + function_requires<_path_bits:: + PathDumperConstraints >(); + } + }; + + }; + + + ///@} + } + +} // namespace lemon + +#endif // LEMON_CONCEPT_PATH_H diff --git a/src/lemon/concepts/ugraph.h b/src/lemon/concepts/ugraph.h new file mode 100644 index 0000000..7b0e8be --- /dev/null +++ b/src/lemon/concepts/ugraph.h @@ -0,0 +1,702 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +///\ingroup graph_concepts +///\file +///\brief The concept of Undirected Graphs. + +#ifndef LEMON_CONCEPT_UGRAPH_H +#define LEMON_CONCEPT_UGRAPH_H + +#include +#include +#include + +namespace lemon { + namespace concepts { + + /// \ingroup graph_concepts + /// + /// \brief Class describing the concept of Undirected Graphs. + /// + /// This class describes the common interface of all Undirected + /// Graphs. + /// + /// As all concept describing classes it provides only interface + /// without any sensible implementation. So any algorithm for + /// undirected graph should compile with this class, but it will not + /// run properly, of course. + /// + /// The LEMON undirected graphs also fulfill the concept of + /// directed graphs (\ref lemon::concepts::Graph "Graph + /// Concept"). Each undirected edges can be seen as two opposite + /// directed edge and consequently the undirected graph can be + /// seen as the direceted graph of these directed edges. 
The + /// UGraph has the UEdge inner class for the undirected edges and + /// the Edge type for the directed edges. The Edge type is + /// convertible to UEdge or inherited from it so from a directed + /// edge we can get the represented undirected edge. + /// + /// In the sense of the LEMON each undirected edge has a default + /// direction (it should be in every computer implementation, + /// because the order of undirected edge's nodes defines an + /// orientation). With the default orientation we can define that + /// the directed edge is forward or backward directed. With the \c + /// direction() and \c direct() function we can get the direction + /// of the directed edge and we can direct an undirected edge. + /// + /// The UEdgeIt is an iterator for the undirected edges. We can use + /// the UEdgeMap to map values for the undirected edges. The InEdgeIt and + /// OutEdgeIt iterates on the same undirected edges but with opposite + /// direction. The IncEdgeIt iterates also on the same undirected edges + /// as the OutEdgeIt and InEdgeIt but it is not convertible to Edge just + /// to UEdge. + class UGraph { + public: + /// \brief The undirected graph should be tagged by the + /// UndirectedTag. + /// + /// The undirected graph should be tagged by the UndirectedTag. This + /// tag helps the enable_if technics to make compile time + /// specializations for undirected graphs. + typedef True UndirectedTag; + + /// \brief The base type of node iterators, + /// or in other words, the trivial node iterator. + /// + /// This is the base type of each node iterator, + /// thus each kind of node iterator converts to this. + /// More precisely each kind of node iterator should be inherited + /// from the trivial node iterator. + class Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Node() { } + /// Copy constructor. + + /// Copy constructor. + /// + Node(const Node&) { } + + /// Invalid constructor \& conversion. + + /// This constructor initializes the iterator to be invalid. + /// \sa Invalid for more details. + Node(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Node) const { return true; } + + /// Inequality operator + + /// \sa operator==(Node n) + /// + bool operator!=(Node) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Node) const { return false; } + + }; + + /// This iterator goes through each node. + + /// This iterator goes through each node. + /// Its usage is quite simple, for example you can count the number + /// of nodes in graph \c g of type \c Graph like this: + ///\code + /// int count=0; + /// for (Graph::NodeIt n(g); n!=INVALID; ++n) ++count; + ///\endcode + class NodeIt : public Node { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + NodeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + NodeIt(const NodeIt& n) : Node(n) { } + /// Invalid constructor \& conversion. + + /// Initialize the iterator to be invalid. + /// \sa Invalid for more details. 
+ NodeIt(Invalid) { } + /// Sets the iterator to the first node. + + /// Sets the iterator to the first node of \c g. + /// + NodeIt(const UGraph&) { } + /// Node -> NodeIt conversion. + + /// Sets the iterator to the node of \c the graph pointed by + /// the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + NodeIt(const UGraph&, const Node&) { } + /// Next node. + + /// Assign the iterator to the next node. + /// + NodeIt& operator++() { return *this; } + }; + + + /// The base type of the undirected edge iterators. + + /// The base type of the undirected edge iterators. + /// + class UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + UEdge() { } + /// Copy constructor. + + /// Copy constructor. + /// + UEdge(const UEdge&) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + UEdge(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(UEdge) const { return true; } + /// Inequality operator + + /// \sa operator==(UEdge n) + /// + bool operator!=(UEdge) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(UEdge) const { return false; } + }; + + /// This iterator goes through each undirected edge. + + /// This iterator goes through each undirected edge of a graph. + /// Its usage is quite simple, for example you can count the number + /// of undirected edges in a graph \c g of type \c Graph as follows: + ///\code + /// int count=0; + /// for(Graph::UEdgeIt e(g); e!=INVALID; ++e) ++count; + ///\endcode + class UEdgeIt : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + UEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + UEdgeIt(const UEdgeIt& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + UEdgeIt(Invalid) { } + /// This constructor sets the iterator to the first undirected edge. + + /// This constructor sets the iterator to the first undirected edge. + UEdgeIt(const UGraph&) { } + /// UEdge -> UEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator. + /// This feature necessitates that each time we + /// iterate the undirected edge-set, the iteration order is the + /// same. + UEdgeIt(const UGraph&, const UEdge&) { } + /// Next undirected edge + + /// Assign the iterator to the next undirected edge. + UEdgeIt& operator++() { return *this; } + }; + + /// \brief This iterator goes trough the incident undirected + /// edges of a node. + /// + /// This iterator goes trough the incident undirected edges + /// of a certain node of a graph. You should assume that the + /// loop edges will be iterated twice. + /// + /// Its usage is quite simple, for example you can compute the + /// degree (i.e. count the number of incident edges of a node \c n + /// in graph \c g of type \c Graph as follows. 
+ /// + ///\code + /// int count=0; + /// for(Graph::IncEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + class IncEdgeIt : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + IncEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + IncEdgeIt(const IncEdgeIt& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + IncEdgeIt(Invalid) { } + /// This constructor sets the iterator to first incident edge. + + /// This constructor set the iterator to the first incident edge of + /// the node. + IncEdgeIt(const UGraph&, const Node&) { } + /// UEdge -> IncEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + IncEdgeIt(const UGraph&, const UEdge&) { } + /// Next incident edge + + /// Assign the iterator to the next incident edge + /// of the corresponding node. + IncEdgeIt& operator++() { return *this; } + }; + + /// The directed edge type. + + /// The directed edge type. It can be converted to the + /// undirected edge or it should be inherited from the undirected + /// edge. + class Edge : public UEdge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + Edge() { } + /// Copy constructor. + + /// Copy constructor. + /// + Edge(const Edge& e) : UEdge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + Edge(Invalid) { } + /// Equality operator + + /// Two iterators are equal if and only if they point to the + /// same object or both are invalid. + bool operator==(Edge) const { return true; } + /// Inequality operator + + /// \sa operator==(Edge n) + /// + bool operator!=(Edge) const { return true; } + + /// Artificial ordering operator. + + /// To allow the use of graph descriptors as key type in std::map or + /// similar associative container we require this. + /// + /// \note This operator only have to define some strict ordering of + /// the items; this order has nothing to do with the iteration + /// ordering of the items. + bool operator<(Edge) const { return false; } + + }; + /// This iterator goes through each directed edge. + + /// This iterator goes through each edge of a graph. + /// Its usage is quite simple, for example you can count the number + /// of edges in a graph \c g of type \c Graph as follows: + ///\code + /// int count=0; + /// for(Graph::EdgeIt e(g); e!=INVALID; ++e) ++count; + ///\endcode + class EdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + EdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + EdgeIt(const EdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + EdgeIt(Invalid) { } + /// This constructor sets the iterator to the first edge. + + /// This constructor sets the iterator to the first edge of \c g. + ///@param g the graph + EdgeIt(const UGraph &g) { ignore_unused_variable_warning(g); } + /// Edge -> EdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. 
+ EdgeIt(const UGraph&, const Edge&) { } + ///Next edge + + /// Assign the iterator to the next edge. + EdgeIt& operator++() { return *this; } + }; + + /// This iterator goes trough the outgoing directed edges of a node. + + /// This iterator goes trough the \e outgoing edges of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for (Graph::OutEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class OutEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + OutEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + OutEdgeIt(const OutEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + OutEdgeIt(Invalid) { } + /// This constructor sets the iterator to the first outgoing edge. + + /// This constructor sets the iterator to the first outgoing edge of + /// the node. + ///@param n the node + ///@param g the graph + OutEdgeIt(const UGraph& n, const Node& g) { + ignore_unused_variable_warning(n); + ignore_unused_variable_warning(g); + } + /// Edge -> OutEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + OutEdgeIt(const UGraph&, const Edge&) { } + ///Next outgoing edge + + /// Assign the iterator to the next + /// outgoing edge of the corresponding node. + OutEdgeIt& operator++() { return *this; } + }; + + /// This iterator goes trough the incoming directed edges of a node. + + /// This iterator goes trough the \e incoming edges of a certain node + /// of a graph. + /// Its usage is quite simple, for example you can count the number + /// of outgoing edges of a node \c n + /// in graph \c g of type \c Graph as follows. + ///\code + /// int count=0; + /// for(Graph::InEdgeIt e(g, n); e!=INVALID; ++e) ++count; + ///\endcode + + class InEdgeIt : public Edge { + public: + /// Default constructor + + /// @warning The default constructor sets the iterator + /// to an undefined value. + InEdgeIt() { } + /// Copy constructor. + + /// Copy constructor. + /// + InEdgeIt(const InEdgeIt& e) : Edge(e) { } + /// Initialize the iterator to be invalid. + + /// Initialize the iterator to be invalid. + /// + InEdgeIt(Invalid) { } + /// This constructor sets the iterator to first incoming edge. + + /// This constructor set the iterator to the first incoming edge of + /// the node. + ///@param n the node + ///@param g the graph + InEdgeIt(const UGraph& g, const Node& n) { + ignore_unused_variable_warning(n); + ignore_unused_variable_warning(g); + } + /// Edge -> InEdgeIt conversion + + /// Sets the iterator to the value of the trivial iterator \c e. + /// This feature necessitates that each time we + /// iterate the edge-set, the iteration order is the same. + InEdgeIt(const UGraph&, const Edge&) { } + /// Next incoming edge + + /// Assign the iterator to the next inedge of the corresponding node. + /// + InEdgeIt& operator++() { return *this; } + }; + + /// \brief Read write map of the nodes to type \c T. + /// + /// ReadWrite map of the nodes to type \c T. 
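+ ///
+ /// (An illustrative sketch only, not part of the concept: computing
+ /// the degree of every node of a graph \c g of a type \c Graph
+ /// modelling this concept, using a node map and \c IncEdgeIt.)
+ ///\code
+ /// Graph::NodeMap<int> deg(g, 0);
+ /// for (Graph::NodeIt n(g); n != INVALID; ++n)
+ ///   for (Graph::IncEdgeIt e(g, n); e != INVALID; ++e)
+ ///     deg.set(n, deg[n] + 1);
+ ///\endcode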
+ /// \sa Reference + template + class NodeMap : public ReadWriteMap< Node, T > + { + public: + + ///\e + NodeMap(const UGraph&) { } + ///\e + NodeMap(const UGraph&, T) { } + + ///Copy constructor + NodeMap(const NodeMap& nm) : ReadWriteMap< Node, T >(nm) { } + ///Assignment operator + template + NodeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Read write map of the directed edges to type \c T. + /// + /// Reference map of the directed edges to type \c T. + /// \sa Reference + template + class EdgeMap : public ReadWriteMap + { + public: + + ///\e + EdgeMap(const UGraph&) { } + ///\e + EdgeMap(const UGraph&, T) { } + ///Copy constructor + EdgeMap(const EdgeMap& em) : ReadWriteMap(em) { } + ///Assignment operator + template + EdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// Read write map of the undirected edges to type \c T. + + /// Reference map of the edges to type \c T. + /// \sa Reference + template + class UEdgeMap : public ReadWriteMap + { + public: + + ///\e + UEdgeMap(const UGraph&) { } + ///\e + UEdgeMap(const UGraph&, T) { } + ///Copy constructor + UEdgeMap(const UEdgeMap& em) : ReadWriteMap(em) {} + ///Assignment operator + template + UEdgeMap& operator=(const CMap&) { + checkConcept, CMap>(); + return *this; + } + }; + + /// \brief Direct the given undirected edge. + /// + /// Direct the given undirected edge. The returned edge source + /// will be the given node. + Edge direct(const UEdge&, const Node&) const { + return INVALID; + } + + /// \brief Direct the given undirected edge. + /// + /// Direct the given undirected edge. The returned edge + /// represents the given undirected edge and the direction comes + /// from the given bool. The source of the undirected edge and + /// the directed edge is the same when the given bool is true. + Edge direct(const UEdge&, bool) const { + return INVALID; + } + + /// \brief Returns true if the edge has default orientation. + /// + /// Returns whether the given directed edge is same orientation as + /// the corresponding undirected edge's default orientation. + bool direction(Edge) const { return true; } + + /// \brief Returns the opposite directed edge. + /// + /// Returns the opposite directed edge. + Edge oppositeEdge(Edge) const { return INVALID; } + + /// \brief Opposite node on an edge + /// + /// \return the opposite of the given Node on the given UEdge + Node oppositeNode(Node, UEdge) const { return INVALID; } + + /// \brief First node of the undirected edge. + /// + /// \return the first node of the given UEdge. + /// + /// Naturally undirected edges don't have direction and thus + /// don't have source and target node. But we use these two methods + /// to query the two nodes of the edge. The direction of the edge + /// which arises this way is called the inherent direction of the + /// undirected edge, and is used to define the "default" direction + /// of the directed versions of the edges. + /// \sa direction + Node source(UEdge) const { return INVALID; } + + /// \brief Second node of the undirected edge. + Node target(UEdge) const { return INVALID; } + + /// \brief Source node of the directed edge. + Node source(Edge) const { return INVALID; } + + /// \brief Target node of the directed edge. 
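+ ///
+ /// (An illustrative sketch only: converting between undirected and
+ /// directed edges with \c direct(), \c direction() and
+ /// \c oppositeEdge(), assuming a graph \c g of a type \c Graph
+ /// modelling this concept and an undirected edge \c ue.)
+ ///\code
+ /// Graph::Edge ef = g.direct(ue, true);   // forward directed copy
+ /// Graph::Edge eb = g.oppositeEdge(ef);   // the backward directed copy
+ /// bool fwd = g.direction(ef);            // true for ef, false for eb
+ ///\endcode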
+ Node target(Edge) const { return INVALID; } + + void first(Node&) const {} + void next(Node&) const {} + + void first(UEdge&) const {} + void next(UEdge&) const {} + + void first(Edge&) const {} + void next(Edge&) const {} + + void firstOut(Edge&, Node) const {} + void nextOut(Edge&) const {} + + void firstIn(Edge&, Node) const {} + void nextIn(Edge&) const {} + + + void firstInc(UEdge &, bool &, const Node &) const {} + void nextInc(UEdge &, bool &) const {} + + /// \brief Base node of the iterator + /// + /// Returns the base node (the source in this case) of the iterator + Node baseNode(OutEdgeIt e) const { + return source(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (the target in this case) of the + /// iterator + Node runningNode(OutEdgeIt e) const { + return target(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node (the target in this case) of the iterator + Node baseNode(InEdgeIt e) const { + return target(e); + } + /// \brief Running node of the iterator + /// + /// Returns the running node (the source in this case) of the + /// iterator + Node runningNode(InEdgeIt e) const { + return source(e); + } + + /// \brief Base node of the iterator + /// + /// Returns the base node of the iterator + Node baseNode(IncEdgeIt) const { + return INVALID; + } + + /// \brief Running node of the iterator + /// + /// Returns the running node of the iterator + Node runningNode(IncEdgeIt) const { + return INVALID; + } + + template + struct Constraints { + void constraints() { + checkConcept, Graph>(); + checkConcept, Graph>(); + } + }; + + }; + + } + +} + +#endif diff --git a/src/lemon/dfs.h b/src/lemon/dfs.h new file mode 100644 index 0000000..a003b69 --- /dev/null +++ b/src/lemon/dfs.h @@ -0,0 +1,1543 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_DFS_H +#define LEMON_DFS_H + +///\ingroup search +///\file +///\brief Dfs algorithm. + +#include +#include +#include +#include +#include +#include + +#include + +namespace lemon { + + + ///Default traits class of Dfs class. + + ///Default traits class of Dfs class. + ///\param GR Graph type. + template + struct DfsDefaultTraits + { + ///The graph type the algorithm runs on. + typedef GR Graph; + ///\brief The type of the map that stores the last + ///edges of the %DFS paths. + /// + ///The type of the map that stores the last + ///edges of the %DFS paths. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef typename Graph::template NodeMap PredMap; + ///Instantiates a PredMap. + + ///This function instantiates a \ref PredMap. + ///\param G is the graph, to which we would like to define the PredMap. + ///\todo The graph alone may be insufficient to initialize + static PredMap *createPredMap(const GR &G) + { + return new PredMap(G); + } + + ///The type of the map that indicates which nodes are processed. + + ///The type of the map that indicates which nodes are processed. 
+ ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef NullMap ProcessedMap; + ///Instantiates a ProcessedMap. + + ///This function instantiates a \ref ProcessedMap. + ///\param g is the graph, to which + ///we would like to define the \ref ProcessedMap +#ifdef DOXYGEN + static ProcessedMap *createProcessedMap(const GR &g) +#else + static ProcessedMap *createProcessedMap(const GR &) +#endif + { + return new ProcessedMap(); + } + ///The type of the map that indicates which nodes are reached. + + ///The type of the map that indicates which nodes are reached. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + ///Instantiates a ReachedMap. + + ///This function instantiates a \ref ReachedMap. + ///\param G is the graph, to which + ///we would like to define the \ref ReachedMap. + static ReachedMap *createReachedMap(const GR &G) + { + return new ReachedMap(G); + } + ///The type of the map that stores the dists of the nodes. + + ///The type of the map that stores the dists of the nodes. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef typename Graph::template NodeMap DistMap; + ///Instantiates a DistMap. + + ///This function instantiates a \ref DistMap. + ///\param G is the graph, to which we would like to define the \ref DistMap + static DistMap *createDistMap(const GR &G) + { + return new DistMap(G); + } + }; + + ///%DFS algorithm class. + + ///\ingroup search + ///This class provides an efficient implementation of the %DFS algorithm. + /// + ///\param GR The graph type the algorithm runs on. The default value is + ///\ref ListGraph. The value of GR is not used directly by Dfs, it + ///is only passed to \ref DfsDefaultTraits. + ///\param TR Traits class to set various data types used by the algorithm. + ///The default traits class is + ///\ref DfsDefaultTraits "DfsDefaultTraits". + ///See \ref DfsDefaultTraits for the documentation of + ///a Dfs traits class. + /// + ///\author Jacint Szabo and Alpar Juttner +#ifdef DOXYGEN + template +#else + template > +#endif + class Dfs { + public: + /** + * \brief \ref Exception for uninitialized parameters. + * + * This error represents problems in the initialization + * of the parameters of the algorithms. + */ + class UninitializedParameter : public lemon::UninitializedParameter { + public: + virtual const char* what() const throw() { + return "lemon::Dfs::UninitializedParameter"; + } + }; + + typedef TR Traits; + ///The type of the underlying graph. + typedef typename TR::Graph Graph; + ///\e + typedef typename Graph::Node Node; + ///\e + typedef typename Graph::NodeIt NodeIt; + ///\e + typedef typename Graph::Edge Edge; + ///\e + typedef typename Graph::OutEdgeIt OutEdgeIt; + + ///\brief The type of the map that stores the last + ///edges of the %DFS paths. + typedef typename TR::PredMap PredMap; + ///The type of the map indicating which nodes are reached. + typedef typename TR::ReachedMap ReachedMap; + ///The type of the map indicating which nodes are processed. + typedef typename TR::ProcessedMap ProcessedMap; + ///The type of the map that stores the dists of the nodes. + typedef typename TR::DistMap DistMap; + private: + /// Pointer to the underlying graph. + const Graph *G; + ///Pointer to the map of predecessors edges. 
+ PredMap *_pred; + ///Indicates if \ref _pred is locally allocated (\c true) or not. + bool local_pred; + ///Pointer to the map of distances. + DistMap *_dist; + ///Indicates if \ref _dist is locally allocated (\c true) or not. + bool local_dist; + ///Pointer to the map of reached status of the nodes. + ReachedMap *_reached; + ///Indicates if \ref _reached is locally allocated (\c true) or not. + bool local_reached; + ///Pointer to the map of processed status of the nodes. + ProcessedMap *_processed; + ///Indicates if \ref _processed is locally allocated (\c true) or not. + bool local_processed; + + std::vector _stack; + int _stack_head; + + ///Creates the maps if necessary. + + ///\todo Better memory allocation (instead of new). + void create_maps() + { + if(!_pred) { + local_pred = true; + _pred = Traits::createPredMap(*G); + } + if(!_dist) { + local_dist = true; + _dist = Traits::createDistMap(*G); + } + if(!_reached) { + local_reached = true; + _reached = Traits::createReachedMap(*G); + } + if(!_processed) { + local_processed = true; + _processed = Traits::createProcessedMap(*G); + } + } + + protected: + + Dfs() {} + + public: + + typedef Dfs Create; + + ///\name Named template parameters + + ///@{ + + template + struct DefPredMapTraits : public Traits { + typedef T PredMap; + static PredMap *createPredMap(const Graph &G) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///PredMap type + /// + ///\ref named-templ-param "Named parameter" for setting PredMap type + /// + template + struct DefPredMap : public Dfs > { + typedef Dfs > Create; + }; + + + template + struct DefDistMapTraits : public Traits { + typedef T DistMap; + static DistMap *createDistMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///DistMap type + /// + ///\ref named-templ-param "Named parameter" for setting DistMap + ///type + template + struct DefDistMap { + typedef Dfs > Create; + }; + + template + struct DefReachedMapTraits : public Traits { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///ReachedMap type + /// + ///\ref named-templ-param "Named parameter" for setting ReachedMap type + /// + template + struct DefReachedMap : public Dfs< Graph, DefReachedMapTraits > { + typedef Dfs< Graph, DefReachedMapTraits > Create; + }; + + template + struct DefProcessedMapTraits : public Traits { + typedef T ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &) + { + throw UninitializedParameter(); + } + }; + ///\brief \ref named-templ-param "Named parameter" for setting + ///ProcessedMap type + /// + ///\ref named-templ-param "Named parameter" for setting ProcessedMap type + /// + template + struct DefProcessedMap : public Dfs< Graph, DefProcessedMapTraits > { + typedef Dfs< Graph, DefProcessedMapTraits > Create; + }; + + struct DefGraphProcessedMapTraits : public Traits { + typedef typename Graph::template NodeMap ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &G) + { + return new ProcessedMap(G); + } + }; + ///\brief \ref named-templ-param "Named parameter" + ///for setting the ProcessedMap type to be Graph::NodeMap. + /// + ///\ref named-templ-param "Named parameter" + ///for setting the ProcessedMap type to be Graph::NodeMap. 
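+ ///
+ ///(An illustrative sketch only: the named template parameters above
+ ///are used through their nested \c Create typedef, e.g. with a user
+ ///supplied predecessor map type \c MyPredMap. The map itself must
+ ///then be attached with \c predMap() before running the algorithm.)
+ ///\code
+ ///Dfs<Graph>::DefPredMap<MyPredMap>::Create dfs(g);
+ ///MyPredMap pmap(g);
+ ///dfs.predMap(pmap);
+ ///\endcode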
+ ///If you don't set it explicitely, it will be automatically allocated. + template + class DefProcessedMapToBeDefaultMap : + public Dfs< Graph, DefGraphProcessedMapTraits> { + typedef Dfs< Graph, DefGraphProcessedMapTraits> Create; + }; + + ///@} + + public: + + ///Constructor. + + ///\param _G the graph the algorithm will run on. + /// + Dfs(const Graph& _G) : + G(&_G), + _pred(NULL), local_pred(false), + _dist(NULL), local_dist(false), + _reached(NULL), local_reached(false), + _processed(NULL), local_processed(false) + { } + + ///Destructor. + ~Dfs() + { + if(local_pred) delete _pred; + if(local_dist) delete _dist; + if(local_reached) delete _reached; + if(local_processed) delete _processed; + } + + ///Sets the map storing the predecessor edges. + + ///Sets the map storing the predecessor edges. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destuctor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Dfs &predMap(PredMap &m) + { + if(local_pred) { + delete _pred; + local_pred=false; + } + _pred = &m; + return *this; + } + + ///Sets the map storing the distances calculated by the algorithm. + + ///Sets the map storing the distances calculated by the algorithm. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destuctor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Dfs &distMap(DistMap &m) + { + if(local_dist) { + delete _dist; + local_dist=false; + } + _dist = &m; + return *this; + } + + ///Sets the map indicating if a node is reached. + + ///Sets the map indicating if a node is reached. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destuctor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Dfs &reachedMap(ReachedMap &m) + { + if(local_reached) { + delete _reached; + local_reached=false; + } + _reached = &m; + return *this; + } + + ///Sets the map indicating if a node is processed. + + ///Sets the map indicating if a node is processed. + ///If you don't use this function before calling \ref run(), + ///it will allocate one. The destuctor deallocates this + ///automatically allocated map, of course. + ///\return (*this) + Dfs &processedMap(ProcessedMap &m) + { + if(local_processed) { + delete _processed; + local_processed=false; + } + _processed = &m; + return *this; + } + + public: + ///\name Execution control + ///The simplest way to execute the algorithm is to use + ///one of the member functions called \c run(...). + ///\n + ///If you need more control on the execution, + ///first you must call \ref init(), then you can add a source node + ///with \ref addSource(). + ///Finally \ref start() will perform the actual path + ///computation. + + ///@{ + + ///Initializes the internal data structures. + + ///Initializes the internal data structures. + /// + void init() + { + create_maps(); + _stack.resize(countNodes(*G)); + _stack_head=-1; + for ( NodeIt u(*G) ; u!=INVALID ; ++u ) { + _pred->set(u,INVALID); + // _predNode->set(u,INVALID); + _reached->set(u,false); + _processed->set(u,false); + } + } + + ///Adds a new source node. + + ///Adds a new source node to the set of nodes to be processed. + /// + ///\warning dists are wrong (or at least strange) + ///in case of multiple sources. 
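+  ///When more control is needed than \ref run() offers, the edges can be
+  ///processed one by one. A hedged sketch (assuming a \c Dfs object \c d
+  ///and a single source node \c s):
+  ///\code
+  ///  d.init();
+  ///  d.addSource(s);
+  ///  while (!d.emptyQueue()) {
+  ///    Edge e = d.processNextEdge();
+  ///    // e is the edge that has just been examined
+  ///  }
+  ///\endcode
+  ///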
+ void addSource(Node s) + { + if(!(*_reached)[s]) + { + _reached->set(s,true); + _pred->set(s,INVALID); + OutEdgeIt e(*G,s); + if(e!=INVALID) { + _stack[++_stack_head]=e; + _dist->set(s,_stack_head); + } + else { + _processed->set(s,true); + _dist->set(s,0); + } + } + } + + ///Processes the next edge. + + ///Processes the next edge. + /// + ///\return The processed edge. + /// + ///\pre The stack must not be empty! + Edge processNextEdge() + { + Node m; + Edge e=_stack[_stack_head]; + if(!(*_reached)[m=G->target(e)]) { + _pred->set(m,e); + _reached->set(m,true); + ++_stack_head; + _stack[_stack_head] = OutEdgeIt(*G, m); + _dist->set(m,_stack_head); + } + else { + m=G->source(e); + ++_stack[_stack_head]; + } + while(_stack_head>=0 && _stack[_stack_head]==INVALID) { + _processed->set(m,true); + --_stack_head; + if(_stack_head>=0) { + m=G->source(_stack[_stack_head]); + ++_stack[_stack_head]; + } + } + return e; + } + ///Next edge to be processed. + + ///Next edge to be processed. + /// + ///\return The next edge to be processed or INVALID if the stack is + /// empty. + OutEdgeIt nextEdge() + { + return _stack_head>=0?_stack[_stack_head]:INVALID; + } + + ///\brief Returns \c false if there are nodes + ///to be processed in the queue + /// + ///Returns \c false if there are nodes + ///to be processed in the queue + bool emptyQueue() { return _stack_head<0; } + ///Returns the number of the nodes to be processed. + + ///Returns the number of the nodes to be processed in the queue. + int queueSize() { return _stack_head+1; } + + ///Executes the algorithm. + + ///Executes the algorithm. + /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///This method runs the %DFS algorithm from the root node(s) + ///in order to + ///compute the + ///%DFS path to each node. The algorithm computes + ///- The %DFS tree. + ///- The distance of each node from the root(s) in the %DFS tree. + /// + void start() + { + while ( !emptyQueue() ) processNextEdge(); + } + + ///Executes the algorithm until \c dest is reached. + + ///Executes the algorithm until \c dest is reached. + /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///This method runs the %DFS algorithm from the root node(s) + ///in order to + ///compute the + ///%DFS path to \c dest. The algorithm computes + ///- The %DFS path to \c dest. + ///- The distance of \c dest from the root(s) in the %DFS tree. + /// + void start(Node dest) + { + while ( !emptyQueue() && G->target(_stack[_stack_head])!=dest ) + processNextEdge(); + } + + ///Executes the algorithm until a condition is met. + + ///Executes the algorithm until a condition is met. + /// + ///\pre init() must be called and at least one node should be added + ///with addSource() before using this function. + /// + ///\param em must be a bool (or convertible) edge map. The algorithm + ///will stop when it reaches an edge \c e with em[e] true. + /// + ///\return The reached edge \c e with em[e] true or + ///\c INVALID if no such edge was found. + /// + ///\warning Contrary to \ref Bfs and \ref Dijkstra, \c em is an edge map, + ///not a node map. + template + Edge start(const EM &em) + { + while ( !emptyQueue() && !em[_stack[_stack_head]] ) + processNextEdge(); + return emptyQueue() ? INVALID : _stack[_stack_head]; + } + + ///Runs %DFS algorithm to visit all nodes in the graph. 
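+  ///For early termination, a hedged sketch using the edge-map overload of
+  ///\c start() (assuming a bool edge map \c filter marking the edges where
+  ///the search should stop):
+  ///\code
+  ///  d.init();
+  ///  d.addSource(s);
+  ///  Edge hit = d.start(filter);   // INVALID if no marked edge was reached
+  ///\endcode
+  ///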
+ + ///This method runs the %DFS algorithm in order to + ///compute the + ///%DFS path to each node. The algorithm computes + ///- The %DFS tree. + ///- The distance of each node from the root in the %DFS tree. + /// + ///\note d.run() is just a shortcut of the following code. + ///\code + /// d.init(); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!d.reached(it)) { + /// d.addSource(it); + /// d.start(); + /// } + /// } + ///\endcode + void run() { + init(); + for (NodeIt it(*G); it != INVALID; ++it) { + if (!reached(it)) { + addSource(it); + start(); + } + } + } + + ///Runs %DFS algorithm from node \c s. + + ///This method runs the %DFS algorithm from a root node \c s + ///in order to + ///compute the + ///%DFS path to each node. The algorithm computes + ///- The %DFS tree. + ///- The distance of each node from the root in the %DFS tree. + /// + ///\note d.run(s) is just a shortcut of the following code. + ///\code + /// d.init(); + /// d.addSource(s); + /// d.start(); + ///\endcode + void run(Node s) { + init(); + addSource(s); + start(); + } + + ///Finds the %DFS path between \c s and \c t. + + ///Finds the %DFS path between \c s and \c t. + /// + ///\return The length of the %DFS s---t path if there exists one, + ///0 otherwise. + ///\note Apart from the return value, d.run(s,t) is + ///just a shortcut of the following code. + ///\code + /// d.init(); + /// d.addSource(s); + /// d.start(t); + ///\endcode + int run(Node s,Node t) { + init(); + addSource(s); + start(t); + return reached(t)?_stack_head+1:0; + } + + ///@} + + ///\name Query Functions + ///The result of the %DFS algorithm can be obtained using these + ///functions.\n + ///Before the use of these functions, + ///either run() or start() must be called. + + ///@{ + + typedef PredMapPath Path; + + ///Gives back the shortest path. + + ///Gives back the shortest path. + ///\pre The \c t should be reachable from the source. + Path path(Node t) + { + return Path(*G, *_pred, t); + } + + ///The distance of a node from the root(s). + + ///Returns the distance of a node from the root(s). + ///\pre \ref run() must be called before using this function. + ///\warning If node \c v is unreachable from the root(s) then the return + ///value of this funcion is undefined. + int dist(Node v) const { return (*_dist)[v]; } + + ///Returns the 'previous edge' of the %DFS tree. + + ///For a node \c v it returns the 'previous edge' + ///of the %DFS path, + ///i.e. it returns the last edge of a %DFS path from the root(s) to \c + ///v. It is \ref INVALID + ///if \c v is unreachable from the root(s) or \c v is a root. The + ///%DFS tree used here is equal to the %DFS tree used in + ///\ref predNode(). + ///\pre Either \ref run() or \ref start() must be called before using + ///this function. + Edge predEdge(Node v) const { return (*_pred)[v];} + + ///Returns the 'previous node' of the %DFS tree. + + ///For a node \c v it returns the 'previous node' + ///of the %DFS tree, + ///i.e. it returns the last but one node from a %DFS path from the + ///root(s) to \c v. + ///It is INVALID if \c v is unreachable from the root(s) or + ///if \c v itself a root. + ///The %DFS tree used here is equal to the %DFS + ///tree used in \ref predEdge(). + ///\pre Either \ref run() or \ref start() must be called before + ///using this function. + Node predNode(Node v) const { return (*_pred)[v]==INVALID ? INVALID: + G->source((*_pred)[v]); } + + ///Returns a reference to the NodeMap of distances. + + ///Returns a reference to the NodeMap of distances. 
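+  ///Putting the pieces together, a hedged sketch (assuming a \c ListGraph
+  ///\c g with nodes \c s and \c t):
+  ///\code
+  ///  lemon::Dfs<lemon::ListGraph> d(g);
+  ///  d.run(s);
+  ///  if (d.reached(t)) {
+  ///    int len = d.dist(t);     // depth of t in the DFS tree
+  ///    Node u = d.predNode(t);  // last but one node of the DFS path to t
+  ///    Edge e = d.predEdge(t);  // last edge of the DFS path to t
+  ///  }
+  ///\endcode
+  ///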
+ ///\pre Either \ref run() or \ref init() must + ///be called before using this function. + const DistMap &distMap() const { return *_dist;} + + ///Returns a reference to the %DFS edge-tree map. + + ///Returns a reference to the NodeMap of the edges of the + ///%DFS tree. + ///\pre Either \ref run() or \ref init() + ///must be called before using this function. + const PredMap &predMap() const { return *_pred;} + + ///Checks if a node is reachable from the root. + + ///Returns \c true if \c v is reachable from the root(s). + ///\warning The source nodes are inditated as unreachable. + ///\pre Either \ref run() or \ref start() + ///must be called before using this function. + /// + bool reached(Node v) { return (*_reached)[v]; } + + ///@} + }; + + ///Default traits class of Dfs function. + + ///Default traits class of Dfs function. + ///\param GR Graph type. + template + struct DfsWizardDefaultTraits + { + ///The graph type the algorithm runs on. + typedef GR Graph; + ///\brief The type of the map that stores the last + ///edges of the %DFS paths. + /// + ///The type of the map that stores the last + ///edges of the %DFS paths. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef NullMap PredMap; + ///Instantiates a PredMap. + + ///This function instantiates a \ref PredMap. + ///\param g is the graph, to which we would like to define the PredMap. + ///\todo The graph alone may be insufficient to initialize +#ifdef DOXYGEN + static PredMap *createPredMap(const GR &g) +#else + static PredMap *createPredMap(const GR &) +#endif + { + return new PredMap(); + } + + ///The type of the map that indicates which nodes are processed. + + ///The type of the map that indicates which nodes are processed. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef NullMap ProcessedMap; + ///Instantiates a ProcessedMap. + + ///This function instantiates a \ref ProcessedMap. + ///\param g is the graph, to which + ///we would like to define the \ref ProcessedMap +#ifdef DOXYGEN + static ProcessedMap *createProcessedMap(const GR &g) +#else + static ProcessedMap *createProcessedMap(const GR &) +#endif + { + return new ProcessedMap(); + } + ///The type of the map that indicates which nodes are reached. + + ///The type of the map that indicates which nodes are reached. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + ///\todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + ///Instantiates a ReachedMap. + + ///This function instantiates a \ref ReachedMap. + ///\param G is the graph, to which + ///we would like to define the \ref ReachedMap. + static ReachedMap *createReachedMap(const GR &G) + { + return new ReachedMap(G); + } + ///The type of the map that stores the dists of the nodes. + + ///The type of the map that stores the dists of the nodes. + ///It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// + typedef NullMap DistMap; + ///Instantiates a DistMap. + + ///This function instantiates a \ref DistMap. + ///\param g is the graph, to which we would like to define the \ref DistMap +#ifdef DOXYGEN + static DistMap *createDistMap(const GR &g) +#else + static DistMap *createDistMap(const GR &) +#endif + { + return new DistMap(); + } + }; + + /// Default traits used by \ref DfsWizard + + /// To make it easier to use Dfs algorithm + ///we have created a wizard class. 
+ /// This \ref DfsWizard class needs default traits, + ///as well as the \ref Dfs class. + /// The \ref DfsWizardBase is a class to be the default traits of the + /// \ref DfsWizard class. + template + class DfsWizardBase : public DfsWizardDefaultTraits + { + + typedef DfsWizardDefaultTraits Base; + protected: + /// Type of the nodes in the graph. + typedef typename Base::Graph::Node Node; + + /// Pointer to the underlying graph. + void *_g; + ///Pointer to the map of reached nodes. + void *_reached; + ///Pointer to the map of processed nodes. + void *_processed; + ///Pointer to the map of predecessors edges. + void *_pred; + ///Pointer to the map of distances. + void *_dist; + ///Pointer to the source node. + Node _source; + + public: + /// Constructor. + + /// This constructor does not require parameters, therefore it initiates + /// all of the attributes to default values (0, INVALID). + DfsWizardBase() : _g(0), _reached(0), _processed(0), _pred(0), + _dist(0), _source(INVALID) {} + + /// Constructor. + + /// This constructor requires some parameters, + /// listed in the parameters list. + /// Others are initiated to 0. + /// \param g is the initial value of \ref _g + /// \param s is the initial value of \ref _source + DfsWizardBase(const GR &g, Node s=INVALID) : + _g(reinterpret_cast(const_cast(&g))), + _reached(0), _processed(0), _pred(0), _dist(0), _source(s) {} + + }; + + /// A class to make the usage of the Dfs algorithm easier + + /// This class is created to make it easier to use the Dfs algorithm. + /// It uses the functions and features of the plain \ref Dfs, + /// but it is much simpler to use it. + /// + /// Simplicity means that the way to change the types defined + /// in the traits class is based on functions that returns the new class + /// and not on templatable built-in classes. + /// When using the plain \ref Dfs + /// the new class with the modified type comes from + /// the original class by using the :: + /// operator. In the case of \ref DfsWizard only + /// a function have to be called and it will + /// return the needed class. + /// + /// It does not have own \ref run method. When its \ref run method is called + /// it initiates a plain \ref Dfs object, and calls the \ref Dfs::run + /// method of it. + template + class DfsWizard : public TR + { + typedef TR Base; + + ///The type of the underlying graph. + typedef typename TR::Graph Graph; + //\e + typedef typename Graph::Node Node; + //\e + typedef typename Graph::NodeIt NodeIt; + //\e + typedef typename Graph::Edge Edge; + //\e + typedef typename Graph::OutEdgeIt OutEdgeIt; + + ///\brief The type of the map that stores + ///the reached nodes + typedef typename TR::ReachedMap ReachedMap; + ///\brief The type of the map that stores + ///the processed nodes + typedef typename TR::ProcessedMap ProcessedMap; + ///\brief The type of the map that stores the last + ///edges of the %DFS paths. + typedef typename TR::PredMap PredMap; + ///The type of the map that stores the distances of the nodes. + typedef typename TR::DistMap DistMap; + + public: + /// Constructor. + DfsWizard() : TR() {} + + /// Constructor that requires parameters. + + /// Constructor that requires parameters. + /// These parameters will be the default values for the traits class. + DfsWizard(const Graph &g, Node s=INVALID) : + TR(g,s) {} + + ///Copy constructor + DfsWizard(const TR &b) : TR(b) {} + + ~DfsWizard() {} + + ///Runs Dfs algorithm from a given node. + + ///Runs Dfs algorithm from a given node. 
+ ///The node can be given by the \ref source function. + void run() + { + if(Base::_source==INVALID) throw UninitializedParameter(); + Dfs alg(*reinterpret_cast(Base::_g)); + if(Base::_reached) + alg.reachedMap(*reinterpret_cast(Base::_reached)); + if(Base::_processed) + alg.processedMap(*reinterpret_cast(Base::_processed)); + if(Base::_pred) + alg.predMap(*reinterpret_cast(Base::_pred)); + if(Base::_dist) + alg.distMap(*reinterpret_cast(Base::_dist)); + alg.run(Base::_source); + } + + ///Runs Dfs algorithm from the given node. + + ///Runs Dfs algorithm from the given node. + ///\param s is the given source. + void run(Node s) + { + Base::_source=s; + run(); + } + + template + struct DefPredMapBase : public Base { + typedef T PredMap; + static PredMap *createPredMap(const Graph &) { return 0; }; + DefPredMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting PredMap type + /// + /// \ref named-templ-param "Named parameter" + ///function for setting PredMap type + /// + template + DfsWizard > predMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return DfsWizard >(*this); + } + + + template + struct DefReachedMapBase : public Base { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &) { return 0; }; + DefReachedMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting ReachedMap + /// + /// \ref named-templ-param "Named parameter" + ///function for setting ReachedMap + /// + template + DfsWizard > reachedMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return DfsWizard >(*this); + } + + + template + struct DefProcessedMapBase : public Base { + typedef T ProcessedMap; + static ProcessedMap *createProcessedMap(const Graph &) { return 0; }; + DefProcessedMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting ProcessedMap + /// + /// \ref named-templ-param "Named parameter" + ///function for setting ProcessedMap + /// + template + DfsWizard > processedMap(const T &t) + { + Base::_pred=reinterpret_cast(const_cast(&t)); + return DfsWizard >(*this); + } + + template + struct DefDistMapBase : public Base { + typedef T DistMap; + static DistMap *createDistMap(const Graph &) { return 0; }; + DefDistMapBase(const TR &b) : TR(b) {} + }; + + ///\brief \ref named-templ-param "Named parameter" + ///function for setting DistMap type + /// + /// \ref named-templ-param "Named parameter" + ///function for setting DistMap type + /// + template + DfsWizard > distMap(const T &t) + { + Base::_dist=reinterpret_cast(const_cast(&t)); + return DfsWizard >(*this); + } + + /// Sets the source node, from which the Dfs algorithm runs. + + /// Sets the source node, from which the Dfs algorithm runs. + /// \param s is the source node. + DfsWizard &source(Node s) + { + Base::_source=s; + return *this; + } + + }; + + ///Function type interface for Dfs algorithm. + + ///\ingroup search + ///Function type interface for Dfs algorithm. + /// + ///This function also has several + ///\ref named-templ-func-param "named parameters", + ///they are declared as the members of class \ref DfsWizard. + ///The following + ///example shows how to use these parameters. + ///\code + /// dfs(g,source).predMap(preds).run(); + ///\endcode + ///\warning Don't forget to put the \ref DfsWizard::run() "run()" + ///to the end of the parameter list. 
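+  ///A further hedged variant, combining the \ref DfsWizard::source "source()"
+  ///and \ref DfsWizard::distMap "distMap()" named parameters (\c dist is a
+  ///user supplied, writable distance node map):
+  ///\code
+  ///  dfs(g).source(s).distMap(dist).run();
+  ///\endcode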
+ ///\sa DfsWizard + ///\sa Dfs + template + DfsWizard > + dfs(const GR &g,typename GR::Node s=INVALID) + { + return DfsWizard >(g,s); + } + +#ifdef DOXYGEN + /// \brief Visitor class for dfs. + /// + /// It gives a simple interface for a functional interface for dfs + /// traversal. The traversal on a linear data structure. + template + struct DfsVisitor { + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + /// \brief Called when the edge reach a node. + /// + /// It is called when the dfs find an edge which target is not + /// reached yet. + void discover(const Edge& edge) {} + /// \brief Called when the node reached first time. + /// + /// It is Called when the node reached first time. + void reach(const Node& node) {} + /// \brief Called when we step back on an edge. + /// + /// It is called when the dfs should step back on the edge. + void backtrack(const Edge& edge) {} + /// \brief Called when we step back from the node. + /// + /// It is called when we step back from the node. + void leave(const Node& node) {} + /// \brief Called when the edge examined but target of the edge + /// already discovered. + /// + /// It called when the edge examined but the target of the edge + /// already discovered. + void examine(const Edge& edge) {} + /// \brief Called for the source node of the dfs. + /// + /// It is called for the source node of the dfs. + void start(const Node& node) {} + /// \brief Called when we leave the source node of the dfs. + /// + /// It is called when we leave the source node of the dfs. + void stop(const Node& node) {} + + }; +#else + template + struct DfsVisitor { + typedef _Graph Graph; + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + void discover(const Edge&) {} + void reach(const Node&) {} + void backtrack(const Edge&) {} + void leave(const Node&) {} + void examine(const Edge&) {} + void start(const Node&) {} + void stop(const Node&) {} + + template + struct Constraints { + void constraints() { + Edge edge; + Node node; + visitor.discover(edge); + visitor.reach(node); + visitor.backtrack(edge); + visitor.leave(node); + visitor.examine(edge); + visitor.start(node); + visitor.stop(edge); + } + _Visitor& visitor; + }; + }; +#endif + + /// \brief Default traits class of DfsVisit class. + /// + /// Default traits class of DfsVisit class. + /// \param _Graph Graph type. + template + struct DfsVisitDefaultTraits { + + /// \brief The graph type the algorithm runs on. + typedef _Graph Graph; + + /// \brief The type of the map that indicates which nodes are reached. + /// + /// The type of the map that indicates which nodes are reached. + /// It must meet the \ref concepts::WriteMap "WriteMap" concept. + /// \todo named parameter to set this type, function to read and write. + typedef typename Graph::template NodeMap ReachedMap; + + /// \brief Instantiates a ReachedMap. + /// + /// This function instantiates a \ref ReachedMap. + /// \param graph is the graph, to which + /// we would like to define the \ref ReachedMap. + static ReachedMap *createReachedMap(const Graph &graph) { + return new ReachedMap(graph); + } + + }; + + /// %DFS Visit algorithm class. + + /// \ingroup search + /// This class provides an efficient implementation of the %DFS algorithm + /// with visitor interface. + /// + /// The %DfsVisit class provides an alternative interface to the Dfs + /// class. It works with callback mechanism, the DfsVisit object calls + /// on every dfs event the \c Visitor class member functions. 
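+  /// A hedged sketch of a user defined visitor that merely counts the
+  /// reached nodes (the class name and the counter are illustrative only):
+  ///\code
+  ///  struct CountingVisitor : public lemon::DfsVisitor<lemon::ListGraph> {
+  ///    int node_count;
+  ///    CountingVisitor() : node_count(0) {}
+  ///    void reach(const lemon::ListGraph::Node&) { ++node_count; }
+  ///  };
+  ///\endcode
+  ///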
+ /// + /// \param _Graph The graph type the algorithm runs on. The default value is + /// \ref ListGraph. The value of _Graph is not used directly by Dfs, it + /// is only passed to \ref DfsDefaultTraits. + /// \param _Visitor The Visitor object for the algorithm. The + /// \ref DfsVisitor "DfsVisitor<_Graph>" is an empty Visitor which + /// does not observe the Dfs events. If you want to observe the dfs + /// events you should implement your own Visitor class. + /// \param _Traits Traits class to set various data types used by the + /// algorithm. The default traits class is + /// \ref DfsVisitDefaultTraits "DfsVisitDefaultTraits<_Graph>". + /// See \ref DfsVisitDefaultTraits for the documentation of + /// a Dfs visit traits class. + /// + /// \author Jacint Szabo, Alpar Juttner and Balazs Dezso +#ifdef DOXYGEN + template +#else + template , + typename _Traits = DfsDefaultTraits<_Graph> > +#endif + class DfsVisit { + public: + + /// \brief \ref Exception for uninitialized parameters. + /// + /// This error represents problems in the initialization + /// of the parameters of the algorithms. + class UninitializedParameter : public lemon::UninitializedParameter { + public: + virtual const char* what() const throw() + { + return "lemon::DfsVisit::UninitializedParameter"; + } + }; + + typedef _Traits Traits; + + typedef typename Traits::Graph Graph; + + typedef _Visitor Visitor; + + ///The type of the map indicating which nodes are reached. + typedef typename Traits::ReachedMap ReachedMap; + + private: + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + typedef typename Graph::OutEdgeIt OutEdgeIt; + + /// Pointer to the underlying graph. + const Graph *_graph; + /// Pointer to the visitor object. + Visitor *_visitor; + ///Pointer to the map of reached status of the nodes. + ReachedMap *_reached; + ///Indicates if \ref _reached is locally allocated (\c true) or not. + bool local_reached; + + std::vector _stack; + int _stack_head; + + /// \brief Creates the maps if necessary. + /// + /// Creates the maps if necessary. + void create_maps() { + if(!_reached) { + local_reached = true; + _reached = Traits::createReachedMap(*_graph); + } + } + + protected: + + DfsVisit() {} + + public: + + typedef DfsVisit Create; + + /// \name Named template parameters + + ///@{ + template + struct DefReachedMapTraits : public Traits { + typedef T ReachedMap; + static ReachedMap *createReachedMap(const Graph &graph) { + throw UninitializedParameter(); + } + }; + /// \brief \ref named-templ-param "Named parameter" for setting + /// ReachedMap type + /// + /// \ref named-templ-param "Named parameter" for setting ReachedMap type + template + struct DefReachedMap : public DfsVisit< Graph, Visitor, + DefReachedMapTraits > { + typedef DfsVisit< Graph, Visitor, DefReachedMapTraits > Create; + }; + ///@} + + public: + + /// \brief Constructor. + /// + /// Constructor. + /// + /// \param graph the graph the algorithm will run on. + /// \param visitor The visitor of the algorithm. + /// + DfsVisit(const Graph& graph, Visitor& visitor) + : _graph(&graph), _visitor(&visitor), + _reached(0), local_reached(false) {} + + /// \brief Destructor. + /// + /// Destructor. + ~DfsVisit() { + if(local_reached) delete _reached; + } + + /// \brief Sets the map indicating if a node is reached. + /// + /// Sets the map indicating if a node is reached. + /// If you don't use this function before calling \ref run(), + /// it will allocate one. 
The destuctor deallocates this + /// automatically allocated map, of course. + /// \return (*this) + DfsVisit &reachedMap(ReachedMap &m) { + if(local_reached) { + delete _reached; + local_reached=false; + } + _reached = &m; + return *this; + } + + public: + /// \name Execution control + /// The simplest way to execute the algorithm is to use + /// one of the member functions called \c run(...). + /// \n + /// If you need more control on the execution, + /// first you must call \ref init(), then you can adda source node + /// with \ref addSource(). + /// Finally \ref start() will perform the actual path + /// computation. + + /// @{ + /// \brief Initializes the internal data structures. + /// + /// Initializes the internal data structures. + /// + void init() { + create_maps(); + _stack.resize(countNodes(*_graph)); + _stack_head = -1; + for (NodeIt u(*_graph) ; u != INVALID ; ++u) { + _reached->set(u, false); + } + } + + /// \brief Adds a new source node. + /// + /// Adds a new source node to the set of nodes to be processed. + void addSource(Node s) { + if(!(*_reached)[s]) { + _reached->set(s,true); + _visitor->start(s); + _visitor->reach(s); + Edge e; + _graph->firstOut(e, s); + if (e != INVALID) { + _stack[++_stack_head] = e; + } else { + _visitor->leave(s); + } + } + } + + /// \brief Processes the next edge. + /// + /// Processes the next edge. + /// + /// \return The processed edge. + /// + /// \pre The stack must not be empty! + Edge processNextEdge() { + Edge e = _stack[_stack_head]; + Node m = _graph->target(e); + if(!(*_reached)[m]) { + _visitor->discover(e); + _visitor->reach(m); + _reached->set(m, true); + _graph->firstOut(_stack[++_stack_head], m); + } else { + _visitor->examine(e); + m = _graph->source(e); + _graph->nextOut(_stack[_stack_head]); + } + while (_stack_head>=0 && _stack[_stack_head] == INVALID) { + _visitor->leave(m); + --_stack_head; + if (_stack_head >= 0) { + _visitor->backtrack(_stack[_stack_head]); + m = _graph->source(_stack[_stack_head]); + _graph->nextOut(_stack[_stack_head]); + } else { + _visitor->stop(m); + } + } + return e; + } + + /// \brief Next edge to be processed. + /// + /// Next edge to be processed. + /// + /// \return The next edge to be processed or INVALID if the stack is + /// empty. + Edge nextEdge() { + return _stack_head >= 0 ? _stack[_stack_head] : INVALID; + } + + /// \brief Returns \c false if there are nodes + /// to be processed in the queue + /// + /// Returns \c false if there are nodes + /// to be processed in the queue + bool emptyQueue() { return _stack_head < 0; } + + /// \brief Returns the number of the nodes to be processed. + /// + /// Returns the number of the nodes to be processed in the queue. + int queueSize() { return _stack_head + 1; } + + /// \brief Executes the algorithm. + /// + /// Executes the algorithm. + /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + void start() { + while ( !emptyQueue() ) processNextEdge(); + } + + /// \brief Executes the algorithm until \c dest is reached. + /// + /// Executes the algorithm until \c dest is reached. + /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + void start(Node dest) { + while ( !emptyQueue() && _graph->target(_stack[_stack_head]) != dest ) + processNextEdge(); + } + + /// \brief Executes the algorithm until a condition is met. + /// + /// Executes the algorithm until a condition is met. 
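+  /// A hedged end-to-end sketch with the counting visitor suggested
+  /// earlier (all names are illustrative):
+  ///\code
+  ///  CountingVisitor visitor;
+  ///  lemon::DfsVisit<lemon::ListGraph, CountingVisitor> dfs_visit(g, visitor);
+  ///  dfs_visit.run(s);
+  ///  // visitor.node_count now holds the number of nodes reached from s
+  ///\endcode
+  ///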
+ /// + /// \pre init() must be called and at least one node should be added + /// with addSource() before using this function. + /// + /// \param em must be a bool (or convertible) edge map. The algorithm + /// will stop when it reaches an edge \c e with em[e] true. + /// + ///\return The reached edge \c e with em[e] true or + ///\c INVALID if no such edge was found. + /// + /// \warning Contrary to \ref Bfs and \ref Dijkstra, \c em is an edge map, + /// not a node map. + template + Edge start(const EM &em) { + while ( !emptyQueue() && !em[_stack[_stack_head]] ) + processNextEdge(); + return emptyQueue() ? INVALID : _stack[_stack_head]; + } + + /// \brief Runs %DFSVisit algorithm from node \c s. + /// + /// This method runs the %DFS algorithm from a root node \c s. + /// \note d.run(s) is just a shortcut of the following code. + ///\code + /// d.init(); + /// d.addSource(s); + /// d.start(); + ///\endcode + void run(Node s) { + init(); + addSource(s); + start(); + } + + /// \brief Runs %DFSVisit algorithm to visit all nodes in the graph. + + /// This method runs the %DFS algorithm in order to + /// compute the %DFS path to each node. The algorithm computes + /// - The %DFS tree. + /// - The distance of each node from the root in the %DFS tree. + /// + ///\note d.run() is just a shortcut of the following code. + ///\code + /// d.init(); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!d.reached(it)) { + /// d.addSource(it); + /// d.start(); + /// } + /// } + ///\endcode + void run() { + init(); + for (NodeIt it(*_graph); it != INVALID; ++it) { + if (!reached(it)) { + addSource(it); + start(); + } + } + } + ///@} + + /// \name Query Functions + /// The result of the %DFS algorithm can be obtained using these + /// functions.\n + /// Before the use of these functions, + /// either run() or start() must be called. + ///@{ + /// \brief Checks if a node is reachable from the root. + /// + /// Returns \c true if \c v is reachable from the root(s). + /// \warning The source nodes are inditated as unreachable. + /// \pre Either \ref run() or \ref start() + /// must be called before using this function. + /// + bool reached(Node v) { return (*_reached)[v]; } + ///@} + }; + + +} //END OF NAMESPACE LEMON + +#endif + diff --git a/src/lemon/error.h b/src/lemon/error.h new file mode 100644 index 0000000..01931d8 --- /dev/null +++ b/src/lemon/error.h @@ -0,0 +1,683 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_ERROR_H +#define LEMON_ERROR_H + +//! \ingroup exceptions +//! \file +//! \brief Basic exception classes and error handling. + +#include +#include +#include +#include +#include +#include + +namespace lemon { + + /// \addtogroup exceptions + /// @{ + + /// \brief Exception safe wrapper class. + /// + /// Exception safe wrapper class to implement the members of exceptions. 
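+  /// A hedged sketch of the intended use, wrapping the message of a custom
+  /// exception class (the class name is illustrative only):
+  ///\code
+  ///  class MyError : public lemon::Exception {
+  ///    lemon::ExceptionMember<std::string> _msg;
+  ///  public:
+  ///    MyError(const std::string& msg) throw() : _msg(msg) {}
+  ///    virtual const char* what() const throw() {
+  ///      return _msg.valid() ? _msg.get().c_str() : "MyError";
+  ///    }
+  ///    virtual ~MyError() throw() {}
+  ///  };
+  ///\endcode
+  ///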
+ template + class ExceptionMember { + public: + typedef _Type Type; + + ExceptionMember() throw () { + try { + ptr.reset(new Type()); + } catch (...) {} + } + + ExceptionMember(const Type& type) throw () { + try { + ptr.reset(new Type()); + if (ptr.get() == 0) return; + *ptr = type; + } catch (...) {} + } + + ExceptionMember(const ExceptionMember& copy) throw() { + try { + if (!copy.valid()) return; + ptr.reset(new Type()); + if (ptr.get() == 0) return; + *ptr = copy.get(); + } catch (...) {} + } + + ExceptionMember& operator=(const ExceptionMember& copy) { + if (ptr.get() == 0) return; + try { + if (!copy.valid()) return; + *ptr = copy.get(); + } catch (...) {} + } + + void set(const Type& type) { + if (ptr.get() == 0) return; + try { + *ptr = type; + } catch (...) {} + } + + const Type& get() const { + return *ptr; + } + + bool valid() const { + return ptr.get() != 0; + } + + private: + std::auto_ptr<_Type> ptr; + }; + + /// Exception-safe convenient "error message" class. + + /// Helper class which provides a convenient ostream-like (operator << + /// based) interface to create a string message. Mostly useful in + /// exception classes (therefore the name). + class ErrorMessage { + protected: + ///\e + + ///\todo The good solution is boost::shared_ptr... + /// + mutable + std::auto_ptr buf; + + ///\e + bool init() throw() { + try { + buf.reset(new std::ostringstream); + } + catch(...) { + buf.reset(); + } + return buf.get(); + } + + public: + + ///\e + ErrorMessage() throw() { init(); } + + ErrorMessage(const ErrorMessage& em) throw() : buf(em.buf) { } + + ///\e + ErrorMessage(const char *msg) throw() { + init(); + *this << msg; + } + + ///\e + ErrorMessage(const std::string &msg) throw() { + init(); + *this << msg; + } + + ///\e + template + ErrorMessage& operator<<(const T &t) throw() { + if( ! buf.get() ) return *this; + + try { + *buf << t; + } + catch(...) { + buf.reset(); + } + return *this; + } + + ///\e + const char* message() throw() { + if( ! buf.get() ) return 0; + + const char* mes = 0; + try { + mes = buf->str().c_str(); + } + catch(...) {} + return mes; + } + + }; + + /** + * \brief Generic exception class. + * + * Base class for exceptions used in LEMON. + */ + class Exception : public std::exception { + public: + ///\e + Exception() {} + ///\e + virtual ~Exception() throw() {} + ///\e + virtual const char* what() const throw() { + return "lemon::Exception"; + } + }; + + /** + * \brief One of the two main subclasses of \ref Exception. + * + * Logic errors represent problems in the internal logic of a program; + * in theory, these are preventable, and even detectable before the + * program runs (e.g., violations of class invariants). + * + * A typical example for this is \ref UninitializedParameter. + */ + class LogicError : public Exception { + public: + virtual const char* what() const throw() { + return "lemon::LogicError"; + } + }; + + /** + * \brief \ref Exception for uninitialized parameters. + * + * This error represents problems in the initialization + * of the parameters of the algorithms. + */ + class UninitializedParameter : public LogicError { + public: + virtual const char* what() const throw() { + return "lemon::UninitializedParameter"; + } + }; + + + /** + * \brief One of the two main subclasses of \ref Exception. + * + * Runtime errors represent problems outside the scope of a program; + * they cannot be easily predicted and can generally only be caught as + * the program executes. 
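+   * A hedged sketch of the intended catch-side usage (the surrounding
+   * code is illustrative only):
+   *\code
+   *   try {
+   *     // ... LEMON calls that may fail at run time ...
+   *   } catch (const lemon::RuntimeError& error) {
+   *     std::cerr << error.what() << std::endl;
+   *   }
+   *\endcode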
+ */ + class RuntimeError : public Exception { + public: + virtual const char* what() const throw() { + return "lemon::RuntimeError"; + } + }; + + ///\e + class RangeError : public RuntimeError { + public: + virtual const char* what() const throw() { + return "lemon::RangeError"; + } + }; + + ///\e + class IoError : public RuntimeError { + public: + virtual const char* what() const throw() { + return "lemon::IoError"; + } + }; + + ///\e + class DataFormatError : public IoError { + protected: + ExceptionMember _message; + ExceptionMember _file; + int _line; + + mutable ExceptionMember _message_holder; + public: + + DataFormatError(const DataFormatError &dfe) : + IoError(dfe), _message(dfe._message), _file(dfe._file), + _line(dfe._line) {} + + ///\e + explicit DataFormatError(const char *the_message) + : _message(the_message), _line(0) {} + + ///\e + DataFormatError(const std::string &file_name, int line_num, + const char *the_message) + : _message(the_message), _line(line_num) { file(file_name); } + + ///\e + void line(int ln) { _line = ln; } + ///\e + void message(const std::string& msg) { _message.set(msg); } + ///\e + void file(const std::string &fl) { _file.set(fl); } + + ///\e + int line() const { return _line; } + ///\e + const char* message() const { + if (_message.valid() && !_message.get().empty()) { + return _message.get().c_str(); + } else { + return 0; + } + } + + /// \brief Returns the filename. + /// + /// Returns \e null if the filename was not specified. + const char* file() const { + if (_file.valid() && !_file.get().empty()) { + return _file.get().c_str(); + } else { + return 0; + } + } + + ///\e + virtual const char* what() const throw() { + try { + std::ostringstream ostr; + ostr << "lemon:DataFormatError" << ": "; + if (message()) ostr << message(); + if( file() || line() != 0 ) { + ostr << " ("; + if( file() ) ostr << "in file '" << file() << "'"; + if( file() && line() != 0 ) ostr << " "; + if( line() != 0 ) ostr << "at line " << line(); + ostr << ")"; + } + _message_holder.set(ostr.str()); + } + catch (...) {} + if( _message_holder.valid()) return _message_holder.get().c_str(); + return "lemon:DataFormatError"; + } + + virtual ~DataFormatError() throw() {} + }; + + ///\e + class FileOpenError : public IoError { + protected: + ExceptionMember _file; + + mutable ExceptionMember _message_holder; + public: + + FileOpenError(const FileOpenError &foe) : + IoError(foe), _file(foe._file) {} + + ///\e + explicit FileOpenError(const std::string& fl) + : _file(fl) {} + + + ///\e + void file(const std::string &fl) { _file.set(fl); } + + /// \brief Returns the filename. + /// + /// Returns \e null if the filename was not specified. + const char* file() const { + if (_file.valid() && !_file.get().empty()) { + return _file.get().c_str(); + } else { + return 0; + } + } + + ///\e + virtual const char* what() const throw() { + try { + std::ostringstream ostr; + ostr << "lemon::FileOpenError" << ": "; + ostr << "Cannot open file - " << file(); + _message_holder.set(ostr.str()); + } + catch (...) 
{} + if( _message_holder.valid()) return _message_holder.get().c_str(); + return "lemon::FileOpenError"; + } + virtual ~FileOpenError() throw() {} + }; + + class IoParameterError : public IoError { + protected: + ExceptionMember _message; + ExceptionMember _file; + + mutable ExceptionMember _message_holder; + public: + + IoParameterError(const IoParameterError &ile) : + IoError(ile), _message(ile._message), _file(ile._file) {} + + ///\e + explicit IoParameterError(const char *the_message) + : _message(the_message) {} + + ///\e + IoParameterError(const char *file_name, const char *the_message) + : _message(the_message), _file(file_name) {} + + ///\e + void message(const std::string& msg) { _message.set(msg); } + ///\e + void file(const std::string &fl) { _file.set(fl); } + + ///\e + const char* message() const { + if (_message.valid()) { + return _message.get().c_str(); + } else { + return 0; + } + } + + /// \brief Returns the filename. + /// + /// Returns \e null if the filename was not specified. + const char* file() const { + if (_file.valid()) { + return _file.get().c_str(); + } else { + return 0; + } + } + + ///\e + virtual const char* what() const throw() { + try { + std::ostringstream ostr; + if (message()) ostr << message(); + if (file()) ostr << "(when reading file '" << file() << "')"; + _message_holder.set(ostr.str()); + } + catch (...) {} + if( _message_holder.valid() ) return _message_holder.get().c_str(); + return "lemon:IoParameterError"; + } + virtual ~IoParameterError() throw() {} + }; + + + ///\e + class AssertionFailedError : public LogicError { + protected: + const char *assertion; + const char *file; + int line; + const char *function; + const char *message; + + mutable ExceptionMember _message_holder; + public: + ///\e + AssertionFailedError(const char *_file, int _line, const char *func, + const char *msg, const char *_assertion = 0) : + assertion(_assertion), file(_file), line(_line), function(func), + message(msg) {} + + ///\e + const char* get_assertion() const { return assertion; } + ///\e + const char* get_message() const { return message; } + ///\e + const char* get_file() const { return file; } + ///\e + const char* get_function() const { return function; } + ///\e + int get_line() const { return line; } + + + virtual const char* what() const throw() { + try { + std::ostringstream ostr; + ostr << file << ":" << line << ": "; + if( function ) + ostr << function << ": "; + ostr << message; + if( assertion ) + ostr << " (assertion '" << assertion << "' failed)"; + _message_holder.set(ostr.str()); + return ostr.str().c_str(); + } + catch(...) 
{} + if( _message_holder.valid() ) return _message_holder.get().c_str(); + return "lemon::AssertionFailedError"; + } + virtual ~AssertionFailedError() throw() {} + }; + + + /**************** Macros ****************/ + + + template + inline void assert_fail(const char *file, int line, + const char *func, + Exception exception, + const char *assertion = 0, + bool do_abort=true) + { + using namespace std; + cerr << file << ":" << line << ": "; + if( func ) + cerr << func << ": "; + cerr << exception.what(); + if( assertion ) + cerr << " (assertion '" << assertion << "' failed)"; + cerr << endl; + if(do_abort) + abort(); + } + + template <> + inline void assert_fail(const char *file, int line, + const char *func, + const char *message, + const char *assertion, + bool do_abort) + { + using namespace std; + cerr << file << ":" << line << ": "; + if( func ) + cerr << func << ": "; + cerr << message; + if( assertion ) + cerr << " (assertion '" << assertion << "' failed)"; + cerr << endl; + if(do_abort) + abort(); + } + + template <> + inline void assert_fail(const char *file, int line, + const char *func, + std::string message, + const char *assertion, + bool do_abort) + { + assert_fail(file, line, func, message.c_str(), assertion, do_abort); + } + + template + inline void assert_fail_failure(const char *file, int line, const char *func, + Exception exception, + const char *assertion = 0, + bool = true) + { + throw AssertionFailedError(file, line, func, exception.what(), assertion); + } + + template <> + inline void assert_fail_failure(const char *file, int line, + const char *func, + const char *message, + const char *assertion, + bool) + { + throw AssertionFailedError(file, line, func, message, assertion); + } + + template <> + inline void assert_fail_failure(const char *file, int line, + const char *func, + std::string message, + const char *assertion, + bool) + { + assert_fail_failure(file, line, func, message.c_str(), assertion, true); + } + + template + inline void assert_fail_exception(const char *file, int line, const char *func, + Exception exception, + const char *assertion = 0, bool = true) + { + throw exception; + } + + template <> + inline void assert_fail_exception(const char *file, int line, + const char *func, + const char *message, + const char *assertion, + bool) + { + throw AssertionFailedError(file, line, func, message, assertion); + } + + template <> + inline void assert_fail_exception(const char *file, int line, + const char *func, + std::string message, + const char *assertion, + bool) + { + assert_fail_exception(file, line, func, message.c_str(), assertion, true); + } + +/// @} + +} +#endif // LEMON_ERROR_H + +#undef LEMON_ASSERT +#undef LEMON_FIXME + +#ifdef LEMON_ENABLE_ASSERTS +# define LEMON_ASSERT_ABORT +#endif + +#ifndef LEMON_ASSERT_DO_ABORT +# define LEMON_ASSERT_DO_ABORT 1 +#endif + +#ifndef LEMON_ASSERT_HANDLER +# if defined LEMON_ASSERT_EXCEPTION +# define LEMON_ASSERT_HANDLER ::lemon::assert_fail_exception +# elif defined LEMON_ASSERT_FAILURE +# define LEMON_ASSERT_HANDLER ::lemon::assert_fail_failure +# elif defined LEMON_ASSERT_ABORT +# define LEMON_ASSERT_HANDLER ::lemon::assert_fail +# else +# define LEMON_DISABLE_ASSERTS +# endif +#endif + +#ifdef DOXYGEN + +/// \brief Macro for assertions with customizable message +/// +/// Macro for assertions with customizable message. +/// +/// The assertions are disabled in the default behaviour. 
You can +/// enable the assertions with the +/// \code +/// #define LEMON_ENABLE_ASSERTS +/// \endcode +/// Then an assert +/// provides a log on the standard error about the assertion and aborts +/// the program if LEMON_ASSERT_DO_ABORT is also defined (otherwise the +/// program keeps on running). +/// By defining LEMON_ASSERT_FAILURE or +/// LEMON_ASSERT_EXCEPTION, you can set other behaviour to the +/// assertions. In case LEMON_ASSERT_FAILURE is given, LEMON_ASSERT +/// will always throw an \c AssertionFailedError exception with +/// the \c msg error message. By using +/// LEMON_ASSERT_EXCEPTION, one can define an arbitrary exception to be thrown. +/// +/// The LEMON_ASSERT macro should be called with the \c exp parameter +/// which should be an expression convertible to bool. If the given +/// parameter is false the assertion is raised and one of the assertion +/// behaviour will be activated. The \c msg should be either a const +/// char* message or an exception. When the \c msg is an exception the +/// \ref lemon::Exception::what() "what()" function is called to retrieve and +/// display the error message. +/// +/// \todo We should provide some way to reset to the default behaviour, +/// shouldn't we? +/// +/// \todo This whole 'assert' business should be placed in a separate +/// include file. The boost assert is not guarded by header sentries +/// which may help to change the behaviour of the assertions in +/// the files. +/// +/// \todo __PRETTY_FUNCTION__ should be replaced by something +/// compiler-independent, like BOOST_CURRENT_FUNCTION + +# define LEMON_ASSERT(exp, msg) \ + (static_cast (!!(exp) ? 0 : ( \ + LEMON_ASSERT_HANDLER(__FILE__, __LINE__, \ + __PRETTY_FUNCTION__, \ + msg, #exp, LEMON_ASSERT_DO_ABORT), 0))) + +#else +# if defined LEMON_DISABLE_ASSERTS + +# define LEMON_ASSERT(exp, msg) (static_cast (0)) + +# else +# define LEMON_ASSERT(exp, msg) \ + (static_cast (!!(exp) ? 0 : ( \ + LEMON_ASSERT_HANDLER(__FILE__, __LINE__, \ + __PRETTY_FUNCTION__, \ + msg, #exp, LEMON_ASSERT_DO_ABORT), 0))) +# endif +#endif + +/** + * \brief Macro for mark not yet implemented features. + * + * \todo Is this the right place for this? It should be used only in + * modules under development. + * + * \todo __PRETTY_FUNCTION__ should be replaced by something + * compiler-independent, like BOOST_CURRENT_FUNCTION + */ + +# define LEMON_FIXME(msg) \ + (LEMON_ASSERT_HANDLER(__FILE__, __LINE__, __PRETTY_FUNCTION__, \ + "FIXME: " msg)) diff --git a/src/lemon/fib_heap.h b/src/lemon/fib_heap.h new file mode 100644 index 0000000..c0d632b --- /dev/null +++ b/src/lemon/fib_heap.h @@ -0,0 +1,464 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_FIB_HEAP_H +#define LEMON_FIB_HEAP_H + +///\file +///\ingroup auxdat +///\brief Fibonacci Heap implementation. + +#include +#include +#include + +namespace lemon { + + /// \ingroup auxdat + /// + ///\brief Fibonacci Heap. 
+ /// + ///This class implements the \e Fibonacci \e heap data structure. A \e heap + ///is a data structure for storing items with specified values called \e + ///priorities in such a way that finding the item with minimum priority is + ///efficient. \c Compare specifies the ordering of the priorities. In a heap + ///one can change the priority of an item, add or erase an item, etc. + /// + ///The methods \ref increase and \ref erase are not efficient in a Fibonacci + ///heap. In case of many calls to these operations, it is better to use a + ///\ref BinHeap "binary heap". + /// + ///\param _Prio Type of the priority of the items. + ///\param _ItemIntMap A read and writable Item int map, used internally + ///to handle the cross references. + ///\param _Compare A class for the ordering of the priorities. The + ///default is \c std::less<_Prio>. + /// + ///\sa BinHeap + ///\sa Dijkstra + ///\author Jacint Szabo + +#ifdef DOXYGEN + template +#else + template > +#endif + class FibHeap { + public: + typedef _ItemIntMap ItemIntMap; + typedef _Prio Prio; + typedef typename ItemIntMap::Key Item; + typedef std::pair Pair; + typedef _Compare Compare; + + private: + class store; + + std::vector container; + int minimum; + ItemIntMap &iimap; + Compare comp; + int num_items; + + public: + ///Status of the nodes + enum State { + ///The node is in the heap + IN_HEAP = 0, + ///The node has never been in the heap + PRE_HEAP = -1, + ///The node was in the heap but it got out of it + POST_HEAP = -2 + }; + + /// \brief The constructor + /// + /// \c _iimap should be given to the constructor, since it is + /// used internally to handle the cross references. + explicit FibHeap(ItemIntMap &_iimap) + : minimum(0), iimap(_iimap), num_items() {} + + /// \brief The constructor + /// + /// \c _iimap should be given to the constructor, since it is used + /// internally to handle the cross references. \c _comp is an + /// object for ordering of the priorities. + FibHeap(ItemIntMap &_iimap, const Compare &_comp) + : minimum(0), iimap(_iimap), comp(_comp), num_items() {} + + /// \brief The number of items stored in the heap. + /// + /// Returns the number of items stored in the heap. + int size() const { return num_items; } + + /// \brief Checks if the heap stores no items. + /// + /// Returns \c true if and only if the heap stores no items. + bool empty() const { return num_items==0; } + + /// \brief Make empty this heap. + /// + /// Make empty this heap. It does not change the cross reference + /// map. If you want to reuse a heap what is not surely empty you + /// should first clear the heap and after that you should set the + /// cross reference map for each item to \c PRE_HEAP. + void clear() { + container.clear(); minimum = 0; num_items = 0; + } + + /// \brief \c item gets to the heap with priority \c value independently + /// if \c item was already there. + /// + /// This method calls \ref push(\c item, \c value) if \c item is not + /// stored in the heap and it calls \ref decrease(\c item, \c value) or + /// \ref increase(\c item, \c value) otherwise. + void set (const Item& item, const Prio& value) { + int i=iimap[item]; + if ( i >= 0 && container[i].in ) { + if ( comp(value, container[i].prio) ) decrease(item, value); + if ( comp(container[i].prio, value) ) increase(item, value); + } else push(item, value); + } + + /// \brief Adds \c item to the heap with priority \c value. + /// + /// Adds \c item to the heap with priority \c value. + /// \pre \c item must not be stored in the heap. 
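+  /// A hedged usage sketch (it assumes the template parameters are given
+  /// in the order \c _Prio, \c _ItemIntMap documented above, and uses a
+  /// graph node map as the cross reference map):
+  ///\code
+  ///  lemon::ListGraph g;
+  ///  lemon::ListGraph::Node n = g.addNode();
+  ///  // every item must start in the PRE_HEAP (-1) state
+  ///  lemon::ListGraph::NodeMap<int> cross_ref(g, -1);
+  ///  lemon::FibHeap<int, lemon::ListGraph::NodeMap<int> > heap(cross_ref);
+  ///  heap.push(n, 10);
+  ///  heap.decrease(n, 5);   // only decreasing is cheap, see above
+  ///  heap.pop();
+  ///\endcode
+  ///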
+ void push (const Item& item, const Prio& value) { + int i=iimap[item]; + if ( i < 0 ) { + int s=container.size(); + iimap.set( item, s ); + store st; + st.name=item; + container.push_back(st); + i=s; + } else { + container[i].parent=container[i].child=-1; + container[i].degree=0; + container[i].in=true; + container[i].marked=false; + } + + if ( num_items ) { + container[container[minimum].right_neighbor].left_neighbor=i; + container[i].right_neighbor=container[minimum].right_neighbor; + container[minimum].right_neighbor=i; + container[i].left_neighbor=minimum; + if ( comp( value, container[minimum].prio) ) minimum=i; + } else { + container[i].right_neighbor=container[i].left_neighbor=i; + minimum=i; + } + container[i].prio=value; + ++num_items; + } + + /// \brief Returns the item with minimum priority relative to \c Compare. + /// + /// This method returns the item with minimum priority relative to \c + /// Compare. + /// \pre The heap must be nonempty. + Item top() const { return container[minimum].name; } + + /// \brief Returns the minimum priority relative to \c Compare. + /// + /// It returns the minimum priority relative to \c Compare. + /// \pre The heap must be nonempty. + const Prio& prio() const { return container[minimum].prio; } + + /// \brief Returns the priority of \c item. + /// + /// It returns the priority of \c item. + /// \pre \c item must be in the heap. + const Prio& operator[](const Item& item) const { + return container[iimap[item]].prio; + } + + /// \brief Deletes the item with minimum priority relative to \c Compare. + /// + /// This method deletes the item with minimum priority relative to \c + /// Compare from the heap. + /// \pre The heap must be non-empty. + void pop() { + /*The first case is that there are only one root.*/ + if ( container[minimum].left_neighbor==minimum ) { + container[minimum].in=false; + if ( container[minimum].degree!=0 ) { + makeroot(container[minimum].child); + minimum=container[minimum].child; + balance(); + } + } else { + int right=container[minimum].right_neighbor; + unlace(minimum); + container[minimum].in=false; + if ( container[minimum].degree > 0 ) { + int left=container[minimum].left_neighbor; + int child=container[minimum].child; + int last_child=container[child].left_neighbor; + + makeroot(child); + + container[left].right_neighbor=child; + container[child].left_neighbor=left; + container[right].left_neighbor=last_child; + container[last_child].right_neighbor=right; + } + minimum=right; + balance(); + } // the case where there are more roots + --num_items; + } + + /// \brief Deletes \c item from the heap. + /// + /// This method deletes \c item from the heap, if \c item was already + /// stored in the heap. It is quite inefficient in Fibonacci heaps. + void erase (const Item& item) { + int i=iimap[item]; + + if ( i >= 0 && container[i].in ) { + if ( container[i].parent!=-1 ) { + int p=container[i].parent; + cut(i,p); + cascade(p); + } + minimum=i; //As if its prio would be -infinity + pop(); + } + } + + /// \brief Decreases the priority of \c item to \c value. + /// + /// This method decreases the priority of \c item to \c value. + /// \pre \c item must be stored in the heap with priority at least \c + /// value relative to \c Compare. 
+ void decrease (Item item, const Prio& value) { + int i=iimap[item]; + container[i].prio=value; + int p=container[i].parent; + + if ( p!=-1 && comp(value, container[p].prio) ) { + cut(i,p); + cascade(p); + } + if ( comp(value, container[minimum].prio) ) minimum=i; + } + + /// \brief Increases the priority of \c item to \c value. + /// + /// This method sets the priority of \c item to \c value. Though + /// there is no precondition on the priority of \c item, this + /// method should be used only if it is indeed necessary to increase + /// (relative to \c Compare) the priority of \c item, because this + /// method is inefficient. + void increase (Item item, const Prio& value) { + erase(item); + push(item, value); + } + + + /// \brief Returns if \c item is in, has already been in, or has never + /// been in the heap. + /// + /// This method returns PRE_HEAP if \c item has never been in the + /// heap, IN_HEAP if it is in the heap at the moment, and POST_HEAP + /// otherwise. In the latter case it is possible that \c item will + /// get back to the heap again. + State state(const Item &item) const { + int i=iimap[item]; + if( i>=0 ) { + if ( container[i].in ) i=0; + else i=-2; + } + return State(i); + } + + /// \brief Sets the state of the \c item in the heap. + /// + /// Sets the state of the \c item in the heap. It can be used to + /// manually clear the heap when it is important to achive the + /// better time complexity. + /// \param i The item. + /// \param st The state. It should not be \c IN_HEAP. + void state(const Item& i, State st) { + switch (st) { + case POST_HEAP: + case PRE_HEAP: + if (state(i) == IN_HEAP) { + erase(i); + } + iimap[i] = st; + break; + case IN_HEAP: + break; + } + } + + private: + + void balance() { + + int maxdeg=int( std::floor( 2.08*log(double(container.size()))))+1; + + std::vector A(maxdeg,-1); + + /* + *Recall that now minimum does not point to the minimum prio element. + *We set minimum to this during balance(). + */ + int anchor=container[minimum].left_neighbor; + int next=minimum; + bool end=false; + + do { + int active=next; + if ( anchor==active ) end=true; + int d=container[active].degree; + next=container[active].right_neighbor; + + while (A[d]!=-1) { + if( comp(container[active].prio, container[A[d]].prio) ) { + fuse(active,A[d]); + } else { + fuse(A[d],active); + active=A[d]; + } + A[d]=-1; + ++d; + } + A[d]=active; + } while ( !end ); + + + while ( container[minimum].parent >=0 ) + minimum=container[minimum].parent; + int s=minimum; + int m=minimum; + do { + if ( comp(container[s].prio, container[minimum].prio) ) minimum=s; + s=container[s].right_neighbor; + } while ( s != m ); + } + + void makeroot(int c) { + int s=c; + do { + container[s].parent=-1; + s=container[s].right_neighbor; + } while ( s != c ); + } + + void cut(int a, int b) { + /* + *Replacing a from the children of b. 
+ */ + --container[b].degree; + + if ( container[b].degree !=0 ) { + int child=container[b].child; + if ( child==a ) + container[b].child=container[child].right_neighbor; + unlace(a); + } + + + /*Lacing a to the roots.*/ + int right=container[minimum].right_neighbor; + container[minimum].right_neighbor=a; + container[a].left_neighbor=minimum; + container[a].right_neighbor=right; + container[right].left_neighbor=a; + + container[a].parent=-1; + container[a].marked=false; + } + + void cascade(int a) { + if ( container[a].parent!=-1 ) { + int p=container[a].parent; + + if ( container[a].marked==false ) container[a].marked=true; + else { + cut(a,p); + cascade(p); + } + } + } + + void fuse(int a, int b) { + unlace(b); + + /*Lacing b under a.*/ + container[b].parent=a; + + if (container[a].degree==0) { + container[b].left_neighbor=b; + container[b].right_neighbor=b; + container[a].child=b; + } else { + int child=container[a].child; + int last_child=container[child].left_neighbor; + container[child].left_neighbor=b; + container[b].right_neighbor=child; + container[last_child].right_neighbor=b; + container[b].left_neighbor=last_child; + } + + ++container[a].degree; + + container[b].marked=false; + } + + /* + *It is invoked only if a has siblings. + */ + void unlace(int a) { + int leftn=container[a].left_neighbor; + int rightn=container[a].right_neighbor; + container[leftn].right_neighbor=rightn; + container[rightn].left_neighbor=leftn; + } + + + class store { + friend class FibHeap; + + Item name; + int parent; + int left_neighbor; + int right_neighbor; + int child; + int degree; + bool marked; + bool in; + Prio prio; + + store() : parent(-1), child(-1), degree(), marked(false), in(true) {} + }; + }; + +} //namespace lemon + +#endif //LEMON_FIB_HEAP_H + diff --git a/src/lemon/graph_adaptor.h b/src/lemon/graph_adaptor.h new file mode 100644 index 0000000..66e75f1 --- /dev/null +++ b/src/lemon/graph_adaptor.h @@ -0,0 +1,2720 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_GRAPH_ADAPTOR_H +#define LEMON_GRAPH_ADAPTOR_H + +///\ingroup graph_adaptors +///\file +///\brief Several graph adaptors. +/// +///This file contains several useful graph adaptor functions. +/// +///\author Marton Makai and Balazs Dezso + +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace lemon { + + ///\brief Base type for the Graph Adaptors + /// + ///Base type for the Graph Adaptors + /// + ///This is the base type for most of LEMON graph adaptors. + ///This class implements a trivial graph adaptor i.e. it only wraps the + ///functions and types of the graph. The purpose of this class is to + ///make easier implementing graph adaptors. E.g. if an adaptor is + ///considered which differs from the wrapped graph only in some of its + ///functions or types, then it can be derived from GraphAdaptor, + ///and only the + ///differences should be implemented. 
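+ ///
+ /// An illustrative sketch of this pattern (\c MyAdaptorBase is a
+ /// made-up name; the RevGraphAdaptorBase defined later in this file is
+ /// a real instance of the same idea):
+ ///\code
+ /// template <typename G>
+ /// class MyAdaptorBase : public GraphAdaptorBase<G> {
+ ///   typedef GraphAdaptorBase<G> Parent;
+ /// public:
+ ///   MyAdaptorBase(G& g) : Parent(g) {}
+ ///   // redefine only what differs from the wrapped graph,
+ ///   // e.g. swap the two ends of every edge:
+ ///   typename Parent::Node source(const typename Parent::Edge& e) const {
+ ///     return Parent::target(e);
+ ///   }
+ ///   typename Parent::Node target(const typename Parent::Edge& e) const {
+ ///     return Parent::source(e);
+ ///   }
+ /// };
+ ///\endcode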
+ /// + ///author Marton Makai + template + class GraphAdaptorBase { + public: + typedef _Graph Graph; + typedef GraphAdaptorBase Adaptor; + typedef Graph ParentGraph; + + protected: + Graph* graph; + GraphAdaptorBase() : graph(0) { } + void setGraph(Graph& _graph) { graph=&_graph; } + + public: + GraphAdaptorBase(Graph& _graph) : graph(&_graph) { } + + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + + void first(Node& i) const { graph->first(i); } + void first(Edge& i) const { graph->first(i); } + void firstIn(Edge& i, const Node& n) const { graph->firstIn(i, n); } + void firstOut(Edge& i, const Node& n ) const { graph->firstOut(i, n); } + + void next(Node& i) const { graph->next(i); } + void next(Edge& i) const { graph->next(i); } + void nextIn(Edge& i) const { graph->nextIn(i); } + void nextOut(Edge& i) const { graph->nextOut(i); } + + Node source(const Edge& e) const { return graph->source(e); } + Node target(const Edge& e) const { return graph->target(e); } + + typedef NodeNumTagIndicator NodeNumTag; + int nodeNum() const { return graph->nodeNum(); } + + typedef EdgeNumTagIndicator EdgeNumTag; + int edgeNum() const { return graph->edgeNum(); } + + typedef FindEdgeTagIndicator FindEdgeTag; + Edge findEdge(const Node& u, const Node& v, + const Edge& prev = INVALID) { + return graph->findEdge(u, v, prev); + } + + Node addNode() const { + return Node(graph->addNode()); + } + + Edge addEdge(const Node& u, const Node& v) const { + return Edge(graph->addEdge(u, v)); + } + + void erase(const Node& i) const { graph->erase(i); } + void erase(const Edge& i) const { graph->erase(i); } + + void clear() const { graph->clear(); } + + int id(const Node& v) const { return graph->id(v); } + int id(const Edge& e) const { return graph->id(e); } + + Node fromNodeId(int ix) const { + return graph->fromNodeId(ix); + } + + Edge fromEdgeId(int ix) const { + return graph->fromEdgeId(ix); + } + + int maxNodeId() const { + return graph->maxNodeId(); + } + + int maxEdgeId() const { + return graph->maxEdgeId(); + } + + typedef typename ItemSetTraits::ItemNotifier NodeNotifier; + + NodeNotifier& notifier(Node) const { + return graph->notifier(Node()); + } + + typedef typename ItemSetTraits::ItemNotifier EdgeNotifier; + + EdgeNotifier& notifier(Edge) const { + return graph->notifier(Edge()); + } + + template + class NodeMap : public Graph::template NodeMap<_Value> { + public: + + typedef typename Graph::template NodeMap<_Value> Parent; + + explicit NodeMap(const Adaptor& ga) + : Parent(*ga.graph) {} + + NodeMap(const Adaptor& ga, const _Value& value) + : Parent(*ga.graph, value) { } + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + template + class EdgeMap : public Graph::template EdgeMap<_Value> { + public: + + typedef typename Graph::template EdgeMap<_Value> Parent; + + explicit EdgeMap(const Adaptor& ga) + : Parent(*ga.graph) {} + + EdgeMap(const Adaptor& ga, const _Value& value) + : Parent(*ga.graph, value) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + }; + + ///\ingroup graph_adaptors + /// + ///\brief Trivial Graph Adaptor + /// + /// This class is an adaptor which does not change the adapted graph. + /// It can be used only to test the graph adaptors. 
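+ ///
+ /// A short sketch (assuming a \c ListGraph instance \c g):
+ ///\code
+ /// typedef GraphAdaptor<ListGraph> GA;
+ /// GA ga(g);
+ /// // ga exposes exactly the nodes and edges of g
+ /// for (GA::NodeIt n(ga); n != INVALID; ++n) {
+ ///   // ...
+ /// }
+ ///\endcode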
+ template <typename _Graph>
+ class GraphAdaptor :
+   public GraphAdaptorExtender<GraphAdaptorBase<_Graph> > {
+ public:
+   typedef _Graph Graph;
+   typedef GraphAdaptorExtender<GraphAdaptorBase<_Graph> > Parent;
+ protected:
+   GraphAdaptor() : Parent() { }
+
+ public:
+   explicit GraphAdaptor(Graph& _graph) { setGraph(_graph); }
+ };
+
+ /// \brief Just gives back a graph adaptor
+ ///
+ /// Just gives back a graph adaptor for the given graph.
+ template <typename Graph>
+ GraphAdaptor<const Graph>
+ graphAdaptor(const Graph& graph) {
+   return GraphAdaptor<const Graph>(graph);
+ }
+
+
+ template <typename _Graph>
+ class RevGraphAdaptorBase : public GraphAdaptorBase<_Graph> {
+ public:
+   typedef _Graph Graph;
+   typedef GraphAdaptorBase<_Graph> Parent;
+ protected:
+   RevGraphAdaptorBase() : Parent() { }
+ public:
+   typedef typename Parent::Node Node;
+   typedef typename Parent::Edge Edge;
+
+   void firstIn(Edge& i, const Node& n) const { Parent::firstOut(i, n); }
+   void firstOut(Edge& i, const Node& n ) const { Parent::firstIn(i, n); }
+
+   void nextIn(Edge& i) const { Parent::nextOut(i); }
+   void nextOut(Edge& i) const { Parent::nextIn(i); }
+
+   Node source(const Edge& e) const { return Parent::target(e); }
+   Node target(const Edge& e) const { return Parent::source(e); }
+
+   typedef FindEdgeTagIndicator<Graph> FindEdgeTag;
+   Edge findEdge(const Node& u, const Node& v,
+                 const Edge& prev = INVALID) {
+     return Parent::findEdge(v, u, prev);
+   }
+
+ };
+
+
+ ///\ingroup graph_adaptors
+ ///
+ ///\brief A graph adaptor which reverses the orientation of the edges.
+ ///
+ /// If \c g is defined as
+ ///\code
+ /// ListGraph g;
+ ///\endcode
+ /// then
+ ///\code
+ /// RevGraphAdaptor<ListGraph> ga(g);
+ ///\endcode
+ /// implements the graph obtained from \c g by
+ /// reversing the orientation of its edges.
+ ///
+ /// A good example of using RevGraphAdaptor is deciding whether a
+ /// directed graph is strongly connected. The graph is strongly
+ /// connected if and only if every node is reachable from some fixed
+ /// node and that node is reachable from every node; equivalently,
+ /// every node should be reachable from one node both in the graph
+ /// and in the reversed graph. This condition can be checked with the
+ /// Dfs algorithm class and the RevGraphAdaptor class.
+ /// + /// And look at the code: + /// + ///\code + /// bool stronglyConnected(const Graph& graph) { + /// if (NodeIt(graph) == INVALID) return true; + /// Dfs dfs(graph); + /// dfs.run(NodeIt(graph)); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!dfs.reached(it)) { + /// return false; + /// } + /// } + /// typedef RevGraphAdaptor RGraph; + /// RGraph rgraph(graph); + /// DfsVisit rdfs(rgraph); + /// rdfs.run(NodeIt(graph)); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!rdfs.reached(it)) { + /// return false; + /// } + /// } + /// return true; + /// } + ///\endcode + template + class RevGraphAdaptor : + public GraphAdaptorExtender > { + public: + typedef _Graph Graph; + typedef GraphAdaptorExtender< + RevGraphAdaptorBase<_Graph> > Parent; + protected: + RevGraphAdaptor() { } + public: + explicit RevGraphAdaptor(_Graph& _graph) { setGraph(_graph); } + }; + + /// \brief Just gives back a reverse graph adaptor + /// + /// Just gives back a reverse graph adaptor + template + RevGraphAdaptor + revGraphAdaptor(const Graph& graph) { + return RevGraphAdaptor(graph); + } + + template + class SubGraphAdaptorBase : public GraphAdaptorBase<_Graph> { + public: + typedef _Graph Graph; + typedef SubGraphAdaptorBase Adaptor; + typedef GraphAdaptorBase<_Graph> Parent; + protected: + NodeFilterMap* node_filter_map; + EdgeFilterMap* edge_filter_map; + SubGraphAdaptorBase() : Parent(), + node_filter_map(0), edge_filter_map(0) { } + + void setNodeFilterMap(NodeFilterMap& _node_filter_map) { + node_filter_map=&_node_filter_map; + } + void setEdgeFilterMap(EdgeFilterMap& _edge_filter_map) { + edge_filter_map=&_edge_filter_map; + } + + public: + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + void first(Node& i) const { + Parent::first(i); + while (i!=INVALID && !(*node_filter_map)[i]) Parent::next(i); + } + + void first(Edge& i) const { + Parent::first(i); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::source(i)] + || !(*node_filter_map)[Parent::target(i)])) Parent::next(i); + } + + void firstIn(Edge& i, const Node& n) const { + Parent::firstIn(i, n); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::source(i)])) Parent::nextIn(i); + } + + void firstOut(Edge& i, const Node& n) const { + Parent::firstOut(i, n); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::target(i)])) Parent::nextOut(i); + } + + void next(Node& i) const { + Parent::next(i); + while (i!=INVALID && !(*node_filter_map)[i]) Parent::next(i); + } + + void next(Edge& i) const { + Parent::next(i); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::source(i)] + || !(*node_filter_map)[Parent::target(i)])) Parent::next(i); + } + + void nextIn(Edge& i) const { + Parent::nextIn(i); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::source(i)])) Parent::nextIn(i); + } + + void nextOut(Edge& i) const { + Parent::nextOut(i); + while (i!=INVALID && (!(*edge_filter_map)[i] + || !(*node_filter_map)[Parent::target(i)])) Parent::nextOut(i); + } + + ///\e + + /// This function hides \c n in the graph, i.e. the iteration + /// jumps over it. This is done by simply setting the value of \c n + /// to be false in the corresponding node-map. + void hide(const Node& n) const { node_filter_map->set(n, false); } + + ///\e + + /// This function hides \c e in the graph, i.e. the iteration + /// jumps over it. 
This is done by simply setting the value of \c e + /// to be false in the corresponding edge-map. + void hide(const Edge& e) const { edge_filter_map->set(e, false); } + + ///\e + + /// The value of \c n is set to be true in the node-map which stores + /// hide information. If \c n was hidden previuosly, then it is shown + /// again + void unHide(const Node& n) const { node_filter_map->set(n, true); } + + ///\e + + /// The value of \c e is set to be true in the edge-map which stores + /// hide information. If \c e was hidden previuosly, then it is shown + /// again + void unHide(const Edge& e) const { edge_filter_map->set(e, true); } + + /// Returns true if \c n is hidden. + + ///\e + /// + bool hidden(const Node& n) const { return !(*node_filter_map)[n]; } + + /// Returns true if \c n is hidden. + + ///\e + /// + bool hidden(const Edge& e) const { return !(*edge_filter_map)[e]; } + + typedef False NodeNumTag; + typedef False EdgeNumTag; + + typedef FindEdgeTagIndicator FindEdgeTag; + Edge findEdge(const Node& source, const Node& target, + const Edge& prev = INVALID) { + if (!(*node_filter_map)[source] || !(*node_filter_map)[target]) { + return INVALID; + } + Edge edge = Parent::findEdge(source, target, prev); + while (edge != INVALID && !(*edge_filter_map)[edge]) { + edge = Parent::findEdge(source, target, edge); + } + return edge; + } + + template + class NodeMap + : public SubMapExtender > + { + public: + typedef Adaptor Graph; + //typedef SubMapExtender > Parent; + + NodeMap(const Graph& g) + : Parent(g) {} + NodeMap(const Graph& g, const _Value& v) + : Parent(g, v) {} + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + template + class EdgeMap + : public SubMapExtender > + { + public: + typedef Adaptor Graph; + //typedef SubMapExtender > Parent; + + EdgeMap(const Graph& g) + : Parent(g) {} + EdgeMap(const Graph& g, const _Value& v) + : Parent(g, v) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + }; + + template + class SubGraphAdaptorBase<_Graph, NodeFilterMap, EdgeFilterMap, false> + : public GraphAdaptorBase<_Graph> { + public: + typedef _Graph Graph; + typedef SubGraphAdaptorBase Adaptor; + typedef GraphAdaptorBase<_Graph> Parent; + protected: + NodeFilterMap* node_filter_map; + EdgeFilterMap* edge_filter_map; + SubGraphAdaptorBase() : Parent(), + node_filter_map(0), edge_filter_map(0) { } + + void setNodeFilterMap(NodeFilterMap& _node_filter_map) { + node_filter_map=&_node_filter_map; + } + void setEdgeFilterMap(EdgeFilterMap& _edge_filter_map) { + edge_filter_map=&_edge_filter_map; + } + + public: + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + void first(Node& i) const { + Parent::first(i); + while (i!=INVALID && !(*node_filter_map)[i]) Parent::next(i); + } + + void first(Edge& i) const { + Parent::first(i); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::next(i); + } + + void firstIn(Edge& i, const Node& n) const { + Parent::firstIn(i, n); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::nextIn(i); + } + + void firstOut(Edge& i, const Node& n) const { + Parent::firstOut(i, n); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::nextOut(i); + } + + void next(Node& i) const { + Parent::next(i); + while (i!=INVALID && !(*node_filter_map)[i]) Parent::next(i); + } 
+ void next(Edge& i) const { + Parent::next(i); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::next(i); + } + void nextIn(Edge& i) const { + Parent::nextIn(i); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::nextIn(i); + } + + void nextOut(Edge& i) const { + Parent::nextOut(i); + while (i!=INVALID && !(*edge_filter_map)[i]) Parent::nextOut(i); + } + + ///\e + + /// This function hides \c n in the graph, i.e. the iteration + /// jumps over it. This is done by simply setting the value of \c n + /// to be false in the corresponding node-map. + void hide(const Node& n) const { node_filter_map->set(n, false); } + + ///\e + + /// This function hides \c e in the graph, i.e. the iteration + /// jumps over it. This is done by simply setting the value of \c e + /// to be false in the corresponding edge-map. + void hide(const Edge& e) const { edge_filter_map->set(e, false); } + + ///\e + + /// The value of \c n is set to be true in the node-map which stores + /// hide information. If \c n was hidden previuosly, then it is shown + /// again + void unHide(const Node& n) const { node_filter_map->set(n, true); } + + ///\e + + /// The value of \c e is set to be true in the edge-map which stores + /// hide information. If \c e was hidden previuosly, then it is shown + /// again + void unHide(const Edge& e) const { edge_filter_map->set(e, true); } + + /// Returns true if \c n is hidden. + + ///\e + /// + bool hidden(const Node& n) const { return !(*node_filter_map)[n]; } + + /// Returns true if \c n is hidden. + + ///\e + /// + bool hidden(const Edge& e) const { return !(*edge_filter_map)[e]; } + + typedef False NodeNumTag; + typedef False EdgeNumTag; + + typedef FindEdgeTagIndicator FindEdgeTag; + Edge findEdge(const Node& source, const Node& target, + const Edge& prev = INVALID) { + if (!(*node_filter_map)[source] || !(*node_filter_map)[target]) { + return INVALID; + } + Edge edge = Parent::findEdge(source, target, prev); + while (edge != INVALID && !(*edge_filter_map)[edge]) { + edge = Parent::findEdge(source, target, edge); + } + return edge; + } + + template + class NodeMap + : public SubMapExtender > + { + public: + typedef Adaptor Graph; + //typedef SubMapExtender > Parent; + + NodeMap(const Graph& g) + : Parent(g) {} + NodeMap(const Graph& g, const _Value& v) + : Parent(g, v) {} + + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + template + class EdgeMap + : public SubMapExtender > + { + public: + typedef Adaptor Graph; + //typedef SubMapExtender > Parent; + + EdgeMap(const Graph& g) + : Parent(g) {} + EdgeMap(const Graph& g, const _Value& v) + : Parent(g, v) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + }; + + /// \ingroup graph_adaptors + /// + /// \brief A graph adaptor for hiding nodes and edges from a graph. + /// + /// SubGraphAdaptor shows the graph with filtered node-set and + /// edge-set. If the \c checked parameter is true then it filters the edgeset + /// to do not get invalid edges without source or target. + /// Let \f$ G=(V, A) \f$ be a directed graph + /// and suppose that the graph instance \c g of type ListGraph + /// implements \f$ G \f$. + /// Let moreover \f$ b_V \f$ and \f$ b_A \f$ be bool-valued functions resp. + /// on the node-set and edge-set. 
+ /// SubGraphAdaptor<...>::NodeIt iterates + /// on the node-set \f$ \{v\in V : b_V(v)=true\} \f$ and + /// SubGraphAdaptor<...>::EdgeIt iterates + /// on the edge-set \f$ \{e\in A : b_A(e)=true\} \f$. Similarly, + /// SubGraphAdaptor<...>::OutEdgeIt and + /// SubGraphAdaptor<...>::InEdgeIt iterates + /// only on edges leaving and entering a specific node which have true value. + /// + /// If the \c checked template parameter is false then we have to note that + /// the node-iterator cares only the filter on the node-set, and the + /// edge-iterator cares only the filter on the edge-set. + /// This way the edge-map + /// should filter all edges which's source or target is filtered by the + /// node-filter. + ///\code + /// typedef ListGraph Graph; + /// Graph g; + /// typedef Graph::Node Node; + /// typedef Graph::Edge Edge; + /// Node u=g.addNode(); //node of id 0 + /// Node v=g.addNode(); //node of id 1 + /// Node e=g.addEdge(u, v); //edge of id 0 + /// Node f=g.addEdge(v, u); //edge of id 1 + /// Graph::NodeMap nm(g, true); + /// nm.set(u, false); + /// Graph::EdgeMap em(g, true); + /// em.set(e, false); + /// typedef SubGraphAdaptor, Graph::EdgeMap > SubGA; + /// SubGA ga(g, nm, em); + /// for (SubGA::NodeIt n(ga); n!=INVALID; ++n) std::cout << g.id(n) << std::endl; + /// std::cout << ":-)" << std::endl; + /// for (SubGA::EdgeIt e(ga); e!=INVALID; ++e) std::cout << g.id(e) << std::endl; + ///\endcode + /// The output of the above code is the following. + ///\code + /// 1 + /// :-) + /// 1 + ///\endcode + /// Note that \c n is of type \c SubGA::NodeIt, but it can be converted to + /// \c Graph::Node that is why \c g.id(n) can be applied. + /// + /// For other examples see also the documentation of NodeSubGraphAdaptor and + /// EdgeSubGraphAdaptor. + /// + /// \author Marton Makai + + template + class SubGraphAdaptor : + public GraphAdaptorExtender< + SubGraphAdaptorBase<_Graph, NodeFilterMap, EdgeFilterMap, checked> > { + public: + typedef _Graph Graph; + typedef GraphAdaptorExtender< SubGraphAdaptorBase<_Graph, NodeFilterMap, + EdgeFilterMap, checked> > + Parent; + + protected: + SubGraphAdaptor() { } + public: + + SubGraphAdaptor(_Graph& _graph, NodeFilterMap& _node_filter_map, + EdgeFilterMap& _edge_filter_map) { + setGraph(_graph); + setNodeFilterMap(_node_filter_map); + setEdgeFilterMap(_edge_filter_map); + } + + }; + + /// \brief Just gives back a sub graph adaptor + /// + /// Just gives back a sub graph adaptor + template + SubGraphAdaptor + subGraphAdaptor(const Graph& graph, + NodeFilterMap& nfm, EdgeFilterMap& efm) { + return SubGraphAdaptor + (graph, nfm, efm); + } + + template + SubGraphAdaptor + subGraphAdaptor(const Graph& graph, + NodeFilterMap& nfm, EdgeFilterMap& efm) { + return SubGraphAdaptor + (graph, nfm, efm); + } + + template + SubGraphAdaptor + subGraphAdaptor(const Graph& graph, + NodeFilterMap& nfm, EdgeFilterMap& efm) { + return SubGraphAdaptor + (graph, nfm, efm); + } + + template + SubGraphAdaptor + subGraphAdaptor(const Graph& graph, + NodeFilterMap& nfm, EdgeFilterMap& efm) { + return SubGraphAdaptor(graph, nfm, efm); + } + + + + ///\ingroup graph_adaptors + /// + ///\brief An adaptor for hiding nodes from a graph. + /// + ///An adaptor for hiding nodes from a graph. + ///This adaptor specializes SubGraphAdaptor in the way that only + ///the node-set + ///can be filtered. In usual case the checked parameter is true, we get the + ///induced subgraph. But if the checked parameter is false then we can + ///filter only isolated nodes. 
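+ ///
+ /// A short sketch (assuming a \c ListGraph \c g with a node \c u to be
+ /// hidden, and relying on the default value of the \c checked
+ /// parameter):
+ ///\code
+ /// typedef ListGraph Graph;
+ /// Graph::NodeMap<bool> nm(g, true);
+ /// nm.set(u, false);                                   // hide u
+ /// typedef NodeSubGraphAdaptor<Graph, Graph::NodeMap<bool> > SubGA;
+ /// SubGA ga(g, nm);
+ /// for (SubGA::NodeIt n(ga); n != INVALID; ++n) {
+ ///   // iterates only on the nodes not hidden by nm
+ /// }
+ ///\endcode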
+ ///\author Marton Makai + template + class NodeSubGraphAdaptor : + public SubGraphAdaptor, checked> { + public: + + typedef SubGraphAdaptor, checked > + Parent; + + protected: + ConstMap const_true_map; + + NodeSubGraphAdaptor() : const_true_map(true) { + Parent::setEdgeFilterMap(const_true_map); + } + + public: + + NodeSubGraphAdaptor(Graph& _graph, NodeFilterMap& _node_filter_map) : + Parent(), const_true_map(true) { + Parent::setGraph(_graph); + Parent::setNodeFilterMap(_node_filter_map); + Parent::setEdgeFilterMap(const_true_map); + } + + }; + + + /// \brief Just gives back a node sub graph adaptor + /// + /// Just gives back a node sub graph adaptor + template + NodeSubGraphAdaptor + nodeSubGraphAdaptor(const Graph& graph, NodeFilterMap& nfm) { + return NodeSubGraphAdaptor(graph, nfm); + } + + template + NodeSubGraphAdaptor + nodeSubGraphAdaptor(const Graph& graph, const NodeFilterMap& nfm) { + return NodeSubGraphAdaptor(graph, nfm); + } + + ///\ingroup graph_adaptors + /// + ///\brief An adaptor for hiding edges from a graph. + /// + ///An adaptor for hiding edges from a graph. + ///This adaptor specializes SubGraphAdaptor in the way that + ///only the edge-set + ///can be filtered. The usefulness of this adaptor is demonstrated in the + ///problem of searching a maximum number of edge-disjoint shortest paths + ///between + ///two nodes \c s and \c t. Shortest here means being shortest w.r.t. + ///non-negative edge-lengths. Note that + ///the comprehension of the presented solution + ///need's some elementary knowledge from combinatorial optimization. + /// + ///If a single shortest path is to be + ///searched between \c s and \c t, then this can be done easily by + ///applying the Dijkstra algorithm. What happens, if a maximum number of + ///edge-disjoint shortest paths is to be computed. It can be proved that an + ///edge can be in a shortest path if and only + ///if it is tight with respect to + ///the potential function computed by Dijkstra. + ///Moreover, any path containing + ///only such edges is a shortest one. + ///Thus we have to compute a maximum number + ///of edge-disjoint paths between \c s and \c t in + ///the graph which has edge-set + ///all the tight edges. The computation will be demonstrated + ///on the following + ///graph, which is read from the dimacs file \c sub_graph_adaptor_demo.dim. + ///The full source code is available in \ref sub_graph_adaptor_demo.cc. + ///If you are interested in more demo programs, you can use + ///\ref dim_to_dot.cc to generate .dot files from dimacs files. + ///The .dot file of the following figure was generated by + ///the demo program \ref dim_to_dot.cc. 
+ /// + ///\dot + ///digraph lemon_dot_example { + ///node [ shape=ellipse, fontname=Helvetica, fontsize=10 ]; + ///n0 [ label="0 (s)" ]; + ///n1 [ label="1" ]; + ///n2 [ label="2" ]; + ///n3 [ label="3" ]; + ///n4 [ label="4" ]; + ///n5 [ label="5" ]; + ///n6 [ label="6 (t)" ]; + ///edge [ shape=ellipse, fontname=Helvetica, fontsize=10 ]; + ///n5 -> n6 [ label="9, length:4" ]; + ///n4 -> n6 [ label="8, length:2" ]; + ///n3 -> n5 [ label="7, length:1" ]; + ///n2 -> n5 [ label="6, length:3" ]; + ///n2 -> n6 [ label="5, length:5" ]; + ///n2 -> n4 [ label="4, length:2" ]; + ///n1 -> n4 [ label="3, length:3" ]; + ///n0 -> n3 [ label="2, length:1" ]; + ///n0 -> n2 [ label="1, length:2" ]; + ///n0 -> n1 [ label="0, length:3" ]; + ///} + ///\enddot + /// + ///\code + ///Graph g; + ///Node s, t; + ///LengthMap length(g); + /// + ///readDimacs(std::cin, g, length, s, t); + /// + ///cout << "edges with lengths (of form id, source--length->target): " << endl; + ///for(EdgeIt e(g); e!=INVALID; ++e) + /// cout << g.id(e) << ", " << g.id(g.source(e)) << "--" + /// << length[e] << "->" << g.id(g.target(e)) << endl; + /// + ///cout << "s: " << g.id(s) << " t: " << g.id(t) << endl; + ///\endcode + ///Next, the potential function is computed with Dijkstra. + ///\code + ///typedef Dijkstra Dijkstra; + ///Dijkstra dijkstra(g, length); + ///dijkstra.run(s); + ///\endcode + ///Next, we consrtruct a map which filters the edge-set to the tight edges. + ///\code + ///typedef TightEdgeFilterMap + /// TightEdgeFilter; + ///TightEdgeFilter tight_edge_filter(g, dijkstra.distMap(), length); + /// + ///typedef EdgeSubGraphAdaptor SubGA; + ///SubGA ga(g, tight_edge_filter); + ///\endcode + ///Then, the maximum nimber of edge-disjoint \c s-\c t paths are computed + ///with a max flow algorithm Preflow. 
+ ///\code + ///ConstMap const_1_map(1); + ///Graph::EdgeMap flow(g, 0); + /// + ///Preflow, Graph::EdgeMap > + /// preflow(ga, const_1_map, s, t); + ///preflow.run(); + ///\endcode + ///Last, the output is: + ///\code + ///cout << "maximum number of edge-disjoint shortest path: " + /// << preflow.flowValue() << endl; + ///cout << "edges of the maximum number of edge-disjoint shortest s-t paths: " + /// << endl; + ///for(EdgeIt e(g); e!=INVALID; ++e) + /// if (preflow.flow(e)) + /// cout << " " << g.id(g.source(e)) << "--" + /// << length[e] << "->" << g.id(g.target(e)) << endl; + ///\endcode + ///The program has the following (expected :-)) output: + ///\code + ///edges with lengths (of form id, source--length->target): + /// 9, 5--4->6 + /// 8, 4--2->6 + /// 7, 3--1->5 + /// 6, 2--3->5 + /// 5, 2--5->6 + /// 4, 2--2->4 + /// 3, 1--3->4 + /// 2, 0--1->3 + /// 1, 0--2->2 + /// 0, 0--3->1 + ///s: 0 t: 6 + ///maximum number of edge-disjoint shortest path: 2 + ///edges of the maximum number of edge-disjoint shortest s-t paths: + /// 9, 5--4->6 + /// 8, 4--2->6 + /// 7, 3--1->5 + /// 4, 2--2->4 + /// 2, 0--1->3 + /// 1, 0--2->2 + ///\endcode + /// + ///\author Marton Makai + template + class EdgeSubGraphAdaptor : + public SubGraphAdaptor, + EdgeFilterMap, false> { + public: + typedef SubGraphAdaptor, + EdgeFilterMap, false> Parent; + protected: + ConstMap const_true_map; + + EdgeSubGraphAdaptor() : const_true_map(true) { + Parent::setNodeFilterMap(const_true_map); + } + + public: + + EdgeSubGraphAdaptor(Graph& _graph, EdgeFilterMap& _edge_filter_map) : + Parent(), const_true_map(true) { + Parent::setGraph(_graph); + Parent::setNodeFilterMap(const_true_map); + Parent::setEdgeFilterMap(_edge_filter_map); + } + + }; + + /// \brief Just gives back an edge sub graph adaptor + /// + /// Just gives back an edge sub graph adaptor + template + EdgeSubGraphAdaptor + edgeSubGraphAdaptor(const Graph& graph, EdgeFilterMap& efm) { + return EdgeSubGraphAdaptor(graph, efm); + } + + template + EdgeSubGraphAdaptor + edgeSubGraphAdaptor(const Graph& graph, const EdgeFilterMap& efm) { + return EdgeSubGraphAdaptor(graph, efm); + } + + template + class UndirGraphAdaptorBase : + public UndirGraphExtender > { + public: + typedef _Graph Graph; + typedef UndirGraphAdaptorBase Adaptor; + typedef UndirGraphExtender > Parent; + + protected: + + UndirGraphAdaptorBase() : Parent() {} + + public: + + typedef typename Parent::UEdge UEdge; + typedef typename Parent::Edge Edge; + + private: + + template + class EdgeMapBase { + private: + + typedef typename _Graph::template EdgeMap<_Value> MapImpl; + + public: + + typedef typename MapTraits::ReferenceMapTag ReferenceMapTag; + + typedef _Value Value; + typedef Edge Key; + + EdgeMapBase(const Adaptor& adaptor) : + forward_map(*adaptor.graph), backward_map(*adaptor.graph) {} + + EdgeMapBase(const Adaptor& adaptor, const Value& v) + : forward_map(*adaptor.graph, v), backward_map(*adaptor.graph, v) {} + + void set(const Edge& e, const Value& a) { + if (Parent::direction(e)) { + forward_map.set(e, a); + } else { + backward_map.set(e, a); + } + } + + typename MapTraits::ConstReturnValue operator[](Edge e) const { + if (Parent::direction(e)) { + return forward_map[e]; + } else { + return backward_map[e]; + } + } + + typename MapTraits::ReturnValue operator[](Edge e) { + if (Parent::direction(e)) { + return forward_map[e]; + } else { + return backward_map[e]; + } + } + + protected: + + MapImpl forward_map, backward_map; + + }; + + public: + + template + class EdgeMap + : public 
SubMapExtender > + { + public: + typedef Adaptor Graph; + typedef SubMapExtender > Parent; + + EdgeMap(const Graph& g) + : Parent(g) {} + EdgeMap(const Graph& g, const _Value& v) + : Parent(g, v) {} + + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + }; + + template + class UEdgeMap : public Graph::template EdgeMap<_Value> { + public: + + typedef typename Graph::template EdgeMap<_Value> Parent; + + explicit UEdgeMap(const Adaptor& ga) + : Parent(*ga.graph) {} + + UEdgeMap(const Adaptor& ga, const _Value& value) + : Parent(*ga.graph, value) {} + + UEdgeMap& operator=(const UEdgeMap& cmap) { + return operator=(cmap); + } + + template + UEdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + }; + + }; + + template + class AlterableUndirGraphAdaptor + : public UGraphAdaptorExtender > { + public: + typedef UGraphAdaptorExtender > Parent; + + protected: + + AlterableUndirGraphAdaptor() : Parent() {} + + public: + + typedef typename Parent::EdgeNotifier UEdgeNotifier; + typedef InvalidType EdgeNotifier; + + }; + + template + class AlterableUndirGraphAdaptor< + _Graph, + typename enable_if::type > + : public UGraphAdaptorExtender > { + public: + + typedef UGraphAdaptorExtender > Parent; + typedef _Graph Graph; + typedef typename _Graph::Edge GraphEdge; + + protected: + + AlterableUndirGraphAdaptor() + : Parent(), edge_notifier(*this), edge_notifier_proxy(*this) {} + + void setGraph(_Graph& g) { + Parent::setGraph(g); + edge_notifier_proxy.setNotifier(g.notifier(GraphEdge())); + } + + public: + + ~AlterableUndirGraphAdaptor() { + edge_notifier.clear(); + } + + typedef typename Parent::UEdge UEdge; + typedef typename Parent::Edge Edge; + + typedef typename Parent::EdgeNotifier UEdgeNotifier; + + using Parent::notifier; + + typedef AlterationNotifier EdgeNotifier; + EdgeNotifier& notifier(Edge) const { return edge_notifier; } + + protected: + + class NotifierProxy : public Graph::EdgeNotifier::ObserverBase { + public: + + typedef typename Graph::EdgeNotifier::ObserverBase Parent; + typedef AlterableUndirGraphAdaptor AdaptorBase; + + NotifierProxy(const AdaptorBase& _adaptor) + : Parent(), adaptor(&_adaptor) { + } + + virtual ~NotifierProxy() { + if (Parent::attached()) { + Parent::detach(); + } + } + + void setNotifier(typename Graph::EdgeNotifier& nf) { + Parent::attach(nf); + } + + + protected: + + virtual void add(const GraphEdge& ge) { + std::vector edges; + edges.push_back(AdaptorBase::Parent::direct(ge, true)); + edges.push_back(AdaptorBase::Parent::direct(ge, false)); + adaptor->notifier(Edge()).add(edges); + } + virtual void add(const std::vector& ge) { + std::vector edges; + for (int i = 0; i < int(ge.size()); ++i) { + edges.push_back(AdaptorBase::Parent::direct(ge[i], true)); + edges.push_back(AdaptorBase::Parent::direct(ge[i], false)); + } + adaptor->notifier(Edge()).add(edges); + } + virtual void erase(const GraphEdge& ge) { + std::vector edges; + edges.push_back(AdaptorBase::Parent::direct(ge, true)); + edges.push_back(AdaptorBase::Parent::direct(ge, false)); + adaptor->notifier(Edge()).erase(edges); + } + virtual void erase(const std::vector& ge) { + std::vector edges; + for (int i = 0; i < int(ge.size()); ++i) { + edges.push_back(AdaptorBase::Parent::direct(ge[i], true)); + edges.push_back(AdaptorBase::Parent::direct(ge[i], false)); + } + adaptor->notifier(Edge()).erase(edges); + } + virtual void build() { + 
adaptor->notifier(Edge()).build(); + } + virtual void clear() { + adaptor->notifier(Edge()).clear(); + } + + const AdaptorBase* adaptor; + }; + + + mutable EdgeNotifier edge_notifier; + NotifierProxy edge_notifier_proxy; + + }; + + + ///\ingroup graph_adaptors + /// + /// \brief An undirected graph is made from a directed graph by an adaptor + /// + /// This adaptor makes an undirected graph from a directed + /// graph. All edge of the underlying will be showed in the adaptor + /// as an undirected edge. Let's see an informal example about using + /// this adaptor: + /// + /// There is a network of the streets of a town. Of course there are + /// some one-way street in the town hence the network is a directed + /// one. There is a crazy driver who go oppositely in the one-way + /// street without moral sense. Of course he can pass this streets + /// slower than the regular way, in fact his speed is half of the + /// normal speed. How long should he drive to get from a source + /// point to the target? Let see the example code which calculate it: + /// + ///\code + /// typedef UndirGraphAdaptor UGraph; + /// UGraph ugraph(graph); + /// + /// typedef SimpleMap FLengthMap; + /// FLengthMap flength(length); + /// + /// typedef ScaleMap RLengthMap; + /// RLengthMap rlength(length, 2.0); + /// + /// typedef UGraph::CombinedEdgeMap ULengthMap; + /// ULengthMap ulength(flength, rlength); + /// + /// Dijkstra dijkstra(ugraph, ulength); + /// std::cout << "Driving time : " << dijkstra.run(src, trg) << std::endl; + ///\endcode + /// + /// The combined edge map makes the length map for the undirected + /// graph. It is created from a forward and reverse map. The forward + /// map is created from the original length map with a SimpleMap + /// adaptor which just makes a read-write map from the reference map + /// i.e. it forgets that it can be return reference to values. The + /// reverse map is just the scaled original map with the ScaleMap + /// adaptor. The combination solves that passing the reverse way + /// takes double time than the original. To get the driving time we + /// run the dijkstra algorithm on the undirected graph. + /// + /// \author Marton Makai and Balazs Dezso + template + class UndirGraphAdaptor : public AlterableUndirGraphAdaptor<_Graph> { + public: + typedef _Graph Graph; + typedef AlterableUndirGraphAdaptor<_Graph> Parent; + protected: + UndirGraphAdaptor() { } + public: + + /// \brief Constructor + /// + /// Constructor + UndirGraphAdaptor(_Graph& _graph) { + setGraph(_graph); + } + + /// \brief EdgeMap combined from two original EdgeMap + /// + /// This class adapts two original graph EdgeMap to + /// get an edge map on the adaptor. + template + class CombinedEdgeMap { + public: + + typedef _ForwardMap ForwardMap; + typedef _BackwardMap BackwardMap; + + typedef typename MapTraits::ReferenceMapTag ReferenceMapTag; + + typedef typename ForwardMap::Value Value; + typedef typename Parent::Edge Key; + + /// \brief Constructor + /// + /// Constructor + CombinedEdgeMap() : forward_map(0), backward_map(0) {} + + /// \brief Constructor + /// + /// Constructor + CombinedEdgeMap(ForwardMap& _forward_map, BackwardMap& _backward_map) + : forward_map(&_forward_map), backward_map(&_backward_map) {} + + + /// \brief Sets the value associated with a key. + /// + /// Sets the value associated with a key. 
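+ ///
+ /// Which of the two wrapped maps is written depends on the direction
+ /// of the edge. A small sketch (assuming a \c ListGraph \c g and an
+ /// adaptor \c ugraph of type \c UndirGraphAdaptor<ListGraph>):
+ ///\code
+ /// typedef UndirGraphAdaptor<ListGraph> UGraph;
+ /// ListGraph::EdgeMap<double> fwd(g), bwd(g);
+ /// UGraph::CombinedEdgeMap<ListGraph::EdgeMap<double>,
+ ///                         ListGraph::EdgeMap<double> > comb(fwd, bwd);
+ /// for (UGraph::EdgeIt e(ugraph); e != INVALID; ++e) {
+ ///   comb.set(e, 1.0);  // stored in fwd or bwd depending on direction
+ /// }
+ ///\endcode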
+ void set(const Key& e, const Value& a) { + if (Parent::direction(e)) { + forward_map->set(e, a); + } else { + backward_map->set(e, a); + } + } + + /// \brief Returns the value associated with a key. + /// + /// Returns the value associated with a key. + typename MapTraits::ConstReturnValue + operator[](const Key& e) const { + if (Parent::direction(e)) { + return (*forward_map)[e]; + } else { + return (*backward_map)[e]; + } + } + + /// \brief Returns the value associated with a key. + /// + /// Returns the value associated with a key. + typename MapTraits::ReturnValue + operator[](const Key& e) { + if (Parent::direction(e)) { + return (*forward_map)[e]; + } else { + return (*backward_map)[e]; + } + } + + /// \brief Sets the forward map + /// + /// Sets the forward map + void setForwardMap(ForwardMap& _forward_map) { + forward_map = &_forward_map; + } + + /// \brief Sets the backward map + /// + /// Sets the backward map + void setBackwardMap(BackwardMap& _backward_map) { + backward_map = &_backward_map; + } + + protected: + + ForwardMap* forward_map; + BackwardMap* backward_map; + + }; + + }; + + /// \brief Just gives back an undir graph adaptor + /// + /// Just gives back an undir graph adaptor + template + UndirGraphAdaptor + undirGraphAdaptor(const Graph& graph) { + return UndirGraphAdaptor(graph); + } + + template > + class ResForwardFilter { + const CapacityMap* capacity; + const FlowMap* flow; + Tol tolerance; + public: + typedef typename Graph::Edge Key; + typedef bool Value; + + ResForwardFilter(const CapacityMap& _capacity, const FlowMap& _flow, + const Tol& _tolerance = Tol()) + : capacity(&_capacity), flow(&_flow), tolerance(_tolerance) { } + + ResForwardFilter(const Tol& _tolerance) + : capacity(0), flow(0), tolerance(_tolerance) { } + + void setCapacity(const CapacityMap& _capacity) { capacity = &_capacity; } + void setFlow(const FlowMap& _flow) { flow = &_flow; } + + bool operator[](const typename Graph::Edge& e) const { + return tolerance.positive((*capacity)[e] - (*flow)[e]); + } + }; + + template > + class ResBackwardFilter { + const CapacityMap* capacity; + const FlowMap* flow; + Tol tolerance; + public: + typedef typename Graph::Edge Key; + typedef bool Value; + + ResBackwardFilter(const CapacityMap& _capacity, const FlowMap& _flow, + const Tol& _tolerance = Tol()) + : capacity(&_capacity), flow(&_flow), tolerance(_tolerance) { } + ResBackwardFilter(const Tol& _tolerance = Tol()) + : capacity(0), flow(0), tolerance(_tolerance) { } + void setCapacity(const CapacityMap& _capacity) { capacity = &_capacity; } + void setFlow(const FlowMap& _flow) { flow = &_flow; } + bool operator[](const typename Graph::Edge& e) const { + return tolerance.positive((*flow)[e]); + } + }; + + + ///\ingroup graph_adaptors + /// + ///\brief An adaptor for composing the residual + ///graph for directed flow and circulation problems. + /// + ///An adaptor for composing the residual graph for directed flow and + ///circulation problems. Let \f$ G=(V, A) \f$ be a directed graph + ///and let \f$ F \f$ be a number type. Let moreover \f$ f,c:A\to F \f$, + ///be functions on the edge-set. + /// + ///In the appications of ResGraphAdaptor, \f$ f \f$ usually stands + ///for a flow and \f$ c \f$ for a capacity function. Suppose that a + ///graph instange \c g of type \c ListGraph implements \f$ G \f$. 
+ /// + ///\code + /// ListGraph g; + ///\endcode + /// + ///Then ResGraphAdaptor implements the graph structure with node-set + /// \f$ V \f$ and edge-set \f$ A_{forward}\cup A_{backward} \f$, + ///where \f$ A_{forward}=\{uv : uv\in A, f(uv)0\} \f$, i.e. the so called + ///residual graph. When we take the union + /// \f$ A_{forward}\cup A_{backward} \f$, multilicities are counted, i.e. + ///if an edge is in both \f$ A_{forward} \f$ and \f$ A_{backward} \f$, + ///then in the adaptor it appears twice. The following code shows how + ///such an instance can be constructed. + /// + ///\code + /// typedef ListGraph Graph; + /// Graph::EdgeMap f(g); + /// Graph::EdgeMap c(g); + /// ResGraphAdaptor, Graph::EdgeMap > ga(g); + ///\endcode + ///\author Marton Makai + /// + template > + class ResGraphAdaptor : + public EdgeSubGraphAdaptor< + UndirGraphAdaptor, + typename UndirGraphAdaptor::template CombinedEdgeMap< + ResForwardFilter, + ResBackwardFilter > > { + public: + + typedef UndirGraphAdaptor UGraph; + + typedef ResForwardFilter + ForwardFilter; + + typedef ResBackwardFilter + BackwardFilter; + + typedef typename UGraph:: + template CombinedEdgeMap + EdgeFilter; + + typedef EdgeSubGraphAdaptor Parent; + + protected: + + const CapacityMap* capacity; + FlowMap* flow; + + UGraph ugraph; + ForwardFilter forward_filter; + BackwardFilter backward_filter; + EdgeFilter edge_filter; + + void setCapacityMap(const CapacityMap& _capacity) { + capacity=&_capacity; + forward_filter.setCapacity(_capacity); + backward_filter.setCapacity(_capacity); + } + + void setFlowMap(FlowMap& _flow) { + flow=&_flow; + forward_filter.setFlow(_flow); + backward_filter.setFlow(_flow); + } + + public: + + /// \brief Constructor of the residual graph. + /// + /// Constructor of the residual graph. The parameters are the graph type, + /// the flow map, the capacity map and a tolerance object. + ResGraphAdaptor(const Graph& _graph, const CapacityMap& _capacity, + FlowMap& _flow, const Tol& _tolerance = Tol()) + : Parent(), capacity(&_capacity), flow(&_flow), ugraph(_graph), + forward_filter(_capacity, _flow, _tolerance), + backward_filter(_capacity, _flow, _tolerance), + edge_filter(forward_filter, backward_filter) + { + Parent::setGraph(ugraph); + Parent::setEdgeFilterMap(edge_filter); + } + + typedef typename Parent::Edge Edge; + + /// \brief Gives back the residual capacity of the edge. + /// + /// Gives back the residual capacity of the edge. + Number rescap(const Edge& edge) const { + if (UGraph::direction(edge)) { + return (*capacity)[edge]-(*flow)[edge]; + } else { + return (*flow)[edge]; + } + } + + /// \brief Augment on the given edge in the residual graph. + /// + /// Augment on the given edge in the residual graph. It increase + /// or decrease the flow on the original edge depend on the direction + /// of the residual edge. + void augment(const Edge& e, Number a) const { + if (UGraph::direction(e)) { + flow->set(e, (*flow)[e] + a); + } else { + flow->set(e, (*flow)[e] - a); + } + } + + /// \brief Returns the direction of the edge. + /// + /// Returns true when the edge is same oriented as the original edge. + static bool forward(const Edge& e) { + return UGraph::direction(e); + } + + /// \brief Returns the direction of the edge. + /// + /// Returns true when the edge is opposite oriented as the original edge. + static bool backward(const Edge& e) { + return !UGraph::direction(e); + } + + /// \brief Gives back the forward oriented residual edge. + /// + /// Gives back the forward oriented residual edge. 
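+ ///
+ /// The forward and backward residual copies of the edges are usually
+ /// handled together with \ref rescap and \ref augment; a small sketch
+ /// (assuming a residual graph \c rgraph of this type, one of its edges
+ /// \c e and an amount \c a of type \c Number):
+ ///\code
+ /// if (rgraph.rescap(e) >= a) {
+ ///   rgraph.augment(e, a);  // increases or decreases the original flow
+ /// }                        // depending on the direction of e
+ ///\endcode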
+ static Edge forward(const typename Graph::Edge& e) { + return UGraph::direct(e, true); + } + + /// \brief Gives back the backward oriented residual edge. + /// + /// Gives back the backward oriented residual edge. + static Edge backward(const typename Graph::Edge& e) { + return UGraph::direct(e, false); + } + + /// \brief Residual capacity map. + /// + /// In generic residual graphs the residual capacity can be obtained + /// as a map. + class ResCap { + protected: + const ResGraphAdaptor* res_graph; + public: + typedef Number Value; + typedef Edge Key; + ResCap(const ResGraphAdaptor& _res_graph) + : res_graph(&_res_graph) {} + + Number operator[](const Edge& e) const { + return res_graph->rescap(e); + } + + }; + + }; + + + + template + class ErasingFirstGraphAdaptorBase : public GraphAdaptorBase<_Graph> { + public: + typedef _Graph Graph; + typedef GraphAdaptorBase<_Graph> Parent; + protected: + FirstOutEdgesMap* first_out_edges; + ErasingFirstGraphAdaptorBase() : Parent(), + first_out_edges(0) { } + + void setFirstOutEdgesMap(FirstOutEdgesMap& _first_out_edges) { + first_out_edges=&_first_out_edges; + } + + public: + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + void firstOut(Edge& i, const Node& n) const { + i=(*first_out_edges)[n]; + } + + void erase(const Edge& e) const { + Node n=source(e); + Edge f=e; + Parent::nextOut(f); + first_out_edges->set(n, f); + } + }; + + + ///\ingroup graph_adaptors + /// + ///\brief For blocking flows. + /// + ///This graph adaptor is used for on-the-fly + ///Dinits blocking flow computations. + ///For each node, an out-edge is stored which is used when the + ///\code + ///OutEdgeIt& first(OutEdgeIt&, const Node&) + ///\endcode + ///is called. + /// + ///\author Marton Makai + /// + template + class ErasingFirstGraphAdaptor : + public GraphAdaptorExtender< + ErasingFirstGraphAdaptorBase<_Graph, FirstOutEdgesMap> > { + public: + typedef _Graph Graph; + typedef GraphAdaptorExtender< + ErasingFirstGraphAdaptorBase<_Graph, FirstOutEdgesMap> > Parent; + ErasingFirstGraphAdaptor(Graph& _graph, + FirstOutEdgesMap& _first_out_edges) { + setGraph(_graph); + setFirstOutEdgesMap(_first_out_edges); + } + + }; + + /// \brief Base class for split graph adaptor + /// + /// Base class of split graph adaptor. In most case you do not need to + /// use it directly but the documented member functions of this class can + /// be used with the SplitGraphAdaptor class. 
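+ ///
+ /// In the split graph every node \c n of the original graph is
+ /// represented by an in-node and an out-node joined by a so called
+ /// bind edge, while every original edge runs from the out-node of its
+ /// source to the in-node of its target. A sketch of querying this
+ /// structure (assuming an instance \c sgraph of the
+ /// \c SplitGraphAdaptor<ListGraph> type referred to below):
+ ///\code
+ /// typedef SplitGraphAdaptor<ListGraph> SGraph;
+ /// for (SGraph::EdgeIt e(sgraph); e != INVALID; ++e) {
+ ///   if (SGraph::bindEdge(e)) {
+ ///     // joins the in-node and the out-node of an original node
+ ///   } else {
+ ///     // copy of an edge of the original graph
+ ///   }
+ /// }
+ ///\endcode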
+ /// \sa SplitGraphAdaptor + template + class SplitGraphAdaptorBase + : public GraphAdaptorBase { + public: + + typedef _Graph Graph; + + typedef GraphAdaptorBase Parent; + + typedef typename Graph::Node GraphNode; + typedef typename Graph::Edge GraphEdge; + + class Node; + class Edge; + + template class NodeMap; + template class EdgeMap; + + + class Node : public GraphNode { + friend class SplitGraphAdaptorBase; + template friend class NodeMap; + private: + + bool in_node; + Node(GraphNode _node, bool _in_node) + : GraphNode(_node), in_node(_in_node) {} + + public: + + Node() {} + Node(Invalid) : GraphNode(INVALID), in_node(true) {} + + bool operator==(const Node& node) const { + return GraphNode::operator==(node) && in_node == node.in_node; + } + + bool operator!=(const Node& node) const { + return !(*this == node); + } + + bool operator<(const Node& node) const { + return GraphNode::operator<(node) || + (GraphNode::operator==(node) && in_node < node.in_node); + } + }; + + class Edge { + friend class SplitGraphAdaptorBase; + template friend class EdgeMap; + private: + typedef BiVariant EdgeImpl; + + explicit Edge(const GraphEdge& edge) : item(edge) {} + explicit Edge(const GraphNode& node) : item(node) {} + + EdgeImpl item; + + public: + Edge() {} + Edge(Invalid) : item(GraphEdge(INVALID)) {} + + bool operator==(const Edge& edge) const { + if (item.firstState()) { + if (edge.item.firstState()) { + return item.first() == edge.item.first(); + } + } else { + if (edge.item.secondState()) { + return item.second() == edge.item.second(); + } + } + return false; + } + + bool operator!=(const Edge& edge) const { + return !(*this == edge); + } + + bool operator<(const Edge& edge) const { + if (item.firstState()) { + if (edge.item.firstState()) { + return item.first() < edge.item.first(); + } + return false; + } else { + if (edge.item.secondState()) { + return item.second() < edge.item.second(); + } + return true; + } + } + + operator GraphEdge() const { return item.first(); } + operator GraphNode() const { return item.second(); } + + }; + + void first(Node& n) const { + Parent::first(n); + n.in_node = true; + } + + void next(Node& n) const { + if (n.in_node) { + n.in_node = false; + } else { + n.in_node = true; + Parent::next(n); + } + } + + void first(Edge& e) const { + e.item.setSecond(); + Parent::first(e.item.second()); + if (e.item.second() == INVALID) { + e.item.setFirst(); + Parent::first(e.item.first()); + } + } + + void next(Edge& e) const { + if (e.item.secondState()) { + Parent::next(e.item.second()); + if (e.item.second() == INVALID) { + e.item.setFirst(); + Parent::first(e.item.first()); + } + } else { + Parent::next(e.item.first()); + } + } + + void firstOut(Edge& e, const Node& n) const { + if (n.in_node) { + e.item.setSecond(n); + } else { + e.item.setFirst(); + Parent::firstOut(e.item.first(), n); + } + } + + void nextOut(Edge& e) const { + if (!e.item.firstState()) { + e.item.setFirst(INVALID); + } else { + Parent::nextOut(e.item.first()); + } + } + + void firstIn(Edge& e, const Node& n) const { + if (!n.in_node) { + e.item.setSecond(n); + } else { + e.item.setFirst(); + Parent::firstIn(e.item.first(), n); + } + } + + void nextIn(Edge& e) const { + if (!e.item.firstState()) { + e.item.setFirst(INVALID); + } else { + Parent::nextIn(e.item.first()); + } + } + + Node source(const Edge& e) const { + if (e.item.firstState()) { + return Node(Parent::source(e.item.first()), false); + } else { + return Node(e.item.second(), true); + } + } + + Node target(const Edge& e) const { + if 
(e.item.firstState()) { + return Node(Parent::target(e.item.first()), true); + } else { + return Node(e.item.second(), false); + } + } + + int id(const Node& n) const { + return (Parent::id(n) << 1) | (n.in_node ? 0 : 1); + } + Node nodeFromId(int ix) const { + return Node(Parent::nodeFromId(ix >> 1), (ix & 1) == 0); + } + int maxNodeId() const { + return 2 * Parent::maxNodeId() + 1; + } + + int id(const Edge& e) const { + if (e.item.firstState()) { + return Parent::id(e.item.first()) << 1; + } else { + return (Parent::id(e.item.second()) << 1) | 1; + } + } + Edge edgeFromId(int ix) const { + if ((ix & 1) == 0) { + return Edge(Parent::edgeFromId(ix >> 1)); + } else { + return Edge(Parent::nodeFromId(ix >> 1)); + } + } + int maxEdgeId() const { + return std::max(Parent::maxNodeId() << 1, + (Parent::maxEdgeId() << 1) | 1); + } + + /// \brief Returns true when the node is in-node. + /// + /// Returns true when the node is in-node. + static bool inNode(const Node& n) { + return n.in_node; + } + + /// \brief Returns true when the node is out-node. + /// + /// Returns true when the node is out-node. + static bool outNode(const Node& n) { + return !n.in_node; + } + + /// \brief Returns true when the edge is edge in the original graph. + /// + /// Returns true when the edge is edge in the original graph. + static bool origEdge(const Edge& e) { + return e.item.firstState(); + } + + /// \brief Returns true when the edge binds an in-node and an out-node. + /// + /// Returns true when the edge binds an in-node and an out-node. + static bool bindEdge(const Edge& e) { + return e.item.secondState(); + } + + /// \brief Gives back the in-node created from the \c node. + /// + /// Gives back the in-node created from the \c node. + static Node inNode(const GraphNode& n) { + return Node(n, true); + } + + /// \brief Gives back the out-node created from the \c node. + /// + /// Gives back the out-node created from the \c node. + static Node outNode(const GraphNode& n) { + return Node(n, false); + } + + /// \brief Gives back the edge binds the two part of the node. + /// + /// Gives back the edge binds the two part of the node. + static Edge edge(const GraphNode& n) { + return Edge(n); + } + + /// \brief Gives back the edge of the original edge. + /// + /// Gives back the edge of the original edge. 
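+ ///
+ /// Together with the bind edges this makes it easy to attach values to
+ /// the original nodes and edges at the same time through the adaptor's
+ /// EdgeMap; a sketch (assuming an instance \c sgraph of
+ /// \c SplitGraphAdaptor<ListGraph>, an original node \c n and an
+ /// original edge \c ge):
+ ///\code
+ /// typedef SplitGraphAdaptor<ListGraph> SGraph;
+ /// SGraph::EdgeMap<double> cost(sgraph);
+ /// cost.set(SGraph::edge(n),  5.0);  // value on the bind edge of n
+ /// cost.set(SGraph::edge(ge), 2.0);  // value on the copy of ge
+ ///\endcode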
+ static Edge edge(const GraphEdge& e) { + return Edge(e); + } + + typedef True NodeNumTag; + + int nodeNum() const { + return 2 * countNodes(*Parent::graph); + } + + typedef True EdgeNumTag; + + int edgeNum() const { + return countEdges(*Parent::graph) + countNodes(*Parent::graph); + } + + typedef True FindEdgeTag; + + Edge findEdge(const Node& u, const Node& v, + const Edge& prev = INVALID) const { + if (inNode(u)) { + if (outNode(v)) { + if (static_cast(u) == + static_cast(v) && prev == INVALID) { + return Edge(u); + } + } + } else { + if (inNode(v)) { + return Edge(findEdge(*Parent::graph, u, v, prev)); + } + } + return INVALID; + } + + + template + class NodeMap : public MapBase { + typedef typename Parent::template NodeMap NodeImpl; + public: + NodeMap(const SplitGraphAdaptorBase& _graph) + : inNodeMap(_graph), outNodeMap(_graph) {} + NodeMap(const SplitGraphAdaptorBase& _graph, const T& t) + : inNodeMap(_graph, t), outNodeMap(_graph, t) {} + NodeMap& operator=(const NodeMap& cmap) { + return operator=(cmap); + } + template + NodeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + void set(const Node& key, const T& val) { + if (SplitGraphAdaptorBase::inNode(key)) { inNodeMap.set(key, val); } + else {outNodeMap.set(key, val); } + } + + typename MapTraits::ReturnValue + operator[](const Node& key) { + if (SplitGraphAdaptorBase::inNode(key)) { return inNodeMap[key]; } + else { return outNodeMap[key]; } + } + + typename MapTraits::ConstReturnValue + operator[](const Node& key) const { + if (SplitGraphAdaptorBase::inNode(key)) { return inNodeMap[key]; } + else { return outNodeMap[key]; } + } + + private: + NodeImpl inNodeMap, outNodeMap; + }; + + template + class EdgeMap : public MapBase { + typedef typename Parent::template EdgeMap EdgeMapImpl; + typedef typename Parent::template NodeMap NodeMapImpl; + public: + + EdgeMap(const SplitGraphAdaptorBase& _graph) + : edge_map(_graph), node_map(_graph) {} + EdgeMap(const SplitGraphAdaptorBase& _graph, const T& t) + : edge_map(_graph, t), node_map(_graph, t) {} + EdgeMap& operator=(const EdgeMap& cmap) { + return operator=(cmap); + } + template + EdgeMap& operator=(const CMap& cmap) { + Parent::operator=(cmap); + return *this; + } + + void set(const Edge& key, const T& val) { + if (SplitGraphAdaptorBase::origEdge(key)) { + edge_map.set(key.item.first(), val); + } else { + node_map.set(key.item.second(), val); + } + } + + typename MapTraits::ReturnValue + operator[](const Edge& key) { + if (SplitGraphAdaptorBase::origEdge(key)) { + return edge_map[key.item.first()]; + } else { + return node_map[key.item.second()]; + } + } + + typename MapTraits::ConstReturnValue + operator[](const Edge& key) const { + if (SplitGraphAdaptorBase::origEdge(key)) { + return edge_map[key.item.first()]; + } else { + return node_map[key.item.second()]; + } + } + + private: + typename Parent::template EdgeMap edge_map; + typename Parent::template NodeMap node_map; + }; + + + }; + + template + class AlterableSplitGraphAdaptor + : public GraphAdaptorExtender > { + public: + + typedef GraphAdaptorExtender > Parent; + typedef _Graph Graph; + + typedef typename Graph::Node GraphNode; + typedef typename Graph::Node GraphEdge; + + protected: + + AlterableSplitGraphAdaptor() : Parent() {} + + public: + + typedef InvalidType NodeNotifier; + typedef InvalidType EdgeNotifier; + + }; + + template + class AlterableSplitGraphAdaptor< + _Graph, + typename enable_if::type, + EdgeEnable> + : public GraphAdaptorExtender > { + public: + + typedef 
GraphAdaptorExtender > Parent; + typedef _Graph Graph; + + typedef typename Graph::Node GraphNode; + typedef typename Graph::Edge GraphEdge; + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + protected: + + AlterableSplitGraphAdaptor() + : Parent(), node_notifier(*this), node_notifier_proxy(*this) {} + + void setGraph(_Graph& graph) { + Parent::setGraph(graph); + node_notifier_proxy.setNotifier(graph.notifier(GraphNode())); + } + + public: + + ~AlterableSplitGraphAdaptor() { + node_notifier.clear(); + } + + typedef AlterationNotifier NodeNotifier; + typedef InvalidType EdgeNotifier; + + NodeNotifier& notifier(Node) const { return node_notifier; } + + protected: + + class NodeNotifierProxy : public Graph::NodeNotifier::ObserverBase { + public: + + typedef typename Graph::NodeNotifier::ObserverBase Parent; + typedef AlterableSplitGraphAdaptor AdaptorBase; + + NodeNotifierProxy(const AdaptorBase& _adaptor) + : Parent(), adaptor(&_adaptor) { + } + + virtual ~NodeNotifierProxy() { + if (Parent::attached()) { + Parent::detach(); + } + } + + void setNotifier(typename Graph::NodeNotifier& graph_notifier) { + Parent::attach(graph_notifier); + } + + + protected: + + virtual void add(const GraphNode& gn) { + std::vector nodes; + nodes.push_back(AdaptorBase::Parent::inNode(gn)); + nodes.push_back(AdaptorBase::Parent::outNode(gn)); + adaptor->notifier(Node()).add(nodes); + } + + virtual void add(const std::vector& gn) { + std::vector nodes; + for (int i = 0; i < int(gn.size()); ++i) { + nodes.push_back(AdaptorBase::Parent::inNode(gn[i])); + nodes.push_back(AdaptorBase::Parent::outNode(gn[i])); + } + adaptor->notifier(Node()).add(nodes); + } + + virtual void erase(const GraphNode& gn) { + std::vector nodes; + nodes.push_back(AdaptorBase::Parent::inNode(gn)); + nodes.push_back(AdaptorBase::Parent::outNode(gn)); + adaptor->notifier(Node()).erase(nodes); + } + + virtual void erase(const std::vector& gn) { + std::vector nodes; + for (int i = 0; i < int(gn.size()); ++i) { + nodes.push_back(AdaptorBase::Parent::inNode(gn[i])); + nodes.push_back(AdaptorBase::Parent::outNode(gn[i])); + } + adaptor->notifier(Node()).erase(nodes); + } + virtual void build() { + adaptor->notifier(Node()).build(); + } + virtual void clear() { + adaptor->notifier(Node()).clear(); + } + + const AdaptorBase* adaptor; + }; + + + mutable NodeNotifier node_notifier; + + NodeNotifierProxy node_notifier_proxy; + + }; + + template + class AlterableSplitGraphAdaptor< + _Graph, + typename enable_if::type, + typename enable_if::type> + : public GraphAdaptorExtender > { + public: + + typedef GraphAdaptorExtender > Parent; + typedef _Graph Graph; + + typedef typename Graph::Node GraphNode; + typedef typename Graph::Edge GraphEdge; + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + protected: + + AlterableSplitGraphAdaptor() + : Parent(), node_notifier(*this), edge_notifier(*this), + node_notifier_proxy(*this), edge_notifier_proxy(*this) {} + + void setGraph(_Graph& g) { + Parent::setGraph(g); + node_notifier_proxy.setNotifier(g.notifier(GraphNode())); + edge_notifier_proxy.setNotifier(g.notifier(GraphEdge())); + } + + public: + + ~AlterableSplitGraphAdaptor() { + node_notifier.clear(); + edge_notifier.clear(); + } + + typedef AlterationNotifier NodeNotifier; + typedef AlterationNotifier EdgeNotifier; + + NodeNotifier& notifier(Node) const { return node_notifier; } + EdgeNotifier& notifier(Edge) const { return edge_notifier; } + + protected: + + class NodeNotifierProxy : public 
Graph::NodeNotifier::ObserverBase { + public: + + typedef typename Graph::NodeNotifier::ObserverBase Parent; + typedef AlterableSplitGraphAdaptor AdaptorBase; + + NodeNotifierProxy(const AdaptorBase& _adaptor) + : Parent(), adaptor(&_adaptor) { + } + + virtual ~NodeNotifierProxy() { + if (Parent::attached()) { + Parent::detach(); + } + } + + void setNotifier(typename Graph::NodeNotifier& graph_notifier) { + Parent::attach(graph_notifier); + } + + + protected: + + virtual void add(const GraphNode& gn) { + std::vector nodes; + nodes.push_back(AdaptorBase::Parent::inNode(gn)); + nodes.push_back(AdaptorBase::Parent::outNode(gn)); + adaptor->notifier(Node()).add(nodes); + adaptor->notifier(Edge()).add(AdaptorBase::Parent::edge(gn)); + } + virtual void add(const std::vector& gn) { + std::vector nodes; + std::vector edges; + for (int i = 0; i < int(gn.size()); ++i) { + edges.push_back(AdaptorBase::Parent::edge(gn[i])); + nodes.push_back(AdaptorBase::Parent::inNode(gn[i])); + nodes.push_back(AdaptorBase::Parent::outNode(gn[i])); + } + adaptor->notifier(Node()).add(nodes); + adaptor->notifier(Edge()).add(edges); + } + virtual void erase(const GraphNode& gn) { + adaptor->notifier(Edge()).erase(AdaptorBase::Parent::edge(gn)); + std::vector nodes; + nodes.push_back(AdaptorBase::Parent::inNode(gn)); + nodes.push_back(AdaptorBase::Parent::outNode(gn)); + adaptor->notifier(Node()).erase(nodes); + } + virtual void erase(const std::vector& gn) { + std::vector nodes; + std::vector edges; + for (int i = 0; i < int(gn.size()); ++i) { + edges.push_back(AdaptorBase::Parent::edge(gn[i])); + nodes.push_back(AdaptorBase::Parent::inNode(gn[i])); + nodes.push_back(AdaptorBase::Parent::outNode(gn[i])); + } + adaptor->notifier(Edge()).erase(edges); + adaptor->notifier(Node()).erase(nodes); + } + virtual void build() { + std::vector edges; + const typename Parent::Notifier* nf = Parent::notifier(); + GraphNode it; + for (nf->first(it); it != INVALID; nf->next(it)) { + edges.push_back(AdaptorBase::Parent::edge(it)); + } + adaptor->notifier(Node()).build(); + adaptor->notifier(Edge()).add(edges); + } + virtual void clear() { + std::vector edges; + const typename Parent::Notifier* nf = Parent::notifier(); + GraphNode it; + for (nf->first(it); it != INVALID; nf->next(it)) { + edges.push_back(AdaptorBase::Parent::edge(it)); + } + adaptor->notifier(Edge()).erase(edges); + adaptor->notifier(Node()).clear(); + } + + const AdaptorBase* adaptor; + }; + + class EdgeNotifierProxy : public Graph::EdgeNotifier::ObserverBase { + public: + + typedef typename Graph::EdgeNotifier::ObserverBase Parent; + typedef AlterableSplitGraphAdaptor AdaptorBase; + + EdgeNotifierProxy(const AdaptorBase& _adaptor) + : Parent(), adaptor(&_adaptor) { + } + + virtual ~EdgeNotifierProxy() { + if (Parent::attached()) { + Parent::detach(); + } + } + + void setNotifier(typename Graph::EdgeNotifier& graph_notifier) { + Parent::attach(graph_notifier); + } + + + protected: + + virtual void add(const GraphEdge& ge) { + adaptor->notifier(Edge()).add(AdaptorBase::edge(ge)); + } + virtual void add(const std::vector& ge) { + std::vector edges; + for (int i = 0; i < int(ge.size()); ++i) { + edges.push_back(AdaptorBase::edge(ge[i])); + } + adaptor->notifier(Edge()).add(edges); + } + virtual void erase(const GraphEdge& ge) { + adaptor->notifier(Edge()).erase(AdaptorBase::edge(ge)); + } + virtual void erase(const std::vector& ge) { + std::vector edges; + for (int i = 0; i < int(ge.size()); ++i) { + edges.push_back(AdaptorBase::edge(ge[i])); + } + 
adaptor->notifier(Edge()).erase(edges); + } + virtual void build() { + std::vector edges; + const typename Parent::Notifier* nf = Parent::notifier(); + GraphEdge it; + for (nf->first(it); it != INVALID; nf->next(it)) { + edges.push_back(AdaptorBase::Parent::edge(it)); + } + adaptor->notifier(Edge()).add(edges); + } + virtual void clear() { + std::vector edges; + const typename Parent::Notifier* nf = Parent::notifier(); + GraphEdge it; + for (nf->first(it); it != INVALID; nf->next(it)) { + edges.push_back(AdaptorBase::Parent::edge(it)); + } + adaptor->notifier(Edge()).erase(edges); + } + + const AdaptorBase* adaptor; + }; + + + mutable NodeNotifier node_notifier; + mutable EdgeNotifier edge_notifier; + + NodeNotifierProxy node_notifier_proxy; + EdgeNotifierProxy edge_notifier_proxy; + + }; + + /// \ingroup graph_adaptors + /// + /// \brief Split graph adaptor class + /// + /// This is an graph adaptor which splits all node into an in-node + /// and an out-node. Formaly, the adaptor replaces each \f$ u \f$ + /// node in the graph with two node, \f$ u_{in} \f$ node and + /// \f$ u_{out} \f$ node. If there is an \f$ (v, u) \f$ edge in the + /// original graph the new target of the edge will be \f$ u_{in} \f$ and + /// similarly the source of the original \f$ (u, v) \f$ edge will be + /// \f$ u_{out} \f$. The adaptor will add for each node in the + /// original graph an additional edge which will connect + /// \f$ (u_{in}, u_{out}) \f$. + /// + /// The aim of this class is to run algorithm with node costs if the + /// algorithm can use directly just edge costs. In this case we should use + /// a \c SplitGraphAdaptor and set the node cost of the graph to the + /// bind edge in the adapted graph. + /// + /// By example a maximum flow algoritm can compute how many edge + /// disjoint paths are in the graph. But we would like to know how + /// many node disjoint paths are in the graph. First we have to + /// adapt the graph with the \c SplitGraphAdaptor. Then run the flow + /// algorithm on the adapted graph. The bottleneck of the flow will + /// be the bind edges which bounds the flow with the count of the + /// node disjoint paths. + /// + ///\code + /// + /// typedef SplitGraphAdaptor SGraph; + /// + /// SGraph sgraph(graph); + /// + /// typedef ConstMap SCapacity; + /// SCapacity scapacity(1); + /// + /// SGraph::EdgeMap sflow(sgraph); + /// + /// Preflow + /// spreflow(sgraph, scapacity, + /// SGraph::outNode(source), SGraph::inNode(target)); + /// + /// spreflow.run(); + /// + ///\endcode + /// + /// The result of the mamixum flow on the original graph + /// shows the next figure: + /// + /// \image html edge_disjoint.png + /// \image latex edge_disjoint.eps "Edge disjoint paths" width=\textwidth + /// + /// And the maximum flow on the adapted graph: + /// + /// \image html node_disjoint.png + /// \image latex node_disjoint.eps "Node disjoint paths" width=\textwidth + /// + /// The second solution contains just 3 disjoint paths while the first 4. + /// The full code can be found in the \ref disjoint_paths_demo.cc demo file. + /// + /// This graph adaptor is fully conform to the + /// \ref concepts::Graph "Graph" concept and + /// contains some additional member functions and types. The + /// documentation of some member functions may be found just in the + /// SplitGraphAdaptorBase class. 
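+ ///
+ /// When the algorithm itself needs edge costs, the node costs of the
+ /// original graph can be moved onto the bind edges with a combined edge
+ /// map. A sketch (the names \c graph, \c node_cost and the all-zero
+ /// \c edge_cost map are assumed here only for illustration):
+ ///\code
+ /// typedef SplitGraphAdaptor<ListGraph> SGraph;
+ /// SGraph sgraph(graph);
+ ///
+ /// ListGraph::EdgeMap<int> edge_cost(graph, 0);
+ /// SGraph::CombinedEdgeMap<ListGraph::EdgeMap<int>, ListGraph::NodeMap<int> >
+ ///   split_cost = SGraph::combinedEdgeMap(edge_cost, node_cost);
+ ///\endcode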
+ /// + /// \sa SplitGraphAdaptorBase + template + class SplitGraphAdaptor : public AlterableSplitGraphAdaptor<_Graph> { + public: + typedef AlterableSplitGraphAdaptor<_Graph> Parent; + + typedef typename Parent::Node Node; + typedef typename Parent::Edge Edge; + + /// \brief Constructor of the adaptor. + /// + /// Constructor of the adaptor. + SplitGraphAdaptor(_Graph& g) { + Parent::setGraph(g); + } + + /// \brief NodeMap combined from two original NodeMap + /// + /// This class adapt two of the original graph NodeMap to + /// get a node map on the adapted graph. + template + class CombinedNodeMap { + public: + + typedef Node Key; + typedef typename InNodeMap::Value Value; + + /// \brief Constructor + /// + /// Constructor. + CombinedNodeMap(InNodeMap& _inNodeMap, OutNodeMap& _outNodeMap) + : inNodeMap(_inNodeMap), outNodeMap(_outNodeMap) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + Value& operator[](const Key& key) { + if (Parent::inNode(key)) { + return inNodeMap[key]; + } else { + return outNodeMap[key]; + } + } + + /// \brief The const subscript operator. + /// + /// The const subscript operator. + Value operator[](const Key& key) const { + if (Parent::inNode(key)) { + return inNodeMap[key]; + } else { + return outNodeMap[key]; + } + } + + /// \brief The setter function of the map. + /// + /// The setter function of the map. + void set(const Key& key, const Value& value) { + if (Parent::inNode(key)) { + inNodeMap.set(key, value); + } else { + outNodeMap.set(key, value); + } + } + + private: + + InNodeMap& inNodeMap; + OutNodeMap& outNodeMap; + + }; + + + /// \brief Just gives back a combined node map. + /// + /// Just gives back a combined node map. + template + static CombinedNodeMap + combinedNodeMap(InNodeMap& in_map, OutNodeMap& out_map) { + return CombinedNodeMap(in_map, out_map); + } + + template + static CombinedNodeMap + combinedNodeMap(const InNodeMap& in_map, OutNodeMap& out_map) { + return CombinedNodeMap(in_map, out_map); + } + + template + static CombinedNodeMap + combinedNodeMap(InNodeMap& in_map, const OutNodeMap& out_map) { + return CombinedNodeMap(in_map, out_map); + } + + template + static CombinedNodeMap + combinedNodeMap(const InNodeMap& in_map, const OutNodeMap& out_map) { + return CombinedNodeMap(in_map, out_map); + } + + /// \brief EdgeMap combined from an original EdgeMap and NodeMap + /// + /// This class adapt an original graph EdgeMap and NodeMap to + /// get an edge map on the adapted graph. + template + class CombinedEdgeMap { + public: + + typedef Edge Key; + typedef typename GraphEdgeMap::Value Value; + + /// \brief Constructor + /// + /// Constructor. + CombinedEdgeMap(GraphEdgeMap& _edge_map, GraphNodeMap& _node_map) + : edge_map(_edge_map), node_map(_node_map) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + void set(const Edge& edge, const Value& val) { + if (Parent::origEdge(edge)) { + edge_map.set(edge, val); + } else { + node_map.set(edge, val); + } + } + + /// \brief The const subscript operator. + /// + /// The const subscript operator. + Value operator[](const Key& edge) const { + if (Parent::origEdge(edge)) { + return edge_map[edge]; + } else { + return node_map[edge]; + } + } + + /// \brief The const subscript operator. + /// + /// The const subscript operator. 
+ Value& operator[](const Key& edge) { + if (Parent::origEdge(edge)) { + return edge_map[edge]; + } else { + return node_map[edge]; + } + } + + private: + GraphEdgeMap& edge_map; + GraphNodeMap& node_map; + }; + + /// \brief Just gives back a combined edge map. + /// + /// Just gives back a combined edge map. + template + static CombinedEdgeMap + combinedEdgeMap(GraphEdgeMap& edge_map, GraphNodeMap& node_map) { + return CombinedEdgeMap(edge_map, node_map); + } + + template + static CombinedEdgeMap + combinedEdgeMap(const GraphEdgeMap& edge_map, GraphNodeMap& node_map) { + return CombinedEdgeMap(edge_map, node_map); + } + + template + static CombinedEdgeMap + combinedEdgeMap(GraphEdgeMap& edge_map, const GraphNodeMap& node_map) { + return CombinedEdgeMap(edge_map, node_map); + } + + template + static CombinedEdgeMap + combinedEdgeMap(const GraphEdgeMap& edge_map, + const GraphNodeMap& node_map) { + return CombinedEdgeMap(edge_map, node_map); + } + + }; + + /// \brief Just gives back a split graph adaptor + /// + /// Just gives back a split graph adaptor + template + SplitGraphAdaptor + splitGraphAdaptor(const Graph& graph) { + return SplitGraphAdaptor(graph); + } + + +} //namespace lemon + +#endif //LEMON_GRAPH_ADAPTOR_H + diff --git a/src/lemon/graph_utils.h b/src/lemon/graph_utils.h new file mode 100644 index 0000000..5d11b46 --- /dev/null +++ b/src/lemon/graph_utils.h @@ -0,0 +1,3179 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_GRAPH_UTILS_H +#define LEMON_GRAPH_UTILS_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +///\ingroup gutils +///\file +///\brief Graph utilities. + +namespace lemon { + + /// \addtogroup gutils + /// @{ + + ///Creates convenience typedefs for the graph types and iterators + + ///This \c \#define creates convenience typedefs for the following types + ///of \c Graph: \c Node, \c NodeIt, \c Edge, \c EdgeIt, \c InEdgeIt, + ///\c OutEdgeIt + ///\note If \c G it a template parameter, it should be used in this way. + ///\code + /// GRAPH_TYPEDEFS(typename G); + ///\endcode + /// + ///\warning There are no typedefs for the graph maps because of the lack of + ///template typedefs in C++. +#define GRAPH_TYPEDEFS(Graph) \ + typedef Graph:: Node Node; \ + typedef Graph:: NodeIt NodeIt; \ + typedef Graph:: Edge Edge; \ + typedef Graph:: EdgeIt EdgeIt; \ + typedef Graph:: InEdgeIt InEdgeIt; \ + typedef Graph::OutEdgeIt OutEdgeIt + + ///Creates convenience typedefs for the undirected graph types and iterators + + ///This \c \#define creates the same convenience typedefs as defined by + ///\ref GRAPH_TYPEDEFS(Graph) and three more, namely it creates + ///\c UEdge, \c UEdgeIt, \c IncEdgeIt, + /// + ///\note If \c G it a template parameter, it should be used in this way. 
+ ///\code + /// UGRAPH_TYPEDEFS(typename G); + ///\endcode + /// + ///\warning There are no typedefs for the graph maps because of the lack of + ///template typedefs in C++. +#define UGRAPH_TYPEDEFS(Graph) \ + GRAPH_TYPEDEFS(Graph); \ + typedef Graph:: UEdge UEdge; \ + typedef Graph:: UEdgeIt UEdgeIt; \ + typedef Graph:: IncEdgeIt IncEdgeIt + + ///\brief Creates convenience typedefs for the bipartite undirected graph + ///types and iterators + + ///This \c \#define creates the same convenience typedefs as defined by + ///\ref UGRAPH_TYPEDEFS(Graph) and two more, namely it creates + ///\c ANodeIt, \c BNodeIt, + /// + ///\note If \c G it a template parameter, it should be used in this way. + ///\code + /// BPUGRAPH_TYPEDEFS(typename G); + ///\endcode + /// + ///\warning There are no typedefs for the graph maps because of the lack of + ///template typedefs in C++. +#define BPUGRAPH_TYPEDEFS(Graph) \ + UGRAPH_TYPEDEFS(Graph); \ + typedef Graph::ANode ANode; \ + typedef Graph::BNode BNode; \ + typedef Graph::ANodeIt ANodeIt; \ + typedef Graph::BNodeIt BNodeIt + + /// \brief Function to count the items in the graph. + /// + /// This function counts the items (nodes, edges etc) in the graph. + /// The complexity of the function is O(n) because + /// it iterates on all of the items. + + template + inline int countItems(const Graph& g) { + typedef typename ItemSetTraits::ItemIt ItemIt; + int num = 0; + for (ItemIt it(g); it != INVALID; ++it) { + ++num; + } + return num; + } + + // Node counting: + + namespace _graph_utils_bits { + + template + struct CountNodesSelector { + static int count(const Graph &g) { + return countItems(g); + } + }; + + template + struct CountNodesSelector< + Graph, typename + enable_if::type> + { + static int count(const Graph &g) { + return g.nodeNum(); + } + }; + } + + /// \brief Function to count the nodes in the graph. + /// + /// This function counts the nodes in the graph. + /// The complexity of the function is O(n) but for some + /// graph structures it is specialized to run in O(1). + /// + /// If the graph contains a \e nodeNum() member function and a + /// \e NodeNumTag tag then this function calls directly the member + /// function to query the cardinality of the node set. + template + inline int countNodes(const Graph& g) { + return _graph_utils_bits::CountNodesSelector::count(g); + } + + namespace _graph_utils_bits { + + template + struct CountANodesSelector { + static int count(const Graph &g) { + return countItems(g); + } + }; + + template + struct CountANodesSelector< + Graph, typename + enable_if::type> + { + static int count(const Graph &g) { + return g.aNodeNum(); + } + }; + } + + /// \brief Function to count the anodes in the graph. + /// + /// This function counts the anodes in the graph. + /// The complexity of the function is O(an) but for some + /// graph structures it is specialized to run in O(1). + /// + /// If the graph contains an \e aNodeNum() member function and a + /// \e NodeNumTag tag then this function calls directly the member + /// function to query the cardinality of the A-node set. 
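+ ///
+ /// All of the count*() helpers are used the same way; a minimal sketch with
+ /// \c countNodes() and \c countEdges() on a directed \c ListGraph:
+ ///\code
+ /// ListGraph g;
+ /// ListGraph::Node u = g.addNode();
+ /// ListGraph::Node v = g.addNode();
+ /// g.addEdge(u, v);
+ /// int n = countNodes(g);   // 2
+ /// int e = countEdges(g);   // 1
+ ///\endcode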
+ template + inline int countANodes(const Graph& g) { + return _graph_utils_bits::CountANodesSelector::count(g); + } + + namespace _graph_utils_bits { + + template + struct CountBNodesSelector { + static int count(const Graph &g) { + return countItems(g); + } + }; + + template + struct CountBNodesSelector< + Graph, typename + enable_if::type> + { + static int count(const Graph &g) { + return g.bNodeNum(); + } + }; + } + + /// \brief Function to count the bnodes in the graph. + /// + /// This function counts the bnodes in the graph. + /// The complexity of the function is O(bn) but for some + /// graph structures it is specialized to run in O(1). + /// + /// If the graph contains a \e bNodeNum() member function and a + /// \e NodeNumTag tag then this function calls directly the member + /// function to query the cardinality of the B-node set. + template + inline int countBNodes(const Graph& g) { + return _graph_utils_bits::CountBNodesSelector::count(g); + } + + + // Edge counting: + + namespace _graph_utils_bits { + + template + struct CountEdgesSelector { + static int count(const Graph &g) { + return countItems(g); + } + }; + + template + struct CountEdgesSelector< + Graph, + typename enable_if::type> + { + static int count(const Graph &g) { + return g.edgeNum(); + } + }; + } + + /// \brief Function to count the edges in the graph. + /// + /// This function counts the edges in the graph. + /// The complexity of the function is O(e) but for some + /// graph structures it is specialized to run in O(1). + /// + /// If the graph contains a \e edgeNum() member function and a + /// \e EdgeNumTag tag then this function calls directly the member + /// function to query the cardinality of the edge set. + template + inline int countEdges(const Graph& g) { + return _graph_utils_bits::CountEdgesSelector::count(g); + } + + // Undirected edge counting: + namespace _graph_utils_bits { + + template + struct CountUEdgesSelector { + static int count(const Graph &g) { + return countItems(g); + } + }; + + template + struct CountUEdgesSelector< + Graph, + typename enable_if::type> + { + static int count(const Graph &g) { + return g.uEdgeNum(); + } + }; + } + + /// \brief Function to count the undirected edges in the graph. + /// + /// This function counts the undirected edges in the graph. + /// The complexity of the function is O(e) but for some + /// graph structures it is specialized to run in O(1). + /// + /// If the graph contains a \e uEdgeNum() member function and a + /// \e EdgeNumTag tag then this function calls directly the member + /// function to query the cardinality of the undirected edge set. + template + inline int countUEdges(const Graph& g) { + return _graph_utils_bits::CountUEdgesSelector::count(g); + + } + + + template + inline int countNodeDegree(const Graph& _g, const typename Graph::Node& _n) { + int num = 0; + for (DegIt it(_g, _n); it != INVALID; ++it) { + ++num; + } + return num; + } + + /// \brief Function to count the number of the out-edges from node \c n. + /// + /// This function counts the number of the out-edges from node \c n + /// in the graph. + template + inline int countOutEdges(const Graph& _g, const typename Graph::Node& _n) { + return countNodeDegree(_g, _n); + } + + /// \brief Function to count the number of the in-edges to node \c n. + /// + /// This function counts the number of the in-edges to node \c n + /// in the graph. 
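+ /// For example (a sketch, assuming a directed graph \c g and one of its
+ /// nodes \c n), the total degree of \c n is:
+ ///\code
+ /// int degree = countInEdges(g, n) + countOutEdges(g, n);
+ ///\endcode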
+ template + inline int countInEdges(const Graph& _g, const typename Graph::Node& _n) { + return countNodeDegree(_g, _n); + } + + /// \brief Function to count the number of the inc-edges to node \c n. + /// + /// This function counts the number of the inc-edges to node \c n + /// in the graph. + template + inline int countIncEdges(const Graph& _g, const typename Graph::Node& _n) { + return countNodeDegree(_g, _n); + } + + namespace _graph_utils_bits { + + template + struct FindEdgeSelector { + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + static Edge find(const Graph &g, Node u, Node v, Edge e) { + if (e == INVALID) { + g.firstOut(e, u); + } else { + g.nextOut(e); + } + while (e != INVALID && g.target(e) != v) { + g.nextOut(e); + } + return e; + } + }; + + template + struct FindEdgeSelector< + Graph, + typename enable_if::type> + { + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + static Edge find(const Graph &g, Node u, Node v, Edge prev) { + return g.findEdge(u, v, prev); + } + }; + } + + /// \brief Finds an edge between two nodes of a graph. + /// + /// Finds an edge from node \c u to node \c v in graph \c g. + /// + /// If \c prev is \ref INVALID (this is the default value), then + /// it finds the first edge from \c u to \c v. Otherwise it looks for + /// the next edge from \c u to \c v after \c prev. + /// \return The found edge or \ref INVALID if there is no such an edge. + /// + /// Thus you can iterate through each edge from \c u to \c v as it follows. + ///\code + /// for(Edge e=findEdge(g,u,v);e!=INVALID;e=findEdge(g,u,v,e)) { + /// ... + /// } + ///\endcode + /// + ///\sa EdgeLookUp + ///\sa AllEdgeLookUp + ///\sa DynEdgeLookUp + ///\sa ConEdgeIt + template + inline typename Graph::Edge + findEdge(const Graph &g, typename Graph::Node u, typename Graph::Node v, + typename Graph::Edge prev = INVALID) { + return _graph_utils_bits::FindEdgeSelector::find(g, u, v, prev); + } + + /// \brief Iterator for iterating on edges connected the same nodes. + /// + /// Iterator for iterating on edges connected the same nodes. It is + /// higher level interface for the findEdge() function. You can + /// use it the following way: + ///\code + /// for (ConEdgeIt it(g, src, trg); it != INVALID; ++it) { + /// ... + /// } + ///\endcode + /// + ///\sa findEdge() + ///\sa EdgeLookUp + ///\sa AllEdgeLookUp + ///\sa DynEdgeLookUp + /// + /// \author Balazs Dezso + template + class ConEdgeIt : public _Graph::Edge { + public: + + typedef _Graph Graph; + typedef typename Graph::Edge Parent; + + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + + /// \brief Constructor. + /// + /// Construct a new ConEdgeIt iterating on the edges which + /// connects the \c u and \c v node. + ConEdgeIt(const Graph& g, Node u, Node v) : graph(g) { + Parent::operator=(findEdge(graph, u, v)); + } + + /// \brief Constructor. + /// + /// Construct a new ConEdgeIt which continues the iterating from + /// the \c e edge. + ConEdgeIt(const Graph& g, Edge e) : Parent(e), graph(g) {} + + /// \brief Increment operator. + /// + /// It increments the iterator and gives back the next edge. 
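+ /// For example, the number of parallel edges from \c u to \c v can be
+ /// counted like this (a sketch, assuming a directed graph \c g of type
+ /// \c ListGraph):
+ ///\code
+ /// int parallel = 0;
+ /// for (ConEdgeIt<ListGraph> it(g, u, v); it != INVALID; ++it) ++parallel;
+ ///\endcode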
+ ConEdgeIt& operator++() { + Parent::operator=(findEdge(graph, graph.source(*this), + graph.target(*this), *this)); + return *this; + } + private: + const Graph& graph; + }; + + namespace _graph_utils_bits { + + template + struct FindUEdgeSelector { + typedef typename Graph::Node Node; + typedef typename Graph::UEdge UEdge; + static UEdge find(const Graph &g, Node u, Node v, UEdge e) { + bool b; + if (u != v) { + if (e == INVALID) { + g.firstInc(e, b, u); + } else { + b = g.source(e) == u; + g.nextInc(e, b); + } + while (e != INVALID && (b ? g.target(e) : g.source(e)) != v) { + g.nextInc(e, b); + } + } else { + if (e == INVALID) { + g.firstInc(e, b, u); + } else { + b = true; + g.nextInc(e, b); + } + while (e != INVALID && (!b || g.target(e) != v)) { + g.nextInc(e, b); + } + } + return e; + } + }; + + template + struct FindUEdgeSelector< + Graph, + typename enable_if::type> + { + typedef typename Graph::Node Node; + typedef typename Graph::UEdge UEdge; + static UEdge find(const Graph &g, Node u, Node v, UEdge prev) { + return g.findUEdge(u, v, prev); + } + }; + } + + /// \brief Finds an uedge between two nodes of a graph. + /// + /// Finds an uedge from node \c u to node \c v in graph \c g. + /// If the node \c u and node \c v is equal then each loop edge + /// will be enumerated. + /// + /// If \c prev is \ref INVALID (this is the default value), then + /// it finds the first edge from \c u to \c v. Otherwise it looks for + /// the next edge from \c u to \c v after \c prev. + /// \return The found edge or \ref INVALID if there is no such an edge. + /// + /// Thus you can iterate through each edge from \c u to \c v as it follows. + ///\code + /// for(UEdge e = findUEdge(g,u,v); e != INVALID; + /// e = findUEdge(g,u,v,e)) { + /// ... + /// } + ///\endcode + /// + ///\sa ConEdgeIt + + template + inline typename Graph::UEdge + findUEdge(const Graph &g, typename Graph::Node u, typename Graph::Node v, + typename Graph::UEdge p = INVALID) { + return _graph_utils_bits::FindUEdgeSelector::find(g, u, v, p); + } + + /// \brief Iterator for iterating on uedges connected the same nodes. + /// + /// Iterator for iterating on uedges connected the same nodes. It is + /// higher level interface for the findUEdge() function. You can + /// use it the following way: + ///\code + /// for (ConUEdgeIt it(g, src, trg); it != INVALID; ++it) { + /// ... + /// } + ///\endcode + /// + ///\sa findUEdge() + /// + /// \author Balazs Dezso + template + class ConUEdgeIt : public _Graph::UEdge { + public: + + typedef _Graph Graph; + typedef typename Graph::UEdge Parent; + + typedef typename Graph::UEdge UEdge; + typedef typename Graph::Node Node; + + /// \brief Constructor. + /// + /// Construct a new ConUEdgeIt iterating on the edges which + /// connects the \c u and \c v node. + ConUEdgeIt(const Graph& g, Node u, Node v) : graph(g) { + Parent::operator=(findUEdge(graph, u, v)); + } + + /// \brief Constructor. + /// + /// Construct a new ConUEdgeIt which continues the iterating from + /// the \c e edge. + ConUEdgeIt(const Graph& g, UEdge e) : Parent(e), graph(g) {} + + /// \brief Increment operator. + /// + /// It increments the iterator and gives back the next edge. + ConUEdgeIt& operator++() { + Parent::operator=(findUEdge(graph, graph.source(*this), + graph.target(*this), *this)); + return *this; + } + private: + const Graph& graph; + }; + + /// \brief Copy a map. + /// + /// This function copies the \c from map to the \c to map. 
It uses the + /// given iterator to iterate on the data structure and it uses the \c ref + /// mapping to convert the from's keys to the to's keys. + template + void copyMap(To& to, const From& from, + ItemIt it, const Ref& ref) { + for (; it != INVALID; ++it) { + to[ref[it]] = from[it]; + } + } + + /// \brief Copy the from map to the to map. + /// + /// Copy the \c from map to the \c to map. It uses the given iterator + /// to iterate on the data structure. + template + void copyMap(To& to, const From& from, ItemIt it) { + for (; it != INVALID; ++it) { + to[it] = from[it]; + } + } + + namespace _graph_utils_bits { + + template + class MapCopyBase { + public: + virtual void copy(const Graph& from, const RefMap& refMap) = 0; + + virtual ~MapCopyBase() {} + }; + + template + class MapCopy : public MapCopyBase { + public: + + MapCopy(ToMap& tmap, const FromMap& map) + : _tmap(tmap), _map(map) {} + + virtual void copy(const Graph& graph, const RefMap& refMap) { + typedef typename ItemSetTraits::ItemIt ItemIt; + for (ItemIt it(graph); it != INVALID; ++it) { + _tmap.set(refMap[it], _map[it]); + } + } + + private: + ToMap& _tmap; + const FromMap& _map; + }; + + template + class ItemCopy : public MapCopyBase { + public: + + ItemCopy(It& it, const Item& item) : _it(it), _item(item) {} + + virtual void copy(const Graph&, const RefMap& refMap) { + _it = refMap[_item]; + } + + private: + It& _it; + Item _item; + }; + + template + class RefCopy : public MapCopyBase { + public: + + RefCopy(Ref& map) : _map(map) {} + + virtual void copy(const Graph& graph, const RefMap& refMap) { + typedef typename ItemSetTraits::ItemIt ItemIt; + for (ItemIt it(graph); it != INVALID; ++it) { + _map.set(it, refMap[it]); + } + } + + private: + Ref& _map; + }; + + template + class CrossRefCopy : public MapCopyBase { + public: + + CrossRefCopy(CrossRef& cmap) : _cmap(cmap) {} + + virtual void copy(const Graph& graph, const RefMap& refMap) { + typedef typename ItemSetTraits::ItemIt ItemIt; + for (ItemIt it(graph); it != INVALID; ++it) { + _cmap.set(refMap[it], it); + } + } + + private: + CrossRef& _cmap; + }; + + template + struct GraphCopySelector { + template + static void copy(Graph &to, const From& from, + NodeRefMap& nodeRefMap, EdgeRefMap& edgeRefMap) { + for (typename From::NodeIt it(from); it != INVALID; ++it) { + nodeRefMap[it] = to.addNode(); + } + for (typename From::EdgeIt it(from); it != INVALID; ++it) { + edgeRefMap[it] = to.addEdge(nodeRefMap[from.source(it)], + nodeRefMap[from.target(it)]); + } + } + }; + + template + struct GraphCopySelector< + Graph, + typename enable_if::type> + { + template + static void copy(Graph &to, const From& from, + NodeRefMap& nodeRefMap, EdgeRefMap& edgeRefMap) { + to.build(from, nodeRefMap, edgeRefMap); + } + }; + + template + struct UGraphCopySelector { + template + static void copy(UGraph &to, const From& from, + NodeRefMap& nodeRefMap, UEdgeRefMap& uEdgeRefMap) { + for (typename From::NodeIt it(from); it != INVALID; ++it) { + nodeRefMap[it] = to.addNode(); + } + for (typename From::UEdgeIt it(from); it != INVALID; ++it) { + uEdgeRefMap[it] = to.addEdge(nodeRefMap[from.source(it)], + nodeRefMap[from.target(it)]); + } + } + }; + + template + struct UGraphCopySelector< + UGraph, + typename enable_if::type> + { + template + static void copy(UGraph &to, const From& from, + NodeRefMap& nodeRefMap, UEdgeRefMap& uEdgeRefMap) { + to.build(from, nodeRefMap, uEdgeRefMap); + } + }; + + template + struct BpUGraphCopySelector { + template + static void copy(BpUGraph &to, const From& from, + 
ANodeRefMap& aNodeRefMap, BNodeRefMap& bNodeRefMap, + UEdgeRefMap& uEdgeRefMap) { + for (typename From::ANodeIt it(from); it != INVALID; ++it) { + aNodeRefMap[it] = to.addANode(); + } + for (typename From::BNodeIt it(from); it != INVALID; ++it) { + bNodeRefMap[it] = to.addBNode(); + } + for (typename From::UEdgeIt it(from); it != INVALID; ++it) { + uEdgeRefMap[it] = to.addEdge(aNodeRefMap[from.aNode(it)], + bNodeRefMap[from.bNode(it)]); + } + } + }; + + template + struct BpUGraphCopySelector< + BpUGraph, + typename enable_if::type> + { + template + static void copy(BpUGraph &to, const From& from, + ANodeRefMap& aNodeRefMap, BNodeRefMap& bNodeRefMap, + UEdgeRefMap& uEdgeRefMap) { + to.build(from, aNodeRefMap, bNodeRefMap, uEdgeRefMap); + } + }; + + + } + + /// \brief Class to copy a graph. + /// + /// Class to copy a graph to another graph (duplicate a graph). The + /// simplest way of using it is through the \c copyGraph() function. + template + class GraphCopy { + private: + + typedef typename From::Node Node; + typedef typename From::NodeIt NodeIt; + typedef typename From::Edge Edge; + typedef typename From::EdgeIt EdgeIt; + + typedef typename To::Node TNode; + typedef typename To::Edge TEdge; + + typedef typename From::template NodeMap NodeRefMap; + typedef typename From::template EdgeMap EdgeRefMap; + + + public: + + + /// \brief Constructor for the GraphCopy. + /// + /// It copies the content of the \c _from graph into the + /// \c _to graph. + GraphCopy(To& _to, const From& _from) + : from(_from), to(_to) {} + + /// \brief Destructor of the GraphCopy + /// + /// Destructor of the GraphCopy + ~GraphCopy() { + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + delete nodeMapCopies[i]; + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + delete edgeMapCopies[i]; + } + + } + + /// \brief Copies the node references into the given map. + /// + /// Copies the node references into the given map. + template + GraphCopy& nodeRef(NodeRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the node cross references into the given map. + /// + /// Copies the node cross references (reverse references) into + /// the given map. + template + GraphCopy& nodeCrossRef(NodeCrossRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's node type, + /// and the copied map's key type is the from graph's node + /// type. + template + GraphCopy& nodeMap(ToMap& tmap, const FromMap& map) { + nodeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given node. + /// + /// Make a copy of the given node. + GraphCopy& node(TNode& tnode, const Node& snode) { + nodeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tnode, snode)); + return *this; + } + + /// \brief Copies the edge references into the given map. + /// + /// Copies the edge references into the given map. + template + GraphCopy& edgeRef(EdgeRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the edge cross references into the given map. + /// + /// Copies the edge cross references (reverse references) into + /// the given map. 
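+ /// For example (a sketch, assuming \c ListGraph instances \c src and \c trg):
+ ///\code
+ /// ListGraph::EdgeMap<ListGraph::Edge> ecr(trg);
+ /// copyGraph(trg, src).edgeCrossRef(ecr).run();
+ /// // ecr maps each edge of trg back to the corresponding edge of src
+ ///\endcode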
+ template + GraphCopy& edgeCrossRef(EdgeCrossRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's edge type, + /// and the copied map's key type is the from graph's edge + /// type. + template + GraphCopy& edgeMap(ToMap& tmap, const FromMap& map) { + edgeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given edge. + /// + /// Make a copy of the given edge. + GraphCopy& edge(TEdge& tedge, const Edge& sedge) { + edgeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tedge, sedge)); + return *this; + } + + /// \brief Executes the copies. + /// + /// Executes the copies. + void run() { + NodeRefMap nodeRefMap(from); + EdgeRefMap edgeRefMap(from); + _graph_utils_bits::GraphCopySelector:: + copy(to, from, nodeRefMap, edgeRefMap); + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + nodeMapCopies[i]->copy(from, nodeRefMap); + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + edgeMapCopies[i]->copy(from, edgeRefMap); + } + } + + protected: + + + const From& from; + To& to; + + std::vector<_graph_utils_bits::MapCopyBase* > + nodeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + edgeMapCopies; + + }; + + /// \brief Copy a graph to another graph. + /// + /// Copy a graph to another graph. + /// The usage of the function: + /// + ///\code + /// copyGraph(trg, src).nodeRef(nr).edgeCrossRef(ecr).run(); + ///\endcode + /// + /// After the copy the \c nr map will contain the mapping from the + /// nodes of the \c from graph to the nodes of the \c to graph and + /// \c ecr will contain the mapping from the edges of the \c to graph + /// to the edges of the \c from graph. + /// + /// \see GraphCopy + template + GraphCopy copyGraph(To& to, const From& from) { + return GraphCopy(to, from); + } + + /// \brief Class to copy an undirected graph. + /// + /// Class to copy an undirected graph to another graph (duplicate a graph). + /// The simplest way of using it is through the \c copyUGraph() function. + template + class UGraphCopy { + private: + + typedef typename From::Node Node; + typedef typename From::NodeIt NodeIt; + typedef typename From::Edge Edge; + typedef typename From::EdgeIt EdgeIt; + typedef typename From::UEdge UEdge; + typedef typename From::UEdgeIt UEdgeIt; + + typedef typename To::Node TNode; + typedef typename To::Edge TEdge; + typedef typename To::UEdge TUEdge; + + typedef typename From::template NodeMap NodeRefMap; + typedef typename From::template UEdgeMap UEdgeRefMap; + + struct EdgeRefMap { + EdgeRefMap(const To& _to, const From& _from, + const UEdgeRefMap& _uedge_ref, const NodeRefMap& _node_ref) + : to(_to), from(_from), + uedge_ref(_uedge_ref), node_ref(_node_ref) {} + + typedef typename From::Edge Key; + typedef typename To::Edge Value; + + Value operator[](const Key& key) const { + bool forward = + (from.direction(key) == + (node_ref[from.source(static_cast(key))] == + to.source(uedge_ref[static_cast(key)]))); + return to.direct(uedge_ref[key], forward); + } + + const To& to; + const From& from; + const UEdgeRefMap& uedge_ref; + const NodeRefMap& node_ref; + }; + + + public: + + + /// \brief Constructor for the GraphCopy. + /// + /// It copies the content of the \c _from graph into the + /// \c _to graph. 
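+ /// A minimal sketch (assuming \c UGraph is an undirected graph type, for
+ /// example \c ListUGraph, and \c src, \c trg are instances of it):
+ ///\code
+ /// UGraphCopy<UGraph, UGraph> gc(trg, src);
+ /// gc.run();
+ ///\endcode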
+ UGraphCopy(To& _to, const From& _from) + : from(_from), to(_to) {} + + /// \brief Destructor of the GraphCopy + /// + /// Destructor of the GraphCopy + ~UGraphCopy() { + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + delete nodeMapCopies[i]; + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + delete edgeMapCopies[i]; + } + for (int i = 0; i < int(uEdgeMapCopies.size()); ++i) { + delete uEdgeMapCopies[i]; + } + + } + + /// \brief Copies the node references into the given map. + /// + /// Copies the node references into the given map. + template + UGraphCopy& nodeRef(NodeRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the node cross references into the given map. + /// + /// Copies the node cross references (reverse references) into + /// the given map. + template + UGraphCopy& nodeCrossRef(NodeCrossRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's node type, + /// and the copied map's key type is the from graph's node + /// type. + template + UGraphCopy& nodeMap(ToMap& tmap, const FromMap& map) { + nodeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given node. + /// + /// Make a copy of the given node. + UGraphCopy& node(TNode& tnode, const Node& snode) { + nodeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tnode, snode)); + return *this; + } + + /// \brief Copies the edge references into the given map. + /// + /// Copies the edge references into the given map. + template + UGraphCopy& edgeRef(EdgeRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the edge cross references into the given map. + /// + /// Copies the edge cross references (reverse references) into + /// the given map. + template + UGraphCopy& edgeCrossRef(EdgeCrossRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's edge type, + /// and the copied map's key type is the from graph's edge + /// type. + template + UGraphCopy& edgeMap(ToMap& tmap, const FromMap& map) { + edgeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given edge. + /// + /// Make a copy of the given edge. + UGraphCopy& edge(TEdge& tedge, const Edge& sedge) { + edgeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tedge, sedge)); + return *this; + } + + /// \brief Copies the undirected edge references into the given map. + /// + /// Copies the undirected edge references into the given map. + template + UGraphCopy& uEdgeRef(UEdgeRef& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the undirected edge cross references into the given map. + /// + /// Copies the undirected edge cross references (reverse + /// references) into the given map. + template + UGraphCopy& uEdgeCrossRef(UEdgeCrossRef& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. 
+ /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's undirected edge type, + /// and the copied map's key type is the from graph's undirected edge + /// type. + template + UGraphCopy& uEdgeMap(ToMap& tmap, const FromMap& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given undirected edge. + /// + /// Make a copy of the given undirected edge. + UGraphCopy& uEdge(TUEdge& tuedge, const UEdge& suedge) { + uEdgeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tuedge, suedge)); + return *this; + } + + /// \brief Executes the copies. + /// + /// Executes the copies. + void run() { + NodeRefMap nodeRefMap(from); + UEdgeRefMap uEdgeRefMap(from); + EdgeRefMap edgeRefMap(to, from, uEdgeRefMap, nodeRefMap); + _graph_utils_bits::UGraphCopySelector:: + copy(to, from, nodeRefMap, uEdgeRefMap); + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + nodeMapCopies[i]->copy(from, nodeRefMap); + } + for (int i = 0; i < int(uEdgeMapCopies.size()); ++i) { + uEdgeMapCopies[i]->copy(from, uEdgeRefMap); + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + edgeMapCopies[i]->copy(from, edgeRefMap); + } + } + + private: + + const From& from; + To& to; + + std::vector<_graph_utils_bits::MapCopyBase* > + nodeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + edgeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + uEdgeMapCopies; + + }; + + /// \brief Copy an undirected graph to another graph. + /// + /// Copy an undirected graph to another graph. + /// The usage of the function: + /// + ///\code + /// copyUGraph(trg, src).nodeRef(nr).edgeCrossRef(ecr).run(); + ///\endcode + /// + /// After the copy the \c nr map will contain the mapping from the + /// nodes of the \c from graph to the nodes of the \c to graph and + /// \c ecr will contain the mapping from the edges of the \c to graph + /// to the edges of the \c from graph. + /// + /// \see UGraphCopy + template + UGraphCopy + copyUGraph(To& to, const From& from) { + return UGraphCopy(to, from); + } + + /// \brief Class to copy a bipartite undirected graph. + /// + /// Class to copy a bipartite undirected graph to another graph + /// (duplicate a graph). The simplest way of using it is through + /// the \c copyBpUGraph() function. + template + class BpUGraphCopy { + private: + + typedef typename From::Node Node; + typedef typename From::ANode ANode; + typedef typename From::BNode BNode; + typedef typename From::NodeIt NodeIt; + typedef typename From::Edge Edge; + typedef typename From::EdgeIt EdgeIt; + typedef typename From::UEdge UEdge; + typedef typename From::UEdgeIt UEdgeIt; + + typedef typename To::Node TNode; + typedef typename To::Edge TEdge; + typedef typename To::UEdge TUEdge; + + typedef typename From::template ANodeMap ANodeRefMap; + typedef typename From::template BNodeMap BNodeRefMap; + typedef typename From::template UEdgeMap UEdgeRefMap; + + struct NodeRefMap { + NodeRefMap(const From& _from, const ANodeRefMap& _anode_ref, + const BNodeRefMap& _bnode_ref) + : from(_from), anode_ref(_anode_ref), bnode_ref(_bnode_ref) {} + + typedef typename From::Node Key; + typedef typename To::Node Value; + + Value operator[](const Key& key) const { + return from.aNode(key) ? 
anode_ref[key] : bnode_ref[key]; + } + + const From& from; + const ANodeRefMap& anode_ref; + const BNodeRefMap& bnode_ref; + }; + + struct EdgeRefMap { + EdgeRefMap(const To& _to, const From& _from, + const UEdgeRefMap& _uedge_ref, const NodeRefMap& _node_ref) + : to(_to), from(_from), + uedge_ref(_uedge_ref), node_ref(_node_ref) {} + + typedef typename From::Edge Key; + typedef typename To::Edge Value; + + Value operator[](const Key& key) const { + bool forward = + (from.direction(key) == + (node_ref[from.source(static_cast(key))] == + to.source(uedge_ref[static_cast(key)]))); + return to.direct(uedge_ref[key], forward); + } + + const To& to; + const From& from; + const UEdgeRefMap& uedge_ref; + const NodeRefMap& node_ref; + }; + + public: + + + /// \brief Constructor for the GraphCopy. + /// + /// It copies the content of the \c _from graph into the + /// \c _to graph. + BpUGraphCopy(To& _to, const From& _from) + : from(_from), to(_to) {} + + /// \brief Destructor of the GraphCopy + /// + /// Destructor of the GraphCopy + ~BpUGraphCopy() { + for (int i = 0; i < int(aNodeMapCopies.size()); ++i) { + delete aNodeMapCopies[i]; + } + for (int i = 0; i < int(bNodeMapCopies.size()); ++i) { + delete bNodeMapCopies[i]; + } + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + delete nodeMapCopies[i]; + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + delete edgeMapCopies[i]; + } + for (int i = 0; i < int(uEdgeMapCopies.size()); ++i) { + delete uEdgeMapCopies[i]; + } + + } + + /// \brief Copies the A-node references into the given map. + /// + /// Copies the A-node references into the given map. + template + BpUGraphCopy& aNodeRef(ANodeRef& map) { + aNodeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the A-node cross references into the given map. + /// + /// Copies the A-node cross references (reverse references) into + /// the given map. + template + BpUGraphCopy& aNodeCrossRef(ANodeCrossRef& map) { + aNodeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given A-node map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's node type, + /// and the copied map's key type is the from graph's node + /// type. + template + BpUGraphCopy& aNodeMap(ToMap& tmap, const FromMap& map) { + aNodeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Copies the B-node references into the given map. + /// + /// Copies the B-node references into the given map. + template + BpUGraphCopy& bNodeRef(BNodeRef& map) { + bNodeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the B-node cross references into the given map. + /// + /// Copies the B-node cross references (reverse references) into + /// the given map. + template + BpUGraphCopy& bNodeCrossRef(BNodeCrossRef& map) { + bNodeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given B-node map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's node type, + /// and the copied map's key type is the from graph's node + /// type. + template + BpUGraphCopy& bNodeMap(ToMap& tmap, const FromMap& map) { + bNodeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + /// \brief Copies the node references into the given map. 
+ /// + /// Copies the node references into the given map. + template + BpUGraphCopy& nodeRef(NodeRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the node cross references into the given map. + /// + /// Copies the node cross references (reverse references) into + /// the given map. + template + BpUGraphCopy& nodeCrossRef(NodeCrossRef& map) { + nodeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's node type, + /// and the copied map's key type is the from graph's node + /// type. + template + BpUGraphCopy& nodeMap(ToMap& tmap, const FromMap& map) { + nodeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given node. + /// + /// Make a copy of the given node. + BpUGraphCopy& node(TNode& tnode, const Node& snode) { + nodeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tnode, snode)); + return *this; + } + + /// \brief Copies the edge references into the given map. + /// + /// Copies the edge references into the given map. + template + BpUGraphCopy& edgeRef(EdgeRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the edge cross references into the given map. + /// + /// Copies the edge cross references (reverse references) into + /// the given map. + template + BpUGraphCopy& edgeCrossRef(EdgeCrossRef& map) { + edgeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's edge type, + /// and the copied map's key type is the from graph's edge + /// type. + template + BpUGraphCopy& edgeMap(ToMap& tmap, const FromMap& map) { + edgeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given edge. + /// + /// Make a copy of the given edge. + BpUGraphCopy& edge(TEdge& tedge, const Edge& sedge) { + edgeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tedge, sedge)); + return *this; + } + + /// \brief Copies the undirected edge references into the given map. + /// + /// Copies the undirected edge references into the given map. + template + BpUGraphCopy& uEdgeRef(UEdgeRef& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::RefCopy(map)); + return *this; + } + + /// \brief Copies the undirected edge cross references into the given map. + /// + /// Copies the undirected edge cross references (reverse + /// references) into the given map. + template + BpUGraphCopy& uEdgeCrossRef(UEdgeCrossRef& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::CrossRefCopy(map)); + return *this; + } + + /// \brief Make copy of the given map. + /// + /// Makes copy of the given map for the newly created graph. + /// The new map's key type is the to graph's undirected edge type, + /// and the copied map's key type is the from graph's undirected edge + /// type. + template + BpUGraphCopy& uEdgeMap(ToMap& tmap, const FromMap& map) { + uEdgeMapCopies.push_back(new _graph_utils_bits::MapCopy(tmap, map)); + return *this; + } + + /// \brief Make a copy of the given undirected edge. + /// + /// Make a copy of the given undirected edge. 
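+ /// For example (a sketch; \c src and \c trg are assumed bipartite graphs of
+ /// the same type \c BpGraph, and \c se is an undirected edge of \c src):
+ ///\code
+ /// BpGraph::UEdge te;
+ /// copyBpUGraph(trg, src).uEdge(te, se).run();
+ /// // after run() te refers to the copy of se in trg
+ ///\endcode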
+ BpUGraphCopy& uEdge(TUEdge& tuedge, const UEdge& suedge) { + uEdgeMapCopies.push_back(new _graph_utils_bits::ItemCopy(tuedge, suedge)); + return *this; + } + + /// \brief Executes the copies. + /// + /// Executes the copies. + void run() { + ANodeRefMap aNodeRefMap(from); + BNodeRefMap bNodeRefMap(from); + NodeRefMap nodeRefMap(from, aNodeRefMap, bNodeRefMap); + UEdgeRefMap uEdgeRefMap(from); + EdgeRefMap edgeRefMap(to, from, uEdgeRefMap, nodeRefMap); + _graph_utils_bits::BpUGraphCopySelector:: + copy(to, from, aNodeRefMap, bNodeRefMap, uEdgeRefMap); + for (int i = 0; i < int(aNodeMapCopies.size()); ++i) { + aNodeMapCopies[i]->copy(from, aNodeRefMap); + } + for (int i = 0; i < int(bNodeMapCopies.size()); ++i) { + bNodeMapCopies[i]->copy(from, bNodeRefMap); + } + for (int i = 0; i < int(nodeMapCopies.size()); ++i) { + nodeMapCopies[i]->copy(from, nodeRefMap); + } + for (int i = 0; i < int(uEdgeMapCopies.size()); ++i) { + uEdgeMapCopies[i]->copy(from, uEdgeRefMap); + } + for (int i = 0; i < int(edgeMapCopies.size()); ++i) { + edgeMapCopies[i]->copy(from, edgeRefMap); + } + } + + private: + + const From& from; + To& to; + + std::vector<_graph_utils_bits::MapCopyBase* > + aNodeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + bNodeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + nodeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + edgeMapCopies; + + std::vector<_graph_utils_bits::MapCopyBase* > + uEdgeMapCopies; + + }; + + /// \brief Copy a bipartite undirected graph to another graph. + /// + /// Copy a bipartite undirected graph to another graph. + /// The usage of the function: + /// + ///\code + /// copyBpUGraph(trg, src).aNodeRef(anr).edgeCrossRef(ecr).run(); + ///\endcode + /// + /// After the copy the \c nr map will contain the mapping from the + /// nodes of the \c from graph to the nodes of the \c to graph and + /// \c ecr will contain the mapping from the edges of the \c to graph + /// to the edges of the \c from graph. + /// + /// \see BpUGraphCopy + template + BpUGraphCopy + copyBpUGraph(To& to, const From& from) { + return BpUGraphCopy(to, from); + } + + + /// @} + + /// \addtogroup graph_maps + /// @{ + + /// Provides an immutable and unique id for each item in the graph. + + /// The IdMap class provides a unique and immutable id for each item of the + /// same type (e.g. node) in the graph. This id is
+ /// \li \b unique: different items (nodes) get different ids
+ /// \li \b immutable: the id of an item (node) does not change (even if you delete other nodes).
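+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g and one of its nodes \c n:
+ ///\code
+ /// IdMap<ListGraph, ListGraph::Node> idMap(g);
+ /// int i = idMap[n];               // the unique, immutable id of n
+ /// ListGraph::Node m = idMap(i);   // the same node looked up by its id
+ ///\endcode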
+ /// Through this map you get access (i.e. can read) the inner id values of + /// the items stored in the graph. This map can be inverted with its member + /// class \c InverseMap. + /// + template + class IdMap { + public: + typedef _Graph Graph; + typedef int Value; + typedef _Item Item; + typedef _Item Key; + + /// \brief Constructor. + /// + /// Constructor of the map. + explicit IdMap(const Graph& _graph) : graph(&_graph) {} + + /// \brief Gives back the \e id of the item. + /// + /// Gives back the immutable and unique \e id of the item. + int operator[](const Item& item) const { return graph->id(item);} + + /// \brief Gives back the item by its id. + /// + /// Gives back the item by its id. + Item operator()(int id) { return graph->fromId(id, Item()); } + + private: + const Graph* graph; + + public: + + /// \brief The class represents the inverse of its owner (IdMap). + /// + /// The class represents the inverse of its owner (IdMap). + /// \see inverse() + class InverseMap { + public: + + /// \brief Constructor. + /// + /// Constructor for creating an id-to-item map. + explicit InverseMap(const Graph& _graph) : graph(&_graph) {} + + /// \brief Constructor. + /// + /// Constructor for creating an id-to-item map. + explicit InverseMap(const IdMap& idMap) : graph(idMap.graph) {} + + /// \brief Gives back the given item from its id. + /// + /// Gives back the given item from its id. + /// + Item operator[](int id) const { return graph->fromId(id, Item());} + + private: + const Graph* graph; + }; + + /// \brief Gives back the inverse of the map. + /// + /// Gives back the inverse of the IdMap. + InverseMap inverse() const { return InverseMap(*graph);} + + }; + + + /// \brief General invertable graph-map type. + + /// This type provides simple invertable graph-maps. + /// The InvertableMap wraps an arbitrary ReadWriteMap + /// and if a key is set to a new value then store it + /// in the inverse map. + /// + /// The values of the map can be accessed + /// with stl compatible forward iterator. + /// + /// \param _Graph The graph type. + /// \param _Item The item type of the graph. + /// \param _Value The value type of the map. + /// + /// \see IterableValueMap + template + class InvertableMap : protected DefaultMap<_Graph, _Item, _Value> { + private: + + typedef DefaultMap<_Graph, _Item, _Value> Map; + typedef _Graph Graph; + + typedef std::map<_Value, _Item> Container; + Container invMap; + + public: + + /// The key type of InvertableMap (Node, Edge, UEdge). + typedef typename Map::Key Key; + /// The value type of the InvertableMap. + typedef typename Map::Value Value; + + + + /// \brief Constructor. + /// + /// Construct a new InvertableMap for the graph. + /// + explicit InvertableMap(const Graph& graph) : Map(graph) {} + + /// \brief Forward iterator for values. + /// + /// This iterator is an stl compatible forward + /// iterator on the values of the map. The values can + /// be accessed in the [beginValue, endValue) range. 
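+ ///
+ /// A minimal sketch, assuming an InvertableMap \c label over the nodes of
+ /// a \c ListGraph with \c std::string values:
+ ///\code
+ /// typedef InvertableMap<ListGraph, ListGraph::Node, std::string> LabelMap;
+ /// for (LabelMap::ValueIterator it = label.beginValue();
+ ///      it != label.endValue(); ++it) {
+ ///   std::cout << *it << std::endl;   // each value currently stored via set()
+ /// }
+ ///\endcode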
+ /// + class ValueIterator + : public std::iterator { + friend class InvertableMap; + private: + ValueIterator(typename Container::const_iterator _it) + : it(_it) {} + public: + + ValueIterator() {} + + ValueIterator& operator++() { ++it; return *this; } + ValueIterator operator++(int) { + ValueIterator tmp(*this); + operator++(); + return tmp; + } + + const Value& operator*() const { return it->first; } + const Value* operator->() const { return &(it->first); } + + bool operator==(ValueIterator jt) const { return it == jt.it; } + bool operator!=(ValueIterator jt) const { return it != jt.it; } + + private: + typename Container::const_iterator it; + }; + + /// \brief Returns an iterator to the first value. + /// + /// Returns an stl compatible iterator to the + /// first value of the map. The values of the + /// map can be accessed in the [beginValue, endValue) + /// range. + ValueIterator beginValue() const { + return ValueIterator(invMap.begin()); + } + + /// \brief Returns an iterator after the last value. + /// + /// Returns an stl compatible iterator after the + /// last value of the map. The values of the + /// map can be accessed in the [beginValue, endValue) + /// range. + ValueIterator endValue() const { + return ValueIterator(invMap.end()); + } + + /// \brief The setter function of the map. + /// + /// Sets the mapped value. + void set(const Key& key, const Value& val) { + Value oldval = Map::operator[](key); + typename Container::iterator it = invMap.find(oldval); + if (it != invMap.end() && it->second == key) { + invMap.erase(it); + } + invMap.insert(make_pair(val, key)); + Map::set(key, val); + } + + /// \brief The getter function of the map. + /// + /// It gives back the value associated with the key. + typename MapTraits::ConstReturnValue + operator[](const Key& key) const { + return Map::operator[](key); + } + + /// \brief Gives back the item by its value. + /// + /// Gives back the item by its value. + Key operator()(const Value& key) const { + typename Container::const_iterator it = invMap.find(key); + return it != invMap.end() ? it->second : INVALID; + } + + protected: + + /// \brief Erase the key from the map. + /// + /// Erase the key to the map. It is called by the + /// \c AlterationNotifier. + virtual void erase(const Key& key) { + Value val = Map::operator[](key); + typename Container::iterator it = invMap.find(val); + if (it != invMap.end() && it->second == key) { + invMap.erase(it); + } + Map::erase(key); + } + + /// \brief Erase more keys from the map. + /// + /// Erase more keys from the map. It is called by the + /// \c AlterationNotifier. + virtual void erase(const std::vector& keys) { + for (int i = 0; i < int(keys.size()); ++i) { + Value val = Map::operator[](keys[i]); + typename Container::iterator it = invMap.find(val); + if (it != invMap.end() && it->second == keys[i]) { + invMap.erase(it); + } + } + Map::erase(keys); + } + + /// \brief Clear the keys from the map and inverse map. + /// + /// Clear the keys from the map and inverse map. It is called by the + /// \c AlterationNotifier. + virtual void clear() { + invMap.clear(); + Map::clear(); + } + + public: + + /// \brief The inverse map type. + /// + /// The inverse of this map. The subscript operator of the map + /// gives back always the item what was last assigned to the value. + class InverseMap { + public: + /// \brief Constructor of the InverseMap. + /// + /// Constructor of the InverseMap. 
+ explicit InverseMap(const InvertableMap& _inverted) + : inverted(_inverted) {} + + /// The value type of the InverseMap. + typedef typename InvertableMap::Key Value; + /// The key type of the InverseMap. + typedef typename InvertableMap::Value Key; + + /// \brief Subscript operator. + /// + /// Subscript operator. It gives back always the item + /// what was last assigned to the value. + Value operator[](const Key& key) const { + return inverted(key); + } + + private: + const InvertableMap& inverted; + }; + + /// \brief It gives back the just readable inverse map. + /// + /// It gives back the just readable inverse map. + InverseMap inverse() const { + return InverseMap(*this); + } + + + + }; + + /// \brief Provides a mutable, continuous and unique descriptor for each + /// item in the graph. + /// + /// The DescriptorMap class provides a unique and continuous (but mutable) + /// descriptor (id) for each item of the same type (e.g. node) in the + /// graph. This id is
+ /// \li \b unique: different items (nodes) get different ids
+ /// \li \b continuous: the range of the ids is the set of integers between 0 and \c n-1, where \c n is the number of the items of this type (e.g. nodes), so the id of a node can change if you delete another node, i.e. this id is mutable.
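+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g and one of its nodes \c n:
+ ///\code
+ /// DescriptorMap<ListGraph, ListGraph::Node> desc(g);
+ /// int d = desc[n];               // a descriptor in the range [0, desc.size())
+ /// ListGraph::Node m = desc(d);   // descriptor -> node
+ ///\endcode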
This map can be inverted + /// with its member class \c InverseMap. + /// + /// \param _Graph The graph class the \c DescriptorMap belongs to. + /// \param _Item The Item is the Key of the Map. It may be Node, Edge or + /// UEdge. + template + class DescriptorMap : protected DefaultMap<_Graph, _Item, int> { + + typedef _Item Item; + typedef DefaultMap<_Graph, _Item, int> Map; + + public: + /// The graph class of DescriptorMap. + typedef _Graph Graph; + + /// The key type of DescriptorMap (Node, Edge, UEdge). + typedef typename Map::Key Key; + /// The value type of DescriptorMap. + typedef typename Map::Value Value; + + /// \brief Constructor. + /// + /// Constructor for descriptor map. + explicit DescriptorMap(const Graph& _graph) : Map(_graph) { + Item it; + const typename Map::Notifier* nf = Map::notifier(); + for (nf->first(it); it != INVALID; nf->next(it)) { + Map::set(it, invMap.size()); + invMap.push_back(it); + } + } + + protected: + + /// \brief Add a new key to the map. + /// + /// Add a new key to the map. It is called by the + /// \c AlterationNotifier. + virtual void add(const Item& item) { + Map::add(item); + Map::set(item, invMap.size()); + invMap.push_back(item); + } + + /// \brief Add more new keys to the map. + /// + /// Add more new keys to the map. It is called by the + /// \c AlterationNotifier. + virtual void add(const std::vector& items) { + Map::add(items); + for (int i = 0; i < int(items.size()); ++i) { + Map::set(items[i], invMap.size()); + invMap.push_back(items[i]); + } + } + + /// \brief Erase the key from the map. + /// + /// Erase the key from the map. It is called by the + /// \c AlterationNotifier. + virtual void erase(const Item& item) { + Map::set(invMap.back(), Map::operator[](item)); + invMap[Map::operator[](item)] = invMap.back(); + invMap.pop_back(); + Map::erase(item); + } + + /// \brief Erase more keys from the map. + /// + /// Erase more keys from the map. It is called by the + /// \c AlterationNotifier. + virtual void erase(const std::vector& items) { + for (int i = 0; i < int(items.size()); ++i) { + Map::set(invMap.back(), Map::operator[](items[i])); + invMap[Map::operator[](items[i])] = invMap.back(); + invMap.pop_back(); + } + Map::erase(items); + } + + /// \brief Build the unique map. + /// + /// Build the unique map. It is called by the + /// \c AlterationNotifier. + virtual void build() { + Map::build(); + Item it; + const typename Map::Notifier* nf = Map::notifier(); + for (nf->first(it); it != INVALID; nf->next(it)) { + Map::set(it, invMap.size()); + invMap.push_back(it); + } + } + + /// \brief Clear the keys from the map. + /// + /// Clear the keys from the map. It is called by the + /// \c AlterationNotifier. + virtual void clear() { + invMap.clear(); + Map::clear(); + } + + public: + + /// \brief Returns the maximal value plus one. + /// + /// Returns the maximal value plus one in the map. + unsigned int size() const { + return invMap.size(); + } + + /// \brief Swaps the position of the two items in the map. + /// + /// Swaps the position of the two items in the map. + void swap(const Item& p, const Item& q) { + int pi = Map::operator[](p); + int qi = Map::operator[](q); + Map::set(p, qi); + invMap[qi] = p; + Map::set(q, pi); + invMap[pi] = q; + } + + /// \brief Gives back the \e descriptor of the item. + /// + /// Gives back the mutable and unique \e descriptor of the map. + int operator[](const Item& item) const { + return Map::operator[](item); + } + + /// \brief Gives back the item by its descriptor. 
+ /// + /// Gives back th item by its descriptor. + Item operator()(int id) const { + return invMap[id]; + } + + private: + + typedef std::vector Container; + Container invMap; + + public: + /// \brief The inverse map type of DescriptorMap. + /// + /// The inverse map type of DescriptorMap. + class InverseMap { + public: + /// \brief Constructor of the InverseMap. + /// + /// Constructor of the InverseMap. + explicit InverseMap(const DescriptorMap& _inverted) + : inverted(_inverted) {} + + + /// The value type of the InverseMap. + typedef typename DescriptorMap::Key Value; + /// The key type of the InverseMap. + typedef typename DescriptorMap::Value Key; + + /// \brief Subscript operator. + /// + /// Subscript operator. It gives back the item + /// that the descriptor belongs to currently. + Value operator[](const Key& key) const { + return inverted(key); + } + + /// \brief Size of the map. + /// + /// Returns the size of the map. + unsigned int size() const { + return inverted.size(); + } + + private: + const DescriptorMap& inverted; + }; + + /// \brief Gives back the inverse of the map. + /// + /// Gives back the inverse of the map. + const InverseMap inverse() const { + return InverseMap(*this); + } + }; + + /// \brief Returns the source of the given edge. + /// + /// The SourceMap gives back the source Node of the given edge. + /// \see TargetMap + /// \author Balazs Dezso + template + class SourceMap { + public: + + typedef typename Graph::Node Value; + typedef typename Graph::Edge Key; + + /// \brief Constructor + /// + /// Constructor + /// \param _graph The graph that the map belongs to. + explicit SourceMap(const Graph& _graph) : graph(_graph) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + /// \param edge The edge + /// \return The source of the edge + Value operator[](const Key& edge) const { + return graph.source(edge); + } + + private: + const Graph& graph; + }; + + /// \brief Returns a \ref SourceMap class. + /// + /// This function just returns an \ref SourceMap class. + /// \relates SourceMap + template + inline SourceMap sourceMap(const Graph& graph) { + return SourceMap(graph); + } + + /// \brief Returns the target of the given edge. + /// + /// The TargetMap gives back the target Node of the given edge. + /// \see SourceMap + /// \author Balazs Dezso + template + class TargetMap { + public: + + typedef typename Graph::Node Value; + typedef typename Graph::Edge Key; + + /// \brief Constructor + /// + /// Constructor + /// \param _graph The graph that the map belongs to. + explicit TargetMap(const Graph& _graph) : graph(_graph) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + /// \param e The edge + /// \return The target of the edge + Value operator[](const Key& e) const { + return graph.target(e); + } + + private: + const Graph& graph; + }; + + /// \brief Returns a \ref TargetMap class. + /// + /// This function just returns a \ref TargetMap class. + /// \relates TargetMap + template + inline TargetMap targetMap(const Graph& graph) { + return TargetMap(graph); + } + + /// \brief Returns the "forward" directed edge view of an undirected edge. + /// + /// Returns the "forward" directed edge view of an undirected edge. + /// \see BackwardMap + /// \author Balazs Dezso + template + class ForwardMap { + public: + + typedef typename Graph::Edge Value; + typedef typename Graph::UEdge Key; + + /// \brief Constructor + /// + /// Constructor + /// \param _graph The graph that the map belongs to. 
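+ ///
+ /// A minimal sketch, assuming an undirected graph \c g (e.g. a
+ /// \c ListUGraph) and one of its undirected edges \c ue; \ref BackwardMap
+ /// below is the analogous "backward" view:
+ ///\code
+ /// ForwardMap<ListUGraph>  fwd(g);
+ /// BackwardMap<ListUGraph> bwd(g);
+ /// ListUGraph::Edge f = fwd[ue];   // equivalent to g.direct(ue, true)
+ /// ListUGraph::Edge b = bwd[ue];   // equivalent to g.direct(ue, false)
+ ///\endcode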
+ explicit ForwardMap(const Graph& _graph) : graph(_graph) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + /// \param key An undirected edge + /// \return The "forward" directed edge view of undirected edge + Value operator[](const Key& key) const { + return graph.direct(key, true); + } + + private: + const Graph& graph; + }; + + /// \brief Returns a \ref ForwardMap class. + /// + /// This function just returns an \ref ForwardMap class. + /// \relates ForwardMap + template + inline ForwardMap forwardMap(const Graph& graph) { + return ForwardMap(graph); + } + + /// \brief Returns the "backward" directed edge view of an undirected edge. + /// + /// Returns the "backward" directed edge view of an undirected edge. + /// \see ForwardMap + /// \author Balazs Dezso + template + class BackwardMap { + public: + + typedef typename Graph::Edge Value; + typedef typename Graph::UEdge Key; + + /// \brief Constructor + /// + /// Constructor + /// \param _graph The graph that the map belongs to. + explicit BackwardMap(const Graph& _graph) : graph(_graph) {} + + /// \brief The subscript operator. + /// + /// The subscript operator. + /// \param key An undirected edge + /// \return The "backward" directed edge view of undirected edge + Value operator[](const Key& key) const { + return graph.direct(key, false); + } + + private: + const Graph& graph; + }; + + /// \brief Returns a \ref BackwardMap class + + /// This function just returns a \ref BackwardMap class. + /// \relates BackwardMap + template + inline BackwardMap backwardMap(const Graph& graph) { + return BackwardMap(graph); + } + + /// \brief Potential difference map + /// + /// If there is an potential map on the nodes then we + /// can get an edge map as we get the substraction of the + /// values of the target and source. + template + class PotentialDifferenceMap { + public: + typedef typename Graph::Edge Key; + typedef typename NodeMap::Value Value; + + /// \brief Constructor + /// + /// Contructor of the map + explicit PotentialDifferenceMap(const Graph& _graph, + const NodeMap& _potential) + : graph(_graph), potential(_potential) {} + + /// \brief Const subscription operator + /// + /// Const subscription operator + Value operator[](const Key& edge) const { + return potential[graph.target(edge)] - potential[graph.source(edge)]; + } + + private: + const Graph& graph; + const NodeMap& potential; + }; + + /// \brief Returns a PotentialDifferenceMap. + /// + /// This function just returns a PotentialDifferenceMap. + /// \relates PotentialDifferenceMap + template + PotentialDifferenceMap + potentialDifferenceMap(const Graph& graph, const NodeMap& potential) { + return PotentialDifferenceMap(graph, potential); + } + + /// \brief Map of the node in-degrees. + /// + /// This map returns the in-degree of a node. Once it is constructed, + /// the degrees are stored in a standard NodeMap, so each query is done + /// in constant time. On the other hand, the values are updated automatically + /// whenever the graph changes. + /// + /// \warning Besides addNode() and addEdge(), a graph structure may provide + /// alternative ways to modify the graph. The correct behavior of InDegMap + /// is not guarantied if these additional features are used. For example + /// the functions \ref ListGraph::changeSource() "changeSource()", + /// \ref ListGraph::changeTarget() "changeTarget()" and + /// \ref ListGraph::reverseEdge() "reverseEdge()" + /// of \ref ListGraph will \e not update the degree values correctly. 
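+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g and an already existing
+ /// node \c u of it:
+ ///\code
+ /// InDegMap<ListGraph> inDeg(g);
+ /// ListGraph::Node v = g.addNode();
+ /// g.addEdge(u, v);
+ /// int d = inDeg[v];               // d == 1, kept up to date automatically
+ ///\endcode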
+ /// + /// \sa OutDegMap + + template + class InDegMap + : protected ItemSetTraits<_Graph, typename _Graph::Edge> + ::ItemNotifier::ObserverBase { + + public: + + typedef _Graph Graph; + typedef int Value; + typedef typename Graph::Node Key; + + typedef typename ItemSetTraits<_Graph, typename _Graph::Edge> + ::ItemNotifier::ObserverBase Parent; + + private: + + class AutoNodeMap : public DefaultMap<_Graph, Key, int> { + public: + + typedef DefaultMap<_Graph, Key, int> Parent; + typedef typename Parent::Graph Graph; + + AutoNodeMap(const Graph& graph) : Parent(graph, 0) {} + + virtual void add(const Key& key) { + Parent::add(key); + Parent::set(key, 0); + } + + virtual void add(const std::vector& keys) { + Parent::add(keys); + for (int i = 0; i < int(keys.size()); ++i) { + Parent::set(keys[i], 0); + } + } + + virtual void build() { + Parent::build(); + Key it; + typename Parent::Notifier* nf = Parent::notifier(); + for (nf->first(it); it != INVALID; nf->next(it)) { + Parent::set(it, 0); + } + } + }; + + public: + + /// \brief Constructor. + /// + /// Constructor for creating in-degree map. + explicit InDegMap(const Graph& _graph) : graph(_graph), deg(_graph) { + Parent::attach(graph.notifier(typename _Graph::Edge())); + + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = countInEdges(graph, it); + } + } + + /// Gives back the in-degree of a Node. + int operator[](const Key& key) const { + return deg[key]; + } + + protected: + + typedef typename Graph::Edge Edge; + + virtual void add(const Edge& edge) { + ++deg[graph.target(edge)]; + } + + virtual void add(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + ++deg[graph.target(edges[i])]; + } + } + + virtual void erase(const Edge& edge) { + --deg[graph.target(edge)]; + } + + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + --deg[graph.target(edges[i])]; + } + } + + virtual void build() { + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = countInEdges(graph, it); + } + } + + virtual void clear() { + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = 0; + } + } + private: + + const _Graph& graph; + AutoNodeMap deg; + }; + + /// \brief Map of the node out-degrees. + /// + /// This map returns the out-degree of a node. Once it is constructed, + /// the degrees are stored in a standard NodeMap, so each query is done + /// in constant time. On the other hand, the values are updated automatically + /// whenever the graph changes. + /// + /// \warning Besides addNode() and addEdge(), a graph structure may provide + /// alternative ways to modify the graph. The correct behavior of OutDegMap + /// is not guarantied if these additional features are used. For example + /// the functions \ref ListGraph::changeSource() "changeSource()", + /// \ref ListGraph::changeTarget() "changeTarget()" and + /// \ref ListGraph::reverseEdge() "reverseEdge()" + /// of \ref ListGraph will \e not update the degree values correctly. 
+ /// + /// \sa InDegMap + + template + class OutDegMap + : protected ItemSetTraits<_Graph, typename _Graph::Edge> + ::ItemNotifier::ObserverBase { + + public: + + typedef typename ItemSetTraits<_Graph, typename _Graph::Edge> + ::ItemNotifier::ObserverBase Parent; + + typedef _Graph Graph; + typedef int Value; + typedef typename Graph::Node Key; + + private: + + class AutoNodeMap : public DefaultMap<_Graph, Key, int> { + public: + + typedef DefaultMap<_Graph, Key, int> Parent; + typedef typename Parent::Graph Graph; + + AutoNodeMap(const Graph& graph) : Parent(graph, 0) {} + + virtual void add(const Key& key) { + Parent::add(key); + Parent::set(key, 0); + } + virtual void add(const std::vector& keys) { + Parent::add(keys); + for (int i = 0; i < int(keys.size()); ++i) { + Parent::set(keys[i], 0); + } + } + virtual void build() { + Parent::build(); + Key it; + typename Parent::Notifier* nf = Parent::notifier(); + for (nf->first(it); it != INVALID; nf->next(it)) { + Parent::set(it, 0); + } + } + }; + + public: + + /// \brief Constructor. + /// + /// Constructor for creating out-degree map. + explicit OutDegMap(const Graph& _graph) : graph(_graph), deg(_graph) { + Parent::attach(graph.notifier(typename _Graph::Edge())); + + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = countOutEdges(graph, it); + } + } + + /// Gives back the out-degree of a Node. + int operator[](const Key& key) const { + return deg[key]; + } + + protected: + + typedef typename Graph::Edge Edge; + + virtual void add(const Edge& edge) { + ++deg[graph.source(edge)]; + } + + virtual void add(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + ++deg[graph.source(edges[i])]; + } + } + + virtual void erase(const Edge& edge) { + --deg[graph.source(edge)]; + } + + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + --deg[graph.source(edges[i])]; + } + } + + virtual void build() { + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = countOutEdges(graph, it); + } + } + + virtual void clear() { + for(typename _Graph::NodeIt it(graph); it != INVALID; ++it) { + deg[it] = 0; + } + } + private: + + const _Graph& graph; + AutoNodeMap deg; + }; + + + ///Dynamic edge look up between given endpoints. + + ///\ingroup gutils + ///Using this class, you can find an edge in a graph from a given + ///source to a given target in amortized time O(log d), + ///where d is the out-degree of the source node. + /// + ///It is possible to find \e all parallel edges between two nodes with + ///the \c findFirst() and \c findNext() members. + /// + ///See the \ref EdgeLookUp and \ref AllEdgeLookUp classes if your + ///graph do not changed so frequently. + /// + ///This class uses a self-adjusting binary search tree, Sleator's + ///and Tarjan's Splay tree for guarantee the logarithmic amortized + ///time bound for edge lookups. This class also guarantees the + ///optimal time bound in a constant factor for any distribution of + ///queries. + /// + ///\param G The type of the underlying graph. 
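+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g with nodes \c u and \c v:
+ ///\code
+ /// DynEdgeLookUp<ListGraph> lookup(g);
+ /// g.addEdge(u, v);                    // the structure follows graph changes
+ /// ListGraph::Edge e = lookup(u, v);   // found without calling any refresh
+ ///\endcode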
+ /// + ///\sa EdgeLookUp + ///\sa AllEdgeLookUp + template + class DynEdgeLookUp + : protected ItemSetTraits::ItemNotifier::ObserverBase + { + public: + typedef typename ItemSetTraits + ::ItemNotifier::ObserverBase Parent; + + GRAPH_TYPEDEFS(typename G); + typedef G Graph; + + protected: + + class AutoNodeMap : public DefaultMap { + public: + + typedef DefaultMap Parent; + + AutoNodeMap(const G& graph) : Parent(graph, INVALID) {} + + virtual void add(const Node& node) { + Parent::add(node); + Parent::set(node, INVALID); + } + + virtual void add(const std::vector& nodes) { + Parent::add(nodes); + for (int i = 0; i < int(nodes.size()); ++i) { + Parent::set(nodes[i], INVALID); + } + } + + virtual void build() { + Parent::build(); + Node it; + typename Parent::Notifier* nf = Parent::notifier(); + for (nf->first(it); it != INVALID; nf->next(it)) { + Parent::set(it, INVALID); + } + } + }; + + const Graph &_g; + AutoNodeMap _head; + typename Graph::template EdgeMap _parent; + typename Graph::template EdgeMap _left; + typename Graph::template EdgeMap _right; + + class EdgeLess { + const Graph &g; + public: + EdgeLess(const Graph &_g) : g(_g) {} + bool operator()(Edge a,Edge b) const + { + return g.target(a)& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + insert(edges[i]); + } + } + + virtual void erase(const Edge& edge) { + remove(edge); + } + + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + remove(edges[i]); + } + } + + virtual void build() { + refresh(); + } + + virtual void clear() { + for(NodeIt n(_g);n!=INVALID;++n) { + _head.set(n, INVALID); + } + } + + void insert(Edge edge) { + Node s = _g.source(edge); + Node t = _g.target(edge); + _left.set(edge, INVALID); + _right.set(edge, INVALID); + + Edge e = _head[s]; + if (e == INVALID) { + _head.set(s, edge); + _parent.set(edge, INVALID); + return; + } + while (true) { + if (t < _g.target(e)) { + if (_left[e] == INVALID) { + _left.set(e, edge); + _parent.set(edge, e); + splay(edge); + return; + } else { + e = _left[e]; + } + } else { + if (_right[e] == INVALID) { + _right.set(e, edge); + _parent.set(edge, e); + splay(edge); + return; + } else { + e = _right[e]; + } + } + } + } + + void remove(Edge edge) { + if (_left[edge] == INVALID) { + if (_right[edge] != INVALID) { + _parent.set(_right[edge], _parent[edge]); + } + if (_parent[edge] != INVALID) { + if (_left[_parent[edge]] == edge) { + _left.set(_parent[edge], _right[edge]); + } else { + _right.set(_parent[edge], _right[edge]); + } + } else { + _head.set(_g.source(edge), _right[edge]); + } + } else if (_right[edge] == INVALID) { + _parent.set(_left[edge], _parent[edge]); + if (_parent[edge] != INVALID) { + if (_left[_parent[edge]] == edge) { + _left.set(_parent[edge], _left[edge]); + } else { + _right.set(_parent[edge], _left[edge]); + } + } else { + _head.set(_g.source(edge), _left[edge]); + } + } else { + Edge e = _left[edge]; + if (_right[e] != INVALID) { + e = _right[e]; + while (_right[e] != INVALID) { + e = _right[e]; + } + Edge s = _parent[e]; + _right.set(_parent[e], _left[e]); + if (_left[e] != INVALID) { + _parent.set(_left[e], _parent[e]); + } + + _left.set(e, _left[edge]); + _parent.set(_left[edge], e); + _right.set(e, _right[edge]); + _parent.set(_right[edge], e); + + _parent.set(e, _parent[edge]); + if (_parent[edge] != INVALID) { + if (_left[_parent[edge]] == edge) { + _left.set(_parent[edge], e); + } else { + _right.set(_parent[edge], e); + } + } + splay(s); + } else { + _right.set(e, _right[edge]); + 
_parent.set(_right[edge], e); + + if (_parent[edge] != INVALID) { + if (_left[_parent[edge]] == edge) { + _left.set(_parent[edge], e); + } else { + _right.set(_parent[edge], e); + } + } else { + _head.set(_g.source(edge), e); + } + } + } + } + + Edge refreshRec(std::vector &v,int a,int b) + { + int m=(a+b)/2; + Edge me=v[m]; + if (a < m) { + Edge left = refreshRec(v,a,m-1); + _left.set(me, left); + _parent.set(left, me); + } else { + _left.set(me, INVALID); + } + if (m < b) { + Edge right = refreshRec(v,m+1,b); + _right.set(me, right); + _parent.set(right, me); + } else { + _right.set(me, INVALID); + } + return me; + } + + void refresh() { + for(NodeIt n(_g);n!=INVALID;++n) { + std::vector v; + for(OutEdgeIt e(_g,n);e!=INVALID;++e) v.push_back(e); + if(v.size()) { + std::sort(v.begin(),v.end(),EdgeLess(_g)); + Edge head = refreshRec(v,0,v.size()-1); + _head.set(n, head); + _parent.set(head, INVALID); + } + else _head.set(n, INVALID); + } + } + + void zig(Edge v) { + Edge w = _parent[v]; + _parent.set(v, _parent[w]); + _parent.set(w, v); + _left.set(w, _right[v]); + _right.set(v, w); + if (_parent[v] != INVALID) { + if (_right[_parent[v]] == w) { + _right.set(_parent[v], v); + } else { + _left.set(_parent[v], v); + } + } + if (_left[w] != INVALID){ + _parent.set(_left[w], w); + } + } + + void zag(Edge v) { + Edge w = _parent[v]; + _parent.set(v, _parent[w]); + _parent.set(w, v); + _right.set(w, _left[v]); + _left.set(v, w); + if (_parent[v] != INVALID){ + if (_left[_parent[v]] == w) { + _left.set(_parent[v], v); + } else { + _right.set(_parent[v], v); + } + } + if (_right[w] != INVALID){ + _parent.set(_right[w], w); + } + } + + void splay(Edge v) { + while (_parent[v] != INVALID) { + if (v == _left[_parent[v]]) { + if (_parent[_parent[v]] == INVALID) { + zig(v); + } else { + if (_parent[v] == _left[_parent[_parent[v]]]) { + zig(_parent[v]); + zig(v); + } else { + zig(v); + zag(v); + } + } + } else { + if (_parent[_parent[v]] == INVALID) { + zag(v); + } else { + if (_parent[v] == _left[_parent[_parent[v]]]) { + zag(v); + zig(v); + } else { + zag(_parent[v]); + zag(v); + } + } + } + } + _head[_g.source(v)] = v; + } + + + public: + + ///Find an edge between two nodes. + + ///Find an edge between two nodes in time O(logd), where + /// d is the number of outgoing edges of \c s. + ///\param s The source node + ///\param t The target node + ///\return An edge from \c s to \c t if there exists, + ///\ref INVALID otherwise. + Edge operator()(Node s, Node t) const + { + Edge e = _head[s]; + while (true) { + if (_g.target(e) == t) { + const_cast(*this).splay(e); + return e; + } else if (t < _g.target(e)) { + if (_left[e] == INVALID) { + const_cast(*this).splay(e); + return INVALID; + } else { + e = _left[e]; + } + } else { + if (_right[e] == INVALID) { + const_cast(*this).splay(e); + return INVALID; + } else { + e = _right[e]; + } + } + } + } + + ///Find the first edge between two nodes. + + ///Find the first edge between two nodes in time + /// O(logd), where d is the number of + /// outgoing edges of \c s. + ///\param s The source node + ///\param t The target node + ///\return An edge from \c s to \c t if there exists, \ref INVALID + /// otherwise. 
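+ ///
+ /// A minimal sketch counting the parallel edges from \c u to \c v,
+ /// assuming a DynEdgeLookUp \c lookup on the graph and that \c u has at
+ /// least one outgoing edge:
+ ///\code
+ /// int cnt = 0;
+ /// for (Edge e = lookup.findFirst(u, v); e != INVALID;
+ ///      e = lookup.findNext(u, v, e)) ++cnt;
+ ///\endcode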
+ Edge findFirst(Node s, Node t) const + { + Edge e = _head[s]; + Edge r = INVALID; + while (true) { + if (_g.target(e) < t) { + if (_right[e] == INVALID) { + const_cast(*this).splay(e); + return r; + } else { + e = _right[e]; + } + } else { + if (_g.target(e) == t) { + r = e; + } + if (_left[e] == INVALID) { + const_cast(*this).splay(e); + return r; + } else { + e = _left[e]; + } + } + } + } + + ///Find the next edge between two nodes. + + ///Find the next edge between two nodes in time + /// O(logd), where d is the number of + /// outgoing edges of \c s. + ///\param s The source node + ///\param t The target node + ///\return An edge from \c s to \c t if there exists, \ref INVALID + /// otherwise. + + ///\note If \c e is not the result of the previous \c findFirst() + ///operation then the amorized time bound can not be guaranteed. +#ifdef DOXYGEN + Edge findNext(Node s, Node t, Edge e) const +#else + Edge findNext(Node, Node t, Edge e) const +#endif + { + if (_right[e] != INVALID) { + e = _right[e]; + while (_left[e] != INVALID) { + e = _left[e]; + } + const_cast(*this).splay(e); + } else { + while (_parent[e] != INVALID && _right[_parent[e]] == e) { + e = _parent[e]; + } + if (_parent[e] == INVALID) { + return INVALID; + } else { + e = _parent[e]; + const_cast(*this).splay(e); + } + } + if (_g.target(e) == t) return e; + else return INVALID; + } + + }; + + ///Fast edge look up between given endpoints. + + ///\ingroup gutils + ///Using this class, you can find an edge in a graph from a given + ///source to a given target in time O(log d), + ///where d is the out-degree of the source node. + /// + ///It is not possible to find \e all parallel edges between two nodes. + ///Use \ref AllEdgeLookUp for this purpose. + /// + ///\warning This class is static, so you should refresh() (or at least + ///refresh(Node)) this data structure + ///whenever the graph changes. This is a time consuming (superlinearly + ///proportional (O(mlogm)) to the number of edges). + /// + ///\param G The type of the underlying graph. + /// + ///\sa DynEdgeLookUp + ///\sa AllEdgeLookUp + template + class EdgeLookUp + { + public: + GRAPH_TYPEDEFS(typename G); + typedef G Graph; + + protected: + const Graph &_g; + typename Graph::template NodeMap _head; + typename Graph::template EdgeMap _left; + typename Graph::template EdgeMap _right; + + class EdgeLess { + const Graph &g; + public: + EdgeLess(const Graph &_g) : g(_g) {} + bool operator()(Edge a,Edge b) const + { + return g.target(a) &v,int a,int b) + { + int m=(a+b)/2; + Edge me=v[m]; + _left[me] = aO(dlogd), where d is + ///the number of the outgoing edges of \c n. + void refresh(Node n) + { + std::vector v; + for(OutEdgeIt e(_g,n);e!=INVALID;++e) v.push_back(e); + if(v.size()) { + std::sort(v.begin(),v.end(),EdgeLess(_g)); + _head[n]=refreshRec(v,0,v.size()-1); + } + else _head[n]=INVALID; + } + ///Refresh the full data structure. + + ///Build up the full search database. In fact, it simply calls + ///\ref refresh(Node) "refresh(n)" for each node \c n. + /// + ///It runs in time O(mlogD), where m is + ///the number of the edges of \c n and D is the maximum + ///out-degree of the graph. + + void refresh() + { + for(NodeIt n(_g);n!=INVALID;++n) refresh(n); + } + + ///Find an edge between two nodes. + + ///Find an edge between two nodes in time O(logd), where + /// d is the number of outgoing edges of \c s. + ///\param s The source node + ///\param t The target node + ///\return An edge from \c s to \c t if there exists, + ///\ref INVALID otherwise. 
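+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g with nodes \c u and \c v:
+ ///\code
+ /// EdgeLookUp<ListGraph> lookup(g);
+ /// g.addEdge(u, v);
+ /// lookup.refresh(u);                  // or lookup.refresh() to rebuild all
+ /// ListGraph::Edge e = lookup(u, v);
+ ///\endcode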
+ /// + ///\warning If you change the graph, refresh() must be called before using + ///this operator. If you change the outgoing edges of + ///a single node \c n, then + ///\ref refresh(Node) "refresh(n)" is enough. + /// + Edge operator()(Node s, Node t) const + { + Edge e; + for(e=_head[s]; + e!=INVALID&&_g.target(e)!=t; + e = t < _g.target(e)?_left[e]:_right[e]) ; + return e; + } + + }; + + ///Fast look up of all edges between given endpoints. + + ///\ingroup gutils + ///This class is the same as \ref EdgeLookUp, with the addition + ///that it makes it possible to find all edges between given endpoints. + /// + ///\warning This class is static, so you should refresh() (or at least + ///refresh(Node)) this data structure + ///whenever the graph changes. This is a time consuming (superlinearly + ///proportional (O(mlogm)) to the number of edges). + /// + ///\param G The type of the underlying graph. + /// + ///\sa DynEdgeLookUp + ///\sa EdgeLookUp + template + class AllEdgeLookUp : public EdgeLookUp + { + using EdgeLookUp::_g; + using EdgeLookUp::_right; + using EdgeLookUp::_left; + using EdgeLookUp::_head; + + GRAPH_TYPEDEFS(typename G); + typedef G Graph; + + typename Graph::template EdgeMap _next; + + Edge refreshNext(Edge head,Edge next=INVALID) + { + if(head==INVALID) return next; + else { + next=refreshNext(_right[head],next); +// _next[head]=next; + _next[head]=( next!=INVALID && _g.target(next)==_g.target(head)) + ? next : INVALID; + return refreshNext(_left[head],head); + } + } + + void refreshNext() + { + for(NodeIt n(_g);n!=INVALID;++n) refreshNext(_head[n]); + } + + public: + ///Constructor + + ///Constructor. + /// + ///It builds up the search database, which remains valid until the graph + ///changes. + AllEdgeLookUp(const Graph &g) : EdgeLookUp(g), _next(g) {refreshNext();} + + ///Refresh the data structure at a node. + + ///Build up the search database of node \c n. + /// + ///It runs in time O(dlogd), where d is + ///the number of the outgoing edges of \c n. + + void refresh(Node n) + { + EdgeLookUp::refresh(n); + refreshNext(_head[n]); + } + + ///Refresh the full data structure. + + ///Build up the full search database. In fact, it simply calls + ///\ref refresh(Node) "refresh(n)" for each node \c n. + /// + ///It runs in time O(mlogD), where m is + ///the number of the edges of \c n and D is the maximum + ///out-degree of the graph. + + void refresh() + { + for(NodeIt n(_g);n!=INVALID;++n) refresh(_head[n]); + } + + ///Find an edge between two nodes. + + ///Find an edge between two nodes. + ///\param s The source node + ///\param t The target node + ///\param prev The previous edge between \c s and \c t. It it is INVALID or + ///not given, the operator finds the first appropriate edge. + ///\return An edge from \c s to \c t after \c prev or + ///\ref INVALID if there is no more. + /// + ///For example, you can count the number of edges from \c u to \c v in the + ///following way. + ///\code + ///AllEdgeLookUp ae(g); + ///... + ///int n=0; + ///for(Edge e=ae(u,v);e!=INVALID;e=ae(u,v,e)) n++; + ///\endcode + /// + ///Finding the first edge take O(logd) time, where + /// d is the number of outgoing edges of \c s. Then, the + ///consecutive edges are found in constant time. + /// + ///\warning If you change the graph, refresh() must be called before using + ///this operator. If you change the outgoing edges of + ///a single node \c n, then + ///\ref refresh(Node) "refresh(n)" is enough. 
+ /// +#ifdef DOXYGEN + Edge operator()(Node s, Node t, Edge prev=INVALID) const {} +#else + using EdgeLookUp::operator() ; + Edge operator()(Node s, Node t, Edge prev) const + { + return prev==INVALID?(*this)(s,t):_next[prev]; + } +#endif + + }; + + /// @} + +} //END OF NAMESPACE LEMON + +#endif diff --git a/src/lemon/list_graph.h b/src/lemon/list_graph.h new file mode 100644 index 0000000..86d033a --- /dev/null +++ b/src/lemon/list_graph.h @@ -0,0 +1,2249 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_LIST_GRAPH_H +#define LEMON_LIST_GRAPH_H + +///\ingroup graphs +///\file +///\brief ListGraph, ListUGraph classes. + +#include +#include + +#include + +#include +#include + +namespace lemon { + + class ListGraphBase { + + protected: + struct NodeT { + int first_in, first_out; + int prev, next; + }; + + struct EdgeT { + int target, source; + int prev_in, prev_out; + int next_in, next_out; + }; + + std::vector nodes; + + int first_node; + + int first_free_node; + + std::vector edges; + + int first_free_edge; + + public: + + typedef ListGraphBase Graph; + + class Node { + friend class ListGraphBase; + protected: + + int id; + explicit Node(int pid) { id = pid;} + + public: + Node() {} + Node (Invalid) { id = -1; } + bool operator==(const Node& node) const {return id == node.id;} + bool operator!=(const Node& node) const {return id != node.id;} + bool operator<(const Node& node) const {return id < node.id;} + }; + + class Edge { + friend class ListGraphBase; + protected: + + int id; + explicit Edge(int pid) { id = pid;} + + public: + Edge() {} + Edge (Invalid) { id = -1; } + bool operator==(const Edge& edge) const {return id == edge.id;} + bool operator!=(const Edge& edge) const {return id != edge.id;} + bool operator<(const Edge& edge) const {return id < edge.id;} + }; + + + + ListGraphBase() + : nodes(), first_node(-1), + first_free_node(-1), edges(), first_free_edge(-1) {} + + + int maxNodeId() const { return nodes.size()-1; } + int maxEdgeId() const { return edges.size()-1; } + + Node source(Edge e) const { return Node(edges[e.id].source); } + Node target(Edge e) const { return Node(edges[e.id].target); } + + + void first(Node& node) const { + node.id = first_node; + } + + void next(Node& node) const { + node.id = nodes[node.id].next; + } + + + void first(Edge& e) const { + int n; + for(n = first_node; + n!=-1 && nodes[n].first_in == -1; + n = nodes[n].next); + e.id = (n == -1) ? -1 : nodes[n].first_in; + } + + void next(Edge& edge) const { + if (edges[edge.id].next_in != -1) { + edge.id = edges[edge.id].next_in; + } else { + int n; + for(n = nodes[edges[edge.id].target].next; + n!=-1 && nodes[n].first_in == -1; + n = nodes[n].next); + edge.id = (n == -1) ? 
-1 : nodes[n].first_in; + } + } + + void firstOut(Edge &e, const Node& v) const { + e.id = nodes[v.id].first_out; + } + void nextOut(Edge &e) const { + e.id=edges[e.id].next_out; + } + + void firstIn(Edge &e, const Node& v) const { + e.id = nodes[v.id].first_in; + } + void nextIn(Edge &e) const { + e.id=edges[e.id].next_in; + } + + + static int id(Node v) { return v.id; } + static int id(Edge e) { return e.id; } + + static Node nodeFromId(int id) { return Node(id);} + static Edge edgeFromId(int id) { return Edge(id);} + + Node addNode() { + int n; + + if(first_free_node==-1) { + n = nodes.size(); + nodes.push_back(NodeT()); + } else { + n = first_free_node; + first_free_node = nodes[n].next; + } + + nodes[n].next = first_node; + if(first_node != -1) nodes[first_node].prev = n; + first_node = n; + nodes[n].prev = -1; + + nodes[n].first_in = nodes[n].first_out = -1; + + return Node(n); + } + + Edge addEdge(Node u, Node v) { + int n; + + if (first_free_edge == -1) { + n = edges.size(); + edges.push_back(EdgeT()); + } else { + n = first_free_edge; + first_free_edge = edges[n].next_in; + } + + edges[n].source = u.id; + edges[n].target = v.id; + + edges[n].next_out = nodes[u.id].first_out; + if(nodes[u.id].first_out != -1) { + edges[nodes[u.id].first_out].prev_out = n; + } + + edges[n].next_in = nodes[v.id].first_in; + if(nodes[v.id].first_in != -1) { + edges[nodes[v.id].first_in].prev_in = n; + } + + edges[n].prev_in = edges[n].prev_out = -1; + + nodes[u.id].first_out = nodes[v.id].first_in = n; + + return Edge(n); + } + + void erase(const Node& node) { + int n = node.id; + + if(nodes[n].next != -1) { + nodes[nodes[n].next].prev = nodes[n].prev; + } + + if(nodes[n].prev != -1) { + nodes[nodes[n].prev].next = nodes[n].next; + } else { + first_node = nodes[n].next; + } + + nodes[n].next = first_free_node; + first_free_node = n; + + } + + void erase(const Edge& edge) { + int n = edge.id; + + if(edges[n].next_in!=-1) { + edges[edges[n].next_in].prev_in = edges[n].prev_in; + } + + if(edges[n].prev_in!=-1) { + edges[edges[n].prev_in].next_in = edges[n].next_in; + } else { + nodes[edges[n].target].first_in = edges[n].next_in; + } + + + if(edges[n].next_out!=-1) { + edges[edges[n].next_out].prev_out = edges[n].prev_out; + } + + if(edges[n].prev_out!=-1) { + edges[edges[n].prev_out].next_out = edges[n].next_out; + } else { + nodes[edges[n].source].first_out = edges[n].next_out; + } + + edges[n].next_in = first_free_edge; + first_free_edge = n; + + } + + void clear() { + edges.clear(); + nodes.clear(); + first_node = first_free_node = first_free_edge = -1; + } + + protected: + void changeTarget(Edge e, Node n) + { + if(edges[e.id].next_in != -1) + edges[edges[e.id].next_in].prev_in = edges[e.id].prev_in; + if(edges[e.id].prev_in != -1) + edges[edges[e.id].prev_in].next_in = edges[e.id].next_in; + else nodes[edges[e.id].target].first_in = edges[e.id].next_in; + if (nodes[n.id].first_in != -1) { + edges[nodes[n.id].first_in].prev_in = e.id; + } + edges[e.id].target = n.id; + edges[e.id].prev_in = -1; + edges[e.id].next_in = nodes[n.id].first_in; + nodes[n.id].first_in = e.id; + } + void changeSource(Edge e, Node n) + { + if(edges[e.id].next_out != -1) + edges[edges[e.id].next_out].prev_out = edges[e.id].prev_out; + if(edges[e.id].prev_out != -1) + edges[edges[e.id].prev_out].next_out = edges[e.id].next_out; + else nodes[edges[e.id].source].first_out = edges[e.id].next_out; + if (nodes[n.id].first_out != -1) { + edges[nodes[n.id].first_out].prev_out = e.id; + } + edges[e.id].source = n.id; + 
edges[e.id].prev_out = -1; + edges[e.id].next_out = nodes[n.id].first_out; + nodes[n.id].first_out = e.id; + } + + }; + + typedef GraphExtender ExtendedListGraphBase; + + /// \addtogroup graphs + /// @{ + + ///A list graph class. + + ///This is a simple and fast graph implementation. + /// + ///It conforms to the \ref concepts::Graph "Graph concept" and it + ///also provides several additional useful extra functionalities. + ///The most of the member functions and nested classes are + ///documented only in the concept class. + /// + ///An important extra feature of this graph implementation is that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + ///\sa concepts::Graph. + + class ListGraph : public ExtendedListGraphBase { + private: + ///ListGraph is \e not copy constructible. Use GraphCopy() instead. + + ///ListGraph is \e not copy constructible. Use GraphCopy() instead. + /// + ListGraph(const ListGraph &) :ExtendedListGraphBase() {}; + ///\brief Assignment of ListGraph to another one is \e not allowed. + ///Use GraphCopy() instead. + + ///Assignment of ListGraph to another one is \e not allowed. + ///Use GraphCopy() instead. + void operator=(const ListGraph &) {} + public: + + typedef ExtendedListGraphBase Parent; + + /// Constructor + + /// Constructor. + /// + ListGraph() {} + + ///Add a new node to the graph. + + /// \return the new node. + /// + Node addNode() { return Parent::addNode(); } + + ///Add a new edge to the graph. + + ///Add a new edge to the graph with source node \c s + ///and target node \c t. + ///\return the new edge. + Edge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + + /// Changes the target of \c e to \c n + + /// Changes the target of \c e to \c n + /// + ///\note The EdgeIts and OutEdgeIts referencing + ///the changed edge remain valid. However InEdgeIts are + ///invalidated. + ///\warning This functionality cannot be used together with the Snapshot + ///feature. + void changeTarget(Edge e, Node n) { + Parent::changeTarget(e,n); + } + /// Changes the source of \c e to \c n + + /// Changes the source of \c e to \c n + /// + ///\note The EdgeIts and InEdgeIts referencing + ///the changed edge remain valid. However OutEdgeIts are + ///invalidated. + ///\warning This functionality cannot be used together with the Snapshot + ///feature. + void changeSource(Edge e, Node n) { + Parent::changeSource(e,n); + } + + /// Invert the direction of an edge. + + ///\note The EdgeIts referencing the changed edge remain + ///valid. However OutEdgeIts and InEdgeIts are + ///invalidated. + ///\warning This functionality cannot be used together with the Snapshot + ///feature. + void reverseEdge(Edge e) { + Node t=target(e); + changeTarget(e,source(e)); + changeSource(e,t); + } + + /// Using this it is possible to avoid the superfluous memory + /// allocation: if you know that the graph you want to build will + /// be very large (e.g. it will contain millions of nodes and/or edges) + /// then it is worth reserving space for this amount before starting + /// to build the graph. + /// \sa reserveEdge + void reserveNode(int n) { nodes.reserve(n); }; + + /// \brief Using this it is possible to avoid the superfluous memory + /// allocation. + + /// Using this it is possible to avoid the superfluous memory + /// allocation: if you know that the graph you want to build will + /// be very large (e.g. 
it will contain millions of nodes and/or edges) + /// then it is worth reserving space for this amount before starting + /// to build the graph. + /// \sa reserveNode + void reserveEdge(int m) { edges.reserve(m); }; + + ///Contract two nodes. + + ///This function contracts two nodes. + /// + ///Node \p b will be removed but instead of deleting + ///incident edges, they will be joined to \p a. + ///The last parameter \p r controls whether to remove loops. \c true + ///means that loops will be removed. + /// + ///\note The EdgeIts + ///referencing a moved edge remain + ///valid. However InEdgeIts and OutEdgeIts + ///may be invalidated. + ///\warning This functionality cannot be used together with the Snapshot + ///feature. + void contract(Node a, Node b, bool r = true) + { + for(OutEdgeIt e(*this,b);e!=INVALID;) { + OutEdgeIt f=e; + ++f; + if(r && target(e)==a) erase(e); + else changeSource(e,a); + e=f; + } + for(InEdgeIt e(*this,b);e!=INVALID;) { + InEdgeIt f=e; + ++f; + if(r && source(e)==a) erase(e); + else changeTarget(e,a); + e=f; + } + erase(b); + } + + ///Split a node. + + ///This function splits a node. First a new node is added to the graph, + ///then the source of each outgoing edge of \c n is moved to this new node. + ///If \c connect is \c true (this is the default value), then a new edge + ///from \c n to the newly created node is also added. + ///\return The newly created node. + /// + ///\note The EdgeIts referencing a moved edge remain + ///valid. However InEdgeIts and OutEdgeIts may + ///be invalidated. + /// + ///\warning This functionality cannot be used together with the + ///Snapshot feature. \todo It could be implemented in a bit + ///faster way. + Node split(Node n, bool connect = true) { + Node b = addNode(); + for(OutEdgeIt e(*this,n);e!=INVALID;) { + OutEdgeIt f=e; + ++f; + changeSource(e,b); + e=f; + } + if (connect) addEdge(n,b); + return b; + } + + ///Split an edge. + + ///This function splits an edge. First a new node \c b is added to + ///the graph, then the original edge is re-targeted to \c + ///b. Finally an edge from \c b to the original target is added. + ///\return The newly created node. + ///\warning This functionality + ///cannot be used together with the Snapshot feature. + Node split(Edge e) { + Node b = addNode(); + addEdge(b,target(e)); + changeTarget(e,b); + return b; + } + + /// \brief Class to make a snapshot of the graph and restore + /// to it later. + /// + /// Class to make a snapshot of the graph and to restore it + /// later. + /// + /// The newly added nodes and edges can be removed using the + /// restore() function. + /// + /// \warning Edge and node deletions cannot be restored. This + /// events invalidate the snapshot. 
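+ ///
+ /// A minimal sketch, assuming a \c ListGraph \c g:
+ ///\code
+ /// ListGraph::Snapshot snap(g);   // remember the current state of g
+ /// ListGraph::Node n = g.addNode();
+ /// g.addEdge(n, g.addNode());
+ /// snap.restore();                // removes everything added since save
+ ///\endcode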
+ class Snapshot { + protected: + + typedef Parent::NodeNotifier NodeNotifier; + + class NodeObserverProxy : public NodeNotifier::ObserverBase { + public: + + NodeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using NodeNotifier::ObserverBase::attach; + using NodeNotifier::ObserverBase::detach; + using NodeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const Node& node) { + snapshot.addNode(node); + } + virtual void add(const std::vector& nodes) { + for (int i = nodes.size() - 1; i >= 0; ++i) { + snapshot.addNode(nodes[i]); + } + } + virtual void erase(const Node& node) { + snapshot.eraseNode(node); + } + virtual void erase(const std::vector& nodes) { + for (int i = 0; i < int(nodes.size()); ++i) { + snapshot.eraseNode(nodes[i]); + } + } + virtual void build() { + Node node; + std::vector nodes; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + nodes.push_back(node); + } + for (int i = nodes.size() - 1; i >= 0; --i) { + snapshot.addNode(nodes[i]); + } + } + virtual void clear() { + Node node; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + snapshot.eraseNode(node); + } + } + + Snapshot& snapshot; + }; + + class EdgeObserverProxy : public EdgeNotifier::ObserverBase { + public: + + EdgeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using EdgeNotifier::ObserverBase::attach; + using EdgeNotifier::ObserverBase::detach; + using EdgeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const Edge& edge) { + snapshot.addEdge(edge); + } + virtual void add(const std::vector& edges) { + for (int i = edges.size() - 1; i >= 0; ++i) { + snapshot.addEdge(edges[i]); + } + } + virtual void erase(const Edge& edge) { + snapshot.eraseEdge(edge); + } + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + snapshot.eraseEdge(edges[i]); + } + } + virtual void build() { + Edge edge; + std::vector edges; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + edges.push_back(edge); + } + for (int i = edges.size() - 1; i >= 0; --i) { + snapshot.addEdge(edges[i]); + } + } + virtual void clear() { + Edge edge; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + snapshot.eraseEdge(edge); + } + } + + Snapshot& snapshot; + }; + + ListGraph *graph; + + NodeObserverProxy node_observer_proxy; + EdgeObserverProxy edge_observer_proxy; + + std::list added_nodes; + std::list added_edges; + + + void addNode(const Node& node) { + added_nodes.push_front(node); + } + void eraseNode(const Node& node) { + std::list::iterator it = + std::find(added_nodes.begin(), added_nodes.end(), node); + if (it == added_nodes.end()) { + clear(); + edge_observer_proxy.detach(); + throw NodeNotifier::ImmediateDetach(); + } else { + added_nodes.erase(it); + } + } + + void addEdge(const Edge& edge) { + added_edges.push_front(edge); + } + void eraseEdge(const Edge& edge) { + std::list::iterator it = + std::find(added_edges.begin(), added_edges.end(), edge); + if (it == added_edges.end()) { + clear(); + node_observer_proxy.detach(); + throw EdgeNotifier::ImmediateDetach(); + } else { + added_edges.erase(it); + } + } + + void attach(ListGraph &_graph) { + graph = &_graph; + node_observer_proxy.attach(graph->notifier(Node())); + edge_observer_proxy.attach(graph->notifier(Edge())); + } + + void detach() { + node_observer_proxy.detach(); + edge_observer_proxy.detach(); + } + + bool attached() const { + return 
node_observer_proxy.attached(); + } + + void clear() { + added_nodes.clear(); + added_edges.clear(); + } + + public: + + /// \brief Default constructor. + /// + /// Default constructor. + /// To actually make a snapshot you must call save(). + Snapshot() + : graph(0), node_observer_proxy(*this), + edge_observer_proxy(*this) {} + + /// \brief Constructor that immediately makes a snapshot. + /// + /// This constructor immediately makes a snapshot of the graph. + /// \param _graph The graph we make a snapshot of. + Snapshot(ListGraph &_graph) + : node_observer_proxy(*this), + edge_observer_proxy(*this) { + attach(_graph); + } + + /// \brief Make a snapshot. + /// + /// Make a snapshot of the graph. + /// + /// This function can be called more than once. In case of a repeated + /// call, the previous snapshot gets lost. + /// \param _graph The graph we make the snapshot of. + void save(ListGraph &_graph) { + if (attached()) { + detach(); + clear(); + } + attach(_graph); + } + + /// \brief Undo the changes until the last snapshot. + // + /// Undo the changes until the last snapshot created by save(). + void restore() { + detach(); + for(std::list::iterator it = added_edges.begin(); + it != added_edges.end(); ++it) { + graph->erase(*it); + } + for(std::list::iterator it = added_nodes.begin(); + it != added_nodes.end(); ++it) { + graph->erase(*it); + } + clear(); + } + + /// \brief Gives back true when the snapshot is valid. + /// + /// Gives back true when the snapshot is valid. + bool valid() const { + return attached(); + } + }; + + }; + + ///@} + + class ListUGraphBase { + + protected: + + struct NodeT { + int first_out; + int prev, next; + }; + + struct EdgeT { + int target; + int prev_out, next_out; + }; + + std::vector nodes; + + int first_node; + + int first_free_node; + + std::vector edges; + + int first_free_edge; + + public: + + typedef ListUGraphBase Graph; + + class Node; + class Edge; + class UEdge; + + class Node { + friend class ListUGraphBase; + protected: + + int id; + explicit Node(int pid) { id = pid;} + + public: + Node() {} + Node (Invalid) { id = -1; } + bool operator==(const Node& node) const {return id == node.id;} + bool operator!=(const Node& node) const {return id != node.id;} + bool operator<(const Node& node) const {return id < node.id;} + }; + + class UEdge { + friend class ListUGraphBase; + protected: + + int id; + explicit UEdge(int pid) { id = pid;} + + public: + UEdge() {} + UEdge (Invalid) { id = -1; } + bool operator==(const UEdge& edge) const {return id == edge.id;} + bool operator!=(const UEdge& edge) const {return id != edge.id;} + bool operator<(const UEdge& edge) const {return id < edge.id;} + }; + + class Edge { + friend class ListUGraphBase; + protected: + + int id; + explicit Edge(int pid) { id = pid;} + + public: + operator UEdge() const { return uEdgeFromId(id / 2); } + + Edge() {} + Edge (Invalid) { id = -1; } + bool operator==(const Edge& edge) const {return id == edge.id;} + bool operator!=(const Edge& edge) const {return id != edge.id;} + bool operator<(const Edge& edge) const {return id < edge.id;} + }; + + + + ListUGraphBase() + : nodes(), first_node(-1), + first_free_node(-1), edges(), first_free_edge(-1) {} + + + int maxNodeId() const { return nodes.size()-1; } + int maxUEdgeId() const { return edges.size() / 2 - 1; } + int maxEdgeId() const { return edges.size()-1; } + + Node source(Edge e) const { return Node(edges[e.id ^ 1].target); } + Node target(Edge e) const { return Node(edges[e.id].target); } + + Node source(UEdge e) const { return 
Node(edges[2 * e.id].target); } + Node target(UEdge e) const { return Node(edges[2 * e.id + 1].target); } + + static bool direction(Edge e) { + return (e.id & 1) == 1; + } + + static Edge direct(UEdge e, bool d) { + return Edge(e.id * 2 + (d ? 1 : 0)); + } + + void first(Node& node) const { + node.id = first_node; + } + + void next(Node& node) const { + node.id = nodes[node.id].next; + } + + void first(Edge& e) const { + int n = first_node; + while (n != -1 && nodes[n].first_out == -1) { + n = nodes[n].next; + } + e.id = (n == -1) ? -1 : nodes[n].first_out; + } + + void next(Edge& e) const { + if (edges[e.id].next_out != -1) { + e.id = edges[e.id].next_out; + } else { + int n = nodes[edges[e.id ^ 1].target].next; + while(n != -1 && nodes[n].first_out == -1) { + n = nodes[n].next; + } + e.id = (n == -1) ? -1 : nodes[n].first_out; + } + } + + void first(UEdge& e) const { + int n = first_node; + while (n != -1) { + e.id = nodes[n].first_out; + while ((e.id & 1) != 1) { + e.id = edges[e.id].next_out; + } + if (e.id != -1) { + e.id /= 2; + return; + } + n = nodes[n].next; + } + e.id = -1; + } + + void next(UEdge& e) const { + int n = edges[e.id * 2].target; + e.id = edges[(e.id * 2) | 1].next_out; + while ((e.id & 1) != 1) { + e.id = edges[e.id].next_out; + } + if (e.id != -1) { + e.id /= 2; + return; + } + n = nodes[n].next; + while (n != -1) { + e.id = nodes[n].first_out; + while ((e.id & 1) != 1) { + e.id = edges[e.id].next_out; + } + if (e.id != -1) { + e.id /= 2; + return; + } + n = nodes[n].next; + } + e.id = -1; + } + + void firstOut(Edge &e, const Node& v) const { + e.id = nodes[v.id].first_out; + } + void nextOut(Edge &e) const { + e.id = edges[e.id].next_out; + } + + void firstIn(Edge &e, const Node& v) const { + e.id = ((nodes[v.id].first_out) ^ 1); + if (e.id == -2) e.id = -1; + } + void nextIn(Edge &e) const { + e.id = ((edges[e.id ^ 1].next_out) ^ 1); + if (e.id == -2) e.id = -1; + } + + void firstInc(UEdge &e, bool& d, const Node& v) const { + int de = nodes[v.id].first_out; + if (de != -1 ) { + e.id = de / 2; + d = ((de & 1) == 1); + } else { + e.id = -1; + d = true; + } + } + void nextInc(UEdge &e, bool& d) const { + int de = (edges[(e.id * 2) | (d ? 
1 : 0)].next_out); + if (de != -1 ) { + e.id = de / 2; + d = ((de & 1) == 1); + } else { + e.id = -1; + d = true; + } + } + + static int id(Node v) { return v.id; } + static int id(Edge e) { return e.id; } + static int id(UEdge e) { return e.id; } + + static Node nodeFromId(int id) { return Node(id);} + static Edge edgeFromId(int id) { return Edge(id);} + static UEdge uEdgeFromId(int id) { return UEdge(id);} + + Node addNode() { + int n; + + if(first_free_node==-1) { + n = nodes.size(); + nodes.push_back(NodeT()); + } else { + n = first_free_node; + first_free_node = nodes[n].next; + } + + nodes[n].next = first_node; + if (first_node != -1) nodes[first_node].prev = n; + first_node = n; + nodes[n].prev = -1; + + nodes[n].first_out = -1; + + return Node(n); + } + + UEdge addEdge(Node u, Node v) { + int n; + + if (first_free_edge == -1) { + n = edges.size(); + edges.push_back(EdgeT()); + edges.push_back(EdgeT()); + } else { + n = first_free_edge; + first_free_edge = edges[n].next_out; + } + + edges[n].target = u.id; + edges[n | 1].target = v.id; + + edges[n].next_out = nodes[v.id].first_out; + if (nodes[v.id].first_out != -1) { + edges[nodes[v.id].first_out].prev_out = n; + } + edges[n].prev_out = -1; + nodes[v.id].first_out = n; + + edges[n | 1].next_out = nodes[u.id].first_out; + if (nodes[u.id].first_out != -1) { + edges[nodes[u.id].first_out].prev_out = (n | 1); + } + edges[n | 1].prev_out = -1; + nodes[u.id].first_out = (n | 1); + + return UEdge(n / 2); + } + + void erase(const Node& node) { + int n = node.id; + + if(nodes[n].next != -1) { + nodes[nodes[n].next].prev = nodes[n].prev; + } + + if(nodes[n].prev != -1) { + nodes[nodes[n].prev].next = nodes[n].next; + } else { + first_node = nodes[n].next; + } + + nodes[n].next = first_free_node; + first_free_node = n; + + } + + void erase(const UEdge& edge) { + int n = edge.id * 2; + + if (edges[n].next_out != -1) { + edges[edges[n].next_out].prev_out = edges[n].prev_out; + } + + if (edges[n].prev_out != -1) { + edges[edges[n].prev_out].next_out = edges[n].next_out; + } else { + nodes[edges[n | 1].target].first_out = edges[n].next_out; + } + + if (edges[n | 1].next_out != -1) { + edges[edges[n | 1].next_out].prev_out = edges[n | 1].prev_out; + } + + if (edges[n | 1].prev_out != -1) { + edges[edges[n | 1].prev_out].next_out = edges[n | 1].next_out; + } else { + nodes[edges[n].target].first_out = edges[n | 1].next_out; + } + + edges[n].next_out = first_free_edge; + first_free_edge = n; + + } + + void clear() { + edges.clear(); + nodes.clear(); + first_node = first_free_node = first_free_edge = -1; + } + + protected: + + void changeTarget(UEdge e, Node n) { + if(edges[2 * e.id].next_out != -1) { + edges[edges[2 * e.id].next_out].prev_out = edges[2 * e.id].prev_out; + } + if(edges[2 * e.id].prev_out != -1) { + edges[edges[2 * e.id].prev_out].next_out = + edges[2 * e.id].next_out; + } else { + nodes[edges[(2 * e.id) | 1].target].first_out = + edges[2 * e.id].next_out; + } + + if (nodes[n.id].first_out != -1) { + edges[nodes[n.id].first_out].prev_out = 2 * e.id; + } + edges[(2 * e.id) | 1].target = n.id; + edges[2 * e.id].prev_out = -1; + edges[2 * e.id].next_out = nodes[n.id].first_out; + nodes[n.id].first_out = 2 * e.id; + } + + void changeSource(UEdge e, Node n) { + if(edges[(2 * e.id) | 1].next_out != -1) { + edges[edges[(2 * e.id) | 1].next_out].prev_out = + edges[(2 * e.id) | 1].prev_out; + } + if(edges[(2 * e.id) | 1].prev_out != -1) { + edges[edges[(2 * e.id) | 1].prev_out].next_out = + edges[(2 * e.id) | 1].next_out; + } else { + 
nodes[edges[2 * e.id].target].first_out = + edges[(2 * e.id) | 1].next_out; + } + + if (nodes[n.id].first_out != -1) { + edges[nodes[n.id].first_out].prev_out = ((2 * e.id) | 1); + } + edges[2 * e.id].target = n.id; + edges[(2 * e.id) | 1].prev_out = -1; + edges[(2 * e.id) | 1].next_out = nodes[n.id].first_out; + nodes[n.id].first_out = ((2 * e.id) | 1); + } + + }; + +// typedef UGraphExtender > +// ExtendedListUGraphBase; + + typedef UGraphExtender ExtendedListUGraphBase; + + + + /// \addtogroup graphs + /// @{ + + ///An undirected list graph class. + + ///This is a simple and fast undirected graph implementation. + /// + ///An important extra feature of this graph implementation is that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + ///It conforms to the + ///\ref concepts::UGraph "UGraph concept". + /// + ///\sa concepts::UGraph. + /// + class ListUGraph : public ExtendedListUGraphBase { + private: + ///ListUGraph is \e not copy constructible. Use UGraphCopy() instead. + + ///ListUGraph is \e not copy constructible. Use UGraphCopy() instead. + /// + ListUGraph(const ListUGraph &) :ExtendedListUGraphBase() {}; + ///\brief Assignment of ListUGraph to another one is \e not allowed. + ///Use UGraphCopy() instead. + + ///Assignment of ListUGraph to another one is \e not allowed. + ///Use UGraphCopy() instead. + void operator=(const ListUGraph &) {} + public: + /// Constructor + + /// Constructor. + /// + ListUGraph() {} + + typedef ExtendedListUGraphBase Parent; + + typedef Parent::OutEdgeIt IncEdgeIt; + + /// \brief Add a new node to the graph. + /// + /// \return the new node. + /// + Node addNode() { return Parent::addNode(); } + + /// \brief Add a new edge to the graph. + /// + /// Add a new edge to the graph with source node \c s + /// and target node \c t. + /// \return the new undirected edge. + UEdge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + /// \brief Changes the source of \c e to \c n + /// + /// Changes the source of \c e to \c n + /// + ///\note The EdgeIts and InEdgeIts + ///referencing the changed edge remain + ///valid. However OutEdgeIts are invalidated. + void changeSource(UEdge e, Node n) { + Parent::changeSource(e,n); + } + /// \brief Changes the target of \c e to \c n + /// + /// Changes the target of \c e to \c n + /// + /// \note The EdgeIts referencing the changed edge remain + /// valid. However the other iterators may be invalidated. + void changeTarget(UEdge e, Node n) { + Parent::changeTarget(e,n); + } + /// \brief Changes the source of \c e to \c n + /// + /// Changes the source of \c e to \c n. It changes the proper + /// node of the represented undirected edge. + /// + ///\note The EdgeIts and InEdgeIts + ///referencing the changed edge remain + ///valid. However OutEdgeIts are invalidated. + void changeSource(Edge e, Node n) { + if (Parent::direction(e)) { + Parent::changeSource(e,n); + } else { + Parent::changeTarget(e,n); + } + } + /// \brief Changes the target of \c e to \c n + /// + /// Changes the target of \c e to \c n. It changes the proper + /// node of the represented undirected edge. + /// + ///\note The EdgeIts and OutEdgeIts + ///referencing the changed edge remain + ///valid. However InEdgeIts are invalidated. + void changeTarget(Edge e, Node n) { + if (Parent::direction(e)) { + Parent::changeTarget(e,n); + } else { + Parent::changeSource(e,n); + } + } + /// \brief Contract two nodes. + /// + /// This function contracts two nodes. 
+ /// + /// Node \p b will be removed but instead of deleting + /// its neighboring edges, they will be joined to \p a. + /// The last parameter \p r controls whether to remove loops. \c true + /// means that loops will be removed. + /// + /// \note The EdgeIts referencing a moved edge remain + /// valid. + void contract(Node a, Node b, bool r = true) { + for(IncEdgeIt e(*this, b); e!=INVALID;) { + IncEdgeIt f = e; ++f; + if (r && runningNode(e) == a) { + erase(e); + } else if (source(e) == b) { + changeSource(e, a); + } else { + changeTarget(e, a); + } + e = f; + } + erase(b); + } + + + /// \brief Class to make a snapshot of the graph and restore + /// to it later. + /// + /// Class to make a snapshot of the graph and to restore it + /// later. + /// + /// The newly added nodes and undirected edges can be removed + /// using the restore() function. + /// + /// \warning Edge and node deletions cannot be restored. This + /// events invalidate the snapshot. + class Snapshot { + protected: + + typedef Parent::NodeNotifier NodeNotifier; + + class NodeObserverProxy : public NodeNotifier::ObserverBase { + public: + + NodeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using NodeNotifier::ObserverBase::attach; + using NodeNotifier::ObserverBase::detach; + using NodeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const Node& node) { + snapshot.addNode(node); + } + virtual void add(const std::vector& nodes) { + for (int i = nodes.size() - 1; i >= 0; ++i) { + snapshot.addNode(nodes[i]); + } + } + virtual void erase(const Node& node) { + snapshot.eraseNode(node); + } + virtual void erase(const std::vector& nodes) { + for (int i = 0; i < int(nodes.size()); ++i) { + snapshot.eraseNode(nodes[i]); + } + } + virtual void build() { + Node node; + std::vector nodes; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + nodes.push_back(node); + } + for (int i = nodes.size() - 1; i >= 0; --i) { + snapshot.addNode(nodes[i]); + } + } + virtual void clear() { + Node node; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + snapshot.eraseNode(node); + } + } + + Snapshot& snapshot; + }; + + class UEdgeObserverProxy : public UEdgeNotifier::ObserverBase { + public: + + UEdgeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using UEdgeNotifier::ObserverBase::attach; + using UEdgeNotifier::ObserverBase::detach; + using UEdgeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const UEdge& edge) { + snapshot.addUEdge(edge); + } + virtual void add(const std::vector& edges) { + for (int i = edges.size() - 1; i >= 0; ++i) { + snapshot.addUEdge(edges[i]); + } + } + virtual void erase(const UEdge& edge) { + snapshot.eraseUEdge(edge); + } + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + snapshot.eraseUEdge(edges[i]); + } + } + virtual void build() { + UEdge edge; + std::vector edges; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + edges.push_back(edge); + } + for (int i = edges.size() - 1; i >= 0; --i) { + snapshot.addUEdge(edges[i]); + } + } + virtual void clear() { + UEdge edge; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + snapshot.eraseUEdge(edge); + } + } + + Snapshot& snapshot; + }; + + ListUGraph *graph; + + NodeObserverProxy node_observer_proxy; + UEdgeObserverProxy edge_observer_proxy; + + std::list added_nodes; + std::list added_edges; + + + void addNode(const Node& node) { + 
added_nodes.push_front(node); + } + void eraseNode(const Node& node) { + std::list::iterator it = + std::find(added_nodes.begin(), added_nodes.end(), node); + if (it == added_nodes.end()) { + clear(); + edge_observer_proxy.detach(); + throw NodeNotifier::ImmediateDetach(); + } else { + added_nodes.erase(it); + } + } + + void addUEdge(const UEdge& edge) { + added_edges.push_front(edge); + } + void eraseUEdge(const UEdge& edge) { + std::list::iterator it = + std::find(added_edges.begin(), added_edges.end(), edge); + if (it == added_edges.end()) { + clear(); + node_observer_proxy.detach(); + throw UEdgeNotifier::ImmediateDetach(); + } else { + added_edges.erase(it); + } + } + + void attach(ListUGraph &_graph) { + graph = &_graph; + node_observer_proxy.attach(graph->notifier(Node())); + edge_observer_proxy.attach(graph->notifier(UEdge())); + } + + void detach() { + node_observer_proxy.detach(); + edge_observer_proxy.detach(); + } + + bool attached() const { + return node_observer_proxy.attached(); + } + + void clear() { + added_nodes.clear(); + added_edges.clear(); + } + + public: + + /// \brief Default constructor. + /// + /// Default constructor. + /// To actually make a snapshot you must call save(). + Snapshot() + : graph(0), node_observer_proxy(*this), + edge_observer_proxy(*this) {} + + /// \brief Constructor that immediately makes a snapshot. + /// + /// This constructor immediately makes a snapshot of the graph. + /// \param _graph The graph we make a snapshot of. + Snapshot(ListUGraph &_graph) + : node_observer_proxy(*this), + edge_observer_proxy(*this) { + attach(_graph); + } + + /// \brief Make a snapshot. + /// + /// Make a snapshot of the graph. + /// + /// This function can be called more than once. In case of a repeated + /// call, the previous snapshot gets lost. + /// \param _graph The graph we make the snapshot of. + void save(ListUGraph &_graph) { + if (attached()) { + detach(); + clear(); + } + attach(_graph); + } + + /// \brief Undo the changes until the last snapshot. + // + /// Undo the changes until the last snapshot created by save(). + void restore() { + detach(); + for(std::list::iterator it = added_edges.begin(); + it != added_edges.end(); ++it) { + graph->erase(*it); + } + for(std::list::iterator it = added_nodes.begin(); + it != added_nodes.end(); ++it) { + graph->erase(*it); + } + clear(); + } + + /// \brief Gives back true when the snapshot is valid. + /// + /// Gives back true when the snapshot is valid. + bool valid() const { + return attached(); + } + }; + }; + + + class ListBpUGraphBase { + public: + + class NodeSetError : public LogicError { + public: + virtual const char* what() const throw() { + return "lemon::ListBpUGraph::NodeSetError"; + } + }; + + protected: + + struct NodeT { + int first_edge, prev, next; + }; + + struct UEdgeT { + int aNode, prev_out, next_out; + int bNode, prev_in, next_in; + }; + + std::vector aNodes; + std::vector bNodes; + + std::vector edges; + + int first_anode; + int first_free_anode; + + int first_bnode; + int first_free_bnode; + + int first_free_edge; + + public: + + class Node { + friend class ListBpUGraphBase; + protected: + int id; + + explicit Node(int _id) : id(_id) {} + public: + Node() {} + Node(Invalid) { id = -1; } + bool operator==(const Node i) const {return id==i.id;} + bool operator!=(const Node i) const {return id!=i.id;} + bool operator<(const Node i) const {return id> 1].next; + } + + void firstBNode(Node& node) const { + node.id = first_bnode != -1 ? 
(first_bnode << 1) + 1 : -1; + } + void nextBNode(Node& node) const { + node.id = bNodes[node.id >> 1].next; + } + + void first(Node& node) const { + if (first_anode != -1) { + node.id = (first_anode << 1); + } else if (first_bnode != -1) { + node.id = (first_bnode << 1) + 1; + } else { + node.id = -1; + } + } + void next(Node& node) const { + if (aNode(node)) { + node.id = aNodes[node.id >> 1].next; + if (node.id == -1) { + if (first_bnode != -1) { + node.id = (first_bnode << 1) + 1; + } + } + } else { + node.id = bNodes[node.id >> 1].next; + } + } + + void first(UEdge& edge) const { + int aid = first_anode; + while (aid != -1 && aNodes[aid].first_edge == -1) { + aid = aNodes[aid].next != -1 ? + aNodes[aid].next >> 1 : -1; + } + if (aid != -1) { + edge.id = aNodes[aid].first_edge; + } else { + edge.id = -1; + } + } + void next(UEdge& edge) const { + int aid = edges[edge.id].aNode >> 1; + edge.id = edges[edge.id].next_out; + if (edge.id == -1) { + aid = aNodes[aid].next != -1 ? + aNodes[aid].next >> 1 : -1; + while (aid != -1 && aNodes[aid].first_edge == -1) { + aid = aNodes[aid].next != -1 ? + aNodes[aid].next >> 1 : -1; + } + if (aid != -1) { + edge.id = aNodes[aid].first_edge; + } else { + edge.id = -1; + } + } + } + + void firstFromANode(UEdge& edge, const Node& node) const { + LEMON_ASSERT((node.id & 1) == 0, NodeSetError()); + edge.id = aNodes[node.id >> 1].first_edge; + } + void nextFromANode(UEdge& edge) const { + edge.id = edges[edge.id].next_out; + } + + void firstFromBNode(UEdge& edge, const Node& node) const { + LEMON_ASSERT((node.id & 1) == 1, NodeSetError()); + edge.id = bNodes[node.id >> 1].first_edge; + } + void nextFromBNode(UEdge& edge) const { + edge.id = edges[edge.id].next_in; + } + + static int id(const Node& node) { + return node.id; + } + static Node nodeFromId(int id) { + return Node(id); + } + int maxNodeId() const { + return aNodes.size() > bNodes.size() ? 
+ aNodes.size() * 2 - 2 : bNodes.size() * 2 - 1; + } + + static int id(const UEdge& edge) { + return edge.id; + } + static UEdge uEdgeFromId(int id) { + return UEdge(id); + } + int maxUEdgeId() const { + return edges.size(); + } + + static int aNodeId(const Node& node) { + return node.id >> 1; + } + static Node nodeFromANodeId(int id) { + return Node(id << 1); + } + int maxANodeId() const { + return aNodes.size(); + } + + static int bNodeId(const Node& node) { + return node.id >> 1; + } + static Node nodeFromBNodeId(int id) { + return Node((id << 1) + 1); + } + int maxBNodeId() const { + return bNodes.size(); + } + + Node aNode(const UEdge& edge) const { + return Node(edges[edge.id].aNode); + } + Node bNode(const UEdge& edge) const { + return Node(edges[edge.id].bNode); + } + + static bool aNode(const Node& node) { + return (node.id & 1) == 0; + } + + static bool bNode(const Node& node) { + return (node.id & 1) == 1; + } + + Node addANode() { + int aid; + if (first_free_anode == -1) { + aid = aNodes.size(); + aNodes.push_back(NodeT()); + } else { + aid = first_free_anode; + first_free_anode = aNodes[first_free_anode].next; + } + if (first_anode != -1) { + aNodes[aid].next = first_anode << 1; + aNodes[first_anode].prev = aid << 1; + } else { + aNodes[aid].next = -1; + } + aNodes[aid].prev = -1; + first_anode = aid; + aNodes[aid].first_edge = -1; + return Node(aid << 1); + } + + Node addBNode() { + int bid; + if (first_free_bnode == -1) { + bid = bNodes.size(); + bNodes.push_back(NodeT()); + } else { + bid = first_free_bnode; + first_free_bnode = bNodes[first_free_bnode].next; + } + if (first_bnode != -1) { + bNodes[bid].next = (first_bnode << 1) + 1; + bNodes[first_bnode].prev = (bid << 1) + 1; + } else { + bNodes[bid].next = -1; + } + bNodes[bid].prev = -1; + first_bnode = bid; + bNodes[bid].first_edge = -1; + return Node((bid << 1) + 1); + } + + UEdge addEdge(const Node& source, const Node& target) { + LEMON_ASSERT(((source.id ^ target.id) & 1) == 1, NodeSetError()); + int edgeId; + if (first_free_edge != -1) { + edgeId = first_free_edge; + first_free_edge = edges[edgeId].next_out; + } else { + edgeId = edges.size(); + edges.push_back(UEdgeT()); + } + if ((source.id & 1) == 0) { + edges[edgeId].aNode = source.id; + edges[edgeId].bNode = target.id; + } else { + edges[edgeId].aNode = target.id; + edges[edgeId].bNode = source.id; + } + edges[edgeId].next_out = aNodes[edges[edgeId].aNode >> 1].first_edge; + edges[edgeId].prev_out = -1; + if (aNodes[edges[edgeId].aNode >> 1].first_edge != -1) { + edges[aNodes[edges[edgeId].aNode >> 1].first_edge].prev_out = edgeId; + } + aNodes[edges[edgeId].aNode >> 1].first_edge = edgeId; + edges[edgeId].next_in = bNodes[edges[edgeId].bNode >> 1].first_edge; + edges[edgeId].prev_in = -1; + if (bNodes[edges[edgeId].bNode >> 1].first_edge != -1) { + edges[bNodes[edges[edgeId].bNode >> 1].first_edge].prev_in = edgeId; + } + bNodes[edges[edgeId].bNode >> 1].first_edge = edgeId; + return UEdge(edgeId); + } + + void erase(const Node& node) { + if (aNode(node)) { + int aid = node.id >> 1; + if (aNodes[aid].prev != -1) { + aNodes[aNodes[aid].prev >> 1].next = aNodes[aid].next; + } else { + first_anode = + aNodes[aid].next != -1 ? 
aNodes[aid].next >> 1 : -1; + } + if (aNodes[aid].next != -1) { + aNodes[aNodes[aid].next >> 1].prev = aNodes[aid].prev; + } + aNodes[aid].next = first_free_anode; + first_free_anode = aid; + } else { + int bid = node.id >> 1; + if (bNodes[bid].prev != -1) { + bNodes[bNodes[bid].prev >> 1].next = bNodes[bid].next; + } else { + first_bnode = + bNodes[bid].next != -1 ? bNodes[bid].next >> 1 : -1; + } + if (bNodes[bid].next != -1) { + bNodes[bNodes[bid].next >> 1].prev = bNodes[bid].prev; + } + bNodes[bid].next = first_free_bnode; + first_free_bnode = bid; + } + } + + void erase(const UEdge& edge) { + + if (edges[edge.id].prev_out != -1) { + edges[edges[edge.id].prev_out].next_out = edges[edge.id].next_out; + } else { + aNodes[edges[edge.id].aNode >> 1].first_edge = edges[edge.id].next_out; + } + if (edges[edge.id].next_out != -1) { + edges[edges[edge.id].next_out].prev_out = edges[edge.id].prev_out; + } + + if (edges[edge.id].prev_in != -1) { + edges[edges[edge.id].prev_in].next_in = edges[edge.id].next_in; + } else { + bNodes[edges[edge.id].bNode >> 1].first_edge = edges[edge.id].next_in; + } + if (edges[edge.id].next_in != -1) { + edges[edges[edge.id].next_in].prev_in = edges[edge.id].prev_in; + } + + edges[edge.id].next_out = first_free_edge; + first_free_edge = edge.id; + } + + void clear() { + aNodes.clear(); + bNodes.clear(); + edges.clear(); + first_anode = -1; + first_free_anode = -1; + first_bnode = -1; + first_free_bnode = -1; + first_free_edge = -1; + } + + void changeANode(const UEdge& edge, const Node& node) { + LEMON_ASSERT((node.id & 1) == 0, NodeSetError()); + if (edges[edge.id].prev_out != -1) { + edges[edges[edge.id].prev_out].next_out = edges[edge.id].next_out; + } else { + aNodes[edges[edge.id].aNode >> 1].first_edge = edges[edge.id].next_out; + } + if (edges[edge.id].next_out != -1) { + edges[edges[edge.id].next_out].prev_out = edges[edge.id].prev_out; + } + if (aNodes[node.id >> 1].first_edge != -1) { + edges[aNodes[node.id >> 1].first_edge].prev_out = edge.id; + } + edges[edge.id].prev_out = -1; + edges[edge.id].next_out = aNodes[node.id >> 1].first_edge; + aNodes[node.id >> 1].first_edge = edge.id; + edges[edge.id].aNode = node.id; + } + + void changeBNode(const UEdge& edge, const Node& node) { + LEMON_ASSERT((node.id & 1) == 1, NodeSetError()); + if (edges[edge.id].prev_in != -1) { + edges[edges[edge.id].prev_in].next_in = edges[edge.id].next_in; + } else { + bNodes[edges[edge.id].bNode >> 1].first_edge = edges[edge.id].next_in; + } + if (edges[edge.id].next_in != -1) { + edges[edges[edge.id].next_in].prev_in = edges[edge.id].prev_in; + } + if (bNodes[node.id >> 1].first_edge != -1) { + edges[bNodes[node.id >> 1].first_edge].prev_in = edge.id; + } + edges[edge.id].prev_in = -1; + edges[edge.id].next_in = bNodes[node.id >> 1].first_edge; + bNodes[node.id >> 1].first_edge = edge.id; + edges[edge.id].bNode = node.id; + } + + }; + + + typedef BpUGraphExtender > + ExtendedListBpUGraphBase; + + /// \ingroup graphs + /// + /// \brief A smart bipartite undirected graph class. + /// + /// This is a bipartite undirected graph implementation. + /// It is conforms to the \ref concepts::BpUGraph "BpUGraph concept". + /// + ///An important extra feature of this graph implementation is that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + /// \sa concepts::BpUGraph. + /// + class ListBpUGraph : public ExtendedListBpUGraphBase { + /// \brief ListBpUGraph is \e not copy constructible. + /// + ///ListBpUGraph is \e not copy constructible. 
+ ListBpUGraph(const ListBpUGraph &) :ExtendedListBpUGraphBase() {}; + /// \brief Assignment of ListBpUGraph to another one is \e not + /// allowed. + /// + /// Assignment of ListBpUGraph to another one is \e not allowed. + void operator=(const ListBpUGraph &) {} + public: + /// \brief Constructor + /// + /// Constructor. + /// + ListBpUGraph() {} + + typedef ExtendedListBpUGraphBase Parent; + /// \brief Add a new ANode to the graph. + /// + /// \return the new node. + /// + Node addANode() { return Parent::addANode(); } + + /// \brief Add a new BNode to the graph. + /// + /// \return the new node. + /// + Node addBNode() { return Parent::addBNode(); } + + /// \brief Add a new edge to the graph. + /// + /// Add a new edge to the graph with an ANode and a BNode. + /// \return the new undirected edge. + UEdge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + + /// \brief Changes the ANode of \c e to \c n + /// + /// Changes the ANode of \c e to \c n + /// + ///\note The EdgeIts and InEdgeIts referencing + ///the changed edge remain valid. However OutEdgeIts are + ///invalidated. + void changeANode(UEdge e, Node n) { + Parent::changeANode(e,n); + } + + /// \brief Changes the BNode of \c e to \c n + /// + /// Changes the BNode of \c e to \c n + /// + /// \note The EdgeIts and OutEdgeIts + /// referencing the changed edge remain + /// valid. However InEdgeIts are invalidated. + void changeBNode(UEdge e, Node n) { + Parent::changeBNode(e,n); + } + + /// \brief Changes the source(ANode) of \c e to \c n + /// + /// Changes the source(ANode) of \c e to \c n + /// + ///\note The EdgeIts and InEdgeIts referencing + ///the changed edge remain valid. However OutEdgeIts are + ///invalidated. + void changeSource(UEdge e, Node n) { + Parent::changeANode(e,n); + } + + /// \brief Changes the target(BNode) of \c e to \c n + /// + /// Changes the target(BNode) of \c e to \c n + /// + /// \note The EdgeIts and OutEdgeIts + /// referencing the changed edge remain + /// valid. However InEdgeIts are invalidated. + void changeTarget(UEdge e, Node n) { + Parent::changeBNode(e,n); + } + + /// \brief Changes the source of \c e to \c n + /// + /// Changes the source of \c e to \c n. It changes the proper + /// node of the represented undirected edge. + /// + ///\note The EdgeIts and InEdgeIts + ///referencing the changed edge remain + ///valid. However OutEdgeIts are invalidated. + void changeSource(Edge e, Node n) { + if (Parent::direction(e)) { + Parent::changeANode(e,n); + } else { + Parent::changeBNode(e,n); + } + } + /// \brief Changes the target of \c e to \c n + /// + /// Changes the target of \c e to \c n. It changes the proper + /// node of the represented undirected edge. + /// + ///\note The EdgeIts and OutEdgeIts + ///referencing the changed edge remain + ///valid. However InEdgeIts are invalidated. + void changeTarget(Edge e, Node n) { + if (Parent::direction(e)) { + Parent::changeBNode(e,n); + } else { + Parent::changeANode(e,n); + } + } + /// \brief Contract two nodes. + /// + /// This function contracts two nodes. + /// + /// Node \p b will be removed but instead of deleting its + /// neighboring edges, they will be joined to \p a. The two nodes + /// should be from the same nodeset, of course. + /// + /// \note The EdgeIts referencing a moved edge remain + /// valid. 
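+ ///
+ /// A minimal usage sketch (not from the original documentation; the
+ /// node names are arbitrary), using only members declared by this
+ /// class:
+ ///\code
+ ///   ListBpUGraph g;
+ ///   ListBpUGraph::Node a1 = g.addANode();
+ ///   ListBpUGraph::Node a2 = g.addANode();
+ ///   ListBpUGraph::Node b  = g.addBNode();
+ ///   g.addEdge(a1, b);
+ ///   g.addEdge(a2, b);
+ ///   g.contract(a1, a2);  // the edge of a2 is joined to a1, then a2 is erased
+ ///\endcode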
+ void contract(const Node& a, const Node& b) { + LEMON_ASSERT(Parent::aNode(a) == Parent::aNode(b), NodeSetError()); + if (Parent::aNode(a)) { + for (IncEdgeIt e(*this, b); e!=INVALID;) { + IncEdgeIt f = e; ++f; + changeSource(e, a); + e = f; + } + } else { + for (IncEdgeIt e(*this, b); e!=INVALID;) { + IncEdgeIt f = e; ++f; + changeTarget(e, a); + e = f; + } + } + erase(b); + } + + /// \brief Class to make a snapshot of the graph and restore + /// to it later. + /// + /// Class to make a snapshot of the graph and to restore it + /// later. + /// + /// The newly added nodes and undirected edges can be removed + /// using the restore() function. + /// + /// \warning Edge and node deletions cannot be restored. This + /// events invalidate the snapshot. + class Snapshot { + protected: + + typedef Parent::NodeNotifier NodeNotifier; + + class NodeObserverProxy : public NodeNotifier::ObserverBase { + public: + + NodeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using NodeNotifier::ObserverBase::attach; + using NodeNotifier::ObserverBase::detach; + using NodeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const Node& node) { + snapshot.addNode(node); + } + virtual void add(const std::vector& nodes) { + for (int i = nodes.size() - 1; i >= 0; ++i) { + snapshot.addNode(nodes[i]); + } + } + virtual void erase(const Node& node) { + snapshot.eraseNode(node); + } + virtual void erase(const std::vector& nodes) { + for (int i = 0; i < int(nodes.size()); ++i) { + snapshot.eraseNode(nodes[i]); + } + } + virtual void build() { + Node node; + std::vector nodes; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + nodes.push_back(node); + } + for (int i = nodes.size() - 1; i >= 0; --i) { + snapshot.addNode(nodes[i]); + } + } + virtual void clear() { + Node node; + for (notifier()->first(node); node != INVALID; + notifier()->next(node)) { + snapshot.eraseNode(node); + } + } + + Snapshot& snapshot; + }; + + class UEdgeObserverProxy : public UEdgeNotifier::ObserverBase { + public: + + UEdgeObserverProxy(Snapshot& _snapshot) + : snapshot(_snapshot) {} + + using UEdgeNotifier::ObserverBase::attach; + using UEdgeNotifier::ObserverBase::detach; + using UEdgeNotifier::ObserverBase::attached; + + protected: + + virtual void add(const UEdge& edge) { + snapshot.addUEdge(edge); + } + virtual void add(const std::vector& edges) { + for (int i = edges.size() - 1; i >= 0; ++i) { + snapshot.addUEdge(edges[i]); + } + } + virtual void erase(const UEdge& edge) { + snapshot.eraseUEdge(edge); + } + virtual void erase(const std::vector& edges) { + for (int i = 0; i < int(edges.size()); ++i) { + snapshot.eraseUEdge(edges[i]); + } + } + virtual void build() { + UEdge edge; + std::vector edges; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + edges.push_back(edge); + } + for (int i = edges.size() - 1; i >= 0; --i) { + snapshot.addUEdge(edges[i]); + } + } + virtual void clear() { + UEdge edge; + for (notifier()->first(edge); edge != INVALID; + notifier()->next(edge)) { + snapshot.eraseUEdge(edge); + } + } + + Snapshot& snapshot; + }; + + ListBpUGraph *graph; + + NodeObserverProxy node_observer_proxy; + UEdgeObserverProxy edge_observer_proxy; + + std::list added_nodes; + std::list added_edges; + + + void addNode(const Node& node) { + added_nodes.push_front(node); + } + void eraseNode(const Node& node) { + std::list::iterator it = + std::find(added_nodes.begin(), added_nodes.end(), node); + if (it == added_nodes.end()) { + clear(); + 
edge_observer_proxy.detach(); + throw NodeNotifier::ImmediateDetach(); + } else { + added_nodes.erase(it); + } + } + + void addUEdge(const UEdge& edge) { + added_edges.push_front(edge); + } + void eraseUEdge(const UEdge& edge) { + std::list::iterator it = + std::find(added_edges.begin(), added_edges.end(), edge); + if (it == added_edges.end()) { + clear(); + node_observer_proxy.detach(); + throw UEdgeNotifier::ImmediateDetach(); + } else { + added_edges.erase(it); + } + } + + void attach(ListBpUGraph &_graph) { + graph = &_graph; + node_observer_proxy.attach(graph->notifier(Node())); + edge_observer_proxy.attach(graph->notifier(UEdge())); + } + + void detach() { + node_observer_proxy.detach(); + edge_observer_proxy.detach(); + } + + bool attached() const { + return node_observer_proxy.attached(); + } + + void clear() { + added_nodes.clear(); + added_edges.clear(); + } + + public: + + /// \brief Default constructor. + /// + /// Default constructor. + /// To actually make a snapshot you must call save(). + Snapshot() + : graph(0), node_observer_proxy(*this), + edge_observer_proxy(*this) {} + + /// \brief Constructor that immediately makes a snapshot. + /// + /// This constructor immediately makes a snapshot of the graph. + /// \param _graph The graph we make a snapshot of. + Snapshot(ListBpUGraph &_graph) + : node_observer_proxy(*this), + edge_observer_proxy(*this) { + attach(_graph); + } + + /// \brief Make a snapshot. + /// + /// Make a snapshot of the graph. + /// + /// This function can be called more than once. In case of a repeated + /// call, the previous snapshot gets lost. + /// \param _graph The graph we make the snapshot of. + void save(ListBpUGraph &_graph) { + if (attached()) { + detach(); + clear(); + } + attach(_graph); + } + + /// \brief Undo the changes until the last snapshot. + // + /// Undo the changes until the last snapshot created by save(). + void restore() { + detach(); + for(std::list::iterator it = added_edges.begin(); + it != added_edges.end(); ++it) { + graph->erase(*it); + } + for(std::list::iterator it = added_nodes.begin(); + it != added_nodes.end(); ++it) { + graph->erase(*it); + } + clear(); + } + + /// \brief Gives back true when the snapshot is valid. + /// + /// Gives back true when the snapshot is valid. + bool valid() const { + return attached(); + } + }; + }; + + + /// @} +} //namespace lemon + + +#endif diff --git a/src/lemon/maps.h b/src/lemon/maps.h new file mode 100644 index 0000000..e586406 --- /dev/null +++ b/src/lemon/maps.h @@ -0,0 +1,1633 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_MAPS_H +#define LEMON_MAPS_H + +#include +#include +#include + +#include +#include + +///\file +///\ingroup maps +///\brief Miscellaneous property maps +/// +#include + +namespace lemon { + + /// \addtogroup maps + /// @{ + + /// Base class of maps. + + /// Base class of maps. + /// It provides the necessary typedefs required by the map concept. 
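+ ///
+ /// A hedged sketch of the intended use (the \c SquareMap name is purely
+ /// illustrative): a user-defined read map can derive from \c MapBase to
+ /// obtain the \c Key and \c Value typedefs and only has to supply
+ /// \c operator[].
+ ///\code
+ ///   struct SquareMap : public MapBase<int, int> {
+ ///     Value operator[](const Key& k) const { return k * k; }
+ ///   };
+ ///\endcode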
+ template + class MapBase { + public: + /// The key type of the map. + typedef K Key; + /// The value type of the map. (The type of objects associated with the keys). + typedef T Value; + }; + + /// Null map. (a.k.a. DoNothingMap) + + /// This map can be used if you have to provide a map only for + /// its type definitions, or if you have to provide a writable map, + /// but data written to it is not required (i.e. it will be sent to + /// /dev/null). + template + class NullMap : public MapBase { + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + /// Gives back a default constructed element. + T operator[](const K&) const { return T(); } + /// Absorbs the value. + void set(const K&, const T&) {} + }; + + ///Returns a \c NullMap class + + ///This function just returns a \c NullMap class. + ///\relates NullMap + template + NullMap nullMap() { + return NullMap(); + } + + + /// Constant map. + + /// This is a \ref concepts::ReadMap "readable" map which assigns a + /// specified value to each key. + /// In other aspects it is equivalent to \c NullMap. + template + class ConstMap : public MapBase { + private: + T v; + public: + + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + /// Default constructor + + /// Default constructor. + /// The value of the map will be uninitialized. + /// (More exactly it will be default constructed.) + ConstMap() {} + + /// Constructor with specified initial value + + /// Constructor with specified initial value. + /// \param _v is the initial value of the map. + ConstMap(const T &_v) : v(_v) {} + + ///\e + T operator[](const K&) const { return v; } + + ///\e + void setAll(const T &t) { + v = t; + } + + template + struct rebind { + typedef ConstMap other; + }; + + template + ConstMap(const ConstMap &, const T &_v) : v(_v) {} + }; + + ///Returns a \c ConstMap class + + ///This function just returns a \c ConstMap class. + ///\relates ConstMap + template + inline ConstMap constMap(const V &v) { + return ConstMap(v); + } + + + template + struct Const { }; + + /// Constant map with inlined constant value. + + /// This is a \ref concepts::ReadMap "readable" map which assigns a + /// specified value to each key. + /// In other aspects it is equivalent to \c NullMap. + template + class ConstMap > : public MapBase { + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ConstMap() { } + ///\e + V operator[](const K&) const { return v; } + ///\e + void set(const K&, const V&) { } + }; + + ///Returns a \c ConstMap class with inlined value + + ///This function just returns a \c ConstMap class with inlined value. + ///\relates ConstMap + template + inline ConstMap > constMap() { + return ConstMap >(); + } + + ///Map based on \c std::map + + ///This is essentially a wrapper for \c std::map with addition that + ///you can specify a default value different from \c Value() . + ///It meets the \ref concepts::ReferenceMap "ReferenceMap" concept. 
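+ ///
+ /// An illustrative sketch (the key and value types are chosen
+ /// arbitrarily, and the standard headers are assumed to be included):
+ ///\code
+ ///   StdMap<std::string, int> degree(0);  // 0 is the default value
+ ///   degree.set("u", 3);
+ ///   int du = degree["u"];                // 3
+ ///   int dv = degree["v"];                // missing key, gives back 0
+ ///\endcode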
+ template > + class StdMap : public MapBase { + template + friend class StdMap; + public: + + typedef MapBase Parent; + ///Key type + typedef typename Parent::Key Key; + ///Value type + typedef typename Parent::Value Value; + ///Reference Type + typedef T& Reference; + ///Const reference type + typedef const T& ConstReference; + + typedef True ReferenceMapTag; + + private: + + typedef std::map Map; + Value _value; + Map _map; + + public: + + /// Constructor with specified default value + StdMap(const T& value = T()) : _value(value) {} + /// \brief Constructs the map from an appropriate \c std::map, and + /// explicitly specifies a default value. + template + StdMap(const std::map &map, const T& value = T()) + : _map(map.begin(), map.end()), _value(value) {} + + /// \brief Constructs a map from an other \ref StdMap. + template + StdMap(const StdMap &c) + : _map(c._map.begin(), c._map.end()), _value(c._value) {} + + private: + + StdMap& operator=(const StdMap&); + + public: + + ///\e + Reference operator[](const Key &k) { + typename Map::iterator it = _map.lower_bound(k); + if (it != _map.end() && !_map.key_comp()(k, it->first)) + return it->second; + else + return _map.insert(it, std::make_pair(k, _value))->second; + } + + /// \e + ConstReference operator[](const Key &k) const { + typename Map::const_iterator it = _map.find(k); + if (it != _map.end()) + return it->second; + else + return _value; + } + + /// \e + void set(const Key &k, const T &t) { + typename Map::iterator it = _map.lower_bound(k); + if (it != _map.end() && !_map.key_comp()(k, it->first)) + it->second = t; + else + _map.insert(it, std::make_pair(k, t)); + } + + /// \e + void setAll(const T &t) { + _value = t; + _map.clear(); + } + + template > + struct rebind { + typedef StdMap other; + }; + }; + + ///Returns a \c StdMap class + + ///This function just returns a \c StdMap class with specified + ///default value. + ///\relates StdMap + template + inline StdMap stdMap(const V& value = V()) { + return StdMap(value); + } + + template + inline StdMap > stdMap(const V& value = V()) { + return StdMap >(value); + } + + ///Returns a \c StdMap class created from an appropriate \c std::map + + ///This function just returns a \c StdMap class created from an + ///appropriate \c std::map. + ///\relates StdMap + template + inline StdMap stdMap( const std::map &map, + const V& value = V() ) { + return StdMap(map, value); + } + + /// \brief Map for storing values for keys from the range [0..size-1] + /// + /// This map has the [0..size-1] keyset and the values + /// are stored in a \c std::vector container. It can be used with + /// some data structures, for example \c UnionFind, \c BinHeap, when + /// the used items are small integer numbers. + template + class IntegerMap : public MapBase { + + template + friend class IntegerMap; + + public: + + typedef MapBase Parent; + ///\e + typedef typename Parent::Key Key; + ///\e + typedef typename Parent::Value Value; + ///\e + typedef T& Reference; + ///\e + typedef const T& ConstReference; + + typedef True ReferenceMapTag; + + private: + + typedef std::vector Vector; + Vector _vector; + + public: + + /// Constructor with specified default value + IntegerMap(int size = 0, const T& value = T()) : _vector(size, value) {} + + /// \brief Constructs the map from an appropriate \c std::vector. + template + IntegerMap(const std::vector& vector) + : _vector(vector.begin(), vector.end()) {} + + /// \brief Constructs a map from an other \ref IntegerMap. 
+ template + IntegerMap(const IntegerMap &c) + : _vector(c._vector.begin(), c._vector.end()) {} + + /// \brief Resize the container + void resize(int size, const T& value = T()) { + _vector.resize(size, value); + } + + private: + + IntegerMap& operator=(const IntegerMap&); + + public: + + ///\e + Reference operator[](Key k) { + return _vector[k]; + } + + /// \e + ConstReference operator[](Key k) const { + return _vector[k]; + } + + /// \e + void set(const Key &k, const T& t) { + _vector[k] = t; + } + + }; + + ///Returns an \c IntegerMap class + + ///This function just returns an \c IntegerMap class. + ///\relates IntegerMap + template + inline IntegerMap integerMap(int size = 0, const T& value = T()) { + return IntegerMap(size, value); + } + + /// @} + + /// \addtogroup map_adaptors + /// @{ + + /// \brief Identity map. + /// + /// This map gives back the given key as value without any + /// modification. + template + class IdentityMap : public MapBase { + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + /// \e + const T& operator[](const T& t) const { + return t; + } + }; + + ///Returns an \c IdentityMap class + + ///This function just returns an \c IdentityMap class. + ///\relates IdentityMap + template + inline IdentityMap identityMap() { + return IdentityMap(); + } + + + ///\brief Convert the \c Value of a map to another type using + ///the default conversion. + /// + ///This \ref concepts::ReadMap "read only map" + ///converts the \c Value of a map to type \c T. + ///Its \c Key is inherited from \c M. + template + class ConvertMap : public MapBase { + const M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + + ///Constructor + ///\param _m is the underlying map + ConvertMap(const M &_m) : m(_m) {}; + + ///\e + Value operator[](const Key& k) const {return m[k];} + }; + + ///Returns a \c ConvertMap class + + ///This function just returns a \c ConvertMap class. + ///\relates ConvertMap + template + inline ConvertMap convertMap(const M &m) { + return ConvertMap(m); + } + + ///Simple wrapping of a map + + ///This \ref concepts::ReadMap "read only map" returns the simple + ///wrapping of the given map. Sometimes the reference maps cannot be + ///combined with simple read maps. This map adaptor wraps the given + ///map to simple read map. + /// + ///\sa SimpleWriteMap + /// + /// \todo Revise the misleading name + template + class SimpleMap : public MapBase { + const M& m; + + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + SimpleMap(const M &_m) : m(_m) {}; + ///\e + Value operator[](Key k) const {return m[k];} + }; + + ///Returns a \c SimpleMap class + + ///This function just returns a \c SimpleMap class. + ///\relates SimpleMap + template + inline SimpleMap simpleMap(const M &m) { + return SimpleMap(m); + } + + ///Simple writable wrapping of a map + + ///This \ref concepts::ReadWriteMap "read-write map" returns the simple + ///wrapping of the given map. Sometimes the reference maps cannot be + ///combined with simple read-write maps. This map adaptor wraps the + ///given map to simple read-write map. 
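+ ///
+ /// A minimal sketch of the wrapping (a \ref StdMap is used here only to
+ /// have a concrete writable underlying map):
+ ///\code
+ ///   StdMap<int, double> weight;
+ ///   simpleWriteMap(weight).set(3, 2.5);  // forwarded to weight
+ ///   double x = weight[3];                // 2.5
+ ///\endcode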
+ /// + ///\sa SimpleMap + /// + /// \todo Revise the misleading name + template + class SimpleWriteMap : public MapBase { + M& m; + + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + SimpleWriteMap(M &_m) : m(_m) {}; + ///\e + Value operator[](Key k) const {return m[k];} + ///\e + void set(Key k, const Value& c) { m.set(k, c); } + }; + + ///Returns a \c SimpleWriteMap class + + ///This function just returns a \c SimpleWriteMap class. + ///\relates SimpleWriteMap + template + inline SimpleWriteMap simpleWriteMap(M &m) { + return SimpleWriteMap(m); + } + + ///Sum of two maps + + ///This \ref concepts::ReadMap "read only map" returns the sum of the two + ///given maps. + ///Its \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible to those of \c M1. + template + class AddMap : public MapBase { + const M1& m1; + const M2& m2; + + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + AddMap(const M1 &_m1,const M2 &_m2) : m1(_m1), m2(_m2) {}; + ///\e + Value operator[](Key k) const {return m1[k]+m2[k];} + }; + + ///Returns an \c AddMap class + + ///This function just returns an \c AddMap class. + ///\todo How to call these type of functions? + /// + ///\relates AddMap + template + inline AddMap addMap(const M1 &m1,const M2 &m2) { + return AddMap(m1,m2); + } + + ///Shift a map with a constant. + + ///This \ref concepts::ReadMap "read only map" returns the sum of the + ///given map and a constant value. + ///Its \c Key and \c Value is inherited from \c M. + /// + ///Actually, + ///\code + /// ShiftMap sh(x,v); + ///\endcode + ///is equivalent to + ///\code + /// ConstMap c_tmp(v); + /// AddMap > sh(x,v); + ///\endcode + /// + ///\sa ShiftWriteMap + template + class ShiftMap : public MapBase { + const M& m; + C v; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + + ///Constructor + ///\param _m is the undelying map + ///\param _v is the shift value + ShiftMap(const M &_m, const C &_v ) : m(_m), v(_v) {}; + ///\e + Value operator[](Key k) const {return m[k] + v;} + }; + + ///Shift a map with a constant (ReadWrite version). + + ///This \ref concepts::ReadWriteMap "read-write map" returns the sum of the + ///given map and a constant value. It makes also possible to write the map. + ///Its \c Key and \c Value are inherited from \c M. + /// + ///\sa ShiftMap + template + class ShiftWriteMap : public MapBase { + M& m; + C v; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + + ///Constructor + ///\param _m is the undelying map + ///\param _v is the shift value + ShiftWriteMap(M &_m, const C &_v ) : m(_m), v(_v) {}; + /// \e + Value operator[](Key k) const {return m[k] + v;} + /// \e + void set(Key k, const Value& c) { m.set(k, c - v); } + }; + + ///Returns a \c ShiftMap class + + ///This function just returns an \c ShiftMap class. + ///\relates ShiftMap + template + inline ShiftMap shiftMap(const M &m,const C &v) { + return ShiftMap(m,v); + } + + ///Returns a \c ShiftWriteMap class + + ///This function just returns a \c ShiftWriteMap class. 
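+ ///
+ /// A small sketch of the writable variant (the values are arbitrary):
+ ///\code
+ ///   StdMap<int, int> m;
+ ///   shiftMap(m, 5).set(0, 12);  // stores 12 - 5 = 7 in m
+ ///   int x = shiftMap(m, 5)[0];  // reads back 7 + 5 = 12
+ ///\endcode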
+ ///\relates ShiftWriteMap + template + inline ShiftWriteMap shiftMap(M &m,const C &v) { + return ShiftWriteMap(m,v); + } + + ///Difference of two maps + + ///This \ref concepts::ReadMap "read only map" returns the difference + ///of the values of the two given maps. + ///Its \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible to those of \c M1. + + template + class SubMap : public MapBase { + const M1& m1; + const M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + SubMap(const M1 &_m1,const M2 &_m2) : m1(_m1), m2(_m2) {}; + /// \e + Value operator[](Key k) const {return m1[k]-m2[k];} + }; + + ///Returns a \c SubMap class + + ///This function just returns a \c SubMap class. + /// + ///\relates SubMap + template + inline SubMap subMap(const M1 &m1, const M2 &m2) { + return SubMap(m1, m2); + } + + ///Product of two maps + + ///This \ref concepts::ReadMap "read only map" returns the product of the + ///values of the two given maps. + ///Its \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible to those of \c M1. + template + class MulMap : public MapBase { + const M1& m1; + const M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + MulMap(const M1 &_m1,const M2 &_m2) : m1(_m1), m2(_m2) {}; + /// \e + Value operator[](Key k) const {return m1[k]*m2[k];} + }; + + ///Returns a \c MulMap class + + ///This function just returns a \c MulMap class. + ///\relates MulMap + template + inline MulMap mulMap(const M1 &m1,const M2 &m2) { + return MulMap(m1,m2); + } + + ///Scales a map with a constant. + + ///This \ref concepts::ReadMap "read only map" returns the value of the + ///given map multiplied from the left side with a constant value. + ///Its \c Key and \c Value are inherited from \c M. + /// + ///Actually, + ///\code + /// ScaleMap sc(x,v); + ///\endcode + ///is equivalent to + ///\code + /// ConstMap c_tmp(v); + /// MulMap > sc(x,v); + ///\endcode + /// + ///\sa ScaleWriteMap + template + class ScaleMap : public MapBase { + const M& m; + C v; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + + ///Constructor + ///\param _m is the undelying map + ///\param _v is the scaling value + ScaleMap(const M &_m, const C &_v ) : m(_m), v(_v) {}; + /// \e + Value operator[](Key k) const {return v * m[k];} + }; + + ///Scales a map with a constant (ReadWrite version). + + ///This \ref concepts::ReadWriteMap "read-write map" returns the value of the + ///given map multiplied from the left side with a constant value. It can + ///also be used as write map if the \c / operator is defined between + ///\c Value and \c C and the given multiplier is not zero. + ///Its \c Key and \c Value are inherited from \c M. 
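+ ///
+ /// A small sketch of the write direction (the values are arbitrary):
+ ///\code
+ ///   StdMap<int, double> m;
+ ///   scaleMap(m, 2.0).set(1, 10.0);  // stores 10.0 / 2.0 = 5.0 in m
+ ///   double x = scaleMap(m, 2.0)[1]; // reads back 2.0 * 5.0 = 10.0
+ ///\endcode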
+ /// + ///\sa ScaleMap + template + class ScaleWriteMap : public MapBase { + M& m; + C v; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + + ///Constructor + ///\param _m is the undelying map + ///\param _v is the scaling value + ScaleWriteMap(M &_m, const C &_v ) : m(_m), v(_v) {}; + /// \e + Value operator[](Key k) const {return v * m[k];} + /// \e + void set(Key k, const Value& c) { m.set(k, c / v);} + }; + + ///Returns a \c ScaleMap class + + ///This function just returns a \c ScaleMap class. + ///\relates ScaleMap + template + inline ScaleMap scaleMap(const M &m,const C &v) { + return ScaleMap(m,v); + } + + ///Returns a \c ScaleWriteMap class + + ///This function just returns a \c ScaleWriteMap class. + ///\relates ScaleWriteMap + template + inline ScaleWriteMap scaleMap(M &m,const C &v) { + return ScaleWriteMap(m,v); + } + + ///Quotient of two maps + + ///This \ref concepts::ReadMap "read only map" returns the quotient of the + ///values of the two given maps. + ///Its \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible to those of \c M1. + template + class DivMap : public MapBase { + const M1& m1; + const M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + DivMap(const M1 &_m1,const M2 &_m2) : m1(_m1), m2(_m2) {}; + /// \e + Value operator[](Key k) const {return m1[k]/m2[k];} + }; + + ///Returns a \c DivMap class + + ///This function just returns a \c DivMap class. + ///\relates DivMap + template + inline DivMap divMap(const M1 &m1,const M2 &m2) { + return DivMap(m1,m2); + } + + ///Composition of two maps + + ///This \ref concepts::ReadMap "read only map" returns the composition of + ///two given maps. + ///That is to say, if \c m1 is of type \c M1 and \c m2 is of \c M2, + ///then for + ///\code + /// ComposeMap cm(m1,m2); + ///\endcode + /// cm[x] will be equal to m1[m2[x]]. + /// + ///Its \c Key is inherited from \c M2 and its \c Value is from \c M1. + ///\c M2::Value must be convertible to \c M1::Key. + /// + ///\sa CombineMap + /// + ///\todo Check the requirements. + template + class ComposeMap : public MapBase { + const M1& m1; + const M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + ComposeMap(const M1 &_m1,const M2 &_m2) : m1(_m1), m2(_m2) {}; + + typename MapTraits::ConstReturnValue + /// \e + operator[](Key k) const {return m1[m2[k]];} + }; + ///Returns a \c ComposeMap class + + ///This function just returns a \c ComposeMap class. + /// + ///\relates ComposeMap + template + inline ComposeMap composeMap(const M1 &m1,const M2 &m2) { + return ComposeMap(m1,m2); + } + + ///Combine of two maps using an STL (binary) functor. + + ///Combine of two maps using an STL (binary) functor. + /// + ///This \ref concepts::ReadMap "read only map" takes two maps and a + ///binary functor and returns the composition of the two + ///given maps unsing the functor. + ///That is to say, if \c m1 and \c m2 is of type \c M1 and \c M2 + ///and \c f is of \c F, then for + ///\code + /// CombineMap cm(m1,m2,f); + ///\endcode + /// cm[x] will be equal to f(m1[x],m2[x]) + /// + ///Its \c Key is inherited from \c M1 and its \c Value is \c V. 
+ ///\c M2::Value and \c M1::Value must be convertible to the corresponding + ///input parameter of \c F and the return type of \c F must be convertible + ///to \c V. + /// + ///\sa ComposeMap + /// + ///\todo Check the requirements. + template + class CombineMap : public MapBase { + const M1& m1; + const M2& m2; + F f; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + CombineMap(const M1 &_m1,const M2 &_m2,const F &_f = F()) + : m1(_m1), m2(_m2), f(_f) {}; + /// \e + Value operator[](Key k) const {return f(m1[k],m2[k]);} + }; + + ///Returns a \c CombineMap class + + ///This function just returns a \c CombineMap class. + /// + ///For example if \c m1 and \c m2 are both \c double valued maps, then + ///\code + ///combineMap(m1,m2,std::plus()) + ///\endcode + ///is equivalent to + ///\code + ///addMap(m1,m2) + ///\endcode + /// + ///This function is specialized for adaptable binary function + ///classes and C++ functions. + /// + ///\relates CombineMap + template + inline CombineMap + combineMap(const M1& m1,const M2& m2, const F& f) { + return CombineMap(m1,m2,f); + } + + template + inline CombineMap + combineMap(const M1& m1, const M2& m2, const F& f) { + return combineMap(m1,m2,f); + } + + template + inline CombineMap + combineMap(const M1 &m1, const M2 &m2, V (*f)(K1, K2)) { + return combineMap(m1,m2,f); + } + + ///Negative value of a map + + ///This \ref concepts::ReadMap "read only map" returns the negative + ///value of the value returned by the given map. + ///Its \c Key and \c Value are inherited from \c M. + ///The unary \c - operator must be defined for \c Value, of course. + /// + ///\sa NegWriteMap + template + class NegMap : public MapBase { + const M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + NegMap(const M &_m) : m(_m) {}; + /// \e + Value operator[](Key k) const {return -m[k];} + }; + + ///Negative value of a map (ReadWrite version) + + ///This \ref concepts::ReadWriteMap "read-write map" returns the negative + ///value of the value returned by the given map. + ///Its \c Key and \c Value are inherited from \c M. + ///The unary \c - operator must be defined for \c Value, of course. + /// + /// \sa NegMap + template + class NegWriteMap : public MapBase { + M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + NegWriteMap(M &_m) : m(_m) {}; + /// \e + Value operator[](Key k) const {return -m[k];} + /// \e + void set(Key k, const Value& v) { m.set(k, -v); } + }; + + ///Returns a \c NegMap class + + ///This function just returns a \c NegMap class. + ///\relates NegMap + template + inline NegMap negMap(const M &m) { + return NegMap(m); + } + + ///Returns a \c NegWriteMap class + + ///This function just returns a \c NegWriteMap class. + ///\relates NegWriteMap + template + inline NegWriteMap negMap(M &m) { + return NegWriteMap(m); + } + + ///Absolute value of a map + + ///This \ref concepts::ReadMap "read only map" returns the absolute value + ///of the value returned by the given map. + ///Its \c Key and \c Value are inherited from \c M. + ///\c Value must be comparable to \c 0 and the unary \c - + ///operator must be defined for it, of course. 
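+ ///
+ /// A tiny sketch (the stored value is arbitrary):
+ ///\code
+ ///   StdMap<int, int> m;
+ ///   m.set(0, -7);
+ ///   int a = absMap(m)[0];  // a == 7
+ ///\endcode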
+ /// + ///\bug We need a unified way to handle the situation below: + ///\code + /// struct _UnConvertible {}; + /// template inline A t_abs(A a) {return _UnConvertible();} + /// template<> inline int t_abs<>(int n) {return abs(n);} + /// template<> inline long int t_abs<>(long int n) {return labs(n);} + /// template<> inline long long int t_abs<>(long long int n) {return ::llabs(n);} + /// template<> inline float t_abs<>(float n) {return fabsf(n);} + /// template<> inline double t_abs<>(double n) {return fabs(n);} + /// template<> inline long double t_abs<>(long double n) {return fabsl(n);} + ///\endcode + + + template + class AbsMap : public MapBase { + const M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + AbsMap(const M &_m) : m(_m) {}; + /// \e + Value operator[](Key k) const { + Value tmp = m[k]; + return tmp >= 0 ? tmp : -tmp; + } + + }; + + ///Returns an \c AbsMap class + + ///This function just returns an \c AbsMap class. + ///\relates AbsMap + template + inline AbsMap absMap(const M &m) { + return AbsMap(m); + } + + ///Converts an STL style functor to a map + + ///This \ref concepts::ReadMap "read only map" returns the value + ///of a given functor. + /// + ///Template parameters \c K and \c V will become its + ///\c Key and \c Value. + ///In most cases they have to be given explicitly because a + ///functor typically does not provide \c argument_type and + ///\c result_type typedefs. + /// + ///Parameter \c F is the type of the used functor. + /// + ///\sa MapFunctor + template + class FunctorMap : public MapBase { + F f; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + FunctorMap(const F &_f = F()) : f(_f) {} + /// \e + Value operator[](Key k) const { return f(k);} + }; + + ///Returns a \c FunctorMap class + + ///This function just returns a \c FunctorMap class. + /// + ///This function is specialized for adaptable binary function + ///classes and C++ functions. + /// + ///\relates FunctorMap + template inline + FunctorMap functorMap(const F &f) { + return FunctorMap(f); + } + + template inline + FunctorMap + functorMap(const F &f) { + return FunctorMap(f); + } + + template inline + FunctorMap functorMap(V (*f)(K)) { + return FunctorMap(f); + } + + + ///Converts a map to an STL style (unary) functor + + ///This class Converts a map to an STL style (unary) functor. + ///That is it provides an operator() to read its values. + /// + ///For the sake of convenience it also works as + ///a ususal \ref concepts::ReadMap "readable map", + ///i.e. operator[] and the \c Key and \c Value typedefs also exist. + /// + ///\sa FunctorMap + template + class MapFunctor : public MapBase { + const M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + typedef typename M::Key argument_type; + typedef typename M::Value result_type; + + ///Constructor + MapFunctor(const M &_m) : m(_m) {}; + ///\e + Value operator()(Key k) const {return m[k];} + ///\e + Value operator[](Key k) const {return m[k];} + }; + + ///Returns a \c MapFunctor class + + ///This function just returns a \c MapFunctor class. 
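+  ///
+  ///A possible usage sketch (illustrative only; \c m is assumed to be a
+  ///\c double valued readable map and \c keys a \c std::vector of its keys):
+  ///\code
+  ///  std::vector<double> values(keys.size());
+  ///  // MapFunctor lets an STL algorithm read the map as a unary functor
+  ///  std::transform(keys.begin(), keys.end(), values.begin(), mapFunctor(m));
+  ///\endcode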
+ ///\relates MapFunctor + template + inline MapFunctor mapFunctor(const M &m) { + return MapFunctor(m); + } + + ///Just readable version of \ref ForkWriteMap + + ///This map has two \ref concepts::ReadMap "readable map" + ///parameters and each read request will be passed just to the + ///first map. This class is the just readable map type of \c ForkWriteMap. + /// + ///The \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible from those of \c M1. + /// + ///\sa ForkWriteMap + + template + class ForkMap : public MapBase { + const M1& m1; + const M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + ForkMap(const M1 &_m1, const M2 &_m2) : m1(_m1), m2(_m2) {}; + /// \e + Value operator[](Key k) const {return m1[k];} + }; + + + ///Applies all map setting operations to two maps + + ///This map has two \ref concepts::WriteMap "writable map" + ///parameters and each write request will be passed to both of them. + ///If \c M1 is also \ref concepts::ReadMap "readable", + ///then the read operations will return the + ///corresponding values of \c M1. + /// + ///The \c Key and \c Value are inherited from \c M1. + ///The \c Key and \c Value of \c M2 must be convertible from those of \c M1. + /// + ///\sa ForkMap + template + class ForkWriteMap : public MapBase { + M1& m1; + M2& m2; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + ///Constructor + ForkWriteMap(M1 &_m1, M2 &_m2) : m1(_m1), m2(_m2) {}; + ///\e + Value operator[](Key k) const {return m1[k];} + ///\e + void set(Key k, const Value &v) {m1.set(k,v); m2.set(k,v);} + }; + + ///Returns a \c ForkMap class + + ///This function just returns a \c ForkMap class. + ///\relates ForkMap + template + inline ForkMap forkMap(const M1 &m1, const M2 &m2) { + return ForkMap(m1,m2); + } + + ///Returns a \c ForkWriteMap class + + ///This function just returns a \c ForkWriteMap class. + ///\relates ForkWriteMap + template + inline ForkWriteMap forkMap(M1 &m1, M2 &m2) { + return ForkWriteMap(m1,m2); + } + + + + /* ************* BOOL MAPS ******************* */ + + ///Logical 'not' of a map + + ///This bool \ref concepts::ReadMap "read only map" returns the + ///logical negation of the value returned by the given map. + ///Its \c Key is inherited from \c M, its \c Value is \c bool. + /// + ///\sa NotWriteMap + template + class NotMap : public MapBase { + const M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + /// Constructor + NotMap(const M &_m) : m(_m) {}; + ///\e + Value operator[](Key k) const {return !m[k];} + }; + + ///Logical 'not' of a map (ReadWrie version) + + ///This bool \ref concepts::ReadWriteMap "read-write map" returns the + ///logical negation of the value returned by the given map. When it is set, + ///the opposite value is set to the original map. + ///Its \c Key is inherited from \c M, its \c Value is \c bool. + /// + ///\sa NotMap + template + class NotWriteMap : public MapBase { + M& m; + public: + typedef MapBase Parent; + typedef typename Parent::Key Key; + typedef typename Parent::Value Value; + + /// Constructor + NotWriteMap(M &_m) : m(_m) {}; + ///\e + Value operator[](Key k) const {return !m[k];} + ///\e + void set(Key k, bool v) { m.set(k, !v); } + }; + + ///Returns a \c NotMap class + + ///This function just returns a \c NotMap class. 
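+  ///
+  ///A possible usage sketch (illustrative only; \c m is assumed to be a
+  ///\c bool valued map and \c k one of its keys):
+  ///\code
+  ///  bool b = notMap(m)[k];   // b == !m[k]
+  ///\endcode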
+ ///\relates NotMap + template + inline NotMap notMap(const M &m) { + return NotMap(m); + } + + ///Returns a \c NotWriteMap class + + ///This function just returns a \c NotWriteMap class. + ///\relates NotWriteMap + template + inline NotWriteMap notMap(M &m) { + return NotWriteMap(m); + } + + namespace _maps_bits { + + template + struct Identity { + typedef Value argument_type; + typedef Value result_type; + Value operator()(const Value& val) const { + return val; + } + }; + + template + struct IteratorTraits { + typedef typename std::iterator_traits<_Iterator>::value_type Value; + }; + + template + struct IteratorTraits<_Iterator, + typename exists::type> + { + typedef typename _Iterator::container_type::value_type Value; + }; + + } + + + /// \brief Writable bool map for logging each \c true assigned element + /// + /// A \ref concepts::ReadWriteMap "read-write" bool map for logging + /// each \c true assigned element, i.e it copies all the keys set + /// to \c true to the given iterator. + /// + /// \note The container of the iterator should contain space + /// for each element. + /// + /// The following example shows how you can write the edges found by + /// the \ref Prim algorithm directly to the standard output. + ///\code + /// typedef IdMap UEdgeIdMap; + /// UEdgeIdMap uedgeId(ugraph); + /// + /// typedef MapFunctor UEdgeIdFunctor; + /// UEdgeIdFunctor uedgeIdFunctor(uedgeId); + /// + /// StoreBoolMap, UEdgeIdFunctor> + /// writerMap(ostream_iterator(cout, " "), uedgeIdFunctor); + /// + /// prim(ugraph, cost, writerMap); + ///\endcode + /// + ///\sa BackInserterBoolMap + ///\sa FrontInserterBoolMap + ///\sa InserterBoolMap + template ::Value> > + class StoreBoolMap { + public: + typedef _Iterator Iterator; + + typedef typename _Functor::argument_type Key; + typedef bool Value; + + typedef _Functor Functor; + + /// Constructor + StoreBoolMap(Iterator it, const Functor& functor = Functor()) + : _begin(it), _end(it), _functor(functor) {} + + /// Gives back the given iterator set for the first key + Iterator begin() const { + return _begin; + } + + /// Gives back the the 'after the last' iterator + Iterator end() const { + return _end; + } + + /// The \c set function of the map + void set(const Key& key, Value value) const { + if (value) { + *_end++ = _functor(key); + } + } + + private: + Iterator _begin; + mutable Iterator _end; + Functor _functor; + }; + + /// \brief Writable bool map for logging each \c true assigned element in + /// a back insertable container. + /// + /// Writable bool map for logging each \c true assigned element by pushing + /// them into a back insertable container. + /// It can be used to retrieve the items into a standard + /// container. The next example shows how you can store the + /// edges found by the Prim algorithm in a vector. 
+ /// + ///\code + /// vector span_tree_uedges; + /// BackInserterBoolMap > inserter_map(span_tree_uedges); + /// prim(ugraph, cost, inserter_map); + ///\endcode + /// + ///\sa StoreBoolMap + ///\sa FrontInserterBoolMap + ///\sa InserterBoolMap + template > + class BackInserterBoolMap { + public: + typedef typename Functor::argument_type Key; + typedef bool Value; + + /// Constructor + BackInserterBoolMap(Container& _container, + const Functor& _functor = Functor()) + : container(_container), functor(_functor) {} + + /// The \c set function of the map + void set(const Key& key, Value value) { + if (value) { + container.push_back(functor(key)); + } + } + + private: + Container& container; + Functor functor; + }; + + /// \brief Writable bool map for logging each \c true assigned element in + /// a front insertable container. + /// + /// Writable bool map for logging each \c true assigned element by pushing + /// them into a front insertable container. + /// It can be used to retrieve the items into a standard + /// container. For example see \ref BackInserterBoolMap. + /// + ///\sa BackInserterBoolMap + ///\sa InserterBoolMap + template > + class FrontInserterBoolMap { + public: + typedef typename Functor::argument_type Key; + typedef bool Value; + + /// Constructor + FrontInserterBoolMap(Container& _container, + const Functor& _functor = Functor()) + : container(_container), functor(_functor) {} + + /// The \c set function of the map + void set(const Key& key, Value value) { + if (value) { + container.push_front(functor(key)); + } + } + + private: + Container& container; + Functor functor; + }; + + /// \brief Writable bool map for storing each \c true assigned element in + /// an insertable container. + /// + /// Writable bool map for storing each \c true assigned element in an + /// insertable container. It will insert all the keys set to \c true into + /// the container. + /// + /// For example, if you want to store the cut arcs of the strongly + /// connected components in a set you can use the next code: + /// + ///\code + /// set cut_edges; + /// InserterBoolMap > inserter_map(cut_edges); + /// stronglyConnectedCutEdges(graph, cost, inserter_map); + ///\endcode + /// + ///\sa BackInserterBoolMap + ///\sa FrontInserterBoolMap + template > + class InserterBoolMap { + public: + typedef typename Container::value_type Key; + typedef bool Value; + + /// Constructor with specified iterator + + /// Constructor with specified iterator. + /// \param _container The container for storing the elements. + /// \param _it The elements will be inserted before this iterator. + /// \param _functor The functor that is used when an element is stored. + InserterBoolMap(Container& _container, typename Container::iterator _it, + const Functor& _functor = Functor()) + : container(_container), it(_it), functor(_functor) {} + + /// Constructor + + /// Constructor without specified iterator. + /// The elements will be inserted before _container.end(). + /// \param _container The container for storing the elements. + /// \param _functor The functor that is used when an element is stored. 
+ InserterBoolMap(Container& _container, const Functor& _functor = Functor()) + : container(_container), it(_container.end()), functor(_functor) {} + + /// The \c set function of the map + void set(const Key& key, Value value) { + if (value) { + it = container.insert(it, functor(key)); + ++it; + } + } + + private: + Container& container; + typename Container::iterator it; + Functor functor; + }; + + /// \brief Writable bool map for filling each \c true assigned element with a + /// given value. + /// + /// Writable bool map for filling each \c true assigned element with a + /// given value. The value can set the container. + /// + /// The following code finds the connected components of a graph + /// and stores it in the \c comp map: + ///\code + /// typedef UGraph::NodeMap ComponentMap; + /// ComponentMap comp(ugraph); + /// typedef FillBoolMap > ComponentFillerMap; + /// ComponentFillerMap filler(comp, 0); + /// + /// Dfs::DefProcessedMap::Create dfs(ugraph); + /// dfs.processedMap(filler); + /// dfs.init(); + /// for (NodeIt it(ugraph); it != INVALID; ++it) { + /// if (!dfs.reached(it)) { + /// dfs.addSource(it); + /// dfs.start(); + /// ++filler.fillValue(); + /// } + /// } + ///\endcode + template + class FillBoolMap { + public: + typedef typename Map::Key Key; + typedef bool Value; + + /// Constructor + FillBoolMap(Map& _map, const typename Map::Value& _fill) + : map(_map), fill(_fill) {} + + /// Constructor + FillBoolMap(Map& _map) + : map(_map), fill() {} + + /// Gives back the current fill value + const typename Map::Value& fillValue() const { + return fill; + } + + /// Gives back the current fill value + typename Map::Value& fillValue() { + return fill; + } + + /// Sets the current fill value + void fillValue(const typename Map::Value& _fill) { + fill = _fill; + } + + /// The \c set function of the map + void set(const Key& key, Value value) { + if (value) { + map.set(key, fill); + } + } + + private: + Map& map; + typename Map::Value fill; + }; + + + /// \brief Writable bool map for storing the sequence number of + /// \c true assignments. + /// + /// Writable bool map that stores for each \c true assigned elements + /// the sequence number of this setting. + /// It makes it easy to calculate the leaving + /// order of the nodes in the \c Dfs algorithm. + /// + ///\code + /// typedef Graph::NodeMap OrderMap; + /// OrderMap order(graph); + /// typedef SettingOrderBoolMap OrderSetterMap; + /// OrderSetterMap setter(order); + /// Dfs::DefProcessedMap::Create dfs(graph); + /// dfs.processedMap(setter); + /// dfs.init(); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!dfs.reached(it)) { + /// dfs.addSource(it); + /// dfs.start(); + /// } + /// } + ///\endcode + /// + /// The storing of the discovering order is more difficult because the + /// ReachedMap should be readable in the dfs algorithm but the setting + /// order map is not readable. 
Thus we must use the fork map: + /// + ///\code + /// typedef Graph::NodeMap OrderMap; + /// OrderMap order(graph); + /// typedef SettingOrderBoolMap OrderSetterMap; + /// OrderSetterMap setter(order); + /// typedef Graph::NodeMap StoreMap; + /// StoreMap store(graph); + /// + /// typedef ForkWriteMap ReachedMap; + /// ReachedMap reached(store, setter); + /// + /// Dfs::DefReachedMap::Create dfs(graph); + /// dfs.reachedMap(reached); + /// dfs.init(); + /// for (NodeIt it(graph); it != INVALID; ++it) { + /// if (!dfs.reached(it)) { + /// dfs.addSource(it); + /// dfs.start(); + /// } + /// } + ///\endcode + template + class SettingOrderBoolMap { + public: + typedef typename Map::Key Key; + typedef bool Value; + + /// Constructor + SettingOrderBoolMap(Map& _map) + : map(_map), counter(0) {} + + /// Number of set operations. + int num() const { + return counter; + } + + /// The \c set function of the map + void set(const Key& key, Value value) { + if (value) { + map.set(key, counter++); + } + } + + private: + Map& map; + int counter; + }; + + /// @} +} + +#endif // LEMON_MAPS_H diff --git a/src/lemon/math.h b/src/lemon/math.h new file mode 100644 index 0000000..c837a83 --- /dev/null +++ b/src/lemon/math.h @@ -0,0 +1,63 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_MATH_H +#define LEMON_MATH_H + +///\ingroup misc +///\file +///\brief Some extensions to the standard \c cmath library. +/// +///Some extensions to the standard \c cmath library. +/// +///This file includes the standard math library (cmath). + +#include + +namespace lemon { + + /// \addtogroup misc + /// @{ + + /// The Euler constant + const long double E = 2.7182818284590452353602874713526625L; + /// log_2(e) + const long double LOG2E = 1.4426950408889634073599246810018921L; + /// log_10(e) + const long double LOG10E = 0.4342944819032518276511289189166051L; + /// ln(2) + const long double LN2 = 0.6931471805599453094172321214581766L; + /// ln(10) + const long double LN10 = 2.3025850929940456840179914546843642L; + /// pi + const long double PI = 3.1415926535897932384626433832795029L; + /// pi/2 + const long double PI_2 = 1.5707963267948966192313216916397514L; + /// pi/4 + const long double PI_4 = 0.7853981633974483096156608458198757L; + /// sqrt(2) + const long double SQRT2 = 1.4142135623730950488016887242096981L; + /// 1/sqrt(2) + const long double SQRT1_2 = 0.7071067811865475244008443621048490L; + + + /// @} + +} //namespace lemon + +#endif //LEMON_TOLERANCE_H diff --git a/src/lemon/smart_graph.h b/src/lemon/smart_graph.h new file mode 100644 index 0000000..c47aa29 --- /dev/null +++ b/src/lemon/smart_graph.h @@ -0,0 +1,1163 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). 
+ * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_SMART_GRAPH_H +#define LEMON_SMART_GRAPH_H + +///\ingroup graphs +///\file +///\brief SmartGraph and SmartUGraph classes. + +#include + +#include + +#include +#include + +#include +#include + +#include + +namespace lemon { + + class SmartGraph; + ///Base of SmartGraph + + ///Base of SmartGraph + /// + class SmartGraphBase { + protected: + + struct NodeT + { + int first_in, first_out; + NodeT() {} + }; + struct EdgeT + { + int target, source, next_in, next_out; + EdgeT() {} + }; + + std::vector nodes; + + std::vector edges; + + + public: + + typedef SmartGraphBase Graph; + + class Node; + class Edge; + + + public: + + SmartGraphBase() : nodes(), edges() { } + SmartGraphBase(const SmartGraphBase &_g) + : nodes(_g.nodes), edges(_g.edges) { } + + typedef True NodeNumTag; + typedef True EdgeNumTag; + + int nodeNum() const { return nodes.size(); } + int edgeNum() const { return edges.size(); } + + int maxNodeId() const { return nodes.size()-1; } + int maxEdgeId() const { return edges.size()-1; } + + Node addNode() { + int n = nodes.size(); + nodes.push_back(NodeT()); + nodes[n].first_in = -1; + nodes[n].first_out = -1; + return Node(n); + } + + Edge addEdge(Node u, Node v) { + int n = edges.size(); + edges.push_back(EdgeT()); + edges[n].source = u.id; + edges[n].target = v.id; + edges[n].next_out = nodes[u.id].first_out; + edges[n].next_in = nodes[v.id].first_in; + nodes[u.id].first_out = nodes[v.id].first_in = n; + + return Edge(n); + } + + void clear() { + edges.clear(); + nodes.clear(); + } + + Node source(Edge e) const { return Node(edges[e.id].source); } + Node target(Edge e) const { return Node(edges[e.id].target); } + + static int id(Node v) { return v.id; } + static int id(Edge e) { return e.id; } + + static Node nodeFromId(int id) { return Node(id);} + static Edge edgeFromId(int id) { return Edge(id);} + + class Node { + friend class SmartGraphBase; + friend class SmartGraph; + + protected: + int id; + explicit Node(int _id) : id(_id) {} + public: + Node() {} + Node (Invalid) : id(-1) {} + bool operator==(const Node i) const {return id == i.id;} + bool operator!=(const Node i) const {return id != i.id;} + bool operator<(const Node i) const {return id < i.id;} + }; + + + class Edge { + friend class SmartGraphBase; + friend class SmartGraph; + + protected: + int id; + explicit Edge(int _id) : id(_id) {} + public: + Edge() { } + Edge (Invalid) : id(-1) {} + bool operator==(const Edge i) const {return id == i.id;} + bool operator!=(const Edge i) const {return id != i.id;} + bool operator<(const Edge i) const {return id < i.id;} + }; + + void first(Node& node) const { + node.id = nodes.size() - 1; + } + + static void next(Node& node) { + --node.id; + } + + void first(Edge& edge) const { + edge.id = edges.size() - 1; + } + + static void next(Edge& edge) { + --edge.id; + } + + void firstOut(Edge& edge, const Node& node) const { + edge.id = nodes[node.id].first_out; + } + + void nextOut(Edge& edge) const { + edge.id = edges[edge.id].next_out; + } + + void firstIn(Edge& edge, const Node& node) const { + edge.id = nodes[node.id].first_in; + } + + void nextIn(Edge& edge) const { + edge.id = edges[edge.id].next_in; + } + + }; + 
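+  // Illustrative note (editorial sketch): SmartGraphBase keeps intrusive
+  // singly linked adjacency lists, e.g.
+  //
+  //   SmartGraphBase g;
+  //   SmartGraphBase::Node a = g.addNode(), b = g.addNode();
+  //   g.addEdge(a, b);   // edge 0
+  //   g.addEdge(a, b);   // edge 1
+  //
+  // leaves nodes[a].first_out == 1 and edges[1].next_out == 0, so the most
+  // recently added edge is the list head and firstOut()/nextOut() visit the
+  // out-edges in reverse insertion order.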
+ typedef GraphExtender ExtendedSmartGraphBase; + + ///\ingroup graphs + /// + ///\brief A smart graph class. + /// + ///This is a simple and fast graph implementation. + ///It is also quite memory efficient, but at the price + ///that it does support only limited (only stack-like) + ///node and edge deletions. + ///It conforms to + ///the \ref concepts::Graph "Graph concept" with an + ///important extra feature that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + ///\sa concepts::Graph. + /// + ///\author Alpar Juttner + class SmartGraph : public ExtendedSmartGraphBase { + public: + + typedef ExtendedSmartGraphBase Parent; + + private: + + ///SmartGraph is \e not copy constructible. Use GraphCopy() instead. + + ///SmartGraph is \e not copy constructible. Use GraphCopy() instead. + /// + SmartGraph(const SmartGraph &) : ExtendedSmartGraphBase() {}; + ///\brief Assignment of SmartGraph to another one is \e not allowed. + ///Use GraphCopy() instead. + + ///Assignment of SmartGraph to another one is \e not allowed. + ///Use GraphCopy() instead. + void operator=(const SmartGraph &) {} + + public: + + /// Constructor + + /// Constructor. + /// + SmartGraph() {}; + + ///Add a new node to the graph. + + /// \return the new node. + /// + Node addNode() { return Parent::addNode(); } + + ///Add a new edge to the graph. + + ///Add a new edge to the graph with source node \c s + ///and target node \c t. + ///\return the new edge. + Edge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + + /// \brief Using this it is possible to avoid the superfluous memory + /// allocation. + + /// Using this it is possible to avoid the superfluous memory + /// allocation: if you know that the graph you want to build will + /// be very large (e.g. it will contain millions of nodes and/or edges) + /// then it is worth reserving space for this amount before starting + /// to build the graph. + /// \sa reserveEdge + void reserveNode(int n) { nodes.reserve(n); }; + + /// \brief Using this it is possible to avoid the superfluous memory + /// allocation. + + /// Using this it is possible to avoid the superfluous memory + /// allocation: if you know that the graph you want to build will + /// be very large (e.g. it will contain millions of nodes and/or edges) + /// then it is worth reserving space for this amount before starting + /// to build the graph. + /// \sa reserveNode + void reserveEdge(int m) { edges.reserve(m); }; + + ///Clear the graph. + + ///Erase all the nodes and edges from the graph. + /// + void clear() { + Parent::clear(); + } + + ///Split a node. + + ///This function splits a node. First a new node is added to the graph, + ///then the source of each outgoing edge of \c n is moved to this new node. + ///If \c connect is \c true (this is the default value), then a new edge + ///from \c n to the newly created node is also added. + ///\return The newly created node. + /// + ///\note The Edges + ///referencing a moved edge remain + ///valid. However InEdge's and OutEdge's + ///may be invalidated. + ///\warning This functionality cannot be used together with the Snapshot + ///feature. + ///\todo It could be implemented in a bit faster way. 
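+    ///
+    ///A possible usage sketch (illustrative only):
+    ///\code
+    ///  SmartGraph g;
+    ///  SmartGraph::Node u = g.addNode();
+    ///  SmartGraph::Node v = g.addNode();
+    ///  g.addEdge(u, v);
+    ///  // the out-edge of u is moved to the new node w, and since
+    ///  // connect == true by default an edge from u to w is also added
+    ///  SmartGraph::Node w = g.split(u);
+    ///\endcode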
+ Node split(Node n, bool connect = true) + { + Node b = addNode(); + nodes[b.id].first_out=nodes[n.id].first_out; + nodes[n.id].first_out=-1; + for(int i=nodes[b.id].first_out;i!=-1;i++) edges[i].source=b.id; + if(connect) addEdge(n,b); + return b; + } + + public: + + class Snapshot; + + protected: + + void restoreSnapshot(const Snapshot &s) + { + while(s.edge_numnodes.size(); + edge_num=g->edges.size(); + } + + ///Make a snapshot. + + ///Make a snapshot of the graph. + /// + ///This function can be called more than once. In case of a repeated + ///call, the previous snapshot gets lost. + ///\param _g The graph we make the snapshot of. + void save(SmartGraph &_g) + { + g=&_g; + node_num=g->nodes.size(); + edge_num=g->edges.size(); + } + + ///Undo the changes until a snapshot. + + ///Undo the changes until a snapshot created by save(). + /// + ///\note After you restored a state, you cannot restore + ///a later state, in other word you cannot add again the edges deleted + ///by restore(). + void restore() + { + g->restoreSnapshot(*this); + } + }; + }; + + + class SmartUGraphBase { + + protected: + + struct NodeT { + int first_out; + }; + + struct EdgeT { + int target; + int next_out; + }; + + std::vector nodes; + std::vector edges; + + int first_free_edge; + + public: + + typedef SmartUGraphBase Graph; + + class Node; + class Edge; + class UEdge; + + class Node { + friend class SmartUGraphBase; + protected: + + int id; + explicit Node(int pid) { id = pid;} + + public: + Node() {} + Node (Invalid) { id = -1; } + bool operator==(const Node& node) const {return id == node.id;} + bool operator!=(const Node& node) const {return id != node.id;} + bool operator<(const Node& node) const {return id < node.id;} + }; + + class UEdge { + friend class SmartUGraphBase; + protected: + + int id; + explicit UEdge(int pid) { id = pid;} + + public: + UEdge() {} + UEdge (Invalid) { id = -1; } + bool operator==(const UEdge& edge) const {return id == edge.id;} + bool operator!=(const UEdge& edge) const {return id != edge.id;} + bool operator<(const UEdge& edge) const {return id < edge.id;} + }; + + class Edge { + friend class SmartUGraphBase; + protected: + + int id; + explicit Edge(int pid) { id = pid;} + + public: + operator UEdge() const { return uEdgeFromId(id / 2); } + + Edge() {} + Edge (Invalid) { id = -1; } + bool operator==(const Edge& edge) const {return id == edge.id;} + bool operator!=(const Edge& edge) const {return id != edge.id;} + bool operator<(const Edge& edge) const {return id < edge.id;} + }; + + + + SmartUGraphBase() + : nodes(), edges() {} + + + int maxNodeId() const { return nodes.size()-1; } + int maxUEdgeId() const { return edges.size() / 2 - 1; } + int maxEdgeId() const { return edges.size()-1; } + + Node source(Edge e) const { return Node(edges[e.id ^ 1].target); } + Node target(Edge e) const { return Node(edges[e.id].target); } + + Node source(UEdge e) const { return Node(edges[2 * e.id].target); } + Node target(UEdge e) const { return Node(edges[2 * e.id + 1].target); } + + static bool direction(Edge e) { + return (e.id & 1) == 1; + } + + static Edge direct(UEdge e, bool d) { + return Edge(e.id * 2 + (d ? 
1 : 0)); + } + + void first(Node& node) const { + node.id = nodes.size() - 1; + } + + void next(Node& node) const { + --node.id; + } + + void first(Edge& edge) const { + edge.id = edges.size() - 1; + } + + void next(Edge& edge) const { + --edge.id; + } + + void first(UEdge& edge) const { + edge.id = edges.size() / 2 - 1; + } + + void next(UEdge& edge) const { + --edge.id; + } + + void firstOut(Edge &edge, const Node& v) const { + edge.id = nodes[v.id].first_out; + } + void nextOut(Edge &edge) const { + edge.id = edges[edge.id].next_out; + } + + void firstIn(Edge &edge, const Node& v) const { + edge.id = ((nodes[v.id].first_out) ^ 1); + if (edge.id == -2) edge.id = -1; + } + void nextIn(Edge &edge) const { + edge.id = ((edges[edge.id ^ 1].next_out) ^ 1); + if (edge.id == -2) edge.id = -1; + } + + void firstInc(UEdge &edge, bool& d, const Node& v) const { + int de = nodes[v.id].first_out; + if (de != -1) { + edge.id = de / 2; + d = ((de & 1) == 1); + } else { + edge.id = -1; + d = true; + } + } + void nextInc(UEdge &edge, bool& d) const { + int de = (edges[(edge.id * 2) | (d ? 1 : 0)].next_out); + if (de != -1) { + edge.id = de / 2; + d = ((de & 1) == 1); + } else { + edge.id = -1; + d = true; + } + } + + static int id(Node v) { return v.id; } + static int id(Edge e) { return e.id; } + static int id(UEdge e) { return e.id; } + + static Node nodeFromId(int id) { return Node(id);} + static Edge edgeFromId(int id) { return Edge(id);} + static UEdge uEdgeFromId(int id) { return UEdge(id);} + + Node addNode() { + int n = nodes.size(); + nodes.push_back(NodeT()); + nodes[n].first_out = -1; + + return Node(n); + } + + UEdge addEdge(Node u, Node v) { + int n = edges.size(); + edges.push_back(EdgeT()); + edges.push_back(EdgeT()); + + edges[n].target = u.id; + edges[n | 1].target = v.id; + + edges[n].next_out = nodes[v.id].first_out; + nodes[v.id].first_out = n; + + edges[n | 1].next_out = nodes[u.id].first_out; + nodes[u.id].first_out = (n | 1); + + return UEdge(n / 2); + } + + void clear() { + edges.clear(); + nodes.clear(); + } + + }; + + typedef UGraphExtender ExtendedSmartUGraphBase; + + /// \ingroup graphs + /// + /// \brief A smart undirected graph class. + /// + /// This is a simple and fast undirected graph implementation. + /// It is also quite memory efficient, but at the price + /// that it does support only limited (only stack-like) + /// node and edge deletions. + /// Except from this it conforms to + /// the \ref concepts::UGraph "UGraph concept". + /// + ///It also has an + ///important extra feature that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + /// \sa concepts::UGraph. + /// + class SmartUGraph : public ExtendedSmartUGraphBase { + private: + + ///SmartUGraph is \e not copy constructible. Use UGraphCopy() instead. + + ///SmartUGraph is \e not copy constructible. Use UGraphCopy() instead. + /// + SmartUGraph(const SmartUGraph &) : ExtendedSmartUGraphBase() {}; + + ///\brief Assignment of SmartUGraph to another one is \e not allowed. + ///Use UGraphCopy() instead. + + ///Assignment of SmartUGraph to another one is \e not allowed. + ///Use UGraphCopy() instead. + void operator=(const SmartUGraph &) {} + + public: + + typedef ExtendedSmartUGraphBase Parent; + typedef Parent::OutEdgeIt IncEdgeIt; + + /// Constructor + + /// Constructor. + /// + SmartUGraph() {} + + ///Add a new node to the graph. + + /// \return the new node. + /// + Node addNode() { return Parent::addNode(); } + + ///Add a new undirected edge to the graph. 
+ + ///Add a new undirected edge to the graph with node \c s + ///and \c t. + ///\return the new undirected edge. + UEdge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + + ///Clear the graph. + + ///Erase all the nodes and edges from the graph. + /// + void clear() { + Parent::clear(); + } + + public: + + class Snapshot; + + protected: + + void saveSnapshot(Snapshot &s) + { + s.graph = this; + s.node_num = nodes.size(); + s.edge_num = edges.size(); + } + + void restoreSnapshot(const Snapshot &s) + { + while(s.edge_num dir; + dir.push_back(edgeFromId(n)); + dir.push_back(edgeFromId(n-1)); + Parent::notifier(Edge()).erase(dir); + nodes[edges[n].target].first_out=edges[n].next_out; + nodes[edges[n-1].target].first_out=edges[n-1].next_out; + edges.pop_back(); + edges.pop_back(); + } + while(s.node_numrestoreSnapshot(*this); + } + }; + }; + + + class SmartBpUGraphBase { + public: + + class NodeSetError : public LogicError { + public: + virtual const char* what() const throw() { + return "lemon::SmartBpUGraph::NodeSetError"; + } + }; + + protected: + + struct NodeT { + int first; + NodeT() {} + NodeT(int _first) : first(_first) {} + }; + + struct UEdgeT { + int aNode, next_out; + int bNode, next_in; + }; + + std::vector aNodes; + std::vector bNodes; + + std::vector edges; + + public: + + class Node { + friend class SmartBpUGraphBase; + protected: + int id; + + explicit Node(int _id) : id(_id) {} + public: + Node() {} + Node(Invalid) : id(-1) {} + bool operator==(const Node i) const {return id==i.id;} + bool operator!=(const Node i) const {return id!=i.id;} + bool operator<(const Node i) const {return id 0) { + node.id = 2 * aNodes.size() - 2; + } else { + node.id = 2 * bNodes.size() - 1; + } + } + void next(Node& node) const { + node.id -= 2; + if (node.id == -2) { + node.id = 2 * bNodes.size() - 1; + } + } + + void first(UEdge& edge) const { + edge.id = edges.size() - 1; + } + void next(UEdge& edge) const { + --edge.id; + } + + void firstFromANode(UEdge& edge, const Node& node) const { + LEMON_ASSERT((node.id & 1) == 0, NodeSetError()); + edge.id = aNodes[node.id >> 1].first; + } + void nextFromANode(UEdge& edge) const { + edge.id = edges[edge.id].next_out; + } + + void firstFromBNode(UEdge& edge, const Node& node) const { + LEMON_ASSERT((node.id & 1) == 1, NodeSetError()); + edge.id = bNodes[node.id >> 1].first; + } + void nextFromBNode(UEdge& edge) const { + edge.id = edges[edge.id].next_in; + } + + static int id(const Node& node) { + return node.id; + } + static Node nodeFromId(int id) { + return Node(id); + } + int maxNodeId() const { + return aNodes.size() > bNodes.size() ? 
+ aNodes.size() * 2 - 2 : bNodes.size() * 2 - 1; + } + + static int id(const UEdge& edge) { + return edge.id; + } + static UEdge uEdgeFromId(int id) { + return UEdge(id); + } + int maxUEdgeId() const { + return edges.size(); + } + + static int aNodeId(const Node& node) { + return node.id >> 1; + } + static Node nodeFromANodeId(int id) { + return Node(id << 1); + } + int maxANodeId() const { + return aNodes.size(); + } + + static int bNodeId(const Node& node) { + return node.id >> 1; + } + static Node nodeFromBNodeId(int id) { + return Node((id << 1) + 1); + } + int maxBNodeId() const { + return bNodes.size(); + } + + Node aNode(const UEdge& edge) const { + return Node(edges[edge.id].aNode); + } + Node bNode(const UEdge& edge) const { + return Node(edges[edge.id].bNode); + } + + static bool aNode(const Node& node) { + return (node.id & 1) == 0; + } + + static bool bNode(const Node& node) { + return (node.id & 1) == 1; + } + + Node addANode() { + NodeT nodeT; + nodeT.first = -1; + aNodes.push_back(nodeT); + return Node(aNodes.size() * 2 - 2); + } + + Node addBNode() { + NodeT nodeT; + nodeT.first = -1; + bNodes.push_back(nodeT); + return Node(bNodes.size() * 2 - 1); + } + + UEdge addEdge(const Node& source, const Node& target) { + LEMON_ASSERT(((source.id ^ target.id) & 1) == 1, NodeSetError()); + UEdgeT edgeT; + if ((source.id & 1) == 0) { + edgeT.aNode = source.id; + edgeT.bNode = target.id; + } else { + edgeT.aNode = target.id; + edgeT.bNode = source.id; + } + edgeT.next_out = aNodes[edgeT.aNode >> 1].first; + aNodes[edgeT.aNode >> 1].first = edges.size(); + edgeT.next_in = bNodes[edgeT.bNode >> 1].first; + bNodes[edgeT.bNode >> 1].first = edges.size(); + edges.push_back(edgeT); + return UEdge(edges.size() - 1); + } + + void reserveANode(int n) { aNodes.reserve(n); }; + void reserveBNode(int n) { bNodes.reserve(n); }; + + void reserveEdge(int m) { edges.reserve(m); }; + + void clear() { + aNodes.clear(); + bNodes.clear(); + edges.clear(); + } + + typedef True NodeNumTag; + int nodeNum() const { return aNodes.size() + bNodes.size(); } + int aNodeNum() const { return aNodes.size(); } + int bNodeNum() const { return bNodes.size(); } + + typedef True EdgeNumTag; + int uEdgeNum() const { return edges.size(); } + + }; + + + typedef BpUGraphExtender > + ExtendedSmartBpUGraphBase; + + /// \ingroup graphs + /// + /// \brief A smart bipartite undirected graph class. + /// + /// This is a simple and fast bipartite undirected graph implementation. + /// It is also quite memory efficient, but at the price + /// that it does not support node and edge deletions. + /// Except from this it conforms to + /// the \ref concepts::BpUGraph "BpUGraph concept". + /// + ///It also has an + ///important extra feature that + ///its maps are real \ref concepts::ReferenceMap "reference map"s. + /// + /// \sa concepts::BpUGraph. + /// + class SmartBpUGraph : public ExtendedSmartBpUGraphBase { + private: + + /// \brief SmartBpUGraph is \e not copy constructible. + /// + ///SmartBpUGraph is \e not copy constructible. + SmartBpUGraph(const SmartBpUGraph &) : ExtendedSmartBpUGraphBase() {}; + + /// \brief Assignment of SmartBpUGraph to another one is \e not + /// allowed. + /// + /// Assignment of SmartBpUGraph to another one is \e not allowed. + void operator=(const SmartBpUGraph &) {} + + public: + + typedef ExtendedSmartBpUGraphBase Parent; + + ///Constructor + + ///Constructor. + /// + SmartBpUGraph() : ExtendedSmartBpUGraphBase() {} + + ///Add a new ANode to the graph. + + /// \return the new node. 
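+    ///
+    ///A possible usage sketch (illustrative only):
+    ///\code
+    ///  SmartBpUGraph g;
+    ///  SmartBpUGraph::Node a = g.addANode();
+    ///  SmartBpUGraph::Node b = g.addBNode();
+    ///  g.addEdge(a, b);   // an edge must join an ANode and a BNode
+    ///\endcode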
+ /// + Node addANode() { return Parent::addANode(); } + + ///Add a new BNode to the graph. + + /// \return the new node. + /// + Node addBNode() { return Parent::addBNode(); } + + ///Add a new undirected edge to the graph. + + ///Add a new undirected edge to the graph with node \c s + ///and \c t. + ///\return the new undirected edge. + UEdge addEdge(const Node& s, const Node& t) { + return Parent::addEdge(s, t); + } + + ///Clear the graph. + + ///Erase all the nodes and edges from the graph. + /// + void clear() { + Parent::clear(); + } + + public: + + class Snapshot; + + protected: + + void restoreSnapshot(const Snapshot &s) + { + while(s.edge_num dir; + dir.push_back(Parent::direct(edge, true)); + dir.push_back(Parent::direct(edge, false)); + Parent::notifier(Edge()).erase(dir); + aNodes[edges.back().aNode >> 1].first=edges.back().next_out; + bNodes[edges.back().bNode >> 1].first=edges.back().next_in; + edges.pop_back(); + } + while(s.anode_numaNodes.size(); + bnode_num=g->bNodes.size(); + edge_num=g->edges.size(); + } + + ///Make a snapshot. + + ///Make a snapshot of the graph. + /// + ///This function can be called more than once. In case of a repeated + ///call, the previous snapshot gets lost. + ///\param _g The graph we make the snapshot of. + void save(SmartBpUGraph &_g) + { + g=&_g; + anode_num=g->aNodes.size(); + bnode_num=g->bNodes.size(); + edge_num=g->edges.size(); + } + + ///Undo the changes until a snapshot. + + ///Undo the changes until a snapshot created by save(). + /// + ///\note After you restored a state, you cannot restore + ///a later state, in other word you cannot add again the edges deleted + ///by restore(). + void restore() + { + g->restoreSnapshot(*this); + } + }; + }; + + + /// @} +} //namespace lemon + + +#endif //LEMON_SMART_GRAPH_H diff --git a/src/lemon/tolerance.h b/src/lemon/tolerance.h new file mode 100644 index 0000000..75c7e71 --- /dev/null +++ b/src/lemon/tolerance.h @@ -0,0 +1,454 @@ +/* -*- C++ -*- + * + * This file is a part of LEMON, a generic C++ optimization library + * + * Copyright (C) 2003-2008 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#ifndef LEMON_TOLERANCE_H +#define LEMON_TOLERANCE_H + +///\ingroup misc +///\file +///\brief A basic tool to handle the anomalies of calculation with +///floating point numbers. +/// +///\todo It should be in a module like "Basic tools" + + +namespace lemon { + + /// \addtogroup misc + /// @{ + + ///\brief A class to provide a basic way to + ///handle the comparison of numbers that are obtained + ///as a result of a probably inexact computation. + /// + ///Tolerance is a class to provide a basic way to + ///handle the comparison of numbers that are obtained + ///as a result of a probably inexact computation. + /// + ///This is an abstract class, it should be specialized for all numerical + ///data types. These specialized classes like \ref Tolerance\ + ///may offer additional tuning parameters. 
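+  ///
+  ///A possible usage sketch (illustrative only):
+  ///\code
+  ///  Tolerance<double> tol(1e-9);     // comparisons use epsilon == 1e-9
+  ///  double a = 1.0, b = 1.0 + 1e-12;
+  ///  bool lt = tol.less(a, b);        // false: the difference is below epsilon
+  ///  bool nz = tol.nonZero(b - a);    // false for the same reason
+  ///\endcode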
+ /// + ///\sa Tolerance + ///\sa Tolerance + ///\sa Tolerance + ///\sa Tolerance + ///\sa Tolerance + ///\sa Tolerance + ///\sa Tolerance + + template + class Tolerance + { + public: + typedef T Value; + + ///\name Comparisons + ///The concept is that these bool functions return with \c true only if + ///the related comparisons hold even if some numerical error appeared + ///during the computations. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) {return false;} + ///Returns \c true if \c a is \e surely different from \c b + static bool different(Value a,Value b) {return false;} + ///Returns \c true if \c a is \e surely positive + static bool positive(Value a) {return false;} + ///Returns \c true if \c a is \e surely negative + static bool negative(Value a) {return false;} + ///Returns \c true if \c a is \e surely non-zero + static bool nonZero(Value a) {return false;} + + ///@} + + ///Returns the zero value. + static Value zero() {return T();} + + // static bool finite(Value a) {} + // static Value big() {} + // static Value negativeBig() {} + }; + + + ///Float specialization of \ref Tolerance. + + ///Float specialization of \ref Tolerance. + ///\sa Tolerance + ///\relates Tolerance + template<> + class Tolerance + { + static float def_epsilon; + float _epsilon; + public: + ///\e + typedef float Value; + + ///Constructor setting the epsilon tolerance to the default value. + Tolerance() : _epsilon(def_epsilon) {} + ///Constructor setting the epsilon tolerance. + Tolerance(float e) : _epsilon(e) {} + + ///Return the epsilon value. + Value epsilon() const {return _epsilon;} + ///Set the epsilon value. + void epsilon(Value e) {_epsilon=e;} + + ///Return the default epsilon value. + static Value defaultEpsilon() {return def_epsilon;} + ///Set the default epsilon value. + static void defaultEpsilon(Value e) {def_epsilon=e;} + + ///\name Comparisons + ///See class Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + bool less(Value a,Value b) const {return a+_epsilona; } + ///Returns \c true if \c a is \e surely non-zero + bool nonZero(Value a) const { return positive(a)||negative(a); }; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Double specialization of \ref Tolerance. + + ///Double specialization of \ref Tolerance. + ///\sa Tolerance + ///\relates Tolerance + template<> + class Tolerance + { + static double def_epsilon; + double _epsilon; + public: + ///\e + typedef double Value; + + ///Constructor setting the epsilon tolerance to the default value. + Tolerance() : _epsilon(def_epsilon) {} + ///Constructor setting the epsilon tolerance. + Tolerance(double e) : _epsilon(e) {} + + ///Return the epsilon value. + Value epsilon() const {return _epsilon;} + ///Set the epsilon value. + void epsilon(Value e) {_epsilon=e;} + + ///Return the default epsilon value. + static Value defaultEpsilon() {return def_epsilon;} + ///Set the default epsilon value. + static void defaultEpsilon(Value e) {def_epsilon=e;} + + ///\name Comparisons + ///See class Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + bool less(Value a,Value b) const {return a+_epsilona; } + ///Returns \c true if \c a is \e surely non-zero + bool nonZero(Value a) const { return positive(a)||negative(a); }; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Long double specialization of \ref Tolerance. 
+ + ///Long double specialization of \ref Tolerance. + ///\sa Tolerance + ///\relates Tolerance + template<> + class Tolerance + { + static long double def_epsilon; + long double _epsilon; + public: + ///\e + typedef long double Value; + + ///Constructor setting the epsilon tolerance to the default value. + Tolerance() : _epsilon(def_epsilon) {} + ///Constructor setting the epsilon tolerance. + Tolerance(long double e) : _epsilon(e) {} + + ///Return the epsilon value. + Value epsilon() const {return _epsilon;} + ///Set the epsilon value. + void epsilon(Value e) {_epsilon=e;} + + ///Return the default epsilon value. + static Value defaultEpsilon() {return def_epsilon;} + ///Set the default epsilon value. + static void defaultEpsilon(Value e) {def_epsilon=e;} + + ///\name Comparisons + ///See class Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + bool less(Value a,Value b) const {return a+_epsilona; } + ///Returns \c true if \c a is \e surely non-zero + bool nonZero(Value a) const { return positive(a)||negative(a); }; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Integer specialization of \ref Tolerance. + + ///Integer specialization of \ref Tolerance. + ///\sa Tolerance + template<> + class Tolerance + { + public: + ///\e + typedef int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return aa; } + ///Returns \c true if \c a is \e surely non-zero + static bool nonZero(Value a) { return a!=0; }; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Unsigned integer specialization of \ref Tolerance. + + ///Unsigned integer specialization of \ref Tolerance. + ///\sa Tolerance + template<> + class Tolerance + { + public: + ///\e + typedef unsigned int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return a + class Tolerance + { + public: + ///\e + typedef long int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return aa; } + ///Returns \c true if \c a is \e surely non-zero + static bool nonZero(Value a) { return a!=0;}; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Unsigned long integer specialization of \ref Tolerance. + + ///Unsigned long integer specialization of \ref Tolerance. + ///\sa Tolerance + template<> + class Tolerance + { + public: + ///\e + typedef unsigned long int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return along long) + ///is not ansi compatible. + ///\sa Tolerance + template<> + class Tolerance + { + public: + ///\e + typedef long long int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return aa; } + ///Returns \c true if \c a is \e surely non-zero + static bool nonZero(Value a) { return a!=0;}; + + ///@} + + ///Returns zero + static Value zero() {return 0;} + }; + + ///Unsigned long long integer specialization of \ref Tolerance. 
+ + ///Unsigned long long integer specialization of \ref Tolerance. + ///\warning This class (more exactly, type unsigned long long) + ///is not ansi compatible. + ///\sa Tolerance + template<> + class Tolerance + { + public: + ///\e + typedef unsigned long long int Value; + + ///\name Comparisons + ///See \ref Tolerance for more details. + + ///@{ + + ///Returns \c true if \c a is \e surely strictly less than \c b + static bool less(Value a,Value b) { return a +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +/// \ingroup graph_prop +/// \file +/// \brief Topology related algorithms +/// +/// Topology related algorithms + +namespace lemon { + + /// \ingroup graph_prop + /// + /// \brief Check that the given undirected graph is connected. + /// + /// Check that the given undirected graph connected. + /// \param graph The undirected graph. + /// \return %True when there is path between any two nodes in the graph. + /// \note By definition, the empty graph is connected. + template + bool connected(const UGraph& graph) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + if (NodeIt(graph) == INVALID) return true; + Dfs dfs(graph); + dfs.run(NodeIt(graph)); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + return false; + } + } + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Count the number of connected components of an undirected graph + /// + /// Count the number of connected components of an undirected graph + /// + /// \param graph The graph. It should be undirected. + /// \return The number of components + /// \note By definition, the empty graph consists + /// of zero connected components. + template + int countConnectedComponents(const UGraph &graph) { + checkConcept(); + typedef typename UGraph::Node Node; + typedef typename UGraph::Edge Edge; + + typedef NullMap PredMap; + typedef NullMap DistMap; + + int compNum = 0; + typename Bfs:: + template DefPredMap:: + template DefDistMap:: + Create bfs(graph); + + PredMap predMap; + bfs.predMap(predMap); + + DistMap distMap; + bfs.distMap(distMap); + + bfs.init(); + for(typename UGraph::NodeIt n(graph); n != INVALID; ++n) { + if (!bfs.reached(n)) { + bfs.addSource(n); + bfs.start(); + ++compNum; + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the connected components of an undirected graph + /// + /// Find the connected components of an undirected graph. + /// + /// \image html connected_components.png + /// \image latex connected_components.eps "Connected components" width=\textwidth + /// + /// \param graph The graph. It should be undirected. + /// \retval compMap A writable node map. The values will be set from 0 to + /// the number of the connected components minus one. Each values of the map + /// will be set exactly once, the values of a certain component will be + /// set continuously. 
+ /// \return The number of components + /// + template + int connectedComponents(const UGraph &graph, NodeMap &compMap) { + checkConcept(); + typedef typename UGraph::Node Node; + typedef typename UGraph::Edge Edge; + checkConcept, NodeMap>(); + + typedef NullMap PredMap; + typedef NullMap DistMap; + + int compNum = 0; + typename Bfs:: + template DefPredMap:: + template DefDistMap:: + Create bfs(graph); + + PredMap predMap; + bfs.predMap(predMap); + + DistMap distMap; + bfs.distMap(distMap); + + bfs.init(); + for(typename UGraph::NodeIt n(graph); n != INVALID; ++n) { + if(!bfs.reached(n)) { + bfs.addSource(n); + while (!bfs.emptyQueue()) { + compMap.set(bfs.nextNode(), compNum); + bfs.processNextNode(); + } + ++compNum; + } + } + return compNum; + } + + namespace _topology_bits { + + template + struct LeaveOrderVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + LeaveOrderVisitor(Iterator it) : _it(it) {} + + void leave(const Node& node) { + *(_it++) = node; + } + + private: + Iterator _it; + }; + + template + struct FillMapVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Map::Value Value; + + FillMapVisitor(Map& map, Value& value) + : _map(map), _value(value) {} + + void reach(const Node& node) { + _map.set(node, _value); + } + private: + Map& _map; + Value& _value; + }; + + template + struct StronglyConnectedCutEdgesVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + + StronglyConnectedCutEdgesVisitor(const Graph& graph, EdgeMap& cutMap, + int& cutNum) + : _graph(graph), _cutMap(cutMap), _cutNum(cutNum), + _compMap(graph), _num(0) { + } + + void stop(const Node&) { + ++_num; + } + + void reach(const Node& node) { + _compMap.set(node, _num); + } + + void examine(const Edge& edge) { + if (_compMap[_graph.source(edge)] != _compMap[_graph.target(edge)]) { + _cutMap.set(edge, true); + ++_cutNum; + } + } + private: + const Graph& _graph; + EdgeMap& _cutMap; + int& _cutNum; + + typename Graph::template NodeMap _compMap; + int _num; + }; + + } + + + /// \ingroup graph_prop + /// + /// \brief Check that the given directed graph is strongly connected. + /// + /// Check that the given directed graph is strongly connected. The + /// graph is strongly connected when any two nodes of the graph are + /// connected with directed paths in both direction. + /// \return %False when the graph is not strongly connected. + /// \see connected + /// + /// \note By definition, the empty graph is strongly connected. 
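+  ///
+  ///A possible usage sketch (illustrative only):
+  ///\code
+  ///  SmartGraph g;
+  ///  SmartGraph::Node u = g.addNode();
+  ///  SmartGraph::Node v = g.addNode();
+  ///  g.addEdge(u, v);
+  ///  g.addEdge(v, u);
+  ///  bool sc = stronglyConnected(g);  // true: u and v reach each other
+  ///\endcode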
+ template + bool stronglyConnected(const Graph& graph) { + checkConcept(); + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + + if (NodeIt(graph) == INVALID) return true; + + using namespace _topology_bits; + + typedef DfsVisitor Visitor; + Visitor visitor; + + DfsVisit dfs(graph, visitor); + dfs.init(); + dfs.addSource(NodeIt(graph)); + dfs.start(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + return false; + } + } + + typedef RevGraphAdaptor RGraph; + RGraph rgraph(graph); + + typedef DfsVisitor RVisitor; + RVisitor rvisitor; + + DfsVisit rdfs(rgraph, rvisitor); + rdfs.init(); + rdfs.addSource(NodeIt(graph)); + rdfs.start(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!rdfs.reached(it)) { + return false; + } + } + + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Count the strongly connected components of a directed graph + /// + /// Count the strongly connected components of a directed graph. + /// The strongly connected components are the classes of an + /// equivalence relation on the nodes of the graph. Two nodes are in + /// the same class if they are connected with directed paths in both + /// direction. + /// + /// \param graph The graph. + /// \return The number of components + /// \note By definition, the empty graph has zero + /// strongly connected components. + template + int countStronglyConnectedComponents(const Graph& graph) { + checkConcept(); + + using namespace _topology_bits; + + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::EdgeIt EdgeIt; + + typedef std::vector Container; + typedef typename Container::iterator Iterator; + + Container nodes(countNodes(graph)); + typedef LeaveOrderVisitor Visitor; + Visitor visitor(nodes.begin()); + + DfsVisit dfs(graph, visitor); + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + + typedef typename Container::reverse_iterator RIterator; + typedef RevGraphAdaptor RGraph; + + RGraph rgraph(graph); + + typedef DfsVisitor RVisitor; + RVisitor rvisitor; + + DfsVisit rdfs(rgraph, rvisitor); + + int compNum = 0; + + rdfs.init(); + for (RIterator it = nodes.rbegin(); it != nodes.rend(); ++it) { + if (!rdfs.reached(*it)) { + rdfs.addSource(*it); + rdfs.start(); + ++compNum; + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the strongly connected components of a directed graph + /// + /// Find the strongly connected components of a directed graph. The + /// strongly connected components are the classes of an equivalence + /// relation on the nodes of the graph. Two nodes are in + /// relationship when there are directed paths between them in both + /// direction. In addition, the numbering of components will satisfy + /// that there is no edge going from a higher numbered component to + /// a lower. + /// + /// \image html strongly_connected_components.png + /// \image latex strongly_connected_components.eps "Strongly connected components" width=\textwidth + /// + /// \param graph The graph. + /// \retval compMap A writable node map. The values will be set from 0 to + /// the number of the strongly connected components minus one. Each value + /// of the map will be set exactly once, the values of a certain component + /// will be set continuously. 
+ /// \return The number of components + /// + template + int stronglyConnectedComponents(const Graph& graph, NodeMap& compMap) { + checkConcept(); + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + checkConcept, NodeMap>(); + + using namespace _topology_bits; + + typedef std::vector Container; + typedef typename Container::iterator Iterator; + + Container nodes(countNodes(graph)); + typedef LeaveOrderVisitor Visitor; + Visitor visitor(nodes.begin()); + + DfsVisit dfs(graph, visitor); + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + + typedef typename Container::reverse_iterator RIterator; + typedef RevGraphAdaptor RGraph; + + RGraph rgraph(graph); + + int compNum = 0; + + typedef FillMapVisitor RVisitor; + RVisitor rvisitor(compMap, compNum); + + DfsVisit rdfs(rgraph, rvisitor); + + rdfs.init(); + for (RIterator it = nodes.rbegin(); it != nodes.rend(); ++it) { + if (!rdfs.reached(*it)) { + rdfs.addSource(*it); + rdfs.start(); + ++compNum; + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the cut edges of the strongly connected components. + /// + /// Find the cut edges of the strongly connected components. + /// The strongly connected components are the classes of an equivalence + /// relation on the nodes of the graph. Two nodes are in relationship + /// when there are directed paths between them in both direction. + /// The strongly connected components are separated by the cut edges. + /// + /// \param graph The graph. + /// \retval cutMap A writable node map. The values will be set true when the + /// edge is a cut edge. + /// + /// \return The number of cut edges + template + int stronglyConnectedCutEdges(const Graph& graph, EdgeMap& cutMap) { + checkConcept(); + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::NodeIt NodeIt; + checkConcept, EdgeMap>(); + + using namespace _topology_bits; + + typedef std::vector Container; + typedef typename Container::iterator Iterator; + + Container nodes(countNodes(graph)); + typedef LeaveOrderVisitor Visitor; + Visitor visitor(nodes.begin()); + + DfsVisit dfs(graph, visitor); + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + + typedef typename Container::reverse_iterator RIterator; + typedef RevGraphAdaptor RGraph; + + RGraph rgraph(graph); + + int cutNum = 0; + + typedef StronglyConnectedCutEdgesVisitor RVisitor; + RVisitor rvisitor(rgraph, cutMap, cutNum); + + DfsVisit rdfs(rgraph, rvisitor); + + rdfs.init(); + for (RIterator it = nodes.rbegin(); it != nodes.rend(); ++it) { + if (!rdfs.reached(*it)) { + rdfs.addSource(*it); + rdfs.start(); + } + } + return cutNum; + } + + namespace _topology_bits { + + template + class CountBiNodeConnectedComponentsVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + CountBiNodeConnectedComponentsVisitor(const Graph& graph, int &compNum) + : _graph(graph), _compNum(compNum), + _numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap.set(node, INVALID); + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + ++_num; + } + + void discover(const Edge& edge) { + _predMap.set(_graph.target(edge), _graph.source(edge)); + } + + void examine(const Edge& 
edge) { + if (_graph.source(edge) == _graph.target(edge) && + _graph.direction(edge)) { + ++_compNum; + return; + } + if (_predMap[_graph.source(edge)] == _graph.target(edge)) { + return; + } + if (_retMap[_graph.source(edge)] > _numMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _numMap[_graph.target(edge)]); + } + } + + void backtrack(const Edge& edge) { + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + if (_numMap[_graph.source(edge)] <= _retMap[_graph.target(edge)]) { + ++_compNum; + } + } + + private: + const Graph& _graph; + int& _compNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + int _num; + }; + + template + class BiNodeConnectedComponentsVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + BiNodeConnectedComponentsVisitor(const Graph& graph, + EdgeMap& compMap, int &compNum) + : _graph(graph), _compMap(compMap), _compNum(compNum), + _numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap.set(node, INVALID); + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + ++_num; + } + + void discover(const Edge& edge) { + Node target = _graph.target(edge); + _predMap.set(target, edge); + _edgeStack.push(edge); + } + + void examine(const Edge& edge) { + Node source = _graph.source(edge); + Node target = _graph.target(edge); + if (source == target && _graph.direction(edge)) { + _compMap.set(edge, _compNum); + ++_compNum; + return; + } + if (_numMap[target] < _numMap[source]) { + if (_predMap[source] != _graph.oppositeEdge(edge)) { + _edgeStack.push(edge); + } + } + if (_predMap[source] != INVALID && + target == _graph.source(_predMap[source])) { + return; + } + if (_retMap[source] > _numMap[target]) { + _retMap.set(source, _numMap[target]); + } + } + + void backtrack(const Edge& edge) { + Node source = _graph.source(edge); + Node target = _graph.target(edge); + if (_retMap[source] > _retMap[target]) { + _retMap.set(source, _retMap[target]); + } + if (_numMap[source] <= _retMap[target]) { + while (_edgeStack.top() != edge) { + _compMap.set(_edgeStack.top(), _compNum); + _edgeStack.pop(); + } + _compMap.set(edge, _compNum); + _edgeStack.pop(); + ++_compNum; + } + } + + private: + const Graph& _graph; + EdgeMap& _compMap; + int& _compNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + std::stack _edgeStack; + int _num; + }; + + + template + class BiNodeConnectedCutNodesVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + BiNodeConnectedCutNodesVisitor(const Graph& graph, NodeMap& cutMap, + int& cutNum) + : _graph(graph), _cutMap(cutMap), _cutNum(cutNum), + _numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap.set(node, INVALID); + rootCut = false; + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + ++_num; + } + + void discover(const Edge& edge) { + _predMap.set(_graph.target(edge), _graph.source(edge)); + } + + void examine(const Edge& edge) { + if (_graph.source(edge) == _graph.target(edge) && + _graph.direction(edge)) { + 
if (!_cutMap[_graph.source(edge)]) { + _cutMap.set(_graph.source(edge), true); + ++_cutNum; + } + return; + } + if (_predMap[_graph.source(edge)] == _graph.target(edge)) return; + if (_retMap[_graph.source(edge)] > _numMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _numMap[_graph.target(edge)]); + } + } + + void backtrack(const Edge& edge) { + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + if (_numMap[_graph.source(edge)] <= _retMap[_graph.target(edge)]) { + if (_predMap[_graph.source(edge)] != INVALID) { + if (!_cutMap[_graph.source(edge)]) { + _cutMap.set(_graph.source(edge), true); + ++_cutNum; + } + } else if (rootCut) { + if (!_cutMap[_graph.source(edge)]) { + _cutMap.set(_graph.source(edge), true); + ++_cutNum; + } + } else { + rootCut = true; + } + } + } + + private: + const Graph& _graph; + NodeMap& _cutMap; + int& _cutNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + std::stack _edgeStack; + int _num; + bool rootCut; + }; + + } + + template + int countBiNodeConnectedComponents(const UGraph& graph); + + /// \ingroup graph_prop + /// + /// \brief Checks the graph is bi-node-connected. + /// + /// This function checks that the undirected graph is bi-node-connected + /// graph. The graph is bi-node-connected if any two undirected edge is + /// on same circle. + /// + /// \param graph The graph. + /// \return %True when the graph bi-node-connected. + template + bool biNodeConnected(const UGraph& graph) { + return countBiNodeConnectedComponents(graph) == 1; + } + + /// \ingroup graph_prop + /// + /// \brief Count the biconnected components. + /// + /// This function finds the bi-node-connected components in an undirected + /// graph. The biconnected components are the classes of an equivalence + /// relation on the undirected edges. Two undirected edge is in relationship + /// when they are on same circle. + /// + /// \param graph The graph. + /// \return The number of components. + template + int countBiNodeConnectedComponents(const UGraph& graph) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + + using namespace _topology_bits; + + typedef CountBiNodeConnectedComponentsVisitor Visitor; + + int compNum = 0; + Visitor visitor(graph, compNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the bi-node-connected components. + /// + /// This function finds the bi-node-connected components in an undirected + /// graph. The bi-node-connected components are the classes of an equivalence + /// relation on the undirected edges. Two undirected edge are in relationship + /// when they are on same circle. + /// + /// \image html node_biconnected_components.png + /// \image latex node_biconnected_components.eps "bi-node-connected components" width=\textwidth + /// + /// \param graph The graph. + /// \retval compMap A writable uedge map. The values will be set from 0 + /// to the number of the biconnected components minus one. Each values + /// of the map will be set exactly once, the values of a certain component + /// will be set continuously. + /// \return The number of components. 
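// Editorial sketch, not part of the original header: querying block structure
// with the bi-node-connected helpers defined above. ListUGraph and the
// <lemon/list_graph.h> include are assumptions from LEMON 0.x.
#include <lemon/list_graph.h>

inline void binode_demo(const lemon::ListUGraph& g, int& blocks, bool& biconnected) {
  blocks      = lemon::countBiNodeConnectedComponents(g); // number of blocks
  biconnected = lemon::biNodeConnected(g);                // true iff blocks == 1
}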
+ /// + template + int biNodeConnectedComponents(const UGraph& graph, + UEdgeMap& compMap) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::UEdge UEdge; + checkConcept, UEdgeMap>(); + + using namespace _topology_bits; + + typedef BiNodeConnectedComponentsVisitor Visitor; + + int compNum = 0; + Visitor visitor(graph, compMap, compNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the bi-node-connected cut nodes. + /// + /// This function finds the bi-node-connected cut nodes in an undirected + /// graph. The bi-node-connected components are the classes of an equivalence + /// relation on the undirected edges. Two undirected edges are in + /// relationship when they are on same circle. The biconnected components + /// are separted by nodes which are the cut nodes of the components. + /// + /// \param graph The graph. + /// \retval cutMap A writable edge map. The values will be set true when + /// the node separate two or more components. + /// \return The number of the cut nodes. + template + int biNodeConnectedCutNodes(const UGraph& graph, NodeMap& cutMap) { + checkConcept(); + typedef typename UGraph::Node Node; + typedef typename UGraph::NodeIt NodeIt; + checkConcept, NodeMap>(); + + using namespace _topology_bits; + + typedef BiNodeConnectedCutNodesVisitor Visitor; + + int cutNum = 0; + Visitor visitor(graph, cutMap, cutNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return cutNum; + } + + namespace _topology_bits { + + template + class CountBiEdgeConnectedComponentsVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + CountBiEdgeConnectedComponentsVisitor(const Graph& graph, int &compNum) + : _graph(graph), _compNum(compNum), + _numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap.set(node, INVALID); + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + ++_num; + } + + void leave(const Node& node) { + if (_numMap[node] <= _retMap[node]) { + ++_compNum; + } + } + + void discover(const Edge& edge) { + _predMap.set(_graph.target(edge), edge); + } + + void examine(const Edge& edge) { + if (_predMap[_graph.source(edge)] == _graph.oppositeEdge(edge)) { + return; + } + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + void backtrack(const Edge& edge) { + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + private: + const Graph& _graph; + int& _compNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + int _num; + }; + + template + class BiEdgeConnectedComponentsVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + BiEdgeConnectedComponentsVisitor(const Graph& graph, + NodeMap& compMap, int &compNum) + : _graph(graph), _compMap(compMap), _compNum(compNum), + 
_numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap.set(node, INVALID); + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + _nodeStack.push(node); + ++_num; + } + + void leave(const Node& node) { + if (_numMap[node] <= _retMap[node]) { + while (_nodeStack.top() != node) { + _compMap.set(_nodeStack.top(), _compNum); + _nodeStack.pop(); + } + _compMap.set(node, _compNum); + _nodeStack.pop(); + ++_compNum; + } + } + + void discover(const Edge& edge) { + _predMap.set(_graph.target(edge), edge); + } + + void examine(const Edge& edge) { + if (_predMap[_graph.source(edge)] == _graph.oppositeEdge(edge)) { + return; + } + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + void backtrack(const Edge& edge) { + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + private: + const Graph& _graph; + NodeMap& _compMap; + int& _compNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + std::stack _nodeStack; + int _num; + }; + + + template + class BiEdgeConnectedCutEdgesVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge Edge; + typedef typename Graph::UEdge UEdge; + + BiEdgeConnectedCutEdgesVisitor(const Graph& graph, + EdgeMap& cutMap, int &cutNum) + : _graph(graph), _cutMap(cutMap), _cutNum(cutNum), + _numMap(graph), _retMap(graph), _predMap(graph), _num(0) {} + + void start(const Node& node) { + _predMap[node] = INVALID; + } + + void reach(const Node& node) { + _numMap.set(node, _num); + _retMap.set(node, _num); + ++_num; + } + + void leave(const Node& node) { + if (_numMap[node] <= _retMap[node]) { + if (_predMap[node] != INVALID) { + _cutMap.set(_predMap[node], true); + ++_cutNum; + } + } + } + + void discover(const Edge& edge) { + _predMap.set(_graph.target(edge), edge); + } + + void examine(const Edge& edge) { + if (_predMap[_graph.source(edge)] == _graph.oppositeEdge(edge)) { + return; + } + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + void backtrack(const Edge& edge) { + if (_retMap[_graph.source(edge)] > _retMap[_graph.target(edge)]) { + _retMap.set(_graph.source(edge), _retMap[_graph.target(edge)]); + } + } + + private: + const Graph& _graph; + EdgeMap& _cutMap; + int& _cutNum; + + typename Graph::template NodeMap _numMap; + typename Graph::template NodeMap _retMap; + typename Graph::template NodeMap _predMap; + int _num; + }; + } + + template + int countBiEdgeConnectedComponents(const UGraph& graph); + + /// \ingroup graph_prop + /// + /// \brief Checks that the graph is bi-edge-connected. + /// + /// This function checks that the graph is bi-edge-connected. The undirected + /// graph is bi-edge-connected when any two nodes are connected with two + /// edge-disjoint paths. + /// + /// \param graph The undirected graph. + /// \return The number of components. + template + bool biEdgeConnected(const UGraph& graph) { + return countBiEdgeConnectedComponents(graph) == 1; + } + + /// \ingroup graph_prop + /// + /// \brief Count the bi-edge-connected components. + /// + /// This function count the bi-edge-connected components in an undirected + /// graph. 
The bi-edge-connected components are the classes of an equivalence + /// relation on the nodes. Two nodes are in relationship when they are + /// connected with at least two edge-disjoint paths. + /// + /// \param graph The undirected graph. + /// \return The number of components. + template + int countBiEdgeConnectedComponents(const UGraph& graph) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + + using namespace _topology_bits; + + typedef CountBiEdgeConnectedComponentsVisitor Visitor; + + int compNum = 0; + Visitor visitor(graph, compNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the bi-edge-connected components. + /// + /// This function finds the bi-edge-connected components in an undirected + /// graph. The bi-edge-connected components are the classes of an equivalence + /// relation on the nodes. Two nodes are in relationship when they are + /// connected at least two edge-disjoint paths. + /// + /// \image html edge_biconnected_components.png + /// \image latex edge_biconnected_components.eps "bi-edge-connected components" width=\textwidth + /// + /// \param graph The graph. + /// \retval compMap A writable node map. The values will be set from 0 to + /// the number of the biconnected components minus one. Each values + /// of the map will be set exactly once, the values of a certain component + /// will be set continuously. + /// \return The number of components. + /// + template + int biEdgeConnectedComponents(const UGraph& graph, NodeMap& compMap) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::Node Node; + checkConcept, NodeMap>(); + + using namespace _topology_bits; + + typedef BiEdgeConnectedComponentsVisitor Visitor; + + int compNum = 0; + Visitor visitor(graph, compMap, compNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return compNum; + } + + /// \ingroup graph_prop + /// + /// \brief Find the bi-edge-connected cut edges. + /// + /// This function finds the bi-edge-connected components in an undirected + /// graph. The bi-edge-connected components are the classes of an equivalence + /// relation on the nodes. Two nodes are in relationship when they are + /// connected with at least two edge-disjoint paths. The bi-edge-connected + /// components are separted by edges which are the cut edges of the + /// components. + /// + /// \param graph The graph. + /// \retval cutMap A writable node map. The values will be set true when the + /// edge is a cut edge. + /// \return The number of cut edges. 
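// Editorial sketch, not part of the original header: labelling nodes by their
// bi-edge-connected (2-edge-connected) component with the function defined
// above. ListUGraph and its NodeMap are assumptions from LEMON 0.x
// <lemon/list_graph.h>.
#include <lemon/list_graph.h>

inline int biedge_components_demo(const lemon::ListUGraph& g) {
  lemon::ListUGraph::NodeMap<int> comp(g);
  // Two nodes receive the same label exactly when they are joined by at least
  // two edge-disjoint paths; edges between different labels are the cut edges.
  return lemon::biEdgeConnectedComponents(g, comp);
}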
+ template + int biEdgeConnectedCutEdges(const UGraph& graph, UEdgeMap& cutMap) { + checkConcept(); + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::UEdge UEdge; + checkConcept, UEdgeMap>(); + + using namespace _topology_bits; + + typedef BiEdgeConnectedCutEdgesVisitor Visitor; + + int cutNum = 0; + Visitor visitor(graph, cutMap, cutNum); + + DfsVisit dfs(graph, visitor); + dfs.init(); + + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + return cutNum; + } + + + namespace _topology_bits { + + template + class TopologicalSortVisitor : public DfsVisitor { + public: + typedef typename Graph::Node Node; + typedef typename Graph::Edge edge; + + TopologicalSortVisitor(IntNodeMap& order, int num) + : _order(order), _num(num) {} + + void leave(const Node& node) { + _order.set(node, --_num); + } + + private: + IntNodeMap& _order; + int _num; + }; + + } + + /// \ingroup graph_prop + /// + /// \brief Sort the nodes of a DAG into topolgical order. + /// + /// Sort the nodes of a DAG into topolgical order. + /// + /// \param graph The graph. It should be directed and acyclic. + /// \retval order A writable node map. The values will be set from 0 to + /// the number of the nodes in the graph minus one. Each values of the map + /// will be set exactly once, the values will be set descending order. + /// + /// \see checkedTopologicalSort + /// \see dag + template + void topologicalSort(const Graph& graph, NodeMap& order) { + using namespace _topology_bits; + + checkConcept(); + checkConcept, NodeMap>(); + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + + TopologicalSortVisitor + visitor(order, countNodes(graph)); + + DfsVisit > + dfs(graph, visitor); + + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + dfs.start(); + } + } + } + + /// \ingroup graph_prop + /// + /// \brief Sort the nodes of a DAG into topolgical order. + /// + /// Sort the nodes of a DAG into topolgical order. It also checks + /// that the given graph is DAG. + /// + /// \param graph The graph. It should be directed and acyclic. + /// \retval order A readable - writable node map. The values will be set + /// from 0 to the number of the nodes in the graph minus one. Each values + /// of the map will be set exactly once, the values will be set descending + /// order. + /// \return %False when the graph is not DAG. + /// + /// \see topologicalSort + /// \see dag + template + bool checkedTopologicalSort(const Graph& graph, NodeMap& order) { + using namespace _topology_bits; + + checkConcept(); + checkConcept, NodeMap>(); + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + + order = constMap(); + + TopologicalSortVisitor + visitor(order, countNodes(graph)); + + DfsVisit > + dfs(graph, visitor); + + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + while (!dfs.emptyQueue()) { + Edge edge = dfs.nextEdge(); + Node target = graph.target(edge); + if (dfs.reached(target) && order[target] == -1) { + return false; + } + dfs.processNextEdge(); + } + } + } + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Check that the given directed graph is a DAG. + /// + /// Check that the given directed graph is a DAG. The DAG is + /// an Directed Acyclic Graph. + /// \return %False when the graph is not DAG. 
+ /// \see acyclic + template + bool dag(const Graph& graph) { + + checkConcept(); + + typedef typename Graph::Node Node; + typedef typename Graph::NodeIt NodeIt; + typedef typename Graph::Edge Edge; + + typedef typename Graph::template NodeMap ProcessedMap; + + typename Dfs::template DefProcessedMap:: + Create dfs(graph); + + ProcessedMap processed(graph); + dfs.processedMap(processed); + + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + while (!dfs.emptyQueue()) { + Edge edge = dfs.nextEdge(); + Node target = graph.target(edge); + if (dfs.reached(target) && !processed[target]) { + return false; + } + dfs.processNextEdge(); + } + } + } + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Check that the given undirected graph is acyclic. + /// + /// Check that the given undirected graph acyclic. + /// \param graph The undirected graph. + /// \return %True when there is no circle in the graph. + /// \see dag + template + bool acyclic(const UGraph& graph) { + checkConcept(); + typedef typename UGraph::Node Node; + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::Edge Edge; + Dfs dfs(graph); + dfs.init(); + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + dfs.addSource(it); + while (!dfs.emptyQueue()) { + Edge edge = dfs.nextEdge(); + Node source = graph.source(edge); + Node target = graph.target(edge); + if (dfs.reached(target) && + dfs.predEdge(source) != graph.oppositeEdge(edge)) { + return false; + } + dfs.processNextEdge(); + } + } + } + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Check that the given undirected graph is tree. + /// + /// Check that the given undirected graph is tree. + /// \param graph The undirected graph. + /// \return %True when the graph is acyclic and connected. 
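// Editorial sketch, not part of the original header: topological numbering of
// a three-node DAG using topologicalSort() and dag() from above. The directed
// ListGraph class and <lemon/list_graph.h> are assumptions from LEMON 0.x.
#include <lemon/list_graph.h>

inline bool toposort_demo() {
  lemon::ListGraph g;                                // directed graph
  lemon::ListGraph::Node a = g.addNode();
  lemon::ListGraph::Node b = g.addNode();
  lemon::ListGraph::Node c = g.addNode();
  g.addEdge(a, b);                                   // a -> b
  g.addEdge(b, c);                                   // b -> c
  lemon::ListGraph::NodeMap<int> order(g);
  lemon::topologicalSort(g, order);                  // order[a] < order[b] < order[c]
  return lemon::dag(g);                              // true: no directed cycle
}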
+ template + bool tree(const UGraph& graph) { + checkConcept(); + typedef typename UGraph::Node Node; + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::Edge Edge; + Dfs dfs(graph); + dfs.init(); + dfs.addSource(NodeIt(graph)); + while (!dfs.emptyQueue()) { + Edge edge = dfs.nextEdge(); + Node source = graph.source(edge); + Node target = graph.target(edge); + if (dfs.reached(target) && + dfs.predEdge(source) != graph.oppositeEdge(edge)) { + return false; + } + dfs.processNextEdge(); + } + for (NodeIt it(graph); it != INVALID; ++it) { + if (!dfs.reached(it)) { + return false; + } + } + return true; + } + + namespace _topology_bits { + + template + class BipartiteVisitor : public BfsVisitor { + public: + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + + BipartiteVisitor(const Graph& graph, bool& bipartite) + : _graph(graph), _part(graph), _bipartite(bipartite) {} + + void start(const Node& node) { + _part[node] = true; + } + void discover(const Edge& edge) { + _part.set(_graph.target(edge), !_part[_graph.source(edge)]); + } + void examine(const Edge& edge) { + _bipartite = _bipartite && + _part[_graph.target(edge)] != _part[_graph.source(edge)]; + } + + private: + + const Graph& _graph; + typename Graph::template NodeMap _part; + bool& _bipartite; + }; + + template + class BipartitePartitionsVisitor : public BfsVisitor { + public: + typedef typename Graph::Edge Edge; + typedef typename Graph::Node Node; + + BipartitePartitionsVisitor(const Graph& graph, + PartMap& part, bool& bipartite) + : _graph(graph), _part(part), _bipartite(bipartite) {} + + void start(const Node& node) { + _part.set(node, true); + } + void discover(const Edge& edge) { + _part.set(_graph.target(edge), !_part[_graph.source(edge)]); + } + void examine(const Edge& edge) { + _bipartite = _bipartite && + _part[_graph.target(edge)] != _part[_graph.source(edge)]; + } + + private: + + const Graph& _graph; + PartMap& _part; + bool& _bipartite; + }; + } + + /// \ingroup graph_prop + /// + /// \brief Check if the given undirected graph is bipartite or not + /// + /// The function checks if the given undirected \c graph graph is bipartite + /// or not. The \ref Bfs algorithm is used to calculate the result. + /// \param graph The undirected graph. + /// \return %True if \c graph is bipartite, %false otherwise. + /// \sa bipartitePartitions + /// + /// \author Balazs Attila Mihaly + template + inline bool bipartite(const UGraph &graph){ + using namespace _topology_bits; + + checkConcept(); + + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::EdgeIt EdgeIt; + + bool bipartite = true; + + BipartiteVisitor + visitor(graph, bipartite); + BfsVisit > + bfs(graph, visitor); + bfs.init(); + for(NodeIt it(graph); it != INVALID; ++it) { + if(!bfs.reached(it)){ + bfs.addSource(it); + while (!bfs.emptyQueue()) { + bfs.processNextNode(); + if (!bipartite) return false; + } + } + } + return true; + } + + /// \ingroup graph_prop + /// + /// \brief Check if the given undirected graph is bipartite or not + /// + /// The function checks if the given undirected graph is bipartite + /// or not. The \ref Bfs algorithm is used to calculate the result. + /// During the execution, the \c partMap will be set as the two + /// partitions of the graph. + /// \param graph The undirected graph. + /// \retval partMap A writable bool map of nodes. It will be set as the + /// two partitions of the graph. + /// \return %True if \c graph is bipartite, %false otherwise. 
+ /// + /// \author Balazs Attila Mihaly + /// + /// \image html bipartite_partitions.png + /// \image latex bipartite_partitions.eps "Bipartite partititions" width=\textwidth + template + inline bool bipartitePartitions(const UGraph &graph, NodeMap &partMap){ + using namespace _topology_bits; + + checkConcept(); + + typedef typename UGraph::Node Node; + typedef typename UGraph::NodeIt NodeIt; + typedef typename UGraph::EdgeIt EdgeIt; + + bool bipartite = true; + + BipartitePartitionsVisitor + visitor(graph, partMap, bipartite); + BfsVisit > + bfs(graph, visitor); + bfs.init(); + for(NodeIt it(graph); it != INVALID; ++it) { + if(!bfs.reached(it)){ + bfs.addSource(it); + while (!bfs.emptyQueue()) { + bfs.processNextNode(); + if (!bipartite) return false; + } + } + } + return true; + } + + /// \brief Returns true when there is not loop edge in the graph. + /// + /// Returns true when there is not loop edge in the graph. + template + bool loopFree(const Graph& graph) { + for (typename Graph::EdgeIt it(graph); it != INVALID; ++it) { + if (graph.source(it) == graph.target(it)) return false; + } + return true; + } + + /// \brief Returns true when there is not parallel edges in the graph. + /// + /// Returns true when there is not parallel edges in the graph. + template + bool parallelFree(const Graph& graph) { + typename Graph::template NodeMap reached(graph, false); + for (typename Graph::NodeIt n(graph); n != INVALID; ++n) { + for (typename Graph::OutEdgeIt e(graph, n); e != INVALID; ++e) { + if (reached[graph.target(e)]) return false; + reached.set(graph.target(e), true); + } + for (typename Graph::OutEdgeIt e(graph, n); e != INVALID; ++e) { + reached.set(graph.target(e), false); + } + } + return true; + } + + /// \brief Returns true when there is not loop edge and parallel + /// edges in the graph. + /// + /// Returns true when there is not loop edge and parallel edges in + /// the graph. + template + bool simpleGraph(const Graph& graph) { + typename Graph::template NodeMap reached(graph, false); + for (typename Graph::NodeIt n(graph); n != INVALID; ++n) { + reached.set(n, true); + for (typename Graph::OutEdgeIt e(graph, n); e != INVALID; ++e) { + if (reached[graph.target(e)]) return false; + reached.set(graph.target(e), true); + } + for (typename Graph::OutEdgeIt e(graph, n); e != INVALID; ++e) { + reached.set(graph.target(e), false); + } + reached.set(n, false); + } + return true; + } + +} //namespace lemon + +#endif //LEMON_TOPOLOGY_H diff --git a/src/locfit/adap.c b/src/locfit/adap.c new file mode 100644 index 0000000..eb1b48c --- /dev/null +++ b/src/locfit/adap.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +/* + Functions implementing the adaptive bandwidth selection. + Will make the final call to nbhd() to set smoothing weights + for selected bandwidth, But will **not** make the + final call to locfit(). 
+*/ + +#include "local.h" + +static double hmin; + +double acri(lk,t0,t2,pen) +double lk, t0, t2, pen; +{ double y; +/* return(-2*lk/(t0*exp(pen*log(1-t2/t0)))); */ + /* return((-2*lk+pen*t2)/t0); */ + y = (MAX(-2*lk,t0-t2)+pen*t2)/t0; + return(y); +} + +double mmse(lf,des) +lfit *lf; +design *des; +{ int i, ii, j, p, p1; + double sv, sb, *l, dp; + l = des->wd; + wdiag(lf,des,l,(INT)0,(INT)1,(INT)0); + sv = sb = 0; + p = lf->mi[MP]; + for (i=0; in; i++) + { sv += l[i]*l[i]; + ii = des->ind[i]; + dp = des->di[ii]; + for (j=0; jmi[MDEG]; j++) dp *= des->di[ii]; + sb += fabs(l[i])*dp; + } + p1 = factorial((int)lf->mi[MDEG]+1); + return(sv+sb*sb*lf->dp[DADP]*lf->dp[DADP]/(p1*p1)); +} + +static double mcp, clo, cup; + +/* + Initial bandwidth will be (by default) + k-nearest neighbors for k small, just lage enough to + get defined estimate (unless user provided nonzero DALP + or DFXH components) +*/ + +INT ainitband(des,lf) +design *des; +lfit *lf; +{ INT lf_status = LF_OK, p, z, cri, noit, redo; + double h, ho, t[6]; + p = des->p; + cri = lf->mi[MACRI]; + noit = !((cri==AOK) | (cri==ANONE)); + z = (INT)(lf->mi[MN]*lf->dp[DALP]); + if ((noit) && (zdp[DFXH],redo); + if (zn) z = des->n; + if (h>ho) lf_status = locfit(lf,des,h,noit); + if (cri==ANONE) return(lf_status); + z++; + redo = 1; + } while ((z<=lf->mi[MN]) && ((h==0)||(lf_status!=LF_OK))); + hmin = h; + + switch(lf->mi[MACRI]) + { case ACP: + local_df(lf,des,t); + mcp = acri(des->llk,t[0],t[2],lf->dp[DADP]); + return(lf_status); + case AKAT: + local_df(lf,des,t); + clo = des->cf[0]-lf->dp[DADP]*t[5]; + cup = des->cf[0]+lf->dp[DADP]*t[5]; + return(lf_status); + case AMDI: + mcp = mmse(lf,des); + return(lf_status); + case AOK: return(lf_status); + } + ERROR(("aband1: unknown criterion")); + return(LF_ERR); +} + +/* + aband2 increases the initial bandwidth until lack of fit results, + or the fit is close to a global fit. Increase h by 1+0.3/d at + each iteration. +*/ + +double aband2(des,lf,h0) +design *des; +lfit *lf; +double h0; +{ double t[6], h, h1, nu1, cp, ncp, tlo, tup; + INT d, inc, n, p, done; + d = lf->mi[MDIM]; n = lf->mi[MN]; p = lf->mi[MP]; + h1 = h = h0; + done = 0; nu1 = 0.0; + inc = 0; ncp = 0.0; + while ((!done) & (nu1<(n-p)*0.95)) + { h = nbhd(lf,des,0,(1+0.3/d)*h,1); + if (locfit(lf,des,h,1)>0) WARN(("aband2: failed fit")); + local_df(lf,des,t); + nu1 = t[0]-t[2]; /* tr(A) */ + switch(lf->mi[MACRI]) + { case AKAT: + tlo = des->cf[0]-lf->dp[DADP]*t[5]; + tup = des->cf[0]+lf->dp[DADP]*t[5]; +/* printf("h %8.5f tlo %8.5f tup %8.5f\n",h,tlo,tup); */ + done = ((tlo>cup) | (tupllk,t[0],t[2],lf->dp[DADP]); +/* printf("h %8.5f lk %8.5f t0 %8.5f t2 %8.5f cp %8.5f\n",h,des->llk,t[0],t[2],cp); */ + if (cp=ncp) inc++; else inc = 0; + ncp = cp; + done = (inc>=10) | ((inc>=3) & ((t[0]-t[2])>=10) & (cp>1.5*mcp)); + break; + case AMDI: + cp = mmse(lf,des); + if (cpncp) inc++; else inc = 0; + ncp = cp; + done = (inc>=3); + break; + } + } + return(h1); +} + +/* + aband3 does a finer search around best h so far. Try + h*(1-0.2/d), h/(1-0.1/d), h*(1+0.1/d), h*(1+0.2/d) +*/ +double aband3(des,lf,h0) +design *des; +lfit *lf; +double h0; +{ double t[6], h, h1, cp, tlo, tup; + INT i, i0, d, n; + d = lf->mi[MDIM]; n = lf->mi[MN]; + + h1 = h0; + i0 = (lf->mi[MACRI]==AKAT) ? 
1 : -2; + if (h0==hmin) i0 = 1; + for (i=i0; i<=2; i++) + { if (i==0) i++; + h = h0*(1+0.1*i/lf->mi[MDIM]); + h = nbhd(lf,des,0,h,1); + if (locfit(lf,des,h,1)>0) WARN(("aband3: failed fit")); + local_df(lf,des,t); + switch (lf->mi[MACRI]) + { case AKAT: + tlo = des->cf[0]-lf->dp[DADP]*t[5]; + tup = des->cf[0]+lf->dp[DADP]*t[5]; + if ((tlo>cup) | (tupllk,t[0],t[2],lf->dp[DADP]); + if (cp0) i = 2; } + break; + case AMDI: + cp = mmse(lf,des); + if (cp0) i = 2; } + } + } + return(h1); +} diff --git a/src/locfit/ar_funs.c b/src/locfit/ar_funs.c new file mode 100644 index 0000000..55889e5 --- /dev/null +++ b/src/locfit/ar_funs.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * + * ar_setfunction sets a function pointer on the arithmetic structure, + * according to the first len compnents of the string z. + * Also sets the rt->cmd field, and returns the required number of + * arguments. + */ + +#include "local.h" + +double vrexp() { return(rexp(1.0)); } +double vrnorm() { return(rnorm(0.0,1.0)); } +double vpnorm(double x){ return(pnorm(x,0.0,1.0)); } +double vdnorm(double x){ return(exp(-x*x/2)/S2PI); } +double dummyf() { return(0.0); } +double frac(double x) { return(x-floor(x)); } + +double vmin(v) +vari *v; +{ int i; + double z, x; + z = vitem(v,0); + for (i=1; iz) z = x; + } + return(z); +} + +double vsum(v) +vari *v; +{ int i; + double z; + z = 0.0; + for (i=0; if = NULL; + rt->cmd = 'f'; + rargs = 1; + if (len==3) + { if (stm(z,"sin",3)) rt->f = sin; + if (stm(z,"cos",3)) rt->f = cos; + if (stm(z,"tan",3)) rt->f = tan; + if (stm(z,"exp",3)) rt->f = exp; + if (stm(z,"log",3)) rt->f = log; + if (stm(z,"abs",3)) rt->f = fabs; + if (stm(z,"seq",3)) { rt->f = dummyf; rt->cmd = 'Q'; rargs=3; } + if (stm(z,"min",3)) { rt->f = vmin; rt->cmd = 'S'; rargs=1; } + if (stm(z,"max",3)) { rt->f = vmax; rt->cmd = 'S'; rargs=1; } + if (stm(z,"sum",3)) { rt->f = vsum; rt->cmd = 'S'; rargs=1; } + if (stm(z,"rep",3)) { rt->f = dummyf; rt->cmd = 'R'; rargs=2; } + } + if (len==4) + { if (stm(z,"frac",4)) rt->f = frac; + if (stm(z,"sqrt",4)) rt->f = sqrt; + if (stm(z,"rexp",4)) { rt->f = vrexp; rt->cmd = 'G'; } + if (stm(z,"mean",4)) { rt->f = vmean; rt->cmd = 'S'; rargs=1; } + } + if (len==5) + { if (stm(z,"floor",5)) rt->f = floor; + if (stm(z,"pnorm",5)) rt->f = vpnorm; + if (stm(z,"dnorm",5)) rt->f = vdnorm; + if (stm(z,"logit",5)) rt->f = logit; + if (stm(z,"expit",5)) rt->f = expit; + if (stm(z,"runif",5)) { rt->f = runif; rt->cmd='G'; } + if (stm(z,"rnorm",5)) { rt->f = vrnorm;rt->cmd='G'; } + if (stm(z,"rpois",5)) { rt->f = rpois; rt->cmd='H'; rargs=2; } + } + if (len==6) + { if (stm(z,"sample",6)) { rt->f = dummyf;rt->cmd='M'; rargs=2; } + if (stm(z,"fitted",6)) { rt->f = dummyf; rt->cmd='Z'; rargs=0; } + } + if (len==9) + { if (stm(z,"residuals",9)) + { rt->f= dummyf; rt->cmd='Y'; rargs=0; } + } + if (rt->f==NULL) + { rt->cmd = 'e'; + ERROR(("unknown function")); + } + return(rargs); +} diff --git a/src/locfit/arith.c b/src/locfit/arith.c new file mode 100644 index 0000000..536a78c --- /dev/null +++ b/src/locfit/arith.c @@ -0,0 +1,619 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. 
+ */ + +#include +#include "local.h" + +#ifdef CVERSION + +extern lfit lf; +extern int ar_setfunction(); + +double vadd(double e1,double e2) { return(e1+e2); } +double vsub(double e1,double e2) { return(e1-e2); } +double vmul(double e1,double e2) { return(e1*e2); } +double vdiv(double e1,double e2) { return(e1/e2); } +double vpow(double e1,double e2) +{ if (e2==2) return(e1*e1); + if (e1<=0) return(0.0); + return(exp(e2*log(e1))); +} +double vgt(double e1,double e2) { return((double)(e1>e2)); } +double vlt(double e1,double e2) { return((double)(e1=e2)); } +double vle(double e1,double e2) { return((double)(e1<=e2)); } +double veq(double e1,double e2) { return((double)(e1==e2)); } +double vne(double e1,double e2) { return((double)(e1!=e2)); } + +arstruct art; + +double lf_exp(double x) { return (x<700.0) ? exp(x) : exp(700.0); } + +double vseq(double a,double b,int i,int n) { return(a+(b-a)*i/(n-1)); } + +double rsample(v) +vari *v; +{ int i; + i = (int)( runif() * vlength(v) ); + return( vitem(v,i) ); +} + +vari *vrep(v1,v2) +vari *v1, *v2; +{ int i, j, k, m, n; + vari *v; + n = 0; + for (i=0; i=3) + { ERROR(("Too many function arguments")); + return; + } + rt->nx[p] = k; +} + +void prars(v,i) +vari *v; +int i; +{ arstruct *ars; + ars = (arstruct *)viptr(v,i); + printf("%d %c\n",i,ars->cmd); +} + +arstruct *cmdptr(v,i) +vari *v; +int i; +{ return((arstruct *)viptr(v,i)); +} + +int isstring(z,i1,i2) +char *z; +int i1, i2; +{ int i; + if ((z[i1] != '"') | (z[i2] != '"')) return(0); + for (i=i1+1; i= i1) return(i); + + i = checkrtol(z,i1,i2,"=<>"); + if (i > i1) return(i); + + i = checkrtol(z,i1,i2,"+-"); + while ((i>i1) && (strchr("+-*/:^",z[i-1])!=NULL)) + i = checkrtol(z,i1+1,i-1,"+-"); + if (i>i1) return(i); + + i = checkrtol(z,i1,i2,"*/"); + if (i >= i1) return(i); + + /* looks like a weird priority for : (sequence) but seems to match S. */ + i = checkrtol(z,i1,i2,":"); + if (i >= i1) return(i); + + i = checkrtol(z,i1,i2,"^"); + if (i >= i1) return(i); + + return(-1); +} + +vari *arbuild(z,i1,i2,va,k,dum) /* k=0/1 search variable names? */ +char *z; /* dum: are dummies x0, x1 etc allowed? 
*/ +INT i1, i2, k, dum; +vari *va; +{ INT al, ar, i, j, n, nargs, rargs, inum; + vari *v; + arstruct *rt; + char tmp; + double val; + + if (va==NULL) + { va = createvar("_varith",STSYSTEM,10,VARC); + if (lf_error || va == NULL) + { + return(NULL); + } + else + { + vlength(va) = 0; + } + } + + n = vlength(va); + if (vbytes(n+1,VARC)>va->bytes) /* need to grow */ + { v = va; + setvarname(va,"_ovarith"); + va = createvar("_varith",STSYSTEM,n+5,VARC); + vlength(va) = n; + memcpy(vdptr(va),vdptr(v),vbytes(n,VARC)); + deletevar(v); + } + inum = n; + vlength(va) = n+1; + + while ((z[i1]=='(') && (matchrt(z,i1,i2,'(',')')==i2)) { i1++; i2--; } + + if (isNumber(z,i1,i2,&val)) + { rt = cmdptr(va,inum); + rt->cmd = 'D'; + rt->x = val; + return(va); + } + + if (isstring(z,i1,i2)) + { rt = cmdptr(va,inum); + rt->cmd = 's'; + rt->vv = createvar("_string",STHIDDEN,i2-i1,VCHAR); + for (i=0; ivv))[i] = z[i1+i+1]; + ((char *)vdptr(rt->vv))[i2-i1-1] = '\0'; + return(va); + } + + if (isname(z,i1,i2)) + { tmp = z[i2+1]; z[i2+1] = '\0'; + if (dum) /* search for dummies */ + { for (j=0; jcmd = 'x'; + rt->m = j; + z[i2+1] = tmp; + return(va); + } + } + n = 0; + v = findvar(&z[i1],1,&n); + z[i2+1] = tmp; + if (v==NULL) return(va); + rt = cmdptr(va,inum); + rt->cmd = 'v'; + rt->vv = v; + return(va); + } + + if (isfunction(z,i1,i2)) + { + /* build the argument list */ + ar = i2; + al = matchlf(z,i1,i2,'(',')'); + j = al+1; + nargs = 0; + + if (ar>al+1) + { i = al; + while (j<=ar) + { if (z[j]=='(') j = matchrt(z,j,ar-1,'(',')')+1; + if (z[j]=='[') j = matchrt(z,j,ar-1,'[',']')+1; + if (lf_error) return(va); + if ((z[j]==')') | (z[j]==',')) + { setnext(va,n,nargs,vlength(va)); + va = arbuild(z,i+1,j-1,va,k,dum); + nargs++; i = j; + } + j++; + } + } + rt = cmdptr(va,inum); + rt->m = nargs; + + rargs = ar_setfunction(rt,&z[i1],al-i1); + if (rargs != nargs) + ERROR(("arbuild: wrong number of arguments, %s",&z[i1])); + return(va); + } + + rt = cmdptr(va,inum); + + if (issubset(z,i1,i2)) + { rt->cmd = 'U'; + al = matchlf(z,i1,i2,'[',']'); + setnext(va,n,0,vlength(va)); + va = arbuild(z,i1,al-1,va,k,dum); + if (lf_error) return(va); + setnext(va,n,1,vlength(va)); + va = arbuild(z,al+1,i2-1,va,k,dum); + return(va); + } + + /* that leaves operators */ + + i = isoperator(z,i1,i2); + if (i >= i1) + { rt->cmd = 'O'; + rt->f = NULL; + al = i-1; ar = i+1; + if (z[i]==',') rt->cmd = 'C'; + if (z[i]=='>') + { rt->f = vgt; + if (z[i-1]=='<') { rt->f = vne; al--; } + } + if (z[i]=='<') rt->f = vlt; + if (z[i]=='=') + { rt->f = veq; + if (z[i-1]=='=') al--; + if (z[i-1]=='<') { rt->f = vle; al--; } + if (z[i-1]=='>') { rt->f = vge; al--; } + if (z[i-1]=='!') { rt->f = vne; al--; } + } + if (z[i]=='+') rt->f = vadd; + if (z[i]=='-') rt->f = vsub; + if (z[i]=='*') rt->f = vmul; + if (z[i]=='/') rt->f = vdiv; + if (z[i]==':') rt->cmd = ':'; + if (z[i]=='^') rt->f = vpow; + + setnext(va,n,0,vlength(va)); + va = arbuild(z,i1,al,va,k,dum); + if (lf_error) return(va); + setnext(va,n,1,vlength(va)); + va = arbuild(z,ar,i2,va,k,dum); + return(va); + } + + ERROR(("arbuild: unknown expression %s",z)); + return(va); +} + +vari *vevop(l,r,f) +vari *l, *r; +double (*f)(); +{ INT i, n; + vari *v; + double z; + if ((l==NULL) | (r==NULL)) return(NULL); + n = vlength(l); + if (nn,VDOUBLE); + if (lf_error) return(NULL); + for (i=0; in; + v = createvar("_vsubs",STHIDDEN,n,VDOUBLE); + for (i=0; icmd) + { case 'e': return(NULL); + case 'v': return(rt->vv); + case 'O': return(vevop(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]),rt->f)); + case 'C': 
return(vcat(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]))); + case 'D': + n = 1; + rt->vv = createvar("_vevcon",STHIDDEN,n,VDOUBLE); + if (lf_error) return(NULL); + vassn(rt->vv,0,rt->x); + return(rt->vv); + case 'G': return(vrvec(vareval(v,rt->nx[0]),rt->f)); + case 'H': return(vrve2(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]),rt->f)); + case 'f': return(vvec1(vareval(v,rt->nx[0]),rt->f)); + case 'M': return(vrsamp(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]))); + case 'Q': return(vrseq(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]), + vareval(v,rt->nx[2]))); + case ':': return(vrse2(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]))); + case 'S': return(vrsca(vareval(v,rt->nx[0]),rt->f)); + case 'U': return(vsubset(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]))); + case 'R': return(vrep(vareval(v,rt->nx[0]),vareval(v,rt->nx[1]))); + case 'Z': return(vfitted(RMEAN)); + case 'Y': return(vfitted(RDEV)); + case 's': return(rt->vv); + case 'x': + ERROR(("Dummy in vareval")); + return(NULL); + default : ERROR(("vareval: unknown command %c",rt->cmd)); + } + return(NULL); +} + +vari *saveresult(v,name,status) +vari *v; +int status; +char *name; +{ vari *vr; + if (v==NULL) return(NULL); + + vr = v; + if (v->stat != STHIDDEN) + { vr = createvar("_result",STHIDDEN,vlength(v),vmode(v)); + memcpy(vdptr(vr),vdptr(v),vbytes(vlength(v),vmode(v))); + } + + if (name!=NULL) + { setvarname(vr,name); + vr->stat = status; + } + return(vr); +} + +vari *varith(z,name,status) +char *z, *name; +int status; +{ vari *v, *va; + va = arbuild(z,0,strlen(z)-1,NULL,1,0); + if (lf_error) return(NULL); + v = vareval(va,0); + deletevar(va); + + v = saveresult(v,name,status); + return(v); +} + +double dareval(v,k,x) +vari *v; +INT k; +double *x; +{ arstruct *rt; + rt = viptr(v,k); + switch (rt->cmd) + { case 'e': return(0.0); + case 'v': return(vitem(rt->vv,0)); + case 'O': return(rt->f(dareval(v,rt->nx[0],x),dareval(v,rt->nx[1],x))); + case 'P': return(rt->f(0.0,dareval(v,rt->nx[1],x))); + case 'D': return(rt->x); + case 'G': return(rt->f()); + case 'H': return(rt->f(dareval(v,rt->nx[1],x))); + case 'f': return(rt->f(dareval(v,rt->nx[0],x))); + case 'M': return(rsample(vareval(v,rt->nx[0]))); + case 'x': return(x[rt->m]); + case 'U': return(vitem(vareval(v,rt->nx[0]),(int)dareval(v,rt->nx[1],x)-1)); + case 'Q': ERROR(("sequence in dareval")); + return(0.0); + default : ERROR(("dareval: unknown command %c",rt->cmd)); + } + return(0.0); +} + +double darith(z) +char *z; +{ vari *va; + double y; + va = arbuild(z,0,strlen(z)-1,NULL,1,0); + y = dareval(va,0,NULL); + deletevar(va); + return(y); +} + +INT arvect(z,res,c,a) /* c = no of items to read */ +char *z; +INT c, a; +double *res; +{ INT i; + vari *v; + + if (z==NULL) return(0); + + v = varith(z,"arvect",STPLOTVAR); + if (v==NULL || lf_error) + { + return(0); + } + deletevar(v); + + for (i=0; (iwd,0,1,0); + s0 = s1 = 0.0; + for (i=0; in; i++) + { ii = des->ind[i]; + s0+= prwt(lf,ii)*des->wd[i]*des->wd[i]; + bi = prwt(lf,ii)*fabs(des->wd[i]*ipower(des->di[ii],lf->mi[MDEG]+1)); + s1+= bi*bi; + } + vr += s0; + tb += s1; + return(k); +} + +double bcri(h,c,cri) +double h; +INT c, cri; +{ double num, den; + INT (*pv)(); + lf.dp[c] = h; + if ((cri&63)==BIND) + { pv = procvbind; + vr = tb = 0.0; + } + else pv = procv; + if (cri<64) startlf(&des,&lf,pv,0); + switch(cri&63) + { case BGCV: + ressumm(&lf,&des); + num = -2*lf.mi[MN]*lf.dp[DLK]; + den = lf.mi[MN]-lf.dp[DT0]; + return(num/(den*den)); + case BCP: + ressumm(&lf,&des); + return(-2*lf.dp[DLK]/sig2-lf.mi[MN]+pen*lf.dp[DT0]); + case BIND: + 
return(vr+pen*pen*tb); + } + ERROR(("bcri: unknown criterion")); + return(0.0); +} + +void bsel2(h0,g0,ifact,c,cri) +double h0, g0, ifact; +INT c, cri; +{ INT done, inc; + double h1, g1; + h1 = h0; g1 = g0; + done = inc = 0; + while (!done) + { h1 *= 1+ifact; + g0 = g1; + g1 = bcri(h1,c,cri); + if (g1g0) inc++; else inc = 0; + switch(cri) + { case BIND: + done = (inc>=4) & (vr=4); + } + } +} + +void bsel3(h0,g0,ifact,c,cri) +double h0, g0, ifact; +INT c, cri; +{ double h1, g1; + INT i; + hmin = h0; gmin = g0; + for (i=-1; i<=1; i++) if (i!=0) + { h1 = h0*(1+i*ifact); + g1 = bcri(h1,c,cri); + if (g1fact*h[2])|(h[2]>fact*h[3])) + { h[4] = h[3]-d[3]*(h[3]-h[2])/(d[3]-d[2]); + if ((h[4]h[1])) h[4] = (h[0]+h[1])/2; + kdecri(x,h[4],res,c,j,ker,n); + r[4] = res[0]; d[4] = res[1]; + if (lf_error) return(0.0); + h[2] = h[3]; h[3] = h[4]; + d[2] = d[3]; d[3] = d[4]; + r[2] = r[3]; r[3] = r[4]; + if (d[4]*d[0]>0) { h[0] = h[4]; d[0] = d[4]; r[0] = r[4]; } + else { h[1] = h[4]; d[1] = d[4]; r[1] = r[4]; } + } + if (j>=4) return(h[4]); /* first min for BCV etc */ + if (r[4]<=min) { min = r[4]; minh = h[4]; } + nc++; + } + } + if (nc==0) minh = (r[5]0) + { z = argval(v,i); + if (z[0]=='h') c = DFXH; + } + + cri = BGCV; + i = getarg(v,"bcri",1); + if (i>0) + { z = argval(v,i); + if (z[0]=='c') cri = BCP; + if (z[0]=='i') cri = BIND; + } + + pen = 2.0; + i = getarg(v,"pen",1); + if (i>0) + pen = darith(argval(v,i)); + + bselect(c,cri,pen); +} +#endif + +#ifdef SVERSION +void slscv(x,n,h,z) +double *x, *h, *z; +int *n; +{ INT i; + double res[4]; + kdecri(x,*h,res,0.0,3,WGAUS,*n); + z[0] = res[0]; + z[1] = res[2]; +} +#endif diff --git a/src/locfit/c_args.c b/src/locfit/c_args.c new file mode 100644 index 0000000..aeb332d --- /dev/null +++ b/src/locfit/c_args.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * Functions for interpreting and manipulating command line + * arguments. 
+ */ + +#include "local.h" + +char *argval(vari *v,int i) +{ if (i<0) return(NULL); + return(((carg *)viptr(v,i))->val); +} + +int getarg(v,s,un) /* un=1: unnamed permitted un=2: next unused */ +vari *v; +int un; +char *s; +{ int i; + if (un==2) + { for (i=1; in1) + { WARN(("too many items in ilist")); + n = n1; + } + for (i=0; i=MAXWIN)) + { WARN(("Invalid window %d",win_no)); + } + else + curwin = win_no; + } + return( &pl[curwin] ); +} + +char *settype(xyz,type,def) +plxyz *xyz; +char *type, def; +{ if ((type==NULL) || (strlen(type)==0)) + { xyz->type = def; + return(NULL); + } + xyz->type = type[0]; + return(&type[1]); +} + +char *pvarname(xyz,c,vn) +plxyz *xyz; +char c; +varname vn; +{ sprintf(vn,"_plv%d%c",xyz->id,c); + return(vn); +} + +plxyz *nextxyz(win,add,ck) +plots *win; +INT add, ck; +{ plxyz *xyz; + vari *v; + varname vn; + + if (!add) + { sprintf(vn,"_xyz%d",curwin); + v = win->xyzs = createvar(vn,STSYSTEM,5,VXYZ); + v->n = 0; + win->xlab[0] = win->ylab[0] = win->zlab[0] = '\0'; + } + else + v = win->xyzs = growvar(win->xyzs,vlength(win->xyzs)+ck); + + xyz = (plxyz *)viptr(v,vlength(v)); + xyz->id = (vlength(v) << 4) + win->id; + xyz->pch = 1; + v->n++; + return(xyz); +} + +void plotopt(v,re) +vari *v; +INT re; +{ INT i, j, h, w; + double z[2]; + char *fmt, *ty; + plxyz *xyz; + + cpl = get_graphics_window(v); + + if (!re) + { cpl->main[0] = '\0'; + cpl->xl[0] = cpl->xl[1] = cpl->yl[0] + = cpl->yl[1] = cpl->zl[0] = cpl->zl[1] = 0.0; + cpl->nsl = 0; + } + + arvect(getargval(v,"xlim",1), cpl->xl, 2, 2); + arvect(getargval(v,"ylim",1), cpl->yl, 2, 2); + arvect(getargval(v,"zlim",1), cpl->zl, 2, 2); + + i = getarg(v,"main",1); + if (i>0) { strcpy(cpl->main,argval(v,i)); strip(cpl->main); } + i = getarg(v,"xlab",1); + if (i>0) { strcpy(cpl->xlab,argval(v,i)); strip(cpl->xlab); } + i = getarg(v,"ylab",1); + if (i>0) { strcpy(cpl->ylab,argval(v,i)); strip(cpl->ylab); } + i = getarg(v,"zlab",1); + if (i>0) { strcpy(cpl->zlab,argval(v,i)); strip(cpl->zlab); } + + if ( arvect(getargval(v,"view",1), z, 2, 2) == 2 ) + { cpl->theta=z[0]; + cpl->phi =z[1]; + } + + fmt = getargval(v,"fmt",1); + if (fmt==NULL) fmt = "xwin"; + + i = getarg(v,"split",1); + if (i>0) + cpl->nsl = arvect(argval(v,i),cpl->sl,10,1); + i = getarg(v,"h",1); + if (i>0) sscanf(argval(v,i),"%d",&h); else h = 0; + i = getarg(v,"w",1); + if (i>0) sscanf(argval(v,i),"%d",&w); else w = 0; + + ty = getargval(v,"type",1); + if (ty != NULL) + { for (j=0; jxyzs); j++) + { xyz = (plxyz *)viptr(cpl->xyzs,j); + ty = settype(xyz,ty,xyz->type); + } + } + if (stm(fmt,"xwin",1)) { plotxwin(cpl,&devwin,curwin,w,h,0); return; } + if (stm(fmt,"win",1)) { plotxwin(cpl,&devwin,curwin,w,h,0); return; } + if (stm(fmt,"post",1)) + { psfn = getargval(v,"file",1); + plotxwin(cpl,&devps,curwin,w,h,0); + return; + } +} + +void pvari(cmd,xyz,win,ax) +char *cmd, ax; +plxyz *xyz; +plots *win; +{ vari *vv; + INT k; + varname vname; + vv = varith(cmd,pvarname(xyz,ax,vname),STPLOTVAR); + if (vv==NULL) return; + k = xyz->id>>4; + switch(ax) + { case 'x': + xyz->x = vv; + if (k==0) strcpy(win->xlab,cmd); + return; + case 'y': + xyz->y = vv; + if (k==0) strcpy(win->ylab,cmd); + return; + case 'z': + xyz->z = vv; + if (k==0) strcpy(win->zlab,cmd); + return; + } + ERROR(("pvari: unknown axis %c",ax)); +} + +void plotdata(v) +vari *v; +{ INT add, i, j, k; + plxyz *xyz = NULL, *xyz2 = NULL; + char *type; + + cpl = get_graphics_window(v); + + i = getarg(v,"add",0); + add = (i>0) ? 
getlogic(v,i) : 0; + + type = getargval(v,"type",0); + + i = getarg(v,"data",0); + if (i>0) doreaddata(argval(v,i),(INT)0); + + i = getarg(v,"pch",0); + if (i>0) sscanf(argval(v,i),"%d",&xyz->pch); + + xyz = nextxyz(cpl,add,2); + if (xyz==NULL) return; + xyz->x = xyz->y = xyz->z = NULL; + type = settype(xyz,type,'p'); + + i = getarg(v,"x",1); + j = getarg(v,"y",1); + k = getarg(v,"z",1); + + if (!add) /* set the default view angle */ + { cpl->theta = 45*( 1 - ((j==0)|(k==0)) - 3*(i==0) ); + cpl->phi = 45*( 1 + ((i==0)|(j==0)) - (k==0) ); + } + + if (i>0) pvari(argval(v,i),xyz,cpl,'x'); + if (j>0) pvari(argval(v,j),xyz,cpl,'y'); + if (k>0) pvari(argval(v,k),xyz,cpl,'z'); + + i = getarg(v,"x2",1); + j = getarg(v,"y2",1); + k = getarg(v,"z2",1); + if (i+j+k>0) + { xyz2= nextxyz(cpl,1,1); + if (xyz2==NULL) return; + xyz2->x = xyz->x; + xyz2->y = xyz->y; + xyz2->z = xyz->z; + type = settype(xyz2,type,'s'); + if (i>0) pvari(argval(v,i),xyz2,cpl,'x'); + if (j>0) pvari(argval(v,j),xyz2,cpl,'y'); + if (k>0) pvari(argval(v,k),xyz2,cpl,'z'); + } + if (lf_error) return; + + cpl->ty |= PLDATA; + + plotopt(v,add); +} + +void plotfit(v) +vari *v; +{ INT add, d, dp, i = 0, j = 0, n, sef; + INT dt, mg[MXDIM], ix, iy; + double c, sd, xl[2*MXDIM], xll[2]; + char cb; + varname vn; + plxyz *xyz, *xyzl, *xyzu, *xyzd; + char *type; + + cpl = get_graphics_window(v); + + i = getarg(v,"fit",1); + if (i>0) dosavefit(&lf,argval(v,i),"rb",(INT)0); + if (nofit()) ERROR(("plotfit: no fit to plot.")); + if (lf_error) return; + dp = 0; + + d = lf.mi[MDIM]; + for (i=0; i0) + { j = arvect(argval(v,j),xll,2,1); + if (j==1) + xl[i] = xl[i+d] = xll[0]; + else + { xl[i] = xll[0]; + xl[i+d] = xll[1]; + } + } + else + { xl[i] = lf.fl[i]; + xl[i+d] = lf.fl[i+d]; + j = 2; + } + if (j==2) + { if (dp==2) + { xl[i] = xl[i+d] = (xl[i]+xl[i+d])/2; + WARN(("plotfit: fixing %s=%f",lf.xname[i],xl[i])); + j = 1; + } + if (dp==1) { iy = i; dp++; } + if (dp==0) { ix = i; dp++; } + } + mg[i] = 2-j; + } + if (dp<=0) + { ERROR(("No plot variables")); + return; + } + sef = 0; dt = 0; + i = getarg(v,"data",1); if (i>0) dt =getlogic(v,i); + i = getarg(v,"band",1); cb = (i>0) ? *argval(v,i) : 'n'; + + for (i=0; i0) readilist(mg,argval(v,i),1,lf.mi[MDIM],1); + + i = getarg(v,"add",1); + add = (i>0) ? getlogic(v,i) : 0; + + type = getargval(v,"type",1); + + if ((lf.mi[MEV]==EDATA) | (lf.mi[MEV]==ECROS)) + n = setpppoints(&pp,"fitp",mg,xl); + else + n = setpppoints(&pp,"grid",mg,xl); + pp.fit = createvar("_ppfit",STPLOTVAR,n,VDOUBLE); + if (cb=='n') + pp.se = NULL; + else + pp.se = createvar("_ppsef",STPLOTVAR,n,VDOUBLE); + if (lf_error) return; + cpreplot(&pp,v,cb); + if (lf_error) return; + + xyz = nextxyz(cpl,add,4); + if (xyz==NULL) return; + /* set up first predictor variable */ + xyz->x = pp.data[ix]; + setvarname(xyz->x,pvarname(xyz,'x',vn)); + strcpy(cpl->xlab,lf.xname[ix]); + + /* set up second predictor variable */ + if (dp==2) + { xyz->y = pp.data[iy]; + setvarname(xyz->y,pvarname(xyz,'y',vn)); + strcpy(cpl->ylab,lf.xname[iy]); + } + else + { xyz->y = NULL; + cpl->ylab[0] = '\0'; + } + + xyz->z = pp.fit; + setvarname(xyz->z,pvarname(xyz,'z',vn)); + switch(lf.mi[MTG]&63) + { case TDEN: strcpy(cpl->zlab,"Density"); break; + case TRAT: strcpy(cpl->zlab,"Rate"); break; + case THAZ: strcpy(cpl->zlab,"Hazard"); break; + default: strcpy(cpl->zlab,lf.yname); + } + type = settype(xyz,type,(dp==1) ? 
'l' : 'c'); + + if (pp.se!=NULL) + { xyzl = nextxyz(cpl,1,3); xyzu = nextxyz(cpl,1,2); + if ((xyzl!=NULL) & (xyzu!=NULL)) + { sd = sqrt(lf.dp[DRV]); + xyzl->x = xyzu->x = xyz->x; + xyzl->y = xyzu->y = xyz->y; + xyzl->z = createvar(pvarname(xyzl,'z',vn),STPLOTVAR,n,VDOUBLE); + xyzu->z = createvar(pvarname(xyzu,'z',vn),STPLOTVAR,n,VDOUBLE); + if (lf_error) return; + c = docrit(v); + for (i=0; iz,i,backtr(vitem(pp.fit,i)+c*vitem(pp.se,i),lf.mi,lf.nd)); + vassn(xyzl->z,i,backtr(vitem(pp.fit,i)-c*vitem(pp.se,i),lf.mi,lf.nd)); + } + type = settype(xyzl,type,(d==1) ? 'l' : 'c'); + type = settype(xyzu,type,(d==1) ? 'l' : 'c'); + } + deletevar(pp.se); + } + if (pp.wh==PCOEF) + for (i=0; iz); i++) + vassn(xyz->z,i,backtr(vitem(pp.fit,i),lf.mi,lf.nd)); + if (dt) + { + recondat(0,&n); + if (lf_error) return; + xyzd = nextxyz(cpl,1,1); + if (xyzd!=NULL) + { xyzd->x = createvar(pvarname(xyzd,'x',vn),STPLOTVAR,n,VDOUBLE); + for (i=0; ix,i,datum(&lf,ix,i)); + if (d==2) + { xyzd->y = createvar(pvarname(xyzd,'y',vn),STPLOTVAR,n,VDOUBLE); + for (i=0; iy,i,datum(&lf,iy,i)); + } + else xyzd->y = NULL; + xyzd->z = createvar(pvarname(xyzd,'z',vn),STPLOTVAR,n,VDOUBLE); + for (i=0; iz,i,((lf.mi[MTG]&63)==TGAUS) ? resp(&lf,i) : resp(&lf,i)/prwt(&lf,i)); + type = settype(xyzd,type,'p'); + } + } + + /* now, set default view angle */ + if (!add) + { if (dp==1) { cpl->theta = 0; cpl->phi = 90; } /* x-z axis */ + else + { if (xyz->type=='w') + cpl->theta = cpl->phi = 45; /* wireframes */ + else + cpl->theta = cpl->phi = 0; /* x-y plot; e.g. for contours */ + } + } + if (lf_error) return; + cpl->ty |= PLFIT; + if (dt) cpl->ty |= PLDATA; + + plotopt(v,add); +} + +void plottrack(v) +vari *v; +{ INT i, j; + plxyz *xyz; + varname vn; + + cpl = get_graphics_window(v); + + if ((cpl->ty & PLTRK)!=PLTRK) /* initialize */ + { xyz = nextxyz(cpl,0,1); + xyz->x = createvar(pvarname(xyz,'x',vn),STPLOTVAR,100,VDOUBLE); + xyz->y = createvar(pvarname(xyz,'y',vn),STPLOTVAR,100,VDOUBLE); + xyz->z = createvar(pvarname(xyz,'z',vn),STPLOTVAR,100,VDOUBLE); + if (lf_error) return; + vlength(xyz->x) = vlength(xyz->y) = vlength(xyz->z) = 0; + settype(xyz,NULL,'p'); + cpl->theta = cpl->phi = 0; + cpl->ty = PLTRK; + } + else + { vlength(cpl->xyzs) = 0; + xyz = nextxyz(cpl,1,1); + } + j = vlength(xyz->x); + i = getarg(v,"x",1); + if (i>0) + { vassn(xyz->x,j,darith(argval(v,i))); + strcpy(cpl->xlab,argval(v,i)); + vlength(xyz->x) = j+1; + } + i = getarg(v,"y",1); + if (i>0) + { vassn(xyz->y,j,darith(argval(v,i))); + strcpy(cpl->ylab,argval(v,i)); + vlength(xyz->y) = j+1; + } + i = getarg(v,"z",1); + if (i>0) + { vassn(xyz->z,j,darith(argval(v,i))); + strcpy(cpl->zlab,argval(v,i)); + vlength(xyz->z) = j+1; + } + plotopt(v,0); +} + +void setplot(v) +vari *v; +{ INT i, j, w; + carg *ct; + varname tname; + i = getarg(v,"win",1); + if (i==0) + { ERROR(("setplot: no win argument")); + return; + } + sscanf(argval(v,i),"%d",&w); + if ((w<0) | (w>=MAXWIN)) + { ERROR(("setplot: invalid win %s",argval(v,i))); + return; + } + if (vlength(v)==2) + { deletevar(pl[w].track); + pl[w].track = NULL; + return; + } + sprintf(tname,"=tpc%d",w); + pl[w].track = createvar(tname,STSYSTEM,v->n-2,VARGL); + j = 0; + pl[w].ty = PLNONE; /* to ensure previous track is cleared */ + for (i=1; iarg = argarg(v,i); + ct->val = argval(v,i); + setused(v,i); + j++; + } + } + sprintf(tname,"=tps%d",w); + setvarname(curstr,tname); +} diff --git a/src/locfit/cmd.c b/src/locfit/cmd.c new file mode 100644 index 0000000..cc52b50 --- /dev/null +++ b/src/locfit/cmd.c @@ -0,0 +1,801 @@ +/* + * 
Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + */ + +#include +#ifdef DOS +#include +#endif + +#include "local.h" + +#ifdef CVERSION + +#define MAXK 20 + +FILE *ofile; + +device devps, devwin; +design des; +lfit lf; +vari *aru; + +extern plots pl[]; +pplot pp; +struct lfcol mycol[MAXCOLOR]; +char *lfhome; +extern char filename[100]; + +INT lf_error, lfcm[10]; + +vari *curstr; +void cmdint(); +void del_lines(); + +/* + INDEX data input and output functions + savefit: f/end for user savefit. + readdata: f/end for readdata + savedata: f/end for savedata + recondat: reconnect data to fit + */ + +void savefit(v,mode) +vari *v; +char *mode; +{ INT j, fp; + char *filename; + filename = getargval(v,"file",1); + if (filename==NULL) + { ERROR(("savefit: no filename")); + return; + } + j = getarg(v,"fp",1); + fp = (j>0) ? getlogic(v,j) : 0; + dosavefit(&lf,filename,mode,fp); + if (mode[0]=='r') endfit(); +} + +void readdata(v) +vari *v; +{ INT i, fp; + i = getarg(v,"data",1); + if (i==0) i = getarg(v,"file",1); + if (i==0) { ERROR(("readdata: no file name")); return; } + fp = getarg(v,"fp",1); + fp = (fp>0) ? getlogic(v,fp) : 0; + doreaddata(argval(v,i),fp); +} + +void savedata(v) +vari *v; +{ INT fp; + if (argarg(v,0)==NULL) { ERROR(("savedata: no file name")); return; } + fp = getarg(v,"fp",0); + fp = (fp>0) ? getlogic(v,fp) : 0; + dosavedata(v,fp); +} + +void recondat(xonly,n) +INT xonly, *n; +{ INT i; + *n = -1; + for (i=0; in==1) /* compute for existing fit */ + { if (nofit()) { ERROR(("ckap: no fit, no arguments")); } + else recondat(0,&lf.mi[MN]); + } + else /* new fit specification */ + fitoptions(&lf,v,0); + if (lf_error) return; + lf.nk = constants(&des,&lf,lf.kap); + if (lf_error) { lf.nk=0; return; } + printf("kappa0:"); + for (i=0; i0) h0 = darith(argval(v,i)); + i = getarg(v,"h1",1); if (i>0) h1 = darith(argval(v,i)); + + deschk(des,n,1); + mm[0]=1; mm[1]=2; mm[2]=3; mm[3]=4; mm[4]=5; mm[5]=6; nm=6; + kdeselect(band,x,des.ind,h0,h1,mm,nm,WGAUS,n); + for (i=0; i0) sscanf(argval(v,i),"%lf",&df); + i = getarg(v,"al",1); if (i>0) sscanf(argval(v,i),"%lf",&al); + return(critval(lf.kap,lf.nk,lf.mi[MDIM],al,10,2,df)); +} + +void crit(v) +vari *v; +{ vari *vr; + vr = createvar("crit",STHIDDEN,1,VDOUBLE); + if (lf_error) return; + vassn(vr,0,docrit(v)); + saveresult(vr,argarg(v,0),STREGULAR); +} + +double backtr(th,mi,nd) +double th; +INT *mi, nd; +{ if (nd>0) return(th); + return(invlink(th,mi[MLINK])); +} + +void predict(vc) +vari *vc; +{ + double *data[MXDIM]; + varname vn; + INT i, k, j, gr, n, z, mg[MXDIM]; + memset(mg, 0, sizeof(mg)); + dosavefit(&lf,getargval(vc,"fit",0),"rb",(INT)0); + if (nofit()) ERROR(("predict: no fit to interpolate\n")); + if (lf_error) return; + + gr=0; + i = getarg(vc,"grid",0); + if (i>0) gr = getlogic(vc,i); + + i = getarg(vc,"where",0); + if (i>0) n = setpppoints(&pp,argval(vc,i),NULL,lf.fl); + else + { + for (j=0; jn; + } + n = pp.data[0]->n; + pp.gr = 1+gr; + } + + for (j=0; jn; + break; + case 2: + n = 1; + for (i=0; in; + n *= mg[i]; + } + break; + case 3: + n = lf.mi[MN]; + break; + case 4: + n = lf.nv; + break; + default: + ERROR(("cpreplot where problem")); + } + + if (argarg(vc,0)==NULL) + pp.fit = createvar("predict",STHIDDEN,n,VDOUBLE); + else + pp.fit = createvar(argarg(vc,0),STREGULAR,n,VDOUBLE); + if (lf_error) return; + pp.se = NULL; + cpreplot(&pp,vc,'n'); + if (lf_error) return; + for (j=0; jn; i++) if (!argused(v,i)) + { if (argvalis(v,i,"x")) { setused(v,i); wh[nk++]=1; } + if (argvalis(v,i,"fhat")) { 
setused(v,i); wh[nk++]=2; } + if (argvalis(v,i,"coef")) { setused(v,i); wh[nk++]=2; } + if (argvalis(v,i,"nlx")) { setused(v,i); wh[nk++]=3; } + if (argvalis(v,i,"infl")) { setused(v,i); wh[nk++]=4; } + if (argvalis(v,i,"se")) { setused(v,i); wh[nk++]=5; cs=1; } + if (argvalis(v,i,"cband")) { setused(v,i); wh[nk++]=7; cs=ck=1; } + if (argvalis(v,i,"h")) { setused(v,i); wh[nk++]=8; } + if (argvalis(v,i,"deg")) { setused(v,i); wh[nk++]=9; } + } + if (nk==0) /* default: x and fhat */ + { wh[nk++] = 1; wh[nk++] = 2; + } + d = lf.mi[MDIM]; + alp = 0.95; + + if (cs) rs = sqrt(lf.dp[DRV]); + if (ck) + { c = critval(lf.kap,lf.nk,lf.mi[MDIM],1-alp,10,2,0.0); + printf("using c = %8.5f\n",c); + } + + for (i=0; in; k++) + { vr = NULL; + for (j=0; jresult = vr; + } +} + +void summfit(v) +vari *v; +{ int i; + dosavefit(&lf,getargval(v,"fit",1),"rb",0); + printf("Response variable: %s\n",lf.yname); + printf("Predictor variables: "); + for (i=0; i0) +// { lfcm[CAXI] = getcolidx(argval(v,i)); +// for (i=CTEX; ixl[1]) xl[1] = x[i]; + } + i = getarg(v,"m",0); + if (i>0) sscanf(argval(v,i),"%d",&m); else m = 5; + mx = pretty(xl,m,xs); + if (lf_error) return; + + i = getarg(v,"y",1); + if (i>0) + { y = vdptr(findvar(argval(v,i),1,&n)); + yl[0] = yl[1] = y[0]; + for (i=1; iyl[1]) yl[1] = y[i]; + } + my = pretty(yl,m,ys); + } + else { y = NULL; my = 0; } + if (lf_error) return; + + for (i=0; i<15; i++) nx[i] = ny[i] = 0; + for (i=0; i<=(mx+1)*(my+1); i++) count[i] = 0; + for (i=0; i=xs[mx-1]) ix = mx; + if ((x[i]>=xs[0]) & (x[i]=xs[j-1]) & (x[i]0) + { if (y[i]=ys[my-1]) iy = my; + if ((y[i]>=ys[0]) & (y[i]=ys[j-1]) & (y[i]0) printf(" "); + for (i=0; i<=mx; i++) if (nx[i]>0) + printf(" %4g-",(i==0) ? xl[0] : xs[i-1]); + printf("\n"); + if (my>0) printf(" "); + for (i=0; i<=mx; i++) if (nx[i]>0) + printf(" %4g ",(i==mx) ? xl[1] : xs[i]); + printf("\n\n"); + for (j=0; j<=my; j++) if (ny[j]>0) + { if (my>0) + printf("%4g-%4g ",(j==0) ? yl[0] : ys[j-1], + (j==my) ? yl[1] : ys[j]); + for (i=0; i<=mx; i++) + if (nx[i]>0) printf("%6d ",count[i*(my+1)+j]); + printf("\n"); + } +} + +/* + INDEX control functions: + setout(): set output file. + cmdint(): send off the command... + locfit_dispatch(): called by the main program. 
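
    Added usage sketch (hypothetical, not taken from the locfit sources;
    the argument strings are placeholders): a driver program can feed
    command strings to the dispatcher, e.g.

        locfit_dispatch("readdata ...");
        locfit_dispatch("locfit ...");
        locfit_dispatch("plotfit");

    Each string is tokenized by makecmd()/getcmd() and handed to
    cmdint(), which routes it through the dcmdint() command table below.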
+ */ + +void setout(v) +vari *v; +{ INT i, i0; + char md; + i0 = getarg(v,"file",1); + if (i0==0) + { if (ofile!=NULL) fclose(ofile); + ofile = NULL; + printf("Output set to stdout\n"); + return; + } + + md = 'w'; + i = getarg(v,"mode",1); + if ((i>0) && (argval(v,i)[0]=='a')) md = 'a'; + + setfilename(argval(v,i0),"",&md,0); + if (ofile != NULL) fclose(ofile); + ofile = fopen(filename,&md); + if (ofile == NULL) + ERROR(("setout: can't open %s for writing",filename)); + else + printf("Output set to file %s\n",filename); +} + +void dosleep(v) +vari *v; +{ INT i; + i = getarg(v,"time",1); + if (i==0) return; + sscanf(argval(v,i),"%d",&i); + (void)sleep(i); +} + +void setdef(v) +vari *v; +{ INT i, n; + carg *ca; + vari *vd; + + if (argarg(v,0)==NULL) + { ERROR(("Unnamed Defintion")); + return; + } + n = vlength(v)-1; + vd = createvar(argarg(v,0),STSYSTEM,n,VARGL); + if (lf_error) return; + + for (i=0; iarg = argarg(v,i+1); + ca->val = argval(v,i+1); + setused(v,i+1); + } + sprintf(curstr->name,"=%s",argarg(v,0)); +} + +extern void cscbsim(); + +void dcmdint(v) +vari *v; +{ INT i; + if (v==NULL) + { ERROR(("dcmdint received NULL")); + return; + } + if (argvalis(v,0,"band")) { band(v); return; } + if (argvalis(v,0,"crit")) { crit(v); return; } + if (argvalis(v,0,"def")) { setdef(v); return; } + if (argvalis(v,0,"endfor")) { dec_forvar(); return; } + if (argvalis(v,0,"for")) { inc_forvar(); return; } + if (argvalis(v,0,"example")){example(v); return; } + if (argvalis(v,0,"help")) {example(v); return; } + if (argvalis(v,0,"?")) {example(v); return; } + if (argvalis(v,0,"exit")) exit(0); + if (argvalis(v,0,"quit")) exit(0); + if (argvalis(v,0,"q()")) exit(0); + if (argvalis(v,0,"fitted")){ cfitted(v,RMEAN); return; } + if (argvalis(v,0,"greyscale")) { greyscale(v); return; } + if (argvalis(v,0,"kappa")) { ckap(v); return; } + if (argvalis(v,0,"kdeb")) { ckdeb(v); return; } + if (argvalis(v,0,"knots")) { knots(v); return; } + if (argvalis(v,0,"locfit")) { clocfit(v,0); return; } + if (argvalis(v,0,"relocfit")) { clocfit(v,1); return; } + if (argvalis(v,0,"plot")) { printf("use plotfit or plotdata\n"); return; } + if (argvalis(v,0,"plotdata")) { plotdata(v); return; } + if (argvalis(v,0,"plotfit")) { plotfit(v); return; } + if (argvalis(v,0,"replot")) { plotopt(v,1); return; } + if (argvalis(v,0,"predict")) { predict(v); return; } + if (argvalis(v,0,"prfit")) { printfit(v); return; } + if (argvalis(v,0,"rband")) { crband(v); return; } + if (argvalis(v,0,"readdata")) { readdata(v); return; } + if (argvalis(v,0,"readfile")) { readfile(v); return; } + if (argvalis(v,0,"readfit")) { savefit(v,"rb"); return; } + if (argvalis(v,0,"residuals")){ cfitted(v,RDEV); return; } + if (argvalis(v,0,"run")) return; + if (argvalis(v,0,"savedata")) { savedata(v); return; } + if (argvalis(v,0,"savefit")) { savefit(v,"wb"); return; } + if (argvalis(v,0,"scbmax")) { cscbsim(v); return; } + if (argvalis(v,0,"scbsim")) { cscbsim(v); return; } + if (argvalis(v,0,"seed")) { rseed(argval(v,1)); setused(v,1); return; } + if (argvalis(v,0,"setcolor")) { setcolor(v); return; } + if (argvalis(v,0,"setout")) { setout(v); return; } + if (argvalis(v,0,"outf")) { setout(v); return; } + if (argvalis(v,0,"setplot")) { setplot(v); return; } + if (argvalis(v,0,"sleep")) { dosleep(v); return; } + if (argvalis(v,0,"summfit")) { summfit(v); return; } + if (argvalis(v,0,"table")) { table(v); return; } + if (argvalis(v,0,"track")) { plottrack(v); return; } + if (argvalis(v,0,"wdiag")) { cwdiag(v); return; } + for (i=0; iresult = 
varith(argval(v,i),argarg(v,i),STREGULAR); + setused(v,i); + if (lf_error) return; + } +} + +void cmdint(v) +vari *v; +{ vari *vv, *vr; + INT i, j, mn, nr; + if (v==NULL) return; + + for (i=0; iused = 0; */ + ((carg *)viptr(v,i))->result = NULL; + } + + setused(v,0); + if (vlength(v)==1) + { j = 0; + vv = findvar(argval(v,0),0,&j); + if ((vv!=NULL) && ((vv->mode==VARGL) & (!argvalis(v,0,"=cline")))) + { + cmdint(vv); + return; + } + } + + /* dcmdint processes command */ + dcmdint(v); + + /* print the results of unassigned expression. + * First, determine mn = maximum number of rows in the + * output. Note that vr->stat==STHIDDEN determines whether + * the result was unassigned. + */ + mn = 0; nr = 0; + for (i=0; iresult; + if ((vr != NULL) && (vr->stat==STHIDDEN)) + switch(vr->mode) + { case VCHAR: if (mn<1) mn = 1; + break; + case VINT: + case VDOUBLE: if (mnn) mn = vr->n; + break; + } + } + + /* now, print the unassigned variables. + + for (i=0; iresult; + if ((vr != NULL) && (vr->stat==STHIDDEN)) + switch(vr->mode) + { case VDOUBLE: printf("%8.5f ",vitem(vr,i)); break; + case VCHAR: printf("%s ",vdptr(vr)); break; + case VINT: printf("%4d ", vitem(vr,i)); break; + } + } + printf("\n"); + } + */ + + for (i=0; iresult); +} + +INT locfit_dispatch(char *z) + +{ vari *v; + + makecmd(z); + while (1) + { lf_error = 0; + v = getcmd(); + if (v==NULL) + { del_lines(); + return(0); + } + cmdint(v); + } +} + +void setuplf() +{ INT i; + char command[100]; + vari *v; + + lfhome = getenv("LFHOME"); + initdb(); + + ofile = NULL; + lf.tw = lf.xxev = lf.L = lf.iw = des.dw = lf.pc.wk = NULL; + des.index = NULL; + lf.mg = calloc(MXDIM,sizeof(INT)); + + v = createvar("mi",STSYSPEC,LENM,VINT); v->dpr = (double *)lf.mi; + v = createvar("dp",STSYSPEC,LEND,VDOUBLE); v->dpr = lf.dp; + v = createvar("alpha",STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DALP]; + v = createvar("h", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DFXH]; + v = createvar("pen", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DADP]; + v = createvar("infl", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DT0]; + v = createvar("vari", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DT1]; + v = createvar("like", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DLK]; + v = createvar("resv", STSYSPEC,1,VDOUBLE); v->dpr = &lf.dp[DRV]; + + for (i=0; i= max, and only do that integration once. + */ + +#include "local.h" + +static double ilim[2*MXDIM], *ff, tmax; + +/* + * hrao returns 0 if integration region is empty. + * 1 otherwise. + */ +INT haz_sph_int(lf,dfx,cf,h,r1) +lfit *lf; +double *dfx, *cf, h, *r1; +{ double s, t0, t1, wt, th; + INT dim, j, p, *mi; + mi = lf->mi; + s = 0; p = mi[MP]; + dim = mi[MDIM]; + for (j=1; jsca[j])); + if (s>1) return(0); + + setzero(r1,p*p); + t1 = sqrt(1-s)*h*lf->sca[0]; + t0 = -t1; + if (t0ilim[dim]) t1 = ilim[dim]; + if (t1>dfx[0]) t1 = dfx[0]; + if (t1mi[MP]; + setzero(resp,p*p); + sb = 0.0; + + for (i=0; i<=lf->mi[MN]; i++) + { + if (i==lf->mi[MN]) + { dfx[0] = tmax-t[0]; + for (j=1; jmi[MDIM]; j++) dfx[j] = 0.0; + eb = exp(sb/lf->mi[MN]); + } + else + { eb = exp(base(lf,i)); sb += base(lf,i); + for (j=0; jmi[MDIM]; j++) dfx[j] = datum(lf,j,i)-t[j]; + } + + st = haz_sph_int(lf,dfx,cf,h,r1); + if (st) + for (j=0; jmi[MP]; d = lf->mi[MDIM]; + deg = lf->mi[MDEG]; + setzero(resp,p*p); + hj = hs = h*lf->sca[0]; + memset(dfx, 0.0, sizeof(dfx)); + ncf[0] = cf[0]; + for (i=1; i<=deg; i++) + { ncf[i] = hj*cf[(i-1)*d+1]; hj *= hs; + } + +/* for i=0..n.... + * First we compute prod_wk[j], j=0..d. 
+ * For j=0, this is int_0^T_i (u-t)^k W((u-t)/h) exp(b0*(u-t)) du + * For remaining j, (x(i,j)-x(j))^k Wj exp(bj*(x..-x.)) + * + * Second, we add to the integration (exp(a) incl. in integral) + * with the right factorial denominators. + */ + t_prev = ilim[0]; sb = 0.0; + for (i=0; i<=lf->mi[MN]; i++) + { if (i==lf->mi[MN]) + { dfx[0] = tmax-t[0]; + for (j=1; jmi[MN]); + } + else + { eb = exp(base(lf,i)); sb += base(lf,i); + for (j=0; jilim[0]) /* else it doesn't contribute */ + { +/* time integral */ + il1 = (dfx[0]>ilim[d]) ? ilim[d] : dfx[0]; + if (il1 != t_prev) /* don't repeat! */ + { st = onedint(ncf,lf->mi,ilim[0]/hs,il1/hs,prod_wk[0]); + if (st>0) return(st); + hj = eb; + for (j=0; j<=2*deg; j++) + { hj *= hs; + prod_wk[0][j] *= hj; + } + t_prev = il1; + } + +/* covariate terms */ + for (j=1; j0; k--) ef = (ef+dfx[j])*cf[1+(k-1)*d+j]; + ef = exp(ef); + prod_wk[j][0] = ef * W(dfx[j]/(h*lf->sca[j]),lf->mi[MKER]); + for (k=1; k<=2*deg; k++) + prod_wk[j][k] = prod_wk[j][k-1] * dfx[j]; + } + +/* add to the integration. */ + prodint_resp(resp,prod_wk,d,deg,p); + } /* if dfx0 > ilim0 */ + } /* n loop */ + +/* symmetrize */ + for (k=0; kmi[MDIM]==1) return(hazint_prod(t,resp,resp1,lf,cf,h)); + if (lf->mi[MKT]==KPROD) return(hazint_prod(t,resp,resp1,lf,cf,h)); + + return(hazint_sph(t,resp,resp1,lf,cf,h)); +} + +void haz_init(lf,des,il) +lfit *lf; +design *des; +double *il; +{ int i; + tmax = datum(lf,0,0); + for (i=1; imi[MN]; i++) tmax = MAX(tmax,datum(lf,0,i)); + ff = des->xtwx.wk; + for (i=0; i<2*lf->mi[MDIM]; i++) ilim[i] = il[i]; +} diff --git a/src/locfit/dens_int.c b/src/locfit/dens_int.c new file mode 100644 index 0000000..eda5df3 --- /dev/null +++ b/src/locfit/dens_int.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * The function dens_integrate(lf,des,z) is used to integrate a density + * estimate (z=1) or the density squared (z=2). This is used to renormalize + * the estimate (function dens_renorm) or in the computation of LSCV + * (function dnes_lscv). The implementation is presently for d=1. + * + * The computation orders the fit points selected by locfit, and + * integrates analytically over each interval. For the log-link, + * the interpolant used is peicewise quadratic (with one knot in + * the middle of each interval); this differs from the cubic interpolant + * used elsewhere in Locfit. + * + * TODO: allow for xlim. What can be done simply in >=2 dimensions? + * fix df computation (in lscv) for link=IDENT. + */ + +#include "local.h" + +/* + * Finds the order of observations in the array x, and + * stores in integer array ind. + * At input, lset l=0 and r=length(x)-1. + * At output, x[ind[0]] <= x[ind[1]] <= ... + */ +void lforder(ind,x,l,r) +INT *ind, l, r; +double *x; +{ double piv; + INT i, i0, i1; + piv = (x[ind[l]]+x[ind[r]])/2; + i0 = l; i1 = r; + while (i0<=i1) + { while ((i0<=i1) && (x[ind[i0]]<=piv)) i0++; + while ((i0<=i1) && (x[ind[i1]]>piv)) i1--; + if (i0=l) && (x[ind[i1]]==piv)) i1--; + for (i=l; i<=i1; i++) + if (x[ind[i]]==piv) + { ISWAP(ind[i],ind[i1]); + while (x[ind[i1]]==piv) i1--; + } + + if (lmi[MDIM]>=2) + { WARN(("dens_integrate requires d=1")); + return(0.0); + } + + link = lf->mi[MLINK]; + has_deriv = (lf->mi[MDEG] > 0); /* not right? 
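     -- added note: lforder() above is a recursive partition sort of the
     index array, so the evaluation points can be visited in increasing
     order; the loop below then accumulates the closed-form integral of
     the interpolated (log-)density over each interval between
     consecutive ordered vertices, as described in the file header.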
*/ + fit = lf->coef; + if (has_deriv) + deriv = &lf->coef[lf->nvm]; + xev = vdptr(lf->xxev); + + /* + * order the vertices + */ + nv = lf->nv; + if (lf->mi[MN]ind; + for (i=0; i= 0) WARN(("dens_integrate - ouch!")); + if (z==2) + { if (link==LLOG) + { f0 *= 2; d0 *= 2; } + else + { d0 = 2*d0*f0; f0 = f0*f0; } + } + term = (link==LIDENT) ? -f0*f0/(2*d0) : exp(f0)/d0; + sum += term; + + for (i=1; inv; i++) lf->coef[i] -= sum; +} + +void dens_lscv(des,lf) +lfit *lf; +design *des; +{ double df, fh, fh_cv, infl, z0, z1, x[MXDIM]; + int i, n, j, ev; + z1 = df = 0.0; + ev = lf->mi[MEV]; + n = lf->mi[MN]; + if ((ev==EDATA) | (ev==ECROS)) ev = EFITP; + + z0 = dens_integrate(lf,des,2); + + for (i=0; imi[MDIM]; j++) x[j] = datum(lf,j,i); + fh = base(lf,i)+dointpoint(lf,des,x,PCOEF,ev,i); + if (lf->mi[MLINK]==LLOG) fh = exp(fh); + infl = dointpoint(lf,des,x,PT0,ev,i); + infl = infl * infl; + if (infl>1) infl = 1; + fh_cv = (lf->mi[MLINK] == LIDENT) ? + (n*fh - infl) / (n-1.0) : fh*(1-infl)*n/(n-1.0); + z1 += fh_cv; + df += infl; + } + + vdptr(lf->L)[0] = z0-2*z1/n; + vdptr(lf->L)[1] = df; +} diff --git a/src/locfit/dens_odi.c b/src/locfit/dens_odi.c new file mode 100644 index 0000000..e7724ec --- /dev/null +++ b/src/locfit/dens_odi.c @@ -0,0 +1,515 @@ +/* + * Copyright (c) 1996-200 Lucent Technologies. + * See README file for details. + * + * + * + * Routines for one-dimensional numerical integration + * in density estimation. The entry point is + * + * onedint(cf,mi,l0,l1,resp) + * + * which evaluates int W(u)u^j exp( P(u) ), j=0..2*deg. + * P(u) = cf[0] + cf[1]u + cf[2]u^2/2 + ... + cf[deg]u^deg/deg! + * l0 and l1 are the integration limits. + * The results are returned through the vector resp. + * + */ + +#include "local.h" + +static int debug; + +INT exbctay(b,c,n,z) /* n-term taylor series of e^(bx+cx^2) */ +double b, c, *z; +INT n; +{ double ec[20]; + INT i, j; + z[0] = 1; + for (i=1; i<=n; i++) z[i] = z[i-1]*b/i; + if (c==0.0) return(n); + if (n>=40) + { WARN(("exbctay limit to n<40")); + n = 39; + } + ec[0] = 1; + for (i=1; 2*i<=n; i++) ec[i] = ec[i-1]*c/i; + for (i=n; i>1; i--) + for (j=1; 2*j<=i; j++) + z[i] += ec[j]*z[i-2*j]; + return(n); +} + +double explinjtay(l0,l1,j,cf) +/* int_l0^l1 x^j e^(a+bx+cx^2); exbctay aroud l1 */ +double l0, l1, *cf; +INT j; +{ double tc[40], f, s; + INT k, n; + if ((l0!=0.0) | (l1!=1.0)) WARN(("explinjtay: invalid l0, l1")); + n = exbctay(cf[1]+2*cf[2]*l1,cf[2],20,tc); + s = tc[0]/(j+1); + f = 1/(j+1); + for (k=1; k<=n; k++) + { f *= -k/(j+k+1.0); + s += tc[k]*f; + } + return(f); +} + +void explint1(l0,l1,cf,I,p) /* int x^j exp(a+bx); j=0..p-1 */ +double l0, l1, *cf, *I; +INT p; +{ double y0, y1, f; + INT j, k, k1; + y0 = lf_exp(cf[0]+l0*cf[1]); + y1 = lf_exp(cf[0]+l1*cf[1]); + if (p<2*fabs(cf[1])) k = p; else k = (INT)fabs(cf[1]); + + if (k>0) + { I[0] = (y1-y0)/cf[1]; + for (j=1; j1.0e-8)) /* initially Ik = diff(x^{k+1}e^{a+bx}) */ + { y1 *= l1; y0 *= l0; + I[k] = y1-y0; + if (k>=p) f *= fabs(cf[1])/(k+1); + k++; + } + if (k==50) WARN(("explint1: want k>50")); + I[k] = 0.0; + for (j=k-1; j>=k1; j--) /* now do back step recursion */ + I[j] = (I[j]-cf[1]*I[j+1])/(j+1); +} + +void explintyl(l0,l1,cf,I,p) /* small c, use taylor series and explint1 */ +double l0, l1, *cf, *I; +INT p; +{ INT i; + double c; + explint1(l0,l1,cf,I,p+8); + c = cf[2]; + for (i=0; i=0; i--) + { s = X[3*i+2]/X[3*i+4]; + X[3*i+2] = 0; + y[i] -= s*y[i+1]; + } + for (i=0; i0) + { if (a0>6) I[0] = (y0*ptail(-a0)-y1*ptail(-a1))/c; + else I[0] = S2PI*(pnorm(-a0,0.0,1.0)-pnorm(-a1,0.0,1.0))*bi; 
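      /* added note: when cf[2] < 0 the integrand exp(cf[0]+cf[1]*x+cf[2]*x*x)
       * is proportional to a Gaussian density, so I[0] reduces to a
       * difference of normal probabilities after completing the square;
       * ptail() covers the extreme tails, where that difference would
       * lose accuracy, and for cf[2] > 0 the Dawson function daws() is
       * used instead (the final branch of this function). */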
+ } + else + { if (a1< -6) I[0] = (y1*ptail(a1)-y0*ptail(a0))/c; + else I[0] = S2PI*(pnorm(a1,0.0,1.0)-pnorm(a0,0.0,1.0))*bi; + } + } + else + I[0] = (y1*daws(a1)-y0*daws(a0))/c; + I[1] = (y1-y0)/(2*cf[2])+d*I[0]; +} + +void explinsid(l0,l1,cf,I,p) /* large b; don't use fwd recursion */ +double l0, l1, *cf, *I; +INT p; +{ INT k, k0, k1, k2; + double y0, y1, Z[150]; +if (debug) printf("side: %8.5f %8.5f %8.5f limt %8.5f %8.5f p %2d\n",cf[0],cf[1],cf[2],l0,l1,p); + + k0 = 2; + k1 = (INT)(fabs(cf[1])+fabs(2*cf[2])); + if (k1<2) k1 = 2; + if (k1>p+20) k1 = p+20; + k2 = p+20; + + if (debug) printf("k0 %2d k1 %2d k2 %2d p %2d\n",k0,k1,k2,p); + + y0 = lf_exp(cf[0]+l0*(cf[1]+l0*cf[2])); + y1 = lf_exp(cf[0]+l1*(cf[1]+l1*cf[2])); + initi0i1(I,cf,y0,y1,l0,l1); +if (debug) printf("i0 %8.5f i1 %8.5f\n",I[0],I[1]); + + y1 *= l1; y0 *= l0; /* should be x^(k1)*exp(..) */ + if (k0=k1; k--) + I[k] = (I[k]-cf[1]*I[k+1]-2*cf[2]*I[k+2])/(k+1); + + if (k0=0; k--) + I[k] = (I[k]-cf[1]*I[k+1]-2*cf[2]*I[k+2])/(k+1); +} + +void explinfbk0(l0,l1,cf,I,p) /* fwd and bac recur; b=0; c<0 */ +double l0, l1, *cf, *I; +INT p; +{ double y0, y1, f1, f2, f, ml2; + INT k, ks; + + y0 = lf_exp(cf[0]+l0*l0*cf[2]); + y1 = lf_exp(cf[0]+l1*l1*cf[2]); + initi0i1(I,cf,y0,y1,l0,l1); + + ml2 = MAX(l0*l0,l1*l1); + ks = 1+(INT)(2*fabs(cf[2])*ml2); + if (ks<2) ks = 2; + if (ks>p-3) ks = p; + + /* forward recursion for k < ks */ + for (k=2; k1.0e-8) + { y1 *= l1; y0 *= l0; + if ((k-p)%2==0) /* add to I[p-2] */ + { f2 *= -2*cf[2]/(k+1); + I[p-2] += (y1-y0)*f2; + } + else /* add to I[p-1] */ + { f1 *= -2*cf[2]/(k+1); + I[p-1] += (y1-y0)*f1; + f *= 2*fabs(cf[2])*ml2/(k+1); + } + k++; + } + + /* use back recursion for I[ks..(p-3)] */ + for (k=p-3; k>=ks; k--) + I[k] = (I[k]-2*cf[2]*I[k+2])/(k+1); +} + +void explinfbk(l0,l1,cf,I,p) /* fwd and bac recur; b not too large */ +double l0, l1, *cf, *I; +INT p; +{ double y0, y1; + INT k, ks, km; + + y0 = lf_exp(cf[0]+l0*(cf[1]+l0*cf[2])); + y1 = lf_exp(cf[0]+l1*(cf[1]+l1*cf[2])); + initi0i1(I,cf,y0,y1,l0,l1); + + ks = (INT)(3*fabs(cf[2])); + if (ks<3) ks = 3; + if (ks>0.75*p) ks = p; /* stretch the forward recurs as far as poss. 
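     Added note: the loop below fills I[k] for k < ks by forward
     recursion, and the remaining moments are obtained by the backward
     recursion at the end of this function; the choice of ks trades off
     the two recursions -- compare explinsid() above, which avoids
     forward recursion altogether when the linear coefficient is large.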
*/ + /* forward recursion for k < ks */ + for (k=2; k=ks; k--) + I[k] = (I[k]-cf[1]*I[k+1]-2*cf[2]*I[k+2])/(k+1); +} + +void recent(I,resp,wt,p,s,x) +double *I, *resp, *wt, x; +INT p, s; +{ INT i, j; + + /* first, use W taylor series I -> resp */ + for (i=0; i<=p; i++) + { resp[i] = 0.0; + for (j=0; j 0 */ + if (x==0) return; + for (j=0; j<=p; j++) for (i=p; i>j; i--) resp[i] += x*resp[i-1]; +} + +void recurint(l0,l2,cf,resp,p,ker) +double l0, l2, *cf, *resp; +INT p, ker; +{ INT i, s; + double l1, d0, d1, d2, dl, z0, z1, z2, wt[20], ncf[3], I[50], r1[5], r2[5]; +if (debug) printf("\nrecurint: %8.5f %8.5f %8.5f %8.5f %8.5f\n",cf[0],cf[1],cf[2],l0,l2); + + if (cf[2]==0) /* go straight to explint1 */ + { s = wtaylor(wt,0.0,ker); +if (debug) printf("case 1\n"); + explint1(l0,l2,cf,I,p+s); + recent(I,resp,wt,p,s,0.0); + return; + } + + dl = l2-l0; + d0 = cf[1]+2*l0*cf[2]; + d2 = cf[1]+2*l2*cf[2]; + z0 = cf[0]+l0*(cf[1]+l0*cf[2]); + z2 = cf[0]+l2*(cf[1]+l2*cf[2]); + + if ((fabs(cf[1]*dl)<1) && (fabs(cf[2]*dl*dl)<1)) + { ncf[0] = z0; ncf[1] = d0; ncf[2] = cf[2]; +if (debug) printf("case 2\n"); + s = wtaylor(wt,l0,ker); + explinbkr(0.0,dl,ncf,I,p+s); + recent(I,resp,wt,p,s,l0); + return; + } + + if (fabs(cf[2]*dl*dl)<0.001) /* small c, use explint1+tay.ser */ + { ncf[0] = z0; ncf[1] = d0; ncf[2] = cf[2]; +if (debug) printf("case small c\n"); + s = wtaylor(wt,l0,ker); + explintyl(0.0,l2-l0,ncf,I,p+s); + recent(I,resp,wt,p,s,l0); + return; + } + + if (d0*d2<=0) /* max/min in [l0,l2] */ + { l1 = -cf[1]/(2*cf[2]); + z1 = cf[0]+l1*(cf[1]+l1*cf[2]); + d1 = 0.0; + if (cf[2]<0) /* peak, integrate around l1 */ + { s = wtaylor(wt,l1,ker); + ncf[0] = z1; ncf[1] = 0.0; ncf[2] = cf[2]; +if (debug) printf("case peak p %2d s %2d\n",p,s); + explinfbk0(l0-l1,l2-l1,ncf,I,p+s); + recent(I,resp,wt,p,s,l1); + return; + } + } + + if ((d0-2*cf[2]*dl)*(d2+2*cf[2]*dl)<0) /* max/min is close to [l0,l2] */ + { l1 = -cf[1]/(2*cf[2]); + z1 = cf[0]+l1*(cf[1]+l1*cf[2]); + if (l1l2) { l1 = l2; z1 = z2; } + + if ((z1>=z0) & (z1>=z2)) /* peak; integrate around l1 */ + { s = wtaylor(wt,l1,ker); +if (debug) printf("case 4\n"); + d1 = cf[1]+2*l1*cf[2]; + ncf[0] = z1; ncf[1] = d1; ncf[2] = cf[2]; + explinfbk(l0-l1,l2-l1,ncf,I,p+s); + recent(I,resp,wt,p,s,l1); + return; + } + + /* trough; integrate [l0,l1] and [l1,l2] */ + for (i=0; i<=p; i++) r1[i] = r2[i] = 0.0; + if (l0z0+3) /* steep increase, expand around l2 */ + { s = wtaylor(wt,l2,ker); +if (debug) printf("case 7\n"); + + + ncf[0] = z2; ncf[1] = d2; ncf[2] = cf[2]; + explinsid(l0-l2,0.0,ncf,I,p+s); + recent(I,resp,wt,p,s,l2); +if (debug) printf("7 resp: %8.5f %8.5f %8.5f %8.5f\n",resp[0],resp[1],resp[2],resp[3]); + return; + } + + /* bias towards expansion around l0, because it's often 0 */ +if (debug) printf("case 8\n"); + s = wtaylor(wt,l0,ker); + ncf[0] = z0; ncf[1] = d0; ncf[2] = cf[2]; + explinsid(0.0,l2-l0,ncf,I,p+s); + recent(I,resp,wt,p,s,l0); + return; +} + +INT onedexpl(cf,mi,resp) +double *cf, *resp; +INT *mi; +{ INT i; + double f0, fr, fl; + if (mi[MDEG]>=2) ERROR(("onedexpl only valid for deg=0,1")); + if (fabs(cf[1])>=EFACT) return(LF_BADP); + + f0 = exp(cf[0]); fl = fr = 1.0; + for (i=0; i<=2*mi[MDEG]; i++) + { f0 *= i+1; + fl /=-(EFACT+cf[1]); + fr /= EFACT-cf[1]; + resp[i] = f0*(fr-fl); + } + return(LF_OK); +} + +INT onedgaus(cf,mi,resp) +double *cf, *resp; +INT *mi; +{ INT i; + double f0, mu, s2; + if (mi[MDEG]>=3) + { ERROR(("onedgaus only valid for deg=0,1,2")); + return(LF_ERR); + } + if (2*cf[2]>=GFACT*GFACT) return(LF_BADP); + + s2 = 
1/(GFACT*GFACT-2*cf[2]); + mu = cf[1]*s2; + resp[0] = 1.0; + if (mi[MDEG]>=1) + { resp[1] = mu; + resp[2] = s2+mu*mu; + if (mi[MDEG]==2) + { resp[3] = mu*(3*s2+mu*mu); + resp[4] = 3*s2*s2 + mu*mu*(6*s2+mu*mu); + } + } + f0 = S2PI * exp(cf[0]+mu*mu/(2*s2))*sqrt(s2); + for (i=0; i<=2*mi[MDEG]; i++) resp[i] *= f0; + return(LF_OK); +} + +INT onedint(cf,mi,l0,l1,resp) /* int W(u)u^j exp(..), j=0..2*deg */ +double *cf, l0, l1, *resp; +INT *mi; +{ double u, uj, y, ncf[4], rr[5]; + INT deg, i, j; + memset(rr, 0, sizeof(rr)); +if (debug) printf("onedint: %f %f %f %f %f\n",cf[0],cf[1],cf[2],l0,l1); + deg = mi[MDEG]; + + if (deg<=2) + { for (i=0; i<3; i++) ncf[i] = (i>deg) ? 0.0 : cf[i]; + ncf[2] /= 2; + + if (mi[MKER]==WEXPL) return(onedexpl(ncf,mi,resp)); + if (mi[MKER]==WGAUS) return(onedgaus(ncf,mi,resp)); + + if (l1>0) + recurint(MAX(l0,0.0),l1,ncf,resp,2*deg,mi[MKER]); + else for (i=0; i<=2*deg; i++) resp[i] = 0; + + if (l0<0) + { ncf[1] = -ncf[1]; + l0 = -l0; l1 = -l1; + recurint(MAX(l1,0.0),l0,ncf,rr,2*deg,mi[MKER]); + } + else for (i=0; i<=2*deg; i++) rr[i] = 0.0; + + for (i=0; i<=2*deg; i++) + resp[i] += (i%2==0) ? rr[i] : -rr[i]; + + return(LF_OK); + } + + /* For degree >= 3, we use Simpson's rule. */ + for (j=0; j<=2*deg; j++) resp[j] = 0.0; + for (i=0; i<=mi[MMINT]; i++) + { u = l0+(l1-l0)*i/mi[MMINT]; + y = cf[0]; uj = 1; + for (j=1; j<=deg; j++) + { uj *= u; + y += cf[j]*uj/fact[j]; + } + y = (4-2*(i%2==0)-(i==0)-(i==mi[MMINT])) * + W(fabs(u),mi[MKER])*exp(MIN(y,300.0)); + for (j=0; j<=2*deg; j++) + { resp[j] += y; + y *= u; + } + } + for (j=0; j<=2*deg; j++) resp[j] = resp[j]*(l1-l0)/(3*mi[MMINT]); + return(LF_OK); +} + diff --git a/src/locfit/density.c b/src/locfit/density.c new file mode 100644 index 0000000..45b4f60 --- /dev/null +++ b/src/locfit/density.c @@ -0,0 +1,509 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
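 */

/*
 * Illustrative sketch (added; not part of locfit): the composite
 * Simpson rule that onedint() in dens_odi.c falls back on for local
 * polynomial degrees of 3 or more, written as a self-contained helper.
 * The weight expression reproduces the 1,4,2,4,...,2,4,1 pattern, and
 * the same weighting reappears in multint() below; m is assumed even.
 */
static double simpson_sketch(double (*g)(double), double l0, double l1, int m)
{ int i;
  double u, s = 0.0;
  for (i = 0; i <= m; i++)
  { u = l0 + (l1-l0)*i/m;
    s += (4 - 2*(i%2==0) - (i==0) - (i==m)) * g(u);
  }
  return s*(l1-l0)/(3*m);
}

/* (end of added sketch; the original density.c source follows)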
+ */ + +#include "local.h" + +extern int lf_status; +static double u[MXDIM], ilim[2*MXDIM], *ff; +static lfit *den_lf; +static design *den_des; +INT fact[] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800}; + +INT multint(), prodint(), gausint(), mlinint(); + +void prresp(coef,resp,p) +double *coef, *resp; +INT p; +{ INT i, j; + printf("Coefficients:\n"); + for (i=0; imi[MDIM]; p = lf->mi[MP]; + setzero(resp1,p*p); + m = 1; m1 = lf->mi[MMINT]+1; + for (i=0; imi[MMINT]; + } + for (i=0; i=0; j--) + { z1 = z%m1; + u[j] = t[j]+ilim[j]+dj[j]*z1; + w *= (4-2*(z1%2==0)-(z1==0)-(z1==lf->mi[MMINT])); + z /= m1; + } + wt = w*weight(lf,u,t,h,0,0.0); + if (wt>0) + { fitfun(lf,u,t,ff,NULL,0); + th = innerprod(ff,cf,p); + switch(lf->mi[MLINK]) + { case LLOG: + addouter(resp1,ff,ff,p,wt*lf_exp(th)); + break; + case LIDENT: + addouter(resp1,ff,ff,p,wt); + break; + default: + ERROR(("multint: Invalid link")); + return(LF_LNK); + } + } + } + wt = 1; + for (j=0; jmi[MDIM]; p = lf->mi[MP]; sca = lf->sca; + hd = 1; + for (i=0; imi[MLINK]==LIDENT) + { setzero(resp1,p*p); + resp1[0] = wint(d,NULL,0,lf->mi[MKER])*hd; + if (lf->mi[MDEG]==0) return(LF_OK); + jj[0] = 2; w0 = wint(d,jj,1,lf->mi[MKER])*hd*h*h; + for (i=0; imi[MDEG]==1) return(LF_OK); + for (i=0; i1) { jj[1] = 2; w0 = wint(d,jj,2,lf->mi[MKER])*hd*h*h*h*h; } + jj[0] = 4; w1 = wint(d,jj,1,lf->mi[MKER])*hd*h*h*h*h/4; + z = d+1; + for (i=0; imi[MDEG]) + { case 0: + resp1[0] = lf_exp(cf[0])*wint(d,NULL,0,lf->mi[MKER])*hd; + return(LF_OK); + case 1: + nb = 0.0; + for (i=1; i<=d; i++) + { v = h*cf[i]*sca[i-1]; + nb += v*v; + } + if (lf->mi[MKER]==WGAUS) + { w0 = 1/(GFACT*GFACT); + g[0] = lf_exp(cf[0]+w0*nb/2+d*log(S2PI/2.5)); + g[1] = g[3] = g[0]*w0; + g[2] = g[0]*w0*w0; + } + else + { wt = wu = lf_exp(cf[0]); + w0 = wint(d,NULL,0,lf->mi[MKER]); g[0] = wt*w0; + g[1] = g[2] = g[3] = 0.0; + j = 0; jmax = (d+2)*lf->mi[MMINT]; + while ((j1.0e-8)) + { j++; + jj[0] = 2*j; w0 = wint(d,jj,1,lf->mi[MKER]); + if (d==1) g[3] += wt * w0; + else + { jj[0] = 2; jj[1] = 2*j-2; w1 = wint(d,jj,2,lf->mi[MKER]); + g[3] += wt*w1; + g[2] += wu*(w0-w1); + } + wt /= (2*j-1.0); g[1] += wt*w0; + wt *= nb/(2*j); g[0] += wt*w0; + wu /= (2*j-1.0)*(2*j); + if (j>1) wu *= nb; + } + if (j==jmax) WARN(("mlinint: series not converged")); + } + g[0] *= hd; g[1] *= hd; + g[2] *= hd; g[3] *= hd; + resp1[0] = g[0]; + for (i=1; i<=d; i++) + { resp1[i] = resp1[(d+1)*i] = cf[i]*SQR(h*sca[i-1])*g[1]; + for (j=1; j<=d; j++) + { resp1[(d+1)*i+j] = (i==j) ? g[3]*SQR(h*sca[i-1]) : 0; + resp1[(d+1)*i+j] += g[2]*SQR(h*h*sca[i-1]*sca[j-1])*cf[i]*cf[j]; + } + } + return(LF_OK); + } + ERROR(("mlinint: deg=0,1 only")); + return(LF_ERR); +} + +void prodint_resp(resp,prod_wk,dim,deg,p) +double *resp, prod_wk[MXDIM][2*MXDEG+1]; +int dim, deg, p; +{ double prod; + int i, j, k, j1, k1; + + prod = 1.0; + for (i=0; imi[MDIM]; + deg = lf->mi[MDEG]; + p = lf->mi[MP]; + for (i=0; isca[i]; + for (j=0; jmi,ilim[i]/hs,ilim[i+dim]/hs,prod_wk[i]); + if (st==LF_BADP) return(st); + hj = 1; + for (j=0; j<=2*deg; j++) + { hj *= hs; + prod_wk[i][j] *= hj; + } + cf[0] = 0.0; /* so we only include it once, when d>=2 */ + } + +/* transfer to the resp array + */ + prodint_resp(resp,prod_wk,dim,deg,p); + +/* Symmetrize. 
+*/ + for (k=0; kp; + if ((den_lf->mi[MLINK]==LIDENT) && (coef[0] != 0.0)) return(NR_BREAK); + lf_status = (den_des->itype)(den_des->xev,A,den_des->xtwx.Q,den_lf,coef,den_des->h); + if (lf_error) lf_status = LF_ERR; + if (lf_status==LF_BADP) + { *lk0 = -1.0e300; + return(NR_REDUCE); + } + if (lf_status!=LF_OK) return(NR_BREAK); + if (den_lf->mi[MDEB]>2) prresp(coef,A,(INT)p); + + den_des->xtwx.p = p; + rstat = NR_OK; + switch(den_lf->mi[MLINK]) + { case LLOG: + r = den_des->ss[0]/A[0]; + coef[0] += log(r); + multmatscal(A,r,(INT)p*p); + A[0] = den_des->ss[0]; + lk = -A[0]; + if (fabs(coef[0]) > 700) + { lf_status = LF_OOB; + rstat = NR_REDUCE; + } + for (i=0; iss[i]; + f1[i] = den_des->ss[i]-A[i]; + } + break; + case LIDENT: + lk = 0.0; + for (i=0; iss[i]; + for (j=0; jres[i] -= A[i*p+j]*coef[j]; + } + break; + } + *lk0 = den_des->llk = lk; + + return(rstat); +} + +INT inre(x,bound,d) +double *x, *bound; +INT d; +{ INT i, z; + z = 1; + for (i=0; i=bound[i]) & (x[i]<=bound[i+d]); + return(z); +} + +INT setintlimits(lf, x, h, ang, lset) +lfit *lf; +INT *ang, *lset; +double *x, h; +{ INT d, i; + d = lf->mi[MDIM]; + *ang = *lset = 0; + for (i=0; isty[i]==STANGL) + { ilim[i+d] = ((h<2) ? 2*asin(h/2) : PI)*lf->sca[i]; + ilim[i] = -ilim[i+d]; + *ang = 1; + } + else + { ilim[i+d] = h*lf->sca[i]; + ilim[i] = -ilim[i+d]; + + if (lf->sty[i]==STLEFT) { ilim[i+d] = 0; *lset = 1; } + if (lf->sty[i]==STRIGH) { ilim[i] = 0; *lset = 1; } + + if (lf->xl[i]xl[i+d]) /* user limits for this variable */ + { if (lf->xl[i]-x[i]> ilim[i]) + { ilim[i] = lf->xl[i]-x[i]; *lset=1; } + if (lf->xl[i+d]-x[i]< ilim[i+d]) + { ilim[i+d] = lf->xl[i+d]-x[i]; *lset=1; } + } + } + if (ilim[i]==ilim[i+d]) return(LF_DEMP); /* empty integration */ + } + return(LF_OK); +} + +INT selectintmeth(mi,lset,ang) +INT *mi, lset, ang; +{ + if (mi[MIT]==IDEFA) /* select the default method */ + { if (mi[MTG]==THAZ) + { if (ang) return(IDEFA); + return( IHAZD ); + } + + if (mi[MUBAS]) return(IMULT); + + if (ang) return(IMULT); + + if (iscompact(mi[MKER])) + { if (mi[MKT]==KPROD) return(IPROD); + if (lset) + return( (mi[MDIM]==1) ? IPROD : IMULT ); + if (mi[MDEG]<=1) return(IMLIN); + if (mi[MDIM]==1) return(IPROD); + return(IMULT); + } + + if (mi[MKER]==WGAUS) + { if (lset) WARN(("Integration for Gaussian weights ignores limits")); + if ((mi[MDIM]==1)|(mi[MKT]==KPROD)) return(IPROD); + return(IMLIN); + } + + return(IDEFA); + } + + /* user provided an integration method, check it is valid */ + + if (mi[MTG]==THAZ) + { if (ang) return(INVLD); + if (!iscompact(mi[MKER])) return(INVLD); + return( ((mi[MKT]==KPROD) | (mi[MKT]==KSPH)) ? IHAZD : INVLD ); + } + + if ((ang) && (mi[MIT] != IMULT)) return(INVLD); + + switch(mi[MIT]) + { case IMULT: return( iscompact(mi[MKER]) ? IMULT : INVLD ); + case IPROD: return( ((mi[MDIM]==1) | (mi[MKT]==KPROD)) ? IPROD : INVLD ); + case IMLIN: return( ((mi[MKT]==KSPH) && (!lset) && + (mi[MDEG]<=1)) ? 
IMLIN : INVLD ); + } + + return(INVLD); +} + +INT densinit(lf,des,h,cf,m) +lfit *lf; +design *des; +double h, *cf; +INT m; +{ INT deg, p, i, ii, j, nnz, rnz, lset, ang, status; + double w; + + den_lf = lf; + den_des = des; + + p = des->p; deg = lf->mi[MDEG]; + ff = des->xtwx.wk; + cf[0] = NOSLN; + for (i=1; ixev,lf->xl,lf->mi[MDIM])) return(LF_XOOR); + + status = setintlimits(lf,des->xev,h,&ang,&lset); + if (status != LF_OK) return(status); + + switch(selectintmeth(lf->mi,lset,ang)) + { case IMULT: des->itype = multint; break; + case IPROD: des->itype = prodint; break; + case IMLIN: des->itype = mlinint; break; + case IHAZD: des->itype = hazint; break; + case INVLD: ERROR(("Invalid integration method %d",lf->mi[MIT])); + break; + case IDEFA: ERROR(("No integration type available for this model")); + break; + default: ERROR(("densinit: unknown integral type")); + } + + switch(deg) + { case 0: rnz = 1; break; + case 1: rnz = 1; break; + case 2: rnz = lf->mi[MDIM]+1; break; + case 3: rnz = lf->mi[MDIM]+2; break; + default: ERROR(("densinit: invalid degree %d",deg)); + } + if (lf_error) return(LF_ERR); + + setzero(des->ss,p); + nnz = 0; + for (i=0; iind[i]; + if (!cens(lf,ii)) + { w = des->w[i]*prwt(lf,ii); + for (j=0; jss[j] += d_xij(des,i,j)*w; + if (des->w[i]>0.00001) nnz++; + } } + + if (lf->mi[MTG]==THAZ) haz_init(lf,des,ilim); + + if (lf->mi[MDEB]>2) + { printf(" LHS: "); + for (i=0; iss[i]); + printf("\n"); + } + + switch(lf->mi[MLINK]) + { case LIDENT: + cf[0] = 0.0; + return(LF_OK); + case LLOG: + if (nnzcfn[i]) +#define d_x(des) ((des)->X) +#define d_xi(des,i) (&(des)->X[i*((des)->p)]) +#define d_xij(des,i,j) ((des)->X[i*((des)->p)+j]) +#define is_fixed(des,i) ((des)->fix[i]==1) + +extern int des_reqd(), des_reqi(); diff --git a/src/locfit/dist.c b/src/locfit/dist.c new file mode 100644 index 0000000..7eec06c --- /dev/null +++ b/src/locfit/dist.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
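 *
 * Added note: this file collects the distribution functions used for
 * critical values and p-values.  ibeta() and igamma() evaluate the
 * incomplete beta and gamma functions by continued-fraction and series
 * expansions; the wrappers pf() and pchisq() express the F and
 * chi-squared cdfs in terms of them, while pnorm() uses erf/erfc
 * (or R's Rf_pnorm5 when compiled with RVERSION).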
+ */ + +#include "local.h" + +#define LOG_2 0.6931471805599453094172321214581765680755 +#define IBETA_LARGE 1.0e30 +#define IBETA_SMALL 1.0e-30 +#define IGAMMA_LARGE 1.0e30 +#define DOUBLE_EP 2.2204460492503131E-16 + +double dchisq(x, df) +double x, df; +{ return(exp(log(x/2)*(df/2-1) - x/2 - LGAMMA(df/2) - LOG_2)); +} + +double df(x, df1, df2) +double x, df1, df2; +{ double p; + p = exp(LGAMMA((df1+df2)/2) + df1/2*log(df1/df2) + (df1/2-1)*log(x) + - LGAMMA(df1/2) - LGAMMA(df2/2) - (df1+df2)/2*log(1+x*df1/df2)); + return(p); +} + +double ibeta(x, a, b) +double x, a, b; +{ int flipped = 0, i, k, count; + double I = 0, temp, pn[6], ak, bk, next, prev, factor, val; + if (x <= 0) return(0); + if (x >= 1) return(1); +/* use ibeta(x,a,b) = 1-ibeta(1-x,b,z) */ + if ((a+b+1)*x > (a+1)) + { flipped = 1; + temp = a; + a = b; + b = temp; + x = 1 - x; + } + pn[0] = 0.0; + pn[2] = pn[3] = pn[1] = 1.0; + count = 1; + val = x/(1.0-x); + bk = 1.0; + next = 1.0; + do + { count++; + k = count/2; + prev = next; + if (count%2 == 0) + ak = -((a+k-1.0)*(b-k)*val)/((a+2.0*k-2.0)*(a+2.0*k-1.0)); + else + ak = ((a+b+k-1.0)*k*val)/((a+2.0*k)*(a+2.0*k-1.0)); + pn[4] = bk*pn[2] + ak*pn[0]; + pn[5] = bk*pn[3] + ak*pn[1]; + next = pn[4] / pn[5]; + for (i=0; i<=3; i++) + pn[i] = pn[i+2]; + if (fabs(pn[4]) >= IBETA_LARGE) + for (i=0; i<=3; i++) + pn[i] /= IBETA_LARGE; + if (fabs(pn[4]) <= IBETA_SMALL) + for (i=0; i<=3; i++) + pn[i] /= IBETA_SMALL; + } while (fabs(next-prev) > DOUBLE_EP*prev); + factor = a*log(x) + (b-1)*log(1-x); + factor -= LGAMMA(a+1) + LGAMMA(b) - LGAMMA(a+b); + I = exp(factor) * next; + return(flipped ? 1-I : I); +} + +/* + * Incomplete gamma function. + * int_0^x u^{df-1} e^{-u} du / Gamma(df). + */ +double igamma(x, df) +double x, df; +{ double factor, term, gintegral, pn[6], rn, ak, bk; + int i, count, k; + if (x <= 0.0) return(0.0); + + if (df < 1.0) + return( exp(df*log(x)-x-LGAMMA(df+1.0)) + igamma(x,df+1.0) ); + +/* + * this is unstable for large df + */ + factor = exp(df*log(x) - x - LGAMMA(df)); + + if (x > 1.0 && x >= df) + { + pn[0] = 0.0; + pn[2] = pn[1] = 1.0; + pn[3] = x; + count = 1; + rn = 1.0 / x; + do + { count++; + k = count / 2; + gintegral = rn; + if (count%2 == 0) + { bk = 1.0; + ak = (double)k - df; + } else + { bk = x; + ak = (double)k; + } + pn[4] = bk*pn[2] + ak*pn[0]; + pn[5] = bk*pn[3] + ak*pn[1]; + rn = pn[4] / pn[5]; + for (i=0; i<4; i++) + pn[i] = pn[i+2]; + if (pn[4] > IGAMMA_LARGE) + for (i=0; i<4; i++) + pn[i] /= IGAMMA_LARGE; + } while (fabs(gintegral-rn) > DOUBLE_EP*rn); + gintegral = 1.0 - factor*rn; + } + else + { /* For x DOUBLE_EP*gintegral); + gintegral *= factor/df; + } + return(gintegral); +} + +double pf(q, df1, df2) +double q, df1, df2; +{ return(ibeta(q*df1/(df2+q*df1), df1/2, df2/2)); +} + +double pchisq(q, df) +double q, df; +{ return(igamma(q/2, df/2)); +} + +#ifdef RVERSION +extern double Rf_pnorm5(); +double pnorm(x,mu,s) +double x, mu, s; +{ return(Rf_pnorm5(x, mu, s, 1L, 0L)); +} +#else +double pnorm(x,mu,s) +double x, mu, s; +{ if(x == mu) + return(0.5); + x = (x-mu)/s; + if(x > 0) return((1 + erf(x/SQRT2))/2); + return(erfc(-x/SQRT2)/2); +} +#endif diff --git a/src/locfit/ev_atree.c b/src/locfit/ev_atree.c new file mode 100644 index 0000000..b659738 --- /dev/null +++ b/src/locfit/ev_atree.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * This file contains functions for constructing and + * interpolating the adaptive tree structure. 
This is + * the default evaluation structure used by Locfit. + */ + +#include "local.h" + +/* + Guess the number of fitting points. + Needs improving! +*/ +void atree_guessnv(nvm,ncm,vc,dp,mi) +double *dp; +INT *nvm, *ncm, *vc, *mi; +{ double a0, cu, ifl; + int i, nv, nc; + + *ncm = 1<<30; *nvm = 1<<30; + *vc = 1 << mi[MDIM]; + + if (dp[DALP]>0) + { a0 = (dp[DALP] > 1) ? 1 : 1/dp[DALP]; + if (dp[DCUT]<0.01) + { WARN(("guessnv: cut too small.")); + dp[DCUT] = 0.01; + } + cu = 1; + for (i=0; imi[MDIM]; vc = 1<h[ce[i]]; + if ((h>0) && ((hmin==0)|(hsca[i]; + if ((lf->sty[i]==STCPAR) || (hmin==0)) + score[i] = 2*(ur[i]-ll[i])/(lf->fl[i+d]-lf->fl[i]); + else + score[i] = le[i]/hmin; + if (score[i]>score[is]) is = i; + } + if (lf->dp[DCUT]mi[MDIM]; vc = 1<sty[i]!=STCPAR) && + (le[ns] < (lf->dp[DCUT]*MIN(lf->h[i0],lf->h[i1]))); + nce[i] = newsplit(des,lf,i0,i1,pv); + if (lf_error) return; + } + } + z = ur[ns]; ur[ns] = (z+ll[ns])/2; + atree_grow(des,lf,nce,ct,term,ll,ur); + if (lf_error) return; + ur[ns] = z; + for (i=0; imi[MDIM]; + atree_guessnv(&nvm,&ncm,&vc,lf->dp,lf->mi); + trchck(lf,nvm,ncm,d,des->p,vc); + + /* Set the lower left, upper right limits. */ + for (j=0; jfl[j]; + ur[j] = lf->fl[j+d]; + } + + /* Set the initial cell; fit at the vertices. */ + for (i=0; i>= 1; + } + lf->ce[i] = i; + des->vfun(des,lf,i); + if (lf_error) return; + lf->s[i] = 0; + } + lf->nv = vc; + + /* build the tree */ + atree_grow(des,lf,lf->ce,NULL,NULL,ll,ur); + lf->nce = 1; +} + +double atree_int(tr,x,what) +lfit *tr; +double *x; +INT what; +{ double vv[64][64], *ll, *ur, h, xx[MXDIM]; + INT d, i, lo, tk, ns, nv, nc, vc, ce[64]; + d = tr->mi[MDIM]; + vc = 1<mi[MDIM]; + for (i=0; ice[i]; + } + ns = 0; + while(ns!=-1) + { ll = evpt(tr,ce[0]); ur = evpt(tr,ce[vc-1]); + ns = atree_split(tr,ce,xx,ll,ur); + if (ns!=-1) + { tk = 1<s[nv]) exvvalpv(vv[i+tk],vv[i],vv[i+tk],d,ns,h,nc); + else exvval(tr,vv[i+tk],nv,d,what,1); + } + else + { ce[i] = nv; + if (tr->s[nv]) exvvalpv(vv[i],vv[i],vv[i+tk],d,ns,h,nc); + else exvval(tr,vv[i],nv,d,what,1); + } } + } } + ll = evpt(tr,ce[0]); ur = evpt(tr,ce[vc-1]); + return(rectcell_interp(x,vdptr(tr->xxev),vv,ll,ur,d,nc)); +} diff --git a/src/locfit/ev_interp.c b/src/locfit/ev_interp.c new file mode 100644 index 0000000..badc23e --- /dev/null +++ b/src/locfit/ev_interp.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +#include "local.h" + +double linear_interp(h,d,f0,f1) +double h, d, f0, f1; +{ if (d==0) return(f0); + return( ( (d-h)*f0 + h*f1 ) / d ); +} + +void hermite2(x,z,phi) +double x, z, *phi; +{ double h; + if (z==0) + { phi[0] = 1.0; phi[1] = phi[2] = phi[3] = 0.0; + return; + } + h = x/z; + if (h<0) + { phi[0] = 1; phi[1] = 0; + phi[2] = h; phi[3] = 0; + return; + } + if (h>1) + { phi[0] = 0; phi[1] = 1; + phi[2] = 0; phi[3] = h-1; + return; + } + phi[1] = h*h*(3-2*h); + phi[0] = 1-phi[1]; + phi[2] = h*(1-h)*(1-h); + phi[3] = h*h*(h - 1); +} + +double cubic_interp(h,f0,f1,d0,d1) +double h, f0, f1, d0, d1; +{ double phi[4]; + hermite2(h,1.0,phi); + return(phi[0]*f0+phi[1]*f1+phi[2]*d0+phi[3]*d1); +} + +double cubintd(h,f0,f1,d0,d1) +double h, f0, f1, d0, d1; +{ double phi[4]; + phi[1] = 6*h*(1-h); + phi[0] = -phi[1]; + phi[2] = (1-h)*(1-3*h); + phi[3] = h*(3*h-2); + return(phi[0]*f0+phi[1]*f1+phi[2]*d0+phi[3]*d1); +} + +/* + interpolate over a rectangular cell. + x = interpolation point. + xev = evaluation points (do I need this?) + vv = array of vertex values. + ll = lower left corner. 
+ ur = upper right corner. + d = dimension. + nc = no of coefficients. +*/ +double rectcell_interp(x,xev,vv,ll,ur,d,nc) +double *x, *xev, vv[64][64], *ll, *ur; +INT d, nc; +{ double phi[4]; + INT i, j, k, tk; + + tk = 1<=0; i--) + { tk = 1<=0; i--) + { hermite2(x[i]-ll[i],ur[i]-ll[i],phi); + tk = 1<=0; i--) + { hermite2(x[i]-ll[i],ur[i]-ll[i],phi); + tk = 1<coef; + break; + case PVARI: + case PNLX: + values = lf->nlx; + break; + case PT0: + values = lf->t0; + break; + case PBAND: + vv[0] = lf->h[nv]; + return(1); + case PDEGR: + vv[0] = lf->deg[nv]; + return(1); + case PLIK: + vv[0] = lf->lik[nv]; + return(1); + case PRDF: + vv[0] = lf->lik[2*lf->nvm+nv]; + return(1); + default: + ERROR(("Invalid what in exvval")); + return(0); + } + vv[0] = values[nv]; + if ((lf->mi[MDEG]==0) && (lf->mi[MDC]==0))return(1); + if (z) + { for (i=0; invm+nv]; + return(1<nvm+nv]; + return(d+1); + } +} + +void exvvalpv(vv,vl,vr,d,k,dl,nc) +double *vv, *vl, *vr, dl; +INT d, k, nc; +{ INT i, tk, td; + double f0, f1; + if (nc==1) + { vv[0] = (vl[0]+vr[0])/2; + return; + } + tk = 1<mi[MDIM]; + ll = evpt(tr,0); ur = evpt(tr,tr->nv-1); + z0 = 0; vc = 1<=0; j--) + { v[j] = (INT)((tr->mg[j]-1)*(x[j]-ll[j])/(ur[j]-ll[j])); + if (v[j]<0) v[j]=0; + if (v[j]>=tr->mg[j]-1) v[j] = tr->mg[j]-2; + z0 = z0*tr->mg[j]+v[j]; + } + nce[0] = z0; nce[1] = z0+1; sk = jj = 1; + memset(nce, 0, sizeof(nce)); + for (i=1; img[i-1]; + jj<<=1; + for (j=0; jxxev),vv,ll,ur,d,nc)); +} + +double fitpint(lf,x,what,i) +lfit *lf; +double *x; +INT what, i; +{ double vv[1+MXDIM]; + exvval(lf,vv,i,lf->mi[MDIM],what,0); + return(vv[0]); +} + +double dointpointpf(lf,des,x,what) +lfit *lf; +design *des; +double *x; +INT what; +{ locfit(lf,des,0.0,0); + if (what==PCOEF) return(des->cf[0]); + if ((what==PNLX)|(what==PT0)) return(sqrt(comp_infl(lf,des))); + ERROR(("dointpointpf: invalid what")); + return(0.0); +} + +double xbarint(lf,x,what) +lfit *lf; +double *x; +INT what; +{ INT i, nc; + double vv[1+MXDIM], f; + nc = exvval(lf,vv,0,lf->mi[MDIM],what,0); + f = vv[0]; + if (nc>1) + for (i=0; imi[MDIM]; i++) + f += vv[i+1]*(x[i]-evptx(lf,0,i)); + return(f); +} + +double dointpoint(lf,des,x,what,ev,j) +lfit *lf; +design *des; +double *x; +INT what, ev, j; +{ double xf, f; + INT i; + for (i=0; imi[MDIM]; i++) if (lf->sty[i]==STANGL) + { xf = floor(x[i]/(2*PI*lf->sca[i])); + x[i] -= xf*2*PI*lf->sca[i]; + } + f = 0; + if (ident==1) return(dointpointpf(lf,des,x,what)); + switch(ev) + { case EGRID: f = gridint(lf,x,what); break; + case EKDTR: f = kdtre_int(lf,x,what); break; + case ETREE: f = atree_int(lf,x,what); break; + case EPHULL: f = triang_int(lf,x,what); break; + case EFITP: f = fitpint(lf,x,what,j); break; + case EXBAR: f = xbarint(lf,x,what); break; + case ENONE: f = 0; break; + default: ERROR(("dointpoint: cannot interpolate this structure")); + } + if (((what==PT0)|(what==PNLX)) && (f<0)) f = 0.0; + f += addparcomp(lf,x,what); + return(f); +} diff --git a/src/locfit/ev_kdtre.c b/src/locfit/ev_kdtre.c new file mode 100644 index 0000000..5dbada6 --- /dev/null +++ b/src/locfit/ev_kdtre.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * Routines for building and interpolating the kd tree. + * Initially, this started from the loess code. + * + * Todo: EKDCE isn't working. 
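 *
 * Outline (added, summarizing the code below): kdtre_start() grows the
 * tree by repeatedly calling terminal(), which picks the variable with
 * the largest scaled range in the current cell and splits it at the
 * median located by the quickselect routine ksmall(); a cell with fewer
 * than fc observations becomes terminal.  kdtre_int() then descends the
 * tree at an interpolation point and blends the fits stored at the
 * corners of the terminal cell.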
+ */ + +#include "local.h" + +void newcell(); +static INT nterm; + +void kdtre_guessnv(nvm,ncm,vc,dp,mi) +double *dp; +INT *nvm, *ncm, *vc, *mi; +{ int k; + if (mi[MEV] == EKDTR) + { nterm = (INT) (dp[DCUT]/4 * mi[MN] * MIN(dp[DALP],1.0) ); + k = 2*mi[MN]/nterm; + *vc = 1<=l) && (x[pi[il]]>= t)) il--; + if (ir t + */ + jl = ir; jr = r; + while (ir=jl) && (x[pi[jr]] > t)) jr--; + if (ir=m)) return(jr); + + /* + update l or r. + */ + if (m>=ir) l = ir; + if (m<=il) r = il; + } + if (l==r) return(l); + ERROR(("ksmall failure")); + return(0); +} + +INT terminal(tr,p,pi,fc,d,m,split_val) +lfit *tr; +INT p, *pi, d, fc, *m; +double *split_val; +{ INT i, k, lo, hi, split_var; + double max, min, score, max_score, t; + + /* + if there are fewer than fc points in the cell, this cell + is terminal. + */ + lo = tr->lo[p]; hi = tr->hi[p]; + if (hi-lo < fc) return(-1); + + /* determine the split variable */ + max_score = 0.0; split_var = 0; + for (k=0; kmax) max = t; + } + score = (max-min) / tr->sca[k]; + if (score > max_score) + { max_score = score; + split_var = k; + } + } + if (max_score==0) /* all points in the cell are equal */ + return(-1); + + *m = ksmall(lo,hi,(lo+hi)/2, dvari(tr,split_var), pi); + *split_val = datum(tr, split_var, pi[*m]); + + if (*m==hi) /* all observations go lo */ + return(-1); + return(split_var); +} + +void kdtre_start(des,tr) +design *des; +lfit *tr; +{ INT i, j, vc, d, nc, nv, ncm, nvm, k, m, n, p, *pi; + double sv; + d = tr->mi[MDIM]; n = tr->mi[MN]; pi = des->ind; + kdtre_guessnv(&nvm,&ncm,&vc,tr->dp,tr->mi); + trchck(tr,nvm,ncm,d,des->p,vc); + + nv = 0; + if (tr->mi[MEV] != EKDCE) + { for (i=0; ifl[d*(j%2)+k]; + j >>= 1; + } + } + nv = vc; + for (j=0; jce[j] = j; + } + + for (i=0; ilo[p] = 0; tr->hi[p] = n-1; + tr->s[p] = -1; + while (p=0) + { + if ((ncmnce = nc; tr->nv = nv; + return; + } + + /* new lo cell has obsn's tr->lo[p]..m */ + tr->lo[nc] = tr->lo[p]; + tr->hi[nc] = m; + tr->s[nc] = -1; + + /* new hi cell has obsn's m+1..tr->hi[p] */ + tr->lo[nc+1] = m+1; + tr->hi[nc+1] = tr->hi[p]; + tr->s[nc+1] = -1; + + /* cell p is split on variable k, value sv */ + tr->s[p] = k; + tr->sv[p] = sv; + tr->lo[p] = nc; tr->hi[p] = nc+1; + + nc=nc+2; i = nv; + + /* now compute the new vertices. */ + if (tr->mi[MEV] != EKDCE) + newcell(&nv,vc,vdptr(tr->xxev), d, k, sv, + &tr->ce[p*vc], &tr->ce[(nc-2)*vc], &tr->ce[(nc-1)*vc]); + + } + else if (tr->mi[MEV]==EKDCE) /* new vertex at cell center */ + { sv = 0; + for (i=0; ilo[p]; j<=tr->hi[p]; j++) + { sv += prwt(tr,pi[j]); + for (i=0; imi[MN] = tr->hi[p]-tr->lo[p]+1; + des->ind = &pi[tr->lo[p]]; + des->vfun(des,tr,nv); + tr->mi[MN] = n; des->ind = pi; + nv++; + } + p++; + } + + /* We've built the tree. Now do the fitting. */ + if (tr->mi[MEV]==EKDTR) + for (i=0; ivfun(des,tr,i); + + tr->nce = nc; tr->nv = nv; + return; +} + +void newcell(nv,vc,xev, d, k, split_val, cpar, clef, crig) +double *xev, split_val; +INT *nv, vc, d, k, *cpar, *clef, *crig; +{ INT i, ii, j, j2, tk, match; + tk = 1<ce; + for (k=0; k<4; k++) /* North South East West */ + { k1 = (k>1); + v0 = ll[k1]; v1 = ur[k1]; + j0 = ce[j+2*(k==0)+(k==2)]; + j1 = ce[j+3-2*(k==1)-(k==3)]; + xibar = (k%2==0) ? ur[k<2] : ll[k<2]; + m = nt; + while ((m>=0) && ((lf->s[t[m]] != (k<=1)) | (lf->sv[t[m]] != xibar))) m--; + if (m >= 0) + { m = (k%2==1) ? lf->lo[t[m]] : lf->hi[t[m]]; + while (lf->s[m] != -1) + m = (x[lf->s[m]] < lf->sv[m]) ? 
lf->lo[m] : lf->hi[m]; + if (v0 < evptx(lf,ce[4*m+2*(k==1)+(k==3)],k1)) + { j0 = ce[4*m+2*(k==1)+(k==3)]; + v0 = evptx(lf,j0,k1); + } + if (evptx(lf,ce[4*m+3-2*(k==0)-(k==2)],k1) < v1) + { j1 = ce[4*m+3-2*(k==0)-(k==2)]; + v1 = evptx(lf,j1,k1); + } + } + nc = exvval(lf,g0,j0,2,what,0); + nc = exvval(lf,g1,j1,2,what,0); + if (nc==1) + gg[k] = linear_interp((x[(k>1)]-v0),v1-v0,g0[0],g1[0]); + else + { hermite2(x[(k>1)]-v0,v1-v0,phi); + gg[k] = phi[0]*g0[0]+phi[1]*g1[0]+(phi[2]*g0[1+k1]+phi[3]*g1[1+k1])*(v1-v0); + gp[k] = phi[0]*g0[2-k1] + phi[1]*g1[2-k1]; + } + } + s = -s; + if (nc==1) + for (k=0; k<2; k++) + s += linear_interp(x[k]-ll[k],ur[k]-ll[k],gg[3-2*k],gg[2-2*k]); + else + for (k=0; k<2; k++) /* EW NS */ + { hermite2(x[k]-ll[k],ur[k]-ll[k],phi); + s += phi[0]*gg[3-2*k] + phi[1]*gg[2-2*k] + +(phi[2]*gp[3-2*k] + phi[3]*gp[2-2*k]) * (ur[k]-ll[k]); + } + return(s); +} + +double kdtre_int(lf,x,what) +lfit *lf; +double *x; +INT what; +{ INT d, vc, k, t[20], nt, nc, *ce, j; + double *ll, *ur, ff, vv[64][64]; + d = lf->mi[MDIM]; + vc = lf->vc; + if (d > 6) ERROR(("d too large in kdint")); + + /* descend the tree to find the terminal cell */ + nt = 0; t[nt] = 0; k = 0; + while (lf->s[k] != -1) + { nt++; + if (nt>=20) { ERROR(("Too many levels in kdint")); return(NOSLN); } + k = t[nt] = (x[lf->s[k]] < lf->sv[k]) ? lf->lo[k] : lf->hi[k]; + } + + ce = &lf->ce[k*vc]; + ll = evpt(lf,ce[0]); + ur = evpt(lf,ce[vc-1]); + nc = 0; + for (j=0; jxxev = checkvarlen(tr->xxev,d*nvm,"_lfxev",VDOUBLE); + + rw = nvm*(3*d+8)+ncm; + tr->tw = checkvarlen(tr->tw,lfit_reqd(d,nvm,ncm),"_lfwork",VDOUBLE); + z = (double *)vdptr(tr->tw); + tr->coef= z; z += nvm*(d+1); + tr->nlx = z; z += nvm*(d+1); + tr->t0 = z; z += nvm*(d+1); + tr->lik = z; z += 3*nvm; + tr->h = z; z += nvm; + tr->deg = z; z += nvm; + tr->sv = z; z += ncm; + if (z != (double *)vdptr(tr->tw)+rw) + WARN(("trchck: double assign problem")); + + rw = lfit_reqi(nvm,ncm,vc); + tr->iw = checkvarlen(tr->iw,rw,"_lfiwork",VINT); + k = (INT *)vdptr(tr->iw); + tr->ce = k; k += vc*ncm; + tr->s = k; k += MAX(ncm,nvm); + tr->lo = k; k += MAX(ncm,nvm); + tr->hi = k; k += MAX(ncm,nvm); + if (k != (INT *)vdptr(tr->iw)+rw) + WARN(("trchck: int assign problem")); + + tr->nvm = nvm; tr->ncm = ncm; tr->mi[MDIM] = d; tr->mi[MP] = p; tr->vc = vc; +} + +#ifdef CVERSION +void reassign(lf) +lfit *lf; +{ INT i, nvm, ncm, vc, d, k, p, *iw; + double *tw, *ntw; + setvarname(lf->tw,"__lfwork"); /* prevent overwrite */ + setvarname(lf->iw,"__lfiwork"); + nvm = lf->nvm; ncm = lf->ncm; vc = lf->vc; + tw = (double *)vdptr(lf->tw); + iw = (INT *)vdptr(lf->iw); + d = lf->mi[MDIM]; + p = lf->mi[MP]; + trchck(lf,2*nvm,ncm,d,p,vc); + ntw = vdptr(lf->tw); +/* + xev is stored in blocks of d. 
other matrices by blocks on nvm +*/ + k = nvm*d; + memcpy(vdptr(lf->xxev),tw,k*sizeof(double)); + tw += k; ntw += 2*k; + for (i=0; i<2*p+2*d+6; i++) + { memcpy(ntw,tw,nvm*sizeof(double)); + tw += nvm; ntw += 2*nvm; + } + k = ncm; memcpy(lf->sv,tw,k*sizeof(double)); tw += k; + + k = vc*ncm; memcpy(lf->ce,iw,k*sizeof(INT)); iw += k; + k = MAX(ncm,nvm); memcpy(lf->s,iw,k*sizeof(INT)); iw += k; + k = MAX(ncm,nvm); memcpy(lf->lo,iw,k*sizeof(INT)); iw += k; + k = MAX(ncm,nvm); memcpy(lf->hi,iw,k*sizeof(INT)); iw += k; + deletename("__lfwork"); + deletename("__lfiwork"); +} +#endif + +void dataf(des,lf) +design *des; +lfit *lf; +{ INT d, i, j, ncm, nv, vc; + + d = lf->mi[MDIM]; + guessnv(&nv,&ncm,&vc,lf->dp,lf->mi); + trchck(lf,nv,0,d,des->p,0); + + for (i=0; ivfun(des,lf,i); + lf->s[i] = 0; + } + lf->nv = lf->nvm = nv; lf->nce = 0; +} + +void xbarf(des,lf) +design *des; +lfit *lf; +{ int i, d, nvm, ncm, vc; + d = lf->mi[MDIM]; + guessnv(&nvm,&ncm,&vc,lf->dp,lf->mi); + trchck(lf,1,0,d,des->p,0); + for (i=0; ipc.xbar[i]; + des->vfun(des,lf,0); + lf->s[0] = 0; + lf->nv = 1; lf->nce = 0; +} + +#ifndef GR +void preset(des,lf) +design *des; +lfit *lf; +{ INT i, nv; + double *tmp; + nv = lf->nvm; + tmp = vdptr(lf->xxev); + trchck(lf,nv,0,lf->mi[MDIM],des->p,0); + lf->xxev->dpr = tmp; + for (i=0; ivfun(des,lf,i); + lf->s[i] = 0; + } + lf->nv = nv; lf->nce = 0; +} +#endif + +void crossf(des,lf) +design *des; +lfit *lf; +{ INT d, i, j, n, nv, ncm, vc; + + n = lf->mi[MN]; d = lf->mi[MDIM]; + guessnv(&nv,&ncm,&vc,lf->dp,lf->mi); + trchck(lf,n,0,d,des->p,0); + + for (i=0; is[cvi] = 0; + des->vfun(des,lf,cvi); + } + cvi = -1; + lf->nv = n; lf->nce = 0; lf->mi[MN] = n; +} + +void gridf(des,tr) +design *des; +lfit *tr; +{ INT d, i, j, nv, u0, u1, z; + nv = 1; d = tr->mi[MDIM]; + for (i=0; img[i]==0) + tr->mg[i] = 2+(INT)((tr->fl[i+d]-tr->fl[i])/(tr->sca[i]*tr->dp[DCUT])); + nv *= tr->mg[i]; + } + trchck(tr,nv,0,d,des->p,1<mg[j]; + u1 = tr->mg[j]-1-u0; + evptx(tr,i,j) = (tr->mg[j]==1) ? tr->fl[j] : + (u1*tr->fl[j]+u0*tr->fl[j+d])/(tr->mg[j]-1); + z = z/tr->mg[j]; + } + tr->s[i] = 0; + des->vfun(des,tr,i); + } + tr->nv = nv; tr->nce = 0; +} + +/* + add a new vertex at the midpoint of (x[i0],x[i1]). + return the vertex number. +*/ +INT newsplit(des,lf,i0,i1,pv) +design *des; +lfit *lf; +INT i0, i1, pv; +{ INT i, nv; + + /* first, check to see if the new point already exists */ + if (i0>i1) ISWAP(i0,i1); + nv = lf->nv; + for (i=i1+1; ilo[i]==i0) && (lf->hi[i]==i1)) return(i); + + /* the point is new. Now check we have space for the new point. */ + if (nv==lf->nvm) + { +#ifdef CVERSION + reassign(lf); +#else + ERROR(("newsplit: out of vertex space")); + return(-1); +#endif + } + + /* compute the new point, and evaluate the fit */ + lf->lo[nv] = i0; + lf->hi[nv] = i1; + for (i=0; imi[MDIM]; i++) + evptx(lf,nv,i) = (evptx(lf,i0,i)+evptx(lf,i1,i))/2; + if (pv) /* pseudo vertex */ + { lf->h[nv] = (lf->h[i0]+lf->h[i1])/2; + lf->s[nv] = 1; /* pseudo-vertex */ + } + else /* real vertex */ + { des->vfun(des,lf,nv); + lf->s[nv] = 0; + } + lf->nv++; + + return(nv); +} diff --git a/src/locfit/ev_trian.c b/src/locfit/ev_trian.c new file mode 100644 index 0000000..c53369b --- /dev/null +++ b/src/locfit/ev_trian.c @@ -0,0 +1,457 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
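 */

/*
 * Illustrative sketch (added; not part of locfit): linear interpolation
 * over a simplex.  Given barycentric coordinates w[0..d] (non-negative
 * and summing to one) and vertex values v[0..d], the interpolant is
 * sum_i w[i]*v[i]; this is exactly the nc==1 branch of triang_cubicint()
 * further down in this file.
 */
static double simplex_linear_sketch(const double *w, const double *v, int d)
{ int i;
  double s = 0.0;
  for (i = 0; i <= d; i++) s += w[i]*v[i];
  return s;
}

/* (end of added sketch; the original ev_trian.c source follows)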
+ */ + +#include "local.h" + +INT triang_split(tr,ce,le) +lfit *tr; +double *le; +INT *ce; +{ INT d, i, j, k, nts, vc; + double di, dfx[MXDIM]; + nts = 0; d = tr->mi[MDIM]; vc = d+1; + for (i=0; isca,d,KSPH,NULL); + le[i*vc+j] = le[j*vc+i] = di/MIN(tr->h[ce[i]],tr->h[ce[j]]); + nts = nts || le[i*vc+j]>tr->dp[DCUT]; + } + return(nts); +} + +void resort(pv,xev,dig) +double *xev; +INT *pv, *dig; +{ double d0, d1, d2; + INT i; + d0 = d1 = d2 = 0; + for (i=0; i<3; i++) + { d0 += (xev[3*pv[11]+i]-xev[3*pv[1]+i])*(xev[3*pv[11]+i]-xev[3*pv[1]+i]); + d1 += (xev[3*pv[ 7]+i]-xev[3*pv[2]+i])*(xev[3*pv[ 7]+i]-xev[3*pv[2]+i]); + d2 += (xev[3*pv[ 6]+i]-xev[3*pv[3]+i])*(xev[3*pv[ 6]+i]-xev[3*pv[3]+i]); + } + if ((d0<=d1) & (d0<=d2)) + { dig[0] = pv[1]; dig[1] = pv[11]; + dig[2] = pv[2]; dig[3] = pv[7]; + dig[4] = pv[3]; dig[5] = pv[6]; + } + else if (d1<=d2) + { dig[0] = pv[2]; dig[1] = pv[7]; + dig[2] = pv[1]; dig[3] = pv[11]; + dig[4] = pv[3]; dig[5] = pv[6]; + } + else + { dig[0] = pv[3]; dig[1] = pv[6]; + dig[2] = pv[2]; dig[3] = pv[7]; + dig[4] = pv[1]; dig[5] = pv[11]; + } +} + +void triang_grow(des,tr,ce,ct,term) +design *des; +lfit *tr; +INT *ce, *ct, *term; +{ double le[(1+MXDIM)*(1+MXDIM)], ml; + INT pv[(1+MXDIM)*(1+MXDIM)], nce[1+MXDIM], d, i, j, im, jm, vc, dig[6]; + if (lf_error) return; + d = tr->mi[MDIM]; vc = d+1; + if (!triang_split(tr,ce,le)) + { if (ct != NULL) + { for (i=0; i3) + { ml = 0; + for (i=0; iml) { ml = le[i*vc+j]; im = i; jm = j; } + pv[0] = newsplit(des,tr,ce[im],ce[jm],0); + for (i=0; idp[DCUT]); + for (i=0; i<=d; i++) /* corners */ + { for (j=0; j<=d; j++) nce[j] = (j==i) ? ce[i] : pv[i*vc+j]; + triang_grow(des,tr,nce,ct,term); + } + + if (d==2) /* center for d=2 */ + { nce[0] = pv[5]; nce[1] = pv[2]; nce[2] = pv[1]; + triang_grow(des,tr,nce,ct,term); + } + if (d==3) /* center for d=3 */ + { resort(pv,vdptr(tr->xxev),dig); + nce[0] = dig[0]; nce[1] = dig[1]; + nce[2] = dig[2]; nce[3] = dig[4]; triang_grow(des,tr,nce,ct,term); + nce[2] = dig[5]; nce[3] = dig[3]; triang_grow(des,tr,nce,ct,term); + nce[2] = dig[2]; nce[3] = dig[5]; triang_grow(des,tr,nce,ct,term); + nce[2] = dig[4]; nce[3] = dig[3]; triang_grow(des,tr,nce,ct,term); + } + if (d==1) return; +} + +void triang_descend(tr,xa,ce) +lfit *tr; +double *xa; +INT *ce; +{ double le[(1+MXDIM)*(1+MXDIM)], ml; + INT d, vc, i, j, pv[(1+MXDIM)*(1+MXDIM)], im, jm; + design *des; + des = NULL; + if (!triang_split(tr,ce,le)) return; + d = tr->mi[MDIM]; vc = d+1; + + if (d>3) /* split longest edge */ + { ml = 0; + for (i=0; iml) { ml = le[i*vc+j]; im = i; jm = j; } + pv[0] = newsplit(des,tr,ce[im],ce[jm],0); + if (xa[im]>xa[jm]) + { xa[im] -= xa[jm]; xa[jm] *= 2; ce[jm] = pv[0]; } + else + { xa[jm] -= xa[im]; xa[im] *= 2; ce[im] = pv[0]; } + triang_descend(tr,xa,ce); + return; + } + + for (i=0; idp[DCUT]); + for (i=0; i<=d; i++) if (xa[i]>=0.5) /* in corner */ + { for (j=0; j<=d; j++) + { if (i!=j) ce[j] = pv[i*vc+j]; + xa[j] = 2*xa[j]; + } + xa[i] -= 1; + triang_descend(tr,xa,ce); + return; + } + if (d==1) { ERROR(("weights sum to < 1")); } + if (d==2) /* center */ + { ce[0] = pv[5]; xa[0] = 1-2*xa[0]; + ce[1] = pv[2]; xa[1] = 1-2*xa[1]; + ce[2] = pv[1]; xa[2] = 1-2*xa[2]; + triang_descend(tr,xa,ce); + } + if (d==3) /* center */ + { double z; INT dig[6]; + resort(pv,vdptr(tr->xxev),dig); + ce[0] = dig[0]; ce[1] = dig[1]; + xa[0] *= 2; xa[1] *= 2; xa[2] *= 2; xa[3] *= 2; + if (xa[0]+xa[2]>=1) + { if (xa[0]+xa[3]>=1) + { ce[2] = dig[2]; ce[3] = dig[4]; + z = xa[0]; + xa[3] += z-1; xa[2] += z-1; xa[0] = xa[1]; xa[1] = 1-z; + } + 
else + { ce[2] = dig[2]; ce[3] = dig[5]; + z = xa[3]; xa[3] = xa[1]+xa[2]-1; xa[1] = z; + z = xa[2]; xa[2] += xa[0]-1; xa[0] = 1-z; + } } + else + { if (xa[1]+xa[2]>=1) + { ce[2] = dig[5]; ce[3] = dig[3]; + xa[1] = 1-xa[1]; xa[2] -= xa[1]; xa[3] -= xa[1]; + } + else + { ce[2] = dig[4]; ce[3] = dig[3]; + z = xa[3]; xa[3] += xa[1]-1; xa[1] = xa[2]; + xa[2] = z+xa[0]-1; xa[0] = 1-z; + } } + triang_descend(tr,xa,ce); +} } + +void covrofdata(lf,V,mn) /* covar of data; mean in mn */ +lfit *lf; +double *V, *mn; +{ INT d, i, j, k; + double s; + s = 0; d = lf->mi[MDIM]; + for (i=0; imi[MN]; i++) + { s += prwt(lf,i); + for (j=0; j1+eps)) return(0); + return(1); +} + +void triang_start(des,tr) /* Triangulation with polyhedral start */ +design *des; +lfit *tr; +{ INT i, j, k, n, d, nc, nvm, ncm, vc, *ce, ed[1+MXDIM]; + double V[MXDIM*MXDIM], P[MXDIM*MXDIM], sigma, z[MXDIM], xa[1+MXDIM], *xev; + xev = vdptr(tr->xxev); + d = tr->mi[MDIM]; n = tr->mi[MN]; tr->nv = nc = 0; + + guessnv(&nvm,&ncm,&vc,tr->dp,tr->mi); + trchck(tr,nvm,ncm,d,des->p,vc); + + ce = tr->ce; + for (j=0; jpc.xbar[j]; + tr->nv = 1; + covrofdata(tr,V,tr->pc.xbar); /* fix this with scaling */ + eig_dec(V,P,d); + + for (i=0; inv*d+j] = xev[j]-2*sigma*P[j*d+i]; + tr->nv++; + for (j=0; jnv*d+j] = xev[j]+2*sigma*P[j*d+i]; + tr->nv++; + } + + for (i=0; i0); + for (k=0; k>=1; + } + } + + for (i=0; inv; i++) + { des->vfun(des,tr,i); + if (lf_error) return; + tr->s[i] = 0; + } + for (i=0; ice[i*vc+j]; + triang_grow(des,tr,&tr->ce[i*vc],(INT *)NULL,(INT *)NULL); + } + tr->nce = nc; +} + +double triang_cubicint(v,vv,w,d,nc,xxa) +double *v, *vv, *xxa; +INT d, *w, nc; +{ double sa, lb, *vert0, *vert1, *vals0, *vals1, deriv0, deriv1; + INT i, j, k; + if (nc==1) /* linear interpolate */ + { sa = 0; + for (i=0; i<=d; i++) sa += xxa[i]*vv[i]; + return(sa); + } + sa = 1.0; + vals0 = NULL; + for (j=d; j>0; j--) /* eliminate v[w[j]] */ + { lb = xxa[j]/sa; + for (k=0; kmi[MDIM]; + if (lf->s[i]==0) return(exvval(lf,vv,i,d,what,0)); + + il = lf->lo[i]; nc = triang_getvertexvals(lf,vl,il,what); + ih = lf->hi[i]; nc = triang_getvertexvals(lf,vh,ih,what); + vv[0] = (vl[0]+vh[0])/2; + if (nc==1) return(nc); + P = 1.5*(vh[0]-vl[0]); + le = 0.0; + for (j=0; jmi[MDIM]; vc = d+1; + ce = tr->ce; + i = 0; + while ((ince) && (!intri(x,&ce[i*vc],vdptr(tr->xxev),xa,d))) i++; + if (i==tr->nce) return(NOSLN); + i *= vc; + for (j=0; jnce[i+1]) + { j=nce[i]; nce[i]=nce[i+1]; nce[i+1]=j; k=1; + lb = xa[i]; xa[i] = xa[i+1]; xa[i+1] = lb; + } + } while(k); + nc = 0; + for (i=0; ixxev),vv,nce,nc,xa) : + triang_cubicint(vdptr(tr->xxev),vv,nce,d,nc,xa)); +} diff --git a/src/locfit/family.c b/src/locfit/family.c new file mode 100644 index 0000000..8de7a8b --- /dev/null +++ b/src/locfit/family.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
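+ *
+ *   Local likelihood families and link functions.  defaultlink() resolves
+ *   the LDEFAU/LCANON codes to a concrete link for each family, and
+ *   validlinks() checks that a requested link is permitted.  The family
+ *   routines (famgaus(), fampois(), ...) fill the res[] vector with the
+ *   log-likelihood (ZLIK) and its first and negated second derivatives
+ *   (ZDLL, ZDDLL) for a single observation; links() dispatches on the
+ *   family code, and stdlinks() calls links() with the response,
+ *   censoring indicator and prior weight taken from the lfit structure.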
+ */ + +#include "local.h" + +#define HUBERC 2.0 + +extern double rs, log(); + +INT defaultlink(link,family) +INT link, family; +{ if (link==LDEFAU) + switch(family&63) + { case TDEN: + case TRAT: + case THAZ: + case TGAMM: + case TGEOM: + case TPROB: + case TPOIS: return(LLOG); + case TCIRC: + case TGAUS: + case TCAUC: + case TROBT: return(LIDENT); + case TRBIN: + case TLOGT: return(LLOGIT); + } + if (link==LCANON) + switch(family&63) + { case TDEN: + case TRAT: + case THAZ: + case TPROB: + case TPOIS: return(LLOG); + case TGEOM: + WARN(("Canonical link unavaialable for geometric family; using inverse")); + case TGAMM: return(LINVER); + case TCIRC: + case TGAUS: + case TCAUC: + case TROBT: return(LIDENT); + case TRBIN: + case TLOGT: return(LLOGIT); + } + return(link); +} + +INT validlinks(link,family) +INT link, family; +{ switch(family&63) + { case TDEN: + case TRAT: + case THAZ: + return((link==LLOG) | (link==LIDENT)); + case TGAUS: + return((link==LIDENT) | (link==LLOG) | (link==LLOGIT)); + case TROBT: + case TCAUC: + case TCIRC: + return(link==LIDENT); + case TLOGT: + return((link==LLOGIT) | (link==LIDENT) | (link==LASIN)); + case TRBIN: + return(link==LLOGIT); + case TGAMM: + return((link==LLOG) | (link==LINVER) | (link==LIDENT)); + case TGEOM: + return((link==LLOG) | (link==LIDENT)); + case TPOIS: + case TPROB: + return((link==LLOG) | (link==LSQRT) | (link==LIDENT)); + } + ERROR(("Unknown family %d in validlinks",family)); + return(0); +} + +INT famdens(mean,th,link,res,cens,w) +double mean, th, *res, w; +INT link, cens; +{ if (cens) + res[ZLIK] = res[ZDLL] = res[ZDDLL] = 0.0; + else + { res[ZLIK] = w*th; + res[ZDLL] = res[ZDDLL] = w; + } + return(LF_OK); +} + +INT famgaus(y,mean,th,link,res,cens,w) +double y, mean, th, *res, w; +INT link, cens; +{ double z, pz, dp; + if (link==LINIT) + { res[ZDLL] = w*y; + return(LF_OK); + } + z = y-mean; + if (cens) + { if (link!=LIDENT) + { ERROR(("Link invalid for censored Gaussian family")); + return(LF_LNK); + } + pz = pnorm(-z,0.0,1.0); + dp = ((z>6) ? ptail(-z) : exp(-z*z/2)/pz)/2.5066283; + res[ZLIK] = w*log(pz); + res[ZDLL] = w*dp; + res[ZDDLL]= w*dp*(dp-z); + return(LF_OK); + } + res[ZLIK] = -w*z*z/2; + switch(link) + { case LIDENT: + res[ZDLL] = w*z; + res[ZDDLL]= w; + break; + case LLOG: + res[ZDLL] = w*z*mean; + res[ZDDLL]= w*mean*mean; + break; + case LLOGIT: + res[ZDLL] = w*z*mean*(1-mean); + res[ZDDLL]= w*mean*mean*(1-mean)*(1-mean); + break; + default: + ERROR(("Invalid link for Gaussian family")); + return(LF_LNK); + } + return(LF_OK); +} + +INT famrobu(y,mean,th,link,res,cens,w,rs) +double y, mean, th, *res, w, rs; +INT link, cens; +{ double z, sw; + if (link==LINIT) + { res[ZDLL] = w*y; + return(LF_OK); + } + sw = (w==1.0) ? 1.0 : sqrt(w); /* don't want unnecess. sqrt! 
*/ + z = sw*(y-mean)/rs; + res[ZLIK] = (fabs(z) HUBERC) + { res[ZDLL] = sw*HUBERC/rs; + res[ZDDLL]= 0.0; + return(LF_OK); + } + res[ZDLL] = sw*z/rs; + res[ZDDLL] = w/(rs*rs); + return(LF_OK); +} + +INT famcauc(y,p,th,link,res,cens,w,rs) +double y, p, th, *res, w, rs; +INT link, cens; +{ double z; + if (link!=LIDENT) + { ERROR(("Invalid link in famcauc")); + return(LF_LNK); + } + z = w*(y-th)/rs; + res[ZLIK] = -log(1+z*z); + res[ZDLL] = 2*w*z/(rs*(1+z*z)); + res[ZDDLL] = 2*w*w*(1-z*z)/(rs*rs*(1+z*z)*(1+z*z)); + return(LF_OK); +} + +INT famrbin(y,p,th,link,res,cens,w) +double y, p, th, *res, w; +INT link, cens; +{ double s2y; + if (link==LINIT) + { res[ZDLL] = y; + return(LF_OK); + } + if ((y<0) | (y>w)) /* goon observation; delete it */ + { res[ZLIK] = res[ZDLL] = res[ZDDLL] = 0.0; + return(LF_OK); + } + res[ZLIK] = (th<0) ? th*y-w*log(1+exp(th)) : th*(y-w)-w*log(1+exp(-th)); + if (y>0) res[ZLIK] -= y*log(y/w); + if (yHUBERC*HUBERC/2.0) + { s2y = sqrt(-2*res[ZLIK]); + res[ZLIK] = HUBERC*(HUBERC/2.0-s2y); + res[ZDLL] *= HUBERC/s2y; + res[ZDDLL] = HUBERC/s2y*(res[ZDDLL]-1/(s2y*s2y)*w*p*(1-p)); + } + return(LF_OK); +} + +INT fambino(y,p,th,link,res,cens,w) +double y, p, th, *res, w; +INT link, cens; +{ double wp; + if (link==LINIT) + { if (y<0) y = 0; + if (y>w) y = w; + res[ZDLL] = y; + return(LF_OK); + } + wp = w*p; + if (link==LIDENT) + { if ((p<=0) && (y>0)) return(LF_BADP); + if ((p>=1) && (y0) + { res[ZLIK] += y*log(wp/y); + res[ZDLL] += y/p; + res[ZDDLL]+= y/(p*p); + } + if (yw)) /* goon observation; delete it */ + { res[ZLIK] = res[ZDLL] = res[ZDDLL] = 0.0; + return(LF_OK); + } + res[ZLIK] = (th<0) ? th*y-w*log(1+exp(th)) : th*(y-w)-w*log(1+exp(-th)); + if (y>0) res[ZLIK] -= y*log(y/w); + if (y0)) return(LF_BADP); + if ((p>=1) && (yPI/2)) return(LF_BADP); + res[ZDLL] = res[ZDDLL] = res[ZLIK] = 0; + if (y>0) + { res[ZDLL] += 2*y*sqrt((1-p)/p); + res[ZLIK] += y*log(wp/y); + } + if (y0) res[ZLIK] += y*(th-log(y/w)); + res[ZDDLL] = wmu; + return(LF_OK); + } + if (link==LIDENT) + { if ((mean<=0) && (y>0)) return(LF_BADP); + res[ZLIK] = y-wmu; + res[ZDLL] = -w; + res[ZDDLL] = 0; + if (y>0) + { res[ZLIK] += y*log(wmu/y); + res[ZDLL] += y/mean; + res[ZDDLL]= y/(mean*mean); + } + return(LF_OK); + } + if (link==LSQRT) + { if ((mean<=0) && (y>0)) return(LF_BADP); + res[ZLIK] = y-wmu; + res[ZDLL] = -2*w*th; + res[ZDDLL]= 2*w; + if (y>0) + { res[ZLIK] += y*log(wmu/y); + res[ZDLL] += 2*y/th; + res[ZDDLL]+= 2*y/mean; + } + return(LF_OK); + } + ERROR(("link %d invalid for Poisson family",link)); + return(LF_LNK); +} + +INT famgamm(y,mean,th,link,res,cens,w) +double y, mean, th, *res, w; +INT link, cens; +{ double pt, dg; + if (link==LINIT) + { res[ZDLL] = MAX(y,0.0); + return(LF_OK); + } + if ((mean<=0) & (y>0)) return(LF_BADP); + if (cens) + { if (y<=0) + { res[ZLIK] = res[ZDLL] = res[ZDDLL] = 0.0; + return(LF_OK); + } + if (link==LLOG) + { pt = 1-igamma(y/mean,w); + dg = exp((w-1)*log(y/mean)-y/mean-LGAMMA(w)); + res[ZLIK] = log(pt); + res[ZDLL] = y*dg/(mean*pt); + res[ZDDLL]= dg*(w*y/mean-y*y/(mean*mean))/pt+SQR(res[ZDLL]); + return(LF_OK); + } + if (link==LINVER) + { pt = 1-igamma(th*y,w); + dg = exp((w-1)*log(th*y)-th*y-LGAMMA(w)); + res[ZLIK] = log(pt); + res[ZDLL] = -y*dg/pt; + res[ZDDLL]= dg*y*((w-1)*mean-y)/pt+SQR(res[ZDLL]); + return(LF_OK); + } + } + else + { if (y<0) WARN(("Negative Gamma observation")); + if (link==LLOG) + { res[ZLIK] = -y/mean+w*(1-th); + if (y>0) res[ZLIK] += w*log(y/w); + res[ZDLL] = y/mean-w; + res[ZDDLL]= y/mean; + return(LF_OK); + } + if (link==LINVER) + { res[ZLIK] 
= -y/mean+w-w*log(mean); + if (y>0) res[ZLIK] += w*log(y/w); + res[ZDLL] = -y+w*mean; + res[ZDDLL]= w*mean*mean; + return(LF_OK); + } + if (link==LIDENT) + { res[ZLIK] = -y/mean+w-w*log(mean); + if (y>0) res[ZLIK] += w*log(y/w); + res[ZDLL] = (y-mean)/(mean*mean); + res[ZDDLL]= w/(mean*mean); + return(LF_OK); + } + } + ERROR(("link %d invalid for Gamma family",link)); + return(LF_LNK); +} + +INT famgeom(y,mean,th,link,res,cens,w) +double y, mean, th, *res, w; +INT link, cens; +{ double p, pt, dp, dq; + if (link==LINIT) + { res[ZDLL] = MAX(y,0.0); + return(LF_OK); + } + p = 1/(1+mean); + if (cens) /* censored observation */ + { if (y<=0) + { res[ZLIK] = res[ZDLL] = res[ZDDLL] = 0; + return(LF_OK); + } + pt = 1-ibeta(p,w,y); + dp = -exp(LGAMMA(w+y)-LGAMMA(w)-LGAMMA(y)+(y-1)*th+(w+y-2)*log(p))/pt; + dq = ((w-1)/p-(y-1)/(1-p))*dp; + res[ZLIK] = log(pt); + res[ZDLL] = -dp*p*(1-p); + res[ZDDLL]= (dq-dp*dp)*p*p*(1-p)*(1-p)+dp*(1-2*p)*p*(1-p); + res[ZDDLL]= -res[ZDDLL]; + return(LF_OK); + } + else + { res[ZLIK] = (y+w)*log((y/w+1)/(mean+1)); + if (y>0) res[ZLIK] += y*log(w*mean/y); + if (link==LLOG) + { res[ZDLL] = (y-w*mean)*p; + res[ZDDLL]= (y+w)*p*(1-p); + return(LF_OK); + } + if (link==LIDENT) + { res[ZDLL] = (y-w*mean)/(mean*(1+mean)); + res[ZDDLL]= w/(mean*(1+mean)); + return(LF_OK); + } + } + ERROR(("link %d invalid for geometric family",link)); + return(LF_LNK); +} + +INT famweib(y,mean,th,link,res,cens,w) +double y, mean, th, *res, w; +INT link, cens; +{ double yy; + yy = pow(y,w); + if (link==LINIT) + { res[ZDLL] = MAX(yy,0.0); + return(LF_OK); + } + if (cens) + { res[ZLIK] = -yy/mean; + res[ZDLL] = res[ZDDLL] = yy/mean; + return(LF_OK); + } + res[ZLIK] = 1-yy/mean-th; + if (yy>0) res[ZLIK] += log(w*yy); + res[ZDLL] = -1+yy/mean; + res[ZDDLL]= yy/mean; + return(LF_OK); +} + +INT famcirc(y,mean,th,link,res,cens,w) +double y, mean, th, *res, w; +INT link, cens; +{ if (link==LINIT) + { res[ZDLL] = w*sin(y); + res[ZLIK] = w*cos(y); + return(LF_OK); + } + res[ZDLL] = w*sin(y-mean); + res[ZDDLL]= w*cos(y-mean); + res[ZLIK] = res[ZDDLL]-w; + return(LF_OK); +} + +void robustify(res,rs) +double *res, rs; +{ double sc, z; + sc = rs*HUBERC; + if (res[ZLIK] > -sc*sc/2) return; + z = sqrt(-2*res[ZLIK]); + res[ZDDLL]= -sc*res[ZDLL]*res[ZDLL]/(z*z*z)+sc*res[ZDDLL]/z; + res[ZDLL]*= sc/z; + res[ZLIK] = sc*sc/2-sc*z; +} + +double lf_link(y,lin) +double y; +INT lin; +{ switch(lin) + { case LIDENT: return(y); + case LLOG: return(log(y)); + case LLOGIT: return(logit(y)); + case LINVER: return(1/y); + case LSQRT: return(sqrt(fabs(y))); + case LASIN: return(asin(sqrt(y))); + } + ERROR(("link: unknown link %d",lin)); + return(0.0); +} + +double invlink(th,lin) +double th; +INT lin; +{ switch(lin) + { case LIDENT: return(th); + case LLOG: return(lf_exp(th)); + case LLOGIT: return(expit(th)); + case LINVER: return(1/th); + case LSQRT: return(th*fabs(th)); + case LASIN: return(sin(th)*sin(th)); + case LINIT: return(0.0); + } + ERROR(("invlink: unknown link %d",lin)); + return(0.0); +} + +INT links(th,y,fam,lin,res,cd,w,rs) /* the link and various related functions */ +double th, y, *res, w, cd, rs; +INT fam, lin; +{ double mean; + INT c, link, st; + c = (INT)cd; link = (INT)lin; + + mean = res[ZMEAN] = invlink(th,lin); + if (lf_error) return(LF_LNK); + + switch(fam&63) + { case THAZ: + case TDEN: + case TRAT: return(famdens(mean,th,link,res,c,w)); + case TGAUS: st = famgaus(y,mean,th,link,res,c,w); + break; + case TLOGT: st = fambino(y,mean,th,link,res,c,w); + break; + case TRBIN: 
return(famrbin(y,mean,th,link,res,c,w)); + case TPROB: + case TPOIS: st = fampois(y,mean,th,link,res,c,w); + break; + case TGAMM: st = famgamm(y,mean,th,link,res,c,w); + break; + case TGEOM: st = famgeom(y,mean,th,link,res,c,w); + break; + case TWEIB: return(famweib(y,mean,th,link,res,c,w)); + case TCIRC: st = famcirc(y,mean,th,link,res,c,w); + break; + case TROBT: return(famrobu(y,mean,th,link,res,c,w,rs)); + case TCAUC: return(famcauc(y,mean,th,link,res,c,w,rs)); + default: + ERROR(("links: invalid family %d",fam)); + return(LF_FAM); + } + if (st!=LF_OK) return(st); + if (link==LINIT) return(st); + if ((fam&128)==128) robustify(res,rs); + return(st); +} + +/* + stdlinks is a version of links when family, link, response e.t.c + all come from the standard places. +*/ +INT stdlinks(res,lf,i,th,rs) +lfit *lf; +double th, rs, *res; +INT i; +{ return(links(th,resp(lf,i),lf->mi[MTG],lf->mi[MLINK],res,cens(lf,i),prwt(lf,i),rs)); +} + +/* + * functions used in variance, skewness, kurtosis calculations + * in scb corrections. + */ + +double b2(th,tg,w) +double th, w; +INT tg; +{ double y; + switch(tg&63) + { case TGAUS: return(w); + case TPOIS: return(w*lf_exp(th)); + case TLOGT: + y = expit(th); + return(w*y*(1-y)); + } + ERROR(("b2: invalid family %d",tg)); + return(0.0); +} + +double b3(th,tg,w) +double th, w; +INT tg; +{ double y; + switch(tg&63) + { case TGAUS: return(0.0); + case TPOIS: return(w*lf_exp(th)); + case TLOGT: + y = expit(th); + return(w*y*(1-y)*(1-2*y)); + } + ERROR(("b3: invalid family %d",tg)); + return(0.0); +} + +double b4(th,tg,w) +double th, w; +INT tg; +{ double y; + switch(tg&63) + { case TGAUS: return(0.0); + case TPOIS: return(w*lf_exp(th)); + case TLOGT: + y = expit(th); y = y*(1-y); + return(w*y*(1-6*y)); + } + ERROR(("b4: invalid family %d",tg)); + return(0.0); +} diff --git a/src/locfit/fitted.c b/src/locfit/fitted.c new file mode 100644 index 0000000..0964a02 --- /dev/null +++ b/src/locfit/fitted.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +/* + Functions for computing residuals and fitted values from + the locfit object. + + fitted(lf,des,fit,what,cv,ty) computes fitted values from the + fit structure in lf. + resid(y,c,w,th,mi,ty) converts fitted values to residuals + cfitted(v,ty) is CVERSION front end, interpreting command + line arguments and computing th. + vfitted(ty) for use by arithmetic interpreter. 
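+
+  As defined below, the full argument lists are
+    fitted(lf,des,fit,what,cv,st,ty)  -- st!=0 requests studentization,
+    resid(y,w,th,mi,ty,res)           -- res holds the link[] vector.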
+*/ + +#include "local.h" + +double resid(y,w,th,mi,ty,res) +INT *mi, ty; +double y, w, th, *res; +{ double raw; + INT tg; + + tg = mi[MTG] & 63; + if ((tg==TGAUS) | (tg==TROBT) | (tg==TCAUC)) + raw = y-res[ZMEAN]; + else + raw = y-w*res[ZMEAN]; + switch(ty) + { case RDEV: + if (res[ZDLL]>0) return(sqrt(-2*res[ZLIK])); + else return(-sqrt(-2*res[ZLIK])); + case RPEAR: + if (res[ZDDLL]<=0) + { if (res[ZDLL]==0) return(0); + return(NOSLN); + } + return(res[ZDLL]/sqrt(res[ZDDLL])); + case RRAW: return(raw); + case RLDOT: return(res[ZDLL]); + case RDEV2: return(-2*res[ZLIK]); + case RLDDT: return(res[ZDDLL]); + case RFIT: return(th); + case RMEAN: return(res[ZMEAN]); + default: ERROR(("resid: unknown residual type %d",ty)); + } + return(0.0); +} + +double studentize(res,inl,var,ty,link) +double res, inl, var, *link; +int ty; +{ double den; + inl *= link[ZDDLL]; + var = var*var*link[ZDDLL]; + if (inl>1) inl = 1; + if (var>inl) var = inl; + den = 1-2*inl+var; + if (den<0) return(0.0); + switch(ty) + { case RDEV: + case RPEAR: + case RRAW: + case RLDOT: + return(res/sqrt(den)); + case RDEV2: + return(res/den); + default: return(res); + } +} + +void fitted(lf,des,fit,what,cv,st,ty) +lfit *lf; +design *des; +double *fit; +INT what, cv, st, ty; +{ INT i, j, d, n, ev; + double xx[MXDIM], th, inl, var, link[LLEN]; + inl = 0.0; + var = 0.0; + n = lf->mi[MN]; + d = lf->mi[MDIM]; + ev = lf->mi[MEV]; + cv &= (ev!=ECROS); + if ((lf->mi[MEV]==EDATA)|(lf->mi[MEV]==ECROS)) ev = EFITP; + for (i=0; idp[DRSC]); + if ((cv)|(st)) + { inl = dointpoint(lf,des,xx,PT0,ev,i); + inl = inl*inl; + if (cv) + { th -= inl*link[ZDLL]; + stdlinks(link,lf,i,th,lf->dp[DRSC]); + } + if (st) var = dointpoint(lf,des,xx,PNLX,ev,i); + } + fit[i] = resid(resp(lf,i),prwt(lf,i),th,lf->mi,ty,link); + if (st) fit[i] = studentize(fit[i],inl,var,ty,link); + } else fit[i] = th; + if (lf_error) return; + } +} + +#ifdef CVERSION +extern lfit lf; +extern design des; + +vari *vfitted(type) +INT type; +{ vari *v; + INT n; + n = lf.mi[MN]; + v = createvar("vfitted",STHIDDEN,n,VDOUBLE); + recondat(1,&n); + if (lf_error) return(NULL); + + fitted(&lf,&des,vdptr(v),PCOEF,0,0,type); + return(v); +} + +void cfitted(v,ty) +vari *v; +INT ty; +{ double *f; + vari *vr; + INT i, n, cv, st, wh; + + i = getarg(v,"type",1); + if (i>0) ty = restyp(argval(v,i)); + + i = getarg(v,"cv",1); cv = (i>0) ? getlogic(v,i) : 0; + i = getarg(v,"studentize",1); st = (i>0) ? getlogic(v,i) : 0; + + wh = PCOEF; + i = getarg(v,"what",1); + if (i>0) wh = ppwhat(argval(v,i)); + + recondat(ty==5,&n); + if (lf_error) return; + + vr = createvar("fitted",STHIDDEN,n,VDOUBLE); + f = vdptr(vr); + fitted(&lf,&des,f,wh,cv,st,ty); + + saveresult(vr,argarg(v,0),STREGULAR); +} +#endif diff --git a/src/locfit/frend.c b/src/locfit/frend.c new file mode 100644 index 0000000..7b5f830 --- /dev/null +++ b/src/locfit/frend.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
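+ *
+ *   Fitting front end.  resp(), prwt(), base() and cens() return the
+ *   response, prior weight, baseline and censoring indicator for one
+ *   observation, with defaults when the corresponding vector is NULL.
+ *   procvraw() fits at a single evaluation point and stores the local
+ *   coefficients; procv() additionally computes variance and influence
+ *   quantities; procvvord() chooses the local polynomial degree by a
+ *   variable-order criterion.  ressumm() accumulates the global
+ *   likelihood, fitted degrees of freedom and residual variance into
+ *   lf->dp, and rss() returns the residual sum of squares (or deviance)
+ *   and residual degrees of freedom.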
+ */ + +#include "local.h" + +extern INT cvi; +extern double robscale; + +double resp(lf,i) +lfit *lf; +INT i; +{ if (lf->y==NULL) return(0.0); + return(lf->y[i]); +} + +double prwt(lf,i) +lfit *lf; +INT i; +{ if (i==cvi) return(0.0); + if (lf->w==NULL) return(1.0); + return(lf->w[i]); +} + +double base(lf,i) +lfit *lf; +INT i; +{ if (lf->base==NULL) return(0.0); + return(lf->base[i]); +} + +double cens(lf,i) +lfit *lf; +INT i; +{ if (lf->c==NULL) return(0.0); + return(lf->c[i]); +} + +double vocri(lk,t0,t2,pen) +double lk, t0, t2, pen; +{ if (pen==0) return(-2*t0*lk/((t0-t2)*(t0-t2))); + return((-2*lk+pen*t2)/t0); +} + +INT procvraw(des,lf,v) +design *des; +lfit *lf; +INT v; +{ INT lf_status; + int i; + double h, coef[1+MXDIM]; + des->xev = evpt(lf,v); + + lf_status = ainitband(des,lf); + + if (!lf_error) switch(lf->mi[MACRI]) + { case AKAT: + case ACP: + case AMDI: + h = aband2(des,lf,des->h); + h = aband3(des,lf,h); + h = nbhd(lf,des,0,h,1); + lf_status = locfit(lf,des,h,0); + break; + case ANONE: + case AOK: + break; + } + + lf->h[v] = des->h; + for (i=0; incoef; i++) coef[i] = des->cf[cfn(des,i)]; + + if (!lf_error) + { if (lf->mi[MDC]) dercor(des,lf,coef); + subparcomp(des,lf,coef); + for (i=0; incoef; i++) lf->coef[i*lf->nvm+v] = coef[i]; + } + + lf->deg[v] = lf->mi[MDEG]; + + return(lf_status); +} + +/* + * Set default values for the likelihood e.t.c. This + * is called in cases where the optimization for the fit + * has failed. + */ + +void set_default_like(lf,nvm,v,d) +lfit *lf; +INT nvm, v; +int d; +{ INT i; + lf->lik[v] = lf->lik[nvm+v] = 0; + lf->lik[2*nvm+v] = 0; /* should use sum of weights here? */ + for (i=0; i<=d; i++) + lf->t0[i*nvm+v] = lf->nlx[i*nvm+v] = 0.0; +} + +INT procv(des,lf,v) +design *des; +lfit *lf; +INT v; +{ INT d, p, nvm, i, k; + double trc[6], t0[1+MXDIM], vari[1+MXDIM]; + memset(vari, 0, sizeof(vari)); + k = procvraw(des,lf,v); + if (lf_error) return(k); + + d = lf->mi[MDIM]; p = lf->mi[MP]; + nvm = lf->nvm; + + switch(k) + { case LF_OK: break; + case LF_NCON: + WARN(("procv: locfit did not converge")); + break; + case LF_OOB: + WARN(("procv: parameters out of bounds")); + break; + case LF_PF: + if (lf->mi[MDEB]>1) WARN(("procv: perfect fit")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_NOPT: + WARN(("procv: no points with non-zero weight")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_INFA: + if (lf->mi[MDEB]>1) WARN(("procv: initial value problem")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_DEMP: + WARN(("procv: density estimate, empty integration region")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_XOOR: + WARN(("procv: fit point outside xlim region")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_DNOP: + if (lf->mi[MDEB]>1) + WARN(("density estimation -- insufficient points in smoothing window")); + set_default_like(lf,nvm,v,d); + return(k); + case LF_FPROB: + WARN(("procv: f problem; likelihood failure")); + set_default_like(lf,nvm,v,d); + return(k); + default: + WARN(("procv: unknown return code %d",k)); + set_default_like(lf,nvm,v,d); + return(k); + } + + comp_vari(lf,des,trc,t0); + lf->lik[v] = des->llk; + lf->lik[nvm+v] = trc[2]; + lf->lik[2*nvm+v] = trc[0]-trc[2]; + lf->nlx[v] = sqrt(des->V[0]); + + for (i=0; incoef; i++) + vari[i] = des->V[p*cfn(des,0) + cfn(des,i)]; + vari[0] = sqrt(vari[0]); + if (vari[0]>0) for (i=1; incoef; i++) vari[i] /= vari[0]; + t0[0] = sqrt(t0[0]); + if (t0[0]>0) for (i=1; incoef; i++) t0[i] /= t0[0]; + + subparcomp2(des,lf,vari,t0); + for (i=0; incoef; i++) 
+ { lf->nlx[i*lf->nvm+v] = vari[i]; + lf->t0[i*lf->nvm+v] = t0[i]; + } + + return(k); +} + +double intvo(des,lf,c0,c1,a,p,t0,t20,t21) +design *des; +lfit *lf; +double *c0, *c1, a, t0, t20, t21; +INT p; +{ double th, lk, link[LLEN]; + INT i; + lk = 0; + for (i=0; in; i++) + { th = (1-a)*innerprod(c0,&des->X[i*p],p) + a*innerprod(c1,&des->X[i*p],p); + stdlinks(link,lf,des->ind[i],th,robscale); + lk += des->w[i]*link[ZLIK]; + } + des->llk = lk; + return(vocri(des->llk,t0,(1-a)*t20+a*t21,lf->dp[DADP])); +} + +INT procvvord(des,lf,v) +design *des; +lfit *lf; +INT v; +{ + static const int x_1 = 4; + static const int y_1 = 10; + double tr[6], gcv, g0, ap, coef[x_1][y_1], t2[4], th, md = 0.0; + INT i, j, k = 0, d1, *mi, i0, p1, ip; + mi = lf->mi; + des->xev = evpt(lf,v); + + for (i = 0; i < x_1; ++i) + { + for (j = 0; j < y_1; ++j) + { + coef[i][j] = 0; + } + } + + lf->h[v] = nbhd(lf,des,(INT)(mi[MN]*lf->dp[DALP]),lf->dp[DFXH],0); + if (lf->h[v]<=0) WARN(("zero bandwidth in procvvord")); + + ap = lf->dp[DADP]; + if ((ap==0) & ((mi[MTG]&63)!=TGAUS)) ap = 2.0; + d1 = mi[MDEG]; p1 = mi[MP]; + for (i=0; ip = mi[MP] = calcp(mi,i); + k = locfit(lf,des,lf->h[v],0); + + local_df(lf,des,tr); + gcv = vocri(des->llk,tr[0],tr[2],ap); + if ((i==mi[MDEG0]) || (gcvp; j++) coef[i][j] = des->cf[j]; + t2[i] = tr[2]; + +#ifdef RESEARCH + printf("variable order\n"); + if ((ip) && (i>mi[MDEG0])) + { for (j=1; j<10; j++) + { gcv = intvo(des,lf,coef[i-1],coef[i],j/10.0,des->p,tr[0],t2[i-1],t2[i]); + if (gcvp = mi[MP] = calcp(mi,i0); + k = locfit(lf,des,lf->h[v],0); + for (i=mi[MP]; icf[i] = 0.0; + i0 = (INT)md; if (i0==d1) i0--; + th = md-i0; + for (i=0; icf[i] = (1-th)*coef[i0][i]+th*coef[i0+1][i]; + mi[MDEG] = d1; mi[MP] = p1; + } + + for (i=0; icoef[i*lf->nvm+v] = des->cf[i]; + lf->deg[v] = md; + return(k); +} + +/* special version of ressumm to estimate sigma^2, with derivative estimation */ +void ressummd(lf,des) +lfit *lf; +design *des; +{ INT i; + double s0, s1; + s0 = s1 = 0.0; + if ((lf->mi[MTG]&64)==0) + { lf->dp[DRV] = 1.0; + return; + } + for (i=0; inv; i++) + { s0 += lf->lik[2*lf->nvm+i]; + s1 += lf->lik[i]; + } + if (s0==0.0) + lf->dp[DRV] = 0.0; + else + lf->dp[DRV] = -2*s1/s0; +} + +void ressumm(lf,des) +lfit *lf; +design *des; +{ INT i, j, ev, tg, orth; + double *dp, *oy, pw, r1, r2, rdf, t0, t1, u[MXDIM], link[LLEN]; + dp = lf->dp; + dp[DLK] = dp[DT0] = dp[DT1] = 0; + if ((lf->mi[MEV]==EKDCE) | (lf->mi[MEV]==EPRES)) + { dp[DRV] = 1.0; + return; + } + if (lf->nd>0) + { ressummd(lf,des); + return; + } + r1 = r2 = 0.0; + ev = lf->mi[MEV]; + if ((ev==EDATA) | (ev==ECROS)) ev = EFITP; + orth = (lf->mi[MGETH]==4) | (lf->mi[MGETH]==5); + for (i=0; imi[MN]; i++) + { for (j=0; jmi[MDIM]; j++) u[j] = datum(lf,j,i); + des->th[i] = base(lf,i)+dointpoint(lf,des,u,PCOEF,ev,i); + des->wd[i] = resp(lf,i) - des->th[i]; + des->w[i] = 1.0; + des->ind[i] = i; + } + + tg = lf->mi[MTG]; + lf->dp[DRSC] = 1.0; + if ((tg==TROBT+64) | (tg==TCAUC+64)) /* global robust scale */ + { oy = lf->y; lf->y = des->wd; + des->xev = lf->pc.xbar; + locfit(lf,des,0.0,1); + lf->y = oy; + lf->dp[DRSC] = robscale; + } + + if (orth) /* orthog. 
residuals */ + { int od, op; + des->n = lf->mi[MN]; + od = lf->mi[MDEG]; op = lf->mi[MP]; + lf->mi[MDEG] = 1; + lf->mi[MP] = des->p = 1+lf->mi[MDIM]; + oy = lf->y; lf->y = des->wd; + des->xev = lf->pc.xbar; + locfit(lf,des,0.0,1); + for (i=0; imi[MN]; i++) oy[i] = resp(lf,i) - des->th[i]; + lf->y = oy; + lf->mi[MDEG] = od; lf->mi[MP] = op; + } + + for (i=0; imi[MN]; i++) + { for (j=0; jmi[MDIM]; j++) u[j] = datum(lf,j,i); + t0 = dointpoint(lf,des,u,PT0,ev,i); + t1 = dointpoint(lf,des,u,PNLX,ev,i); + stdlinks(link,lf,i,des->th[i],lf->dp[DRSC]); + t1 = t1*t1*link[ZDDLL]; + t0 = t0*t0*link[ZDDLL]; + if (t1>1) t1 = 1; + if (t0>1) t0 = 1; /* no observation gives >1 deg.free */ + dp[DLK] += link[ZLIK]; + dp[DT0] += t0; + dp[DT1] += t1; + pw = prwt(lf,i); + if (pw>0) + { r1 += link[ZDLL]*link[ZDLL]/pw; + r2 += link[ZDDLL]/pw; + } + if (orth) des->di[i] = t1; + } + + if (orth) return; + + dp[DRV] = 1.0; + if ((lf->mi[MTG]&64)==64) /* quasi family */ + { rdf = lf->mi[MN]-2*dp[DT0]+dp[DT1]; + if (rdf<1.0) + { WARN(("Estimated rdf < 1.0; not estimating variance")); + } + else + dp[DRV] = r1/r2 * lf->mi[MN] / rdf; + } + + /* try to ensure consistency for family="circ"! */ + if (((lf->mi[MTG]&63)==TCIRC) & (lf->mi[MDIM]==1)) + { INT *ind, nv; + double dlt, th0, th1; + ind = des->ind; + nv = lf->nv; + for (i=0; ixxev),0,nv-1); + for (i=1; icoef[ind[i]]-dlt*lf->coef[ind[i]+nv]-lf->coef[ind[i-1]]; + th1 = lf->coef[ind[i]]-dlt*lf->coef[ind[i-1]+nv]-lf->coef[ind[i-1]]; + if ((th0>PI)&(th1>PI)) + { for (j=0; jcoef[ind[j]] += 2*PI; + i--; + } + if ((th0<(-PI))&(th1<(-PI))) + { for (j=0; jcoef[ind[j]] -= 2*PI; + i--; + } + } + } +} + +double rss(lf,des,df) +lfit *lf; +design *des; +double *df; +{ double ss; + INT i; + ss = 0; + if (ident==1) + { for (i=0; imi[MN]; i++) + ss += SQR(resp(lf,i)-lf->coef[i]); + *df = lf->mi[MN]-lf->mi[MP]; + return(ss); + } + ressumm(lf,des); + *df = lf->mi[MN] - 2*lf->dp[DT0] + lf->dp[DT1]; + return(-2*lf->dp[DLK]); +} diff --git a/src/locfit/help.c b/src/locfit/help.c new file mode 100644 index 0000000..c5bb242 --- /dev/null +++ b/src/locfit/help.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * + * + * The example() function is used to tnterpreting examples + * in the locfit.ceg file. + */ + +#include "local.h" + +#ifdef CVERSION + +static FILE *help; +extern char *lfhome; + +void example(v) +vari *v; +{ int i, j, run, ti; + char *z, elin[10], helpfile[100], line[200]; + + run = 0; + i = getarg(v,"ex",1); + ti = (i==0); + if (!ti) + { z = strchr(argval(v,i),'.'); + if (z==NULL) + { ERROR(("example: invalid number %s",argval(v,i))); + return; + } + j = getarg(v,"run",1); + if (j>0) run = getlogic(v,j); + } + + if (lfhome!=NULL) + sprintf(helpfile,"%s/locfit.ceg",lfhome); + else + sprintf(helpfile,"locfit.ceg"); + + help = fopen(helpfile,"r"); + if (help==NULL) + { ERROR(("Can't find locfit.ceg file -- is LFHOME set?")); + return; + } + + do + { z = fgets(line,190,help); + if (z==NULL) /* reached end of file */ + { if (!ti) ERROR(("No example %s in help file",argval(v,i))); + fclose(help); + return; + } + if (line[0]=='e') + { sscanf(&line[2],"%s",elin); + if (ti) printf("Example %s. ",elin); + } + else elin[0] = 0; + if ((ti) && (line[0]=='t')) + printf("%s",&line[2]); + } while ((ti) || (strcmp(elin,argval(v,i))!=0)); + + while(1) + { z = fgets(line,190,help); + switch(z[0]) + { case 'f': /* end example */ + fclose(help); + printf("\n"); + return; + case 't': /* title */ + printf("\nExample %s. 
%s\n",argval(v,i),&line[2]); + break; + case 'c': /* code */ + printf(" %s",&line[2]); + case 'h': /* hidden code, usually sleep */ + if (run) makecmd(&line[2]); + break; + case 'd': /* discussion */ + printf("%s",&line[2]); + break; + case 'n': /* no code */ + printf("There is no code for this example.\n"); + break; + } + } +} + +#endif diff --git a/src/locfit/imatlb.h b/src/locfit/imatlb.h new file mode 100644 index 0000000..73331f3 --- /dev/null +++ b/src/locfit/imatlb.h @@ -0,0 +1,36 @@ +typedef struct { + int n; + double *dpr; +} vari; + +typedef struct { + double *Z, *Q, *dg, *f2; + int p, sm; } xtwxstruc; + +typedef struct { + vari *wk; + double *coef, *xbar, *f; + xtwxstruc xtwx; } paramcomp; + +typedef struct { + vari *dw, *index; + double *xev, *X, *w, *di, *res, *th, *wd, h, xb[15]; + double *V, *P, *f1, *ss, *oc, *cf, llk; + xtwxstruc xtwx; + int *ind, n, p, pref, (*itype)(); + int (*vfun)(); } design; + +typedef struct { + vari *tw, *L, *iw, *xxev; + double *x[15], *y, *w, *base, *c, *xl; + double *coef, *nlx, *t0, *lik, *h, *deg; + double *sv, *fl, *sca, *dp, kap[3]; + int *ce, *s, *lo, *hi, sty[15]; + int *mg, nvm, ncm, vc; + int nl, nv, nnl, nce, nk, nn, *mi, ord, deriv[9], nd; + paramcomp pc; + varname yname, xname[15], wname, bname, cname; } lfit; + +extern void mlbcall( + double *x, double *y, + double *xx, double *ff, int n); diff --git a/src/locfit/lf_dercor.c b/src/locfit/lf_dercor.c new file mode 100644 index 0000000..e4a5e65 --- /dev/null +++ b/src/locfit/lf_dercor.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * Derivative corrections. The local slopes are not the derivatives + * of the local likelihood estimate; the function dercor() computes + * the adjustment to get the correct derivatives under the assumption + * that h is constant. + * + * By differentiating the local likelihood equations, one obtains + * + * d ^ ^ T -1 T d . ^ + * -- a = a - (X W V X) X -- W l( Y, X a) + * dx 0 1 dx + */ + +#include "local.h" +extern double robscale; + +void dercor(des,lf,coef) +design *des; +lfit *lf; +double *coef; +{ double s1, dc[MXDIM], wd, link[LLEN]; + INT i, ii, j, m, p, d, *mi; + mi = lf->mi; + if (mi[MTG]<=THAZ) return; + if (mi[MKER]==WPARM) return; + + d = mi[MDIM]; + p = des->p; m = des->n; + + if (mi[MDEB]>1) printf(" Correcting derivatives\n"); + fitfun(lf,des->xev,des->xev,des->f1,NULL,0); + jacob_solve(&des->xtwx,des->f1); + setzero(dc,d); + + /* correction term is e1^T (XTWVX)^{-1} XTW' ldot. 
*/ + for (i=0; if1,&des->X[i*p],p); + ii = des->ind[i]; + stdlinks(link,lf,ii,des->th[i],robscale); + for (j=0; jw[i]*weightd(datum(lf,j,ii)-des->xev[j],lf->sca[j],d,mi[MKER],mi[MKT],des->h,lf->sty[j],des->di[ii]); + dc[j] += s1*wd*link[ZDLL]; + } + + } + for (j=0; j=3")); + return(-1); +} + +void makecfn(des,lf) +design *des; +lfit *lf; +{ int i; + INT *mi, nd; + + nd = lf->nd; + mi = lf->mi; + + des->cfn[0] = coefnumber(lf->deriv,nd,mi[MKT],mi[MDIM],mi[MDEG]); + des->ncoef = 1; + if (nd >= mi[MDEG]) return; + if (mi[MDIM]>1) + { if (nd>=2) return; + if ((nd>=1) && (mi[MKT]==KPROD)) return; + } + + for (i=0; ideriv[nd] = i; + des->cfn[i+1] = coefnumber(lf->deriv,nd+1,mi[MKT],mi[MDIM],mi[MDEG]); + } + des->ncoef = 1+mi[MDIM]; +} + +void fitfunangl(dx,ff,sca,cd,deg) +double dx, *ff, sca; +INT deg, cd; +{ + if (deg>=3) WARN(("Can't handle angular model with deg>=3")); + + switch(cd) + { case 0: + ff[0] = 1; + ff[1] = sin(dx/sca)*sca; + ff[2] = (1-cos(dx/sca))*sca*sca; + return; + case 1: + ff[0] = 0; + ff[1] = cos(dx/sca); + ff[2] = sin(dx/sca)*sca; + return; + case 2: + ff[0] = 0; + ff[1] = -sin(dx/sca)/sca; + ff[2] = cos(dx/sca); + return; + default: WARN(("Can't handle angular model with >2 derivs")); + } +} + +void fitfun(lf,x,t,f,deriv,nd) +lfit *lf; +double *x, *t, *f; +INT *deriv, nd; +{ + INT d, deg, m, i, j, k, ct_deriv[MXDIM]; + double ff[MXDIM][1+MXDEG], dx[MXDIM]; + +#ifdef SVERSION + if (lf->mi[MUBAS]) + { if (nd>0) WARN(("User basis does not take derivatives")); + basis(x,t,f,lf->mi[MDIM],lf->mi[MP]); + return; + } +#endif + + d = lf->mi[MDIM]; + deg = lf->mi[MDEG]; + m = 0; + + if (lf->mi[MKT]==KLM) + { for (i=0; isty[i]) + { + case STANGL: + fitfunangl(dx[i],ff[i],lf->sca[i],ct_deriv[i],lf->mi[MDEG]); + break; + default: + for (j=0; jmi[MKT]==KPROD)) + { for (j=1; j<=deg; j++) + for (i=0; iind contains the indices of + * the required data points; des->n the number of points; des->xev + * the fitting point. + */ +void designmatrix(lf,des) +lfit *lf; +design *des; +{ int i, ii, j, p; + double *X, u[MXDIM]; + + X = d_x(des); + p = des->p; + + if (lf->mi[MUBAS]) + { +#ifdef SVERSION + vbasis(lf->x,des->xev,lf->mi[MN],lf->mi[MDIM],des->ind,des->n,p,X); +#else + ERROR(("user basis in S version only\n")); +#endif + return; + } + + for (i=0; in; i++) + { ii = des->ind[i]; + for (j=0; jmi[MDIM]; j++) u[j] = datum(lf,j,ii); + fitfun(lf,u,des->xev,&X[i*p],NULL,(INT)0); + } +} diff --git a/src/locfit/lf_robust.c b/src/locfit/lf_robust.c new file mode 100644 index 0000000..589b9d4 --- /dev/null +++ b/src/locfit/lf_robust.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * This file includes functions to solve for the scale estimate in + * local robust regression and likelihood. The main entry point is + * lf_robust(lf,des,noit), + * called from the locfit() function. + * + * The update_rs(x) accepts a residual scale x as the argument (actually, + * it works on the log-scale). The function computes the local fit + * assuming this residual scale, and re-estimates the scale from this + * new fit. The final solution satisfies the fixed point equation + * update_rs(x)=x. The function lf_robust() automatically calls + * update_rs() through the fixed point iterations. + * + * The estimation of the scale from the fit is based on the sqrt of + * the median deviance of observations with non-zero weights (in the + * gaussian case, this is the median absolute residual). + * + * TODO: + * Should use smoothing weights in the median. 
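+ *
+ *   A minimal sketch of the iteration solve_fp() is assumed to perform
+ *   (its actual implementation lives elsewhere); it mirrors the call
+ *   solve_fp(update_rs, x, 1.0e-6, (int)lf->mi[MMXIT]) made below:
+ *
+ *     x = log(robustscale(lf,des));
+ *     for (it=0; it<lf->mi[MMXIT]; it++)
+ *     { x1 = update_rs(x);
+ *       if (fabs(x1-x) < 1.0e-6) break;
+ *       x = x1;
+ *     }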
+ */ + +#include "local.h" + +void lfiter(lfit* lf, design* des); + +extern int lf_status; +double robscale; + +static lfit *rob_lf; +static design *rob_des; + +double median(x,n) +double *x; +INT n; +{ INT i, j, lt, eq, gt; + double lo, hi, s; + lo = hi = x[0]; + for (i=0; ilo) & (x[i]s); + } + if ((2*(lt+eq)>n) && (2*(gt+eq)>n)) return(s); + if (2*(lt+eq)<=n) lo = s; + if (2*(gt+eq)<=n) hi = s; + } + } + return((hi+lo)/2); +} + +double nrobustscale(lf,des,rs) +lfit *lf; +design *des; +double rs; +{ int i, ii, p; + double link[LLEN], sc, sd, sw, e; + p = des->p; sc = sd = sw = 0.0; + for (i=0; in; i++) + { ii = des->ind[i]; + des->th[i] = base(lf,ii)+innerprod(des->cf,d_xi(des,i),p); + e = resp(lf,ii)-des->th[i]; + stdlinks(link,lf,ii,des->th[i],rs); + sc += des->w[i]*e*link[ZDLL]; + sd += des->w[i]*e*e*link[ZDDLL]; + sw += des->w[i]; + } + + /* newton-raphson iteration for log(s) + -psi(ei/s) - log(s); s = e^{-th} + */ + rs *= exp((sc-sw)/(sd+sc)); + return(rs); +} + +double robustscale(lf,des) +lfit *lf; +design *des; +{ INT i, ii, p; + double rs, link[LLEN]; + p = des->p; + for (i=0; in; i++) + { ii = des->ind[i]; + des->th[i] = base(lf,ii) + innerprod(des->cf,d_xi(des,i),p); + links(des->th[i],resp(lf,ii),lf->mi[MTG]&127,lf->mi[MLINK],link,cens(lf,ii),prwt(lf,ii),1.0); + des->res[i] = -2*link[ZLIK]; + } + rs = sqrt(median(des->res,des->n)); + if (rs==0.0) rs = 1.0; + return(rs); +} + +double update_rs(x) +double x; +{ + if (lf_status != LF_OK) return(x); + robscale = exp(x); + lfiter(rob_lf,rob_des); + if (lf_status != LF_OK) return(x); + + return(log(robustscale(rob_lf,rob_des))); +} + +void lf_robust(lf,des) +lfit *lf; +design *des; +{ double x; + rob_lf = lf; + rob_des = des; + lf_status = LF_OK; + + x = log(robustscale(lf,des)); + solve_fp(update_rs, x, 1.0e-6, (int)lf->mi[MMXIT]); +} diff --git a/src/locfit/lf_vari.c b/src/locfit/lf_vari.c new file mode 100644 index 0000000..984063e --- /dev/null +++ b/src/locfit/lf_vari.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * Post-fitting functions to compute the local variance and + * influence functions. Also the local degrees of freedom + * calculations for adaptive smoothing. + */ + +#include "local.h" + +extern double robscale; + +/* + vmat() computes (after the local fit..) the matrix + M2 = X^T W^2 V X. + M12 = (X^T W V X)^{-1} M2 + Also, for convenience, tr[0] = sum(wi) tr[1] = sum(wi^2). +*/ +void vmat(lf, des, M12, M2, tr) +lfit *lf; +design *des; +double *M12, *M2, *tr; +{ INT i, p, nk, ok; + double link[LLEN], h, ww; + p = des->p; + setzero(M2,p*p); + + nk = -1; + + /* for density estimation, use integral rather than + sum form, if W^2 is programmed... + */ + if ((lf->mi[MTG]<=THAZ) && (lf->mi[MLINK]==LLOG)) + { switch(lf->mi[MKER]) + { case WGAUS: nk = WGAUS; h = des->h/SQRT2; break; + case WRECT: nk = WRECT; h = des->h; break; + case WEPAN: nk = WBISQ; h = des->h; break; + case WBISQ: nk = WQUQU; h = des->h; break; + case WTCUB: nk = W6CUB; h = des->h; break; + case WEXPL: nk = WEXPL; h = des->h/2; break; + } + } + + tr[0] = tr[1] = 0.0; + if (nk != -1) + { ok = lf->mi[MKER]; lf->mi[MKER] = nk; +/* compute M2 using integration. Use M12 as work matrix. 
*/ + (des->itype)(des->xev, M2, M12, lf, des->cf, h); + lf->mi[MKER] = ok; + if (lf->mi[MTG]==TDEN) multmatscal(M2,lf->dp[DSWT],p*p); + tr[0] = des->ss[0]; + tr[1] = M2[0]; /* n int W e^ */ + } + else + { for (i=0; in; i++) + { stdlinks(link,lf,des->ind[i],des->th[i],robscale); + ww = SQR(des->w[i])*link[ZDDLL]; + tr[0] += des->w[i]; + tr[1] += SQR(des->w[i]); + addouter(M2,d_xi(des,i),d_xi(des,i),p,ww); + } + } + + memcpy(M12,M2,p*p*sizeof(double)); + for (i=0; ixtwx,&M12[i*p]); +} + +/* Compute influence function and estimated derivatives. + * Results stored in des->f1. + * This assumes weight function is scaled so that W(0)=1.0. + */ +double comp_infl(lf,des) +lfit *lf; +design *des; +{ unitvec(des->f1,0,des->p); + jacob_solve(&des->xtwx,des->f1); + return(des->f1[0]); +} + +void comp_vari(lf,des,tr,t0) +lfit *lf; +design *des; +double *tr, *t0; +{ int i, j, k, p; + double *M12, *M2; + M12 = des->V; M2 = des->P; p = des->p; + vmat(lf,des,M12,M2,tr); /* M2 = X^T W^2 V X tr0=sum(W) tr1=sum(W*W) */ + tr[2] = m_trace(M12,p); /* tr (XTWVX)^{-1}(XTW^2VX) */ + + comp_infl(lf,des); + for (i=0; i<=lf->mi[MDIM]; i++) t0[i] = des->f1[i]; + +/* + * Covariance matrix is M1^{-1} * M2 * M1^{-1} + * We compute this using the cholesky decomposition of + * M2; premultiplying by M1^{-1} and squaring. This + * is more stable than direct computation in near-singular cases. + */ + chol_dec(M2,p); + for (i=0; ixtwx,&M2[i*p]); + for (i=0; imi[MTG]==TDEN) && (lf->mi[MLINK]==LIDENT)) + multmatscal(M12,1/SQR(lf->dp[DSWT]),p*p); +} + +/* local_df computes: + * tr[0] = trace(W) + * tr[1] = trace(W*W) + * tr[2] = trace( M1^{-1} M2 ) + * tr[3] = trace( M1^{-1} M3 ) + * tr[4] = trace( (M1^{-1} M2)^2 ) + * tr[5] = var(theta-hat). + */ +void local_df(lf,des,tr) +lfit *lf; +design *des; +double *tr; +{ int i, j, p; + double *m2, *V, ww, link[LLEN]; + + tr[0] = tr[1] = tr[2] = tr[3] = tr[4] = tr[5] = 0.0; + m2 = des->V; V = des->P; p = des->p; + + vmat(lf,des,m2,V,tr); /* M = X^T W^2 V X tr0=sum(W) tr1=sum(W*W) */ + tr[2] = m_trace(m2,p); /* tr (XTWVX)^{-1}(XTW^2VX) */ + + unitvec(des->f1,0,p); + jacob_solve(&des->xtwx,des->f1); + for (i=0; if1[i]*V[i*p+j]*des->f1[j]; /* var(thetahat) */ + } + tr[5] = sqrt(tr[5]); + + setzero(m2,p*p); + for (i=0; in; i++) + { stdlinks(link,lf,des->ind[i],des->th[i],robscale); + ww = SQR(des->w[i])*des->w[i]*link[ZDDLL]; + addouter(m2,d_xi(des,i),d_xi(des,i),p,ww); + } + for (i=0; ixtwx,&m2[i*p]); + tr[3] += m2[i*(p+1)]; + } + + return; +} diff --git a/src/locfit/lfcons.h b/src/locfit/lfcons.h new file mode 100644 index 0000000..0a7ae93 --- /dev/null +++ b/src/locfit/lfcons.h @@ -0,0 +1,280 @@ +/* + * Copyright (c) 1998 Lucent Technologies. + * See README file for details. + */ + +/* + Numeric values for constants used in locfit +*/ + +/* + MXDIM and MXDEG are maximum dimension and local polynomial + degree for Locfit. Note that some parts of the code may be + more restrictive. 
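+
+  (Fixed-size arrays elsewhere depend on these limits, e.g. the
+  ff[MXDIM][1+MXDEG] work array in lf_fitfun.c and the [15] arrays
+  in imatlb.h.)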
+*/ +#define MXDIM 15 +#define MXDEG 7 + +/* + floating point constants +*/ +#ifndef PI +#define PI 3.141592653589793238462643 +#endif +#define S2PI 2.506628274631000502415765 +#define SQRT2 1.4142135623730950488 +#define SQRPI 1.77245385090552 +#define LOGPI 1.144729885849400174143427 +#define GOLDEN 0.61803398874989484820 +#define HL2PI 0.91893853320467267 /* log(2pi)/2 */ +#define SQRPI 1.77245385090552 /* sqrt(pi) */ + +/* + Criteria for adaptive local fitting mi[MACRI] + 1: localized CP; 2: ICI (katkovnik); 3: curvature model index + 4: Increase bandwidth until locfit returns LF_OK +*/ +#define ANONE 0 +#define ACP 1 +#define AKAT 2 +#define AMDI 3 +#define AOK 4 + +/* + vector of double precision parameters. + 0, 1, 2 are the three components of the smoothing parameter. + 3 cut parameter for adaptive evaluation structures. + 4-8 are likelihood, degrees of freedom and residual variance, + computed as part of the fit. + Stored as the lf.dp vector. +*/ +#define DALP 0 +#define DFXH 1 +#define DADP 2 +#define DCUT 3 +#define DLK 4 +#define DT0 5 +#define DT1 6 +#define DRV 7 +#define DSWT 8 +#define DRSC 9 +#define LEND 10 + +/* + Evaluation structures mi[MEV] + EFITP special for `interpolation' at fit points +*/ +#define ENULL 0 +#define ETREE 1 +#define EPHULL 2 +#define EDATA 3 +#define EGRID 4 +#define EKDTR 5 +#define EKDCE 6 +#define ECROS 7 +#define EPRES 8 +#define EXBAR 9 +#define ENONE 10 +#define EFITP 50 + +/* + integer parameters: sample size; dimension; number of local parameters etc. + stored as the lf.mi vector. +*/ +#define MN 0 +#define MP 1 +#define MDEG0 2 +#define MDEG 3 +#define MDIM 4 +#define MACRI 5 +#define MKER 6 +#define MKT 7 +#define MIT 8 +#define MMINT 9 +#define MMXIT 10 +#define MREN 11 +#define MEV 12 +#define MTG 13 +#define MLINK 14 +#define MDC 15 +#define MK 16 +#define MDEB 17 +#define MGETH 18 +#define MPC 19 +#define MUBAS 20 +#define LENM 21 + +/* + Link functions mi[MLINK]. + Mostly as in table 4.1 of the book. + LDEFAU and LCANON are used to select default and canonical + links respectively. LINIT shouldn't be selected by user... +*/ +#define LINIT 0 +#define LDEFAU 1 +#define LCANON 2 +#define LIDENT 3 +#define LLOG 4 +#define LLOGIT 5 +#define LINVER 6 +#define LSQRT 7 +#define LASIN 8 + +/* + components of vector returned by the links() function + in family.c. ZLIK the likelihood; ZMEAN = estimated mean; + ZDLL = derivative of log-likelihood; ZDDLL = - second derivative +*/ +#define LLEN 4 +#define ZLIK 0 +#define ZMEAN 1 +#define ZDLL 2 +#define ZDDLL 3 + +/* + weight functions mi[MKER]. + see Table 3.1 or the function W() in weights.c for definitions. +*/ +#define WRECT 1 +#define WEPAN 2 +#define WBISQ 3 +#define WTCUB 4 +#define WTRWT 5 +#define WGAUS 6 +#define WTRIA 7 +#define WQUQU 8 +#define W6CUB 9 +#define WMINM 10 +#define WEXPL 11 +#define WMACL 12 +#define WPARM 13 + +/* + type of multivariate weight function mi[MKT] + KSPH (spherical) KPROD (product) + others shouldn't be used at present. +*/ +#define KSPH 1 +#define KPROD 2 +#define KCE 3 +#define KLM 4 + +#define STANGL 4 +#define STLEFT 5 +#define STRIGH 6 +#define STCPAR 7 + +/* + Local likelihood family mi[MTG] + for quasi-likelihood, add 64. +*/ +#define TNUL 0 +#define TDEN 1 +#define TRAT 2 +#define THAZ 3 +#define TGAUS 4 +#define TLOGT 5 +#define TPOIS 6 +#define TGAMM 7 +#define TGEOM 8 +#define TCIRC 9 +#define TROBT 10 +#define TRBIN 11 +#define TWEIB 12 +#define TCAUC 13 +#define TPROB 14 + +/* + Integration type mi[MIT] for integration in + density estimation. 
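+
+  (The corresponding string names accepted by setstrval() in lfstr.c
+  are "default", "multi", "product", "mlinear", "hazard" and "monte".)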
+*/ +#define INVLD 0 +#define IDEFA 1 +#define IMULT 2 +#define IPROD 3 +#define IMLIN 4 +#define IHAZD 5 +#define IMONT 7 + +/* + For prediction functions, what to predict? + PCOEF -- coefficients PT0 -- influence function + PNLX -- ||l(x)|| PBAND -- bandwidth h(x) + PDEGR -- local poly. degree PLIK -- max. local likelihood + PRDF -- local res. d.f. PVARI -- ||l(x)||^2 +*/ +#define PCOEF 1 +#define PT0 2 +#define PNLX 3 +#define PBAND 4 +#define PDEGR 5 +#define PLIK 6 +#define PRDF 7 +#define PVARI 8 + +/* + Residual Types +*/ +#define RDEV 1 +#define RPEAR 2 +#define RRAW 3 +#define RLDOT 4 +#define RDEV2 5 +#define RLDDT 6 +#define RFIT 7 +#define RMEAN 8 + +/* + components of the colour vector +*/ +#define CBAK 0 +#define CAXI 1 +#define CTEX 2 +#define CLIN 3 +#define CPOI 4 +#define CCON 5 +#define CCLA 6 +#define CSEG 7 +#define CPA1 8 +#define CPA2 9 + +/* + variable types: double, INT, char, argument list +*/ +#define VDOUBLE 0 +#define VINT 1 +#define VCHAR 2 +#define VARGL 3 +#define VPREP 4 +#define VARC 5 +#define VVARI 6 +#define VXYZ 7 + +/* + variable status +*/ +#define STEMPTY 0 +#define STREGULAR 1 +#define STHIDDEN 3 +#define STPLOTVAR 4 +#define STSYSTEM 5 +#define STSYSPEC 6 +#define STREADFI 7 + +/* + return status for the locfit() function +*/ +#define LF_OK 0 +#define LF_OOB 2 /* out of bounds, or large unstable parameter */ +#define LF_PF 3 /* perfect fit; interpolation; deviance=0 */ +#define LF_NCON 4 /* not converged */ +#define LF_NOPT 6 /* no or insufficient points with non-zero wt */ +#define LF_INFA 7 /* initial failure e.g. log(0) */ +#define LF_DEMP 10 /* density -- empty integration region */ +#define LF_XOOR 11 /* density -- fit point outside xlim region */ +#define LF_DNOP 12 /* density version of 6 */ +#define LF_FPROB 80 +#define LF_BADP 81 /* bad parameters e.g. neg prob for binomial */ +#define LF_LNK 82 /* invalid link */ +#define LF_FAM 83 /* invalid family */ +#define LF_ERR 99 /* error */ diff --git a/src/locfit/lfd.c b/src/locfit/lfd.c new file mode 100644 index 0000000..4a6b74b --- /dev/null +++ b/src/locfit/lfd.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + */ + + +/* Functions for reading/writing to LFData directory */ + +#include +#include "local.h" + +#ifdef CVERSION + +FILE *lfd=NULL; +extern char *lfhome; +char filename[100]; + +void closefile() +{ fclose(lfd); + lfd = NULL; +} + +void openfile(mode) +char *mode; +{ if (lfd!=NULL) closefile(); + lfd = fopen(filename,mode); +} + +/* + * setfilename() places name[] in the filename[] array. + * -- quotes are stripped from name. + * -- ext is the default extension; .ext is added as an extension. + * (unless fp=1). + * If ext = "lfd" or "fit", the LFData (and paths) are used. + * -- mode is a standard unix mode ("r", "w" etc) + * -- fp indicates the full path is given (used by Windoze GUI). + * -- checks for validity of filename and mode. + * -- returns 0 for successful, 1 for unsuccessful. 
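+ *      (as coded below, the return value is actually 1 on success and
+ *      0 on failure; callers such as dosavedata() test for ==0.)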
+ */ +INT setfilename(name,ext,mode,fp) +char *name, *ext, *mode; +INT fp; +{ char subp[20]; + int n, quote, use_lfd; + + n = strlen(name); + quote = ((name[0]=='"') && (name[n-1]=='"')); + if (quote) + { name++; + name[n-2] = '\0'; + } + + use_lfd = (strcmp(ext,"lfd")==0) | (strcmp(ext,"fit")==0); + if (fp) + sprintf(filename,"%s",name); + else + { if (use_lfd) + sprintf(subp,"LFData%c",DIRSEP); + else + sprintf(subp,""); + if (strlen(ext)==0) + sprintf(filename,"%s%s",subp,name); + else + sprintf(filename,"%s%s.%s",subp,name,ext); + } + if (quote) name[n-2] = '"'; + +/* + * If we are writing, check the file is writeable and that the + * LFData directory exists. + */ + if ((mode[0]=='w') | (mode[0]=='a')) + { if (use_lfd) + { if (access("LFData",F_OK)==-1) + { if (access(".",W_OK)==0) + { printf("Creating LFData Directory...\n"); + system("mkdir LFData"); + } + } + if (access("LFData",W_OK)==-1) + { ERROR(("LFData directory not writeable")); + return(0); + } + } + return(1); /* definitive test is whether fopen works. */ + } + +/* + * If we are reading, check the file exists. + * If it doesn't and use_lfd is true, also check a defined lfhome. + */ + if (mode[0]=='r') + { if (access(filename,R_OK)==0) return(1); + + if ((use_lfd) && (lfhome!=NULL)) /* search system lfhome */ + { if (quote) name[n-2] = '\0'; + sprintf(filename,"%s/%s%s.%s",lfhome,subp,name,ext); + if (quote) name[n-2] = '"'; + return(access(filename,R_OK)==0); + } + + return(0); + } + ERROR(("setfilename: invalid mode %s",mode)); + return(0); +} + +void readchar(c,n) +char *c; +INT n; +{ fread(c,1,n,lfd); +} + +void readstr(z) +char *z; +{ while(1) + { readchar(z,1); + if (*z=='\0') return; + z++; + } +} + +void dumpchar(c,n) +char *c; +INT n; +{ fwrite(c,1,n,lfd); +} + +void dumpstr(z) +char *z; +{ dumpchar(z,strlen(z)+1); +} + +#define LFDATAID -281 + +void dosavedata(v,fp) +vari *v; +int fp; +{ void (*fn)(), (*fs)(); + INT i, n; + char *name; + vari *v1; + if (argarg(v,0)==NULL) + { ERROR(("savedata: no filename")); + return; + } + name = argarg(v,0); + + if (setfilename(name,"lfd","wb",fp)==0) + { ERROR(("savedata: cannot access file %s",filename)); + return; + } + openfile("wb"); + if (lf_error) return; + fn = dumpchar; + fs = dumpstr; + + i = LFDATAID; + (*fn)(&i, sizeof(INT)); + n = 0; + for (i=1; in; i++) if (!argused(v,i)) + { v1 = findvar(argval(v,i),0,&n); + if (v==NULL) + { WARN(("variable %s not found; skipping",argval(v,i))); + } + else + { (*fs)(v1->name); + (*fn)(&v1->n,sizeof(INT)); + (*fn)(&v1->mode,sizeof(INT)); /* mode indicator for later */ + (*fn)(v1->dpr,v1->n*sizeof(double)); + } + setused(v,i); + } + (*fs)("__end__"); + closefile(); +} + +void doreaddata(name,fp) +char *name; +int fp; +{ void (*fn)(), (*fs)(); + INT i, k, md, n, of; + varname vn; + vari *v; + + if (setfilename(name,"lfd","rb",fp)==0) + { ERROR(("readdata: cannot access file %s",filename)); + return; + } + openfile("rb"); + if (lf_error) return; + fn = readchar; + fs = readstr; + + of = 0; + (*fn)(&i, sizeof(INT)); + if (i!=LFDATAID) /* wrong or old format */ + { if (i==-367) + { printf("Old format LFData file\n"); + of = 1; + } + else + { ERROR(("not a Locfit data file: %s",name)); + } + } + if (lf_error) { closefile(); return; } + + if (of) /* old format: n nv name (10 char) data */ + { (*fn)(&n,sizeof(INT)); + (*fn)(&k,sizeof(INT)); + for (i=0; idpr,n*sizeof(double)); + } + } + else /* new format: name (str) n mode data __end__ */ + { k = 999999; + for (i=0; idpr,n*sizeof(double)); + } } } + closefile(); +} + +#define FITID 
4395943.3249934 + +void dosavefit(lf,fi,mode,fp) +lfit *lf; +char *fi, *mode; +int fp; +{ void (*fn)(); + double z; + INT d = 0, i, k, lm, ld; + + if (fi==NULL) return; + if (setfilename(fi,"fit",mode,fp)==0) + { ERROR(("savefit: cannot access file %s.",fi)); + return; + } + + if (mode[0]=='r') + fn = readchar; + else + { if (lf->mi[MEV]==ENULL) ERROR(("savefit: No fit to save.")); + if (lf_error) return; + fn = dumpchar; + z = FITID; + lm = LENM; ld = LEND; + d = lf->mi[MDIM]; + } + + openfile(mode); + (*fn)(&z,sizeof(double)); + + if ((mode[0]=='r') && (z!=FITID)) + { ERROR(("readfit: file %s is not an evaluation structure",filename)); + closefile(); + return; + } + + /* if reading, ensure lf.mi etc are assigned */ + if (mode[0]=='r') fitdefault(lf,0,1); + + (*fn)(&lm,sizeof(INT)); + (*fn)(lf->mi,lm*sizeof(INT)); + (*fn)(&ld,sizeof(INT)); + (*fn)(lf->dp,LEND*sizeof(double)); + (*fn)(&lf->nv,sizeof(INT)); + (*fn)(&lf->nce,sizeof(INT)); + (*fn)(&lf->vc,sizeof(INT)); + (*fn)(&lf->nnl,sizeof(INT)); /* no longer used -- delete sometime! */ + + if (mode[0]=='r') + { d = lf->mi[MDIM]; + trchck(lf,lf->nv,lf->nce,d,lf->mi[MP],lf->vc); + pcchk(&lf->pc,d,lf->mi[MP],1); + if ((mode[0]=='r') && (lm<20)) lf->mi[MPC] = 1-noparcomp(lf); + } + (*fn)(vdptr(lf->xxev),d*lf->nv*sizeof(double)); + for (i=0; i<3*lf->mi[MDIM]+8; i++) + (*fn)(&lf->coef[i*lf->nvm],lf->nv*sizeof(double)); + + for (i=0; ixname[i],10); + (*fn)(lf->yname,10); + (*fn)(lf->bname,10); + (*fn)(lf->cname,10); + (*fn)(lf->wname,10); + + (*fn)(lf->sv,lf->nce*sizeof(double)); + (*fn)(lf->fl,2*d*sizeof(double)); + (*fn)(lf->sca,d*sizeof(double)); + (*fn)(lf->ce,lf->nce*lf->vc*sizeof(INT)); + (*fn)(lf->s,lf->nce*sizeof(INT)); + k = 0; + if ((lf->mi[MEV]==EPHULL) | (lf->mi[MEV]==ETREE)) k = lf->nv; + if (lf->mi[MEV]==EKDTR) k = lf->nce; + (*fn)(lf->lo,k*sizeof(INT)); + (*fn)(lf->hi,k*sizeof(INT)); + (*fn)(lf->sty,d*sizeof(INT)); + if (lf->mi[MEV]==EGRID) + (*fn)(lf->mg,d*sizeof(INT)); + (*fn)(&lf->nd,sizeof(INT)); + (*fn)(lf->deriv,lf->nd*sizeof(INT)); + + (*fn)(vdptr(lf->pc.wk),pc_reqd(d,lf->mi[MP])*sizeof(double)); + lf->pc.xtwx.p = lf->mi[MP]; +/* MUST save lf->pc.xtwx.sm here */ + lf->pc.xtwx.sm = lf->pc.xtwx.st = JAC_EIGD; + + closefile(); +} + +#endif diff --git a/src/locfit/lffuns.h b/src/locfit/lffuns.h new file mode 100644 index 0000000..f4fe621 --- /dev/null +++ b/src/locfit/lffuns.h @@ -0,0 +1,215 @@ + + + +/* FILES IN THE src DIRECTORY */ + +/* adap.c */ +extern double afit(), aband2(), aband3(); +extern INT ainitband(); + +/* band.c */ +extern void band(), kdeselect(); + +/* density.c */ +extern INT densinit(); +extern INT fact[]; +extern int likeden(); +extern void prodint_resp(), prresp(); + +/* dens_haz.c */ +extern void haz_init(); +extern INT hazint(); + +/* dens_int.c */ +extern double dens_integrate(); +extern void dens_renorm(), dens_lscv(), lforder(); + +/* dist.c */ +extern double igamma(), ibeta(); +extern double pf(), pchisq(), pnorm(); +extern double df(), dchisq(); + +/* ev_atree.c */ +extern void atree_start(), atree_grow(), atree_guessnv(); +extern double atree_int(); + +/* ev_interp.c */ +extern double dointpoint(), cubintd(); +extern double linear_interp(), cubic_interp(), rectcell_interp(); +extern INT exvval(); +extern void exvvalpv(), hermite2(); + +/* ev_kdtre.c */ +extern void kdtre_start(); +extern double kdtre_int(); + +/* ev_main.c */ +extern void trchck(), guessnv(); +extern void dataf(), gridf(), crossf(), xbarf(), preset(); +extern INT newsplit(); +extern int lfit_reqd(), lfit_reqi(); +#ifndef CVERSION 
+extern vari *createvar(); +#endif + +/* ev_trian.c */ +extern void triang_start(), triang_grow(); +extern double triang_int(); + +/* family.c */ +extern INT links(), stdlinks(), defaultlink(), validlinks(); +extern double b2(), b3(), b4(), lf_link(), invlink(); + +/* frend.c */ +extern void fitfun(), degfree(), ressumm(), makecfn(); +extern INT procv(), procvraw(), procvvord(); +extern double base(), cens(), prwt(), resp(), getxi(), rss(); +extern INT calcp(); + +/* kappa0.c */ +extern double critval(), critvalc(), tailp(), taild(); +extern INT constants(); + +/* lf_dercor.c */ +extern void dercor(); + +/* lf_fitfun.c */ +extern void fitfun(), designmatrix(); +extern INT calcp(), coefnumber(); + +/* lf_robust.c */ +extern double median(); +extern void lf_robust(); + +/* lfstr.c */ +extern void setstrval(); +extern INT ppwhat(), restyp(); + +/* lf_vari.c */ +extern void comp_vari(), local_df(); +extern double comp_infl(); + +/* linalg.c */ +extern void svd(), hsvdsolve(); +extern void addouter(), multmatscal(); +extern void QRupd(), QR1(), bacK(), bacT(), solve(), grsc(); +extern void setzero(), unitvec(); +extern void transpose(); +extern double innerprod(), m_trace(); +extern INT svdsolve(); + +/* locfit.c or parfit.c (most) */ +extern int ident, locfit(), lf_iter(); + +/* math.c */ +extern double lflgamma(), lferf(), lferfc(), lfdaws(); +extern double ptail(), logit(), expit(); +//extern double lgamma(), erf(), erfc(); +extern int factorial(); + +/* minmax.c */ +extern double ipower(), minmax(); + +/* nbhd.c */ +extern double kordstat(), nbhd(), rho(); + +/* odint.c */ +extern INT onedint(); +extern void recurint(); + +/* pcomp.c */ +extern double addparcomp(); +extern void compparcomp(), subparcomp(), subparcomp2(), pcchk(); +extern int pc_reqd(); +extern INT noparcomp(), hasparcomp(); + +/* preplot.c */ +extern void preplot(), cpreplot(); +extern INT setpppoints(); + +/* resid.c */ +extern double resid(); +extern void cfitted(); +extern vari *vfitted(), *vresid(); + +/* scb.c */ +extern void scb(), cscbsim(); + +/* simul.c */ +extern void liksim(), scbsim(), scbmax(), regband(), rband(); + +/* startlf.c */ +extern void bbox(), deschk(), startlf(), preproc(), fitdefault(); +extern void fitoptions(), clocfit(), endfit(); +extern INT nofit(); + +/* strings.c */ +extern int stm(), pmatch(), matchlf(), matchrt(), checkltor(), checkrtol(); +extern void strip(); + +/* wdiag.c */ +extern INT wdiag(), procvhatm(); +extern void cwdiag(); + +/* weight.c */ +extern double W(), weight(), weightd(), Wd(), Wdd(), wint(); +extern double Wconv(), Wconv1(), Wconv4(), Wconv5(), Wconv6(), Wikk(); +extern INT iscompact(), wtaylor(); + +/* arith.c */ +extern INT arvect(), intitem(); +extern double areval(), arith(), darith(), dareval(); +extern vari *varith(), *saveresult(), *arbuild(); + +/* c_args.c */ +#define argused(v,i) (((carg *)viptr(v,i))->used) +#define setused(v,i) { ((carg *)viptr(v,i))->used = 1; } +#define setunused(v,i) { ((carg *)viptr(v,i))->used = 0; } +#define argarg(v,i) (((carg *)viptr(v,i))->arg) +#define argvalis(v,i,z) (strcmp(argval(v,i),z)==0) +extern char *argval(), *getargval(); +extern int getarg(), readilist(), getlogic(); + +/* cmd.c */ +extern int locfit_dispatch(char*); +extern void setuplf(), recondat(), cmdint(); +extern double backtr(), docrit(); + +/* c_plot.c */ +extern void plotdata(), plotfit(), plottrack(), plotopt(), setplot(); + +/* help.c */ +extern void example(); + +/* lfd.c */ +extern void doreaddata(), dosavedata(), dosavefit(); +extern INT setfilename(); + 
+/* main.c */ +extern void SetWinDev(); + +/* makecmd.c */ +extern vari *getcmd(); +extern void makecmd(), del_clines(), inc_forvar(), dec_forvar(); + +/* post.c */ +extern void SetPSDev(); + +/* pout.c */ +extern INT pretty(); +extern void displayplot(); +extern void plotmaple(), plotmathe(), plotmatlb(), plotgnup(), plotxwin(); + +/* random.c */ +extern double rnorm(), rexp(), runif(), rpois(); +extern void rseed(); + +/* readfile.c */ +extern void readfile(); + +/* scbmax.c */ +extern void cscbmax(); + +/* vari.c */ +#include "vari.hpp" + diff --git a/src/locfit/lfstr.c b/src/locfit/lfstr.c new file mode 100644 index 0000000..ad5bf43 --- /dev/null +++ b/src/locfit/lfstr.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * + * setstrval() is a function for converting string arguments to Locfit's + * numeric values. A typical call will be setstrval(lf.mi,MKER,"gauss"). + * + * components that can be set in this manner are + * MKER (weight function) + * MKT (kernel type -- spherical or product) + * MTG (local likelihood family) + * MLINK (link function) + * MIT (integration type for density estimation) + * MEV (evaluation structure) + * MACRI (adaptive criterion) + * + * INT ppwhat(str) interprets the preplot what argument. + * INT restyp(str) interprets the residual type argument. + * + */ + +#include "local.h" + +static char *famil[17] = + { "density", "ate", "hazard", "gaussian", "binomial", + "poisson", "gamma", "geometric", "circular", "obust", "huber", + "weibull", "cauchy","probab", "logistic", "nbinomial", "vonmises" }; +static int fvals[17] = + { TDEN, TRAT, THAZ, TGAUS, TLOGT, + TPOIS, TGAMM, TGEOM, TCIRC, TROBT, TROBT, + TWEIB, TCAUC, TPROB, TLOGT, TGEOM, TCIRC }; + +INT lffamily(z) +char *z; +{ INT quasi, robu, f; + quasi = robu = 0; + while ((z[0]=='q') | (z[0]=='r')) + { quasi |= (z[0]=='q'); + robu |= (z[0]=='r'); + z++; + } + f = pmatch(z,famil,fvals,16,-1); + if ((z[0]=='o') | (z[0]=='a')) robu = 0; + if (f==-1) + { WARN(("unknown family %s",z)); + f = TGAUS; + } + if (quasi) f += 64; + if (robu) f += 128; + return(f); +} + +void getlffam(z,x) +char **z; +INT *x; +{ *x = lffamily(z[0]); +} + +static char *wfuns[13] = { + "rectangular", "epanechnikov", "bisquare", "tricube", + "triweight", "gaussian", "triangular", "ququ", + "6cub", "minimax", "exponential", "maclean", "parametric" }; +static int wvals[13] = { WRECT, WEPAN, WBISQ, WTCUB, + WTRWT, WGAUS, WTRIA, WQUQU, W6CUB, WMINM, WEXPL, WMACL, WPARM }; + +static char *ktype[3] = { "spherical", "product", "center" }; +static int kvals[3] = { KSPH, KPROD, KCE }; + +static char *ltype[8] = { "default", "canonical", "identity", "log", + "logi", "inverse", "sqrt", "arcsin" }; +static int lvals[8] = { LDEFAU, LCANON, LIDENT, LLOG, + LLOGIT, LINVER, LSQRT, LASIN }; + +static char *etype[9] = { "tree", "phull", "data", "grid", "kdtree", + "kdcenter", "cross", "xbar", "none" }; +static int evals[9] = { ETREE, EPHULL, EDATA, EGRID, EKDTR, + EKDCE, ECROS, EXBAR, ENONE }; + +static char *itype[6] = { "default", "multi", "product", "mlinear", + "hazard", "monte" }; +static int ivals[6] = { IDEFA, IMULT, IPROD, IMLIN, IHAZD, IMONT }; + +static char *atype[5] = { "none", "cp", "ici", "mindex", "ok" }; +static int avals[5] = { ANONE, ACP, AKAT, AMDI, AOK }; + +void setstrval(mi,v,z) +INT *mi, v; +char *z; +{ + switch(v) + { case MKER: + mi[v] = pmatch(z, wfuns, wvals, 13, WTCUB); + return; + + case MKT: + mi[v] = pmatch(z, ktype, kvals, 3, KSPH); + return; + + case MTG: + 
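+      /* family strings are routed through lffamily(), which strips any
+       * leading 'q' (quasi) and 'r' (robust) prefixes and folds them
+       * into the returned family code before matching the table above. */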
mi[v] = lffamily(z); + return; + + case MLINK: + mi[v] = pmatch(z, ltype, lvals, 8, LDEFAU); + return; + + case MIT: + mi[v] = pmatch(z, itype, ivals, 6, IDEFA); + return; + + case MEV: + mi[v] = pmatch(z, etype, evals, 9, ETREE); + return; + + case MACRI: + mi[v] = pmatch(z, atype, avals, 5, ANONE); + return; + } + + WARN(("setstrval: invalid value %d",v)); + return; +} + +static char *rtype[8] = { "deviance", "d2", "pearson", "raw", + "ldot", "lddot", "fit", "mean" }; +static int rvals[8] = { RDEV, RDEV2, RPEAR, RRAW, RLDOT, RLDDT, RFIT, RMEAN}; + +static char *whtyp[8] = { "coef", "nlx", "infl", "band", + "degr", "like", "rdf", "vari" }; +static int whval[8] = { PCOEF, PNLX, PT0, PBAND, PDEGR, PLIK, PRDF, PVARI }; + +INT restyp(z) +char *z; +{ int val; + + val = pmatch(z, rtype, rvals, 8, -1); + if (val==-1) ERROR(("Unknown type = %s",z)); + return((INT)val); +} + +INT ppwhat(z) +char *z; +{ int val; + + val = pmatch(z, whtyp, whval, 8, -1); + if (val==-1) ERROR(("Unknown what = %s",z)); + return((INT)val); +} diff --git a/src/locfit/lfstruc.h b/src/locfit/lfstruc.h new file mode 100644 index 0000000..80caea6 --- /dev/null +++ b/src/locfit/lfstruc.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1998-2000 Lucent Technologies. + * See README file for details. + * + * + * + * Structures, typedefs etc used in Locfit + */ + +typedef char varname[64]; + +/* + * Define the vari type for locfit variables and related macros. + * For the C version, an enhanced vari type is needed; + * for other versions a simple structure suffices. + */ +#ifdef CVERSION + +typedef struct { + varname name; + INT n, bytes, mode, stat; + double *dpr; } vari; +#define checkvarlen(v,n,name,mode) (createvar(name,STSYSTEM,n,mode)) +#define vmode(v) ((v)->mode) + +#else + +typedef struct { + INT n; + double *dpr; +} vari; +#define viptr(v,i) (&(v)->dpr[i]) +#define checkvarlen(v,len,name,mode) \ + ((((v)!=NULL) && (vlength(v) >= (len))) ? 
(v) : createvar((name),0,(len),(mode))) +#endif + +#define vlength(v) ((v)->n) + +typedef struct { + char *arg, *val; + vari *result; + INT used; } carg; + +typedef struct { + void (*AddColor)(), (*SetColor)(), (*ClearScreen)(), (*TextDim)(), (*DoText)(); + void (*DrawPoint)(), (*DrawLine)(), (*DrawPatch)(), (*wrapup)(); + INT (*makewin)(), ticklength, defth, deftw; +} device; + +typedef struct { + vari *wk; + double *coef, *xbar, *f; + jacobian xtwx; } paramcomp; + +typedef struct { + vari *tw, *L, *iw, *xxev; + double *x[MXDIM], *y, *w, *base, *c; + double *coef, *nlx, *t0, *lik, *h, *deg; + double *sv, dp[LEND], kap[3]; + double sca[MXDIM], fl[2*MXDIM], xl[2*MXDIM]; + INT *ce, *s, *lo, *hi, sty[MXDIM]; + INT *mg, nvm, ncm, vc; + INT nl, nv, nnl, nce, nk, nn, mi[LENM], ord, deriv[MXDEG+2], nd; + paramcomp pc; + varname yname, xname[MXDIM], wname, bname, cname; } lfit; + +#define datum(lf,i,j) (lf)->x[i][j] +#define dvari(lf,i) (lf)->x[i] +#define evpt(lf,i) (&(lf)->xxev->dpr[(i)*(lf)->mi[MDIM]]) +#define evptx(lf,i,k) ((lf)->xxev->dpr[(i)*(lf)->mi[MDIM]+(k)]) + +typedef struct { + vari *data[MXDIM], *fit, *se; + INT d, wh, gr; +} pplot; + +typedef struct { + char cmd; + double x, *v, (*f)(); + INT m, nx[3]; + vari *vv; } arstruct; + +typedef struct { + vari *x, *y, *z; + char type; + INT id, t, n, nx, ny, pch; } plxyz; + +typedef struct { + double theta, phi, xl[2], yl[2], zl[2], sl[10]; + INT id, ty, nsl; + char main[50], xlab[50], ylab[50], zlab[50]; + vari *track, *xyzs; } plots; + +#define PLNONE 0 +#define PLDATA 1 +#define PLFIT 2 +#define PLTRK 4 + +struct lfcol { + char name[10]; + INT n, r, g, b; +}; diff --git a/src/locfit/lfwin.h b/src/locfit/lfwin.h new file mode 100755 index 0000000..a96015e --- /dev/null +++ b/src/locfit/lfwin.h @@ -0,0 +1,117 @@ +#define LFM_EXIT 0 +#define LFM_COPY 1 +#define LFM_PASTE 2 +#define LFM_RUN 3 + +#define LFM_READA 10 +#define LFM_SAVED 11 +#define LFM_READD 12 +#define LFM_SUMD 13 +#define LFM_PLOTD 18 + +#define LFM_LOCF 20 +#define LFM_READF 22 +#define LFM_SUMF 23 +#define LFM_PRFIT 24 + +#define LFM_ALPH 70 +#define LFM_FIXH 71 +#define LFM_APEN 72 +#define LFM_DEG0 75 +#define LFM_DEG1 76 +#define LFM_DEG2 77 +#define LFM_DEG3 78 + +#define LFM_ABOUT 81 +#define LFM_INDEX 82 +#define LFM_READM 83 +#define LFM_WWW 84 + +#define LFP_ROT 10 +#define LFP_STY 11 +#define LFP_PS 42 +#define LFP_COL 13 + +#define LFP_XLAB 20 +#define LFP_YLAB 21 +#define LFP_ZLAB 22 +#define LFP_MAIN 23 + +#define AB_WWW 10 + +#define CM_LINE 1 +#define CM_OK 99 + +#define RL_ALP 0 +#define RL_ALPV 1 +#define RL_H 2 +#define RL_HV 3 +#define RL_PEN 4 +#define RL_PENV 5 +#define RL_DEG 10 +#define RL_FORM 20 +#define RL_FAMY 21 +#define RL_QUAS 22 +#define RL_ROBU 23 +#define RL_FIT 98 +#define RL_OK 99 + +#define RP_VS 1 +#define RP_HS 2 +#define RP_AUT 3 +#define RP_DRAW 98 +#define RP_OK 99 + +#define PD_X 1 +#define PD_Y 2 +#define PD_Z 3 +#define PD_DRAW 10 +#define PD_ADD 11 +#define PD_WIN 12 + +#define PS_FIL 1 +#define PS_DR 8 +#define PS_CA 9 +#define PS_H 10 +#define PS_W 11 + +#define SC_COL 1 +#define SC_SCO 2 +#define SC_DR 8 +#define SC_OK 9 + +#define VN_VN 1 +#define VN_SA 2 +#define VN_RF 98 +#define VN_CA 99 + +#define BP_ALP 1 +#define BP_ALV 2 +#define BP_AUT 3 +#define BP_FIT 4 +#define BP_EX 99 + +#define GR_CM 10 +#define GR_ST 11 + +#define LB_LAB 10 +#define LB_DRAW 11 + +#define LD_QUIT 99 + +/* about.c */ +extern void AboutDlg(); + +/* devwin.c */ +extern void getwinsize(), GetFontInfo(); + +/* dlgraph.c */ +extern void GStyleDlg(), 
LabelDlg(), PostDlg(), RotateDlg(), SetColDlg(); + +/* winfile.c */ +extern void ReadFileDlg(), ReadDataDlg(), SaveDataDlg(), RunDlg(); +extern void ReadFitDlg(); + +/* windlg.c */ +extern void BandDlg(), LocfitDlg(), PlotDataDlg(), wlocfit_dispatch(); +extern int LFDefDlgProc(); diff --git a/src/locfit/linalg.c b/src/locfit/linalg.c new file mode 100644 index 0000000..3244538 --- /dev/null +++ b/src/locfit/linalg.c @@ -0,0 +1,337 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +#include "local.h" + +void svd(x,p,q,d,mxit) /* svd of square matrix */ +double *x, *p, *q; +INT d, mxit; +{ INT i, j, k, iter, ms, zer; + double r, u, v, cp, cm, sp, sm, c1, c2, s1, s2, mx; + for (i=0; i1.0e-15*fabs(x[i*d+i]*x[j*d+j])) + { if (fabs(x[i*(d+1)])0) { cp /= r; sp /= r; } + else { cp = 1.0; zer = 0;} + cm = x[i*(d+1)]-x[j*(d+1)]; + sm = x[i*d+j]+x[j*d+i]; + r = sqrt(cm*cm+sm*sm); + if (r>0) { cm /= r; sm /= r; } + else { cm = 1.0; zer = 0;} + c1 = cm+cp; + s1 = sm+sp; + r = sqrt(c1*c1+s1*s1); + if (r>0) { c1 /= r; s1 /= r; } + else { c1 = 1.0; zer = 0;} + if (fabs(s1)>ms) ms = fabs(s1); + c2 = cm+cp; + s2 = sp-sm; + r = sqrt(c2*c2+s2*s2); + if (r>0) { c2 /= r; s2 /= r; } + else { c2 = 1.0; zer = 0;} + for (k=0; k0) + { mx = D[0]; + for (i=1; imx) mx = D[i*(d+1)]; + tol *= mx; + } + rank = 0; + for (i=0; itol) + { w[i] /= D[i*(d+1)]; + rank++; + } + for (i=0; i0) + { mx = D[0]; + for (i=1; imx) mx = D[i*(d+1)]; + tol *= mx; + } + for (i=0; itol) w[i] /= sqrt(D[i*(d+1)]); + for (i=0; imx) { mi = i; mx = fabs(X[p*i+j]); } + } + for (i=0; i0) + { for (i=j; i=0; i--) + { for (j=i+1; j=i0; i--) + { for (j=i+1; j +#include +#include +#include + +#ifdef RVERSION +#undef LGAMMA +#define LGAMMA(arg) Rf_lgammafn(arg) +extern double Rf_lgammafn(); +#define SVERSION +#endif + +#ifdef SWINVERSION +#define SVERSION +#include "newredef.h" +#endif + +#include "mutil.h" +#include "lfcons.h" +#include "lfstruc.h" +#include "design.h" +#include "lffuns.h" + +#ifdef CVERSION +//#undef printf +//#define printf lfprintf +//extern int lfprintf(const char *format, ...); +//extern int printf(const char *format, ...); +#endif + +#ifdef SVERSION +#define printf printf +#endif + +#ifdef INTERFACE +#define printf printf +#endif + +#define ERROR(args) printf("Error: "), printf args , printf("\n"), lf_error=1 +#define WARN(args) printf("Warning: "),printf args, printf("\n") + +#define MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define MIN(a,b) (((a)<(b)) ? (a) : (b)) +#define SGN(x) (((x)>0) ? 1 : -1) +#define SQR(x) ((x)*(x)) +#define NOSLN 0.1278433 +#define GFACT 2.5 +#define EFACT 3.0 + +#define MAXCOLOR 20 +#define MAXWIN 5 + +#ifdef SWINVERSION +#define ISWAP(a,b) { int zz; zz = a; a = b; b = zz; } +#else +#define ISWAP(a,b) { INT zz; zz = a; a = b; b = zz; } +extern INT lf_error; +#endif + +extern INT lf_error; + +double lf_exp(double x); + +#endif /* I_LF_H */ diff --git a/src/locfit/locfit.c b/src/locfit/locfit.c new file mode 100644 index 0000000..b62dffb --- /dev/null +++ b/src/locfit/locfit.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
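+ *
+ * locfit() computes a single local fit: lfinit() picks starting
+ * coefficients for the fitting family, then lfiter() maximizes the
+ * local likelihood (likereg()) by damped Newton-Raphson via max_nr().
+ *
+ * Rough sketch of how the pieces fit together (orientation only; the
+ * drivers elsewhere also build the local design matrix and weights):
+ *
+ *   des->xev = x;               evaluation point
+ *   h = nbhd(lf,des,nn,fxh,0);  smoothing weights and bandwidth
+ *   locfit(lf,des,h,0);         local fit; coefficients in des->cf[]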
+ */ + +#include "local.h" + +static double s0, s1, tol; +static lfit *lf_lf; +static design *lf_des; +int lf_status; +int ident=0; +int (*like)(); +extern double robscale; + +int likereg(coef, lk0, f1, Z) +double *coef, *lk0, *f1, *Z; +{ INT i, ii, j, p, *mi; + double lk, ww, link[LLEN], *X; + lf_status = LF_OK; + lk = 0.0; p = lf_des->p; + mi = lf_lf->mi; + setzero(Z,p*p); + setzero(f1,p); + for (i=0; in; i++) + { ii = lf_des->ind[i]; + X = d_xi(lf_des,i); + lf_des->th[i] = base(lf_lf,ii)+innerprod(coef,X,p); + lf_status = stdlinks(link,lf_lf,ii,lf_des->th[i],robscale); + if (lf_status == LF_BADP) + { *lk0 = -1.0e300; + return(NR_REDUCE); + } + if (lf_error) lf_status = LF_ERR; + if (lf_status != LF_OK) return(NR_BREAK); + + ww = lf_des->w[i]; + lk += ww*link[ZLIK]; + for (j=0; j2) prresp(coef,Z,p); + if (mi[MDEB]>1) printf(" likelihood: %8.5f\n",lk); + *lk0 = lf_des->llk = lk; + + switch (lf_lf->mi[MTG]&63) /* parameter checks */ + { case TGAUS: /* prevent iterations! */ + if ((mi[MLINK]==LIDENT)&((mi[MTG]&128)==0)) return(NR_BREAK); + break; + case TPOIS: + case TGEOM: + case TWEIB: + case TGAMM: + if ((mi[MLINK]==LLOG) && (fabs(coef[0])>700)) + { lf_status = LF_OOB; + return(NR_REDUCE); + } + if (lk > -1.0e-5*s0) + { lf_status = LF_PF; + return(NR_REDUCE); + } + break; + case TRBIN: + case TLOGT: + if (lk > -1.0e-5*s0) + { lf_status = LF_PF; + return(NR_REDUCE); + } + if (fabs(coef[0])>700) + { lf_status = LF_OOB; + return(NR_REDUCE); + } + break; + } + return(NR_OK); +} + +INT robustinit(lf,des) +lfit *lf; +design *des; +{ int i; + for (i=0; in; i++) + des->res[i] = resp(lf,des->ind[i])-base(lf,des->ind[i]); + des->cf[0] = median(des->res,des->n); + for (i=1; ip; i++) des->cf[i] = 0.0; + tol = 1.0e-6; + return(LF_OK); +} + +INT circinit(lf,des) +lfit *lf; +design *des; +{ int i, ii; + double s0, s1; + s0 = s1 = 0.0; + for (i=0; in; i++) + { ii = des->ind[i]; + s0 += des->w[i]*prwt(lf,ii)*sin(resp(lf,ii)-base(lf,ii)); + s1 += des->w[i]*prwt(lf,ii)*cos(resp(lf,ii)-base(lf,ii)); + } + des->cf[0] = atan2(s0,s1); + for (i=1; ip; i++) des->cf[i] = 0.0; + tol = 1.0e-6; + return(LF_OK); +} + +INT reginit(lf,des) +lfit *lf; +design *des; +{ int i, ii; + double sb, link[LLEN]; + s0 = s1 = sb = 0; + for (i=0; in; i++) + { ii = des->ind[i]; + links(base(lf,ii),resp(lf,ii),lf->mi[MTG],LINIT,link,cens(lf,ii),prwt(lf,ii),1.0); + s1 += des->w[i]*link[ZDLL]; + s0 += des->w[i]*prwt(lf,ii); + sb += des->w[i]*prwt(lf,ii)*base(lf,ii); + } + if (s0==0) return(LF_NOPT); /* no observations with W>0 */ + setzero(des->cf,des->p); + tol = 1.0e-6*s0; + switch(lf->mi[MLINK]) + { case LIDENT: + des->cf[0] = (s1-sb)/s0; + return(LF_OK); + case LLOG: + if (s1<=0.0) + { des->cf[0] = -1000; + return(LF_INFA); + } + des->cf[0] = log(s1/s0) - sb/s0; + return(LF_OK); + case LLOGIT: + if (s1<=0.0) + { des->cf[0] = -1000; + return(LF_INFA); + } + if (s1>=s0) + { des->cf[0] = +1000; + return(LF_INFA); + } + des->cf[0] = logit(s1/s0)-sb/s0; + return(LF_OK); + case LINVER: + if (s1<=0.0) + { des->cf[0] = 1000; + return(LF_INFA); + } + des->cf[0] = s0/s1-sb/s0; + return(LF_OK); + case LSQRT: + des->cf[0] = sqrt(s1/s0)-sb/s0; + return(LF_OK); + case LASIN: + des->cf[0] = asin(sqrt(s1/s0))-sb/s0; + return(LF_OK); + default: + ERROR(("reginit: invalid link %d",lf->mi[MLINK])); + return(LF_ERR); + } +} + +int lfinit(lf,des) +lfit *lf; +design *des; +{ + //double u[MXDIM]; + INT *mi; + + mi = lf->mi; + des->xtwx.sm = (mi[MDEG0]h,des->cf,des->n)); + case TCAUC: + case TROBT: + return(robustinit(lf,des)); + case TCIRC: + 
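+      /* circular / von Mises responses: circinit() starts from the
+       * weighted mean direction atan2(sum w*sin(y-base), sum w*cos(y-base)). */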
return(circinit(lf,des)); + default: + return(reginit(lf,des)); + } +} + +void lfiter(lf,des) +lfit *lf; +design *des; +{ int err; + max_nr(like, des->cf, des->oc, des->res, des->f1, + &des->xtwx, (int)des->p, (int)lf->mi[MMXIT], tol, &err); + switch(err) + { case NR_OK: return; + case NR_NCON: + WARN(("max_nr not converged")); + return; + case NR_NDIV: + WARN(("max_nr reduction problem")); + return; + } + WARN(("max_nr return status %d",err)); +} + +int use_robust_scale(int tg) +{ if ((tg&64)==0) return(0); /* not quasi - no scale */ + if (((tg&128)==0) & (((tg&63)!=TROBT) & ((tg&63)!=TCAUC))) return(0); + return(1); +} + +int locfit(lf,des,h,noit) +lfit *lf; +design *des; +double h; +int noit; +{ int i, p; + + if (lf->mi[MDEB]>0) + { printf("locfit: "); + for (i=0; imi[MDIM]; i++) printf(" %10.6f",des->xev[i]); + printf(" h = %8.5f\n",h); + } + + lf_lf = lf; + lf_des = des; + des->h = h; + p = des->p; + + lf_status = lfinit(lf,des); + if (lf_status != LF_OK) return(lf_status); + + if (use_robust_scale((int)lf->mi[MTG])) + lf_robust(lf,des); + else + { robscale = 1.0; + lfiter(lf,des); + } + + if (lf_status == LF_OOB) + for (i=1; icf[i] = 0.0; + + if ((lf->mi[MTG]&63)==TDEN) /* convert from rate to density */ + { switch(lf->mi[MLINK]) + { case LLOG: + des->cf[0] -= log(lf->dp[DSWT]); + break; + case LIDENT: + multmatscal(des->cf,1.0/lf->dp[DSWT],des->p); + break; + default: ERROR(("Density adjustment; invalid link")); + } + } + + return(lf_status); +} diff --git a/src/locfit/m_chol.c b/src/locfit/m_chol.c new file mode 100644 index 0000000..b14f757 --- /dev/null +++ b/src/locfit/m_chol.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +#include +#include "mutil.h" + +void chol_dec(A,n) +double *A; +int n; +{ int i, j, k; + for (j=0; j=0; i--) + { for (j=i+1; j +#include "mutil.h" +#define E_MAXIT 20 +#define E_TOL 1.0e-8 +#define SQR(x) ((x)*(x)) + +double e_tol(D,p) +double *D; +int p; +{ double mx; + int i; + if (E_TOL <= 0.0) return(0.0); + mx = D[0]; + for (i=1; imx) mx = D[i*(p+1)]; + return(E_TOL*mx); +} + +void eig_dec(X,P,d) +double *X, *P; +int d; +{ int i, j, k, iter, ms; + double c, s, r, u, v; + + for (i=0; i 1.0e-15*fabs(X[i*d+i]*X[j*d+j])) + { c = (X[j*d+j]-X[i*d+i])/2; + s = -X[i*d+j]; + r = sqrt(c*c+s*s); + c /= r; + s = sqrt((1-c)/2)*(2*(s>0)-1); + c = sqrt((1+c)/2); + for (k=0; kZ; + P = Q = J->Q; + d = J->p; + w = J->wk; + + tol = e_tol(D,d); + + rank = 0; + for (i=0; itol) + { w[i] /= D[i*(d+1)]; + rank++; + } + for (i=0; iZ; + Q = J->Q; + p = J->p; + w = J->wk; + + tol = e_tol(D,p); + + for (i=0; itol) + { w[i] /= sqrt(D[i*(p+1)]); + rank++; + } + return(rank); +} + +double eig_qf(J,v) +jacobian *J; +double *v; +{ int i, j, p; + double sum, tol; + + p = J->p; + sum = 0.0; + tol = e_tol(J->Z,p); + + for (i=0; iZ[i*p+i]>tol) + { J->wk[i] = 0.0; + for (j=0; jwk[i] += J->Q[j*p+i]*v[j]; + sum += J->wk[i]*J->wk[i]/J->Z[i*p+i]; + } + return(sum); +} diff --git a/src/locfit/m_jacob.c b/src/locfit/m_jacob.c new file mode 100644 index 0000000..2b6e8c2 --- /dev/null +++ b/src/locfit/m_jacob.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
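+ *
+ * A jacobian structure holds a p x p matrix in J->Z plus workspace; the
+ * routines below factor it once (Cholesky, eigen, or eigen of the
+ * correlation matrix) and jacob_solve(), jacob_hsolve() and jacob_qf()
+ * then reuse that factorization.
+ *
+ * Sketch of typical use (assuming A is a symmetric p x p matrix):
+ *
+ *   jacobian J;
+ *   double *wk = jac_alloc(&J, p, NULL);   attaches Z, Q, wk, dg
+ *   ...copy A into J.Z...  J.p = p;  J.st = JAC_RAW;
+ *   jacob_solve(&J, b);    overwrites b with A^{-1} b, factoring on
+ *                          first use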
+ */ + +#include +#include "math.h" +#include "stdio.h" +#include "mutil.h" + +#define DEF_METH JAC_EIGD + +int jac_reqd(int p) { return(2*p*(p+1)); } + +double *jac_alloc(J,p,wk) +jacobian *J; +int p; +double *wk; +{ if (wk==NULL) + wk = (double *)calloc(2*p*(p+1),sizeof(double)); + J->Z = wk; wk += p*p; + J->Q = wk; wk += p*p; + J->wk= wk; wk += p; + J->dg= wk; wk += p; + return(wk); +} + +void jacob_dec(J, meth) +jacobian *J; +int meth; +{ int i, j, p; + + if (J->st != JAC_RAW) return; + + J->sm = J->st = meth; + switch(meth) + { case JAC_EIG: + eig_dec(J->Z,J->Q,J->p); + return; + case JAC_EIGD: + p = J->p; + for (i=0; idg[i] = (J->Z[i*(p+1)]<=0) ? 0.0 : 1/sqrt(J->Z[i*(p+1)]); + for (i=0; iZ[i*p+j] *= J->dg[i]*J->dg[j]; + eig_dec(J->Z,J->Q,J->p); + J->st = JAC_EIGD; + return; + case JAC_CHOL: + chol_dec(J->Z,J->p); + return; + default: printf("jacob_dec: unknown method %d",meth); + } +} + +int jacob_solve(J,v) /* (X^T W X)^{-1} v */ +jacobian *J; +double *v; +{ int i, rank; + + if (J->st == JAC_RAW) jacob_dec(J,DEF_METH); + + switch(J->st) + { case JAC_EIG: + return(eig_solve(J,v)); + case JAC_EIGD: + for (i=0; ip; i++) v[i] *= J->dg[i]; + rank = eig_solve(J,v); + for (i=0; ip; i++) v[i] *= J->dg[i]; + return(rank); + case JAC_CHOL: + return(chol_solve(J->Z,v,J->p)); + } + printf("jacob_solve: unknown method %d",J->st); + return(0); +} + +int jacob_hsolve(J,v) /* J^{-1/2} v */ +jacobian *J; +double *v; +{ int i; + + if (J->st == JAC_RAW) jacob_dec(J,DEF_METH); + + switch(J->st) + { case JAC_EIG: + return(eig_hsolve(J,v)); + case JAC_EIGD: /* eigenvalues on corr matrix */ + for (i=0; ip; i++) v[i] *= J->dg[i]; + return(eig_hsolve(J,v)); + case JAC_CHOL: + return(chol_hsolve(J->Z,v,J->p)); + } + printf("jacob_hsolve: unknown method %d",J->st); + return(0); +} + +double jacob_qf(J,v) /* vT J^{-1} v */ +jacobian *J; +double *v; +{ int i; + + if (J->st == JAC_RAW) jacob_dec(J,DEF_METH); + + switch (J->st) + { case JAC_EIG: + return(eig_qf(J,v)); + case JAC_EIGD: + for (i=0; ip; i++) v[i] *= J->dg[i]; + return(eig_qf(J,v)); + case JAC_CHOL: + return(chol_qf(J->Z,v,J->p)); + default: + printf("jacob_qf: invalid method\n"); + return(0.0); + } +} diff --git a/src/locfit/m_max.c b/src/locfit/m_max.c new file mode 100644 index 0000000..845207a --- /dev/null +++ b/src/locfit/m_max.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * Routines for maximization of a one dimensional function f() + * over an interval [xlo,xhi]. In all cases. the flag argument + * controls the return: + * flag='x', the maximizer xmax is returned. + * otherwise, maximum f(xmax) is returned. + * + * max_grid(f,xlo,xhi,n,flag) + * grid maximization of f() over [xlo,xhi] with n intervals. + * + * max_golden(f,xlo,xhi,n,tol,err,flag) + * golden section maximization. + * If n>2, an initial grid search is performed with n intervals + * (this helps deal with local maxima). + * convergence criterion is |x-xmax| < tol. + * err is an error flag. + * if flag='x', return value is xmax. + * otherwise, return value is f(xmax). + * + * max_quad(f,xlo,xhi,n,tol,err,flag) + * quadratic maximization. + * + * max_nr() + * newton-raphson, handles multivariate case. + * + * TODO: additional error checking, non-convergence stop. + */ + +#include +#include +#include "mutil.h" +extern double innerprod(); + +#define gold_rat 0.6180339887498948482045870 +#define max_val(x,y) ((flag=='x') ? 
x : y) + +double max_grid(f,xlo,xhi,n,flag) +double (*f)(), xlo, xhi; +int n; +char flag; +{ int i, mi; + mi = 0; + double x, y, mx, my; + my = 0.0; + for (i=0; i<=n; i++) + { x = xlo + (xhi-xlo)*i/n; + y = f(x); + if ((i==0) || (y>my)) + { mx = x; + my = y; + mi = i; + } + } + if (mi==0) return(max_val(xlo,my)); + if (mi==n) return(max_val(xhi,my)); + return(max_val(mx,my)); +} + +double max_golden(f,xlo,xhi,n,tol,err,flag) +double (*f)(), xhi, xlo, tol; +int n, *err; +char flag; +{ double x0, x1, x2, x3, y0, y1, y2, y3; + *err = 0; + + if (n>2) + { x0 = max_grid(f,xlo,xhi,n,'x'); + if (xlox0) xhi = x0+1.0/n; + } + + x0 = xlo; y0 = f(xlo); + x3 = xhi; y3 = f(xhi); + x1 = gold_rat*x0 + (1-gold_rat)*x3; y1 = f(x1); + x2 = gold_rat*x3 + (1-gold_rat)*x1; y2 = f(x2); + + while (fabs(x3-x0)>tol) + { if ((y1>=y0) && (y1>=y2)) + { x3 = x2; y3 = y2; + x2 = x1; y2 = y1; + x1 = gold_rat*x0 + (1-gold_rat)*x3; y1 = f(x1); + } + else if ((y2>=y3) && (y2>=y1)) + { x0 = x1; y0 = y1; + x1 = x2; y1 = y2; + x2 = gold_rat*x3 + (1-gold_rat)*x1; y2 = f(x2); + } + else + { if (y3>y0) { x0 = x2; y0 = y2; } + else { x3 = x1; y3 = y1; } + x1 = gold_rat*x0 + (1-gold_rat)*x3; y1 = f(x1); + x2 = gold_rat*x3 + (1-gold_rat)*x1; y2 = f(x2); + } + } + if (y0>=y1) return(max_val(x0,y0)); + if (y3>=y2) return(max_val(x3,y3)); + return((y1>y2) ? max_val(x1,y1) : max_val(x2,y2)); +} + +double max_quad(f,xlo,xhi,n,tol,err,flag) +double (*f)(), xhi, xlo, tol; +int n, *err; +char flag; +{ double x0, x1, x2, xnew, y0, y1, y2, ynew, a, b; + *err = 0; + + if (n>2) + { x0 = max_grid(f,xlo,xhi,n,'x'); + if (xlox0) xhi = x0+1.0/n; + } + + x0 = xlo; y0 = f(x0); + x2 = xhi; y2 = f(x2); + x1 = (x0+x2)/2; y1 = f(x1); + + while (x2-x0>tol) + { + /* first, check (y0,y1,y2) is a peak. If not, + * next interval is the halve with larger of (y0,y2). + */ + if ((y0>y1) | (y2>y1)) + { + if (y0>y2) { x2 = x1; y2 = y1; } + else { x0 = x1; y0 = y1; } + x1 = (x0+x2)/2; + y1 = f(x1); + } + else /* peak */ + { a = (y1-y0)*(x2-x1) + (y1-y2)*(x1-x0); + b = ((y1-y0)*(x2-x1)*(x2+x1) + (y1-y2)*(x1-x0)*(x1+x0))/2; + /* quadratic maximizer is b/a. But first check if a's too + * small, since we may be close to constant. + */ + if ((a<=0) | (bx2*a)) + { /* split the larger halve */ + xnew = ((x2-x1) > (x1-x0)) ? (x1+x2)/2 : (x0+x1)/2; + } + else + { xnew = b/a; + if (10*xnew < (9*x0+x1)) xnew = (9*x0+x1)/10; + if (10*xnew > (9*x2+x1)) xnew = (9*x2+x1)/10; + if (fabs(xnew-x1) < 0.001*(x2-x0)) + { + if ((x2-x1) > (x1-x0)) + xnew = (99*x1+x2)/100; + else + xnew = (99*x1+x0)/100; + } + } + ynew = f(xnew); + if (xnew>x1) + { if (ynew >= y1) { x0 = x1; y0 = y1; x1 = xnew; y1 = ynew; } + else { x2 = xnew; y2 = ynew; } + } + else + { if (ynew >= y1) { x2 = x1; y2 = y1; x1 = xnew; y1 = ynew; } + else { x0 = xnew; y0 = ynew; } + } + } + } + return(max_val(x1,y1)); +} + +double max_nr(F, coef, old_coef, f1, delta, J, p, maxit, tol, err) +double *coef, *old_coef, *f1, *delta, tol; +int (*F)(), p, maxit, *err; +jacobian *J; +{ double old_f, f, lambda; + int i, j, fr; + double nc, nd, cut; + int rank; + + *err = NR_OK; + J->p = p; + fr = F(coef, &f, f1, J->Z); J->st = JAC_RAW; + + for (i=0; i1.0) cut = 1.0; + cut *= 0.0001; + do + { for (j=0; jZ); J->st = JAC_RAW; + if (fr==NR_BREAK) return(f); + + lambda = (fr==NR_REDUCE) ? 
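+          /* backtracking: halve the step after a REDUCE request from F(),
+             otherwise cut it by a factor of 10 */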
lambda/2 : lambda/10.0; + } while ((lambda>cut) & (f <= old_f - 1.0e-3)); + + if (f < old_f - 1.0e-3) { *err = NR_NDIV; return(f); } + if (fr==NR_REDUCE) return(f); + + if (fabs(f-old_f) < tol) return(f); + + } + *err = NR_NCON; + return(f); +} diff --git a/src/locfit/makecmd.c b/src/locfit/makecmd.c new file mode 100644 index 0000000..12ced82 --- /dev/null +++ b/src/locfit/makecmd.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * + * The makecmd() function converts a command line string + * into a locfit command variable. If the line has no + * commands (for example, a blank line or a comment) + * it returns NULL; otherwise, it returns the pointer to + * the command variable. + * + * The command line is split into arguments, with arguments + * separated by spaces. Exception: A space in a quoted + * "str ing" is not split into separate fields. + * + * getcmd() returns a pointer to the next line for processing. + * If no lines are ready for processing, it returns NULL. + * + * del_lines() frees the work space used by processed command lines. + * + * set_forvar(), inc_forvar(), dec_forvar() are used in the + * control of for loops. + */ + +#include "local.h" + +#define isterminator(c) (((c)=='\0') | ((c)=='\n') | ((c)=='#')) +static int clcount = 0; +static int proc_to = 0; +static int del_to = 0; +extern vari *curstr; +extern char filename[]; + +typedef struct { + vari *v; + char *name; + int line_no; + int index; } forvar; +static forvar fv[10]; +static int for_level = 0, proc_level = 0; + +int countfields(z) +char *z; +{ int n, instr; + + n = 0; + instr = 0; + + while (1) + { while (*z==' ') z++; + if (isterminator(*z)) return(n); + + n++; + + while ((instr) || (*z!=' ')) + { if (isterminator(*z)) + { if (instr) ERROR(("Unterminated String")); + return(n); + } + if (*z=='"') instr = !instr; + z++; + } + } +} + +void makefields(z, va, n) +char *z; +vari *va; +int n; +{ int i, instr; + char *st, *eq; + carg *curr_arg; + + instr = 0; + for (i=0; ival = st = z; + curr_arg->arg = NULL; + curr_arg->result = NULL; + + eq = NULL; + do + { if (*z=='"') instr = !instr; + if ((eq==NULL) && (!instr) && (*z=='=')) eq = z; + z++; + } while ((instr) || (*z !=' ') && (!isterminator(*z))); + *z = '\0'; + + if (eq != NULL) + { if (eq==st) + { ERROR(("command line argument begins with =")); + return; + } + if ((eq[1]!='=') & (strchr("<>!",eq[-1])==NULL)) + { curr_arg->arg = st; + curr_arg->val = &eq[1]; + *eq = '\0'; + } + } /* eq != */ + z++; + + } /* for i */ +} + +/* + * set_forvar and inc_forvar are used to control for loops. + * set_forvar is called when the for cmd is built, making the + * variable to loop through. + * inc_forvar is called when the for is processed, to update + * the value of the variable. + * dec_forvar is called when the endfor is processed. This + * resets the proc_to line count to the beginning of the for loop. 
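+ *
+ * Processed lines are kept as "=cline%d" / "=clstr%d" variables, so a
+ * for loop body can be replayed simply by rewinding proc_to; del_lines()
+ * later frees the stored lines up to the last one processed.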
+ */ +void set_forvar(v,ct) +vari *v; +int ct; +{ + varname vn; + + if (vlength(v)<2) + { ERROR(("for: missing variable")); + return; + } + + sprintf(vn,"=forv%d",for_level); + fv[for_level].v = varith(argval(v,1),vn,STHIDDEN); + fv[for_level].line_no = ct; + fv[for_level].index = 0; + fv[for_level].name = argarg(v,1); + for_level++; +} + +void inc_forvar() +{ varname vn; + vari *v, *va; + double x; + + if (fv[proc_level].name == NULL) + { sprintf(vn,"=fv%d",proc_level); + v = createvar(vn,STHIDDEN,1,VDOUBLE); + } + else + v = createvar(fv[proc_level].name,STREGULAR,1,VDOUBLE); + + va = fv[proc_level].v; + x = vitem(va, fv[proc_level].index); + vassn(v,0,x); + fv[proc_level].index++; + proc_level++; +} + +void dec_forvar() +{ proc_level--; + if (fv[proc_level].index < vlength(fv[proc_level].v)) + proc_to = fv[proc_level].line_no - 1; + else + fv[proc_level].index = 0; +} + +void run(va) +vari *va; +{ FILE *runf; + char cline[256], *z; + + if (vlength(va)<2) + { ERROR(("run: no command file")); + return; + } + + if (!setfilename(argval(va,1),"","r",0)) + { ERROR(("run: cannot read file %s",argval(va,1))); + return; + } + + runf = fopen(filename,"r"); + while (1) + { z = fgets(cline,256,runf); + if (z==NULL) + { fclose(runf); + return; + } + makecmd(cline); + } +} + +void makecmd(cmdline) +char *cmdline; +{ + varname vn; + vari *va, *vs; + int n; + + n = countfields(cmdline); + if (lf_error) return; + if (n==0) return; + clcount++; + + /* vs is used to store the command line string. */ + sprintf(vn,"=clstr%d",clcount); + vs = createvar(vn,STSYSTEM,1+strlen(cmdline),VCHAR); + sprintf((char *)vdptr(vs),cmdline); + + /* va is used to store pointers to the command line fields. */ + sprintf(vn,"=cline%d",clcount); + va = createvar(vn,STSYSTEM,(INT)n,VARGL); + makefields((char *)vdptr(vs), va, n); + + if (argvalis(va,0,"for")) set_forvar(va,clcount); + if (argvalis(va,0,"endfor")) for_level--; + +/* we want to read in run files here, not when commands are executed. + * otherwise, we would have problems with run commands in a for loop. + */ + if (argvalis(va,0,"run")) run(va); + + return; +} + +void del_lines() +{ int i; + varname vn; + for (i=proc_to; i>del_to; i--) + { sprintf(vn,"=cline%d",i); + deletename(vn); + sprintf(vn,"=clstr%d",i); + deletename(vn); + } + del_to = proc_to; +} + +vari *getcmd() +{ + varname vn; + vari *v; + + if (for_level > 0) return(NULL); + + if (proc_to < clcount) + { + sprintf(vn,"=cline%d",++proc_to); + v = findvar(vn,1,NULL); + if (v==NULL) return(v); + +/* this nonsense is req'd by setplot and setdef. + * get rid of it, I hope. + */ +sprintf(vn,"=clstr%d",proc_to); +curstr = findvar(vn,1,NULL); + + return(v); + } + + return(NULL); +} diff --git a/src/locfit/math.c b/src/locfit/math.c new file mode 100644 index 0000000..a420542 --- /dev/null +++ b/src/locfit/math.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + miscellaneous functions that may not be defined in the math + libraries. The implementations are crude. + lflgamma(x) -- log(gamma(x)) + lferf(x) -- erf(x) + lferfc(x) -- erfc(x) + lfdaws(x) -- dawson's function + + where required, these must be #define'd in local.h. + + also includes + ptail(x) -- exp(x*x/2)*int_{-\infty}^x exp(-u^2/2)du for x < -6. + logit(x) -- logistic function. + expit(x) -- inverse of logit. 
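+
+   For reference: logit(x) = log(x/(1-x)) and expit(logit(x)) = x.
+   expit() is evaluated as exp(x)/(1+exp(x)) when x < 0, so large
+   negative arguments do not overflow exp(-x).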
+ */ + +#include "local.h" + +double lferfc(); + +double lferf(x) +double x; +{ static double val[] = { 0.0, 0.52049987781304674, + 0.84270079294971501, 0.96610514647531076, 0.99532226501895282, + 0.99959304798255499, 0.99997790950300125 }; + double h, xx, y, z, f0, f1, f2; + int m, j; + if (x<0) return(-lferf(-x)); + if (x>3.2) return(1-lferfc(x)); + m = (int) (2*x+0.5); + xx= ((double)m)/2; + h = x-xx; y = h; + f0 = val[m]; + f1 = 2*exp(-xx*xx)/SQRPI; + z = f0+h*f1; + j = 0; + while (fabs(y)>1.0e-12) + { f2 = -2*j*f0-2*xx*f1; + f0 = f1; f1 = f2; + y *= h/(j+2); + z += y*f2; + j++; + } + return(z); +} + +double lferfc(x) +double x; +{ if (x<0) return(1+lferf(-x)); + if (x<2.5) return(1-lferf(x)); + return(exp(-x*x)/(x*SQRPI)); +} + +double lflgamma(x) +double x; +{ double x1; + static double ilg[] = { 0.0, 0.0, 0.69314718055994529, + 1.791759469228055, 3.1780538303479458, 4.7874917427820458, 6.5792512120101012, + 8.5251613610654147, 10.604602902745251, 12.801827480081469 }; + static double hlg[] = { 0.57236494292470008, -0.12078223763524520, + 0.28468287047291918, 1.20097360234707430, 2.45373657084244230, + 3.95781396761871650, 5.66256205985714270, 7.53436423675873360, + 9.54926725730099870, 11.68933342079726900 }; + + if (x<=0.0) return(0.0); + if (x<10) + { if (x==(int)x) return(ilg[(int)x-1]); + if ((x-0.5)==(int)(x-0.5)) return(hlg[(int)(x-0.5)]); + } + if (x<3) return(lflgamma(x+1)-log(x)); + + x1 = x-1; + return(HL2PI+(x1+0.5)*log(x1)-x1+1/(12*x1)); +} + +double lfdaws(x) +double x; +{ static double val[] = { + 0, 0.24485619356002, 0.46034428261948, 0.62399959848185, 0.72477845900708, + 0.76388186132749, 0.75213621001998, 0.70541701910853, 0.63998807456541, + 0.56917098836654, 0.50187821196415, 0.44274283060424, 0.39316687916687, + 0.35260646480842, 0.31964847250685, 0.29271122077502, 0.27039629581340, + 0.25160207761769, 0.23551176224443, 0.22153505358518, 0.20924575719548, + 0.19833146819662, 0.18855782729305, 0.17974461154688, 0.17175005072385 }; + double h, f0, f1, f2, y, z, xx; + int j, m; + if (x<0) return(-daws(-x)); + if (x>6) + { /* Tail series: 1/x + 1/x^3 + 1.3/x^5 + 1.3.5/x^7 + ... */ + y = z = 1/x; + j = 0; + while (((f0=(2*j+1)/(x*x))<1) && (y>1.0e-10*z)) + { y *= f0; + z += y; + j++; + } + return(z); + } + m = (int) (4*x); + h = x-0.25*m; + if (h>0.125) + { m++; + h = h-0.25; + } + xx = 0.25*m; + f0 = val[m]; + f1 = 1-xx*f0; + z = f0+h*f1; + y = h; + j = 2; + while (fabs(y)>z*1.0e-10) + { f2 = -(j-1)*f0-xx*f1; + y *= h/j; + z += y*f2; + f0 = f1; f1 = f2; + j++; + } + return(z); +} + +double ptail(x) /* exp(x*x/2)*int_{-\infty}^x exp(-u^2/2)du for x < -6 */ +double x; +{ double y, z, f0; + int j; + y = z = -1.0/x; + j = 0; + while ((fabs(f0= -(2*j+1)/(x*x))<1) && (fabs(y)>1.0e-10*z)) + { y *= f0; + z += y; + j++; + } + return(z); +} + +double logit(x) +double x; +{ return(log(x/(1-x))); +} + +double expit(x) +double x; +{ double u; + if (x<0) + { u = exp(x); + return(u/(1+u)); + } + return(1/(1+exp(-x))); +} + +int factorial(n) +int n; +{ if (n<=1) return(1.0); + return(n*factorial(n-1)); +} diff --git a/src/locfit/minmax.c b/src/locfit/minmax.c new file mode 100644 index 0000000..1dd7ec3 --- /dev/null +++ b/src/locfit/minmax.c @@ -0,0 +1,302 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * Compute minimax weights for local regression. 
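+ *
+ * Outline: setmmwt() gives observation i the weight innerprod(a,X_i)
+ * soft-thresholded at gam*wd[i]; findab() solves for gam (eqn 13.17,
+ * see the comment in minmax() below) by the secant method; minmax()
+ * then returns, as the bandwidth h, the largest distance di[i] that
+ * still receives a nonzero weight.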
+ */ + +#include "local.h" + +int mmsm_ct; + +static int debug=0; +#define CONVTOL 1.0e-8 +#define SINGTOL 1.0e-10 +#define NR_SINGULAR 100 + +static lfit *mm_lf; +static design *mm_des; +static double mm_gam; + +double ipower(x,n) /* use for n not too large!! */ +double x; +int n; +{ if (n==0) return(1.0); + if (n<0) return(1/ipower(x,-n)); + return(x*ipower(x,n-1)); +} + +double setmmwt(des,lf,a,gam) +design *des; +lfit *lf; +double *a, gam; +{ double ip, w0, w1, sw, wt; + INT i, p; + sw = 0.0; + p = lf->mi[MP]; + for (i=0; imi[MN]; i++) + { ip = innerprod(a,d_xi(des,i),p); + wt = prwt(lf,i); + w0 = ip - gam*des->wd[i]; + w1 = ip + gam*des->wd[i]; + des->w[i] = 0.0; + if (w0>0) { des->w[i] = w0; sw += wt*w0*w0; } + if (w1<0) { des->w[i] = w1; sw += wt*w1*w1; } + } + return(sw/2-a[0]); +} + +/* compute sum_{w!=0} AA^T; e1-sum wA */ +int mmsums(coef,f,z,J) +double *coef, *f, *z; +jacobian *J; +{ int i, j, p, sing; + double *A; + + mmsm_ct++; + A = J->Z; + *f = setmmwt(mm_des,mm_lf,coef,mm_gam); + + p = mm_lf->mi[MP]; + setzero(A,p*p); + setzero(z,p); + z[0] = 1.0; + + for (i=0; imi[MN]; i++) + if (mm_des->w[i]!=0.0) + { addouter(A,d_xi(mm_des,i),d_xi(mm_des,i),p,prwt(mm_lf,i)); + for (j=0; jw[i]*mm_des->X[i*p+j]; + } + + J->st = JAC_RAW; + jacob_dec(J,JAC_EIGD); + + sing = 0; + for (i=0; iZ[i*p+i]xtwx.Z[i*p+i]xtwx.dg[sd]>0) + for (i=0; ixtwx.Q[p*i+sd]*des->xtwx.dg[i]; + else + { for (i=0; ixtwx); + + c0 = c1 = 0.0; + for (i=0; ixtwx.Z[i*p+j]*tmp[j]; + } + if (debug) printf("sdir: c0 %8.5f c1 %8.5f z %8.5f %8.5f tmp %8.5f %8.5f\n",c0,c1,z[0],z[1],tmp[0],tmp[1]); + if (c0<0) for (i=0; isw0-CONVTOL) /* go back one step */ + { f /= 2; + for (i=0; ixtwx); + if (st==NR_OK) return(0); + coef[0] *= 2; + if (coef[0]>1e8) return(1); + } +} + +int mmax(coef, old_coef, f1, delta, J, p, maxit, tol, err) +double *coef, *old_coef, *f1, *delta, tol; +int p, maxit, *err; +jacobian *J; +{ double f, old_f, lambda; + int i, j, fr, sing; + + *err = NR_OK; + J->p = p; + J->st = JAC_RAW; + fr = mmsums(coef,&f,f1,J); + + for (j=0; jst = JAC_RAW; + if (j==0) printf("init singular\n"); + f = updatesd(mm_des,mm_lf,delta,p,coef,old_coef,f,mm_gam); + fr = mmsums(coef,&f,f1,J); + } + else + { + jacob_solve(J,f1); + memcpy(delta,f1,p*sizeof(double)); + /* printf("delta %8.5f %8.5f\n",f1[0],f1[1]); */ + lambda = 1.0; + do + { + for (i=0; ist = JAC_RAW; + fr = mmsums(coef,&f,f1,J); + + lambda = lambda/2.0; + /* if (fr==NR_SINGULAR) printf("singular\n"); */ + } while (((lambda>0.000000001) & (f > old_f+0.001)) /* | (fr==NR_SINGULAR) */ ); + + if (f>old_f+0.001) { printf("lambda prob\n"); *err = NR_NDIV; return(f); } + + } + if (f==0.0) + { if (sing) printf("final singular - conv\n"); + return(f); + } + + if (debug) + { for (i=0; i0) & (fabs(f-old_f)mi[MP]; + + /* starting values for nr iteration */ + coef = mm_des->cf; + for (i=0; if1, p, coef)) + { WARN(("findab: initial value divergence")); + return(0.0); + } + else + mmax(coef, mm_des->oc, mm_des->res, mm_des->f1, + &mm_des->xtwx, p, mm_lf->mi[MMXIT], CONVTOL, &nr_stat); + + if (nr_stat != NR_OK) return(0.0); + + sl = 0.0; + for (i=0; imi[MN]; i++) sl += fabs(mm_des->w[i])*mm_des->wd[i]; + + return(sl-gam); +} + +double weightmm(coef,di,ff,mi,gam) +double *coef, di, *ff, gam; +INT *mi; +{ double y1, y2, ip; + ip = innerprod(ff,coef,mi[MP]); + y1 = ip-gam*di; if (y1>0) return(y1/ip); + y2 = ip+gam*di; if (y2<0) return(y2/ip); + return(0.0); +} + +double minmax(lf,des) +lfit *lf; +design *des; +{ double h, u[MXDIM], gam; + int i, j, m, d1, p1, err_flag; + + mmsm_ct = 0; + 
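+  /* wd[i] = (alpha/(deg+1)!) * ||x_i - x||^(deg+1) is the bias-bound
+   * term that multiplies gamma in the criterion below; alpha is
+   * lf->dp[DALP]. */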
d1 = lf->mi[MDEG]+1; + p1 = factorial(d1); + for (i=0; imi[MN]; i++) + { for (j=0; jmi[MDIM]; j++) u[j] = datum(lf,j,i); + des->wd[i] = lf->dp[DALP]/p1*ipower(des->di[i],d1); + des->ind[i] = i; + fitfun(lf,u,des->xev,d_xi(des,i),NULL,(INT)0); + } + /* designmatrix(lf,des); */ + + /* find gamma (i.e. solve eqn 13.17 from book), using the secant method. + * As a side effect, this finds the other minimax coefficients. + * First, set some global pointers a, mm_lf, mm_des. + * Note that 13.17 is rewritten as + * g2 = sum |l_i(x)| (||xi-x||^(p+1) M/(s*(p+1)!)) + * where g2 = gamma * s * (p+1)! / M. The gam variable below is g2. + * The smoothing parameter is lf->dp[DALP] == M/s. + */ + mm_lf = lf; + mm_des = des; + gam = solve_secant(findab, 0.0, 0.0,1.0, 0.0000001, BDF_EXPRIGHT, &err_flag); + + /* + * Set the smoothing weights, in preparation for the actual fit. + */ + h = 0.0; m = 0; + for (i=0; imi[MN]; i++) + { des->w[m] = weightmm(des->cf, des->wd[i],&des->X[i*lf->mi[MP]],lf->mi,gam); + if (des->w[m]>0) + { if (des->di[i]>h) h = des->di[i]; + des->ind[m] = i; + m++; + } + } + des->n = m; + return(h); +} diff --git a/src/locfit/mutil.h b/src/locfit/mutil.h new file mode 100644 index 0000000..3a7d825 --- /dev/null +++ b/src/locfit/mutil.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1998-2000 Lucent Technologies. + * See README file for details. + * + * + * Headers for math utility functions. + */ + +#ifndef I_MUT_H +#define I_MUT_H + +typedef struct { + double *Z; /* jacobian matrix, length p*p */ + double *Q; /* eigenvalue matrix, length p*p */ + double *wk; /* work vector in eig_solve, length p */ + double *dg; /* diag vector in eigd, length p */ + int p; /* dimension */ + int st; /* status */ + int sm; /* requested decomposition */ +} jacobian; + +/* m_jacob.c */ +extern int jac_reqd(); +extern double *jac_alloc(); +extern void jacob_dec(), chol_dec(), eig_dec(); +extern int jacob_solve(), chol_solve(), eig_solve(); +extern int jacob_hsolve(),chol_hsolve(),eig_hsolve(); +extern double jacob_qf(), chol_qf(), eig_qf(); + +/* m_max.c */ +extern double max_grid(), max_golden(), max_quad(), max_nr(); + +/* solve.c */ +extern double solve_secant(), solve_nr(), solve_fp(); + +#define BDF_NONE 0 +#define BDF_EXPLEFT 1 +#define BDF_EXPRIGHT 2 + +/* return codes for functions optimized by max_nr */ +#define NR_OK 0 +#define NR_INVALID 1 +#define NR_BREAK 2 +#define NR_REDUCE 3 +#define NR_NCON 10 +#define NR_NDIV 11 + + +/* jacobian status definitions */ +#define JAC_RAW 0 +#define JAC_CHOL 1 +#define JAC_EIG 2 +#define JAC_EIGD 3 + + +#endif /* define I_MUT_H */ diff --git a/src/locfit/nbhd.c b/src/locfit/nbhd.c new file mode 100644 index 0000000..23b8fe4 --- /dev/null +++ b/src/locfit/nbhd.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * + * Functions for determining bandwidth; smoothing neighborhood + * and smoothing weights. 
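+ *
+ * compbandwid() returns the fixed bandwidth fxh when no nearest-neighbor
+ * count is given, and otherwise bases the bandwidth on the nn-th
+ * smallest distance (see kordstat()); nbhd1() is a fast path for data
+ * sorted on a single predictor.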
+ */ + +#include "local.h" + +extern vari *vb; + +double rho(x,sc,d,kt,sty) /* ||x|| for appropriate distance metric */ +double *x, *sc; +INT d, kt, *sty; +{ double rhoi[MXDIM], s; + INT i; + for (i=0; is) s = rhoi[i]; + } + return(s); + } + + if (kt==KSPH) + { for (i=0; i=i0) && (x[ind[r]]>piv)) r--; + if (l<=r) ISWAP(ind[l],ind[r]); + } /* now, x[ind[i0..r]] <= piv < x[ind[l..i1]] */ + if (r=xlim[j]) & (datum(lf,j,i)<=xlim[j+d])); + } + return(k); +} + +double compbandwid(di,ind,x,n,d,nn,fxh) +double *di, *x, fxh; +INT n, d, *ind, nn; +{ INT i; + double nnh; + +#ifdef CVERSION + if (nn<0) + return(dareval(vb,0,x)); +#endif + + if (nn<=0) return(fxh); + + if (nnmi[MN]; + x = des->xev[0]; + xd = dvari(lf,0); + + /* find closest data point to x */ + if (x<=xd[0]) z = 0; + else + if (x>=xd[n-1]) z = n-1; + else + { l = 0; r = n-1; + while (r-l>1) + { z = (r+l)/2; + if (xd[z]>x) r = z; + else l = z; + } + /* now, xd[0..l] <= x < x[r..n-1] */ + if ((x-xd[l])>(xd[r]-x)) z = r; else z = l; + } + /* closest point to x is xd[z] */ + + if (k>0) /* set h to nearest neighbor bandwidth */ + { l = r = z; + if (l==0) r = k-1; + if (r==n-1) l = n-k; + while (r-lx) z--; /* so xd[z]<=x */ + /* look left */ + for (i=z; i>=0; i--) if (inlim(lf,lf->xl,i,1)) + { des->di[i] = x-xd[i]; + des->w[m] = weight(lf,&xd[i],&x,h,1,des->di[i]); + if (des->w[m]>0) + { des->ind[m] = i; + m++; + } else i = 0; + } + /* look right */ + for (i=z+1; ixl,i,1)) + { des->di[i] = xd[i]-x; + des->w[m] = weight(lf,&xd[i],&x,h,1,des->di[i]); + if (des->w[m]>0) + { des->ind[m] = i; + m++; + } else i = n; + } + + des->n = m; + return(h); +} + +double nbhd(lf,des,nn,fxh,redo) +lfit *lf; +design *des; +double fxh; +INT redo, nn; +{ INT d, i, j, m, n, *mi; + double h, u[MXDIM]; + + mi = lf->mi; + + if (mi[MKT]==KCE) return(0.0); + d = mi[MDIM]; n = mi[MN]; + + /* ordered 1-dim; use fast searches */ + if ((nn<=n) & (lf->ord) & (mi[MKER]!=WMINM) & (lf->sty[0]!=STANGL)) + return(nbhd1(lf,des,nn,fxh)); + + if (!redo) + { for (i=0; ixev[j]; + des->di[i] = rho(u,lf->sca,d,mi[MKT],lf->sty); + des->ind[i] = i; + } + } + else + for (i=0; iind[i] = i; + + if (mi[MKER]==WMINM) return(minmax(lf,des)); + + h = compbandwid(des->di,des->ind,des->xev,n,mi[MDIM],nn,fxh); + + m = 0; + for (i=0; ixl,i,d)) + { for (j=0; jw[m] = weight(lf,u,des->xev,h,1,des->di[i]); + if (des->w[m]>0) + { des->ind[m] = i; + m++; + } + } + des->n = m; + return(h); +} diff --git a/src/locfit/pcomp.c b/src/locfit/pcomp.c new file mode 100644 index 0000000..ad2a749 --- /dev/null +++ b/src/locfit/pcomp.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
+ * + * + * functions for computing and subtracting, adding the + * parametric component + */ + +#include "local.h" + +INT noparcomp(lf) +lfit *lf; +{ + INT tg; + if (lf->mi[MDEG0]mi[MDEG]) return(1); + if (lf->mi[MUBAS]) return(1); + tg = lf->mi[MTG] & 63; + if (tg<=THAZ) return(1); + if (tg==TROBT) return(1); + if (tg==TCAUC) return(1); + return(0); +} + +INT hasparcomp(lf) +lfit *lf; +{ + return(lf->mi[MPC]); +} + +int pc_reqd(d,p) +INT d, p; +{ + return(d + 2*p + jac_reqd(p)); +} + +void pcchk(pc,d,p,lc) +paramcomp *pc; +INT d, p, lc; +{ + //INT k; + double *z; + pc->wk = checkvarlen(pc->wk,pc_reqd(d,p),"_pcwork",VDOUBLE); + z = vdptr(pc->wk); + //k = 0; + + pc->xbar = z; z += d; + pc->coef = z; z += p; + pc->f = z; z += p; + + z = jac_alloc(&pc->xtwx,p,z); + pc->xtwx.p = p; +} + +void compparcomp(des,lf,nopc) +design *des; +lfit *lf; +INT nopc; +{ + INT i, j, k; + double wt, sw; + paramcomp *pc; + pc = &lf->pc; + pcchk(pc,lf->mi[MDIM],lf->mi[MP],1); + + if (pc == NULL) + { + fprintf(stderr, "Error: locfit cannot allocate pc working memory\n"); + pcchk(pc,lf->mi[MDIM],lf->mi[MP],1); + return; + } + + for (i=0; imi[MDIM]; i++) pc->xbar[i] = 0.0; + sw = 0.0; + for (i=0; imi[MN]; i++) + { + wt = prwt(lf,i); + sw += wt; + for (j=0; jmi[MDIM]; j++) + pc->xbar[j] += datum(lf,j,i)*wt; + des->ind[i] = i; + des->w[i] = 1.0; + } + for (i=0; imi[MDIM]; i++) pc->xbar[i] /= sw; + if ((nopc) || noparcomp(lf)) + { lf->mi[MPC] = 0; + return; + } + lf->mi[MPC] = 1; + des->xev = pc->xbar; + k = locfit(lf,des,0.0,0); + + if (lf_error) return; + switch(k) + { case LF_NOPT: + ERROR(("compparcomp: no points in dataset?")); + return; + case LF_INFA: + ERROR(("compparcomp: infinite parameters in param. component")); + return; + case LF_NCON: + ERROR(("compparcom: not converged")); + return; + case LF_OOB: + ERROR(("compparcomp: parameters out of bounds")); + return; + case LF_PF: + WARN(("compparcomp: perfect fit")); + case LF_OK: + for (i=0; imi[MP]; i++) + { pc->coef[i] = des->cf[i]; + pc->xtwx.dg[i] = des->xtwx.dg[i]; + pc->xtwx.wk[i] = des->xtwx.wk[i]; + } + for (i=0; imi[MP]*lf->mi[MP]; i++) + { pc->xtwx.Z[i] = des->xtwx.Z[i]; + pc->xtwx.Q[i] = des->xtwx.Q[i]; + } + pc->xtwx.sm = des->xtwx.sm; + pc->xtwx.st = des->xtwx.st; + return; + default: + ERROR(("compparcomp: locfit unknown return status %d",k)); + return; + } +} + +void subparcomp(des,lf,coef) +design *des; +lfit *lf; +double *coef; +{ INT i, *deriv, nd; + + if (!hasparcomp(lf)) return; + + deriv = lf->deriv; + nd = lf->nd; + fitfun(lf,des->xev,lf->pc.xbar,des->f1,deriv,nd); + coef[0] -= innerprod(lf->pc.coef,des->f1,lf->mi[MP]); + if (des->ncoef == 1) return; + + for (i=0; imi[MDIM]; i++) + { deriv[nd] = i; + fitfun(lf,des->xev,lf->pc.xbar,des->f1,deriv,nd+1); + coef[i+1] -= innerprod(lf->pc.coef,des->f1,lf->mi[MP]); + } +} + +void subparcomp2(des,lf,vr,il) +design *des; +lfit *lf; +double *vr, *il; +{ double t0, t1; + INT i, *deriv, nd, *mi; + + if (!hasparcomp(lf)) return; + + mi = lf->mi; + deriv = lf->deriv; + nd = lf->nd; + fitfun(lf,des->xev,lf->pc.xbar,des->f1,deriv,nd); + for (i=0; ipc.f[i] = des->f1[i]; + jacob_solve(&lf->pc.xtwx,des->f1); + t0 = sqrt(innerprod(lf->pc.f,des->f1,mi[MP])); + vr[0] -= t0; + il[0] -= t0; + if ((t0==0) | (des->ncoef==1)) return; + + for (i=0; ixev,lf->pc.xbar,lf->pc.f,deriv,nd+1); + t1 = innerprod(lf->pc.f,des->f1,mi[MP])/t0; + vr[i+1] -= t1; + il[i+1] -= t1; + } +} + +double addparcomp(lf,x,c) +lfit *lf; +double *x; +int c; +{ double y; + if (!hasparcomp(lf)) return(0.0); + 
fitfun(lf,x,lf->pc.xbar,lf->pc.f,lf->deriv,lf->nd); + if (c==PCOEF) return(innerprod(lf->pc.coef,lf->pc.f,lf->mi[MP])); + if ((c==PNLX)|(c==PT0)|(c==PVARI)) + { y = sqrt(jacob_qf(&lf->pc.xtwx,lf->pc.f)); + return(y); + } + return(0.0); +} diff --git a/src/locfit/pout.c b/src/locfit/pout.c new file mode 100644 index 0000000..04eab50 --- /dev/null +++ b/src/locfit/pout.c @@ -0,0 +1,780 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * + * print a plot structure in the format of various graph packages + */ + +#include "local.h" + +#ifdef CVERSION + +#define XCON(x) (INT)(i0+(i1-i0)*(x-px[0])/(px[1]-px[0])) +#define YCON(y) (INT)(k0+(k1-k0)*(y-py[0])/(py[1]-py[0])) + +extern double sin(), cos(), sqrt(), atan2(), ceil(), floor(), log10(), pow(); +static INT i0, i1, k0, k1, cw, ch; +#ifdef YAWN +static FILE *plf; +#endif +extern INT curwin, lfcm[10]; + +#ifdef NOPIPES +FILE *popen(const char *cmd,const char *mode) { return(NULL); } +int pclose(FILE *file) { return(0); } +#endif + +extern device devps, devwin; + +char f2(fmt) +char *fmt; +{ char *z; + z = strchr(fmt,','); + if (z==NULL) return('d'); + z++; + return(*z); +} + +INT pretty(xl,k,z) +double *xl, *z; +INT k; +{ double dlt, m; + INT i, j, u; + if (k<=0) return(0); + dlt = (xl[1]-xl[0])/k; + m = floor(log10(dlt)); + dlt *= pow(10.0,-m); + if (dlt<2) u = 2; + else if (dlt<5) u = 5; + else { u = 1; m++; } /* increments should be u*10^m; */ + dlt = u*pow(10.0,m); + i = (INT)ceil(xl[0]/dlt); + j = 0; + while ((jx; y = xyz->y; z = xyz->z; + if ((x!=NULL) & (y!=NULL) & (z!=NULL)) + { if ((x->n*y->n)==z->n) + { xyz->nx = x->n; + xyz->ny = y->n; + xyz->n = z->n; + xyz->t = 1; + return; + } + if ((x->n>1) & (y->n>1)) + { i = 0; n = z->n; + while ((in) && (vitem(y,0)==vitem(y,i))) i++; + if ((i>1) && (i*(n/i)==n)) + { xyz->nx = n/i; + xyz->ny = i; + xyz->n = n; + xyz->t = 1; + return; + } + } + } + xyz->t = 0; + xyz->n = 0; + if ((x!=NULL) && (x->n>xyz->n)) xyz->n = x->n; + if ((y!=NULL) && (y->n>xyz->n)) xyz->n = y->n; + if ((z!=NULL) && (z->n>xyz->n)) xyz->n = z->n; +} + +void getxyzitem(x,xyz,i) +plxyz *x; +double xyz[3]; +int i; +{ xyz[0] = vitem(x->x,i); + if (x->t==1) + xyz[1] = vitem(x->y,i/x->x->n); + else + xyz[1] = vitem(x->y,i); + xyz[2] = vitem(x->z,i); +} + +static double xl[2], yl[2], zl[2], px[2], py[2]; + +void project(z,x,theta,phi) +double *z, *x, theta, phi; +{ double z0, z1, z2; + z0 = (z[0]-xl[0])/(xl[1]-xl[0]); + z1 = (z[1]-yl[0])/(yl[1]-yl[0]); + z2 = (z[2]-zl[0])/(zl[1]-zl[0]); + x[0] = cos(theta)*z0-sin(theta)*z1; + x[1] = (sin(theta)*z0+cos(theta)*z1)*cos(phi)+sin(phi)*z2; +} + +void iproject(z,i,theta,phi) +double *z, theta, phi; +INT *i; +{ double x[2]; + project(z,x,theta,phi); + i[0] = XCON(x[0]); i[1] = YCON(x[1]); +} + +void line3d(z1,z2,theta,phi,dev,col) +double *z1, *z2, theta, phi; +device *dev; +INT col; +{ INT x1[2], x2[2]; + iproject(z1,x1,theta,phi); + iproject(z2,x2,theta,phi); + dev->SetColor(lfcm[col]); + dev->DrawLine(x1[0],x1[1],x2[0],x2[1]); +} + +void xyztext(tx,x,ah,av,theta,phi,dev,col) +double *x, theta, phi; +INT ah, av, col; +char *tx; +device *dev; +{ INT xy[2]; + iproject(x,xy,theta,phi); + dev->SetColor(lfcm[col]); + dev->DoText(0,xy[0],xy[1],tx,ah,av); +} + +int getgreylevel(z) +double z; +{ int c; + c = 8+11*(z-zl[0])/(zl[1]-zl[0]); + if (c<8) return(8); + if (c>18)return(18); + return(c); +} + +void points3d(x,theta,phi,dev,type) +plxyz *x; +double theta, phi; +char type; +device *dev; +{ INT i, xy[2]; + double xyz[3]; + for (i=0; in; i++) + { 
+ getxyzitem(x,xyz,i); + iproject(xyz,xy,theta,phi); + if (type=='q') + dev->SetColor(getgreylevel(xyz[2])); + else + dev->SetColor(lfcm[CPOI]); + dev->DrawPoint(xy[0],xy[1],x->pch); + } +} + +void lines3d(xyz,theta,phi,dev) +plxyz *xyz; +double theta, phi; +device *dev; +{ INT i; + double x0[3], x1[3]; + getxyzitem(xyz,x0,0); + for (i=1; in; i++) + { if (i&1) + { getxyzitem(xyz,x1,i); + line3d(x0,x1,theta,phi,dev,CLIN); + } + else + { getxyzitem(xyz,x0,i); + line3d(x1,x0,theta,phi,dev,CLIN); + } + } +} + +void segments(xyz0,xyz1,theta,phi,dev) +plxyz *xyz0, *xyz1; +double theta, phi; +device *dev; +{ INT i, n; + double x0[3], x1[3]; + n = xyz0->n; + if (xyz1->n>n) n = xyz1->n; + for (i=0; it==0) ERROR(("Contour: not a grid")); + if (lf_error) return; + if (nsl==0) nsl = pretty(zl,10,sl); + nx = x->nx; ny = x->ny; + for (k=0; k0)+4*(u[2]>0)+2*(u[1]>0)+(u[0]>0); + switch(s) + { case 0: + case 15: break; + case 1: + case 14: + line3d(x0,x3,theta,phi,dev,CCON); + break; + case 2: + case 13: + line3d(x0,x1,theta,phi,dev,CCON); + break; + case 3: + case 12: + line3d(x3,x1,theta,phi,dev,CCON); + break; + case 4: + case 11: + line3d(x3,x2,theta,phi,dev,CCON); + break; + case 5: + case 10: + line3d(x0,x2,theta,phi,dev,CCON); + break; + case 6: + case 9: + line3d(x0,x1,theta,phi,dev,CCON); + break; + case 7: + case 8: + line3d(x1,x2,theta,phi,dev,CCON); + break; + default: ERROR(("severe contouring error...")); + } + } + } +} + +double angle(x0,x1,x2) /* rotation angle from (x0,x1) to (x0,x2) */ +double *x0, *x1, *x2; +/* If x0=0, ||x1=1|| then express + x2 = u x1 + v y1 where y1=(-x11,x10) = 90 deg anticlk rot. + i.e. u = v = + tan(theta) = v/u + atan2(v,u) returns positive for anticlkws rot; + negative for clockwise rot. +*/ +{ double u, v; + u = (x1[0]-x0[0])*(x2[0]-x0[0]) + (x1[1]-x0[1])*(x2[1]-x0[1]); + v = -(x1[1]-x0[1])*(x2[0]-x0[0]) + (x1[0]-x0[0])*(x2[1]-x0[1]); + return(atan2(v,u)); +} + +void persp3d(xyz,theta,phi,DP,sl,nsl) +plxyz *xyz; +double theta, phi, *sl; +void (*DP)(); +INT nsl; +{ INT i, j, k, m, nx, ny, ii, jj, cb, cp, cx[4], cy[4], xhi, yhi; + double u[4][3], w[4][2], r; + if (xyz->t==0) ERROR(("persp3d: not a grid")); + if (lf_error) return; + /* theta=135 --- 225 + | | + 45 --- 315; + i.e start at top right for theta=45 e.t.c. Reverse if sin(phi)<0. 
+ x starts hi if cos(theta)>0 + y starts hi if sin(theta)>0 + */ + xhi = (cos(theta)*sin(phi)) > 0; + yhi = (sin(theta)*sin(phi)) > 0; + nx = xyz->nx; ny = xyz->ny; + for (i=0; i1); + jj += (k==1)+(k==2); + m = jj*nx+ii; + getxyzitem(xyz,u[k],m); + project(u[k],w[k],theta,phi); + cx[k] = XCON(w[k][0]); cy[k] = YCON(w[k][1]); + } + + switch(xyz->type) + { case 'w': + /* wireframe: + from top, use color CPA1 for border, CPA2 for patch + angles are anti-clock from top; r approx 2pi + clock from bot; r approx -2pi + */ + r = angle(w[0],w[3],w[1])+angle(w[1],w[0],w[2]) + +angle(w[2],w[1],w[3])+angle(w[3],w[2],w[0]); + if (r>0) { cb = lfcm[CPA1]; cp = lfcm[CPA2]; } + else { cp = lfcm[CPA1]; cb = lfcm[CPA2]; } + DP(cx,cy,cp,cb); + break; + case 'i': /* image */ + if (nsl==0) nsl = pretty(zl,10,sl); + r = u[0][2] + u[1][2] + u[2][2] + u[3][2]; + cb = cp = getgreylevel(r/4); + DP(cx,cy,cp,cb); + break; + } + } +} + +void updatelim(v,xl) +vari *v; +double *xl; +{ INT i; + if ((v==NULL) || (vlength(v)==0)) return; + if (xl[0]==xl[1]) + xl[0] = xl[1] = vitem(v,0); + for (i=0; ixl[1]) xl[1] = vitem(v,i); + } +} + +void axis(z1,z2,zl,lab,theta,phi,a,s,dev) +double *z1, *z2, *zl, theta, phi; +char *lab; +INT a, s; +device *dev; +{ double x1[3], x2[3], z[50]; + INT i, u0, u1, v0, v1, n, horiz; + char lb[20]; + dev->SetColor(lfcm[CAXI]); + project(z1,x1,theta,phi); + project(z2,x2,theta,phi); + u0 = XCON(x1[0]); v0 = XCON(x2[0]); + u1 = YCON(x1[1]); v1 = YCON(x2[1]); + horiz = abs(v0-u0)>abs(v1-u1); + dev->DrawLine(u0,u1,v0,v1); + n = (INT)sqrt((double)((v0-u0)*(v0-u0)+(v1-u1)*(v1-u1)))/((horiz) ? 5*cw : 2*ch); + if (n>50) n = 50; + n = pretty(zl,n,z); + if (n==0) return; + x1[0] = z1[0]; x1[1] = z1[1]; x1[2] = z1[2]; + if (abs(v0-u0)>abs(v1-u1)) /* horizontal axis */ + { dev->SetColor(lfcm[CTEX]); + if (lab!=NULL) + dev->DoText(0,(u0+v0)/2,(u1+v1)/2+s*(dev->ticklength+ch),lab,0,s); + for (i=0; iticklength; + sprintf(lb,"%g",z[i]); + dev->SetColor(lfcm[CAXI]); + dev->DrawLine(u0,u1,v0,v1); + dev->SetColor(lfcm[CTEX]); + dev->DoText(0,v0,v1,lb,0,s); + } } + else /* vertical axis */ + { s = 2*((2*v0)>(i0+i1))-1; + dev->SetColor(lfcm[CTEX]); + if (lab!=NULL) + dev->DoText(0,v0,v1-ch,lab,-s,-1); + for (i=0; iticklength; v1 = u1; + sprintf(lb,"%g",z[i]); + dev->SetColor(lfcm[CAXI]); + dev->DrawLine(u0,u1,v0,v1); + dev->SetColor(lfcm[CTEX]); + dev->DoText(0,v0,v1,lb,-s,0); + } } +} + +void plotxwin(pl,dev,wn,w,h,rd) +plots *pl; +device *dev; +INT wn, w, h, rd; +{ INT i, j, k, s; + double z[3], z2[3], xx[2], vx, vy, vz; + static double theta, phi; + plxyz *xyz; + if (pl->ty==PLNONE) return; + if (h<=0) h = dev->defth; + if (w<=0) w = dev->deftw; + if (!dev->makewin(&w,&h,wn,rd)) return; + dev->TextDim(0,"0",&cw,&ch); + i0 = 4*cw+dev->ticklength; i1 = w-2*cw; + k0 = h-3*ch-dev->ticklength; k1 = 3*ch; + dev->ClearScreen(lfcm[CBAK]); + if (pl->xl[0]xl[1]) + { xl[0] = pl->xl[0]; xl[1] = pl->xl[1]; } + else + { xl[0] = xl[1] = 0.0; + for (i=0; ixyzs->n; i++) + { xyz = (plxyz *)viptr(pl->xyzs,i); + updatelim(xyz->x,xl); + } + if (xl[0]==xl[1]) { xl[0] -= 0.5; xl[1] += 0.5; } + } + if (pl->yl[0]yl[1]) + { yl[0] = pl->yl[0]; yl[1] = pl->yl[1]; } + else + { yl[0] = yl[1] = 0.0; + for (i=0; ixyzs->n; i++) + { xyz = (plxyz *)viptr(pl->xyzs,i); + updatelim(xyz->y,yl); + } + if (yl[0]==yl[1]) { yl[0] -= 0.5; yl[1] += 0.5; } + } + if (pl->zl[0]zl[1]) + { zl[0] = pl->zl[0]; zl[1] = pl->zl[1]; } + else + { zl[0] = zl[1] = 0.0; + for (i=0; ixyzs->n; i++) + { xyz = (plxyz *)viptr(pl->xyzs,i); + updatelim(xyz->z,zl); + } + if 
(zl[0]==zl[1]) { zl[0] -= 0.5; zl[1] += 0.5; } + } + theta = pl->theta*PI/180; phi = pl->phi*PI/180; + vx = -sin(theta)*sin(phi); + vy = -cos(theta)*sin(phi); + vz = cos(phi); + + for (i=0; i<2; i++) + for (j=0; j<2; j++) + for (k=0; k<2; k++) + { z[0] = xl[i]; z[1] = yl[j]; z[2] = zl[k]; + project(z,xx,theta,phi); + if ((i+j+k==0) | (xx[0]px[1])) px[1]=xx[0]; + if ((i+j+k==0) | (xx[1]py[1])) py[1]=xx[1]; + } + s = 1-2*((cos(phi)<0)^(sin(phi)<0)); + z[0] = xl[0]; z2[0] = xl[1]; + z[1] = z2[1] = yl[(cos(theta)<0)^(sin(phi)<0)]; + z[2] = z2[2] = zl[cos(phi)<0]; + axis(z,z2,xl,pl->xlab,theta,phi,0,s,dev); + z[0] = z2[0] = xl[(sin(theta)<0)^(sin(phi)<0)]; + z[1] = yl[0]; z2[1] = yl[1]; + z[2] = z2[2] = zl[cos(phi)<0]; + axis(z,z2,yl,pl->ylab,theta,phi,1,s,dev); + z[0] = z2[0] = xl[cos(theta)<0]; + z[1] = z2[1] = yl[sin(theta)>0]; + z[2] = zl[0]; z2[2] = zl[1]; + axis(z,z2,zl,pl->zlab,theta,phi,2,s,dev); + if (strlen(pl->main)>0) dev->DoText(1,(i0+i1)/2,2*ch,pl->main,0,-1); + + for (i=0; ixyzs->n; i++) + { xyz = viptr(pl->xyzs,i); + isgrid(xyz); + switch (xyz->type) + { case 'c': + contour3d(xyz,theta,phi,dev,pl->sl,pl->nsl); + break; + case 'i': + persp3d(xyz,theta,phi,dev->DrawPatch,pl->sl,pl->nsl); + break; + case 'b': + points3d(xyz,theta,phi,dev,'p'); + case 'l': + lines3d(xyz,theta,phi,dev); + break; + case 'p': + points3d(xyz,theta,phi,dev,'p'); + break; + case 'q': + points3d(xyz,theta,phi,dev,'q'); + break; + case 's': + if (i==0) { ERROR(("invalid segements")); } + else + segments(viptr(pl->xyzs,i-1),xyz,theta,phi,dev); + break; + case 'w': + persp3d(xyz,theta,phi,dev->DrawPatch,pl->sl,pl->nsl); + break; + } } + + dev->wrapup(rd); +} + +#ifdef YAWN +void plotmaple(pl) +plots *pl; +{ INT i, j; + plf = fopen("lfplot","w"); + switch(pl->d) + { case 1: + fprintf(plf,"PLOT(\n"); + for (j=0; jr; j++) + { fprintf(plf,"CURVES([\n"); + for (i=0; imx[j]; i++) + { if (i>0) fprintf(plf,",\n"); + fprintf(plf,"[%f,%f]",pl->x[j][i],pl->y[j][i]); + } + fprintf(plf,"],\nCOLOUR(RGB,0,0,0)),\n"); + } + if (pl->type[0]=='p') fprintf(plf,"STYLE(POINT),"); + if (pl->main!=NULL) fprintf(plf,"TITLE(%s),",pl->main); + fprintf(plf,"AXESLABELS(%s,%s)",pl->xlab,pl->ylab); + fprintf(plf,");\n"); + + break; + case 2: + fprintf(plf,"PLOT3D(GRID(%f..%f,%f..%f,[[\n",pl->x[0][0],pl->x[0][pl->mx[0]-1],pl->y[0][0],pl->y[0][pl->my[0]-1]); + for (i=0; imx[0]; i++) + { if (i>0) fprintf(plf,"],\n["); + for (j=0; jmy[0]; j++) + { if (j>0) fprintf(plf,",\n"); + fprintf(plf,"%f",pl->z[0][i*pl->my[0]+j]); + } + } + fprintf(plf,"]]),\nAXESLABELS(%s,%s,%s),AXESSTYLE(FRAME)",pl->xlab,pl->ylab,pl->zlab); + if (pl->type[0]=='c') fprintf(plf,",STYLE(CONTOUR),CONTOURS(DEFAULT),ORIENTATION(-90,0.1),COLOUR(ZSHADING)"); + if (pl->main!=NULL) fprintf(plf,",\nTITLE(%s)\n",pl->main); + fprintf(plf,");\n"); + break; + } + fclose(plf); + printf("Created lfplot file; Maple format.\n"); +} + +void plotmathe(pl,fmt) +plots *pl; +char *fmt; +{ INT i, j, aut; + static FILE *plm=NULL; + aut = f2(fmt)!='m'; +#ifdef NOPIPES + aut = 0; +#endif + if (aut) + { if (plm==NULL) plm = (FILE *)popen("math >/dev/null","w"); + plf = plm; + } + else + plf = fopen("lfplot","w"); + switch(pl->d) + { case 1: + fprintf(plf,"ListPlot[{{\n"); + for (i=0; imx[0]; i++) + { if (i>0) fprintf(plf,"},\n{"); + fprintf(plf,"%f,%f",pl->x[0][i],pl->y[0][i]); + } + fprintf(plf,"}}"); + fprintf(plf,",AxesLabel->{%s,%s}",pl->xlab,pl->ylab); + if (pl->type[0]=='l') fprintf(plf,",PlotJoined->True"); + break; + case 2: + if (pl->type[0]=='c') fprintf(plf,"ListContourPlot[{{"); + 
else fprintf(plf,"ListPlot3D[{{"); + for (j=0; jmy[0]; j++) + { if (j>0) fprintf(plf,"},\n{"); + for (i=0; imx[0]; i++) + { if (i>0) fprintf(plf,",\n"); + fprintf(plf,"%f",pl->z[0][i*pl->my[0]+j]); + } + } + fprintf(plf,"}},\nMeshRange->{{"); + fprintf(plf,"%f,%f},{%f,%f}}\n",pl->x[0][0],pl->x[0][pl->mx[0]-1],pl->y[0][0],pl->y[0][pl->my[0]-1]); + if (pl->type[0]=='c') fprintf(plf,",FrameLabel->{%s,%s}",pl->xlab,pl->ylab); + else fprintf(plf,",AxesLabel->{%s,%s,%s}",pl->xlab,pl->ylab,pl->zlab); + break; + } + if (pl->main!=NULL) fprintf(plf,",PlotLabel->%s\n",pl->main); + fprintf(plf,"];\n"); + if (aut) + fflush(plf); + else + { fclose(plf); + printf("Created lfplot file; Mathematica format.\n"); + } +} + +void plotmatlb(pl) /* Matlab */ +plots *pl; +{ INT i, j; + plf = fopen("lfplot.m","w"); + switch(pl->d) + { case 1: + fprintf(plf,"plot(["); + for (i=0; imx[0]; i++) + { if (i>0) putc(',',plf); + fprintf(plf,"%f",pl->x[0][i]); + } + fprintf(plf,"],["); + for (i=0; imx[0]; i++) + { if (i>0) putc(',',plf); + fprintf(plf,"%f",pl->y[0][i]); + } + fprintf(plf,"])\n"); + break; + case 2: + if (pl->type[0]=='c') fprintf(plf,"contour(["); + else fprintf(plf,"mesh(["); + for (i=0; imy[0]; i++) fprintf(plf,"%f ",pl->y[0][i]); + fprintf(plf,"],["); + for (i=0; imx[0]; i++) fprintf(plf,"%f ",pl->x[0][i]); + fprintf(plf,"],[\n"); + for (j=0; jmy[0]; j++) + { fprintf(plf,"["); + for (i=0; imx[0]; i++) + fprintf(plf,"%f ",pl->z[0][i*pl->my[0]+j]); + fprintf(plf,"]\n"); + } + fprintf(plf,"])\n"); + fprintf(plf,"xlabel('%s')\n",pl->xlab); + fprintf(plf,"ylabel('%s')\n",pl->ylab); + break; + default: ERROR(("plotmatlb: invalid dimension %d",pl->d)); + } + if (pl->main!=NULL) fprintf(plf,"title('%s')\n",pl->main); + fclose(plf); + printf("Created lfplot.m file; matlab format.\n"); +} + +void plotgnup(pl,fmt) +plots *pl; +char *fmt; +{ INT i, j, z, aut; + char m; + static FILE *plc=NULL; + + /* first, the data file */ + plf=fopen("lfplot.dat","w"); + switch(pl->d) + { case 1: + z = pl->mx[0]; + for (j=0; jr; j++) if (pl->mx[j]>z) z = pl->mx[j]; + for (i=0; ir; j++) + if (imx[j]) + fprintf(plf,"%f %f ",pl->x[j][i],pl->y[j][i]); + else + fprintf(plf,"%f %f ",pl->x[j][pl->mx[j]-1],pl->y[j][pl->mx[j]-1]); + fprintf(plf,"\n"); + } + break; + case 2: + for (j=0; jmy[0]; j++) + { for (i=0; imx[0]; i++) + fprintf(plf,"%f %f %f\n",pl->x[0][i],pl->y[0][j],pl->z[0][i*pl->my[0]+j]); + fprintf(plf,"\n"); + } + } + fclose(plf); + + /* second, the command file */ + m = f2(fmt); + aut = (m!='m'); +#ifdef NOPIPES + aut = 0; +#endif + if (aut) + { if ((m=='s') && (plc!=NULL)) pclose(plc); + if ((m=='s') || (plc==NULL)) + plc = (FILE *)popen("gnuplot","w"); + plf = plc; + } + else plf = fopen("lfplot","w"); + switch(pl->d) + { case 1: + fprintf(plf,"set nokey\n"); + fprintf(plf,"set xlab \"%s\"\n",pl->xlab); + fprintf(plf,"set ylab \"%s\"\n",pl->ylab); + if (pl->main != NULL) + fprintf(plf,"set title \"%s\"\n",pl->main); + fprintf(plf,"plot "); + for (i=0; ir; i++) + { if (i>0) fprintf(plf,", "); + fprintf(plf,"\"lfplot.dat\" using %d:%d ",2*i+1,2*i+2); + switch(pl->type[i]) + { case 'l': fprintf(plf,"with lines"); break; + case 'p': fprintf(plf,"with points"); break; + case 'b': fprintf(plf,"with linespoints"); break; + } + } + fprintf(plf,"\n"); + break; + case 2: + fprintf(plf,"set xlab \"%s\"\n",pl->xlab); + fprintf(plf,"set ylab \"%s\"\n",pl->ylab); + fprintf(plf,"set zlab \"%s\"\n",pl->zlab); + if (pl->type[0]=='c') + { fprintf(plf,"set contour\n"); + fprintf(plf,"set nosurface\n"); + fprintf(plf,"set key\n"); + } + else 
+ { fprintf(plf,"set nocontour\n"); + fprintf(plf,"set surface\n"); + fprintf(plf,"set nokey\n"); + } + fprintf(plf,"set view %g,%g\n",pl->phi,pl->theta); + fprintf(plf,"set parametric\n"); + if (pl->main != NULL) + fprintf(plf,"set title \"%s\"\n",pl->main); + fprintf(plf,"splot \"lfplot.dat\" with lines\n"); + break; + } + if (aut) + fflush(plf); + else + { fclose(plf); + printf("Created lfplot, lfplot.dat files; gnuplot format.\n"); + } +} +#endif + +#endif diff --git a/src/locfit/preplot.c b/src/locfit/preplot.c new file mode 100644 index 0000000..2e355c4 --- /dev/null +++ b/src/locfit/preplot.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +#include "local.h" + +/* + preplot(): interpolates the fit to a new set of points. + lf -- the fit structure. + des -- design structure; really only needed for parfit. + x -- the points to predict at. + f -- vector to return the predictions. + se -- vector to return std errors (NULL if not req'd) + band-- char for conf band type. ('n'=none, 'g'=global etc.) + n -- no of predictions (or vector of margin lengths for grid) + where -- where to predict: + 1 = points in the array x. + 2 = grid defined by margins in x. + 3 = data points from lf (ignore x). + 4 = fit points from lf (ignore x). + what -- what to predict. + (PCOEF etc; see lfcons.h file) + + cpreplot(): C version front end. + setpppoints(): (C version) used to set preplot points. + */ + +static char cb; +double *sef, *fit, sigmahat; + +void predptall(lf,des,x,what,ev,i) +lfit *lf; +design *des; +double *x; +INT what, ev, i; +{ double lik, rdf; + fit[i] = dointpoint(lf,des,x,what,ev,i); + if (cb=='n') return; + sef[i] = dointpoint(lf,des,x,PNLX,ev,i); + if (cb=='g') + { sef[i] *= sigmahat; + return; + } + if (cb=='l') + { lik = dointpoint(lf,des,x,PLIK,ev,i); + rdf = dointpoint(lf,des,x,PRDF,ev,i); + sef[i] *= sqrt(-2*lik/rdf); + return; + } + if (cb=='p') + { sef[i] = sigmahat*sqrt(1+sef[i]*sef[i]); + return; + } +} + +void prepvector(lf,des,x,n,what) /* interpolate a vector */ +lfit *lf; +design *des; +double **x; +INT n, what; +{ INT i, j; + double xx[MXDIM]; + for (i=0; imi[MDIM]; j++) xx[j] = x[j][i]; + predptall(lf,des,xx,what,lf->mi[MEV],i); + if (lf_error) return; + } +} + +void prepfitp(lf,des,what) +lfit *lf; +design *des; +INT what; +{ + INT i; + //d = lf->mi[MDIM]; + for (i=0; inv; i++) + { predptall(lf,des,evpt(lf,i),what,EFITP,i); + if (lf_error) return; + } +} + +void prepgrid(lf,des,x,mg,n,what) /* interpolate a grid given margins */ +design *des; +lfit *lf; +double **x; +INT *mg, n, what; +{ INT i, ii, j, d; + double xv[MXDIM]; + d = lf->mi[MDIM]; + for (i=0; imi[MEV],i); + if (lf_error) return; + } +} + +void preplot(lf,des,x,f,se,band,mg,where,what) +lfit *lf; +design *des; +double **x, *f, *se; +INT *mg, where, what; +char band; +{ INT d = 0, i, n; + double *xx[MXDIM]; + d = lf->mi[MDIM]; + fit = f; + sef = se; + cb = band; + if (cb!='n') sigmahat = sqrt(lf->dp[DRV]); + + switch(where) + { case 1: /* vector */ + n = mg[0]; + prepvector(lf,des,x,n,what); + break; + case 2: /* grid */ + n = 1; + for (i=0; imi[MN]; + if ((lf->mi[MEV]==EDATA) | (lf->mi[MEV]==ECROS)) + prepfitp(lf,des,what); + else + { for (i=0; inv; + prepfitp(lf,des,what); + break; + default: + ERROR(("unknown where in preplot")); + return; + } + + if ((what==PT0)|(what==PVARI)) + for (i=0; id; j++) + { data[j] = vdptr(pp->data[j]); + mg[j] = pp->data[j]->n; + } + j = getarg(vc,"what",0); + pp->wh = (j>0) ? 
ppwhat(argval(vc,j)) : PCOEF; + + preplot(&lf,&des,data,vdptr(pp->fit),vdptr(pp->se),band,mg,pp->gr,pp->wh); +} + +INT setpppoints(pp,where,mg,xl) +pplot *pp; +char *where; +INT *mg; +double *xl; +{ INT d, i, j, n, m; + varname vn; + d = pp->d = lf.mi[MDIM]; + if (strcmp(where,"fitp")==0) + { n = lf.nv; + for (j=0; jdata[j] = createvar(vn,STPLOTVAR,n,VDOUBLE); + if (lf_error) return(0); + for (i=0; idata[j],i,evptx(&lf,i,j)); + } + pp->gr = 4; + return(n); + } + if (strcmp(where,"data")==0) + { recondat(1,&n); + for (j=0; jdata[j] = createvar(vn,STPLOTVAR,n,VDOUBLE); + if (lf_error) return(0); + for (i=0; idata[j],i,datum(&lf,j,i)); + } + pp->gr = 3; + return(n); + } + if (strcmp(where,"grid")==0) + { n = 1; + for (j=0; jdata[j] = createvar(vn,STPLOTVAR,m,VDOUBLE); + if (lf_error) return(0); + if (m==1) + vassn(pp->data[j],0,(xl[d+j]+xl[j])/2); + else + for (i=0; idata[j],i,xl[j]+i*(xl[d+j]-xl[j])/(m-1)); + n *= m; + pp->gr = 2; + } + return(n); + } + ERROR(("setpppoints: invalid where=%s",where)); + return(0); +} + +#endif diff --git a/src/locfit/random.c b/src/locfit/random.c new file mode 100644 index 0000000..70af7f5 --- /dev/null +++ b/src/locfit/random.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + */ + +#include "local.h" + +#define PI_HALF 1.5707963267948966192313216916397514420986 /*pi/2*/ +#define PI_QUARTER 0.7853981633974483096156608458198757210493 /*pi/4*/ +#define EXP78 1.0129030479320018583185514777512982888868 /*e^(1/78)*/ +#define PI128 40.7436654315252059568342434233636766808217 /*128/pi*/ + +static unsigned long cc, tv, ss=1; + +double runif() +{ if (ss) + { WARN(("runif: No seed set.")); + return(0.0); + } + cc = cc * 69069; /* congruential part */ + tv ^= tv >> 15; /* tausworthe part */ + tv ^= tv << 17; + return(((tv ^ cc) >> 1) / 2147483648.0); +} + +void rseed(seed) +/* + Seed should be string of at least 8 characters. +*/ +char *seed; +{ ss = 0; + tv = seed[0]; + tv = (tv<<8) | seed[1]; + tv = (tv<<8) | seed[2]; + tv = (tv<<8) | seed[3]; + cc = seed[4]; + cc = (cc<<8) | seed[5]; + cc = (cc<<8) | seed[6]; + cc = (cc<<8) | seed[7]; + if(cc % 2 == 0) + cc++; +} + +/* + * Gaussian random variable. + * Reference: Kinderman & Monahan, Proceedings of + * the ASA, Statistical Computing Section, 1975, 128-131. + */ +double rnorm(mu,s) +double mu, s; +{ + double rnormk, u, x2; + + do { + u = runif(); + rnormk = 1.715527769 * (runif()-0.5) / u; + x2 = rnormk * rnormk / 4; + } while((x2>1-u) || (x2 > -log(u))); + return(mu+s*rnormk); +} + +double rexp(lb) +double lb; +{ return(-log(runif())/lb); +} + +/* + * Poisson random variable. + * Simple algorithm for small lambda, else complex algorithm. + * Crossover point must be at least 5 for the complex algorithm + * to work correctly. + * Reference: Devroye, pages 504, 511 and 516 (with corrections!) 
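+ *
+ * Usage note: rseed() must be called with a string of at least 8 characters
+ * before runif/rnorm/rexp/rpois are used; otherwise runif() warns and
+ * returns 0. A hypothetical driver, for illustration only:
+ *
+ *   double u, x, y;
+ *   rseed("locfit01");
+ *   u = runif();      uniform on (0,1)
+ *   x = rpois(3.5);   lambda < 8: simple algorithm
+ *   y = rpois(20.0);  lambda >= 8: the rejection algorithm below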
+ */ +double rpois(lambda) +double lambda; +{ + static double olambda = -1, a, mu, delta, d, c1, c2, c3, c4, c5; + double u, e, n, x, y, w, t, p, q; + int new = lambda != olambda; + + olambda = lambda; + if(lambda < 8) { + if(new) + a = exp(-lambda); + q = 1; + x = -1; + do { + q *= runif(); + x++; + } while(q >= a); + return(x); + } + + if(new) { + mu = floor(lambda); + delta = sqrt(2 * mu * log(mu * PI128)); + delta = MAX(6.0, MIN(mu, floor(delta))); + d = 2*mu + delta; + c1 = sqrt(mu * PI_HALF); + c2 = c1 + sqrt(d * PI_QUARTER) * exp(1/d); + c3 = c2 + 1; + c4 = c3 + EXP78; + c5 = c4 + 2 * d * exp(-delta*(1+delta/2)/d) / delta; + } + while(1) { + u = c5 * runif(); + e = -log(runif()); + if(u <= c1) { + n = rnorm(0.0,1.0); + x = floor(-fabs(n) * sqrt(mu)); + if(x < -mu) + continue; + w = n*n/2 + e + x*log(lambda/mu); + } else if(u <= c2) { + y = 1 + fabs(rnorm(0.0,1.0)) * sqrt(d/2); + x = ceil(y); + if(x > delta) + continue; + w = y*(y-2)/d + e + x*log(lambda/mu); + } else if(u <= c3) { + x = 0; + w = e; + } else if(u <= c4) { + x = 1; + w = e + log(lambda/mu); + } else { + y = delta - 2*d*log(runif())/delta; + x = ceil(y); + w = delta*(1+y/2)/d + e + x*log(lambda/mu); + } + w = -w; + t = x*(x+1) / (2*mu); + if(x >= 0 && w <= -t) + return(x+mu); + if(x < 0 && w > -t) + continue; + q = t * ((2*x+1)/(6*mu) - 1); + if(w > q) + continue; + p = x+1 <= 0 ? x+1 : 0; + p = q - t*t/(3*(mu+p)); + if(w <= p) + return(x+mu); + if(w <= x*log(mu) - LGAMMA(mu+x+1) + LGAMMA(mu+1)) + return(x+mu); + } +} diff --git a/src/locfit/readfile.c b/src/locfit/readfile.c new file mode 100644 index 0000000..ac8be4b --- /dev/null +++ b/src/locfit/readfile.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * + * + * Function to read and create Locfit variables from an ASCII file. + * + * Command syntax: + * locfit> readfile filename v1 v2 v3 + * filename is the file to be read (no default extension or path is added). + * v1 v2 v3 etc are the names of the variables to create. + * + * File format: The file should be a plain ASCII file organized + * in matrix format, with one variable per column and one observation + * per row. Fields are separated by spaces. + * + */ + +#include "local.h" + +extern char filename[]; +static FILE *aaa; + +void readfile(vc) +vari *vc; +{ int i, j, k, n, nv; + char wc[50], *fn; + double *dpr; + vari *v; + + i = getarg(vc,"file",1); + if (i==0) + { ERROR(("readfile: no file")); + return; + } + + fn = argval(vc,i); + setfilename(fn,"","r",0); + if (lf_error) return; + + i = getarg(vc,"arith",0); /* now automatic - leave for backward compat. */ + + aaa = fopen(filename,"r"); + v = createvar("readfile",STREADFI,0,VDOUBLE); + + n = 0; + do + { k = fscanf(aaa,"%s",wc); + if (k==1) + { vassn(v,n,darith(wc)); + n++; + } + } while (k==1); + fclose(aaa); + dpr = vdptr(v); + deletevar(v); + + nv = 0; + for (i=1; if1; v2 = des->ss; wk = des->oc; + ispar = (lf->mi[MKER]==WPARM) && (hasparcomp(lf)); + p = lf->mi[MP]; + +/* for parametric models, the covariance is + * A(x1)^T (X^T W V X)^{-1} A(x2) + * which we can find easily from the parametric component. + */ + if (ispar) + { pc = &lf->pc; + fitfun(lf,&x1,pc->xbar,v1,NULL,0); + fitfun(lf,&x2,pc->xbar,v2,NULL,0); + jacob_hsolve(&lf->pc.xtwx,v1); + jacob_hsolve(&lf->pc.xtwx,v2); + } + +/* for non-parametric models, we must use the cholseky decomposition + * of M2 = X^T W^2 V X. Courtesy of comp_vari, we already have + * des->P = M2^{1/2} M1^{-1}. 
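+ * Writing a(x) for the local basis evaluated by fitfun(), the code below
+ * then returns the inner product (P a(x1))^T (P a(x2)), which equals
+ * a(x1)^T M1^{-1} M2 M1^{-1} a(x2) with M1 = X^T W V X.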
+ */ + if (!ispar) + { fitfun(lf,&x1,des->xev,wk,NULL,0); + for (i=0; iP[i*p+j]*wk[j]; + } + fitfun(lf,&x2,des->xev,wk,NULL,0); + for (i=0; iP[i*p+j]*wk[j]; + } + } + + return(innerprod(v1,v2,p)); +} + +void cumulant(lf,des,sd) +lfit *lf; +design *des; +double sd; +{ double b2i, b3i, b3j, b4i; + double ss, si, sj, uii, uij, ujj, k1; + INT ii, i, j, jj, *mi; + for (i=1; i<10; i++) c[i] = 0.0; + k1 = 0; + mi = lf->mi; + + /* ss = sd*sd; */ + ss = covar_par(lf,des,des->xev[0],des->xev[0]); + +/* + * this isn't valid for nonparametric models. At a minimum, + * the sums would have to include weights. Still have to work + * out the right way. + */ + for (i=0; iind[i]; + b2i = b2(des->th[i],mi[MTG],prwt(lf,ii)); + b3i = b3(des->th[i],mi[MTG],prwt(lf,ii)); + b4i = b4(des->th[i],mi[MTG],prwt(lf,ii)); + si = covar_par(lf,des,des->xev[0],datum(lf,0,ii)); + uii= covar_par(lf,des,datum(lf,0,ii),datum(lf,0,ii)); + if (lf_error) return; + + c[2] += b4i*si*si*uii; + c[6] += b4i*si*si*si*si; + c[7] += b3i*si*uii; + c[8] += b3i*si*si*si; + /* c[9] += b2i*si*si*si*si; + c[9] += b2i*b2i*si*si*si*si; */ + k1 += b3i*si*(si*si/ss-uii); + + /* i=j components */ + c[1] += b3i*b3i*si*si*uii*uii; + c[3] += b3i*b3i*si*si*si*si*uii; + c[4] += b3i*b3i*si*si*uii*uii; + + for (j=i+1; jind[j]; + b3j = b3(des->th[j],mi[MTG],prwt(lf,jj)); + sj = covar_par(lf,des,des->xev[0],datum(lf,0,jj)); + uij= covar_par(lf,des,datum(lf,0,ii),datum(lf,0,jj)); + ujj= covar_par(lf,des,datum(lf,0,jj),datum(lf,0,jj)); + + c[1] += 2*b3i*b3j*si*sj*uij*uij; + c[3] += 2*b3i*b3j*si*si*sj*sj*uij; + c[4] += b3i*b3j*uij*(si*si*ujj+sj*sj*uii); + if (lf_error) return; + } + } + c[5] = c[1]; + c[7] = c[7]*c[8]; + c[8] = c[8]*c[8]; + + c[1] /= ss; c[2] /= ss; c[3] /= ss*ss; c[4] /= ss; + c[5] /= ss; c[6] /= ss*ss; c[7] /= ss*ss; + c[8] /= ss*ss*ss; c[9] /= ss*ss; + +/* constants used in p(x,z) computation */ + kap[1] = k1/(2*sqrt(ss)); + kap[2] = 1 + 0.5*(c[1]-c[2]+c[4]-c[7]) - 3*c[3] + c[6] + 1.75*c[8]; + kap[4] = -9*c[3] + 3*c[6] + 6*c[8] + 3*c[9]; + +/* constants used in q(x,u) computation */ + kaq[2] = c[3] - 1.5*c[8] - c[5] - c[4] + 0.5*c[7] + c[6] - c[2]; + kaq[4] = -3*c[3] - 6*c[4] - 6*c[5] + 3*c[6] + 3*c[7] - 3*c[8] + 3*c[9]; +} + +/* q2(u) := u+q2(x,u) in paper */ +double q2(u) +double u; +{ return(u-u*(36.0*kaq[2] + 3*kaq[4]*(u*u-3) + c[8]*((u*u-10)*u*u+15))/72.0); +} + +/* p2(u) := p2(x,u) in paper */ +double p2(u) +double u; +{ return( -u*( 36*(kap[2]-1+kap[1]*kap[1]) + + 3*(kap[4]+4*kap[1]*sqrt(kap[3]))*(u*u-3) + + c[8]*((u*u-10)*u*u+15) ) / 72 ); +} + +void procvscb2(des,lf,v) +design *des; +lfit *lf; +INT v; +{ double thhat, sd, *lo, *hi, u; + int err, tmp; + x = des->xev = evpt(lf,v); + tmp = lf->mi[MPC]; + if ((lf->mi[MKER]==WPARM) && (hasparcomp(lf))) + { lf->coef[v] = thhat = addparcomp(lf,des->xev,PCOEF); + lf->nlx[v] = sd = addparcomp(lf,des->xev,PNLX); + } + else + { lf->mi[MPC] = 0; + procv(des,lf,v); + thhat = lf->coef[v]; + sd = lf->nlx[v]; + } + if (type >= 2) + { if (lf->mi[MKER] != WPARM) + WARN(("nonparametric fit; correction is invalid")); + cumulant(lf,des,sd); + } + lf->mi[MPC] = tmp; + lo = vdptr(lf->L); + hi = &lo[lf->nvm]; + switch(type) + { case 0: + case 1: /* basic scr */ + lo[v] = thhat - scb_crit * sd; + hi[v] = thhat + scb_crit * sd; + return; + case 2: /* centered scr */ + lo[v] = thhat - kap[1]*sd - scb_crit*sd*sqrt(kap[2]); + hi[v] = thhat - kap[1]*sd + scb_crit*sd*sqrt(kap[2]); + return; + case 3: /* corrected 2 */ + u = solve_secant(q2,scb_crit,0.0,2*scb_crit,0.000001,BDF_NONE,&err); + lo[v] = thhat - u*sd; 
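+    /* u solves u + q2(x,u) = scb_crit (secant search above), so the
+       corrected band is thhat +/- u*sd rather than +/- scb_crit*sd. */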
+ hi[v] = thhat + u*sd; + return; + case 4: /* corrected 2' */ + u = fabs(p2(scb_crit)); + max_p2 = MAX(max_p2,u); + lo[v] = thhat; + hi[v] = thhat; + return; + } + ERROR(("procvscb2: invalid type")); +} + +void scb(des,lf) +design *des; +lfit *lf; +{ double kap[10], *lo, *hi; + INT i, *mi, nterms; + mi = lf->mi; + mi[MP] = calcp(mi,mi[MDEG]); + type = mi[MGETH] - 70; + deschk(des,mi[MN],mi[MP]); + des->pref = 0; + cvi = -1; /* inhibit cross validation */ + mi[MLINK] = defaultlink(mi[MLINK],mi[MTG]); + + if (type==0) + { kap[0] = 1; + scb_crit = critval(kap,1,0,0.05,10,2,0.0); + } + else + { compparcomp(des,lf,0); + nterms = constants(des,lf,kap); + scb_crit = critval(kap,nterms,mi[MDIM],0.05,10,2,0.0); + } + + max_p2 = 0.0; + startlf(des,lf,procvscb2,0); + if (type==4) + { lo = vdptr(lf->L); + hi = &lo[lf->nvm]; + for (i=0; inv; i++) + { + lo[i] -= (scb_crit-max_p2)*lf->nlx[i]; + hi[i] += (scb_crit-max_p2)*lf->nlx[i]; + } + } +} + +#ifdef CVERSION +extern lfit lf; +extern design des; +extern vari *aru; + +lfit *lf_sim; +design *des_sim; + +double scbsim_fun(x) +double x; +{ double y; + evptx(lf_sim,0,0) = x; + procv(des_sim,lf_sim,0); + + if (type>=2) + { if (lf_sim->mi[MKER] != WPARM) + WARN(("nonparametric fit; correction is invalid")); + cumulant(lf_sim,des_sim,lf_sim->nlx[0]); + } + + y = lf_link(dareval(aru,0,&x),lf_sim->mi[MLINK]); + y = (lf_sim->coef[0] - y) / lf_sim->nlx[0]; + + switch(type) + { + case 2: + y = (y-kap[1]) / sqrt(kap[2]); + break; + case 3: + y = (y-kap[1])/sqrt(kap[2]); + y = (y>0) ? y+q2(y) : y - q2(y); + break; + } + + switch(side) + { case -1: return(-y); + case 1: return(y); + default: return(fabs(y)); + } +} + +static double max; + +void do_scbsim(des,lf) +design *des; +lfit *lf; +{ double y; + int err; + + lf_sim = lf; + des_sim = des; + + trchck(lf,1,1,lf->mi[MDIM],lf->mi[MP],1); + y = max_quad(scbsim_fun,lf->fl[0],lf->fl[1],10,0.00001,&err,'y'); + max = y; +} + +void scbsim(lf,des) +lfit *lf; +design *des; +{ double kap[5]; + int nterms; + + lf->mi[MEV] = 100; + startlf(des,lf,scbsim_fun,1); + + nterms = constants(des,lf,kap); + printf("xmx: %10.6f max: %10.6f k0 %10.6f %10.6f pr %10.6f\n",0.0,max,kap[0],kap[1],tailp(max,kap,nterms,lf->mi[MDIM],0.0)); +} + +void cscbsim(v) +vari *v; +{ int i; + side = 0; type = 1; + fitoptions(&lf,v,0); + + i = getarg(v,"mean",1); + if (i==0) + { WARN(("cscbsim: no mean function; setting = 0")); + aru = arbuild("0",0,0,NULL,0,1); + } + else + { aru = arbuild(argval(v,i),0,strlen(argval(v,i))-1,NULL,0,1); + setvarname(aru,"_aru"); + } + + i = getarg(v,"corr",1); + if (i>0) type = getlogic(v,i); + if (lf_error) return; + + i = getarg(v,"side",1); + if (i>0) sscanf(argval(v,i),"%d",&side); + if (lf_error) return; + + scbsim(&lf,&des); +} +#endif diff --git a/src/locfit/scb_cons.c b/src/locfit/scb_cons.c new file mode 100644 index 0000000..49c8a04 --- /dev/null +++ b/src/locfit/scb_cons.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 1996-2001 Jiayang Sun, Catherine Loader. + * This file is used by the simultaneous confidence band + * additions to Locfit. + * + */ + +#include "local.h" + +extern INT cvi; + +static double *fd, *ft, *lij, *d1a; +static INT par; + +void assignk0(z,d,n) /* z should be n*(2*d*d+2*d+2); */ +double *z; +INT d, n; +{ d1a= z; z += d*d*n; + ft = z; z += n*(d*(d+1)+1); + fd = z; z += n*(d+1); +} + +void christ(d,nn,nl) /* lij[i][j] = res proj. 
of Tij to (T1...Td) */ +double nl; +INT d, nn; +{ INT i, j, k, l; + double p4, *ll, v[1+MXDIM]; + for (i=0; imi[MDIM]; + m = wdiag(lf,des,ft,1+(d>1),2,0); + lij = &ft[(d+1)*m]; + for (i=0; i1) + { christ(d,m,fd[0]); + d1(m,d); + for (j=0; jmi[MDIM]; u = des->res; + m = wdiag(lf,des,ft,2,2,0); + lij = &ft[(d+1)*m]; + for (i=0; imi[MDIM]; + m = wdiag(lf,des,ft,1,2,0); + for (i=0; imi; + d = mi[MDIM]; + if (lf_error) return(0); + if ((lf->mi[MKER] != WPARM) && (lf->dp[DALP]>0)) + WARN(("constants are approximate for varying h")); + mi[MP] = calcp(mi,mi[MDEG]); + deschk(des,mi[MN],mi[MP]); + preproc(des,lf,mi[MKER]!=WPARM); + nnn = (ident==1) ? lf->mi[MP] : lf->mi[MN]; + lf->L = checkvarlen(lf->L,2*nnn*(d*d+d+1),"_hatmat",VDOUBLE); + assignk0(vdptr(lf->L),d,nnn); + mi[MDC] = 1; + des->xev = z; + + mk = 1.0; + for (i=0; ifl[i]; + delt[i] = (lf->fl[i+d]-z[i])/(3*mi[MMINT]); + mk *= delt[i]; + } + i = 0; + + k0[0] = k0[1] = k0[2] = 0.0; + l0[0] = l0[1] = 0.0; + m0[0] = 0.0; + +#ifdef CVERSION + if (mi[MIT]==IMONT) + { for (i=0; ifl[j]+(lf->fl[j+d]-lf->fl[j])*runif(); + if ((mi[MKER]!=WPARM) | (!hasparcomp(lf))) + { h = nbhd(lf,des,(INT)(mi[MN]*lf->dp[DALP]),lf->dp[DFXH],0); + locfit(lf,des,h,1); + } + k2x(lf,des,k1); + k0[0] += k1[0]; + } + for (j=0; jfl[j+d]-lf->fl[j]; + kap[0] = k0[0]/mi[MMINT]; + return(1); + } +#endif + + while(1) + { + wt = 1; + for (i=0; idp[DALP]),lf->dp[DFXH],0); + locfit(lf,des,h,1); + } + k2x(lf,des,k1); + k0[0] += wt*mk*k1[0]; + k0[2] += wt*mk*k1[2]; + + for (re=0; refl[i]+3*delt[i]*index[i]; + if (index[i]>mi[MMINT]) + { index[i] = 0; + z[i] = lf->fl[i]; + if (i==d-1) /* done */ + { kap[0] = k0[0]; + kap[1] = l0[0]/2; + if (d==1) return(2); + k0[2] = -k0[2] - d*(d-1)*k0[0]/2; + if (mi[MDEB]>0) + { printf("constants:\n"); + printf(" k0: %8.5f k2: %8.5f\n",k0[0],k0[2]); + printf(" l0: %8.5f l1: %8.5f\n",l0[0],l1[1]); + printf(" m0: %8.5f\n",m0[0]); + printf(" check: %8.5f\n",(k0[0]+k0[2]+l0[1]+m0[0])/(2*PI)); + } + kap[2] = (k0[2]+l0[1]+m0[0])/(2*PI); + return(3); + } + } + else i = d; + } + + } +} + +double tailp(c,k0,m,d,nu) +double c, *k0, nu; +INT m, d; +{ INT i; + double p; + p = 0; + if (nu==0) + { for (i=0; i0) + p += k0[i]*exp(LGAMMA((d+1-i)/2.0)-(d+1-i)*LOGPI/2) + *(1-pchisq(c*c,(double) d+1-i)); + } + else + { for (i=0; i0) + p += k0[i]*exp(LGAMMA((d+1-i)/2.0)-(d+1-i)*LOGPI/2) + *(1-pf(c*c/(d+1-i),(double) (d+1-i), nu)); + } + return(p); +} + +double taild(c,k0,m,d,nu) +double c, *k0, nu; +INT m, d; +{ double p; + INT i; + p = 0; + if (nu==0) + { for (i=0; i0) + p += k0[i]*exp(LGAMMA((d+1-i)/2.0)-(d+1-i)*LOGPI/2) + *2*c*dchisq(c*c,(double) (d+1-i)); + } + else + { for (i=0; i0) + p += k0[i]*exp(LGAMMA((d+1-i)/2.0)-(d+1-i)*LOGPI/2) + *2*c*df(c*c/(d+1-i),(double) (d+1-i), nu)/(d+1-i); + } + return(-p); +} + +double critval(k0,m,d,al,it,s,nu) +double *k0, al, nu; +INT m, d, it, s; +{ double c, cn, c0, c1, tp, td; + INT j; + if (m<0) ERROR(("critval: no terms?")); + if (m>d+1) m = d+1; + if ((al<=0) | (al>=1)) ERROR(("critval: invalid alpha %8.5f",al)); + if (lf_error) return(0.0); + if (al>0.5) WARN(("critval: A mighty large tail probability al=%8.5f",al)); + if (s==1) al = 2*al; + if (m==0) { d = 0; k0[0] = 1; m = 1; } + c = 2.0; c0 = 0.0; c1 = 0.0; + for (j=0; j0) c0 = c; + if (tp<0) c1 = c; + cn = c - tp/td; + if (cn0.0) && (cn>c1)) cn = (c+c1)/2; + c = cn; + if (fabs(tp/al)<1.0e-10) return(c); + } + return(c); +} + +#ifdef SVERSION +void scritval(k0,d,cov,m,rdf,z) +double *k0, *z, *cov, *rdf; +INT *d, *m; +{ lf_error = 0; + *z = 
critval(k0,*m,*d,1-*cov,10,2,*rdf); +} +#endif diff --git a/src/locfit/simul.c b/src/locfit/simul.c new file mode 100644 index 0000000..b26c23c --- /dev/null +++ b/src/locfit/simul.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + */ + +#include "local.h" + +static double pen, sig2; + +void goldensec(f,des,tr,eps,xm,ym,meth) +double (*f)(), eps, *xm, *ym; +INT meth; +design *des; +lfit *tr; +{ double x[4], y[4], xx[11], yy[11]; + INT i, im; + xx[0] = tr->dp[DFXH]; + if (xx[0]<=0) + { ERROR(("regband: initialize h>0")); + return; + } + for (i=0; i<=10; i++) + { if (i>0) xx[i] = (1+GOLDEN)*xx[i-1]; + yy[i] = f(xx[i],des,tr,meth); + if ((i==0) || (yy[i]eps) + { if (y[1]dp[DALP] = h; + startlf(des,tr,procv,0); + ressumm(tr,des); + cp = -2*tr->dp[DLK]+pen*tr->dp[DT0]; + return(cp); +} + +double loccp(h,des,tr,m) /* m=1: cp m=2: gcv */ +double h; +design *des; +lfit *tr; +int m; +{ double cp; + INT dg; + tr->dp[DALP] = 0; + tr->dp[DFXH] = h; + dg = tr->mi[MDEG]; tr->mi[MDEG] = tr->mi[MDEG0]; + startlf(des,tr,procv,0); + ressumm(tr,des); + if (m==1) + cp = -2*tr->dp[DLK]/sig2 - tr->mi[MN] + 2*tr->dp[DT0]; + else cp = -2*tr->mi[MN]*tr->dp[DLK]/((tr->mi[MN]-tr->dp[DT0])*(tr->mi[MN]-tr->dp[DT0])); + printf("h %8.5f deg %2d rss %8.5f trl %8.5f cp: %8.5f\n",h,tr->mi[MDEG],-2*tr->dp[DLK],tr->dp[DT0],cp); + tr->mi[MDEG0] = tr->mi[MDEG]; tr->mi[MDEG] = dg; + return(cp); +} + +double cp(des,tr,meth) +design *des; +lfit *tr; +INT meth; +{ double hm, ym; + goldensec(loccp,des,tr,0.001,&hm,&ym,meth); + return(hm); +} + +double gkk(des,tr) +design *des; +lfit *tr; +{ double h, h5, nf, th; + INT i, j, n, dg0, dg1; + tr->mi[MEV]=EDATA; + tr->dp[DALP] = 0; + n = tr->mi[MN]; + dg0 = tr->mi[MDEG0]; /* target degree */ + dg1 = dg0+1+(dg0%2==0); /* pilot degree */ + nf = exp(log(1.0*n)/10); /* bandwidth inflation factor */ + h = tr->dp[DFXH]; /* start bandwidth */ + for (i=0; i<=10; i++) + { tr->mi[MDEG] = dg1; + tr->dp[DFXH] = h*nf; + startlf(des,tr,procv,0); + th = 0; + for (j=10; jcoef[dg1*n+j]*tr->coef[dg1*n+j]; +th *= n/(n-20.0); + h5 = sig2*Wikk(tr->mi[MKER],dg0)/th; + h = exp(log(h5)/(2*dg1+1)); +/* printf("pilot %8.5f sel %8.5f\n",tr->dp[DFXH],h); */ + } + return(h); +} + +double rsw(des,tr,kk) +design *des; +lfit *tr; +INT *kk; +{ INT i, j, k, nmax, nvm, n, mk, ev, dg0, dg1; + double rss[6], cp[6], th22, dx, d2, hh; + nmax = 5; + ev = tr->mi[MEV]; tr->mi[MEV] = EGRID; + mk = tr->mi[MKER]; tr->mi[MKER]= WRECT; + dg0 = tr->mi[MDEG0]; + dg1 = 1 + dg0 + (dg0%2==0); + tr->mi[MDEG]= 4; + for (k=nmax; k>0; k--) + { tr->mg[0] = k; + tr->fl[0] = 1.0/(2*k); tr->fl[1] = 1-1.0/(2*k); + tr->dp[DALP] = 0; tr->dp[DFXH] = 1.0/(2*k); + startlf(des,tr,procv,0); + nvm = tr->nvm; + rss[k] = 0; + for (i=0; ilik[i]; + } + n = tr->mi[MN]; k = 1; + for (i=1; i<=nmax; i++) + { /* cp[i] = (n-5*nmax)*rss[i]/rss[nmax]-(n-10*i); */ + cp[i] = rss[i]/sig2-(n-10*i); + if (cp[i]mg[0] = k; + tr->fl[0] = 1.0/(2*k); tr->fl[1] = 1-1.0/(2*k); + tr->dp[DALP] = 0; tr->dp[DFXH] = 1.0/(2*k); + startlf(des,tr,procv,0); + tr->mi[MKER] = mk; tr->mi[MEV] = ev; + nvm = tr->nvm; + th22 = 0; + for (i=10; i=k) j = k-1; + dx = datum(tr,0,i)-evptx(tr,0,j); + if (dg1==2) + d2 = tr->coef[2*nvm+j]+dx*tr->coef[3*nvm+j]+dx*dx*tr->coef[4*nvm+j]/2; + else d2 = tr->coef[4*nvm+j]; + th22 += d2*d2; + } + hh = Wikk(mk,dg0)*sig2/th22*(n-20.0)/n; + return(exp(log(hh)/(2*dg1+1))); +} + +void rband(des,tr,hhat,meth,nmeth,kk) +design *des; +lfit *tr; +double *hhat; +INT *meth, *nmeth, *kk; +{ INT i, deg; + double h0; 
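+
+  /* fill hhat[i] with a bandwidth estimate for each requested method
+     meth[i]: 1 = cp() (Cp), 2 = cp() (GCV), 3 = gkk(), 4 = rsw();
+     any other code gives 0. sig2 is set from the pilot fit below. */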
+ + /* first, estimate sigma^2 */ + deg = tr->mi[MDEG]; tr->mi[MDEG] = 2; + h0 = tr->dp[DFXH]; tr->dp[DFXH] = 0.05; +printf("alp: %8.5f h: %8.5f deg %2d ev %2d\n",tr->dp[DALP],tr->dp[DFXH],tr->mi[MDEG],tr->mi[MEV]); + startlf(des,tr,procv,0); + ressumm(tr,des); + tr->mi[MDEG] = deg; tr->dp[DFXH] = h0; + sig2 = tr->dp[DRV]; + printf("sd est: %8.5f\n",sqrt(tr->dp[DRV])); + + for (i=0; i<*nmeth; i++) + { switch(meth[i]) + { case 1: hhat[i] = cp(des,tr,1); + break; + case 2: hhat[i] = cp(des,tr,2); + break; + case 3: hhat[i] = gkk(des,tr); + break; + case 4: hhat[i] = rsw(des,tr,kk); + break; + default: hhat[i] = 0; + } + tr->dp[DFXH] = h0; + tr->mi[MDEG] = deg; + } +} diff --git a/src/locfit/solve.c b/src/locfit/solve.c new file mode 100644 index 0000000..1ad3e64 --- /dev/null +++ b/src/locfit/solve.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * solve f(x)=c by various methods, with varying stability etc... + * xlo and xhi should be initial bounds for the solution. + * convergence criterion is |f(x)-c| < tol. + * + * double solve_secant(f,c,xlo,xhi,tol,bd_flag,err) + * secant method solution of f(x)=c. + * xlo and xhi are starting values and bound for solution. + * tol = convergence criterion, |f(x)-c| < tol. + * bd_flag = if (xlo,xhi) doesn't bound a solution, what action to take? + * BDF_NONE returns error. + * BDF_EXPRIGHT increases xhi. + * BDF_EXPLEFT decreases xlo. + * err = error flag. + * The (xlo,xhi) bound is not formally necessary for the secant method. + * But having such a bound vastly improves stability; the code performs + * a bisection step whenever the iterations run outside the bounds. + * + * double solve_nr(f,f1,c,x0,tol,err) + * Newton-Raphson solution of f(x)=c. + * f1 = f'(x). + * x0 = starting value. + * tol = convergence criteria, |f(x)-c| < tol. + * err = error flag. + * No stability checks at present. + * + * double solve_fp(f,x0,tol) + * fixed-point iteration to solve f(x)=x. + * x0 = starting value. + * tol = convergence criteria, stops when |f(x)-x| < tol. + * Convergence requires |f'(x)|<1 in neighborhood of true solution; + * f'(x) \approx 0 gives the fastest convergence. + * No stability checks at present. + * + * TODO: additional error checking, non-convergence stop. 
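+ *
+ * Usage sketch for solve_secant (hypothetical caller; sq() is not part of
+ * this file):
+ *
+ *   static double sq(double x) { return(x*x); }
+ *
+ *   int err;
+ *   double r = solve_secant(sq, 2.0, 0.0, 2.0, 1.0e-8, BDF_NONE, &err);
+ *
+ * Here f(0)-2 < 0 < f(2)-2, so (xlo,xhi) brackets the root and r returns
+ * with |r*r - 2| < 1.0e-8 (r close to sqrt(2)). With BDF_NONE, a
+ * non-bracketing interval would instead set err=1 and return (xlo+xhi)/2.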
+ */ + +#include +#include + +#include "mutil.h" + +double solve_secant(f,c,xlo,xhi,tol,bd_flag,err) +double (*f)(), c, xhi, xlo, tol; +int bd_flag, *err; +{ double ylo, yhi, x1, x2, x, y1, y2, y; + *err = 0; + ylo = f(xlo)-c; + yhi = f(xhi)-c; + + switch(bd_flag) + { case BDF_EXPRIGHT: + while (yhi*ylo > 0) + { xhi += xhi-xlo; + yhi = f(xhi)-c; + } + break; + case BDF_EXPLEFT: + while (yhi*ylo > 0) + { xlo -= xhi-xlo; + ylo = f(xlo)-c; + } + break; + case BDF_NONE: + default: + if (yhi*ylo > 0) + { *err = 1; + return((xlo+xhi)/2); + } + break; + } + + x1 = xlo; y1 = ylo; + x2 = xhi; y2 = yhi; + + while (1) + { x = x2 + (x1-x2)*y2/(y2-y1); + if ((x<=xlo) | (x>=xhi)) x = (xlo+xhi)/2; + y = f(x)-c; + if (fabs(y) < tol) return(x); + if (y*ylo>0) { xlo = x; ylo = y; } + else { xhi = x; yhi = y; } +if (y2==y) +{ //printf("secant: y2 %12.9f\n",y2); + return(x); +} + x1 = x2; y1 = y2; + x2 = x; y2 = y; + } +} + +double solve_nr(f,f1,c,x0,tol,err) +double (*f)(), (*f1)(), c, x0, tol; +int *err; +{ double y; + do + { y = f(x0)-c; + x0 -= y/f1(x0); + } while (fabs(y)>tol); + return(x0); +} + +double solve_fp(f,x0,tol,maxit) +double (*f)(), x0, tol; +int maxit; +{ double x1; + int i; + x1 = 0; + for (i=0; idp; + mi = lf->mi; + + mi[MTG] = TNUL; + mi[MTG] = (lf->y==NULL) ? TDEN : (64+TGAUS); + mi[MLINK] = LDEFAU; + mi[MACRI] = ANONE; + mi[MDEG] = mi[MDEG0] = 2; + mi[MEV] = (ident==1) ? EDATA : ETREE; + mi[MKT] = KSPH; mi[MKER] = WTCUB; + mi[MIT] = IDEFA; mi[MDC] = mi[MREN] = 0; + mi[MK] = 100; mi[MMINT] = 20; + mi[MMXIT] = 20; + mi[MN] = n; mi[MDIM] = d; + mi[MDEB] = 0; + mi[MUBAS] = 0; + + + dp[DALP] = 0.7; dp[DFXH] = dp[DADP] = 0.0; + dp[DCUT] = 0.8; + + if (d<=0) + ERROR(("must set MDIM before calling fitdefault")); + for (i=0; isca[i] = 1.0; + lf->xl[i] = lf->xl[i+d] = 0.0; + lf->fl[i] = lf->fl[i+d] = 0.0; + } +} + +int des_reqd(n,p) +INT n, p; +{ + return (n*(p+5)+2*p*p+4*p + jac_reqd(p)); +} +int des_reqi(INT n) { return(n); } + +void deschk(des,n,p) +design *des; +INT n, p; +{ + double *z; + des->dw = checkvarlen(des->dw,des_reqd(n,p),"_deswork",VDOUBLE); + z = vdptr(des->dw); + des->X = z; z += n*p; + setzero(des->X, n*p); + + des->w = z; z += n; + setzero(des->w, n); + + des->res=z; z += n; + setzero(des->res, n); + + des->di =z; z += n; + setzero(des->di, n); + + des->th =z; z += n; + setzero(des->th, n); + + des->wd =z; z += n; + setzero(des->wd, n); + + des->V =z; z += p*p; + setzero(des->V, p*p); + + des->P =z; z += p*p; + setzero(des->P, p*p); + + des->f1 =z; z += p; + setzero(des->f1, p); + + des->ss =z; z += p; + setzero(des->ss, p); + + des->oc =z; z += p; + setzero(des->oc, p); + + des->cf =z; z += p; + setzero(des->cf, p); + + z = jac_alloc(&des->xtwx,p,z); + + des->index = checkvarlen(des->index,des_reqi(n),"_desidx",VINT); + des->ind = (INT *)vdptr(des->index); + des->n = n; + des->p = p; + des->xtwx.p = p; +} + +void bbox(lf,bx) +lfit *lf; +double *bx; +{ INT i, j, d, n; + double z, mx, mn; + d = lf->mi[MDIM]; n = lf->mi[MN]; + for (i=0; isty[i]==STANGL) + { bx[i] = 0.0; bx[i+d] = 2*PI*lf->sca[i]; + } + else + { mx = mn = datum(lf,i,0); + for (j=1; jxl[i]xl[i+d]) /* user set xlim; maybe use them. 
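+                            (mn/mx are replaced by the user limits only when
+                            those limits are at most 0.2*(mx-mn) outside the
+                            observed data range; limits further out are
+                            ignored and the data-driven range is kept)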
*/ + { z = mx-mn; + if (mn-0.2*z < lf->xl[i]) mn = lf->xl[i]; + if (mx+0.2*z > lf->xl[i+d]) mx = lf->xl[i+d]; + } + bx[i] = mn; + bx[i+d] = mx; + } + } +} + +void preproc(des,lf,nopc) +design *des; +lfit *lf; +INT nopc; +{ INT d, i, j, n; + double xb; + d = lf->mi[MDIM]; n = lf->mi[MN]; + lf->mi[MLINK] = defaultlink(lf->mi[MLINK],lf->mi[MTG]); + if (!validlinks(lf->mi[MLINK],lf->mi[MTG])) + { ERROR(("Invalid family/link combination")); + return; + } + compparcomp(des,lf,nopc); + if (lf->w==NULL) + lf->dp[DSWT] = lf->mi[MN]; + else + { lf->dp[DSWT] = 0; + for (i=0; imi[MN]; i++) lf->dp[DSWT] += prwt(lf,i); + } + for (i=0; isca[i]<=0) /* set automatic scales */ + { if (lf->sty[i]==STANGL) lf->sca[i] = 1.0; + else + { xb = lf->sca[i] = 0.0; + for (j=0; jsca[i] += SQR(datum(lf,i,j)-xb); + lf->sca[i] = sqrt(lf->sca[i]/(n-1)); + } + } + bbox(lf,lf->fl); +} + +#ifdef CVERSION +extern void do_scbsim(); +#endif + +void startlf(des,lf,vfun,nopc) +design *des; +lfit *lf; +INT (*vfun)(), nopc; +{ + INT i, *mi; + des->vfun = vfun; + mi = lf->mi; + mi[MP] = calcp(mi,mi[MDEG]); + des->pref = 0; + cvi = -1; /* inhibit cross validation */ + deschk(des,mi[MN],mi[MP]); + if (mi[MDEB]>0) printf("preprocess\n"); + preproc(des,lf,nopc); + if (mi[MDEB]>0) printf("preprocess ok\n"); + if (lf_error) return; + lf->ord = 0; + makecfn(des,lf); + if ((mi[MDIM]==1) && (lf->sty[0]!=STANGL)) + { i = 1; + while ((i=datum(lf,0,i-1))) i++; + lf->ord = (i==mi[MN]); + } + + if (mi[MDEB]>0) printf("call eval structure\n"); + switch(mi[MEV]) + { case EPHULL: triang_start(des,lf); break; + case EDATA: dataf(des,lf); break; + case ECROS: crossf(des,lf); break; + case EGRID: gridf(des,lf); break; + case ETREE: atree_start(des,lf); break; + case EKDCE: mi[MKT] = KCE; + case EKDTR: kdtre_start(des,lf); break; + case EPRES: preset(des,lf); break; + case EXBAR: xbarf(des,lf); break; + case ENONE: lf->nv = lf->nce = 0; + return; +#ifdef CVERSION + case 100: do_scbsim(des,lf); break; +#endif + default: ERROR(("startlf: Invalid evaluation structure")); + } + + /* renormalize for family=density */ + if ((mi[MREN]) && (mi[MTG]==TDEN)) dens_renorm(lf,des); + } + +#ifdef CVERSION +extern lfit lf; +extern design des; +extern plots pl[]; +int curwin; +vari *vb; + +INT nofit() +{ if (lf.mi==NULL) return(1); + return(lf.mi[MEV]==ENULL); +} + +void endfit() +{ INT i; + for (i=0; imi[MDIM])) + ERROR(("drl: Invalid derivatives %s",key)); + dv[i]--; + } + return(nd); +} + +void fitoptions(lf,vc,re) +lfit *lf; +vari *vc; +INT re; +{ + INT d = 0, n, i, i0, i1, *mi; + char kc, *key; + vari *v; + + re &= (!nofit()); + i0 = getarg(vc,"formula",1); + if ((!re) && (i0==0)) { ERROR(("no formula")); return; } + i1 = getarg(vc,"data",1); + if (i1>0) doreaddata(argval(vc,i1),(INT)0); + if (re) + recondat(0,&lf->mi[MN]); + else + { lf->base = lf->y = lf->c = lf->w = NULL; + lf->nd = 0; + strcpy(lf->yname,"_NuLl"); + strcpy(lf->wname,"_NuLl"); + strcpy(lf->bname,"_NuLl"); + strcpy(lf->cname,"_NuLl"); + } + if (i0>0) /* interpret formula */ + { key = argval(vc,i0); + n = -1; + i0 = i1 = 0; d = 0; + while ((i00) + { key[i0] = '\0'; + lf->y = vdptr(findvar(key,1,&n)); + strcpy(lf->yname,key); + key[i0] = '~'; + } + i1 = i0 = i0+1; + while (i1sty[d] = KPROD; + if (stm(&key[i0],"left(",5)) + { lf->sty[d] = STLEFT; + i0 = i0+5; key[i1-1] = '\0'; + } + else if (stm(&key[i0],"right(",6)) + { lf->sty[d] = STRIGH; + i0 = i0+6; key[i1-1] = '\0'; + } + else if (stm(&key[i0],"ang(",4)) + { lf->sty[d] = STANGL; + i0 = i0+4; key[i1-1] = '\0'; + } + else if 
(stm(&key[i0],"cpar(",5)) + { lf->sty[d] = STCPAR; + i0 = i0+5; key[i1-1] = '\0'; + } + dvari(lf,d) = vdptr(findvar(&key[i0],1,&n)); + strcpy(lf->xname[d],&key[i0]); + if (lf->sty[d]!=KPROD) key[i1-1] = ')'; + d++; key[i1] = kc; + i0 = i1 = i1+1; + } + fitdefault(lf,n,d); + } + mi = lf->mi; + + i = getarg(vc,"weights",1); + if (i>0) + { lf->w = vdptr(findvar(argval(vc,i),1,&mi[MN])); + strcpy(lf->wname,argval(vc,i)); + } + i = getarg(vc,"cens",1); + if (i>0) + { lf->c = vdptr(findvar(argval(vc,i),1,&mi[MN])); + strcpy(lf->cname,argval(vc,i)); + } + i = getarg(vc,"base",1); + if (i>0) + { lf->base = vdptr(findvar(argval(vc,i),1,&mi[MN])); + strcpy(lf->bname,argval(vc,i)); + } + + i = getarg(vc,"scale",1); + if (i>0) + { if (argvalis(vc,i,"T")) + for (i=0; isca[i] = 0; + else if (argvalis(vc,i,"F")) + for (i=0; isca[i] = 1; + else + arvect(argval(vc,i),lf->sca,d,0); + } + + i = getarg(vc,"vb",0); + if (i>0) + { lf->dp[DALP] = -1; + vb = arbuild(argval(vc,i),0,strlen(argval(vc,i))-1,NULL,0,1); + setvarname(vb,"_varband"); + } + else + { i = getarg(vc,"alpha",1); + if (i>0) arvect(argval(vc,i),&lf->dp[DALP],3,1); + } + + i = getarg(vc,"deg",1); + if (i>0) + { i = readilist(&mi[MDEG0],argval(vc,i),1,2,0); + if (i==1) mi[MDEG] = mi[MDEG0]; + } + + i = getarg(vc,"family",1);if (i>0) setstrval(mi,MTG,argval(vc,i)); + i = getarg(vc,"link",1); if (i>0) setstrval(mi,MLINK,argval(vc,i)); + i = getarg(vc,"ev",1); + if (i>0) + { v = findvar(argval(vc,i),0,NULL); + if (v!=NULL) + { mi[MEV] = EPRES; + lf->xxev= v; + lf->nvm = v->n; + } + else + setstrval(mi,MEV,argval(vc,i)); + } + i = getarg(vc,"acri",1); if (i>0) setstrval(mi,MACRI,argval(vc,i)); + + i = getarg(vc,"mg",1); + if (i>0) readilist(lf->mg,argval(vc,i),1,MXDIM,1); + + i = getarg(vc,"kt",1); if (i>0) setstrval(mi,MKT, argval(vc,i)); + i = getarg(vc,"kern",1); if (i>0) setstrval(mi,MKER,argval(vc,i)); + i = getarg(vc,"itype",1);if (i>0) setstrval(mi,MIT, argval(vc,i)); + + i = getarg(vc,"cut",1); + if (i>0) lf->dp[DCUT] = darith(argval(vc,i)); + + i = getarg(vc,"flim",1); + if (i>0) arvect(argval(vc,i),lf->fl,2*d,2); + + i = getarg(vc,"xlim",1); + if (i>0) arvect(argval(vc,i),lf->xl,2*d,2); + + i = getarg(vc,"deriv",0); + if (i>0) lf->nd = drl(argval(vc,i),lf->deriv,lf->mi); + i = getarg(vc,"dc",1); if (i>0) mi[MDC] = getlogic(vc,i); + i = getarg(vc,"maxk",1); if (i>0) readilist(&mi[MK],argval(vc,i),1,1,0); + i = getarg(vc,"mint",1); if (i>0) readilist(&mi[MMINT],argval(vc,i),1,1,0); + i = getarg(vc,"maxit",1); if (i>0) readilist(&mi[MMXIT],argval(vc,i),1,1,0); + i = getarg(vc,"renorm",1);if (i>0) mi[MREN] = getlogic(vc,i); + i = getarg(vc,"debug",1); if (i>0) readilist(&mi[MDEB],argval(vc,i),1,1,0); +} + +void clocfit(v,re) +INT re; +vari *v; +{ + lf.ord = 0; + lf.kap[0] = lf.kap[1] = lf.kap[2] = 0.0; lf.nk = 0; + fitoptions(&lf,v,re); + if (lf_error) + { if (lf.mi!=NULL) lf.mi[MEV] = ENULL; + return; + } + + + lf.nv = 0; + if (lf.mi[MDEG0]==lf.mi[MDEG]) + { startlf(&des,&lf,procv,0); + if (!lf_error) ressumm(&lf,&des); + } + else + startlf(&des,&lf,procvvord,0); + if (lf_error) + { if (!re) lf.mi[MEV] = ENULL; + return; + } + + //printf("Evaluation structure %d, %d points.\n",lf.mi[MEV],lf.nv); + if (argarg(v,0) != NULL) dosavefit(&lf,argarg(v,0),"wb",(INT)0); + endfit(); +} + +#endif diff --git a/src/locfit/strings.c b/src/locfit/strings.c new file mode 100644 index 0000000..ce3dd32 --- /dev/null +++ b/src/locfit/strings.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. 
+ * + * + * + * miscellaneous string handling functions. + * used mostly in arith (C version) and lfstr.c + * + * stm(u,v,k) do the first k components of u, v match? + * ct_match(z1,z2) counts number of matching components. + * pmatch(z,strings,vals,n,def) + * finds the best match for z among the strings; + * returns corresponding component of vals. + * n = no of strings; def = value if no match. + * matchrt(z,i,i2,op,cl) + * matchlf(z,i1,i,op,cl) + * Parenthesis matching. If op='(' and cl=')', matchrt + * searches z, starting at z[i]='(' and ending and z[i2], + * for the closing ')', taking care of nesting. + * matchlf does the reverse. + * + * checkltor(z,i1,i2,c) + * Checks the string z, left to right from z[i1] to z[i2] + * but skipping parenthesized () and [] parts, for the + * occurence of any character from c. If a match is found, + * the index is returned. If no match is found, return -1. + * + * checkrtol(z,i1,i2,c) Same as checkltor(), but searches right to left. + * strip(z) replaces underscores in z by spaces. + */ + +#include "local.h" + +/* do the first k components of u, v match? */ +int stm(char *u, char *v, int k) { return((strncmp(u,v,k)==0)); } + +int ct_match(z1, z2) +char *z1, *z2; +{ int ct = 0; + while (z1[ct]==z2[ct]) + { if (z1[ct]=='\0') return(ct+1); + ct++; + } + return(ct); +} + +int pmatch(z, strings, vals, n, def) +char *z, **strings; +int *vals, n, def; +{ int i, ct, best, best_ct; + best = -1; + best_ct = 0; + + for (i=0; ibest_ct) { best = i; best_ct = ct; } + } + if (best==-1) return(def); + return(vals[best]); +} + +int matchrt(z,i,i2,op,cl) +char *z, op, cl; +int i, i2; +{ int k; + if (z[i] != op) + { ERROR(("matchrt: wrong start character")); + return(i); + } + k = 0; + while (1) + { if (z[i]==op) k++; + if (z[i]==cl) k--; + if (k==0) return(i); + i++; + if (i>i2) + { ERROR(("matchrt: unbalanced %c%c: %s",op,cl,z)); + return(i); + } + } +} + +int matchlf(z,i1,i,op,cl) +char *z, op, cl; +int i, i1; +{ int k; + if (z[i] != cl) + { ERROR(("matchlf: wrong end character")); + return(i); + } + k = 0; + while (1) + { if (z[i]==op) k--; + if (z[i]==cl) k++; + if (k==0) return(i); + i--; + if (i= i1) + { if (strchr(c,z[i]) != NULL) return(i); + if (z[i]==')') i = matchlf(z,i1,i,'(',')'); + if (z[i]==']') i = matchlf(z,i1,i,'[',']'); + i--; + if (lf_error) return(-1); + } + return(-1); +} + +void strip(z) +char *z; +{ do { if (*z=='_') *z=' '; } while (*(++z)!='\0'); +} diff --git a/src/locfit/vari.cpp b/src/locfit/vari.cpp new file mode 100644 index 0000000..eabc450 --- /dev/null +++ b/src/locfit/vari.cpp @@ -0,0 +1,299 @@ +/* + * Copyright (c) 1996-2000 Lucent Technologies. + * See README file for details. + * + * Functions for handling locfit variables in the C version. 
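+ *
+ * In this port the original fixed workspace (the commented-out db/lfwptr
+ * arena below) is replaced by per-variable heap allocation: createvar()
+ * calloc()s each vari and its data block and registers it in var_table,
+ * a map keyed by the variable name; deletevar() and cleardb() free the
+ * storage and remove the entry. growvar() is not supported and reports
+ * an error.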
+ */ + +extern "C" +{ +#include "local.h" +} + +#include + +#include "vari.hpp" +//#define MAXV 1000 +//#define LF_WORK 102400 + +//static char *db = NULL; +//static INT lfwptr, lf_work; +//vari root; + + + +#include +#include + +using namespace std; + + +typedef map VarTable; +VarTable var_table; + +void cleardb() +{ + for (VarTable::iterator i = var_table.begin(); i != var_table.end(); ++i) + { + if (i->second && i->second->stat != STSYSPEC) + { + free(i->second->dpr); + } + free(i->second); + } + var_table.clear(); +} + +void initdb() /* initialize locfit's work space */ +{ + cleardb(); + +// char *z = NULL; +// z = getenv("LFWORK"); +// if (z==NULL) lf_work = LF_WORK; +// else sscanf(z,"%d",&lf_work); +// lf_work <<= 10; +// if (db != NULL) +// { +// free(db); +// lfwptr = 0; +// } +// db = (char *)calloc(lf_work, 1); +// if (db == NULL) +// { +// fprintf(stderr, "Error: Locfit working space could not be allocated!\n"); +// fprintf(stderr, "Error code %d\n", errno); +// } +// +// root.stat = STSYSTEM; +// root.mode = VVARI; +// root.dpr = (double *)db; +// lfwptr = root.bytes = MAXV*sizeof(vari); +// root.n = 0; +} + + +INT vbytes(int n, int mode) +{ + switch(mode) + { + case VDOUBLE: return(n*sizeof(double)); + case VINT: return(n*sizeof(INT)); + case VCHAR: return(n); + case VARGL: return(n*sizeof(carg)); + case VPREP: return(sizeof(pplot)); + case VARC: return(n*sizeof(arstruct)); + case VVARI: return(n*sizeof(vari)); + case VXYZ: return(n*sizeof(plxyz)); + } + ERROR(("unknown mode %d in vbytes",mode)); + return(0); +} + +/* vdptr with NULL check */ +double *vdptr(vari* v) + +{ + if (v==NULL) + return(NULL); + return(v->dpr); +} + +/* return the i'th data item. Cyclic. */ +double vitem(vari* v, int i) +{ + int index; + if ((v==NULL) || (vlength(v)==0)) + return(0.0); + index = i % vlength(v); + switch(v->mode) + { case VDOUBLE: return( vdptr(v)[index] ); + case VINT: + { INT *z; + z = (INT *)vdptr(v); + return(z[index]); + } + case VCHAR: + { char *z; + z = (char *)vdptr(v); + return(z[index]); + } + } + ERROR(("Invalid mode in vitem()")); + return(0.0); +} + +void vassn(vari* v, int i, double x) +{ + vdptr(v)[i] = x; +} + +vari *growvar(vari* vold, int n) +{ + fprintf(stderr, "Error: attempting to grow variable not supported\n"); + return NULL; +// vari *vnew; +// int reqd_bytes; +// +// if (vold==NULL) +// { +// ERROR(("growvar: NULL old")); +// return(NULL); +// } +// +// reqd_bytes = vbytes(n, vmode(vold)); +// if (reqd_bytes <= vold->bytes) +// return(vold); +// +// vnew = createvar("_grow",vold->stat,n,vmode(vold)); +// memcpy(vdptr(vnew),vdptr(vold),vbytes(vlength(vold),vmode(vold))); +// setvarname(vnew,vold->name); +// vlength(vnew) = vlength(vold); +// deletevar(vold); +// return(vnew); +} + +void *viptr(vari* v, int i) /* return pointer to ith data item, take account of mode */ +{ switch(vmode(v)) + { case VDOUBLE: return(&v->dpr[i]); + case VCHAR: return(&((char *)v->dpr)[i]); + case VARGL: return(&((carg *)v->dpr)[i]); + case VARC: return(&((arstruct *)v->dpr)[i]); + case VVARI: return(&((vari *)v->dpr)[i]); + case VXYZ: return(&((plxyz *)v->dpr)[i]); + } + ERROR(("Unknown mode %d in viptr",vmode(v))); + return(NULL); +} + +void setvarname(vari* v, varname name) +{ + if (strcmp(v->name,name)==0) + return; + deletename(name); + strcpy(v->name,name); +} + +/* + findvar finds the variable name. + err=0, keep quiet if not found; 1 produce error message. 
+ *n returns length of variable (if initially>0, checks length) + */ + +vari *findvar(varname name, int err, int* n) +{ + INT status; + vari *v; + + if (strcmp(name,"_NuLl")==0) return(NULL); + + VarTable::iterator i = var_table.find(name); + + if (i != var_table.end()) + { + v = i->second; + if (v == NULL) + { + fprintf(stderr, "Found variable named %s, but data is NULL\n", name); + return NULL; + } + status = v->stat; + if (status != STHIDDEN && status != STEMPTY) + { + if (n == NULL) + return v; + if (*n==-1) + *n = vlength(v); + if (*n==0 || *n==vlength(v)) + return(v); + if (err) + ERROR(("Variable %s has wrong length",name)); + } + } + + if (err) + ERROR(("Variable %s not found",name)); + return NULL; +} + +void deletevar(vari* v) /* delete variable, or top variable if NULL */ +{ + if (v == NULL) + { + fprintf(stderr, "Error: attempting to clear entire table through NULL delete\n"); + return; + } + + VarTable::iterator i = var_table.find(v->name); + if (i != var_table.end()) + { + if (i->second && i->second->stat != STSYSPEC) + { + free(i->second->dpr); + } + free(i->second); + var_table.erase(i); + } +} + +void deleteifhidden(vari* v) +{ + if (v==NULL) + return; + if (v->stat == STHIDDEN) deletevar(v); +} + +void deletename(varname name) /* delete variable name, or top variable if NULL */ +{ + vari *v; + v = findvar(name,0,NULL); + if (v!=NULL) + deletevar(v); +} + +vari *createvar(varname name, int status, int n, int mode) +{ + int bytes; + vari *v; + + /* + compute the length of the variable in bytes. some systems + mess up is this is not a multiple of 8. + */ + bytes = vbytes(n,mode); + while ( (bytes & 8) > 0 ) bytes++; + + if (lf_error) + return(NULL); + + // Don't delete the hidden vars + if (status==STSYSTEM || status==STREGULAR || status==STPLOTVAR) + deletename(name); + + v = findvar(name,0,NULL); + if (v != NULL) + { + fprintf(stderr, "Error: attempting to re-initialize still-live variable %s\n", name); + } + + pair::iterator, bool> inserted; + string str_name = name; + pair p; + p.first = str_name; + p.second = (vari*)calloc(1, sizeof(vari)); + inserted = var_table.insert(p); + + v = inserted.first->second; + + strcpy(v->name,name); + vlength(v) = n; + v->stat = status; + v->bytes = bytes; + v->mode = mode; + if (status!=STSYSPEC) + { + v->dpr = (double*)calloc(bytes, 1); + } + + return(v); +} diff --git a/src/locfit/vari.hpp b/src/locfit/vari.hpp new file mode 100644 index 0000000..bc3ce96 --- /dev/null +++ b/src/locfit/vari.hpp @@ -0,0 +1,33 @@ +// +// vari.hpp +// cufflinks +// +// Created by Cole Trapnell on 3/22/11. +// Copyright 2011 Cole Trapnell. All rights reserved. +// + +#ifdef __cplusplus +extern "C" +{ +#endif + void cleardb(); + void initdb(); + INT vbytes(int n, int mode); + void setvarname(vari* v, varname name); + double *vdptr(vari* v); + double vitem(vari* v, int i); + void vassn(vari* v, int i, double x); + vari *findvar(varname name, int err, int* n); + vari *growvar(vari* vold, int n); + void *viptr(vari* v, int i); + void setvarname(vari* v, varname name); + vari *findvar(varname name, int err, int* n); + + void deletevar(vari* v); /* delete variable, or top variable if NULL */ + void deleteifhidden(vari* v); + vari *createvar(varname name, int status, int n, int mode); + void deletename(varname name); + +#ifdef __cplusplus +} +#endif diff --git a/src/locfit/wdiag.c b/src/locfit/wdiag.c new file mode 100644 index 0000000..8cb241d --- /dev/null +++ b/src/locfit/wdiag.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. 
+ * See README file for details. + * + * + Routines for computing weight diagrams. + wdiag(lf,des,lx,deg,ty,exp) + Must locfit() first, unless ker==WPARM and has par. comp. + + also, vertex processing function procvhatm(). + + cwdiag() entry for CLOCFIT. + */ + +#include "local.h" + +static double *wd; +extern double robscale; +extern void unitvec(); + +void nnresproj(lf,des,u,m,p,mi) +lfit *lf; +design *des; +double *u; +INT m, p, *mi; +{ INT i, j; + double link[LLEN]; + setzero(des->f1,p); + for (j=0; jind[j],des->th[j],robscale); + for (i=0; if1[i] += link[ZDDLL]*d_xij(des,j,i)*u[j]; + } + jacob_solve(&des->xtwx,des->f1); + for (i=0; if1,d_xi(des,i),p)*des->w[i]; +} + +void wdexpand(l,n,ind,m) +double *l; +INT n, m, *ind; +{ INT i, j, t; + double z; + for (j=m; j=0) + { if (ind[j]==j) j--; + else + { i = ind[j]; + z = l[j]; l[j] = l[i]; l[i] = z; + t = ind[j]; ind[j] = ind[i]; ind[i] = t; + if (ind[j]==-1) j--; + } + } + +/* for (i=n-1; i>=0; i--) + { l[i] = ((j>=0) && (ind[j]==i)) ? l[j--] : 0.0; } */ +} + +INT wdiagp(lf,des,lx,deg,ty,exp) +lfit *lf; +design *des; +double *lx; +INT deg, ty, exp; +{ INT i, j, p, *mi, *deriv, nd; + double *l1; + mi = lf->mi; p = des->p; + deriv = lf->deriv; nd = lf->nd; + fitfun(lf,des->xev,lf->pc.xbar,des->f1,deriv,nd); + if (exp) + { jacob_solve(&lf->pc.xtwx,des->f1); + for (i=0; if1,d_xi(des,i),p); + return(mi[MN]); + } + jacob_hsolve(&lf->pc.xtwx,des->f1); + for (i=0; if1[i]; + + if (deg>=1) + for (i=0; ixev,lf->pc.xbar,l1,deriv,nd+1); + jacob_hsolve(&lf->pc.xtwx,l1); + } + + if (deg>=2) + for (i=0; ixev,lf->pc.xbar,l1,deriv,nd+2); + jacob_hsolve(&lf->pc.xtwx,l1); + } } + return(p); +} + +INT wdiag(lf,des,lx,deg,ty,exp) +lfit *lf; +design *des; +double *lx; +INT deg, ty, exp; +/* deg=0: l(x) only. + deg=1: l(x), l'(x) (approx/exact ? mi[MDC] ); + deg=2: l(x), l'(x), l''(x); + ty = 1: e1 (X^T WVX)^{-1} X^T W -- hat matrix + ty = 2: e1 (X^T WVX)^{-1} X^T WV^{1/2} -- scb's +*/ +{ double w, *X, *lxd, *lxdd, wdd, wdw, *ulx, link[LLEN], h; + double dfx[MXDIM], hs[MXDIM]; + INT i, ii, j, k, l, m, d, p, *mi, *deriv, nd; + if ((lf->mi[MKER]==WPARM) && (hasparcomp(lf))) + return(wdiagp(lf,des,lx,deg,ty,exp)); + mi = lf->mi; h = des->h; + deriv = lf->deriv; nd = lf->nd; + wd = des->wd; + d = mi[MDIM]; p = des->p; X = d_x(des); + ulx = des->res; + m = des->n; + for (i=0; isca[i]; + if (deg>0) + { lxd = &lx[m]; + setzero(lxd,m*d); + if (deg>1) + { lxdd = &lxd[d*m]; + setzero(lxdd,m*d*d); + } } + if (nd>0) fitfun(lf,des->xev,des->xev,des->f1,deriv,nd); /* c(0) */ + else unitvec(des->f1,0,p); + jacob_solve(&des->xtwx,des->f1); /* c(0) (X^TWX)^{-1} */ + for (i=0; iind[i]; + lx[i] = innerprod(des->f1,&X[i*p],p); /* c(0)(XTWX)^{-1}X^T */ + if ((deg>0) && (mi[MDC])) + { wd[i] = Wd(des->di[ii]/h,mi[MKER]); + for (j=0; jxev[j]; + lxd[j*m+i] = lx[i]*des->w[i]*weightd(dfx[j],lf->sca[j], + d,mi[MKER],mi[MKT],h,lf->sty[j],des->di[ii]); + /* c(0) (XTWX)^{-1}XTW' */ + } + if (deg>1) + { wdd = Wdd(des->di[ii]/h,mi[MKER]); + for (j=0; jdi[ii]==0) ? 
0 : h/des->di[ii]; + w = wdd * (des->xev[k]-datum(lf,k,ii)) * (des->xev[j]-datum(lf,j,ii)) + * w*w / (hs[k]*hs[k]*hs[j]*hs[j]); + if (j==k) w += wd[i]/(hs[j]*hs[j]); + lxdd[(j*d+k)*m+i] = lx[i]*w; + /* c(0)(XTWX)^{-1}XTW'' */ + } + } + } + lx[i] *= des->w[i]; + } + if ((deg==2) && (mi[MDC])) + { for (i=0; ixev,des->xev,des->f1,deriv,nd+1); + for (k=0; kind[k],des->th[k],robscale); + for (j=0; jf1[j] -= link[ZDDLL]*lxd[i*m+k]*X[k*p+j]; + /* c'(x)-c(x)(XTWX)^{-1}XTW'X */ + } + jacob_solve(&des->xtwx,des->f1); /* (...)(XTWX)^{-1} */ + for (j=0; jf1,&X[j*p],p); /* (...)XT */ + for (j=0; jind[k]; + dfx[j] = datum(lf,j,ii)-des->xev[j]; + wdw = des->w[k]*weightd(dfx[j],lf->sca[j],d,mi[MKER],mi[MKT],h, + lf->sty[j],des->di[ii]); + lxdd[(i*d+j)*m+k] += ulx[k]*wdw; + lxdd[(j*d+i)*m+k] += ulx[k]*wdw; + } + /* + 2(c'-c(XTWX)^{-1}XTW'X)(XTWX)^{-1}XTW' */ + } + for (j=0; j0) + { if (mi[MDC]) for (j=0; jxev,des->xev,des->f1,deriv,nd+1); + jacob_solve(&des->xtwx,des->f1); + for (k=0; kf1[l]*X[k*p+l]*des->w[k]; + /* add c'(0)(XTWX)^{-1}XTW */ + } + } + if (deg==2) + { for (i=0; ixev,des->xev,des->f1,deriv,nd+2); + jacob_solve(&des->xtwx,des->f1); + for (k=0; kf1[l]*X[k*p+l]*des->w[k]; + /* + c''(x)(XTWX)^{-1}XTW */ + } + } + } + k = 1+d*(deg>0)+d*d*(deg==2); + + if (exp) wdexpand(lx,mi[MN],des->ind,m); + + if (ty==1) return(m); + for (i=0; iind[i],des->th[i],robscale); + link[ZDDLL] = sqrt(fabs(link[ZDDLL])); + for (j=0; jmi[MN] : des->p; + if ((lf->mi[MKER]!=WPARM) | (!hasparcomp(lf))) k = procvraw(des,lf,v); + l = (double *)viptr(lf->L,v*n); + wdiag(lf,des,l,(INT)0,(INT)1,1); + return(k); +} + +#ifdef CVERSION +extern lfit lf; +extern design des; + +void cwdiag(v) +vari *v; +{ INT i; + vari *ve, *vr; + i = getarg(v,"ev",0); + if (i==0) ERROR(("wdiag: no ev point")); + fitoptions(&lf,v,0); + if (lf_error) return; + ve = varith(argval(v,i),"wdev",STPLOTVAR); + vr = createvar("wdres",STHIDDEN,lf.mi[MN],VDOUBLE); + lf.xxev = ve; + lf.nv = lf.nvm = 1; + lf.L = vr; + lf.mi[MEV] = EPRES; + startlf(&des,&lf,procvhatm,1); + deletevar(ve); + saveresult(vr,argarg(v,0),STREGULAR); +} + +#endif diff --git a/src/locfit/weight.c b/src/locfit/weight.c new file mode 100644 index 0000000..1e1e0ef --- /dev/null +++ b/src/locfit/weight.c @@ -0,0 +1,462 @@ +/* + * Copyright (c) 1996-2001 Lucent Technologies. + * See README file for details. + * + * + * Defines the weight functions and related quantities used + * in LOCFIT. + */ + +#include "local.h" + + +/* The weight functions themselves. Used everywhere. */ +double W(u,ker) +double u; +INT ker; +{ u = fabs(u); + switch(ker) + { case WRECT: return((u>1) ? 0.0 : 1.0); + case WEPAN: return((u>1) ? 
0.0 : 1-u*u); + case WBISQ: if (u>1) return(0.0); + u = 1-u*u; return(u*u); + case WTCUB: if (u>1) return(0.0); + u = 1-u*u*u; return(u*u*u); + case WTRWT: if (u>1) return(0.0); + u = 1-u*u; return(u*u*u); + case WQUQU: if (u>1) return(0.0); + u = 1-u*u; return(u*u*u*u); + case WTRIA: if (u>1) return(0.0); + return(1-u); + case W6CUB: if (u>1) return(0.0); + u = 1-u*u*u; u = u*u*u; return(u*u); + case WGAUS: return(exp(-SQR(GFACT*u)/2.0)); + case WEXPL: return(exp(-EFACT*u)); + case WMACL: return(1/((u+1.0e-100)*(u+1.0e-100))); + case WMINM: ERROR(("WMINM in W")); + return(0.0); + case WPARM: return(1.0); + } + return(0.0); +} + +INT iscompact(ker) +INT ker; +{ if ((ker==WEXPL) | (ker==WGAUS) | (ker==WMACL) | (ker==WPARM)) return(0); + return(1); +} + +double weightprod(lf,u,h) +lfit *lf; +double *u, h; +{ INT i, ker; + double sc, w; + w = 1.0; + ker = lf->mi[MKER]; + for (i=0; imi[MDIM]; i++) + { sc = lf->sca[i]; + switch(lf->sty[i]) + { case STLEFT: + if (u[i]>0) return(0.0); + w *= W(-u[i]/(h*sc),ker); + break; + case STRIGH: + if (u[i]<0) return(0.0); + w *= W(u[i]/(h*sc),ker); + break; + case STANGL: + w *= W(2*fabs(sin(u[i]/(2*sc)))/h,ker); + break; + case STCPAR: + break; + default: + w *= W(fabs(u[i])/(h*sc),ker); + } + if (w==0.0) return(w); + } + return(w); +} + +double weightsph(lf,u,h,hasdi,di) +lfit *lf; +double *u, h, di; +INT hasdi; +{ INT i; + + if (!hasdi) di = rho(u,lf->sca,lf->mi[MDIM],lf->mi[MKT],lf->sty); + + for (i=0; imi[MDIM]; i++) + { if ((lf->sty[i]==STLEFT) && (u[i]>0.0)) return(0.0); + if ((lf->sty[i]==STRIGH) && (u[i]<0.0)) return(0.0); + } + if (h==0) return((di==0.0) ? 1.0 : 0.0); + + return(W(di/h,lf->mi[MKER])); +} + +double weight(lf,x,t,h,hasdi,di) +lfit *lf; +double *x, *t, h, di; +INT hasdi; +{ double u[MXDIM]; + INT i; + for (i=0; imi[MDIM]; i++) u[i] = (t==NULL) ? x[i] : x[i]-t[i]; + switch(lf->mi[MKT]) + { case KPROD: return(weightprod(lf,u,h)); + case KSPH: return(weightsph(lf,u,h,hasdi,di)); + } + ERROR(("weight: unknown kernel type %d",lf->mi[MKT])); + return(1.0); +} + +double sgn(x) +double x; +{ if (x>0) return(1.0); + if (x<0) return(-1.0); + return(0.0); +} + +double WdW(u,ker) /* W'(u)/W(u) */ +double u; +INT ker; +{ double eps=1.0e-10; + if (ker==WGAUS) return(-GFACT*GFACT*u); + if (ker==WPARM) return(0.0); + if (fabs(u)>=1) return(0.0); + switch(ker) + { case WRECT: return(0.0); + case WTRIA: return(-sgn(u)/(1-fabs(u)+eps)); + case WEPAN: return(-2*u/(1-u*u+eps)); + case WBISQ: return(-4*u/(1-u*u+eps)); + case WTRWT: return(-6*u/(1-u*u+eps)); + case WTCUB: return(-9*sgn(u)*u*u/(1-u*u*fabs(u)+eps)); + case WEXPL: return((u>0) ? -EFACT : EFACT); + } + ERROR(("WdW: invalid kernel")); + return(0.0); +} + +/* deriv. weights .. spherical, product etc + u, sc, sty needed only in relevant direction + Acutally, returns (d/dx W(||x||/h) ) / W(.) +*/ +double weightd(u,sc,d,ker,kt,h,sty,di) +double u, sc, h, di; +INT d, ker, kt, sty; +{ if (sty==STANGL) + { if (kt==KPROD) + return(-WdW(2*sin(u/(2*sc)),ker)*cos(u/(2*sc))/(h*sc)); + if (di==0.0) return(0.0); + return(-WdW(di/h,ker)*sin(u/sc)/(h*sc*di)); + } + if (sty==STCPAR) return(0.0); + if (kt==KPROD) + return(-WdW(u/(h*sc),ker)/(h*sc)); + if (di==0.0) return(0.0); + return(-WdW(di/h,ker)*u/(h*di*sc*sc)); +} + +double weightdd(u,sc,d,ker,kt,h,sty,di,i0,i1) +double *u, *sc, h, di; +INT d, ker, kt, *sty, i0, i1; +{ double w; + w = 1; + if (kt==KPROD) + { + w = WdW(u[i0]/(h*sc[i0]),ker)*WdW(u[i1]/(h*sc[i1]),ker)/(h*h*sc[i0]*sc[i1]); + } + return(0.0); +} + +/* Derivatives W'(u)/u. + Used in simult. 
conf. band computations, + and kernel density bandwidth selectors. */ +double Wd(u,ker) +double u; +INT ker; +{ double v; + if (ker==WGAUS) return(-SQR(GFACT)*exp(-SQR(GFACT*u)/2)); + if (ker==WPARM) return(0.0); + if (fabs(u)>1) return(0.0); + switch(ker) + { case WEPAN: return(-2.0); + case WBISQ: return(-4*(1-u*u)); + case WTCUB: v = 1-u*u*u; + return(-9*v*v*u); + case WTRWT: v = 1-u*u; + return(-6*v*v); + default: ERROR(("Invalid kernel %d in Wd",ker)); + } + return(0.0); +} + +/* Second derivatives W''(u)-W'(u)/u. + used in simult. conf. band computations in >1 dimension. */ +double Wdd(u,ker) +double u; +INT ker; +{ double v; + if (ker==WGAUS) return(SQR(u*GFACT*GFACT)*exp(-SQR(u*GFACT)/2)); + if (ker==WPARM) return(0.0); + if (u>1) return(0.0); + switch(ker) + { case WBISQ: return(12*u*u); + case WTCUB: v = 1-u*u*u; + return(-9*u*v*v+54*u*u*u*u*v); + case WTRWT: return(24*u*u*(1-u*u)); + default: ERROR(("Invalid kernel %d in Wdd",ker)); + } + return(0.0); +} + +/* int u1^j1..ud^jd W(u) du. + Used for local log-linear density estimation. + Assume all j_i are even. + Also in some bandwidth selection. +*/ +double wint(d,j,nj,ker) +INT d, *j, nj, ker; +{ double I, z; + int k, dj; + dj = d; + I = 0.0; + for (k=0; k2) return(0.0); + return(2-v); + case WEPAN: + v = fabs(v); + if (v>2) return(0.0); + return((2-v)*(16+v*(8-v*(16-v*(2+v))))/30); + case WBISQ: + v = fabs(v); + if (v>2) return(0.0); + v2 = 2-v; + return(v2*v2*v2*v2*v2*(16+v*(40+v*(36+v*(10+v))))/630); + } + ERROR(("Wconv not implemented for kernel %d",ker)); + return(0.0); +} + +/* derivative of Wconv. + 1/v d/dv int W(x)W(x+v)dx + used in kde bandwidth selection. +*/ +double Wconv1(v,ker) +double v; +INT ker; +{ double v2; + v = fabs(v); + switch(ker) + { case WGAUS: return(-0.5*SQRPI*GFACT*exp(-SQR(GFACT*v)/4)); + case WRECT: + if (v>2) return(0.0); + return(1.0); + case WEPAN: + if (v>2) return(0.0); + return((-16+v*(12-v*v))/6); + case WBISQ: + if (v>2) return(0.0); + v2 = 2-v; + return(-v2*v2*v2*v2*(32+v*(64+v*(24+v*3)))/210); + } + ERROR(("Wconv1 not implemented for kernel %d",ker)); + return(0.0); +} + +/* 4th derivative of Wconv. + used in kde bandwidth selection (BCV, SJPI, GKK) +*/ +double Wconv4(v,ker) +double v; +INT ker; +{ double gv; + switch(ker) + { case WGAUS: + gv = GFACT*v; + return(exp(-SQR(gv)/4)*GFACT*GFACT*GFACT*(12-gv*gv*(12-gv*gv))*SQRPI/16); + } + ERROR(("Wconv4 not implemented for kernel %d",ker)); + return(0.0); +} + +/* 5th derivative of Wconv. + used in kde bandwidth selection (BCV method only) +*/ +double Wconv5(v,ker) /* (d/dv)^5 int W(x)W(x+v)dx */ +double v; +INT ker; +{ double gv; + switch(ker) + { case WGAUS: + gv = GFACT*v; + return(-exp(-SQR(gv)/4)*GFACT*GFACT*GFACT*GFACT*gv*(60-gv*gv*(20-gv*gv))*SQRPI/32); + } + ERROR(("Wconv5 not implemented for kernel %d",ker)); + return(0.0); +} + +/* 6th derivative of Wconv. 
+ used in kde bandwidth selection (SJPI) +*/ +double Wconv6(v,ker) +double v; +INT ker; +{ double gv, z; + switch(ker) + { case WGAUS: + gv = GFACT*v; + gv = gv*gv; + z = exp(-gv/4)*(-120+gv*(180-gv*(30-gv)))*0.02769459142; + gv = GFACT*GFACT; + return(z*gv*gv*GFACT); + } + ERROR(("Wconv6 not implemented for kernel %d",ker)); + return(0.0); +} + +/* int W(v)^2 dv / (int v^2 W(v) dv)^2 + used in some bandwidth selectors +*/ +double Wikk(ker,deg) +INT ker, deg; +{ switch(deg) + { case 0: + case 1: /* int W(v)^2 dv / (int v^2 W(v) dv)^2 */ + switch(ker) + { case WRECT: return(4.5); + case WEPAN: return(15.0); + case WBISQ: return(35.0); + case WGAUS: return(0.2820947918*GFACT*GFACT*GFACT*GFACT*GFACT); + case WTCUB: return(34.15211105); + case WTRWT: return(66.08391608); + } + case 2: + case 3: /* 4!^2/8*int(W1^2)/int(v^4W1)^2 + W1=W*(n4-v^2n2)/(n0n4-n2n2) */ + switch(ker) + { case WRECT: return(11025.0); + case WEPAN: return(39690.0); + case WBISQ: return(110346.9231); + case WGAUS: return(14527.43412); + case WTCUB: return(126500.5904); + case WTRWT: return(254371.7647); + } + } + ERROR(("Wikk not implemented for kernel %d",ker)); + return(0.0); +} diff --git a/src/matching_merge.cpp b/src/matching_merge.cpp new file mode 100644 index 0000000..b3cb415 --- /dev/null +++ b/src/matching_merge.cpp @@ -0,0 +1,103 @@ +/* + * matching_merge.cpp + * cufflinks + * + * Created by Cole Trapnell on 6/1/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include "matching_merge.h" + +using namespace std; +using namespace boost; + +void find_path(const DAG& bundle_dag, + const adjacency_list<>& TC, + const DAGNode& source, + const DAGNode& target, + vector& path) +{ + if (source == target) + return; + bool done = false; + DAGNode curr = source; + while(!done) + { + graph_traits::adjacency_iterator i, iend; + for (tie(i,iend) = adjacent_vertices(curr, bundle_dag); i != iend; ++i) + { + DAGNode I = *i; + pair::edge_descriptor, bool> p; + p = edge(I, target, TC); + if (p.second) + { + path.push_back(*i); + curr = *i; + break; + } + if (*i == target) + { + path.push_back(*i); + done = true; + break; + } + } + } +} + +void extend_chains_to_paths(const DAG& bundle_dag, + vector >& chains, + adjacency_list<>& TC, + DAGNode source, + DAGNode sink, + vector >& paths) +{ + //Extend each chain to a path + for(size_t c = 0; c < chains.size(); ++c) + { + vector& chain = chains[c]; + assert (!chain.empty()); + reverse(chain.begin(), chain.end()); + vector path; + find_path(bundle_dag, TC, source, chain[0], path); + for (size_t n = 1; n < chain.size(); ++n) + { + assert (path.back() == chain[n - 1]); + DAGNode last = chain[n-1]; + DAGNode next = chain[n]; + find_path(bundle_dag, TC, last, next, path); + } + find_path(bundle_dag, TC, chain.back(), sink, path); + assert (path.back() == sink); + path.pop_back(); + paths.push_back(path); + } +} + +void make_scaffolds_from_paths(DAG& bundle_dag, + const vector >& paths, + vector& scaffolds) +{ + HitsForNodeMap hits_for_node = get(vertex_name, bundle_dag); + for (size_t p = 0; p < paths.size(); ++p) + { + const vector& path = paths[p]; + + vector path_alignments; + for (size_t m = 0; m < path.size(); ++m) + { + //fprintf(stderr, "%d ", scaff_id); + path_alignments.push_back(*(hits_for_node[path[m]])); + } + //fprintf(stderr,"\n"); + //fprintf(stderr, "\tMerging path %d into scaffold\n", p); + Scaffold s(path_alignments); + + //fprintf(stderr, "PATH %d\n-------------------------------------\n",s.get_id()); + scaffolds.push_back(s); + } + 
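    // A sketch of how these helpers are typically chained together (the
    // bp_graph/matcher objects and the transitive closure TC are assumed to
    // be built by the caller, e.g. with boost::transitive_closure; this is
    // illustrative, not a verbatim excerpt of the Cufflinks driver code):
    //
    //   DAG bundle_dag;
    //   create_overlap_dag(hits, bundle_dag);
    //   pair<DAGNode, DAGNode> st = add_terminal_nodes(bundle_dag);
    //   adjacency_list<> TC;
    //   transitive_closure(bundle_dag, TC);
    //   vector<vector<DAGNode> > chains, paths;
    //   make_chains_from_matching(bp_graph, matcher, chains);
    //   extend_chains_to_paths(bundle_dag, chains, TC, st.first, st.second, paths);
    //   vector<Scaffold> scaffolds;
    //   make_scaffolds_from_paths(bundle_dag, paths, scaffolds);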
sort(scaffolds.begin(), scaffolds.end(), scaff_lt); +} + + diff --git a/src/matching_merge.h b/src/matching_merge.h new file mode 100644 index 0000000..3bd30bb --- /dev/null +++ b/src/matching_merge.h @@ -0,0 +1,113 @@ +#ifndef MATCHING_MERGE_H +#define MATCHING_MERGE_H + +/* + * matching_merge.h + * cufflinks + * + * Created by Cole Trapnell on 6/1/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include + +//#include +#include +#include + +#include "scaffold_graph.h" +#include "scaffolds.h" + +using namespace std; + +typedef lemon::SmartBpUGraph ReachGraph; + +template +void make_chains_from_matching(const ReachGraph& bp, + const Matcher& matcher, + vector >& chains) +{ + // Make chains out of the matching + ReachGraph::ANodeMap matched_a_nodes(bp); + matcher.aMatching(matched_a_nodes); + + ReachGraph::BNodeMap matched_b_nodes(bp); + matcher.bMatching(matched_b_nodes); + + set chain_heads; + + for (ReachGraph::ANodeIt i(bp); i!=lemon::INVALID; ++i) + { + int a_id = bp.aNodeId(i); + ReachGraph::ANode a = bp.nodeFromANodeId(a_id); + if (matched_a_nodes[a] == lemon::INVALID) + chain_heads.insert(bp.nodeFromANodeId(bp.aNodeId(i))); + } + + for (set::iterator i = chain_heads.begin(); + i != chain_heads.end(); + ++i) + { + vector chain; + ReachGraph::ANode n = *i; + chain.push_back(bp.aNodeId(*i)); + while(true) + { + //int a_id = bp.aNodeId(n); + int b_id = bp.bNodeId(n); + + ReachGraph::BNode b = bp.nodeFromBNodeId(b_id); + + if (matched_b_nodes[b] == lemon::INVALID) + { + break; + } + else + { + ReachGraph::ANode a_match_to_b = bp.source(matched_b_nodes[b]); + chain.push_back(bp.aNodeId(a_match_to_b)); + n = a_match_to_b; + } + } + chains.push_back(chain); + } + assert (chains.size() == chain_heads.size()); +} + +template +void merge_from_matching(const ReachGraph& bp_graph, + const Matcher& matcher, + vector& scaffolds) +{ + vector merged_scaffolds; + + vector > chains; + + make_chains_from_matching(bp_graph, matcher, chains); + for (size_t i = 0; i < chains.size(); ++i) + { + vector chain; + for (size_t j = 0; j < chains[i].size(); ++j) + { + chain.push_back(scaffolds[chains[i][j]]); + } + merged_scaffolds.push_back(Scaffold(chain)); + } + + sort (merged_scaffolds.begin(), merged_scaffolds.end(), scaff_lt); + + scaffolds = merged_scaffolds; +} + +void extend_chains_to_paths(const DAG& bundle_dag, + vector >& chains, + boost::adjacency_list<>& TC, + DAGNode source, + DAGNode sink, + vector >& paths); + +void make_scaffolds_from_paths(DAG& bundle_dag, + const vector >& paths, + vector& scaffolds); +#endif diff --git a/src/multireads.cpp b/src/multireads.cpp new file mode 100644 index 0000000..4f43b8c --- /dev/null +++ b/src/multireads.cpp @@ -0,0 +1,131 @@ +/* + * multireads.cpp + * cufflinks + * + * Created by Adam Roberts on 3/6/11. + * Copyright 2011 Adam Roberts. All rights reserved. 
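 *
 * Each multi-mapped fragment is tracked as a MultiRead holding one MultiHit
 * per reported alignment.  add_expr() accumulates locally estimated
 * expression at each alignment position, and get_mass() later apportions
 * the fragment's mass as hit->expr / _tot_expr, falling back to a uniform
 * 1/num_hits() split when no valid expression estimate is available.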
+ * + */ + + +#include "hits.h" +#include "multireads.h" + +void MultiRead::add_hit(RefID r_id, int left, int right) +{ + _hits.push_back(MultiHit(r_id, left, right)); +} + +MultiHit* MultiRead::get_hit(RefID r_id, int left, int right) +{ + for (size_t i = 0; i < num_hits(); ++i) + { + MultiHit& hit = _hits[_curr_index]; + if (hit.r_id == r_id && hit.left == left && hit.right == right) + { + return &hit; + } + _curr_index = (_curr_index + 1) % num_hits(); + } + fprintf(stderr, "\nWARNING: Multi-Hit not found (%d,%d).\n", left, right); + return NULL; +} + +void MultiRead::add_expr(RefID r_id, int left, int right, double expr) +{ + MultiHit* hit = get_hit(r_id, left, right); + if (hit) + { + hit->expr += expr; + _tot_expr += expr; + } +} + +double MultiRead::get_mass(RefID r_id, int left, int right, bool valid_mass) +{ + if (!valid_mass) + { + return 1.0/num_hits(); + } + + if (_tot_expr == 0.0) + return 0.0; + + MultiHit* hit = get_hit(r_id, left, right); + if (hit) + return hit->expr/_tot_expr; + else + return 1.0/num_hits(); +} + +MultiRead* MultiReadTable::get_read(InsertID mr_id) +{ + MultiReadMap::iterator it; + it = _read_map.find(mr_id); + if (it == _read_map.end()) + { + return NULL; + } + else + { + return &(it->second); + } +} + +void MultiReadTable::add_hit(const MateHit& hit) +{ + add_hit(hit.ref_id(), hit.left(), hit.right(), hit.insert_id(), hit.num_hits()); +} + +void MultiReadTable::add_hit(RefID r_id, int left, int right, InsertID mr_id, int exp_num_hits) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_lock); +#endif + MultiRead* mr = get_read(mr_id); + if (!mr) + { + mr = &((_read_map.insert(std::make_pair(mr_id, MultiRead(mr_id, exp_num_hits)))).first->second); + } + mr->add_hit(r_id, left, right); +} + +void MultiReadTable::add_expr(const MateHit& hit, double expr) +{ + add_expr(hit.ref_id(), hit.left(), hit.right(), hit.insert_id(), expr); +} + +void MultiReadTable::add_expr(RefID r_id, int left, int right, InsertID mr_id, double expr) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_lock); +#endif + MultiRead* mr = get_read(mr_id); + mr->add_expr(r_id, left, right, expr); +} + +double MultiReadTable::get_mass(const MateHit& hit) +{ +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_lock); +#endif + MultiRead* mr = get_read(hit.insert_id()); + if(!mr) + return 1.0; + return mr->get_mass(hit.ref_id(), hit.left(), hit.right(), _valid_mass); +} + +size_t MultiReadTable::num_multireads() +{ + return (int)_read_map.size(); +} + +size_t MultiReadTable::num_multihits() +{ + size_t count = 0; + for (MultiReadMap::iterator it=_read_map.begin() ; it != _read_map.end(); it++ ) + { + count += it->second.num_hits(); + } + return count; +} diff --git a/src/multireads.h b/src/multireads.h new file mode 100644 index 0000000..da017ad --- /dev/null +++ b/src/multireads.h @@ -0,0 +1,72 @@ +#ifndef MULTIREADS_H +#define MULTIREADS_H + +#include + +typedef uint64_t RefID; +typedef uint64_t InsertID; + +struct MultiHit +{ + MultiHit(RefID id, int l, int r) + : r_id(id), + left(l), + right(r), + expr(0) {} + RefID r_id; + int left; + int right; + double expr; +}; + +class MultiRead +{ + size_t _curr_index; + std::vector _hits; + double _tot_expr; + InsertID _id; + + MultiHit* get_hit(RefID r_id, int left, int right); + +public: + + MultiRead(InsertID id, int exp_num_hits) + : _curr_index(0), + _tot_expr(0.0), + _id(id) + { + _hits.reserve(exp_num_hits); + } + + size_t num_hits() { return (int)_hits.size(); } + void add_hit(RefID r_id, int left, int right); + void 
add_expr(RefID r_id, int left, int right, double expr); + double get_mass(RefID r_id, int left, int right, bool valid_mass); +}; + +class MateHit; + +class MultiReadTable +{ + typedef std::map MultiReadMap; + MultiReadMap _read_map; + bool _valid_mass; + MultiRead* get_read(InsertID mr_id); +#if ENABLE_THREADS + boost::mutex _lock; +#endif +public: + MultiReadTable(): _valid_mass(false) {} + + void valid_mass(bool vm) { _valid_mass = vm; } + void add_hit(const MateHit& hit); + void add_hit(RefID r_id, int left, int right, InsertID mr_id, int exp_num_hits); + void add_expr(const MateHit& hit, double expr); + void add_expr(RefID r_id, int left, int right, InsertID mr_id, double expr); + double get_mass(const MateHit& hit); + size_t num_multireads(); + size_t num_multihits(); + +}; + +#endif diff --git a/src/progressbar.h b/src/progressbar.h new file mode 100644 index 0000000..d5b953d --- /dev/null +++ b/src/progressbar.h @@ -0,0 +1,110 @@ +#ifndef PROGRESS_H +#define PROGRESS_H + +#include "time.h" + +using namespace std; + +const int BAR_BUF_SIZE = 28; +const char SYMBOL = '*'; + +class ProgressBar +{ + char _bar_buf[BAR_BUF_SIZE]; + string _process; + long double _num_complete; + long double _tot_num; + int _num_updates; + int _num_remaining; + +public: + ProgressBar() {} + + ProgressBar(string process, double tot_num) + { + _tot_num = tot_num; + _process = process; + _num_complete = -1.0; + _num_remaining = -1.0; + _num_updates = 0; + + for(int i=0; i < BAR_BUF_SIZE; ++i) _bar_buf[i] = ' '; + _bar_buf[0] = '['; + _bar_buf[BAR_BUF_SIZE-2] = ']'; + _bar_buf[BAR_BUF_SIZE-1] = '\0'; + + + time_t rawtime; + struct tm * timeinfo; + char time_buf [80]; + + time ( &rawtime ); + timeinfo = localtime ( &rawtime ); + + strftime (time_buf,80,"%H:%M:%S",timeinfo); + + fprintf(stderr, "[%s] %s\n", time_buf, _process.c_str()); + } + + void update(const char* bundle_label_buf, double inc_amt) + { + + _num_complete += inc_amt; + _num_updates ++; + + if (cuff_verbose||cuff_quiet||_tot_num==0) return; + + char bundle_buf[28]; + bundle_buf[27] = '\0'; + strncpy(bundle_buf, bundle_label_buf, 27); + + int percent = (_num_complete * 100)/_tot_num; + + percent = min(percent, 99); + + int last_bar = percent/(100/(BAR_BUF_SIZE-3)); + for (int i=1; i <= last_bar; ++i) + _bar_buf[i] = SYMBOL; + + char line_buf[82]; + snprintf(line_buf, 82, "\r> Processing Locus %-27s %s %3d%%", bundle_buf, _bar_buf, percent); + + fprintf(stderr,"%s",line_buf); + } + + void remaining(int num_remaining) + { + if (cuff_verbose||cuff_quiet||_tot_num==0||num_remaining==_num_remaining) return; + + _num_remaining = num_remaining; + + int percent = (_num_complete * 100)/_tot_num; + percent = min(percent, 99); + + char msg_buff[45]; + sprintf(msg_buff, "Waiting for %d threads to complete.", num_remaining); + + int last_bar = percent/(100/(BAR_BUF_SIZE-3)); + for (int i=1; i <= last_bar; ++i) + _bar_buf[i] = SYMBOL; + + char line_buf[82]; + snprintf(line_buf, 81, "\r> %-44s %s %3d%%", msg_buff, _bar_buf, percent); + + fprintf(stderr,"%s",line_buf); + } + + void complete() + { + for (int i=1; i < BAR_BUF_SIZE-2; ++i) + _bar_buf[i] = SYMBOL; + char complete_buf[45]; + snprintf(complete_buf, 44, "Processed %d loci.", _num_updates); + if (cuff_verbose||cuff_quiet) + fprintf(stderr, "%-44s\n", complete_buf); + else + fprintf(stderr, "\r> %-44s %s %3d%%\n", complete_buf, _bar_buf, 100); + } +}; + +#endif diff --git a/src/replicates.cpp b/src/replicates.cpp new file mode 100644 index 0000000..634f209 --- /dev/null +++ b/src/replicates.cpp @@ -0,0 
+1,455 @@ +// +// replicates.cpp +// cufflinks +// +// Created by Cole Trapnell on 3/11/11. +// Copyright 2011 Cole Trapnell. All rights reserved. +// + +#include + +extern "C" { +#include "locfit/local.h" +} + +#include "replicates.h" + +#if ENABLE_THREADS +boost::mutex _locfit_lock; +#endif + +MassDispersionModel::MassDispersionModel(const std::vector& scaled_mass_means, + const std::vector& scaled_raw_variances, + const std::vector& scaled_mass_variances) +{ + if (scaled_mass_means.size() != scaled_mass_variances.size()) + { + fprintf (stderr, "Error: dispersion model table is malformed\n"); + } + + double last_val = 0; + for (size_t i = 0; i < scaled_mass_means.size(); i++) + { + + if (last_val > scaled_mass_means[i]) + { + fprintf (stderr, "Error: DispersionModel input is malformed\n"); + } + + if ( i == 0 || last_val < scaled_mass_means[i]) + { + _scaled_mass_means.push_back(scaled_mass_means[i]); + _scaled_raw_variances.push_back(scaled_raw_variances[i]); + _scaled_mass_variances.push_back(scaled_mass_variances[i]); + } + else + { + // skip this element if it's equal to what we've already seen + } + + last_val = scaled_mass_means[i]; + } +} + +double MassDispersionModel::scale_mass_variance(double scaled_mass) const +{ + if (scaled_mass <= 0) + return 0.0; + + if (_scaled_mass_means.size() < 2 || _scaled_mass_variances.size() < 2) + { + return scaled_mass; // revert to poisson. + } + if (scaled_mass > _scaled_mass_means.back()) + { + // extrapolate to the right + // off the right end + double x1_mean = _scaled_mass_means[_scaled_mass_means.size()-2]; + double x2_mean = _scaled_mass_means[_scaled_mass_means.size()-1]; + + double y1_var = _scaled_mass_variances[_scaled_mass_means.size()-2]; + double y2_var = _scaled_mass_variances[_scaled_mass_means.size()-1]; + double slope = 0.0; + if (x2_mean != x1_mean) + { + slope = (y2_var - y1_var) / (x2_mean-x1_mean); + } + else if (y1_var == y2_var) + { + assert (false); // should have a unique'd table + } + double mean_interp = _scaled_mass_variances[_scaled_mass_means.size()-1] - + slope*(scaled_mass - _scaled_mass_means.size()-1); + if (mean_interp < scaled_mass) + mean_interp = scaled_mass; + assert (!isnan(mean_interp) && !isinf(mean_interp)); + return mean_interp; + } + else if (scaled_mass < _scaled_mass_means.front()) + { + // extrapolate to the left + // off the left end? + double x1_mean = _scaled_mass_means[0]; + double x2_mean = _scaled_mass_means[1]; + + double y1_var = _scaled_mass_variances[0]; + double y2_var = _scaled_mass_variances[1]; + double slope = 0.0; + if (x2_mean != x1_mean) + { + slope = (y2_var - y1_var) / (x2_mean-x1_mean); + } + else if (y1_var == y2_var) + { + assert (false); // should have a unique'd table + } + double mean_interp = _scaled_mass_variances[0] - slope*(_scaled_mass_means[0] - scaled_mass); + if (mean_interp < scaled_mass) + mean_interp = scaled_mass; + + assert (!isnan(mean_interp) && !isinf(mean_interp)); + return mean_interp; + } + + vector::const_iterator lb; + lb = lower_bound(_scaled_mass_means.begin(), + _scaled_mass_means.end(), + scaled_mass); + if (lb < _scaled_mass_means.end()) + { + int d = lb - _scaled_mass_means.begin(); + if (*lb == scaled_mass || lb == _scaled_mass_means.begin()) + { + double var = _scaled_mass_variances[d]; + if (var < scaled_mass) // revert to poisson if underdispersed + var = scaled_mass; + assert (!isnan(var) && !isinf(var)); + return var; + } + + + //in between two points on the scale. 
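            // The code below interpolates linearly between the two fitted table
            // entries that bracket scaled_mass.  A worked example with made-up
            // numbers: for table means {10, 20} and variances {15, 40}, a
            // scaled_mass of 15 gives slope (40 - 15) / (20 - 10) = 2.5 and a
            // returned variance of 15 + 2.5 * (15 - 10) = 27.5 (clamped up to
            // scaled_mass whenever the fit dips below the Poisson line).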
+ d--; + + if (d < 0) + { + fprintf(stderr, "ARG d < 0, d = %d \n", d); + } + + if (d >= _scaled_mass_means.size()) + { + fprintf(stderr, "ARG d >= _scaled_mass_means.size(), d = %d\n", d); + } + if (d >= _scaled_mass_variances.size()) + { + fprintf(stderr, "ARG d >= _scaled_mass_variances.size(), d = %d\n", d); + } + + double x1_mean = _scaled_mass_means[d]; + double x2_mean = _scaled_mass_means[d + 1]; + + double y1_var = _scaled_mass_variances[d]; + double y2_var = _scaled_mass_variances[d + 1]; + double slope = 0.0; + if (x2_mean != x1_mean) + { + slope = (y2_var - y1_var) / (x2_mean-x1_mean); + } + else if (y1_var == y2_var) + { + assert (false); // should have a unique'd table + } + double mean_interp = _scaled_mass_variances[d] + slope*(scaled_mass - _scaled_mass_means[d]); + if (mean_interp < scaled_mass) // revert to poisson if underdispersed + mean_interp = scaled_mass; + + assert (!isnan(mean_interp) && !isinf(mean_interp)); + return mean_interp; + } + else + { + assert (!isnan(scaled_mass) && !isinf(scaled_mass)); + return scaled_mass; // revert to poisson assumption + } +} + +void calc_scaling_factors(const vector& sample_count_table, + vector& scale_factors) +{ + vector geom_means(sample_count_table.size(), 0.0); + + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + const LocusCountList& p = sample_count_table[i]; + + for (size_t j = 0; j < p.counts.size(); ++j) + { + //assert (geom_means.size() > j); + if (geom_means[i] > 0 && p.counts[j] > 0) + { + geom_means[i] *= p.counts[j]; + } + else if (p.counts[j] > 0) + { + geom_means[i] = p.counts[j]; + } + + } + geom_means[i] = pow(geom_means[i], 1.0/(double)p.counts.size()); + } + + for (size_t j = 0; j < scale_factors.size(); ++j) + { + vector tmp_counts; + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + if (geom_means[i] && !isinf(geom_means[i]) && !isnan(geom_means[i]) && sample_count_table[i].counts[j]) + { + double gm = (double)sample_count_table[i].counts[j] / geom_means[i]; + assert (!isinf(gm)); + tmp_counts.push_back(gm); + } + } + sort(tmp_counts.begin(), tmp_counts.end()); + if (!tmp_counts.empty()) + scale_factors[j] = tmp_counts[tmp_counts.size()/2]; + else + scale_factors[j] = 1.0; + } +} + +static const int min_loci_for_fitting = 30; + +boost::shared_ptr +fit_dispersion_model_helper(const string& condition_name, + const vector& scale_factors, + const vector& sample_count_table) +{ + vector > raw_means_and_vars; + map > labeled_mv_table; + + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + const LocusCountList& p = sample_count_table[i]; + double mean = accumulate(p.counts.begin(), p.counts.end(), 0.0); + if (mean > 0.0 && p.counts.size() > 0) + mean /= p.counts.size(); + + double var = 0.0; + foreach (double d, p.counts) + { + var += (d - mean) * (d - mean); + } + if (var > 0.0 && p.counts.size()) + var /= p.counts.size(); + labeled_mv_table[p.locus_desc] = make_pair(mean, var); + if (mean > 0 && var > 0.0) + { + //fprintf(stderr, "%s\t%lg\t%lg\n", p.locus_desc.c_str(), mean, var); + raw_means_and_vars.push_back(make_pair(mean, var)); + } + } + + if (raw_means_and_vars.size() < min_loci_for_fitting) + { + shared_ptr disperser; + disperser = shared_ptr(new PoissonDispersionModel); + + for (map >::iterator itr = labeled_mv_table.begin(); + itr != labeled_mv_table.end(); + ++itr) + { + string label = itr->first; + pair p = itr->second; + disperser->set_raw_mean_and_var(itr->first, itr->second); + } + //fprintf(stderr, "Warning: fragment count variances between replicates are all 
zero, reverting to Poisson model\n"); + return disperser; + } + + sort(raw_means_and_vars.begin(), raw_means_and_vars.end()); + + vector raw_means(raw_means_and_vars.size(), 0.0); + vector raw_variances(raw_means_and_vars.size(), 0.0); + + for(size_t i = 0; i < raw_means_and_vars.size(); ++i) + { + raw_means[i] = raw_means_and_vars[i].first; + raw_variances[i] = raw_means_and_vars[i].second; + } + + vector fitted_values(raw_means_and_vars.size(), 0.0); + + setuplf(); + + // WARNING: locfit doesn't like undescores - need camel case for + // variable names + + char namebuf[256]; + sprintf(namebuf, "countMeans"); + vari* cm = createvar(namebuf,STREGULAR,raw_means.size(),VDOUBLE); + for (size_t i = 0; i < raw_means.size(); ++i) + { + cm->dpr[i] = log(raw_means[i]); + } + + sprintf(namebuf, "countVariances"); + vari* cv = createvar(namebuf,STREGULAR,raw_variances.size(),VDOUBLE); + for (size_t i = 0; i < raw_variances.size(); ++i) + { + cv->dpr[i] = raw_variances[i]; + } + + char locfit_cmd[2048]; + sprintf(locfit_cmd, "locfit countVariances~countMeans family=gamma"); + + locfit_dispatch(locfit_cmd); + + sprintf(locfit_cmd, "fittedVars=predict countMeans"); + locfit_dispatch(locfit_cmd); + + //sprintf(locfit_cmd, "prfit x fhat h nlx"); + //locfit_dispatch(locfit_cmd); + + int n = 0; + sprintf(namebuf, "fittedVars"); + vari* cp = findvar(namebuf, 1, &n); + assert(cp != NULL); + for (size_t i = 0; i < cp->n; ++i) + { + fitted_values[i] = cp->dpr[i]; + } + + shared_ptr disperser; + disperser = shared_ptr(new MassDispersionModel(raw_means, raw_variances, fitted_values)); + if (poisson_dispersion) + disperser = shared_ptr(new PoissonDispersionModel); + + for (map >::iterator itr = labeled_mv_table.begin(); + itr != labeled_mv_table.end(); + ++itr) + { + string label = itr->first; + pair p = itr->second; + disperser->set_raw_mean_and_var(itr->first, itr->second); + } + + //pair p = disperser->get_raw_mean_and_var("chr1:11873-29961"); + + return disperser; +} + +boost::shared_ptr +fit_dispersion_model(const string& condition_name, + const vector& scale_factors, + const vector& sample_count_table) +{ +// +//#if ENABLE_THREADS +// boost::mutex::scoped_lock lock(_locfit_lock); +//#endif + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + if (sample_count_table[i].counts.size() <= 1) + { + // only one replicate - no point in fitting variance + return shared_ptr(new PoissonDispersionModel); + } + } +#if ENABLE_THREADS + _locfit_lock.lock(); +#endif + + ProgressBar p_bar("Modeling fragment count overdispersion.",0); + + int max_transcripts = 0; + foreach(const LocusCountList& L, sample_count_table) + { + if (L.num_transcripts > max_transcripts) + { + max_transcripts = L.num_transcripts; + } + } + + // This vector holds a dispersion model for each transcript multiplicity. + // The model for multiplicity is fitted to all the data, and lives at index 0 in the + // vector below. 
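    // In this revision only the pooled model at index 0 is actually fitted:
    // the per-multiplicity branch of the loop below is commented out, so the
    // remaining slots stay empty and the pooled model is what this function
    // ultimately returns.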
+ vector > disp_models(max_transcripts+1); + + for (size_t i = 0; i < max_transcripts; i++) + { + boost::shared_ptr model; + if (i != 0) + { +// vector sample_count_subtable; +// foreach(const LocusCountList& L, sample_count_table) +// { +// if (L.num_transcripts == i) +// { +// sample_count_subtable.push_back(L); +// } +// } +// model = fit_dispersion_model_helper(condition_name, scale_factors, sample_count_subtable); + } + else + { + model = fit_dispersion_model_helper(condition_name, scale_factors, sample_count_table); + } + disp_models[i] = model; + } + + if (emit_count_tables) + { +// string cond_count_filename = output_dir + "/" + condition_name + "_counts.txt"; +// +// FILE* sample_count_file = fopen(cond_count_filename.c_str(), "w"); +// +// if (sample_count_file) +// { +// fprintf(sample_count_file, "count_mean\tcount_var\tfitted_var\tnum_transcripts\n"); +// for (size_t j = 0; j < max_transcripts; j++) +// { +// boost::shared_ptr model = disp_models[j]; +// const vector& means = model->scaled_mass_means(); +// const vector& raw_vars = model->scaled_raw_variances(); +// +// for (size_t i = 0; i < means.size(); ++i) +// { +// fprintf(sample_count_file, "%lg\t%lg\t%lg\t%lu\n", +// means[i], +// raw_vars[i], +// model->scale_mass_variance(means[i]), +// j); +// } +// } +// fclose(sample_count_file); +// } + + string cond_count_filename = output_dir + "/" + condition_name + "_counts.txt"; + + FILE* sample_count_file = fopen(cond_count_filename.c_str(), "w"); + + if (sample_count_file) + { + fprintf(sample_count_file, "count_mean\tcount_var\tfitted_var\n"); + + boost::shared_ptr model = disp_models[0]; + const vector& means = model->scaled_mass_means(); + const vector& raw_vars = model->scaled_raw_variances(); + + for (size_t i = 0; i < means.size(); ++i) + { + fprintf(sample_count_file, "%lg\t%lg\t%lg\n", + means[i], + raw_vars[i], + model->scale_mass_variance(means[i])); + } + fclose(sample_count_file); + } + } + +#if ENABLE_THREADS + _locfit_lock.unlock(); +#endif + return disp_models[0]; +} diff --git a/src/replicates.h b/src/replicates.h new file mode 100644 index 0000000..ca4484f --- /dev/null +++ b/src/replicates.h @@ -0,0 +1,304 @@ +// +// replicates.h +// cufflinks +// +// Created by Cole Trapnell on 3/11/11. +// Copyright 2011 Cole Trapnell. All rights reserved. 
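//
// Declares the machinery used for cross-replicate dispersion modeling:
// MassDispersionModel and its Poisson fallback, per-locus count records
// (LocusCountList), the scaling-factor and dispersion-fitting entry points,
// and ReplicatedBundleFactory, which merges bundles across replicates.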
+// + +#include "common.h" +#include "bundles.h" +#include +#include +#include + +class MassDispersionModel +{ +public: + MassDispersionModel() {} + MassDispersionModel(const std::vector& scaled_mass_means, + const std::vector& scaled_raw_variances, + const std::vector& scaled_mass_variances); + + + virtual double scale_mass_variance(double scaled_mass) const; + + const vector& scaled_mass_means() const { return _scaled_mass_means; } + const vector& scaled_raw_variances() const { return _scaled_raw_variances; } + const vector& scaled_mass_variances() const { return _scaled_mass_variances; } + + std::pair get_raw_mean_and_var(const std::string& locus_desc) const + { + std::map >::const_iterator itr; + itr = _raw_mv_by_locus.find(locus_desc); + std::pair p = make_pair(0.0,0.0); + if (itr != _raw_mv_by_locus.end()) + { + p = itr->second; + } + return p; + } + + void set_raw_mean_and_var(const std::string& locus_desc, const std::pair& p) + { + _raw_mv_by_locus[locus_desc] = p; + } + +private: + std::vector _scaled_mass_means; + std::vector _scaled_raw_variances; + std::vector _scaled_mass_variances; + + std::map > _raw_mv_by_locus; +}; + +class PoissonDispersionModel : public MassDispersionModel +{ +public: + + virtual double scale_mass_variance(double scaled_mass) const + { + return scaled_mass; + } +}; + +struct LocusCountList +{ + LocusCountList(std::string ld, int num_reps, int nt) : + locus_desc(ld), counts(std::vector(num_reps, 0)), num_transcripts(nt) {} + std::string locus_desc; + std::vector counts; + int num_transcripts; +}; + +void calc_scaling_factors(const std::vector& sample_count_table, + std::vector& scale_factors); + + +boost::shared_ptr +fit_dispersion_model(const string& condition_name, + const std::vector& scale_factors, + const std::vector& sample_count_table); + +// This factory merges bundles in a requested locus from several replicates +class ReplicatedBundleFactory +{ +public: + ReplicatedBundleFactory(const std::vector >& factories, + const string& condition_name) + : _factories(factories), _condition_name(condition_name) {} + + int num_bundles() { return _factories[0]->num_bundles(); } + std::vector > factories() { return _factories; } + + const string& condition_name() const { return _condition_name; } + void condition_name(const string& cn) { _condition_name = cn; } + + bool bundles_remain() + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + foreach (boost::shared_ptr fac, _factories) + { + if (fac->bundles_remain()) + return true; + } + return false; + } + + bool next_bundle(HitBundle& bundle_out) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + std::vector bundles; + + bool non_empty_bundle = false; + foreach (boost::shared_ptr fac, _factories) + { + bundles.push_back(new HitBundle()); + if (fac->next_bundle(*(bundles.back()))) + { + non_empty_bundle = true; + } + } + + if (non_empty_bundle == false) + { + foreach (HitBundle* in_bundle, bundles) + { + in_bundle->ref_scaffolds().clear(); + in_bundle->clear_hits(); + delete in_bundle; + } + return false; + } + + for (size_t i = 1; i < bundles.size(); ++i) + { + const vector >& s1 = bundles[i]->ref_scaffolds(); + const vector >& s2 = bundles[i-1]->ref_scaffolds(); + assert (s1.size() == s2.size()); + for (size_t j = 0; j < s1.size(); ++j) + { + assert (s1[j]->annotated_trans_id() == s2[j]->annotated_trans_id()); + } + } + + // Merge the replicates into a combined bundle of hits. 
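        // The checks above have already confirmed (via asserts) that every
        // replicate produced a bundle over the same reference scaffolds, so
        // combining them yields one bundle whose hits span all replicates;
        // the per-replicate temporaries are freed right afterwards.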
+ HitBundle::combine(bundles, bundle_out); + + foreach (HitBundle* in_bundle, bundles) + { + in_bundle->ref_scaffolds().clear(); + in_bundle->clear_hits(); + delete in_bundle; + } + return true; + } + + void reset() + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + foreach (shared_ptr fac, _factories) + { + fac->reset(); + } + } + + void inspect_replicate_maps(int& min_len, int& max_len) + { + vector sample_count_table; + vector sample_masses; + + for (size_t fac_idx = 0; fac_idx < _factories.size(); ++fac_idx) + { + shared_ptr fac = _factories[fac_idx]; + BadIntronTable bad_introns; + + vector count_table; + inspect_map(*fac, NULL, count_table, false); + + shared_ptr rg_props = fac->read_group_properties(); + + for (size_t i = 0; i < count_table.size(); ++i) + { + LocusCount& c = count_table[i]; + double raw_count = c.count; + + if (i >= sample_count_table.size()) + { + LocusCountList locus_count(c.locus_desc, _factories.size(), c.num_transcripts); + sample_count_table.push_back(locus_count); + sample_count_table.back().counts[0] = raw_count; + } + else + { + if (sample_count_table[i].locus_desc != c.locus_desc) + { + fprintf (stderr, "Error: bundle boundaries don't match across replicates!\n"); + exit(1); + } + sample_count_table[i].counts[fac_idx] = raw_count; + } + } + sample_masses.push_back(rg_props->total_map_mass()); + min_len = min(min_len, rg_props->frag_len_dist()->min()); + max_len = max(max_len, rg_props->frag_len_dist()->max()); + } + + vector scale_factors(_factories.size(), 0.0); + + calc_scaling_factors(sample_count_table, scale_factors); + + for (size_t i = 0; i < scale_factors.size(); ++i) + { + shared_ptr rg_props = _factories[i]->read_group_properties(); + assert (scale_factors[i] != 0); + rg_props->mass_scale_factor(scale_factors[i]); + } + + // Transform raw counts to the common scale + for (size_t i = 0; i < sample_count_table.size(); ++i) + { + LocusCountList& p = sample_count_table[i]; + for (size_t j = 0; j < p.counts.size(); ++j) + { + assert (scale_factors.size() > j); + p.counts[j] *= (1.0 / scale_factors[j]); + } + } + + for (size_t i = 0; i < _factories.size(); ++i) + { + shared_ptr rg_props = _factories[i]->read_group_properties(); + vector scaled_counts; + for (size_t j = 0; j < sample_count_table.size(); ++j) + { + scaled_counts.push_back(LocusCount(sample_count_table[j].locus_desc, sample_count_table[j].counts[i], sample_count_table[j].num_transcripts)); + } + rg_props->common_scale_counts(scaled_counts); + } + + shared_ptr disperser; + disperser = fit_dispersion_model(_condition_name,scale_factors, sample_count_table); + + foreach (shared_ptr fac, _factories) + { + shared_ptr rg_props = fac->read_group_properties(); + rg_props->mass_dispersion_model(disperser); + } + } + + // This function NEEDS to deep copy the ref_mRNAs, otherwise cuffdiff'd + // samples will clobber each other + void set_ref_rnas(const vector >& mRNAs) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + foreach(shared_ptr fac, _factories) + { + fac->set_ref_rnas(mRNAs); + } + } + + void set_mask_rnas(const vector >& mRNAs) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + foreach(shared_ptr fac, _factories) + { + fac->set_mask_rnas(mRNAs); + } + } + + int num_replicates() const { return _factories.size(); } + + void mass_dispersion_model(shared_ptr disperser) + { +#if ENABLE_THREADS + boost::mutex::scoped_lock lock(_rep_factory_lock); +#endif + foreach(shared_ptr& fac, 
_factories) + { + fac->read_group_properties()->mass_dispersion_model(disperser); + } + } + + shared_ptr mass_dispersion_model() const + { + return _factories.front()->read_group_properties()->mass_dispersion_model(); + } + +private: + vector > _factories; +#if ENABLE_THREADS + boost::mutex _rep_factory_lock; +#endif + string _condition_name; +}; diff --git a/src/sampling.cpp b/src/sampling.cpp new file mode 100644 index 0000000..490cd49 --- /dev/null +++ b/src/sampling.cpp @@ -0,0 +1,63 @@ +// +// sampling.cpp +// cufflinks +// +// Created by Cole Trapnell on 12/19/11. +// Copyright 2011 __MyCompanyName__. All rights reserved. +// + +#include "sampling.h" +#include + +using namespace std; + +void generate_importance_samples(multinormal_generator& generator, + std::vector >& samples, + int num_samples, + bool no_zeros) +{ + for (int i = 0; i < num_samples; ++i) + { + boost::numeric::ublas::vector r = generator.next_rand(); + + boost::numeric::ublas::vector scaled_sample = r; + + for (size_t j = 0; j < scaled_sample.size(); ++j) { + // if (scaled_sample(j) < 0) + // scaled_sample(j) = 1e-10; + if (scaled_sample(j) < 0) + scaled_sample(j) = -scaled_sample(j); + } + + double m = sum(scaled_sample); + if (m && !isnan(m)) + { + for (size_t j = 0; j < scaled_sample.size(); ++j) + { + scaled_sample(j) = scaled_sample(j) / m; + } + if (no_zeros) + { + bool has_zero = false; + for (size_t j = 0; j < scaled_sample.size(); ++j) + { + if (scaled_sample[j] == 0) + { + has_zero = true; + break; + } + } + + if (has_zero) + continue; + } + samples.push_back(scaled_sample); + } + else + { + samples.push_back(boost::numeric::ublas::zero_vector(scaled_sample.size())); + //cerr << r << endl; + //cerr << scaled_sample << endl; + } + } +} diff --git a/src/sampling.h b/src/sampling.h new file mode 100644 index 0000000..8616877 --- /dev/null +++ b/src/sampling.h @@ -0,0 +1,293 @@ +#ifndef SAMPLING_H +#define SAMPLING_H +// +// sampling.h +// cufflinks +// +// Created by Cole Trapnell on 12/19/11. +// Copyright 2011 Cole Trapnell. All rights reserved. 
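//
// Utilities for importance sampling from a multivariate normal: Cholesky
// helpers for uBLAS matrices, a multinormal_generator built on Boost.Random,
// and generate_importance_samples().  A minimal usage sketch (the mean and
// covariance values below are made up purely for illustration):
//
//   namespace ublas = boost::numeric::ublas;
//   ublas::vector<double> mu(2);
//   mu(0) = 1.0; mu(1) = 2.0;
//   ublas::matrix<double> cov(2, 2);   // must be positive definite
//   cov(0,0) = 1.0; cov(0,1) = 0.2;
//   cov(1,0) = 0.2; cov(1,1) = 1.5;
//   ublas::cholesky_factorize(cov);    // generator expects the Cholesky factor
//   multinormal_generator<double> gen(mu, cov);
//   std::vector<ublas::vector<double> > samples;
//   generate_importance_samples(gen, samples, 1000);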
+// + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "common.h" + +#include +#include +#include +#include + +#include +#include +#include + +//#define BOOST_UBLAS_TYPE_CHECK 0 +#include + +#include + +#include +#include +#include +#include +#include + +// Boost Cholesky factorizations in the spirit of lu.hpp +// Written by Robbie Vogt, found at: +// http://lists.boost.org/MailArchives/ublas/2005/07/0568.php + +namespace boost { namespace numeric { namespace ublas { + + // Cholesky factorization + template + double cholesky_factorize (M &m) + { + typedef M matrix_type; + typedef typename M::size_type size_type; + typedef typename M::value_type value_type; + + BOOST_UBLAS_CHECK (m.size1() == m.size2(), external_logic("Cholesky decomposition is only valid for a square, positive definite matrix.")); + + size_type size = m.size1(); + vector d(size); + //bool positive_definite = true; + for (size_type i = 0; i < size; ++ i) { + matrix_row mri (row (m, i)); + for (size_type j = i; j < size; ++ j) { + matrix_row mrj (row (m, j)); + + value_type elem = m(i,j) - inner_prod(project(mri,range(0,i)), project(mrj,range(0,i))); + + if (i == j) { + if (elem <= 0.0) { + // matrix after rounding errors is not positive definite + return elem; + } + else { + d(i) = sqrtl(elem); + } + } + else { + m(j,i) = elem / d(i); + } + } + } + + // put the diagonal back in + for (size_type i = 0; i < size; ++ i) { + m(i,i) = d(i); + } + + //cerr << m << endl; + for (size_type i = 0; i < size; ++i) { + for (size_type j = 0; j < i; ++j) + { + m(j,i) = 0; + } + } + //cerr << m << endl; + // decomposition succeeded + return 0.0; + } + + + // Cholesky substitution + template + void cholesky_substitute (const M &m, vector_expression &e) { + typedef const M const_matrix_type; + typedef vector vector_type; + inplace_solve (m, e, lower_tag ()); + inplace_solve (trans(m), e, upper_tag ()); + } + template + void cholesky_substitute (const M &m, matrix_expression &e) { + typedef const M const_matrix_type; + typedef matrix matrix_type; + inplace_solve (m, e, lower_tag ()); + inplace_solve (trans(m), e, upper_tag ()); + } + template + void cholesky_substitute_left (vector_expression &e, const M &m) { + typedef const M const_matrix_type; + typedef vector vector_type; + inplace_solve (trans(m), e, upper_tag ()); + inplace_solve (m, e, lower_tag ()); + } + template + void cholesky_substitute_left (matrix_expression &e, const M &m) { + typedef const M const_matrix_type; + typedef matrix matrix_type; + inplace_solve (trans(m), e, upper_tag ()); + inplace_solve (m, e, lower_tag ()); + } + // Cholesky matrix inversion + template + void cholesky_invert (M &m) + { + typedef typename M::size_type size_type; + typedef typename M::value_type value_type; + size_type size = m.size1(); + // determine the inverse of the lower traingular matrix + for (size_type i = 0; i < size; ++ i) { + m(i,i) = 1 / m(i,i); + for (size_type j = i+1; j < size; ++ j) { + value_type elem(0); + for (size_type k = i; k < j; ++ k) { + elem -= m(j,k)*m(k,i); + } + m(j,i) = elem / m(j,j); + } + } + // multiply the upper and lower inverses together + m = prod(trans(triangular_adaptor(m)), triangular_adaptor(m)); + } + + +}}} + +/* Matrix inversion routine. 
+ Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ +template +bool lu_invert_matrix (const boost::numeric::ublas::matrix& input, boost::numeric::ublas::matrix& inverse) { + using namespace boost::numeric::ublas; + typedef permutation_matrix pmatrix; + // create a working copy of the input + matrix A(input); + // create a permutation matrix for the LU-factorization + pmatrix pm(A.size1()); + + // perform LU-factorization + int res = lu_factorize(A,pm); + if( res != 0 ) return false; + + // create identity matrix of "inverse" + inverse.assign(boost::numeric::ublas::identity_matrix(A.size1())); + + // backsubstitute to get the inverse + lu_substitute(A, pm, inverse); + + return true; +} + +///* Matrix inversion routine. +// Expects input to be PRE-FACTORIZED */ +template +bool chol_invert_matrix (const boost::numeric::ublas::matrix& input, boost::numeric::ublas::matrix& inverse) { + + using namespace boost::numeric::ublas; + inverse = input; + + cholesky_invert(inverse); + + return true; +} + + +// Adapted for Boost from Numerical Recipes +template +class multinormal_generator +{ + typedef boost::mt19937 base_generator_type; + typedef boost::normal_distribution<> distribution_type; + +public: + // expects the mean vector and the *CHOLESKY* factorization of the covariance + multinormal_generator(const boost::numeric::ublas::vector& mean, + const boost::numeric::ublas::matrix& chol_cov) + : + _engine(random_seed), + _distribution(), + _generator(boost::variate_generator(_engine, + _distribution)) + { + _rand = boost::numeric::ublas::zero_vector(mean.size()); + _mean = mean; + _cholesky = chol_cov; + } + + const boost::numeric::ublas::vector& next_rand() + { + boost::numeric::ublas::vector temp(_mean.size()); + for (size_t i = 0; i < _mean.size(); ++i) + { + double r = _generator(); + temp(i) = r; + _rand(i) = 0.0; + } + + //cerr << "rand ="<,ublas::lower>(_cholesky), temp); + for (size_t i = 0; i < _cholesky.size1(); ++i) + { + for (size_t j = 0; j <= i; ++j) + { + _rand(i) += _cholesky(i,j) * temp(j); + } + } + //cerr <<_rand << " + " << _mean << "="; + _rand = _rand + _mean; + //cerr <<_rand <& mean, + const boost::numeric::ublas::matrix& chol_cov) + { + _rand = boost::numeric::ublas::zero_vector(mean.size()); + _mean = mean; + _cholesky = chol_cov; + } + +private: + boost::numeric::ublas::vector _rand; + boost::numeric::ublas::vector _mean; + boost::numeric::ublas::matrix _cholesky; + + base_generator_type _engine; + distribution_type _distribution; + boost::variate_generator _generator; +}; + +// expects a cholesky factorized covariance matrix +template +double determinant(boost::numeric::ublas::matrix_expression const& mat_r) +{ + double det = 1.0; + + matrix_T chol(mat_r()); + + for (size_t i = 0; i < chol.size1(); ++i) + { + det *= chol(i,i); + } + + return det * det; +} + + +// Given log(p) and log(q) returns log(p+q)double +template +float_type log_space_add(float_type log_p, float_type log_q) +{ + if (log_p < log_q) + { + float_type tmp = log_p; + log_p = log_q; + log_q = tmp; + } + + assert (log_p >= log_q); + return log (1.0 + exp(log_q - log_p)) + log_p; +} + +void generate_importance_samples(multinormal_generator& generator, + std::vector >& samples, + int num_samples, + bool no_zeros = true); + +#endif diff --git a/src/scaffold_graph.cpp b/src/scaffold_graph.cpp new file mode 100644 index 0000000..4cb917f --- /dev/null +++ b/src/scaffold_graph.cpp @@ -0,0 +1,304 @@ +/* + * scaffold_graph.cpp + * cufflinks + * + * Created by Cole Trapnell on 6/2/10. 
+ * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include +#include "scaffold_graph.h" +#include "scaffolds.h" + +#include +#include + +#ifndef NDEBUG +#include "transitive_reduction.h" +#endif + +using namespace std; +using namespace boost; + +struct HitBufBasket +{ + HitBufBasket(int coord, Scaffold* h, DAGNode d) + : expiration_coord(coord), hit(h), node(d) {} + int expiration_coord; + Scaffold* hit; + DAGNode node; +}; + + + +bool right_lt (const HitBufBasket& lhs, + const HitBufBasket& rhs) +{ + return lhs.expiration_coord < rhs.expiration_coord; +} + +struct Expired +{ + Expired(int ec) : expiration_coord(ec) {} + bool operator()(const HitBufBasket& lhs) + { + return lhs.expiration_coord <= expiration_coord; + } + + int expiration_coord; +}; + +enum ConnectState { UNKNOWN, CONNECT, DONT_CONNECT }; + +template +struct connect_visitor +: public base_visitor > +{ + typedef Tag event_filter; + connect_visitor(CompatibilityMap compatibility, ConnectMap connect, DAGNode t) + : _connect(connect),_compatibility(compatibility), _target(t) { } + + template + void operator()(Vertex u, const Graph& g) + { + typedef graph_traits GraphTraits; + + typename GraphTraits::adjacency_iterator v, vend; + + if (_compatibility[u] == true) + { + for (tie(v,vend) = adjacent_vertices(u, g); v != vend; ++v) + { + if (_compatibility[*v]) + { + //fprintf(stderr, "Avoiding a redundant edge from %d to %d\n", u, *v); + _connect[u] = DONT_CONNECT; + return; + } + } + + // If we get here, u is compatible with the target, but has no + // compatible successors, so it's safe to add the edge after the DFS + _connect[u] = CONNECT; + } + else + { + _connect[u] = DONT_CONNECT; + } + //put(_compat, v, compatible); + } + + ConnectMap _connect; + CompatibilityMap _compatibility; + DAGNode _target; +}; + +template +connect_visitor +record_connections(CompatibilityMap compatibility, + ConnectMap connect, + DAGNode target, + Tag) +{ + return connect_visitor (compatibility, connect, target); +} + + +bool create_overlap_dag(vector& hits, + DAG& bundle_dag) +{ + bundle_dag = DAG(); + vector::iterator hi = hits.begin(); + bool found_compatible_scaffolds = false; + + typedef list HitBuf; + HitBuf hit_buf; + + HitsForNodeMap hits_for_node = get(vertex_name, bundle_dag); + + while (hi != hits.end()) + { + int new_left = hi->left(); + int new_right = hi->right(); + + //fprintf(stderr, "Adding to hit buffer: [%d, %d)\n", new_left, new_right); + + HitBufBasket new_basket(new_right, &(*hi), add_vertex(bundle_dag)); + hits_for_node[new_basket.node] = new_basket.hit; + + HitBuf::iterator new_end = remove_if(hit_buf.begin(), + hit_buf.end(), + Expired(new_left)); + + hit_buf.erase(new_end, hit_buf.end()); + + // Now check the each hit in the buffer for compatibility with this + // new one + + vector containing_hits; + + boost::vector_property_map c(num_vertices(bundle_dag)); + boost::vector_property_map connected(num_vertices(bundle_dag)); + + for (HitBuf::iterator bi = hit_buf.begin(); + bi != hit_buf.end(); + ++bi) + + { + const Scaffold& lhs = *(bi->hit); + const Scaffold& rhs = *(new_basket.hit); + + assert (lhs.left() <= rhs.left()); + if (!lhs.contains(rhs)) + { + //fprintf(stderr, "Checking [%d, %d) and [%d, %d)\n", lhs.left(), lhs.right(), rhs.left(), rhs.right()); + if (Scaffold::compatible(lhs, rhs)) + { + c[bi->node] = true; + } + } + } + + for (HitBuf::iterator bi = hit_buf.begin(); + bi != hit_buf.end(); + ++bi) + + { + if (connected[bi->node] == UNKNOWN) + { + depth_first_search(bundle_dag, + 
root_vertex(bi->node). + visitor(make_dfs_visitor(make_pair(record_connections(c, connected, new_basket.node, on_finish_vertex()), null_visitor())))); + } + } + + for (HitBuf::iterator bi = hit_buf.begin(); + bi != hit_buf.end(); + ++bi) + { + if (connected[bi->node] == CONNECT) + { + add_edge(bi->node, new_basket.node, bundle_dag); + found_compatible_scaffolds = true; + } + } + + hit_buf.push_back(new_basket); + + ++hi; + } + + vector has_parent(num_vertices(bundle_dag), false); + vector has_child (num_vertices(bundle_dag), false); + + graph_traits < DAG >::vertex_iterator u, uend; + for (tie(u, uend) = vertices(bundle_dag); u != uend; ++u) + { + graph_traits < DAG >::adjacency_iterator v, vend; + for (tie(v,vend) = adjacent_vertices(*u, bundle_dag); v != vend; ++v) + { + DAGNode U = *u; + DAGNode V = *v; + has_parent[V] = true; + has_child[U] = true; + } + } + +#ifdef DEBUG + set introns; +#endif + for (size_t i = 0; i < num_vertices(bundle_dag); ++i) + { + if (has_child[i]) + continue; + const Scaffold* hit_i = hits_for_node[i]; + + for (size_t j = 0; j < num_vertices(bundle_dag); ++j) + { + if (has_parent[j]) + continue; + const Scaffold* hit_j = hits_for_node[j]; + if (hit_i->right() < hit_j->left() && + hit_j->left() - hit_i->right() < olap_radius) + { + add_edge(i, j, bundle_dag); + } + } + } + +#ifndef NDEBUG + DAG tr; + boost::vector_property_map G_to_TR; + property_map::type w = get(vertex_index, bundle_dag); + transitive_reduction(bundle_dag, + tr, + G_to_TR, + w); + verbose_msg("dag has %lu edges, tr has %lu edges\n", num_edges(bundle_dag), num_edges(tr)); + + //assert (num_edges(bundle_dag) == num_edges(tr)); +#endif + + return found_compatible_scaffolds; +} + +pair add_terminal_nodes(DAG& bundle_dag) +{ + vector has_parent(num_vertices(bundle_dag) + 2, false); + vector has_child (num_vertices(bundle_dag) + 2, false); + + graph_traits < DAG >::vertex_iterator u, uend; + for (tie(u, uend) = vertices(bundle_dag); u != uend; ++u) + { + graph_traits < DAG >::adjacency_iterator v, vend; + for (tie(v,vend) = adjacent_vertices(*u, bundle_dag); v != vend; ++v) + { + DAGNode U = *u; + DAGNode V = *v; + has_parent[V] = true; + has_child[U] = true; + } + } + + DAGNode source = add_vertex(bundle_dag); + DAGNode sink = add_vertex(bundle_dag); + + int num_attached_to_source = 0; + int num_attached_to_sink = 0; + + for (size_t i = 0; i < num_vertices(bundle_dag); ++i) + { + if (!has_parent[i] && i != sink && i != source) + { + num_attached_to_source++; + add_edge(source, i, bundle_dag); + } + if (!has_child[i] && i != source && i != sink) + { + num_attached_to_sink++; + add_edge(i, sink, bundle_dag); + } + } + +#if verbose_msg + HitsForNodeMap hits_for_node = get(vertex_name, bundle_dag); + DAG::vertex_iterator ki, ke; + for (tie(ki, ke) = vertices(bundle_dag); ki != ke; ++ki) + { + if (edge(source, *ki, bundle_dag).second) + { + const Scaffold* pS = hits_for_node[*ki]; + fprintf(stderr, "%d-%d has edge from source\n", pS->left(), pS->right()); + } + + if (edge(*ki, sink, bundle_dag).second) + { + const Scaffold* pS = hits_for_node[*ki]; + fprintf(stderr, "%d-%d has edge to sink\n", pS->left(), pS->right()); + } + } + verbose_msg("%d source nodes, %d sink nodes\n", num_attached_to_source, num_attached_to_sink); +#endif + return make_pair(source, sink); +} diff --git a/src/scaffold_graph.h b/src/scaffold_graph.h new file mode 100644 index 0000000..b36e892 --- /dev/null +++ b/src/scaffold_graph.h @@ -0,0 +1,39 @@ +#ifndef SCAFFOLD_GRAPH_H +#define SCAFFOLD_GRAPH_H +/* + * scaffold_graph.h + * 
cufflinks + * + * Created by Cole Trapnell on 6/2/10. + * Copyright 2010 Cole Trapnell. All rights reserved. + * + */ + +#include +#include +#include +#include + +#include + +#if (BOOST_VERSION < 103800) +#include +#else +#include +#endif + +class Scaffold; + +typedef boost::adjacency_list > DAG; + +typedef boost::graph_traits::vertex_descriptor DAGNode; + +typedef boost::property_map::type HitsForNodeMap; + +bool create_overlap_dag(std::vector& hits, DAG& bundle_dag); +std::pair add_terminal_nodes(DAG& bundle_dag); + +#endif diff --git a/src/scaffolds.cpp b/src/scaffolds.cpp new file mode 100644 index 0000000..096f58a --- /dev/null +++ b/src/scaffolds.cpp @@ -0,0 +1,1834 @@ +/* + * scaffolds.cpp + * cufflinks + * + * Created by Cole Trapnell on 3/30/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#include +#include +#include +#include "common.h" +#include "scaffolds.h" + +using namespace std; + +bool AugmentedCuffOp::compatible(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs, + int overhang_tolerance) +{ + if (rhs.opcode == CUFF_INTRON) + { + if (lhs.opcode == CUFF_INTRON) + { + if (lhs != rhs /*&& !(lhs.genomic_length == rhs.genomic_length && + abs(lhs.genomic_offset - rhs.genomic_offset) < 2)*/) + return false; + } + else if (lhs.opcode == CUFF_UNKNOWN) + { + //int left_diff = abs(lhs.g_left() - rhs.g_left()); + //int right_diff = abs(lhs.g_right() - rhs.g_right()); + //if (left_diff + right_diff > max_frag_len) + // return false; + } + else + { + int l_match = match_length(lhs, rhs.g_left(), rhs.g_right()); + if (l_match > overhang_tolerance) + { + return false; + } + } + } + else if (rhs.opcode == CUFF_UNKNOWN) + { + int l_match = match_length(lhs, rhs.g_left(), rhs.g_right()); + if (l_match > max_frag_len) + return false; + } + + if (lhs.opcode == CUFF_INTRON) + { + if (rhs.opcode == CUFF_INTRON) + { + if (lhs != rhs /*&& !(lhs.genomic_length == rhs.genomic_length && + abs(lhs.genomic_offset - rhs.genomic_offset) < 2)*/) + return false; + } + else if (lhs.opcode == CUFF_UNKNOWN) + { + //int left_diff = abs(lhs.g_left() - rhs.g_left()); + //int right_diff = abs(lhs.g_right() - rhs.g_right()); + //if (left_diff + right_diff > max_frag_len) + // return false; + } + else + { + int r_match = match_length(rhs, lhs.g_left(), lhs.g_right()); + if (r_match > overhang_tolerance) + { + return false; + } + } + } + else if (lhs.opcode == CUFF_UNKNOWN) + { + int r_match = match_length(rhs, lhs.g_left(), lhs.g_right()); + if (r_match > max_frag_len) + return false; + } + + return true; +} + +bool AugmentedCuffOp::g_left_lt(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs) +{ + return lhs.g_left() < rhs.g_left(); +} + +void disjoint_ops(vector& to_reduce) +{ + if (to_reduce.empty()) + return; + + vector reduced; + reduced.push_back(to_reduce.front()); + for (int i = 1; i < (int)to_reduce.size(); ++i) + { + assert (to_reduce[i].opcode == to_reduce[i - 1].opcode); + if (reduced.back().g_right() >= to_reduce[i].g_left()) + { + int delta = to_reduce[i].g_right() - reduced.back().g_right(); + if (delta > 0) + reduced.back().genomic_length += delta; + } + else + { + reduced.push_back(to_reduce[i]); + } + } + + to_reduce = reduced; +} + +// Adds open intervals not covered in the genomic coordinate covered by to_fill +// to the vector gaps. DOES NOT CLEAR gaps. 
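// Example: if to_fill covers [100,200) and [300,400), the single open
// interval (200,300) is appended to gaps.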
+void record_gaps(const vector& to_fill, + vector >& gaps) +{ + for (size_t i = 1; i < to_fill.size(); ++i) + { + if (to_fill[i].g_left() - to_fill[i-1].g_right() > 0) + { + gaps.push_back(make_pair(to_fill[i-1].g_right(), to_fill[i].g_left())); + } + } +} + +// This function "fills" the gaps in to_fill with +// AugmentedCuffOps from filler. The intersection of the gaps in both vectors +// remains as gaps in the modified to_fill. + +// IMPORTANT: both vectors MUST be disjoint (see disjoint_matches) before calling +// this function +void AugmentedCuffOp::fill_interstices(vector& to_fill, + const vector& filler, + bool allow_flank_fill, + bool allow_flanking_introns) +{ + vector filled = to_fill; + vector > gaps; + + + size_t j = 0; + + if (to_fill.empty()) + { + to_fill = filler; + sort(to_fill.begin(), to_fill.end(), g_left_lt); + return; + } + + // This first loop could scan from either end and bail on hitting + // the first gap, but this is straightforward, and probably just as + // fast in practice, since these arrays are generally tiny + if (allow_flank_fill) + { + gaps.push_back(make_pair(0, to_fill.front().g_left())); + record_gaps(to_fill, gaps); + gaps.push_back(make_pair(to_fill.back().g_right(), INT_MAX)); + } + else + { + record_gaps(to_fill, gaps); + } + + size_t i = 0; + + while (i < gaps.size()) + { + pair& gap = gaps[i]; + + // a break in this loop will advance the gap index + while (j < filler.size()) + { + const AugmentedCuffOp& op = filler[j]; + + if (op.g_left() == gap.first && op.g_right() == gap.second) + { + // CASE 1 + // gap [ ) + // op [ ) + filled.push_back(op); + + // advance both indexes (i is advanced after the break); + ++j; + //fprintf (stderr, "CASE 1: advancing both indexes\n"); + break; + } + + else if (op.g_right() <= gap.first) + { + // CASE 2 + // gap [ ) + // op [ ) + + //fprintf (stderr, "CASE 2: skipping op %d:%d-%d due to gap %d-%d\n", op.opcode, op.g_left(), op.g_right(), gap.first, gap.second); + // now just move to the next op, we can't add this one + } + else if (op.g_left() >= gap.second) + { + // CASE 3 + // gap [ ) + // op [ ) + //fprintf (stderr, "CASE 3: advancing gap from %d-%d due to op %d:%d-%d\n", gap.first, gap.second, op.opcode, op.g_left(), op.g_right()); + break; // not safe to add yet, we've gone beyond the current gap + // advance the gap index + } + else if (op.g_left() < gap.first && op.g_right() > gap.second) + { + // CASE 4 + // gap [ ) + // op [ ) + + // create a new op to fill the gap + AugmentedCuffOp gap_op(op.opcode, + gap.first, + gap.second - gap.first); + assert (gap_op.genomic_length > 0); + + filled.push_back(gap_op); + + //fprintf (stderr, "CASE 4: advancing gap from %d-%d due to op %d:%d-%d\n", gap.first, gap.second, op.opcode, op.g_left(), op.g_right()); + // advance the gap index + break; + } + else if (op.g_left() > gap.first && op.g_right() < gap.second) + { + // CASE 5 + // gap [ ) + // op [ ) + + // just add this op + filled.push_back(op); + // advance the op index + //fprintf (stderr, "CASE 5: adding %d:%d-%d, advancing op index\n", op.opcode, op.g_left(), op.g_right()); + } + else if (op.g_right() >= gap.second && op.g_left() >= gap.first) + { + // CASE 6 + // gap [ ) + // op [ ) + + // create a new op from the left part of this one and add it + AugmentedCuffOp gap_op(op.opcode, + op.g_left(), + gap.second - op.g_left()); + assert (gap_op.genomic_length > 0); + filled.push_back(gap_op); + + break; + } + else if (op.g_left() <= gap.first && op.g_right() >= gap.first) + { + // CASE 7 + // gap [ ) + // op [ ) 
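+                // Worked example (numbers are illustrative only): with gap = [100, 200)
+                // and op = [80, 150), the filler op starts at or before the gap but ends
+                // inside it, so only the overlapping piece [100, 150) is kept below:
+                // gap_op gets genomic_offset = 100 and genomic_length = 150 - 100 = 50.
+                // There is no break here, so the op index advances and the next filler
+                // op is tested against this same gap.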
+ + // create a new op from the right part of this one and add it + AugmentedCuffOp gap_op(op.opcode, + gap.first, + op.g_right() - gap.first); + assert (gap_op.genomic_length > 0); + + filled.push_back(gap_op); + + // advance the op index + //fprintf (stderr, "CASE 7: advancing op\n"); + } + else + { + assert(false); + } + + ++j; + } + + ++i; + } + + sort(filled.begin(), filled.end(), g_left_lt); + if (!allow_flanking_introns) + { + for (size_t i = 0; i < filled.size(); ++i) + { + if (filled[i].opcode == CUFF_INTRON) + { + assert (i > 0); + assert (i < filled.size() -1); + assert (filled[i-1].opcode == CUFF_MATCH); + assert (filled[i+1].opcode == CUFF_MATCH); + assert (filled[i-1].g_right() == filled[i].g_left()); + assert (filled[i+1].g_left() == filled[i].g_right()); + } + } + } + to_fill = filled; +} + + + +// ops is assumed to be sorted +void AugmentedCuffOp::merge_ops(const vector& ops, + vector& merged, + bool introns_overwrite_matches, + bool allow_flank_introns) +{ +#if DEBUG + //assert(std::adjacent_find(ops.begin(), ops.end(), g_left_lt) == ops.end()); +#endif + + if (ops.size() < 2) + { + merged = ops; + return; + } + + size_t g_max = 0; + size_t g_min = 0xFFFFFFFF; + + vector matches; + vector introns; + + vector unknowns; + + for (size_t i = 0; i < ops.size(); ++i) + { + //if (ops[i].opcode == CUFF_INTRON) + // fprintf (stderr, "[%d] %d, %d (%d)\n", i, ops[i].g_left(),ops[i].g_right(), ops[i].g_right() - ops[i].g_left() ); + assert (ops[i].g_left() < ops[i].g_right()); + + if ((size_t)ops[i].g_left() < g_min) + g_min = ops[i].g_left(); + if ((size_t)ops[i].g_right() > g_max) + g_max = ops[i].g_right(); + + switch(ops[i].opcode) + { + case CUFF_MATCH: + { + matches.push_back(ops[i]); + }break; + + case CUFF_INTRON: + { + introns.push_back(ops[i]); + }break; + + case CUFF_UNKNOWN: + { + + }break; + + default: + fprintf(stderr, "Unknown opcode, exiting\n"); + exit(1); + break; + } + } + + int merged_length = (int)g_max - (int)g_min + 1; + if (merged_length < 0) + { + fprintf(stderr, "Error: nonsense gene merge - negative length product\n"); + exit(1); + } + if (gaurd_assembly() && merged_length > (int)max_gene_length) + { + fprintf(stderr, "Error: nonsense gene merge - product > max_gene_length\n"); + exit(1); + } + + unknowns.push_back(AugmentedCuffOp(CUFF_UNKNOWN, g_min, g_max - g_min + 1)); + + vector pre_merge = matches; + + disjoint_ops(matches); + disjoint_ops(introns); + + // below, the flank fill flag is set to preserve the invariant that + // all scaffolds must start with CUFF_MATCH + if (introns_overwrite_matches) + { + merged = introns; + fill_interstices(merged, matches, true, allow_flank_introns); + vector > gaps; + record_gaps(merged, gaps); + if (!gaps.empty()) + fill_interstices(merged, unknowns, false, allow_flank_introns); + } + else + { + merged = matches; + fill_interstices(merged, introns, false, allow_flank_introns); + vector > gaps; + record_gaps(merged, gaps); + if (!gaps.empty()) + fill_interstices(merged, unknowns, false, allow_flank_introns); + } + + if (!allow_flank_introns) + { + assert (merged.front().opcode == CUFF_MATCH); + assert (merged.back().opcode == CUFF_MATCH); + } + for (size_t i = 1; i < merged.size(); ++i) + { + assert(merged[i-1].g_right() == merged[i].g_left()); + if (merged[i].opcode == CUFF_INTRON) + { + assert (merged[i-1].opcode == CUFF_MATCH); + } + else if (merged[i].opcode == CUFF_UNKNOWN) + { + assert (merged[i-1].opcode != CUFF_INTRON); + } + } +} + + +bool is_known(const AugmentedCuffOp& op) +{ + return op.opcode != 
CUFF_UNKNOWN; +} + +// verifies that no matter how the merge goes, the result wont be an insanely +// long gene. +bool check_merge_length(const vector& ops) +{ + size_t g_max = 0; + size_t g_min = 0xFFFFFFFF; + + for (size_t i = 0; i < ops.size(); ++i) + { + //if (ops[i].opcode == CUFF_INTRON) + // fprintf (stderr, "[%d] %d, %d (%d)\n", i, ops[i].g_left(),ops[i].g_right(), ops[i].g_right() - ops[i].g_left() ); + assert (ops[i].g_left() < ops[i].g_right()); + + if ((size_t)ops[i].g_left() < g_min) + g_min = ops[i].g_left(); + if ((size_t)ops[i].g_right() > g_max) + g_max = ops[i].g_right(); + } + int merged_length = (int)g_max - (int)g_min + 1; + if (merged_length < 0 || merged_length > (int)max_gene_length) + { + return false; + } + return true; + } + +inline bool has_intron(const Scaffold& scaff) +{ + + const vector& ops = scaff.augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + if (ops[j].opcode == CUFF_INTRON) + return true; + } + + return false; +} + +//inline bool has_intron(const Scaffold& scaff) +//{ +// +// const vector& ops = scaff.augmented_ops(); +// for (size_t j = 0; j < ops.size(); ++j) +// { +// if (ops[j].opcode == CUFF_INTRON) +// return true; +// } +// +// return false; +//} + +void Scaffold::extend_5(const Scaffold& other) +{ + assert(Scaffold::compatible(*this, other, ref_merge_overhang_tolerance)); + + if (strand() == CUFF_FWD) + { + AugmentedCuffOp& my_first_op = _augmented_ops.front(); + const AugmentedCuffOp& other_first_op = other.augmented_ops().front(); + + my_first_op.g_left(other_first_op.g_left()); + } + else if (strand() == CUFF_REV) + { + AugmentedCuffOp& my_last_op = _augmented_ops.back(); + const AugmentedCuffOp& other_last_op = other.augmented_ops().back(); + + my_last_op.g_right(other_last_op.g_right()); + } + else + { + assert(false); + } + + int id_length = annotated_trans_id().length(); + if (id_length < 4 || _annotated_trans_id.substr(id_length-4)!="_ext") + _annotated_trans_id += "_ext"; +} + +// Clip final 3' exon by given amount +void Scaffold::trim_3(int to_remove) +{ + if(strand() == CUFF_FWD) + { + AugmentedCuffOp& exon_3 = _augmented_ops.back(); + assert (exon_3.genomic_length > to_remove); + exon_3.genomic_length -= to_remove; + } + else if(strand() == CUFF_REV) + { + AugmentedCuffOp& exon_3 = _augmented_ops.front(); + assert (exon_3.genomic_length > to_remove); + exon_3.genomic_offset += to_remove; + exon_3.genomic_length -= to_remove; + + } + else + { + assert(false); + } +} + +// Clip final 3' exon by given amount +void Scaffold::extend_3(int to_add) +{ + if(strand() == CUFF_FWD) + { + AugmentedCuffOp& exon_3 = _augmented_ops.back(); + //assert (exon_3.genomic_length > to_remove); + exon_3.genomic_length += to_add; + } + else if(strand() == CUFF_REV) + { + AugmentedCuffOp& exon_3 = _augmented_ops.front(); + //assert (exon_3.genomic_length > to_remove); + exon_3.genomic_offset -= to_add; + exon_3.genomic_length += to_add; + + } + else + { + assert(false); + } +} + +void Scaffold::tile_with_scaffs(vector& tile_scaffs, int max_len, int tile_offset) const +{ + int min_len = tile_offset; + assert(min_len % tile_offset == 0 && max_len % tile_offset == 0); + + if (length() < max_len) + return; + + size_t l = 0; + size_t r = 0; + int l_off = 0; + int r_off = min_len; // One past end + int curr_len = min_len; + int remaining_len = length(); + + const vector& orig_ops = augmented_ops(); + + while(true) + { + while (l < orig_ops.size() && (orig_ops[l].opcode != CUFF_MATCH || l_off >= orig_ops[l].genomic_length)) + { + if 
(orig_ops[l].opcode == CUFF_MATCH) + l_off -= orig_ops[l].genomic_length; + ++l; + //if(++l == augmented_ops().size()) + // assert(false); + } + while (r < orig_ops.size() && (orig_ops[r].opcode != CUFF_MATCH || r_off > orig_ops[r].genomic_length)) // Strictly > because r_off is one past + { + if (orig_ops[r].opcode == CUFF_MATCH) + r_off -= orig_ops[r].genomic_length; + ++r; + //if(++r == augmented_ops().size()) + // assert(false); + } + + vector ops; + + //if (l >= orig_ops.size() && r >= orig_ops.size()) + // break; + + if (l==r) + { + assert (l < orig_ops.size()); + ops.push_back(AugmentedCuffOp(CUFF_MATCH, orig_ops[l].g_left() + l_off, r_off - l_off)); + } + else + { + assert(orig_ops.size()); + //assert(orig_ops[l].genomic_offset != 0); + + ops.push_back(AugmentedCuffOp(CUFF_MATCH, orig_ops[l].g_left() + l_off, orig_ops[l].genomic_length - l_off)); + assert(ops.back().g_right() > ops.back().g_left()); + for(size_t i = l+1; i < r; i++) + { + ops.push_back(orig_ops[i]); + } + if (r_off > 0 && r < orig_ops.size()) + { + assert (r < orig_ops.size()); + //assert (orig_ops[r].genomic_offset != 0); + ops.push_back(AugmentedCuffOp(CUFF_MATCH, orig_ops[r].g_left(), r_off)); + } + } + assert(ops.back().g_right() > ops.back().g_left()); + + // genomic_offset actually could be zero - from an exon starting at coord + // 1 in some chromosome of the ref. +// foreach(const AugmentedCuffOp& op, ops) +// { +// assert (op.genomic_offset != 0); +// } + + tile_scaffs.push_back(Scaffold(this->ref_id(), this->strand(), ops, true)); + assert(tile_scaffs.back().length() == curr_len ); + assert(tile_scaffs.back().left() >= left() && + tile_scaffs.back().right() <= right()); + + if (l==0 && l_off == 0 && curr_len < max_len) // On left end, not yet full length + { + curr_len += tile_offset; + r_off += tile_offset; + } + else if(remaining_len - tile_offset < max_len) // On the right end of transcript, decreasing in length + { + curr_len += augmented_ops().back().genomic_length - r_off - tile_offset; + r_off = augmented_ops().back().genomic_length; + l_off += tile_offset; + remaining_len -= tile_offset; + if (curr_len < min_len) + return; + } + else // In the middle of the transcript, full length + { + l_off += tile_offset; + r_off += tile_offset; + remaining_len -= tile_offset; + } + + } + +} + +bool Scaffold::sub_scaffold(Scaffold& sub_scaff, int g_left, int match_length) const +{ + size_t i; + for(i = 0; i < augmented_ops().size(); ++i) + { + if (augmented_ops()[i].g_right() > g_left) + break; + } + const AugmentedCuffOp& l_op = augmented_ops()[i++]; + + assert(l_op.opcode == CUFF_MATCH); + + vector sub_ops; + sub_ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, min(match_length, l_op.g_right()-g_left))); + int len_so_far = sub_ops.back().genomic_length; + + while(len_so_far < match_length && i < augmented_ops().size()) + { + const AugmentedCuffOp& op = augmented_ops()[i++]; + if(op.opcode==CUFF_MATCH) + { + sub_ops.push_back(AugmentedCuffOp(CUFF_MATCH, op.g_left(), min(match_length-len_so_far, op.genomic_length))); + len_so_far += sub_ops.back().genomic_length; + if (len_so_far >= match_length) + break; + } + else + { + sub_ops.push_back(op); + } + } + + //assert(len_so_far == match_length); + sub_scaff = Scaffold(this->ref_id(), this->strand(), sub_ops, true); + return (len_so_far == match_length); +} + +void Scaffold::merge(const Scaffold& lhs, + const Scaffold& rhs, + Scaffold& merged, + bool introns_overwrite_matches) +{ + OpList ops; + + assert (merged.ref_id() == 0); + assert (lhs.ref_id() == 
rhs.ref_id()); + + ops.insert(ops.end(), lhs._augmented_ops.begin(), lhs._augmented_ops.end()); + ops.insert(ops.end(), rhs._augmented_ops.begin(), rhs._augmented_ops.end()); + + sort(ops.begin(), ops.end(), AugmentedCuffOp::g_left_lt); + + AugmentedCuffOp::merge_ops(ops, merged._augmented_ops, introns_overwrite_matches); + + assert (ops.empty() || !(merged._augmented_ops.empty())); + + merged._ref_id = lhs.ref_id(); + merged._mates_in_scaff.insert(merged._mates_in_scaff.end(), + lhs._mates_in_scaff.begin(), + lhs._mates_in_scaff.end()); + merged._mates_in_scaff.insert(merged._mates_in_scaff.end(), + rhs._mates_in_scaff.begin(), + rhs._mates_in_scaff.end()); + + sort(merged._mates_in_scaff.begin(), + merged._mates_in_scaff.end()); + vector::iterator new_end = unique(merged._mates_in_scaff.begin(), + merged._mates_in_scaff.end()); + merged._mates_in_scaff.erase(new_end, merged._mates_in_scaff.end()); + + + int r_check = merged.left(); + for (size_t i = 0; i < merged._augmented_ops.size(); ++i) + r_check += merged._augmented_ops[i].genomic_length; + + assert(r_check == merged.right()); + + if (lhs.strand() != CUFF_STRAND_UNKNOWN) + merged._strand = lhs.strand(); + if (rhs.strand() != CUFF_STRAND_UNKNOWN) + merged._strand = rhs.strand(); + + merged._has_intron = has_intron(merged); + assert(merged._strand != CUFF_STRAND_UNKNOWN || !merged.has_intron() ); + assert(!merged.is_ref()); + + if (library_type == "transfrags") + { + double avg_fpkm = lhs.fpkm() + rhs.fpkm(); + avg_fpkm /= 2; + merged.fpkm(avg_fpkm); + } +} + + + +void Scaffold::merge(const vector& s, + Scaffold& merged, + bool introns_overwrite_matches) +{ + OpList ops; + + CuffStrand strand = CUFF_STRAND_UNKNOWN; + + for (size_t i = 0; i < s.size(); ++i) + { + ops.insert(ops.end(), s[i]._augmented_ops.begin(), s[i]._augmented_ops.end()); + merged._mates_in_scaff.insert(merged._mates_in_scaff.end(), + s[i]._mates_in_scaff.begin(), + s[i]._mates_in_scaff.end()); + + if (s[i].strand() != CUFF_STRAND_UNKNOWN) + { + //assert (strand == CUFF_STRAND_UNKNOWN || strand == s[i].strand()); + strand = s[i].strand(); + } + } + + sort(merged._mates_in_scaff.begin(),merged._mates_in_scaff.end()); + vector::iterator new_end = unique(merged._mates_in_scaff.begin(), + merged._mates_in_scaff.end()); + merged._mates_in_scaff.erase(new_end, merged._mates_in_scaff.end()); + + sort(ops.begin(), ops.end(), AugmentedCuffOp::g_left_lt); + //merged._contigs.push_back(CuffAlign(ops.front().g_left(), vector())); + + if (ops.empty()) + return; + + AugmentedCuffOp::merge_ops(ops, merged._augmented_ops, introns_overwrite_matches); + + + //assert (ops.empty() || !(merged._augmented_ops.empty())); +#ifdef DEBUG + if (merged._augmented_ops.empty() || + merged._augmented_ops.front().opcode != CUFF_MATCH || + merged._augmented_ops.back().opcode != CUFF_MATCH) + { + AugmentedCuffOp::merge_ops(ops, merged._augmented_ops, introns_overwrite_matches); + } +#endif + + merged._ref_id = s.front().ref_id(); + merged._strand = strand; + + int r_check = merged.left(); + for (size_t i = 0; i < merged._augmented_ops.size(); ++i) + r_check += merged._augmented_ops[i].genomic_length; + +#ifdef DEBUG + if (r_check != merged.right()) + { + AugmentedCuffOp::merge_ops(ops, merged._augmented_ops, introns_overwrite_matches); + } +#endif + + assert (r_check == merged.right()); + merged._has_intron = has_intron(merged); + + assert(merged._strand != CUFF_STRAND_UNKNOWN || !merged.has_strand_support()); + + if (library_type == "transfrags") + { + double avg_fpkm = 0.0; + foreach (const 
Scaffold& sc, s) + { + avg_fpkm += sc.fpkm(); + } + avg_fpkm /= s.size(); + merged.fpkm(avg_fpkm); + } +} + +void Scaffold::fill_gaps(int filled_gap_size) +{ + OpList ops; + + const vector& orig_ops = augmented_ops(); + for (size_t i = 0; i < orig_ops.size(); ++i) + { + if (orig_ops[i].opcode == CUFF_UNKNOWN && + orig_ops[i].genomic_length < filled_gap_size) + { + ops.push_back(AugmentedCuffOp(CUFF_MATCH, + orig_ops[i].genomic_offset, + orig_ops[i].genomic_length)); + } + else + { + ops.push_back(orig_ops[i]); + } + } + + sort(ops.begin(), ops.end(), AugmentedCuffOp::g_left_lt); + + AugmentedCuffOp::merge_ops(ops, _augmented_ops, false); + _has_intron = has_intron(*this); + assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN); + +} + +void Scaffold::fill_gaps(const vector& filler) +{ + OpList ops; + + const vector& orig_ops = augmented_ops(); + + const vector og_ops = augmented_ops(); + + vector unknowns; + + size_t g_max = 0; + size_t g_min = 0xFFFFFFFF; + + vector tmp_filler = filler; + + foreach(const AugmentedCuffOp& op, orig_ops) + { + assert (op.g_left() < op.g_right()); + + if ((size_t)op.g_left() < g_min) + g_min = op.g_left(); + if ((size_t)op.g_right() > g_max) + g_max = op.g_right(); + } + + tmp_filler.push_back(AugmentedCuffOp(CUFF_UNKNOWN, g_min, g_max - g_min + 1)); + sort(tmp_filler.begin(), tmp_filler.end(), AugmentedCuffOp::g_left_lt); + + vector padded_filler; + AugmentedCuffOp::merge_ops(tmp_filler, padded_filler, false); + + vector overlapping; + foreach (const AugmentedCuffOp& op, padded_filler) + { + //if (left() <= op.g_left() && right() >= op.g_right() + if(::overlap_in_genome(op.g_left(),op.g_right(), left(), right()) + && (op.opcode != CUFF_UNKNOWN || !overlapping.empty())) + { + overlapping.push_back(op); + } + } + + overlapping.insert(overlapping.end(), _augmented_ops.begin(),_augmented_ops.end()); + sort(overlapping.begin(), overlapping.end(), AugmentedCuffOp::g_left_lt); + + // Trim first in last in case they hang over the scaffold boundaries + if (overlapping.front().g_left() < left()) + { + AugmentedCuffOp& first_op = overlapping.front(); + first_op.genomic_length -= (left() - first_op.g_left()); + first_op.genomic_offset = left(); + } + if (overlapping.back().g_right() > right()) + { + AugmentedCuffOp& last_op = overlapping.back(); + last_op.genomic_length -= (last_op.g_right() - right()); + } + if (overlapping.back().opcode == CUFF_INTRON && overlapping.back().genomic_length <= bowtie_overhang_tolerance) + { + overlapping.pop_back(); + } + if (overlapping.front().opcode == CUFF_INTRON && overlapping.front().genomic_length <= bowtie_overhang_tolerance) + { + overlapping.erase(overlapping.begin(), overlapping.begin()+1); + } + + // we don't want either the terminal ops in the filler to be unknowns, + // because they'll just propogate to the scaffold we're trying to fill. 
+ if (!overlapping.empty() && overlapping.back().opcode == CUFF_UNKNOWN) + overlapping.pop_back(); + + if (overlapping.empty()) + return; + + AugmentedCuffOp::merge_ops(overlapping, _augmented_ops, true); + _has_intron = has_intron(*this); + assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN); +} + +bool Scaffold::overlap_in_genome(const Scaffold& lhs, + const Scaffold& rhs, + int overlap_radius) +{ + int ll = lhs.left() - overlap_radius; + int rr = rhs.right() + overlap_radius; + int lr = lhs.right() + overlap_radius; + int rl = rhs.left() - overlap_radius; + + if (ll >= rl && ll < rr) + return true; + if (lr >rl && lr < rr) + return true; + if (rl >= ll && rl < lr) + return true; + if (rr > ll && rr < lr) + return true; + + return false; +} + +bool intron_op(const AugmentedCuffOp& op) +{ + return op.opcode == CUFF_INTRON; +} + +bool Scaffold::strand_agree(const Scaffold& lhs, + const Scaffold& rhs) +{ + bool strand = (lhs.strand() == CUFF_STRAND_UNKNOWN || + rhs.strand() == CUFF_STRAND_UNKNOWN || + lhs.strand() == rhs.strand()); + return strand; +} + +bool Scaffold::exons_overlap(const Scaffold& lhs, + const Scaffold& rhs) +{ + const vector& lhs_ops = lhs.augmented_ops(); + const vector& rhs_ops = rhs.augmented_ops(); + + for (size_t l = 0; l < lhs_ops.size(); l++) + { + if (lhs_ops[l].opcode != CUFF_MATCH) + continue; + + for (size_t r = 0; r < rhs_ops.size(); r++) + { + if (rhs_ops[r].opcode == CUFF_MATCH && + AugmentedCuffOp::overlap_in_genome(lhs_ops[l], rhs_ops[r])) + { + return true; + } + } + } + return false; +} + +bool Scaffold::compatible(const Scaffold& lhs, + const Scaffold& rhs, + int overhang_tolerance) +{ + if (!strand_agree(lhs, rhs)) + return false; + + if (lhs.left() <= rhs.left()) + { + if (overlap_in_genome(lhs, rhs, olap_radius)) + { + // check compatibility + if (!compatible_contigs(lhs, rhs, overhang_tolerance)) + return false; + } + } + else if (rhs.left() < lhs.left()) + { + if (overlap_in_genome(rhs, lhs, olap_radius)) + { + // check compatibility + if (!compatible_contigs(rhs, lhs, overhang_tolerance)) + return false; + } + } + + + return true; +} + +bool Scaffold::compatible_contigs(const Scaffold& lhs, + const Scaffold& rhs, + int overhang_tolerance) +{ + const vector& l_aug = lhs._augmented_ops; + const vector& r_aug = rhs._augmented_ops; + + size_t curr_l_op = 0; + size_t curr_r_op = 0; + + while (curr_l_op != l_aug.size() && + curr_r_op != r_aug.size()) + { + const AugmentedCuffOp& l_op = l_aug[curr_l_op]; + const AugmentedCuffOp& r_op = r_aug[curr_r_op]; + + if (l_op.g_left() <= r_op.g_left()) + { + if (AugmentedCuffOp::overlap_in_genome(l_op, r_op)) + { + // check compatibility + if (!AugmentedCuffOp::compatible(l_op, r_op, overhang_tolerance)) + return false; +// if (l_op.opcode == CUFF_UNKNOWN && +// r_op.opcode == CUFF_MATCH) +// { +// //int m_len = AugmentedCuffOp::match_length(r_op, l_op.g_left(), l_op.g_right()); +// if (l_op.properly_contains(r_op)) +// return false; +// } + } + if (l_op.g_right() < r_op.g_right()) + ++curr_l_op; + else if (r_op.g_right() < l_op.g_right()) + ++curr_r_op; + else // Indentical boundaries, advance both + { + ++curr_l_op; + ++curr_r_op; + } + } + else if (r_op.g_left() < l_op.g_left()) + { + if (AugmentedCuffOp::overlap_in_genome(r_op, l_op)) + { + // check compatibility + if (!AugmentedCuffOp::compatible(r_op, l_op, overhang_tolerance)) + return false; +// if (r_op.opcode == CUFF_UNKNOWN && +// l_op.opcode == CUFF_MATCH) +// { +// //int m_len = AugmentedCuffOp::match_length(l_op, r_op.g_left(), 
r_op.g_right()); +// if (r_op.properly_contains(l_op)) +// return false; +// } + } + if (r_op.g_right() < l_op.g_right()) + ++curr_r_op; + else if (l_op.g_right() < r_op.g_right()) + ++curr_l_op; + else // Indentical boundaries, advance both + { + ++curr_l_op; + ++curr_r_op; + } + } + + } + + return true; + +} + +bool overlap_in_genome(int ll, int lr, int rl, int rr) +{ + if (ll >= rl && ll < rr) + return true; + if (lr > rl && lr < rr) + return true; + if (rl >= ll && rl < lr) + return true; + if (rr > ll && rr < lr) + return true; + return false; +} + +double Scaffold::score_overlap(const AugmentedCuffOp& op, + int inner_left_edge, + int inner_right_edge) +{ + assert(false); + return 0; + /* + assert (op.opcode == CUFF_MATCH); + int mlen = AugmentedCuffOp::match_length(op, + inner_left_edge, + inner_right_edge); + if (mlen > inner_dist_mean) + { + double c = cdf(inner_dist_norm, mlen) - 0.5; + // if (c == 1.0) + // return -1000.0; + double weight = log(1 - c); + assert (weight <= 0.0); + return weight; + } + else + { + return 0; + } + */ +} + +double Scaffold::min_score(const Scaffold& contig, + const vector >& inners) +{ + double score = 0.0; + + size_t curr_inner = 0; + size_t curr_op = 0; + + while (curr_op != contig._augmented_ops.size() && + curr_inner != inners.size()) + { + const pair inner = inners[curr_inner]; + const AugmentedCuffOp& op = contig._augmented_ops[curr_op]; + + if (inner.first <= op.g_left()) + { + if (op.opcode == CUFF_MATCH && + ::overlap_in_genome(inner.first, + inner.second, + op.g_left(), + op.g_right())) + { + score = min(score, score_overlap(op, inner.first, inner.second)); + } + if (inner.second < op.g_right()) + ++curr_inner; + else if (op.g_right() < inner.second) + ++curr_op; + else // Indentical boundaries, advance both + { + ++curr_inner; + ++curr_op; + } + } + else if (op.g_left() < inner.first) + { + if (op.opcode == CUFF_MATCH && + ::overlap_in_genome(op.g_left(), + op.g_right(), + inner.first, + inner.second)) + { + score = min(score, score_overlap(op, inner.first, inner.second)); + } + if (op.g_right() < inner.second) + ++curr_op; + else if (inner.second < op.g_right()) + ++curr_inner; + else // Indentical boundaries, advance both + { + ++curr_inner; + ++curr_op; + } + } + } + return score; +} + +double Scaffold::score_contigs(const Scaffold& contig, + const vector >& inners) +{ + double score = 0.0; + + size_t curr_inner = 0; + size_t curr_op = 0; + + while (curr_op != contig._augmented_ops.size() && + curr_inner != inners.size()) + { + const pair inner = inners[curr_inner]; + const AugmentedCuffOp& op = contig._augmented_ops[curr_op]; + + if (inner.first <= op.g_left()) + { + if (op.opcode == CUFF_MATCH && + ::overlap_in_genome(inner.first, + inner.second, + op.g_left(), + op.g_right())) + { + score += score_overlap(op, inner.first, inner.second); + } + if (inner.second < op.g_right()) + ++curr_inner; + else if (op.g_right() < inner.second) + ++curr_op; + else // Indentical boundaries, advance both + { + ++curr_inner; + ++curr_op; + } + } + else if (op.g_left() < inner.first) + { + if (op.opcode == CUFF_MATCH && + ::overlap_in_genome(op.g_left(), + op.g_right(), + inner.first, + inner.second)) + { + score += score_overlap(op, inner.first, inner.second); + } + if (op.g_right() < inner.second) + ++curr_op; + else if (inner.second < op.g_right()) + ++curr_inner; + else // Indentical boundaries, advance both + { + ++curr_inner; + ++curr_op; + } + } + } + return score; +} + +double Scaffold::score_merge(const Scaffold& lhs, const Scaffold& rhs) +{ + 
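+    // Mate hits unique to one scaffold are scored against the structure of the
+    // other: the genomic inner spans of rhs-only hits are handed to
+    // score_contigs() for lhs, and the spans of lhs-only hits for rhs; the two
+    // contributions are summed below.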
double score = 0.0; + + vector > l_inners, r_inners; + + vector lhs_but_not_rhs; + vector rhs_but_not_lhs; + + set_difference(lhs.mate_hits().begin(), + lhs.mate_hits().end(), + rhs.mate_hits().begin(), + rhs.mate_hits().end(), + back_inserter(lhs_but_not_rhs)); + + set_difference(rhs.mate_hits().begin(), + rhs.mate_hits().end(), + lhs.mate_hits().begin(), + lhs.mate_hits().end(), + back_inserter(rhs_but_not_lhs)); + + for (vector::iterator i = lhs_but_not_rhs.begin(); + i != lhs_but_not_rhs.end(); + ++i) + { + pair p = (*i)->genomic_inner_span(); + if (p.first != -1 && p.second != -1) + l_inners.push_back(p); + } + + for (vector::iterator i = rhs_but_not_lhs.begin(); + i != rhs_but_not_lhs.end(); + ++i) + { + pair p = (*i)->genomic_inner_span(); + if (p.first != -1 && p.second != -1) + r_inners.push_back(p); + } + + score += Scaffold::score_contigs(lhs, r_inners); + score += Scaffold::score_contigs(rhs, l_inners); + + return score; +} + +double Scaffold::score() const +{ + double score = 0.0; + + vector > inners; + + for (vector::const_iterator i = mate_hits().begin(); + i != mate_hits().end(); + ++i) + { + pair p = (*i)->genomic_inner_span(); + if (p.first != -1 && p.second != -1) + inners.push_back(p); + } + + double contig_score = Scaffold::score_contigs(*this, inners); + score += contig_score; + + return score; +} + +double Scaffold::worst_mate_score() const +{ + double min_score = 0.0; + + vector > inners; + + for (vector::const_iterator i = mate_hits().begin(); + i != mate_hits().end(); + ++i) + { + pair p = (*i)->genomic_inner_span(); + if (p.first != -1 && p.second != -1) + inners.push_back(p); + } + + //return overlap_in_genome(lhs._contigs[0], rhs._contigs[0]); + + + min_score = Scaffold::min_score(*this, inners); + + return min_score; +} + + +int Scaffold::genomic_to_transcript_coord(int g_coord) const +{ + int s_coord = 0; + size_t curr_op = 0; + const AugmentedCuffOp* op=NULL; + + while (curr_op != _augmented_ops.size()) + { + op = &_augmented_ops[curr_op]; + + if (op->g_right() > g_coord) + break; + if (op->opcode == CUFF_MATCH) + s_coord += op->genomic_length; + + ++curr_op; + } + + int remainder = g_coord - (*op).g_left(); + + if (remainder <= bowtie_overhang_tolerance || op->opcode == CUFF_MATCH) + s_coord += remainder; + else + s_coord -= (op->genomic_length-remainder); + + if(strand()==CUFF_REV) + s_coord = length() - 1 - s_coord; + + return s_coord; +} + +// start and end (first and second) are the first and final coordinates of the span +// Does not handle overhangs (can return negative values) +pair Scaffold::genomic_to_transcript_span(pair g_span) const +{ + int s_start; + int s_end; + + int s_coord = 0; + size_t curr_op = 0; + const AugmentedCuffOp* op = NULL; + // First, find start + while (curr_op != _augmented_ops.size()) + { + op = &_augmented_ops[curr_op]; + + if (op->g_right() > g_span.first) + break; + if (op->opcode == CUFF_MATCH) + s_coord += op->genomic_length; + + ++curr_op; + } + + int remainder = g_span.first - (*op).g_left(); + + if (remainder <= bowtie_overhang_tolerance || op->opcode == CUFF_MATCH) + s_start = s_coord + remainder; + else + s_start = s_coord - (op->genomic_length-remainder); + + + // Now, find end + while (curr_op != _augmented_ops.size()) + { + op = &_augmented_ops[curr_op]; + + if (op->g_right() > g_span.second) + break; + if (op->opcode == CUFF_MATCH) + s_coord += op->genomic_length; + ++curr_op; + } + + remainder = g_span.second - op->g_left(); + + if (remainder < bowtie_overhang_tolerance || op->opcode == CUFF_MATCH) + s_end = 
s_coord + remainder; + else + s_end = s_coord - (op->genomic_length-remainder); + + if(strand()==CUFF_REV) + { + int scaff_len = length(); + s_start = scaff_len - 1 - s_start; + s_end = scaff_len - 1 - s_end; + swap(s_start, s_end); + } + + return make_pair(s_start, s_end); +} + +// Returns true only when both the start and end are found (ie, frag_len is known), which +// can only happen if the read is paired. Returned values that are equal to the trans_len +// should be ignored, as they are invalid. +// We can't use EmpDist unless it is unpaired since we call this function in inspect_bundle +// End is inclusive +// Returned indices are oriented with the transript! +bool Scaffold::map_frag(const MateHit& hit, int& start, int& end, int& frag_len) const +{ + + int trans_len = length(); + +// if (Scaffold(hit).augmented_ops() == augmented_ops()) +// { +// int a = 4; +// } + + start = trans_len; + end = trans_len; + + Scaffold h(hit); + if(!(contains(h) && Scaffold::compatible(*this, h))) + return false; + + if (hit.read_group_props()->complete_fragments()) + { + pair g_span = make_pair(hit.left(), hit.right() - 1); + pair t_span = genomic_to_transcript_span(g_span); + start = t_span.first; + end = t_span.second; + frag_len = abs(end-start)+1; + } + else if (hit.is_pair()) + { + pair g_span = hit.genomic_outer_span(); + pair t_span = genomic_to_transcript_span(g_span); + start = t_span.first; + end = t_span.second; + frag_len = abs(end-start)+1; + } + else if (hit.read_group_props()->mate_strand_mapping()==FF) + { + shared_ptr frag_len_dist = hit.read_group_props()->frag_len_dist(); + frag_len = min(frag_len_dist->mode(), trans_len); + } + else + { + shared_ptr frag_len_dist = hit.read_group_props()->frag_len_dist(); + + if ((hit.left_alignment()->antisense_align() && strand() != CUFF_REV) + || (!hit.left_alignment()->antisense_align() && strand() == CUFF_REV)) + { + int g_end = (strand()!=CUFF_REV) ? hit.right()-1:hit.left(); + end = genomic_to_transcript_coord(g_end); + frag_len = min(frag_len_dist->mode(), end); + } + else + { + int g_start = (strand()!=CUFF_REV) ? 
hit.left():hit.right()-1; + start = genomic_to_transcript_coord(g_start); + if (start == trans_len) // Overhang + frag_len = min(frag_len_dist->mode(), trans_len); + else + frag_len = min(frag_len_dist->mode(), trans_len-start); + } + } + + if (start <= 0 || start > trans_len) + start = trans_len; + if (end <= 0 || end > trans_len) + end = trans_len; + + return (start != trans_len && end != trans_len); +} + +int Scaffold::match_length(int left, int right) const +{ + int len = 0; + + + size_t curr_op = 0; + + while (curr_op != _augmented_ops.size()) + { + const AugmentedCuffOp& op = _augmented_ops[curr_op]; + + if (op.opcode == CUFF_MATCH && + ::overlap_in_genome(left, + right, + op.g_left(), + op.g_right())) + { + len += AugmentedCuffOp::match_length(op, left, right); + } + if (op.g_left() >= right) + break; + ++curr_op; + } + + return len; +} + + +void Scaffold::clear_hits() +{ + _mates_in_scaff.clear(); + vector(_mates_in_scaff).swap(_mates_in_scaff); + //_mates_in_scaff.clear(); +} + +bool Scaffold::add_hit(const MateHit* hit) +{ + Scaffold hs(*hit); + if (contains(hs) && + Scaffold::compatible(*this, hs)) + { + if (!binary_search(_mates_in_scaff.begin(), + _mates_in_scaff.end(), + hit)) + { + _mates_in_scaff.push_back(hit); + } + return true; + } + + return false; +} + + +void Scaffold::get_complete_subscaffolds(vector& complete) +{ + if (!has_unknown()) + { + complete.push_back(*this); + } + else + { + int last_unknown = -1; + int leftmost_known_op = -1; + int rightmost_known_op = -1; + for (size_t i = 0; i < _augmented_ops.size(); ++i) + { + const AugmentedCuffOp& op = _augmented_ops[i]; + if (op.opcode != CUFF_UNKNOWN) + { + if (leftmost_known_op == -1) + { + leftmost_known_op = i; + assert (_augmented_ops[leftmost_known_op].opcode != CUFF_INTRON); + } + rightmost_known_op = i; + } + if (op.opcode == CUFF_UNKNOWN || i == _augmented_ops.size() - 1) + { + int left_known; + int right_known; + + if (i == _augmented_ops.size() - 1) + right_known = right(); + else + right_known = op.g_left() - 1; + + if (last_unknown == -1) + left_known = left(); + else + left_known = _augmented_ops[last_unknown].g_right(); + + //fprintf(stderr, "excluding unknown region between %d-%d\n", left_known, right_known); + if (leftmost_known_op != -1 && rightmost_known_op != -1) + { + + vector known_ops; + known_ops.insert(known_ops.end(), + &_augmented_ops[leftmost_known_op], + &_augmented_ops[rightmost_known_op] + 1); + Scaffold known(ref_id(), strand(), known_ops); + + for (vector::iterator mitr = _mates_in_scaff.begin(); + mitr != _mates_in_scaff.end(); + ++mitr) + { + + const MateHit& m = **mitr; + if (left_known <= m.left() && m.right() <= right_known) + { + known.add_hit(&m); + } + } + + // if (known.has_intron()) + known.strand(_strand); + + //assert (!known.mate_hits().empty()); + //assert(!known.has_intron()|| known.strand() != CUFF_STRAND_UNKNOWN); + + // const vector& hits = known.mate_hits(); + // bool contains_spliced_hit = false; + // foreach (const MateHit* h, hits) + // { + // const ReadHit* left = h->left_alignment(); + // const ReadHit* right = h->right_alignment(); + // if (left && left->contains_splice()) + // { + // contains_spliced_hit = true; + // break; + // } + // if (right && right->contains_splice()) + // { + // contains_spliced_hit = true; + // break; + // } + // } + + // if (!hits.empty() && !contains_spliced_hit && !known.is_ref()) + // { + // known.strand(CUFF_STRAND_UNKNOWN); + // } + + complete.push_back(known); + } + + last_unknown = i; + leftmost_known_op = -1; + } + } + } +} 
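+// Fraction of the genomic span between the start of the first internal exon
+// and the end of the last internal exon that is covered by at least one hit
+// in this scaffold; returns 0.0 when there are no internal exons (fewer than
+// five augmented ops).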
+ +double Scaffold::internal_exon_coverage() const +{ + // First check if there are internal exons + if (augmented_ops().size() < 5) + return 0.0; + + int left = augmented_ops()[2].g_left(); + int right = augmented_ops()[augmented_ops().size() - 3].g_right(); + vector covered(right-left, 0); + foreach(const MateHit* h, mate_hits()) + { + if (::overlap_in_genome(h->left(),h->right(), left, right)) + { + for (int i = max(h->left()-left, 0); i < min(h->right()-left, right-left); ++i) + { + assert(i < covered.size()); + covered[i] = 1; + } + } + } + double percent_covered = accumulate(covered.begin(),covered.end(),0.0)/(right-left); + return percent_covered; +} + +bool Scaffold::has_strand_support(vector >* ref_scaffs) const +{ + if (strand() == CUFF_STRAND_UNKNOWN) + return false; + + // FIXME: This is not true for non-canonical splicing + if (has_intron()) + return true; + + foreach (const MateHit* h, mate_hits()) + { + if (h->strand() == strand()) + return true; + assert(h->strand() == CUFF_STRAND_UNKNOWN && !h->contains_splice()); + } + + if (ref_scaffs == NULL) + return false; + + foreach (shared_ptr ref_scaff, *ref_scaffs) + { + if (ref_scaff->strand() == strand() && exons_overlap(*this, *ref_scaff)) + return true; + } + + return false; +} + +bool Scaffold::has_struct_support(set& hit_introns) const +{ + if(augmented_ops().size() == 1) + return mate_hits().size() > 0; + + if(augmented_ops().size() == 3) + return hits_support_introns(hit_introns); + + return (hits_support_introns(hit_introns) && internal_exon_coverage() == 1.0); +} + + +bool Scaffold::hits_support_introns() const +{ + set hit_introns; + set scaffold_introns; + foreach(const MateHit* h, _mates_in_scaff) + { + Scaffold s(*h); + foreach (AugmentedCuffOp a, s.augmented_ops()) + { + if (a.opcode == CUFF_INTRON) + { + hit_introns.insert(a); + } + } + } + foreach (AugmentedCuffOp a, _augmented_ops) + { + if (a.opcode == CUFF_INTRON) + { + scaffold_introns.insert(a); + } + } + + if (hit_introns != scaffold_introns) + { + fprintf(stderr, "********************\n"); + foreach(const AugmentedCuffOp& a, hit_introns) + { + fprintf(stderr, "%d - %d\n", a.g_left(), a.g_right()); + } + + fprintf(stderr, "####################\n"); + foreach(const AugmentedCuffOp& a, scaffold_introns) + { + fprintf(stderr, "%d - %d\n", a.g_left(), a.g_right()); + } + } + + return hit_introns == scaffold_introns; +} + +bool Scaffold::hits_support_introns(set& hit_introns) const +{ + set scaffold_introns; + + foreach (AugmentedCuffOp a, _augmented_ops) + { + if (a.opcode == CUFF_INTRON) + { + scaffold_introns.insert(a); + } + } + + return includes(hit_introns.begin(),hit_introns.end(), scaffold_introns.begin(), scaffold_introns.end()); +} + +bool scaff_lt(const Scaffold& lhs, const Scaffold& rhs) +{ + return lhs.left() < rhs.left(); +} + +bool scaff_lt_rt(const Scaffold& lhs, const Scaffold& rhs) +{ + if (lhs.left() != rhs.left()) + return lhs.left() < rhs.left(); + return lhs.right() < rhs.right(); +} + +bool scaff_lt_rt_oplt(const Scaffold& lhs, const Scaffold& rhs) +{ + if (scaff_lt_rt(lhs, rhs)) + return true; + if (scaff_lt_rt(rhs, lhs)) + return false; + + // Now we need to actually compare the alignment ops + const vector& lhs_ops = lhs.augmented_ops(); + const vector& rhs_ops = rhs.augmented_ops(); + + if (lhs_ops.size() != rhs_ops.size()) + return lhs_ops.size() < rhs_ops.size(); + + for (size_t i = 0; i < lhs_ops.size(); ++i) + { + if (lhs_ops[i] != rhs_ops[i]) + { + return lhs_ops[i] < rhs_ops[i]; + } + } + + return false; +} + +bool 
scaff_lt_sp(shared_ptr lhs, shared_ptr rhs) +{ + return scaff_lt(*lhs,*rhs); +} + +bool scaff_lt_rt_sp(shared_ptr lhs, shared_ptr rhs) +{ + return scaff_lt_rt(*lhs,*rhs); +} + +bool scaff_lt_rt_oplt_sp(shared_ptr lhs, shared_ptr rhs) +{ + return scaff_lt_rt_oplt(*lhs,*rhs); +} diff --git a/src/scaffolds.h b/src/scaffolds.h new file mode 100644 index 0000000..0f29e80 --- /dev/null +++ b/src/scaffolds.h @@ -0,0 +1,698 @@ +#ifndef SCAFFOLDS_H +#define SCAFFOLDS_H +/* + * scaffolds.h + * cufflinks + * + * Created by Cole Trapnell on 3/30/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "common.h" +#include "hits.h" + +#include + +using namespace std; + +enum CuffOpCode { CUFF_MATCH, CUFF_INTRON, CUFF_UNKNOWN }; + +struct AugmentedCuffOp +{ + AugmentedCuffOp(const CuffOpCode& O, int g_off, int g_len) + : opcode(O), + genomic_offset(g_off), + genomic_length(g_len) + { + assert (genomic_length >= 0); + } + + void g_left(int left) + { + int right = genomic_offset + genomic_length; + genomic_offset = left; + genomic_length = right - left; + } + int g_left() const { return genomic_offset; } + + void g_right(int right) + { + genomic_length = right - genomic_offset; + } + int g_right() const { return genomic_offset + genomic_length; } + + static bool overlap_in_genome(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs) + { + if (lhs.g_left() >= rhs.g_left() && lhs.g_left() < rhs.g_right()) + return true; + if (lhs.g_right() > rhs.g_left() && lhs.g_right() < rhs.g_right()) + return true; + if (rhs.g_left() >= lhs.g_left() && rhs.g_left() < lhs.g_right()) + return true; + if (rhs.g_right() > lhs.g_left() && rhs.g_right() < lhs.g_right()) + return true; + return false; + } + + bool contains(const AugmentedCuffOp& other) const + { + if (g_left() <= other.g_left() && g_right() >= other.g_right()) + return true; + return false; + } + + bool properly_contains(const AugmentedCuffOp& other) const + { + if ((g_left() < other.g_left() && g_right() >= other.g_right()) || + (g_left() <= other.g_left() && g_right() > other.g_right())) + return true; + return false; + } + + static int match_length(const AugmentedCuffOp& op, int left, int right) + { + int len = 0; + int left_off = op.g_left(); + if (op.opcode == CUFF_MATCH) + { + if (left_off + op.genomic_length > left && left_off < right) + { + if (left_off > left) + { + if (left_off + op.genomic_length <= right + 1) + len += op.genomic_length; + else + len += right - left_off; + } + else + { + if (left_off + op.genomic_length <= right + 1) + len += (left_off + op.genomic_length - left); + else + return right - left; + } + } + } + return len; + } + + static bool compatible(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs, + int overhang_tolerance = bowtie_overhang_tolerance); + + bool operator==(const AugmentedCuffOp& rhs) const + { + return (opcode == rhs.opcode && + genomic_offset == rhs.genomic_offset && + genomic_length == rhs.genomic_length); + } + + bool operator<(const AugmentedCuffOp& rhs) const + { + if (opcode != rhs.opcode) + { + return opcode < rhs.opcode; + } + if (genomic_offset != rhs.genomic_offset) + { + return genomic_offset < rhs.genomic_offset; + } + if (genomic_length != rhs.genomic_length) + { + return genomic_length < rhs.genomic_length; + } + return false; + } + + static bool g_left_lt(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs); + + bool operator!=(const AugmentedCuffOp& rhs) const + { + return !(*this == rhs); + } + + 
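+    // Collapses a sorted list of ops into a single disjoint, contiguous list:
+    // overlapping matches and introns are unioned, and any remaining gaps are
+    // padded with CUFF_UNKNOWN ops (see the definition in scaffolds.cpp).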
static void merge_ops(const std::vector& ops, + vector& merged, + bool introns_overwrite_matches, + bool allow_flank_introns = false); + + static void fill_interstices(vector& to_fill, + const vector& filler, + bool allow_flank_fill, + bool allow_flank_introns); + + CuffOpCode opcode; + int genomic_offset; + int genomic_length; +}; + +class Scaffold +{ + void cuff_ops_from_cigar(vector& ops, + const vector& cig, + int& g_left) + { + for (size_t i = 0; i < cig.size(); ++i) + { + assert(cig[i].length >= 0); + switch(cig[i].opcode) + { + case MATCH: + ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, cig[i].length)); + g_left += cig[i].length; + break; + case REF_SKIP: + ops.push_back(AugmentedCuffOp(CUFF_INTRON, g_left, cig[i].length)); + g_left += cig[i].length; + break; + case SOFT_CLIP: + g_left += cig[i].length; + break; + case HARD_CLIP: + //g_left += cig[i].length; + break; + case INS: + //ops.back().genomic_length -= cig[i].length; + //g_left -= cig[i].length; + break; + case DEL: + if (!ops.empty()) + ops.back().genomic_length += cig[i].length; + g_left += cig[i].length; + break; + default: + assert(false); + break; + } + assert (ops.empty() || ops.back().genomic_length >= 1); + } + } + + RefID _ref_id; + +public: + + Scaffold() : + _ref_id(0), + _is_ref(false), + _strand(CUFF_STRAND_UNKNOWN), + _classcode(0), + _fpkm(0.0) {} + + Scaffold(const MateHit& mate) : + _ref_id(mate.ref_id()), + _is_ref(false), + _classcode(0) + { + const ReadHit* left_hit = mate.left_alignment(); + //CuffAlign a; + _strand = mate.strand(); + + vector aug_ops; + + if (left_hit) + { + const vector& l_cig = left_hit->cigar(); + int g_left = left_hit->left(); + cuff_ops_from_cigar(aug_ops, l_cig, g_left); + + const ReadHit* right_hit = mate.right_alignment(); + if (right_hit) + { + const vector& r_cig = right_hit->cigar(); + int gap = (right_hit->left() - g_left); + + if (gap < 0) + { + g_left += gap; + cuff_ops_from_cigar(aug_ops, r_cig, g_left); + } + else + { + if (gap > 0) + { + //if (gap < (int)min_intron_length) + // aug_ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, gap)); + //else + aug_ops.push_back(AugmentedCuffOp(CUFF_UNKNOWN, g_left, gap)); + g_left += gap; + + } + cuff_ops_from_cigar(aug_ops, r_cig, g_left); + } + + } + } + else + { + assert(false); + } + _mates_in_scaff.push_back(&mate); + sort(aug_ops.begin(), aug_ops.end(), AugmentedCuffOp::g_left_lt); + + for(size_t i = 0; i < aug_ops.size(); ++i) + { + assert (aug_ops[i].genomic_length >= 1); + } + + AugmentedCuffOp::merge_ops(aug_ops, _augmented_ops, false); + + int r_check = left(); + for (size_t i = 0; i < _augmented_ops.size(); ++i) + r_check += _augmented_ops[i].genomic_length; + +#ifdef DEBUG + if (r_check != right()) + { + AugmentedCuffOp::merge_ops(aug_ops, _augmented_ops, false); + } +#endif + assert (r_check == right()); + + _has_intron = has_intron(*this); + + assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN); + + for(size_t i = 1; i < _augmented_ops.size(); ++i) + { + assert(_augmented_ops[i-1].g_right() == _augmented_ops[i].g_left()); + } + + assert (_augmented_ops.front().opcode == CUFF_MATCH); + assert (_augmented_ops.back().opcode == CUFF_MATCH); + + if (library_type == "transfrags") + { + double avg_fpkm = mate.mass(); + fpkm(avg_fpkm); + } + } + + Scaffold(const vector& hits, bool introns_overwrite_matches = true) + : _is_ref(false), _classcode(0) + { + assert (!hits.empty()); + _ref_id = hits[0].ref_id(); + + Scaffold::merge(hits, *this, introns_overwrite_matches); + + assert(!has_strand_support() || 
_strand != CUFF_STRAND_UNKNOWN); + + assert (_augmented_ops.front().opcode == CUFF_MATCH); + assert (_augmented_ops.back().opcode == CUFF_MATCH); + + if (library_type == "transfrags") + { + double avg_fpkm = 0.0; + foreach (const Scaffold& sc, hits) + { + avg_fpkm += sc.fpkm(); + } + avg_fpkm /= hits.size(); + fpkm(avg_fpkm); + } + } + + // For manually constructing scaffolds, for example when a reference is + // available + Scaffold(RefID ref_id, CuffStrand strand, const vector& ops, bool is_ref = false) + : _ref_id(ref_id), + _augmented_ops(ops), + _strand(strand), + _classcode(0) + { + _has_intron = has_intron(*this); + _is_ref = is_ref; + + assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN); + + assert (_augmented_ops.front().opcode == CUFF_MATCH); + assert (_augmented_ops.back().opcode == CUFF_MATCH); + } + + //int get_id() const { return _id; } + + int left() const { return _augmented_ops.front().g_left(); } + int right() const { return _augmented_ops.back().g_right(); } + + const vector& mate_hits() const { return _mates_in_scaff; } + + RefID ref_id() const { return _ref_id; } + void ref_id(RefID rid) { _ref_id = rid; } + + const string& annotated_trans_id() const { return _annotated_trans_id; } + void annotated_trans_id(const string& ann_name) { _annotated_trans_id = ann_name; } + + const string& annotated_gene_id() const { return _annotated_gene_id; } + void annotated_gene_id(const string& ann_name) { _annotated_gene_id = ann_name; } + + const string& annotated_gene_name() const { return _annotated_gene_name; } + void annotated_gene_name(const string& ann_name) { _annotated_gene_name = ann_name; } + + const string& annotated_protein_id() const { return _annotated_protein_id; } + void annotated_protein_id(const string& ann_name) { _annotated_protein_id = ann_name; } + + const string& annotated_tss_id() const { return _annotated_tss_id; } + void annotated_tss_id(const string& ann_name) { _annotated_tss_id = ann_name; } + + const string& nearest_ref_id() const { return _nearest_ref_id; } + void nearest_ref_id(const string& ann_name) { _nearest_ref_id = ann_name; } + + double fpkm() const {return _fpkm; } + void fpkm(double fpkm) { _fpkm = fpkm; } + + const string& seq() const { return _seq; } + void seq(const string& s) { _seq = s; } + + double gc_content() const + { + if (_seq != "") + { + int count = 0; + for(size_t i = 0; i < _seq.length(); ++i) + { + if (_seq[i] == 'G' or _seq[i] == 'g' or _seq[i] == 'C' or _seq[i] == 'c') + count ++; + } + return count/double(_seq.length()); + } + return -1.0; + } + + char nearest_ref_classcode() const { return _classcode; } + void nearest_ref_classcode(char cc) { _classcode = cc; } + + bool has_intron() const { return _has_intron; } + bool has_suspicious_unknown() const { return has_suspicious_unknown(*this); } + + // returns the fraction coverage of internal exons, returns 0 if no internal exons + double internal_exon_coverage() const; + + // returns true if the scaffold strand is supported with reads or exon overlap with + // a reference scaffold of known strand (since the scaffold may have been created with faux reads) + bool has_strand_support(vector >* ref_scaffs = NULL) const; + + // returns true if all introns are supported with splice reads, false ow + bool hits_support_introns() const; + bool hits_support_introns(set& hit_introns) const; + + // returns true if all internal exons are fully covered and hits support introns, false ow + bool has_struct_support(set& hit_introns) const; + + bool is_ref() const { return _is_ref; 
} + void is_ref(bool ir) { _is_ref = ir; } + + CuffStrand strand() const + { + return _strand; + } + + void strand(CuffStrand strand) + { + assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN); + _strand = strand; + } + + // Could we merge lhs and rhs together? + static bool compatible(const Scaffold& lhs, + const Scaffold& rhs, + int overhang_tolerance = bowtie_overhang_tolerance); + + static bool strand_agree(const Scaffold& lhs, + const Scaffold& rhs); + + static bool exons_overlap(const Scaffold& lhs, + const Scaffold& rhs); + + // Incorporate Scaffold chow into this one. + static void merge(const Scaffold& lhs, + const Scaffold& rhs, + Scaffold& merged, + bool introns_overwrite_matches); + + static void merge(const vector& s, + Scaffold& merged, + bool introns_overwrite_matches); + + // Extend 5' end using beginning of other scaffold without adding new exons. + void extend_5(const Scaffold& other); + // Clip final 3' exon by given amount + void trim_3(int to_remove); + + // Extend final 3' exon by given amount + void extend_3(int to_add); + + void tile_with_scaffs(vector& tile_scaffs, int max_len, int tile_offset) const; + + // Creates a scaffold that matches this one but only covers the section from g_left for + // a distance of match_length. It is assumed that this region is contained in the scaffold. + // sub_scaff should be an empty Scaffold object. + bool sub_scaffold(Scaffold& sub_scaff, int g_left, int match_length) const; + + // Tests whether the other scaffold is contained allowing the given overhang + bool contains(const Scaffold& other, int ohang_5 = 0, int ohang_3 = 0) const + { + if (left() <= other.left() && right()>= other.right()) + return true; + + if (!(ohang_5 || ohang_3)) + return false; + + int left_hang; + int right_hang; + switch(strand()) + { + case CUFF_FWD: + left_hang = ohang_5; + right_hang = ohang_3; + break; + case CUFF_REV: + left_hang = ohang_3; + right_hang = ohang_5; + break; + default: + left_hang = max(ohang_3, ohang_5); + right_hang = left_hang; + } + + // Test to see if it is contained within the relaxed boundaries + if ((left()-left_hang) <= other.left() && (right() + right_hang) >= other.right()) + { + // Ensure that there are no exons outside of the strict boundaries + return (other.augmented_ops().front().g_right() > left() && other.augmented_ops().back().g_left() < right()); + } + + return false; + + } + + // Tests whether the other scaffold contains the 5' end and is contained (allowing some overhang) on the 3' end + // There can be no additional exons on the 5' end + bool overlapped_3(const Scaffold& other, int ohang_5 = 0, int ohang_3 = 0) const + { + switch(strand()) + { + case CUFF_FWD: + return((left() + ohang_5 >= other.left() && right() + ohang_3 >= other.right()) && other.augmented_ops().front().g_right() > left()); + case CUFF_REV: + return ((right() - ohang_5 <= other.right() && left() - ohang_3 <= other.left()) && other.augmented_ops().back().g_left() < right()); + default: + return false; + } + } + + int match_length(int left, int right) const; + + int length() const + { + + if(_seq != "") + return _seq.length(); + + int len = 0; + + // FIXME: this estimate really should include estimates of the CUFF_UNKNOWN lengths + // for better abundance estimation. 
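+        // As written, only CUFF_MATCH ops contribute to the length; CUFF_INTRON
+        // and CUFF_UNKNOWN ops are skipped entirely.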
+ + for (size_t j = 0; j < _augmented_ops.size(); ++j) + { + if (_augmented_ops[j].opcode == CUFF_MATCH) + len += _augmented_ops[j].genomic_length; + } + + + return len; + } + + //void augmented_ops(vector& augmented) const; + + // The "score" of a merge is the log odds of lhs and rhs coming from + // different transcripts. We want to minimize the total score of + // all of our merges. + static double score_merge(const Scaffold& lhs, const Scaffold& rhs); + + double score() const; + + double worst_mate_score() const; + + pair genomic_to_transcript_span(pair g_span) const; + int genomic_to_transcript_coord(int g_coord) const; + bool map_frag(const MateHit& hit, int& start, int& end, int& frag_len) const; + + + static bool g_left_lt(const AugmentedCuffOp& lhs, + const AugmentedCuffOp& rhs); + + const vector& augmented_ops() const { return _augmented_ops; } + void augmented_ops(vector aug_ops) { _augmented_ops = aug_ops; } + + + static bool overlap_in_genome(const Scaffold& lhs, + const Scaffold& rhs, + int overlap_radius); + + vector > gaps() const + { + vector > g; + const vector& ops = augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + if (ops[j].opcode == CUFF_INTRON) + { + g.push_back(make_pair(ops[j].g_left(), ops[j].g_right())); + } + } + return g; + } + + inline bool has_unknown() const + { + const vector& ops = augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + if (ops[j].opcode == CUFF_UNKNOWN) + return true; + } + return false; + } + + // Fills in CUFF_UNKNOWNs up to filled_gap_size long + void fill_gaps(int filled_gap_size); + + // Fills in CUFF_UNKNOWNs with the contents of filler. Filler must be + // a sortted, contiguous, non-overlapping vector of AugmentedCuffOps + void fill_gaps(const vector& filler); + + void clear_hits(); + bool add_hit(const MateHit*); + + void get_complete_subscaffolds(vector& complete); +private: + + void initialize_exon_lists(); + + static bool has_intron(const Scaffold& scaff) + { + + const vector& ops = scaff.augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + if (ops[j].opcode == CUFF_INTRON) + return true; + } + + return false; + } + + static bool has_suspicious_unknown(const Scaffold& scaff) + { + + const vector& ops = scaff.augmented_ops(); + for (size_t j = 0; j < ops.size(); ++j) + { + if (ops[j].opcode == CUFF_UNKNOWN && + ops[j].genomic_length > max_frag_len) + return true; + } + + return false; + } + + static double score_overlap(const AugmentedCuffOp& op, + int inner_left_edge, + int inner_right_edge); + + static double score_contigs(const Scaffold& contig, + const vector >& inners); + + static double min_score(const Scaffold& contig, + const vector >& inners); + + static bool compatible_contigs(const Scaffold& lhs, + const Scaffold& rhs, + int overhang_tolerance = bowtie_overhang_tolerance); + + + typedef vector OpList; + + bool check_merge_length(const vector& ops); + + static void fill_interstices(vector& to_fill, + const vector& filler, + bool allow_flank_fill); + + static void merge_ops(const std::vector& ops, + vector& merged, + bool introns_overwrite_matches); + + vector _mates_in_scaff; + + bool _has_intron; + bool _is_ref; + + vector _augmented_ops; + CuffStrand _strand; + + string _annotated_trans_id; + string _annotated_gene_id; + string _annotated_gene_name; + string _annotated_protein_id; + string _annotated_tss_id; + string _nearest_ref_id; + char _classcode; + + string _seq; + double _fpkm; + +}; + +bool scaff_lt(const Scaffold& lhs, const Scaffold& rhs); +bool scaff_lt_rt(const 
Scaffold& lhs, const Scaffold& rhs); +bool scaff_lt_rt_oplt(const Scaffold& lhs, const Scaffold& rhs); +bool scaff_lt_sp(shared_ptr lhs, shared_ptr rhs); +bool scaff_lt_rt_sp(shared_ptr lhs, shared_ptr rhs); +bool scaff_lt_rt_oplt_sp(shared_ptr lhs, shared_ptr rhs); + + +bool overlap_in_genome(int ll, int lr, int rl, int rr); + +struct StructurallyEqualScaffolds +{ + bool operator()(shared_ptr lhs, shared_ptr rhs) + { + return lhs->ref_id() == rhs->ref_id() && + lhs->augmented_ops() == rhs->augmented_ops(); + } +}; + +#endif diff --git a/src/tokenize.cpp b/src/tokenize.cpp new file mode 100644 index 0000000..99e1e9a --- /dev/null +++ b/src/tokenize.cpp @@ -0,0 +1,39 @@ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +using namespace std; + +/** + * Split string s according to given delimiters. Mostly borrowed + * from C++ Programming HOWTO 7.3. + */ +void tokenize(const string& s, const string& delims, vector& ss) { + string::size_type lastPos = s.find_first_not_of(delims, 0); + string::size_type pos = s.find_first_of(delims, lastPos); + while (string::npos != pos || string::npos != lastPos) { + ss.push_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delims, pos); + pos = s.find_first_of(delims, lastPos); + } +} + +/** + * Split string s according to given delimiters. If two delimiters occur in + * succession, sticks an empty string token at that position in ss + */ +void tokenize_strict(const string& s, const string& delims, vector& ss) { + string::size_type lastPos = s.find_first_not_of(delims, 0); + string::size_type pos = s.find_first_of(delims, lastPos); + while (lastPos < s.length() || pos < s.length()) { + ss.push_back(s.substr(lastPos, pos - lastPos)); + if (pos == string::npos) + break; + lastPos = pos + 1; + pos = s.find_first_of(delims, lastPos); + } +} diff --git a/src/tokenize.h b/src/tokenize.h new file mode 100644 index 0000000..d1a7503 --- /dev/null +++ b/src/tokenize.h @@ -0,0 +1,18 @@ +#ifndef TOKENIZE_H_ +#define TOKENIZE_H_ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +void tokenize(const std::string& s, + const std::string& delims, + std::vector& ss); + +void tokenize_strict(const std::string& s, + const std::string& delims, + std::vector& ss); +#endif /*TOKENIZE_H_*/ diff --git a/src/transitive_closure.h b/src/transitive_closure.h new file mode 100644 index 0000000..fc23976 --- /dev/null +++ b/src/transitive_closure.h @@ -0,0 +1,391 @@ +/* + * transitive_closure.h + * cufflinks + * + * Created by Cole Trapnell on 3/26/09. + * Copyright 2009 Cole Trapnell. All rights reserved. + * + */ + +// This file was modified from transitive_closure.hpp in Boost. +// The original copyright info is listed below + +// Copyright (C) 2001 Vladimir Prus +// Copyright (C) 2001 Jeremy Siek +// Distributed under the Boost Software License, Version 1.0. 
(See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_GRAPH_TRANSITIVE_CLOSURE_HPP +#define BOOST_GRAPH_TRANSITIVE_CLOSURE_HPP + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include // for std::min and std::max +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace boost; + +typedef uint16_t v_id_size_type; + +inline void +union_successor_sets(const std::vector < v_id_size_type > &s1, + const std::vector < v_id_size_type > &s2, + std::vector < v_id_size_type > &s3) +{ + BOOST_USING_STD_MIN(); + for (std::size_t k = 0; k < s1.size(); ++k) + s3[k] = min BOOST_PREVENT_MACRO_SUBSTITUTION(s1[k], s2[k]); +} + +template < typename Container, + typename ST = std::size_t, + typename VT = typename Container::value_type > +struct subscript_t : public std::unary_function < ST, VT > +{ + typedef VT& result_type; + + subscript_t(Container & c):container(&c) + { + } + VT & operator() (const ST & i) const + { + return (*container)[i]; + } +protected: + Container * container; +}; + +template < typename Container > +subscript_t < Container > subscript(Container & c) { + return subscript_t < Container > (c); +} + +template < typename Graph, + typename GraphTC, + typename G_to_TC_VertexMap, + typename VertexIndexMap > +void transitive_closure(const Graph & g, GraphTC & tc, + G_to_TC_VertexMap g_to_tc_map, + VertexIndexMap index_map) +{ + if (num_vertices(g) == 0) + return; + typedef typename graph_traits < Graph >::vertex_descriptor vertex; + typedef typename graph_traits < Graph >::edge_descriptor edge; + typedef typename graph_traits < Graph >::vertex_iterator vertex_iterator; + //typedef typename property_traits < VertexIndexMap >::value_type size_type; + + typedef typename graph_traits < + Graph >::adjacency_iterator adjacency_iterator; + + function_requires < VertexListGraphConcept < Graph > >(); + function_requires < AdjacencyGraphConcept < Graph > >(); + function_requires < VertexMutableGraphConcept < GraphTC > >(); + function_requires < EdgeMutableGraphConcept < GraphTC > >(); + function_requires < ReadablePropertyMapConcept < VertexIndexMap, + vertex > >(); + + typedef v_id_size_type cg_vertex; + std::vector < cg_vertex > component_number_vec(num_vertices(g)); + iterator_property_map < cg_vertex *, VertexIndexMap, cg_vertex, cg_vertex& > + component_number(&component_number_vec[0], index_map); + + //int num_scc = strong_components(g, component_number, + // vertex_index_map(index_map)); + + size_t cn = 0; + vertex_iterator cu, cu_end; + for (tie(cu, cu_end) = vertices(g); cu != cu_end; ++cu) { + component_number[*cu] = cn++; + //fprintf(stderr, "%d\n", component_number[*cu]); + } + + std::vector < std::vector < vertex > >components; + build_component_lists(g, num_vertices(g), component_number, components); + + typedef std::vector > CG_t; + CG_t CG(num_vertices(g)); + + for (cg_vertex s = 0; s < components.size(); ++s) { + std::vector < cg_vertex > adj; + + vertex u = components[s][0]; + + adjacency_iterator v, v_end; + for (tie(v, v_end) = adjacent_vertices(u, g); v != v_end; ++v) { + cg_vertex t = component_number[*v]; + if (s != t) // Avoid loops in the condensation graph + adj.push_back(t); + } + + std::sort(adj.begin(), adj.end()); + typename std::vector::iterator di = + std::unique(adj.begin(), adj.end()); + if (di != adj.end()) + adj.erase(di, adj.end()); + CG[s] = adj; + } + + std::vector topo_order; + std::vector topo_number(num_vertices(CG)); + topological_sort(CG, 
std::back_inserter(topo_order), + vertex_index_map(identity_property_map())); + std::reverse(topo_order.begin(), topo_order.end()); + v_id_size_type n = 0; + for (typename std::vector::iterator iter = topo_order.begin(); + iter != topo_order.end(); ++iter) + topo_number[*iter] = n++; + + for (size_t i = 0; i < num_vertices(CG); ++i) + std::sort(CG[i].begin(), CG[i].end(), + boost::bind(std::less(), + boost::bind(subscript(topo_number), _1), + boost::bind(subscript(topo_number), _2))); + + std::vector > chains; + { + std::vector in_a_chain(num_vertices(CG)); + for (typename std::vector::iterator i = topo_order.begin(); + i != topo_order.end(); ++i) { + cg_vertex v = *i; + if (!in_a_chain[v]) { + chains.resize(chains.size() + 1); + std::vector& chain = chains.back(); + for (;;) { + chain.push_back(v); + in_a_chain[v] = true; + typename graph_traits::adjacency_iterator adj_first, adj_last; + tie(adj_first, adj_last) = adjacent_vertices(v, CG); + typename graph_traits::adjacency_iterator next + = std::find_if(adj_first, adj_last, + std::not1(subscript(in_a_chain))); + if (next != adj_last) + v = *next; + else + break; // end of chain, dead-end + + } + } + } + } + std::vector chain_number(num_vertices(CG)); + std::vector pos_in_chain(num_vertices(CG)); + for (size_t i = 0; i < chains.size(); ++i) + for (size_t j = 0; j < chains[i].size(); ++j) { + cg_vertex v = chains[i][j]; + chain_number[v] = i; + pos_in_chain[v] = j; + } + + cg_vertex inf = (std::numeric_limits< cg_vertex >::max)(); + std::vector > successors(num_vertices(CG), + std::vector + (chains.size(), inf)); + for (typename std::vector::reverse_iterator + i = topo_order.rbegin(); i != topo_order.rend(); ++i) { + cg_vertex u = *i; + typename graph_traits::adjacency_iterator adj, adj_last; + for (tie(adj, adj_last) = adjacent_vertices(u, CG); + adj != adj_last; ++adj) { + cg_vertex v = *adj; + if (topo_number[v] < successors[u][chain_number[v]]) { + // Succ(u) = Succ(u) U Succ(v) + union_successor_sets(successors[u], successors[v], + successors[u]); + // Succ(u) = Succ(u) U {v} + successors[u][chain_number[v]] = topo_number[v]; + } + } + } + + for (size_t i = 0; i < CG.size(); ++i) + CG[i].clear(); + for (size_t i = 0; i < CG.size(); ++i) + for (size_t j = 0; j < chains.size(); ++j) { + size_t topo_num = successors[i][j]; + if (topo_num < inf) { + cg_vertex v = topo_order[topo_num]; + for (size_t k = pos_in_chain[v]; k < chains[j].size(); ++k) + CG[i].push_back(chains[j][k]); + } + } + + + // Add vertices to the transitive closure graph + typedef typename graph_traits < GraphTC >::vertex_descriptor tc_vertex; + { + vertex_iterator i, i_end; + for (tie(i, i_end) = boost::vertices(g); i != i_end; ++i) + g_to_tc_map[*i] = add_vertex(tc); + } + // Add edges between all the vertices in two adjacent SCCs + typename graph_traits::vertex_iterator si, si_end; + for (tie(si, si_end) = boost::vertices(CG); si != si_end; ++si) { + cg_vertex s = *si; + typename graph_traits::adjacency_iterator i, i_end; + for (tie(i, i_end) = adjacent_vertices(s, CG); i != i_end; ++i) { + cg_vertex t = *i; + for (size_t k = 0; k < components[s].size(); ++k) + for (size_t l = 0; l < components[t].size(); ++l) + add_edge(g_to_tc_map[components[s][k]], + g_to_tc_map[components[t][l]], tc); + } + } + // Add edges connecting all vertices in a SCC + for (size_t i = 0; i < components.size(); ++i) + if (components[i].size() > 1) + for (size_t k = 0; k < components[i].size(); ++k) + for (size_t l = 0; l < components[i].size(); ++l) { + vertex u = components[i][k], v = 
components[i][l]; + add_edge(g_to_tc_map[u], g_to_tc_map[v], tc); + } + + // Find loopbacks in the original graph. + // Need to add it to transitive closure. + { + vertex_iterator i, i_end; + for (tie(i, i_end) = vertices(g); i != i_end; ++i) + { + adjacency_iterator ab, ae; + for (boost::tie(ab, ae) = adjacent_vertices(*i, g); ab != ae; ++ab) + { + if (*ab == *i) + if (components[component_number[*i]].size() == 1) + add_edge(g_to_tc_map[*i], g_to_tc_map[*i], tc); + } + } + } +} + +template +void transitive_closure(const Graph & g, GraphTC & tc) +{ + if (num_vertices(g) == 0) + return; + typedef typename property_map::const_type + VertexIndexMap; + VertexIndexMap index_map = get(vertex_index, g); + + typedef typename graph_traits::vertex_descriptor tc_vertex; + std::vector to_tc_vec(num_vertices(g)); + iterator_property_map < tc_vertex *, VertexIndexMap, tc_vertex, tc_vertex&> + g_to_tc_map(&to_tc_vec[0], index_map); + + transitive_closure(g, tc, g_to_tc_map, index_map); +} + + +template < typename Graph, typename GraphTC, typename G_to_TC_VertexMap, +typename VertexIndexMap> +void transitive_closure_dispatch +(const Graph & g, GraphTC & tc, + G_to_TC_VertexMap g_to_tc_map, VertexIndexMap index_map) +{ + typedef typename graph_traits < GraphTC >::vertex_descriptor tc_vertex; + typename std::vector < tc_vertex >::size_type + n = is_default_param(g_to_tc_map) ? num_vertices(g) : 1; + std::vector < tc_vertex > to_tc_vec(n); + + transitive_closure + (g, tc, + choose_param(g_to_tc_map, make_iterator_property_map + (to_tc_vec.begin(), index_map, to_tc_vec[0])), + index_map); +} + + +template < typename Graph, typename GraphTC, +typename P, typename T, typename R > +void transitive_closure(const Graph & g, GraphTC & tc, + const bgl_named_params < P, T, R > ¶ms) +{ + if (num_vertices(g) == 0) + return; + transitive_closure_dispatch + (g, tc, get_param(params, orig_to_copy_t()), + choose_const_pmap(get_param(params, vertex_index), g, vertex_index) ); +} + + +template < typename G > void warshall_transitive_closure(G & g) +{ + typedef typename graph_traits < G >::vertex_descriptor vertex; + typedef typename graph_traits < G >::vertex_iterator vertex_iterator; + + function_requires < AdjacencyMatrixConcept < G > >(); + function_requires < EdgeMutableGraphConcept < G > >(); + + // Matrix form: + // for k + // for i + // if A[i,k] + // for j + // A[i,j] = A[i,j] | A[k,j] + vertex_iterator ki, ke, ii, ie, ji, je; + for (tie(ki, ke) = vertices(g); ki != ke; ++ki) + for (tie(ii, ie) = vertices(g); ii != ie; ++ii) + if (edge(*ii, *ki, g).second) + for (tie(ji, je) = vertices(g); ji != je; ++ji) + if (!edge(*ii, *ji, g).second && edge(*ki, *ji, g).second) { + add_edge(*ii, *ji, g); + } +} + + +template < typename G > void warren_transitive_closure(G & g) +{ + using namespace boost; + typedef typename graph_traits < G >::vertex_descriptor vertex; + typedef typename graph_traits < G >::vertex_iterator vertex_iterator; + + function_requires < AdjacencyMatrixConcept < G > >(); + function_requires < EdgeMutableGraphConcept < G > >(); + + // Make sure second loop will work + if (num_vertices(g) == 0) + return; + + // for i = 2 to n + // for k = 1 to i - 1 + // if A[i,k] + // for j = 1 to n + // A[i,j] = A[i,j] | A[k,j] + + vertex_iterator ic, ie, jc, je, kc, ke; + for (tie(ic, ie) = vertices(g), ++ic; ic != ie; ++ic) + for (tie(kc, ke) = vertices(g); *kc != *ic; ++kc) + if (edge(*ic, *kc, g).second) + for (tie(jc, je) = vertices(g); jc != je; ++jc) + if (!edge(*ic, *jc, g).second && edge(*kc, *jc, g).second) { + 
add_edge(*ic, *jc, g); + } + // for i = 1 to n - 1 + // for k = i + 1 to n + // if A[i,k] + // for j = 1 to n + // A[i,j] = A[i,j] | A[k,j] + + for (tie(ic, ie) = vertices(g), --ie; ic != ie; ++ic) + for (kc = ic, ke = ie, ++kc; kc != ke; ++kc) + if (edge(*ic, *kc, g).second) + for (tie(jc, je) = vertices(g); jc != je; ++jc) + if (!edge(*ic, *jc, g).second && edge(*kc, *jc, g).second) { + add_edge(*ic, *jc, g); + } +} + + +#endif + + diff --git a/src/transitive_reduction.h b/src/transitive_reduction.h new file mode 100644 index 0000000..25802ae --- /dev/null +++ b/src/transitive_reduction.h @@ -0,0 +1,129 @@ +// (C) Copyright 2009 Eric Bose-Wolf +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0 (See accompanying file +// LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_GRAPH_TRANSITIVE_REDUCTION_HPP +#define BOOST_GRAPH_TRANSITIVE_REDUCTION_HPP + +#include +#include //std::find +#include +#include + +#include +#include + +// also I didn't got all of the concepts thin. Am I suppose to check +// for all concepts, which are needed for functions I call? (As if I +// wouldn't do that, the users would see the functions called by +// complaining about missings concepts, which would be clearly an error +// message revealing internal implementation and should therefore be avoided?) + +// the pseudocode which I followed implementing this algorithmn was taken +// from the german book Algorithmische Graphentheorie by Volker Turau +// it is proposed to be of O(n + nm_red ) where n is the number +// of vertices and m_red is the number of edges in the transitive +// reduction, but I think my implementation spoiled this up at some point +// indicated below. + +namespace boost { + + template < + typename Graph, typename GraphTR, typename G_to_TR_VertexMap, + typename VertexIndexMap + > + BOOST_CONCEPT_REQUIRES( + ((VertexListGraphConcept< Graph >)) + ((IncidenceGraphConcept< Graph >)) + ((MutableGraphConcept< GraphTR >)) + ((ReadablePropertyMapConcept< VertexIndexMap, + typename graph_traits::vertex_descriptor >)) + ((Integer< typename + property_traits< VertexIndexMap >::value_type >)) + ((LvaluePropertyMapConcept< G_to_TR_VertexMap, + typename graph_traits::vertex_descriptor >)), + (void)) + transitive_reduction(const Graph& g, GraphTR& tr, + G_to_TR_VertexMap g_to_tr_map, + VertexIndexMap g_index_map ) + { + typedef typename graph_traits::vertex_descriptor Vertex; + typedef typename graph_traits::vertex_iterator VertexIterator; + typedef typename std::vector::size_type size_type; + + std::vector topo_order; + topological_sort(g, std::back_inserter(topo_order)); + + std::vector topo_number_storage(num_vertices(g)); + + iterator_property_map topo_number( &topo_number_storage[0], g_index_map ); + + { + typename std::vector::reverse_iterator it = topo_order.rbegin(); + size_type n = 0; + for(; it != topo_order.rend(); ++it,++n ) { + topo_number[ *it ] = n; + } + } + + std::vector< std::vector< bool > > edge_in_closure(num_vertices(g), + std::vector( num_vertices(g), false)); + { + typename std::vector::reverse_iterator it = topo_order.rbegin(); + for( ; it != topo_order.rend(); ++it ) { + g_to_tr_map[*it] = add_vertex(tr); + } + } + + typename std::vector::iterator + it = topo_order.begin(), + end = topo_order.end(); + for( ; it != end; ++it ) { + size_type i = topo_number[ *it ]; + edge_in_closure[i][i] = true; + std::vector neighbors; + + //I have to collect the successors of *it and traverse them in + //ascending topological 
order. The successors of *it are therefore collected here first.
+        {
+            typename Graph::out_edge_iterator oi, oi_end;
+            for (tie(oi, oi_end) = out_edges(*it, g); oi != oi_end; ++oi) {
+                neighbors.push_back(target(*oi, g));
+            }
+        }
+
+        {
+            // ...and then every vertex is visited in topological order,
+            typename std::vector::reverse_iterator rit = topo_order.rbegin();
+            typename std::vector::reverse_iterator rend = topo_order.rend();
+            for (; rit != rend; ++rit) {
+                // checking whether it is a successor of *it.
+                if (std::find(neighbors.begin(), neighbors.end(), *rit) != neighbors.end()) {
+                    size_type j = topo_number[*rit];
+                    if (not edge_in_closure[i][j]) {
+                        for (size_type k = j; k < num_vertices(g); ++k) {
+                            if (not edge_in_closure[i][k]) {
+                                // edge_in_closure is indexed by topological number,
+                                edge_in_closure[i][k] = edge_in_closure[j][k];
+                            }
+                        }
+                        // which is why it is only ever accessed through the
+                        // topo_number property map.
+                        add_edge(g_to_tr_map[*it], g_to_tr_map[*rit], tr);
+                    } // if (not edge_in_closure[i][j])
+                } // if (std::find(...))
+            } // for (reverse_iterator over topo_order)
+        }
+
+    } // for over topo_order
+
+} // void transitive_reduction
+
+} // namespace boost
+
+#endif
+
diff --git a/src/update_check.h b/src/update_check.h
new file mode 100644
index 0000000..703c2f8
--- /dev/null
+++ b/src/update_check.h
@@ -0,0 +1,112 @@
+/*
+ *  update_check.h
+ *  cufflinks
+ *  Based on code from http://www.linuxhowtos.org/C_C++/socket.htm
+ *  Modified by Adam Roberts on 1/18/11.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+int NUM_SEPS = 3;
+int CONNECT_TIMEOUT = 5;
+
+static int sTimeout = 0;
+
+static void AlarmHandler(int sig)
+{
+    sTimeout = 1;
+}
+
+// Always returns false so callers can write "return error(...)".
+bool error(const char *msg)
+{
+    return false;
+}
+
+// Packs a dotted version string (e.g. "1.0.3") into a single integer by
+// weighting each field with a power of 100, so versions compare numerically.
+int parse_version_str(char* version_str)
+{
+    int version_int = 0;
+    char* token = strtok(version_str, ".");
+    for (int i = 0; i < NUM_SEPS && token != NULL; ++i)
+    {
+        version_int += atoi(token) * pow(100., NUM_SEPS - i);
+        token = strtok(NULL, ".");  // advance to the next dotted field
+    }
+    return version_int;
+}
+
+bool get_current_version(char* curr_version)
+{
+    int sockfd, portno, n;
+    struct sockaddr_in serv_addr;
+    struct hostent *server;
+
+    portno = 80;
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    if (sockfd < 0)
+        return error("ERROR opening socket");
+
+    server = gethostbyname("cufflinks.cbcb.umd.edu");
+    if (server == NULL)
+        return error("ERROR, no such host");
+
+    bzero((char *) &serv_addr, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    bcopy((char *)server->h_addr,
+          (char *)&serv_addr.sin_addr.s_addr,
+          server->h_length);
+    serv_addr.sin_port = htons(portno);
+
+    // Abort the connection attempt if it takes longer than CONNECT_TIMEOUT seconds.
+    signal(SIGALRM, AlarmHandler);
+    sTimeout = 0;
+    alarm(CONNECT_TIMEOUT);
+
+    int ret;
+    ret = connect(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr));
+    if (ret < 0 || sTimeout)
+    {
+        return error("ERROR connecting");
+    }
+
+    char buffer[1024];
+    strcpy(buffer, "GET /curr_cuff_version HTTP/1.1\nHost: cufflinks.cbcb.umd.edu\n\n");
+    n = write(sockfd, buffer, strlen(buffer));  // send only the request itself
+
+    if (n < 0)
+        return error("ERROR writing to socket");
+    curr_version[0] = '\0';  // clear any previous contents of the caller's buffer
+    n = read(sockfd, buffer, sizeof(buffer) - 1);
+    if (n < 0)
+        return error("ERROR reading from socket");
+    buffer[n] = '\0';  // ensure the response is a terminated string before strtok()
+
+    // The version is expected as the second '$'-delimited token of the response.
+    char* token;
+    token = strtok(buffer, "$");
+    token = strtok(NULL, "$");
+    if (token == NULL)
+        return error("ERROR parsing response");
+
+    strcpy(curr_version, token);
+
+    return true;
+}
+
+void check_version(const char* this_version)
+{
+    char curr_version[256];
+    memset(curr_version, 0, sizeof(curr_version));
+    if (get_current_version(curr_version))
+    {
+        if (strcmp(curr_version, this_version) == 0)
+            fprintf(stderr, "You are using Cufflinks v%s, which is the most recent release.\n", PACKAGE_VERSION);
+        else
+            fprintf(stderr, "Warning: Your version of Cufflinks is not up-to-date. It is recommended that you upgrade to Cufflinks v%s to benefit from the most recent features and bug fixes (http://cufflinks.cbcb.umd.edu).\n", curr_version);
+    }
+    else
+    {
+        fprintf(stderr, "Warning: Could not connect to update server to verify current version. Please check at the Cufflinks website (http://cufflinks.cbcb.umd.edu).\n");
+    }
+}
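+
+// Usage sketch (illustrative): a client binary would typically call
+// check_version() once at startup with its own version string, for example
+//
+//     check_version(PACKAGE_VERSION);
+//
+// get_current_version() expects the version to arrive as the second
+// '$'-delimited token of the /curr_cuff_version response (see the strtok()
+// calls above), and check_version() compares it to this_version with a
+// plain strcmp().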