diff --git a/dep/tbb/Makefile b/dep/tbb/Makefile new file mode 100644 index 000000000..ceac272c3 --- /dev/null +++ b/dep/tbb/Makefile @@ -0,0 +1,85 @@ +# Copyright 2005-2009 Intel Corporation. All Rights Reserved. +# +# This file is part of Threading Building Blocks. +# +# Threading Building Blocks is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# version 2 as published by the Free Software Foundation. +# +# Threading Building Blocks is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Threading Building Blocks; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +# As a special exception, you may use this file as part of a free software +# library without restriction. Specifically, if other files instantiate +# templates or use macros or inline functions from this file, or you compile +# this file and link it with other files to produce an executable, this +# file does not by itself cause the resulting executable to be covered by +# the GNU General Public License. This exception does not however +# invalidate any other reasons why the executable file might be covered by +# the GNU General Public License. + +tbb_root?=. 
+include $(tbb_root)/build/common.inc +.PHONY: default all tbb tbbmalloc test examples + +#workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.tmp +#According to documentation submakes should run in parallel +.NOTPARALLEL: tbb tbbmalloc + +default: tbb tbbmalloc + +all: tbb tbbmalloc test examples + +tbb: mkdir + $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbb cfg=debug tbb_root=$(tbb_root) + $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root) + +tbbmalloc: mkdir + $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc tbb_root=$(tbb_root) + $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root) + +test: tbb tbbmalloc + -$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test tbb_root=$(tbb_root) + -$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.test cfg=debug tbb_root=$(tbb_root) + -$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test tbb_root=$(tbb_root) + -$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.test cfg=release tbb_root=$(tbb_root) + +rml: mkdir + $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.rml cfg=debug tbb_root=$(tbb_root) + $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.rml cfg=release tbb_root=$(tbb_root) + + +examples: tbb tbbmalloc + $(MAKE) -C examples -r -f Makefile tbb_root=.. release test + +.PHONY: clean clean_examples mkdir info + +clean: clean_examples + $(shell $(RM) $(work_dir)_release$(SLASH)*.* >$(NUL) 2>$(NUL)) + $(shell $(RD) $(work_dir)_release >$(NUL) 2>$(NUL)) + $(shell $(RM) $(work_dir)_debug$(SLASH)*.* >$(NUL) 2>$(NUL)) + $(shell $(RD) $(work_dir)_debug >$(NUL) 2>$(NUL)) + @echo clean done + +clean_examples: + $(shell $(MAKE) -s -i -r -C examples -f Makefile tbb_root=.. 
clean >$(NUL) 2>$(NUL)) + +mkdir: + $(shell $(MD) "$(work_dir)_release" >$(NUL) 2>$(NUL)) + $(if $(subst undefined,,$(origin_build_dir)),,cd "$(work_dir)_release" && $(MAKE_TBBVARS) $(tbb_build_prefix)_release) + $(shell $(MD) "$(work_dir)_debug" >$(NUL) 2>$(NUL)) + $(if $(subst undefined,,$(origin_build_dir)),,cd "$(work_dir)_debug" && $(MAKE_TBBVARS) $(tbb_build_prefix)_debug) + +info: + @echo OS: $(tbb_os) + @echo arch=$(arch) + @echo compiler=$(compiler) + @echo runtime=$(runtime) + @echo tbb_build_prefix=$(tbb_build_prefix) + diff --git a/dep/tbb/Makefile.am b/dep/tbb/Makefile.am deleted file mode 100644 index d85e69268..000000000 --- a/dep/tbb/Makefile.am +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2005-2009 Intel Corporation. All Rights Reserved. -# -# This file is part of Threading Building Blocks. -# -# Threading Building Blocks is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License -# version 2 as published by the Free Software Foundation. -# -# Threading Building Blocks is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Threading Building Blocks; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# -# As a special exception, you may use this file as part of a free software -# library without restriction. Specifically, if other files instantiate -# templates or use macros or inline functions from this file, or you compile -# this file and link it with other files to produce an executable, this -# file does not by itself cause the resulting executable to be covered by -# the GNU General Public License. 
This exception does not however -# invalidate any other reasons why the executable file might be covered by -# the GNU General Public License. - -tbb_root = $(srcdir) - -include $(tbb_root)/build/common.inc - -# change these -override work_dir = $(CWD) -export work_dir -override tbb_root = $(srcdir) -export work_dir - -.PHONY: all tbb tbbmalloc - -#workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.tmp -#According to documentation submakes should run in parallel -.NOTPARALLEL: tbb tbbmalloc - -all: tbb tbbmalloc - -tbb: - $(MAKE) -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root) - -tbbmalloc: - $(MAKE) -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root) - -install-exec-local: - $(INSTALL) $(work_dir)/lib*.so* $(DESTDIR)$(libdir) - -clean-local: - -rm -f *.d *.o - -rm -f lib*.so* - -rm -f *.def *.tmp tbbvars.* - diff --git a/dep/tbb/build/version_info_winlrb.js b/dep/tbb/build/version_info_winlrb.js deleted file mode 100644 index 67f2a2920..000000000 --- a/dep/tbb/build/version_info_winlrb.js +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2005-2009 Intel Corporation. All Rights Reserved. -// -// This file is part of Threading Building Blocks. -// -// Threading Building Blocks is free software; you can redistribute it -// and/or modify it under the terms of the GNU General Public License -// version 2 as published by the Free Software Foundation. -// -// Threading Building Blocks is distributed in the hope that it will be -// useful, but WITHOUT ANY WARRANTY; without even the implied warranty -// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with Threading Building Blocks; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -// -// As a special exception, you may use this file as part of a free software -// library without restriction. Specifically, if other files instantiate -// templates or use macros or inline functions from this file, or you compile -// this file and link it with other files to produce an executable, this -// file does not by itself cause the resulting executable to be covered by -// the GNU General Public License. This exception does not however -// invalidate any other reasons why the executable file might be covered by -// the GNU General Public License. - -var WshShell = WScript.CreateObject("WScript.Shell"); - -var tmpExec; - -WScript.Echo("#define __TBB_VERSION_STRINGS \\"); - -//Getting BUILD_HOST -WScript.echo( "\"TBB: BUILD_HOST\\t\\t" + - WshShell.ExpandEnvironmentStrings("%COMPUTERNAME%") + - "\" ENDL \\" ); - -//Getting BUILD_OS -tmpExec = WshShell.Exec("cmd /c ver"); -while ( tmpExec.Status == 0 ) { - WScript.Sleep(100); -} -tmpExec.StdOut.ReadLine(); - -WScript.echo( "\"TBB: BUILD_OS\\t\\t" + - tmpExec.StdOut.ReadLine() + - "\" ENDL \\" ); - -var Unknown = "Unknown"; - -WScript.echo( "\"TBB: BUILD_KERNEL\\t" + - Unknown + - "\" ENDL \\" ); - -//Getting BUILD_COMPILER -tmpExec = WshShell.Exec("icc --version"); -while ( tmpExec.Status == 0 ) { - WScript.Sleep(100); -} -var ccVersion = tmpExec.StdErr.ReadLine(); -WScript.echo( "\"TBB: BUILD_GCC\\t" + - ccVersion + - "\" ENDL \\" ); -WScript.echo( "\"TBB: BUILD_COMPILER\\t" + - ccVersion + - "\" ENDL \\" ); - -WScript.echo( "\"TBB: BUILD_GLIBC\\t" + - Unknown + - "\" ENDL \\" ); - -WScript.echo( "\"TBB: BUILD_LD\\t" + - Unknown + - "\" ENDL \\" ); - -//Getting BUILD_TARGET -WScript.echo( "\"TBB: BUILD_TARGET\\t" + - WScript.Arguments(1) + - "\" ENDL \\" ); - -//Getting 
BUILD_COMMAND -WScript.echo( "\"TBB: BUILD_COMMAND\\t" + WScript.Arguments(2) + "\" ENDL" ); - -//Getting __TBB_DATETIME and __TBB_VERSION_YMD -var date = new Date(); -WScript.echo( "#define __TBB_DATETIME \"" + date.toUTCString() + "\"" ); -WScript.echo( "#define __TBB_VERSION_YMD " + date.getUTCFullYear() + ", " + - (date.getUTCMonth() > 8 ? (date.getUTCMonth()+1):("0"+(date.getUTCMonth()+1))) + - (date.getUTCDate() > 9 ? date.getUTCDate():("0"+date.getUTCDate())) ); - - diff --git a/dep/tbb/build/winlrb.cl.inc b/dep/tbb/build/winlrb.cl.inc deleted file mode 100644 index 618dba5bf..000000000 --- a/dep/tbb/build/winlrb.cl.inc +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2005-2009 Intel Corporation. All Rights Reserved. -# -# This file is part of Threading Building Blocks. -# -# Threading Building Blocks is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License -# version 2 as published by the Free Software Foundation. -# -# Threading Building Blocks is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Threading Building Blocks; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# -# As a special exception, you may use this file as part of a free software -# library without restriction. Specifically, if other files instantiate -# templates or use macros or inline functions from this file, or you compile -# this file and link it with other files to produce an executable, this -# file does not by itself cause the resulting executable to be covered by -# the GNU General Public License. 
This exception does not however -# invalidate any other reasons why the executable file might be covered by -# the GNU General Public License. - -include $(tbb_root)/build/windows.cl.inc - -ifeq ($(cfg), debug) - CFG_LETTER = d -else - CFG_LETTER = r -endif - -_CPLUS_FLAGS_HOST := $(CPLUS_FLAGS) /I$(LRB_INC_DIR) $(LINK_FLAGS) /LIBPATH:$(LRB_LIB_DIR) xn_host$(LRB_HOST_ARCH)$(CFG_LETTER).lib - -TEST_EXT = dll -CPLUS_FLAGS += /I$(LRB_INC_DIR) /D__LRB__ -LIB_LINK_FLAGS += /LIBPATH:$(LRB_LIB_DIR) xn_lrb$(LRB_HOST_ARCH)$(CFG_LETTER).lib -LINK_FLAGS = $(LIB_LINK_FLAGS) -OPENMP_FLAG = - -ifdef TEST_RESOURCE -LINK_FLAGS += $(TEST_RESOURCE) - -TEST_LAUNCHER_NAME = harness_lrb_host -AUX_TEST_DEPENDENCIES = $(TEST_LAUNCHER_NAME).exe - -$(TEST_LAUNCHER_NAME).exe: $(TEST_LAUNCHER_NAME).cpp - cl /Fe$@ $< $(_CPLUS_FLAGS_HOST) - -NO_LEGACY_TESTS = 1 -NO_C_TESTS = 1 -TEST_LAUNCHER= -endif # TEST_RESOURCE - -#test_model_plugin.%: -# @echo test_model_plugin is not supported for LRB architecture so far - -ifeq ($(BUILDING_PHASE),0) # examples - export RM = del /Q /F - export LIBS = -shared -lthr -z muldefs -L$(work_dir)_debug -L$(work_dir)_release - export UI = con - export x64 = 64 - export CXXFLAGS = -xR -I..\..\..\include -endif # examples diff --git a/dep/tbb/build/winlrb.icc.inc b/dep/tbb/build/winlrb.icc.inc deleted file mode 100644 index 427d06c9d..000000000 --- a/dep/tbb/build/winlrb.icc.inc +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2005-2009 Intel Corporation. All Rights Reserved. -# -# This file is part of Threading Building Blocks. -# -# Threading Building Blocks is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License -# version 2 as published by the Free Software Foundation. -# -# Threading Building Blocks is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Threading Building Blocks; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# -# As a special exception, you may use this file as part of a free software -# library without restriction. Specifically, if other files instantiate -# templates or use macros or inline functions from this file, or you compile -# this file and link it with other files to produce an executable, this -# file does not by itself cause the resulting executable to be covered by -# the GNU General Public License. This exception does not however -# invalidate any other reasons why the executable file might be covered by -# the GNU General Public License. - - -include $(tbb_root)/build/winlrb.cl.inc - -TEST_EXT = so -.PRECIOUS: %.$(TEST_EXT) - -include $(tbb_root)/build/freebsd.gcc.inc - -WARNING_KEY = -w1 -CPLUS = icpc -CONLY = icc -#LIBS = -u _read -lcprts -lthr -lc -#LIBS = -lthr -LIBS = -u _read -lcprts -lthr -limf -lc -LINK_FLAGS = -L$(LRB_LIB_DIR) $(DYLIB_KEY) -lxn$(XN_VER)_lrb64$(CFG_LETTER) -CPLUS_FLAGS += -xR $(PIC_KEY) -I$(LRB_INC_DIR) -DXENSIM -C_FLAGS = $(CPLUS_FLAGS) -LIB_LINK_FLAGS = $(LINK_FLAGS) - -ifeq ($(cfg), release) - # workaround for LRB compiler issues - CPLUS_FLAGS := $(subst -O2,-O0, $(CPLUS_FLAGS)) -endif diff --git a/dep/tbb/build/winlrb.inc b/dep/tbb/build/winlrb.inc deleted file mode 100644 index f72c66fde..000000000 --- a/dep/tbb/build/winlrb.inc +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2005-2009 Intel Corporation. All Rights Reserved. -# -# This file is part of Threading Building Blocks. -# -# Threading Building Blocks is free software; you can redistribute it -# and/or modify it under the terms of the GNU General Public License -# version 2 as published by the Free Software Foundation. 
-# -# Threading Building Blocks is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Threading Building Blocks; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# -# As a special exception, you may use this file as part of a free software -# library without restriction. Specifically, if other files instantiate -# templates or use macros or inline functions from this file, or you compile -# this file and link it with other files to produce an executable, this -# file does not by itself cause the resulting executable to be covered by -# the GNU General Public License. This exception does not however -# invalidate any other reasons why the executable file might be covered by -# the GNU General Public License. - -ifndef XN_VER -export LRBSDK = $(LARRABEE_CORE_LATEST) -export LRB_LIB_DIR = "$(LRBSDK)lib" -export LRB_INC_DIR = "$(LRBSDK)include" - -# Function $(wildcard pattern) does not work with paths containing spaces! 
-_lrb_lib = $(shell cmd /C "dir /B "$(LRBSDK)lib\libxn*_lrb64d.so") -export XN_VER = $(patsubst libxn%_lrb64d.so,%,$(_lrb_lib)) - -ifeq (1,$(NETSIM_LRB_32_OVERRIDE)) - export LRB_HOST_ARCH = 32 -else - export LRB_HOST_ARCH = 64 -endif - -export run_cmd = harness_lrb_host.exe - -export UI = con - -endif #XN_VER - -include $(tbb_root)/build/windows.inc - -ifneq (1,$(netsim)) -# Target environment is native LRB or LrbFSim - -export compiler = icc -export arch := lrb - -target_machine = $(subst -,_,$(shell icpc -dumpmachine)) -runtime = $(subst _lrb_,_,$(target_machine)) -# -dumpmachine option does not work in R9 Core SDK 5 -ifeq ($(runtime),) - runtime = x86_64_freebsd -endif -export runtime:=$(runtime)_xn$(XN_VER) - -OBJ = o -DLL = so -LIBEXT = so - -TBB.DEF = -TBB.DLL = libtbb$(DEBUG_SUFFIX).$(DLL) -TBB.LIB = $(TBB.DLL) -LINK_TBB.LIB = $(TBB.DLL) -TBB.RES = - -MALLOC.DEF := -MALLOC.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(DLL) -MALLOC.LIB = $(MALLOC.DLL) -MALLOC.RES = - -MAKE_VERSIONS = cmd /C cscript /nologo /E:jscript $(subst \,/,$(tbb_root))/build/version_info_winlrb.js $(compiler) $(arch) $(subst \,/,"$(CPLUS) $(CPLUS_FLAGS) $(INCLUDES)") > version_string.tmp -MAKE_TBBVARS = cmd /C "$(subst /,\,$(tbb_root))\build\generate_tbbvars.bat" - -ifneq (1,$(XENSIM_ENABLED)) - export run_cmd = rem -endif - -TBB_NOSTRICT = 1 - -endif # lrbfsim diff --git a/dep/tbb/src/perf/fibonacci_cutoff.cpp b/dep/tbb/src/perf/fibonacci_cutoff.cpp new file mode 100644 index 000000000..2f2f710af --- /dev/null +++ b/dep/tbb/src/perf/fibonacci_cutoff.cpp @@ -0,0 +1,134 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. 
+ + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include +#include + +#include "tbb/task_scheduler_init.h" +#include "tbb/task.h" +#include "tbb/tick_count.h" + +long CutOff = 1; + +long SerialFib( const long n ); + +long ParallelFib( const long n ); + +inline void dump_title() { + printf("Serial/Parallel, P, N, cutoff, repetitions, time, fib, speedup\n"); +} + +inline void output(int P, long n, long c, int T, double serial_elapsed, double elapsed, long result) { + printf("%s, %d, %ld, %ld, %d, %g, %ld, %g\n", ( (P == 0) ? 
"Serial" : "Parallel" ), P, n, c, T, elapsed, result, serial_elapsed / elapsed); +} + +#define MOVE_BY_FOURTHS 1 +inline long calculate_new_cutoff(const long lo, const long hi) { +#if MOVE_BY_FOURTHS + return lo + (3 + hi - lo ) / 4; +#else + return (hi + lo)/2; +#endif +} + +void find_cutoff(const int P, const long n, const int T, const double serial_elapsed) { + long lo = 1, hi = n; + double elapsed = 0, lo_elapsed = 0, hi_elapsed = 0; + long final_cutoff = -1; + + tbb::task_scheduler_init init(P); + + while(true) { + CutOff = calculate_new_cutoff(lo, hi); + long result = 0; + tbb::tick_count t0; + for (int t = -1; t < T; ++t) { + if (t == 0) t0 = tbb::tick_count::now(); + result += ParallelFib(n); + } + elapsed = (tbb::tick_count::now() - t0).seconds(); + output(P,n,CutOff,T,serial_elapsed,elapsed,result); + + if (serial_elapsed / elapsed >= P/2.0) { + final_cutoff = CutOff; + if (hi == CutOff) { + if (hi == lo) { + // we have had this value at both above and below 50% + lo = 1; lo_elapsed = 0; + } else { + break; + } + } + hi = CutOff; + hi_elapsed = elapsed; + } else { + if (lo == CutOff) break; + lo = CutOff; + lo_elapsed = elapsed; + } + } + + double interpolated_cutoff = lo + ( P/2.0 - serial_elapsed/lo_elapsed ) * ( (hi - lo) / ( serial_elapsed/hi_elapsed - serial_elapsed/lo_elapsed )); + + if (final_cutoff != -1) { + printf("50%% efficiency cutoff is %ld ( linearly interpolated cutoff is %g )\n", final_cutoff, interpolated_cutoff); + } else { + printf("Cannot achieve 50%% efficiency\n"); + } + + return; +} + +int main(int argc, char *argv[]) { + if (argc < 4) { + printf("Usage: %s threads n repetitions\n",argv[0]); + return 1; + } + + dump_title(); + + int P = atoi(argv[1]); + long n = atol(argv[2]); + int T = atoi(argv[3]); + + // collect serial time + long serial_result = 0; + tbb::tick_count t0; + for (int t = -1; t < T; ++t) { + if (t == 0) t0 = tbb::tick_count::now(); + serial_result += SerialFib(n); + } + double serial_elapsed = 
(tbb::tick_count::now() - t0).seconds(); + output(0,n,0,T,serial_elapsed,serial_elapsed,serial_result); + + // perform search + find_cutoff(P,n,T,serial_elapsed); + + return 0; +} + diff --git a/dep/tbb/src/perf/fibonacci_impl_tbb.cpp b/dep/tbb/src/perf/fibonacci_impl_tbb.cpp new file mode 100644 index 000000000..66c6f24a0 --- /dev/null +++ b/dep/tbb/src/perf/fibonacci_impl_tbb.cpp @@ -0,0 +1,86 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include +#include + +#include "tbb/task_scheduler_init.h" +#include "tbb/task.h" +#include "tbb/tick_count.h" + +extern long CutOff; + +long SerialFib( const long n ) { + if( n<2 ) + return n; + else + return SerialFib(n-1)+SerialFib(n-2); +} + +struct FibContinuation: public tbb::task { + long* const sum; + long x, y; + FibContinuation( long* sum_ ) : sum(sum_) {} + tbb::task* execute() { + *sum = x+y; + return NULL; + } +}; + +struct FibTask: public tbb::task { + long n; + long * sum; + FibTask( const long n_, long * const sum_ ) : + n(n_), sum(sum_) + {} + tbb::task* execute() { + if( n +#include +#include +#include +#include +#include + +#include "../src/test/harness.h" + +#if __linux__ || __APPLE__ || __FreeBSD__ + #include +#endif /* __APPLE__ */ + +// The code, performance of which is to be measured, is surrounded by the StartSimpleTiming +// and StopSimpleTiming macros. It is called "target code" or "code of interest" hereafter. +// +// The target code is executed inside the nested loop. Nesting is necessary to allow +// measurements on arrays that fit cache of a particular level, while making the load +// big enough to eliminate the influence of random deviations. +// +// Macro StartSimpleTiming defines reduction variable "util::anchor", which may be modified (usually +// by adding to) by the target code. This can be necessary to prevent optimizing compilers +// from throwing out the code of interest. Besides, if the target code is complex enough, +// make sure that all its branches contribute (directly or indirectly) to the value +// being added to the "util::anchor" variable. +// +// To factor out overhead introduced by the measurement infra code it is recommended to make +// a calibration run with target code replaced by a no-op (but still modifying "sum"), and +// store the resulting time in the "util::base" variable. +// +// A generally good approach is to make the target code use elements of a preliminary +// initialized array. 
Then for calibration run you just need to add vector elements +// to the "sum" variable. To get rid of memory access delays make the array small +// enough to fit L2 or L1 cache (play with StartSimpleTiming arguments if necessary). +// +// Macro CalibrateSimpleTiming performs default calibration using "util::anchor += i;" operation. +// +// Macro ANCHOR_TYPE defines the type of the reduction variable. If it was not +// defined before including this header, it is defined as size_t. Depending on +// the target code modern super scalar architectures may blend reduction operation +// and instructions of interest differently for different target alternatives. So +// you may play with the type to minimize out-of-order and parallel execution impact +// on the calibration time veracity. You may even end up with different reduction +// variable types (and different calibration times) for different measurements. + + +namespace util { + +typedef std::vector durations_t; + + void trace_histogram ( const durations_t& t, char* histogramFileName ) + { + FILE* f = histogramFileName ? 
fopen(histogramFileName, "wt") : stdout; + size_t n = t.size(); + const size_t num_buckets = 100; + double min_val = *std::min_element(t.begin(), t.end()), + max_val = *std::max_element(t.begin(), t.end()), + bucket_size = (max_val - min_val) / num_buckets; + std::vector hist(num_buckets + 1, 0); + for ( size_t i = 0; i < n; ++i ) + ++hist[size_t((t[i]-min_val)/bucket_size)]; + fprintf (f, "Histogram: nvals = %u, min = %g, max = %g, nbuckets = %u\n", (unsigned)n, min_val, max_val, (unsigned)num_buckets); + double bucket = min_val; + for ( size_t i = 0; i <= num_buckets; ++i, bucket+=bucket_size ) + fprintf (f, "%12g\t%u\n", bucket, (unsigned)hist[i]); + fclose(f); + } + + double average ( const durations_t& d, double& variation_percent, double& std_dev_percent ) + { + durations_t t = d; + if ( t.size() > 5 ) { + t.erase(std::min_element(t.begin(), t.end())); + t.erase(std::max_element(t.begin(), t.end())); + } + size_t n = t.size(); + double sum = 0, + min_val = *std::min_element(t.begin(), t.end()), + max_val = *std::max_element(t.begin(), t.end()); + for ( size_t i = 0; i < n; ++i ) + sum += t[i]; + double avg = sum / n, + std_dev = 0; + for ( size_t i = 0; i < n; ++i ) { + double dev = fabs(t[i] - avg); + std_dev += dev * dev; + } + std_dev = sqrt(std_dev / n); + std_dev_percent = std_dev / avg * 100; + variation_percent = 100 * (max_val - min_val) / avg; + return avg; + } + + static int num_threads; + + static double base = 0, + base_dev = 0, + base_dev_percent = 0; + + static char *empty_fmt = ""; + static int rate_field_len = 11; + +#if !defined(ANCHOR_TYPE) + #define ANCHOR_TYPE size_t +#endif + + static ANCHOR_TYPE anchor = 0; + + static double sequential_time = 0; + + +#define StartSimpleTiming(nOuter, nInner) { \ + tbb::tick_count t1, t0 = tbb::tick_count::now(); \ + for ( size_t j = 0; l < nOuter; ++l ) { \ + for ( size_t i = 0; i < nInner; ++i ) { + +#define StopSimpleTiming(res) \ + } \ + util::anchor += (ANCHOR_TYPE)l; \ + } \ + t1 = 
tbb::tick_count::now(); \ + printf (util::empty_fmt, util::anchor); \ + res = (t1-t0).seconds() - util::base; \ +} + +#define CalibrateSimpleTiming(T, nOuter, nInner) \ + StartSimpleTiming(nOuter, nInner); \ + util::anchor += (ANCHOR_TYPE)i; \ + StopSimpleTiming(util::base); + + +#define StartTimingImpl(nRuns, nOuter, nInner) \ + tbb::tick_count t1, t0; \ + for ( size_t k = 0; k < nRuns; ++k ) { \ + t0 = tbb::tick_count::now(); \ + for ( size_t l = 0; l < nOuter; ++l ) { \ + for ( size_t i = 0; i < nInner; ++i ) { + +#define StartTiming(nRuns, nOuter, nInner) { \ + util::durations_t t_(nRuns); \ + StartTimingImpl(nRuns, nOuter, nInner) + +#define StartTimingEx(vDurations, nRuns, nOuter, nInner) { \ + util::durations_t &t_ = vDurations; \ + vDurations.resize(nRuns); \ + StartTimingImpl(nRuns, nOuter, nInner) + +#define StopTiming(Avg, StdDev, StdDevPercent) \ + } \ + util::anchor += (ANCHOR_TYPE)l; \ + } \ + t1 = tbb::tick_count::now(); \ + t_[k] = (t1 - t0).seconds()/nrep; \ + } \ + printf (util::empty_fmt, util::anchor); \ + Avg = util::average(t_, StdDev, StdDevPercent); \ +} + +#define CalibrateTiming(nRuns, nOuter, nInner) \ + StartTiming(nRuns, nOuter, nInner); \ + util::anchor += (ANCHOR_TYPE)i; \ + StopTiming(util::base, util::base_dev, util::base_dev_percent); + +} // namespace util + + +#ifndef NRUNS + #define NRUNS 7 +#endif + +#ifndef ONE_TEST_DURATION + #define ONE_TEST_DURATION 0.01 +#endif + +#define no_histogram ((char*)-1) + +inline +double RunTestImpl ( const char* title, void (*pfn)(), char* histogramFileName = no_histogram ) { + double time = 0, variation = 0, deviation = 0; + size_t nrep = 1; + for (;;) { + CalibrateTiming(NRUNS, 1, nrep); + StartTiming(NRUNS, 1, nrep); + pfn(); + StopTiming(time, variation, deviation); + time -= util::base; + if ( time > 1e-6 ) + break; + nrep *= 2; + } + nrep *= (size_t)ceil(ONE_TEST_DURATION/time); + CalibrateTiming(NRUNS, 1, nrep); // sets util::base + util::durations_t t; + StartTimingEx(t, NRUNS, 1, nrep); 
+ pfn(); + StopTiming(time, variation, deviation); + if ( histogramFileName != (char*)-1 ) + util::trace_histogram(t, histogramFileName); + double clean_time = time - util::base; + if ( title ) { + // Deviation (in percent) is calculated for the Gross time + printf ("\n%-34s %.2e %5.1f ", title, clean_time, deviation); + if ( util::sequential_time != 0 ) + //printf ("% .2e ", clean_time - util::sequential_time); + printf ("% 10.1f ", 100*(clean_time - util::sequential_time)/util::sequential_time); + else + printf ("%*s ", util::rate_field_len, ""); + printf ("%-9u %1.6f |", (unsigned)nrep, time * nrep); + } + return clean_time; +} + + +/// Runs the test function, does statistical processing, and, if title is nonzero, prints results. +/** If histogramFileName is a string, the histogram of individual runs is generated and stored + in a file with the given name. If it is NULL then the histogram is printed on the console. + By default no histogram is generated. + The histogram format is: "rate bucket start" "number of tests in this bucket". 
**/ +inline +void RunTest ( const char* title_fmt, size_t workload_param, void (*pfn_test)(), char* histogramFileName = no_histogram ) { + char title[1024]; + sprintf(title, title_fmt, (long)workload_param); + RunTestImpl(title, pfn_test, histogramFileName); +} + +inline +void CalcSequentialTime ( void (*pfn)() ) { + util::sequential_time = RunTestImpl(NULL, pfn) / util::num_threads; +} + +inline +void ResetSequentialTime () { + util::sequential_time = 0; +} + + +inline void PrintTitle() { + //printf ("%-32s %-*s Std Dev,%% %-*s Repeats Gross time Infra time | NRUNS = %u", + // "Test name", util::rate_field_len, "Rate", util::rate_field_len, "Overhead", NRUNS); + printf ("%-34s %-*s Std Dev,%% Par.overhead,%% Repeats Gross time | Nruns %u, Nthreads %d", + "Test name", util::rate_field_len, "Rate", NRUNS, util::num_threads); +} + +void Test(); + +inline +int test_main( int argc, char* argv[] ) { + MinThread = 1; + MaxThread = tbb::task_scheduler_init::default_num_threads(); + ParseCommandLine( argc, argv ); + char buf[128]; + util::rate_field_len = 2 + sprintf(buf, "%.1e", 1.1); + for ( int i = MinThread; i <= MaxThread; ++i ) { + tbb::task_scheduler_init init (i); + util::num_threads = i; + PrintTitle(); + Test(); + printf("\n"); + } + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/perf/statistics.cpp b/dep/tbb/src/perf/statistics.cpp new file mode 100644 index 000000000..5edebb8ab --- /dev/null +++ b/dep/tbb/src/perf/statistics.cpp @@ -0,0 +1,408 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. 
+ + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include "statistics.h" +#include "statistics_xml.h" + +#define COUNT_PARAMETERS 3 + +#ifdef _MSC_VER +#define snprintf _snprintf +#endif + +void GetTime(char* buff,int size_buff) +{ + tm *newtime; + time_t timer; + time(&timer); + newtime=localtime(&timer); + strftime(buff,size_buff,"%H:%M:%S",newtime); +} + +void GetDate(char* buff,int size_buff) +{ + tm *newtime; + time_t timer; + time(&timer); + newtime=localtime(&timer); + strftime(buff,size_buff,"%Y-%m-%d",newtime); +} + + +StatisticsCollector::TestCase StatisticsCollector::SetTestCase(const char *name, const char *mode, int threads) +{ + string KeyName(name); + switch (SortMode) + { + case ByThreads: KeyName += Format("_%02d_%s", threads, mode); break; + default: + case ByAlg: KeyName += Format("_%s_%02d", mode, threads); break; + } + CurrentKey = Statistics[KeyName]; + if(!CurrentKey) { + CurrentKey = new StatisticResults; + CurrentKey->Mode = mode; + CurrentKey->Name = name; + CurrentKey->Threads = threads; + CurrentKey->Results.reserve(RoundTitles.size()); + Statistics[KeyName] = CurrentKey; + } + return TestCase(CurrentKey); +} + +StatisticsCollector::~StatisticsCollector() +{ + for(Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + delete i->second; +} + +void StatisticsCollector::ReserveRounds(size_t index) +{ + size_t i = RoundTitles.size(); + if (i > index) return; + char buf[16]; + RoundTitles.resize(index+1); + for(; i <= index; i++) { + snprintf( buf, 15, "%u", unsigned(i+1) ); + RoundTitles[i] = buf; + } + for(Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) { + if(!i->second) printf("!!!'%s' = NULL\n", i->first.c_str()); + else i->second->Results.reserve(index+1); + } +} + +void StatisticsCollector::AddRoundResult(const TestCase &key, value_t v) +{ + ReserveRounds(key.access->Results.size()); + key.access->Results.push_back(v); +} + +void StatisticsCollector::SetRoundTitle(size_t index, const char *fmt, ...) 
+{ + vargf2buff(buff, 128, fmt); + ReserveRounds(index); + RoundTitles[index] = buff; +} + +void StatisticsCollector::AddStatisticValue(const TestCase &key, const char *type, const char *fmt, ...) +{ + vargf2buff(buff, 128, fmt); + AnalysisTitles.insert(type); + key.access->Analysis[type] = buff; +} + +void StatisticsCollector::AddStatisticValue(const char *type, const char *fmt, ...) +{ + vargf2buff(buff, 128, fmt); + AnalysisTitles.insert(type); + CurrentKey->Analysis[type] = buff; +} + +void StatisticsCollector::SetStatisticFormula(const char *name, const char *formula) +{ + Formulas[name] = formula; +} + +void StatisticsCollector::SetTitle(const char *fmt, ...) +{ + vargf2buff(buff, 256, fmt); + Title = buff; +} + +string ExcelFormula(const string &fmt, size_t place, size_t rounds, bool is_horizontal) +{ + char buff[16]; + if(is_horizontal) + snprintf(buff, 15, "RC[%u]:RC[%u]", unsigned(place), unsigned(place+rounds-1)); + else + snprintf(buff, 15, "R[%u]C:R[%u]C", unsigned(place+1), unsigned(place+rounds)); + string result(fmt); size_t pos = 0; + while ( (pos = result.find("ROUNDS", pos, 6)) != string::npos ) + result.replace(pos, 6, buff); + return result; +} + +void StatisticsCollector::Print(int dataOutput, const char *ModeName) +{ + FILE *OutputFile; + if (dataOutput & StatisticsCollector::Stdout) + { + printf("\n-=# %s #=-\n", Title.c_str()); + if(SortMode == ByThreads) + printf(" Name | # | %s ", ModeName); + else + printf(" Name | %s | # ", ModeName); + for (AnalysisTitles_t::iterator i = AnalysisTitles.begin(); i != AnalysisTitles.end(); i++) + printf("|%s", i->c_str()+1); + + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + { + if(SortMode == ByThreads) + printf("\n%12s|% 5d|%6s", i->second->Name.c_str(), i->second->Threads, i->second->Mode.c_str()); + else + printf("\n%12s|%6s|% 5d", i->second->Name.c_str(), i->second->Mode.c_str(), i->second->Threads); + Analysis_t &analisis = i->second->Analysis; + 
AnalysisTitles_t::iterator t = AnalysisTitles.begin(); + for (Analysis_t::iterator a = analisis.begin(); a != analisis.end(); t++) + { + char fmt[8]; snprintf(fmt, 7, "|%% %us", unsigned(max(size_t(3), t->size()))); + if(*t != a->first) + printf(fmt, ""); + else { + printf(fmt, a->second.c_str()); a++; + } + } + } + printf("\n"); + } + if (dataOutput & StatisticsCollector::HTMLFile) + { + if ((OutputFile = fopen((Name+".html").c_str(), "w+t")) != NULL) + { + char TimerBuff[100], DateBuff[100]; + GetTime(TimerBuff,sizeof(TimerBuff)); + GetDate(DateBuff,sizeof(DateBuff)); + fprintf(OutputFile, "\n%s\n\n", Title.c_str()); + //----------------------- + fprintf(OutputFile, "\n"); + fprintf(OutputFile, "" + "", + DateBuff, TimerBuff, unsigned(AnalysisTitles.size() + RoundTitles.size()), Title.c_str()); + fprintf(OutputFile, "\n", ModeName); + for (AnalysisTitles_t::iterator i = AnalysisTitles.begin(); i != AnalysisTitles.end(); i++) + fprintf(OutputFile, "", i->c_str()+1); + for (size_t i = 0; i < RoundTitles.size(); i++) + fprintf(OutputFile, "", RoundTitles[i].c_str()); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + { + fprintf(OutputFile, "\n", + i->second->Name.c_str(), i->second->Threads, i->second->Mode.c_str()); + //statistics + AnalysisTitles_t::iterator t = AnalysisTitles.begin(); + for (Analysis_t::iterator j = i->second->Analysis.begin(); j != i->second->Analysis.end(); t++) + { + fprintf(OutputFile, "", (*t != j->first)?" ":(i->second->Analysis[j->first]).c_str()); + if(*t == j->first) j++; + } + //data + Results_t &r = i->second->Results; + for (size_t k = 0; k < r.size(); k++) + { + fprintf(OutputFile, ""); + } + } + fprintf(OutputFile, "\n
Flip[H]%s%s%s
NameThreads%s%s%s
%s%d%4s%s"); + fprintf(OutputFile, ResultsFmt, r[k]); + fprintf(OutputFile, "
\n"); + ////////////////////////////////////////////////////// + fprintf(OutputFile, "\n"); + fprintf(OutputFile, "\n" + "", + DateBuff, TimerBuff, unsigned(max(Statistics.size()-2,size_t(1))), Title.c_str()); + + fprintf(OutputFile, "\n"); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, "", i->second->Name.c_str()); + fprintf(OutputFile, "\n"); + for (Statistics_t::iterator n = Statistics.begin(); n != Statistics.end(); n++) + fprintf(OutputFile, "", n->second->Threads); + fprintf(OutputFile, "\n", ModeName); + for (Statistics_t::iterator m = Statistics.begin(); m != Statistics.end(); m++) + fprintf(OutputFile, "", m->second->Mode.c_str()); + + for (AnalysisTitles_t::iterator t = AnalysisTitles.begin(); t != AnalysisTitles.end(); t++) + { + fprintf(OutputFile, "\n", t->c_str()+1); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, "", i->second->Analysis.count(*t)?i->second->Analysis[*t].c_str():" "); + } + + for (size_t r = 0; r < RoundTitles.size(); r++) + { + fprintf(OutputFile, "\n", RoundTitles[r].c_str()); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + { + Results_t &result = i->second->Results; + fprintf(OutputFile, ""); + } + } + fprintf(OutputFile, "\n
Flip[V]%s%s%s
Name%s
Threads%d
%s%s
%s%s
%s"); + if(result.size() > r) + fprintf(OutputFile, ResultsFmt, result[r]); + fprintf(OutputFile, "
\n\n"); + fclose(OutputFile); + } + } + if (dataOutput & StatisticsCollector::ExcelXML) + { + if ((OutputFile = fopen((Name+".xml").c_str(), "w+t")) == NULL) { + printf("Can't open .xml file\n"); + } else { + //vector *TmpVect; + //Statistics_t::iterator ii, i = Statistics.begin(); + //Analysis_t::iterator jj, j = i->second.Analysis.begin(); + char UserName[100]; + char SheetName[20]; + char TimerBuff[100], DateBuff[100]; +#if _WIN32 || _WIN64 + strcpy(UserName,getenv("USERNAME")); +#else + strcpy(UserName,getenv("USER")); +#endif + //-------------------------------- + strcpy(SheetName,"Horizontal"); + GetTime(TimerBuff,sizeof(TimerBuff)); + GetDate(DateBuff,sizeof(DateBuff)); + //-------------------------- + fprintf(OutputFile, XMLHead, UserName, TimerBuff); + fprintf(OutputFile, XMLStyles); + fprintf(OutputFile, XMLBeginSheet, SheetName); + fprintf(OutputFile, XMLNames,1,1,1,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS)); + fprintf(OutputFile, XMLBeginTable, int(RoundTitles.size()+Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+1/*title*/), int(Statistics.size()+1)); + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLCellTopName); + fprintf(OutputFile, XMLCellTopThread); + fprintf(OutputFile, XMLCellTopMode, ModeName); + for (AnalysisTitles_t::iterator j = AnalysisTitles.begin(); j != AnalysisTitles.end(); j++) + fprintf(OutputFile, XMLAnalysisTitle, j->c_str()+1); + for (Formulas_t::iterator j = Formulas.begin(); j != Formulas.end(); j++) + fprintf(OutputFile, XMLAnalysisTitle, j->first.c_str()+1); + for (RoundTitles_t::iterator j = RoundTitles.begin(); j != RoundTitles.end(); j++) + fprintf(OutputFile, XMLAnalysisTitle, j->c_str()); + fprintf(OutputFile, XMLCellEmptyWhite, Title.c_str()); + fprintf(OutputFile, XMLERow); + //------------------------ + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + { + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLCellName, i->second->Name.c_str()); + 
fprintf(OutputFile, XMLCellThread,i->second->Threads); + fprintf(OutputFile, XMLCellMode, i->second->Mode.c_str()); + //statistics + AnalysisTitles_t::iterator at = AnalysisTitles.begin(); + for (Analysis_t::iterator j = i->second->Analysis.begin(); j != i->second->Analysis.end(); at++) + { + fprintf(OutputFile, XMLCellAnalysis, (*at != j->first)?"":(i->second->Analysis[j->first]).c_str()); + if(*at == j->first) j++; + } + //formulas + size_t place = 0; + Results_t &v = i->second->Results; + for (Formulas_t::iterator f = Formulas.begin(); f != Formulas.end(); f++, place++) + fprintf(OutputFile, XMLCellFormula, ExcelFormula(f->second, Formulas.size()-place, v.size(), true).c_str()); + //data + for (size_t k = 0; k < v.size(); k++) + { + fprintf(OutputFile, XMLCellData, v[k]); + } + if(v.size() < RoundTitles.size()) + fprintf(OutputFile, XMLMergeRow, int(RoundTitles.size() - v.size())); + fprintf(OutputFile, XMLERow); + } + //------------------------ + fprintf(OutputFile, XMLEndTable); + fprintf(OutputFile, XMLWorkSheetProperties,1,1,3,3,int(RoundTitles.size()+AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS)); + fprintf(OutputFile, XMLAutoFilter,1,1,1,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS)); + fprintf(OutputFile, XMLEndWorkSheet); + //---------------------------------------- + strcpy(SheetName,"Vertical"); + fprintf(OutputFile, XMLBeginSheet, SheetName); + fprintf(OutputFile, XMLNames, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),2,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS+2),int(Statistics.size()+1)); + fprintf(OutputFile, XMLBeginTable, int(max(Statistics.size()+1, size_t(7))), int(RoundTitles.size()+AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS+2)); + //fprintf(OutputFile, XMLColumsVerticalTable, Statistics.size()+1); + //---------------------------------------- + + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLNameAndTime, Name.c_str(), TimerBuff, DateBuff); + fprintf(OutputFile, 
XMLTableParamAndTitle, int(Statistics.size()), int(AnalysisTitles.size()), int(RoundTitles.size()), Title.c_str()); + fprintf(OutputFile, XMLERow); + fprintf(OutputFile, XMLBRow); + //------------------- + fprintf(OutputFile, XMLCellTopName); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, XMLCellName, i->second->Name.c_str()); + fprintf(OutputFile, XMLERow); + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLCellTopThread); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, XMLCellThread, i->second->Threads); + fprintf(OutputFile, XMLERow); + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLCellTopMode, ModeName); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, XMLCellMode, i->second->Mode.c_str()); + fprintf(OutputFile, XMLERow); + //----------------- + for (AnalysisTitles_t::iterator t = AnalysisTitles.begin(); t != AnalysisTitles.end(); t++) + { + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLAnalysisTitle, t->c_str()+1); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, XMLCellAnalysis, i->second->Analysis.count(*t)?(i->second->Analysis[*t]).c_str():""); + fprintf(OutputFile, XMLERow); + } + //------------------------------------- + for (Formulas_t::iterator t = Formulas.begin(); t != Formulas.end(); t++) + { + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLAnalysisTitle, t->first.c_str()+1); + size_t place = 0; + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + fprintf(OutputFile, XMLCellAnalysis, ExcelFormula(t->second, Formulas.size()-place, i->second->Results.size(), false).c_str()); + fprintf(OutputFile, XMLERow); + } + //-------------------------------------- + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLCellEmptyWhite, "Result"); + fprintf(OutputFile, XMLERow); + + 
for (size_t k = 0; k < RoundTitles.size(); k++) + { + fprintf(OutputFile, XMLBRow); + fprintf(OutputFile, XMLAnalysisTitle, RoundTitles[k].c_str()); + for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) + if(i->second->Results.size() > k) + fprintf(OutputFile, XMLCellData, i->second->Results[k]); + else + fprintf(OutputFile, XMLCellEmptyWhite, ""); + fprintf(OutputFile, XMLERow); + } + fprintf(OutputFile, XMLEndTable); + //---------------------------------------- + fprintf(OutputFile, XMLWorkSheetProperties, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2), int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),1,1,6); + fprintf(OutputFile, XMLAutoFilter, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),2, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2), int(Statistics.size()+1)); + //---------------------------------------- + fprintf(OutputFile, XMLEndWorkSheet); + fprintf(OutputFile, XMLEndWorkbook); + fclose(OutputFile); + } + } +} diff --git a/dep/tbb/src/perf/statistics.h b/dep/tbb/src/perf/statistics.h new file mode 100644 index 000000000..3066190d0 --- /dev/null +++ b/dep/tbb/src/perf/statistics.h @@ -0,0 +1,188 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +// Internal Intel tool + +#ifndef __STATISTICS_H__ +#define __STATISTICS_H__ + +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +typedef double value_t; + +/* + Statistical collector class. + + Resulting table output: + +---------------------------------------------------------------------------+ + | [Date] ... | + +----------+----v----+--v---+----------------+------------+-..-+------------+ + | TestName | Threads | Mode | Rounds results | Stat_type1 | .. | Stat_typeN | + +----------+---------+------+-+-+-+-..-+-+-+-+------------+-..-+------------+ + | | | | | | | .. | | | | | | | + .. ... ... .................. ...... .. + | | | | | | | .. | | | | | | | + +----------+---------+------+-+-+-+-..-+-+-+-+------------+-..-+------------+ + + Iterating table output: + +---------------------------------------------------------------------------+ + | [Date] <TestName>, Threads: <N>, Mode: <M>; for <Title>... 
| + +----------+----v----+--v---+----------------+------------+-..-+------------+ + +*/ + +class StatisticsCollector +{ +public: + typedef map<string, string> Analysis_t; + typedef vector<value_t> Results_t; + +protected: + StatisticsCollector(const StatisticsCollector &); + + struct StatisticResults + { + string Name; + string Mode; + int Threads; + Results_t Results; + Analysis_t Analysis; + }; + + // internal members + //bool OpenFile; + StatisticResults *CurrentKey; + string Title; + const char /**Name,*/ *ResultsFmt; + string Name; + //! Data + typedef map<string, StatisticResults*> Statistics_t; + Statistics_t Statistics; + typedef vector<string> RoundTitles_t; + RoundTitles_t RoundTitles; + //TODO: merge those into one structure + typedef map<string, string> Formulas_t; + Formulas_t Formulas; + typedef set<string> AnalysisTitles_t; + AnalysisTitles_t AnalysisTitles; + +public: + struct TestCase { + StatisticResults *access; + TestCase() : access(0) {} + TestCase(StatisticResults *link) : access(link) {} + const char *getName() const { return access->Name.c_str(); } + const char *getMode() const { return access->Mode.c_str(); } + int getThreads() const { return access->Threads; } + const Results_t &getResults() const { return access->Results; } + const Analysis_t &getAnalysis() const { return access->Analysis; } + }; + + enum Sorting { + ByThreads, ByAlg + }; + + //! 
Data and output types + enum DataOutput { + // Verbosity level enumeration + Statistic = 1, //< Analytical data - computed after all iterations and rounds passed + Result = 2, //< Testing data - collected after all iterations passed + Iteration = 3, //< Verbose data - collected at each iteration (for each size - in case of containers) + // ExtraVerbose is not applicabe yet :) be happy, but flexibility is always welcome + + // Next constants are bit-fields + Stdout = 1<<8, //< Output to the console + TextFile = 1<<9, //< Output to plain text file "name.txt" (delimiter is TAB by default) + ExcelXML = 1<<10, //< Output to Excel-readable XML-file "name.xml" + HTMLFile = 1<<11 //< Output to HTML file "name.html" + }; + + //! Constructor. Specify tests set name which used as name of output files + StatisticsCollector(const char *name, Sorting mode = ByThreads, const char *fmt = "%g") + : CurrentKey(NULL), ResultsFmt(fmt), Name(name), SortMode(mode) {} + + ~StatisticsCollector(); + + //! Set tests set title, supporting printf-like arguments + void SetTitle(const char *fmt, ...); + + //! Specify next test key + TestCase SetTestCase(const char *name, const char *mode, int threads); + //! Specify next test key + void SetTestCase(const TestCase &t) { SetTestCase(t.getName(), t.getMode(), t.getThreads()); } + //! Reserve specified number of rounds. Use for effeciency. Used mostly internally + void ReserveRounds(size_t index); + //! Add result of the measure + void AddRoundResult(const TestCase &, value_t v); + //! Add result of the current measure + void AddRoundResult(value_t v) { if(CurrentKey) AddRoundResult(TestCase(CurrentKey), v); } + //! Add title of round + void SetRoundTitle(size_t index, const char *fmt, ...); + //! Add numbered title of round + void SetRoundTitle(size_t index, int num) { SetRoundTitle(index, "%d", num); } + //! 
Get number of rounds + size_t GetRoundsCount() const { return RoundTitles.size(); } + // Set statistic value for the test + void AddStatisticValue(const TestCase &, const char *type, const char *fmt, ...); + // Set statistic value for the current test + void AddStatisticValue(const char *type, const char *fmt, ...); + //! Add Excel-processing formulas. @arg formula can contain more than one instances of + //! ROUNDS template which transforms into the range of cells with result values + //TODO://! #1 .. #n templates represent data cells from the first to the last + //TODO: merge with Analisis + void SetStatisticFormula(const char *name, const char *formula); + + //! Data output + void Print(int dataOutput, const char *ModeName = "Mode"); + +private: + Sorting SortMode; +}; + +//! using: Func(const char *fmt, ...) { vargf2buff(buff, 128, fmt);... +#define vargf2buff(name, size, fmt) char name[size]; memset(name, 0, size); va_list args; va_start(args, fmt); vsnprintf( name, size-1, fmt, args) + +inline std::string Format(const char *fmt, ...) { + vargf2buff(buf, 1024, fmt); // from statistics.h + return std::string(buf); +} + +#ifdef STATISTICS_INLINE +#include "statistics.cpp" +#endif +#endif //__STATISTICS_H__ diff --git a/dep/tbb/src/perf/statistics_xml.h b/dep/tbb/src/perf/statistics_xml.h new file mode 100644 index 000000000..7521825be --- /dev/null +++ b/dep/tbb/src/perf/statistics_xml.h @@ -0,0 +1,208 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +const char XMLBRow[]= +" <Row>\n"; + +const char XMLERow[]= +" </Row>\n"; + +const char XMLHead[]= +"<?xml version=\"1.0\"?>\n" +"<?mso-application progid=\"Excel.Sheet\"?>\n\ +<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"\n\ + xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n\ + xmlns:x=\"urn:schemas-microsoft-com:office:excel\"\n\ + xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\"\n\ + xmlns:html=\"http://www.w3.org/TR/REC-html40\">\n\ + <DocumentProperties xmlns=\"urn:schemas-microsoft-com:office:office\">\n\ + <Author>%s</Author>\n\ + <Created>%s</Created>\n\ + <Company>Intel Corporation</Company>\n\ + </DocumentProperties>\n\ + <ExcelWorkbook xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\ + <RefModeR1C1/>\n\ + </ExcelWorkbook>\n"; + + const char XMLStyles[]= + " <Styles>\n\ + <Style ss:ID=\"Default\" ss:Name=\"Normal\">\n\ + <Alignment ss:Vertical=\"Bottom\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\ + </Style>\n\ + <Style ss:ID=\"s26\">\n\ + <Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\ + <Borders>\n\ + <Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" 
ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + </Borders>\n\ + <Interior ss:Color=\"#FFFF99\" ss:Pattern=\"Solid\"/>\n\ + </Style>\n\ + <Style ss:ID=\"s25\">\n\ + <Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\ + <Borders>\n\ + <Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + </Borders>\n\ + <Interior ss:Color=\"#CCFFFF\" ss:Pattern=\"Solid\"/>\n\ + </Style>\n\ + <Style ss:ID=\"s24\">\n\ + <Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\ + <Borders>\n\ + <Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + </Borders>\n\ + <Interior ss:Color=\"#CCFFCC\" ss:Pattern=\"Solid\"/>\n\ + </Style>\n\ + <Style ss:ID=\"s23\">\n\ + <Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\ + <Borders>\n\ + <Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + <Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\ + </Borders>\n\ + </Style>\n\ + </Styles>\n"; + +const char XMLBeginSheet[]= +" <Worksheet ss:Name=\"%s\">\n"; + +const char XMLNames[]= +" 
<Names>\n\ + <NamedRange ss:Name=\"_FilterDatabase\" ss:RefersTo=\"R%dC%d:R%dC%d\" ss:Hidden=\"1\"/>\n\ + </Names>\n"; + +const char XMLBeginTable[]= +" <Table ss:ExpandedColumnCount=\"%d\" ss:ExpandedRowCount=\"%d\" x:FullColumns=\"1\"\n\ + x:FullRows=\"1\">\n"; + +const char XMLColumsHorizontalTable[]= +" <Column ss:Index=\"1\" ss:Width=\"108.75\"/>\n\ + <Column ss:Index=\"%d\" ss:Width=\"77.25\" ss:Span=\"%d\"/>\n"; + +const char XMLColumsVerticalTable[]= +" <Column ss:Index=\"1\" ss:Width=\"77.25\" ss:Span=\"%d\"/>\n"; + +const char XMLNameAndTime[]= +" <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n\ + <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n\ + <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLTableParamAndTitle[]= +" <Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\ + <Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\ + <Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\ + <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +//-------------- +const char XMLCellTopName[]= +" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">Name</Data></Cell>\n"; +const char XMLCellTopThread[]= +" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">Threads</Data></Cell>\n"; +const char XMLCellTopMode[]= +" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">%s</Data></Cell>\n"; +//--------------------- +const char XMLAnalysisTitle[]= +" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLCellName[]= +" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLCellThread[]= +" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"Number\">%d</Data></Cell>\n"; + +const char XMLCellMode[]= +" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLCellAnalysis[]= +" <Cell ss:StyleID=\"s26\"><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLCellFormula[]= +" <Cell ss:StyleID=\"s26\" ss:Formula=\"%s\"><Data ss:Type=\"Number\"></Data></Cell>\n"; + +const 
char XMLCellData[]= +" <Cell ss:StyleID=\"s23\"><Data ss:Type=\"Number\">%g</Data></Cell>\n"; + +const char XMLMergeRow[]= +" <Cell ss:StyleID=\"s23\" ss:MergeAcross=\"%d\" ><Data ss:Type=\"String\"></Data></Cell>\n"; + +const char XMLCellEmptyWhite[]= +" <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n"; + +const char XMLCellEmptyTitle[]= +" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\"></Data></Cell>\n"; + +const char XMLEndTable[]= +" </Table>\n"; + +const char XMLAutoFilter[]= +" <AutoFilter x:Range=\"R%dC%d:R%dC%d\" xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\ + </AutoFilter>\n"; + +const char XMLEndWorkSheet[]= + " </Worksheet>\n"; + +const char XMLWorkSheetProperties[]= +" <WorksheetOptions xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\ + <Unsynced/>\n\ + <Selected/>\n\ + <FreezePanes/>\n\ + <FrozenNoSplit/>\n\ + <SplitHorizontal>%d</SplitHorizontal>\n\ + <TopRowBottomPane>%d</TopRowBottomPane>\n\ + <SplitVertical>%d</SplitVertical>\n\ + <LeftColumnRightPane>%d</LeftColumnRightPane>\n\ + <ActivePane>0</ActivePane>\n\ + <Panes>\n\ + <Pane>\n\ + <Number>3</Number>\n\ + </Pane>\n\ + <Pane>\n\ + <Number>1</Number>\n\ + </Pane>\n\ + <Pane>\n\ + <Number>2</Number>\n\ + </Pane>\n\ + <Pane>\n\ + <Number>0</Number>\n\ + <ActiveRow>0</ActiveRow>\n\ + <ActiveCol>%d</ActiveCol>\n\ + </Pane>\n\ + </Panes>\n\ + <ProtectObjects>False</ProtectObjects>\n\ + <ProtectScenarios>False</ProtectScenarios>\n\ + </WorksheetOptions>\n"; + +const char XMLEndWorkbook[]= + "</Workbook>\n"; diff --git a/dep/tbb/src/perf/time_base.cpp b/dep/tbb/src/perf/time_base.cpp new file mode 100644 index 000000000..78cbef2e1 --- /dev/null +++ b/dep/tbb/src/perf/time_base.cpp @@ -0,0 +1,262 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include "tbb/blocked_range.h" +#include "tbb/parallel_for.h" +#include "tbb/parallel_reduce.h" + + +#define NRUNS 10 +#define ONE_TEST_DURATION 0.01 + +#include "perf_util.h" + + +#define NUM_CHILD_TASKS 128 +#define NUM_ROOT_TASKS 16 + +#define N 1000000 +#define FINE_GRAIN 50 +#define MED_GRAIN 500 +#define COARSE_GRAIN 10000 + + +typedef ANCHOR_TYPE count_type; +typedef tbb::blocked_range<count_type> range_type; + +const count_type NUM_leaf_tasks = NUM_CHILD_TASKS * NUM_ROOT_TASKS; + +const count_type N_finest = (count_type)(N/log((double)N)/10); +const count_type N_fine = N_finest * 10; + + +class static_task_holder { +public: + tbb::task *my_simple_leaf_task_ptr; + + static_task_holder (); +}; + +static static_task_holder s_tasks; + + +static size_t s_num_iterations = 0; + + +class simple_leaf_task : public tbb::task +{ + task* execute () { + for ( size_t i=0; i < s_num_iterations; ++i ) + util::anchor += i; + //util::anchor += size_t(log10((double)util::anchor)*10); + return NULL; + } +}; + +class simple_root_task : public tbb::task +{ + task* execute () { + set_ref_count(NUM_leaf_tasks + 1); + for ( size_t i = 0; i < NUM_leaf_tasks; ++i ) { + simple_leaf_task &t = *new( allocate_child() ) simple_leaf_task; + spawn(t); + } + wait_for_all(); + return NULL; + } +}; + +void Work1 () { + for ( size_t i=0; i < NUM_leaf_tasks; ++i ) + s_tasks.my_simple_leaf_task_ptr->execute(); +} + +void Test1_1 () { + tbb::empty_task &r = *new( tbb::task::allocate_root() ) tbb::empty_task; + r.set_ref_count(NUM_leaf_tasks + 1); + for ( size_t i = 0; i < NUM_leaf_tasks; ++i ) { + simple_leaf_task &t = *new( r.allocate_child() ) simple_leaf_task; + r.spawn(t); + } + r.wait_for_all(); + r.destroy(r); +} + +void Test1_2 () +{ + simple_root_task &r = *new( tbb::task::allocate_root() ) simple_root_task; + tbb::task::spawn_root_and_wait(r); +} + + +class children_launcher_task : public tbb::task +{ + task* execute () { + set_ref_count(NUM_CHILD_TASKS + 1); + for ( size_t i = 0; 
i < NUM_CHILD_TASKS; ++i ) { + simple_leaf_task &t = *new( allocate_child() ) simple_leaf_task; + spawn(t); + } + wait_for_all(); + return NULL; + } +}; + +class root_launcher_task : public tbb::task +{ + task* execute () { + children_launcher_task &r = *new( allocate_root() ) children_launcher_task; + spawn_root_and_wait(r); + return NULL; + } +}; + +class hierarchy_root_task : public tbb::task +{ + task* execute () { + tbb::task_list tl; + for ( size_t i = 0; i < NUM_ROOT_TASKS; ++i ) { + root_launcher_task &r = *new( allocate_root() ) root_launcher_task; + tl.push_back(r); + } + spawn_root_and_wait(tl); + return NULL; + } +}; + +void Test1_3 () +{ + hierarchy_root_task &r = *new( tbb::task::allocate_root() ) hierarchy_root_task; + tbb::task::spawn_root_and_wait(r); +} + + +static size_t s_range = N, + s_grain = 1; + +class simple_pfor_body { +public: + void operator()( const range_type& r ) const { + count_type end = r.end(); + for( count_type i = r.begin(); i < end; ++i ) + util::anchor += i; + } +}; + +void Work2 () { + simple_pfor_body body; + range_type range(0, s_range, s_grain); + body(range); +} + +void Test2 () { + tbb::parallel_for( range_type(0, s_range, s_grain), simple_pfor_body() ); +} + +void Test2_0 () { + volatile count_type zero = 0; + tbb::parallel_for( range_type(0, zero, 1), simple_pfor_body() ); +} + + +class simple_preduce_body { +public: + count_type my_sum; + simple_preduce_body () : my_sum(0) {} + simple_preduce_body ( simple_preduce_body&, tbb::split ) : my_sum(0) {} + void join( simple_preduce_body& rhs ) { my_sum += rhs.my_sum;} + void operator()( const range_type& r ) { + count_type end = r.end(); + for( count_type i = r.begin(); i < end; ++i ) + util::anchor += i; + my_sum = util::anchor; + } +}; + +void Work3 () { + simple_preduce_body body; + range_type range(0, s_range, s_grain); + body(range); +} + +void Test3 () { + simple_preduce_body body; + tbb::parallel_reduce( range_type(0, s_range, s_grain), body ); +} + +void Test3_0 () 
{ + volatile count_type zero = 0; + simple_preduce_body body; + tbb::parallel_reduce( range_type(0, zero, 1), body ); +} + + +static_task_holder::static_task_holder () { + static simple_leaf_task s_t1; + my_simple_leaf_task_ptr = &s_t1; +} + +void Test () { + const size_t num_task_tree_workloads = 4; + size_t task_tree_workloads[num_task_tree_workloads] = {0, 50, 500, 10000}; + for (size_t i = 0; i < num_task_tree_workloads; ++i ) { + size_t n = task_tree_workloads[i]; + s_num_iterations = n; + CalcSequentialTime(Work1); + RunTest ("Bunch of leaves: %d adds/task", n, Test1_1); + RunTest ("Simple task tree: %d adds/task", n, Test1_2); + RunTest ("Complex task tree: %d adds/task", n, Test1_3); + } + + // Using N_fine constant in the body of this function results in incorrect code + // generation by icl 10.1.014 + const size_t num_alg_workloads = 4; + size_t alg_ranges[num_alg_workloads] = {N_fine/10, N_fine, N, N}; + size_t alg_grains[num_alg_workloads] = {1, FINE_GRAIN, MED_GRAIN, COARSE_GRAIN}; + + //RunTest ("Empty pfor", 0, Test2_0); + for (size_t i = 0; i < num_alg_workloads; ++i ) { + s_range = alg_ranges[i]; + s_grain = alg_grains[i]; + CalcSequentialTime(Work2); + RunTest ("pfor: %d adds/body", s_grain, Test2); + } + + //RunTest ("Empty preduce", Test3_0); + for (size_t i = 0; i < num_alg_workloads; ++i ) { + s_range = alg_ranges[i]; + s_grain = alg_grains[i]; + CalcSequentialTime(Work3); + RunTest ("preduce: %d adds/body", s_grain, Test3); + } +} + +int main( int argc, char* argv[] ) { + test_main(argc, argv); + return 0; +} diff --git a/dep/tbb/src/perf/time_framework.h b/dep/tbb/src/perf/time_framework.h new file mode 100644 index 000000000..d301c2b3d --- /dev/null +++ b/dep/tbb/src/perf/time_framework.h @@ -0,0 +1,343 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#ifndef __TIME_FRAMEWORK_H__ +#define __TIME_FRAMEWORK_H__ + +#include <cstdlib> +#include <math.h> +#include <vector> +#include <string> +#include <sstream> +#include "tbb/tbb_stddef.h" +#include "tbb/task_scheduler_init.h" +#include "tbb/tick_count.h" +#include "../test/harness.h" +#include "../test/harness_barrier.h" +#define STATISTICS_INLINE +#include "statistics.h" + +#ifndef ARG_TYPE +typedef intptr_t arg_t; +#else +typedef ARG_TYPE arg_t; +#endif + +class Timer { + tbb::tick_count tick; +public: + Timer() { tick = tbb::tick_count::now(); } + double get_time() { return (tbb::tick_count::now() - tick).seconds(); } + double diff_time(const Timer &newer) { return (newer.tick - tick).seconds(); } + double mark_time() { tbb::tick_count t1(tbb::tick_count::now()), t2(tick); tick = t1; return (t1 - t2).seconds(); } + double mark_time(const Timer &newer) { tbb::tick_count t(tick); tick = newer.tick; return (tick - t).seconds(); } +}; + +class TesterBase /*: public tbb::internal::no_copy*/ { +protected: + friend class TestProcessor; + friend class TestRunner; + + //! it is barrier for synchronizing between threads + Harness::SpinBarrier *barrier; + + //! number of tests per this tester + const int tests_count; + + //! number of threads to operate + int threads_count; + + //! some value for tester + arg_t value; + + // avoid false sharing + char pad[128 - sizeof(arg_t) - sizeof(int)*2 - sizeof(void*) ]; + +public: + //! init tester base. @arg ntests is number of embeded tests in this tester. + TesterBase(int ntests) + : barrier(NULL), tests_count(ntests) + {} + virtual ~TesterBase() {} + + //! internal function + void base_init(arg_t v, int t, Harness::SpinBarrier &b) { + threads_count = t; + barrier = &b; + value = v; + init(); + } + + //! optionally override to init after value and threads count were set. + virtual void init() { } + + //! Override to provide your names + virtual std::string get_name(int testn) { + return Format("test %d", testn); + } + + //! 
optionally override to init test mode just before execution for a given thread number. + virtual void test_prefix(int testn, int threadn) { } + + //! Override to provide main test's entry function returns a value to record + virtual value_t test(int testn, int threadn) = 0; + + //! Type of aggregation from results of threads + enum result_t { + SUM, AVG, MIN, MAX + }; + + //! Override to change result type for the test. Return postfix for test name or 0 if result type is not needed. + virtual const char *get_result_type(int /*testn*/, result_t type) const { + return type == AVG ? "" : 0; // only average result by default + } +}; + +/***** +a user's tester concept: + +class tester: public TesterBase { +public: + //! init tester with known amount of work + tester() : TesterBase(<user-specified tests count>) { ... } + + //! run a test with sequental number @arg test_number for @arg thread. + / *override* / value_t test(int test_number, int thread); +}; + +******/ + +template<typename Tester, int scale = 1> +class TimeTest : public Tester { + /*override*/ value_t test(int testn, int threadn) { + Timer timer; + Tester::test(testn, threadn); + return timer.get_time() * double(scale); + } +}; + +template<typename Tester> +class NanosecPerValue : public Tester { + /*override*/ value_t test(int testn, int threadn) { + Timer timer; + Tester::test(testn, threadn); + // return time (ns) per value + return timer.get_time()*1000000.0/double(Tester::value); + } +}; + +template<typename Tester, int scale = 1> +class ValuePerSecond : public Tester { + /*override*/ value_t test(int testn, int threadn) { + Timer timer; + Tester::test(testn, threadn); + // return time value per seconds/scale + return double(Tester::value)/(timer.get_time()*scale); + } +}; + +// operate with single tester +class TestRunner { + friend class TestProcessor; + friend struct RunArgsBody; + TestRunner(const TestRunner &); // don't copy + + const char *tester_name; + StatisticsCollector *stat; + 
std::vector<std::vector<StatisticsCollector::TestCase> > keys; + +public: + TesterBase &tester; + + template<typename Test> + TestRunner(const char *name, Test *test) + : tester_name(name), tester(*static_cast<TesterBase*>(test)) + {} + + ~TestRunner() { delete &tester; } + + void init(arg_t value, int threads, Harness::SpinBarrier &barrier, StatisticsCollector *s) { + tester.base_init(value, threads, barrier); + stat = s; + keys.resize(tester.tests_count); + for(int testn = 0; testn < tester.tests_count; testn++) { + keys[testn].resize(threads); + std::string test_name(tester.get_name(testn)); + for(int threadn = 0; threadn < threads; threadn++) + keys[testn][threadn] = stat->SetTestCase(tester_name, test_name.c_str(), threadn); + } + } + + void run_test(int threadn) { + for(int testn = 0; testn < tester.tests_count; testn++) { + tester.test_prefix(testn, threadn); + tester.barrier->wait(); // <<<<<<<<<<<<<<<<< Barrier before running test mode + value_t result = tester.test(testn, threadn); + stat->AddRoundResult(keys[testn][threadn], result); + } + } + + void post_process(StatisticsCollector &report) { + const int threads = tester.threads_count; + for(int testn = 0; testn < tester.tests_count; testn++) { + size_t coln = keys[testn][0].getResults().size()-1; + value_t rsum = keys[testn][0].getResults()[coln]; + value_t rmin = rsum, rmax = rsum; + for(int threadn = 1; threadn < threads; threadn++) { + value_t result = keys[testn][threadn].getResults()[coln]; + rsum += result; // for both SUM or AVG + if(rmin > result) rmin = result; + if(rmax < result) rmax = result; + } + std::string test_name(tester.get_name(testn)); + const char *rname = tester.get_result_type(testn, TesterBase::SUM); + if( rname ) { + report.SetTestCase(tester_name, (test_name+rname).c_str(), threads); + report.AddRoundResult(rsum); + } + rname = tester.get_result_type(testn, TesterBase::MIN); + if( rname ) { + report.SetTestCase(tester_name, (test_name+rname).c_str(), threads); + 
report.AddRoundResult(rmin); + } + rname = tester.get_result_type(testn, TesterBase::AVG); + if( rname ) { + report.SetTestCase(tester_name, (test_name+rname).c_str(), threads); + report.AddRoundResult(rsum / threads); + } + rname = tester.get_result_type(testn, TesterBase::MAX); + if( rname ) { + report.SetTestCase(tester_name, (test_name+rname).c_str(), threads); + report.AddRoundResult(rmax); + } + } + } +}; + +struct RunArgsBody { + const vector<TestRunner*> &run_list; + RunArgsBody(const vector<TestRunner*> &a) : run_list(a) { } +#ifndef __TBB_parallel_for_H + void operator()(int thread) const { +#else + void operator()(const tbb::blocked_range<int> &r) const { + ASSERT( r.begin() + 1 == r.end(), 0); + int thread = r.begin(); +#endif + for(size_t i = 0; i < run_list.size(); i++) + run_list[i]->run_test(thread); + } +}; + +//! Main test processor. +/** Override or use like this: + class MyTestCollection : public TestProcessor { + void factory(arg_t value, int threads) { + process( value, threads, + run("my1", new tester<my1>() ), + run("my2", new tester<my2>() ), + end ); + if(value == threads) + stat->Print(); + } +}; +*/ + +class TestProcessor { + friend class TesterBase; + + // <threads, collector> + typedef std::map<int, StatisticsCollector *> statistics_collection; + statistics_collection stat_by_threads; + +protected: + // Members + const char *collection_name; + // current stat + StatisticsCollector *stat; + // token + size_t end; + +public: + StatisticsCollector report; + + // token of tests list + template<typename Test> + TestRunner *run(const char *name, Test *test) { + return new TestRunner(name, test); + } + + // iteration processing + void process(arg_t value, int threads, ...) 
{ + // prepare items + stat = stat_by_threads[threads]; + if(!stat) { + stat_by_threads[threads] = stat = new StatisticsCollector((collection_name + Format("@%d", threads)).c_str(), StatisticsCollector::ByAlg); + stat->SetTitle("Detailed log of %s running with %d threads.", collection_name, threads); + } + Harness::SpinBarrier barrier(threads); + // init args + va_list args; va_start(args, threads); + vector<TestRunner*> run_list; run_list.reserve(16); + while(true) { + TestRunner *item = va_arg(args, TestRunner*); + if( !item ) break; + item->init(value, threads, barrier, stat); + run_list.push_back(item); + } + va_end(args); + std::ostringstream buf; + buf << value; + const size_t round_number = stat->GetRoundsCount(); + stat->SetRoundTitle(round_number, buf.str().c_str()); + report.SetRoundTitle(round_number, buf.str().c_str()); + // run them +#ifndef __TBB_parallel_for_H + NativeParallelFor(threads, RunArgsBody(run_list)); +#else + tbb::parallel_for(tbb::blocked_range<int>(0,threads,1), RunArgsBody(run_list)); +#endif + // destroy args + for(size_t i = 0; i < run_list.size(); i++) { + run_list[i]->post_process(report); + delete run_list[i]; + } + } + +public: + TestProcessor(const char *name, StatisticsCollector::Sorting sort_by = StatisticsCollector::ByAlg) + : collection_name(name), stat(NULL), end(0), report(collection_name, sort_by) + { } + + ~TestProcessor() { + for(statistics_collection::iterator i = stat_by_threads.begin(); i != stat_by_threads.end(); i++) + delete i->second; + } +}; + +#endif// __TIME_FRAMEWORK_H__ diff --git a/dep/tbb/src/perf/time_hash_map.cpp b/dep/tbb/src/perf/time_hash_map.cpp new file mode 100644 index 000000000..a72cf48a0 --- /dev/null +++ b/dep/tbb/src/perf/time_hash_map.cpp @@ -0,0 +1,366 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +// configuration: + +//! enable/disable std::map tests +#define STDTABLE 1 + +//! enable/disable old implementation tests (correct include file also) +#define OLDTABLE 0 +#define OLDTABLEHEADER "tbb/concurrent_hash_map-4078.h"//-4329 + +//! 
enable/disable experimental implementation tests (correct include file also) + +#define TESTTABLE 0 +#define TESTTABLEHEADER "tbb/concurrent_unordered_map.h" + +////////////////////////////////////////////////////////////////////////////////// + +#include <cstdlib> +#include <math.h> +#include "tbb/tbb_stddef.h" +#include <vector> +#include <map> +// needed by hash_maps +#include <stdexcept> +#include <iterator> +#include <algorithm> // std::swap +#include <utility> // Need std::pair from here +#include "tbb/cache_aligned_allocator.h" +#include "tbb/tbb_allocator.h" +#include "tbb/spin_rw_mutex.h" +#include "tbb/aligned_space.h" +#include "tbb/atomic.h" +// for test +#include "tbb/spin_mutex.h" +#include "time_framework.h" + + +using namespace tbb; +using namespace tbb::internal; + +struct IntHashCompare { + size_t operator() ( int x ) const { return x; } + bool operator() ( int x, int y ) const { return x==y; } + static long hash( int x ) { return x; } + bool equal( int x, int y ) const { return x==y; } +}; + +namespace version_current { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } + #include "tbb/concurrent_hash_map.h" +} +typedef version_current::tbb::concurrent_hash_map<int,int,IntHashCompare> IntTable; + +#if OLDTABLE +#undef __TBB_concurrent_hash_map_H +namespace version_base { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } + #include OLDTABLEHEADER +} +typedef version_base::tbb::concurrent_hash_map<int,int,IntHashCompare> OldTable; +#endif + +#if TESTTABLE +#undef __TBB_concurrent_hash_map_H +namespace version_new { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } + #include TESTTABLEHEADER +} +typedef version_new::tbb::concurrent_unordered_map<int,int,IntHashCompare,IntHashCompare> TestTable; +#define TESTTABLE 1 +#endif + +/////////////////////////////////////// + +static const char *map_testnames[] = { 
+ "1.insert", "2.count(w/rehash)", "3.find/wr", "4.erase" +}; + +template<typename TableType> +struct TestTBBMap : TesterBase { + typedef typename TableType::accessor accessor; + typedef typename TableType::const_accessor const_accessor; + TableType Table; + int n_items; + + TestTBBMap() : TesterBase(4) {} + void init() { n_items = value/threads_count; } + + std::string get_name(int testn) { + return std::string(map_testnames[testn]); + } + + double test(int test, int t) + { + switch(test) { + case 0: // fill + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; + Table.insert( a, i ); + a->second = 0; + } + break; + case 1: // work1 + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + size_t c = Table.count( i ); + ASSERT( c == 1, NULL); + } + break; + case 2: // work2 + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; + Table.find( a, i ); + ASSERT( !a->second, "A key should be incremented only once"); + a->second += 1; + } + break; + case 3: // clean + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + ASSERT( Table.erase( i ), NULL); + } + } + return 0; + } +}; + +template<typename M> +struct TestSTLMap : TesterBase { + std::map<int, int> Table; + M mutex; + + int n_items; + TestSTLMap() : TesterBase(4) {} + void init() { n_items = value/threads_count; } + + std::string get_name(int testn) { + return std::string(map_testnames[testn]); + } + + double test(int test, int t) + { + switch(test) { + case 0: // fill + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + typename M::scoped_lock with(mutex); + Table[i] = 0; + } + break; + case 1: // work1 + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + typename M::scoped_lock with(mutex); + size_t c = Table.count(i); + ASSERT( c == 1, NULL); + } + break; + case 2: // work2 + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + typename M::scoped_lock with(mutex); + Table[i] += 1; + } + break; + case 3: // clean + for(int i = t*n_items, e = 
(t+1)*n_items; i < e; i++) { + typename M::scoped_lock with(mutex); + Table.erase(i); + } + } + return 0; + } +}; + +class fake_mutex { + int a; +public: + class scoped_lock { + fake_mutex *p; + + public: + scoped_lock() {} + scoped_lock( fake_mutex &m ) { p = &m; } + ~scoped_lock() { p->a = 0; } + void acquire( fake_mutex &m ) { p = &m; } + void release() { } + }; +}; + +class test_hash_map : public TestProcessor { +public: + test_hash_map() : TestProcessor("test_hash_map") {} + void factory(int value, int threads) { + if(Verbose) printf("Processing with %d threads: %d...\n", threads, value); + process( value, threads, +#if STDTABLE + run("std::map ", new NanosecPerValue<TestSTLMap<spin_mutex> >() ), +#endif +#if OLDTABLE + run("old::hmap", new NanosecPerValue<TestTBBMap<OldTable> >() ), +#endif + run("tbb::hmap", new NanosecPerValue<TestTBBMap<IntTable> >() ), +#if TESTTABLE + run("new::hmap", new NanosecPerValue<TestTBBMap<TestTable> >() ), +#endif + end ); + //stat->Print(StatisticsCollector::Stdout); + if(value >= 2097152) stat->Print(StatisticsCollector::HTMLFile); + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// +template<typename TableType> +struct TestHashMapFind : TesterBase { + typedef typename TableType::accessor accessor; + typedef typename TableType::const_accessor const_accessor; + TableType Table; + int n_items; + + std::string get_name(int testn) { + return std::string(!testn?"find":"insert"); + } + + TestHashMapFind() : TesterBase(2) {} + void init() { + n_items = value/threads_count; + for(int i = 0; i < value; i++) { + accessor a; Table.insert( a, i ); + } + } + + double test(int test, int t) + { + switch(test) { + case 0: // find + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; + Table.find( a, i ); + a->second = i; + } + break; + case 1: // insert + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; + Table.insert( a, i ); + a->second = -i; + } + 
break; + } + return 0; + } +}; + +const int test2_size = 65536; +int Data[test2_size]; + +template<typename TableType> +struct TestHashCountStrings : TesterBase { + typedef typename TableType::accessor accessor; + typedef typename TableType::const_accessor const_accessor; + TableType Table; + int n_items; + + std::string get_name(int testn) { + return !testn?"insert":"find"; + } + + TestHashCountStrings() : TesterBase(2) {} + void init() { + n_items = value/threads_count; + } + + double test(int testn, int t) + { + if(!testn) { + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; Table.insert( a, Data[i] ); + } + } else { // + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + accessor a; Table.find( a, Data[i] ); + } + } + return 0; + } +}; + +class test_hash_map_find : public TestProcessor { +public: + test_hash_map_find() : TestProcessor("test_hash_map_find") {} + void factory(int value, int threads) { + if(Verbose) printf("Processing with %d threads: %d...\n", threads, value); + process( value, threads, +#if OLDTABLE + run("Filled old::hashmap", new NanosecPerValue<TestHashMapFind<OldTable> >() ), +#endif + run("Filled tbb::hashmap", new NanosecPerValue<TestHashMapFind<IntTable> >() ), +#if TESTTABLE + run("Filled new::hashmap", new NanosecPerValue<TestHashMapFind<TestTable> >() ), +#endif +#if OLDTABLE + run("CountStr old::hashmap", new TimeTest<TestHashCountStrings<OldTable> >() ), +#endif + run("CountStr tbb::hashmap", new TimeTest<TestHashCountStrings<IntTable> >() ), +#if TESTTABLE + run("CountStr new::hashmap", new TimeTest<TestHashCountStrings<TestTable> >() ), +#endif + end ); + //stat->Print(StatisticsCollector::HTMLFile); + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +int main(int argc, char* argv[]) { + if(argc>1) Verbose = true; + //if(argc>2) ExtraVerbose = true; + MinThread = 1; MaxThread = task_scheduler_init::default_num_threads(); + ParseCommandLine( argc, argv 
); + + ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to be loaded. Please build it by:\n\t\tmake tbbmalloc"); + + { + test_hash_map_find test_find; int o = test2_size; + for(int i = 0; i < o; i++) + Data[i] = i%60; + for( int t=MinThread; t <= MaxThread; t++) + test_find.factory(o, t); + test_find.report.SetTitle("Nanoseconds per operation of finding operation (Mode) for %d items", o); + test_find.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); + } + { + test_hash_map the_test; + for( int t=MinThread; t <= MaxThread; t*=2) + for( int o=/*2048*/(1<<8)*8; o<2200000; o*=2 ) + the_test.factory(o, t); + the_test.report.SetTitle("Nanoseconds per operation of (Mode) for N items in container (Name)"); + the_test.report.SetStatisticFormula("1AVG per size", "=AVERAGE(ROUNDS)"); + the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); + } + return 0; +} + diff --git a/dep/tbb/src/perf/time_hash_map_fill.cpp b/dep/tbb/src/perf/time_hash_map_fill.cpp new file mode 100644 index 000000000..1b9644724 --- /dev/null +++ b/dep/tbb/src/perf/time_hash_map_fill.cpp @@ -0,0 +1,155 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +// configuration: + +// Size of input array +const int INPUT_SIZE = 2000000; +// Specify list of unique percents to test against. Max - 10 +#define SOURCE_ARRAY UNIQUE_PERCENT(5); UNIQUE_PERCENT(10); UNIQUE_PERCENT(20); UNIQUE_PERCENT(40) + +// enable/disable tests for: +#define BOX1 "TBB" +#define BOX1TEST ValuePerSecond<Uniques<tbb::concurrent_hash_map<int,int> >, 1000000/*ns*/> +#define BOX1HEADER "tbb/concurrent_hash_map.h" + +// enable/disable tests for: +#define BOX2 "OLD" +#define BOX2TEST ValuePerSecond<Uniques<tbb::concurrent_hash_map<int,int> >, 1000000/*ns*/> +#define BOX2HEADER "tbb/concurrent_hash_map-5468.h" + +#define TBB_USE_THREADING_TOOLS 0 +////////////////////////////////////////////////////////////////////////////////// + +#include <cstdlib> +#include <math.h> +#include "tbb/tbb_stddef.h" +#include <vector> +#include <map> +// needed by hash_maps +#include <stdexcept> +#include <iterator> +#include <algorithm> // std::swap +#include <utility> // Need std::pair +#include <cstring> // Need std::memset +#include <typeinfo> +#include "tbb/cache_aligned_allocator.h" +#include "tbb/tbb_allocator.h" +#include "tbb/spin_rw_mutex.h" +#include "tbb/aligned_space.h" +#include "tbb/atomic.h" +// 
for test +#include "tbb/spin_mutex.h" +#include "time_framework.h" + + +using namespace tbb; +using namespace tbb::internal; + +///////////////////////////////////////////////////////////////////////////////////////// +// Input data built for SOURCE_ARRAY settings +int Mixtures = 0; +int Percents[10]; +int *Data[10]; + +// Main test class used to run the timing tests. All overridden methods are called by the framework +template<typename TableType> +struct Uniques : TesterBase { + typedef typename TableType::accessor accessor; + typedef typename TableType::const_accessor const_accessor; + TableType *Table; + int n_items; + + // Returns name of test mode specified by number + /*override*/ std::string get_name(int testn) { + return Format("%d%% uniques", Percents[testn]); + } + + // Initializes base class with number of test modes + Uniques() : TesterBase(Mixtures), Table(0) {} + ~Uniques() { if(Table) delete Table; } + + // Informs the class that value and threads number become known + /*override*/ void init() { + n_items = value/threads_count; + } + + // Informs the class that the test mode for specified thread is about to start + /*override*/ void test_prefix(int testn, int t) { + barrier->wait(); + if( t ) return; + if(Table) delete Table; + Table = new TableType(MaxThread*4); + } + + // Executes test mode for a given thread. Return value is ignored when used with timing wrappers. 
+ /*override*/ double test(int testn, int t) + { + for(int i = t*n_items, e = (t+1)*n_items; i < e; i++) { + Table->insert( std::make_pair(Data[testn][i],t) ); + } + return 0; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +// Using BOX declarations from configuration +#include "time_sandbox.h" + +// Prepares the input data for given unique percent +inline void UNIQUE_PERCENT(int p) { + Percents[Mixtures] = p; + Data[Mixtures] = new int[INPUT_SIZE]; + int uniques = INPUT_SIZE/100*p; + srand(10101); + for(int i = 0; i < INPUT_SIZE; i++) + Data[Mixtures][i] = rand()%uniques; + Mixtures++; +} + +int main(int argc, char* argv[]) { + if(argc>1) Verbose = true; + //if(argc>2) ExtraVerbose = true; + MinThread = 1; MaxThread = task_scheduler_init::default_num_threads(); + ParseCommandLine( argc, argv ); + + ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to be loaded. Please build it by:\n\t\tmake tbbmalloc"); + SOURCE_ARRAY; // prepare source array + + { + // Declares test processor + TEST_PROCESSOR_NAME the_test("time_hash_map_fill"/*, StatisticsCollector::ByThreads*/); + for( int t=MinThread; t <= MaxThread; t++) + the_test.factory(INPUT_SIZE, t); // executes the tests specified in BOX-es for given 'value' and threads + the_test.report.SetTitle("Operations per nanosecond", INPUT_SIZE); + the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); // Write files + } + return 0; +} + diff --git a/dep/tbb/src/perf/time_locked_work.cpp b/dep/tbb/src/perf/time_locked_work.cpp new file mode 100644 index 000000000..62b9f38a6 --- /dev/null +++ b/dep/tbb/src/perf/time_locked_work.cpp @@ -0,0 +1,174 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +////// Test configuration //////////////////////////////////////////////////// +#define SECONDS_RATIO 1000000 // microseconds + +#ifndef REPEAT_K +#define REPEAT_K 50 // repeat coefficient +#endif + +int outer_work[] = {/*256,*/ 64, 16, 4, 0}; +int inner_work[] = {32, 8, 0 }; + +// keep it to calibrate the time of work without synchronization +#define BOX1 "baseline" +#define BOX1TEST TimeTest< TBB_Mutex<tbb::null_mutex>, SECONDS_RATIO > + +// enable/disable tests for: +#define BOX2 "spin_mutex" +#define BOX2TEST TimeTest< TBB_Mutex<tbb::spin_mutex>, SECONDS_RATIO > + +// enable/disable tests for: +#define BOX3 "spin_rw_mutex" +#define BOX3TEST TimeTest< TBB_Mutex<tbb::spin_rw_mutex>, SECONDS_RATIO > + +// enable/disable tests for: +#define BOX4 "queuing_mutex" +#define BOX4TEST TimeTest< TBB_Mutex<tbb::queuing_mutex>, SECONDS_RATIO > + +// enable/disable tests for: +//#define BOX5 "queuing_rw_mutex" +#define BOX5TEST TimeTest< TBB_Mutex<tbb::queuing_rw_mutex>, SECONDS_RATIO > + +////////////////////////////////////////////////////////////////////////////// + +#include <cstdlib> +#include <math.h> +#include <algorithm> // std::swap +#include <utility> // Need std::pair from here +#include <sstream> +#include "tbb/tbb_stddef.h" +#include "tbb/null_mutex.h" +#include "tbb/spin_rw_mutex.h" +#include "tbb/spin_mutex.h" +#include "tbb/queuing_mutex.h" +#include "tbb/queuing_rw_mutex.h" +#include "tbb/mutex.h" + +#if INTEL_TRIAL==2 +#include "tbb/parallel_for.h" // enable threading by TBB scheduler +#include "tbb/task_scheduler_init.h" +#include "tbb/blocked_range.h" +#endif +// for test +#include "time_framework.h" + +using namespace tbb; +using namespace tbb::internal; + +///////////////////////////////////////////////////////////////////////////////////////// + +//! 
base class for tests family +struct TestLocks : TesterBase { + // Inherits "value", "threads_count", and other variables + TestLocks() : TesterBase(/*number of modes*/sizeof(outer_work)/sizeof(int)) {} + //! returns name of test part/mode + /*override*/std::string get_name(int testn) { + std::ostringstream buf; + buf.width(4); buf.fill('0'); + buf << outer_work[testn]; // mode number + return buf.str(); + } + //! enables results types and returns theirs suffixes + /*override*/const char *get_result_type(int, result_t type) const { + switch(type) { + case MIN: return " min"; + case MAX: return " max"; + default: return 0; + } + } + //! repeats count + int repeat_until(int /*test_n*/) const { + return REPEAT_K*100;//TODO: suggest better? + } + //! fake work + void do_work(int work) volatile { + for(int i = 0; i < work; i++) { + volatile int x = i; + __TBB_Pause(0); // just to call inline assembler + x *= work/threads_count; + } + } +}; + +//! template test unit for any of TBB mutexes +template<typename M> +struct TBB_Mutex : TestLocks { + M mutex; + + double test(int testn, int /*threadn*/) + { + for(int r = 0; r < repeat_until(testn); ++r) { + do_work(outer_work[testn]); + { + typename M::scoped_lock with(mutex); + do_work(/*inner work*/value); + } + } + return 0; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +//Using BOX declarations +#include "time_sandbox.h" + +// run tests for each of inner work value +void RunLoops(test_sandbox &the_test, int thread) { + for( unsigned i=0; i<sizeof(inner_work)/sizeof(int); ++i ) + the_test.factory(inner_work[i], thread); +} + +int main(int argc, char* argv[]) { + if(argc>1) Verbose = true; + int DefThread = task_scheduler_init::default_num_threads(); + MinThread = 1; MaxThread = DefThread+1; + ParseCommandLine( argc, argv ); + ASSERT(MinThread <= MaxThread, 0); +#if INTEL_TRIAL && defined(__TBB_parallel_for_H) + task_scheduler_init me(MaxThread); +#endif + { + 
test_sandbox the_test("time_locked_work", StatisticsCollector::ByThreads); + //TODO: refactor this out as RunThreads(test&) + for( int t = MinThread; t < DefThread && t <= MaxThread; t *= 2) + RunLoops( the_test, t ); // execute undersubscribed threads + if( DefThread > MinThread && DefThread <= MaxThread ) + RunLoops( the_test, DefThread ); // execute on all hw threads + if( DefThread < MaxThread) + RunLoops( the_test, MaxThread ); // execute requested oversubscribed threads + + the_test.report.SetTitle("Time of lock/unlock for mutex Name with Outer and Inner work"); + //the_test.report.SetStatisticFormula("1AVG per size", "=AVERAGE(ROUNDS)"); + the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML, /*ModeName*/ "Outer work"); + } + return 0; +} + diff --git a/dep/tbb/src/perf/time_sandbox.h b/dep/tbb/src/perf/time_sandbox.h new file mode 100644 index 000000000..158ebe746 --- /dev/null +++ b/dep/tbb/src/perf/time_sandbox.h @@ -0,0 +1,168 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. 
Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#ifndef __TIME_FRAMEWORK_H__ +#error time_framework.h must be included +#endif + +#ifdef BOX1 +namespace sandbox1 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX1HEADER +# include BOX1HEADER +# endif + typedef ::BOX1TEST testbox; +} +#endif +#ifdef BOX2 +namespace sandbox2 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX2HEADER +# include BOX2HEADER +# endif + typedef ::BOX2TEST testbox; +} +#endif +#ifdef BOX3 +namespace sandbox3 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX3HEADER +# include BOX3HEADER +# endif + typedef ::BOX3TEST testbox; +} +#endif +#ifdef BOX4 +namespace sandbox4 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX4HEADER +# include BOX4HEADER +# endif + typedef ::BOX4TEST testbox; +} +#endif +#ifdef BOX5 +namespace sandbox5 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX5HEADER +# include BOX5HEADER +# endif + typedef ::BOX5TEST testbox; +} +#endif +#ifdef BOX6 +namespace sandbox6 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX6HEADER +# include BOX6HEADER +# endif + typedef ::BOX6TEST testbox; +} +#endif +#ifdef BOX7 +namespace sandbox7 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace 
::tbb::internal; } } +# ifdef BOX7HEADER +# include BOX7HEADER +# endif + typedef ::BOX7TEST testbox; +} +#endif +#ifdef BOX8 +namespace sandbox8 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX8HEADER +# include BOX8HEADER +# endif + typedef ::BOX8TEST testbox; +} +#endif +#ifdef BOX9 +namespace sandbox9 { + namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } } +# ifdef BOX9HEADER +# include BOX9HEADER +# endif + typedef ::BOX9TEST testbox; +} +#endif + +//if harness.h included +#if defined(ASSERT) && !HARNESS_NO_PARSE_COMMAND_LINE +#ifndef TEST_PREFIX +#define TEST_PREFIX if(Verbose) printf("Processing with %d threads: %ld...\n", threads, value); +#endif +#endif//harness included + +#ifndef TEST_PROCESSOR_NAME +#define TEST_PROCESSOR_NAME test_sandbox +#endif + +class TEST_PROCESSOR_NAME : public TestProcessor { +public: + TEST_PROCESSOR_NAME(const char *name, StatisticsCollector::Sorting sort_by = StatisticsCollector::ByAlg) + : TestProcessor(name, sort_by) {} + void factory(arg_t value, int threads) { +#ifdef TEST_PREFIX + TEST_PREFIX +#endif + process( value, threads, +#define RUNBOX(n) run(#n"."BOX##n, new sandbox##n::testbox() ) +#ifdef BOX1 + RUNBOX(1), +#endif +#ifdef BOX2 + RUNBOX(2), +#endif +#ifdef BOX3 + RUNBOX(3), +#endif +#ifdef BOX4 + RUNBOX(4), +#endif +#ifdef BOX5 + RUNBOX(5), +#endif +#ifdef BOX6 + RUNBOX(6), +#endif +#ifdef BOX7 + RUNBOX(7), +#endif +#ifdef BOX8 + RUNBOX(8), +#endif +#ifdef BOX9 + RUNBOX(9), +#endif + end ); +#ifdef TEST_POSTFIX + TEST_POSTFIX +#endif + } +}; diff --git a/dep/tbb/src/perf/time_unit.cpp b/dep/tbb/src/perf/time_unit.cpp new file mode 100644 index 000000000..8d8162dc4 --- /dev/null +++ b/dep/tbb/src/perf/time_unit.cpp @@ -0,0 +1,291 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include "tbb/task_scheduler_init.h" +#include "tbb/tick_count.h" +#include <cmath> +#include <cstdlib> +#include <cerrno> +#include <cfloat> +#include <vector> +#include <algorithm> + +#include "../src/test/harness.h" + +#if __linux__ || __APPLE__ || __FreeBSD__ + #include <sys/resource.h> +#endif /* __APPLE__ */ + +// The code, performance of which is to be measured, is surrounded by the StartSimpleTiming +// and StopSimpleTiming macros. It is called "target code" or "code of interest" hereafter. +// +// The target code is executed inside the nested loop. Nesting is necessary to allow +// measurements on arrays that fit cache of a particular level, while making the load +// big enough to eliminate the influence of random deviations. 
+// +// Macro StartSimpleTiming defines reduction variable "util::anchor", which may be modified (usually +// by adding to) by the target code. This can be necessary to prevent optimizing compilers +// from throwing out the code of interest. Besides, if the target code is complex enough, +// make sure that all its branches contribute (directly or indirectly) to the value +// being added to the "util::anchor" variable. +// +// To factor out overhead introduced by the measurement infra code it is recommended to make +// a calibration run with target code replaced by a no-op (but still modifying "sum"), and +// store the resulting time in the "util::base" variable. +// +// A generally good approach is to make the target code use elements of a preliminary +// initialized array. Then for calibration run you just need to add vector elements +// to the "sum" variable. To get rid of memory access delays make the array small +// enough to fit L2 or L1 cache (play with StartSimpleTiming arguments if necessary). +// +// Macro CalibrateSimpleTiming performs default calibration using "util::anchor += i;" operation. +// +// Macro ANCHOR_TYPE defines the type of the reduction variable. If it was not +// defined before including this header, it is defined as size_t. Depending on +// the target code modern super scalar architectures may blend reduction operation +// and instructions of interest differently for different target alternatives. So +// you may play with the type to minimize out-of-order and parallel execution impact +// on the calibration time veracity. You may even end up with different reduction +// variable types (and different calibration times) for different measurements. + + +namespace util { + +typedef std::vector<double> durations_t; + + void trace_histogram ( const durations_t& t, char* histogramFileName ) + { + FILE* f = histogramFileName ? 
fopen(histogramFileName, "wt") : stdout; + size_t n = t.size(); + const size_t num_buckets = 100; + double min_val = *std::min_element(t.begin(), t.end()), + max_val = *std::max_element(t.begin(), t.end()), + bucket_size = (max_val - min_val) / num_buckets; + std::vector<size_t> hist(num_buckets + 1, 0); + for ( size_t i = 0; i < n; ++i ) + ++hist[size_t((t[i]-min_val)/bucket_size)]; + fprintf (f, "Histogram: nvals = %u, min = %g, max = %g, nbuckets = %u\n", (unsigned)n, min_val, max_val, (unsigned)num_buckets); + double bucket = min_val; + for ( size_t i = 0; i <= num_buckets; ++i, bucket+=bucket_size ) + fprintf (f, "%12g\t%u\n", bucket, (unsigned)hist[i]); + fclose(f); + } + + double average ( const durations_t& d, double& variation_percent, double& std_dev_percent ) + { + durations_t t = d; + if ( t.size() > 5 ) { + t.erase(std::min_element(t.begin(), t.end())); + t.erase(std::max_element(t.begin(), t.end())); + } + size_t n = t.size(); + double sum = 0, + min_val = *std::min_element(t.begin(), t.end()), + max_val = *std::max_element(t.begin(), t.end()); + for ( size_t i = 0; i < n; ++i ) + sum += t[i]; + double avg = sum / n, + std_dev = 0; + for ( size_t i = 0; i < n; ++i ) { + double dev = fabs(t[i] - avg); + std_dev += dev * dev; + } + std_dev = sqrt(std_dev / n); + std_dev_percent = std_dev / avg * 100; + variation_percent = 100 * (max_val - min_val) / avg; + return avg; + } + + static int num_threads; + + static double base = 0, + base_dev = 0, + base_dev_percent = 0; + + static char *empty_fmt = ""; + static int rate_field_len = 11; + +#if !defined(ANCHOR_TYPE) + #define ANCHOR_TYPE size_t +#endif + + static ANCHOR_TYPE anchor = 0; + + static double sequential_time = 0; + + +#define StartSimpleTiming(nOuter, nInner) { \ + tbb::tick_count t1, t0 = tbb::tick_count::now(); \ + for ( size_t j = 0; l < nOuter; ++l ) { \ + for ( size_t i = 0; i < nInner; ++i ) { + +#define StopSimpleTiming(res) \ + } \ + util::anchor += (ANCHOR_TYPE)l; \ + } \ + t1 = 
tbb::tick_count::now(); \ + printf (util::empty_fmt, util::anchor); \ + res = (t1-t0).seconds() - util::base; \ +} + +#define CalibrateSimpleTiming(T, nOuter, nInner) \ + StartSimpleTiming(nOuter, nInner); \ + util::anchor += (ANCHOR_TYPE)i; \ + StopSimpleTiming(util::base); + + +#define StartTimingImpl(nRuns, nOuter, nInner) \ + tbb::tick_count t1, t0; \ + for ( size_t k = 0; k < nRuns; ++k ) { \ + t0 = tbb::tick_count::now(); \ + for ( size_t l = 0; l < nOuter; ++l ) { \ + for ( size_t i = 0; i < nInner; ++i ) { + +#define StartTiming(nRuns, nOuter, nInner) { \ + util::durations_t t_(nRuns); \ + StartTimingImpl(nRuns, nOuter, nInner) + +#define StartTimingEx(vDurations, nRuns, nOuter, nInner) { \ + util::durations_t &t_ = vDurations; \ + vDurations.resize(nRuns); \ + StartTimingImpl(nRuns, nOuter, nInner) + +#define StopTiming(Avg, StdDev, StdDevPercent) \ + } \ + util::anchor += (ANCHOR_TYPE)l; \ + } \ + t1 = tbb::tick_count::now(); \ + t_[k] = (t1 - t0).seconds()/nrep; \ + } \ + printf (util::empty_fmt, util::anchor); \ + Avg = util::average(t_, StdDev, StdDevPercent); \ +} + +#define CalibrateTiming(nRuns, nOuter, nInner) \ + StartTiming(nRuns, nOuter, nInner); \ + util::anchor += (ANCHOR_TYPE)i; \ + StopTiming(util::base, util::base_dev, util::base_dev_percent); + +} // namespace util + + +#ifndef NRUNS + #define NRUNS 7 +#endif + +#ifndef ONE_TEST_DURATION + #define ONE_TEST_DURATION 0.01 +#endif + +#define no_histogram ((char*)-1) + +inline +double RunTestImpl ( const char* title, void (*pfn)(), char* histogramFileName = no_histogram ) { + double time = 0, variation = 0, deviation = 0; + size_t nrep = 1; + while (true) { + CalibrateTiming(NRUNS, 1, nrep); + StartTiming(NRUNS, 1, nrep); + pfn(); + StopTiming(time, variation, deviation); + time -= util::base; + if ( time > 1e-6 ) + break; + nrep *= 2; + } + nrep *= (size_t)ceil(ONE_TEST_DURATION/time); + CalibrateTiming(NRUNS, 1, nrep); // sets util::base + util::durations_t t; + StartTimingEx(t, NRUNS, 1, 
nrep); + pfn(); + StopTiming(time, variation, deviation); + if ( histogramFileName != (char*)-1 ) + util::trace_histogram(t, histogramFileName); + double clean_time = time - util::base; + if ( title ) { + // Deviation (in percent) is calulated for the Gross time + printf ("\n%-34s %.2e %5.1f ", title, clean_time, deviation); + if ( util::sequential_time != 0 ) + //printf ("% .2e ", clean_time - util::sequential_time); + printf ("% 10.1f ", 100*(clean_time - util::sequential_time)/util::sequential_time); + else + printf ("%*s ", util::rate_field_len, ""); + printf ("%-9u %1.6f |", (unsigned)nrep, time * nrep); + } + return clean_time; +} + + +/// Runs the test function, does statistical processing, and, if title is nonzero, prints results. +/** If histogramFileName is a string, the histogram of individual runs is generated and stored + in a file with the given name. If it is NULL then the histogram is printed on the console. + By default no histogram is generated. + The histogram format is: "rate bucket start" "number of tests in this bucket". 
**/ +inline +void RunTest ( const char* title_fmt, size_t workload_param, void (*pfn_test)(), char* histogramFileName = no_histogram ) { + char title[1024]; + sprintf(title, title_fmt, (long)workload_param); + RunTestImpl(title, pfn_test, histogramFileName); +} + +inline +void CalcSequentialTime ( void (*pfn)() ) { + util::sequential_time = RunTestImpl(NULL, pfn) / util::num_threads; +} + +inline +void ResetSequentialTime () { + util::sequential_time = 0; +} + + +inline void PrintTitle() { + //printf ("%-32s %-*s Std Dev,%% %-*s Repeats Gross time Infra time | NRUNS = %u", + // "Test name", util::rate_field_len, "Rate", util::rate_field_len, "Overhead", NRUNS); + printf ("%-34s %-*s Std Dev,%% Par.overhead,%% Repeats Gross time | Nruns %u, Nthreads %d", + "Test name", util::rate_field_len, "Rate", NRUNS, util::num_threads); +} + +void Test(); + +inline +int test_main( int argc, char* argv[] ) { + ParseCommandLine( argc, argv ); + ASSERT (MinThread>=2, "Minimal number of threads must be 2 or more"); + char buf[128]; + util::rate_field_len = 2 + sprintf(buf, "%.1e", 1.1); + for ( int i = MinThread; i <= MaxThread; ++i ) { + tbb::task_scheduler_init init (i); + util::num_threads = i; + PrintTitle(); + Test(); + printf("\n"); + } + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/perf/time_vector.cpp b/dep/tbb/src/perf/time_vector.cpp new file mode 100644 index 000000000..f2d86725d --- /dev/null +++ b/dep/tbb/src/perf/time_vector.cpp @@ -0,0 +1,256 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +//#define DO_SCALABLEALLOC + +#include <cstdlib> +#include <cmath> +#include <vector> +#include <algorithm> +#include <functional> +#include <numeric> +#include "tbb/tbb_stddef.h" +#include "tbb/spin_mutex.h" +#ifdef DO_SCALABLEALLOC +#include "tbb/scalable_allocator.h" +#endif +#include "tbb/concurrent_vector.h" +#include "tbb/tbb_allocator.h" +#include "tbb/cache_aligned_allocator.h" +#include "tbb/task_scheduler_init.h" +#include "tbb/parallel_for.h" +#include "tbb/tick_count.h" +#include "tbb/blocked_range.h" +#include "../test/harness.h" +//#include "harness_barrier.h" +#include "../test/harness_allocator.h" +#define STATISTICS_INLINE +#include "statistics.h" + +using namespace tbb; +bool ExtraVerbose = false; + +class Timer { + tbb::tick_count tick; +public: + Timer() { tick = tbb::tick_count::now(); } + double get_time() { return (tbb::tick_count::now() - tick).seconds(); } + double diff_time(const Timer &newer) { return (newer.tick - tick).seconds(); } + double mark_time() { tick_count t1(tbb::tick_count::now()), t2(tick); tick = t1; return (t1 - t2).seconds(); } + double mark_time(const Timer &newer) { tick_count t(tick); tick = newer.tick; 
return (tick - t).seconds(); } +}; + +/************************************************************************/ +/* TEST1 */ +/************************************************************************/ +#define mk_vector_test1(v, a) vector_test1<v<Timer, static_counting_allocator<a<Timer> > >, v<double, static_counting_allocator<a<double> > > > +template<class timers_vector_t, class values_vector_t> +class vector_test1 { + const char *mode; + StatisticsCollector &stat; + StatisticsCollector::TestCase key[16]; + +public: + vector_test1(const char *m, StatisticsCollector &s) : mode(m), stat(s) {} + + vector_test1 &operator()(size_t len) { + if(Verbose) printf("test1<%s>(%u): collecting timing statistics\n", mode, unsigned(len)); + __TBB_ASSERT(sizeof(Timer) == sizeof(double), NULL); + static const char *test_names[] = { + "b)creation wholly", + "a)creation by push", + "c)operation time per item", + 0 }; + for(int i = 0; test_names[i]; ++i) key[i] = stat.SetTestCase(test_names[i], mode, len); + + Timer timer0; timers_vector_t::allocator_type::init_counters(); + timers_vector_t tv(len); + Timer timer1; values_vector_t::allocator_type::init_counters(); + values_vector_t dv; + for (size_t i = 0; i < len; ++i) + dv.push_back( i ); + Timer timer2; + for (size_t i = 0; i < len; ++i) + { + dv[len-i-1] = timer0.diff_time(tv[i]); + tv[i].mark_time(); + } + stat.AddStatisticValue( key[2], "1total, ms", "%.3f", timer2.get_time()*1000.0 ); + stat.AddStatisticValue( key[1], "1total, ms", "%.3f", timer1.diff_time(timer2)*1000.0 ); + stat.AddStatisticValue( key[0], "1total, ms", "%.3f", timer0.diff_time(timer1)*1000.0 ); + //allocator statistics + stat.AddStatisticValue( key[0], "2total allocations", "%d", int(timers_vector_t::allocator_type::allocations) ); + stat.AddStatisticValue( key[1], "2total allocations", "%d", int(values_vector_t::allocator_type::allocations) ); + stat.AddStatisticValue( key[2], "2total allocations", "%d", 0); + stat.AddStatisticValue( key[0], "3total 
alloc#items", "%d", int(timers_vector_t::allocator_type::items_allocated) ); + stat.AddStatisticValue( key[1], "3total alloc#items", "%d", int(values_vector_t::allocator_type::items_allocated) ); + stat.AddStatisticValue( key[2], "3total alloc#items", "%d", 0); + //remarks + stat.AddStatisticValue( key[0], "9note", "segment creation time, ns:"); + stat.AddStatisticValue( key[2], "9note", "average op-time per item, ns:"); + Timer last_timer(timer2); double last_value = 0; + for (size_t j = 0, i = 2; i < len; i *= 2, j++) { + stat.AddRoundResult( key[0], (dv[len-i-1]-last_value)*1000000.0 ); + last_value = dv[len-i-1]; + stat.AddRoundResult( key[2], last_timer.diff_time(tv[i])/double(i)*1000000.0 ); + last_timer = tv[i]; + stat.SetRoundTitle(j, i); + } + tv.clear(); dv.clear(); + //__TBB_ASSERT(timers_vector_t::allocator_type::items_allocated == timers_vector_t::allocator_type::items_freed, NULL); + //__TBB_ASSERT(values_vector_t::allocator_type::items_allocated == values_vector_t::allocator_type::items_freed, NULL); + return *this; + } +}; + +/************************************************************************/ +/* TEST2 */ +/************************************************************************/ +#define mk_vector_test2(v, a) vector_test2<v<size_t, a<size_t> > > +template<class vector_t> +class vector_test2 { + const char *mode; + static const int ntrial = 10; + StatisticsCollector &stat; + +public: + vector_test2(const char *m, StatisticsCollector &s) : mode(m), stat(s) {} + + vector_test2 &operator()(size_t len) { + if(Verbose) printf("test2<%s>(%u): performing standard transformation sequence on vector\n", mode, unsigned(len)); + StatisticsCollector::TestCase init_key = stat.SetTestCase("allocate", mode, len); + StatisticsCollector::TestCase fill_key = stat.SetTestCase("fill", mode, len); + StatisticsCollector::TestCase proc_key = stat.SetTestCase("process", mode, len); + StatisticsCollector::TestCase full_key = stat.SetTestCase("total time", mode, len); 
+ for (int i = 0; i < ntrial; i++) { + Timer timer0; + vector_t v1(len); + vector_t v2(len); + Timer timer1; + std::generate(v1.begin(), v1.end(), values(0)); + std::generate(v2.begin(), v2.end(), values(size_t(-len))); + Timer timer2; + std::reverse(v1.rbegin(), v1.rend()); + std::inner_product(v1.begin(), v1.end(), v2.rbegin(), 1); + std::sort(v1.rbegin(), v1.rend()); + std::sort(v2.rbegin(), v2.rend()); + std::set_intersection(v1.begin(), v1.end(), v2.rbegin(), v2.rend(), v1.begin()); + Timer timer3; + stat.AddRoundResult( proc_key, timer2.diff_time(timer3)*1000.0 ); + stat.AddRoundResult( fill_key, timer1.diff_time(timer2)*1000.0 ); + stat.AddRoundResult( init_key, timer0.diff_time(timer1)*1000.0 ); + stat.AddRoundResult( full_key, timer0.diff_time(timer3)*1000.0 ); + } + stat.SetStatisticFormula("1Average", "=AVERAGE(ROUNDS)"); + stat.SetStatisticFormula("2+/-", "=(MAX(ROUNDS)-MIN(ROUNDS))/2"); + return *this; + } + + class values + { + size_t value; + public: + values(size_t i) : value(i) {} + size_t operator()() { + return value++%(1|(value^55)); + } + }; +}; + +/************************************************************************/ +/* TEST3 */ +/************************************************************************/ +#define mk_vector_test3(v, a) vector_test3<v<char, local_counting_allocator<a<char>, size_t > > > +template<class vector_t> +class vector_test3 { + const char *mode; + StatisticsCollector &stat; + +public: + vector_test3(const char *m, StatisticsCollector &s) : mode(m), stat(s) {} + + vector_test3 &operator()(size_t len) { + if(Verbose) printf("test3<%s>(%u): collecting allocator statistics\n", mode, unsigned(len)); + static const size_t sz = 1024; + vector_t V[sz]; + StatisticsCollector::TestCase vinst_key = stat.SetTestCase("instances number", mode, len); + StatisticsCollector::TestCase count_key = stat.SetTestCase("allocations count", mode, len); + StatisticsCollector::TestCase items_key = stat.SetTestCase("allocated items", mode, 
len); + //stat.ReserveRounds(sz-1); + for (size_t c = 0, i = 0, s = sz/2; s >= 1 && i < sz; s /= 2, c++) + { + const size_t count = c? 1<<(c-1) : 0; + for (size_t e = i+s; i < e; i++) { + //if(count >= 16) V[i].reserve(count); + for (size_t j = 0; j < count; j++) + V[i].push_back(j); + } + stat.SetRoundTitle ( c, count ); + stat.AddRoundResult( vinst_key, s ); + stat.AddRoundResult( count_key, V[i-1].get_allocator().allocations ); + stat.AddRoundResult( items_key, V[i-1].get_allocator().items_allocated ); + } + return *this; + } +}; + +/************************************************************************/ +/* TYPES SET FOR TESTS */ +/************************************************************************/ +#define types_set(n, title, op) { StatisticsCollector Collector("time_vector"#n); Collector.SetTitle title; \ + {mk_vector_test##n(tbb::concurrent_vector, tbb::cache_aligned_allocator) ("TBB:NFS", Collector)op;} \ + {mk_vector_test##n(tbb::concurrent_vector, tbb::tbb_allocator) ("TBB:TBB", Collector)op;} \ + {mk_vector_test##n(tbb::concurrent_vector, std::allocator) ("TBB:STD", Collector)op;} \ + {mk_vector_test##n(std::vector, tbb::cache_aligned_allocator) ("STL:NFS", Collector)op;} \ + {mk_vector_test##n(std::vector, tbb::tbb_allocator) ("STL:TBB", Collector)op;} \ + {mk_vector_test##n(std::vector, std::allocator) ("STL:STD", Collector)op;} \ + Collector.Print(StatisticsCollector::Stdout|StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); } + + +/************************************************************************/ +/* MAIN DRIVER */ +/************************************************************************/ +int main(int argc, char* argv[]) { + if(argc>1) Verbose = true; + if(argc>2) ExtraVerbose = true; + MinThread = 0; MaxThread = 500000; // use in another meaning - test#:problem size + ParseCommandLine( argc, argv ); + + ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to 
be loaded"); + + if(!MinThread || MinThread == 1) + types_set(1, ("Vectors performance test #1 for %d", MaxThread), (MaxThread) ) + if(!MinThread || MinThread == 2) + types_set(2, ("Vectors performance test #2 for %d", MaxThread), (MaxThread) ) + if(!MinThread || MinThread == 3) + types_set(3, ("Vectors performance test #3 for %d", MaxThread), (MaxThread) ) + + if(!Verbose) printf("done\n"); + return 0; +} + diff --git a/dep/tbb/src/rml/perfor/omp_nested.cpp b/dep/tbb/src/rml/perfor/omp_nested.cpp new file mode 100644 index 000000000..b63358cd3 --- /dev/null +++ b/dep/tbb/src/rml/perfor/omp_nested.cpp @@ -0,0 +1,135 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include <cstddef> +#include <cstdlib> +#include <cstdio> +#include <float.h> +#include <math.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> + +#include <omp.h> +#include <assert.h> + +#include "thread_level.h" + +using namespace std; +using namespace tbb; + +// Algorithm parameters +const int Max_OMP_Outer_Threads = 16; +const int Max_OMP_Inner_Threads = 16; + +// Global variables +int max_outer_threads = Max_OMP_Outer_Threads; +int max_inner_threads = Max_OMP_Inner_Threads; + +// Print help on command-line arguments +void help_message(char *prog_name) { + fprintf(stderr, "\n%s usage:\n", prog_name); + fprintf(stderr, + " Parameters:\n" + " -o<num> : max # of threads OMP should use at outer level\n" + " -i<num> : max # of threads OMP should use at inner level\n" + "\n Help:\n" + " -h : print this help message\n"); +} + +// Process command-line arguments +void process_args(int argc, char *argv[], int *max_outer_t, int *max_inner_t) { + for (int i=1; i<argc; ++i) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 'i': // set max_inner_threads + if (sscanf(&argv[i][2], "%d", max_inner_t) != 1 || *max_inner_t < 1) { + fprintf(stderr, "%s Warning: argument of -i option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'o': // set max_outer_threads + if (sscanf(&argv[i][2], "%d", max_outer_t) != 1 || *max_outer_t < 1) { + fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'h': // print help message + help_message(argv[0]); + exit(0); + break; + default: + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + break; + } + } else { + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + } + } +} + +int main(int argc, char *argv[]) { + process_args(argc, argv, &max_outer_threads, &max_inner_threads); 
+ TotalThreadLevel.init(); + + double start, end; + start = omp_get_wtime( ); + +#pragma omp parallel num_threads(max_outer_threads) + { + int omp_thread = omp_get_thread_num(); + if (omp_thread == 0) + TotalThreadLevel.change_level(omp_get_num_threads(), omp_outer); + if (omp_thread == 0) { + sleep(3); + TotalThreadLevel.change_level(-1, omp_outer); +#pragma omp parallel num_threads(max_inner_threads) + { + int my_omp_thread = omp_get_thread_num(); + if (my_omp_thread == 0) + TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner); + printf("Inner thread %d nested inside outer thread %d\n", my_omp_thread, omp_thread); + if (my_omp_thread == 0) + TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner); + } + TotalThreadLevel.change_level(1, omp_outer); + } + else { + sleep(6); + } + if (omp_thread == 0) + TotalThreadLevel.change_level(-omp_get_num_threads(), omp_outer); + } + end = omp_get_wtime( ); + printf("Simple test of nested OMP (%d outer threads max, %d inner threads max) took: %6.6f\n", + max_outer_threads, max_inner_threads, end-start); + TotalThreadLevel.dump(); + return 0; +} diff --git a/dep/tbb/src/rml/perfor/omp_simple.cpp b/dep/tbb/src/rml/perfor/omp_simple.cpp new file mode 100644 index 000000000..34367780f --- /dev/null +++ b/dep/tbb/src/rml/perfor/omp_simple.cpp @@ -0,0 +1,159 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include <cstddef> +#include <cstdlib> +#include <cstdio> +#include <float.h> +#include <math.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> + +#include <omp.h> +#include <assert.h> + +#include "thread_level.h" +#define LOG_THREADS + +#include "tbb/task.h" +#include "tbb/tick_count.h" +#include "tbb/task_scheduler_init.h" +#include "tbb/scalable_allocator.h" +#include "tbb/parallel_for.h" +#include "tbb/blocked_range.h" + +using namespace std; +using namespace tbb; + +// Algorithm parameters +const int Max_TBB_Threads = 16; +const int Max_OMP_Threads = 16; + +// Global variables +int max_tbb_threads = Max_TBB_Threads; +int max_omp_threads = Max_OMP_Threads; + +// Print help on command-line arguments +void help_message(char *prog_name) { + fprintf(stderr, "\n%s usage:\n", prog_name); + fprintf(stderr, + " Parameters:\n" + " -t<num> : max # of threads TBB should use\n" + " -o<num> : max # of threads OMP should use\n" + "\n Help:\n" + " -h : print this help message\n"); +} + +// Process command-line arguments +void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) { + for (int i=1; i<argc; ++i) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 't': // set 
max_tbb_threads + if (sscanf(&argv[i][2], "%d", max_tbb_t) != 1 || *max_tbb_t < 1) { + fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'o': // set max_omp_threads + if (sscanf(&argv[i][2], "%d", max_omp_t) != 1 || *max_omp_t < 1) { + fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'h': // print help message + help_message(argv[0]); + exit(0); + break; + default: + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + break; + } + } else { + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + } + } +} + +int main(int argc, char *argv[]) { + process_args(argc, argv, &max_tbb_threads, &max_omp_threads); + TotalThreadLevel.init(); + + tick_count start, end; + start = tick_count::now(); + +#pragma omp parallel num_threads(max_omp_threads) + { + int omp_thread = omp_get_thread_num(); +#ifdef LOG_THREADS + if (omp_thread == 0) + TotalThreadLevel.change_level(omp_get_num_threads(), omp_outer); +#endif + task_scheduler_init phase(max_tbb_threads); + if (omp_thread == 0) { + sleep(3); +#ifdef LOG_THREADS + TotalThreadLevel.change_level(-1, omp_outer); +#endif + parallel_for(blocked_range<size_t>(0, 1000), + [=](const blocked_range<size_t>& range) { +#ifdef LOG_THREADS + TotalThreadLevel.change_level(1, tbb_inner); +#endif +#pragma ivdep + for (size_t i=range.begin(); i!=range.end(); ++i) { + if (i==range.begin()) + printf("TBB range starting at %d on OMP thread %d\n", (int)i, omp_thread); + } +#ifdef LOG_THREADS + TotalThreadLevel.change_level(-1, tbb_inner); +#endif + }, auto_partitioner()); +#ifdef LOG_THREADS + TotalThreadLevel.change_level(1, omp_outer); +#endif + } + else { + sleep(6); + } +#ifdef LOG_THREADS + if (omp_thread == 0) + 
TotalThreadLevel.change_level(-omp_get_num_threads(), omp_outer); +#endif + } + end = tick_count::now(); + printf("Simple test of OMP (%d threads max) with TBB (%d threads max) inside took: %6.6f\n", + max_omp_threads, max_tbb_threads, (end-start).seconds()); +#ifdef LOG_THREADS + TotalThreadLevel.dump(); +#endif + return 0; +} diff --git a/dep/tbb/src/rml/perfor/tbb_multi_omp.cpp b/dep/tbb/src/rml/perfor/tbb_multi_omp.cpp new file mode 100644 index 000000000..c3432f2c9 --- /dev/null +++ b/dep/tbb/src/rml/perfor/tbb_multi_omp.cpp @@ -0,0 +1,168 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include <cstddef> +#include <cstdlib> +#include <cstdio> +#include <float.h> +#include <math.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> + +#include <omp.h> +#include <assert.h> + +#include "thread_level.h" + +#include "tbb/task.h" +#include "tbb/tick_count.h" +#include "tbb/task_scheduler_init.h" +#include "tbb/scalable_allocator.h" + +using namespace std; +using namespace tbb; + +// Algorithm parameters +const int Max_TBB_Threads = 16; +const int Max_OMP_Threads = 16; + +// Global variables +int max_tbb_threads = Max_TBB_Threads; +int max_omp_threads = Max_OMP_Threads; + +// Print help on command-line arguments +void help_message(char *prog_name) { + fprintf(stderr, "\n%s usage:\n", prog_name); + fprintf(stderr, + " Parameters:\n" + " -t<num> : max # of threads TBB should use\n" + " -o<num> : max # of threads OMP should use\n" + "\n Help:\n" + " -h : print this help message\n"); +} + +// Process command-line arguments +void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) { + for (int i=1; i<argc; ++i) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 't': // set max_tbb_threads + if (sscanf(&argv[i][2], "%d", max_tbb_t) != 1 || *max_tbb_t < 1) { + fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'o': // set max_omp_threads + if (sscanf(&argv[i][2], "%d", max_omp_t) != 1 || *max_omp_t < 1) { + fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'h': // print help message + help_message(argv[0]); + exit(0); + break; + default: + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + break; + } + } else { + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + } + } +} + +class SimpleTask : public task { + bool isLeaf; 
+ int myId; +public: + SimpleTask(bool isLeaf_, int myId_) : isLeaf(isLeaf_), myId(myId_) {} + task* execute() { /* root task (isLeaf==false) spawns 64 leaf children and waits; even-numbered leaves sleep 3s then open an OpenMP parallel region, odd-numbered leaves only sleep 6s */ + TotalThreadLevel.change_level(1, tbb_outer); + omp_set_num_threads(max_omp_threads); + if (!isLeaf) { + set_ref_count(65); + for (int i=0; i<64; ++i) { + SimpleTask& st = *new(allocate_child()) SimpleTask(true, i); + spawn(st); + } + TotalThreadLevel.change_level(-1, tbb_outer); + wait_for_all(); + TotalThreadLevel.change_level(1, tbb_outer); + } + else { + if (myId%2 == 0) { + sleep(3); + TotalThreadLevel.change_level(-1, tbb_outer); +#pragma omp parallel + { + if (omp_get_thread_num() == 0) { + TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner); + } + printf("In OMP parallel region on TBB task with myId=%d: thread %d of %d\n", /* every even-numbered leaf reaches this region, so report the actual id instead of a hard-coded 0 */ + myId, omp_get_thread_num(), omp_get_num_threads()); + if (omp_get_thread_num() == 0) { + TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner); + } + } + TotalThreadLevel.change_level(1, tbb_outer); + } + else { + sleep(6); + } + } + TotalThreadLevel.change_level(-1, tbb_outer); + return NULL; + } +}; + + +int main(int argc, char *argv[]) { + TotalThreadLevel.init(); + int dbg=0; + TotalThreadLevel.change_level(1, tbb_outer); + process_args(argc, argv, &max_tbb_threads, &max_omp_threads); + + task_scheduler_init phase(max_tbb_threads); + tick_count start, end; + start = tick_count::now(); + SimpleTask& st = *new(task::allocate_root()) SimpleTask(false, -1); + TotalThreadLevel.change_level(-1, tbb_outer); + task::spawn_root_and_wait(st); + TotalThreadLevel.change_level(1, tbb_outer); + end = tick_count::now(); + printf("Simple Test of TBB (%d threads max) with OMP (%d threads max) inside took: %6.6f\n", + max_tbb_threads, max_omp_threads, (end-start).seconds()); + + TotalThreadLevel.change_level(-1, tbb_outer); + TotalThreadLevel.dump(); + return 0; +} diff --git a/dep/tbb/src/rml/perfor/tbb_simple.cpp b/dep/tbb/src/rml/perfor/tbb_simple.cpp new file mode 100644 index 000000000..a72ed0db2 --- /dev/null +++ 
b/dep/tbb/src/rml/perfor/tbb_simple.cpp @@ -0,0 +1,167 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include <cstddef> +#include <cstdlib> +#include <cstdio> +#include <float.h> +#include <math.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> + +#include <omp.h> +#include <assert.h> + +#include "thread_level.h" + +#include "tbb/task.h" +#include "tbb/tick_count.h" +#include "tbb/task_scheduler_init.h" +#include "tbb/scalable_allocator.h" + +using namespace std; +using namespace tbb; + +// Algorithm parameters +const int Max_TBB_Threads = 16; +const int Max_OMP_Threads = 16; + +// Global variables +int max_tbb_threads = Max_TBB_Threads; +int max_omp_threads = Max_OMP_Threads; + +// Print help on command-line arguments +void help_message(char *prog_name) { + fprintf(stderr, "\n%s usage:\n", prog_name); + fprintf(stderr, + " Parameters:\n" + " -t<num> : max # of threads TBB should use\n" + " -o<num> : max # of threads OMP should use\n" + "\n Help:\n" + " -h : print this help message\n"); +} + +// Process command-line arguments +void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) { + for (int i=1; i<argc; ++i) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 't': // set max_tbb_threads + if (sscanf(&argv[i][2], "%d", max_tbb_t) != 1 || *max_tbb_t < 1) { + fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'o': // set max_omp_threads + if (sscanf(&argv[i][2], "%d", max_omp_t) != 1 || *max_omp_t < 1) { + fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], &argv[i][2]); + help_message(argv[0]); + } + break; + case 'h': // print help message + help_message(argv[0]); + exit(0); + break; + default: + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + break; + } + } else { + fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]); + help_message(argv[0]); + } + } +} + +class SimpleTask : public task { + bool isLeaf; 
+ int myId; +public: + SimpleTask(bool isLeaf_, int myId_) : isLeaf(isLeaf_), myId(myId_) {} + task* execute() { + TotalThreadLevel.change_level(1, tbb_outer); + omp_set_num_threads(max_omp_threads); + if (!isLeaf) { + set_ref_count(17); + for (int i=0; i<16; ++i) { + SimpleTask& st = *new(allocate_child()) SimpleTask(true, i); + spawn(st); + } + TotalThreadLevel.change_level(-1, tbb_outer); + wait_for_all(); + TotalThreadLevel.change_level(1, tbb_outer); + } + else { + if (myId == 0) { + sleep(3); + TotalThreadLevel.change_level(-1, tbb_outer); +#pragma omp parallel + { + if (omp_get_thread_num() == 0) { + TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner); + } + printf("In OMP parallel region on TBB task with myId=0: thread %d of %d\n", + omp_get_thread_num(), omp_get_num_threads()); + if (omp_get_thread_num() == 0) { + TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner); + } + } + TotalThreadLevel.change_level(1, tbb_outer); + } + else { + sleep(6); + } + } + TotalThreadLevel.change_level(-1, tbb_outer); + return NULL; + } +}; + + +int main(int argc, char *argv[]) { + TotalThreadLevel.init(); + TotalThreadLevel.change_level(1, tbb_outer); + process_args(argc, argv, &max_tbb_threads, &max_omp_threads); + + task_scheduler_init phase(max_tbb_threads); + tick_count start, end; + start = tick_count::now(); + SimpleTask& st = *new(task::allocate_root()) SimpleTask(false, -1); + TotalThreadLevel.change_level(-1, tbb_outer); + task::spawn_root_and_wait(st); + TotalThreadLevel.change_level(1, tbb_outer); + end = tick_count::now(); + printf("Simple Test of TBB (%d threads max) with OMP (%d threads max) inside took: %6.6f\n", + max_tbb_threads, max_omp_threads, (end-start).seconds()); + + TotalThreadLevel.change_level(-1, tbb_outer); + TotalThreadLevel.dump(); + return 0; +} diff --git a/dep/tbb/src/rml/perfor/thread_level.h b/dep/tbb/src/rml/perfor/thread_level.h new file mode 100644 index 000000000..a73afa81a --- /dev/null +++ 
b/dep/tbb/src/rml/perfor/thread_level.h @@ -0,0 +1,140 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +// Thread level recorder +#ifndef __THREAD_LEVEL_H +#define __THREAD_LEVEL_H +#include <cstdio> +#include <omp.h> +#include "tbb/atomic.h" +#include "tbb/tick_count.h" +#include "../src/test/harness.h" + +//#define LOG_THREADS // use this to ifdef out calls to this class + +using namespace tbb; + +typedef enum {tbb_outer, tbb_inner, omp_outer, omp_inner} client_t; + +class ThreadLevelRecorder { /* keeps one atomic counter per client kind and logs a timestamped snapshot of all four on every change_level() call */ + tbb::atomic<int> tbb_outer_level; + tbb::atomic<int> tbb_inner_level; + tbb::atomic<int> omp_outer_level; + tbb::atomic<int> omp_inner_level; + struct record { + tbb::tick_count time; + int n_tbb_outer_thread; + int n_tbb_inner_thread; + int n_omp_outer_thread; + int n_omp_inner_thread; + }; + tbb::atomic<unsigned> next; + /** Must be power of two */ + static const unsigned max_record_count = 1<<20; + record array[max_record_count]; + int max_threads; + bool fail; + public: + void change_level(int delta, client_t whichClient); + void dump(); + void init(); +}; + +void ThreadLevelRecorder::change_level(int delta, client_t whichClient) { /* adds delta to the counter selected by whichClient, appends a snapshot to the log, then checks the summed levels against max_threads */ + int tox=tbb_outer_level, tix=tbb_inner_level, oox=omp_outer_level, oix=omp_inner_level; + if (whichClient == tbb_outer) { + tox = tbb_outer_level+=delta; + } else if (whichClient == tbb_inner) { + tix = tbb_inner_level+=delta; + } else if (whichClient == omp_outer) { + oox = omp_outer_level+=delta; + } else if (whichClient == omp_inner) { + oix = omp_inner_level+=delta; + } else { + printf("WARNING: Bad client type; ignoring.\n"); + return; + } + // log non-negative entries + tbb::tick_count t = tbb::tick_count::now(); + unsigned k = next++; + if (k<max_record_count) { + record& r = array[k]; + r.time = t; + r.n_tbb_outer_thread = tox>=0?tox:0; + r.n_omp_outer_thread = oox>=0?oox:0; + r.n_tbb_inner_thread = tix>=0?tix:0; + r.n_omp_inner_thread = oix>=0?oix:0; + } + char errStr[100]; + int tot_threads; + tot_threads = tox+tix+oox+oix; + snprintf(errStr, sizeof(errStr), "ERROR: Number of threads (%d+%d+%d+%d=%d) in use exceeds maximum (%d).\n", /* bounded write: six large ints plus the fixed text can exceed the 100-byte buffer */ + tox, 
tix, oox, oix, tot_threads, max_threads); + if (tot_threads > max_threads) { +#ifdef NO_BAIL_OUT + if (!fail) { + printf("%sContinuing...\n", errStr); + fail = true; + } +#else + fail = true; /* mark the run failed first so dump() does not report a pass right before the assertion fires */ dump(); + ASSERT(tot_threads <= max_threads, errStr); +#endif + } +} + +void ThreadLevelRecorder::dump() { /* writes the recorded snapshots to time.txt as tab-separated rows (seconds since the first record, then the four counts) and prints the pass/fail verdict */ + FILE* f = fopen("time.txt","w"); + if (!f) { + perror("fopen(time.txt)"); /* perror appends the separator, reason and newline itself */ + exit(1); + } + unsigned limit = next; + if (limit>max_record_count) { // Clip + limit = max_record_count; + } + for (unsigned i=0; i<limit; ++i) { + fprintf(f,"%f\t%d\t%d\t%d\t%d\n",(array[i].time-array[0].time).seconds(), array[i].n_tbb_outer_thread, + array[i].n_tbb_inner_thread, array[i].n_omp_outer_thread, array[i].n_omp_inner_thread); + } + fclose(f); + int tox=tbb_outer_level, tix=tbb_inner_level, oox=omp_outer_level, oix=omp_inner_level; + int tot_threads; + tot_threads = tox+tix+oox+oix; + if (!fail) printf("INFO: Passed.\n"); + else printf("INFO: Failed.\n"); +} + +void ThreadLevelRecorder::init() { /* clears the failure flag and takes the thread ceiling from omp_get_max_threads() */ + fail = false; + max_threads = omp_get_max_threads(); + printf("INFO: Getting maximum hardware threads... %d.\n", max_threads); +} + +ThreadLevelRecorder TotalThreadLevel; +#endif diff --git a/dep/tbb/src/rml/test/rml_omp_stub.cpp b/dep/tbb/src/rml/test/rml_omp_stub.cpp new file mode 100644 index 000000000..d9d6ba4c1 --- /dev/null +++ b/dep/tbb/src/rml/test/rml_omp_stub.cpp @@ -0,0 +1,66 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +// This file is compiled with C++, but linked with a program written in C. +// The intent is to find dependencies on the C++ run-time. + +#include <stdlib.h> +#define RML_PURE_VIRTUAL_HANDLER abort + +#if _MSC_VER==1500 && !defined(__INTEL_COMPILER) +// VS2008/VC9 seems to have an issue; +#pragma warning( push ) +#pragma warning( disable: 4100 ) +#endif +#include "rml_omp.h" +#if _MSC_VER==1500 && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif + +rml::versioned_object::version_type Version; + +class MyClient: public __kmp::rml::omp_client { +public: + /*override*/rml::versioned_object::version_type version() const {return 0;} + /*override*/size_type max_job_count() const {return 1024;} + /*override*/size_t min_stack_size() const {return 1<<20;} + /*override*/rml::job* create_one_job() {return NULL;} + /*override*/void acknowledge_close_connection() {} + /*override*/void cleanup(job&) {} + /*override*/policy_type policy() const {return throughput;} + /*override*/void process( job&, void*, __kmp::rml::omp_client::size_type ) {} + +}; + +//! Never actually set, because point of test is to find linkage issues. 
+__kmp::rml::omp_server* MyServerPtr; + +extern "C" void Cplusplus() { + MyClient client; + Version = client.version(); +} diff --git a/dep/tbb/src/rml/test/test_job_automaton.cpp b/dep/tbb/src/rml/test/test_job_automaton.cpp new file mode 100644 index 000000000..29fd7928f --- /dev/null +++ b/dep/tbb/src/rml/test/test_job_automaton.cpp @@ -0,0 +1,154 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include "job_automaton.h" +#define HARNESS_NO_PARSE_COMMAND_LINE 1 +#include "harness.h" +#include "harness_barrier.h" + +class State { + Harness::SpinBarrier barrier; + rml::internal::job_automaton ja; + rml::job job; + tbb::atomic<int> job_created; + tbb::atomic<int> job_destroyed; + tbb::atomic<bool> job_received; +public: + State() : barrier(2) { + job_created = 0; + job_destroyed = 0; + job_received = false; + } + void exercise( bool is_owner ); + ~State() { + ASSERT( job_created==job_destroyed, "accounting error" ); + ASSERT( job_destroyed<=1, "destroyed job twice" ); + } +}; + +int DelayMask; +const int N = 14; +tbb::atomic<int> Coverage[N]; + +//! Mark kth interval as covered and insert delay if kth bit of DelayMask is set. +/** An interval is the code between two operations on the job_automaton that we are testing. */ +void Cover( int k ) { + ASSERT( k<N, NULL ); + ++Coverage[k]; + if( DelayMask>>k&1 ) { + // Introduce delay (and possibly a thread context switch) + __TBB_Yield(); + } +} + +void State::exercise( bool is_owner ) { + barrier.wait(); + if( is_owner ) { + Cover(0); + if( ja.try_acquire() ) { + Cover(1); + ++job_created; + ja.set_and_release(job); + Cover(2); + if( ja.try_acquire() ) { + Cover(3); + ja.release(); + Cover(4); + if( ja.try_acquire() ) { + Cover(5); + ja.release(); + } + } + Cover(6); + } else { + Cover(7); + } + if( DelayMask&1<<N ) { + while( !job_received ) + __TBB_Yield(); + } + } else { + // Using extra bit of DelayMask for choosing whether to run wait_for_job or not. 
+ if( DelayMask&1<<N ) { + rml::job* j= &ja.wait_for_job(); + if( j!=&job ) printf("%p\n",j); + ASSERT( j==&job, NULL ); + job_received = true; + } + Cover(8); + } + rml::job* j; + if( ja.try_plug(j) ) { + ASSERT( j==&job || !j, NULL ); + if( j ) { + Cover(9+is_owner); + ++job_destroyed; + } else { + __TBB_ASSERT( !is_owner, "owner failed to create job but plugged self" ); + Cover(11); + } + } else { + Cover(12+is_owner); + } +} + +class Loop: NoAssign { + State& s; +public: + Loop(State& s_) : s(s_) {} + void operator()( int i ) const {s.exercise(i==0);} +}; + +/** Return true if coverage is acceptable. + If report==true, issue message if it is unacceptable. */ +bool CheckCoverage( bool report ) { + bool okay = true; + for( int i=0; i<N; ++i ) { + const int min_coverage = 4; + if( Coverage[i]<min_coverage ) { + okay = false; + if( report ) + printf("Warning: Coverage[%d]=%d is less than acceptable minimum of %d\n", i, int(Coverage[i]),min_coverage); + } + } + return okay; +} + +int main() { + for( DelayMask=0; DelayMask<8<<N; ++DelayMask ) { + State s; + NativeParallelFor( 2, Loop(s) ); + if( CheckCoverage(false) ) { + // Reached acceptable code coverage level + break; + } + } + CheckCoverage(true); + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/rml/test/test_rml_mixed.cpp b/dep/tbb/src/rml/test/test_rml_mixed.cpp new file mode 100644 index 000000000..b70d914ac --- /dev/null +++ b/dep/tbb/src/rml/test/test_rml_mixed.cpp @@ -0,0 +1,247 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include "rml_tbb.h" +#include "rml_omp.h" +#include "tbb/atomic.h" +#include "tbb/tick_count.h" +#include "harness.h" + +const int OMP_ParallelRegionSize = 16; +int TBB_MaxThread = 4; // Includes master +int OMP_MaxThread = int(~0u>>1); // Includes master + +template<typename Client> +class ClientBase: public Client { +protected: + typedef typename Client::version_type version_type; + typedef typename Client::job job; + typedef typename Client::policy_type policy_type; + +private: + /*override*/version_type version() const { + return 0; + } + /*override*/size_t min_stack_size() const { + return 1<<20; + } + /*override*/job* create_one_job() { + return new rml::job; + } + /*override*/policy_type policy() const { + return Client::turnaround; + } + /*override*/void acknowledge_close_connection() { + delete this; + } + /*override*/void cleanup( job& j ) {delete &j;} +}; + +//! Represents a TBB or OpenMP run-time that uses RML. +template<typename Factory, typename Client> +class RunTime { +public: + //! Factory that run-time uses to make servers. 
+ Factory factory; + Client* client; + typename Factory::server_type* server; + RunTime() { + factory.open(); + } + ~RunTime() { + factory.close(); + } + //! Create server for this run-time + void create_connection(); + + //! Destroy server for this run-time + void destroy_connection(); +}; + +class ThreadLevelRecorder { + tbb::atomic<int> level; + struct record { + tbb::tick_count time; + int nthread; + }; + tbb::atomic<unsigned> next; + /** Must be power of two */ + static const unsigned max_record_count = 1<<20; + record array[max_record_count]; +public: + void change_level( int delta ); + void dump(); +}; + +void ThreadLevelRecorder::change_level( int delta ) { + int x = level+=delta; + tbb::tick_count t = tbb::tick_count::now(); + unsigned k = next++; + if( k<max_record_count ) { + record& r = array[k]; + r.time = t; + r.nthread = x; + } +} + +void ThreadLevelRecorder::dump() { + FILE* f = fopen("time.txt","w"); + if( !f ) { + perror("fopen(time.txt)\n"); + exit(1); + } + unsigned limit = next; + if( limit>max_record_count ) { + // Clip: change_level() keeps incrementing next after array is full, so only max_record_count records exist + limit = max_record_count; + } + for( unsigned i=0; i<limit; ++i ) { + fprintf(f,"%f\t%d\n",(array[i].time-array[0].time).seconds(),array[i].nthread); + } + fclose(f); +} + +ThreadLevelRecorder TotalThreadLevel; + +class TBB_Client: public ClientBase<tbb::internal::rml::tbb_client> { + /*override*/void process( job& j ); + /*override*/size_type max_job_count() const { + return TBB_MaxThread-1; + } +}; + +class OMP_Client: public ClientBase<__kmp::rml::omp_client> { + /*override*/void process( job&, void* cookie, omp_client::size_type ); + /*override*/size_type max_job_count() const { + return OMP_MaxThread-1; + } +}; + +RunTime<tbb::internal::rml::tbb_factory, TBB_Client> TBB_RunTime; +RunTime<__kmp::rml::omp_factory, OMP_Client> OMP_RunTime; + +template<typename Factory, typename Client> +void RunTime<Factory,Client>::create_connection() { + client = new Client; + typename Factory::status_type status = factory.make_server( server, 
*client ); + ASSERT( status==Factory::st_success, NULL ); +} + +template<typename Factory, typename Client> +void RunTime<Factory,Client>::destroy_connection() { + server->request_close_connection(); + server = NULL; +} + +class OMP_Team { +public: + OMP_Team( __kmp::rml::omp_server& ) {} + tbb::atomic<unsigned> barrier; +}; + +tbb::atomic<int> AvailWork; +tbb::atomic<int> CompletionCount; + +void OMPWork() { + tbb::atomic<int> x; + for( x=0; x<2000000; ++x ) { + continue; + } +} + +void TBBWork() { + if( AvailWork>=0 ) { + int k = --AvailWork; + if( k==-1 ) { + TBB_RunTime.server->adjust_job_count_estimate(-(TBB_MaxThread-1)); + ++CompletionCount; + } else if( k>=0 ) { + for( int k=0; k<4; ++k ) { + OMP_Team team( *OMP_RunTime.server ); + int n = OMP_RunTime.server->try_increase_load( OMP_ParallelRegionSize-1, /*strict=*/false ); + team.barrier = 0; + ::rml::job* array[OMP_ParallelRegionSize-1]; + if( n>0) + OMP_RunTime.server->get_threads( n, &team, array ); + // Master does work inside parallel region too. 
+ OMPWork(); + // Master waits for workers to finish + if( n>0 ) + while( team.barrier!=unsigned(n) ) { + __TBB_Yield(); + } + } + ++CompletionCount; + } + } +} + +/*override*/void TBB_Client::process( job& ) { + TotalThreadLevel.change_level(1); + TBBWork(); + TotalThreadLevel.change_level(-1); +} + +/*override*/void OMP_Client::process( job& /* j */, void* cookie, omp_client::size_type ) { + TotalThreadLevel.change_level(1); + ASSERT( OMP_RunTime.server, NULL ); + OMPWork(); + ASSERT( OMP_RunTime.server, NULL ); + static_cast<OMP_Team*>(cookie)->barrier+=1; + TotalThreadLevel.change_level(-1); +} + +void TBBOutSideOpenMPInside() { + TotalThreadLevel.change_level(1); + CompletionCount = 0; + int tbbtasks = 32; + AvailWork = tbbtasks; + TBB_RunTime.server->adjust_job_count_estimate(TBB_MaxThread-1); + while( CompletionCount!=tbbtasks+1 ) { + TBBWork(); + } + TotalThreadLevel.change_level(-1); +} + +int main( int argc, char* argv[] ) { + // Set defaults + MinThread = 4; + MaxThread = 4; + ParseCommandLine(argc,argv); + for( TBB_MaxThread=MinThread; TBB_MaxThread<=MaxThread; ++TBB_MaxThread ) { // iterate the global TBB_MaxThread (read by the clients and work routines); a loop-local of the same name would shadow it + if( Verbose ) printf("Testing with TBB_MaxThread=%d\n", TBB_MaxThread); + TBB_RunTime.create_connection(); + OMP_RunTime.create_connection(); + TBBOutSideOpenMPInside(); + OMP_RunTime.destroy_connection(); + TBB_RunTime.destroy_connection(); + } + TotalThreadLevel.dump(); + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/rml/test/test_rml_omp.cpp b/dep/tbb/src/rml/test/test_rml_omp.cpp new file mode 100644 index 000000000..fedf851aa --- /dev/null +++ b/dep/tbb/src/rml/test/test_rml_omp.cpp @@ -0,0 +1,173 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation.
+ + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/ + +#include "rml_omp.h" +#include "test_server.h" +#include "tbb/tbb_misc.h" + +typedef __kmp::rml::omp_server MyServer; +typedef __kmp::rml::omp_factory MyFactory; + +static bool StrictTeam; + +class MyTeam { + MyTeam& operator=( const MyTeam& ) ; +public: + struct info_type { + rml::job* job; + bool ran; + info_type() : job(NULL), ran(false) {} + }; + MyTeam( MyServer& /* server */, size_t max_thread_ ) : + max_thread(max_thread_) + { + self_ptr = this; + info = new info_type[max_thread]; + } + ~MyTeam() { + delete[] info; + } + const size_t max_thread; + size_t n_thread; + tbb::atomic<int> barrier; + /** Indexed with 1-origin index */ + info_type* info; + int iteration; + MyTeam* self_ptr; +}; + +class MyClient: public ClientBase<__kmp::rml::omp_client> { +public: + MyServer* server; + /*override*/void process( job& j, void* cookie, size_type index ) { + MyTeam& t = *static_cast<MyTeam*>(cookie); + ASSERT( t.self_ptr==&t, "trashed cookie" ); + ASSERT( index<t.max_thread, NULL ); + ASSERT( !t.info[index].ran, "duplicate index?" ); + t.info[index].job = &j; + t.info[index].ran = true; + do_process(j); + if( index==1 && nesting.level<nesting.limit ) { + DoOneConnection<MyFactory,MyClient> doc(MaxThread,Nesting(nesting.level+1,nesting.limit),0,false); + doc(0); + } + ++t.barrier; + } + static const bool is_omp = true; + bool is_strict() const {return StrictTeam;} +}; + +void FireUpJobs( MyServer& server, MyClient& client, int max_thread, int n_extra, Checker* checker ) { + ASSERT( max_thread>=0, NULL ); + client.server = &server; + MyTeam team(server,size_t(max_thread)); + MyServer::size_type n_thread = 0; + for( int iteration=0; iteration<4; ++iteration ) { + for( size_t i=0; i<team.max_thread; ++i ) + team.info[i].ran = false; + switch( iteration ) { + default: + n_thread = int(max_thread); + break; + case 1: + // No change in number of threads + break; + case 2: + // Decrease number of threads. 
+ n_thread = int(max_thread)/2; + break; + // Case 3 is same code as the default, but has effect of increasing the number of threads. + } + team.barrier = 0; + if( Verbose ) { + printf("client %d: server.run with n_thread=%d\n", client.client_id(), int(n_thread) ); + } + server.independent_thread_number_changed( n_extra ); + if( checker ) { + // Give RML time to respond to change in number of threads. + MilliSleep(1); + } + int n_delivered = server.try_increase_load( n_thread, StrictTeam ); + team.n_thread = n_delivered; + ::rml::job* job_array[JobArraySize]; + job_array[n_delivered] = (::rml::job*)intptr_t(-1); + server.get_threads( n_delivered, &team, job_array ); + __TBB_ASSERT( job_array[n_delivered]== (::rml::job*)intptr_t(-1), NULL ); + for( int i=0; i<n_delivered; ++i ) { + MyJob* j = static_cast<MyJob*>(job_array[i]); + int s = j->state; + ASSERT( s==MyJob::idle||s==MyJob::busy, NULL ); + } + server.independent_thread_number_changed( -n_extra ); + if( Verbose ) { + printf("client %d: team size is %d\n", client.client_id(), n_delivered); + } + if( checker ) { + checker->check_number_of_threads_delivered( n_delivered, n_thread, n_extra ); + } + // Protocol requires that master wait until workers have called "done_processing" + while( team.barrier!=n_delivered ) { + ASSERT( team.barrier>=0, NULL ); + ASSERT( team.barrier<=n_delivered, NULL ); + __TBB_Yield(); + } + if( Verbose ) { + printf("client %d: team completed\n", client.client_id() ); + } + for( int i=0; i<n_delivered; ++i ) { + ASSERT( team.info[i].ran, "thread on team allegedly delivered, but did not run?" 
); + } + for( MyServer::size_type i=n_delivered; i<MyServer::size_type(max_thread); ++i ) { + ASSERT( !team.info[i].ran, "thread on team ran with illegal index" ); + } + ASSERT( !StrictTeam || n_delivered==int(n_thread), "server failed to satisfy strict request" ); + } +} + +void DoClientSpecificVerification( MyServer& server, int /*n_thread*/ ) +{ + ASSERT( server.current_balance()==int(tbb::internal::DetectNumberOfWorkers())-1, NULL ); +} + +int main( int argc, char* argv[] ) { + // Set defaults + MinThread = 0; + MaxThread = 4; + ParseCommandLine(argc,argv); + + StrictTeam = true; + VerifyInitialization<MyFactory,MyClient>( MaxThread ); + SimpleTest<MyFactory,MyClient>(); + + StrictTeam = false; + VerifyInitialization<MyFactory,MyClient>( MaxThread ); + SimpleTest<MyFactory,MyClient>(); + + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/rml/test/test_rml_omp_c_linkage.c b/dep/tbb/src/rml/test/test_rml_omp_c_linkage.c new file mode 100644 index 000000000..e94790fdd --- /dev/null +++ b/dep/tbb/src/rml/test/test_rml_omp_c_linkage.c @@ -0,0 +1,37 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. 
Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include <stdio.h> + +void Cplusplus(); + +int main() { + Cplusplus(); + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/rml/test/test_rml_tbb.cpp b/dep/tbb/src/rml/test/test_rml_tbb.cpp new file mode 100644 index 000000000..a3cd666ba --- /dev/null +++ b/dep/tbb/src/rml/test/test_rml_tbb.cpp @@ -0,0 +1,122 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. + + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. 
This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +#include "rml_tbb.h" +#include "test_server.h" + +typedef tbb::internal::rml::tbb_server MyServer; +typedef tbb::internal::rml::tbb_factory MyFactory; + +class MyClient: public ClientBase<tbb::internal::rml::tbb_client> { + tbb::atomic<int> counter; + /*override*/void process( job& j ) { + do_process(j); + } +public: + MyClient() {counter=1;} + static const bool is_omp = false; + bool is_strict() const {return false;} +}; + +void FireUpJobs( MyServer& server, MyClient& client, int n_thread, int n_extra, Checker* checker ) { + if( Verbose ) + printf("client %d: calling adjust_job_count_estimate(%d)\n", client.client_id(),n_thread); + // Exercise independent_thread_number_changed, even for zero values. + server.independent_thread_number_changed( n_extra ); + // Experiments indicate that when oversubscribing, the main thread should wait a little + // while for the RML worker threads to do some work. + int delay = n_thread>int(server.default_concurrency()) ? 50 : 1; + if( checker ) { + // Give RML time to respond to change in number of threads. + MilliSleep(delay); + for( int k=0; k<n_thread; ++k ) + client.job_array[k].processing_count = 0; + } + server.adjust_job_count_estimate( n_thread ); + int n_used = 0; + if( checker ) { + MilliSleep(delay); + for( int k=0; k<n_thread; ++k ) + if( client.job_array[k].processing_count ) + ++n_used; + } + // Logic further below presumes that jobs never starve, so undo previous call + // to independent_thread_number_changed before waiting on those jobs. + server.independent_thread_number_changed( -n_extra ); + if( Verbose ) + printf("client %d: wait for each job to be processed at least once\n",client.client_id()); + // Calculate the number of jobs that are expected to get threads. + // Typically this is equal to n_thread. 
But if nested, subtract 1 to account for the fact + // that this thread itself cannot process the job. + int expected = client.nesting.level==0 ? n_thread : n_thread-1; + // Wait for expected number of jobs to be processed. + if( client.nesting.level==0 ) { + for(;;) { + int n = 0; + for( int k=0; k<n_thread; ++k ) + if( client.job_array[k].processing_count!=0 ) + ++n; + if( n>=expected ) break; + server.yield(); + } + } else { + printf("testing of nested tbb execution is yet to be supported\n"); + } + server.adjust_job_count_estimate(-n_thread); + if( checker ) + checker->check_number_of_threads_delivered( n_used, n_thread, n_extra ); +} + +void DoClientSpecificVerification( MyServer&, int n_thread ) +{ + MyClient* client = new MyClient; + client->initialize( n_thread, Nesting(), ClientStackSize[0] ); + MyFactory factory; + memset( &factory, 0, sizeof(factory) ); + MyFactory::status_type status = factory.open(); + ASSERT( status!=MyFactory::st_not_found, "could not find RML library" ); + ASSERT( status!=MyFactory::st_incompatible, NULL ); + ASSERT( status==MyFactory::st_success, NULL ); + MyFactory::server_type* server; + status = factory.make_server( server, *client ); + ASSERT( status==MyFactory::st_connection_exists, "Did the first connection get lost?" ); + factory.close(); + client->update(MyClient::destroyed, MyClient::live); + delete client; +} + +int main( int argc, char* argv[] ) { + // Set defaults + MinThread = 0; + MaxThread = 4; + ParseCommandLine(argc,argv); + VerifyInitialization<MyFactory,MyClient>( MaxThread ); + SimpleTest<MyFactory,MyClient>(); + printf("done\n"); + return 0; +} diff --git a/dep/tbb/src/rml/test/test_server.h b/dep/tbb/src/rml/test/test_server.h new file mode 100644 index 000000000..65e07af9c --- /dev/null +++ b/dep/tbb/src/rml/test/test_server.h @@ -0,0 +1,398 @@ +/* + Copyright 2005-2009 Intel Corporation. All Rights Reserved. + + This file is part of Threading Building Blocks. 
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +/* This header contains code shared by test_rml_omp.cpp and test_rml_tbb.cpp + There is no ifndef guard - test is supposed to include this file exactly once. + The test is also expected to have #include of rml_omp.h or rml_tbb.h before + including this header. + + This header should not use any parts of TBB that require linking in the TBB run-time. + It uses a few instances of tbb::atomic<T>, all of which are completely inlined. */ + +#include "tbb/atomic.h" +#include "tbb/tbb_thread.h" +#include "harness.h" +#include "harness_memory.h" + +//! Define TRIVIAL as 1 to test only a single client, no nesting, no extra threads. +#define TRIVIAL 0 + +//! 
Maximum number of clients +#if TRIVIAL +const size_t MaxClient = 1; +#else +const size_t MaxClient = 4; +#endif + +const size_t ClientStackSize[MaxClient] = { + 1000000 +#if !TRIVIAL + ,2000000 + ,1000000 + ,4000000 +#endif /* TRIVIAL */ +}; + +const size_t OverheadStackSize = 500000; + +const size_t JobArraySize = 1000; + +#if _WIN32||_WIN64 +#include <Windows.h> /* Need Sleep */ +#else +#include <unistd.h> /* Need usleep */ +#endif + +void MilliSleep( unsigned milliseconds ) { +#if _WIN32||_WIN64 + Sleep( milliseconds ); +#else + usleep( milliseconds*1000 ); +#endif /* _WIN32||_WIN64 */ +} + +class MyJob: public ::rml::job { +public: + //! Enumeration for tracking states of a job. + enum state_t { + //! Job has not yet been allocated. + unallocated, + //! Is idle. + idle, + //! Has a thread working on it. + busy, + //! After call to client::cleanup + clean + }; + tbb::atomic<int> state; + volatile int processing_count; + void update( state_t new_state, state_t old_state ) { + int o = state.compare_and_swap(new_state,old_state); + ASSERT( o==old_state, "illegal transition" ); + } + void update_from_either( state_t new_state, state_t old_state1, state_t old_state2 ) { + int snapshot; + do { + snapshot = state; + ASSERT( snapshot==old_state1||snapshot==old_state2, "illegal transition" ); + } while( state.compare_and_swap(new_state,snapshot)!=snapshot ); + } + MyJob() { + state=unallocated; + processing_count=0; + } + ~MyJob() { + // Overwrite so that accidental use after destruction can be detected. 
+ memset(this,-1,sizeof(*this)); + } +}; + +static tbb::atomic<int> ClientConstructions; +static tbb::atomic<int> ClientDestructions; + +struct Nesting { + int level; + int limit; + Nesting() : level(0), limit(0) {} + Nesting( int level_, int limit_ ) : level(level_), limit(limit_) {} +}; + +template<typename Client> +class ClientBase: public Client { +protected: + typedef typename Client::size_type size_type; + typedef typename Client::version_type version_type; + typedef typename Client::policy_type policy_type; + typedef typename Client::job job; +private: + size_type my_max_job_count; + size_t my_stack_size; + tbb::atomic<size_t> next_job_index; + int my_client_id; + rml::server* my_server; + +public: + enum state_t { + //! Treat *this as constructed. + live=0x1, + //! Treat *this as destroyed. + destroyed=0xDEAD + }; + + tbb::atomic<int> state; + void update( state_t new_state, state_t old_state ) { + int o = state.compare_and_swap(new_state,old_state); + ASSERT( o==old_state, NULL ); + } + + tbb::atomic<bool> expect_close_connection; + + MyJob *job_array; + + /*override*/version_type version() const { + ASSERT( state==live, NULL ); + return 1; + } + + /*override*/size_type max_job_count() const { + ASSERT( state==live, NULL ); + return my_max_job_count; + } + + /*override*/size_t min_stack_size() const { + ASSERT( state==live, NULL ); + return my_stack_size; + } + + /*override*/policy_type policy() const {return Client::throughput;} + + /*override*/void acknowledge_close_connection() { + ASSERT( expect_close_connection, NULL ); + for( size_t k=next_job_index; k>0; ) { + --k; + ASSERT( job_array[k].state==MyJob::clean, NULL ); + } + delete[] job_array; + job_array = NULL; + ASSERT( my_server, NULL ); + update( destroyed, live ); + delete this; + } + + /*override*/void cleanup( job& j_ ) { + if( Verbose ) + printf("client %d: cleanup(%p) called\n",client_id(),&j_); + ASSERT( state==live, NULL ); + MyJob& j = static_cast<MyJob&>(j_); + 
j.update(MyJob::clean,MyJob::idle); + if( Verbose ) + printf("client %d: cleanup(%p) returns\n",client_id(),&j_); + } + + job* create_one_job(); + +protected: + void do_process( job& j_ ) { + ASSERT( state==live, NULL ); + MyJob& j = static_cast<MyJob&>(j_); + ASSERT( &j, NULL ); + j.update(MyJob::busy,MyJob::idle); + ++j.processing_count; + ASSERT( my_stack_size>OverheadStackSize, NULL ); +#ifdef __ia64__ + // Half of the stack is reserved for RSE, so test only remaining half. + UseStackSpace( (my_stack_size-OverheadStackSize)/2 ); +#else + UseStackSpace( my_stack_size-OverheadStackSize ); +#endif + j.update(MyJob::idle,MyJob::busy); + my_server->yield(); + } +public: + ClientBase() : my_server(NULL) { + my_client_id = ClientConstructions++; + next_job_index = 0; + } + int client_id() const {return my_client_id;} + + Nesting nesting; + + void initialize( size_type max_job_count, Nesting nesting_, size_t stack_size ) { + ASSERT( stack_size>0, NULL ); + my_max_job_count = max_job_count; + nesting = nesting_; + my_stack_size = stack_size; + job_array = new MyJob[JobArraySize]; + expect_close_connection = false; + state = live; + } + + void set_server( rml::server* s ) {my_server=s;} + + virtual ~ClientBase() { + ASSERT( state==destroyed, NULL ); + ++ClientDestructions; + } +}; + +template<typename Client> +typename Client::job* ClientBase<Client>::create_one_job() { + if( Verbose ) + printf("client %d: create_one_job() called\n",client_id()); + size_t k = next_job_index++; + ASSERT( state==live, NULL ); + // Following assertion depends on assumption that implementation does not destroy jobs until + // the connection is closed. If the implementation is changed to destroy jobs sooner, the + // test logic in this header will have to be reworked. 
+    ASSERT( k<my_max_job_count, "RML allocated more than max_job_count jobs simultaneously" );
+    ASSERT( k<JobArraySize, "JobArraySize not big enough (problem is in test, not RML)" );
+    MyJob& j = job_array[k];
+    j.update(MyJob::idle,MyJob::unallocated);
+    if( Verbose )
+        printf("client %d: create_one_job() for k=%d returns %p\n",client_id(),int(k),&j);
+    return &j;
+}
+
+class Checker { // Compares the number of threads a server delivered against what could be expected; mismatches are printed as warnings.
+public:
+    int default_concurrency; // Value of server.default_concurrency() captured at construction.
+    void check_number_of_threads_delivered( int n_delivered, int n_requested, int n_extra ) const;
+    Checker( rml::server& server ) : default_concurrency(int(server.default_concurrency())) {}
+};
+
+void Checker::check_number_of_threads_delivered( int n_delivered, int n_requested, int n_extra ) const { // n_delivered: threads received; n_requested: threads asked for; n_extra: threads pretended to exist outside RML.
+    ASSERT( default_concurrency>=0, NULL );
+    // Check that number of threads delivered is reasonable.
+    int n_avail = default_concurrency;
+    if( n_extra>0 )
+        n_avail-=n_extra;
+    if( n_avail<0 )
+        n_avail=0; // Clamp: the extra threads may exceed the default concurrency.
+    // If the client asked for more threads than the hardware provides, the difference becomes private threads
+    // that are available regardless of what else is running.
+    if( n_requested>default_concurrency )
+        n_avail += n_requested-default_concurrency;
+    int n_expected = n_requested;
+    if( n_expected>n_avail )
+        n_expected=n_avail; // n_expected == min(n_requested, n_avail)
+    const char* msg = NULL;
+    if( n_delivered>n_avail )
+        msg = "server delivered more threads than were theoretically available";
+    else if( n_delivered>n_expected )
+        msg = "server delivered more threads than expected";
+    else if( n_delivered<n_expected )
+        msg = "server delivered fewer threads than ideal";
+    if( msg ) { // Report only; a mismatch does not abort the test.
+        printf("Warning: %s (n_delivered=%d n_avail=%d n_requested=%d n_extra=%d default_concurrency=%d)\n",
+               msg, n_delivered, n_avail, n_requested, n_extra, default_concurrency );
+    }
+}
+
+template<typename Factory,typename Client>
+class DoOneConnection: NoAssign { // Functor that opens one client/server connection, runs jobs on it and closes it again.
+    //! Number of threads to request
+    const int n_thread;
+    //! Nesting
+    const Nesting nesting;
+    //! Number of extra threads to pretend having outside the RML
+    const int n_extra;
+    //! If true, check number of threads actually delivered.
+    const bool check_delivered;
+public:
+    DoOneConnection( int n_thread_, Nesting nesting_, int n_extra_, bool check_delivered_ ) :
+        n_thread(n_thread_),
+        nesting(nesting_),
+        n_extra(n_extra_),
+        check_delivered(check_delivered_)
+    {
+    }
+
+    //! Test ith connection
+    void operator()( size_t i ) const;
+};
+
+template<typename Factory,typename Client>
+void DoOneConnection<Factory,Client>::operator()( size_t i ) const { // i selects the client stack size (ClientStackSize[i]) and must be below MaxClient.
+    ASSERT( i<MaxClient, NULL );
+    Client* client = new Client; // Not deleted here; see the note before factory.close() below.
+    client->initialize( Client::is_omp ? JobArraySize : n_thread, nesting, ClientStackSize[i] );
+    Factory factory;
+    memset( &factory, 0, sizeof(factory) ); // NOTE(review): presumably mimics zero-initialized static storage - confirm Factory tolerates memset.
+    typename Factory::status_type status = factory.open();
+    ASSERT( status==Factory::st_success, "could not open RML factory" ); // Same check as VerifyInitialization; previously the status was ignored.
+    typename Factory::server_type* server = NULL; // Initialized so a failed make_server cannot leave a garbage pointer.
+    status = factory.make_server( server, *client );
+    if( Verbose )
+        printf("client %d: opened server n_thread=%d nesting=(%d,%d)\n",
+               client->client_id(), n_thread, nesting.level, nesting.limit);
+    ASSERT( status==Factory::st_success && server, "could not make RML server" ); // server is dereferenced below.
+    client->set_server( server );
+    Checker checker( *server );
+
+    FireUpJobs( *server, *client, n_thread, n_extra, check_delivered && !client->is_strict() ? &checker : NULL ); // Checker is passed only when checking is requested and the client is not strict.
+
+    // Close the connection
+    client->expect_close_connection = true;
+    if( Verbose )
+        printf("client %d: calling request_close_connection\n", client->client_id());
+    server->request_close_connection();
+    // Client deletes itself when it sees call to acknowledge_close_connection from server.
+    factory.close();
+}
+
+//! Test with n_threads threads and n_client clients.
+template<typename Factory, typename Client>
+void SimpleTest() { // Runs one connection for every thread count in [MinThread, MaxThread].
+    for( int n_thread=MinThread; n_thread<=MaxThread; ++n_thread ) {
+        // Test nested connections
+        DoOneConnection<Factory,Client> doc(n_thread,Nesting(0,1),0,false); // No extra threads, no delivery check.
+        doc(0);
+    }
+    // Let RML catch up.
+    while( ClientConstructions!=ClientDestructions ) { // Poll until every constructed client has been destroyed.
+        MilliSleep(1);
+    }
+}
+
+static void check_server_info( void* arg, const char* server_info ) // Callback: asserts that server_info contains the C string passed in arg.
+{
+    ASSERT( strstr(server_info, (char*)arg), NULL );
+}
+
+template<typename Factory, typename Client>
+void VerifyInitialization( int n_thread ) { // Opens a factory and a server, asserting success at each step, runs the client-specific checks, then closes.
+    Client* client = new Client; // Not deleted here; see the note before factory.close() below.
+    client->initialize( Client::is_omp ? JobArraySize : n_thread, Nesting(), ClientStackSize[0] );
+    Factory factory;
+    memset( &factory, 0, sizeof(factory) ); // NOTE(review): presumably mimics zero-initialized static storage - confirm Factory tolerates memset.
+    typename Factory::status_type status = factory.open();
+    ASSERT( status!=Factory::st_not_found, "could not find RML library" );
+    ASSERT( status!=Factory::st_incompatible, NULL );
+    ASSERT( status==Factory::st_success, NULL );
+    factory.call_with_server_info( check_server_info, (void*)"Intel(R) RML library built:" ); // The server info must contain this banner text.
+    typename Factory::server_type* server;
+    status = factory.make_server( server, *client );
+    ASSERT( status!=Factory::st_incompatible, NULL );
+    ASSERT( status!=Factory::st_not_found, NULL );
+    ASSERT( status==Factory::st_success, NULL );
+    if( Verbose )
+        printf("client %d: opened server n_thread=%d nesting=(%d,%d)\n",
+               client->client_id(), n_thread, 0, 0);
+    ASSERT( server, NULL );
+    client->set_server( server );
+
+    DoClientSpecificVerification( *server, n_thread );
+
+    // Close the connection
+    client->expect_close_connection = true;
+    if( Verbose )
+        printf("client %d: calling request_close_connection\n", client->client_id());
+    server->request_close_connection();
+    // Client deletes itself when it sees call to acknowledge_close_connection from server.
+    factory.close();
+}
diff --git a/dep/tbb/src/rml/test/test_thread_monitor.cpp b/dep/tbb/src/rml/test/test_thread_monitor.cpp
new file mode 100644
index 000000000..89ef32554
--- /dev/null
+++ b/dep/tbb/src/rml/test/test_thread_monitor.cpp
@@ -0,0 +1,129 @@
+/*
+    Copyright 2005-2009 Intel Corporation. All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+ + Threading Building Blocks is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License + version 2 as published by the Free Software Foundation. + + Threading Building Blocks is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Threading Building Blocks; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, you may use this file as part of a free software + library without restriction. Specifically, if other files instantiate + templates or use macros or inline functions from this file, or you compile + this file and link it with other files to produce an executable, this + file does not by itself cause the resulting executable to be covered by + the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. 
+*/
+
+#include "thread_monitor.h"
+#include "harness.h"
+#include "harness_memory.h"
+
+class ThreadState { // State shared between main() and one launched worker thread.
+    void loop();
+public:
+    static __RML_DECL_THREAD_ROUTINE routine( void* arg ) { // Thread entry point; arg is the ThreadState to run.
+        static_cast<ThreadState*>(arg)->loop();
+        return 0;
+    }
+    typedef rml::internal::thread_monitor thread_monitor;
+    thread_monitor monitor;  // The worker waits on this; main() calls notify() after posting a request.
+    volatile int request;    // Written by main(); a value of 0 tells the worker to exit.
+    volatile int ack;        // Written by the worker to echo the request it has seen.
+    volatile unsigned clock; // Incremented by the worker on every pass through loop().
+    volatile unsigned stamp; // main()'s last sample of clock, used to detect that the worker stopped iterating.
+    ThreadState() : request(-1), ack(-1), clock(0), stamp(0) {} // clock/stamp were left indeterminate before, yet loop() starts with ++clock.
+};
+
+void ThreadState::loop() { // Worker body: wait while nothing new is requested, otherwise acknowledge; returns after acknowledging request 0.
+    for(;;) {
+        ++clock;
+        if( ack==request ) { // Nothing new: prepare to wait, then re-check before committing.
+            thread_monitor::cookie c;
+            monitor.prepare_wait(c);
+            if( ack==request ) {
+                if( Verbose ) {
+                    printf("%p: request=%d ack=%d\n", this, request, ack );
+                }
+                monitor.commit_wait(c);
+            } else
+                monitor.cancel_wait(); // A request arrived after prepare_wait; do not wait.
+        } else {
+            // Throw in delay occasionally
+            switch( request%8 ) {
+                case 0:
+                case 1:
+                case 5:
+                    rml::internal::thread_monitor::yield();
+            }
+            int r = request;
+            ack = r; // Acknowledge the same snapshot that decides termination (was a second read of request).
+            if( !r ) return;
+        }
+    }
+}
+
+// Linux on Itanium seems to require at least 1<<18 bytes per stack.
+const size_t MinStackSize = 1<<18;
+const size_t MaxStackSize = 1<<22;
+
+int main( int argc, char* argv[] ) { // For each thread count p and each stack size: launch p workers, post requests 1000 down to 0 and wait for every ack.
+    // Set defaults
+    MinThread = 1;
+    MaxThread = 4;
+    ParseCommandLine( argc, argv );
+
+    for( int p=MinThread; p<=MaxThread; ++p ) {
+        ThreadState* t = new ThreadState[p];
+        for( size_t stack_size = MinStackSize; stack_size<=MaxStackSize; stack_size*=2 ) { // Stack size doubles each round; the same ThreadState array is reused.
+            if( Verbose )
+                printf("launching %d threads\n",p);
+            for( int i=0; i<p; ++i )
+                rml::internal::thread_monitor::launch( ThreadState::routine, t+i, stack_size );
+            for( int k=1000; k>=0; --k ) { // The final request, k==0, makes every worker return from loop().
+                if( k%8==0 ) {
+                    // Wait for threads to wait.
+                    for( int i=0; i<p; ++i ) {
+                        unsigned count = 0;
+                        do { // Treated as waiting once clock is unchanged across a yield; gives up with a warning after 1000 tries.
+                            t[i].stamp = t[i].clock;
+                            rml::internal::thread_monitor::yield();
+                            if( ++count>=1000 ) {
+                                printf("Warning: thread %d not waiting\n",i);
+                                break;
+                            }
+                        } while( t[i].stamp!=t[i].clock );
+                    }
+                }
+                if( Verbose )
+                    printf("notifying threads\n");
+                for( int i=0; i<p; ++i ) {
+                    // Change state visible to launched thread
+                    t[i].request = k;
+                    t[i].monitor.notify();
+                }
+                if( Verbose )
+                    printf("waiting for threads to respond\n");
+                for( int i=0; i<p; ++i )
+                    // Wait for thread to respond
+                    while( t[i].ack!=k )
+                        rml::internal::thread_monitor::yield();
+            }
+        }
+        delete[] t; // Reached only after every worker has acknowledged request 0.
+    }
+
+    printf("done\n");
+    return 0;
+}
diff --git a/dep/tbb/src/tbb/gate.h b/dep/tbb/src/tbb/gate.h
deleted file mode 100644
index fb1283621..000000000
--- a/dep/tbb/src/tbb/gate.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
-    Copyright 2005-2009 Intel Corporation. All Rights Reserved.
-
-    This file is part of Threading Building Blocks.
-
-    Threading Building Blocks is free software; you can redistribute it
-    and/or modify it under the terms of the GNU General Public License
-    version 2 as published by the Free Software Foundation.
-
-    Threading Building Blocks is distributed in the hope that it will be
-    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Threading Building Blocks; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-    As a special exception, you may use this file as part of a free software
-    library without restriction.
Specifically, if other files instantiate - templates or use macros or inline functions from this file, or you compile - this file and link it with other files to produce an executable, this - file does not by itself cause the resulting executable to be covered by - the GNU General Public License. This exception does not however - invalidate any other reasons why the executable file might be covered by - the GNU General Public License. -*/ - -#ifndef _TBB_Gate_H -#define _TBB_Gate_H - -#include "itt_notify.h" - -namespace tbb { - -namespace internal { - -#if __TBB_RML -//! Fake version of Gate for use with RML. -/** Really just an atomic intptr_t with a compare-and-swap operation, - but wrapped in syntax that makes it look like a normal Gate object, - in order to minimize source changes for RML in task.cpp. */ -class Gate { -public: - typedef intptr_t state_t; - - //! Get current state of gate - state_t get_state() const { - return state; - } - -#if defined(_MSC_VER) && defined(_Wp64) - // Workaround for overzealous compiler warnings in /Wp64 mode - #pragma warning (disable: 4244) -#endif - - bool try_update( intptr_t value, intptr_t comparand ) { - return state.compare_and_swap(value,comparand)==comparand; - } -private: - atomic<state_t> state; -}; - -#elif __TBB_USE_FUTEX - -//! Implementation of Gate based on futex. -/** Use this futex-based implementation where possible, because it is the simplest and usually fastest. */ -class Gate { -public: - typedef intptr_t state_t; - - Gate() { - ITT_SYNC_CREATE(&state, SyncType_Scheduler, SyncObj_Gate); - } - - //! Get current state of gate - state_t get_state() const { - return state; - } - //! 
Update state=value if state==comparand (flip==false) or state!=comparand (flip==true) - void try_update( intptr_t value, intptr_t comparand, bool flip=false ) { - __TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" ); -retry: - state_t old_state = state; - // First test for condition without using atomic operation - if( flip ? old_state!=comparand : old_state==comparand ) { - // Now atomically retest condition and set. - state_t s = state.compare_and_swap( value, old_state ); - if( s==old_state ) { - // compare_and_swap succeeded - if( value!=0 ) - futex_wakeup_all( &state ); // Update was successful and new state is not SNAPSHOT_EMPTY - } else { - // compare_and_swap failed. But for != case, failure may be spurious for our purposes if - // the value there is nonetheless not equal to value. This is a fairly rare event, so - // there is no need for backoff. In event of such a failure, we must retry. - if( flip && s!=value ) - goto retry; - } - } - } - //! Wait for state!=0. - void wait() { - if( state==0 ) - futex_wait( &state, 0 ); - } -private: - atomic<state_t> state; -}; - -#elif USE_WINTHREAD - -class Gate { -public: - typedef intptr_t state_t; -private: - //! If state==0, then thread executing wait() suspend until state becomes non-zero. - state_t state; - CRITICAL_SECTION critical_section; - HANDLE event; -public: - //! Initialize with count=0 - Gate() : state(0) { - event = CreateEvent( NULL, true, false, NULL ); - InitializeCriticalSection( &critical_section ); - ITT_SYNC_CREATE(&event, SyncType_Scheduler, SyncObj_Gate); - ITT_SYNC_CREATE(&critical_section, SyncType_Scheduler, SyncObj_GateLock); - } - ~Gate() { - // Fake prepare/acquired pair for Intel(R) Parallel Amplifier to correctly attribute the operations below - ITT_NOTIFY( sync_prepare, &event ); - CloseHandle( event ); - DeleteCriticalSection( &critical_section ); - ITT_NOTIFY( sync_acquired, &event ); - } - //! 
Get current state of gate - state_t get_state() const { - return state; - } - //! Update state=value if state==comparand (flip==false) or state!=comparand (flip==true) - void try_update( intptr_t value, intptr_t comparand, bool flip=false ) { - __TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" ); - EnterCriticalSection( &critical_section ); - state_t old = state; - if( flip ? old!=comparand : old==comparand ) { - state = value; - if( !old ) - SetEvent( event ); - else if( !value ) - ResetEvent( event ); - } - LeaveCriticalSection( &critical_section ); - } - //! Wait for state!=0. - void wait() { - if( state==0 ) { - WaitForSingleObject( event, INFINITE ); - } - } -}; - -#elif USE_PTHREAD - -class Gate { -public: - typedef intptr_t state_t; -private: - //! If state==0, then thread executing wait() suspend until state becomes non-zero. - state_t state; - pthread_mutex_t mutex; - pthread_cond_t cond; -public: - //! Initialize with count=0 - Gate() : state(0) - { - pthread_mutex_init( &mutex, NULL ); - pthread_cond_init( &cond, NULL); - ITT_SYNC_CREATE(&cond, SyncType_Scheduler, SyncObj_Gate); - ITT_SYNC_CREATE(&mutex, SyncType_Scheduler, SyncObj_GateLock); - } - ~Gate() { - pthread_cond_destroy( &cond ); - pthread_mutex_destroy( &mutex ); - } - //! Get current state of gate - state_t get_state() const { - return state; - } - //! Update state=value if state==comparand (flip==false) or state!=comparand (flip==true) - void try_update( intptr_t value, intptr_t comparand, bool flip=false ) { - __TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" ); - pthread_mutex_lock( &mutex ); - state_t old = state; - if( flip ? old!=comparand : old==comparand ) { - state = value; - if( !old ) - pthread_cond_broadcast( &cond ); - } - pthread_mutex_unlock( &mutex ); - } - //! Wait for state!=0. 
- void wait() { - if( state==0 ) { - pthread_mutex_lock( &mutex ); - while( state==0 ) { - pthread_cond_wait( &cond, &mutex ); - } - pthread_mutex_unlock( &mutex ); - } - } -}; - -#else -#error Must define USE_PTHREAD or USE_WINTHREAD -#endif /* threading kind */ - -} // namespace Internal - -} // namespace ThreadingBuildingBlocks - -#endif /* _TBB_Gate_H */ diff --git a/dep/tbb/src/tbb/private_server.cpp b/dep/tbb/src/tbb/private_server.cpp index cda558e81..99234ea35 100644 --- a/dep/tbb/src/tbb/private_server.cpp +++ b/dep/tbb/src/tbb/private_server.cpp @@ -26,8 +26,8 @@ the GNU General Public License. */ -#include "../rml/include/rml_tbb.h" -#include "../rml/server/thread_monitor.h" +#include "rml_tbb.h" +#include "../server/thread_monitor.h" #include "tbb/atomic.h" #include "tbb/cache_aligned_allocator.h" #include "tbb/spin_mutex.h" diff --git a/dep/tbb/src/tbb/tbb_version.h b/dep/tbb/src/tbb/tbb_version.h index 07a91d6f5..cd86de08e 100644 --- a/dep/tbb/src/tbb/tbb_version.h +++ b/dep/tbb/src/tbb/tbb_version.h @@ -27,13 +27,13 @@ */ // Please define version number in the file: -#include "../../include/tbb/tbb_stddef.h" +#include "tbb/tbb_stddef.h" // And don't touch anything below #ifndef ENDL #define ENDL "\n" #endif -#include "../../build/vsproject/version_string.tmp" +#include "version_string.tmp" #ifndef __TBB_VERSION_STRINGS #pragma message("Warning: version_string.tmp isn't generated properly by version_info.sh script!") diff --git a/dep/tbb/src/tbbmalloc/tbbmalloc.rc b/dep/tbb/src/tbbmalloc/tbbmalloc.rc index 4e8a2ed0b..89830ebd3 100644 --- a/dep/tbb/src/tbbmalloc/tbbmalloc.rc +++ b/dep/tbb/src/tbbmalloc/tbbmalloc.rc @@ -43,7 +43,7 @@ // #include <winresrc.h> #define ENDL "\r\n" -#include "../tbb/tbb_version.h" +#include "tbb/tbb_version.h" #define TBBMALLOC_VERNUMBERS TBB_VERSION_MAJOR, TBB_VERSION_MINOR, __TBB_VERSION_YMD #define TBBMALLOC_VERSION __TBB_STRING(TBBMALLOC_VERNUMBERS) diff --git a/src/shared/revision_nr.h b/src/shared/revision_nr.h index 
9b1c09c56..bf09d4b41 100644 --- a/src/shared/revision_nr.h +++ b/src/shared/revision_nr.h @@ -1,4 +1,4 @@ #ifndef __REVISION_NR_H__ #define __REVISION_NR_H__ - #define REVISION_NR "11163" + #define REVISION_NR "11164" #endif // __REVISION_NR_H__