[11164] Changed TBB lib to same version but with make script

(based on cipherCOM's repo commit e8a538e)

Signed-off-by: VladimirMangos <vladimir@getmangos.com>
This commit is contained in:
cipherCOM 2011-02-13 03:01:21 +03:00 committed by VladimirMangos
parent cc8a11bff7
commit 21794edc44
38 changed files with 5517 additions and 579 deletions

85
dep/tbb/Makefile Normal file
View file

@ -0,0 +1,85 @@
# Copyright 2005-2009 Intel Corporation. All Rights Reserved.
#
# This file is part of Threading Building Blocks.
#
# Threading Building Blocks is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# Threading Building Blocks is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Threading Building Blocks; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As a special exception, you may use this file as part of a free software
# library without restriction. Specifically, if other files instantiate
# templates or use macros or inline functions from this file, or you compile
# this file and link it with other files to produce an executable, this
# file does not by itself cause the resulting executable to be covered by
# the GNU General Public License. This exception does not however
# invalidate any other reasons why the executable file might be covered by
# the GNU General Public License.
# Root of the TBB source tree. ?= keeps any value already supplied by the caller
# or the environment and falls back to the current directory otherwise.
tbb_root?=.
# NOTE(review): work_dir, MD, RM, RD, NUL, SLASH, MAKE_TBBVARS, tbb_build_prefix and
# origin_build_dir are used by the rules of this file but are not defined in it;
# presumably they come from common.inc -- confirm.
include $(tbb_root)/build/common.inc
.PHONY: default all tbb tbbmalloc test examples
#workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.tmp
#According to documentation submakes should run in parallel
.NOTPARALLEL: tbb tbbmalloc
# "default" builds only the two libraries; "all" additionally runs the tests and
# builds the examples.
default: tbb tbbmalloc
all: tbb tbbmalloc test examples
# Every rule below runs a sub-make inside the per-configuration work directory
# ("$(work_dir)_debug" / "$(work_dir)_release"), selecting the makefile with -f
# and the configuration with cfg=. -r disables make's built-in implicit rules.

# Build the TBB library in both configurations (debug first).
tbb: mkdir
	$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbb cfg=debug tbb_root=$(tbb_root)
	$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root)

# Build the "malloc" goal of Makefile.tbbmalloc in both configurations.
tbbmalloc: mkdir
	$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc tbb_root=$(tbb_root)
	$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root)

# Run the allocator tests and the main test suite in both configurations.
# The leading "-" makes make ignore a failing sub-make, so the remaining suites
# are still attempted.
test: tbb tbbmalloc
	-$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test tbb_root=$(tbb_root)
	-$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.test cfg=debug tbb_root=$(tbb_root)
	-$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test tbb_root=$(tbb_root)
	-$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.test cfg=release tbb_root=$(tbb_root)

# rml is a command, not a file: declare it phony so that a file or directory
# named "rml" can never make this target look up to date.
.PHONY: rml
# Build via Makefile.rml in both configurations.
rml: mkdir
	$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.rml cfg=debug tbb_root=$(tbb_root)
	$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.rml cfg=release tbb_root=$(tbb_root)

# Run the "release" and "test" goals of the examples makefile; tbb_root is given
# relative to the examples directory.
examples: tbb tbbmalloc
	$(MAKE) -C examples -r -f Makefile tbb_root=.. release test
.PHONY: clean clean_examples mkdir info
# Remove both work directories. Each command runs through $(shell ...) with its
# output redirected to $(NUL), so the function expands to nothing and the only
# recipe line make actually echoes and runs is the final "clean done" message.
clean: clean_examples
$(shell $(RM) $(work_dir)_release$(SLASH)*.* >$(NUL) 2>$(NUL))
$(shell $(RD) $(work_dir)_release >$(NUL) 2>$(NUL))
$(shell $(RM) $(work_dir)_debug$(SLASH)*.* >$(NUL) 2>$(NUL))
$(shell $(RD) $(work_dir)_debug >$(NUL) 2>$(NUL))
@echo clean done
# Run the examples' own clean goal silently (-s) and ignoring errors (-i).
clean_examples:
$(shell $(MAKE) -s -i -r -C examples -f Makefile tbb_root=.. clean >$(NUL) 2>$(NUL))
# Create both work directories. The $(if ...) lines expand to the
# "cd ... && $(MAKE_TBBVARS) ..." command only when $(origin_build_dir) is empty
# or exactly "undefined" (the subst then yields an empty condition); for any
# other value they expand to nothing.
mkdir:
$(shell $(MD) "$(work_dir)_release" >$(NUL) 2>$(NUL))
$(if $(subst undefined,,$(origin_build_dir)),,cd "$(work_dir)_release" && $(MAKE_TBBVARS) $(tbb_build_prefix)_release)
$(shell $(MD) "$(work_dir)_debug" >$(NUL) 2>$(NUL))
$(if $(subst undefined,,$(origin_build_dir)),,cd "$(work_dir)_debug" && $(MAKE_TBBVARS) $(tbb_build_prefix)_debug)
# Print the detected build configuration variables.
info:
@echo OS: $(tbb_os)
@echo arch=$(arch)
@echo compiler=$(compiler)
@echo runtime=$(runtime)
@echo tbb_build_prefix=$(tbb_build_prefix)

View file

@ -1,58 +0,0 @@
# Copyright 2005-2009 Intel Corporation. All Rights Reserved.
#
# This file is part of Threading Building Blocks.
#
# Threading Building Blocks is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# Threading Building Blocks is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Threading Building Blocks; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As a special exception, you may use this file as part of a free software
# library without restriction. Specifically, if other files instantiate
# templates or use macros or inline functions from this file, or you compile
# this file and link it with other files to produce an executable, this
# file does not by itself cause the resulting executable to be covered by
# the GNU General Public License. This exception does not however
# invalidate any other reasons why the executable file might be covered by
# the GNU General Public License.
# Point tbb_root at $(srcdir) before including the shared TBB build settings.
tbb_root = $(srcdir)
include $(tbb_root)/build/common.inc
# change these
# "override" makes these assignments win even over values given on the make
# command line; both variables are then exported to the sub-makes started below.
override work_dir = $(CWD)
export work_dir
override tbb_root = $(srcdir)
# The original exported work_dir a second time here and never exported tbb_root.
export tbb_root
.PHONY: all tbb tbbmalloc
#workaround for non-depend targets tbb and tbbmalloc which both depend on version_string.tmp
#According to documentation submakes should run in parallel
.NOTPARALLEL: tbb tbbmalloc
all: tbb tbbmalloc
# Only the release configuration is built here, in the current directory.
tbb:
	$(MAKE) -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbb_root=$(tbb_root)
tbbmalloc:
	$(MAKE) -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbb_root=$(tbb_root)
# Copy every built shared library into the library directory under $(DESTDIR).
install-exec-local:
	$(INSTALL) $(work_dir)/lib*.so* $(DESTDIR)$(libdir)
# Remove objects, dependency files, libraries and generated helper files.
# The leading "-" keeps the cleanup going if one rm invocation fails.
clean-local:
	-rm -f *.d *.o
	-rm -f lib*.so*
	-rm -f *.def *.tmp tbbvars.*

View file

@ -1,91 +0,0 @@
// Copyright 2005-2009 Intel Corporation. All Rights Reserved.
//
// This file is part of Threading Building Blocks.
//
// Threading Building Blocks is free software; you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// version 2 as published by the Free Software Foundation.
//
// Threading Building Blocks is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Threading Building Blocks; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
// Writes the __TBB_VERSION_STRINGS, __TBB_DATETIME and __TBB_VERSION_YMD macro
// definitions to standard output. Script arguments 1 and 2 are reported as the
// build target and the build command respectively.
var shell = WScript.CreateObject("WScript.Shell");
var unknownValue = "Unknown";

// Starts a command and polls every 100 ms for as long as its Status reads 0,
// then hands back the process object so the caller can read its streams.
function runToCompletion(commandLine) {
    var child = shell.Exec(commandLine);
    while ( child.Status == 0 ) {
        WScript.Sleep(100);
    }
    return child;
}

// Echoes one continued line of the macro: the given prefix, the value, the
// closing quote with the ENDL token, and a trailing line-continuation backslash.
function echoContinued(prefix, value) {
    WScript.echo( prefix + value + "\" ENDL \\" );
}

// Formats a number as text, prepending "0" when it is 9 or less.
function twoDigits(value) {
    return (value > 9 ? "" : "0") + value;
}

WScript.Echo("#define __TBB_VERSION_STRINGS \\");

// BUILD_HOST
echoContinued( "\"TBB: BUILD_HOST\\t\\t", shell.ExpandEnvironmentStrings("%COMPUTERNAME%") );

// BUILD_OS: the first output line of "ver" is read and dropped, the second is reported.
var verProcess = runToCompletion("cmd /c ver");
verProcess.StdOut.ReadLine();
echoContinued( "\"TBB: BUILD_OS\\t\\t", verProcess.StdOut.ReadLine() );

echoContinued( "\"TBB: BUILD_KERNEL\\t", unknownValue );

// BUILD_COMPILER: the first line the compiler writes to its error stream is
// reported for both the GCC and the COMPILER entries.
var compilerProcess = runToCompletion("icc --version");
var compilerVersion = compilerProcess.StdErr.ReadLine();
echoContinued( "\"TBB: BUILD_GCC\\t", compilerVersion );
echoContinued( "\"TBB: BUILD_COMPILER\\t", compilerVersion );

echoContinued( "\"TBB: BUILD_GLIBC\\t", unknownValue );
echoContinued( "\"TBB: BUILD_LD\\t", unknownValue );

// BUILD_TARGET
echoContinued( "\"TBB: BUILD_TARGET\\t", WScript.Arguments(1) );

// BUILD_COMMAND is the last line of the macro, so it carries no continuation backslash.
WScript.echo( "\"TBB: BUILD_COMMAND\\t" + WScript.Arguments(2) + "\" ENDL" );

// __TBB_DATETIME and __TBB_VERSION_YMD, both taken from the current UTC time.
var now = new Date();
WScript.echo( "#define __TBB_DATETIME \"" + now.toUTCString() + "\"" );
WScript.echo( "#define __TBB_VERSION_YMD " + now.getUTCFullYear() + ", " +
    twoDigits(now.getUTCMonth() + 1) +
    twoDigits(now.getUTCDate()) );

View file

@ -1,66 +0,0 @@
# Copyright 2005-2009 Intel Corporation. All Rights Reserved.
#
# This file is part of Threading Building Blocks.
#
# Threading Building Blocks is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# Threading Building Blocks is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Threading Building Blocks; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As a special exception, you may use this file as part of a free software
# library without restriction. Specifically, if other files instantiate
# templates or use macros or inline functions from this file, or you compile
# this file and link it with other files to produce an executable, this
# file does not by itself cause the resulting executable to be covered by
# the GNU General Public License. This exception does not however
# invalidate any other reasons why the executable file might be covered by
# the GNU General Public License.
# Start from the Windows cl settings; the assignments below adjust them.
include $(tbb_root)/build/windows.cl.inc
# One-letter configuration tag (d for cfg=debug, r otherwise) used in the xn
# library names below.
ifeq ($(cfg), debug)
CFG_LETTER = d
else
CFG_LETTER = r
endif
# Host-side flags. := expands CPLUS_FLAGS and LINK_FLAGS right here, so the
# additions and reassignments made further down are not part of this value.
_CPLUS_FLAGS_HOST := $(CPLUS_FLAGS) /I$(LRB_INC_DIR) $(LINK_FLAGS) /LIBPATH:$(LRB_LIB_DIR) xn_host$(LRB_HOST_ARCH)$(CFG_LETTER).lib
TEST_EXT = dll
CPLUS_FLAGS += /I$(LRB_INC_DIR) /D__LRB__
LIB_LINK_FLAGS += /LIBPATH:$(LRB_LIB_DIR) xn_lrb$(LRB_HOST_ARCH)$(CFG_LETTER).lib
# From here on executables link with the same flags as the libraries.
LINK_FLAGS = $(LIB_LINK_FLAGS)
# OpenMP flag is deliberately left empty.
OPENMP_FLAG =
# Test-only settings, active when TEST_RESOURCE is defined: add the resource to
# the link line and build the host launcher from its .cpp with the host flags
# captured above.
ifdef TEST_RESOURCE
LINK_FLAGS += $(TEST_RESOURCE)
TEST_LAUNCHER_NAME = harness_lrb_host
AUX_TEST_DEPENDENCIES = $(TEST_LAUNCHER_NAME).exe
$(TEST_LAUNCHER_NAME).exe: $(TEST_LAUNCHER_NAME).cpp
cl /Fe$@ $< $(_CPLUS_FLAGS_HOST)
NO_LEGACY_TESTS = 1
NO_C_TESTS = 1
TEST_LAUNCHER=
endif # TEST_RESOURCE
#test_model_plugin.%:
# @echo test_model_plugin is not supported for LRB architecture so far
# Variables exported to the examples' makefiles; applies only when
# BUILDING_PHASE is 0.
ifeq ($(BUILDING_PHASE),0) # examples
export RM = del /Q /F
export LIBS = -shared -lthr -z muldefs -L$(work_dir)_debug -L$(work_dir)_release
export UI = con
export x64 = 64
export CXXFLAGS = -xR -I..\..\..\include
endif # examples

View file

@ -1,49 +0,0 @@
# Copyright 2005-2009 Intel Corporation. All Rights Reserved.
#
# This file is part of Threading Building Blocks.
#
# Threading Building Blocks is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# Threading Building Blocks is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Threading Building Blocks; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As a special exception, you may use this file as part of a free software
# library without restriction. Specifically, if other files instantiate
# templates or use macros or inline functions from this file, or you compile
# this file and link it with other files to produce an executable, this
# file does not by itself cause the resulting executable to be covered by
# the GNU General Public License. This exception does not however
# invalidate any other reasons why the executable file might be covered by
# the GNU General Public License.
# Pull in the LRB cl settings first; TEST_EXT is then reassigned to "so",
# replacing whatever value the include set.
include $(tbb_root)/build/winlrb.cl.inc
TEST_EXT = so
# Keep built test binaries: make must not delete them as intermediate files.
.PRECIOUS: %.$(TEST_EXT)
# Layer the FreeBSD gcc settings on top, then override the compiler driver names
# and flags below.
include $(tbb_root)/build/freebsd.gcc.inc
WARNING_KEY = -w1
CPLUS = icpc
CONLY = icc
#LIBS = -u _read -lcprts -lthr -lc
#LIBS = -lthr
LIBS = -u _read -lcprts -lthr -limf -lc
# Link against the xn library whose name carries the detected XN_VER and the
# configuration letter.
LINK_FLAGS = -L$(LRB_LIB_DIR) $(DYLIB_KEY) -lxn$(XN_VER)_lrb64$(CFG_LETTER)
CPLUS_FLAGS += -xR $(PIC_KEY) -I$(LRB_INC_DIR) -DXENSIM
# C sources and library links reuse the C++ compile flags and the link flags.
C_FLAGS = $(CPLUS_FLAGS)
LIB_LINK_FLAGS = $(LINK_FLAGS)
ifeq ($(cfg), release)
# workaround for LRB compiler issues
# (every -O2 in the release flags is replaced by -O0)
CPLUS_FLAGS := $(subst -O2,-O0, $(CPLUS_FLAGS))
endif

View file

@ -1,88 +0,0 @@
# Copyright 2005-2009 Intel Corporation. All Rights Reserved.
#
# This file is part of Threading Building Blocks.
#
# Threading Building Blocks is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# Threading Building Blocks is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Threading Building Blocks; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As a special exception, you may use this file as part of a free software
# library without restriction. Specifically, if other files instantiate
# templates or use macros or inline functions from this file, or you compile
# this file and link it with other files to produce an executable, this
# file does not by itself cause the resulting executable to be covered by
# the GNU General Public License. This exception does not however
# invalidate any other reasons why the executable file might be covered by
# the GNU General Public License.
# One-time SDK discovery. Everything in this block is exported, and the block is
# skipped whenever XN_VER is already defined.
ifndef XN_VER
export LRBSDK = $(LARRABEE_CORE_LATEST)
export LRB_LIB_DIR = "$(LRBSDK)lib"
export LRB_INC_DIR = "$(LRBSDK)include"
# Function $(wildcard pattern) does not work with paths containing spaces!
# So the debug xn library is listed with "dir /B" instead, and XN_VER is the part
# of its file name between "libxn" and "_lrb64d.so".
_lrb_lib = $(shell cmd /C "dir /B "$(LRBSDK)lib\libxn*_lrb64d.so")
export XN_VER = $(patsubst libxn%_lrb64d.so,%,$(_lrb_lib))
# Host architecture suffix: 32 only when NETSIM_LRB_32_OVERRIDE is 1, else 64.
ifeq (1,$(NETSIM_LRB_32_OVERRIDE))
export LRB_HOST_ARCH = 32
else
export LRB_HOST_ARCH = 64
endif
export run_cmd = harness_lrb_host.exe
export UI = con
endif #XN_VER
include $(tbb_root)/build/windows.inc
ifneq (1,$(netsim))
# Target environment is native LRB or LrbFSim
export compiler = icc
export arch := lrb
# runtime is derived from the compiler's -dumpmachine output: "-" becomes "_",
# then "_lrb_" is collapsed to "_".
target_machine = $(subst -,_,$(shell icpc -dumpmachine))
runtime = $(subst _lrb_,_,$(target_machine))
# -dumpmachine option does not work in R9 Core SDK 5
ifeq ($(runtime),)
runtime = x86_64_freebsd
endif
# Fix the final value now (:=) and append the xn version to it.
export runtime:=$(runtime)_xn$(XN_VER)
# File extensions and library names: .so shared objects, with no .def and no
# resource files.
OBJ = o
DLL = so
LIBEXT = so
TBB.DEF =
TBB.DLL = libtbb$(DEBUG_SUFFIX).$(DLL)
TBB.LIB = $(TBB.DLL)
LINK_TBB.LIB = $(TBB.DLL)
TBB.RES =
MALLOC.DEF :=
MALLOC.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(DLL)
MALLOC.LIB = $(MALLOC.DLL)
MALLOC.RES =
# Version strings come from the JScript generator, which receives the compiler,
# the architecture and the quoted compile command line; its output is redirected
# to version_string.tmp.
MAKE_VERSIONS = cmd /C cscript /nologo /E:jscript $(subst \,/,$(tbb_root))/build/version_info_winlrb.js $(compiler) $(arch) $(subst \,/,"$(CPLUS) $(CPLUS_FLAGS) $(INCLUDES)") > version_string.tmp
MAKE_TBBVARS = cmd /C "$(subst /,\,$(tbb_root))\build\generate_tbbvars.bat"
# Unless XENSIM_ENABLED is 1, run_cmd becomes "rem".
ifneq (1,$(XENSIM_ENABLED))
export run_cmd = rem
endif
TBB_NOSTRICT = 1
endif # lrbfsim

View file

@ -0,0 +1,134 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstdio>
#include <cstdlib>
#include "tbb/task_scheduler_init.h"
#include "tbb/task.h"
#include "tbb/tick_count.h"
// Cutoff value shared with the Fibonacci implementation; find_cutoff() assigns
// it before every measurement.
long CutOff = 1;
// Serial and parallel Fibonacci entry points; their definitions are not in this file.
long SerialFib( const long n );
long ParallelFib( const long n );
// Prints the comma-separated column header that matches the rows written by output().
inline void dump_title() {
    static const char header[] =
        "Serial/Parallel, P, N, cutoff, repetitions, time, fib, speedup\n";
    fputs(header, stdout);
}
// Prints one comma-separated result row.
//   P              thread count; 0 labels the row "Serial", anything else "Parallel"
//   n, c, T        problem size, cutoff and repetition count, printed as given
//   serial_elapsed serial reference time, used only to compute the speedup column
//   elapsed        measured time for this row
//   result         accumulated Fibonacci result
inline void output(int P, long n, long c, int T, double serial_elapsed, double elapsed, long result) {
    const char* const mode = (P == 0) ? "Serial" : "Parallel";
    const double speedup = serial_elapsed / elapsed;
    printf("%s, %d, %ld, %ld, %d, %g, %ld, %g\n", mode, P, n, c, T, elapsed, result, speedup);
}
// Selects the probe strategy: 1 = move a quarter of the interval, 0 = bisect.
#define MOVE_BY_FOURTHS 1
// Returns the next cutoff to try inside [lo, hi].
// With MOVE_BY_FOURTHS the probe sits a quarter of the way up from lo, rounded
// up; otherwise it is the midpoint.
inline long calculate_new_cutoff(const long lo, const long hi) {
#if MOVE_BY_FOURTHS
    const long rounded_span = 3 + hi - lo;
    return lo + rounded_span / 4;
#else
    return (lo + hi) / 2;
#endif
}
void find_cutoff(const int P, const long n, const int T, const double serial_elapsed) {
long lo = 1, hi = n;
double elapsed = 0, lo_elapsed = 0, hi_elapsed = 0;
long final_cutoff = -1;
tbb::task_scheduler_init init(P);
while(true) {
CutOff = calculate_new_cutoff(lo, hi);
long result = 0;
tbb::tick_count t0;
for (int t = -1; t < T; ++t) {
if (t == 0) t0 = tbb::tick_count::now();
result += ParallelFib(n);
}
elapsed = (tbb::tick_count::now() - t0).seconds();
output(P,n,CutOff,T,serial_elapsed,elapsed,result);
if (serial_elapsed / elapsed >= P/2.0) {
final_cutoff = CutOff;
if (hi == CutOff) {
if (hi == lo) {
// we have had this value at both above and below 50%
lo = 1; lo_elapsed = 0;
} else {
break;
}
}
hi = CutOff;
hi_elapsed = elapsed;
} else {
if (lo == CutOff) break;
lo = CutOff;
lo_elapsed = elapsed;
}
}
double interpolated_cutoff = lo + ( P/2.0 - serial_elapsed/lo_elapsed ) * ( (hi - lo) / ( serial_elapsed/hi_elapsed - serial_elapsed/lo_elapsed ));
if (final_cutoff != -1) {
printf("50%% efficiency cutoff is %ld ( linearly interpolated cutoff is %g )\n", final_cutoff, interpolated_cutoff);
} else {
printf("Cannot achieve 50%% efficiency\n");
}
return;
}
// Entry point: "<program> threads n repetitions".
// Measures the serial Fibonacci time, prints it as the first result row, then
// runs the cutoff search for the requested thread count.
// Returns 1 on missing or out-of-range arguments, 0 otherwise.
int main(int argc, char *argv[]) {
    if (argc < 4) {
        printf("Usage: %s threads n repetitions\n",argv[0]);
        return 1;
    }
    int P = atoi(argv[1]);
    long n = atol(argv[2]);
    int T = atoi(argv[3]);
    // Reject values the measurement cannot handle:
    //  - T < 1 would leave the start timestamp below unassigned, so the elapsed
    //    time would be measured from a default-constructed tick_count;
    //  - P < 1 makes the P/2 speedup target non-positive (and P == 0 is the
    //    marker output() uses for the serial row);
    //  - n < 1 leaves no valid cutoff in the search range [1, n].
    if (P < 1 || n < 1 || T < 1) {
        printf("Usage: %s threads n repetitions\n",argv[0]);
        printf("threads, n and repetitions must all be positive integers\n");
        return 1;
    }
    dump_title();
    // collect serial time
    long serial_result = 0;
    tbb::tick_count t0;
    // The iteration with t == -1 runs before the clock is started.
    for (int t = -1; t < T; ++t) {
        if (t == 0) t0 = tbb::tick_count::now();
        serial_result += SerialFib(n);
    }
    double serial_elapsed = (tbb::tick_count::now() - t0).seconds();
    output(0,n,0,T,serial_elapsed,serial_elapsed,serial_result);
    // perform search
    find_cutoff(P,n,T,serial_elapsed);
    return 0;
}

View file

@ -0,0 +1,86 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstdio>
#include <cstdlib>
#include "tbb/task_scheduler_init.h"
#include "tbb/task.h"
#include "tbb/tick_count.h"
// Defined in another translation unit; FibTask::execute() computes serially
// whenever its argument is below this value.
extern long CutOff;
// Plain recursive Fibonacci. Any n below 2 (including negative values) is
// returned unchanged.
long SerialFib( const long n ) {
    return ( n<2 ) ? n : SerialFib(n-1)+SerialFib(n-2);
}
// Continuation task: when executed it stores x + y through the pointer it was
// constructed with. x and y are filled in by the tasks that FibTask::execute()
// points at them.
struct FibContinuation: public tbb::task {
    long* const sum;   // destination for the combined result
    long x, y;         // partial results
    FibContinuation( long* sum_ ) : sum(sum_) {}
    tbb::task* execute() {
        const long total = x+y;
        *sum = total;
        return NULL;
    }
};
// Task computing Fibonacci(n) into *sum.
// Below CutOff the value is computed serially. Otherwise a FibContinuation is
// allocated, a child task for n-1 is spawned, and this task is recycled as the
// continuation's other child to compute n-2.
struct FibTask: public tbb::task {
    long n;        // argument still to be computed by this task
    long * sum;    // where the result for n is written
    FibTask( const long n_, long * const sum_ ) :
        n(n_), sum(sum_)
    {}
    tbb::task* execute() {
        if( n<CutOff ) {
            *sum = SerialFib(n);
            return NULL;
        }
        FibContinuation& cont =
            *new( allocate_continuation() ) FibContinuation(sum);
        FibTask& sibling = *new( cont.allocate_child() ) FibTask(n-1,&cont.y);
        recycle_as_child_of(cont);
        // Re-purpose this task for the n-2 branch, writing into the continuation.
        n -= 2;
        sum = &cont.x;
        // Set ref_count to "two children".
        cont.set_ref_count(2);
        cont.spawn( sibling );
        return this;
    }
};
long ParallelFib( const long n ) {
long sum;
FibTask& a = *new(tbb::task::allocate_root()) FibTask(n,&sum);
tbb::task::spawn_root_and_wait(a);
return sum;
}

View file

@ -0,0 +1,292 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "tbb/task_scheduler_init.h"
#include "tbb/tick_count.h"
#include <cmath>
#include <cstdlib>
#include <cerrno>
#include <cfloat>
#include <vector>
#include <algorithm>
#include "../src/test/harness.h"
#if __linux__ || __APPLE__ || __FreeBSD__
#include <sys/resource.h>
#endif /* __APPLE__ */
// The code, performance of which is to be measured, is surrounded by the StartSimpleTiming
// and StopSimpleTiming macros. It is called "target code" or "code of interest" hereafter.
//
// The target code is executed inside the nested loop. Nesting is necessary to allow
// measurements on arrays that fit cache of a particular level, while making the load
// big enough to eliminate the influence of random deviations.
//
// Macro StartSimpleTiming defines reduction variable "util::anchor", which may be modified (usually
// by adding to) by the target code. This can be necessary to prevent optimizing compilers
// from throwing out the code of interest. Besides, if the target code is complex enough,
// make sure that all its branches contribute (directly or indirectly) to the value
// being added to the "util::anchor" variable.
//
// To factor out overhead introduced by the measurement infra code it is recommended to make
// a calibration run with target code replaced by a no-op (but still modifying "util::anchor"),
// and store the resulting time in the "util::base" variable.
//
// A generally good approach is to make the target code use elements of a previously
// initialized array. Then for the calibration run you just need to add vector elements
// to the "util::anchor" variable. To get rid of memory access delays make the array small
// enough to fit L2 or L1 cache (play with StartSimpleTiming arguments if necessary).
//
// Macro CalibrateSimpleTiming performs default calibration using "util::anchor += i;" operation.
//
// Macro ANCHOR_TYPE defines the type of the reduction variable. If it was not
// defined before including this header, it is defined as size_t. Depending on
// the target code modern super scalar architectures may blend reduction operation
// and instructions of interest differently for different target alternatives. So
// you may play with the type to minimize out-of-order and parallel execution impact
// on the calibration time veracity. You may even end up with different reduction
// variable types (and different calibration times) for different measurements.
namespace util {
typedef std::vector<double> durations_t;

/// Writes a histogram of the samples in t, using 100 equal-width buckets plus one
/// final slot that holds the maximum value.
/** @param t                  samples; may be empty, in which case all counts are 0
    @param histogramFileName  output file name, or NULL to print on stdout

    Output is one header line followed by one "bucket start<TAB>count" line per slot.
    If the file cannot be opened, a message goes to stderr and nothing is written.
    stdout is flushed but never closed, so the caller can keep printing afterwards. **/
void trace_histogram ( const durations_t& t, char* histogramFileName )
{
    FILE* f = histogramFileName ? fopen(histogramFileName, "wt") : stdout;
    if ( !f ) {
        fprintf (stderr, "Cannot open histogram file %s\n", histogramFileName);
        return;
    }
    const size_t n = t.size();
    const size_t num_buckets = 100;
    // min_element/max_element return end() for an empty range; do not dereference it.
    const double min_val = n ? *std::min_element(t.begin(), t.end()) : 0,
                 max_val = n ? *std::max_element(t.begin(), t.end()) : 0,
                 bucket_size = (max_val - min_val) / num_buckets;
    std::vector<size_t> hist(num_buckets + 1, 0);
    for ( size_t i = 0; i < n; ++i ) {
        // When every sample is equal the bucket width is 0; put them all in slot 0
        // instead of dividing by zero.
        size_t slot = bucket_size > 0 ? size_t((t[i]-min_val)/bucket_size) : 0;
        if ( slot > num_buckets )
            slot = num_buckets;
        ++hist[slot];
    }
    fprintf (f, "Histogram: nvals = %u, min = %g, max = %g, nbuckets = %u\n", (unsigned)n, min_val, max_val, (unsigned)num_buckets);
    double bucket = min_val;
    for ( size_t i = 0; i <= num_buckets; ++i, bucket+=bucket_size )
        fprintf (f, "%12g\t%u\n", bucket, (unsigned)hist[i]);
    // Only close what was opened here.
    if ( f != stdout )
        fclose(f);
    else
        fflush(f);
}
double average ( const durations_t& d, double& variation_percent, double& std_dev_percent )
{
durations_t t = d;
if ( t.size() > 5 ) {
t.erase(std::min_element(t.begin(), t.end()));
t.erase(std::max_element(t.begin(), t.end()));
}
size_t n = t.size();
double sum = 0,
min_val = *std::min_element(t.begin(), t.end()),
max_val = *std::max_element(t.begin(), t.end());
for ( size_t i = 0; i < n; ++i )
sum += t[i];
double avg = sum / n,
std_dev = 0;
for ( size_t i = 0; i < n; ++i ) {
double dev = fabs(t[i] - avg);
std_dev += dev * dev;
}
std_dev = sqrt(std_dev / n);
std_dev_percent = std_dev / avg * 100;
variation_percent = 100 * (max_val - min_val) / avg;
return avg;
}
// Thread count of the current pass; assigned in test_main() and used by
// PrintTitle() and CalcSequentialTime().
static int num_threads;
// Calibration results written by the Calibrate* macros: the time of the empty
// measurement loop and the two spread values reported by average().
static double base = 0,
base_dev = 0,
base_dev_percent = 0;
// Empty format string: the Stop* macros pass it to printf together with anchor,
// which prints nothing but keeps anchor in use.
static char *empty_fmt = "";
// Width of the rate column; recomputed in test_main().
static int rate_field_len = 11;
// Type of the reduction variable; can be overridden before including this header.
#if !defined(ANCHOR_TYPE)
#define ANCHOR_TYPE size_t
#endif
// Reduction variable that the measured code and the macros add to.
static ANCHOR_TYPE anchor = 0;
// Per-thread share of the sequential time, set by CalcSequentialTime();
// 0 means "not available".
static double sequential_time = 0;
// StartSimpleTiming / StopSimpleTiming bracket the code to be measured.
// StartSimpleTiming opens a block, records the start time and opens two nested
// loops: l over nOuter and i over nInner. The target code goes between the two
// macros and may use both l and i.
// (The outer loop variable was declared as j while the loop tested and
// incremented l, so the macro could not compile; it is now l throughout.)
#define StartSimpleTiming(nOuter, nInner) { \
    tbb::tick_count t1, t0 = tbb::tick_count::now(); \
    for ( size_t l = 0; l < nOuter; ++l ) { \
        for ( size_t i = 0; i < nInner; ++i ) {
// StopSimpleTiming closes both loops and the block, and stores the elapsed
// seconds minus util::base in res. The printf with the empty format prints
// nothing; it only keeps util::anchor in use.
#define StopSimpleTiming(res) \
        } \
        util::anchor += (ANCHOR_TYPE)l; \
    } \
    t1 = tbb::tick_count::now(); \
    printf (util::empty_fmt, util::anchor); \
    res = (t1-t0).seconds() - util::base; \
}
// Default calibration: times a loop whose body only adds i to util::anchor and
// stores the result in util::base. The parameter T is not used.
#define CalibrateSimpleTiming(T, nOuter, nInner) \
    StartSimpleTiming(nOuter, nInner); \
    util::anchor += (ANCHOR_TYPE)i; \
    StopSimpleTiming(util::base);
// Shared opening part of StartTiming/StartTimingEx: declares the timestamps and
// opens three nested loops (k over runs, l over nOuter, i over nInner), taking
// the start timestamp at the beginning of every run.
#define StartTimingImpl(nRuns, nOuter, nInner) \
tbb::tick_count t1, t0; \
for ( size_t k = 0; k < nRuns; ++k ) { \
t0 = tbb::tick_count::now(); \
for ( size_t l = 0; l < nOuter; ++l ) { \
for ( size_t i = 0; i < nInner; ++i ) {
// Opens a timed region that collects per-run durations in a local vector t_.
#define StartTiming(nRuns, nOuter, nInner) { \
util::durations_t t_(nRuns); \
StartTimingImpl(nRuns, nOuter, nInner)
// Same as StartTiming, but the durations are stored in the caller's vector
// vDurations, which is resized to nRuns.
#define StartTimingEx(vDurations, nRuns, nOuter, nInner) { \
util::durations_t &t_ = vDurations; \
vDurations.resize(nRuns); \
StartTimingImpl(nRuns, nOuter, nInner)
// Closes the region opened by StartTiming/StartTimingEx. Each run's time is
// divided by nrep, which is NOT a macro parameter: a variable named nrep must be
// in scope at the point of use. Avg receives the value returned by
// util::average(); StdDev and StdDevPercent are passed as its second and third
// arguments.
#define StopTiming(Avg, StdDev, StdDevPercent) \
} \
util::anchor += (ANCHOR_TYPE)l; \
} \
t1 = tbb::tick_count::now(); \
t_[k] = (t1 - t0).seconds()/nrep; \
} \
printf (util::empty_fmt, util::anchor); \
Avg = util::average(t_, StdDev, StdDevPercent); \
}
// Default calibration: times a loop whose body only adds i to util::anchor and
// stores the results in util::base, util::base_dev and util::base_dev_percent.
#define CalibrateTiming(nRuns, nOuter, nInner) \
StartTiming(nRuns, nOuter, nInner); \
util::anchor += (ANCHOR_TYPE)i; \
StopTiming(util::base, util::base_dev, util::base_dev_percent);
} // namespace util
// Number of timed runs per measurement; can be overridden before including this header.
#ifndef NRUNS
#define NRUNS 7
#endif
// Duration used by RunTestImpl() to scale the repetition count; can be
// overridden before including this header.
#ifndef ONE_TEST_DURATION
#define ONE_TEST_DURATION 0.01
#endif
// Sentinel file name meaning "do not produce a histogram".
#define no_histogram ((char*)-1)
// Measures pfn and returns its per-call time with the calibration time subtracted.
//   title              if non-NULL, a result row starting with this text is printed
//   pfn                function under test
//   histogramFileName  no_histogram (default) for no histogram, NULL for a
//                      histogram on the console, otherwise the output file name
// NOTE: the local variable must be called nrep, because the StopTiming macro
// refers to that name directly.
inline
double RunTestImpl ( const char* title, void (*pfn)(), char* histogramFileName = no_histogram ) {
double time = 0, variation = 0, deviation = 0;
size_t nrep = 1;
// Double the repetition count until the calibrated per-call time exceeds 1e-6 s.
for (;;) {
CalibrateTiming(NRUNS, 1, nrep);
StartTiming(NRUNS, 1, nrep);
pfn();
StopTiming(time, variation, deviation);
time -= util::base;
if ( time > 1e-6 )
break;
nrep *= 2;
}
// Scale the repetition count by ONE_TEST_DURATION divided by the per-call time.
nrep *= (size_t)ceil(ONE_TEST_DURATION/time);
CalibrateTiming(NRUNS, 1, nrep); // sets util::base
// Final measurement; the per-run durations are kept in t for the histogram.
util::durations_t t;
StartTimingEx(t, NRUNS, 1, nrep);
pfn();
StopTiming(time, variation, deviation);
// (char*)-1 is the value of the no_histogram sentinel.
if ( histogramFileName != (char*)-1 )
util::trace_histogram(t, histogramFileName);
double clean_time = time - util::base;
if ( title ) {
// Deviation (in percent) is calculated for the Gross time
printf ("\n%-34s %.2e %5.1f ", title, clean_time, deviation);
// With a sequential reference available, print the difference from it in
// percent; otherwise pad the column with blanks.
if ( util::sequential_time != 0 )
//printf ("% .2e ", clean_time - util::sequential_time);
printf ("% 10.1f ", 100*(clean_time - util::sequential_time)/util::sequential_time);
else
printf ("%*s ", util::rate_field_len, "");
printf ("%-9u %1.6f |", (unsigned)nrep, time * nrep);
}
return clean_time;
}
/// Formats the row title from title_fmt and workload_param, then measures pfn_test
/// and prints the results through RunTestImpl().
/** title_fmt receives workload_param converted to long as its only argument.
    histogramFileName selects the histogram output: a string stores it in the file
    with that name, NULL prints it on the console, and the default (no_histogram)
    suppresses it.
    The histogram format is: "rate bucket start" "number of tests in this bucket". **/
inline
void RunTest ( const char* title_fmt, size_t workload_param, void (*pfn_test)(), char* histogramFileName = no_histogram ) {
    char formatted_title[1024];
    const long workload = (long)workload_param;
    sprintf(formatted_title, title_fmt, workload);
    RunTestImpl(formatted_title, pfn_test, histogramFileName);
}
// Measures pfn without printing a row and stores the measured time divided by
// the current thread count as the sequential reference.
inline
void CalcSequentialTime ( void (*pfn)() ) {
    const double measured = RunTestImpl(NULL, pfn);
    util::sequential_time = measured / util::num_threads;
}
// Clears the sequential reference, so RunTestImpl() leaves the overhead column blank.
inline
void ResetSequentialTime () {
    util::sequential_time = 0.0;
}
// Prints the column header for one pass, followed by the run count and the
// current thread count. No newline is written here.
inline void PrintTitle() {
    const char* const name_column = "Test name";
    const char* const rate_column = "Rate";
    printf ("%-34s %-*s Std Dev,%% Par.overhead,%% Repeats Gross time | Nruns %u, Nthreads %d",
            name_column, util::rate_field_len, rate_column, NRUNS, util::num_threads);
}
// Provided by the including test source; runs the measurements for one thread count.
void Test();
// Common driver: parses the command line, then for every thread count from
// MinThread to MaxThread initialises the scheduler with that count, prints the
// header and calls Test(). Always returns 0.
inline
int test_main( int argc, char* argv[] ) {
    MinThread = 1;
    MaxThread = tbb::task_scheduler_init::default_num_threads();
    ParseCommandLine( argc, argv );
    // The rate column is as wide as a "%.1e" number plus two characters.
    char sample[128];
    const int sample_len = sprintf(sample, "%.1e", 1.1);
    util::rate_field_len = 2 + sample_len;
    for ( int nthreads = MinThread; nthreads <= MaxThread; ++nthreads ) {
        tbb::task_scheduler_init init (nthreads);
        util::num_threads = nthreads;
        PrintTitle();
        Test();
        printf("\n");
    }
    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,408 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "statistics.h"
#include "statistics_xml.h"
// Number of fixed leading columns/rows in the Excel sheets written by Print()
// (presumably the Name, Threads and Mode cells - verify if the layout changes).
#define COUNT_PARAMETERS 3
// MSVC spells snprintf as _snprintf.
// NOTE(review): _snprintf does not null-terminate on truncation.
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
//! Writes the current local time as "HH:MM:SS" into buff.
/** @param buff       destination buffer; ignored if NULL
    @param size_buff  capacity of buff in bytes, including the terminator
    If the local time cannot be obtained or the text does not fit, buff is set to an
    empty string, so the caller can always print it safely. **/
void GetTime(char* buff,int size_buff)
{
    if (!buff || size_buff <= 0)
        return;
    buff[0] = '\0';
    time_t timer;
    time(&timer);
    const tm *newtime = localtime(&timer);
    // strftime leaves the buffer contents indeterminate when it returns 0.
    if (!newtime || strftime(buff, size_buff, "%H:%M:%S", newtime) == 0)
        buff[0] = '\0';
}
//! Writes the current local date as "YYYY-MM-DD" into buff.
/** @param buff       destination buffer; ignored if NULL
    @param size_buff  capacity of buff in bytes, including the terminator
    If the local time cannot be obtained or the text does not fit, buff is set to an
    empty string, so the caller can always print it safely. **/
void GetDate(char* buff,int size_buff)
{
    if (!buff || size_buff <= 0)
        return;
    buff[0] = '\0';
    time_t timer;
    time(&timer);
    const tm *newtime = localtime(&timer);
    // strftime leaves the buffer contents indeterminate when it returns 0.
    if (!newtime || strftime(buff, size_buff, "%Y-%m-%d", newtime) == 0)
        buff[0] = '\0';
}
//! Selects (creating it on first use) the record for the given test name, mode and thread
//! count, makes it the current record and returns a handle to it.
/** The map key is the name followed by a suffix whose field order depends on SortMode,
    which in turn determines the order in which Print() walks the records. **/
StatisticsCollector::TestCase StatisticsCollector::SetTestCase(const char *name, const char *mode, int threads)
{
    string KeyName(name);
    if (SortMode == ByThreads)
        KeyName += Format("_%02d_%s", threads, mode);
    else    // ByAlg and any other value
        KeyName += Format("_%s_%02d", mode, threads);

    // operator[] inserts a NULL slot for a key that was not seen before.
    StatisticResults *&slot = Statistics[KeyName];
    if (slot == NULL) {
        StatisticResults *fresh = new StatisticResults;
        fresh->Mode = mode;
        fresh->Name = name;
        fresh->Threads = threads;
        fresh->Results.reserve(RoundTitles.size());
        slot = fresh;
    }
    CurrentKey = slot;
    return TestCase(CurrentKey);
}
//! Releases every StatisticResults record owned through the Statistics map.
StatisticsCollector::~StatisticsCollector()
{
    Statistics_t::iterator record = Statistics.begin();
    while (record != Statistics.end()) {
        delete record->second;
        record++;
    }
}
void StatisticsCollector::ReserveRounds(size_t index)
{
size_t i = RoundTitles.size();
if (i > index) return;
char buf[16];
RoundTitles.resize(index+1);
for(; i <= index; i++) {
snprintf( buf, 15, "%u", unsigned(i+1) );
RoundTitles[i] = buf;
}
for(Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++) {
if(!i->second) printf("!!!'%s' = NULL\n", i->first.c_str());
else i->second->Results.reserve(index+1);
}
}
void StatisticsCollector::AddRoundResult(const TestCase &key, value_t v)
{
ReserveRounds(key.access->Results.size());
key.access->Results.push_back(v);
}
//! Sets the title of round `index` to the printf-style formatted text.
//! The text is formatted into a 128-byte buffer, so longer titles are cut.
void StatisticsCollector::SetRoundTitle(size_t index, const char *fmt, ...)
{
// Declares buff[128] and formats the variadic arguments into it.
vargf2buff(buff, 128, fmt);
// Extends RoundTitles if needed so that RoundTitles[index] exists.
ReserveRounds(index);
RoundTitles[index] = buff;
}
//! Stores a printf-style formatted analysis value of the given type for the record
//! referenced by key, and registers the type as an analysis column title.
//! The value is formatted into a 128-byte buffer. key.access is dereferenced unchecked.
void StatisticsCollector::AddStatisticValue(const TestCase &key, const char *type, const char *fmt, ...)
{
// Declares buff[128] and formats the variadic arguments into it.
vargf2buff(buff, 128, fmt);
AnalysisTitles.insert(type);
key.access->Analysis[type] = buff;
}
//! Stores a printf-style formatted analysis value of the given type for the current
//! record (the one selected by the last SetTestCase call) and registers the type as
//! an analysis column title.
/** The value is formatted into a 128-byte buffer. When no test case has been selected
    yet the call is a no-op, mirroring AddRoundResult(value_t), instead of
    dereferencing a NULL CurrentKey. **/
void StatisticsCollector::AddStatisticValue(const char *type, const char *fmt, ...)
{
    // Declares buff[128] and formats the variadic arguments into it.
    vargf2buff(buff, 128, fmt);
    if (!CurrentKey)
        return;
    AnalysisTitles.insert(type);
    CurrentKey->Analysis[type] = buff;
}
//! Registers (or replaces) the Excel formula stored under `name`.
//! Print() writes one formula cell per registered formula in the Excel XML output.
void StatisticsCollector::SetStatisticFormula(const char *name, const char *formula)
{
Formulas[name] = formula;
}
//! Sets the report title from a printf-style format.
//! The text is formatted into a 256-byte buffer, so longer titles are cut.
void StatisticsCollector::SetTitle(const char *fmt, ...)
{
// Declares buff[256] and formats the variadic arguments into it.
vargf2buff(buff, 256, fmt);
Title = buff;
}
//! Expands every "ROUNDS" placeholder of an Excel formula into a relative R1C1 cell range.
/** @param fmt            formula text containing zero or more "ROUNDS" placeholders
    @param place          offset from the formula cell to the cell preceding the first
                          result (horizontal: the first result is RC[place]; vertical:
                          the first result is R[place+1]C)
    @param rounds         number of result cells in the range
    @param is_horizontal  true for results laid out in a row, false for a column
    @return the formula with all placeholders replaced
    The range is built in a buffer large enough for any pair of unsigned values; the
    buffer is zero-filled and the last byte is never written, so the text stays
    terminated even with MSVC's _snprintf. **/
std::string ExcelFormula(const std::string &fmt, size_t place, size_t rounds, bool is_horizontal)
{
    char buff[64] = "";
    if(is_horizontal)
        snprintf(buff, sizeof(buff)-1, "RC[%u]:RC[%u]", unsigned(place), unsigned(place+rounds-1));
    else
        snprintf(buff, sizeof(buff)-1, "R[%u]C:R[%u]C", unsigned(place+1), unsigned(place+rounds));
    const std::string range(buff);

    std::string result(fmt);
    size_t pos = 0;
    while ( (pos = result.find("ROUNDS", pos, 6)) != std::string::npos ) {
        result.replace(pos, 6, range);
        pos += range.size();    // continue after the inserted range
    }
    return result;
}
//! Writes the collected data to every sink whose bit is set in dataOutput.
/** Handled bits: Stdout (console table of the analysis values), HTMLFile ("<Name>.html"
    with a horizontal and a vertical table) and ExcelXML ("<Name>.xml" with a "Horizontal"
    and a "Vertical" worksheet). The TextFile bit is not handled by this function.
    @param dataOutput  bit-field built from StatisticsCollector::DataOutput values
    @param ModeName    caption used for the Mode column/row
    Analysis titles and formula names are printed from their second character on
    (c_str()+1): the first character only takes part in sorting, so these names must
    not be empty.
    The analysis loops rely on AnalysisTitles and each record's Analysis map being
    ordered the same way, with every Analysis key present in AnalysisTitles. **/
void StatisticsCollector::Print(int dataOutput, const char *ModeName)
{
    FILE *OutputFile;
    if (dataOutput & StatisticsCollector::Stdout)
    {
        printf("\n-=# %s #=-\n", Title.c_str());
        if(SortMode == ByThreads)
            printf(" Name | # | %s ", ModeName);
        else
            printf(" Name | %s | # ", ModeName);
        for (AnalysisTitles_t::iterator i = AnalysisTitles.begin(); i != AnalysisTitles.end(); i++)
            printf("|%s", i->c_str()+1);

        for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
        {
            if(SortMode == ByThreads)
                printf("\n%12s|% 5d|%6s", i->second->Name.c_str(), i->second->Threads, i->second->Mode.c_str());
            else
                printf("\n%12s|%6s|% 5d", i->second->Name.c_str(), i->second->Mode.c_str(), i->second->Threads);

            Analysis_t &analisis = i->second->Analysis;
            AnalysisTitles_t::iterator t = AnalysisTitles.begin();
            // Walk titles and this record's values in parallel; a title without a value
            // gets an empty field of the same width.
            for (Analysis_t::iterator a = analisis.begin(); a != analisis.end(); t++)
            {
                // Column format "|% <width>s"; the buffer is zero-filled and large enough
                // for any unsigned width, so the format is never cut short.
                char fmt[16] = "";
                snprintf(fmt, sizeof(fmt)-1, "|%% %us", unsigned(max(size_t(3), t->size())));
                if(*t != a->first)
                    printf(fmt, "");
                else {
                    printf(fmt, a->second.c_str()); a++;
                }
            }
        }
        printf("\n");
    }
    if (dataOutput & StatisticsCollector::HTMLFile)
    {
        if ((OutputFile = fopen((Name+".html").c_str(), "w+t")) != NULL)
        {
            char TimerBuff[100], DateBuff[100];
            GetTime(TimerBuff,sizeof(TimerBuff));
            GetDate(DateBuff,sizeof(DateBuff));
            fprintf(OutputFile, "<html><head>\n<title>%s</title>\n</head><body>\n", Title.c_str());
            //-----------------------
            // Horizontal table: one row per record.
            fprintf(OutputFile, "<table id=\"h\" style=\"position:absolute;top:20\" border=1 cellspacing=0 cellpadding=2>\n");
            fprintf(OutputFile, "<tr><td><a name=hr href=#vr onclick=\"v.style.visibility='visible';"
                                "h.style.visibility='hidden';\">Flip[H]</a></td>"
                                "<td>%s</td><td>%s</td><td colspan=%u>%s</td>",
                DateBuff, TimerBuff, unsigned(AnalysisTitles.size() + RoundTitles.size()), Title.c_str());
            fprintf(OutputFile, "</tr>\n<tr bgcolor=#CCFFFF><td>Name</td><td>Threads</td><td>%s</td>", ModeName);
            for (AnalysisTitles_t::iterator i = AnalysisTitles.begin(); i != AnalysisTitles.end(); i++)
                fprintf(OutputFile, "<td>%s</td>", i->c_str()+1);
            for (size_t i = 0; i < RoundTitles.size(); i++)
                fprintf(OutputFile, "<td>%s</td>", RoundTitles[i].c_str());
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
            {
                fprintf(OutputFile, "</tr>\n<tr><td bgcolor=#CCFFCC>%s</td><td bgcolor=#CCFFCC>%d</td><td bgcolor=#CCFFCC>%4s</td>",
                    i->second->Name.c_str(), i->second->Threads, i->second->Mode.c_str());
                //statistics
                AnalysisTitles_t::iterator t = AnalysisTitles.begin();
                for (Analysis_t::iterator j = i->second->Analysis.begin(); j != i->second->Analysis.end(); t++)
                {
                    fprintf(OutputFile, "<td bgcolor=#FFFF99>%s</td>", (*t != j->first)?" ":(i->second->Analysis[j->first]).c_str());
                    if(*t == j->first) j++;
                }
                //data
                Results_t &r = i->second->Results;
                for (size_t k = 0; k < r.size(); k++)
                {
                    fprintf(OutputFile, "<td>");
                    fprintf(OutputFile, ResultsFmt, r[k]);
                    fprintf(OutputFile, "</td>");
                }
            }
            fprintf(OutputFile, "</tr>\n</table>\n");
            //////////////////////////////////////////////////////
            // Vertical table: one column per record.
            // The title cell spans the remaining columns; guard the subtraction so that
            // fewer than two records cannot wrap around to a huge colspan.
            const size_t title_span = Statistics.size() > 2 ? Statistics.size()-2 : size_t(1);
            fprintf(OutputFile, "<table id=\"v\" style=\"visibility:hidden;position:absolute;top:20\" border=1 cellspacing=0 cellpadding=2>\n");
            fprintf(OutputFile, "<tr><td><a name=vr href=#hr onclick=\"h.style.visibility='visible';"
                                "v.style.visibility='hidden';\">Flip[V]</a></td>\n"
                                "<td>%s</td><td>%s</td><td colspan=%u>%s</td>",
                DateBuff, TimerBuff, unsigned(title_span), Title.c_str());
            fprintf(OutputFile, "</tr>\n<tr bgcolor=#CCFFCC><td bgcolor=#CCFFFF>Name</td>");
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                fprintf(OutputFile, "<td>%s</td>", i->second->Name.c_str());
            fprintf(OutputFile, "</tr>\n<tr bgcolor=#CCFFCC><td bgcolor=#CCFFFF>Threads</td>");
            for (Statistics_t::iterator n = Statistics.begin(); n != Statistics.end(); n++)
                fprintf(OutputFile, "<td>%d</td>", n->second->Threads);
            fprintf(OutputFile, "</tr>\n<tr bgcolor=#CCFFCC><td bgcolor=#CCFFFF>%s</td>", ModeName);
            for (Statistics_t::iterator m = Statistics.begin(); m != Statistics.end(); m++)
                fprintf(OutputFile, "<td>%s</td>", m->second->Mode.c_str());
            for (AnalysisTitles_t::iterator t = AnalysisTitles.begin(); t != AnalysisTitles.end(); t++)
            {
                fprintf(OutputFile, "</tr>\n<tr bgcolor=#FFFF99><td bgcolor=#CCFFFF>%s</td>", t->c_str()+1);
                for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                    fprintf(OutputFile, "<td>%s</td>", i->second->Analysis.count(*t)?i->second->Analysis[*t].c_str():" ");
            }
            for (size_t r = 0; r < RoundTitles.size(); r++)
            {
                fprintf(OutputFile, "</tr>\n<tr><td bgcolor=#CCFFFF>%s</td>", RoundTitles[r].c_str());
                for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                {
                    Results_t &result = i->second->Results;
                    fprintf(OutputFile, "<td>");
                    if(result.size() > r)
                        fprintf(OutputFile, ResultsFmt, result[r]);
                    fprintf(OutputFile, "</td>");
                }
            }
            fprintf(OutputFile, "</tr>\n</table>\n</body></html>\n");
            fclose(OutputFile);
        }
    }
    if (dataOutput & StatisticsCollector::ExcelXML)
    {
        if ((OutputFile = fopen((Name+".xml").c_str(), "w+t")) == NULL) {
            printf("Can't open .xml file\n");
        } else {
            char UserName[100] = "";
            char SheetName[20];
            char TimerBuff[100], DateBuff[100];
            // The author field comes from the environment. The variable may be unset
            // (getenv returns NULL) or longer than the buffer, so copy it with a bound;
            // UserName is zero-filled and its last byte is never written.
#if _WIN32 || _WIN64
            const char *user = getenv("USERNAME");
#else
            const char *user = getenv("USER");
#endif
            if (user)
                snprintf(UserName, sizeof(UserName)-1, "%s", user);
            //--------------------------------
            strcpy(SheetName,"Horizontal");
            GetTime(TimerBuff,sizeof(TimerBuff));
            GetDate(DateBuff,sizeof(DateBuff));
            //--------------------------
            fprintf(OutputFile, XMLHead, UserName, TimerBuff);
            fprintf(OutputFile, XMLStyles);
            fprintf(OutputFile, XMLBeginSheet, SheetName);
            fprintf(OutputFile, XMLNames,1,1,1,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS));
            fprintf(OutputFile, XMLBeginTable, int(RoundTitles.size()+Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+1/*title*/), int(Statistics.size()+1));
            fprintf(OutputFile, XMLBRow);
            fprintf(OutputFile, XMLCellTopName);
            fprintf(OutputFile, XMLCellTopThread);
            fprintf(OutputFile, XMLCellTopMode, ModeName);
            for (AnalysisTitles_t::iterator j = AnalysisTitles.begin(); j != AnalysisTitles.end(); j++)
                fprintf(OutputFile, XMLAnalysisTitle, j->c_str()+1);
            for (Formulas_t::iterator j = Formulas.begin(); j != Formulas.end(); j++)
                fprintf(OutputFile, XMLAnalysisTitle, j->first.c_str()+1);
            for (RoundTitles_t::iterator j = RoundTitles.begin(); j != RoundTitles.end(); j++)
                fprintf(OutputFile, XMLAnalysisTitle, j->c_str());
            fprintf(OutputFile, XMLCellEmptyWhite, Title.c_str());
            fprintf(OutputFile, XMLERow);
            //------------------------
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
            {
                fprintf(OutputFile, XMLBRow);
                fprintf(OutputFile, XMLCellName, i->second->Name.c_str());
                fprintf(OutputFile, XMLCellThread,i->second->Threads);
                fprintf(OutputFile, XMLCellMode, i->second->Mode.c_str());
                //statistics
                AnalysisTitles_t::iterator at = AnalysisTitles.begin();
                for (Analysis_t::iterator j = i->second->Analysis.begin(); j != i->second->Analysis.end(); at++)
                {
                    fprintf(OutputFile, XMLCellAnalysis, (*at != j->first)?"":(i->second->Analysis[j->first]).c_str());
                    if(*at == j->first) j++;
                }
                //formulas
                // The formula at position `place` is followed by Formulas.size()-place-1
                // more formula cells, so the first result is Formulas.size()-place
                // columns to the right.
                size_t place = 0;
                Results_t &v = i->second->Results;
                for (Formulas_t::iterator f = Formulas.begin(); f != Formulas.end(); f++, place++)
                    fprintf(OutputFile, XMLCellFormula, ExcelFormula(f->second, Formulas.size()-place, v.size(), true).c_str());
                //data
                for (size_t k = 0; k < v.size(); k++)
                {
                    fprintf(OutputFile, XMLCellData, v[k]);
                }
                if(v.size() < RoundTitles.size())
                    fprintf(OutputFile, XMLMergeRow, int(RoundTitles.size() - v.size()));
                fprintf(OutputFile, XMLERow);
            }
            //------------------------
            fprintf(OutputFile, XMLEndTable);
            fprintf(OutputFile, XMLWorkSheetProperties,1,1,3,3,int(RoundTitles.size()+AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS));
            fprintf(OutputFile, XMLAutoFilter,1,1,1,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS));
            fprintf(OutputFile, XMLEndWorkSheet);
            //----------------------------------------
            strcpy(SheetName,"Vertical");
            fprintf(OutputFile, XMLBeginSheet, SheetName);
            fprintf(OutputFile, XMLNames, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),2,int(AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS+2),int(Statistics.size()+1));
            fprintf(OutputFile, XMLBeginTable, int(max(Statistics.size()+1, size_t(7))), int(RoundTitles.size()+AnalysisTitles.size()+Formulas.size()+COUNT_PARAMETERS+2));
            //----------------------------------------
            fprintf(OutputFile, XMLBRow);
            fprintf(OutputFile, XMLNameAndTime, Name.c_str(), TimerBuff, DateBuff);
            fprintf(OutputFile, XMLTableParamAndTitle, int(Statistics.size()), int(AnalysisTitles.size()), int(RoundTitles.size()), Title.c_str());
            fprintf(OutputFile, XMLERow);
            fprintf(OutputFile, XMLBRow);
            //-------------------
            fprintf(OutputFile, XMLCellTopName);
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                fprintf(OutputFile, XMLCellName, i->second->Name.c_str());
            fprintf(OutputFile, XMLERow);
            fprintf(OutputFile, XMLBRow);
            fprintf(OutputFile, XMLCellTopThread);
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                fprintf(OutputFile, XMLCellThread, i->second->Threads);
            fprintf(OutputFile, XMLERow);
            fprintf(OutputFile, XMLBRow);
            fprintf(OutputFile, XMLCellTopMode, ModeName);
            for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                fprintf(OutputFile, XMLCellMode, i->second->Mode.c_str());
            fprintf(OutputFile, XMLERow);
            //-----------------
            for (AnalysisTitles_t::iterator t = AnalysisTitles.begin(); t != AnalysisTitles.end(); t++)
            {
                fprintf(OutputFile, XMLBRow);
                fprintf(OutputFile, XMLAnalysisTitle, t->c_str()+1);
                for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                    fprintf(OutputFile, XMLCellAnalysis, i->second->Analysis.count(*t)?(i->second->Analysis[*t]).c_str():"");
                fprintf(OutputFile, XMLERow);
            }
            //-------------------------------------
            // `place` is the index of the formula row. The row at index `place` is followed
            // by Formulas.size()-place-1 formula rows and the "Result" row, so the first
            // result is Formulas.size()-place+1 rows below. The counter must advance once
            // per formula row; resetting it per row made every formula after the first
            // reference the wrong rows.
            // NOTE(review): these cells use XMLCellAnalysis (a String cell), unlike the
            // horizontal sheet which uses XMLCellFormula - confirm this is intended.
            size_t place = 0;
            for (Formulas_t::iterator t = Formulas.begin(); t != Formulas.end(); t++, place++)
            {
                fprintf(OutputFile, XMLBRow);
                fprintf(OutputFile, XMLAnalysisTitle, t->first.c_str()+1);
                for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                    fprintf(OutputFile, XMLCellAnalysis, ExcelFormula(t->second, Formulas.size()-place, i->second->Results.size(), false).c_str());
                fprintf(OutputFile, XMLERow);
            }
            //--------------------------------------
            fprintf(OutputFile, XMLBRow);
            fprintf(OutputFile, XMLCellEmptyWhite, "Result");
            fprintf(OutputFile, XMLERow);
            for (size_t k = 0; k < RoundTitles.size(); k++)
            {
                fprintf(OutputFile, XMLBRow);
                fprintf(OutputFile, XMLAnalysisTitle, RoundTitles[k].c_str());
                for (Statistics_t::iterator i = Statistics.begin(); i != Statistics.end(); i++)
                    if(i->second->Results.size() > k)
                        fprintf(OutputFile, XMLCellData, i->second->Results[k]);
                    else
                        fprintf(OutputFile, XMLCellEmptyWhite, "");
                fprintf(OutputFile, XMLERow);
            }
            fprintf(OutputFile, XMLEndTable);
            //----------------------------------------
            fprintf(OutputFile, XMLWorkSheetProperties, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2), int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),1,1,6);
            fprintf(OutputFile, XMLAutoFilter, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2),2, int(Formulas.size()+AnalysisTitles.size()+COUNT_PARAMETERS+2), int(Statistics.size()+1));
            //----------------------------------------
            fprintf(OutputFile, XMLEndWorkSheet);
            fprintf(OutputFile, XMLEndWorkbook);
            fclose(OutputFile);
        }
    }
}

View file

@ -0,0 +1,188 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// Internal Intel tool
#ifndef __STATISTICS_H__
#define __STATISTICS_H__
#define _CRT_SECURE_NO_DEPRECATE 1
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <vector>
#include <map>
#include <set>
#include <string>
#include <time.h>
// NOTE(review): this using-directive is visible to every file that includes this header;
// the declarations below rely on it for the unqualified string/vector/map/set names.
using namespace std;
//! Type of a single measured result (one value per round).
typedef double value_t;
/*
Statistical collector class.
Resulting table output:
+---------------------------------------------------------------------------+
| [Date] <Title>... |
+----------+----v----+--v---+----------------+------------+-..-+------------+
| TestName | Threads | Mode | Rounds results | Stat_type1 | .. | Stat_typeN |
+----------+---------+------+-+-+-+-..-+-+-+-+------------+-..-+------------+
| | | | | | | .. | | | | | | |
.. ... ... .................. ...... ..
| | | | | | | .. | | | | | | |
+----------+---------+------+-+-+-+-..-+-+-+-+------------+-..-+------------+
Iterating table output:
+---------------------------------------------------------------------------+
| [Date] <TestName>, Threads: <N>, Mode: <M>; for <Title>... |
+----------+----v----+--v---+----------------+------------+-..-+------------+
*/
//! Collects per-test round results and analysis values and prints them as console,
//! HTML or Excel XML tables.
/** Records are keyed by test name, mode and thread count (see SetTestCase) and are owned
    by the collector: they are allocated with new and released in the destructor.
    For that reason both copy construction and copy assignment are disabled. **/
class StatisticsCollector
{
public:
    //! Analysis values of one record: type name -> formatted value
    typedef map<string, string> Analysis_t;
    //! Round results of one record, in round order
    typedef vector<value_t> Results_t;

protected:
    // Copying is not supported: the collector owns the records stored in Statistics,
    // so a member-wise copy would delete them twice. Declared but not defined.
    StatisticsCollector(const StatisticsCollector &);
    StatisticsCollector &operator=(const StatisticsCollector &);

    //! Everything stored for one (name, mode, threads) test case
    struct StatisticResults
    {
        string Name;
        string Mode;
        int Threads;
        Results_t Results;
        Analysis_t Analysis;
    };

    // internal members
    //bool OpenFile;
    //! Record selected by the last SetTestCase call; NULL until the first call
    StatisticResults *CurrentKey;
    string Title;
    //! printf format used for result values in the HTML output
    const char /**Name,*/ *ResultsFmt;
    //! Name of the tests set; base name of the output files
    string Name;
    //! Data
    typedef map<string, StatisticResults*> Statistics_t;
    Statistics_t Statistics;
    typedef vector<string> RoundTitles_t;
    RoundTitles_t RoundTitles;
    //TODO: merge those into one structure
    typedef map<string, string> Formulas_t;
    Formulas_t Formulas;
    typedef set<string> AnalysisTitles_t;
    AnalysisTitles_t AnalysisTitles;

public:
    //! Lightweight, non-owning handle to a record of the collector.
    /** A default-constructed handle refers to no record; its accessors must not be called. **/
    struct TestCase {
        StatisticResults *access;
        TestCase() : access(0) {}
        TestCase(StatisticResults *link) : access(link) {}
        const char *getName() const { return access->Name.c_str(); }
        const char *getMode() const { return access->Mode.c_str(); }
        int getThreads() const { return access->Threads; }
        const Results_t &getResults() const { return access->Results; }
        const Analysis_t &getAnalysis() const { return access->Analysis; }
    };

    //! Order of the key fields, and therefore of the printed records
    enum Sorting {
        ByThreads, ByAlg
    };

    //! Data and output types
    enum DataOutput {
        // Verbosity level enumeration
        Statistic = 1,  //!< Analytical data - computed after all iterations and rounds passed
        Result = 2,     //!< Testing data - collected after all iterations passed
        Iteration = 3,  //!< Verbose data - collected at each iteration (for each size - in case of containers)
        // ExtraVerbose is not applicable yet :) be happy, but flexibility is always welcome
        // Next constants are bit-fields
        Stdout = 1<<8,    //!< Output to the console
        TextFile = 1<<9,  //!< Output to plain text file "name.txt" (delimiter is TAB by default)
        ExcelXML = 1<<10, //!< Output to Excel-readable XML-file "name.xml"
        HTMLFile = 1<<11  //!< Output to HTML file "name.html"
    };

    //! Constructor. Specify tests set name which is used as the name of output files
    /** @param name  tests set name
        @param mode  ordering of the records
        @param fmt   printf format for a single result value; the pointer is stored,
                     not copied, so it must outlive the collector **/
    StatisticsCollector(const char *name, Sorting mode = ByThreads, const char *fmt = "%g")
        : CurrentKey(NULL), ResultsFmt(fmt), Name(name), SortMode(mode) {}

    ~StatisticsCollector();

    //! Set tests set title, supporting printf-like arguments
    void SetTitle(const char *fmt, ...);

    //! Specify next test key
    TestCase SetTestCase(const char *name, const char *mode, int threads);
    //! Specify next test key
    void SetTestCase(const TestCase &t) { SetTestCase(t.getName(), t.getMode(), t.getThreads()); }
    //! Reserve specified number of rounds. Use for efficiency. Used mostly internally
    void ReserveRounds(size_t index);
    //! Add result of the measure
    void AddRoundResult(const TestCase &, value_t v);
    //! Add result of the current measure; ignored when no test case is selected
    void AddRoundResult(value_t v) { if(CurrentKey) AddRoundResult(TestCase(CurrentKey), v); }
    //! Add title of round
    void SetRoundTitle(size_t index, const char *fmt, ...);
    //! Add numbered title of round
    void SetRoundTitle(size_t index, int num) { SetRoundTitle(index, "%d", num); }
    //! Get number of rounds
    size_t GetRoundsCount() const { return RoundTitles.size(); }
    //! Set statistic value for the test
    void AddStatisticValue(const TestCase &, const char *type, const char *fmt, ...);
    //! Set statistic value for the current test
    void AddStatisticValue(const char *type, const char *fmt, ...);
    //! Add Excel-processing formulas. @arg formula can contain more than one instance of the
    //! ROUNDS template, which transforms into the range of cells with result values
    //TODO://! #1 .. #n templates represent data cells from the first to the last
    //TODO: merge with Analysis
    void SetStatisticFormula(const char *name, const char *formula);
    //! Data output
    void Print(int dataOutput, const char *ModeName = "Mode");

private:
    Sorting SortMode;
};
//! using: Func(const char *fmt, ...) { vargf2buff(buff, 128, fmt);...
#define vargf2buff(name, size, fmt) char name[size]; memset(name, 0, size); va_list args; va_start(args, fmt); vsnprintf( name, size-1, fmt, args)
inline std::string Format(const char *fmt, ...) {
vargf2buff(buf, 1024, fmt); // from statistics.h
return std::string(buf);
}
#ifdef STATISTICS_INLINE
#include "statistics.cpp"
#endif
#endif //__STATISTICS_H__

View file

@ -0,0 +1,208 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// Opening tag of a table row; no printf arguments.
const char XMLBRow[]=
" <Row>\n";
// Closing tag of a table row; no printf arguments.
const char XMLERow[]=
" </Row>\n";
// Workbook prologue. printf arguments: author (%s), creation time text (%s).
const char XMLHead[]=
"<?xml version=\"1.0\"?>\n"
"<?mso-application progid=\"Excel.Sheet\"?>\n\
<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"\n\
xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n\
xmlns:x=\"urn:schemas-microsoft-com:office:excel\"\n\
xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\"\n\
xmlns:html=\"http://www.w3.org/TR/REC-html40\">\n\
<DocumentProperties xmlns=\"urn:schemas-microsoft-com:office:office\">\n\
<Author>%s</Author>\n\
<Created>%s</Created>\n\
<Company>Intel Corporation</Company>\n\
</DocumentProperties>\n\
<ExcelWorkbook xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\
<RefModeR1C1/>\n\
</ExcelWorkbook>\n";
// Cell styles referenced by the cell templates; no printf arguments.
// s26, s25 and s24 are bordered cells with a solid fill (#FFFF99, #CCFFFF, #CCFFCC);
// s23 is a bordered cell without a fill.
const char XMLStyles[]=
" <Styles>\n\
<Style ss:ID=\"Default\" ss:Name=\"Normal\">\n\
<Alignment ss:Vertical=\"Bottom\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\
</Style>\n\
<Style ss:ID=\"s26\">\n\
<Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\
<Borders>\n\
<Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
</Borders>\n\
<Interior ss:Color=\"#FFFF99\" ss:Pattern=\"Solid\"/>\n\
</Style>\n\
<Style ss:ID=\"s25\">\n\
<Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\
<Borders>\n\
<Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
</Borders>\n\
<Interior ss:Color=\"#CCFFFF\" ss:Pattern=\"Solid\"/>\n\
</Style>\n\
<Style ss:ID=\"s24\">\n\
<Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\
<Borders>\n\
<Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
</Borders>\n\
<Interior ss:Color=\"#CCFFCC\" ss:Pattern=\"Solid\"/>\n\
</Style>\n\
<Style ss:ID=\"s23\">\n\
<Alignment ss:Vertical=\"Top\" ss:Horizontal=\"Left\" ss:WrapText=\"0\"/>\n\
<Borders>\n\
<Border ss:Position=\"Bottom\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Left\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Right\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
<Border ss:Position=\"Top\" ss:LineStyle=\"Continuous\" ss:Weight=\"1\"/>\n\
</Borders>\n\
</Style>\n\
</Styles>\n";
// Start of a worksheet. printf argument: sheet name (%s).
const char XMLBeginSheet[]=
" <Worksheet ss:Name=\"%s\">\n";
// Hidden _FilterDatabase named range. printf arguments: four ints forming R%dC%d:R%dC%d
// (first row, first column, last row, last column).
const char XMLNames[]=
" <Names>\n\
<NamedRange ss:Name=\"_FilterDatabase\" ss:RefersTo=\"R%dC%d:R%dC%d\" ss:Hidden=\"1\"/>\n\
</Names>\n";
// Start of a table. printf arguments: expanded column count (%d), expanded row count (%d).
const char XMLBeginTable[]=
" <Table ss:ExpandedColumnCount=\"%d\" ss:ExpandedRowCount=\"%d\" x:FullColumns=\"1\"\n\
x:FullRows=\"1\">\n";
// Column widths for the horizontal table. printf arguments: column index (%d), span (%d).
const char XMLColumsHorizontalTable[]=
" <Column ss:Index=\"1\" ss:Width=\"108.75\"/>\n\
<Column ss:Index=\"%d\" ss:Width=\"77.25\" ss:Span=\"%d\"/>\n";
// Column widths for the vertical table. printf argument: span (%d).
const char XMLColumsVerticalTable[]=
" <Column ss:Index=\"1\" ss:Width=\"77.25\" ss:Span=\"%d\"/>\n";
// Three unstyled string cells. printf arguments: three strings (%s, %s, %s).
const char XMLNameAndTime[]=
" <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n\
<Cell><Data ss:Type=\"String\">%s</Data></Cell>\n\
<Cell><Data ss:Type=\"String\">%s</Data></Cell>\n";
// Three unstyled number cells followed by one string cell.
// printf arguments: three ints (%d, %d, %d) and a string (%s).
const char XMLTableParamAndTitle[]=
" <Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\
<Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\
<Cell><Data ss:Type=\"Number\">%d</Data></Cell>\n\
<Cell><Data ss:Type=\"String\">%s</Data></Cell>\n";
//--------------
// Heading cells (style s25).
// "Name" heading; no printf arguments.
const char XMLCellTopName[]=
" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">Name</Data></Cell>\n";
// "Threads" heading; no printf arguments.
const char XMLCellTopThread[]=
" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">Threads</Data></Cell>\n";
// Mode heading. printf argument: caption (%s).
const char XMLCellTopMode[]=
" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">%s</Data></Cell>\n";
//---------------------
// Heading cell with arbitrary text (style s25). printf argument: title (%s).
const char XMLAnalysisTitle[]=
" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\">%s</Data></Cell>\n";
// Record identification cells (style s24).
// printf argument: test name (%s).
const char XMLCellName[]=
" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"String\">%s</Data></Cell>\n";
// printf argument: thread count (%d).
const char XMLCellThread[]=
" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"Number\">%d</Data></Cell>\n";
// printf argument: mode (%s).
const char XMLCellMode[]=
" <Cell ss:StyleID=\"s24\"><Data ss:Type=\"String\">%s</Data></Cell>\n";
// Analysis value cell (style s26, String data). printf argument: value text (%s).
const char XMLCellAnalysis[]=
" <Cell ss:StyleID=\"s26\"><Data ss:Type=\"String\">%s</Data></Cell>\n";
// Formula cell (style s26, Number data). printf argument: formula text (%s).
const char XMLCellFormula[]=
" <Cell ss:StyleID=\"s26\" ss:Formula=\"%s\"><Data ss:Type=\"Number\"></Data></Cell>\n";
// Result value cell (style s23). printf argument: value as a double (%g).
const char XMLCellData[]=
" <Cell ss:StyleID=\"s23\"><Data ss:Type=\"Number\">%g</Data></Cell>\n";
// Empty cell merged across following cells (style s23). printf argument: MergeAcross count (%d).
const char XMLMergeRow[]=
" <Cell ss:StyleID=\"s23\" ss:MergeAcross=\"%d\" ><Data ss:Type=\"String\"></Data></Cell>\n";
// Unstyled string cell. printf argument: text (%s), may be empty.
const char XMLCellEmptyWhite[]=
" <Cell><Data ss:Type=\"String\">%s</Data></Cell>\n";
// Empty heading cell (style s25); no printf arguments.
const char XMLCellEmptyTitle[]=
" <Cell ss:StyleID=\"s25\"><Data ss:Type=\"String\"></Data></Cell>\n";
// End of a table; no printf arguments.
const char XMLEndTable[]=
" </Table>\n";
// Auto-filter range. printf arguments: four ints forming R%dC%d:R%dC%d
// (first row, first column, last row, last column).
const char XMLAutoFilter[]=
" <AutoFilter x:Range=\"R%dC%d:R%dC%d\" xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\
</AutoFilter>\n";
// End of a worksheet; no printf arguments.
const char XMLEndWorkSheet[]=
" </Worksheet>\n";
// Worksheet options with frozen panes. printf arguments, all ints, in order:
// SplitHorizontal, TopRowBottomPane, SplitVertical, LeftColumnRightPane, ActiveCol.
const char XMLWorkSheetProperties[]=
" <WorksheetOptions xmlns=\"urn:schemas-microsoft-com:office:excel\">\n\
<Unsynced/>\n\
<Selected/>\n\
<FreezePanes/>\n\
<FrozenNoSplit/>\n\
<SplitHorizontal>%d</SplitHorizontal>\n\
<TopRowBottomPane>%d</TopRowBottomPane>\n\
<SplitVertical>%d</SplitVertical>\n\
<LeftColumnRightPane>%d</LeftColumnRightPane>\n\
<ActivePane>0</ActivePane>\n\
<Panes>\n\
<Pane>\n\
<Number>3</Number>\n\
</Pane>\n\
<Pane>\n\
<Number>1</Number>\n\
</Pane>\n\
<Pane>\n\
<Number>2</Number>\n\
</Pane>\n\
<Pane>\n\
<Number>0</Number>\n\
<ActiveRow>0</ActiveRow>\n\
<ActiveCol>%d</ActiveCol>\n\
</Pane>\n\
</Panes>\n\
<ProtectObjects>False</ProtectObjects>\n\
<ProtectScenarios>False</ProtectScenarios>\n\
</WorksheetOptions>\n";
// End of the workbook; no printf arguments.
const char XMLEndWorkbook[]=
"</Workbook>\n";

View file

@ -0,0 +1,262 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"
#define NRUNS 10
#define ONE_TEST_DURATION 0.01
#include "perf_util.h"
#define NUM_CHILD_TASKS 128
#define NUM_ROOT_TASKS 16
#define N 1000000
#define FINE_GRAIN 50
#define MED_GRAIN 500
#define COARSE_GRAIN 10000
// Counter type used for ranges and sums. ANCHOR_TYPE is not defined in this
// file (presumably supplied by perf_util.h -- confirm).
typedef ANCHOR_TYPE count_type;
// One-dimensional iteration range over count_type used by the pfor/preduce tests.
typedef tbb::blocked_range<count_type> range_type;
// Total number of leaf tasks in the task-tree tests: children per root times roots.
const count_type NUM_leaf_tasks = NUM_CHILD_TASKS * NUM_ROOT_TASKS;
// Reduced problem sizes derived from N: N / ln(N) / 10, and ten times that value.
const count_type N_finest = (count_type)(N/log((double)N)/10);
const count_type N_fine = N_finest * 10;
// Holds a pointer to one statically allocated leaf task so that Work1() can
// call its execute() directly. The constructor is defined further below,
// after simple_leaf_task is a complete type.
class static_task_holder {
public:
// Points at the function-local static simple_leaf_task created by the constructor.
tbb::task *my_simple_leaf_task_ptr;
static_task_holder ();
};
// The single holder instance used by Work1().
static static_task_holder s_tasks;
// Number of additions each simple_leaf_task performs; set by Test() per workload.
static size_t s_num_iterations = 0;
//! Leaf task of the task-tree benchmarks.
/** Adds the loop indices 0 .. s_num_iterations-1 to util::anchor and finishes
    without spawning anything. */
class simple_leaf_task : public tbb::task
{
    task* execute () {
        // The per-task work amount is controlled globally via s_num_iterations.
        size_t step = 0;
        while ( step < s_num_iterations ) {
            util::anchor += step;
            ++step;
        }
        return NULL;
    }
};
//! Root task that spawns NUM_leaf_tasks leaves as direct children and waits for them.
class simple_root_task : public tbb::task
{
    task* execute () {
        // Reference count is the number of children plus one, set before spawning
        // and before the blocking wait_for_all() below.
        set_ref_count(NUM_leaf_tasks + 1);
        size_t spawned = 0;
        while ( spawned < NUM_leaf_tasks ) {
            spawn( *new( allocate_child() ) simple_leaf_task );
            ++spawned;
        }
        wait_for_all();
        return NULL;
    }
};
void Work1 () {
for ( size_t i=0; i < NUM_leaf_tasks; ++i )
s_tasks.my_simple_leaf_task_ptr->execute();
}
void Test1_1 () {
tbb::empty_task &r = *new( tbb::task::allocate_root() ) tbb::empty_task;
r.set_ref_count(NUM_leaf_tasks + 1);
for ( size_t i = 0; i < NUM_leaf_tasks; ++i ) {
simple_leaf_task &t = *new( r.allocate_child() ) simple_leaf_task;
r.spawn(t);
}
r.wait_for_all();
r.destroy(r);
}
void Test1_2 ()
{
simple_root_task &r = *new( tbb::task::allocate_root() ) simple_root_task;
tbb::task::spawn_root_and_wait(r);
}
//! Second-level task of the "complex" tree: spawns NUM_CHILD_TASKS leaves and waits.
class children_launcher_task : public tbb::task
{
    task* execute () {
        // Number of children plus one, set before the blocking wait_for_all().
        set_ref_count(NUM_CHILD_TASKS + 1);
        size_t spawned = 0;
        while ( spawned < NUM_CHILD_TASKS ) {
            spawn( *new( allocate_child() ) simple_leaf_task );
            ++spawned;
        }
        wait_for_all();
        return NULL;
    }
};
//! First-level task of the "complex" tree.
/** Allocates a children_launcher_task as a new root and blocks until it is done. */
class root_launcher_task : public tbb::task
{
    task* execute () {
        spawn_root_and_wait( *new( allocate_root() ) children_launcher_task );
        return NULL;
    }
};
//! Top of the "complex" tree: launches NUM_ROOT_TASKS root_launcher_task instances.
class hierarchy_root_task : public tbb::task
{
    task* execute () {
        // Collect all roots first, then spawn them as one list and wait for the lot.
        tbb::task_list roots;
        size_t created = 0;
        while ( created < NUM_ROOT_TASKS ) {
            roots.push_back( *new( allocate_root() ) root_launcher_task );
            ++created;
        }
        spawn_root_and_wait(roots);
        return NULL;
    }
};
void Test1_3 ()
{
hierarchy_root_task &r = *new( tbb::task::allocate_root() ) hierarchy_root_task;
tbb::task::spawn_root_and_wait(r);
}
// Upper bound (s_range) and grain size (s_grain) of the blocked_range used by
// the pfor/preduce workloads; Test() overwrites both before each measurement.
static size_t s_range = N,
s_grain = 1;
class simple_pfor_body {
public:
void operator()( const range_type& r ) const {
count_type end = r.end();
for( count_type i = r.begin(); i < end; ++i )
util::anchor += i;
}
};
void Work2 () {
simple_pfor_body body;
range_type range(0, s_range, s_grain);
body(range);
}
void Test2 () {
tbb::parallel_for( range_type(0, s_range, s_grain), simple_pfor_body() );
}
void Test2_0 () {
volatile count_type zero = 0;
tbb::parallel_for( range_type(0, zero, 1), simple_pfor_body() );
}
class simple_preduce_body {
public:
count_type my_sum;
simple_preduce_body () : my_sum(0) {}
simple_preduce_body ( simple_preduce_body&, tbb::split ) : my_sum(0) {}
void join( simple_preduce_body& rhs ) { my_sum += rhs.my_sum;}
void operator()( const range_type& r ) {
count_type end = r.end();
for( count_type i = r.begin(); i < end; ++i )
util::anchor += i;
my_sum = util::anchor;
}
};
void Work3 () {
simple_preduce_body body;
range_type range(0, s_range, s_grain);
body(range);
}
void Test3 () {
simple_preduce_body body;
tbb::parallel_reduce( range_type(0, s_range, s_grain), body );
}
void Test3_0 () {
volatile count_type zero = 0;
simple_preduce_body body;
tbb::parallel_reduce( range_type(0, zero, 1), body );
}
//! Creates the single function-local static leaf task and remembers its address.
/** Defined here rather than in the class because simple_leaf_task must be a
    complete type at this point. */
static_task_holder::static_task_holder () {
    static simple_leaf_task the_leaf;
    my_simple_leaf_task_ptr = &the_leaf;
}
//! Runs every benchmark of this file.
/** First the three task-tree shapes for several per-task work amounts, then
    parallel_for and parallel_reduce for several range/grain combinations.
    Before each group the matching sequential baseline (Work1/Work2/Work3) is
    handed to CalcSequentialTime. */
void Test () {
    // --- task trees -------------------------------------------------------
    const size_t num_task_tree_workloads = 4;
    size_t task_tree_workloads[num_task_tree_workloads] = {0, 50, 500, 10000};
    for ( size_t w = 0; w < num_task_tree_workloads; ++w ) {
        const size_t adds_per_task = task_tree_workloads[w];
        s_num_iterations = adds_per_task;
        CalcSequentialTime(Work1);
        RunTest ("Bunch of leaves: %d adds/task", adds_per_task, Test1_1);
        RunTest ("Simple task tree: %d adds/task", adds_per_task, Test1_2);
        RunTest ("Complex task tree: %d adds/task", adds_per_task, Test1_3);
    }

    // --- parallel algorithms ----------------------------------------------
    // Using N_fine constant in the body of this function results in incorrect code
    // generation by icl 10.1.014
    const size_t num_alg_workloads = 4;
    size_t alg_ranges[num_alg_workloads] = {N_fine/10, N_fine, N, N};
    size_t alg_grains[num_alg_workloads] = {1, FINE_GRAIN, MED_GRAIN, COARSE_GRAIN};

    // The empty-range variant (Test2_0) is currently not measured.
    for ( size_t w = 0; w < num_alg_workloads; ++w ) {
        s_range = alg_ranges[w];
        s_grain = alg_grains[w];
        CalcSequentialTime(Work2);
        RunTest ("pfor: %d adds/body", s_grain, Test2);
    }

    // The empty-range variant (Test3_0) is currently not measured.
    for ( size_t w = 0; w < num_alg_workloads; ++w ) {
        s_range = alg_ranges[w];
        s_grain = alg_grains[w];
        CalcSequentialTime(Work3);
        RunTest ("preduce: %d adds/body", s_grain, Test3);
    }
}
// Program entry point: forwards the command line to test_main() (not defined
// in this file; presumably provided by perf_util.h -- confirm) and always
// exits with status 0.
int main( int argc, char* argv[] ) {
test_main(argc, argv);
return 0;
}

View file

@ -0,0 +1,343 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#ifndef __TIME_FRAMEWORK_H__
#define __TIME_FRAMEWORK_H__
#include <cstdlib>
#include <math.h>
#include <vector>
#include <string>
#include <sstream>
#include "tbb/tbb_stddef.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/tick_count.h"
#include "../test/harness.h"
#include "../test/harness_barrier.h"
#define STATISTICS_INLINE
#include "statistics.h"
#ifndef ARG_TYPE
typedef intptr_t arg_t;
#else
typedef ARG_TYPE arg_t;
#endif
class Timer {
tbb::tick_count tick;
public:
Timer() { tick = tbb::tick_count::now(); }
double get_time() { return (tbb::tick_count::now() - tick).seconds(); }
double diff_time(const Timer &newer) { return (newer.tick - tick).seconds(); }
double mark_time() { tbb::tick_count t1(tbb::tick_count::now()), t2(tick); tick = t1; return (t1 - t2).seconds(); }
double mark_time(const Timer &newer) { tbb::tick_count t(tick); tick = newer.tick; return (tick - t).seconds(); }
};
//! Base class of all testers driven by TestRunner / TestProcessor.
/** A tester bundles tests_count test modes. The framework calls base_init()
    once, then for every mode test_prefix() followed by test() on each thread. */
class TesterBase /*: public tbb::internal::no_copy*/ {
protected:
    friend class TestProcessor;
    friend class TestRunner;

    //! barrier for synchronizing between threads
    Harness::SpinBarrier *barrier;

    //! number of tests per this tester
    const int tests_count;

    //! number of threads to operate
    int threads_count;

    //! some value for tester
    arg_t value;

    // avoid false sharing
    char pad[128 - sizeof(arg_t) - sizeof(int)*2 - sizeof(void*) ];

public:
    //! init tester base. @arg ntests is number of embedded tests in this tester.
    TesterBase(int ntests)
        : barrier(NULL), tests_count(ntests)
    {}

    virtual ~TesterBase() {}

    //! internal function: stores the run parameters, then calls init().
    void base_init(arg_t v, int t, Harness::SpinBarrier &b) {
        value = v;
        threads_count = t;
        barrier = &b;
        init();
    }

    //! optionally override to init after value and threads count were set.
    virtual void init() { }

    //! Override to provide your names
    virtual std::string get_name(int testn) {
        return Format("test %d", testn);
    }

    //! optionally override to init test mode just before execution for a given thread number.
    virtual void test_prefix(int /*testn*/, int /*threadn*/) { }

    //! Override to provide main test's entry function returns a value to record
    virtual value_t test(int testn, int threadn) = 0;

    //! Type of aggregation from results of threads
    enum result_t {
        SUM, AVG, MIN, MAX
    };

    //! Override to change result type for the test. Return postfix for test name or 0 if result type is not needed.
    virtual const char *get_result_type(int /*testn*/, result_t type) const {
        // only average result by default
        if( type != AVG )
            return 0;
        return "";
    }
};
/*****
a user's tester concept:
class tester: public TesterBase {
public:
//! init tester with known amount of work
tester() : TesterBase(<user-specified tests count>) { ... }
//! run a test with sequental number @arg test_number for @arg thread.
/ *override* / value_t test(int test_number, int thread);
};
******/
//! Wrapper that records the wall-clock time of Tester::test(), multiplied by @c scale.
/** The value returned by the wrapped test() is discarded. */
template<typename Tester, int scale = 1>
class TimeTest : public Tester {
    /*override*/ value_t test(int testn, int threadn) {
        Timer stopwatch;
        Tester::test(testn, threadn);
        const double elapsed_seconds = stopwatch.get_time();
        return elapsed_seconds * double(scale);
    }
};
//! Wrapper that records the time of Tester::test() in nanoseconds per unit of Tester::value.
/** The value returned by the wrapped test() is discarded. Timer::get_time()
    reports seconds, so the conversion factor to nanoseconds is 1e9. (The
    previous factor of 1e6 produced microseconds, contradicting both the class
    name and the "Nanoseconds per operation" report titles.) */
template<typename Tester>
class NanosecPerValue : public Tester {
    /*override*/ value_t test(int testn, int threadn) {
        Timer timer;
        Tester::test(testn, threadn);
        // return time (ns) per value
        return timer.get_time()*1e+9/double(Tester::value);
    }
};
//! Wrapper that records throughput: Tester::value divided by (elapsed seconds * scale).
/** The value returned by the wrapped test() is discarded. */
template<typename Tester, int scale = 1>
class ValuePerSecond : public Tester {
    /*override*/ value_t test(int testn, int threadn) {
        Timer stopwatch;
        Tester::test(testn, threadn);
        const double elapsed_seconds = stopwatch.get_time();
        return double(Tester::value)/(elapsed_seconds*scale);
    }
};
// operate with single tester
class TestRunner {
friend class TestProcessor;
friend struct RunArgsBody;
TestRunner(const TestRunner &); // don't copy
const char *tester_name;
StatisticsCollector *stat;
std::vector<std::vector<StatisticsCollector::TestCase> > keys;
public:
TesterBase &tester;
template<typename Test>
TestRunner(const char *name, Test *test)
: tester_name(name), tester(*static_cast<TesterBase*>(test))
{}
~TestRunner() { delete &tester; }
void init(arg_t value, int threads, Harness::SpinBarrier &barrier, StatisticsCollector *s) {
tester.base_init(value, threads, barrier);
stat = s;
keys.resize(tester.tests_count);
for(int testn = 0; testn < tester.tests_count; testn++) {
keys[testn].resize(threads);
std::string test_name(tester.get_name(testn));
for(int threadn = 0; threadn < threads; threadn++)
keys[testn][threadn] = stat->SetTestCase(tester_name, test_name.c_str(), threadn);
}
}
void run_test(int threadn) {
for(int testn = 0; testn < tester.tests_count; testn++) {
tester.test_prefix(testn, threadn);
tester.barrier->wait(); // <<<<<<<<<<<<<<<<< Barrier before running test mode
value_t result = tester.test(testn, threadn);
stat->AddRoundResult(keys[testn][threadn], result);
}
}
void post_process(StatisticsCollector &report) {
const int threads = tester.threads_count;
for(int testn = 0; testn < tester.tests_count; testn++) {
size_t coln = keys[testn][0].getResults().size()-1;
value_t rsum = keys[testn][0].getResults()[coln];
value_t rmin = rsum, rmax = rsum;
for(int threadn = 1; threadn < threads; threadn++) {
value_t result = keys[testn][threadn].getResults()[coln];
rsum += result; // for both SUM or AVG
if(rmin > result) rmin = result;
if(rmax < result) rmax = result;
}
std::string test_name(tester.get_name(testn));
const char *rname = tester.get_result_type(testn, TesterBase::SUM);
if( rname ) {
report.SetTestCase(tester_name, (test_name+rname).c_str(), threads);
report.AddRoundResult(rsum);
}
rname = tester.get_result_type(testn, TesterBase::MIN);
if( rname ) {
report.SetTestCase(tester_name, (test_name+rname).c_str(), threads);
report.AddRoundResult(rmin);
}
rname = tester.get_result_type(testn, TesterBase::AVG);
if( rname ) {
report.SetTestCase(tester_name, (test_name+rname).c_str(), threads);
report.AddRoundResult(rsum / threads);
}
rname = tester.get_result_type(testn, TesterBase::MAX);
if( rname ) {
report.SetTestCase(tester_name, (test_name+rname).c_str(), threads);
report.AddRoundResult(rmax);
}
}
}
};
struct RunArgsBody {
const vector<TestRunner*> &run_list;
RunArgsBody(const vector<TestRunner*> &a) : run_list(a) { }
#ifndef __TBB_parallel_for_H
void operator()(int thread) const {
#else
void operator()(const tbb::blocked_range<int> &r) const {
ASSERT( r.begin() + 1 == r.end(), 0);
int thread = r.begin();
#endif
for(size_t i = 0; i < run_list.size(); i++)
run_list[i]->run_test(thread);
}
};
//! Main test processor.
/** Override or use like this:
class MyTestCollection : public TestProcessor {
void factory(arg_t value, int threads) {
process( value, threads,
run("my1", new tester<my1>() ),
run("my2", new tester<my2>() ),
end );
if(value == threads)
stat->Print();
}
};
*/
class TestProcessor {
friend class TesterBase;
// <threads, collector>
typedef std::map<int, StatisticsCollector *> statistics_collection;
statistics_collection stat_by_threads;
protected:
// Members
const char *collection_name;
// current stat
StatisticsCollector *stat;
// token
size_t end;
public:
StatisticsCollector report;
// token of tests list
template<typename Test>
TestRunner *run(const char *name, Test *test) {
return new TestRunner(name, test);
}
// iteration processing
void process(arg_t value, int threads, ...) {
// prepare items
stat = stat_by_threads[threads];
if(!stat) {
stat_by_threads[threads] = stat = new StatisticsCollector((collection_name + Format("@%d", threads)).c_str(), StatisticsCollector::ByAlg);
stat->SetTitle("Detailed log of %s running with %d threads.", collection_name, threads);
}
Harness::SpinBarrier barrier(threads);
// init args
va_list args; va_start(args, threads);
vector<TestRunner*> run_list; run_list.reserve(16);
while(true) {
TestRunner *item = va_arg(args, TestRunner*);
if( !item ) break;
item->init(value, threads, barrier, stat);
run_list.push_back(item);
}
va_end(args);
std::ostringstream buf;
buf << value;
const size_t round_number = stat->GetRoundsCount();
stat->SetRoundTitle(round_number, buf.str().c_str());
report.SetRoundTitle(round_number, buf.str().c_str());
// run them
#ifndef __TBB_parallel_for_H
NativeParallelFor(threads, RunArgsBody(run_list));
#else
tbb::parallel_for(tbb::blocked_range<int>(0,threads,1), RunArgsBody(run_list));
#endif
// destroy args
for(size_t i = 0; i < run_list.size(); i++) {
run_list[i]->post_process(report);
delete run_list[i];
}
}
public:
TestProcessor(const char *name, StatisticsCollector::Sorting sort_by = StatisticsCollector::ByAlg)
: collection_name(name), stat(NULL), end(0), report(collection_name, sort_by)
{ }
~TestProcessor() {
for(statistics_collection::iterator i = stat_by_threads.begin(); i != stat_by_threads.end(); i++)
delete i->second;
}
};
#endif// __TIME_FRAMEWORK_H__

View file

@ -0,0 +1,366 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// configuration:
//! enable/disable std::map tests
#define STDTABLE 1
//! enable/disable old implementation tests (correct include file also)
#define OLDTABLE 0
#define OLDTABLEHEADER "tbb/concurrent_hash_map-4078.h"//-4329
//! enable/disable experimental implementation tests (correct include file also)
#define TESTTABLE 0
#define TESTTABLEHEADER "tbb/concurrent_unordered_map.h"
//////////////////////////////////////////////////////////////////////////////////
#include <cstdlib>
#include <math.h>
#include "tbb/tbb_stddef.h"
#include <vector>
#include <map>
// needed by hash_maps
#include <stdexcept>
#include <iterator>
#include <algorithm> // std::swap
#include <utility> // Need std::pair from here
#include "tbb/cache_aligned_allocator.h"
#include "tbb/tbb_allocator.h"
#include "tbb/spin_rw_mutex.h"
#include "tbb/aligned_space.h"
#include "tbb/atomic.h"
// for test
#include "tbb/spin_mutex.h"
#include "time_framework.h"
using namespace tbb;
using namespace tbb::internal;
//! Hash/equality helper for int keys.
/** Offers both the functor form (operator()) and the named form
    (hash / equal) of hashing and comparison; the hash is the key itself. */
struct IntHashCompare {
    // functor form
    size_t operator() ( int x ) const {
        return static_cast<size_t>(x);
    }
    bool operator() ( int x, int y ) const {
        return !(x != y);
    }
    // named form
    static long hash( int x ) {
        return static_cast<long>(x);
    }
    bool equal( int x, int y ) const {
        return !(x != y);
    }
};
// Each table implementation under test is included inside its own wrapper
// namespace so that several versions of the container can coexist in one
// translation unit. The nested tbb / tbb::internal namespaces re-export the
// real ::tbb names so the included header still finds what it refers to.
namespace version_current {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
#include "tbb/concurrent_hash_map.h"
}
// Table type of the current implementation.
typedef version_current::tbb::concurrent_hash_map<int,int,IntHashCompare> IntTable;
#if OLDTABLE
// Presumably the include guard of concurrent_hash_map.h; undefined so that the
// next header is not skipped -- confirm against the header.
#undef __TBB_concurrent_hash_map_H
namespace version_base {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
#include OLDTABLEHEADER
}
// Table type of the old implementation (only when OLDTABLE is enabled).
typedef version_base::tbb::concurrent_hash_map<int,int,IntHashCompare> OldTable;
#endif
#if TESTTABLE
#undef __TBB_concurrent_hash_map_H
namespace version_new {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
#include TESTTABLEHEADER
}
// Table type of the experimental implementation (only when TESTTABLE is enabled).
typedef version_new::tbb::concurrent_unordered_map<int,int,IntHashCompare,IntHashCompare> TestTable;
// NOTE(review): TESTTABLE is already defined at this point; this re-definition
// only matters if it was enabled with a value other than 1.
#define TESTTABLE 1
#endif
///////////////////////////////////////
// Names of the four test modes of TestTBBMap / TestSTLMap, indexed by test number.
static const char *map_testnames[] = {
"1.insert", "2.count(w/rehash)", "3.find/wr", "4.erase"
};
//! Four-mode tester (insert, count, find+write, erase) for a concurrent hash map type.
/** Every thread works on its own contiguous slice of n_items keys:
    [t*n_items, (t+1)*n_items). */
template<typename TableType>
struct TestTBBMap : TesterBase {
    typedef typename TableType::accessor accessor;
    typedef typename TableType::const_accessor const_accessor;
    TableType Table;
    int n_items;

    TestTBBMap() : TesterBase(4) {}

    //! Splits the total number of items evenly between the threads.
    void init() { n_items = value/threads_count; }

    std::string get_name(int testn) {
        return std::string(map_testnames[testn]);
    }

    //! Runs mode @arg test for thread @arg t; the return value carries no information.
    double test(int test, int t)
    {
        const int first = t*n_items;
        const int last = (t+1)*n_items;
        switch(test) {
          case 0: // fill
            for(int key = first; key < last; key++) {
                accessor a;
                Table.insert( a, key );
                a->second = 0;
            }
            break;
          case 1: // work1
            for(int key = first; key < last; key++) {
                size_t c = Table.count( key );
                ASSERT( c == 1, NULL);
            }
            break;
          case 2: // work2
            for(int key = first; key < last; key++) {
                accessor a;
                Table.find( a, key );
                ASSERT( !a->second, "A key should be incremented only once");
                a->second += 1;
            }
            break;
          case 3: // clean
            for(int key = first; key < last; key++) {
                ASSERT( Table.erase( key ), NULL);
            }
        }
        return 0;
    }
};
//! Same four modes as TestTBBMap, run on a std::map guarded by a mutex of type M.
/** The lock is taken anew for every single map operation. Every thread works
    on its own slice of keys [t*n_items, (t+1)*n_items). */
template<typename M>
struct TestSTLMap : TesterBase {
    std::map<int, int> Table;
    M mutex;
    int n_items;

    TestSTLMap() : TesterBase(4) {}

    //! Splits the total number of items evenly between the threads.
    void init() { n_items = value/threads_count; }

    std::string get_name(int testn) {
        return std::string(map_testnames[testn]);
    }

    //! Runs mode @arg test for thread @arg t; the return value carries no information.
    double test(int test, int t)
    {
        const int first = t*n_items;
        const int last = (t+1)*n_items;
        switch(test) {
          case 0: // fill
            for(int key = first; key < last; key++) {
                typename M::scoped_lock guard(mutex);
                Table[key] = 0;
            }
            break;
          case 1: // work1
            for(int key = first; key < last; key++) {
                typename M::scoped_lock guard(mutex);
                size_t c = Table.count(key);
                ASSERT( c == 1, NULL);
            }
            break;
          case 2: // work2
            for(int key = first; key < last; key++) {
                typename M::scoped_lock guard(mutex);
                Table[key] += 1;
            }
            break;
          case 3: // clean
            for(int key = first; key < last; key++) {
                typename M::scoped_lock guard(mutex);
                Table.erase(key);
            }
        }
        return 0;
    }
};
//! Mutex look-alike that performs no locking at all.
/** Provides the scoped_lock interface (default construction, construction
    from a mutex, acquire, release) so it can stand in for a real mutex type.
    The only observable effect is that destroying a scoped_lock bound to a
    mutex stores 0 into that mutex's dummy field. */
class fake_mutex {
    int a;
public:
    fake_mutex() : a(0) {}

    class scoped_lock {
        fake_mutex *p;
    public:
        //! Creates a lock not bound to any mutex.
        /** The pointer is nulled so the destructor can tell the lock is
            unbound; previously it was left uninitialized and then dereferenced. */
        scoped_lock() : p(NULL) {}
        scoped_lock( fake_mutex &m ) : p(&m) {}
        ~scoped_lock() { if( p ) p->a = 0; }
        void acquire( fake_mutex &m ) { p = &m; }
        void release() { }
    };
};
//! Test collection comparing the enabled map implementations on the four map_testnames modes.
class test_hash_map : public TestProcessor {
public:
    test_hash_map() : TestProcessor("test_hash_map") {}

    //! Runs one round for @arg value items on @arg threads threads.
    void factory(int value, int threads) {
        if(Verbose) {
            printf("Processing with %d threads: %d...\n", threads, value);
        }
        // Which containers take part is selected by the configuration macros.
        process( value, threads,
#if STDTABLE
            run("std::map ", new NanosecPerValue<TestSTLMap<spin_mutex> >() ),
#endif
#if OLDTABLE
            run("old::hmap", new NanosecPerValue<TestTBBMap<OldTable> >() ),
#endif
            run("tbb::hmap", new NanosecPerValue<TestTBBMap<IntTable> >() ),
#if TESTTABLE
            run("new::hmap", new NanosecPerValue<TestTBBMap<TestTable> >() ),
#endif
            end );
        // The detailed per-thread log is written once the size reaches 2097152 (2^21).
        const bool big_enough = !(value < 2097152);
        if(big_enough) {
            stat->Print(StatisticsCollector::HTMLFile);
        }
    }
};
/////////////////////////////////////////////////////////////////////////////////////////
template<typename TableType>
struct TestHashMapFind : TesterBase {
    // Two-mode tester ("find", then "insert") working on a table that init()
    // has already filled with the keys 0 .. value-1. Every thread handles its
    // own slice of keys [t*n_items, (t+1)*n_items).
    typedef typename TableType::accessor accessor;
    typedef typename TableType::const_accessor const_accessor;
    TableType Table;
    int n_items;

    std::string get_name(int testn) {
        return std::string(testn ? "insert" : "find");
    }

    TestHashMapFind() : TesterBase(2) {}

    //! Computes the per-thread slice size and pre-populates the table.
    void init() {
        n_items = value/threads_count;
        for(int key = 0; key < value; key++) {
            accessor a;
            Table.insert( a, key );
        }
    }

    //! Runs mode @arg test for thread @arg t; the return value carries no information.
    double test(int test, int t)
    {
        const int first = t*n_items;
        const int last = (t+1)*n_items;
        if( test == 0 ) { // find
            for(int key = first; key < last; key++) {
                accessor a;
                Table.find( a, key );
                a->second = key;
            }
        } else if( test == 1 ) { // insert
            for(int key = first; key < last; key++) {
                accessor a;
                Table.insert( a, key );
                a->second = -key;
            }
        }
        return 0;
    }
};
// Input keys for TestHashCountStrings: test2_size entries, filled in main().
const int test2_size = 65536;
int Data[test2_size];
//! Two-mode tester ("insert", then "find") keyed by the values of the global Data array.
/** Every thread handles the Data entries with indices [t*n_items, (t+1)*n_items). */
template<typename TableType>
struct TestHashCountStrings : TesterBase {
    typedef typename TableType::accessor accessor;
    typedef typename TableType::const_accessor const_accessor;
    TableType Table;
    int n_items;

    std::string get_name(int testn) {
        if( testn )
            return "find";
        return "insert";
    }

    TestHashCountStrings() : TesterBase(2) {}

    //! Splits the total number of items evenly between the threads.
    void init() {
        n_items = value/threads_count;
    }

    //! Runs mode @arg testn for thread @arg t; the return value carries no information.
    double test(int testn, int t)
    {
        const int first = t*n_items;
        const int last = (t+1)*n_items;
        if( testn == 0 ) {
            for(int idx = first; idx < last; idx++) {
                accessor a;
                Table.insert( a, Data[idx] );
            }
        } else {
            for(int idx = first; idx < last; idx++) {
                accessor a;
                Table.find( a, Data[idx] );
            }
        }
        return 0;
    }
};
//! Test collection for the find/insert testers on pre-filled tables and on the Data keys.
class test_hash_map_find : public TestProcessor {
public:
    test_hash_map_find() : TestProcessor("test_hash_map_find") {}

    //! Runs one round for @arg value items on @arg threads threads.
    void factory(int value, int threads) {
        if(Verbose) {
            printf("Processing with %d threads: %d...\n", threads, value);
        }
        // Which containers take part is selected by the configuration macros.
        process( value, threads,
#if OLDTABLE
            run("Filled old::hashmap", new NanosecPerValue<TestHashMapFind<OldTable> >() ),
#endif
            run("Filled tbb::hashmap", new NanosecPerValue<TestHashMapFind<IntTable> >() ),
#if TESTTABLE
            run("Filled new::hashmap", new NanosecPerValue<TestHashMapFind<TestTable> >() ),
#endif
#if OLDTABLE
            run("CountStr old::hashmap", new TimeTest<TestHashCountStrings<OldTable> >() ),
#endif
            run("CountStr tbb::hashmap", new TimeTest<TestHashCountStrings<IntTable> >() ),
#if TESTTABLE
            run("CountStr new::hashmap", new TimeTest<TestHashCountStrings<TestTable> >() ),
#endif
            end );
        // The detailed per-thread log is intentionally not printed here.
    }
};
/////////////////////////////////////////////////////////////////////////////////////////
// Entry point of the hash map timing test.
// Any command-line argument turns on verbose output; the thread range defaults
// to 1 .. default_num_threads() and may be changed by ParseCommandLine().
// Part one measures find/insert for test2_size items at every thread count;
// part two measures the four map modes for doubling sizes and doubling thread
// counts. Both parts write their summary as HTML and Excel XML.
int main(int argc, char* argv[]) {
    if(argc>1) Verbose = true;
    //if(argc>2) ExtraVerbose = true;
    MinThread = 1; MaxThread = task_scheduler_init::default_num_threads();
    ParseCommandLine( argc, argv );
    ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to be loaded. Please build it by:\n\t\tmake tbbmalloc");
    // The testers divide by the thread count, and the second loop below advances
    // with t*=2, so a starting count below 1 (should the command line yield one)
    // would divide by zero or never terminate. Start from at least one thread.
    const int first_threads = MinThread < 1 ? 1 : MinThread;
    {
        test_hash_map_find test_find; int o = test2_size;
        // keys repeat with period 60
        for(int i = 0; i < o; i++)
            Data[i] = i%60;
        for( int t=first_threads; t <= MaxThread; t++)
            test_find.factory(o, t);
        test_find.report.SetTitle("Nanoseconds per operation of finding operation (Mode) for %d items", o);
        test_find.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML);
    }
    {
        test_hash_map the_test;
        for( int t=first_threads; t <= MaxThread; t*=2)
            for( int o=/*2048*/(1<<8)*8; o<2200000; o*=2 )
                the_test.factory(o, t);
        the_test.report.SetTitle("Nanoseconds per operation of (Mode) for N items in container (Name)");
        the_test.report.SetStatisticFormula("1AVG per size", "=AVERAGE(ROUNDS)");
        the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML);
    }
    return 0;
}

View file

@ -0,0 +1,155 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// configuration:
// Size of input array
const int INPUT_SIZE = 2000000;
// Specify list of unique percents to test against. Max - 10
#define SOURCE_ARRAY UNIQUE_PERCENT(5); UNIQUE_PERCENT(10); UNIQUE_PERCENT(20); UNIQUE_PERCENT(40)
// enable/disable tests for:
#define BOX1 "TBB"
#define BOX1TEST ValuePerSecond<Uniques<tbb::concurrent_hash_map<int,int> >, 1000000/*ns*/>
#define BOX1HEADER "tbb/concurrent_hash_map.h"
// enable/disable tests for:
#define BOX2 "OLD"
#define BOX2TEST ValuePerSecond<Uniques<tbb::concurrent_hash_map<int,int> >, 1000000/*ns*/>
#define BOX2HEADER "tbb/concurrent_hash_map-5468.h"
#define TBB_USE_THREADING_TOOLS 0
//////////////////////////////////////////////////////////////////////////////////
#include <cstdlib>
#include <math.h>
#include "tbb/tbb_stddef.h"
#include <vector>
#include <map>
// needed by hash_maps
#include <stdexcept>
#include <iterator>
#include <algorithm> // std::swap
#include <utility> // Need std::pair
#include <cstring> // Need std::memset
#include <typeinfo>
#include "tbb/cache_aligned_allocator.h"
#include "tbb/tbb_allocator.h"
#include "tbb/spin_rw_mutex.h"
#include "tbb/aligned_space.h"
#include "tbb/atomic.h"
// for test
#include "tbb/spin_mutex.h"
#include "time_framework.h"
using namespace tbb;
using namespace tbb::internal;
/////////////////////////////////////////////////////////////////////////////////////////
// Input data built for SOURCE_ARRAY settings
// Number of input mixtures prepared so far; incremented by UNIQUE_PERCENT().
int Mixtures = 0;
// Percents[k] is the unique-percent setting of mixture k (at most 10 mixtures).
int Percents[10];
// Data[k] points to the INPUT_SIZE input keys of mixture k, allocated by UNIQUE_PERCENT().
int *Data[10];
// Main test class used to run the timing tests. All overridden methods are called by the framework
//! Tester that fills a fresh table with the keys of one input mixture per test mode.
/** There is one test mode per prepared mixture (Mixtures of them). All
    overridden methods are called by the framework. */
template<typename TableType>
struct Uniques : TesterBase {
    typedef typename TableType::accessor accessor;
    typedef typename TableType::const_accessor const_accessor;
    TableType *Table;
    int n_items;

    //! Returns name of test mode specified by number
    /*override*/ std::string get_name(int testn) {
        return Format("%d%% uniques", Percents[testn]);
    }

    //! Initializes base class with number of test modes
    Uniques() : TesterBase(Mixtures), Table(0) {}

    ~Uniques() { delete Table; }

    //! Informs the class that value and threads number become known
    /*override*/ void init() {
        n_items = value/threads_count;
    }

    //! Informs the class that the test mode for specified thread is about to start
    /** All threads first meet at the barrier; afterwards thread 0 alone
        replaces the table with a new one constructed from MaxThread*4. */
    /*override*/ void test_prefix(int testn, int t) {
        barrier->wait();
        if( t == 0 ) {
            delete Table;
            Table = new TableType(MaxThread*4);
        }
    }

    //! Executes test mode for a given thread. Return value is ignored when used with timing wrappers.
    /*override*/ double test(int testn, int t)
    {
        int *input = Data[testn];
        const int first = t*n_items;
        const int last = (t+1)*n_items;
        for(int idx = first; idx < last; idx++)
            Table->insert( std::make_pair(input[idx], t) );
        return 0;
    }
};
/////////////////////////////////////////////////////////////////////////////////////////
// Using BOX declarations from configuration
#include "time_sandbox.h"
// Prepares the input data for given unique percent
inline void UNIQUE_PERCENT(int p) {
Percents[Mixtures] = p;
Data[Mixtures] = new int[INPUT_SIZE];
int uniques = INPUT_SIZE/100*p;
srand(10101);
for(int i = 0; i < INPUT_SIZE; i++)
Data[Mixtures][i] = rand()%uniques;
Mixtures++;
}
// Entry point of the hash map fill test.
// Any command-line argument turns on verbose output; the thread range defaults
// to 1 .. default_num_threads() and may be changed by ParseCommandLine().
// Prepares the input mixtures listed in SOURCE_ARRAY, runs the configured
// BOX tests for every thread count, and writes the report as HTML and Excel XML.
int main(int argc, char* argv[]) {
    if(argc>1) Verbose = true;
    //if(argc>2) ExtraVerbose = true;
    MinThread = 1; MaxThread = task_scheduler_init::default_num_threads();
    ParseCommandLine( argc, argv );
    ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to be loaded. Please build it by:\n\t\tmake tbbmalloc");
    SOURCE_ARRAY; // prepare source array
    // The tester divides by the thread count in init(), so never start below
    // one thread (should the command line yield a smaller value).
    const int first_threads = MinThread < 1 ? 1 : MinThread;
    {
        // Declares test processor
        TEST_PROCESSOR_NAME the_test("time_hash_map_fill"/*, StatisticsCollector::ByThreads*/);
        for( int t=first_threads; t <= MaxThread; t++)
            the_test.factory(INPUT_SIZE, t); // executes the tests specified in BOX-es for given 'value' and threads
        // The title contains no format specifier, so no extra argument is passed.
        the_test.report.SetTitle("Operations per nanosecond");
        the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); // Write files
    }
    return 0;
}

View file

@ -0,0 +1,174 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
////// Test configuration ////////////////////////////////////////////////////
#define SECONDS_RATIO 1000000 // microseconds
#ifndef REPEAT_K
#define REPEAT_K 50 // repeat coefficient
#endif
// Work amounts that define the test modes: TestLocks below has one mode per
// outer_work entry and names each mode after the entry's value.
int outer_work[] = {/*256,*/ 64, 16, 4, 0};
// Second list of work amounts (its use is outside this excerpt; presumably the
// work done inside the locked region -- confirm).
int inner_work[] = {32, 8, 0 };
// keep it to calibrate the time of work without synchronization
#define BOX1 "baseline"
#define BOX1TEST TimeTest< TBB_Mutex<tbb::null_mutex>, SECONDS_RATIO >
// enable/disable tests for:
#define BOX2 "spin_mutex"
#define BOX2TEST TimeTest< TBB_Mutex<tbb::spin_mutex>, SECONDS_RATIO >
// enable/disable tests for:
#define BOX3 "spin_rw_mutex"
#define BOX3TEST TimeTest< TBB_Mutex<tbb::spin_rw_mutex>, SECONDS_RATIO >
// enable/disable tests for:
#define BOX4 "queuing_mutex"
#define BOX4TEST TimeTest< TBB_Mutex<tbb::queuing_mutex>, SECONDS_RATIO >
// enable/disable tests for:
//#define BOX5 "queuing_rw_mutex"
#define BOX5TEST TimeTest< TBB_Mutex<tbb::queuing_rw_mutex>, SECONDS_RATIO >
//////////////////////////////////////////////////////////////////////////////
#include <cstdlib>
#include <math.h>
#include <algorithm> // std::swap
#include <utility> // Need std::pair from here
#include <sstream>
#include "tbb/tbb_stddef.h"
#include "tbb/null_mutex.h"
#include "tbb/spin_rw_mutex.h"
#include "tbb/spin_mutex.h"
#include "tbb/queuing_mutex.h"
#include "tbb/queuing_rw_mutex.h"
#include "tbb/mutex.h"
#if INTEL_TRIAL==2
#include "tbb/parallel_for.h" // enable threading by TBB scheduler
#include "tbb/task_scheduler_init.h"
#include "tbb/blocked_range.h"
#endif
// for test
#include "time_framework.h"
using namespace tbb;
using namespace tbb::internal;
/////////////////////////////////////////////////////////////////////////////////////////
//! Shared base of the lock tests: mode naming, result types, repeat count and the fake workload.
struct TestLocks : TesterBase {
    // "value", "threads_count" and the remaining state are inherited from TesterBase.

    //! Registers one test mode per outer_work entry.
    TestLocks() : TesterBase(/*number of modes*/sizeof(outer_work)/sizeof(outer_work[0])) {}

    //! Name of a test mode: its outer work amount, zero-padded to four characters.
    /*override*/std::string get_name(int testn) {
        std::ostringstream name;
        name.fill('0');
        name.width(4);
        name << outer_work[testn]; // mode number
        return name.str();
    }

    //! Suffix for the enabled result types (MIN and MAX); 0 for every other type.
    /*override*/const char *get_result_type(int, result_t type) const {
        if( type == MIN ) return " min";
        if( type == MAX ) return " max";
        return 0;
    }

    //! Number of repetitions of the measured loop; the same for every mode.
    int repeat_until(int /*test_n*/) const {
        return REPEAT_K*100;//TODO: suggest better?
    }

    //! Fake workload: "work" iterations of a pause plus a volatile multiply.
    void do_work(int work) volatile {
        int i = 0;
        while( i < work ) {
            volatile int x = i;
            __TBB_Pause(0); // just to call inline assembler
            x *= work/threads_count;
            ++i;
        }
    }
};
//! Test unit parameterised by a TBB mutex type M.
/** Each repetition does the mode's outer work unlocked, then "value" units of
    work while holding an M::scoped_lock on the shared mutex. */
template<typename M>
struct TBB_Mutex : TestLocks {
    M mutex;

    double test(int testn, int /*threadn*/)
    {
        int r = 0;
        while( r < repeat_until(testn) ) {
            do_work(outer_work[testn]);
            {
                // The lock lives exactly as long as this inner scope.
                typename M::scoped_lock guard(mutex);
                do_work(/*inner work*/value);
            }
            ++r;
        }
        return 0;
    }
};
/////////////////////////////////////////////////////////////////////////////////////////
//Using BOX declarations
#include "time_sandbox.h"
// Runs the whole sandbox once per inner_work entry with the given thread count.
void RunLoops(test_sandbox &the_test, int thread) {
    const int *const last = inner_work + sizeof(inner_work)/sizeof(int);
    for( const int *work = inner_work; work != last; ++work )
        the_test.factory(*work, thread);
}
//! Entry point: runs the lock tests for a range of thread counts and prints the report.
/** The default range is 1 .. default_num_threads()+1 and can be changed through
    ParseCommandLine. Thread counts are visited by doubling from MinThread while
    below the hardware default, then the hardware default itself, then MaxThread
    when it oversubscribes the machine. */
int main(int argc, char* argv[]) {
    if(argc>1) Verbose = true;
    int DefThread = task_scheduler_init::default_num_threads();
    MinThread = 1; MaxThread = DefThread+1;
    ParseCommandLine( argc, argv );
    ASSERT(MinThread <= MaxThread, 0);
    // The undersubscribed loop below advances with "t *= 2"; a starting value of 0
    // would never change and the loop would spin forever, so reject it up front.
    ASSERT(MinThread >= 1, "Minimal number of threads must be 1 or more");
#if INTEL_TRIAL && defined(__TBB_parallel_for_H)
    task_scheduler_init me(MaxThread);
#endif
    {
        test_sandbox the_test("time_locked_work", StatisticsCollector::ByThreads);
        //TODO: refactor this out as RunThreads(test&)
        for( int t = MinThread; t < DefThread && t <= MaxThread; t *= 2)
            RunLoops( the_test, t ); // execute undersubscribed threads
        if( DefThread > MinThread && DefThread <= MaxThread )
            RunLoops( the_test, DefThread ); // execute on all hw threads
        if( DefThread < MaxThread)
            RunLoops( the_test, MaxThread ); // execute requested oversubscribed threads
        the_test.report.SetTitle("Time of lock/unlock for mutex Name with Outer and Inner work");
        //the_test.report.SetStatisticFormula("1AVG per size", "=AVERAGE(ROUNDS)");
        the_test.report.Print(StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML, /*ModeName*/ "Outer work");
    }
    return 0;
}

View file

@ -0,0 +1,168 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#ifndef __TIME_FRAMEWORK_H__
#error time_framework.h must be included
#endif
// One sandbox per enabled test. For every BOXn macro that is defined this file
// opens namespace sandboxN, makes the contents of ::tbb and ::tbb::internal
// visible through a nested "tbb" namespace, optionally includes the header named
// by BOXnHEADER *inside* that namespace, and finally exposes the tester type
// BOXnTEST as sandboxN::testbox. The nine sections below differ only in N.
#ifdef BOX1
namespace sandbox1 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX1HEADER
# include BOX1HEADER
# endif
typedef ::BOX1TEST testbox;
}
#endif
#ifdef BOX2
namespace sandbox2 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX2HEADER
# include BOX2HEADER
# endif
typedef ::BOX2TEST testbox;
}
#endif
#ifdef BOX3
namespace sandbox3 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX3HEADER
# include BOX3HEADER
# endif
typedef ::BOX3TEST testbox;
}
#endif
#ifdef BOX4
namespace sandbox4 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX4HEADER
# include BOX4HEADER
# endif
typedef ::BOX4TEST testbox;
}
#endif
#ifdef BOX5
namespace sandbox5 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX5HEADER
# include BOX5HEADER
# endif
typedef ::BOX5TEST testbox;
}
#endif
#ifdef BOX6
namespace sandbox6 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX6HEADER
# include BOX6HEADER
# endif
typedef ::BOX6TEST testbox;
}
#endif
#ifdef BOX7
namespace sandbox7 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX7HEADER
# include BOX7HEADER
# endif
typedef ::BOX7TEST testbox;
}
#endif
#ifdef BOX8
namespace sandbox8 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX8HEADER
# include BOX8HEADER
# endif
typedef ::BOX8TEST testbox;
}
#endif
#ifdef BOX9
namespace sandbox9 {
namespace tbb { using namespace ::tbb; namespace internal { using namespace ::tbb::internal; } }
# ifdef BOX9HEADER
# include BOX9HEADER
# endif
typedef ::BOX9TEST testbox;
}
#endif
//if harness.h included
// When ASSERT is defined and command-line parsing is not disabled, provide a
// default TEST_PREFIX (unless the includer already defined one). It is expanded
// at the top of factory() below and prints a progress line when Verbose is set.
// NOTE(review): "%ld" assumes arg_t is compatible with long - confirm against time_framework.h.
#if defined(ASSERT) && !HARNESS_NO_PARSE_COMMAND_LINE
#ifndef TEST_PREFIX
#define TEST_PREFIX if(Verbose) printf("Processing with %d threads: %ld...\n", threads, value);
#endif
#endif//harness included
// Name of the processor class declared below; the includer may choose another name.
#ifndef TEST_PROCESSOR_NAME
#define TEST_PROCESSOR_NAME test_sandbox
#endif
//! Test processor that runs every enabled BOXn tester for one (value, threads) pair.
class TEST_PROCESSOR_NAME : public TestProcessor {
public:
    //! Forwards the report name and the sorting mode to TestProcessor.
    TEST_PROCESSOR_NAME(const char *name, StatisticsCollector::Sorting sort_by = StatisticsCollector::ByAlg)
        : TestProcessor(name, sort_by) {}

    //! Creates a fresh tester for each defined BOXn and passes them all to process().
    /** TEST_PREFIX and TEST_POSTFIX, when defined, are expanded before and after the run. */
    void factory(arg_t value, int threads) {
#ifdef TEST_PREFIX
        TEST_PREFIX
#endif
// RUNBOX(n) builds the "<n>.<BOXn name>" label and a new sandbox<n>::testbox.
// The spaces around "." are required: since C++11 a string literal immediately
// followed by an identifier ("."BOX) is lexed as a user-defined-literal suffix
// instead of as a separate token that can be pasted and macro-expanded.
#define RUNBOX(n) run(#n "." BOX##n, new sandbox##n::testbox() )
        process( value, threads,
#ifdef BOX1
            RUNBOX(1),
#endif
#ifdef BOX2
            RUNBOX(2),
#endif
#ifdef BOX3
            RUNBOX(3),
#endif
#ifdef BOX4
            RUNBOX(4),
#endif
#ifdef BOX5
            RUNBOX(5),
#endif
#ifdef BOX6
            RUNBOX(6),
#endif
#ifdef BOX7
            RUNBOX(7),
#endif
#ifdef BOX8
            RUNBOX(8),
#endif
#ifdef BOX9
            RUNBOX(9),
#endif
            end );
#ifdef TEST_POSTFIX
        TEST_POSTFIX
#endif
    }
};

View file

@ -0,0 +1,291 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "tbb/task_scheduler_init.h"
#include "tbb/tick_count.h"
#include <cmath>
#include <cstdlib>
#include <cerrno>
#include <cfloat>
#include <vector>
#include <algorithm>
#include "../src/test/harness.h"
#if __linux__ || __APPLE__ || __FreeBSD__
#include <sys/resource.h>
#endif /* __APPLE__ */
// The code, performance of which is to be measured, is surrounded by the StartSimpleTiming
// and StopSimpleTiming macros. It is called "target code" or "code of interest" hereafter.
//
// The target code is executed inside the nested loop. Nesting is necessary to allow
// measurements on arrays that fit cache of a particular level, while making the load
// big enough to eliminate the influence of random deviations.
//
// Macro StartSimpleTiming defines reduction variable "util::anchor", which may be modified (usually
// by adding to) by the target code. This can be necessary to prevent optimizing compilers
// from throwing out the code of interest. Besides, if the target code is complex enough,
// make sure that all its branches contribute (directly or indirectly) to the value
// being added to the "util::anchor" variable.
//
// To factor out the overhead introduced by the measurement infrastructure it is recommended
// to make a calibration run with the target code replaced by a no-op (but still modifying
// "util::anchor"), and to store the resulting time in the "util::base" variable.
//
// A generally good approach is to make the target code use elements of a previously
// initialized array. Then for the calibration run you just need to add the array elements
// to the "util::anchor" variable. To get rid of memory access delays make the array small
// enough to fit L2 or L1 cache (play with StartSimpleTiming arguments if necessary).
//
// Macro CalibrateSimpleTiming performs default calibration using "util::anchor += i;" operation.
//
// Macro ANCHOR_TYPE defines the type of the reduction variable. If it was not
// defined before including this header, it is defined as size_t. Depending on
// the target code modern super scalar architectures may blend reduction operation
// and instructions of interest differently for different target alternatives. So
// you may play with the type to minimize out-of-order and parallel execution impact
// on the calibration time veracity. You may even end up with different reduction
// variable types (and different calibration times) for different measurements.
namespace util {

    //! Per-run durations collected by the timing macros below.
    typedef std::vector<double> durations_t;

    //! Prints a 100-bucket histogram of the values in t.
    /** Output goes to the file named by histogramFileName, or to stdout when the
        name is NULL. Nothing is printed for an empty t. If the file cannot be
        opened a message is written to stderr and the function returns.
        stdout is flushed but never closed, so later console output keeps working. */
    void trace_histogram ( const durations_t& t, char* histogramFileName )
    {
        if ( t.empty() )
            return; // min_element/max_element would return end() and must not be dereferenced
        FILE* f = histogramFileName ? fopen(histogramFileName, "wt") : stdout;
        if ( !f ) {
            fprintf (stderr, "Cannot open histogram file %s\n", histogramFileName);
            return;
        }
        size_t  n = t.size();
        const size_t num_buckets = 100;
        double  min_val = *std::min_element(t.begin(), t.end()),
                max_val = *std::max_element(t.begin(), t.end()),
                bucket_size = (max_val - min_val) / num_buckets;
        std::vector<size_t> hist(num_buckets + 1, 0);
        for ( size_t i = 0; i < n; ++i ) {
            // When all samples are equal bucket_size is 0; put everything into bucket 0
            // instead of converting 0/0 to an index.
            size_t idx = bucket_size > 0 ? size_t((t[i]-min_val)/bucket_size) : 0;
            if ( idx > num_buckets )
                idx = num_buckets; // guard against rounding at the upper edge
            ++hist[idx];
        }
        fprintf (f, "Histogram: nvals = %u, min = %g, max = %g, nbuckets = %u\n", (unsigned)n, min_val, max_val, (unsigned)num_buckets);
        double bucket = min_val;
        for ( size_t i = 0; i <= num_buckets; ++i, bucket+=bucket_size )
            fprintf (f, "%12g\t%u\n", bucket, (unsigned)hist[i]);
        if ( f != stdout )
            fclose(f);
        else
            fflush(f);
    }

    //! Returns the mean of d after dropping one minimum and one maximum (only when d has more than 5 values).
    /** @param variation_percent receives 100*(max-min)/mean of the retained values.
        @param std_dev_percent   receives the population standard deviation as a percentage of the mean.
        For an empty d the function returns 0 and sets both outputs to 0. */
    double average ( const durations_t& d, double& variation_percent, double& std_dev_percent )
    {
        if ( d.empty() ) {
            variation_percent = 0;
            std_dev_percent = 0;
            return 0;
        }
        durations_t t = d;
        if ( t.size() > 5 ) {
            t.erase(std::min_element(t.begin(), t.end()));
            t.erase(std::max_element(t.begin(), t.end()));
        }
        size_t  n = t.size();
        double  sum = 0,
                min_val = *std::min_element(t.begin(), t.end()),
                max_val = *std::max_element(t.begin(), t.end());
        for ( size_t i = 0; i < n; ++i )
            sum += t[i];
        double  avg = sum / n,
                std_dev = 0;
        for ( size_t i = 0; i < n; ++i ) {
            double  dev = fabs(t[i] - avg);
            std_dev += dev * dev;
        }
        std_dev = sqrt(std_dev / n);
        std_dev_percent = std_dev / avg * 100;
        variation_percent = 100 * (max_val - min_val) / avg;
        return avg;
    }

    static int num_threads;

    // Calibration results: time of the empty measurement loop and its spread.
    static double   base = 0,
                    base_dev = 0,
                    base_dev_percent = 0;

    // Empty format string: printing the anchor through it produces no output
    // but keeps the anchor value observably used.
    static const char *empty_fmt = "";
    static int rate_field_len = 11;

#if !defined(ANCHOR_TYPE)
    #define ANCHOR_TYPE size_t
#endif

    // Reduction variable the measured code may add to (see the comment above the namespace).
    static ANCHOR_TYPE anchor = 0;

    static double sequential_time = 0;

    // StartSimpleTiming/StopSimpleTiming bracket the target code with an
    // nOuter x nInner loop nest; the loop indices visible to the target code
    // are "l" (outer) and "i" (inner). res receives the elapsed seconds minus util::base.
    #define StartSimpleTiming(nOuter, nInner) {             \
        tbb::tick_count t1, t0 = tbb::tick_count::now();    \
        for ( size_t l = 0; l < nOuter; ++l ) {             \
            for ( size_t i = 0; i < nInner; ++i ) {

    #define StopSimpleTiming(res)                   \
            }                                       \
            util::anchor += (ANCHOR_TYPE)l;         \
        }                                           \
        t1 = tbb::tick_count::now();                \
        printf (util::empty_fmt, util::anchor);     \
        res = (t1-t0).seconds() - util::base;       \
    }

    // Default calibration: times the loop nest with "util::anchor += i" as the body
    // and stores the result in util::base. The T argument is not used.
    #define CalibrateSimpleTiming(T, nOuter, nInner)    \
        StartSimpleTiming(nOuter, nInner);              \
            util::anchor += (ANCHOR_TYPE)i;             \
        StopSimpleTiming(util::base);

    // Shared head of StartTiming/StartTimingEx: nRuns timed repetitions
    // (index "k") of the nOuter x nInner loop nest (indices "l" and "i").
    #define StartTimingImpl(nRuns, nOuter, nInner)      \
        tbb::tick_count t1, t0;                         \
        for ( size_t k = 0; k < nRuns; ++k )  {         \
            t0 = tbb::tick_count::now();                \
            for ( size_t l = 0; l < nOuter; ++l ) {     \
                for ( size_t i = 0; i < nInner; ++i ) {

    // Collects the per-run durations in a local vector t_.
    #define StartTiming(nRuns, nOuter, nInner) {        \
        util::durations_t  t_(nRuns);                   \
        StartTimingImpl(nRuns, nOuter, nInner)

    // Same as StartTiming, but the durations are stored in the caller's vDurations.
    #define StartTimingEx(vDurations, nRuns, nOuter, nInner) {  \
        util::durations_t  &t_ = vDurations;                    \
        vDurations.resize(nRuns);                               \
        StartTimingImpl(nRuns, nOuter, nInner)

    // Closes the loops opened by StartTiming/StartTimingEx and feeds the durations
    // to util::average(). NOTE: the expansion divides by a variable named "nrep"
    // that must exist in the scope where the macro is used.
    #define StopTiming(Avg, StdDev, StdDevPercent)      \
                }                                       \
                util::anchor += (ANCHOR_TYPE)l;         \
            }                                           \
            t1 = tbb::tick_count::now();                \
            t_[k] = (t1 - t0).seconds()/nrep;           \
        }                                               \
        printf (util::empty_fmt, util::anchor);         \
        Avg = util::average(t_, StdDev, StdDevPercent); \
    }

    // Default calibration for the multi-run macros; fills util::base, util::base_dev
    // and util::base_dev_percent.
    #define CalibrateTiming(nRuns, nOuter, nInner)      \
        StartTiming(nRuns, nOuter, nInner);             \
            util::anchor += (ANCHOR_TYPE)i;             \
        StopTiming(util::base, util::base_dev, util::base_dev_percent);

} // namespace util
// Number of timed runs passed to the timing macros by RunTestImpl; may be predefined by the includer.
#ifndef NRUNS
#define NRUNS 7
#endif
// Target duration used by RunTestImpl to choose the repetition count; may be predefined by the includer.
#ifndef ONE_TEST_DURATION
#define ONE_TEST_DURATION 0.01
#endif
// Sentinel pointer value meaning "do not produce a histogram"
// (NULL cannot be used: it selects console output).
#define no_histogram ((char*)-1)
/// Measures pfn and returns its time per repetition with the calibration time (util::base) subtracted.
/** The repetition count is first doubled until the measured net time exceeds 1e-6,
    then multiplied by ceil(ONE_TEST_DURATION/time); the final measurement uses that count.
    If title is non-NULL a result line is printed. If histogramFileName is not the
    no_histogram sentinel, the per-run durations are passed to util::trace_histogram. **/
inline
double RunTestImpl ( const char* title, void (*pfn)(), char* histogramFileName = no_histogram ) {
// StopTiming stores into these as (average, variation percent, std deviation percent).
double time = 0, variation = 0, deviation = 0;
// Do not rename: the StopTiming macro expands to code that divides by a variable called "nrep".
size_t nrep = 1;
// Search for a repetition count large enough to give a measurable net time.
while (true) {
CalibrateTiming(NRUNS, 1, nrep);
StartTiming(NRUNS, 1, nrep);
pfn();
StopTiming(time, variation, deviation);
time -= util::base;
if ( time > 1e-6 )
break;
nrep *= 2;
}
// Scale the repetition count using ONE_TEST_DURATION and the net time just measured.
nrep *= (size_t)ceil(ONE_TEST_DURATION/time);
CalibrateTiming(NRUNS, 1, nrep); // sets util::base
util::durations_t t;
StartTimingEx(t, NRUNS, 1, nrep);
pfn();
StopTiming(time, variation, deviation);
// (char*)-1 is the value of the no_histogram sentinel.
if ( histogramFileName != (char*)-1 )
util::trace_histogram(t, histogramFileName);
double clean_time = time - util::base;
if ( title ) {
// Deviation (in percent) is calculated for the Gross time
printf ("\n%-34s %.2e %5.1f ", title, clean_time, deviation);
// Relative difference to util::sequential_time in percent, or blank padding when it is not set.
if ( util::sequential_time != 0 )
//printf ("% .2e ", clean_time - util::sequential_time);
printf ("% 10.1f ", 100*(clean_time - util::sequential_time)/util::sequential_time);
else
printf ("%*s ", util::rate_field_len, "");
printf ("%-9u %1.6f |", (unsigned)nrep, time * nrep);
}
return clean_time;
}
/// Runs the test function, does statistical processing, and, if title is nonzero, prints results.
/** title_fmt is a printf-style format that receives workload_param converted to long;
    the formatted title is truncated to fit a 1024-byte buffer.
    If histogramFileName is a string, the histogram of individual runs is generated and stored
    in a file with the given name. If it is NULL then the histogram is printed on the console.
    By default no histogram is generated.
    The histogram format is: "rate bucket start" "number of tests in this bucket". **/
inline
void RunTest ( const char* title_fmt, size_t workload_param, void (*pfn_test)(), char* histogramFileName = no_histogram ) {
    char title[1024];
    // Bounded formatting: a long title_fmt can no longer write past the end of the buffer.
    snprintf(title, sizeof(title), title_fmt, (long)workload_param);
    RunTestImpl(title, pfn_test, histogramFileName);
}
/// Measures pfn silently and stores its time divided by util::num_threads in util::sequential_time.
inline
void CalcSequentialTime ( void (*pfn)() ) {
    const double measured = RunTestImpl(NULL, pfn);
    util::sequential_time = measured / util::num_threads;
}
/// Clears util::sequential_time, so RunTestImpl prints padding instead of a relative difference.
inline
void ResetSequentialTime () {
    util::sequential_time = 0.0;
}
/// Prints the column header line of the results table, including NRUNS and the current thread count.
inline void PrintTitle() {
    static const char header_fmt[] =
        "%-34s %-*s Std Dev,%% Par.overhead,%% Repeats Gross time | Nruns %u, Nthreads %d";
    printf (header_fmt, "Test name", util::rate_field_len, "Rate", NRUNS, util::num_threads);
}
// Provided by the test source that includes this header.
void Test();

/// Common driver: runs Test() once for every thread count from MinThread to MaxThread.
inline
int test_main( int argc, char* argv[] ) {
    ParseCommandLine( argc, argv );
    ASSERT (MinThread>=2, "Minimal number of threads must be 2 or more");

    // Width of the "Rate" column: length of a sample value printed with "%.1e", plus 2.
    char sample[128];
    const int sample_len = sprintf(sample, "%.1e", 1.1);
    util::rate_field_len = sample_len + 2;

    for ( int nthreads = MinThread; nthreads <= MaxThread; ++nthreads ) {
        tbb::task_scheduler_init init (nthreads);
        util::num_threads = nthreads;
        PrintTitle();
        Test();
        printf("\n");
    }
    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,256 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
//#define DO_SCALABLEALLOC
#include <cstdlib>
#include <cmath>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>
#include "tbb/tbb_stddef.h"
#include "tbb/spin_mutex.h"
#ifdef DO_SCALABLEALLOC
#include "tbb/scalable_allocator.h"
#endif
#include "tbb/concurrent_vector.h"
#include "tbb/tbb_allocator.h"
#include "tbb/cache_aligned_allocator.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"
#include "tbb/tick_count.h"
#include "tbb/blocked_range.h"
#include "../test/harness.h"
//#include "harness_barrier.h"
#include "../test/harness_allocator.h"
#define STATISTICS_INLINE
#include "statistics.h"
using namespace tbb;
bool ExtraVerbose = false;
class Timer {
tbb::tick_count tick;
public:
Timer() { tick = tbb::tick_count::now(); }
double get_time() { return (tbb::tick_count::now() - tick).seconds(); }
double diff_time(const Timer &newer) { return (newer.tick - tick).seconds(); }
double mark_time() { tick_count t1(tbb::tick_count::now()), t2(tick); tick = t1; return (t1 - t2).seconds(); }
double mark_time(const Timer &newer) { tick_count t(tick); tick = newer.tick; return (tick - t).seconds(); }
};
/************************************************************************/
/* TEST1 */
/************************************************************************/
// Instantiates vector_test1 for vector template v and allocator template a: one vector of
// Timer and one of double, each using a static_counting_allocator wrapped around a.
#define mk_vector_test1(v, a) vector_test1<v<Timer, static_counting_allocator<a<Timer> > >, v<double, static_counting_allocator<a<double> > > >
//! Test 1: times vector creation and element access, and records allocator counters.
/** Three test cases are reported under "mode":
    key[0] "b)creation wholly"         - constructing a vector of len Timers at once,
    key[1] "a)creation by push"        - push_back of len doubles,
    key[2] "c)operation time per item" - a loop touching every element of both vectors. */
template<class timers_vector_t, class values_vector_t>
class vector_test1 {
const char *mode;
StatisticsCollector &stat;
StatisticsCollector::TestCase key[16];
public:
vector_test1(const char *m, StatisticsCollector &s) : mode(m), stat(s) {}
//! Runs the test for vectors of len elements and returns *this.
vector_test1 &operator()(size_t len) {
if(Verbose) printf("test1<%s>(%u): collecting timing statistics\n", mode, unsigned(len));
__TBB_ASSERT(sizeof(Timer) == sizeof(double), NULL);
static const char *test_names[] = {
"b)creation wholly",
"a)creation by push",
"c)operation time per item",
0 };
for(int i = 0; test_names[i]; ++i) key[i] = stat.SetTestCase(test_names[i], mode, len);
// timer0..timer1 brackets the sized construction; every element of tv is a Timer
// stamped by its own default constructor.
Timer timer0; timers_vector_t::allocator_type::init_counters();
timers_vector_t tv(len);
// timer1..timer2 brackets the element-by-element growth.
Timer timer1; values_vector_t::allocator_type::init_counters();
values_vector_t dv;
for (size_t i = 0; i < len; ++i)
dv.push_back( i );
Timer timer2;
// Stores, in reverse order, the time from timer0 to each element's construction stamp,
// then re-stamps the element with the current time.
for (size_t i = 0; i < len; ++i)
{
dv[len-i-1] = timer0.diff_time(tv[i]);
tv[i].mark_time();
}
stat.AddStatisticValue( key[2], "1total, ms", "%.3f", timer2.get_time()*1000.0 );
stat.AddStatisticValue( key[1], "1total, ms", "%.3f", timer1.diff_time(timer2)*1000.0 );
stat.AddStatisticValue( key[0], "1total, ms", "%.3f", timer0.diff_time(timer1)*1000.0 );
//allocator statistics
stat.AddStatisticValue( key[0], "2total allocations", "%d", int(timers_vector_t::allocator_type::allocations) );
stat.AddStatisticValue( key[1], "2total allocations", "%d", int(values_vector_t::allocator_type::allocations) );
stat.AddStatisticValue( key[2], "2total allocations", "%d", 0);
stat.AddStatisticValue( key[0], "3total alloc#items", "%d", int(timers_vector_t::allocator_type::items_allocated) );
stat.AddStatisticValue( key[1], "3total alloc#items", "%d", int(values_vector_t::allocator_type::items_allocated) );
stat.AddStatisticValue( key[2], "3total alloc#items", "%d", 0);
//remarks
stat.AddStatisticValue( key[0], "9note", "segment creation time, ns:");
stat.AddStatisticValue( key[2], "9note", "average op-time per item, ns:");
// One round per power of two i = 2, 4, 8, ... below len. dv[len-i-1] is the value stored
// for element i above; the deltas between successive rounds are reported for key[0].
Timer last_timer(timer2); double last_value = 0;
for (size_t j = 0, i = 2; i < len; i *= 2, j++) {
stat.AddRoundResult( key[0], (dv[len-i-1]-last_value)*1000000.0 );
last_value = dv[len-i-1];
stat.AddRoundResult( key[2], last_timer.diff_time(tv[i])/double(i)*1000000.0 );
last_timer = tv[i];
stat.SetRoundTitle(j, i);
}
tv.clear(); dv.clear();
//__TBB_ASSERT(timers_vector_t::allocator_type::items_allocated == timers_vector_t::allocator_type::items_freed, NULL);
//__TBB_ASSERT(values_vector_t::allocator_type::items_allocated == values_vector_t::allocator_type::items_freed, NULL);
return *this;
}
};
/************************************************************************/
/* TEST2 */
/************************************************************************/
// Instantiates vector_test2 for vector template v holding size_t with allocator template a.
#define mk_vector_test2(v, a) vector_test2<v<size_t, a<size_t> > >

//! Test 2: times a fixed sequence of standard algorithms over two vectors of len elements.
/** Each of the ntrial rounds reports four timings in milliseconds:
    "allocate" (construction), "fill" (std::generate), "process" (the algorithm
    sequence) and "total time". */
template<class vector_t>
class vector_test2 {
    const char *mode;
    static const int ntrial = 10;
    StatisticsCollector &stat;
public:
    vector_test2(const char *m, StatisticsCollector &s) : mode(m), stat(s) {}

    //! Runs the test for vectors of len elements and returns *this.
    vector_test2 &operator()(size_t len) {
        if(Verbose) printf("test2<%s>(%u): performing standard transformation sequence on vector\n", mode, unsigned(len));
        StatisticsCollector::TestCase init_key = stat.SetTestCase("allocate", mode, len);
        StatisticsCollector::TestCase fill_key = stat.SetTestCase("fill", mode, len);
        StatisticsCollector::TestCase proc_key = stat.SetTestCase("process", mode, len);
        StatisticsCollector::TestCase full_key = stat.SetTestCase("total time", mode, len);
        for (int i = 0; i < ntrial; i++) {
            Timer timer0;
            vector_t v1(len);
            vector_t v2(len);
            Timer timer1;
            std::generate(v1.begin(), v1.end(), values(0));
            std::generate(v2.begin(), v2.end(), values(size_t(-len)));
            Timer timer2;
            std::reverse(v1.rbegin(), v1.rend());
            // The result is discarded on purpose: only the traversal time matters here.
            std::inner_product(v1.begin(), v1.end(), v2.rbegin(), 1);
            std::sort(v1.rbegin(), v1.rend());
            std::sort(v2.rbegin(), v2.rend());
            // NOTE(review): the output range starts at v1.begin() and so overlaps the first
            // input range - confirm this is intended for the benchmark.
            std::set_intersection(v1.begin(), v1.end(), v2.rbegin(), v2.rend(), v1.begin());
            Timer timer3;
            stat.AddRoundResult( proc_key, timer2.diff_time(timer3)*1000.0 );
            stat.AddRoundResult( fill_key, timer1.diff_time(timer2)*1000.0 );
            stat.AddRoundResult( init_key, timer0.diff_time(timer1)*1000.0 );
            stat.AddRoundResult( full_key, timer0.diff_time(timer3)*1000.0 );
        }
        stat.SetStatisticFormula("1Average", "=AVERAGE(ROUNDS)");
        stat.SetStatisticFormula("2+/-", "=(MAX(ROUNDS)-MIN(ROUNDS))/2");
        return *this;
    }

    //! Generator used to fill the vectors: a counter folded by a varying modulus.
    class values
    {
        size_t value;
    public:
        values(size_t i) : value(i) {}
        //! Returns the next generated number and advances the counter.
        size_t operator()() {
            // Read the counter once, then advance it. The previous single expression
            // "value++ % (1|(value^55))" both modified and read "value" without
            // sequencing, which is undefined behaviour.
            const size_t current = value++;
            return current % (1 | (current ^ 55)); // "1|" keeps the divisor non-zero
        }
    };
};
/************************************************************************/
/* TEST3 */
/************************************************************************/
// Instantiates vector_test3 for vector template v holding char, with a
// local_counting_allocator wrapped around allocator template a.
#define mk_vector_test3(v, a) vector_test3<v<char, local_counting_allocator<a<char>, size_t > > >

//! Test 3: records allocator counters for vectors grown to 0, 1, 2, 4, ... elements.
/** 1024 vectors are split into batches of 512, 256, ..., 1 instances; every vector of a
    batch receives the same number of push_back calls, and the counters of the last
    vector of the batch are reported as that round's result. */
template<class vector_t>
class vector_test3 {
    const char *mode;
    StatisticsCollector &stat;
public:
    vector_test3(const char *m, StatisticsCollector &s) : mode(m), stat(s) {}

    vector_test3 &operator()(size_t len) {
        if(Verbose) printf("test3<%s>(%u): collecting allocator statistics\n", mode, unsigned(len));
        static const size_t sz = 1024;
        vector_t V[sz];
        StatisticsCollector::TestCase vinst_key = stat.SetTestCase("instances number", mode, len);
        StatisticsCollector::TestCase count_key = stat.SetTestCase("allocations count", mode, len);
        StatisticsCollector::TestCase items_key = stat.SetTestCase("allocated items", mode, len);
        //stat.ReserveRounds(sz-1);
        size_t filled = 0;  // index of the next vector to fill
        size_t round = 0;   // round number
        for (size_t batch = sz/2; batch >= 1 && filled < sz; batch /= 2, ++round)
        {
            const size_t count = round? 1<<(round-1) : 0;
            const size_t batch_end = filled + batch;
            for (; filled < batch_end; ++filled) {
                //if(count >= 16) V[filled].reserve(count);
                for (size_t item = 0; item < count; ++item)
                    V[filled].push_back(item);
            }
            stat.SetRoundTitle ( round, count );
            stat.AddRoundResult( vinst_key, batch );
            vector_t &last = V[filled-1];
            stat.AddRoundResult( count_key, last.get_allocator().allocations );
            stat.AddRoundResult( items_key, last.get_allocator().items_allocated );
        }
        return *this;
    }
};
/************************************************************************/
/* TYPES SET FOR TESTS */
/************************************************************************/
// types_set(n, title, op) runs test n for six vector/allocator combinations
// (tbb::concurrent_vector and std::vector, each with cache_aligned_allocator,
// tbb_allocator and std::allocator) and prints the collected statistics to
// stdout, an HTML file and an Excel XML file.
//   n     - test number; selects mk_vector_test<n> and the collector name "time_vector<n>".
//   title - a parenthesized argument list pasted directly after Collector.SetTitle.
//   op    - a parenthesized argument list applied to each test object via operator().
#define types_set(n, title, op) { StatisticsCollector Collector("time_vector"#n); Collector.SetTitle title; \
{mk_vector_test##n(tbb::concurrent_vector, tbb::cache_aligned_allocator) ("TBB:NFS", Collector)op;} \
{mk_vector_test##n(tbb::concurrent_vector, tbb::tbb_allocator) ("TBB:TBB", Collector)op;} \
{mk_vector_test##n(tbb::concurrent_vector, std::allocator) ("TBB:STD", Collector)op;} \
{mk_vector_test##n(std::vector, tbb::cache_aligned_allocator) ("STL:NFS", Collector)op;} \
{mk_vector_test##n(std::vector, tbb::tbb_allocator) ("STL:TBB", Collector)op;} \
{mk_vector_test##n(std::vector, std::allocator) ("STL:STD", Collector)op;} \
Collector.Print(StatisticsCollector::Stdout|StatisticsCollector::HTMLFile|StatisticsCollector::ExcelXML); }
/************************************************************************/
/* MAIN DRIVER */
/************************************************************************/
//! Entry point of the vector performance tests.
/** The harness variables are reused with another meaning: MinThread selects the
    test number (0 runs all three) and MaxThread is the problem size. */
int main(int argc, char* argv[]) {
    if( argc > 1 ) Verbose = true;
    if( argc > 2 ) ExtraVerbose = true;
    MinThread = 0; MaxThread = 500000; // use in another meaning - test#:problem size
    ParseCommandLine( argc, argv );
    ASSERT(tbb_allocator<int>::allocator_type() == tbb_allocator<int>::scalable, "expecting scalable allocator library to be loaded");

    if( MinThread == 0 || MinThread == 1 ) {
        types_set(1, ("Vectors performance test #1 for %d", MaxThread), (MaxThread) )
    }
    if( MinThread == 0 || MinThread == 2 ) {
        types_set(2, ("Vectors performance test #2 for %d", MaxThread), (MaxThread) )
    }
    if( MinThread == 0 || MinThread == 3 ) {
        types_set(3, ("Vectors performance test #3 for %d", MaxThread), (MaxThread) )
    }

    if( !Verbose ) printf("done\n");
    return 0;
}

View file

@ -0,0 +1,135 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstddef>
#include <cstdlib>
#include <cstdio>
#include <float.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <omp.h>
#include <assert.h>
#include "thread_level.h"
using namespace std;
using namespace tbb;
// Algorithm parameters
const int Max_OMP_Outer_Threads = 16;
const int Max_OMP_Inner_Threads = 16;
// Global variables
int max_outer_threads = Max_OMP_Outer_Threads;
int max_inner_threads = Max_OMP_Inner_Threads;
// Write the usage text for the program called prog_name to stderr.
void help_message(char *prog_name) {
    static const char options_text[] =
        " Parameters:\n"
        " -o<num> : max # of threads OMP should use at outer level\n"
        " -i<num> : max # of threads OMP should use at inner level\n"
        "\n Help:\n"
        " -h : print this help message\n";
    fprintf(stderr, "\n%s usage:\n", prog_name);
    fputs(options_text, stderr);
}
// Process command-line arguments.
//   -i<num> stores <num> in *max_inner_t, -o<num> stores <num> in *max_outer_t,
//   -h prints the help text and exits with status 0.
// A thread count must be a decimal integer >= 1. An unacceptable or unknown argument
// produces a warning plus the help text and leaves both outputs unchanged.
void process_args(int argc, char *argv[], int *max_outer_t, int *max_inner_t) {
    for (int i=1; i<argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
            case 'i': { // set max_inner_threads
                // Parse into a temporary so a rejected value (e.g. "-i0") cannot
                // overwrite the caller's current setting.
                int requested = 0;
                if (sscanf(&argv[i][2], "%d", &requested) == 1 && requested >= 1) {
                    *max_inner_t = requested;
                } else {
                    fprintf(stderr, "%s Warning: argument of -i option unacceptable: %s\n", argv[0], &argv[i][2]);
                    help_message(argv[0]);
                }
                break;
            }
            case 'o': { // set max_outer_threads
                int requested = 0;
                if (sscanf(&argv[i][2], "%d", &requested) == 1 && requested >= 1) {
                    *max_outer_t = requested;
                } else {
                    fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], &argv[i][2]);
                    help_message(argv[0]);
                }
                break;
            }
            case 'h': // print help message
                help_message(argv[0]);
                exit(0);
                break;
            default:
                fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
                help_message(argv[0]);
                break;
            }
        } else {
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
        }
    }
}
// Simple nested-OpenMP scenario: outer thread 0 sleeps 3 seconds, runs an inner parallel
// region and reports every change of thread level to TotalThreadLevel; all other outer
// threads just sleep 6 seconds. The elapsed wall time and the recorded levels are printed.
int main(int argc, char *argv[]) {
    process_args(argc, argv, &max_outer_threads, &max_inner_threads);

    TotalThreadLevel.init();

    const double start = omp_get_wtime( );

#pragma omp parallel num_threads(max_outer_threads)
    {
        const int omp_thread = omp_get_thread_num();
        if (omp_thread != 0) {
            sleep(6);
        }
        else {
            // Only thread 0 does the level accounting for the outer team.
            TotalThreadLevel.change_level(omp_get_num_threads(), omp_outer);
            sleep(3);
            TotalThreadLevel.change_level(-1, omp_outer);
#pragma omp parallel num_threads(max_inner_threads)
            {
                const int my_omp_thread = omp_get_thread_num();
                const bool inner_master = (my_omp_thread == 0);
                if (inner_master)
                    TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner);
                printf("Inner thread %d nested inside outer thread %d\n", my_omp_thread, omp_thread);
                if (inner_master)
                    TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner);
            }
            TotalThreadLevel.change_level(1, omp_outer);
            TotalThreadLevel.change_level(-omp_get_num_threads(), omp_outer);
        }
    }

    const double end = omp_get_wtime( );

    printf("Simple test of nested OMP (%d outer threads max, %d inner threads max) took: %6.6f\n",
        max_outer_threads, max_inner_threads, end-start);
    TotalThreadLevel.dump();
    return 0;
}

View file

@ -0,0 +1,159 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstddef>
#include <cstdlib>
#include <cstdio>
#include <float.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <omp.h>
#include <assert.h>
#include "thread_level.h"
#define LOG_THREADS
#include "tbb/task.h"
#include "tbb/tick_count.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/scalable_allocator.h"
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
using namespace std;
using namespace tbb;
// Algorithm parameters
const int Max_TBB_Threads = 16;
const int Max_OMP_Threads = 16;
// Global variables
int max_tbb_threads = Max_TBB_Threads;
int max_omp_threads = Max_OMP_Threads;
// Print help on command-line arguments.
// Writes a usage banner naming prog_name, followed by the option summary,
// to stderr. Nothing is returned and the process is not terminated here.
void help_message(char *prog_name) {
    static const char option_summary[] =
        " Parameters:\n"
        " -t<num> : max # of threads TBB should use\n"
        " -o<num> : max # of threads OMP should use\n"
        "\n Help:\n"
        " -h : print this help message\n";

    fprintf(stderr, "\n%s usage:\n", prog_name);
    // The summary contains no conversion specifiers, so it is emitted verbatim.
    fputs(option_summary, stderr);
}
// Process command-line arguments.
//
// Recognised options (the value is attached directly to the flag, e.g. "-t4"):
//   -t<num> : store <num> in *max_tbb_t
//   -o<num> : store <num> in *max_omp_t
//   -h      : print the help text and exit(0)
// Every other argument is reported on stderr and ignored.
//
// A value that does not parse as an integer >= 1 is rejected with a warning and
// the destination keeps the value it had on entry. (Previously the rejected
// number was still written through the pointer, so "-t0" left the thread count
// at 0 despite the warning.)
void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) {
    for (int i=1; i<argc; ++i) {
        const char *arg = argv[i];
        if (arg[0] != '-') {
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            continue;
        }
        switch (arg[1]) {
        case 't': { // set max_tbb_threads
            // arg[1] is non-NUL here, so arg+2 is at worst the terminator.
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_tbb_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'o': { // set max_omp_threads
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_omp_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'h': // print help message
            help_message(argv[0]);
            exit(0);
            break;
        default:
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            break;
        }
    }
}
// OpenMP-outside / TBB-inside smoke test.
// An OpenMP parallel region is opened with max_omp_threads threads; each thread
// creates its own task_scheduler_init. OpenMP thread 0 sleeps 3 seconds and
// then runs a TBB parallel_for over [0, 1000); all other OpenMP threads sleep
// 6 seconds. When LOG_THREADS is defined, thread-level changes are reported to
// TotalThreadLevel and dumped before returning. Always returns 0.
int main(int argc, char *argv[]) {
process_args(argc, argv, &max_tbb_threads, &max_omp_threads);
TotalThreadLevel.init();
tick_count start, end;
start = tick_count::now();
#pragma omp parallel num_threads(max_omp_threads)
{
int omp_thread = omp_get_thread_num();
#ifdef LOG_THREADS
// Only thread 0 registers the size of the outer OpenMP team.
if (omp_thread == 0)
TotalThreadLevel.change_level(omp_get_num_threads(), omp_outer);
#endif
// One scheduler object per OpenMP thread, alive until the end of the region.
task_scheduler_init phase(max_tbb_threads);
if (omp_thread == 0) {
sleep(3);
#ifdef LOG_THREADS
// Remove thread 0 from the outer count for the duration of the TBB loop;
// it is added back right after parallel_for returns.
TotalThreadLevel.change_level(-1, omp_outer);
#endif
parallel_for(blocked_range<size_t>(0, 1000),
[=](const blocked_range<size_t>& range) {
#ifdef LOG_THREADS
// Each executed sub-range counts as one active inner TBB thread.
TotalThreadLevel.change_level(1, tbb_inner);
#endif
#pragma ivdep
for (size_t i=range.begin(); i!=range.end(); ++i) {
// Only the first index of each sub-range prints.
if (i==range.begin())
printf("TBB range starting at %d on OMP thread %d\n", (int)i, omp_thread);
}
#ifdef LOG_THREADS
TotalThreadLevel.change_level(-1, tbb_inner);
#endif
}, auto_partitioner());
#ifdef LOG_THREADS
TotalThreadLevel.change_level(1, omp_outer);
#endif
}
else {
sleep(6);
}
#ifdef LOG_THREADS
// Balance the registration made at the top of the region.
if (omp_thread == 0)
TotalThreadLevel.change_level(-omp_get_num_threads(), omp_outer);
#endif
}
end = tick_count::now();
printf("Simple test of OMP (%d threads max) with TBB (%d threads max) inside took: %6.6f\n",
max_omp_threads, max_tbb_threads, (end-start).seconds());
#ifdef LOG_THREADS
TotalThreadLevel.dump();
#endif
return 0;
}

View file

@ -0,0 +1,168 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstddef>
#include <cstdlib>
#include <cstdio>
#include <float.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <omp.h>
#include <assert.h>
#include "thread_level.h"
#include "tbb/task.h"
#include "tbb/tick_count.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/scalable_allocator.h"
using namespace std;
using namespace tbb;
// Algorithm parameters
const int Max_TBB_Threads = 16;
const int Max_OMP_Threads = 16;
// Global variables
int max_tbb_threads = Max_TBB_Threads;
int max_omp_threads = Max_OMP_Threads;
// Print help on command-line arguments.
// Writes a usage banner naming prog_name, followed by the option summary,
// to stderr. Nothing is returned and the process is not terminated here.
void help_message(char *prog_name) {
    static const char option_summary[] =
        " Parameters:\n"
        " -t<num> : max # of threads TBB should use\n"
        " -o<num> : max # of threads OMP should use\n"
        "\n Help:\n"
        " -h : print this help message\n";

    fprintf(stderr, "\n%s usage:\n", prog_name);
    // The summary contains no conversion specifiers, so it is emitted verbatim.
    fputs(option_summary, stderr);
}
// Process command-line arguments.
//
// Recognised options (the value is attached directly to the flag, e.g. "-t4"):
//   -t<num> : store <num> in *max_tbb_t
//   -o<num> : store <num> in *max_omp_t
//   -h      : print the help text and exit(0)
// Every other argument is reported on stderr and ignored.
//
// A value that does not parse as an integer >= 1 is rejected with a warning and
// the destination keeps the value it had on entry. (Previously the rejected
// number was still written through the pointer, so "-t0" left the thread count
// at 0 despite the warning.)
void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) {
    for (int i=1; i<argc; ++i) {
        const char *arg = argv[i];
        if (arg[0] != '-') {
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            continue;
        }
        switch (arg[1]) {
        case 't': { // set max_tbb_threads
            // arg[1] is non-NUL here, so arg+2 is at worst the terminator.
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_tbb_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'o': { // set max_omp_threads
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_omp_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'h': // print help message
            help_message(argv[0]);
            exit(0);
            break;
        default:
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            break;
        }
    }
}
// TBB task used by this test.
// A non-leaf instance spawns 64 leaf children and waits for them. A leaf with
// an even myId sleeps 3 seconds and then opens an OpenMP parallel region; a
// leaf with an odd myId only sleeps 6 seconds. Thread-level changes are
// reported to TotalThreadLevel throughout.
class SimpleTask : public task {
    bool isLeaf;
    int myId;
public:
    SimpleTask(bool isLeaf_, int myId_) : isLeaf(isLeaf_), myId(myId_) {}

    // Runs the task body; always returns NULL (no continuation task).
    task* execute() {
        TotalThreadLevel.change_level(1, tbb_outer);
        omp_set_num_threads(max_omp_threads);
        if (!isLeaf) {
            // Number of leaf children; the reference count needs one extra
            // slot for the wait_for_all() below.
            const int child_count = 64;
            set_ref_count(child_count + 1);
            for (int i=0; i<child_count; ++i) {
                SimpleTask& st = *new(allocate_child()) SimpleTask(true, i);
                spawn(st);
            }
            // Not counted as active while blocked waiting for the children.
            TotalThreadLevel.change_level(-1, tbb_outer);
            wait_for_all();
            TotalThreadLevel.change_level(1, tbb_outer);
        }
        else {
            if (myId%2 == 0) {
                sleep(3);
                TotalThreadLevel.change_level(-1, tbb_outer);
#pragma omp parallel
                {
                    if (omp_get_thread_num() == 0) {
                        TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner);
                    }
                    // Report the real task id: every even-numbered leaf gets
                    // here, not only the one with myId==0.
                    printf("In OMP parallel region on TBB task with myId=%d: thread %d of %d\n",
                           myId, omp_get_thread_num(), omp_get_num_threads());
                    if (omp_get_thread_num() == 0) {
                        TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner);
                    }
                }
                TotalThreadLevel.change_level(1, tbb_outer);
            }
            else {
                sleep(6);
            }
        }
        TotalThreadLevel.change_level(-1, tbb_outer);
        return NULL;
    }
};
// TBB-outside / OpenMP-inside test driver (64-leaf variant).
// Parses the thread limits, runs one root SimpleTask to completion, prints the
// elapsed time and dumps the recorded thread levels. Always returns 0.
int main(int argc, char *argv[]) {
    TotalThreadLevel.init();
    // The main thread counts as one outer TBB thread while it is running.
    TotalThreadLevel.change_level(1, tbb_outer);
    process_args(argc, argv, &max_tbb_threads, &max_omp_threads);
    task_scheduler_init phase(max_tbb_threads);
    tick_count start, end;
    start = tick_count::now();
    SimpleTask& st = *new(task::allocate_root()) SimpleTask(false, -1);
    // Drop the main thread from the count around the blocking root wait.
    TotalThreadLevel.change_level(-1, tbb_outer);
    task::spawn_root_and_wait(st);
    TotalThreadLevel.change_level(1, tbb_outer);
    end = tick_count::now();
    printf("Simple Test of TBB (%d threads max) with OMP (%d threads max) inside took: %6.6f\n",
           max_tbb_threads, max_omp_threads, (end-start).seconds());
    TotalThreadLevel.change_level(-1, tbb_outer);
    TotalThreadLevel.dump();
    return 0;
}

View file

@ -0,0 +1,167 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <cstddef>
#include <cstdlib>
#include <cstdio>
#include <float.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <omp.h>
#include <assert.h>
#include "thread_level.h"
#include "tbb/task.h"
#include "tbb/tick_count.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/scalable_allocator.h"
using namespace std;
using namespace tbb;
// Algorithm parameters
const int Max_TBB_Threads = 16;
const int Max_OMP_Threads = 16;
// Global variables
int max_tbb_threads = Max_TBB_Threads;
int max_omp_threads = Max_OMP_Threads;
// Print help on command-line arguments.
// Writes a usage banner naming prog_name, followed by the option summary,
// to stderr. Nothing is returned and the process is not terminated here.
void help_message(char *prog_name) {
    static const char option_summary[] =
        " Parameters:\n"
        " -t<num> : max # of threads TBB should use\n"
        " -o<num> : max # of threads OMP should use\n"
        "\n Help:\n"
        " -h : print this help message\n";

    fprintf(stderr, "\n%s usage:\n", prog_name);
    // The summary contains no conversion specifiers, so it is emitted verbatim.
    fputs(option_summary, stderr);
}
// Process command-line arguments.
//
// Recognised options (the value is attached directly to the flag, e.g. "-t4"):
//   -t<num> : store <num> in *max_tbb_t
//   -o<num> : store <num> in *max_omp_t
//   -h      : print the help text and exit(0)
// Every other argument is reported on stderr and ignored.
//
// A value that does not parse as an integer >= 1 is rejected with a warning and
// the destination keeps the value it had on entry. (Previously the rejected
// number was still written through the pointer, so "-t0" left the thread count
// at 0 despite the warning.)
void process_args(int argc, char *argv[], int *max_tbb_t, int *max_omp_t) {
    for (int i=1; i<argc; ++i) {
        const char *arg = argv[i];
        if (arg[0] != '-') {
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            continue;
        }
        switch (arg[1]) {
        case 't': { // set max_tbb_threads
            // arg[1] is non-NUL here, so arg+2 is at worst the terminator.
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_tbb_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -t option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'o': { // set max_omp_threads
            const char *value_text = &arg[2];
            int requested = 0;
            if (sscanf(value_text, "%d", &requested) == 1 && requested >= 1) {
                *max_omp_t = requested;
            } else {
                fprintf(stderr, "%s Warning: argument of -o option unacceptable: %s\n", argv[0], value_text);
                help_message(argv[0]);
            }
            break;
        }
        case 'h': // print help message
            help_message(argv[0]);
            exit(0);
            break;
        default:
            fprintf(stderr, "%s: Warning: command-line option ignored: %s\n", argv[0], argv[i]);
            help_message(argv[0]);
            break;
        }
    }
}
// TBB task used by this test.
// The root (non-leaf) instance spawns 16 leaves and waits for them. The leaf
// with myId==0 sleeps 3 seconds and then opens an OpenMP parallel region;
// every other leaf just sleeps 6 seconds. Thread-level changes are reported
// to TotalThreadLevel throughout.
class SimpleTask : public task {
    bool isLeaf;
    int myId;
public:
    SimpleTask(bool isLeaf_, int myId_) : isLeaf(isLeaf_), myId(myId_) {}

    // Runs the task body; always returns NULL (no continuation task).
    task* execute() {
        TotalThreadLevel.change_level(1, tbb_outer);
        omp_set_num_threads(max_omp_threads);
        if (isLeaf) {
            if (myId != 0) {
                sleep(6);
            } else {
                sleep(3);
                TotalThreadLevel.change_level(-1, tbb_outer);
#pragma omp parallel
                {
                    const bool team_master = (omp_get_thread_num() == 0);
                    if (team_master) {
                        TotalThreadLevel.change_level(omp_get_num_threads(), omp_inner);
                    }
                    printf("In OMP parallel region on TBB task with myId=0: thread %d of %d\n",
                           omp_get_thread_num(), omp_get_num_threads());
                    if (team_master) {
                        TotalThreadLevel.change_level(-omp_get_num_threads(), omp_inner);
                    }
                }
                TotalThreadLevel.change_level(1, tbb_outer);
            }
        } else {
            // One reference per child plus one for the wait below.
            const int leaf_total = 16;
            set_ref_count(leaf_total + 1);
            for (int leaf = 0; leaf != leaf_total; ++leaf) {
                SimpleTask& child = *new(allocate_child()) SimpleTask(true, leaf);
                spawn(child);
            }
            // Not counted as active while blocked waiting for the children.
            TotalThreadLevel.change_level(-1, tbb_outer);
            wait_for_all();
            TotalThreadLevel.change_level(1, tbb_outer);
        }
        TotalThreadLevel.change_level(-1, tbb_outer);
        return NULL;
    }
};
int main(int argc, char *argv[]) {
TotalThreadLevel.init();
TotalThreadLevel.change_level(1, tbb_outer);
process_args(argc, argv, &max_tbb_threads, &max_omp_threads);
task_scheduler_init phase(max_tbb_threads);
tick_count start, end;
start = tick_count::now();
SimpleTask& st = *new(task::allocate_root()) SimpleTask(false, -1);
TotalThreadLevel.change_level(-1, tbb_outer);
task::spawn_root_and_wait(st);
TotalThreadLevel.change_level(1, tbb_outer);
end = tick_count::now();
printf("Simple Test of TBB (%d threads max) with OMP (%d threads max) inside took: %6.6f\n",
max_tbb_threads, max_omp_threads, (end-start).seconds());
TotalThreadLevel.change_level(-1, tbb_outer);
TotalThreadLevel.dump();
return 0;
}

View file

@ -0,0 +1,140 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// Thread level recorder
#ifndef __THREAD_LEVEL_H
#define __THREAD_LEVEL_H
#include <cstdio>
#include <omp.h>
#include "tbb/atomic.h"
#include "tbb/tick_count.h"
#include "../src/test/harness.h"
//#define LOG_THREADS // use this to ifdef out calls to this class
using namespace tbb;
typedef enum {tbb_outer, tbb_inner, omp_outer, omp_inner} client_t;
// Records, over time, how many threads each of four clients (tbb_outer,
// tbb_inner, omp_outer, omp_inner) has reported as active, and checks the sum
// against a maximum obtained in init(). Every change_level() call appends one
// timestamped record; dump() writes the records to "time.txt".
class ThreadLevelRecorder {
// Current level of each client; change_level() adds its delta to one of these.
tbb::atomic<int> tbb_outer_level;
tbb::atomic<int> tbb_inner_level;
tbb::atomic<int> omp_outer_level;
tbb::atomic<int> omp_inner_level;
// One log entry: the time of a change plus the four levels seen at that
// moment (negative levels are stored as 0 by change_level()).
struct record {
tbb::tick_count time;
int n_tbb_outer_thread;
int n_tbb_inner_thread;
int n_omp_outer_thread;
int n_omp_inner_thread;
};
// Index of the next free slot in array; incremented on every logged change,
// so it may exceed max_record_count once the log is full.
tbb::atomic<unsigned> next;
/** Must be power of two */
static const unsigned max_record_count = 1<<20;
// Fixed-capacity log; changes beyond max_record_count entries are not stored.
record array[max_record_count];
// Upper bound for the sum of all four levels; set by init().
int max_threads;
// Decides whether dump() prints "Passed" or "Failed".
bool fail;
public:
// Add delta to the level of whichClient, log the new state and check the total.
void change_level(int delta, client_t whichClient);
// Write the log to "time.txt" and print the pass/fail status.
void dump();
// Reset the failure flag and read the thread limit; call before any change_level().
void init();
};
void ThreadLevelRecorder::change_level(int delta, client_t whichClient) {
int tox=tbb_outer_level, tix=tbb_inner_level, oox=omp_outer_level, oix=omp_inner_level;
if (whichClient == tbb_outer) {
tox = tbb_outer_level+=delta;
} else if (whichClient == tbb_inner) {
tix = tbb_inner_level+=delta;
} else if (whichClient == omp_outer) {
oox = omp_outer_level+=delta;
} else if (whichClient == omp_inner) {
oix = omp_inner_level+=delta;
} else {
printf("WARNING: Bad client type; ignoring.\n");
return;
}
// log non-negative entries
tbb::tick_count t = tbb::tick_count::now();
unsigned k = next++;
if (k<max_record_count) {
record& r = array[k];
r.time = t;
r.n_tbb_outer_thread = tox>=0?tox:0;
r.n_omp_outer_thread = oox>=0?oox:0;
r.n_tbb_inner_thread = tix>=0?tix:0;
r.n_omp_inner_thread = oix>=0?oix:0;
}
char errStr[100];
int tot_threads;
tot_threads = tox+tix+oox+oix;
sprintf(errStr, "ERROR: Number of threads (%d+%d+%d+%d=%d) in use exceeds maximum (%d).\n",
tox, tix, oox, oix, tot_threads, max_threads);
if (tot_threads > max_threads) {
#ifdef NO_BAIL_OUT
if (!fail) {
printf("%sContinuing...\n", errStr);
fail = true;
}
#else
dump();
ASSERT(tot_threads <= max_threads, errStr);
#endif
}
}
// Write every stored record to "time.txt" (overwriting it) as tab-separated
// lines: seconds since the first record, then the tbb_outer, tbb_inner,
// omp_outer and omp_inner counts. Afterwards print "INFO: Passed." or
// "INFO: Failed." according to `fail`. Exits with status 1 if the file cannot
// be opened.
void ThreadLevelRecorder::dump() {
    FILE* f = fopen("time.txt","w");
    if (!f) {
        // perror appends ": <reason>" itself, so the prefix carries no newline.
        perror("fopen(time.txt)");
        exit(1);
    }
    unsigned limit = next;
    if (limit>max_record_count) { // Clip
        limit = max_record_count;
    }
    for (unsigned i=0; i<limit; ++i) {
        fprintf(f,"%f\t%d\t%d\t%d\t%d\n",(array[i].time-array[0].time).seconds(), array[i].n_tbb_outer_thread,
                array[i].n_tbb_inner_thread, array[i].n_omp_outer_thread, array[i].n_omp_inner_thread);
    }
    fclose(f);
    if (!fail) printf("INFO: Passed.\n");
    else printf("INFO: Failed.\n");
}
// Prepare the recorder: clear the failure flag and take the thread limit from
// omp_get_max_threads(), echoing the value on stdout.
void ThreadLevelRecorder::init() {
    const int reported_limit = omp_get_max_threads();
    fail = false;
    max_threads = reported_limit;
    printf("INFO: Getting maximum hardware threads... %d.\n", reported_limit);
}
ThreadLevelRecorder TotalThreadLevel;
#endif

View file

@ -0,0 +1,66 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
// This file is compiled with C++, but linked with a program written in C.
// The intent is to find dependencies on the C++ run-time.
#include <stdlib.h>
#define RML_PURE_VIRTUAL_HANDLER abort
#if _MSC_VER==1500 && !defined(__INTEL_COMPILER)
// VS2008/VC9 seems to have an issue;
#pragma warning( push )
#pragma warning( disable: 4100 )
#endif
#include "rml_omp.h"
#if _MSC_VER==1500 && !defined(__INTEL_COMPILER)
#pragma warning( pop )
#endif
rml::versioned_object::version_type Version;
// Minimal omp_client implementation. Every override returns a constant or does
// nothing; the class exists only so that the client interface is instantiated
// in this translation unit.
class MyClient: public __kmp::rml::omp_client {
public:
    /*override*/rml::versioned_object::version_type version() const {
        return 0;
    }
    /*override*/size_type max_job_count() const {
        return 1024;
    }
    /*override*/size_t min_stack_size() const {
        return 1<<20;
    }
    /*override*/rml::job* create_one_job() {
        return NULL;
    }
    /*override*/void acknowledge_close_connection() {
    }
    /*override*/void cleanup(job&) {
    }
    /*override*/policy_type policy() const {
        return throughput;
    }
    /*override*/void process( job&, void*, __kmp::rml::omp_client::size_type ) {
    }
};
//! Never actually set, because point of test is to find linkage issues.
__kmp::rml::omp_server* MyServerPtr;
extern "C" void Cplusplus() {
MyClient client;
Version = client.version();
}

View file

@ -0,0 +1,154 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "job_automaton.h"
#define HARNESS_NO_PARSE_COMMAND_LINE 1
#include "harness.h"
#include "harness_barrier.h"
// Shared state for one trial of the job_automaton test: the automaton under
// test, the single job handed to it, a two-party barrier to start both
// participants together, and counters used for the consistency checks in the
// destructor.
class State {
// Released once both participants have called exercise().
Harness::SpinBarrier barrier;
// Object under test.
rml::internal::job_automaton ja;
// The only job ever passed to ja.
rml::job job;
// Incremented by the owner right before it publishes `job` via set_and_release.
tbb::atomic<int> job_created;
// Incremented by whichever participant's try_plug() yields a non-NULL job.
tbb::atomic<int> job_destroyed;
// Set by the non-owner after wait_for_job() returns; the owner may spin on it.
tbb::atomic<bool> job_received;
public:
State() : barrier(2) {
job_created = 0;
job_destroyed = 0;
job_received = false;
}
// Run one participant's half of the trial; is_owner selects which half.
void exercise( bool is_owner );
// Verifies that a created job was destroyed exactly once, and at most one job
// was destroyed overall.
~State() {
ASSERT( job_created==job_destroyed, "accounting error" );
ASSERT( job_destroyed<=1, "destroyed job twice" );
}
};
int DelayMask;
const int N = 14;
tbb::atomic<int> Coverage[N];
//! Count a visit to interval k and yield if bit k of DelayMask is set.
/** An interval is the stretch of code between two operations on the
    job_automaton under test. k must be less than N. */
void Cover( int k ) {
    ASSERT( k<N, NULL );
    ++Coverage[k];
    const bool delay_requested = ((DelayMask>>k) & 1) != 0;
    if( !delay_requested )
        return;
    // Introduce delay (and possibly a thread context switch)
    __TBB_Yield();
}
// One participant's half of a trial. Two threads call this on the same State,
// one with is_owner==true and one with is_owner==false; both block on the
// barrier first so that they start together. Cover(k) calls mark which
// interleaving points were reached (and optionally yield there).
void State::exercise( bool is_owner ) {
barrier.wait();
if( is_owner ) {
// Owner side: try to acquire the automaton, publish the job, then
// re-acquire/release up to two more times.
Cover(0);
if( ja.try_acquire() ) {
Cover(1);
++job_created;
ja.set_and_release(job);
Cover(2);
if( ja.try_acquire() ) {
Cover(3);
ja.release();
Cover(4);
if( ja.try_acquire() ) {
Cover(5);
ja.release();
}
}
Cover(6);
} else {
// First acquire failed.
// NOTE(review): presumably because the other thread plugged first - confirm against job_automaton.
Cover(7);
}
// Bit N of DelayMask (one past the Cover bits) selects the wait_for_job
// variant; the owner then spins until the other thread has received the job.
if( DelayMask&1<<N ) {
while( !job_received )
__TBB_Yield();
}
} else {
// Using extra bit of DelayMask for choosing whether to run wait_for_job or not.
if( DelayMask&1<<N ) {
rml::job* j= &ja.wait_for_job();
// Diagnostic output before the assertion fires on a wrong pointer.
if( j!=&job ) printf("%p\n",j);
ASSERT( j==&job, NULL );
job_received = true;
}
Cover(8);
}
// Both participants finish by trying to plug the automaton.
rml::job* j;
if( ja.try_plug(j) ) {
ASSERT( j==&job || !j, NULL );
if( j ) {
// Interval 9 (non-owner) or 10 (owner): plugged and got the job back.
Cover(9+is_owner);
++job_destroyed;
} else {
__TBB_ASSERT( !is_owner, "owner failed to create job but plugged self" );
Cover(11);
}
} else {
// Interval 12 (non-owner) or 13 (owner): try_plug returned false.
Cover(12+is_owner);
}
}
// Functor handed to NativeParallelFor: index 0 plays the owner role in
// State::exercise, every other index plays the non-owner.
class Loop: NoAssign {
    State& s;
public:
    Loop(State& s_) : s(s_) {}
    void operator()( int i ) const {
        const bool plays_owner = (i==0);
        s.exercise(plays_owner);
    }
};
/** Return true if every one of the N coverage counters has reached the
    required minimum. If report==true, print a warning for each counter
    that has not. */
bool CheckCoverage( bool report ) {
    const int min_coverage = 4;
    int shortfalls = 0;
    for( int i=0; i<N; ++i ) {
        if( Coverage[i]>=min_coverage )
            continue;
        ++shortfalls;
        if( report )
            printf("Warning: Coverage[%d]=%d is less than acceptable minimum of %d\n", i, int(Coverage[i]),min_coverage);
    }
    return shortfalls==0;
}
// Run two-thread trials with successive DelayMask values until coverage is
// acceptable or the mask range is exhausted, then report any remaining
// coverage gaps. Always prints "done" and returns 0.
int main() {
    const int mask_limit = 8<<N;
    DelayMask = 0;
    while( DelayMask<mask_limit ) {
        State s;
        NativeParallelFor( 2, Loop(s) );
        // Reached acceptable code coverage level
        if( CheckCoverage(false) )
            break;
        ++DelayMask;
    }
    CheckCoverage(true);
    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,247 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "rml_tbb.h"
#include "rml_omp.h"
#include "tbb/atomic.h"
#include "tbb/tick_count.h"
#include "harness.h"
const int OMP_ParallelRegionSize = 16;
int TBB_MaxThread = 4; // Includes master
int OMP_MaxThread = int(~0u>>1); // Includes master
// Common implementation of the client callbacks shared by the TBB and OpenMP
// test clients. Jobs are plain heap-allocated rml::job objects, and the client
// deletes itself once the close of its connection is acknowledged.
template<typename Client>
class ClientBase: public Client {
protected:
    typedef typename Client::version_type version_type;
    typedef typename Client::job job;
    typedef typename Client::policy_type policy_type;

private:
    /*override*/version_type version() const { return 0; }

    /*override*/size_t min_stack_size() const { return 1<<20; }

    /*override*/job* create_one_job() { return new rml::job; }

    /*override*/policy_type policy() const { return Client::turnaround; }

    // The client owns itself: it is destroyed when the close is acknowledged.
    /*override*/void acknowledge_close_connection() { delete this; }

    // Frees a job previously returned by create_one_job().
    /*override*/void cleanup( job& j ) {
        job* finished = &j;
        delete finished;
    }
};
//! Represents a TBB or OpenMP run-time that uses RML.
/** Opens its factory on construction and closes it on destruction. The client
    and server pointers are filled in by create_connection(); the constructor
    does not initialize them. */
template<typename Factory, typename Client>
class RunTime {
public:
//! Factory that run-time uses to make servers.
Factory factory;
//! Client object passed to the server; allocated in create_connection().
Client* client;
//! Server obtained from the factory; reset to NULL by destroy_connection().
typename Factory::server_type* server;
RunTime() {
factory.open();
}
~RunTime() {
factory.close();
}
//! Create server for this run-time
void create_connection();
//! Destroy server for this run-time
void destroy_connection();
};
// Keeps a running count of active threads and a timestamped log of every
// change to it; dump() writes the log to "time.txt".
class ThreadLevelRecorder {
// Current thread level; change_level() adds its delta here.
tbb::atomic<int> level;
// One log entry: when the change happened and the level right after it.
struct record {
tbb::tick_count time;
int nthread;
};
// Index of the next free slot in array; incremented on every change, so it
// may exceed max_record_count once the log is full.
tbb::atomic<unsigned> next;
/** Must be power of two */
static const unsigned max_record_count = 1<<20;
// Fixed-capacity log; changes beyond max_record_count entries are not stored.
record array[max_record_count];
public:
// Add delta to the level and log the new value with a timestamp.
void change_level( int delta );
// Write the log to "time.txt".
void dump();
};
void ThreadLevelRecorder::change_level( int delta ) {
int x = level+=delta;
tbb::tick_count t = tbb::tick_count::now();
unsigned k = next++;
if( k<max_record_count ) {
record& r = array[k];
r.time = t;
r.nthread = x;
}
}
// Write every stored record to "time.txt" (overwriting it) as tab-separated
// lines: seconds since the first record, then the thread level. Exits with
// status 1 if the file cannot be opened.
void ThreadLevelRecorder::dump() {
    FILE* f = fopen("time.txt","w");
    if( !f ) {
        // perror appends ": <reason>" itself, so the prefix carries no newline.
        perror("fopen(time.txt)");
        exit(1);
    }
    unsigned limit = next;
    if( limit>max_record_count ) {
        // Clip: `next` keeps counting after the log is full, so cap the loop
        // at the array capacity. (The old code assigned `next` again, which
        // left limit unchanged and let the loop read past the end of array.)
        limit = max_record_count;
    }
    for( unsigned i=0; i<limit; ++i ) {
        fprintf(f,"%f\t%d\n",(array[i].time-array[0].time).seconds(),array[i].nthread);
    }
    fclose(f);
}
ThreadLevelRecorder TotalThreadLevel;
// Test client for the TBB side of RML.
class TBB_Client: public ClientBase<tbb::internal::rml::tbb_client> {
// Work callback; defined out of line below.
/*override*/void process( job& j );
// TBB_MaxThread includes the master, so one fewer job is requested.
/*override*/size_type max_job_count() const {
return TBB_MaxThread-1;
}
};
// Test client for the OpenMP side of RML.
class OMP_Client: public ClientBase<__kmp::rml::omp_client> {
// Work callback; cookie carries the OMP_Team* supplied to get_threads().
/*override*/void process( job&, void* cookie, omp_client::size_type );
// OMP_MaxThread includes the master, so one fewer job is requested.
/*override*/size_type max_job_count() const {
return OMP_MaxThread-1;
}
};
RunTime<tbb::internal::rml::tbb_factory, TBB_Client> TBB_RunTime;
RunTime<__kmp::rml::omp_factory, OMP_Client> OMP_RunTime;
// Allocate a fresh client and ask the factory for a server bound to it.
// Asserts that the factory reports st_success.
template<typename Factory, typename Client>
void RunTime<Factory,Client>::create_connection() {
    Client* fresh_client = new Client;
    client = fresh_client;
    const typename Factory::status_type outcome = factory.make_server( server, *fresh_client );
    ASSERT( outcome==Factory::st_success, NULL );
}
// Ask the server to close the connection, then forget the server pointer.
// The client is not deleted here; ClientBase deletes itself when the close is
// acknowledged.
template<typename Factory, typename Client>
void RunTime<Factory,Client>::destroy_connection() {
server->request_close_connection();
// Cleared only after the request so the pointer stays valid up to this point.
server = NULL;
}
// Stand-in for an OpenMP team: carries only the counter that workers bump
// when they finish, which the master polls. The server argument is unused.
class OMP_Team {
public:
OMP_Team( __kmp::rml::omp_server& ) {}
// Number of workers that have finished; reset by the master before use.
tbb::atomic<unsigned> barrier;
};
tbb::atomic<int> AvailWork;
tbb::atomic<int> CompletionCount;
void OMPWork() {
tbb::atomic<int> x;
for( x=0; x<2000000; ++x ) {
continue;
}
}
void TBBWork() {
if( AvailWork>=0 ) {
int k = --AvailWork;
if( k==-1 ) {
TBB_RunTime.server->adjust_job_count_estimate(-(TBB_MaxThread-1));
++CompletionCount;
} else if( k>=0 ) {
for( int k=0; k<4; ++k ) {
OMP_Team team( *OMP_RunTime.server );
int n = OMP_RunTime.server->try_increase_load( OMP_ParallelRegionSize-1, /*strict=*/false );
team.barrier = 0;
::rml::job* array[OMP_ParallelRegionSize-1];
if( n>0)
OMP_RunTime.server->get_threads( n, &team, array );
// Master does work inside parallel region too.
OMPWork();
// Master waits for workers to finish
if( n>0 )
while( team.barrier!=unsigned(n) ) {
__TBB_Yield();
}
}
++CompletionCount;
}
}
}
// Work callback for a TBB job: count this thread as active for the duration
// of one TBBWork() call. The job argument is not used.
/*override*/void TBB_Client::process( job& ) {
TotalThreadLevel.change_level(1);
TBBWork();
TotalThreadLevel.change_level(-1);
}
// Work callback for an OpenMP job: perform one OMPWork() unit while counted as
// an active thread, then signal completion on the team passed through cookie.
/*override*/void OMP_Client::process( job& /* j */, void* cookie, omp_client::size_type ) {
    OMP_Team* const team = static_cast<OMP_Team*>(cookie);
    TotalThreadLevel.change_level(1);
    ASSERT( OMP_RunTime.server, NULL );
    OMPWork();
    ASSERT( OMP_RunTime.server, NULL );
    // Tell the polling master that one more worker is done.
    team->barrier+=1;
    TotalThreadLevel.change_level(-1);
}
// Master-side driver: publish 32 work units, request TBB workers, and keep
// calling TBBWork() until all units plus the final bookkeeping step have been
// counted in CompletionCount.
void TBBOutSideOpenMPInside() {
    const int tbbtasks = 32;
    const int expected_completions = tbbtasks+1;

    TotalThreadLevel.change_level(1);
    CompletionCount = 0;
    AvailWork = tbbtasks;
    TBB_RunTime.server->adjust_job_count_estimate(TBB_MaxThread-1);
    while( CompletionCount!=expected_completions )
        TBBWork();
    TotalThreadLevel.change_level(-1);
}
//! Test driver: for each thread count in [MinThread,MaxThread], open both RML connections,
//! run the mixed TBB/OpenMP workload, and close the connections again.
int main( int argc, char* argv[] ) {
// Set defaults
MinThread = 4;
MaxThread = 4;
ParseCommandLine(argc,argv);
// NOTE(review): the loop variable below is a new local named TBB_MaxThread. TBBWork and
// TBBOutSideOpenMPInside also read a name TBB_MaxThread, which from there must resolve to
// a file-scope object; if so, that object is shadowed here and never updated by this
// loop. Confirm whether the sweep was meant to assign to the file-scope variable.
for( int TBB_MaxThread=MinThread; TBB_MaxThread<=MaxThread; ++TBB_MaxThread ) {
if( Verbose ) printf("Testing with TBB_MaxThread=%d\n", TBB_MaxThread);
TBB_RunTime.create_connection();
OMP_RunTime.create_connection();
TBBOutSideOpenMPInside();
// Connections are closed in the reverse order of their creation.
OMP_RunTime.destroy_connection();
TBB_RunTime.destroy_connection();
}
TotalThreadLevel.dump();
printf("done\n");
return 0;
}

View file

@ -0,0 +1,173 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "rml_omp.h"
#include "test_server.h"
#include "tbb/tbb_misc.h"
//! Server and factory types under test in this file (the OpenMP flavor of RML).
typedef __kmp::rml::omp_server MyServer;
typedef __kmp::rml::omp_factory MyFactory;
//! Passed as the strictness flag to try_increase_load and returned by MyClient::is_strict; toggled by main.
static bool StrictTeam;
//! State shared between the master (FireUpJobs) and the workers of one team.
/** The object owns a heap array of per-thread records, so it must never be
    copied: both the copy constructor and copy assignment are declared private
    and left undefined.  (Previously only assignment was disabled, so an
    accidental copy would have deleted the array twice.) */
class MyTeam {
    MyTeam( const MyTeam& );
    MyTeam& operator=( const MyTeam& ) ;
public:
    //! Record kept for each potential team member.
    struct info_type {
        //! Job that ran in this slot, or NULL if none has yet.
        rml::job* job;
        //! True once MyClient::process has run for this slot in the current iteration.
        bool ran;
        info_type() : job(NULL), ran(false) {}
    };
    //! Create a team with room for max_thread_ members.  The server argument is unused.
    MyTeam( MyServer& /* server */, size_t max_thread_ ) :
        max_thread(max_thread_),
        n_thread(0),
        iteration(0)
    {
        // Give every member a defined value; callers overwrite these before use.
        barrier = 0;
        self_ptr = this;
        info = new info_type[max_thread];
    }
    ~MyTeam() {
        delete[] info;
    }
    //! Number of entries in info.
    const size_t max_thread;
    //! Number of threads delivered for the current iteration (set by FireUpJobs).
    size_t n_thread;
    //! Incremented by each worker when it finishes; the master spins on it.
    tbb::atomic<int> barrier;
    /** Indexed by the "index" argument of MyClient::process.
        NOTE(review): this was documented as a 1-origin index, but the visible
        users check index<max_thread and scan the array from element 0. */
    info_type* info;
    int iteration;
    //! Always points at *this; lets workers detect a trashed cookie.
    MyTeam* self_ptr;
};
//! OpenMP-flavored test client.
/** Each worker records that it ran in its team's info slot, performs the common
    do_process step, and finally signals the team barrier.  The worker with
    index 1 additionally opens a nested connection while the nesting limit
    has not been reached. */
class MyClient: public ClientBase<__kmp::rml::omp_client> {
public:
    //! Server this client is attached to; set by FireUpJobs.
    MyServer* server;

    static const bool is_omp = true;

    bool is_strict() const {
        return StrictTeam;
    }

    /*override*/void process( job& j, void* cookie, size_type index ) {
        MyTeam& t = *static_cast<MyTeam*>(cookie);
        ASSERT( t.self_ptr==&t, "trashed cookie" );
        ASSERT( index<t.max_thread, NULL );
        ASSERT( !t.info[index].ran, "duplicate index?" );
        MyTeam::info_type& slot = t.info[index];
        slot.job = &j;
        slot.ran = true;
        do_process(j);
        const bool open_nested = index==1 && nesting.level<nesting.limit;
        if( open_nested ) {
            const Nesting deeper(nesting.level+1,nesting.limit);
            DoOneConnection<MyFactory,MyClient> doc(MaxThread,deeper,0,false);
            doc(0);
        }
        // Must be last: the master may proceed as soon as the barrier is complete.
        ++t.barrier;
    }
};
//! Exercise an OpenMP-style server: four times, request a team, let it run, and verify who ran.
/** \param server     server obtained for client
    \param client     client attached to server; its "server" member is set here
    \param max_thread upper bound on the team size requested (must be >=0)
    \param n_extra    value reported via independent_thread_number_changed around each request
    \param checker    if non-NULL, used to sanity-check the number of threads delivered */
void FireUpJobs( MyServer& server, MyClient& client, int max_thread, int n_extra, Checker* checker ) {
ASSERT( max_thread>=0, NULL );
client.server = &server;
MyTeam team(server,size_t(max_thread));
MyServer::size_type n_thread = 0;
for( int iteration=0; iteration<4; ++iteration ) {
// Forget which team members ran in the previous iteration.
for( size_t i=0; i<team.max_thread; ++i )
team.info[i].ran = false;
// Choose the team size to request: iterations 0 and 3 ask for max_thread,
// iteration 1 repeats the previous size, iteration 2 asks for half.
switch( iteration ) {
default:
n_thread = int(max_thread);
break;
case 1:
// No change in number of threads
break;
case 2:
// Decrease number of threads.
n_thread = int(max_thread)/2;
break;
// Case 3 is same code as the default, but has effect of increasing the number of threads.
}
team.barrier = 0;
if( Verbose ) {
printf("client %d: server.run with n_thread=%d\n", client.client_id(), int(n_thread) );
}
server.independent_thread_number_changed( n_extra );
if( checker ) {
// Give RML time to respond to change in number of threads.
MilliSleep(1);
}
int n_delivered = server.try_increase_load( n_thread, StrictTeam );
team.n_thread = n_delivered;
::rml::job* job_array[JobArraySize];
// Plant a sentinel just past the last entry get_threads is expected to fill,
// so that an overrun can be detected afterwards.
job_array[n_delivered] = (::rml::job*)intptr_t(-1);
server.get_threads( n_delivered, &team, job_array );
__TBB_ASSERT( job_array[n_delivered]== (::rml::job*)intptr_t(-1), NULL );
// Every delivered job must be one of ours and be either idle or busy.
for( int i=0; i<n_delivered; ++i ) {
MyJob* j = static_cast<MyJob*>(job_array[i]);
int s = j->state;
ASSERT( s==MyJob::idle||s==MyJob::busy, NULL );
}
// Undo the earlier independent_thread_number_changed call.
server.independent_thread_number_changed( -n_extra );
if( Verbose ) {
printf("client %d: team size is %d\n", client.client_id(), n_delivered);
}
if( checker ) {
checker->check_number_of_threads_delivered( n_delivered, n_thread, n_extra );
}
// Protocol requires that master wait until workers have called "done_processing"
while( team.barrier!=n_delivered ) {
ASSERT( team.barrier>=0, NULL );
ASSERT( team.barrier<=n_delivered, NULL );
__TBB_Yield();
}
if( Verbose ) {
printf("client %d: team completed\n", client.client_id() );
}
// Slots [0,n_delivered) must have run; slots [n_delivered,max_thread) must not have.
for( int i=0; i<n_delivered; ++i ) {
ASSERT( team.info[i].ran, "thread on team allegedly delivered, but did not run?" );
}
for( MyServer::size_type i=n_delivered; i<MyServer::size_type(max_thread); ++i ) {
ASSERT( !team.info[i].ran, "thread on team ran with illegal index" );
}
ASSERT( !StrictTeam || n_delivered==int(n_thread), "server failed to satisfy strict request" );
}
}
//! OpenMP-specific check run by VerifyInitialization on a freshly made server.
/** Asserts that the server's current balance is one less than the value
    returned by tbb::internal::DetectNumberOfWorkers().  n_thread is unused. */
void DoClientSpecificVerification( MyServer& server, int /*n_thread*/ ) {
    ASSERT( server.current_balance()==int(tbb::internal::DetectNumberOfWorkers())-1, NULL );
}
//! Test driver: run the initialization check and SimpleTest with strict teams, then again with non-strict teams.
int main( int argc, char* argv[] ) {
    // Set defaults
    MinThread = 0;
    MaxThread = 4;
    ParseCommandLine(argc,argv);
    // Same sequence for both modes; strict first.
    const bool strict_modes[2] = { true, false };
    for( int pass=0; pass<2; ++pass ) {
        StrictTeam = strict_modes[pass];
        VerifyInitialization<MyFactory,MyClient>( MaxThread );
        SimpleTest<MyFactory,MyClient>();
    }
    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,37 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include <stdio.h>
/* Defined in another translation unit. */
void Cplusplus();

/* Call Cplusplus() once, then report success on stdout. */
int main() {
    Cplusplus();
    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,122 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "rml_tbb.h"
#include "test_server.h"
//! Server and factory types under test in this file (the TBB flavor of RML).
typedef tbb::internal::rml::tbb_server MyServer;
typedef tbb::internal::rml::tbb_factory MyFactory;
//! TBB-flavored test client: processing a job is just the common do_process step.
class MyClient: public ClientBase<tbb::internal::rml::tbb_client> {
    //! Set to 1 on construction; not otherwise used within this class.
    tbb::atomic<int> counter;

    /*override*/void process( job& j ) {
        do_process(j);
    }
public:
    static const bool is_omp = false;

    MyClient() {
        counter = 1;
    }

    //! TBB clients never make strict requests.
    bool is_strict() const {
        return false;
    }
};
//! Exercise a TBB-style server: request n_thread jobs, wait until they have been processed, withdraw the request.
/** \param server   server obtained for client
    \param client   client whose job_array records how often each job was processed
    \param n_thread number of jobs to request via adjust_job_count_estimate
    \param n_extra  value reported via independent_thread_number_changed around the request
    \param checker  if non-NULL, the number of jobs seen running is sampled and checked */
void FireUpJobs( MyServer& server, MyClient& client, int n_thread, int n_extra, Checker* checker ) {
if( Verbose )
printf("client %d: calling adjust_job_count_estimate(%d)\n", client.client_id(),n_thread);
// Exercise independent_thread_number_changed, even for zero values.
server.independent_thread_number_changed( n_extra );
// Experiments indicate that when oversubscribing, the main thread should wait a little
// while for the RML worker threads to do some work.
int delay = n_thread>int(server.default_concurrency()) ? 50 : 1;
if( checker ) {
// Give RML time to respond to change in number of threads.
MilliSleep(delay);
// Start counting from zero so that n_used below reflects this request only.
for( int k=0; k<n_thread; ++k )
client.job_array[k].processing_count = 0;
}
server.adjust_job_count_estimate( n_thread );
int n_used = 0;
if( checker ) {
// After a pause, count the jobs that have been processed at least once.
MilliSleep(delay);
for( int k=0; k<n_thread; ++k )
if( client.job_array[k].processing_count )
++n_used;
}
// Logic further below presumes that jobs never starve, so undo previous call
// to independent_thread_number_changed before waiting on those jobs.
server.independent_thread_number_changed( -n_extra );
if( Verbose )
printf("client %d: wait for each job to be processed at least once\n",client.client_id());
// Calculate the number of jobs that are expected to get threads.
// Typically this is equal to n_thread. But if nested, subtract 1 to account for the fact
// that this thread itself cannot process the job.
int expected = client.nesting.level==0 ? n_thread : n_thread-1;
// Wait for expected number of jobs to be processed.
if( client.nesting.level==0 ) {
for(;;) {
int n = 0;
for( int k=0; k<n_thread; ++k )
if( client.job_array[k].processing_count!=0 )
++n;
if( n>=expected ) break;
server.yield();
}
} else {
// In the nested case nothing is waited for; "expected" goes unused.
printf("testing of nested tbb execution is yet to be supported\n");
}
// Withdraw the request made above.
server.adjust_job_count_estimate(-n_thread);
if( checker )
checker->check_number_of_threads_delivered( n_used, n_thread, n_extra );
}
//! TBB-specific check run by VerifyInitialization while its connection is still open.
/** Opens a second factory and asks it for a server on behalf of a second
    client, expecting the request to be refused with st_connection_exists.
    Because that client never gets a connection, it is torn down by hand here.
    \param n_thread job count the temporary client is initialized with */
void DoClientSpecificVerification( MyServer&, int n_thread )
{
    MyClient* client = new MyClient;
    client->initialize( n_thread, Nesting(), ClientStackSize[0] );
    MyFactory factory;
    memset( &factory, 0, sizeof(factory) );
    MyFactory::status_type status = factory.open();
    ASSERT( status!=MyFactory::st_not_found, "could not find RML library" );
    ASSERT( status!=MyFactory::st_incompatible, NULL );
    ASSERT( status==MyFactory::st_success, NULL );
    MyFactory::server_type* server;
    status = factory.make_server( server, *client );
    ASSERT( status==MyFactory::st_connection_exists, "Did the first connection get lost?" );
    factory.close();
    // initialize() allocated job_array.  It is normally released in
    // acknowledge_close_connection(), which is never called for this client,
    // so release it here to avoid leaking it.
    delete[] client->job_array;
    client->job_array = NULL;
    // The destructor asserts state==destroyed, so make the transition by hand.
    client->update(MyClient::destroyed, MyClient::live);
    delete client;
}
//! Test driver: verify initialization of the TBB server, then run SimpleTest.
int main( int argc, char* argv[] ) {
    // Set defaults
    MinThread = 0;
    MaxThread = 4;
    ParseCommandLine( argc, argv );

    VerifyInitialization<MyFactory,MyClient>( MaxThread );
    SimpleTest<MyFactory,MyClient>();

    printf("done\n");
    return 0;
}

View file

@ -0,0 +1,398 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
/* This header contains code shared by test_omp_server.cpp and test_tbb_server.cpp
There is no ifndef guard - test is supposed to include this file exactly once.
The test is also expected to have #include of rml_omp.h or rml_tbb.h before
including this header.
This header should not use any parts of TBB that require linking in the TBB run-time.
It uses a few instances of tbb::atomic<T>, all of which are completely inlined. */
#include "tbb/atomic.h"
#include "tbb/tbb_thread.h"
#include "harness.h"
#include "harness_memory.h"
//! Define TRIVIAL as 1 to test only a single client, no nesting, no extra threads.
#define TRIVIAL 0
//! Maximum number of clients
#if TRIVIAL
const size_t MaxClient = 1;
#else
const size_t MaxClient = 4;
#endif
//! Stack size passed to initialize() for the ith client.
const size_t ClientStackSize[MaxClient] = {
1000000
#if !TRIVIAL
,2000000
,1000000
,4000000
#endif /* TRIVIAL */
};
//! Amount subtracted from a client's stack size before do_process tries to use the rest.
const size_t OverheadStackSize = 500000;
//! Number of MyJob objects allocated per client; also the size of local job arrays in the tests.
const size_t JobArraySize = 1000;
#if _WIN32||_WIN64
#include <Windows.h> /* Need Sleep */
#else
#include <unistd.h> /* Need usleep */
#endif
//! Suspend the calling thread for the given number of milliseconds.
/** Uses usleep everywhere except on Windows, where Sleep is used. */
void MilliSleep( unsigned milliseconds ) {
#if !(_WIN32||_WIN64)
    usleep( milliseconds*1000 );
#else
    Sleep( milliseconds );
#endif /* !(_WIN32||_WIN64) */
}
//! Job object used by the test clients; tracks its life-cycle state and how often it was processed.
class MyJob: public ::rml::job {
public:
    //! Enumeration for tracking states of a job.
    enum state_t {
        //! Job has not yet been allocated.
        unallocated,
        //! Is idle.
        idle,
        //! Has a thread working on it.
        busy,
        //! After call to client::cleanup
        clean
    };

    //! Current state_t value, stored as int so it can be updated atomically.
    tbb::atomic<int> state;

    //! Number of times the job has been processed.
    volatile int processing_count;

    MyJob() {
        processing_count = 0;
        state = unallocated;
    }

    ~MyJob() {
        // Overwrite so that accidental use after destruction can be detected.
        memset(this,-1,sizeof(*this));
    }

    //! Atomically move from old_state to new_state; asserts that the job was in old_state.
    void update( state_t new_state, state_t old_state ) {
        int o = state.compare_and_swap(new_state,old_state);
        ASSERT( o==old_state, "illegal transition" );
    }

    //! Atomically move to new_state from whichever of the two old states the job is in.
    void update_from_either( state_t new_state, state_t old_state1, state_t old_state2 ) {
        for(;;) {
            int snapshot = state;
            ASSERT( snapshot==old_state1||snapshot==old_state2, "illegal transition" );
            // Retry if the state changed between the snapshot and the swap.
            if( state.compare_and_swap(new_state,snapshot)==snapshot )
                break;
        }
    }
};
//! Number of ClientBase objects constructed so far; also the source of client ids.
static tbb::atomic<int> ClientConstructions;
//! Number of ClientBase objects destroyed so far; SimpleTest waits until it equals ClientConstructions.
static tbb::atomic<int> ClientDestructions;
//! Current nesting depth of a connection together with the deepest level allowed.
struct Nesting {
    int level;
    int limit;

    //! No nesting: level and limit are both zero.
    Nesting() {
        level = 0;
        limit = 0;
    }

    Nesting( int level_, int limit_ ) {
        level = level_;
        limit = limit_;
    }
};
//! Common implementation of the RML client interface for the test clients.
/** Client is the RML client interface being implemented.  The object tracks its
    own life cycle in "state", owns an array of MyJob objects that it hands out
    through create_one_job, and deletes itself when the server acknowledges that
    the connection has been closed. */
template<typename Client>
class ClientBase: public Client {
protected:
typedef typename Client::size_type size_type;
typedef typename Client::version_type version_type;
typedef typename Client::policy_type policy_type;
typedef typename Client::job job;
private:
//! Value reported by max_job_count(); set by initialize().
size_type my_max_job_count;
//! Value reported by min_stack_size(); set by initialize().
size_t my_stack_size;
//! Index in job_array of the next job that create_one_job will hand out.
tbb::atomic<size_t> next_job_index;
//! Sequence number taken from ClientConstructions.
int my_client_id;
//! Server set through set_server(); NULL until then.
rml::server* my_server;
public:
enum state_t {
//! Treat *this as constructed.
live=0x1,
//! Treat *this as destroyed.
destroyed=0xDEAD
};
tbb::atomic<int> state;
//! Atomically move from old_state to new_state; asserts that the client was in old_state.
void update( state_t new_state, state_t old_state ) {
int o = state.compare_and_swap(new_state,old_state);
ASSERT( o==old_state, NULL );
}
//! Set by the test just before it requests that the connection be closed.
tbb::atomic<bool> expect_close_connection;
//! Array of JobArraySize jobs allocated by initialize().
MyJob *job_array;
/*override*/version_type version() const {
ASSERT( state==live, NULL );
return 1;
}
/*override*/size_type max_job_count() const {
ASSERT( state==live, NULL );
return my_max_job_count;
}
/*override*/size_t min_stack_size() const {
ASSERT( state==live, NULL );
return my_stack_size;
}
/*override*/policy_type policy() const {return Client::throughput;}
//! Checks that every job handed out has been cleaned up, frees the jobs, and deletes *this.
/*override*/void acknowledge_close_connection() {
ASSERT( expect_close_connection, NULL );
// Visit the jobs handed out so far, last one first.
for( size_t k=next_job_index; k>0; ) {
--k;
ASSERT( job_array[k].state==MyJob::clean, NULL );
}
delete[] job_array;
job_array = NULL;
ASSERT( my_server, NULL );
// The destructor asserts state==destroyed.
update( destroyed, live );
delete this;
}
//! Moves the job from idle to clean.
/*override*/void cleanup( job& j_ ) {
if( Verbose )
printf("client %d: cleanup(%p) called\n",client_id(),&j_);
ASSERT( state==live, NULL );
MyJob& j = static_cast<MyJob&>(j_);
j.update(MyJob::clean,MyJob::idle);
if( Verbose )
printf("client %d: cleanup(%p) returns\n",client_id(),&j_);
}
job* create_one_job();
protected:
//! Common part of processing a job: mark it busy, count the visit, use stack space, mark it idle, yield.
void do_process( job& j_ ) {
ASSERT( state==live, NULL );
MyJob& j = static_cast<MyJob&>(j_);
ASSERT( &j, NULL );
j.update(MyJob::busy,MyJob::idle);
++j.processing_count;
ASSERT( my_stack_size>OverheadStackSize, NULL );
#ifdef __ia64__
// Half of the stack is reserved for RSE, so test only remaining half.
UseStackSpace( (my_stack_size-OverheadStackSize)/2 );
#else
UseStackSpace( my_stack_size-OverheadStackSize );
#endif
j.update(MyJob::idle,MyJob::busy);
my_server->yield();
}
public:
//! Takes the next client id.  Most other members are set later by initialize().
ClientBase() : my_server(NULL) {
my_client_id = ClientConstructions++;
next_job_index = 0;
}
int client_id() const {return my_client_id;}
Nesting nesting;
//! Record the client's parameters, allocate its jobs, and mark it live.
void initialize( size_type max_job_count, Nesting nesting_, size_t stack_size ) {
ASSERT( stack_size>0, NULL );
my_max_job_count = max_job_count;
nesting = nesting_;
my_stack_size = stack_size;
job_array = new MyJob[JobArraySize];
expect_close_connection = false;
state = live;
}
void set_server( rml::server* s ) {my_server=s;}
//! Asserts that the client was marked destroyed before being deleted.
virtual ~ClientBase() {
ASSERT( state==destroyed, NULL );
++ClientDestructions;
}
};
//! Hand out the next unused entry of job_array, moving it from unallocated to idle.
template<typename Client>
typename Client::job* ClientBase<Client>::create_one_job() {
    if( Verbose ) {
        printf("client %d: create_one_job() called\n",client_id());
    }
    const size_t k = next_job_index++;
    ASSERT( state==live, NULL );
    // Following assertion depends on assumption that implementation does not destroy jobs until
    // the connection is closed. If the implementation is changed to destroy jobs sooner, the
    // test logic in this header will have to be reworked.
    ASSERT( k<my_max_job_count, "RML allocated more than max_job_count jobs simultaneously" );
    ASSERT( k<JobArraySize, "JobArraySize not big enough (problem is in test, not RML)" );
    MyJob* const slot = job_array+k;
    slot->update(MyJob::idle,MyJob::unallocated);
    if( Verbose ) {
        printf("client %d: create_one_job() for k=%d returns %p\n",client_id(),int(k),slot);
    }
    return slot;
}
//! Sanity checker for the number of threads a server delivers.
class Checker {
public:
    //! The server's default concurrency, sampled once at construction.
    int default_concurrency;

    Checker( rml::server& server ) {
        default_concurrency = int(server.default_concurrency());
    }

    void check_number_of_threads_delivered( int n_delivered, int n_requested, int n_extra ) const;
};
//! Print a warning if the number of threads delivered looks unreasonable.
/** \param n_delivered number of threads the server actually delivered
    \param n_requested number of threads that were requested
    \param n_extra     number of threads reported as running outside the RML
    Only warns; never fails the test. */
void Checker::check_number_of_threads_delivered( int n_delivered, int n_requested, int n_extra ) const {
    ASSERT( default_concurrency>=0, NULL );
    // Check that number of threads delivered is reasonable.
    int n_avail = n_extra>0 ? default_concurrency-n_extra : default_concurrency;
    if( n_avail<0 )
        n_avail = 0;
    // If the client asked for more threads than the hardware provides, the difference becomes private threads
    // that are available regardless of what else is running.
    if( n_requested>default_concurrency )
        n_avail += n_requested-default_concurrency;
    const int n_expected = n_requested>n_avail ? n_avail : n_requested;
    const char* msg;
    if( n_delivered>n_avail )
        msg = "server delivered more threads than were theoretically available";
    else if( n_delivered>n_expected )
        msg = "server delivered more threads than expected";
    else if( n_delivered<n_expected )
        msg = "server delivered fewer threads than ideal";
    else
        msg = NULL;
    if( msg==NULL )
        return;
    printf("Warning: %s (n_delivered=%d n_avail=%d n_requested=%d n_extra=%d default_concurrency=%d)\n",
           msg, n_delivered, n_avail, n_requested, n_extra, default_concurrency );
}
//! Functor that opens one connection, exercises it, and closes it again.
/** All parameters are fixed at construction; operator() may then be invoked
    with the index of the client to act as. */
template<typename Factory,typename Client>
class DoOneConnection: NoAssign {
    //! Number of threads to request
    const int n_thread;

    //! Nesting
    const Nesting nesting;

    //! Number of extra threads to pretend having outside the RML
    const int n_extra;

    //! If true, check number of threads actually delivered.
    const bool check_delivered;

public:
    DoOneConnection( int n_thread_, Nesting nesting_, int n_extra_, bool check_delivered_ )
        : n_thread(n_thread_)
        , nesting(nesting_)
        , n_extra(n_extra_)
        , check_delivered(check_delivered_)
    {}

    //! Test ith connection
    void operator()( size_t i ) const;
};
//! Open a connection as client number i, run FireUpJobs on it, and close it again.
/** \param i index into ClientStackSize selecting the client's stack size; must be <MaxClient.
    The results of factory.open() and make_server() are asserted, as they are in
    VerifyInitialization, because everything afterwards dereferences the server.
    Previously a failure here went unnoticed and an uninitialized pointer was used. */
template<typename Factory,typename Client>
void DoOneConnection<Factory,Client>::operator()( size_t i ) const {
    ASSERT( i<MaxClient, NULL );
    Client* client = new Client;
    client->initialize( Client::is_omp ? JobArraySize : n_thread, nesting, ClientStackSize[i] );
    Factory factory;
    memset( &factory, 0, sizeof(factory) );
    typename Factory::status_type status = factory.open();
    ASSERT( status==Factory::st_success, "could not open RML factory" );
    typename Factory::server_type* server = NULL;
    status = factory.make_server( server, *client );
    ASSERT( status==Factory::st_success, "could not make RML server" );
    ASSERT( server, NULL );
    if( Verbose )
        printf("client %d: opened server n_thread=%d nesting=(%d,%d)\n",
               client->client_id(), n_thread, nesting.level, nesting.limit);
    client->set_server( server );
    Checker checker( *server );
    // Strict clients are never checked for the number of threads delivered.
    FireUpJobs( *server, *client, n_thread, n_extra, check_delivered && !client->is_strict() ? &checker : NULL );
    // Close the connection
    client->expect_close_connection = true;
    if( Verbose )
        printf("client %d: calling request_close_connection\n", client->client_id());
    server->request_close_connection();
    // Client deletes itself when it sees call to acknowledge_close_connection from server.
    factory.close();
}
//! Run one connection for every thread count in [MinThread,MaxThread], then wait for all clients to be destroyed.
/** Each connection is allowed one level of nesting. */
template<typename Factory, typename Client>
void SimpleTest() {
    int n_thread = MinThread;
    while( n_thread<=MaxThread ) {
        // Test nested connections
        DoOneConnection<Factory,Client> doc(n_thread,Nesting(0,1),0,false);
        doc(0);
        ++n_thread;
    }
    // Let RML catch up.
    while( ClientConstructions!=ClientDestructions )
        MilliSleep(1);
}
//! Callback for call_with_server_info: asserts that the text passed as arg occurs in server_info.
static void check_server_info( void* arg, const char* server_info ) {
    ASSERT( strstr(server_info, (char*)arg), NULL );
}
//! Open a factory and a server, check every status along the way, run the client-specific check, and close again.
/** \param n_thread job count the client is initialized with (OpenMP clients use JobArraySize instead) */
template<typename Factory, typename Client>
void VerifyInitialization( int n_thread ) {
    Client* client = new Client;
    client->initialize( Client::is_omp ? JobArraySize : n_thread, Nesting(), ClientStackSize[0] );

    Factory factory;
    memset( &factory, 0, sizeof(factory) );

    // Opening the factory must find a compatible RML library.
    typename Factory::status_type status = factory.open();
    ASSERT( status!=Factory::st_not_found, "could not find RML library" );
    ASSERT( status!=Factory::st_incompatible, NULL );
    ASSERT( status==Factory::st_success, NULL );
    factory.call_with_server_info( check_server_info, (void*)"Intel(R) RML library built:" );

    // Making the server must succeed as well.
    typename Factory::server_type* server;
    status = factory.make_server( server, *client );
    ASSERT( status!=Factory::st_incompatible, NULL );
    ASSERT( status!=Factory::st_not_found, NULL );
    ASSERT( status==Factory::st_success, NULL );
    if( Verbose ) {
        printf("client %d: opened server n_thread=%d nesting=(%d,%d)\n",
               client->client_id(), n_thread, 0, 0);
    }
    ASSERT( server, NULL );
    client->set_server( server );

    DoClientSpecificVerification( *server, n_thread );

    // Close the connection
    client->expect_close_connection = true;
    if( Verbose ) {
        printf("client %d: calling request_close_connection\n", client->client_id());
    }
    server->request_close_connection();
    // Client deletes itself when it sees call to acknowledge_close_connection from server.
    factory.close();
}

View file

@ -0,0 +1,129 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "thread_monitor.h"
#include "harness.h"
#include "harness_memory.h"
//! State shared between main and one launched thread in the thread_monitor test.
/** main posts a value in "request" and notifies the monitor; the thread copies
    it to "ack".  A request of 0 tells the thread to exit. */
class ThreadState {
    void loop();
public:
    //! Thread entry point; arg is the ThreadState to run.
    static __RML_DECL_THREAD_ROUTINE routine( void* arg ) {
        static_cast<ThreadState*>(arg)->loop();
        return 0;
    }
    typedef rml::internal::thread_monitor thread_monitor;
    thread_monitor monitor;
    //! Latest request posted by main.
    volatile int request;
    //! Latest request acknowledged by the thread.
    volatile int ack;
    //! Incremented by the thread on every trip round its loop.
    volatile unsigned clock;
    //! Snapshot of clock taken by main to detect whether the thread is still spinning.
    volatile unsigned stamp;
    // clock and stamp are incremented and compared before anything assigns them,
    // so they are given a defined starting value here (they were left uninitialized before).
    ThreadState() : request(-1), ack(-1), clock(0), stamp(0) {}
};
//! Body of a launched thread: acknowledge each request posted by main until a request of 0 arrives.
void ThreadState::loop() {
for(;;) {
// Advance the clock; main compares it against stamp to see whether this thread is still spinning.
++clock;
if( ack==request ) {
// Nothing new to acknowledge: wait on the monitor.  The condition is tested
// again after prepare_wait, and the wait is cancelled if a request arrived meanwhile.
thread_monitor::cookie c;
monitor.prepare_wait(c);
if( ack==request ) {
if( Verbose ) {
printf("%p: request=%d ack=%d\n", this, request, ack );
}
monitor.commit_wait(c);
} else
monitor.cancel_wait();
} else {
// Throw in delay occasionally
switch( request%8 ) {
case 0:
case 1:
case 5:
rml::internal::thread_monitor::yield();
}
int r = request;
ack = request;
// A request of 0 terminates the thread.
if( !r ) return;
}
}
}
// Linux on Itanium seems to require at least 1<<18 bytes per stack.
//! Smallest stack size that main launches threads with.
const size_t MinStackSize = 1<<18;
//! Largest stack size that main launches threads with.
const size_t MaxStackSize = 1<<22;
//! Test driver for thread_monitor.
/** For every thread count p in [MinThread,MaxThread] and every stack size from
    MinStackSize to MaxStackSize (doubling each time), launches p threads and
    walks them through requests 1000 down to 0, waiting for every thread to
    acknowledge each request. */
int main( int argc, char* argv[] ) {
// Set defaults
MinThread = 1;
MaxThread = 4;
ParseCommandLine( argc, argv );
for( int p=MinThread; p<=MaxThread; ++p ) {
ThreadState* t = new ThreadState[p];
for( size_t stack_size = MinStackSize; stack_size<=MaxStackSize; stack_size*=2 ) {
if( Verbose )
printf("launching %d threads\n",p);
for( int i=0; i<p; ++i )
rml::internal::thread_monitor::launch( ThreadState::routine, t+i, stack_size );
// The final request, k==0, makes every thread leave its loop.
for( int k=1000; k>=0; --k ) {
if( k%8==0 ) {
// Wait for threads to wait.
for( int i=0; i<p; ++i ) {
unsigned count = 0;
// A thread counts as waiting once its clock stops advancing across a yield.
// Give up with a warning after 1000 attempts.
do {
t[i].stamp = t[i].clock;
rml::internal::thread_monitor::yield();
if( ++count>=1000 ) {
printf("Warning: thread %d not waiting\n",i);
break;
}
} while( t[i].stamp!=t[i].clock );
}
}
if( Verbose )
printf("notifying threads\n");
for( int i=0; i<p; ++i ) {
// Change state visible to launched thread
t[i].request = k;
t[i].monitor.notify();
}
if( Verbose )
printf("waiting for threads to respond\n");
for( int i=0; i<p; ++i )
// Wait for thread to respond
while( t[i].ack!=k )
rml::internal::thread_monitor::yield();
}
}
delete[] t;
}
printf("done\n");
return 0;
}

View file

@ -1,221 +0,0 @@
/*
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#ifndef _TBB_Gate_H
#define _TBB_Gate_H
#include "itt_notify.h"
namespace tbb {
namespace internal {
#if __TBB_RML
//! Fake version of Gate for use with RML.
/** Really just an atomic intptr_t with a compare-and-swap operation,
but wrapped in syntax that makes it look like a normal Gate object,
in order to minimize source changes for RML in task.cpp. */
class Gate {
public:
typedef intptr_t state_t;
//! Get current state of gate
state_t get_state() const {
return state;
}
#if defined(_MSC_VER) && defined(_Wp64)
// Workaround for overzealous compiler warnings in /Wp64 mode
#pragma warning (disable: 4244)
#endif
//! Set state=value if state==comparand.  Returns true if the compare-and-swap saw comparand.
bool try_update( intptr_t value, intptr_t comparand ) {
return state.compare_and_swap(value,comparand)==comparand;
}
private:
//! The gate's state word.
atomic<state_t> state;
};
#elif __TBB_USE_FUTEX
//! Implementation of Gate based on futex.
/** Use this futex-based implementation where possible, because it is the simplest and usually fastest. */
class Gate {
public:
typedef intptr_t state_t;
Gate() {
ITT_SYNC_CREATE(&state, SyncType_Scheduler, SyncObj_Gate);
}
//! Get current state of gate
state_t get_state() const {
return state;
}
//! Update state=value if state==comparand (flip==false) or state!=comparand (flip==true)
void try_update( intptr_t value, intptr_t comparand, bool flip=false ) {
__TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" );
retry:
state_t old_state = state;
// First test for condition without using atomic operation
if( flip ? old_state!=comparand : old_state==comparand ) {
// Now atomically retest condition and set.
state_t s = state.compare_and_swap( value, old_state );
if( s==old_state ) {
// compare_and_swap succeeded
if( value!=0 )
futex_wakeup_all( &state ); // Update was successful and new state is not SNAPSHOT_EMPTY
} else {
// compare_and_swap failed. But for != case, failure may be spurious for our purposes if
// the value there is nonetheless not equal to value. This is a fairly rare event, so
// there is no need for backoff. In event of such a failure, we must retry.
if( flip && s!=value )
goto retry;
}
}
}
//! Wait for state!=0.
/** Calls futex_wait on the state word, with 0 as the expected value, if the state is currently 0. */
void wait() {
if( state==0 )
futex_wait( &state, 0 );
}
private:
//! The gate's state word; also the address passed to the futex calls.
atomic<state_t> state;
};
#elif USE_WINTHREAD
//! Implementation of Gate based on a Windows critical section and event object.
class Gate {
public:
typedef intptr_t state_t;
private:
//! If state==0, then thread executing wait() suspend until state becomes non-zero.
state_t state;
//! Serializes the updates made by try_update.
CRITICAL_SECTION critical_section;
//! Manual-reset event that wait() blocks on.
HANDLE event;
public:
//! Initialize with count=0
Gate() : state(0) {
// Manual-reset event, initially non-signaled.
event = CreateEvent( NULL, true, false, NULL );
InitializeCriticalSection( &critical_section );
ITT_SYNC_CREATE(&event, SyncType_Scheduler, SyncObj_Gate);
ITT_SYNC_CREATE(&critical_section, SyncType_Scheduler, SyncObj_GateLock);
}
~Gate() {
// Fake prepare/acquired pair for Intel(R) Parallel Amplifier to correctly attribute the operations below
ITT_NOTIFY( sync_prepare, &event );
CloseHandle( event );
DeleteCriticalSection( &critical_section );
ITT_NOTIFY( sync_acquired, &event );
}
//! Get current state of gate
state_t get_state() const {
return state;
}
//! Update state=value if state==comparand (flip==false) or state!=comparand (flip==true)
void try_update( intptr_t value, intptr_t comparand, bool flip=false ) {
__TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" );
EnterCriticalSection( &critical_section );
state_t old = state;
if( flip ? old!=comparand : old==comparand ) {
state = value;
// Signal the event when the previous state was zero; reset it when the new state is zero.
if( !old )
SetEvent( event );
else if( !value )
ResetEvent( event );
}
LeaveCriticalSection( &critical_section );
}
//! Wait for state!=0.
void wait() {
if( state==0 ) {
WaitForSingleObject( event, INFINITE );
}
}
};
#elif USE_PTHREAD
//! Gate built on a pthread mutex and condition variable.
class Gate {
public:
    typedef intptr_t state_t;
private:
    //! While this is zero, a thread executing wait() sleeps; it resumes once the value is non-zero.
    state_t state;
    pthread_mutex_t mutex;
    pthread_cond_t cond;
public:
    //! Construct the gate with state==0.
    Gate() : state(0)
    {
        pthread_mutex_init( &mutex, NULL );
        pthread_cond_init( &cond, NULL );
        ITT_SYNC_CREATE(&cond, SyncType_Scheduler, SyncObj_Gate);
        ITT_SYNC_CREATE(&mutex, SyncType_Scheduler, SyncObj_GateLock);
    }
    //! Destroy the condition variable, then the mutex.
    ~Gate() {
        pthread_cond_destroy( &cond );
        pthread_mutex_destroy( &mutex );
    }
    //! Current state of the gate, read without taking the mutex.
    state_t get_state() const {
        return state;
    }
    //! Set state=value when state==comparand (flip==false) or when state!=comparand (flip==true).
    void try_update( intptr_t value, intptr_t comparand, bool flip=false ) {
        __TBB_ASSERT( comparand!=0 || value!=0, "either value or comparand must be non-zero" );
        pthread_mutex_lock( &mutex );
        const state_t previous = state;
        const bool matches_comparand = previous==comparand;
        // flip inverts the test: update on inequality instead of equality.
        if( matches_comparand!=flip ) {
            state = value;
            // Waiters can exist only while state is zero, so broadcast when leaving zero.
            if( previous==0 )
                pthread_cond_broadcast( &cond );
        }
        pthread_mutex_unlock( &mutex );
    }
    //! Block until state!=0.
    void wait() {
        // Check without the mutex first: nothing to do when state is already non-zero.
        if( state!=0 )
            return;
        pthread_mutex_lock( &mutex );
        // Re-test under the mutex on every wake-up before leaving the wait.
        while( state==0 )
            pthread_cond_wait( &cond, &mutex );
        pthread_mutex_unlock( &mutex );
    }
};
#else
#error Must define USE_PTHREAD or USE_WINTHREAD
#endif /* threading kind */
} // namespace Internal
} // namespace ThreadingBuildingBlocks
#endif /* _TBB_Gate_H */

View file

@ -26,8 +26,8 @@
the GNU General Public License.
*/
#include "../rml/include/rml_tbb.h"
#include "../rml/server/thread_monitor.h"
#include "rml_tbb.h"
#include "../server/thread_monitor.h"
#include "tbb/atomic.h"
#include "tbb/cache_aligned_allocator.h"
#include "tbb/spin_mutex.h"

View file

@ -27,13 +27,13 @@
*/
// Please define version number in the file:
#include "../../include/tbb/tbb_stddef.h"
#include "tbb/tbb_stddef.h"
// And don't touch anything below
#ifndef ENDL
#define ENDL "\n"
#endif
#include "../../build/vsproject/version_string.tmp"
#include "version_string.tmp"
#ifndef __TBB_VERSION_STRINGS
#pragma message("Warning: version_string.tmp isn't generated properly by version_info.sh script!")

View file

@ -43,7 +43,7 @@
//
#include <winresrc.h>
#define ENDL "\r\n"
#include "../tbb/tbb_version.h"
#include "tbb/tbb_version.h"
#define TBBMALLOC_VERNUMBERS TBB_VERSION_MAJOR, TBB_VERSION_MINOR, __TBB_VERSION_YMD
#define TBBMALLOC_VERSION __TBB_STRING(TBBMALLOC_VERNUMBERS)

View file

@ -1,4 +1,4 @@
#ifndef __REVISION_NR_H__
#define __REVISION_NR_H__
#define REVISION_NR "11163"
#define REVISION_NR "11164"
#endif // __REVISION_NR_H__