mirror of
https://github.com/mangosfour/server.git
synced 2025-12-12 01:37:00 +00:00
(based on cipherCOM's repo commit e8a538e) Signed-off-by: VladimirMangos <vladimir@getmangos.com>
291 lines
11 KiB
C++
291 lines
11 KiB
C++
/*
|
|
Copyright 2005-2009 Intel Corporation. All Rights Reserved.
|
|
|
|
This file is part of Threading Building Blocks.
|
|
|
|
Threading Building Blocks is free software; you can redistribute it
|
|
and/or modify it under the terms of the GNU General Public License
|
|
version 2 as published by the Free Software Foundation.
|
|
|
|
Threading Building Blocks is distributed in the hope that it will be
|
|
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
|
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Threading Building Blocks; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
As a special exception, you may use this file as part of a free software
|
|
library without restriction. Specifically, if other files instantiate
|
|
templates or use macros or inline functions from this file, or you compile
|
|
this file and link it with other files to produce an executable, this
|
|
file does not by itself cause the resulting executable to be covered by
|
|
the GNU General Public License. This exception does not however
|
|
invalidate any other reasons why the executable file might be covered by
|
|
the GNU General Public License.
|
|
*/
|
|
|
|
#include "tbb/task_scheduler_init.h"
|
|
#include "tbb/tick_count.h"
|
|
#include <cmath>
|
|
#include <cstdlib>
|
|
#include <cerrno>
|
|
#include <cfloat>
|
|
#include <vector>
|
|
#include <algorithm>
|
|
|
|
#include "../src/test/harness.h"
|
|
|
|
#if __linux__ || __APPLE__ || __FreeBSD__
|
|
#include <sys/resource.h>
|
|
#endif /* __APPLE__ */
|
|
|
|
// The code, performance of which is to be measured, is surrounded by the StartSimpleTiming
|
|
// and StopSimpleTiming macros. It is called "target code" or "code of interest" hereafter.
|
|
//
|
|
// The target code is executed inside the nested loop. Nesting is necessary to allow
|
|
// measurements on arrays that fit cache of a particular level, while making the load
|
|
// big enough to eliminate the influence of random deviations.
|
|
//
|
|
// Macro StartSimpleTiming defines reduction variable "util::anchor", which may be modified (usually
|
|
// by adding to) by the target code. This can be necessary to prevent optimizing compilers
|
|
// from throwing out the code of interest. Besides, if the target code is complex enough,
|
|
// make sure that all its branches contribute (directly or indirectly) to the value
|
|
// being added to the "util::anchor" variable.
|
|
//
|
|
// To factor out overhead introduced by the measurement infra code it is recommended to make
|
|
// a calibration run with target code replaced by a no-op (but still modifying "sum"), and
|
|
// store the resulting time in the "util::base" variable.
|
|
//
|
|
// A generally good approach is to make the target code use elements of a preliminary
|
|
// initialized array. Then for calibration run you just need to add vector elements
|
|
// to the "sum" variable. To get rid of memory access delays make the array small
|
|
// enough to fit L2 or L1 cache (play with StartSimpleTiming arguments if necessary).
|
|
//
|
|
// Macro CalibrateSimpleTiming performs default calibration using "util::anchor += i;" operation.
|
|
//
|
|
// Macro ANCHOR_TYPE defines the type of the reduction variable. If it was not
|
|
// defined before including this header, it is defined as size_t. Depending on
|
|
// the target code modern super scalar architectures may blend reduction operation
|
|
// and instructions of interest differently for different target alternatives. So
|
|
// you may play with the type to minimize out-of-order and parallel execution impact
|
|
// on the calibration time veracity. You may even end up with different reduction
|
|
// variable types (and different calibration times) for different measurements.
|
|
|
|
|
|
namespace util {
|
|
|
|
typedef std::vector<double> durations_t;
|
|
|
|
void trace_histogram ( const durations_t& t, char* histogramFileName )
|
|
{
|
|
FILE* f = histogramFileName ? fopen(histogramFileName, "wt") : stdout;
|
|
size_t n = t.size();
|
|
const size_t num_buckets = 100;
|
|
double min_val = *std::min_element(t.begin(), t.end()),
|
|
max_val = *std::max_element(t.begin(), t.end()),
|
|
bucket_size = (max_val - min_val) / num_buckets;
|
|
std::vector<size_t> hist(num_buckets + 1, 0);
|
|
for ( size_t i = 0; i < n; ++i )
|
|
++hist[size_t((t[i]-min_val)/bucket_size)];
|
|
fprintf (f, "Histogram: nvals = %u, min = %g, max = %g, nbuckets = %u\n", (unsigned)n, min_val, max_val, (unsigned)num_buckets);
|
|
double bucket = min_val;
|
|
for ( size_t i = 0; i <= num_buckets; ++i, bucket+=bucket_size )
|
|
fprintf (f, "%12g\t%u\n", bucket, (unsigned)hist[i]);
|
|
fclose(f);
|
|
}
|
|
|
|
double average ( const durations_t& d, double& variation_percent, double& std_dev_percent )
|
|
{
|
|
durations_t t = d;
|
|
if ( t.size() > 5 ) {
|
|
t.erase(std::min_element(t.begin(), t.end()));
|
|
t.erase(std::max_element(t.begin(), t.end()));
|
|
}
|
|
size_t n = t.size();
|
|
double sum = 0,
|
|
min_val = *std::min_element(t.begin(), t.end()),
|
|
max_val = *std::max_element(t.begin(), t.end());
|
|
for ( size_t i = 0; i < n; ++i )
|
|
sum += t[i];
|
|
double avg = sum / n,
|
|
std_dev = 0;
|
|
for ( size_t i = 0; i < n; ++i ) {
|
|
double dev = fabs(t[i] - avg);
|
|
std_dev += dev * dev;
|
|
}
|
|
std_dev = sqrt(std_dev / n);
|
|
std_dev_percent = std_dev / avg * 100;
|
|
variation_percent = 100 * (max_val - min_val) / avg;
|
|
return avg;
|
|
}
|
|
|
|
static int num_threads;
|
|
|
|
static double base = 0,
|
|
base_dev = 0,
|
|
base_dev_percent = 0;
|
|
|
|
static char *empty_fmt = "";
|
|
static int rate_field_len = 11;
|
|
|
|
#if !defined(ANCHOR_TYPE)
|
|
#define ANCHOR_TYPE size_t
|
|
#endif
|
|
|
|
static ANCHOR_TYPE anchor = 0;
|
|
|
|
static double sequential_time = 0;
|
|
|
|
|
|
#define StartSimpleTiming(nOuter, nInner) { \
|
|
tbb::tick_count t1, t0 = tbb::tick_count::now(); \
|
|
for ( size_t j = 0; l < nOuter; ++l ) { \
|
|
for ( size_t i = 0; i < nInner; ++i ) {
|
|
|
|
#define StopSimpleTiming(res) \
|
|
} \
|
|
util::anchor += (ANCHOR_TYPE)l; \
|
|
} \
|
|
t1 = tbb::tick_count::now(); \
|
|
printf (util::empty_fmt, util::anchor); \
|
|
res = (t1-t0).seconds() - util::base; \
|
|
}
|
|
|
|
#define CalibrateSimpleTiming(T, nOuter, nInner) \
|
|
StartSimpleTiming(nOuter, nInner); \
|
|
util::anchor += (ANCHOR_TYPE)i; \
|
|
StopSimpleTiming(util::base);
|
|
|
|
|
|
#define StartTimingImpl(nRuns, nOuter, nInner) \
|
|
tbb::tick_count t1, t0; \
|
|
for ( size_t k = 0; k < nRuns; ++k ) { \
|
|
t0 = tbb::tick_count::now(); \
|
|
for ( size_t l = 0; l < nOuter; ++l ) { \
|
|
for ( size_t i = 0; i < nInner; ++i ) {
|
|
|
|
#define StartTiming(nRuns, nOuter, nInner) { \
|
|
util::durations_t t_(nRuns); \
|
|
StartTimingImpl(nRuns, nOuter, nInner)
|
|
|
|
#define StartTimingEx(vDurations, nRuns, nOuter, nInner) { \
|
|
util::durations_t &t_ = vDurations; \
|
|
vDurations.resize(nRuns); \
|
|
StartTimingImpl(nRuns, nOuter, nInner)
|
|
|
|
#define StopTiming(Avg, StdDev, StdDevPercent) \
|
|
} \
|
|
util::anchor += (ANCHOR_TYPE)l; \
|
|
} \
|
|
t1 = tbb::tick_count::now(); \
|
|
t_[k] = (t1 - t0).seconds()/nrep; \
|
|
} \
|
|
printf (util::empty_fmt, util::anchor); \
|
|
Avg = util::average(t_, StdDev, StdDevPercent); \
|
|
}
|
|
|
|
#define CalibrateTiming(nRuns, nOuter, nInner) \
|
|
StartTiming(nRuns, nOuter, nInner); \
|
|
util::anchor += (ANCHOR_TYPE)i; \
|
|
StopTiming(util::base, util::base_dev, util::base_dev_percent);
|
|
|
|
} // namespace util
|
|
|
|
|
|
#ifndef NRUNS
|
|
#define NRUNS 7
|
|
#endif
|
|
|
|
#ifndef ONE_TEST_DURATION
|
|
#define ONE_TEST_DURATION 0.01
|
|
#endif
|
|
|
|
#define no_histogram ((char*)-1)
|
|
|
|
inline
|
|
double RunTestImpl ( const char* title, void (*pfn)(), char* histogramFileName = no_histogram ) {
|
|
double time = 0, variation = 0, deviation = 0;
|
|
size_t nrep = 1;
|
|
while (true) {
|
|
CalibrateTiming(NRUNS, 1, nrep);
|
|
StartTiming(NRUNS, 1, nrep);
|
|
pfn();
|
|
StopTiming(time, variation, deviation);
|
|
time -= util::base;
|
|
if ( time > 1e-6 )
|
|
break;
|
|
nrep *= 2;
|
|
}
|
|
nrep *= (size_t)ceil(ONE_TEST_DURATION/time);
|
|
CalibrateTiming(NRUNS, 1, nrep); // sets util::base
|
|
util::durations_t t;
|
|
StartTimingEx(t, NRUNS, 1, nrep);
|
|
pfn();
|
|
StopTiming(time, variation, deviation);
|
|
if ( histogramFileName != (char*)-1 )
|
|
util::trace_histogram(t, histogramFileName);
|
|
double clean_time = time - util::base;
|
|
if ( title ) {
|
|
// Deviation (in percent) is calulated for the Gross time
|
|
printf ("\n%-34s %.2e %5.1f ", title, clean_time, deviation);
|
|
if ( util::sequential_time != 0 )
|
|
//printf ("% .2e ", clean_time - util::sequential_time);
|
|
printf ("% 10.1f ", 100*(clean_time - util::sequential_time)/util::sequential_time);
|
|
else
|
|
printf ("%*s ", util::rate_field_len, "");
|
|
printf ("%-9u %1.6f |", (unsigned)nrep, time * nrep);
|
|
}
|
|
return clean_time;
|
|
}
|
|
|
|
|
|
/// Runs the test function, does statistical processing, and, if title is nonzero, prints results.
|
|
/** If histogramFileName is a string, the histogram of individual runs is generated and stored
|
|
in a file with the given name. If it is NULL then the histogram is printed on the console.
|
|
By default no histogram is generated.
|
|
The histogram format is: "rate bucket start" "number of tests in this bucket". **/
|
|
inline
|
|
void RunTest ( const char* title_fmt, size_t workload_param, void (*pfn_test)(), char* histogramFileName = no_histogram ) {
|
|
char title[1024];
|
|
sprintf(title, title_fmt, (long)workload_param);
|
|
RunTestImpl(title, pfn_test, histogramFileName);
|
|
}
|
|
|
|
inline
|
|
void CalcSequentialTime ( void (*pfn)() ) {
|
|
util::sequential_time = RunTestImpl(NULL, pfn) / util::num_threads;
|
|
}
|
|
|
|
inline
|
|
void ResetSequentialTime () {
|
|
util::sequential_time = 0;
|
|
}
|
|
|
|
|
|
inline void PrintTitle() {
|
|
//printf ("%-32s %-*s Std Dev,%% %-*s Repeats Gross time Infra time | NRUNS = %u",
|
|
// "Test name", util::rate_field_len, "Rate", util::rate_field_len, "Overhead", NRUNS);
|
|
printf ("%-34s %-*s Std Dev,%% Par.overhead,%% Repeats Gross time | Nruns %u, Nthreads %d",
|
|
"Test name", util::rate_field_len, "Rate", NRUNS, util::num_threads);
|
|
}
|
|
|
|
void Test();
|
|
|
|
inline
|
|
int test_main( int argc, char* argv[] ) {
|
|
ParseCommandLine( argc, argv );
|
|
ASSERT (MinThread>=2, "Minimal number of threads must be 2 or more");
|
|
char buf[128];
|
|
util::rate_field_len = 2 + sprintf(buf, "%.1e", 1.1);
|
|
for ( int i = MinThread; i <= MaxThread; ++i ) {
|
|
tbb::task_scheduler_init init (i);
|
|
util::num_threads = i;
|
|
PrintTitle();
|
|
Test();
|
|
printf("\n");
|
|
}
|
|
printf("done\n");
|
|
return 0;
|
|
}
|