Planet
navi: home | PPS | about | screenshots | download | development | forum

source: downloads/boost_1_34_1/libs/parameter/test/efficiency.cpp @ 33

Last change on this file since 33 was 29, checked in by landauf, 16 years ago

updated boost from 1_33_1 to 1_34_1

File size: 6.4 KB
Line 
1// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
2// 2005. Distributed under the Boost Software License, Version
3// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
4// http://www.boost.org/LICENSE_1_0.txt)
5
6#include <boost/parameter.hpp>
7#include <boost/timer.hpp>
8#include <iostream>
9
10namespace test
11{
12  //
13  // This test measures the abstraction overhead of using the named
14  // parameter interface.  Some actual test results have been recorded
15  // in timings.txt in this source file's directory, or
16  // http://www.boost.org/libs/parameter/test/timings.txt.
17  //
18  // Caveats:
19  //
20  //   1. This test penalizes the named parameter library slightly, by
21  //      passing two arguments through the named interface, while
22  //      only passing one through the plain C++ interface.
23  //
24  //   2. This test does not measure the case where an ArgumentPack is
25  //      so large that it doesn't fit in the L1 cache.
26  //
27  //   3. Although we've tried to make this test as general as
28  //      possible, we are targeting it at a specific application.
29  //      Where that affects design decisions, we've noted it below in
30  //      ***...***.
31  //
32  //   4. The first time you run this program, the time may not be
33  //      representative because of disk and memory cache effects, so
34  //      always run it multiple times and ignore the first
35  //      measurement.  This approach will also allow you to estimate
36  //      the statistical error of your test by observing the
37  //      variation in the valid times.
38  //
39  //   5. Try to run this program on a machine that's otherwise idle,
40  //      or other processes and even device hardware interrupts may
41  //      interfere by causing caches to be flushed.
42 
// Accumulator function object with a plain C++ interface.
//
// Keeps a running total of every weight passed to operator().  This
// is the baseline that the named parameter accumulator is timed
// against.
template <class T>
struct plain_weight_running_total
{
    // Start from a value-initialized total (zero for arithmetic T).
    // Initializing from T() has the same effect as an empty
    // initializer, and it is the form this code already selected for
    // MSVC < 1300, so a single spelling serves every compiler and no
    // workaround macro is needed.
    plain_weight_running_total()
      : sum(T())
    {}

    // Add the weight w to the running total.
    void operator()(T w)
    {
        this->sum += w;
    }

    // Total of all weights seen so far.
    T sum;
};
62
63  BOOST_PARAMETER_KEYWORD(tag, weight)
64  BOOST_PARAMETER_KEYWORD(tag, value)
65     
66  // Accumulator function object with named parameter interface
67  template <class T>
68  struct named_param_weight_running_total
69  {
70      named_param_weight_running_total()
71#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
72        : sum(T())
73#else
74        : sum()
75#endif
76      {}
77
78      template <class ArgumentPack>
79      void operator()(ArgumentPack const& variates)
80      {
81          this->sum += variates[weight];
82      }
83
84      T sum;
85  };
86
// This value is required to ensure that a smart compiler's dead
// code elimination doesn't optimize away anything we're testing.
// We'll use it to compute the return code of the executable to make
// sure it's needed.  As a namespace-scope variable it starts at zero.
double live_code;

// Call objects of the given Accumulator type repeatedly with x as
// an argument.
//
// Accumulator must be default-constructible, callable with x, and
// expose a member `sum` that can be added to a double.  Each of the
// accumulators is called `repeats` times (not at all when repeats is
// zero or negative), after which every partial sum is added to
// live_code.
template <class Accumulator, class Arg>
void hammer(Arg const& x, long const repeats)
{
    // Strategy: because the sum in an accumulator after each call
    // depends on the previous value of the sum, the CPU's pipeline
    // might be stalled while waiting for the previous addition to
    // complete.  Therefore, we allocate an array of accumulators,
    // and update them in sequence, so that there's no dependency
    // between adjacent addition operations.
    //
    // Additionally, if there were only one accumulator, the
    // compiler or CPU might decide to update the value in a
    // register rather than writing it back to memory.  We want each
    // operation to at least update the L1 cache.  *** Note: This
    // concern is specific to the particular application at which
    // we're targeting the test. ***

    // This has to be at least as large as the number of
    // simultaneous accumulations that can be executing in the
    // CPU pipeline.  A safe number here is larger than the
    // machine's maximum pipeline depth.  If you want to test the L2
    // or L3 cache, or main memory, you can increase the size of
    // this array.  1024 is an upper limit on the pipeline depth of
    // current vector machines.
    const std::size_t number_of_accumulators = 1024;

    Accumulator a[number_of_accumulators];
    Accumulator* const end = a + number_of_accumulators;

    for (long iteration = 0; iteration < repeats; ++iteration)
    {
        for (Accumulator* ap = a; ap < end; ++ap)
        {
            (*ap)(x);
        }
    }

    // Accumulate all the partial sums to avoid dead code
    // elimination.  They are added to live_code one at a time, in
    // array order.
    for (Accumulator* ap = a; ap < end; ++ap)
    {
        live_code += ap->sum;
    }
}
138
139  // Measure the time required to hammer accumulators of the given
140  // type with the argument x.
141  template <class Accumulator, class T>
142  double measure(T const& x, long const repeats)
143  {
144      // Hammer accumulators a couple of times to ensure the
145      // instruction cache is full of our test code, and that we don't
146      // measure the cost of a page fault for accessing the data page
147      // containing the memory where the accumulators will be
148      // allocated
149      hammer<Accumulator>(x, repeats);
150      hammer<Accumulator>(x, repeats);
151
152      // Now start a timer
153      boost::timer time;
154      hammer<Accumulator>(x, repeats);  // This time, we'll measure
155      return time.elapsed();
156  }
157}
158
159int main()
160{
161    using namespace test;
162
163    // first decide how many repetitions to measure
164    long repeats = 100;
165    double measured = 0;
166    while (measured < 1.0 && repeats <= 10000000)
167    {
168        repeats *= 10;
169       
170        boost::timer time;
171
172        hammer<plain_weight_running_total<double> >(.1, repeats);
173        hammer<named_param_weight_running_total<double> >(
174            (weight = .1, value = .2), repeats);
175
176        measured = time.elapsed();
177    }
178   
179    std::cout
180        << "plain time:           "
181        << measure<plain_weight_running_total<double> >(.1, repeats)
182        << std::endl;
183   
184    std::cout
185        << "named parameter time: "
186        << measure<named_param_weight_running_total<double> >(
187            (weight = .1, value = .2), repeats
188        )
189        << std::endl;
190
191    // This is ultimately responsible for preventing all the test code
192    // from being optimized away.  Change this to return 0 and you
193    // unplug the whole test's life support system.
194    return live_code < 0.;
195}
Note: See TracBrowser for help on using the repository browser.