1 | // Copyright David Abrahams, Matthias Troyer, Michael Gauckler |
---|
2 | // 2005. Distributed under the Boost Software License, Version |
---|
3 | // 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
---|
4 | // http://www.boost.org/LICENSE_1_0.txt) |
---|
5 | |
---|
6 | #include <boost/parameter.hpp> |
---|
7 | #include <boost/timer.hpp> |
---|
8 | #include <iostream> |
---|
9 | |
---|
10 | namespace test |
---|
11 | { |
---|
12 | // |
---|
13 | // This test measures the abstraction overhead of using the named |
---|
14 | // parameter interface. Some actual test results have been recorded |
---|
15 | // in timings.txt in this source file's directory, or |
---|
16 | // http://www.boost.org/libs/parameter/test/timings.txt. |
---|
17 | // |
---|
18 | // Caveats: |
---|
19 | // |
---|
20 | // 1. This test penalizes the named parameter library slightly, by |
---|
21 | // passing two arguments through the named interface, while |
---|
22 | // only passing one through the plain C++ interface. |
---|
23 | // |
---|
24 | // 2. This test does not measure the case where an ArgumentPack is |
---|
25 | // so large that it doesn't fit in the L1 cache. |
---|
26 | // |
---|
27 | // 3. Although we've tried to make this test as general as |
---|
28 | // possible, we are targeting it at a specific application. |
---|
29 | // Where that affects design decisions, we've noted it below in |
---|
30 | // ***...***. |
---|
31 | // |
---|
32 | // 4. The first time you run this program, the time may not be |
---|
33 | // representative because of disk and memory cache effects, so |
---|
34 | // always run it multiple times and ignore the first |
---|
35 | // measurement. This approach will also allow you to estimate |
---|
36 | // the statistical error of your test by observing the |
---|
37 | // variation in the valid times. |
---|
38 | // |
---|
39 | // 5. Try to run this program on a machine that's otherwise idle, |
---|
40 | // or other processes and even device hardware interrupts may |
---|
41 | // interfere by causing caches to be flushed. |
---|
42 | |
---|
43 | // Accumulator function object with plain C++ interface |
---|
44 | template <class T> |
---|
45 | struct plain_weight_running_total |
---|
46 | { |
---|
47 | plain_weight_running_total() |
---|
48 | #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) |
---|
49 | : sum(T()) |
---|
50 | #else |
---|
51 | : sum() |
---|
52 | #endif |
---|
53 | {} |
---|
54 | |
---|
55 | void operator()(T w) |
---|
56 | { |
---|
57 | this->sum += w; |
---|
58 | } |
---|
59 | |
---|
60 | T sum; |
---|
61 | }; |
---|
62 | |
---|
63 | BOOST_PARAMETER_KEYWORD(tag, weight) |
---|
64 | BOOST_PARAMETER_KEYWORD(tag, value) |
---|
65 | |
---|
66 | // Accumulator function object with named parameter interface |
---|
67 | template <class T> |
---|
68 | struct named_param_weight_running_total |
---|
69 | { |
---|
70 | named_param_weight_running_total() |
---|
71 | #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) |
---|
72 | : sum(T()) |
---|
73 | #else |
---|
74 | : sum() |
---|
75 | #endif |
---|
76 | {} |
---|
77 | |
---|
78 | template <class ArgumentPack> |
---|
79 | void operator()(ArgumentPack const& variates) |
---|
80 | { |
---|
81 | this->sum += variates[weight]; |
---|
82 | } |
---|
83 | |
---|
84 | T sum; |
---|
85 | }; |
---|
86 | |
---|
87 | // This value is required to ensure that a smart compiler's dead |
---|
88 | // code elimination doesn't optimize away anything we're testing. |
---|
89 | // We'll use it to compute the return code of the executable to make |
---|
90 | // sure it's needed. |
---|
91 | double live_code; |
---|
92 | |
---|
93 | // Call objects of the given Accumulator type repeatedly with x as |
---|
94 | // an argument. |
---|
95 | template <class Accumulator, class Arg> |
---|
96 | void hammer(Arg const& x, long const repeats) |
---|
97 | { |
---|
98 | // Strategy: because the sum in an accumulator after each call |
---|
99 | // depends on the previous value of the sum, the CPU's pipeline |
---|
100 | // might be stalled while waiting for the previous addition to |
---|
101 | // complete. Therefore, we allocate an array of accumulators, |
---|
102 | // and update them in sequence, so that there's no dependency |
---|
103 | // between adjacent addition operations. |
---|
104 | // |
---|
105 | // Additionally, if there were only one accumulator, the |
---|
106 | // compiler or CPU might decide to update the value in a |
---|
107 | // register rather that writing it back to memory. we want each |
---|
108 | // operation to at least update the L1 cache. *** Note: This |
---|
109 | // concern is specific to the particular application at which |
---|
110 | // we're targeting the test. *** |
---|
111 | |
---|
112 | // This has to be at least as large as the number of |
---|
113 | // simultaneous accumulations that can be executing in the |
---|
114 | // compiler pipeline. A safe number here is larger than the |
---|
115 | // machine's maximum pipeline depth. If you want to test the L2 |
---|
116 | // or L3 cache, or main memory, you can increase the size of |
---|
117 | // this array. 1024 is an upper limit on the pipeline depth of |
---|
118 | // current vector machines. |
---|
119 | const std::size_t number_of_accumulators = 1024; |
---|
120 | |
---|
121 | Accumulator a[number_of_accumulators]; |
---|
122 | |
---|
123 | for (long iteration = 0; iteration < repeats; ++iteration) |
---|
124 | { |
---|
125 | for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) |
---|
126 | { |
---|
127 | (*ap)(x); |
---|
128 | } |
---|
129 | } |
---|
130 | |
---|
131 | // Accumulate all the partial sums to avoid dead code |
---|
132 | // elimination. |
---|
133 | for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) |
---|
134 | { |
---|
135 | live_code += ap->sum; |
---|
136 | } |
---|
137 | } |
---|
138 | |
---|
139 | // Measure the time required to hammer accumulators of the given |
---|
140 | // type with the argument x. |
---|
141 | template <class Accumulator, class T> |
---|
142 | double measure(T const& x, long const repeats) |
---|
143 | { |
---|
144 | // Hammer accumulators a couple of times to ensure the |
---|
145 | // instruction cache is full of our test code, and that we don't |
---|
146 | // measure the cost of a page fault for accessing the data page |
---|
147 | // containing the memory where the accumulators will be |
---|
148 | // allocated |
---|
149 | hammer<Accumulator>(x, repeats); |
---|
150 | hammer<Accumulator>(x, repeats); |
---|
151 | |
---|
152 | // Now start a timer |
---|
153 | boost::timer time; |
---|
154 | hammer<Accumulator>(x, repeats); // This time, we'll measure |
---|
155 | return time.elapsed(); |
---|
156 | } |
---|
157 | } |
---|
158 | |
---|
159 | int main() |
---|
160 | { |
---|
161 | using namespace test; |
---|
162 | |
---|
163 | // first decide how many repetitions to measure |
---|
164 | long repeats = 100; |
---|
165 | double measured = 0; |
---|
166 | while (measured < 1.0 && repeats <= 10000000) |
---|
167 | { |
---|
168 | repeats *= 10; |
---|
169 | |
---|
170 | boost::timer time; |
---|
171 | |
---|
172 | hammer<plain_weight_running_total<double> >(.1, repeats); |
---|
173 | hammer<named_param_weight_running_total<double> >( |
---|
174 | (weight = .1, value = .2), repeats); |
---|
175 | |
---|
176 | measured = time.elapsed(); |
---|
177 | } |
---|
178 | |
---|
179 | std::cout |
---|
180 | << "plain time: " |
---|
181 | << measure<plain_weight_running_total<double> >(.1, repeats) |
---|
182 | << std::endl; |
---|
183 | |
---|
184 | std::cout |
---|
185 | << "named parameter time: " |
---|
186 | << measure<named_param_weight_running_total<double> >( |
---|
187 | (weight = .1, value = .2), repeats |
---|
188 | ) |
---|
189 | << std::endl; |
---|
190 | |
---|
191 | // This is ultimately responsible for preventing all the test code |
---|
192 | // from being optimized away. Change this to return 0 and you |
---|
193 | // unplug the whole test's life support system. |
---|
194 | return live_code < 0.; |
---|
195 | } |
---|