@@ -198,140 +198,227 @@ struct RunResults {
   bool file_report_aggregates_only = false;
 };
 
-RunResults RunBenchmark(
-    const benchmark::internal::Benchmark::Instance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports) {
+class BenchmarkRunner {
   RunResults run_results;
 
-  const bool has_explicit_iteration_count = b.iterations != 0;
-  size_t iters = has_explicit_iteration_count ? b.iterations : 1;
-  std::unique_ptr<internal::ThreadManager> manager;
-  std::vector<std::thread> pool(b.threads - 1);
-  const int repeats =
-      b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
-  if (repeats != 1) {
-    run_results.display_report_aggregates_only =
-        (FLAGS_benchmark_report_aggregates_only ||
-         FLAGS_benchmark_display_aggregates_only);
-    run_results.file_report_aggregates_only =
-        FLAGS_benchmark_report_aggregates_only;
-    if (b.aggregation_report_mode != internal::ARM_Unspecified) {
-      run_results.display_report_aggregates_only =
-          (b.aggregation_report_mode &
-           internal::ARM_DisplayReportAggregatesOnly);
-      run_results.file_report_aggregates_only =
-          (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
+  const benchmark::internal::Benchmark::Instance& b;
+  std::vector<BenchmarkReporter::Run>& complexity_reports;
+
+  const double min_time;
+  const int repeats;
+  const bool has_explicit_iteration_count;
+
+  std::vector<std::thread> pool;
+
+  size_t iters;  // preserved between repetitions!
+  // So only the first repetition has to find/calculate it,
+  // the other repetitions will just use that precomputed iteration count.
+
+  struct IterationResults {
+    internal::ThreadManager::Result results;
+    size_t iters;
+    double seconds;
+  };
+  IterationResults doNIterations() {
+    VLOG(2) << "Running " << b.name << " for " << iters << "\n";
+
+    std::unique_ptr<internal::ThreadManager> manager;
+    manager.reset(new internal::ThreadManager(b.threads));
+
+    // Run all but one thread in separate threads
+    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
+                             manager.get());
     }
+    // And run one thread here directly.
+    // (If we were asked to run just one thread, we don't create new threads.)
+    // Yes, we need to do this here *after* we start the separate threads.
+    RunInThread(&b, iters, 0, manager.get());
+
+    // The main thread has finished. Now let's wait for the other threads.
+    manager->WaitForAllThreads();
+    for (std::thread& thread : pool) thread.join();
+
+    IterationResults i;
+    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
+    {
+      MutexLock l(manager->GetBenchmarkMutex());
+      i.results = manager->results;
+    }
+
+    // And get rid of the manager.
+    manager.reset();
+
+    // Adjust real/manual time stats since they were reported per thread.
+    i.results.real_time_used /= b.threads;
+    i.results.manual_time_used /= b.threads;
+
+    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
+            << i.results.real_time_used << "\n";
+
+    // So for how long were we running?
+    i.iters = iters;
+    // Base decisions off of real time if requested by this benchmark.
+    i.seconds = i.results.cpu_time_used;
+    if (b.use_manual_time) {
+      i.seconds = i.results.manual_time_used;
+    } else if (b.use_real_time) {
+      i.seconds = i.results.real_time_used;
+    }
+
+    return i;
+  }
+
+  static size_t PredictNumItersNeeded(const IterationResults& i,
+                                      double min_time) {
+    // See how much the iteration count should be increased by.
+    // Note: Avoid division by zero with max(seconds, 1ns).
+    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
+    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
+    // use the multiplier directly.
+    // Otherwise we use at most 10 times expansion.
+    // NOTE: When the last run was at least 10% of the min time the max
+    // expansion should be 14x.
+    bool is_significant = (i.seconds / min_time) > 0.1;
+    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
+    if (multiplier <= 1.0) multiplier = 2.0;
+
+    // So what seems to be the sufficiently-large iteration count? Round up.
+    const size_t max_next_iters =
+        0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
+    // But we do have *some* sanity limits though..
+    const size_t next_iters = std::min(max_next_iters, kMaxIterations);
+
+    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+    return next_iters;  // round up before conversion to integer.
+  }
+
+  bool shouldReportIterationResults(const IterationResults& i,
+                                    double min_time) const {
+    // Determine if this run should be reported;
+    // either it has run for a sufficient amount of time
+    // or an error was reported.
+    return i.results.has_error_ ||
+           i.iters >= kMaxIterations ||  // Too many iterations already.
+           i.seconds >= min_time ||      // The elapsed time is large enough.
+           // CPU time is specified but the elapsed real time greatly exceeds
+           // the minimum time.
+           // Note that user-provided timers are exempt from this sanity check.
+           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
   }
-  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
+
+  void doOneRepetition(bool not_in_the_first_repetition) {
+    IterationResults i;
+
+    // We *may* be gradually increasing the length (iteration count)
+    // of the benchmark until we decide the results are significant.
+    // And once we do, we report those last results and exit.
+    // Please do note that if there are repetitions, the iteration count
+    // is *only* calculated for the *first* repetition, and other repetitions
+    // simply use that precomputed iteration count.
     for (;;) {
-      // Try benchmark
-      VLOG(2) << "Running " << b.name << " for " << iters << "\n";
+      i = doNIterations();
+
+      // Do we consider the results to be significant?
+      // If we are doing repetitions, and the first repetition was already
+      // done, it has calculated the correct iteration count, so we have run
+      // that very iteration count just now. No need to calculate anything.
+      // Just report. Else, the normal rules apply.
+      const bool results_are_significant =
+          not_in_the_first_repetition || has_explicit_iteration_count ||
+          shouldReportIterationResults(i, min_time);
+
+      if (results_are_significant) break;  // Good, let's report them!
+
+      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
+      // iteration count, and run the benchmark again...
+
+      iters = PredictNumItersNeeded(i, min_time);
+      assert(iters > i.iters &&
+             "if we did more iterations than we want to do the next time, "
+             "then we should have accepted the current iteration run.");
+    }
 
-      manager.reset(new internal::ThreadManager(b.threads));
-      for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-        pool[ti] = std::thread(&RunInThread, &b, iters,
-                               static_cast<int>(ti + 1), manager.get());
-      }
-      RunInThread(&b, iters, 0, manager.get());
+    // Oh, one last thing, we need to also produce the 'memory measurements'..
+    MemoryManager::Result memory_result;
+    size_t memory_iterations = 0;
+    if (memory_manager != nullptr) {
+      // Only run a few iterations to reduce the impact of one-time
+      // allocations in benchmarks that are not properly managed.
+      memory_iterations = std::min<size_t>(16, iters);
+      memory_manager->Start();
+      std::unique_ptr<internal::ThreadManager> manager;
+      manager.reset(new internal::ThreadManager(1));
+      RunInThread(&b, memory_iterations, 0, manager.get());
       manager->WaitForAllThreads();
-      for (std::thread& thread : pool) thread.join();
-      internal::ThreadManager::Result results;
-      {
-        MutexLock l(manager->GetBenchmarkMutex());
-        results = manager->results;
-      }
       manager.reset();
-      // Adjust real/manual time stats since they were reported per thread.
-      results.real_time_used /= b.threads;
-      results.manual_time_used /= b.threads;
-
-      VLOG(2) << "Ran in " << results.cpu_time_used << "/"
-              << results.real_time_used << "\n";
-
-      // Base decisions off of real time if requested by this benchmark.
-      double seconds = results.cpu_time_used;
-      if (b.use_manual_time) {
-        seconds = results.manual_time_used;
-      } else if (b.use_real_time) {
-        seconds = results.real_time_used;
-      }
 
-      const double min_time =
-          !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
-
-      // clang-format off
-      // turn off clang-format since it mangles prettiness here
-      // Determine if this run should be reported; Either it has
-      // run for a sufficient amount of time or because an error was reported.
-      const bool should_report = repetition_num > 0
-          || has_explicit_iteration_count  // An exact iteration count was requested
-          || results.has_error_
-          || iters >= kMaxIterations  // No chance to try again, we hit the limit.
-          || seconds >= min_time  // the elapsed time is large enough
-          // CPU time is specified but the elapsed real time greatly exceeds the
-          // minimum time. Note that user provided timers are except from this
-          // sanity check.
-          || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
-      // clang-format on
-
-      if (should_report) {
-        MemoryManager::Result memory_result;
-        size_t memory_iterations = 0;
-        if (memory_manager != nullptr) {
-          // Only run a few iterations to reduce the impact of one-time
-          // allocations in benchmarks that are not properly managed.
-          memory_iterations = std::min<size_t>(16, iters);
-          memory_manager->Start();
-          manager.reset(new internal::ThreadManager(1));
-          RunInThread(&b, memory_iterations, 0, manager.get());
-          manager->WaitForAllThreads();
-          manager.reset();
-
-          memory_manager->Stop(&memory_result);
-        }
-
-        BenchmarkReporter::Run report = CreateRunReport(
-            b, results, memory_iterations, memory_result, seconds);
-        if (!report.error_occurred && b.complexity != oNone)
-          complexity_reports->push_back(report);
-        run_results.non_aggregates.push_back(report);
-        break;
-      }
+      memory_manager->Stop(&memory_result);
+    }
+
+    // Ok, now actually report.
+    BenchmarkReporter::Run report = CreateRunReport(
+        b, i.results, memory_iterations, memory_result, i.seconds);
+
+    if (!report.error_occurred && b.complexity != oNone)
+      complexity_reports.push_back(report);
+
+    run_results.non_aggregates.push_back(report);
+  }
 
-      // See how much iterations should be increased by
-      // Note: Avoid division by zero with max(seconds, 1ns).
-      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
-      // If our last run was at least 10% of FLAGS_benchmark_min_time then we
-      // use the multiplier directly. Otherwise we use at most 10 times
-      // expansion.
-      // NOTE: When the last run was at least 10% of the min time the max
-      // expansion should be 14x.
-      bool is_significant = (seconds / min_time) > 0.1;
-      multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
-      if (multiplier <= 1.0) multiplier = 2.0;
-      double next_iters = std::max(multiplier * iters, iters + 1.0);
-      if (next_iters > kMaxIterations) {
-        next_iters = kMaxIterations;
+ public:
+  BenchmarkRunner(const benchmark::internal::Benchmark::Instance& b_,
+                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
+      : b(b_),
+        complexity_reports(*complexity_reports_),
+        min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
+        repeats(b.repetitions != 0 ? b.repetitions
+                                   : FLAGS_benchmark_repetitions),
+        has_explicit_iteration_count(b.iterations != 0),
+        pool(b.threads - 1),
+        iters(has_explicit_iteration_count ? b.iterations : 1) {
+    if (repeats != 1) {
+      run_results.display_report_aggregates_only =
+          (FLAGS_benchmark_report_aggregates_only ||
+           FLAGS_benchmark_display_aggregates_only);
+      run_results.file_report_aggregates_only =
+          FLAGS_benchmark_report_aggregates_only;
+      if (b.aggregation_report_mode != internal::ARM_Unspecified) {
+        run_results.display_report_aggregates_only =
+            (b.aggregation_report_mode &
+             internal::ARM_DisplayReportAggregatesOnly);
+        run_results.file_report_aggregates_only =
+            (b.aggregation_report_mode &
+             internal::ARM_FileReportAggregatesOnly);
       }
-      VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
-      iters = static_cast<int>(next_iters + 0.5);
     }
-  }
 
-  // Calculate additional statistics
-  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
+      const bool not_in_the_first_repetition = repetition_num != 0;
+      doOneRepetition(not_in_the_first_repetition);
+    }
+
+    // Calculate additional statistics
+    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
 
-  // Maybe calculate complexity report
-  if ((b.complexity != oNone) && b.last_benchmark_instance) {
-    auto additional_run_stats = ComputeBigO(*complexity_reports);
-    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                       additional_run_stats.begin(),
-                                       additional_run_stats.end());
-    complexity_reports->clear();
+    // Maybe calculate complexity report
+    if ((b.complexity != oNone) && b.last_benchmark_instance) {
+      auto additional_run_stats = ComputeBigO(complexity_reports);
+      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                         additional_run_stats.begin(),
+                                         additional_run_stats.end());
+      complexity_reports.clear();
+    }
   }
 
-  return run_results;
+  RunResults getResults() { return run_results; }
+};
+
+RunResults RunBenchmark(
+    const benchmark::internal::Benchmark::Instance& b,
+    std::vector<BenchmarkReporter::Run>* complexity_reports) {
+  BenchmarkRunner r(b, complexity_reports);
+  return r.getResults();
 }
 
 }  // namespace
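
Note on the iteration-count heuristic carried over into PredictNumItersNeeded(): the patch scales the last iteration count by min_time * 1.4 / elapsed, caps the multiplier at 10x when the last sample covered no more than 10% of the minimum time, never shrinks (falling back to 2x when the multiplier would be <= 1), and clamps to kMaxIterations. The sketch below is not part of the patch; it is a minimal standalone reproduction of that arithmetic so the convergence can be traced outside the library. The kMaxIterations value, per-iteration cost, and min_time used here are assumptions chosen purely for illustration.

// Standalone sketch of the growth heuristic used by PredictNumItersNeeded().
// Not library code: kMaxIterations and the simulated timings are made up.
#include <algorithm>
#include <cstddef>
#include <cstdio>

static const size_t kMaxIterations = 1000000000;  // assumed cap, illustrative

// 'seconds' is how long the last run of 'iters' iterations took.
size_t PredictNumIters(double seconds, size_t iters, double min_time) {
  // Avoid division by zero with max(seconds, 1ns).
  double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
  // If the last run covered no more than 10% of min_time, cap expansion at 10x.
  const bool is_significant = (seconds / min_time) > 0.1;
  multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
  if (multiplier <= 1.0) multiplier = 2.0;  // never shrink or stall
  // Round up, grow by at least one iteration, and clamp to the sanity limit.
  const size_t max_next_iters =
      static_cast<size_t>(0.5 + std::max(multiplier * iters, iters + 1.0));
  return std::min(max_next_iters, kMaxIterations);
}

int main() {
  // Pretend one iteration costs 50us and we want at least 0.5s of runtime.
  const double per_iter_seconds = 50e-6;
  const double min_time = 0.5;
  size_t iters = 1;
  for (int round = 0; round < 10; ++round) {
    const double seconds = iters * per_iter_seconds;  // simulated measurement
    std::printf("ran %zu iters in %.6fs\n", iters, seconds);
    if (seconds >= min_time) break;  // same exit test as the runner's loop
    iters = PredictNumIters(seconds, iters, min_time);
  }
}

With these assumed numbers the loop expands 1 -> 10 -> 100 -> 1000 -> 10000 iterations and stops once the simulated run reaches the 0.5s target, which is the behaviour the for(;;) loop in doOneRepetition() relies on.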
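The other structural piece doNIterations() keeps from the old loop is the thread fan-out: b.threads - 1 workers are launched into the pool, the calling thread runs the benchmark body itself only after the others have started, and everything is joined before the results are read under the manager's mutex. The sketch below is a self-contained illustration of that fan-out/join shape only; the lambda and the atomic counter are stand-ins for RunInThread() and ThreadManager, which are internal to the library, and the thread and iteration counts are arbitrary.

// Sketch of the fan-out/join pattern in doNIterations(); the worker body and
// the shared counter stand in for RunInThread() and ThreadManager.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int threads = 4;
  const size_t iters = 1000;
  std::atomic<size_t> total_iters{0};

  auto worker = [&](int thread_index) {
    // Placeholder for the benchmark body that RunInThread() would execute.
    (void)thread_index;
    total_iters.fetch_add(iters, std::memory_order_relaxed);
  };

  // Run all but one thread in separate threads...
  std::vector<std::thread> pool(threads - 1);
  for (std::size_t ti = 0; ti < pool.size(); ++ti)
    pool[ti] = std::thread(worker, static_cast<int>(ti + 1));
  // ...and run one thread here directly, *after* the others were started.
  worker(0);

  // Wait for the pool, then read the aggregated result.
  for (std::thread& t : pool) t.join();
  std::printf("total iterations across %d threads: %zu\n", threads,
              total_iters.load());
}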