// Compile with g++ -o latencybench -lboost_thread-mt
// Should also work on MSVC and other platforms supported by Boost.
#include <boost/format.hpp>
#include <boost/thread/thread.hpp>
#include <boost/date_time.hpp>
#include <algorithm>
#include <cstdlib>
#include <csignal>
// Set to non-zero by sighandler() once SIGINT/SIGTERM has been received; the
// measurement loop in main() polls it to know when to stop.
// volatile std::sig_atomic_t is the object type that a signal handler is
// guaranteed to be able to write safely.
volatile std::sig_atomic_t m_quit = 0;

// Signal handler: only records that termination was requested. The signal
// number is ignored because every signal it is installed for means "stop".
extern "C" void sighandler(int) {
    m_quit = 1;
}
// Formats a count for the histogram legend: "one occurrence" for exactly 1,
// otherwise "<val> occurrences" (including 0).
std::string num(unsigned val) {
    if (val == 1) return "one occurrence";
    return boost::lexical_cast<std::string>(val) + " occurrences";
}
int main(int argc, char** argv) {
using namespace boost::posix_time;
std::signal(SIGINT, sighandler);
std::signal(SIGTERM, sighandler);
time_duration duration = milliseconds(10);
if (argc > 1) {
try {
if (argc != 2) throw 1;
unsigned ms = boost::lexical_cast<unsigned>(argv[1]);
if (ms > 1000) throw 2;
duration = milliseconds(ms);
} catch (...) {
std::cerr << "Usage: " << argv[0] << " milliseconds" << std::endl;
typedef std::map<long, unsigned> Durations;
Durations durations;
unsigned samples = 0, wrongsamples = 0;
unsigned max = 0;
long last = -1;
std::cout << "Measuring actual sleep delays when requesting " << duration.total_milliseconds() << " ms: (Ctrl+C when done)" << std::endl;
ptime begin = boost::get_system_time();
while (!m_quit) {
ptime start = boost::get_system_time();
boost::this_thread::sleep(start + duration);
long actual = (boost::get_system_time() - start).total_milliseconds();
unsigned num = ++durations[actual];
if (actual != last) {
std::cout << "\r " << actual << " ms " << std::flush;
last = actual;
if (actual != duration.total_milliseconds()) {
if (num > max) max = num;
std::cout << "spike at " << start - begin << std::endl;
last = -1;
if (samples == 0) return 0;
std::cout << "\rTotal measurement duration: " << boost::get_system_time() - begin << "\n";
std::cout << "Number of samples collected: " << samples << "\n";
std::cout << "Incorrect delay count: " << wrongsamples << boost::format(" (%.2f %%)") % (100.0 * wrongsamples / samples) << "\n\n";
std::cout << "Histogram of actual delays:\n\n";
unsigned correctsamples = samples - wrongsamples;
const unsigned line = 60;
double scale = 1.0;
char ch = '+';
if (max > line) {
scale = double(line) / max;
ch = '*';
double correctscale = 1.0;
if (correctsamples > line) correctscale = double(line) / correctsamples;
for (Durations::const_iterator it = durations.begin(); it != durations.end(); ++it) {
std::string bar;
if (it->first == duration.total_milliseconds()) bar = std::string(correctscale * it->second, '>');
else bar = std::string(scale * it->second, ch);
std::cout << boost::format("%5d ms | %s %d") % it->first % bar % it->second << std::endl;
std::cout << "\n";
std::string indent(30, ' ');
std::cout << indent << "+-- Legend ----------------------------------\n";
std::cout << indent << "| > " << num(1.0 / correctscale) << " (of " << duration.total_milliseconds() << " ms delay)\n";
if (wrongsamples > 0) std::cout << indent << "| " << ch << " " << num(1.0 / scale) << " (of any other delay)\n";
Ubuntu 2.6.32-14-genericカーネルでの結果。測定中、4つのコアでC++コードをコンパイルし、同時にOpenGLグラフィックスでゲームをプレイしていました(より興味深いものにするため)。
Total measurement duration: 00:01:45.191465
Number of samples collected: 10383
Incorrect delay count: 196 (1.89 %)
Histogram of actual delays:
10 ms | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 10187
11 ms | *************************************************** 70
12 ms | ************************************************************ 82
13 ms | ********* 13
14 ms | ********* 13
15 ms | ** 4
17 ms | *** 5
18 ms | * 2
19 ms | **** 6
20 ms | 1
+-- Legend ----------------------------------
| > 169 occurrences (of 10 ms delay)
| * one occurrence (of any other delay)
# Print one line per scheduler context switch: the PID being switched out, the
# PID being switched in, and the current wall-clock time in microseconds.
# prev_pid and next_pid are expected to be provided by the scheduler.ctxswitch
# tapset probe.
probe scheduler.ctxswitch {
    printf("Switch from %d to %d at %d\n", prev_pid, next_pid, gettimeofday_us())
}
コンテキストスイッチのコストを測定するのは少し難しいです。 1つのCPUで2つのプロセスを実行し、それらの間に3つのLinuxパイプを設定することで、コンテキストスイッチに費やした時間を計算できます。
次に、最初のプロセスが最初のパイプに書き込みを行い、2番目のパイプからの読み取りを待ちます。 OSは、最初のプロセスが2番目のパイプからデータを読み取れるようになるのを待っていることを検知すると、そのプロセスをブロック状態にし、もう一方のプロセスに切り替えます。切り替え先のプロセスは最初のパイプから読み取り、2番目のパイプに書き込みます。 2番目のプロセスが最初のパイプから再度読み取ろうとするとブロックされるため、このようにして通信の往復サイクルが続きます。この通信のコストを繰り返し測定することで、コンテキストスイッチのコストを適切に見積もることができます。
#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <linux/unistd.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
/*
 * Returns the caller's process ID by invoking the getpid system call directly
 * instead of going through the C library's getpid().
 * NOTE(review): presumably written to bypass any PID caching in the C
 * library -- confirm whether this override is still needed.
 */
pid_t getpid( void )
{
    return (pid_t) syscall( __NR_getpid );
}
/*
 * Estimates the cost of a context switch by bouncing a short message between
 * a parent and a child process over two pipes while both are pinned to the
 * same CPU, then prints the average time per switch in milliseconds.
 *
 * Returns 0 on success and -1 if a pipe or the child process cannot be
 * created; exits with EXIT_FAILURE if the CPU affinity or scheduling policy
 * cannot be set or if a pipe transfer fails.
 */
int main()
{
    /*
     * To make sure the context-switching processes are located on the same
     * processor:
     * 1. Bind the process to one particular processor using
     *    sched_setaffinity.
     * 2. Get the maximum priority value (sched_get_priority_max) that can be
     *    used with the SCHED_FIFO scheduling policy and switch to it.
     * This is done before fork() so that it applies to both processes.
     */
    enum { ROUND_TRIPS = 5 };   /* each round trip costs two context switches */

    cpu_set_t set;
    struct sched_param prio_param;
    int prio_max;

    CPU_ZERO( &set );
    CPU_SET( 0, &set );
    memset(&prio_param,0,sizeof(struct sched_param));

    if (sched_setaffinity( getpid(), sizeof( cpu_set_t ), &set ))
    {
        perror( "sched_setaffinity" );
        exit(EXIT_FAILURE);
    }

    if( (prio_max = sched_get_priority_max(SCHED_FIFO)) < 0 )
    {
        perror( "sched_get_priority_max" );
        exit(EXIT_FAILURE);
    }

    prio_param.sched_priority = prio_max;
    if( sched_setscheduler(getpid(),SCHED_FIFO,&prio_param) < 0 )
    {
        /* Switching to SCHED_FIFO normally requires elevated privileges. */
        perror( "sched_setscheduler" );
        exit(EXIT_FAILURE);
    }

    /*
     * 1. The pipes are created before fork() so that the parent and the child
     *    share them; the two processes pass a string back and forth, and
     *    every hand-over forces a context switch.
     * 2. The parent first takes the current timestamp (gettimeofday) and then
     *    writes to the first pipe. The child reads that message and writes
     *    its own string to the second pipe, which the parent reads. After the
     *    last round trip the child takes the current timestamp and sends it
     *    to the parent through the time pipe.
     *    The difference between the two timestamps is roughly n * 2 times
     *    the context switch time, n being the number of round trips.
     */
    pid_t ret;
    int firstpipe[2];
    int secondpipe[2];
    int timepipe[2];
    ssize_t nbytes;
    char string[] = "Hello, world!\n";
    char temp[] = "Sumit Gemini!\n";
    char readbuffer[80];
    char tempbuffer[80];
    struct timeval start,end;

    // Create an unnamed first pipe (parent -> child)
    if (pipe(firstpipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create pipe\n");
        return -1;
    }

    // Create an unnamed second pipe (child -> parent)
    if (pipe(secondpipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create second pipe\n");
        return -1;
    }

    // Create an unnamed time pipe, used by the child to hand its end timestamp to the parent
    if (pipe(timepipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create time pipe\n");
        return -1;
    }

    ret = fork();
    if(ret==-1)
    {
        perror( "fork" );
        return -1;
    }
    else if(ret==0)
    {
        int n=-1;
        printf("Child ----> %d\n",(int)getpid());

        for(n=0;n<ROUND_TRIPS;n++)
        {
            /* Blocks until the parent has written, then answers it. */
            nbytes = read(firstpipe[0], readbuffer, sizeof(readbuffer));
            if( nbytes <= 0 )
            {
                fprintf(stderr, "child: Failed to read from first pipe\n");
                exit(EXIT_FAILURE);
            }
            printf("Received string: %s", readbuffer);
            if( write(secondpipe[1], temp, strlen(temp)+1) < 0 )
            {
                fprintf(stderr, "child: Failed to write in second pipe\n");
                exit(EXIT_FAILURE);
            }
        }

        /* End of the measured interval. */
        gettimeofday(&end,0);
        n = sizeof(struct timeval);
        if( write(timepipe[1],&end,sizeof(struct timeval)) != n )
        {
            fprintf(stderr, "child: Failed to write in time pipe\n");
            exit(EXIT_FAILURE);
        }
    }
    else
    {
        double switch_time;
        int n=-1;
        printf("Parent ----> %d\n",(int)getpid());

        /* Start of the measured interval. */
        gettimeofday(&start,0);

        for(n=0;n<ROUND_TRIPS;n++)
        {
            /* Send a string, then block until the child has answered. */
            if( write(firstpipe[1], string, strlen(string)+1) < 0 )
            {
                fprintf(stderr, "Parent: Failed to write in first pipe\n");
                exit(EXIT_FAILURE);
            }
            if( read(secondpipe[0], tempbuffer, sizeof(tempbuffer)) <= 0 )
            {
                fprintf(stderr, "Parent: Failed to read from second pipe\n");
                exit(EXIT_FAILURE);
            }
            printf("Received temp: %s", tempbuffer);
        }

        n = sizeof(struct timeval);
        if( read(timepipe[0],&end,sizeof(struct timeval)) != n )
        {
            fprintf(stderr, "Parent: Failed to read from time pipe\n");
            exit(EXIT_FAILURE);
        }

        /* Elapsed microseconds converted to milliseconds. */
        switch_time = ((end.tv_sec-start.tv_sec)*1000000.0+(end.tv_usec-start.tv_usec))/1000.0;
        printf("context switch between two processes: %0.6lfms\n",switch_time/(ROUND_TRIPS*2));
    }

    return 0;
}