下面的实验来自VTune,旨在探究Preemption Context Switches的来源。
实验一:多线程无锁保护
speedup-example-no-mutex.cpp
/*
 * speedup-example-no-mutex
 *
 * Starts N worker threads, pins worker k to core k, and lets every worker
 * update the shared globals `nwait` and `sum` with no locking at all.
 *
 * NOTE: the unsynchronized accesses to `nwait` and `sum` are a data race.
 * They are kept exactly as in the original listing; do not copy this as a
 * pattern for real code.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* cpu_set_t, CPU_ZERO/CPU_SET, pthread_setaffinity_np */
#endif

#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>

#define N 4     /* number of worker threads */
#define M 30000 /* outer iterations per worker */

int nwait = 0;
/* Unsigned so that the wrapped i^6 terms accumulate without signed overflow. */
volatile unsigned long long sum;
long loops = 6e3; /* inner-loop trip count */

/*
 * Abort the program if a pthread call reported an error.
 * pthread functions return the error number directly instead of setting
 * errno, so `rc` is printed as-is.
 */
static void check(int rc, const char *what)
{
    if (rc != 0) {
        fprintf(stderr, "%s failed: error %d\n", what, rc);
        exit(EXIT_FAILURE);
    }
}

/*
 * Pin the calling thread to the single CPU `core_id`.
 * The call is made outside assert() so it still happens under -DNDEBUG.
 */
void set_affinity(int core_id)
{
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);
    CPU_SET(core_id, &cpuset);
    check(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset),
          "pthread_setaffinity_np()");
}

/*
 * Worker entry point.
 *
 * arg: core index carried in the pointer value (see main()).
 * Returns NULL.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            /*
             * i^6 no longer fits in a signed 64-bit integer once i exceeds
             * roughly 1.4e3, so the product is formed in unsigned
             * arithmetic, which wraps instead of being undefined.
             */
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}

int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* The main thread is pinned to core 23; the call fails on machines
     * that do not have that core. */
    set_affinity(23);

    pthread_t th[N];

    for (long i = 0; i < N; ++i) {
        /* The core index travels as the pointer value; `long` matches the
         * (int)(long) cast in thread_func(). */
        check(pthread_create(&th[i], NULL, thread_func, (void *)i),
              "pthread_create()");
    }
    for (long i = 0; i < N; ++i) {
        check(pthread_join(th[i], NULL), "pthread_join()");
    }
    return EXIT_SUCCESS;
}
Preemption Context Switches由两部分组成:clone和Unknown stack frame(s)。
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of one inner loop that adds i^6 to the shared `sum`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            /*
             * i^6 overflows a signed 64-bit integer once i exceeds roughly
             * 1.4e3; forming the product in unsigned arithmetic makes it
             * wrap instead of being undefined behaviour.
             */
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of one inner loop that adds i to the shared `sum`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then increment the shared counter `nwait` M times and do nothing else.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `nwait` is incremented without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        nwait++;
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of two back-to-back inner loops, each adding i to the
 * shared `sum`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of three back-to-back inner loops, each adding i to
 * the shared `sum`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of four back-to-back inner loops, each adding i to
 * the shared `sum`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of a single inner loop whose body adds i to the shared
 * `sum` four times per iteration.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of a single inner loop whose body adds i to the shared
 * `sum` seven times per iteration.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of four separate inner loops, each adding i to the
 * shared `sum` once per iteration.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds of one inner loop that performs four additions of i
 * to the shared `sum` per iteration.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * `sum` is updated without synchronization, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL;
}
实验二:多线程加锁
speedup-example-mutex-only.cpp
/*
 * speedup-example-mutex-only
 *
 * Starts N worker threads and pins worker k to core k. In every round a
 * worker takes the global mutex, bumps `nwait` and runs the `sum += i`
 * loop, releases the mutex, and then runs the i^6 loop outside the lock.
 *
 * NOTE: the i^6 loop updates `sum` after the mutex has been released, so
 * those updates are unsynchronized, exactly as in the original listing.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* cpu_set_t, CPU_ZERO/CPU_SET, pthread_setaffinity_np */
#endif

#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>

#define N 4     /* number of worker threads */
#define M 30000 /* outer iterations per worker */

int nwait = 0;
/* Unsigned so that the wrapped i^6 terms accumulate without signed overflow. */
volatile unsigned long long sum;
long loops = 6e3; /* inner-loop trip count */
/* Statically initialized; the original never initialized the mutex. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Abort the program if a pthread call reported an error.
 * pthread functions return the error number directly instead of setting
 * errno, so `rc` is printed as-is.
 */
static void check(int rc, const char *what)
{
    if (rc != 0) {
        fprintf(stderr, "%s failed: error %d\n", what, rc);
        exit(EXIT_FAILURE);
    }
}

/*
 * Pin the calling thread to the single CPU `core_id`.
 * The call is made outside assert() so it still happens under -DNDEBUG.
 */
void set_affinity(int core_id)
{
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);
    CPU_SET(core_id, &cpuset);
    check(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset),
          "pthread_setaffinity_np()");
}

/*
 * Worker entry point.
 *
 * arg: core index carried in the pointer value (see main()).
 * Returns NULL.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        /* Section executed while holding the mutex. */
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        pthread_mutex_unlock(&mutex);

        /* Section executed after the mutex is released. */
        for (long i = 0; i < loops; i++) {
            /*
             * i^6 no longer fits in a signed 64-bit integer once i exceeds
             * roughly 1.4e3, so the product is formed in unsigned
             * arithmetic, which wraps instead of being undefined.
             */
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}

int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* The main thread is pinned to core 23; the call fails on machines
     * that do not have that core. */
    set_affinity(23);

    pthread_t th[N];

    for (long i = 0; i < N; ++i) {
        /* The core index travels as the pointer value; `long` matches the
         * (int)(long) cast in thread_func(). */
        check(pthread_create(&th[i], NULL, thread_func, (void *)i),
              "pthread_create()");
    }
    for (long i = 0; i < N; ++i) {
        check(pthread_join(th[i], NULL), "pthread_join()");
    }
    return EXIT_SUCCESS;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which the whole body (increment of `nwait` plus one
 * `sum += i` loop) executes while holding `mutex`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which only `nwait++` executes while holding
 * `mutex`, followed by one i^6 loop outside the lock.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 * The i^6 loop updates `sum` without holding the mutex, as in the original.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);

        for (long i = 0; i < loops; i++) {
            /*
             * i^6 overflows a signed 64-bit integer once i exceeds roughly
             * 1.4e3; forming the product in unsigned arithmetic makes it
             * wrap instead of being undefined behaviour.
             */
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds that only lock `mutex`, increment `nwait`, and unlock.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which `nwait++` and two `sum += i` loops all
 * execute while holding `mutex`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which `nwait++` and four `sum += i` loops all
 * execute while holding `mutex`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        for (long i = 0; i < loops; i++) {
            sum += i;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which `nwait++` and a single loop performing four
 * additions of i to `sum` per iteration execute while holding `mutex`.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which only `nwait++` executes while holding
 * `mutex`, followed by two i^6 loops outside the lock.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 * The i^6 loops update `sum` without holding the mutex, as in the original.
 *
 * i^6 overflows a signed 64-bit integer once i exceeds roughly 1.4e3, so
 * each product is formed in unsigned arithmetic, which wraps instead of
 * being undefined behaviour.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);

        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which only `nwait++` executes while holding
 * `mutex`, followed by four i^6 loops outside the lock.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 * The i^6 loops update `sum` without holding the mutex, as in the original.
 *
 * i^6 overflows a signed 64-bit integer once i exceeds roughly 1.4e3, so
 * each product is formed in unsigned arithmetic, which wraps instead of
 * being undefined behaviour.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);

        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
/*
 * thread_func variant: pin the thread to the core index passed in `arg`,
 * then run M rounds in which only `nwait++` executes while holding
 * `mutex`, followed by a single loop outside the lock that adds i^6 to
 * `sum` four times per iteration.
 *
 * arg: core index carried in the pointer value.
 * Returns NULL (the original fell off the end of a non-void function).
 *
 * The original spelled the lock calls `phtread_mutex_*`, which does not
 * name any pthread function; the spelling is corrected here.
 * The loop updates `sum` without holding the mutex, as in the original.
 *
 * i^6 overflows a signed 64-bit integer once i exceeds roughly 1.4e3, so
 * each product is formed in unsigned arithmetic, which wraps instead of
 * being undefined behaviour.
 */
void *thread_func(void *arg)
{
    set_affinity((int)(long)arg);

    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);

        for (long i = 0; i < loops; i++) {
            unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
Preemption Context Switches 和 Synchronization Context Switches,布布扣,bubuko.com
Preemption Context Switches 和 Synchronization Context Switches
原文:http://blog.csdn.net/bluecloudmatrix/article/details/32178639