I'm new to threads,my teacher asked for a 9 nine threads program that makes 3 arrays A B C with 10^6 elements each-all equal to 1. So we must use 9 threads to fill (with the number 1) those arrays faster. I came with a plan to use 3 threads to fill each array. I cut the arrays into 3 parts ... i=0 to i=333332... i=333333 to i=666662... i=666666 to i=999999. But when I run the program with nine threads runs slower than the single threaded program ( which contains a for loop from 0 to 10^6 and fills the 3 arrays with the number 1).
this is my 9 threads code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/sysinfo.h>
#include <time.h>
double A[1000000];
double B[1000000];
double C[1000000];
double D[1000000];
void * fillA1(void * tid){
for (int i=0;i<333333;i ){
A[i]=1.0;
}
return NULL;
}
void * fillA2(void * tid){
for (int i=333333;i<666666;i ){
A[i]=1.0;
}
return NULL;
}
void * fillA3(void * tid){
for (int i=666666;i<1000000;i ){
A[i]=1.0;
}
return NULL;
}
void * fillB1(void*tid){
for (int i=0;i<333333;i ){
B[i]=1.0;
}
return NULL;
}
void * fillB2(void*tid){
for (int i=333333;i<666666;i ){
B[i]=1.0;
}
return NULL;
}
void * fillB3 (void*tid){
for (int i=666666;i<1000000;i ){
B[i]=1.0;
}
return NULL;
}
void * fillC1(void*tid){
for (int i=0;i<333333;i ){
C[i]=1.0;
}
return NULL;
}
void * fillC2(void*tid){
for (int i=333333;i<666666;i ){
C[i]=1.0;
}
return NULL;
}
void * fillC3(void*tid){
for (int i=666666;i<1000000;i ){
C[i]=1.0;
}
return NULL;
}
int main (void){
double time_spent = 0.0;
clock_t begin = clock();
// declare 9 thread type variables;
pthread_t tid0;// pthread_t is a data type used to uniquely identify a thread.
pthread_t tid1;
pthread_t tid2;
pthread_t tid3;
pthread_t tid4;
pthread_t tid5;
pthread_t tid6;
pthread_t tid7;
pthread_t tid8;
//create an array with 9 threads address
pthread_t * pthreads[] ={&tid0,&tid1,&tid2,&tid3,&tid4,&tid5,&tid6,&tid7,&tid8};
// use 9 threads to fill the 3 arrays simultaneously
pthread_create(pthreads[0],NULL,fillA1,NULL);
pthread_create(pthreads[1],NULL,fillA2,NULL);
pthread_create(pthreads[2],NULL,fillA3,NULL);
pthread_create(pthreads[3],NULL,fillB1, NULL);
pthread_create(pthreads[4],NULL,fillB2, NULL);
pthread_create(pthreads[5],NULL,fillB3, NULL);
pthread_create(pthreads[6],NULL,fillC1,NULL);
pthread_create(pthreads[7],NULL,fillC2,NULL);
pthread_create(pthreads[8],NULL,fillC3,NULL);
for(int i=0;i<9;i ){
pthread_join(*pthreads[i],NULL);
}
clock_t end = clock();
// calculate elapsed time by finding difference (end - begin) and
// dividing the difference by CLOCKS_PER_SEC to convert to seconds
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("The elapsed time is %f seconds", time_spent);
return 0;
}
this is my single threaded code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <pthread.h>
#include<time.h>
double A[1000000];
double B[1000000];
double C[1000000];
double D[1000000];
int main (void){
//to store the execution time of code
double time_spent = 0.0;
clock_t begin = clock();
for (int i=0;i<1000000;i ){
A[i]=1;
B[i]=1;
C[i]=1;
}
clock_t end = clock();
// calculate elapsed time by finding difference (end - begin) and
// dividing the difference by CLOCKS_PER_SEC to convert to seconds
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("The elapsed time is %f seconds\n", time_spent);
return 0;
}
CodePudding user response:
Having a separate thread function for each segment in each array, makes the program less flexible. The usual way is to have a struct that describes the work for each thread (e.g. pointer to array and start/end indexes).
Also, clock_gettime
is probably better than clock
for timing. And, do not put printf
in anything that is being timed.
Note that using 3 arrays (e.g. A/B/C
) may not be any different than a single array that is 3 times the size (but I left that in).
Here's a refactored version that will let you experiment more:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/sysinfo.h>
#include <time.h>
#define DATA_SIZE 1000000 // number of elements in array
#define NPER 3 // number of threads per array
#define NARRAY 3 // number of arrays
#define CHUNK_SIZE (DATA_SIZE / NPER) // amount of data for each thread
#define NTHREAD (NARRAY * NPER) // number of threads
double A[DATA_SIZE];
double B[DATA_SIZE];
double C[DATA_SIZE];
double D[DATA_SIZE];
struct work {
pthread_t tid;
double *data;
int start;
int count;
};
double
tscgetf(void)
{
struct timespec ts;
double sec;
clock_gettime(CLOCK_MONOTONIC,&ts);
sec = ts.tv_nsec;
sec /= 1e9;
sec = ts.tv_sec;
return sec;
}
void *
fill(void *tid)
{
struct work *work = tid;
double *data = &work->data[work->start];
double *edata = &data[work->count];
for (; data < edata; data)
*data = 1.0;
return NULL;
}
int
main(void)
{
double time_spent = 0.0;
struct work buflist[NARRAY] = {
{ .data = A },
{ .data = B },
{ .data = C },
};
struct work *buf;
struct work threads[NTHREAD];
struct work *work;
int tidx = 0;
for (int ibuf = 0; ibuf < NARRAY; ibuf) {
buf = &buflist[ibuf];
for (int ichunk = 0; ichunk < NPER; ichunk, tidx) {
work = &threads[tidx];
double *ptr = buf->data;
work->data = ptr;
work->start = buf->start;
if (ichunk == (NPER - 1))
work->count = DATA_SIZE - buf->start;
else
work->count = CHUNK_SIZE;
printf("%d: ibuf=%d start=%d count=%d (end=%d)\n",
tidx,ibuf,work->start,work->count,work->start work->count);
buf->start = work->count;
}
}
double begin = tscgetf();
for (tidx = 0; tidx < NTHREAD; tidx) {
work = &threads[tidx];
pthread_create(&work->tid,NULL,fill,work);
}
for (tidx = 0; tidx < NTHREAD; tidx) {
work = &threads[tidx];
pthread_join(work->tid,NULL);
}
double end = tscgetf();
// calculate elapsed time
time_spent = end - begin;
printf("The elapsed time is %.9f seconds\n", time_spent);
return 0;
}
Here is the output for my system:
0: ibuf=0 start=0 count=333333 (end=333333)
1: ibuf=0 start=333333 count=333333 (end=666666)
2: ibuf=0 start=666666 count=333334 (end=1000000)
3: ibuf=1 start=0 count=333333 (end=333333)
4: ibuf=1 start=333333 count=333333 (end=666666)
5: ibuf=1 start=666666 count=333334 (end=1000000)
6: ibuf=2 start=0 count=333333 (end=333333)
7: ibuf=2 start=333333 count=333333 (end=666666)
8: ibuf=2 start=666666 count=333334 (end=1000000)
The elapsed time is 0.004271893 seconds
UPDATE:
For what you really want to measure: the effect of number of threads on performance, I'd use a single array as the number of threads doesn't have to be a multiple of the number of data arrays.
Note: To account for system load, timeslicing, I would run each benckmark (e.g. at a given number of threads) a number of different times (e.g. repeat 10 times) and use the lowest elapsed time for each thread count. (You could automate this with a script)
Here's the code I would use:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/sysinfo.h>
#include <time.h>
#ifndef DATA_SIZE
#define DATA_SIZE 4000000 // number of elements in array
#endif
#ifndef NTHREAD
#define NTHREAD 3 // number of threads per array
#endif
#define CHUNK_SIZE (DATA_SIZE / NTHREAD) // amount of data for each thread
double A[DATA_SIZE];
struct work {
pthread_t tid;
double *data;
int start;
int count;
};
double
tscgetf(void)
{
struct timespec ts;
double sec;
clock_gettime(CLOCK_MONOTONIC,&ts);
sec = ts.tv_nsec;
sec /= 1e9;
sec = ts.tv_sec;
return sec;
}
void *
fill(void *tid)
{
struct work *work = tid;
double *data = &work->data[work->start];
double *edata = &data[work->count];
for (; data < edata; data)
*data = 1.0;
return NULL;
}
int
main(void)
{
double time_spent = 0.0;
struct work threads[NTHREAD];
struct work *work;
int tidx = 0;
int start = 0;
for (int tidx = 0; tidx < NTHREAD; tidx) {
work = &threads[tidx];
double *ptr = A;
work->data = ptr;
work->start = start;
if (tidx == (NTHREAD - 1))
work->count = DATA_SIZE - start;
else
work->count = CHUNK_SIZE;
printf("%d: start=%d count=%d (end=%d)\n",
tidx,work->start,work->count,work->start work->count);
start = work->count;
}
double begin = tscgetf();
for (tidx = 0; tidx < NTHREAD; tidx) {
work = &threads[tidx];
pthread_create(&work->tid,NULL,fill,work);
}
for (tidx = 0; tidx < NTHREAD; tidx) {
work = &threads[tidx];
pthread_join(work->tid,NULL);
}
double end = tscgetf();
// calculate elapsed time
time_spent = end - begin;
printf("The elapsed time is %.9f seconds (RATE: %.3f/sec)\n",
time_spent,(double) DATA_SIZE / time_spent);
return 0;
}
And here is the output for various thread counts:
0: start=0 count=4000000 (end=4000000)
The elapsed time is 0.016544766 seconds (RATE: 241768306.067/sec)
0: start=0 count=2000000 (end=2000000)
1: start=2000000 count=2000000 (end=4000000)
The elapsed time is 0.008973760 seconds (RATE: 445744055.862/sec)
0: start=0 count=1333333 (end=1333333)
1: start=1333333 count=1333333 (end=2666666)
2: start=2666666 count=1333334 (end=4000000)
The elapsed time is 0.006181198 seconds (RATE: 647123775.983/sec)
0: start=0 count=1000000 (end=1000000)
1: start=1000000 count=1000000 (end=2000000)
2: start=2000000 count=1000000 (end=3000000)
3: start=3000000 count=1000000 (end=4000000)
The elapsed time is 0.006610514 seconds (RATE: 605096656.791/sec)
0: start=0 count=800000 (end=800000)
1: start=800000 count=800000 (end=1600000)
2: start=1600000 count=800000 (end=2400000)
3: start=2400000 count=800000 (end=3200000)
4: start=3200000 count=800000 (end=4000000)
The elapsed time is 0.005417253 seconds (RATE: 738381626.381/sec)
0: start=0 count=666666 (end=666666)
1: start=666666 count=666666 (end=1333332)
2: start=1333332 count=666666 (end=1999998)
3: start=1999998 count=666666 (end=2666664)
4: start=2666664 count=666666 (end=3333330)
5: start=3333330 count=666670 (end=4000000)
The elapsed time is 0.004728798 seconds (RATE: 845880951.049/sec)
0: start=0 count=571428 (end=571428)
1: start=571428 count=571428 (end=1142856)
2: start=1142856 count=571428 (end=1714284)
3: start=1714284 count=571428 (end=2285712)
4: start=2285712 count=571428 (end=2857140)
5: start=2857140 count=571428 (end=3428568)
6: start=3428568 count=571432 (end=4000000)
The elapsed time is 0.004353798 seconds (RATE: 918738077.652/sec)
0: start=0 count=500000 (end=500000)
1: start=500000 count=500000 (end=1000000)
2: start=1000000 count=500000 (end=1500000)
3: start=1500000 count=500000 (end=2000000)
4: start=2000000 count=500000 (end=2500000)
5: start=2500000 count=500000 (end=3000000)
6: start=3000000 count=500000 (end=3500000)
7: start=3500000 count=500000 (end=4000000)
The elapsed time is 0.006288737 seconds (RATE: 636057758.927/sec)
0: start=0 count=444444 (end=444444)
1: start=444444 count=444444 (end=888888)
2: start=888888 count=444444 (end=1333332)
3: start=1333332 count=444444 (end=1777776)
4: start=1777776 count=444444 (end=2222220)
5: start=2222220 count=444444 (end=2666664)
6: start=2666664 count=444444 (end=3111108)
7: start=3111108 count=444444 (end=3555552)
8: start=3555552 count=444448 (end=4000000)
The elapsed time is 0.005567823 seconds (RATE: 718413632.935/sec)
0: start=0 count=400000 (end=400000)
1: start=400000 count=400000 (end=800000)
2: start=800000 count=400000 (end=1200000)
3: start=1200000 count=400000 (end=1600000)
4: start=1600000 count=400000 (end=2000000)
5: start=2000000 count=400000 (end=2400000)
6: start=2400000 count=400000 (end=2800000)
7: start=2800000 count=400000 (end=3200000)
8: start=3200000 count=400000 (end=3600000)
9: start=3600000 count=400000 (end=4000000)
The elapsed time is 0.005202681 seconds (RATE: 768834332.840/sec)
0: start=0 count=363636 (end=363636)
1: start=363636 count=363636 (end=727272)
2: start=727272 count=363636 (end=1090908)
3: start=1090908 count=363636 (end=1454544)
4: start=1454544 count=363636 (end=1818180)
5: start=1818180 count=363636 (end=2181816)
6: start=2181816 count=363636 (end=2545452)
7: start=2545452 count=363636 (end=2909088)
8: start=2909088 count=363636 (end=3272724)
9: start=3272724 count=363636 (end=3636360)
10: start=3636360 count=363640 (end=4000000)
The elapsed time is 0.004945641 seconds (RATE: 808793049.562/sec)
0: start=0 count=333333 (end=333333)
1: start=333333 count=333333 (end=666666)
2: start=666666 count=333333 (end=999999)
3: start=999999 count=333333 (end=1333332)
4: start=1333332 count=333333 (end=1666665)
5: start=1666665 count=333333 (end=1999998)
6: start=1999998 count=333333 (end=2333331)
7: start=2333331 count=333333 (end=2666664)
8: start=2666664 count=333333 (end=2999997)
9: start=2999997 count=333333 (end=3333330)
10: start=3333330 count=333333 (end=3666663)
11: start=3666663 count=333337 (end=4000000)
The elapsed time is 0.005431988 seconds (RATE: 736378677.432/sec)
0: start=0 count=307692 (end=307692)
1: start=307692 count=307692 (end=615384)
2: start=615384 count=307692 (end=923076)
3: start=923076 count=307692 (end=1230768)
4: start=1230768 count=307692 (end=1538460)
5: start=1538460 count=307692 (end=1846152)
6: start=1846152 count=307692 (end=2153844)
7: start=2153844 count=307692 (end=2461536)
8: start=2461536 count=307692 (end=2769228)
9: start=2769228 count=307692 (end=3076920)
10: start=3076920 count=307692 (end=3384612)
11: start=3384612 count=307692 (end=3692304)
12: start=3692304 count=307696 (end=4000000)
The elapsed time is 0.005035344 seconds (RATE: 794384720.028/sec)
0: start=0 count=285714 (end=285714)
1: start=285714 count=285714 (end=571428)
2: start=571428 count=285714 (end=857142)
3: start=857142 count=285714 (end=1142856)
4: start=1142856 count=285714 (end=1428570)
5: start=1428570 count=285714 (end=1714284)
6: start=1714284 count=285714 (end=1999998)
7: start=1999998 count=285714 (end=2285712)
8: start=2285712 count=285714 (end=2571426)
9: start=2571426 count=285714 (end=2857140)
10: start=2857140 count=285714 (end=3142854)
11: start=3142854 count=285714 (end=3428568)
12: start=3428568 count=285714 (end=3714282)
13: start=3714282 count=285718 (end=4000000)
The elapsed time is 0.004831767 seconds (RATE: 827854458.801/sec)
0: start=0 count=266666 (end=266666)
1: start=266666 count=266666 (end=533332)
2: start=533332 count=266666 (end=799998)
3: start=799998 count=266666 (end=1066664)
4: start=1066664 count=266666 (end=1333330)
5: start=1333330 count=266666 (end=1599996)
6: start=1599996 count=266666 (end=1866662)
7: start=1866662 count=266666 (end=2133328)
8: start=2133328 count=266666 (end=2399994)
9: start=2399994 count=266666 (end=2666660)
10: start=2666660 count=266666 (end=2933326)
11: start=2933326 count=266666 (end=3199992)
12: start=3199992 count=266666 (end=3466658)
13: start=3466658 count=266666 (end=3733324)
14: start=3733324 count=266676 (end=4000000)
The elapsed time is 0.006333866 seconds (RATE: 631525817.104/sec)
0: start=0 count=250000 (end=250000)
1: start=250000 count=250000 (end=500000)
2: start=500000 count=250000 (end=750000)
3: start=750000 count=250000 (end=1000000)
4: start=1000000 count=250000 (end=1250000)
5: start=1250000 count=250000 (end=1500000)
6: start=1500000 count=250000 (end=1750000)
7: start=1750000 count=250000 (end=2000000)
8: start=2000000 count=250000 (end=2250000)
9: start=2250000 count=250000 (end=2500000)
10: start=2500000 count=250000 (end=2750000)
11: start=2750000 count=250000 (end=3000000)
12: start=3000000 count=250000 (end=3250000)
13: start=3250000 count=250000 (end=3500000)
14: start=3500000 count=250000 (end=3750000)
15: start=3750000 count=250000 (end=4000000)
The elapsed time is 0.005258777 seconds (RATE: 760633102.332/sec)