/* * pipeflex.c - flexible benchmark for measuring pipe's bandwidth * * Copyright (C) 2000 IBM * * Written by Rajan Ravindran (rajancr@us.ibm.com) 29 Oct 2001 * Based on reflex.c written by Shailabh Nagar (nagar@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define STACK_SIZE (8192) #define CLONE_FLAGS (CLONE_VM | CLONE_SIGHAND | CLONE_FS) #define DEF_PERCENT (1) #define NUM_WARMUP (1) #define MIN_TRIALS (NUM_WARMUP+5) #define MAX_TRIALS (25) #define MAX_CHILDREN (1024) /* Macros related to active set formation * depend on setsize,num_active,setid * setid is a per-thread value that should be defined properly * before macros depending on it are called */ #define isextra(myid) (myid >= (regsetsize * num_active)) #define idextra(setid) ((regsetsize * num_active) + setid) #define needextra(setid) (setid < num_children - (regsetsize*num_active)) #define regsetstart(setid) (regsetsize * setid) #define regsetend(setid) ((regsetsize * (setid+1)) - 1) /* * system calls */ int __clone (int (*fn) (void *arg), void *thread_stack, int flags, void *arg); /* * prototypes for this file. */ void run_test_time(void); int bouncer(void *arg); int (*worker) (void *arg); double local_exec(void) ; double probrange(unsigned long top); void calibration(void) ; float variance(int n, float sum, float sum2); int confidence(int iterations); double uniform(double mean); void usage(void); char *child_stack ; struct timezone tz1; struct timeval tv1; struct timezone tz2; struct timeval tv2; struct timeval tvr; int num_children = 200; /* number of child processes to create */ int num_active ; int num_seconds = 20 ; /* number of seconds to run test */ int foutput = 0; /* controls type of output */ int verbose = 0; /* controls type of output */ int read_compute_time = 300; /* number of microseconds of computation before read*/ int write_compute_time = 300; /* number of microseconds of computation before write*/ int start_sem; /* sem to serialize test */ int stop_test = 0; /* flag to end test when convergence met */ int valid_test = 1; int read_size=1; /* size of message treated as token */ int write_size=1; double rounds_per_microsecond = 0.0 ; /* obtained through calibration */ int local_exec_count = 0; /* unused */ int mode = 0; /* 0 - R&W Nonblocking, 1 - R&W Blocking * 2 - R Blocking & W Nonblocking * 3 - W Blocking & R Nonblocking */ unsigned long long nbytes[MAX_CHILDREN]; int hash[MAX_CHILDREN]; int childpipe[MAX_CHILDREN][2]; /* pipes used to bounce token around */ float tau[MAX_TRIALS-1] = { 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101, 2.093, 2.086, 2.080, 2.074, 2.069, 2.064 }; char *child_bufs[MAX_CHILDREN]; struct _results { float data; float ave; float var; float conf; } results[MAX_TRIALS] = {{0.0}}; struct _total { unsigned long long count; char pad[24]; } *total; int main(int argc, char *argv[]) { int c; int clone_err; int i,rc; int exit_rc = 0; int wait_status; struct sched_param param; struct rlimit myrlimit; int TOKENSZ; while ((c = getopt(argc, argv, "c:t:x:y:r:w:o:m:v")) != -1) { switch (c) { case 'c': num_children = atoi(optarg); break; case 't': num_seconds = atoi(optarg); break; case 'x': read_compute_time = atoi(optarg); break; case 'y': write_compute_time = atoi(optarg); break; case 'r': read_size = atoi(optarg) * 1024; break; case 'w': write_size = atoi(optarg) * 1024; break; case 'o': foutput = atoi(optarg); break; case 'm': mode = atoi(optarg); break; case 'v': verbose = 1; break; default: usage(); goto exit_main2; } } if (num_children % 2 != 0) num_children -= 1; num_active = num_children/2; if (verbose) { printf("c:%d, t:%d, x:%d, y:%d, r:%d, w:%d, o:%d, m:%d, v:%d\n", num_children, num_seconds, read_compute_time, write_compute_time, read_size, write_size, foutput, mode, verbose); } if ((num_seconds <= 0) || (num_active <= 0) || (num_children <= 0) || (num_children > MAX_CHILDREN) || (read_compute_time < 0) || (write_compute_time < 0) || (read_size <= 0) || (write_size <= 0) ) { usage(); goto exit_main2; } /* Increase limits on number of open files */ /* normally 1024 (cur & max), set to MAX_CHILDREN */ myrlimit.rlim_cur = myrlimit.rlim_max = MAX_CHILDREN*2 ; if (setrlimit(RLIMIT_NOFILE,&myrlimit) != 0) { exit_rc = errno ; perror("setrlimit() "); goto exit_main2; } /* allocate childrens stacks*/ child_stack = malloc(num_children*STACK_SIZE); if (child_stack == NULL) { exit_rc = errno; perror ("malloc of 'child_stack' failed "); goto exit_main2; } /* open num_children pipes */ for (i=0; i< num_children/2; i++) { if (pipe(childpipe[i]) < 0) { exit_rc = errno; perror ("pipe() "); goto exit_main3; } } /* set the pipe access value blocking/Nonblocking*/ if (mode) { for (i=0; i< num_children/2; i++) { int rval, wval; rval = fcntl(childpipe[i][0], F_GETFL); wval = fcntl(childpipe[i][1], F_GETFL); switch (mode) { default: break; case 1: rval |= O_NONBLOCK; wval |= O_NONBLOCK; break; case 2: wval |= O_NONBLOCK; break; case 3: rval |= O_NONBLOCK; break; } rval = fcntl(childpipe[i][0], F_SETFL, rval); if (rval == -1) perror("fcntl(F_SETFL,read) "); wval = fcntl(childpipe[i][1], F_SETFL, wval); if (wval == -1) perror("fcntl(F_SETFL,write) "); } } /* calibrate internal loops */ calibration(); /* start_sem is used to start all children at same time */ start_sem = semget (IPC_PRIVATE, 1, IPC_CREAT | IPC_EXCL | 0660); if (start_sem == -1) { exit_rc = errno; perror("semget(start_sem) IPC_CREATE "); goto exit_main4; } /* allocate/initialize statistic variables */ total = malloc(num_children*sizeof(struct _total)); if (total == NULL) { exit_rc = errno; perror ("malloc of 'total' failed "); goto exit_main3; } for (i = 0 ; i < num_children ; i++) total[i].count = 0; TOKENSZ = ((read_size > write_size)? read_size:write_size); for (i = 0 ; i < num_children ; i++) { child_bufs[i] = (char *)calloc(1,TOKENSZ); if (!child_bufs[i]) { perror("allocation of child_bufs failed: main"); exit_rc = errno; goto exit_main3; } } /* Launch threads */ worker = bouncer; for (i=0; i< num_children; i++) { clone_err = __clone(worker, &child_stack[(i+1)*STACK_SIZE], CLONE_FLAGS, (void*)i); if (clone_err == -1) { exit_rc = errno; perror ("clone() "); goto exit_main5; } if (verbose) printf("\t\tLaunched child %d\n",i); } /* Increase priority of parent thread */ param.sched_priority = 90; rc = sched_setscheduler(getpid(), SCHED_FIFO, ¶m); if (rc == -1) { exit_rc = errno; perror ("sched_setscheduler() "); goto exit_main5; } run_test_time(); exit_main5: /* wait until all children complete */ for (i = 0 ; i < num_children ; i++) { rc = waitpid (-1, &wait_status, __WCLONE); if (verbose) printf("Child %d returned\n",i); if (rc == -1) { exit_rc = errno; perror ("waitpid() "); } } exit_main4: rc = semctl(start_sem, 0, IPC_RMID, 0); exit_main3: /* explicitly close all pipes */ for (i=0; i< num_children/2; i++) { close(childpipe[i][0]); close(childpipe[i][1]); } free(child_stack); for (i=0; i< num_children; i++) free(child_bufs[i]); exit_main2: return (exit_rc) ; } int bouncer(void *arg) { int i=0, rc, exit_rc = 0; int myid,nextid,previd; struct sembuf mysembuf; int msgsize ; int regsetsize,setid ; char *pbuf; int comp_read_rounds ; int comp_write_rounds ; /* Active set formation : * each id put into sets of size "regsetsize" first * leftover ids distributed amongst regular sets, one per set * e.g. to divide [0..10] into num_active=3, * form regular sets : [0,1,2] [3,4,5] [6,7,8] 9,10 * 9,10 are "extras", distribute one each to regular sets * forming [0,1,2,9] [3,4,5,10] [6,7,8] * and adjust previd,nextid appropriately to form logically * circular linked lists per set such as (for first set) * 0 <--> 1 <--> 2 <--> 9 <--> 0 */ /* Defaults */ myid = (int) arg ; nextid = (myid+1)%num_children; previd = (myid+num_children-1)%num_children ; pbuf = child_bufs[myid]; /* Determine extras */ regsetsize = num_children/num_active ; if (isextra(myid)) setid = myid - (regsetsize * num_active) ; else setid = myid / regsetsize ; /* Fit extras into appropriate sets */ if (needextra(setid)) { if (myid == regsetstart(setid)) previd = idextra(setid); if (myid == regsetend(setid)) nextid = idextra(setid); if (isextra(myid)) { previd = regsetend(setid); nextid = regsetstart(setid); } } else { if (myid == regsetstart(setid)) previd = regsetend(setid); if (myid == regsetend(setid)) nextid = regsetstart(setid); } /* wait to be released */ mysembuf.sem_num = 0; mysembuf.sem_op = -1; mysembuf.sem_flg = 0; rc = semop(start_sem, &mysembuf, 1); if (rc == -1) { exit_rc = errno; perror ("child semop(start_sem) failed"); return(exit_rc); } /* Actions to be done by each thread */ while (!stop_test) { /* Blocking read */ if ((myid % 2) == 0) { static int rcnt = 0; msgsize = read(childpipe[myid/2][0],pbuf,read_size) ; rcnt++; if (verbose && (msgsize != read_size)) { printf("read error %d %d %d\n",rcnt,msgsize,read_size); } nbytes[myid] += msgsize; if (!stop_test) { comp_read_rounds = (int) (uniform(read_compute_time) * rounds_per_microsecond) ; for (i=0;i> [%ld] %ld %ld %lf\n",CLOCKS_PER_SEC,n_initial,(clock2-clock1),rounds_per_microsecond); */ } /******************** Statistical functions **********************************/ double probrange(unsigned long top) { double value = random(); value = (top*value) / ((double)RAND_MAX); return value; } float variance(int n, float sum, float sum2) { return ((((float)n * sum2)-(sum * sum))/((float)n * (float)(n - 1))); } /* * iterertions includes NUM_WARMUP + a min of 3 iterations */ int confidence(int iter) { float sum_trials = 0.0; float sum_trials2 = 0.0; float percent = (float)DEF_PERCENT / 100.0; int i,x,y; x = iter - NUM_WARMUP; y = iter - 1; /* compute average */ if (iter <= NUM_WARMUP) { return(0); } else { for (i = NUM_WARMUP ; i < iter ; i++) { sum_trials += results[i].data; sum_trials2 += results[i].data * results[i].data; } } results[y].ave = sum_trials / (float)(x); if (iter < NUM_WARMUP + 2) return(0); /* compute the variance */ results[y].var = variance(x,sum_trials,sum_trials2); if (isnan(results[y].var)) results[y].var = 0.0; if (results[y].var < 0.0) results[y].var = 0.0; /* 95% confident that ave is within percent% of "true" average ? */ results[y].conf = tau[x-2] * sqrt(results[y].var / (float)x); if (isnan(results[y].conf)) results[y].conf = 0.0; if (results[y].var < 0.0) results[y].var = 0.0; if (iter < MIN_TRIALS) return(0); if (results[y].conf <= results[y].ave * percent) return (1); return(1); if (iter == MAX_TRIALS) { valid_test = 0; printf("\n*****> failed to reach confidence1 level <*****\n"); return(1); } else { return(0); } } double uniform(double mean) { /* Generate a random uniformly in [0.5xmean,1.5xmean] */ double value = random(); value = ((value/(double)RAND_MAX) + 0.5) * mean ; return value ; }