crate.c
1 /* -*- C -*- */
2 /*
3  * Copyright (c) 2017-2020 Seagate Technology LLC and/or its Affiliates
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  * For any questions about this software or licensing,
18  * please email opensource@seagate.com or cortx-questions@seagate.com.
19  *
20  */
21 
22 
29 #include <unistd.h> /* getopt(), getpid() */
30 #include <stdlib.h> /* rand(), strtoul() */
31 #include <err.h> /* errx() */
32 #include <stdarg.h> /* va_list */
33 #include <stdio.h> /* vfprintf(), stderr */
34 #include <string.h> /* strdup() */
35 #include <fcntl.h> /* O_CREAT */
36 #include <assert.h> /* assert() */
37 #include <inttypes.h> /* uint64_t */
38 #include <aio.h> /* lio_listio(), LIO_* */
39 #include <sys/stat.h> /* stat() */
40 #include <sys/time.h> /* gettimeofday() */
41 #include <sys/resource.h> /* getrusage() */
42 #include <pthread.h>
43 #include <errno.h>
44 
45 #include "lib/memory.h"
46 #include "lib/types.h"
47 #include "lib/trace.h"
48 #include "module/instance.h"
49 
50 #include "motr/m0crate/logger.h"
51 #include "motr/m0crate/workload.h"
52 #include "motr/m0crate/parser.h"
55 
56 extern struct crate_conf *conf;
57 
58 const char cr_default_fpattern[] = "./dir%i/f%i.%i";
59 const bcnt_t cr_default_avg = 64 * 1024; /* 64K average file size */
60 const bcnt_t cr_default_max = 1024 * 1024; /* 1M maximal file size */
61 const int cr_default_ops = 1000;
63 const int cr_default_nr_dir = 1;
64 const int cr_default_nr_thread = 1;
65 const short cr_default_read_frac = 50;
66 const bcnt_t cr_default_blocksize = 16 * 1024;
68 const bcnt_t cr_default_key_size = sizeof(struct m0_fid);
69 const bcnt_t cr_default_max_ksize = 1 << 10; /* default upper limit for key_size parameter, i.e. 1KB */
70 const bcnt_t cr_default_max_vsize = 1 << 20; /* default upper limit for value_size parameter, i.e. 1MB */
71 
72 
73 const char *cr_workload_name[CWT_NR] = {
74  [CWT_HPCS] = "hpcs",
75  [CWT_CSUM] = "csum",
76  [CWT_IO] = "io",
77  [CWT_INDEX] = "index",
78 };
79 
80 static int hpcs_init (struct workload *w);
81 static int hpcs_fini (struct workload *w);
82 static void hpcs_run (struct workload *w, struct workload_task *task);
83 static void hpcs_op_get(struct workload *w, struct workload_op *op);
84 static void hpcs_op_run(struct workload *w, struct workload_task *task,
85  const struct workload_op *op);
86 static int hpcs_parse (struct workload *w, char ch, const char *optarg);
87 static void hpcs_check (struct workload *w);
88 
89 static int csum_init (struct workload *w);
90 static int csum_fini (struct workload *w);
91 static void csum_run (struct workload *w, struct workload_task *task);
92 static void csum_op_get(struct workload *w, struct workload_op *op);
93 static void csum_op_run(struct workload *w, struct workload_task *task,
94  const struct workload_op *op);
95 static int csum_parse (struct workload *w, char ch, const char *optarg);
96 static void csum_check (struct workload *w);
97 
98 static const struct workload_type_ops w_ops[CWT_NR] = {
99  [CWT_HPCS] = {
100  .wto_init = hpcs_init,
101  .wto_fini = hpcs_fini,
102  .wto_run = hpcs_run,
103  .wto_op_get = hpcs_op_get,
104  .wto_op_run = hpcs_op_run,
105  .wto_parse = hpcs_parse,
106  .wto_check = hpcs_check
107  },
108  [CWT_CSUM] = {
109  .wto_init = csum_init,
110  .wto_fini = csum_fini,
111  .wto_run = csum_run,
112  .wto_op_get = csum_op_get,
113  .wto_op_run = csum_op_run,
114  .wto_parse = csum_parse,
115  .wto_check = csum_check
116  },
117 
118  [CWT_IO] = {
119  .wto_init = init,
120  .wto_fini = fini,
121  .wto_run = run,
122  .wto_op_get = NULL,
123  .wto_op_run = m0_op_run,
124  .wto_parse = NULL,
125  .wto_check = check
126  },
127 
128  [CWT_INDEX] = {
129  .wto_init = init,
130  .wto_fini = fini,
131  .wto_run = run_index,
132  .wto_op_get = NULL,
133  .wto_op_run = m0_op_run_index,
134  .wto_parse = NULL,
135  .wto_check = check
136  },
137 };
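/*
 * Dispatch table used by wop(): workload_init(), workload_op_get() and
 * workload_run() reach the per-type callbacks through it, so a new workload
 * type essentially needs a CWT_* value, a name in cr_workload_name[] and a
 * row here.  The CWT_IO and CWT_INDEX rows point at m0_op_run() (crate_io.c),
 * run_index()/m0_op_run_index() (crate_index.c) and the shared init()/fini()/
 * check() helpers defined elsewhere in m0crate.
 */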
138 
139 static void fletcher_2_native(void *buf, uint64_t size);
140 static void fletcher_4_native(void *buf, uint64_t size);
141 static void csum_touch(void *buf, uint64_t size);
142 static void csum_none(void *buf, uint64_t size);
143 
144 static const struct csum_alg {
145  const char *ca_label;
146  void (*ca_func)(void *buf, uint64_t size);
147 } csums[] = {
148  {
149  .ca_label = "fletcher2",
150  .ca_func = fletcher_2_native
151  },
152  {
153  .ca_label = "fletcher4",
154  .ca_func = fletcher_4_native
155  },
156  {
157  .ca_label = "touch",
158  .ca_func = csum_touch
159  },
160  {
161  .ca_label = "sha256",
162  .ca_func = NULL
163  },
164  {
165  .ca_label = "none",
166  .ca_func = csum_none
167  }
168 };
169 
170 static const struct workload_type_ops *wop(struct workload *w)
171 {
172  assert(0 <= w->cw_type && w->cw_type < ARRAY_SIZE(w_ops));
173  return &w_ops[w->cw_type];
174 }
175 
176 /* get a pseudo-random number in the interval [a, b] */
177 static unsigned long long getrnd(unsigned long long a, unsigned long long b)
178 {
179  double r;
180  unsigned long long scaled;
181 
182  assert(a <= b);
183 
184  r = rand();
185  scaled = r*(b - a + 1.0)/(RAND_MAX+1.0) + a;
186  assert(a <= scaled && scaled <= b);
187  cr_log(CLL_DEBUG, "random [%llu, %llu]: %llu\n", a, b, scaled);
188  return scaled;
189 }
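/*
 * For example, with a = 0 and b = 9 the expression r * 10.0 / (RAND_MAX + 1.0)
 * maps rand()'s output into [0.0, 10.0), so after truncation each integer in
 * 0..9 comes up with (nearly) equal probability; dividing by RAND_MAX + 1.0
 * rather than RAND_MAX keeps the result from ever reaching b + 1.
 */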
190 
191 static long long min(long long a, long long b)
192 {
193  return a < b ? a : b;
194 }
195 
196 static long long max(long long a, long long b)
197 {
198  return a < b ? b : a;
199 }
200 
204 void timeval_norm(struct timeval *t)
205 {
206  while (t->tv_usec < 0) {
207  t->tv_sec--;
208  t->tv_usec += 1000000;
209  }
210  while (t->tv_usec >= 1000000) {
211  t->tv_sec++;
212  t->tv_usec -= 1000000;
213  }
214 }
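/*
 * For example, { .tv_sec = 2, .tv_usec = -300000 } normalises to
 * { .tv_sec = 1, .tv_usec = 700000 }, and { .tv_sec = 2, .tv_usec = 1500000 }
 * to { .tv_sec = 3, .tv_usec = 500000 }.
 */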
215 
216 int workload_init(struct workload *w, enum cr_workload_type wtype)
217 {
218  pthread_mutex_init(&w->cw_lock, NULL);
219  w->cw_type = wtype;
220  w->cw_name = cr_workload_name[wtype];
221  w->cw_avg = cr_default_avg;
222  w->cw_max = cr_default_max;
223  w->cw_ops = cr_default_ops;
226  w->cw_fpattern = strdup(cr_default_fpattern);
229 
230  /* set default values before yaml parsing */
231  if (wtype == CWT_INDEX) {
232  struct m0_workload_index *wit = w->u.cw_index;
234  /* default value_size is -1 i.e random */
235  wit->value_size = -1;
238  }
239 
240  return wop(w)->wto_init(w);
241 }
242 
243 static void workload_fini(struct workload *w)
244 {
245  wop(w)->wto_fini(w);
246  free(w->cw_buf);
247  free(w->cw_fpattern);
248  pthread_mutex_destroy(&w->cw_lock);
249 }
250 
251 /* construct next operation for a given workload. Returns +ve if the workload
252  has been completed */
253 static int workload_op_get(struct workload *w, struct workload_op *op)
254 {
255  int result;
256  int opno;
257  int percent;
258 
259  pthread_mutex_lock(&w->cw_lock);
260  if (w->cw_done != w->cw_ops) {
261  opno = w->cw_done++;
262 
263  if ((w->cw_done % 100000) == 0) {
264  struct timeval orate;
265  struct timeval delay;
266 
267  gettimeofday(&orate, NULL);
268  memset(&delay, 0, sizeof delay);
269  timeval_diff(&w->cw_rate, &orate, &delay);
270  cr_log(CLL_INFO, "rate: %8i %8.8f\n", w->cw_done,
271  rate(100000, &delay, 1));
272  w->cw_rate = orate;
273  }
274 
275  wop(w)->wto_op_get(w, op); /* releases the mutex */
276  result = 0;
277  /* indicate the progress */
278  if (w->cw_progress) {
279  percent = opno * 100 / w->cw_ops;
280  if (percent * w->cw_ops == opno * 100) {
281  if (percent / 10 * 10 == percent)
282  printf("%02i%%", percent);
283  else
284  printf(".");
285  fflush(stdout);
286  }
287  }
288  } else {
289  pthread_mutex_unlock(&w->cw_lock);
290  result = +1; /* nothing to do */
291  }
292  return result;
293 }
294 
295 static bcnt_t w_size(const struct workload *w)
296 {
297  return min(getrnd(1, w->cw_avg * 2), w->cw_max);
298 }
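/*
 * The size is drawn uniformly from [1, 2 * cw_avg] and then capped at cw_max,
 * so with the defaults (cw_avg = 64K, cw_max = 1M) the cap never bites and the
 * sizes average roughly 64K; a cw_max below 2 * cw_avg pulls the average down.
 */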
299 
300 static const struct {
301  int opcode;
302  const char *opname;
303 } optable[COT_NR] = {
304  [COT_READ] = {
305  .opcode = LIO_READ,
306  .opname = "read"
307  },
308  [COT_WRITE] = {
309  .opcode = LIO_WRITE,
310  .opname = "write"
311  },
312 };
313 
314 static enum csum_op_type rw_get(const struct workload *w)
315 {
316  return getrnd(0, 99) < w->cw_read_frac ? COT_READ : COT_WRITE;
317 }
318 
319 static void *worker_thread(void *datum)
320 {
321  struct workload_task *wt = datum;
322  struct workload *w = wt->wt_load;
323  struct workload_op op;
324 
325  op.wo_task = wt;
326 
327  /*
328  * Motr can launch multiple operations in a single go; issuing a single
329  * operation per loop iteration does not work for the Motr workloads.
330  */
331  if (w->cw_type == CWT_IO || w->cw_type == CWT_INDEX)
332  wop(w)->wto_op_run(w, wt, NULL);
333  else {
334  while (workload_op_get(w, &op) == 0)
335  wop(w)->wto_op_run(w, wt, &op);
336  }
337  return NULL;
338 }
339 
340 void workload_start(struct workload *w, struct workload_task *task)
341 {
342  int i;
343  int result;
344 
345  if (w->cw_nr_thread == 1) {
346  task[0].wt_load = w;
347  task[0].wt_thread = 0;
348  worker_thread(&task[0]);
349  } else {
350  for (i = 0; i < w->cw_nr_thread; ++i) {
351  task[i].wt_load = w;
352  task[i].wt_thread = i;
353  result = pthread_create(&task[i].wt_pid,
354  NULL, worker_thread, &task[i]);
355  if (result != 0)
356  err(1, "cannot create worker thread");
357  cr_log(CLL_DEBUG, "created worker thread %i\n", i);
358  }
359  cr_log(CLL_TRACE, "threads created\n");
360  }
361 }
362 
363 void workload_join(struct workload *w, struct workload_task *task)
364 {
365  int result;
366  int i;
367 
368  if (w->cw_nr_thread > 1) {
369  for (i = 0; i < w->cw_nr_thread; ++i) {
370  result = pthread_join(task[i].wt_pid, NULL);
371  if (result != 0)
372  warn("cannot join worker thread %i", i);
373  cr_log(CLL_DEBUG, "worker thread done %i\n", i);
374  }
375  }
376 }
377 
378 static void workload_run(struct workload *w)
379 {
380  struct workload_task *tasks; /* joys of C99 */
381  struct rusage u0;
382  struct rusage u1;
383  struct timeval wall_start;
384  struct timeval wall_end;
385 
387  if (tasks == NULL)
388  return;
389  if (w->cw_block != 0 && w->cw_directio)
390  w->cw_block += getpagesize();
391 
392  cr_log(CLL_INFO, "workload type %s/%i\n",
393  w->cw_name, w->cw_type);
394  cr_log(CLL_INFO, "random seed: %u\n", w->cw_rstate);
395  cr_log(CLL_INFO, "number of threads: %u\n", w->cw_nr_thread);
396  /* Following params not applicable to IO and INDEX tests */
397  if (CWT_IO != w->cw_type && CWT_INDEX != w->cw_type) {
398  cr_log(CLL_INFO, "average size: %llu\n", w->cw_avg);
399  cr_log(CLL_INFO, "maximal size: %llu\n", w->cw_max);
400  /*
401  * XXX: cw_block could be reused for Motr instead of cwi_bs,
402  * but we can't always access `struct workload' there.
403  * That's a pity and should probably be fixed some day.
404  */
405  cr_log(CLL_INFO, "block size: %llu\n", w->cw_block);
406  cr_log(CLL_INFO, "number of operations: %u\n", w->cw_ops);
407  cr_log(CLL_INFO, "oflags: %o\n", w->cw_oflag);
408  cr_log(CLL_INFO, "bound mode: %s\n",
409  w->cw_bound ? "on" : "off");
410  }
411 
412  if (w->cw_rstate == 0)
413  w->cw_rstate = time(0) + getpid();
414  srand(w->cw_rstate);
415 
416  getrusage(RUSAGE_SELF, &u0);
417  gettimeofday(&w->cw_rate, NULL);
418  gettimeofday(&wall_start, NULL);
419  wop(w)->wto_run(w, tasks);
420  m0_free(tasks);
421  gettimeofday(&wall_end, NULL);
422  getrusage(RUSAGE_SELF, &u1);
423  timeval_sub(&wall_end, &wall_start);
424  if (w->cw_usage) {
425  timeval_sub(&u1.ru_utime, &u0.ru_utime);
426  timeval_sub(&u1.ru_stime, &u0.ru_stime);
427  u1.ru_maxrss -= u0.ru_maxrss; /* integral max resident set
428  size */
429  u1.ru_ixrss -= u0.ru_ixrss; /* integral shared text memory
430  size */
431  u1.ru_idrss -= u0.ru_idrss; /* integral unshared data
432  size */
433  u1.ru_isrss -= u0.ru_isrss; /* integral unshared stack
434  size */
435  u1.ru_minflt -= u0.ru_minflt; /* page reclaims */
436  u1.ru_majflt -= u0.ru_majflt; /* page faults */
437  u1.ru_nswap -= u0.ru_nswap; /* swaps */
438  u1.ru_inblock -= u0.ru_inblock; /* block input operations */
439  u1.ru_oublock -= u0.ru_oublock; /* block output operations */
440  u1.ru_msgsnd -= u0.ru_msgsnd; /* messages sent */
441  u1.ru_msgrcv -= u0.ru_msgrcv; /* messages received */
442  u1.ru_nsignals -= u0.ru_nsignals; /* signals received */
443  u1.ru_nvcsw -= u0.ru_nvcsw; /* voluntary context
444  switches */
445  u1.ru_nivcsw -= u0.ru_nivcsw; /* involuntary context
446  switches */
447  printf("time: (w: %f u: %f s: %f)\n"
448  "\tmaxrss: %6li ixrss: %6li idrss: %6li isrss: %6li\n"
449  "\tminflt: %6li majflt: %6li nswap: %6li\n"
450  "\tinblock: %6li outblock: %6li\n"
451  "\tmsgsnd: %6li msgrcv: %6li\n"
452  "\tnsignals: %6li nvcsw: %6li nivcsw: %6li\n",
453  tsec(&wall_end), tsec(&u1.ru_utime), tsec(&u1.ru_stime),
454  u1.ru_maxrss,
455  u1.ru_ixrss,
456  u1.ru_idrss,
457  u1.ru_isrss,
458  u1.ru_minflt,
459  u1.ru_majflt,
460  u1.ru_nswap,
461  u1.ru_inblock,
462  u1.ru_oublock,
463  u1.ru_msgsnd,
464  u1.ru_msgrcv,
465  u1.ru_nsignals,
466  u1.ru_nvcsw,
467  u1.ru_nivcsw);
468  }
469 }
470 
471 
472 /*
473  * HPCS workload for Creation Rate (crate) benchmark.
474  */
475 static inline struct cr_hpcs *w2hpcs(struct workload *w)
476 {
477  return &w->u.cw_hpcs;
478 }
479 
480 static int hpcs_init(struct workload *w)
481 {
482  return 0;
483 }
484 
485 static int hpcs_fini(struct workload *w)
486 {
487  return 0;
488 }
489 
490 static void hpcs_op_get(struct workload *w, struct workload_op *op)
491 {
492  unsigned dir;
493  int nob;
494  int opno;
495  unsigned long long fid;
496 
497  opno = w->cw_done;
498  /*
499  * All calls to PRNG have to be protected by the workload lock to
500  * maintain repeatability.
501  */
502  /* select a directory */
503  dir = getrnd(0, w->cw_nr_dir - 1);
504  /* select file identifier */
505  fid = getrnd(0, w->cw_ops - 1);
506  /* choose file size */
507  op->wo_size = w_size(w);
508  pthread_mutex_unlock(&w->cw_lock);
509 
510  /*
511  * If a task is bound to a directory, force creation in this
512  * directory. Note that the PRNG has to be called even though its result
513  * is discarded.
514  */
515  if (op->wo_task->u.wt_hpcs.th_bind >= 0)
516  dir = op->wo_task->u.wt_hpcs.th_bind;
517 
518  op->u.wo_hpcs.oh_dirno = dir;
519  op->u.wo_hpcs.oh_opno = opno;
520  /* form a file name */
521  nob = snprintf(op->u.wo_hpcs.oh_fname, sizeof op->u.wo_hpcs.oh_fname,
522  w->cw_fpattern, dir, (int)fid, opno);
523  if (nob >= sizeof op->u.wo_hpcs.oh_fname)
524  errx(1, "buffer [%zi] is too small for %s (%i,%i,%i)",
525  sizeof op->u.wo_hpcs.oh_fname,
526  w->cw_fpattern, dir, (int)fid, opno);
527  cr_log(CLL_TRACE, "op %i: \"%s\" %llu\n",
528  opno, op->u.wo_hpcs.oh_fname, op->wo_size);
529 }
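/*
 * For example, with the default pattern "./dir%i/f%i.%i", dir = 2, fid = 17
 * and opno = 5 the snprintf() above yields "./dir2/f17.5".  Nothing in the
 * hpcs code creates the directories, so ./dir0 ... ./dir<NR_DIR-1> have to
 * exist before the run.
 */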
530 
531 /* execute one operation from an HPCS workload */
532 static void hpcs_op_run(struct workload *w, struct workload_task *task,
533  const struct workload_op *op)
534 {
535  int fd;
536  int result;
537  struct stat st;
538  bcnt_t nob;
539  bcnt_t towrite;
540  int psize;
541  void *buf;
542 
543  struct timeval t0;
544  struct timeval t1;
545 
546  struct workload_task *wt = op->wo_task;
547  const char *fname;
548 
549  fname = op->u.wo_hpcs.oh_fname;
550  cr_log(CLL_TRACE, "thread %i write to \"%s\"\n", wt->wt_thread, fname);
551  gettimeofday(&t0, NULL);
552  fd = open(fname, O_CREAT|O_WRONLY|w->cw_oflag, S_IWUSR);
553  if (fd == -1)
554  err(2, "cannot create %s", fname);
555 
556  gettimeofday(&t1, NULL);
557  timeval_diff(&t0, &t1, &wt->u.wt_hpcs.th_open);
558 
559  wt->wt_ops++;
560  psize = getpagesize();
561  if (w->cw_block == 0 || w->cw_buf == NULL) {
562  result = fstat(fd, &st);
563  if (result == -1)
564  err(2, "stat(\"%s\") failed", fname);
565  pthread_mutex_lock(&w->cw_lock);
566  if (w->cw_block == 0)
567  w->cw_block = st.st_blksize;
568  if (w->cw_buf == NULL) {
569  size_t toalloc;
570 
571  toalloc = w->cw_block;
572  if (w->cw_directio)
573  toalloc += psize;
574 
575  w->cw_buf = malloc(toalloc);
576  if (w->cw_buf == NULL)
577  errx(3, "cannot allocate buffer (%llu)",
578  w->cw_block);
579  }
580  pthread_mutex_unlock(&w->cw_lock);
581  }
582  nob = 0;
583 
584  buf = w->cw_buf;
585  if (w->cw_directio) /* align up to a page boundary; an extra page was allocated above */
586  buf += (psize - ((unsigned long)buf) % psize) % psize;
587 
588  while (nob < op->wo_size) {
589  towrite = min(op->wo_size - nob, w->cw_block);
590  if (w->cw_directio)
591  towrite = max(towrite / psize * psize, psize);
592 
593  result = write(fd, buf, towrite);
594  if (result == -1)
595  err(2, "write on \"%s\" failed (%p, %llu)", fname,
596  buf, towrite);
597  nob += result;
598  cr_log(result != towrite ? CLL_WARN : CLL_TRACE,
599  "thread %i wrote %llu of %llu on \"%s\"\n",
600  wt->wt_thread, nob, op->wo_size, fname);
601  }
602  gettimeofday(&t0, NULL);
603  result = close(fd);
604  if (result == -1)
605  warn("close");
606  wt->wt_total += nob;
607  timeval_diff(&t1, &t0, &wt->u.wt_hpcs.th_write);
608 
609  cr_log(CLL_TRACE, "thread %i done writing %llu to \"%s\"\n",
610  wt->wt_thread, op->wo_size, fname);
611 }
612 
613 static void hpcs_run(struct workload *w, struct workload_task *task)
614 {
615  int i;
616 
617  unsigned ops;
618  unsigned dir;
619  bcnt_t nob;
620  struct timeval t_open;
621  struct timeval t_write;
622  struct timeval wall_start;
623  struct timeval wall_end;
624 
625  ops = 0;
626  nob = 0;
627  memset(&t_open, 0, sizeof t_open);
628  memset(&t_write, 0, sizeof t_write);
629 
630  cr_log(CLL_INFO, "file name pattern: \"%s\"\n", w->cw_fpattern);
631  cr_log(CLL_INFO, "number of directories: %u\n", w->cw_nr_dir);
632 
633  if (w->cw_nr_dir == 0)
634  errx(1, "no directories");
635 
636  gettimeofday(&wall_start, NULL);
637  for (dir = 0, i = 0; i < w->cw_nr_thread; ++i) {
638  if (w->cw_bound) {
639  task[i].u.wt_hpcs.th_bind = dir;
640  dir = (dir + 1) % w->cw_nr_dir;
641  } else
642  task[i].u.wt_hpcs.th_bind = -1;
643  }
644  workload_start(w, task);
645  workload_join(w, task);
646  gettimeofday(&wall_end, NULL);
647  cr_log(CLL_TRACE, "threads done\n");
648  for (i = 0; i < w->cw_nr_thread; ++i) {
649  ops += task[i].wt_ops;
650  nob += task[i].wt_total;
651  timeval_add(&t_open, &task[i].u.wt_hpcs.th_open);
652  timeval_add(&t_write, &task[i].u.wt_hpcs.th_write);
653  }
654  if (w->cw_progress)
655  printf("\n");
656 
657  timeval_sub(&wall_end, &wall_start);
658 
659  if (w->cw_header) {
660  printf(" time ops c-rate "
661  "nob M-rate\n");
662  printf(" wall open write wall proc"
663  " wall proc\n");
664  }
665 
666  printf("%7.0f %7.0f %7.0f %6u %6.0f %6.0f %10llu %6.0f %6.0f\n",
667  tsec(&wall_end) * 100., tsec(&t_open) * 100.,
668  tsec(&t_write) * 100., ops, rate(ops, &wall_end, 1),
669  rate(ops, &t_open, 1), nob,
670  rate(nob, &wall_end, 1000000), rate(nob, &t_write, 1000000));
671 }
672 
673 static int hpcs_parse (struct workload *w, char ch, const char *optarg)
674 {
675  switch (ch) {
676  case 'f':
677  if (w->cw_fpattern != NULL)
678  free(w->cw_fpattern);
679  w->cw_fpattern = strdup(optarg);
680  return +1;
681  case 'd':
682  w->cw_nr_dir = getnum(optarg, "nr_dir");
683  return +1;
684  }
685  return 0;
686 }
687 
688 static void hpcs_check (struct workload *w)
689 {
690  if (w->cw_fpattern == NULL)
691  err(1, "cannot duplicate pattern");
692  if (w->cw_bound)
693  w->cw_nr_thread *= w->cw_nr_dir;
694 }
695 
696 /*
697  * CSUM workload for Creation Rate (crate) benchmark.
698  */
699 
700 static inline struct cr_csum *w2csum(struct workload *w)
701 {
702  return &w->u.cw_csum;
703 }
704 
705 static int csum_init(struct workload *w)
706 {
707  struct cr_csum *s;
708  int i;
709 
710  s = w2csum(w);
711  for (i = 0; i < ARRAY_SIZE(s->c_dev); ++i) {
712  s->c_dev[i].d_fd = -1;
713  s->c_dev[i].d_csum_fd = -1;
714  }
715  s->c_blocksize = cr_default_blocksize;
716  s->c_csum_size = cr_default_csum_size;
717  return 0;
718 }
719 
720 static int csum_fini(struct workload *w)
721 {
722  struct cr_csum *s;
723  int i;
724  struct csum_dev *dev;
725 
726  s = w2csum(w);
727  for (i = 0, dev = s->c_dev; i < ARRAY_SIZE(s->c_dev); ++i, ++dev) {
728  if (dev->d_fd >= 0)
729  close(dev->d_fd);
730  if (dev->d_csum_fd >= 0)
731  close(dev->d_csum_fd);
732  free(dev->d_name);
733  free(dev->d_csum_name);
734  }
735  return 0;
736 }
737 
738 static void csum_op_get(struct workload *w, struct workload_op *op)
739 {
740  struct cr_csum *s;
741 
742  int opno;
743  bcnt_t count;
744  bcnt_t offset;
745  enum csum_op_type otype;
746 
747  s = w2csum(w);
748 
749  opno = w->cw_done;
750  otype = rw_get(w);
751  count = min(w->cw_block ?: w_size(w), s->c_dev_size);
752  offset = getrnd(0, max(s->c_dev_size - count - 1, 0));
753 
754  pthread_mutex_unlock(&w->cw_lock);
755 
756  op->u.wo_csum.oc_type = otype;
757  op->u.wo_csum.oc_offset = offset;
758  op->wo_size = count;
759 
760  cr_log(CLL_TRACE, "op %i: %s [%llu, %llu]\n",
761  opno, optable[otype].opname, offset, count);
762 }
763 
764 static void csum_none(void *buf, uint64_t size)
765 {
766 }
767 
768 static void csum_touch(void *buf, uint64_t size)
769 {
770  uint32_t *ip = buf;
771  uint32_t *ipend = ip + (size / sizeof (uint32_t));
772  volatile uint32_t word;
773 
774  for (; ip < ipend; ip++)
775  word = *ip;
776  (void)word; /* suppress "set but not used" warning. */
777 }
778 
779 /*
780  * Hash functions copied from zfs.
781  */
782 
783 static void fletcher_2_native(void *buf, uint64_t size)
784 {
785  const uint64_t *ip = buf;
786  const uint64_t *ipend = ip + (size / sizeof (uint64_t));
787  uint64_t a0, b0, a1, b1;
788 
789  for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
790  a0 += ip[0];
791  a1 += ip[1];
792  b0 += a0;
793  b1 += a1;
794  }
795 }
796 
797 static void fletcher_4_native(void *buf, uint64_t size)
798 {
799  const uint32_t *ip = buf;
800  const uint32_t *ipend = ip + (size / sizeof (uint32_t));
801  uint64_t a, b, c, d;
802 
803  for (a = b = c = d = 0; ip < ipend; ip++) {
804  a += ip[0];
805  b += a;
806  c += b;
807  d += c;
808  }
809 }
810 
811 /* do some bit shuffling */
812 static inline uint32_t csum_shuffle32(uint32_t in)
813 {
814  return
815  ((in & 0xff00) >> 8) |
816  ((in & 0xff) << 8) |
817  ((in & 0xff000000) >> 8) |
818  ((in & 0xff0000) << 8);
819 }
820 
821 static inline uint64_t csum_shuffle64(uint64_t in)
822 {
823  return
824  ((in & 0xff00ULL) >> 8) |
825  ((in & 0xffULL) << 8) |
826  ((in & 0xff000000ULL) >> 8) |
827  ((in & 0xff0000ULL) << 8) |
828  ((in & 0xff0000000000ULL) >> 8) |
829  ((in & 0xff00000000ULL) << 8) |
830  ((in & 0xff00000000000000ULL) >> 8) |
831  ((in & 0xff000000000000ULL) << 8);
832 }
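/*
 * Both shuffles swap the two bytes inside every 16-bit half-word, e.g.
 * csum_shuffle32(0xAABBCCDD) == 0xBBAADDCC and
 * csum_shuffle64(0x0011223344556677ULL) == 0x1100332255447766ULL.
 */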
833 
834 static void csum_compute(struct workload *w, void *buf, size_t count)
835 {
836  struct cr_csum *s;
837 
838  uint32_t *ip32 = buf;
839  uint32_t *ipend32 = ip32 + (count / sizeof (uint32_t));
840  volatile uint32_t word32;
841 
842  uint64_t *ip64 = buf;
843  uint64_t *ipend64 = ip64 + (count / sizeof (uint64_t));
844  volatile uint64_t word64;
845 
846  /*
847  * ST_32 and ST_64 modes don't write the result of the bit-swap operation.
848  * They are intended to be used for performance/overhead checks.
849  * word32/word64 are marked with the volatile keyword to switch off the
850  * compiler's optimisation.
851  *
852  * The same applies to the csum_touch() implementation.
853  */
854 
855  s = w2csum(w);
856  assert(0 <= s->c_csum && s->c_csum < ARRAY_SIZE(csums));
857 
858  switch (s->c_swab) {
859  case ST_NONE:
860  break;
861  case ST_32:
862  for (; ip32 < ipend32; ip32++)
863  word32 = csum_shuffle32(*ip32);
864  break;
865  case ST_32W:
866  for (; ip32 < ipend32; ip32++)
867  *ip32 = csum_shuffle32(*ip32);
868  break;
869  case ST_64:
870  for (; ip64 < ipend64; ip64++)
871  word64 = csum_shuffle64(*ip64);
872  break;
873  case ST_64W:
874  for (; ip64 < ipend64; ip64++)
875  *ip64 = csum_shuffle64(*ip64);
876  break;
877  default:
878  assert(0);
879  }
880  csums[s->c_csum].ca_func(buf, count);
881  (void)word64; /* suppress "set but not used" warning. */
882  (void)word32; /* suppress "set but not used" warning. */
883 }
884 
885 static void csum_csum(struct workload *w, struct cr_csum *s,
886  char *buf, bcnt_t count)
887 {
888  int i;
889  int j;
890  unsigned bsize;
891 
892  bsize = s->c_blocksize;
893  for (i = 0; i < s->c_nr_devs; ++i) {
894  for (j = 0; j < count / bsize; ++j)
895  csum_compute(w, buf + i*count + j*bsize, bsize);
896  }
897 }
898 
899 static void csum_io(struct workload *w, struct cr_csum *s,
900  const struct workload_op *op,
901  bcnt_t offset, bcnt_t count, int opcode, const char *opname)
902 {
903  bcnt_t csum_offset = offset / s->c_blocksize * s->c_csum_size;
904  bcnt_t csum_blocks = count / s->c_blocksize;
905  bcnt_t csum_count = csum_blocks * s->c_csum_size;
906  int nr_devs = s->c_nr_devs;
907  struct aiocb *cb;
908  struct aiocb **rag;
909  char *csum_buf;
910  struct task_csum *tc;
911  struct csum_dev *dev;
912  int i;
913  int rc;
914  ssize_t nob;
915  int ops;
916 
917  tc = &op->wo_task->u.wt_csum;
918  cb = tc->tc_cb;
919  rag = tc->tc_rag;
920  csum_buf = tc->tc_csum_buf;
921 
922  memset(cb, 0, sizeof *cb);
923 
924  if (opcode == LIO_WRITE)
925  csum_csum(w, s, tc->tc_buf, count);
926 
927  for (ops = 0, i = 0, dev = s->c_dev; i < nr_devs; ++i, ++dev) {
928  rag[i] = &cb[i];
929  if (dev->d_fd >= 0) {
930  cb[i].aio_fildes = dev->d_fd;
931  cb[i].aio_lio_opcode = opcode;
932  /* read into separate buffers so that checksum function
933  dutifully incurs cache misses on each. */
934  cb[i].aio_buf = tc->tc_buf + i*count;
935  cb[i].aio_nbytes = count;
936  cb[i].aio_offset = offset;
937  ops++;
938  } else
939  cb[i].aio_lio_opcode = LIO_NOP;
940 
941  rag[i + nr_devs] = &cb[i + nr_devs];
942  if (dev->d_csum_fd >= 0) {
943  cb[i + nr_devs].aio_fildes = dev->d_csum_fd;
944  cb[i + nr_devs].aio_lio_opcode = opcode;
945  cb[i + nr_devs].aio_buf = &csum_buf[csum_count * i];
946  cb[i + nr_devs].aio_nbytes = csum_count;
947  cb[i + nr_devs].aio_offset = csum_offset;
948  ops++;
949  } else
950  cb[i + nr_devs].aio_lio_opcode = LIO_NOP;
951  }
952 
953  if (ops > 0) {
954  rc = lio_listio(LIO_WAIT, rag, nr_devs * 2, NULL);
955  if (rc != 0) {
956  for (i = 0; i < nr_devs * 2; ++i) {
957  const char *name;
958  const char *pref;
959 
960  if (i < nr_devs) {
961  name = s->c_dev[i].d_name;
962  pref = "";
963  } else {
964  name = s->c_dev[i-nr_devs].d_csum_name;
965  pref = "checksum ";
966  }
967  nob = aio_return(&cb[i]);
968  if (nob < count)
969  fprintf(stderr,
970  "short async %s %s%s: "
971  "%zi < %llu\n",
972  opname, pref, name, nob, count);
973  else if (nob < 0)
974  warn("async %s%s failed on %s with %i",
975  pref, opname, name,
976  aio_error(&cb[i]));
977  }
978  err(1, "async %s failed", opname);
979  }
980  }
981  if (opcode == LIO_READ)
982  csum_csum(w, s, tc->tc_buf, count);
983 }
984 
985 /* execute one operation from a CSUM workload */
986 static void csum_op_run(struct workload *w, struct workload_task *task,
987  const struct workload_op *op)
988 {
989  csum_io(w, w2csum(w), op, op->u.wo_csum.oc_offset, op->wo_size,
990  optable[op->u.wo_csum.oc_type].opcode,
991  optable[op->u.wo_csum.oc_type].opname);
992  op->wo_task->wt_total += op->wo_size;
993 }
994 
995 static int csum_dev_open(struct workload *w, const char *dev_name)
996 {
997  int fd;
998 
999  if (strcmp(dev_name, "/dev/zero")) {
1000  fd = open(dev_name, O_RDWR|O_CREAT|w->cw_oflag,
1001  S_IWUSR|S_IRUSR);
1002  if (fd == -1)
1003  err(2, "open(\"%s\")", dev_name);
1004  } else
1005  fd = -1;
1006  return fd;
1007 }
1008 
1009 static void csum_run(struct workload *w, struct workload_task *task)
1010 {
1011  struct cr_csum *s;
1012  struct timeval wall_start;
1013  struct timeval wall_end;
1014  struct csum_dev *dev;
1015  int i;
1016  int nr_devs;
1017  bcnt_t bufsize0;
1018  bcnt_t bufsize;
1019  bcnt_t nob;
1020 
1021  s = w2csum(w);
1022  for (i = 0; i < ARRAY_SIZE(s->c_dev); ++i) {
1023  if (!s->c_dev[i].d_name != !s->c_dev[i].d_csum_name)
1024  errx(1, "wrong checksum configuration");
1025  if (!s->c_dev[i].d_name && !s->c_dev[i].d_csum_name)
1026  break;
1027  }
1028  nr_devs = s->c_nr_devs = i;
1029 
1030  if (nr_devs == 0)
1031  errx(1, "No devices specified.");
1032 
1033  for (i = 0, dev = s->c_dev; i < nr_devs; ++i, ++dev) {
1034  dev->d_fd = csum_dev_open(w, dev->d_name);
1035  dev->d_csum_fd = csum_dev_open(w, dev->d_csum_name);
1036  }
1037 
1038  if (s->c_dev_size == 0) { /* oh well... "portable" */
1039  if (s->c_dev[0].d_fd >= 0) {
1040  off_t seek;
1041 
1042  seek = lseek(s->c_dev[0].d_fd, 0, SEEK_END);
1043  if (seek == (off_t)-1)
1044  err(2, "lseek(\"%s\", 0, SEEK_END)",
1045  s->c_dev[0].d_name);
1046  s->c_dev_size = seek;
1047  } else
1048  s->c_dev_size = ~0ULL >> 1; /* signed */
1049  }
1050 
1051  bufsize = bufsize0 = max(w->cw_block, w->cw_max) * nr_devs;
1052  if (w->cw_bound)
1053  bufsize *= w->cw_nr_thread;
1054 
1055  w->cw_buf = malloc(bufsize);
1056  if (w->cw_buf == NULL)
1057  err(2, "malloc");
1058  memset(w->cw_buf, '!', bufsize);
1059 
1060  for (i = 0; i < w->cw_nr_thread; ++i) {
1061  struct task_csum *tc = &task[i].u.wt_csum;
1062  tc->tc_cb = malloc(2 * nr_devs * sizeof(struct aiocb));
1063  tc->tc_rag = malloc(2 * nr_devs * sizeof(struct aiocb *));
1064  tc->tc_csum_buf = malloc(w->cw_max / s->c_blocksize *
1065  s->c_csum_size * nr_devs);
1066  if (tc->tc_cb == NULL || tc->tc_rag == NULL ||
1067  tc->tc_csum_buf == NULL)
1068  err(2, "malloc");
1069  if (w->cw_bound)
1070  tc->tc_buf = w->cw_buf + bufsize0 * i;
1071  else
1072  tc->tc_buf = w->cw_buf;
1073  }
1074 
1075  cr_log(CLL_INFO, "devices: %i\n", s->c_nr_devs);
1076  for (i = 0, dev = s->c_dev; i < nr_devs; ++i, ++dev) {
1077  cr_log(CLL_INFO, " main device: \"%s\"/%i\n",
1078  dev->d_name, dev->d_fd);
1079  cr_log(CLL_INFO, " checksum device: \"%s\"/%i\n",
1080  dev->d_csum_name, dev->d_csum_fd);
1081  }
1082  cr_log(CLL_INFO, "device size: %llu\n", s->c_dev_size);
1083  cr_log(CLL_INFO, "csum block size: %u\n", s->c_blocksize);
1084  cr_log(CLL_INFO, "async mode: %s\n",
1085  s->c_async ? "on" : "off");
1086  cr_log(CLL_INFO, "read fraction: %i%%\n", w->cw_read_frac);
1087  cr_log(CLL_INFO, "checksum algorithm: %s\n",
1088  csums[s->c_csum].ca_label);
1089 
1090  gettimeofday(&wall_start, NULL);
1091  workload_start(w, task);
1092  workload_join(w, task);
1093  gettimeofday(&wall_end, NULL);
1094  cr_log(CLL_TRACE, "threads done\n");
1095  timeval_sub(&wall_end, &wall_start);
1096  nob = 0;
1097  for (i = 0; i < w->cw_nr_thread; ++i) {
1098  nob += task[i].wt_total;
1099  free(task[i].u.wt_csum.tc_cb);
1100  free(task[i].u.wt_csum.tc_rag);
1101  free(task[i].u.wt_csum.tc_csum_buf);
1102  }
1103  printf("%7.0f %10llu %6.0f\n",
1104  tsec(&wall_end) * 100., nob, rate(nob, &wall_end, 1000000));
1105 }
1106 
1107 static int csum_parse (struct workload *w, char ch, const char *optarg)
1108 {
1109  int i;
1110  struct cr_csum *csw;
1111 
1112  csw = w2csum(w);
1113 
1114  switch (ch) {
1115  case 'D':
1116  if (csw->c_nr_devs == ARRAY_SIZE(csw->c_dev))
1117  errx(1, "Too many devices.");
1118  csw->c_dev[csw->c_nr_devs].d_name = strdup(optarg);
1119  csw->c_nr_devs++;
1120  return +1;
1121  case 'C':
1122  if (csw->c_nr_devs == ARRAY_SIZE(csw->c_dev))
1123  errx(1, "Too many checksum devices.");
1124  csw->c_dev[csw->c_nr_devs].d_csum_name = strdup(optarg);
1125  csw->c_nr_devs++;
1126  return +1;
1127  case 'S':
1128  csw->c_blocksize = getnum(optarg, "blocksize");
1129  return +1;
1130  case 'z':
1131  csw->c_csum_size = getnum(optarg, "checksum size");
1132  return +1;
1133  case 'c':
1134  for (i = 0; i < ARRAY_SIZE(csums); ++i) {
1135  if (!strcmp(optarg, csums[i].ca_label))
1136  break;
1137  }
1138  if (i == ARRAY_SIZE(csums))
1139  errx(1, "wrong checksum (%s)", optarg);
1140  csw->c_csum = i;
1141  return +1;
1142  case 'w':
1143  csw->c_swab = getnum(optarg, "byte swap");
1144  if (csw->c_swab < ST_NONE || csw->c_swab >= ST_NR)
1145  errx(1, "wrong byte swapping type (%i)", csw->c_swab);
1146  return +1;
1147  }
1148  return 0;
1149 }
1150 
1151 static void csum_check (struct workload *w)
1152 {
1153 }
1154 
1155 static void usage(void)
1156 {
1157  int i;
1158 
1159  fprintf(stderr,
1160 "Usage: crate GENERIC OPTIONS -W WORKLOAD_TYPE WORKLOAD_OPTIONS ...\n"
1161 " Benchmarks various workloads. Each workload is specified \n"
1162 " by -W option, multiple workloads are executed consecutively.\n\n"
1163 " Possible workload types are:\n"
1164 " \"hpcs\" (file creation),\n"
1165 " \"csum\" (check-summing device),\n"
1166 " \"db\" (db meta-data back-end),\n"
1167 " and \"stob\" (Motr storage object).\n"
1168 "\n"
1169 "Options with [defaults]: \n"
1170 " Generic options\n"
1171 "-v increase verbosity level. Can be given multiple times.\n"
1172 "-h print this help message.\n\n"
1173 " Options common for all workload types\n"
1174 "-s SEED set pseudo-random number generator seed to \n"
1175 " a given value. See srand(3).\n"
1176 "-o NR_OPERATIONS execute given number of operations [%i].\n"
1177 "-t NR_THREAD number of threads to use [%i].\n"
1178 "-p indicate workload execution progress.\n"
1179 "-H print header.\n"
1180 "-U output resource usage summary on workload completion.\n"
1181 " See getrusage(2).\n"
1182 "-b BUFFER_SIZE IO buffer size for hpcs and stob. If 0, then st_blksize\n"
1183 " is used for hpcs, see stat(2). For csum---IO size to\n"
1184 " use. If 0, generate IO size randomly, see next options.\n"
1185 " For db---page size, if non-0 [%llu].\n"
1186 "-a AVERAGE_SIZE average file size for hpcs and stob, average\n"
1187 " IO size for csum, log file size for db [%llu].\n"
1188 "-M MAXIMAL_SIZE maximal file size for hpcs and stob, maximal\n"
1189 " IO size for csum, cache size if non-0 for db [%llu].\n"
1190 "-i use O_DIRECT flag when opening files [off].\n"
1191 "-e use O_EXCL flag when opening files [off].\n\n"
1192 "-B bound threads mode. For hpcs: NR_THREADS work in\n"
1193 " each of NR_DIR directories; for csum---every thread\n"
1194 " works with its own buffer. For stob: NR_THREADS do IO\n"
1195 " against each storage object (NR_LINUX or \n"
1196 " NR_LINUX*NR_AD). [off].\n"
1197 "-r PERCENT percentage of reads in read-write work-loads [%i].\n"
1198 " \"hpcs\" workload specific options\n"
1199 "-f FILE_PATTERN file name pattern for sprintf(3). It is formatted with\n"
1200 " three unsigned int arguments:\n"
1201 " . a directory index;\n"
1202 " . a randomly selected file index;\n"
1203 " . a sequential operation index.\n"
1204 " (Positional arguments %%nn$ can be used.) [%s]\n"
1205 "-d NR_DIR number of directories [%i].\n"
1206 " \"csum\" workload specific options\n"
1207 "-D DEVICE path to main device. If not set, stdio is used.\n"
1208 "-C CSUM_DEVICE path to check-summing device. No check-summing if\n"
1209 " not set.\n"
1210 " Up to %i (device, checksum device) pair can be\n"
1211 " specified.\n"
1212 "-c LABEL check-sum algorithm to use, see below [%s].\n"
1213 "-S CSUM_BLOCK_SIZE size of block to check-sum [%llu].\n"
1214 "-z CSUM_SIZE size of a checksum [%llu].\n"
1215 "-w TYPE byte swapping. Valid types are:\n"
1216 " 0 no byte swapping;\n"
1217 " 1 32bit swapping;\n"
1218 " 2 32bit swapping with writeback;\n"
1219 " 3 64bit swapping;\n"
1220 " 4 64bit swapping with writeback\n"
1221 " [0].\n"
1222 " \"db\" workload specific options\n"
1223 "-f, -d have the same meaning as in \"hpcs\".\n"
1224 "-k NR_PAGES kill NR_PAGES pages from db pool every 1s with\n"
1225 " ->mem_trickle().\n"
1226 "-R REC_SIZE date-base record size [use structure size.\n"
1227 "-l instead of populating data-base with records,\n"
1228 " dump records from existing data-base, checking their\n"
1229 " consistency.\n"
1230 "-z {FLAG} set locking flag:\n"
1231 " d automatically detect and resolve\n"
1232 " deadlocks by aborting yongest transactions;\n"
1233 " o{NR} use internal locking instead of db one\n"
1234 " with NR locks per directory.\n"
1235 "-F {TYPE}{FLAG} specify db flag. TYPE can be one of \n"
1236 " e DB_ENV flag;\n"
1237 " d DB flag;\n"
1238 " t DB_TXN flag.\n"
1239 " FLAG of 0 clears all flags. Valid flags for each type\n"
1240 " are listed below.\n"
1241 " \"stob\" workload specific options\n"
1242 "-d NR_LINUX number of storage objects in linux storage domain [%i].\n"
1243 "-D OBJECT path to an existing file (or device) to be used for\n"
1244 " benchmarking. Can be given multiple times. If fewer\n"
1245 " than NR_LINUX objects are given, missing objects are\n"
1246 " created. Extra objects are ignored.\n"
1247 "-A NR_AD number of ad (allocation data) objects to be created\n"
1248 " in each linux object. If this option is not specified,\n"
1249 " no ad domains are created.\n"
1250 " (with -D option) [0].\n"
1251 "-q Generate sequential offsets in workload.\n"
1252 "-T Parse trace log produced by crashed stob workload.\n"
1253 "-S <filename> Read workload options from a yaml file.\n"
1254 "\n"
1255 "Numerical values can be in decimal, octal and hexadecimal as understood\n"
1256 "by strtoull(3), optionally followed by a suffix \'b\', \'k\', \'m\', \'g\',\n"
1257 "\'B\', \'K\', \'M\', \'G\', where lower-case multipliers are binary and\n"
1258 "upper-case---decimal ('b' means 512 and 'B' means 500).\n"
1259 "\n"
1260 "All sizes are in bytes. Available checksum algorithms:\n",
1270  csums[0].ca_label,
1274  );
1275  for (i = 0; i < ARRAY_SIZE(csums); ++i)
1276  printf("%s ", csums[i].ca_label);
1277  printf("\n");
1278 }
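/*
 * Typical invocations (the paths and the yaml file name below are only
 * placeholders):
 *
 *   crate -W hpcs -d 4 -t 8 -o 100000 -b 64k -p -H
 *   crate -S ./io_workload.yaml
 *
 * The first performs 100000 file creations spread over ./dir0 ... ./dir3
 * using 8 threads and a 64KiB IO buffer ("64k" uses the binary suffix
 * described above), printing progress and a header line; the second takes
 * every workload definition from a yaml file, which is how the IO and INDEX
 * workloads are normally configured.
 */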
1279 
1280 void print_workload_detail(struct workload *w, int idx)
1281 {
1282  int i;
1283  cr_log(CLL_INFO, "Workload:%d:%p\n", idx, &w[0]);
1284  for(i = 0; i <= idx; i++) {
1285  cr_log(CLL_INFO, "File name:%s\n",
1286  ((struct m0_workload_io *)(
1287  w[i].u.cw_io))->cwi_filename);
1288  }
1289 }
1290 
1291 int main(int argc, char **argv)
1292 {
1293  int ch; /* getopt() returns an int */
1294  int idx;
1295  int i;
1296  struct workload *w;
1297  struct workload *load;
1298  struct m0_workload_io *cwi;
1299  static struct m0 instance;
1300  uint64_t nr_segments;
1301  int rc;
1302 
1303  static const char opts[] =
1304  "k:s:o:f:t:W:a:r:R:ez:D:UM:BA:S:Hw:iF:d:C:c:pqb:Thvl";
1305 
1306  if (argc == 1) {
1307  usage();
1308  return EXIT_FAILURE;
1309  }
1310 
1311  idx = -1;
1312  w = NULL;
1313 
1316 
1318  if (load == NULL) {
1319  return -ENOMEM;
1320  }
1321 
1322  while ((ch = getopt(argc, argv, opts)) != -1) {
1323  /*
1324  * Generic options.
1325  */
1326  switch (ch) {
1327  case 'v':
1328  continue;
1329  default:
1330  if (w == NULL)
1331  errx(1, "-W must precede workload options");
1332  else
1333  break;
1334  /* fall through */
1335  case '?':
1336  cr_log(CLL_ERROR, "unknown option\n");
1337  /* fall through */
1338  case 'h':
1339  usage();
1340  free(load);
1341  return 1;
1342  case 'W':
1343  if (++idx >= CR_WORKLOAD_MAX)
1344  errx(1, "too many workloads (%i)", idx);
1345  for (i = 0; i < ARRAY_SIZE(cr_workload_name); ++i) {
1346  if (!strcmp(optarg, cr_workload_name[i]))
1347  break;
1348  }
1349  if (i == ARRAY_SIZE(cr_workload_name))
1350  errx(1, "unknown workload type (%s)", optarg);
1351  w = &load[idx];
1352  rc = workload_init(w, i);
1353  if (rc != 0)
1354  errx(1, "failed to init the workload: %d", rc);
1355  continue;
1356  case 'S':
1357  /* All workloads are specified in a yaml file. */
1358  M0_ASSERT(idx == -1);
1360  optarg);
1361  if (rc != 0) {
1362  fprintf(stderr, "Unable to parse workload:"
1363  "%d\n", rc);
1364  m0_free(load);
1365  return -EINVAL;
1366  }
1367  w = &load[idx];
1368  if (w->cw_type != CWT_IO)
1369  continue;
1370  cwi = load->u.cw_io;
1371  if (cwi->cwi_opcode == CR_CLEANUP)
1372  continue;
1373  if (cwi->cwi_io_size < cwi->cwi_bs) {
1374  cr_log(CLL_INFO, "IO size should always be "
1375  "greater than block size "
1376  "IOSIZE =%"PRIu64
1377  " BLOCK_SIZE =%" PRIu64 "\n",
1378  cwi->cwi_io_size, cwi->cwi_bs);
1379  return M0_ERR(-EINVAL);
1380  }
1381  nr_segments = cwi->cwi_io_size / cwi->cwi_bs;
1382  if (cwi->cwi_bcount_per_op > nr_segments) {
1383  cr_log(CLL_INFO, "BLOCKS_PER_OP should be <= "
1384  "IOSIZE/BLOCK_SIZE\n");
1385  return M0_ERR(-EINVAL);
1386  }
1387  continue;
1388  }
1389  /*
1390  * Workload options.
1391  */
1392 
1393  /*
1394  * options valid for any workload type
1395  */
1396  switch (ch) {
1397  int rfrac;
1398  case 's':
1399  w->cw_rstate = getnum(optarg, "seed");
1400  continue;
1401  case 't':
1402  w->cw_nr_thread = getnum(optarg, "nr_thread");
1403  continue;
1404  case 'a':
1405  w->cw_avg = getnum(optarg, "average size");
1406  continue;
1407  case 'M':
1408  w->cw_max = getnum(optarg, "maximal size");
1409  continue;
1410  case 'o':
1411  w->cw_ops = getnum(optarg, "operations");
1412  continue;
1413  case 'b':
1414  w->cw_block = getnum(optarg, "block");
1415  continue;
1416  case 'e':
1417  w->cw_oflag |= O_EXCL;
1418  continue;
1419  case 'H':
1420  w->cw_header = 1;
1421  continue;
1422  case 'p':
1423  w->cw_progress = 1;
1424  continue;
1425  case 'U':
1426  w->cw_usage = 1;
1427  continue;
1428  case 'i':
1429  w->cw_directio = 1;
1430  w->cw_oflag |= O_DIRECT;
1431  continue;
1432  case 'B':
1433  w->cw_bound = 1;
1434  continue;
1435  case 'r':
1436  rfrac = getnum(optarg, "read percentage");
1437  if (rfrac < 0 || rfrac > 100)
1438  errx(1, "invalid percentage (%s)", optarg);
1439  w->cw_read_frac = rfrac;
1440  continue;
1441  }
1442 
1443  /*
1444  * workload type specific options
1445  */
1446  if (wop(w)->wto_parse(w, ch, optarg) == 0)
1447  errx(1, "unknown option '%c' for workload type %s",
1448  ch, w->cw_name);
1449  }
1450 
1451  cr_set_debug_level(conf != NULL ? conf->log_level : CLL_WARN);
1452 
1453  if (idx < 0)
1454  cr_log(CLL_INFO, "no workloads were specified\n");
1455  for (i = 0; i <= idx; ++i) {
1456  w = &load[i];
1457  wop(w)->wto_check(w);
1458  cr_log(CLL_INFO, "starting workload %i\n", i);
1459  workload_run(w);
1460  workload_fini(w);
1461  cr_log(CLL_INFO, "done workload %i\n", i);
1462  cr_log(CLL_INFO, "---------------------------------------\n");
1463  }
1464  m0_free(load);
1465  return 0;
1466 }
1467 
1470 /*
1471  * Local variables:
1472  * c-indentation-style: "K&R"
1473  * c-basic-offset: 8
1474  * tab-width: 8
1475  * fill-column: 80
1476  * scroll-step: 1
1477  * End:
1478  */
1479 /*
1480  * vim: tabstop=8 shiftwidth=8 noexpandtab textwidth=80 nowrap
1481  */