Motr M0
io_req.c
1 /* -*- C -*- */
2 /*
3  * Copyright (c) 2020 Seagate Technology LLC and/or its Affiliates
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  * For any questions about this software or licensing,
18  * please email opensource@seagate.com or cortx-questions@seagate.com.
19  *
20  */
21 
22 
23 #include "motr/client.h"
24 #include "motr/client_internal.h"
25 #include "motr/addb.h"
26 #include "motr/pg.h"
27 #include "motr/io.h"
28 
29 #include "lib/errno.h"
30 #include "lib/semaphore.h" /* m0_semaphore_{down|up}*/
31 #include "fid/fid.h" /* m0_fid */
32 #include "rpc/rpclib.h" /* m0_rpc_client_connect */
33 #include "lib/ext.h" /* struct m0_ext */
34 #include "lib/misc.h" /* M0_KEY_VAL_NULL */
35 #include "lib/cksum.h"
36 
37 #define M0_TRACE_SUBSYSTEM M0_TRACE_SUBSYS_CLIENT
38 #include "lib/trace.h" /* M0_LOG */
39 
40 /*
41  * CPU usage threshold for parity calculation, which was introduced by
42  * commit d4fcee53611e to solve the LNet timeout problem caused by
43  * IO overusing CPUs.
44  */
45 struct m0_semaphore cpus_sem;
46 
48 static struct m0_sm_state_descr io_states[] = {
49  [IRS_INITIALIZED] = {
50  .sd_flags = M0_SDF_INITIAL,
51  .sd_name = "IO_initial",
52  .sd_allowed = M0_BITS(IRS_READING, IRS_WRITING,
53  IRS_FAILED, IRS_REQ_COMPLETE),
54  },
55  [IRS_READING] = {
56  .sd_name = "IO_reading",
57  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED),
58  },
59  [IRS_READ_COMPLETE] = {
60  .sd_name = "IO_read_complete",
61  .sd_allowed = M0_BITS(IRS_WRITING, IRS_REQ_COMPLETE,
62  IRS_DEGRADED_READING, IRS_FAILED,
63  IRS_READING),
64  },
65  [IRS_DEGRADED_READING] = {
66  .sd_name = "IO_degraded_read",
67  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED),
68  },
69  [IRS_DEGRADED_WRITING] = {
70  .sd_name = "IO_degraded_write",
71  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED),
72  },
73  [IRS_TRUNCATE] = {
74  .sd_name = "IO_truncate",
75  .sd_allowed = M0_BITS(IRS_TRUNCATE_COMPLETE, IRS_FAILED),
76  },
77  [IRS_TRUNCATE_COMPLETE] = {
78  .sd_name = "IO_truncate_complete",
79  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE, IRS_FAILED),
80  },
81  [IRS_WRITING] = {
82  .sd_name = "IO_writing",
83  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED),
84  },
85  [IRS_WRITE_COMPLETE] = {
86  .sd_name = "IO_write_complete",
87  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE, IRS_FAILED,
88  IRS_DEGRADED_WRITING,
89  IRS_TRUNCATE),
90  },
91  [IRS_FAILED] = {
92  /* XXX Add M0_SDF_TERMINAL | M0_SDF_FINAL ? */
93  .sd_flags = M0_SDF_FAILURE,
94  .sd_name = "IO_req_failed",
95  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE),
96  },
97  [IRS_REQ_COMPLETE] = {
98  /* XXX Add M0_SDF_FINAL ? */
99  .sd_flags = M0_SDF_TERMINAL,
100  .sd_name = "IO_req_complete",
101  },
102 };
103 
104 static struct m0_sm_trans_descr ioo_trans[] = {
105  { "init-reading", IRS_INITIALIZED, IRS_READING },
106  { "init-writing", IRS_INITIALIZED, IRS_WRITING },
107  { "init-complete", IRS_INITIALIZED, IRS_REQ_COMPLETE },
108  { "init-failed", IRS_INITIALIZED, IRS_FAILED },
109 
110  { "read-complete", IRS_READING, IRS_READ_COMPLETE },
111  { "read-failed", IRS_READING, IRS_FAILED },
112  { "write-complete", IRS_WRITING, IRS_WRITE_COMPLETE },
113  { "write-failed", IRS_WRITING, IRS_FAILED },
114 
115  { "rcompl-write", IRS_READ_COMPLETE, IRS_WRITING },
116  { "rcompl-complete", IRS_READ_COMPLETE, IRS_REQ_COMPLETE },
117  { "rcompl-dgread", IRS_READ_COMPLETE, IRS_DEGRADED_READING },
118  { "rcompl-failed", IRS_READ_COMPLETE, IRS_FAILED },
119  { "rcompl-reading", IRS_READ_COMPLETE, IRS_READING },
120 
121  { "wcompl-dgwrite", IRS_WRITE_COMPLETE, IRS_DEGRADED_WRITING },
122  { "wcompl-complete", IRS_WRITE_COMPLETE, IRS_REQ_COMPLETE },
123  { "wcompl-trunc", IRS_WRITE_COMPLETE, IRS_TRUNCATE },
124  { "wcompl-failed", IRS_WRITE_COMPLETE, IRS_FAILED },
125 
126  { "trunc-tcompl", IRS_TRUNCATE, IRS_TRUNCATE_COMPLETE },
127  { "trunc-failed", IRS_TRUNCATE, IRS_FAILED },
128 
129  { "tcompl-complete", IRS_TRUNCATE_COMPLETE, IRS_REQ_COMPLETE },
130  { "tcompl-failed", IRS_TRUNCATE_COMPLETE, IRS_FAILED },
131 
132  { "dgread-rcompl", IRS_DEGRADED_READING, IRS_READ_COMPLETE },
133  { "dgread-failed", IRS_DEGRADED_READING, IRS_FAILED },
134  { "dgwrite-wcompl", IRS_DEGRADED_WRITING, IRS_WRITE_COMPLETE },
135  { "dgwrite-failed", IRS_DEGRADED_WRITING, IRS_FAILED },
136 
137  { "failed-complete", IRS_FAILED, IRS_REQ_COMPLETE },
138 };
139 
141 struct m0_sm_conf io_sm_conf = {
142  .scf_name = "IO request state machine configuration",
143  .scf_nr_states = ARRAY_SIZE(io_states),
144  .scf_state = io_states,
145  .scf_trans = ioo_trans,
146  .scf_trans_nr = ARRAY_SIZE(ioo_trans),
147 };
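/*
 * Illustrative sketch (an assumption, not part of the io_req.c listing):
 * a state machine configured with io_sm_conf above would typically be
 * initialised in the IO-request setup path roughly as
 *
 *	m0_sm_init(&ioo->ioo_sm, &io_sm_conf, IRS_INITIALIZED,
 *		   ioo->ioo_oo.oo_sm_grp);
 *
 * so that ioo_sm starts in IRS_INITIALIZED and runs under the operation's
 * state machine group.
 */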
148 
150 const struct m0_bob_type ioo_bobtype;
151 M0_BOB_DEFINE(M0_INTERNAL, &ioo_bobtype, m0_op_io);
152 
153 const struct m0_bob_type ioo_bobtype = {
154  .bt_name = "m0_op_io_bobtype",
155  .bt_magix_offset = offsetof(struct m0_op_io, ioo_magic),
156  .bt_magix = M0_IOREQ_MAGIC,
157  .bt_check = NULL,
158 };
159 
168 static bool is_pver_dud(uint32_t fdev_nr, uint32_t dev_k, uint32_t fsvc_nr,
169  uint32_t svc_k, uint32_t fnode_nr, uint32_t node_k)
170 {
171  if (fdev_nr > 0 && dev_k == 0)
172  return M0_RC(true);
173  if (fsvc_nr > 0 && svc_k == 0)
174  return M0_RC(true);
175  if (fnode_nr > 0 && node_k == 0)
176  return M0_RC(true);
177 
178  /* Summation of F(l) / K(l) across node, service and device */
179  if (node_k + fnode_nr > 0)
180  return M0_RC((fnode_nr * dev_k * svc_k +
181  node_k * (fdev_nr * svc_k + fsvc_nr * dev_k)) >
182  node_k * dev_k * svc_k);
183  else if (svc_k + fsvc_nr > 0)
184  return M0_RC((fdev_nr * svc_k + fsvc_nr * dev_k) >
185  dev_k * svc_k);
186  else
187  return M0_RC(fdev_nr > dev_k);
188 }
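/*
 * Illustrative example (assumed numbers, not part of the io_req.c listing):
 * with tolerances dev_k = 2, svc_k = 1 and node_k = 1, one failed device
 * plus one failed service gives
 *
 *	fnode_nr*dev_k*svc_k + node_k*(fdev_nr*svc_k + fsvc_nr*dev_k)
 *	= 0*2*1 + 1*(1*1 + 1*2) = 3  >  node_k*dev_k*svc_k = 2
 *
 * so is_pver_dud() reports the pool version as unusable, while a single
 * failed device alone (1*1 + 0*2 = 1 <= 2) would still be tolerated.
 */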
189 
193 M0_INTERNAL void ioreq_sm_state_set_locked(struct m0_op_io *ioo,
194  int state)
195 {
196  M0_ENTRY();
197 
198  M0_PRE(ioo != NULL);
200 
201  M0_LOG(M0_INFO, "[%p] IO request changes state %s -> %s",
202  ioo, io_states[ioreq_sm_state(ioo)].sd_name,
203  io_states[state].sd_name);
204  m0_sm_state_set(&ioo->ioo_sm, state);
205 
206  M0_LEAVE();
207 }
208 
212 M0_INTERNAL void ioreq_sm_failed_locked(struct m0_op_io *ioo, int rc)
213 {
214  M0_ENTRY();
215 
216  M0_PRE(ioo != NULL);
218 
219  /* Set the io operation state - FAILED isn't a terminal state */
220  m0_sm_move(&ioo->ioo_sm, rc, IRS_FAILED);
221 
222  M0_LEAVE();
223 }
224 
225 static void ioreq_sm_executed_post(struct m0_op_io *ioo)
226 {
227 
228  M0_ENTRY();
229 
230  M0_PRE(ioo != NULL);
232 
234  m0_sm_ast_post(ioo->ioo_oo.oo_sm_grp, &ioo->ioo_ast);
235 
236  M0_LEAVE();
237 }
238 
239 static int truncate_dispatch(struct m0_op_io *ioo)
240 {
241  int rc = 0;
242  struct m0_op *op;
243 
244  M0_ENTRY();
245 
246  M0_PRE(ioo != NULL);
247  op = &ioo->ioo_oo.oo_oc.oc_op;
248 
249  if (ioreq_sm_state(ioo) == IRS_WRITE_COMPLETE &&
250  op->op_code == M0_OC_FREE) {
251  ioreq_sm_state_set_locked(ioo, IRS_TRUNCATE);
252  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(&ioo->ioo_nwxfer);
253  }
254 
255  return M0_RC(rc);
256 }
257 
265 static void nw_xfer_device_state_reset(struct nw_xfer_request *xfer)
266 {
267  struct target_ioreq *ti;
268 
269  M0_ENTRY();
270 
271  M0_PRE(xfer != NULL);
272  M0_PRE(xfer->nxr_state == NXS_COMPLETE);
273 
274  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
275  ti->ti_state = M0_PNDS_ONLINE;
276  } m0_htable_endfor;
277 
278  M0_LEAVE();
279 }
280 
287 static void ioreq_ioo_reset(struct m0_op_io *ioo)
288 {
289  struct nw_xfer_request *xfer;
290  struct target_ioreq *ti;
291 
292  M0_ENTRY();
293 
294  M0_PRE(ioo != NULL);
295  xfer = &ioo->ioo_nwxfer;
296 
297  xfer->nxr_rc = 0;
298  xfer->nxr_bytes = 0;
299 
300  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
301  ti->ti_rc = 0;
302  } m0_htable_endfor;
303 
304  ioo->ioo_rc = 0;
305  M0_LEAVE();
306 }
307 
316 static void ioreq_iosm_handle_launch(struct m0_sm_group *grp,
317  struct m0_sm_ast *ast)
318 {
319  int rc;
320  struct m0_op *op;
321  struct m0_op_io *ioo;
322  struct m0_pdclust_layout *play;
323 
324  M0_ENTRY();
325 
326  M0_PRE(grp != NULL);
328  M0_PRE(ast != NULL);
329  ioo = bob_of(ast, struct m0_op_io, ioo_ast, &ioo_bobtype);
331  op = &ioo->ioo_oo.oo_oc.oc_op;
332  play = pdlayout_get(ioo);
333 
334  /* @todo Do error handling based on m0_sm::sm_rc. */
335  /*
336  * Since m0_sm is part of io_request, if any parity group
337  * is partial, the read-modify-write state transition is followed
338  * for all parity groups.
339  */
340  if (ioo->ioo_map_idx == ioo->ioo_iomap_nr) {
341  enum ioreq_state state;
342 
343  state = (op->op_code == M0_OC_READ) ?
344  IRS_READING : IRS_WRITING;
345 
346  if (state == IRS_WRITING) {
347  if (op->op_code != M0_OC_FREE) {
348  rc = ioo->ioo_ops->iro_application_data_copy(ioo,
349  CD_COPY_FROM_APP, 0);
350  if (rc != 0) {
351  M0_LOG(M0_ERROR, "iro_application_data_copy() "
352  "failed: rc=%d", rc);
353  goto fail_locked;
354  }
355  }
356  if (!m0_pdclust_is_replicated(play)) {
357  rc = ioo->ioo_ops->iro_parity_recalc(ioo);
358  if (rc != 0) {
359  M0_LOG(M0_ERROR, "iro_parity_recalc() "
360  "failed: rc=%d", rc);
361  goto fail_locked;
362  }
363  }
364  }
365 
366  ioreq_sm_state_set_locked(ioo, state);
367  M0_ASSERT(ergo(op->op_code == M0_OC_FREE,
368  ioreq_sm_state(ioo) == IRS_WRITING));
369  if (op->op_code == M0_OC_FREE) {
372  goto out;
373  }
374  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(&ioo->ioo_nwxfer);
375  if (rc != 0) {
376  M0_LOG(M0_ERROR, "nxo_dispatch() failed: rc=%d", rc);
377  goto fail_locked;
378  }
379  } else {
380  struct target_ioreq *ti;
381  uint32_t seg;
382  m0_bcount_t read_pages = 0;
383 
384  m0_htable_for(tioreqht, ti, &ioo->ioo_nwxfer.nxr_tioreqs_hash) {
385  for (seg = 0; seg < ti->ti_bufvec.ov_vec.v_nr; ++seg)
386  if (ti->ti_pageattrs[seg] & PA_READ)
387  ++read_pages;
388  } m0_htable_endfor;
389 
390  /* Read IO is issued only if byte count > 0. */
391  if (read_pages > 0) {
393  ioo->ioo_rmw_read_pages = read_pages;
394  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(
395  &ioo->ioo_nwxfer);
396  if (rc != 0) {
398  "nxo_dispatch() failed: rc=%d", rc);
399  goto fail_locked;
400  }
401  } else {
402  /* Don't want the sm to complain (state transition)*/
405 
406  /*
407  * If there is no READ IO issued, switch to
408  * ioreq iosm_handle_executed
409  */
410  ioreq_sm_executed_post(ioo);
411  }
412  }
413 out:
414  M0_LOG(M0_INFO, "nxr_bytes = %" PRIu64 ", copied_nr = %"PRIu64,
415  ioo->ioo_nwxfer.nxr_bytes, ioo->ioo_copied_nr);
416 
417  /* lock this as it isn't a locality group lock */
418  m0_sm_group_lock(&op->op_sm_group);
419  m0_sm_move(&op->op_sm, 0, M0_OS_LAUNCHED);
420  m0_sm_group_unlock(&op->op_sm_group);
421 
422  M0_LEAVE();
423  return;
424 
425 fail_locked:
426  ioo->ioo_rc = rc;
428  /* N.B. Failed is not a terminal state */
430 
431  /* fixed by commit 5a189beac81297ec9ea1cecf7016697aa02b0182 */
432  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, false);
433 
434  /* Move the operation state machine along */
435  m0_sm_group_lock(&op->op_sm_group);
436  m0_sm_fail(&op->op_sm, M0_OS_FAILED, rc);
437  m0_op_failed(op);
438  m0_sm_group_unlock(&op->op_sm_group);
439 
440  M0_LOG(M0_ERROR, "ioreq_iosm_handle_launch failed");
441  M0_LEAVE();
442 }
443 
451 static void ioreq_iosm_handle_executed(struct m0_sm_group *grp,
452  struct m0_sm_ast *ast)
453 {
454  int rc;
455  bool rmw;
456  struct m0_client *instance;
457  struct m0_op *op;
458  struct m0_op_io *ioo;
459  struct m0_pdclust_layout *play;
460 
461  M0_ENTRY("op_io:ast %p", ast);
462 
463  M0_PRE(grp != NULL);
464  M0_PRE(ast != NULL);
466  ioo = bob_of(ast, struct m0_op_io, ioo_ast, &ioo_bobtype);
468  op = &ioo->ioo_oo.oo_oc.oc_op;
469  instance = m0__op_instance(op);
470  M0_PRE(instance != NULL);
471 
472  play = pdlayout_get(ioo);
473 
474  /* @todo Do error handling based on m0_sm::sm_rc. */
475  /*
476  * Since m0_sm is part of io_request, if any parity group
477  * is partial, the read-modify-write state transition is followed
478  * for all parity groups.
479  */
480  M0_LOG(M0_DEBUG, "map=%" PRIu64 " map_nr=%"PRIu64,
481  ioo->ioo_map_idx, ioo->ioo_iomap_nr);
482  rmw = ioo->ioo_map_idx != ioo->ioo_iomap_nr;
483  if (op->op_code == M0_OC_FREE)
484  goto done;
485  if (!rmw) {
486  enum ioreq_state state;
487 
488  state = op->op_code == M0_OC_READ ?
489  IRS_READ_COMPLETE : IRS_WRITE_COMPLETE;
490  M0_ASSERT(ioreq_sm_state(ioo) == state);
491  if (ioo->ioo_rc != 0) {
492  rc = ioo->ioo_rc;
493  M0_LOG(M0_DEBUG, "ioo->ioo_rc = %d", rc);
494  goto fail_locked;
495  }
496  if (state == IRS_READ_COMPLETE) {
497  /*
498  * Returns immediately if all devices are
499  * in healthy state.
500  */
501  rc = ioo->ioo_ops->iro_dgmode_read(ioo, rmw);
502  if (rc != 0) {
503  M0_LOG(M0_INFO,
504  "iro_dgmode_read() returns error: %d",
505  rc);
506  goto fail_locked;
507  }
508 
509  /*
510  * If ioo's state has been changed to IRS_READING
511  * or IRS_DEGRADED_READING, this means iro_dgmode_read
512  * has just issued DGMODE IO; simply exit and it
513  * will re-enter here later. Otherwise proceed to
514  * read_verify and to copy data to APP.
515  */
516  if (ioreq_sm_state(ioo) != IRS_READ_COMPLETE)
517  goto out;
518 
519  rc = ioo->ioo_ops->iro_parity_verify(ioo);
520  if (rc != 0) {
522  "parity verification failed: rc=%d", rc);
523  goto fail_locked;
524  }
525 
526  if ((op->op_code == M0_OC_READ &&
527  instance->m0c_config->mc_is_read_verify) &&
528  ioo->ioo_dgmap_nr > 0)
529  rc = ioo->ioo_ops->iro_dgmode_recover(ioo);
530 
531  /* Valid data are available now, copy to application */
532  rc = ioo->ioo_ops->iro_application_data_copy(ioo,
533  CD_COPY_TO_APP, 0);
534  if (rc != 0) {
535  M0_LOG(M0_ERROR, "iro_application_data_copy() "
536  "failed (to APP): rc=%d", rc);
537  goto fail_locked;
538  }
539  } else {
540  M0_ASSERT(state == IRS_WRITE_COMPLETE);
541 
542  /*
543  * Returns immediately if all devices are
544  * in healthy state.
545  */
546  rc = ioo->ioo_ops->iro_dgmode_write(ioo, rmw);
547  if (rc != 0) {
548  M0_LOG(M0_ERROR, "iro_dgmode_write() failed, "
549  "rc=%d", rc);
550  goto fail_locked;
551  }
552 
553  rc = truncate_dispatch(ioo);
554  if (rc != 0) {
555  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
556  "rc=%d", rc);
557  goto fail_locked;
558  }
559 
561  goto out;
562  }
563  } else {
564  /*
565  * First stage of RMW done: ioo's state should be
566  * IRS_READ_COMPLETE when it reaches here.
567  */
568  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE &&
569  op->op_code != M0_OC_FREE) {
570  /*
571  * If fops dispatch fails, we need to wait till all io
572  * fop callbacks are acked since IO fops have already
573  * been dispatched.
574  *
575  * Only fully modified pages from parity groups which
576  * have chosen read-rest approach or aligned parity
577  * groups, are copied since read-old approach needs
578  * reading of all spanned pages (no matter fully
579  * modified or partially modified) in order to
580  * calculate parity correctly.
581  */
582  rc = ioo->ioo_ops->iro_application_data_copy(
583  ioo, CD_COPY_FROM_APP, PA_FULLPAGE_MODIFY);
584  if (rc != 0) {
585  M0_LOG(M0_ERROR, "iro_application_data_copy() "
586  "on FULLPAGE failed: rc=%d", rc);
587  goto fail_locked;
588  }
589 
590  /* Copies
591  * - fully modified pages from parity groups which have
592  * chosen read_old approach and
593  * - partially modified pages from all parity groups.
594  */
595  rc = ioo->ioo_ops->iro_application_data_copy(
596  ioo, CD_COPY_FROM_APP, 0);
597  if (rc != 0) {
598  M0_LOG(M0_ERROR, "iro_application_data_copy() "
599  "failed: rc=%d", rc);
600  goto fail_locked;
601  }
602  }
603  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE) {
604  /* Finalizes the old read fops. */
605  if (ioo->ioo_rmw_read_pages > 0) {
606  ioo->ioo_nwxfer.nxr_ops->nxo_complete(
607  &ioo->ioo_nwxfer, rmw);
608 
609  /*
610  * There is a subtle case for first write
611  * to an object when CROW optimisation is used:
612  * if it is a RMW write, it sends a read request
613  * first as Client doesn't have the concept of
614  * object size and an -ENOENT error will be
615  * returned as there isn't any thing exists in
616  * ios yet.
617  *
618  * Client has to trust the application that it
619  * has checked the existence of an object, so
620  * we can safely ignore the -ENOENT error here.
621  */
622  if (ioo->ioo_rc == -ENOENT)
623  ioreq_ioo_reset(ioo);
624  else if (ioo->ioo_rc != 0) {
625  M0_LOG(M0_ERROR, "ioo->ioo_rc=%d",
626  ioo->ioo_rc);
627 
628  rc = ioo->ioo_rc;
629  goto fail_locked;
630  }
632  }
633 
634  /* Prepare for the Write fops*/
635  ioreq_sm_state_set_locked(ioo, IRS_WRITING);
636  if (!m0_pdclust_is_replicated(play)) {
637  rc = ioo->ioo_ops->iro_parity_recalc(ioo);
638  if (rc != 0) {
639  M0_LOG(M0_ERROR, "iro_parity_recalc()"
640  "failed: rc=%d", rc);
641  goto fail_locked;
642  }
643  }
644 
645  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(
646  &ioo->ioo_nwxfer);
647  if (rc != 0) {
648  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
649  "rc=%d", rc);
650  goto fail_locked;
651  }
652 
653  /*
654  * Simply return here as the WRITE op will re-enter
655  * ioreq_iosm_handle_executed with a different state.
656  */
657  goto out;
658 
659  } else {
660  /* 2nd stage of RMW done [WRITE] */
662 
663  /*
664  * Returns immediately if all devices are in healthy
665  * state.
666  */
667  rc = ioo->ioo_ops->iro_dgmode_write(ioo, rmw);
668  if (rc != 0) {
669  M0_LOG(M0_ERROR, "iro_dgmode_write() failed: "
670  "rc=%d", rc);
671  goto fail_locked;
672  }
673 
674  rc = truncate_dispatch(ioo);
675  if (rc != 0) {
676  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
677  "rc=%d", rc);
678  goto fail_locked;
679  }
680 
682  goto out;
683  }
684  }
685 done:
686  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, rmw);
687 
688 #ifdef CLIENT_FOR_M0T1FS
689  /* XXX: TODO: update the inode size on the mds */
690 #endif
691 
692  if (rmw)
693  ioreq_sm_state_set_locked(ioo, IRS_REQ_COMPLETE);
694 
695  /*
696  * Move the operation state machine along: due to the lack of
697  * mechanism in Motr to inform Client if data (or FOL) has been safely
698  * written to disk (this could be done by piggy-backing the max committed
699  * tx id or by explicitly syncing data), Client assumes data is safe when
700  * it receives all replies from the ioservices at this moment (although
701  * this is not strictly true) and moves the state of this 'op' to STABLE.
702  *
703  * Client introduced SYNC APIs to allow an application to explicitly
704  * flush data to disks.
705  */
706 
707  m0_sm_group_lock(&op->op_sm_group);
708  m0_sm_move(&op->op_sm, ioo->ioo_rc, M0_OS_EXECUTED);
709  m0_op_executed(op);
710  if (M0_IN(op->op_code, (M0_OC_READ, M0_OC_WRITE,
711  M0_OC_FREE))) {
712  m0_sm_move(&op->op_sm, ioo->ioo_rc, M0_OS_STABLE);
713  m0_op_stable(op);
714  }
715  m0_sm_group_unlock(&op->op_sm_group);
716 
717  /* Post-processing for object op. */
718  m0__obj_op_done(op);
719 
720 out:
721  M0_LEAVE();
722  return;
723 
724 fail_locked:
725  ioo->ioo_rc = rc;
727  /* N.B. Failed is not a terminal state */
729  /* XXX: a workaround to prevent kernel panic. how to do it correctly? */
730 #if 1 || BACKPORT_UPSTREAM_FIX
731  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, false);
732 #else
734 #endif
735 
736  /* As per bug MOTR-2575, rc will be reported in op->op_rc and the
737  * op will be completed with status M0_OS_STABLE */
738  op->op_rc = ioo->ioo_rc;
739  /* Move the operation state machine along */
740  m0_sm_group_lock(&op->op_sm_group);
741  m0_sm_move(&op->op_sm, 0, M0_OS_EXECUTED);
742  m0_op_executed(op);
743  m0_sm_move(&op->op_sm, 0, M0_OS_STABLE);
744  m0_op_stable(op);
745  m0_sm_group_unlock(&op->op_sm_group);
746 
748 
749  M0_LOG(M0_DEBUG, "ioreq_iosm_handle_executed failed, rc=%d", rc);
750  M0_LEAVE();
751  return;
752 }
753 
760 static void ioreq_iomaps_destroy(struct m0_op_io *ioo)
761 {
762  uint64_t i;
763 
764  M0_ENTRY("op_io %p", ioo);
765 
766  M0_PRE(ioo != NULL);
767  M0_PRE(ioo->ioo_iomaps != NULL);
768 
769  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
770  if (ioo->ioo_iomaps[i] != NULL) {
771  pargrp_iomap_fini(ioo->ioo_iomaps[i], ioo->ioo_obj);
772  m0_free0(&ioo->ioo_iomaps[i]);
773  }
774  }
775  m0_free0(&ioo->ioo_iomaps);
776  ioo->ioo_iomap_nr = 0;
777 
778  M0_LEAVE();
779 }
780 
785 static int ioreq_iomaps_parity_groups_cal(struct m0_op_io *ioo)
786 {
787  uint64_t seg;
788  uint64_t grp;
789  uint64_t grpstart;
790  uint64_t grpend;
791  uint64_t *grparray;
792  uint64_t grparray_sz;
793  struct m0_pdclust_layout *play;
794 
795  M0_ENTRY();
796 
797  play = pdlayout_get(ioo);
798 
799  /* Array of maximum possible number of groups spanned by req. */
800  grparray_sz = m0_vec_count(&ioo->ioo_ext.iv_vec) / data_size(play) +
801  2 * SEG_NR(&ioo->ioo_ext);
802  M0_LOG(M0_DEBUG, "ioo=%p arr_sz=%"PRIu64, ioo, grparray_sz);
803  M0_ALLOC_ARR(grparray, grparray_sz);
804  if (grparray == NULL)
805  return M0_ERR_INFO(-ENOMEM, "Failed to allocate memory"
806  " for grparray");
807  /*
808  * Finds out the total number of parity groups spanned by
809  * m0_op_io::ioo_ext.
810  */
811  for (seg = 0; seg < SEG_NR(&ioo->ioo_ext); ++seg) {
812  grpstart = group_id(INDEX(&ioo->ioo_ext, seg), data_size(play));
813  grpend = group_id(seg_endpos(&ioo->ioo_ext, seg) - 1,
814  data_size(play));
815  for (grp = grpstart; grp <= grpend; ++grp) {
816  uint64_t i;
817  /*
818  * grparray is a temporary array to record found groups.
819  * Scan this array for [grpstart, grpend].
820  * If not found, we have got a new group; record it and
821  * increase ioo_iomap_nr.
822  */
823  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
824  if (grparray[i] == grp)
825  break;
826  }
827  if (i == ioo->ioo_iomap_nr) { /* new grp */
828  M0_ASSERT_INFO(i < grparray_sz,
829  "nr=%" PRIu64 " size=%"PRIu64,
830  i , grparray_sz);
831  grparray[i] = grp;
832  ++ioo->ioo_iomap_nr;
833  }
834  }
835  }
836  m0_free(grparray);
837  return M0_RC(0);
838 }
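/*
 * Illustrative example (assumed numbers, not part of the io_req.c listing):
 * with data_size(play) == 1 MiB, a segment starting at offset
 * 3 MiB + 4 KiB with a length of 2 MiB yields
 *
 *	grpstart = group_id(3149824, 1048576) = 3
 *	grpend   = group_id(5246975, 1048576) = 5
 *
 * so the segment spans parity groups 3, 4 and 5; each group not already
 * recorded in grparray increments ioo_iomap_nr by one.
 */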
839 
840 static void set_paritybuf_type(struct m0_op_io *ioo)
841 {
842 
843  struct m0_pdclust_layout *play = pdlayout_get(ioo);
844  struct m0_op *op = &ioo->ioo_oo.oo_oc.oc_op;
845  struct m0_client *cinst = m0__op_instance(op);
846 
849  ioo->ioo_pbuf_type = M0_PBUF_DIR;
850  else if (m0__is_update_op(op) && m0_pdclust_is_replicated(play))
851  ioo->ioo_pbuf_type = M0_PBUF_IND;
852  else
853  ioo->ioo_pbuf_type = M0_PBUF_NONE;
854 }
855 
863 static int ioreq_iomaps_prepare(struct m0_op_io *ioo)
864 {
865  bool bufvec = true;
866  int rc;
867  uint64_t i;
868  struct pargrp_iomap *iomap;
869  struct m0_pdclust_layout *play;
870  struct m0_ivec_cursor cursor;
871  struct m0_bufvec_cursor buf_cursor;
872 
873  M0_ENTRY("op_io = %p", ioo);
874 
875  M0_PRE(ioo != NULL);
876 
877  set_paritybuf_type(ioo);
878 
879  rc = ioreq_iomaps_parity_groups_cal(ioo);
880  if (rc != 0)
881  return M0_RC(rc);
882 
883  if (ioo->ioo_oo.oo_oc.oc_op.op_code == M0_OC_FREE)
884  bufvec = false;
885 
886  play = pdlayout_get(ioo);
887 
888  M0_LOG(M0_DEBUG, "ioo=%p spanned_groups=%"PRIu64
889  " [N,K,us]=[%d,%d,%" PRIu64 "]",
890  ioo, ioo->ioo_iomap_nr, layout_n(play),
891  layout_k(play), layout_unit_size(play));
892 
893  /* ioo->ioo_iomaps is zeroed out on allocation. */
894  M0_ALLOC_ARR(ioo->ioo_iomaps, ioo->ioo_iomap_nr);
895  if (ioo->ioo_iomaps == NULL) {
896  rc = -ENOMEM;
897  goto failed;
898  }
899 
900  m0_ivec_cursor_init(&cursor, &ioo->ioo_ext);
901  if (bufvec)
902  m0_bufvec_cursor_init(&buf_cursor, &ioo->ioo_data);
903  /*
904  * cursor is advanced maximum by parity group size in one iteration
905  * of this loop.
906  * This is done by pargrp_iomap::pi_ops::pi_populate().
907  */
908  for (i = 0; !m0_ivec_cursor_move(&cursor, 0); ++i) {
909  M0_ASSERT(i < ioo->ioo_iomap_nr);
910  M0_ASSERT(ioo->ioo_iomaps[i] == NULL);
911  M0_ALLOC_PTR(ioo->ioo_iomaps[i]);
912  if (ioo->ioo_iomaps[i] == NULL) {
913  rc = -ENOMEM;
914  goto failed;
915  }
916  iomap = ioo->ioo_iomaps[i];
917 
918  rc = pargrp_iomap_init(iomap, ioo,
919  group_id(m0_ivec_cursor_index(&cursor),
920  data_size(play)));
921  if (rc != 0) {
922  m0_free0(&ioo->ioo_iomaps[i]);
923  goto failed;
924  }
925 
926  /* @cursor is advanced in the following function */
927  rc = iomap->pi_ops->pi_populate(iomap, &cursor,
928  bufvec ? &buf_cursor : NULL);
929  if (rc != 0)
930  goto failed;
931  M0_LOG(M0_INFO, "iomap_id=%" PRIu64 " is populated",
932  iomap->pi_grpid);
933  }
934 
935  return M0_RC(0);
936 failed:
937  if (ioo->ioo_iomaps != NULL)
938  ioreq_iomaps_destroy(ioo);
939 
940  return M0_ERR(rc);
941 }
942 
955 static uint64_t data_buf_copy(struct data_buf *data,
956  struct m0_bufvec_cursor *app_datacur,
957  enum copy_direction dir)
958 {
959  void *app_data;
960  uint32_t app_data_len;
961  uint64_t copied = 0;
962  uint64_t bytes;
963 
964  M0_ENTRY();
965 
966  M0_PRE(data != NULL);
967  M0_PRE(app_datacur != NULL);
970 
971  bytes = data->db_buf.b_nob;
972  while (bytes > 0) {
973  app_data = m0_bufvec_cursor_addr(app_datacur);
974  app_data_len = m0_bufvec_cursor_step(app_datacur);
975 
976  /* Don't copy more bytes than we were supposed to */
977  app_data_len = (app_data_len < bytes)?app_data_len:bytes;
978 
979  if (app_data == NULL)
980  break;
981 
982  /* app_data == data->db_buf.b_addr implies zero copy */
983  if (app_data != data->db_buf.b_addr) {
984  if (dir == CD_COPY_FROM_APP)
985  memcpy((char*)data->db_buf.b_addr +
986  copied, app_data, app_data_len);
987  else
988  memcpy(app_data,
989  (char*)data->db_buf.b_addr +
990  copied, app_data_len);
991  }
992 
993  bytes -= app_data_len;
994  copied += app_data_len;
995 
996  if (m0_bufvec_cursor_move(app_datacur, app_data_len))
997  break;
998  }
999 
1000  M0_LEAVE();
1001  return copied;
1002 }
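/*
 * Illustrative example (assumed sizes, not part of the io_req.c listing):
 * for a 4 KiB data_buf and an application bufvec made of two 2 KiB
 * segments, data_buf_copy() performs two 2 KiB memcpy() calls (skipped
 * entirely when the application buffer aliases db_buf.b_addr, i.e. zero
 * copy) and returns 4096; a caller treats any shorter count as -EFAULT.
 */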
1003 
1017 static int application_data_copy(struct pargrp_iomap *map,
1018  struct m0_obj *obj,
1019  m0_bindex_t start,
1020  m0_bindex_t end,
1021  struct m0_bufvec_cursor *datacur,
1022  enum copy_direction dir,
1023  enum page_attr filter)
1024 {
1025  uint64_t bytes;
1026  uint32_t row = 0;
1027  uint32_t col = 0;
1028  uint32_t m_col;
1029  struct data_buf *data;
1030  struct m0_pdclust_layout *play;
1031  struct m0_key_val *key_val;
1032  m0_bindex_t mask;
1033  m0_bindex_t grp_size;
1034 
1035  M0_ENTRY("Copy %s application, start = %8" PRIu64 ", end = %8"PRIu64,
1036  dir == CD_COPY_FROM_APP ? (char *)"from" : (char *)" to ",
1037  start, end);
1038 
1040  M0_PRE(map != NULL);
1041  M0_PRE(obj != NULL);
1042  M0_PRE(datacur != NULL);
1043  /* XXX: get rid of obj from the parameters */
1044  M0_PRE(map->pi_ioo->ioo_obj == obj);
1046  M0_PRE(end > start);
1047  /* start/end are in the same object block */
1048  M0_PRE(start >> obj->ob_attr.oa_bshift ==
1049  (end - 1) >> obj->ob_attr.oa_bshift);
1050  M0_PRE(datacur != NULL);
1051 
1052  play = pdlayout_get(map->pi_ioo);
1053  grp_size = data_size(play) * map->pi_grpid;
1054  /* Finds out the page from pargrp_iomap::pi_databufs. */
1055  page_pos_get(map, start, grp_size, &row, &col);
1057  if (play->pl_attr.pa_K == 0 ||
1058  m0_key_val_is_null(&map->pi_databufs[row][col]->db_maj_ele))
1059  data = map->pi_databufs[row][col];
1060  else {
1061  key_val = &map->pi_databufs[row][col]->db_maj_ele;
1062  m_col = *(uint32_t *)(key_val->kv_key.b_addr);
1063  if (m0_pdclust_unit_classify(play, m_col) == M0_PUT_DATA) {
1064  M0_ASSERT(m_col == 0);
1065  data = map->pi_databufs[row][m_col];
1066  } else if (m0_pdclust_unit_classify(play, m_col) ==
1067  M0_PUT_PARITY)
1068  data = map->pi_paritybufs[row][m_col - 1];
1069  else
1070  /* No way of getting spares. */
1071  M0_IMPOSSIBLE();
1072  }
1073  M0_ASSERT(data != NULL);
1074  mask = ~SHIFT2MASK(obj->ob_attr.oa_bshift);
1075 
1076  /* Client only supports whole block operations */
1077  M0_ASSERT(end - start == data->db_buf.b_nob);
1078 
1079  if (dir == CD_COPY_FROM_APP) {
1080  if ((data->db_flags & filter) == filter) {
1081  if (data->db_flags & PA_COPY_FRMUSR_DONE) {
1082  m0_bufvec_cursor_move(datacur, end - start);
1083  return M0_RC(0);
1084  }
1085 
1086  /*
1087  * Note: data has been read into auxiliary buffer
1088  * directly for READOLD method.
1089  */
1090  if (data->db_auxbuf.b_addr != NULL &&
1091  map->pi_rtype == PIR_READOLD) {
1092  if (filter != 0) {
1093  m0_bufvec_cursor_move(
1094  datacur, end - start);
1095  return M0_RC(0);
1096  }
1097  }
1098 
1099  /* Copies to appropriate offset within page. */
1100  bytes = data_buf_copy(data, datacur, dir);
1101  M0_LOG(M0_DEBUG, "%"PRIu64
1102  " bytes copied from application "
1103  "from offset %"PRIu64, bytes, start);
1104  map->pi_ioo->ioo_copied_nr += bytes;
1105 
1106  /*
1107  * application_data_copy() may be called to handle
1108  * only part of PA_FULLPAGE_MODIFY page.
1109  * In this case we should mark the page as done only
1110  * when the last piece is processed.
1111  * Otherwise, the rest piece of the page
1112  * will be ignored.
1113  */
1114  if (ergo(data->db_flags & PA_FULLPAGE_MODIFY,
1115  (end & mask) == 0))
1116  data->db_flags |= PA_COPY_FRMUSR_DONE;
1117 
1118  if (bytes != end - start)
1119  return M0_ERR_INFO(
1120  -EFAULT, "[%p] Failed to"
1121  " copy_from_user: %" PRIu64 " !="
1122  " %" PRIu64 " - %" PRIu64,
1123  map->pi_ioo, bytes, end, start);
1124  }
1125  } else {
1126  bytes = data_buf_copy(data, datacur, dir);
1127 
1128  map->pi_ioo->ioo_copied_nr += end - start - bytes;
1129 
1130  M0_LOG(M0_DEBUG, "%"PRIu64
1131  " bytes copied to application from offset " "%"PRIu64,
1132  bytes, start);
1133 
1134  if (bytes != end - start)
1135  return M0_ERR(-EFAULT);
1136  }
1137 
1138  return M0_RC(0);
1139 }
1140 
1141 /* This function calculates and verifies the checksum for data read.
1142  * It divides the data into multiple units and calls the client API
1143  * to verify the checksum for each data unit.
1144  */
1145 static bool verify_checksum(struct m0_op_io *ioo)
1146 {
1147  struct m0_pi_seed seed;
1148  struct m0_bufvec user_data = {};
1149  int usz;
1150  int rc;
1151  int count;
1152  int i;
1153  struct m0_generic_pi *pi_ondisk;
1154  struct m0_bufvec_cursor datacur;
1155  struct m0_bufvec_cursor tmp_datacur;
1156  struct m0_ivec_cursor extcur;
1157  uint32_t nr_seg;
1158  int attr_idx = 0;
1159  m0_bcount_t bytes;
1160 
1161  M0_ENTRY();
1162  usz = m0_obj_layout_id_to_unit_size(
1163  m0__obj_lid(ioo->ioo_obj));
1164 
1165  m0_bufvec_cursor_init(&datacur, &ioo->ioo_data);
1166  m0_bufvec_cursor_init(&tmp_datacur, &ioo->ioo_data);
1167  m0_ivec_cursor_init(&extcur, &ioo->ioo_ext);
1168 
1169  while ( !m0_bufvec_cursor_move(&datacur, 0) &&
1170  !m0_ivec_cursor_move(&extcur, 0) &&
1171  attr_idx < ioo->ioo_attr.ov_vec.v_nr){
1172 
1173  /* calculate number of segments required for 1 data unit */
1174  nr_seg = 0;
1175  count = usz;
1176  while (count > 0) {
1177  nr_seg++;
1178  bytes = m0_bufvec_cursor_step(&tmp_datacur);
1179  if (bytes < count) {
1180  m0_bufvec_cursor_move(&tmp_datacur, bytes);
1181  count -= bytes;
1182  }
1183  else {
1184  m0_bufvec_cursor_move(&tmp_datacur, count);
1185  count = 0;
1186  }
1187  }
1188 
1189  /* allocate an empty buf vec */
1190  rc = m0_bufvec_empty_alloc(&user_data, nr_seg);
1191  if (rc != 0) {
1192  M0_LOG(M0_ERROR, "buffer allocation failed, rc %d", rc);
1193  return false;
1194  }
1195 
1196  /* populate the empty buf vec with data pointers
1197  * and create 1 data unit worth of buf vec
1198  */
1199  i = 0;
1200  count = usz;
1201  while (count > 0) {
1202  bytes = m0_bufvec_cursor_step(&datacur);
1203  if (bytes < count) {
1204  user_data.ov_vec.v_count[i] = bytes;
1205  user_data.ov_buf[i] = m0_bufvec_cursor_addr(&datacur);
1206  m0_bufvec_cursor_move(&datacur, bytes);
1207  count -= bytes;
1208  }
1209  else {
1210  user_data.ov_vec.v_count[i] = count;
1211  user_data.ov_buf[i] = m0_bufvec_cursor_addr(&datacur);
1212  m0_bufvec_cursor_move(&datacur, count);
1213  count = 0;
1214  }
1215  i++;
1216  }
1217 
1218  if (ioo->ioo_attr.ov_vec.v_nr && ioo->ioo_attr.ov_vec.v_count[attr_idx] != 0) {
1219 
1220  seed.pis_data_unit_offset = m0_ivec_cursor_index(&extcur);
1221  seed.pis_obj_id.f_container = ioo->ioo_obj->ob_entity.en_id.u_hi;
1222  seed.pis_obj_id.f_key = ioo->ioo_obj->ob_entity.en_id.u_lo;
1223 
1224  pi_ondisk = (struct m0_generic_pi *)ioo->ioo_attr.ov_buf[attr_idx];
1225 
1226  if (!m0_calc_verify_cksum_one_unit(pi_ondisk, &seed, &user_data)) {
1227  return false;
1228  }
1229  }
1230 
1231  attr_idx++;
1232  m0_ivec_cursor_move(&extcur, usz);
1233 
1235  }
1236 
1237  if (m0_bufvec_cursor_move(&datacur, 0) &&
1238  m0_ivec_cursor_move(&extcur, 0) &&
1239  attr_idx == ioo->ioo_attr.ov_vec.v_nr) {
1240  return true;
1241  }
1242  else {
1243  /* something wrong, we terminated early */
1244  M0_IMPOSSIBLE("something wrong while arranging data");
1245  }
1246 }
1247 
1258 static int ioreq_application_data_copy(struct m0_op_io *ioo,
1259  enum copy_direction dir,
1260  enum page_attr filter)
1261 {
1262  int rc;
1263  uint64_t i;
1264  m0_bindex_t grpstart;
1265  m0_bindex_t grpend;
1266  m0_bindex_t pgstart;
1267  m0_bindex_t pgend;
1268  m0_bcount_t count;
1269  struct m0_bufvec_cursor appdatacur;
1270  struct m0_ivec_cursor extcur;
1271  struct m0_pdclust_layout *play;
1272 
1273  M0_ENTRY("op_io : %p, %s application. filter = 0x%x", ioo,
1274  dir == CD_COPY_FROM_APP ? (char *)"from" : (char *)"to",
1275  filter);
1276 
1279 
1280  m0_bufvec_cursor_init(&appdatacur, &ioo->ioo_data);
1281  m0_ivec_cursor_init(&extcur, &ioo->ioo_ext);
1282 
1283  play = pdlayout_get(ioo);
1284 
1285  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1287 
1288  count = 0;
1289  grpstart = data_size(play) * ioo->ioo_iomaps[i]->pi_grpid;
1290  grpend = grpstart + data_size(play);
1291 
1292  while (!m0_ivec_cursor_move(&extcur, count) &&
1293  m0_ivec_cursor_index(&extcur) < grpend) {
1294 
1295  pgstart = m0_ivec_cursor_index(&extcur);
1296  pgend = min64u(m0_round_up(pgstart + 1,
1297  m0__page_size(ioo)),
1298  pgstart + m0_ivec_cursor_step(&extcur));
1299  count = pgend - pgstart;
1300 
1301  /*
1302  * This takes care of finding correct page from
1303  * current pargrp_iomap structure from pgstart
1304  * and pgend.
1305  */
1306  rc = application_data_copy(
1307  ioo->ioo_iomaps[i], ioo->ioo_obj,
1308  pgstart, pgend, &appdatacur, dir, filter);
1309  if (rc != 0)
1310  return M0_ERR_INFO(
1311  rc, "[%p] Copy failed (pgstart=%" PRIu64
1312  " pgend=%" PRIu64 ")",
1313  ioo, pgstart, pgend);
1314  }
1315 
1316  }
1317 
1318  if (dir == CD_COPY_TO_APP) {
1319  /* verify the checksum during data read.
1320  * skip checksum verification during degraded I/O
1321  */
1322  if (ioreq_sm_state(ioo) != IRS_DEGRADED_READING &&
1323  m0__obj_is_cksum_validation_allowed(ioo) &&
1324  !verify_checksum(ioo)) {
1325  return M0_RC(-EIO);
1326  }
1327  }
1328 
1329  return M0_RC(0);
1330 }
1331 
1339 static int ioreq_parity_recalc(struct m0_op_io *ioo)
1340 {
1341  int rc = 0;
1342  uint64_t i;
1343  struct pargrp_iomap *iomap;
1344 
1345  M0_ENTRY("io_request : %p", ioo);
1347 
1348  m0_semaphore_down(&cpus_sem);
1349 
1350  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1351  iomap = ioo->ioo_iomaps[i];
1352  rc = iomap->pi_ops->pi_parity_recalc(iomap);
1353  if (rc != 0)
1354  break;
1355  }
1356 
1357  m0_semaphore_up(&cpus_sem);
1358 
1359  return rc == 0 ? M0_RC(rc) :
1360  M0_ERR_INFO(rc, "Parity recalc failed for grpid=%3"PRIu64,
1361  iomap->pi_grpid);
1362 }
1363 
1371 static int ioreq_dgmode_recover(struct m0_op_io *ioo)
1372 {
1373  struct m0_pdclust_layout *play;
1374  int rc = 0;
1375  uint64_t i;
1376  struct pargrp_iomap *iomap;
1377 
1378  M0_ENTRY();
1381 
1382  play = pdlayout_get(ioo);
1383  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1384  iomap = ioo->ioo_iomaps[i];
1385  if (iomap->pi_state == PI_DEGRADED) {
1386  if (m0_pdclust_is_replicated(play))
1387  rc = iomap->pi_ops->pi_replica_recover(iomap);
1388  else
1389  rc = iomap->pi_ops->pi_dgmode_recover(iomap);
1390  if (rc != 0)
1391  return M0_ERR(rc);
1392  }
1393  }
1394 
1395  return M0_RC(rc);
1396 }
1397 
1404 static bool is_session_marked(struct m0_op_io *ioo,
1405  struct m0_rpc_session *session)
1406 {
1407  uint64_t i;
1408  uint64_t max_failures;
1409  uint64_t session_id;
1410 
1411  session_id = session->s_session_id;
1412  max_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_CTRLS);
1413  for (i = 0; i < max_failures; ++i) {
1414  if (ioo->ioo_failed_session[i] == session_id)
1415  return M0_RC(true);
1416  else if (ioo->ioo_failed_session[i] == ~(uint64_t)0) {
1417  ioo->ioo_failed_session[i] = session_id;
1418  return M0_RC(false);
1419  }
1420  }
1421  return M0_RC(false);
1422 }
1423 
1429 static bool is_node_marked(struct m0_op_io *ioo,
1430  uint64_t node_id)
1431 {
1432  uint64_t i;
1433  uint64_t max_failures;
1434 
1435  max_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_ENCLS);
1436  for (i = 0; i < max_failures; ++i) {
1437  if (ioo->ioo_failed_nodes[i] == node_id)
1438  return M0_RC(true);
1439  else if (ioo->ioo_failed_nodes[i] == ~(uint64_t)0) {
1440  ioo->ioo_failed_nodes[i] = node_id;
1441  return M0_RC(false);
1442  }
1443  }
1444  return M0_RC(false);
1445 }
1446 
1455 static int device_check(struct m0_op_io *ioo)
1456 {
1457  int rc = 0;
1458  uint32_t fdev_nr = 0;
1459  uint32_t fsvc_nr = 0;
1460  uint32_t fnode_nr = 0;
1461  uint64_t max_svc_failures;
1462  uint64_t max_node_failures;
1463  uint64_t node_id;
1464  enum m0_pool_nd_state state;
1465  enum m0_pool_nd_state node_state;
1466  struct m0_poolnode *node_obj;
1467  struct target_ioreq *ti;
1468  struct m0_pdclust_layout *play;
1469  struct m0_client *instance;
1470  struct m0_poolmach *pm;
1471  struct m0_pool_version *pv;
1472 
1473  M0_ENTRY();
1474  M0_PRE(ioo != NULL);
1475  M0_PRE(M0_IN(ioreq_sm_state(ioo),
1476  (IRS_READ_COMPLETE, IRS_WRITE_COMPLETE)));
1477 
1479  play = pdlayout_get(ioo);
1480  max_svc_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_CTRLS);
1481  max_node_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_ENCLS);
1482 
1483  pv = m0_pool_version_find(&instance->m0c_pools_common, &ioo->ioo_pver);
1484  M0_ASSERT(pv != NULL);
1485  pm = &pv->pv_mach;
1486 
1487  m0_htable_for (tioreqht, ti, &ioo->ioo_nwxfer.nxr_tioreqs_hash) {
1488  rc = m0_poolmach_device_state(pm, ti->ti_obj, &state);
1489  if (rc != 0)
1490  return M0_ERR(rc);
1491 
1492  rc = m0_poolmach_device_node_return(pm, ti->ti_obj, &node_obj);
1493  if (rc != 0)
1494  return M0_ERR(rc);
1495 
1496  m0_rwlock_read_lock(&pm->pm_lock);
1497  node_state = node_obj->pn_state;
1498  m0_rwlock_read_unlock(&pm->pm_lock);
1499 
1500  node_id = node_obj->pn_id.f_key;
1501 
1502  ti->ti_state = state;
1503  if (ti->ti_rc == -ECANCELED) {
1504  /* Ignore service failures in a failed node */
1505  if (M0_IN(node_state, (M0_PNDS_FAILED,
1506  M0_PNDS_OFFLINE))) {
1507  if (!is_node_marked(ioo, node_id))
1508  M0_CNT_INC(fnode_nr);
1509  is_session_marked(ioo, ti->ti_session);
1510  } else if (!is_session_marked(ioo, ti->ti_session)) {
1511  M0_CNT_INC(fsvc_nr);
1512  }
1513  } else if (M0_IN(state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE,
1514  M0_PNDS_SNS_REPAIRING)) &&
1515  !is_session_marked(ioo, ti->ti_session)) {
1516  /*
1517  * The case when multiple devices under the same service
1518  * are unavailable.
1519  */
1520  M0_CNT_INC(fdev_nr);
1521  }
1522 
1523  } m0_htable_endfor;
1524 
1525  M0_LOG(M0_DEBUG, "failed devices = %d\ttolerance=%d", (int)fdev_nr,
1526  (int)layout_k(play));
1527  M0_LOG(M0_DEBUG, "failed services = %d\ttolerance=%d", (int)fsvc_nr,
1528  (int)max_svc_failures);
1529  M0_LOG(M0_DEBUG, "failed nodes = %d\ttolerance=%d", (int)fnode_nr,
1530  (int)max_node_failures);
1531 
1532  if (is_pver_dud(fdev_nr, layout_k(play), fsvc_nr, max_svc_failures,
1533  fnode_nr, max_node_failures))
1534  return M0_ERR_INFO(-EIO, "[%p] Failed to recover data "
1535  "since number of failed data units "
1536  "(%lu) exceeds number of parity "
1537  "units in parity group (%lu) OR "
1538  "number of failed services (%lu) "
1539  "exceeds number of max failures "
1540  "supported (%lu) OR "
1541  "number of failed nodes (%lu) "
1542  "exceeds number of max node failures "
1543  "supported (%lu)",
1544  ioo, (unsigned long)fdev_nr,
1545  (unsigned long)layout_k(play),
1546  (unsigned long)fsvc_nr,
1547  (unsigned long)max_svc_failures,
1548  (unsigned long)fnode_nr,
1549  (unsigned long)max_node_failures);
1550  return M0_RC(fdev_nr);
1551 }
1552 
1561 static int ioreq_dgmode_read(struct m0_op_io *ioo, bool rmw)
1562 {
1563  int rc = 0;
1564  uint64_t i;
1565  struct nw_xfer_request *xfer;
1566  struct pargrp_iomap *iomap;
1567  struct ioreq_fop *irfop;
1568  struct target_ioreq *ti;
1569  enum m0_pool_nd_state state;
1570  struct m0_poolmach *pm;
1571 
1572  M0_ENTRY();
1574 
1575  /*
1576  * Note: if devices are in the M0_PNDS_SNS_REPAIRED state, the op
1577  * 'ioo' switches back to the IRS_READING state (see the code below
1578  * ['else' part of 'ioo_dgmap_nr > 0'] and comments in dgmode_process).
1579  * How to tell whether an op is doing normal or degraded IO, so as to
1580  * avoid entering ioreq_dgmode_read() multiple times (or looping)?
1581  * A flag 'ioo_dgmode_io_sent' is used here!
1582  */
1583  if (ioo->ioo_dgmode_io_sent == true) {
1584  /*
1585  * Recovers lost data using parity recovery algorithms
1586  * only if one or more devices were in FAILED, OFFLINE,
1587  * REPAIRING state.
1588  */
1589  if (ioo->ioo_dgmap_nr > 0)
1590  rc = ioo->ioo_ops->iro_dgmode_recover(ioo);
1591 
1592  return M0_RC(rc);
1593  }
1594  /*
1595  * If all devices are ONLINE, all requests return success.
1596  * In case of read before write, due to CROW, COB will not be present,
1597  * resulting in an -ENOENT error.
1598  */
1599  xfer = &ioo->ioo_nwxfer;
1600  if (xfer->nxr_rc == 0 || xfer->nxr_rc == -ENOENT)
1601  return M0_RC(xfer->nxr_rc);
1602 
1603  /*
1604  * The number of failed devices is not a good enough criterion
1605  * by itself. Even if one or more devices failed, the IO request
1606  * could still complete if it did not send any pages to the
1607  * failed device(s) at all.
1608  */
1609  rc = device_check(ioo);
1610  if (rc < 0)
1611  return M0_RC(rc);
1612 
1613  pm = ioo_to_poolmach(ioo);
1614  M0_ASSERT(pm != NULL);
1615 
1616  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1617  /*
1618  * Data was retrieved successfully, so no need to check the
1619  * state of the device.
1620  */
1621  if (ti->ti_rc == 0)
1622  continue;
1623 
1624  /* state is already queried in device_check() and stored
1625  * in ti->ti_state. Why do we do this again?
1626  */
1627  rc = m0_poolmach_device_state(
1628  pm, ti->ti_obj, &state);
1629  if (rc != 0)
1630  return M0_ERR(rc);
1631  M0_LOG(M0_INFO, "device state for "FID_F" is %d",
1632  FID_P(&ti->ti_fid), state);
1633  ti->ti_state = state;
1634 
1635  if (!M0_IN(state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE,
1636  M0_PNDS_SNS_REPAIRING,
1637  M0_PNDS_SNS_REPAIRED)))
1638  continue;
1639  /*
1640  * Finds out parity groups for which read IO failed and marks
1641  * them as DEGRADED. This is necessary since read IO request
1642  * could be reading only a part of a parity group but if it
1643  * failed, rest of the parity group also needs to be read
1644  * (subject to file size) in order to re-generate lost data.
1645  */
1646  m0_tl_for (iofops, &ti->ti_iofops, irfop) {
1647  rc = ioreq_fop_dgmode_read(irfop);
1648  if (rc != 0)
1649  break;
1650  } m0_tl_endfor;
1651  } m0_htable_endfor;
1652 
1653  if (rc != 0)
1654  return M0_ERR_INFO(rc, "[%p] dgmode failed", ioo);
1655 
1656  M0_LOG(M0_DEBUG, "[%p] dgmap_nr=%u is in dgmode",
1657  ioo, ioo->ioo_dgmap_nr);
1658  /*
1659  * Starts processing the pages again if any of the parity groups
1660  * spanned by input IO-request is in degraded mode.
1661  */
1662  if (ioo->ioo_dgmap_nr > 0) {
1663  M0_LOG(M0_WARN, "Process failed parity groups in dgmode/read "
1664  "ioo=%p dgmap_nr=%u",
1665  ioo, ioo->ioo_dgmap_nr);
1666  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE)
1667  ioreq_sm_state_set_locked(ioo, IRS_DEGRADED_READING);
1668  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1669  iomap = ioo->ioo_iomaps[i];
1670  rc = iomap->pi_ops->pi_dgmode_postprocess(iomap);
1671  if (rc != 0)
1672  break;
1673  }
1674  } else {
1677  /*
1678  * By this time, the page count in target_ioreq::ti_ivec and
1679  * target_ioreq::ti_bufvec is greater than 1, but it is
1680  * invalid since the distribution (layout) is about to
1681  * change.
1682  * Ergo, page counts in index and buffer vectors are reset.
1683  */
1684 
1685  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1686  ti->ti_ivec.iv_vec.v_nr = 0;
1687  } m0_htable_endfor;
1688  }
1689 
1690  xfer->nxr_ops->nxo_complete(xfer, rmw);
1691 
1692  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1693  ti->ti_databytes = 0;
1694  ti->ti_parbytes = 0;
1695  ti->ti_rc = 0;
1696  } m0_htable_endfor;
1697 
1698  /* Resets the status code before starting degraded mode read IO. */
1699  ioo->ioo_rc = xfer->nxr_rc = 0;
1700 
1701  rc = xfer->nxr_ops->nxo_distribute(xfer);
1702  if (rc != 0)
1703  return M0_ERR(rc);
1704 
1705  rc = xfer->nxr_ops->nxo_dispatch(xfer);
1706  if (rc != 0)
1707  return M0_ERR(rc);
1708  ioo->ioo_dgmode_io_sent = true;
1709 
1710  return M0_RC(rc);
1711 }
1712 
1721 static int ioreq_dgmode_write(struct m0_op_io *ioo, bool rmw)
1722 {
1723  int rc;
1724  struct target_ioreq *ti;
1725  struct nw_xfer_request *xfer;
1726 
1727  M0_ENTRY();
1729 
1730  xfer = &ioo->ioo_nwxfer;
1731 
1732  /* See the comments in ioreq_dgmode_read */
1733  if (ioo->ioo_dgmode_io_sent)
1734  return M0_RC(xfer->nxr_rc);
1735 
1736  /* -E2BIG: see commit 52c1072141d*/
1737  if (M0_IN(xfer->nxr_rc, (0, -E2BIG)))
1738  return M0_RC(xfer->nxr_rc);
1739 
1740  rc = device_check(ioo);
1741  if (rc < 0)
1742  return M0_RC(rc);
1743 
1744  /*
1745  * This IO request has already acquired distributed lock on the
1746  * file by this time.
1747  * Degraded mode write needs to handle 2 prime use-cases.
1748  * 1. SNS repair still to start on associated global fid.
1749  * 2. SNS repair has completed for associated global fid.
1750  * Both use-cases imply unavailability of one or more devices.
1751  *
1752  * In first use-case, repair is yet to start on file. Hence,
1753  * rest of the file data which goes on healthy devices can be
1754  * written safely.
1755  * In this case, the fops meant for failed device(s) will be simply
1756  * dropped and rest of the fops will be sent to respective ioservice
1757  * instances for writing data to servers.
1758  * Later when this IO request relinquishes the distributed lock on
1759  * associated global fid and SNS repair starts on the file, the lost
1760  * data will be regenerated using parity recovery algorithms.
1761  *
1762  * The second use-case implies completion of SNS repair for associated
1763  * global fid and the lost data is regenerated on distributed spare
1764  * units.
1765  * Ergo, all the file data meant for lost device(s) will be redirected
1766  * towards corresponding spare unit(s). Later when SNS rebalance phase
1767  * commences, it will migrate the data from spare to a new device, thus
1768  * making spare available for recovery again.
1769  * In this case, old fops will be discarded and all pages spanned by
1770  * IO request will be reshuffled by redirecting pages meant for
1771  * failed device(s) to its corresponding spare unit(s).
1772  */
1773  ioreq_sm_state_set_locked(ioo, IRS_DEGRADED_WRITING);
1774 
1775  /*
1776  * Finalizes current fops which are not valid anymore.
1777  * Fops need to be finalized in either case since old network buffers
1778  * from IO fops are still enqueued in transfer machine and removal
1779  * of these buffers would lead to finalization of rpc bulk object.
1780  */
1781  xfer->nxr_ops->nxo_complete(xfer, rmw);
1782 
1783  /*
1784  * Resets count of data bytes and parity bytes along with
1785  * return status.
1786  * Fops meant for failed devices are dropped in
1787  * nw_xfer_req_dispatch().
1788  */
1789  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1790  ti->ti_databytes = 0;
1791  ti->ti_parbytes = 0;
1792  ti->ti_rc = 0;
1793  ti->ti_req_type = TI_NONE;
1794  } m0_htable_endfor;
1795 
1796  /*
1797  * Redistributes all pages by routing pages for failed devices
1798  * to spare units for each parity group.
1799  */
1800  rc = xfer->nxr_ops->nxo_distribute(xfer);
1801  if (rc != 0)
1802  return M0_ERR_INFO(rc, "Failed to prepare dgmode write fops");
1803 
1804  xfer->nxr_rc = 0;
1805  ioo->ioo_rc = 0;
1806 
1807  rc = xfer->nxr_ops->nxo_dispatch(xfer);
1808  if (rc != 0)
1809  return M0_ERR_INFO(rc, "Failed to dispatch degraded mode "
1810  "write IO fops");
1811 
1812  ioo->ioo_dgmode_io_sent = true;
1813 
1814  return M0_RC(xfer->nxr_rc);
1815 }
1816 
1817 static int ioreq_parity_verify(struct m0_op_io *ioo)
1818 {
1819  struct pargrp_iomap *iomap = NULL;
1820  struct m0_pdclust_layout *play;
1821  struct m0_client *instance;
1822  struct m0_op *op;
1823  int rc = 0;
1824  uint64_t i;
1825 
1826  M0_ENTRY("m0_op_io : %p", ioo);
1828 
1829  op = &ioo->ioo_oo.oo_oc.oc_op;
1830  instance = m0__op_instance(op);
1831  play = pdlayout_get(ioo);
1832 
1833  if (op->op_code != M0_OC_READ ||
1834  !instance->m0c_config->mc_is_read_verify)
1835  return M0_RC(0);
1836 
1837  m0_semaphore_down(&cpus_sem);
1838 
1839  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1840  iomap = ioo->ioo_iomaps[i];
1841  if (iomap->pi_state == PI_DEGRADED) {
1842  /* data is recovered from existing data and parity.
1843  * It's meaningless to do parity verification */
1844  continue;
1845  }
1846  if (m0_pdclust_is_replicated(play))
1847  rc = iomap->pi_ops->pi_parity_replica_verify(iomap);
1848  else
1849  rc = iomap->pi_ops->pi_parity_verify(iomap);
1850  if (rc != 0)
1851  break;
1852  }
1853 
1854  m0_semaphore_up(&cpus_sem);
1855  return rc != 0 ? M0_ERR_INFO(rc, "Parity verification failed for "
1856  "grpid=%"PRIu64,
1857  iomap->pi_grpid) : M0_RC(rc);
1858 }
1859 /* XXX (Sining): should we rename ioreq_xxx to ioo_xxx?*/
1860 const struct m0_op_io_ops ioo_ops = {
1861  .iro_iomaps_prepare = ioreq_iomaps_prepare,
1862  .iro_iomaps_destroy = ioreq_iomaps_destroy,
1863  .iro_application_data_copy = ioreq_application_data_copy,
1864  .iro_parity_recalc = ioreq_parity_recalc,
1865  .iro_parity_verify = ioreq_parity_verify,
1866  .iro_iosm_handle_launch = ioreq_iosm_handle_launch,
1867  .iro_iosm_handle_executed = ioreq_iosm_handle_executed,
1868  .iro_dgmode_read = ioreq_dgmode_read,
1869  .iro_dgmode_write = ioreq_dgmode_write,
1870  .iro_dgmode_recover = ioreq_dgmode_recover,
1871 };
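/*
 * Illustrative sketch (an assumption, not part of the io_req.c listing):
 * the object IO setup path is expected to wire this vector into each
 * request and then drive it through the same ops, roughly
 *
 *	ioo->ioo_ops = &ioo_ops;
 *	rc = ioo->ioo_ops->iro_iomaps_prepare(ioo);
 *
 * with launch and completion handling done by posting ioo_ast so that
 * iro_iosm_handle_launch()/iro_iosm_handle_executed() above run in the
 * operation's state machine group.
 */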
1872 
1873 #undef M0_TRACE_SUBSYSTEM
1874 
1875 /*
1876  * Local variables:
1877  * c-indentation-style: "K&R"
1878 
1879  * c-basic-offset: 8
1880  * tab-width: 8
1881  * fill-column: 80
1882  * scroll-step: 1
1883  * End:
1884  */
1885 /*
1886  * vim: tabstop=8 shiftwidth=8 noexpandtab textwidth=80 nowrap
1887  */