Motr
M0
|
#include "lib/errno.h"
#include "lib/memory.h"
#include "lib/trace.h"
#include "addb2/addb2.h"
#include "fol/fol.h"
#include "stob/io.h"
#include "stob/stob.h"
#include "stob/domain.h"
#include "stob/addb2.h"
Go to the source code of this file.
Macros | |
#define | M0_TRACE_SUBSYSTEM M0_TRACE_SUBSYS_STOB |
#define | SWAP_NEXT(arr, idx) |
Functions | |
static void | m0_stob_io_private_fini (struct m0_stob_io *io) |
static void | stob_io_addb2_add_and_push (uint64_t id, struct m0_stob_io *io, struct m0_stob *obj) |
static bool | stob_io_invariant (struct m0_stob_io *io, struct m0_stob *obj, enum m0_stob_io_state state) |
static void | stob_io_fill (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope, enum m0_stob_io_state state, bool count_update) |
adieu | |
Asynchronous Direct Io Extensible User interface (adieu) for storage objects. Overview. adieu is an interface for a non-blocking (asynchronous) 0-copy (direct) vectored IO against storage objects. A user of this interface builds an IO operation description and queues it against a storage object. IO completion or failure notification is done by signalling a user supplied m0_chan. As usual, the user can either wait on the chan or register a call-back with it. adieu supports scatter-gather type of IO operations (that is, vectored on both input and output data). adieu can work both on local and remote storage objects. adieu IO operations are executed as part of a distributed transaction. Functional specification. Externally, adieu usage has the following phases: - m0_bufvec registration. Some types of storage objects require that buffers from which IO is done are registered with its IO sub-system (examples: RDMA). This step is optional; IO from unregistered buffers should also be possible (albeit might incur additional data-copy). - IO description creation. An IO operation description object m0_stob_io is initialised. - IO operation is queued by a call to m0_stob_io_launch(). It is guaranteed that on a successful return from this call, a chan embedded into IO operation data-structure will be eventually signalled. - An execution of a queued IO operation can be delayed for some time due to storage traffic control regulations, concurrency control, resource quotas or barriers. - An IO operation is executed, possibly by splitting it into implementation defined fragments. A user can request a "prefixed fragments execution" mode (m0_stob_io_flags::SIF_PREFIX) constraining execution concurrency so as to guarantee that after execution completion (with success or failure) a storage is updated as if some possibly empty prefix of the IO operation executed successfully (this is similar to the failure mode of POSIX write call).
When prefixed fragments execution mode is not requested, an implementation is free to execute fragments in any order and with any degree of concurrency. Prefixed fragments execution mode request has no effect on read-only IO operations. - When the whole operation execution completes, a chan embedded into IO operation data-structure is signalled. It is guaranteed that no IO is outstanding at this moment and that adieu implementation won't touch either IO operation structure or associated data pages afterward. - After analyzing IO result codes, a user is free to either de-allocate IO operation structure by calling m0_stob_io_fini() or use it to queue another IO operation potentially against a different object. Ordering and barriers. The only guarantee about relative order of IO operations state transitions is that execution of any updating operation submitted before m0_stob_io_opcode::SIO_BARRIER operation completes before any updating operation submitted after the barrier starts executing. For the purpose of this definition, an updating operation is an operation of any valid type different from SIO_READ (i.e., barriers are updating operations). A barrier operation completes when all operations submitted before it (including other barrier operations) complete.
IO alignment and granularity. Alignment is not "optimal IO size". This is a requirement rather than a hint. Block sizes are needed for the following reasons: - to insulate stob IO layer from read-modify-write details; - to allow IO to the portions of objects inaccessible through the flat 64-bit byte-granularity name-space.
Result codes. In addition to filling in data pages with the data (in the case of a read operation), adieu supplies two status codes on IO completion: - <tt>m0_stob_io::si_rc</tt> is a return code of IO operation. 0 means success, any other possible value is negated errno; - <tt>m0_stob_io::si_count</tt> is a number of blocks (as defined by m0_stob_op::sop_block_shift()) successfully transferred between data pages and the storage object. When IO is executed in prefixed fragments mode, exactly <tt>m0_stob_io::si_count</tt> blocks of the storage object, starting from the offset <tt>m0_stob_io::si_stob.ov_index[0]</tt> were transferred. Data ownership. Data pages are owned by adieu implementation from the moment of call to m0_stob_io_launch() until the chan is signalled. adieu users must not inspect or modify data during that time. An implementation is free to modify the data temporarily, un-map pages, etc. An implementation must not touch the data at any other time. Liveness rules. m0_stob_io can be freed once it is owned by an adieu user (see data ownership). It has no explicit reference counting, a user must add its own should m0_stob_io be shared between multiple threads. The user must guarantee that the target storage object is pinned in memory while IO operation is owned by the implementation. An implementation is free to touch storage object while IO is in progress. Similarly, the user must pin the transaction and IO scope while m0_stob_io is owned by the implementation. Concurrency. When m0_stob_io is owned by a user, the user is responsible for concurrency control. Implementation guarantees that synchronous channel notification (through clink call-back) happens in the context not holding IO lock. At the moment there are two types of storage objects supporting adieu: - Linux file system based one, using Linux libaio interfaces; - AD stob type implements adieu on top of underlying backing store storage object. State.
                      (O)(X)
                       |  ^
                       |  |
     m0_stob_io_init() |  | m0_stob_io_fini()
                       |  |
                       V  |
                     SIS_IDLE
                       |  ^
                       |  |
   m0_stob_io_launch() |  | IO completion
                       |  |
                       V  |
                     SIS_BUSY
| |
M0_INTERNAL int | m0_stob_io_private_setup (struct m0_stob_io *io, struct m0_stob *obj) |
M0_INTERNAL void | m0_stob_io_init (struct m0_stob_io *io) |
M0_INTERNAL void | m0_stob_io_fini (struct m0_stob_io *io) |
M0_INTERNAL void | m0_stob_io_credit (const struct m0_stob_io *io, const struct m0_stob_domain *dom, struct m0_be_tx_credit *accum) |
M0_INTERNAL int | m0_stob_io_prepare (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
M0_INTERNAL int | m0_stob_io_launch (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
M0_INTERNAL int | m0_stob_io_prepare_and_launch (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
M0_INTERNAL bool | m0_stob_io_user_is_valid (const struct m0_bufvec *user) |
M0_INTERNAL bool | m0_stob_io_stob_is_valid (const struct m0_indexvec *stob) |
M0_INTERNAL int | m0_stob_io_bufvec_launch (struct m0_stob *stob, struct m0_bufvec *bufvec, int op_code, m0_bindex_t offset) |
M0_INTERNAL void * | m0_stob_addr_pack (const void *buf, uint32_t shift) |
M0_INTERNAL void * | m0_stob_addr_open (const void *buf, uint32_t shift) |
M0_INTERNAL void | m0_stob_iovec_sort (struct m0_stob_io *stob) |
#define SWAP_NEXT | ( | arr, | |
idx | |||
) |