|
M0_INTERNAL int | m0_stob_cache_init (struct m0_stob_cache *cache, uint64_t idle_size, m0_stob_cache_eviction_cb_t eviction_cb) |
|
M0_INTERNAL void | m0_stob_cache_fini (struct m0_stob_cache *cache) |
|
M0_INTERNAL bool | m0_stob_cache__invariant (const struct m0_stob_cache *cache) |
|
M0_INTERNAL void | m0_stob_cache_add (struct m0_stob_cache *cache, struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob_cache_idle (struct m0_stob_cache *cache, struct m0_stob *stob) |
|
M0_INTERNAL struct m0_stob * | m0_stob_cache_lookup (struct m0_stob_cache *cache, const struct m0_fid *stob_fid) |
|
M0_INTERNAL void | m0_stob_cache_purge (struct m0_stob_cache *cache, int nr) |
|
M0_INTERNAL void | m0_stob_cache_lock (struct m0_stob_cache *cache) |
|
M0_INTERNAL void | m0_stob_cache_unlock (struct m0_stob_cache *cache) |
|
M0_INTERNAL bool | m0_stob_cache_is_locked (const struct m0_stob_cache *cache) |
|
M0_INTERNAL bool | m0_stob_cache_is_not_locked (const struct m0_stob_cache *cache) |
|
M0_INTERNAL void | m0_stob_cache__print (struct m0_stob_cache *cache) |
|
static int | stob_domain_type (const char *location, struct m0_stob_type **type) |
|
static char * | stob_domain_location_data (const char *location) |
|
static void | stob_domain_cache_evict_cb (struct m0_stob_cache *cache, struct m0_stob *stob) |
|
static int | stob_domain_create (struct m0_stob_type *type, const char *location_data, uint64_t dom_key, const char *str_cfg_create) |
|
static int | stob_domain_init (struct m0_stob_type *type, const char *location_data, const char *str_cfg_init, struct m0_stob_domain **out) |
|
static int | stob_domain_init_create (const char *location, const char *str_cfg_init, uint64_t dom_key, const char *str_cfg_create, struct m0_stob_domain **out, bool init) |
|
M0_INTERNAL int | m0_stob_domain_init (const char *location, const char *str_cfg_init, struct m0_stob_domain **out) |
|
M0_INTERNAL void | m0_stob_domain_fini (struct m0_stob_domain *dom) |
|
M0_INTERNAL int | m0_stob_domain_create (const char *location, const char *str_cfg_init, uint64_t dom_key, const char *str_cfg_create, struct m0_stob_domain **out) |
|
M0_INTERNAL int | m0_stob_domain_destroy (struct m0_stob_domain *dom) |
|
M0_INTERNAL int | m0_stob_domain_destroy_location (const char *location) |
|
M0_INTERNAL int | m0_stob_domain_create_or_init (const char *location, const char *str_cfg_init, uint64_t dom_key, const char *str_cfg_create, struct m0_stob_domain **out) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_domain_find (const struct m0_fid *dom_id) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_domain_find_by_location (const char *location) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_domain_find_by_stob_id (const struct m0_stob_id *stob_id) |
|
M0_INTERNAL const struct m0_fid * | m0_stob_domain_id_get (const struct m0_stob_domain *dom) |
|
M0_INTERNAL const char * | m0_stob_domain_location_get (const struct m0_stob_domain *dom) |
|
M0_INTERNAL void | m0_stob_domain__id_set (struct m0_stob_domain *dom, struct m0_fid *dom_id) |
|
M0_INTERNAL uint8_t | m0_stob_domain__type_id (const struct m0_fid *dom_id) |
|
M0_INTERNAL uint64_t | m0_stob_domain__dom_key (const struct m0_fid *dom_id) |
|
M0_INTERNAL void | m0_stob_domain__dom_id_make (struct m0_fid *dom_id, uint8_t type_id, uint64_t dom_container, uint64_t dom_key) |
|
M0_INTERNAL bool | m0_stob_domain__invariant (struct m0_stob_domain *dom) |
|
M0_INTERNAL bool | m0_stob_domain__dom_key_is_valid (uint64_t dom_key) |
|
M0_INTERNAL bool | m0_stob_domain_is_of_type (const struct m0_stob_domain *dom, const struct m0_stob_type *dt) |
|
M0_INTERNAL struct m0_stob_cache * | m0_stob_domain__cache (struct m0_stob_domain *dom) |
|
M0_INTERNAL struct m0_stob * | m0_stob_domain__stob_alloc (struct m0_stob_domain *dom, const struct m0_fid *stob_fid) |
|
M0_INTERNAL void | m0_stob_domain__stob_free (struct m0_stob_domain *dom, struct m0_stob *stob) |
|
static void | m0_stob_io_private_fini (struct m0_stob_io *io) |
|
static void | stob_io_addb2_add_and_push (uint64_t id, struct m0_stob_io *io, struct m0_stob *obj) |
|
static bool | stob_io_invariant (struct m0_stob_io *io, struct m0_stob *obj, enum m0_stob_io_state state) |
|
static void | stob_io_fill (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope, enum m0_stob_io_state state, bool count_update) |
|
struct m0_stob_ioq_error | M0_XCA_DOMAIN (rpc) |
|
M0_INTERNAL struct m0_stob_module * | m0_stob_module__get (void) |
|
M0_INTERNAL int | m0_stob_find_by_key (struct m0_stob_domain *dom, const struct m0_fid *stob_fid, struct m0_stob **out) |
|
M0_INTERNAL int | m0_stob_find (const struct m0_stob_id *id, struct m0_stob **out) |
|
M0_INTERNAL int | m0_stob_lookup_by_key (struct m0_stob_domain *dom, const struct m0_fid *stob_fid, struct m0_stob **out) |
|
M0_INTERNAL int | m0_stob_lookup (const struct m0_stob_id *id, struct m0_stob **out) |
|
M0_INTERNAL int | m0_stob_locate (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob_create_credit (struct m0_stob_domain *dom, struct m0_be_tx_credit *accum) |
|
M0_INTERNAL int | m0_stob_create (struct m0_stob *stob, struct m0_dtx *dtx, const char *str_cfg) |
|
M0_INTERNAL void | m0_stob_destroy_credit (struct m0_stob *stob, struct m0_be_tx_credit *accum) |
|
M0_INTERNAL void | m0_stob_delete_mark (struct m0_stob *stob) |
|
M0_INTERNAL int | m0_stob_destroy (struct m0_stob *stob, struct m0_dtx *dtx) |
|
M0_INTERNAL int | m0_stob_punch_credit (struct m0_stob *stob, struct m0_indexvec *want, struct m0_indexvec *got, struct m0_be_tx_credit *accum) |
|
M0_INTERNAL int | m0_stob_punch (struct m0_stob *stob, struct m0_indexvec *range, struct m0_dtx *dtx) |
|
M0_INTERNAL uint64_t | m0_stob_dom_id_get (struct m0_stob *stob) |
|
M0_INTERNAL const struct m0_stob_id * | m0_stob_id_get (struct m0_stob *stob) |
|
M0_INTERNAL const struct m0_fid * | m0_stob_fid_get (struct m0_stob *stob) |
|
M0_INTERNAL uint64_t | m0_stob_id_dom_id_get (const struct m0_stob_id *stob_id) |
|
M0_INTERNAL enum m0_stob_state | m0_stob_state_get (struct m0_stob *stob) |
|
M0_INTERNAL uint32_t | m0_stob_block_shift (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob_get (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob_put (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob__id_set (struct m0_stob *stob, const struct m0_fid *stob_fid) |
|
M0_INTERNAL void | m0_stob__cache_evict (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob__state_set (struct m0_stob *stob, enum m0_stob_state state) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_dom_get (struct m0_stob *stob) |
|
M0_INTERNAL void | m0_stob_id_make (uint64_t container, uint64_t key, const struct m0_fid *dom_id, struct m0_stob_id *stob_id) |
|
M0_INTERNAL bool | m0_stob_id_eq (const struct m0_stob_id *stob_id0, const struct m0_stob_id *stob_id1) |
|
M0_INTERNAL int | m0_stob_fd (struct m0_stob *stob) |
|
M0_INTERNAL int | m0_stob_mod_init (void) |
|
M0_INTERNAL void | m0_stob_mod_fini (void) |
|
struct m0_stob_id | M0_XCA_DOMAIN (be|rpc) |
|
| M0_TL_DESCR_DEFINE (domains, "stob domains", static, struct m0_stob_domain, sd_domain_linkage, sd_magic, M0_STOB_DOMAINS_MAGIC, M0_STOB_DOMAINS_HEAD_MAGIC) |
|
| M0_TL_DEFINE (domains, static, struct m0_stob_domain) |
|
| M0_TL_DESCR_DEFINE (types, "stob types", static, struct m0_stob_type, st_type_linkage, st_magic, M0_STOB_TYPES_MAGIC, M0_STOB_TYPES_HEAD_MAGIC) |
|
| M0_TL_DEFINE (types, static, struct m0_stob_type) |
|
static int | stob_type_copy (const struct m0_stob_type *type, struct m0_stob_type **copy) |
|
static void | stob_types_destroy_list (struct m0_stob_types *types) |
|
static struct m0_stob_types * | stob_types_get (void) |
|
M0_INTERNAL int | m0_stob_types_init (void) |
|
M0_INTERNAL void | m0_stob_types_fini (void) |
|
M0_INTERNAL struct m0_stob_type * | m0_stob_type_by_dom_id (const struct m0_fid *id) |
|
M0_INTERNAL struct m0_stob_type * | m0_stob_type_by_name (const char *name) |
|
M0_INTERNAL uint8_t | m0_stob_type_id_by_name (const char *name) |
|
M0_INTERNAL void | m0_stob_type_register (struct m0_stob_type *type) |
|
M0_INTERNAL void | m0_stob_type_deregister (struct m0_stob_type *type) |
|
M0_INTERNAL uint8_t | m0_stob_type_id_get (const struct m0_stob_type *type) |
|
M0_INTERNAL const char * | m0_stob_type_name_get (struct m0_stob_type *type) |
|
M0_INTERNAL void | m0_stob_type__dom_add (struct m0_stob_type *type, struct m0_stob_domain *dom) |
|
M0_INTERNAL void | m0_stob_type__dom_del (struct m0_stob_type *type, struct m0_stob_domain *dom) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_type__dom_find (struct m0_stob_type *type, const struct m0_fid *dom_id) |
|
M0_INTERNAL struct m0_stob_domain * | m0_stob_type__dom_find_by_location (struct m0_stob_type *type, const char *location) |
|
void | m0_stob_ut_ad_init (struct m0_be_ut_backend *ut_be, struct m0_be_ut_seg *ut_seg, bool use_small_credits) |
|
void | m0_stob_ut_ad_fini (struct m0_be_ut_backend *ut_be, struct m0_be_ut_seg *ut_seg) |
|
static struct mock_balloc * | b2mock (struct m0_ad_balloc *ballroom) |
|
static int | mock_balloc_init (struct m0_ad_balloc *ballroom, struct m0_be_seg *seg, uint32_t bshift, m0_bindex_t container_size, m0_bcount_t groupsize, m0_bcount_t spare_reserve) |
|
static void | mock_balloc_fini (struct m0_ad_balloc *ballroom) |
|
static int | mock_balloc_alloc (struct m0_ad_balloc *ballroom, struct m0_dtx *dtx, m0_bcount_t count, struct m0_ext *out, uint64_t alloc_type) |
|
static int | mock_balloc_free (struct m0_ad_balloc *ballroom, struct m0_dtx *dtx, struct m0_ext *ext) |
|
static int | malloc_reserve_extent (struct m0_ad_balloc *ballroom, struct m0_be_tx *tx, struct m0_ext *ext, uint64_t alloc_zone) |
|
static void | init_vecs () |
|
static int | test_ad_init (bool use_small_credits) |
|
static int | test_ad_fini (void) |
|
static void | test_write (int nr, struct m0_dtx *tx) |
|
static void | test_read (int nr) |
|
static void | test_punch (int nr) |
|
static void | test_ad_rw_unordered () |
|
static void | test_ad (void) |
|
static void | punch_test (void) |
|
static void | test_ad_undo (void) |
|
void | m0_stob_ut_adieu_ad (void) |
|
static void | ub_write (int i) |
|
static void | ub_read (int i) |
|
static int | ub_init (const char *opts M0_UNUSED) |
|
static void | ub_fini (void) |
|
static int | test_adieu_init (const char *location, const char *dom_cfg, const char *stob_cfg) |
|
static void | test_adieu_fini (void) |
|
static void | test_write (int i) |
|
static void | test_adieu (const char *path) |
|
void | m0_stob_ut_adieu_linux (void) |
|
void | m0_stob_ut_adieu_perf (void) |
|
static void | ub_iovec_init () |
|
static void | ub_iovec_invert () |
|
static void | ub_iovec_sort () |
|
static void | ub_iovec_sort_invert () |
|
|
Asynchronous Direct Io Extensible User interface (adieu) for storage objects.
Overview.
adieu is an interface for a non-blocking (asynchronous) 0-copy (direct) vectored IO against storage objects.
A user of this interface builds an IO operation description and queues it against a storage object. IO completion or failure notification is done by signalling a user supplied m0_chan. As usual, the user can either wait on the chan or register a call-back with it.
adieu supports scatter-gather type of IO operations (that is, vectored on both input and output data).
adieu can work both on local and remote storage objects. adieu IO operations are executed as part of a distributed transaction.
Functional specification.
Externally, adieu usage has the following phases: - m0_bufvec registration. Some types of storage objects require that
buffers from which IO is done are registered with its IO sub-system
(examples: RDMA). This step is optional, IO from unregistered buffers
should also be possible (albeit might incur additional data-copy).
- IO description creation. An IO operation description object m0_stob_io
is initialised.
- IO operation is queued by a call to m0_stob_io_launch(). It is
guaranteed that on a successful return from this call, a chan embedded
into IO operation data-structure will be eventually signalled.
- An execution of a queued IO operation can be delayed for some time
due to storage traffic control regulations, concurrency control,
resource quotas or barriers.
- An IO operation is executed, possibly by splitting it into
implementation defined fragments. A user can request a "prefixed
fragments execution" mode (m0_stob_io_flags::SIF_PREFIX) constraining
execution concurrency as to guarantee that after execution completion
(with success or failure) a storage is updated as if some possibly
empty prefix of the IO operation executed successfully (this is similar
to the failure mode of POSIX write call). When prefixed fragments
execution mode is not requested, an implementation is free to execute
fragments in any order and with any degree of concurrency. Prefixed
fragments execution mode request has no effect on read-only IO
operations.
- When whole operation execution completes, a chan embedded into IO
operation data-structure is signalled. It is guaranteed that no IO is
outstanding at this moment and that adieu implementation won't touch
either IO operation structure or associated data pages afterward.
- After analyzing IO result codes, a user is free to either de-allocate
IO operation structure by calling m0_stob_io_fini() or use it to queue
another IO operation potentially against different object.
Ordering and barriers.
The only guarantee about relative order of IO operations state transitions is that execution of any updating operation submitted before m0_stob_io_opcode::SIO_BARRIER operation completes before any updating operation submitted after the barrier starts executing. For the purpose of this definition, an updating operation is an operation of any valid type different from SIO_READ (i.e., barriers are updating operations).
A barrier operation completes when all operations submitted before it (including other barrier operations) complete.
- Warning
- Clarify the scope of a barrier: a single storage object, a storage object domain, a storage object type, all local storage objects or all objects in the system.
IO alignment and granularity.
Alignment is not "optimal IO size". This is a requirement rather than a hint.
Block sizes are needed for the following reasons: - to insulate stob IO layer from read-modify-write details;
- to allow IO to the portions of objects inaccessible through the
flat 64-bit byte-granularity name-space.
- Note
- the scheme is very simplistic, enforcing the same unit of alignment and granularity. Sophistication could be added as necessary.
Result codes.
In addition to filling in data pages with the data (in the case of a read operation), adieu supplies two status codes on IO completion: - <tt>m0_stob_io::si_rc</tt> is a return code of IO operation. 0 means
success, any other possible value is negated errno;
- <tt>m0_stob_io::si_count</tt> is a number of blocks (as defined by
m0_stob_op::sop_block_shift()) successfully transferred between data
pages and the storage object. When IO is executed in prefixed
fragments mode, exactly <tt>m0_stob_io::si_count</tt> blocks of the
storage object, starting from the offset
<tt>m0_stob_io::si_stob.ov_index[0]</tt> were transferred.
Data ownership.
Data pages are owned by adieu implementation from the moment of call to m0_stob_io_launch() until the chan is signalled. adieu users must not inspect or modify data during that time. An implementation is free to modify the data temporarily, un-map pages, etc. An implementation must not touch the data at any other time.
Liveness rules.
m0_stob_io can be freed once it is owned by an adieu user (see data ownership). It has no explicit reference counting, a user must add its own should m0_stob_io be shared between multiple threads.
The user must guarantee that the target storage object is pinned in memory while IO operation is owned by the implementation. An implementation is free to touch storage object while IO is in progress.
Similarly, the user must pin the transaction and IO scope while m0_stob_io is owned by the implementation.
Concurrency.
When m0_stob_io is owned by a user, the user is responsible for concurrency control.
Implementation guarantees that synchronous channel notification (through clink call-back) happens in the context not holding IO lock.
At the moment there are two types of storage object supporting adieu: - Linux file system based one, using Linux libaio interfaces;
- AD stob type implements adieu on top of underlying backing store
storage object.
State.
                    (O)(X)
                     |  ^
                     |  |
   m0_stob_io_init() |  | m0_stob_io_fini()
                     |  |
                     V  |
                   SIS_IDLE
                     |  ^
                     |  |
 m0_stob_io_launch() |  | IO completion
                     |  |
                     V  |
                   SIS_BUSY
- Todo:
- A natural way to extend this design is to introduce additional SIS_PREPARED state and to split IO operation submission into two stages: (i) "preparation" stage that is entered once "IO geometry" is known (i.e., once m0_vec of data pages and m0_vec storage objects are known) and (ii) "queueing" stage that is entered when in addition to IO geometry, actual data pages are allocated. The motivating example for this refinement is a data server handling read or write RPC from a client. The RPC contains enough information to build IO vectors, while data arrive later through RDMA. To avoid dead-locks, it is crucial to avoid dynamic resource allocations (first of all, memory allocations) in data path after resources are consumed by RDMA. To this end, IO operation must be completely set up and ready for queueing before RDMA starts, i.e., before data pages are available.
|
enum | m0_stob_io_opcode {
SIO_INVALID,
SIO_READ,
SIO_WRITE,
SIO_BARRIER,
SIO_SYNC
} |
|
enum | m0_stob_io_state { SIS_ZERO = 0,
SIS_IDLE,
SIS_PREPARED,
SIS_BUSY
} |
|
enum | m0_stob_io_flags { SIF_PREFIX = (1 << 0),
SIF_NOHOLE = (1 << 1)
} |
|
M0_INTERNAL int | m0_stob_io_private_setup (struct m0_stob_io *io, struct m0_stob *obj) |
|
M0_INTERNAL void | m0_stob_io_init (struct m0_stob_io *io) |
|
M0_INTERNAL void | m0_stob_io_fini (struct m0_stob_io *io) |
|
M0_INTERNAL void | m0_stob_io_credit (const struct m0_stob_io *io, const struct m0_stob_domain *dom, struct m0_be_tx_credit *accum) |
|
M0_INTERNAL int | m0_stob_io_prepare (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
|
M0_INTERNAL int | m0_stob_io_launch (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
|
M0_INTERNAL int | m0_stob_io_prepare_and_launch (struct m0_stob_io *io, struct m0_stob *obj, struct m0_dtx *tx, struct m0_io_scope *scope) |
|
M0_INTERNAL bool | m0_stob_io_user_is_valid (const struct m0_bufvec *user) |
|
M0_INTERNAL bool | m0_stob_io_stob_is_valid (const struct m0_indexvec *stob) |
|
M0_INTERNAL int | m0_stob_io_bufvec_launch (struct m0_stob *stob, struct m0_bufvec *bufvec, int op_code, m0_bindex_t offset) |
|
M0_INTERNAL void * | m0_stob_addr_pack (const void *buf, uint32_t shift) |
|
M0_INTERNAL void * | m0_stob_addr_open (const void *buf, uint32_t shift) |
|
M0_INTERNAL void | m0_stob_iovec_sort (struct m0_stob_io *stob) |
|
Storage object is a fundamental abstraction of M0. Storage objects offer a linear address space for data and may have redundancy and may have integrity data.
There are multiple types of storage objects, used for various purposes and providing various extensions of the basic storage object interface described below. Specifically, containers for data and meta-data are implemented as special types of storage objects.