io_uring_enter.2 man page
Initial man page for io_uring_enter. Signed-off-by: Jeff Moyer <jmoyer@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
378
man/io_uring_enter.2
Normal file
378
man/io_uring_enter.2
Normal file
@@ -0,0 +1,378 @@
|
||||
.\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
|
||||
.\" Copyright (C) 2019 Red Hat, Inc.
|
||||
.\"
|
||||
.\" %%%LICENSE_START(LGPL_V2.1)
|
||||
.\" This file is distributed according to the GNU Lesser General Public License.
|
||||
.\" %%%LICENSE_END
|
||||
.\"
|
||||
.TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual"
|
||||
.SH NAME
|
||||
io_uring_enter \- initiate and/or complete asynchronous I/O
|
||||
.SH SYNOPSIS
|
||||
.nf
|
||||
.BR "#include <linux/io_uring.h>"
|
||||
.PP
|
||||
.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
|
||||
.BI " unsigned int " min_complete ", unsigned int " flags)
|
||||
.fi
|
||||
.PP
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
.BR io_uring_enter ()
|
||||
is used to initiate and complete I/O using the shared submission and
|
||||
completion queues setup by a call to
|
||||
.BR io_uring_setup(2).
|
||||
A single call can both submit new I/O and wait for completions of I/O
|
||||
initiated by this call or previous calls to
|
||||
.BR io_uring_enter ().
|
||||
|
||||
.I fd
|
||||
is the file descriptor returned by
|
||||
.BR io_uring_setup(2).
|
||||
.I to_submit
|
||||
specifies the number of I/Os to submit from the submission queue. If
|
||||
the
|
||||
.B IORING_ENTER_GETEVENTS
|
||||
bit is set in
|
||||
.I flags,
|
||||
then the system call will attempt to wait for
|
||||
.I min_events
|
||||
I/O completions. Note that it is valid to specify
|
||||
.B IORING_ENTER_GETEVENTS
|
||||
in
|
||||
.I flags
|
||||
and pass in
|
||||
.I min_complete
|
||||
as 0 at the same time. This allows the kernel to return already
|
||||
completed events without waiting. This is useful only when the
|
||||
io_uring instance was configured for polling (by specifying the
|
||||
.B IORING_SETUP_IOPOLL
|
||||
flag in
|
||||
.BR io_uring_setup(2)),
|
||||
as for IRQ driven I/O, the application can just check the completion
|
||||
queue without entering the kernel.
|
||||
|
||||
Submission queue entries are represented using the following data
|
||||
structure:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
/*
|
||||
* IO submission data structure (Submission Queue Entry)
|
||||
*/
|
||||
struct io_uring_sqe {
|
||||
__u8 opcode; /* type of operation for this sqe */
|
||||
__u8 flags; /* IOSQE_ flags */
|
||||
__u16 ioprio; /* ioprio for the request */
|
||||
__s32 fd; /* file descriptor to do IO on */
|
||||
__u64 off; /* offset into file */
|
||||
__u64 addr; /* pointer to buffer or iovecs */
|
||||
__u32 len; /* buffer size or number of iovecs */
|
||||
union {
|
||||
__kernel_rwf_t rw_flags;
|
||||
__u32 fsync_flags;
|
||||
__u16 poll_events;
|
||||
};
|
||||
__u64 user_data; /* data passed back at completion time */
|
||||
union {
|
||||
__u16 buf_index; /* index into fixed buffers, if used */
|
||||
__u64 __pad2[3];
|
||||
};
|
||||
};
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
The
|
||||
.I opcode
|
||||
describes the operation to be performed. It can be one of:
|
||||
.TP
|
||||
.BR IORING_OP_NOP
|
||||
Do not perform any I/O. This is useful for testing the performance of
|
||||
the io_uring implementation itself.
|
||||
.TP
|
||||
.BR IORING_OP_READV
|
||||
.TP
|
||||
.BR IORING_OP_WRITEV
|
||||
Vectored read and write operations, similar to
|
||||
.BR preadv2(2)
|
||||
and
|
||||
.BR pwritev2(2).
|
||||
|
||||
.TP
|
||||
.BR IORING_OP_READ_FIXED
|
||||
.TP
|
||||
.BR IORING_OP_WRITE_FIXED
|
||||
Read from or write to pre-mapped buffers. See
|
||||
.BR io_uring_register(2)
|
||||
for details on how to setup a context for fixed reads and writes.
|
||||
|
||||
.TP
|
||||
.BR IORING_OP_FSYNC
|
||||
File sync. See also
|
||||
.BR fsync(2).
|
||||
Note that, while I/O is initiated in the order in which it appears in
|
||||
the submission queue, completions are unordered. For example, an
|
||||
application which places a write I/O followed by an fsync in the
|
||||
submission queue cannot expect the fsync to apply to the write. The
|
||||
two operations execute in parallel, so the fsync may complete before
|
||||
the write is issued to the storage. The same is also true for
|
||||
previously issued writes that have not completed prior to the fsync.
|
||||
|
||||
.TP
|
||||
.BR IORING_OP_POLL_ADD
|
||||
Poll the
|
||||
.I fd
|
||||
specified in the submission queue entry for the events
|
||||
specified in the
|
||||
.I poll_events
|
||||
field. Unlike poll or epoll without
|
||||
.B EPOLLONESHOT,
|
||||
this interface always works in one shot mode. That is, once the poll
|
||||
operation is completed, it will have to be resubmitted.
|
||||
|
||||
.TP
|
||||
.BR IORING_OP_POLL_REMOVE
|
||||
Remove an existing poll request. If found, the
|
||||
.I res
|
||||
field of the
|
||||
.I struct io_uring_cqe
|
||||
will contain 0. If not found,
|
||||
.I res
|
||||
will contain
|
||||
.B -ENOENT.
|
||||
|
||||
.PP
|
||||
The
|
||||
.I flags
|
||||
field is a bit mask. Currently, the only supported flag is
|
||||
.B IOSQE_FIXED_FILE.
|
||||
This flag must be specified for io_uring instances that registered
|
||||
files using the
|
||||
.BR io_uring_register(2)
|
||||
system call. When specified,
|
||||
.I fd
|
||||
contains an index into the files array registered with the io_uring
|
||||
instance.
|
||||
|
||||
.I ioprio
|
||||
specifies the I/O priority. See
|
||||
.BR ioprio_get(2)
|
||||
for a description of Linux I/O priorities.
|
||||
|
||||
.I fd
|
||||
specifies the file descriptor against which the operation will be
|
||||
performed, with the exception noted above.
|
||||
|
||||
If the operation is one of
|
||||
.B IORING_OP_READ_FIXED
|
||||
or
|
||||
.B IORING_OP_WRITE_FIXED,
|
||||
.I addr
|
||||
and
|
||||
.I len
|
||||
must fall within the buffer located at
|
||||
.I buf_index
|
||||
in the fixed buffer array. If the operation is either
|
||||
.B IORING_OP_READV
|
||||
or
|
||||
.B IORING_OP_WRITEV,
|
||||
then
|
||||
.I addr
|
||||
points to an iovec array of
|
||||
.I len
|
||||
entries.
|
||||
|
||||
.I rw_flags,
|
||||
specified for read and write operations, contains a bitwise OR of
|
||||
per-I/O flags, as described in the
|
||||
.BR preadv2(2)
|
||||
man page.
|
||||
|
||||
The
|
||||
.I fsync_flags
|
||||
bit mask may contain either 0, for a normal file integrity sync, or
|
||||
.B IORING_FSYNC_DATASYNC
|
||||
to provide data sync only semantics. See the descriptions of
|
||||
.B O_SYNC
|
||||
and
|
||||
.B O_DSYNC
|
||||
in the
|
||||
.BR open(2)
|
||||
manual page for more information.
|
||||
|
||||
The bits that may be set in
|
||||
.I poll_events
|
||||
are defined in \fI<poll.h>\fP, and documented in
|
||||
.BR poll(2).
|
||||
|
||||
.I user_data
|
||||
is an application-supplied value that will be copied into
|
||||
the completion queue entry (see below).
|
||||
.I buf_index
|
||||
is an index into an array of fixed buffers, and is only valid if fixed
|
||||
buffers were registered
|
||||
.PP
|
||||
Once the submission queue entry is initialized, I/O is submitted by
|
||||
placing the index of the submission queue entry into the tail of the
|
||||
submission queue. After one or more indexes are added to the queue,
|
||||
and the queue tail is advanced, the io_uring_enter(2) system call can
|
||||
be invoked to initiate the I/O.
|
||||
|
||||
Completions use the following data structure:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
*/
|
||||
struct io_uring_cqe {
|
||||
__u64 user_data; /* sqe->data submission passed back */
|
||||
__s32 res; /* result code for this event */
|
||||
__u32 flags;
|
||||
};
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
.I user_data
|
||||
is copied from the field of the same name in the submission queue
|
||||
entry. The primary use case is to store data that the application
|
||||
will need to access upon completion of this particular I/O. The
|
||||
.I flags
|
||||
bit mask may contain 0 or more of the following values, ORed together:
|
||||
.TP
|
||||
.BR IOCQE_FLAG_CACHEHIT
|
||||
The page(s) associated with the buffered I/O operation were present in
|
||||
the page cache.
|
||||
.I res
|
||||
is the operation-specific result.
|
||||
.PP
|
||||
For read and write opcodes, the
|
||||
return values match those documented in the
|
||||
.BR preadv2(2)
|
||||
and
|
||||
.BR pwritev2(2)
|
||||
man pages.
|
||||
Return codes for the io_uring-specific opcodes are documented in the
|
||||
description of the opcodes above.
|
||||
.PP
|
||||
.SH RETURN VALUE
|
||||
.BR io_uring_enter ()
|
||||
returns the number of I/Os successfully submitted. This can be zero
|
||||
if
|
||||
.I to_submit
|
||||
was zero, if there were invalid entries in the submission queue, or if
|
||||
the submission queue was empty.
|
||||
|
||||
On error, -1 is returned and
|
||||
.I errno
|
||||
is set appropriately.
|
||||
.PP
|
||||
.SH ERRORS
|
||||
.TP
|
||||
.BR EAGAIN
|
||||
The kernel was unable to allocate memory for the request.
|
||||
.TP
|
||||
.BR EBADF
|
||||
The
|
||||
.I fd
|
||||
field in the submission queue entry is invalid, or the
|
||||
.B IOSQE_FIXED_FILE
|
||||
flag was set in the submission queue entry, but no files were registered
|
||||
with the io_uring instance
|
||||
.TP
|
||||
.BR EBUSY
|
||||
the io_uring instance is being acted on by another thread
|
||||
.TP
|
||||
.BR EFAULT
|
||||
buffer is outside of the process' accessible address space
|
||||
.TP
|
||||
.BR EFAULT
|
||||
.B IORING_OP_READ_FIXED
|
||||
or
|
||||
.B IORING_OP_WRITE_FIXED
|
||||
was specified in the
|
||||
.I opcode
|
||||
field of the submission queue entry, but either buffers were not
|
||||
registered for this io_uring instance, or the address range described
|
||||
by
|
||||
.I addr
|
||||
and
|
||||
.I len
|
||||
does not fit within the buffer registered at
|
||||
.I buf_index
|
||||
.TP
|
||||
.BR EINVAL
|
||||
The
|
||||
.I index
|
||||
member of the submission queue entry is invalid.
|
||||
.TP
|
||||
.BR EINVAL
|
||||
the
|
||||
.I flags
|
||||
field or
|
||||
.I opcode
|
||||
in a submission queue entry is invalid
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_NOP
|
||||
was specified in the submission queue entry, but the io_uring context
|
||||
was setup for polling (
|
||||
.B IORING_SETUP_IOPOLL
|
||||
was specified in the call to io_uring_setup)
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_READV
|
||||
or
|
||||
.B IORING_OP_WRITEV
|
||||
was specified in the submission queue entry, but the io_uring instance
|
||||
has fixed buffers registered.
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_READ_FIXED
|
||||
or
|
||||
.B IORING_OP_WRITE_FIXED
|
||||
was specified in the submission queue entry, and the
|
||||
.I buf_index
|
||||
is invalid
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_READ_FIXED,
|
||||
.B IORING_OP_WRITE_FIXED
|
||||
or
|
||||
.B IORING_OP_FSYNC
|
||||
was specified in the submission queue entry, but the io_uring instance
|
||||
was configured for IOPOLLing, or any of
|
||||
.I addr, ioprio, off, len,
|
||||
or
|
||||
.I buf_index
|
||||
was set in the submission queue entry.
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_POLL_ADD
|
||||
or
|
||||
.B IORING_OP_POLL_REMOVE
|
||||
was specified in the
|
||||
.I opcode
|
||||
field of the submission queue entry, but the io_uring instance was
|
||||
configured for busy-wait polling
|
||||
.B (IORING_SETUP_IOPOLL),
|
||||
or any of
|
||||
.I ioprio, off, len
|
||||
or
|
||||
.I buf_index
|
||||
was non-zero in the submission queue entry.
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B IORING_OP_POLL_ADD
|
||||
was specified in the
|
||||
.I opcode
|
||||
field of the submission queue entry, and the
|
||||
.I addr
|
||||
field was non-zero.
|
||||
.TP
|
||||
.BR ENXIO
|
||||
the io_uring instance is in the process of being torn down
|
||||
.TP
|
||||
.BR EOPNOTSUPP
|
||||
.I fd
|
||||
does not refer to an io_uring instance
|
||||
Reference in New Issue
Block a user