There's a failure case where an application gets a cqe entry, but
the kernel can then overwrite it before the application is done
reading it. This can happen since the io_uring_{get,wait}_completion()
interface both returns a CQE pointer AND increments the ring index.
If the kernel reuses this entry before the application is done reading
it, the contents may be corrupted.
Remove the CQ head increment from the CQE retrieval, and put it into
a separate helper, io_uring_cqe_seen(). The application must call this
helper when it got a new CQE entry through one of the above calls, and
it's now done reading it.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
250 lines
4.6 KiB
C
250 lines
4.6 KiB
C
/*
|
|
* gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-cp io_uring-cp.c -luring
|
|
*/
|
|
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <inttypes.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/ioctl.h>
|
|
#include "../src/liburing.h"
|
|
|
|
/* Submission queue depth: maximum number of in-flight requests (reads + writes). */
#define QD 64
/* I/O block size: each request transfers at most this many bytes. */
#define BS (32*1024)

/* Input and output file descriptors, shared by all helpers below. */
static int infd, outfd;

/*
 * Per-request bookkeeping.  Allocated in one chunk together with the
 * data buffer, which starts immediately after the struct (see
 * queue_read(): iov_base = data + 1).
 */
struct io_data {
	int read;			/* 1 = read request, 0 = write request */
	off_t first_offset, offset;	/* original and current file offset */
	size_t first_len;		/* original length, kept for the matching write */
	struct iovec iov;		/* current base/len, advanced on short I/O */
};
|
|
|
|
/*
 * Initialize an io_uring instance with the given queue depth.
 * Returns 0 on success, -1 on failure (error reported to stderr).
 */
static int setup_context(unsigned entries, struct io_uring *ring)
{
	int err = io_uring_queue_init(entries, ring, 0);

	if (err >= 0)
		return 0;

	fprintf(stderr, "queue_init: %s\n", strerror(-err));
	return -1;
}
|
|
|
|
/*
 * Determine the size of the object behind fd.  Regular files use
 * fstat(); block devices use the BLKGETSIZE64 ioctl.  Any other file
 * type is rejected.  Returns 0 and stores the size in *size on
 * success, -1 on failure.
 */
static int get_file_size(int fd, off_t *size)
{
	struct stat st;
	unsigned long long dev_bytes;

	if (fstat(fd, &st) < 0)
		return -1;

	if (S_ISREG(st.st_mode)) {
		*size = st.st_size;
		return 0;
	}

	if (!S_ISBLK(st.st_mode))
		return -1;

	if (ioctl(fd, BLKGETSIZE64, &dev_bytes) != 0)
		return -1;

	*size = dev_bytes;
	return 0;
}
|
|
|
|
static void queue_prepped(struct io_uring *ring, struct io_data *data)
|
|
{
|
|
struct io_uring_sqe *sqe;
|
|
|
|
sqe = io_uring_get_sqe(ring);
|
|
assert(sqe);
|
|
|
|
if (data->read)
|
|
io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset);
|
|
else
|
|
io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset);
|
|
|
|
io_uring_sqe_set_data(sqe, data);
|
|
}
|
|
|
|
/*
 * Allocate and queue one read of `size` bytes at `offset` from infd.
 * The io_data struct and its data buffer are a single allocation, with
 * the buffer starting right after the struct.  Returns 0 on success,
 * 1 if no SQE is available or allocation fails (caller backs off and
 * reaps completions).
 *
 * Note: the allocation is attempted before grabbing an SQE so that an
 * allocation failure never leaves a consumed-but-unprepared SQE in the
 * ring, which a later io_uring_submit() would hand to the kernel.
 */
static int queue_read(struct io_uring *ring, off_t size, off_t offset)
{
	struct io_uring_sqe *sqe;
	struct io_data *data;

	data = malloc(size + sizeof(*data));
	if (!data)
		return 1;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		free(data);
		return 1;
	}

	data->read = 1;
	data->offset = data->first_offset = offset;

	data->iov.iov_base = data + 1;	/* buffer follows the struct */
	data->iov.iov_len = size;
	data->first_len = size;

	io_uring_prep_readv(sqe, infd, &data->iov, 1, offset);
	io_uring_sqe_set_data(sqe, data);
	return 0;
}
|
|
|
|
static void queue_write(struct io_uring *ring, struct io_data *data)
|
|
{
|
|
data->read = 0;
|
|
data->offset = data->first_offset;
|
|
|
|
data->iov.iov_base = data + 1;
|
|
data->iov.iov_len = data->first_len;
|
|
|
|
queue_prepped(ring, data);
|
|
io_uring_submit(ring);
|
|
}
|
|
|
|
/*
 * Copy `insize` bytes from infd to outfd using the ring.
 *
 * The outer loop alternates between two phases until all reads have
 * been issued (insize == 0) and all writes have completed
 * (write_left == 0):
 *   1. queue as many BS-sized reads as QD allows, then submit them;
 *   2. reap completions — blocking for the first, polling the rest —
 *      turning finished reads into writes and freeing finished writes.
 *
 * Each CQE is released back to the kernel with io_uring_cqe_seen()
 * only after its data has been fully consumed, on every path.
 *
 * `reads` + `writes` counts in-flight requests; `write_left` is
 * decremented by the request's full length when its write is queued.
 * Returns 0 on success, 1 on any submission or I/O error.
 */
static int copy_file(struct io_uring *ring, off_t insize)
{
	unsigned long reads, writes;
	struct io_uring_cqe *cqe;
	off_t write_left, offset;
	int ret;

	write_left = insize;
	writes = reads = offset = 0;

	while (insize || write_left) {
		int had_reads, got_comp;

		/*
		 * Queue up as many reads as we can
		 */
		had_reads = reads;
		while (insize) {
			off_t this_size = insize;

			if (reads + writes >= QD)
				break;
			if (this_size > BS)
				this_size = BS;
			else if (!this_size)
				break;

			if (queue_read(ring, this_size, offset))
				break;

			insize -= this_size;
			offset += this_size;
			reads++;
		}

		/* Only submit if this pass actually queued something. */
		if (had_reads != reads) {
			ret = io_uring_submit(ring);
			if (ret < 0) {
				fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
				break;
			}
		}

		/*
		 * Queue is full at this point. Find at least one completion.
		 */
		got_comp = 0;
		while (write_left) {
			struct io_data *data;

			/* Block for the first completion, then drain
			 * whatever else is ready without blocking. */
			if (!got_comp) {
				ret = io_uring_wait_completion(ring, &cqe);
				got_comp = 1;
			} else
				ret = io_uring_get_completion(ring, &cqe);
			if (ret < 0) {
				fprintf(stderr, "io_uring_get_completion: %s\n",
					strerror(-ret));
				return 1;
			}
			if (!cqe)
				break;

			data = io_uring_cqe_get_data(cqe);
			if (cqe->res < 0) {
				/* -EAGAIN: transient, resubmit as-is. */
				if (cqe->res == -EAGAIN) {
					queue_prepped(ring, data);
					io_uring_cqe_seen(ring, cqe);
					continue;
				}
				fprintf(stderr, "cqe failed: %s\n",
					strerror(-cqe->res));
				return 1;
			} else if (cqe->res != data->iov.iov_len) {
				/* Short read/write, adjust and requeue */
				data->iov.iov_base += cqe->res;
				data->iov.iov_len -= cqe->res;
				data->offset += cqe->res;
				queue_prepped(ring, data);
				io_uring_cqe_seen(ring, cqe);
				continue;
			}

			/*
			 * All done. if write, nothing else to do. if read,
			 * queue up corresponding write.
			 */
			if (data->read) {
				queue_write(ring, data);
				write_left -= data->first_len;
				reads--;
				writes++;
			} else {
				free(data);
				writes--;
			}
			io_uring_cqe_seen(ring, cqe);
		}
	}

	return 0;
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
struct io_uring ring;
|
|
off_t insize;
|
|
int ret;
|
|
|
|
if (argc < 3) {
|
|
printf("%s: infile outfile\n", argv[0]);
|
|
return 1;
|
|
}
|
|
|
|
infd = open(argv[1], O_RDONLY);
|
|
if (infd < 0) {
|
|
perror("open infile");
|
|
return 1;
|
|
}
|
|
outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
|
if (outfd < 0) {
|
|
perror("open outfile");
|
|
return 1;
|
|
}
|
|
|
|
if (setup_context(QD, &ring))
|
|
return 1;
|
|
if (get_file_size(infd, &insize))
|
|
return 1;
|
|
|
|
ret = copy_file(&ring, insize);
|
|
|
|
close(infd);
|
|
close(outfd);
|
|
io_uring_queue_exit(&ring);
|
|
return ret;
|
|
}
|