Initial commit

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe
2019-01-08 06:51:07 -07:00
commit f93c84e1b0
12 changed files with 1186 additions and 0 deletions

61
src/Makefile Normal file
View File

@@ -0,0 +1,61 @@
# Installation directories used by the install target.
prefix=/usr
includedir=$(prefix)/include
libdir=$(prefix)/lib
# The debug/optimisation defaults apply only when CFLAGS is not already set;
# warnings, the local include path and -fPIC are always appended.
CFLAGS ?= -g -fomit-frame-pointer -O2
CFLAGS += -Wall -I. -fPIC
# SO_CFLAGS is used for the shared-library objects and link, L_CFLAGS for
# the static-archive objects.
SO_CFLAGS=-shared $(CFLAGS)
L_CFLAGS=$(CFLAGS)
LINK_FLAGS=
LINK_FLAGS+=$(LDFLAGS)
# The shared library is built and installed only when ENABLE_SHARED is 1.
ENABLE_SHARED ?= 1
# Shared library file name is $(soname).$(minor).$(micro), i.e.
# liburing.so.1.0.1; $(soname) is what gets recorded via -Wl,-soname.
soname=liburing.so.1
minor=0
micro=1
libname=$(soname).$(minor).$(micro)
# The static archive is always built; the shared library is optional.
all_targets += liburing.a
ifeq ($(ENABLE_SHARED),1)
all_targets += $(libname)
endif
all: $(all_targets)
# Every source file is compiled twice: to %.ol (L_CFLAGS) for the static
# archive and to %.os (SO_CFLAGS) for the shared library.
liburing_srcs := io_uring.c syscall.c
liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs))
liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs))
# Both object flavours are rebuilt when io_uring.h changes.
$(liburing_objs) $(liburing_sobjs): io_uring.h
%.os: %.c
$(CC) $(SO_CFLAGS) -c -o $@ $<
%.ol: %.c
$(CC) $(L_CFLAGS) -c -o $@ $<
AR ?= ar
RANLIB ?= ranlib
# Static archive: removed first so it is always recreated from the current
# .ol objects rather than updated in place.
liburing.a: $(liburing_objs)
rm -f liburing.a
$(AR) r liburing.a $^
$(RANLIB) liburing.a
# Shared library: linked from the .os objects with liburing.map as the
# symbol version script and $(soname) as the recorded soname.
$(libname): $(liburing_sobjs) liburing.map
$(CC) $(SO_CFLAGS) -Wl,--version-script=liburing.map -Wl,-soname=$(soname) -o $@ $(liburing_sobjs) $(LINK_FLAGS)
# Installs io_uring.h and the static archive; when ENABLE_SHARED is 1 also
# installs the shared library and creates the $(soname) and liburing.so
# symlinks pointing at it.
# NOTE(review): liburing.h is not installed here - confirm that is intended.
install: $(all_targets)
install -D -m 644 io_uring.h $(includedir)/io_uring.h
install -D -m 644 liburing.a $(libdir)/liburing.a
ifeq ($(ENABLE_SHARED),1)
install -D -m 755 $(libname) $(libdir)/$(libname)
ln -sf $(libname) $(libdir)/$(soname)
ln -sf $(libname) $(libdir)/liburing.so
endif
# Both the static (.ol) and shared (.os) objects are built from sources that
# include liburing.h, so both must be rebuilt when it changes.
$(liburing_objs) $(liburing_sobjs): liburing.h
# Removes the built libraries and both object flavours, then sweeps any
# remaining *.so*, *.a and *.o files in this directory.
# NOTE(review): $(soname).new is not produced by any rule visible in this
# Makefile - confirm whether it is still needed.
clean:
rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(soname).new
rm -f *.so* *.a *.o

16
src/barrier.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H
/*
 * read_barrier() / write_barrier(): barriers used around accesses to the
 * ring head/tail values shared with the kernel.
 *
 * On x86-64 they expand to lfence / sfence; the "memory" clobber also stops
 * the compiler from moving memory accesses across them.
 */
#if defined(__x86_64)
#define read_barrier() __asm__ __volatile__("lfence":::"memory")
#define write_barrier() __asm__ __volatile__("sfence":::"memory")
#else
/*
 * Add arch appropriate definitions. Be safe and use full barriers for
 * archs we don't have support for.
 */
#define read_barrier() __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif
#endif

193
src/io_uring.c Normal file
View File

@@ -0,0 +1,193 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include "io_uring.h"
#include "liburing.h"
#include "barrier.h"
/*
 * Return an IO completion, waiting for it if necessary.
 *
 * fd:     ring file descriptor, passed to io_uring_enter() when the
 *         completion ring is empty and we have to wait
 * cq:     completion queue state filled in by io_uring_queue_init()
 * ev_ptr: receives a pointer to the completion event inside the mapped CQ
 *         ring on success; set to NULL if an error is returned
 *
 * Returns 0 on success, or -errno if io_uring_enter() fails while waiting.
 *
 * NOTE(review): the ring head is advanced before the caller gets to read
 * the returned event; presumably the kernel may then reuse that slot -
 * confirm against the kernel side.
 */
int io_uring_get_completion(int fd, struct io_uring_cq *cq,
			    struct io_uring_event **ev_ptr)
{
	const unsigned mask = *cq->kring_mask;
	struct io_uring_event *ev;
	unsigned head;
	int ret;

	/* Never leave the caller's pointer indeterminate on the error path */
	*ev_ptr = NULL;

	head = *cq->khead;
	for (;;) {
		read_barrier();
		if (head != *cq->ktail)
			break;

		/* Ring is empty: ask the kernel to wait for one completion */
		ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS);
		if (ret < 0)
			return -errno;
	}

	/*
	 * The loop is only left once an event is available, so the entry at
	 * 'head' can be consumed unconditionally.
	 */
	ev = &cq->events[head & mask];
	*cq->khead = head + 1;
	write_barrier();

	*ev_ptr = ev;
	return 0;
}
/*
 * Submit iocbs acquired from io_uring_get_iocb() to the kernel.
 *
 * fd: ring file descriptor
 * sq: submission queue state filled in by io_uring_queue_init()
 *
 * If the kernel ring still holds entries (khead != ktail), nothing new is
 * queued on this call; io_uring_enter() is invoked with the ring size as
 * the submit count so that the pending entries are handled first.
 *
 * Returns 0 if there was nothing to submit, otherwise whatever
 * io_uring_enter() returns (the number of iocbs submitted on success).
 */
int io_uring_submit(int fd, struct io_uring_sq *sq)
{
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted;

	/*
	 * If we have pending IO in the kring, submit it first
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}

	if (sq->iocb_head == sq->iocb_tail)
		return 0;

	/*
	 * Fill in iocbs that we have queued up, adding them to the kernel
	 * ring. iocb_head/iocb_tail are free-running unsigned counters, so
	 * they must be compared with != rather than <: an ordered compare
	 * stops submitting as soon as the tail wraps around before the head.
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	while (sq->iocb_head != sq->iocb_tail) {
		ktail_next++;
		read_barrier();
		if (ktail_next == *sq->khead)
			break;

		sq->array[ktail & mask] = sq->iocb_head & mask;
		ktail = ktail_next;

		sq->iocb_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/*
		 * Make the array entries visible before publishing the new
		 * tail, and the new tail visible before entering the kernel.
		 */
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}

submit:
	return io_uring_enter(fd, submitted, 0, IORING_ENTER_GETEVENTS);
}
/*
 * Hand out the next free iocb for the application to fill in. Nothing is
 * passed to the kernel here; the caller does that later with
 * io_uring_submit(), and may grab several iocbs before doing so.
 *
 * Returns a vacant iocb, or NULL if every iocb is already in use.
 */
struct io_uring_iocb *io_uring_get_iocb(struct io_uring_sq *sq)
{
	const unsigned cur = sq->iocb_tail;
	/* Entries handed out but not yet submitted, counting this one */
	const unsigned outstanding = cur + 1 - sq->iocb_head;
	struct io_uring_iocb *slot;

	if (outstanding > *sq->kring_entries)
		return NULL;

	slot = &sq->iocbs[cur & *sq->kring_mask];
	sq->iocb_tail = cur + 1;
	return slot;
}
static int io_uring_mmap(int fd, struct io_uring_params *p,
struct io_uring_sq *sq, struct io_uring_cq *cq)
{
size_t size;
void *ptr;
int ret;
sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
if (ptr == MAP_FAILED)
return -errno;
sq->khead = ptr + p->sq_off.head;
sq->ktail = ptr + p->sq_off.tail;
sq->kring_mask = ptr + p->sq_off.ring_mask;
sq->kring_entries = ptr + p->sq_off.ring_entries;
sq->kflags = ptr + p->sq_off.flags;
sq->kdropped = ptr + p->sq_off.dropped;
sq->array = ptr + p->sq_off.array;
size = p->sq_entries * sizeof(struct io_uring_iocb);
sq->iocbs = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_IOCB);
if (sq->iocbs == MAP_FAILED) {
ret = -errno;
err:
munmap(sq->khead, sq->ring_sz);
return ret;
}
cq->ring_sz = p->cq_off.events + p->cq_entries * sizeof(struct io_uring_event);
ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
if (ptr == MAP_FAILED) {
ret = -errno;
munmap(sq->iocbs, p->sq_entries * sizeof(struct io_uring_iocb));
goto err;
}
cq->khead = ptr + p->cq_off.head;
cq->ktail = ptr + p->cq_off.tail;
cq->kring_mask = ptr + p->cq_off.ring_mask;
cq->kring_entries = ptr + p->cq_off.ring_entries;
cq->koverflow = ptr + p->cq_off.overflow;
cq->events = ptr + p->cq_off.events;
return fd;
}
/*
 * Create a ring with io_uring_setup() and map it into this process.
 *
 * entries: passed through to io_uring_setup()
 * p:       setup parameters; read back afterwards to size and locate the
 *          ring mappings
 * iovecs:  passed through to io_uring_setup()
 * sq, cq:  zeroed, then filled with the information needed to read/write
 *          the rings
 *
 * Returns the ring 'fd' on success. On failure returns a negative value:
 * the return value of io_uring_setup() if ring creation failed, or -errno
 * if mapping the rings failed, in which case the ring fd is closed again.
 */
int io_uring_queue_init(unsigned entries, struct io_uring_params *p,
			struct iovec *iovecs, struct io_uring_sq *sq,
			struct io_uring_cq *cq)
{
	int fd, ret;

	fd = io_uring_setup(entries, iovecs, p);
	if (fd < 0)
		return fd;

	memset(sq, 0, sizeof(*sq));
	memset(cq, 0, sizeof(*cq));

	ret = io_uring_mmap(fd, p, sq, cq);
	if (ret < 0) {
		/* The caller never sees this fd, so it must not be leaked */
		close(fd);
	}
	return ret;
}
/*
 * Tear down a ring set up by io_uring_queue_init(): unmap the iocb array,
 * the SQ ring and the CQ ring, then close the ring fd.
 */
void io_uring_queue_exit(int fd, struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	/*
	 * The entry count lives inside the SQ ring mapping, so it has to be
	 * read before that mapping is released.
	 */
	const size_t iocbs_bytes =
		*sq->kring_entries * sizeof(struct io_uring_iocb);

	munmap(sq->iocbs, iocbs_bytes);
	munmap(sq->khead, sq->ring_sz);
	munmap(cq->khead, cq->ring_sz);
	close(fd);
}

115
src/io_uring.h Normal file
View File

@@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Header file for the io_uring interface.
*
* Copyright (C) 2019 Jens Axboe
* Copyright (C) 2019 Christoph Hellwig
*/
#ifndef LINUX_IO_URING_H
#define LINUX_IO_URING_H
#include <linux/fs.h>
#include <linux/types.h>
/*
 * IO submission data structure
 *
 * The library maps an array of these at IORING_OFF_IOCB and hands entries
 * out through io_uring_get_iocb(). The layout is shared with the kernel, so
 * field order and sizes must not change.
 */
struct io_uring_iocb {
/* presumably one of the IORING_OP_* values - confirm */
__u8 opcode;
__u8 flags;
__u16 ioprio;
__s32 fd;
__u64 off;
/* __pad keeps this union 64 bits wide regardless of pointer size */
union {
void *addr;
__u64 __pad;
};
__u32 len;
union {
__kernel_rwf_t rw_flags;
__u32 __resv;
};
};
/*
 * io_uring_setup() flags
 */
#define IORING_SETUP_IOPOLL (1 << 0) /* io_context is polled */
#define IORING_SETUP_FIXEDBUFS (1 << 1) /* IO buffers are fixed */
#define IORING_SETUP_SQTHREAD (1 << 2) /* Use SQ thread */
#define IORING_SETUP_SQWQ (1 << 3) /* Use SQ workqueue */
#define IORING_SETUP_SQPOLL (1 << 4) /* SQ thread polls */
/*
 * Operation codes; presumably the values stored in io_uring_iocb->opcode
 * (TODO confirm against the kernel side).
 */
#define IORING_OP_READ 1
#define IORING_OP_WRITE 2
#define IORING_OP_FSYNC 3
#define IORING_OP_FDSYNC 4
#define IORING_OP_READ_FIXED 5
#define IORING_OP_WRITE_FIXED 6
/*
 * IO completion data structure
 *
 * Entries of the CQ ring; io_uring_get_completion() returns a pointer to
 * one of these inside the mapped ring.
 */
struct io_uring_event {
__u64 index; /* what iocb this event came from */
__s32 res; /* result code for this event */
__u32 flags; /* IOEV_FLAG_* bits, see below */
};
/*
 * io_uring_event->flags
 */
#define IOEV_FLAG_CACHEHIT (1 << 0) /* IO did not hit media */
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL
#define IORING_OFF_CQ_RING 0x8000000ULL
#define IORING_OFF_IOCB 0x10000000ULL
/*
 * Filled with the offset for mmap(2)
 *
 * Each member is a byte offset from the start of the SQ ring mapping
 * (IORING_OFF_SQ_RING); the library adds it to the mapping address to
 * locate the corresponding ring field.
 */
struct io_sqring_offsets {
__u32 head;
__u32 tail;
__u32 ring_mask;
__u32 ring_entries;
__u32 flags;
__u32 dropped;
__u32 array;
__u32 resv[3];
};
/* presumably a bit in the SQ ring 'flags' word located via the struct above - confirm */
#define IORING_SQ_NEED_WAKEUP (1 << 0) /* needs io_uring_enter wakeup */
/*
 * CQ ring counterpart of io_sqring_offsets: each member is a byte offset
 * from the start of the CQ ring mapping (IORING_OFF_CQ_RING).
 */
struct io_cqring_offsets {
__u32 head;
__u32 tail;
__u32 ring_mask;
__u32 ring_entries;
__u32 overflow;
__u32 events;
__u32 resv[4];
};
/*
 * io_uring_enter(2) flags
 */
#define IORING_ENTER_GETEVENTS (1 << 0)
/*
 * Passed in for io_uring_setup(2). Copied back with updated info on success
 *
 * After setup the library reads sq_entries/cq_entries to size the ring
 * mappings and sq_off/cq_off to locate the fields inside them.
 */
struct io_uring_params {
__u32 sq_entries;
__u32 cq_entries;
__u32 flags; /* presumably IORING_SETUP_* bits - confirm */
__u16 sq_thread_cpu;
__u16 resv[9];
struct io_sqring_offsets sq_off;
struct io_cqring_offsets cq_off;
};
#endif

57
src/liburing.h Normal file
View File

@@ -0,0 +1,57 @@
#ifndef LIB_URING_H
#define LIB_URING_H
#include <sys/uio.h>
#include "io_uring.h"
/*
 * Library interface to io_uring
 */
/*
 * Submission queue state. The k* members and 'array' point into the mmap'ed
 * SQ ring shared with the kernel; they are set from the sq_off offsets
 * during io_uring_queue_init().
 */
struct io_uring_sq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *kflags;
unsigned *kdropped;
/* ring slots; io_uring_submit() stores iocb indices here */
unsigned *array;
/* iocb array mapped at IORING_OFF_IOCB */
struct io_uring_iocb *iocbs;
/*
 * Library-private free-running counters: iocb_tail is advanced by
 * io_uring_get_iocb(), iocb_head by io_uring_submit().
 */
unsigned iocb_head;
unsigned iocb_tail;
/* size in bytes of the SQ ring mapping */
size_t ring_sz;
};
/*
 * Completion queue state. All pointers reference the mmap'ed CQ ring shared
 * with the kernel; they are set from the cq_off offsets during
 * io_uring_queue_init().
 */
struct io_uring_cq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *koverflow;
/* completion entries, indexed by (head & *kring_mask) */
struct io_uring_event *events;
/* size in bytes of the CQ ring mapping */
size_t ring_sz;
};
/*
 * System calls
 *
 * Thin wrappers around syscall(2), implemented in syscall.c. They are not
 * listed under 'global:' in liburing.map, so its 'local: *' catch-all
 * applies to them in the shared library.
 */
extern int io_uring_setup(unsigned entries, struct iovec *iovecs,
struct io_uring_params *p);
extern int io_uring_enter(unsigned fd, unsigned to_submit,
unsigned min_complete, unsigned flags);
/*
 * Library interface
 */
/* Create and map a ring; returns the ring fd, or a negative value on error. */
extern int io_uring_queue_init(unsigned entries, struct io_uring_params *p,
struct iovec *iovecs, struct io_uring_sq *sq, struct io_uring_cq *cq);
/* Unmap the rings and close the ring fd. */
extern void io_uring_queue_exit(int fd, struct io_uring_sq *sq,
struct io_uring_cq *cq);
/* Fetch one completion into *ev_ptr, entering the kernel to wait if the CQ ring is empty. */
extern int io_uring_get_completion(int fd, struct io_uring_cq *cq,
struct io_uring_event **ev_ptr);
/* Pass iocbs obtained from io_uring_get_iocb() to the kernel. */
extern int io_uring_submit(int fd, struct io_uring_sq *sq);
/* Return the next free iocb to fill in, or NULL if all are in use. */
extern struct io_uring_iocb *io_uring_get_iocb(struct io_uring_sq *sq);
#endif

11
src/liburing.map Normal file
View File

@@ -0,0 +1,11 @@
/*
 * Symbol version script for the shared library (passed to the linker via
 * -Wl,--version-script in the Makefile). Only the functions listed under
 * 'global' are exported, with version LIBURING_0.1; every other symbol,
 * including io_uring_setup and io_uring_enter, is made local.
 */
LIBURING_0.1 {
global:
io_uring_queue_init;
io_uring_queue_exit;
io_uring_get_completion;
io_uring_submit;
io_uring_get_iocb;
local:
*;
};

31
src/syscall.c Normal file
View File

@@ -0,0 +1,31 @@
/*
* Will go away once libc support is there
*/
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include "io_uring.h"
/*
 * Syscall numbers used by the wrappers below. Only x86-64 values are
 * provided; any other architecture fails the build with #error. The #ifndef
 * guards allow the numbers to be supplied from outside (e.g. with -D).
 *
 * NOTE(review): these hard-coded numbers must match the kernel the library
 * is run against - confirm before use.
 */
#if defined(__x86_64)
#ifndef __NR_sys_io_uring_setup
#define __NR_sys_io_uring_setup 335
#endif
#ifndef __NR_sys_io_uring_enter
#define __NR_sys_io_uring_enter 336
#endif
#else
#error "Arch not supported yet"
#endif
/*
 * Raw io_uring_setup system call: forwards its three arguments unchanged
 * and hands back the result of syscall(2).
 */
int io_uring_setup(unsigned int entries, struct iovec *iovecs,
		   struct io_uring_params *p)
{
	const long rc = syscall(__NR_sys_io_uring_setup, entries, iovecs, p);

	return rc;
}
/*
 * Raw io_uring_enter system call.
 *
 * fd:           ring file descriptor
 * to_submit:    submit count passed to the kernel
 * min_complete: completion count passed to the kernel
 * flags:        IORING_ENTER_* flags
 *
 * 'fd' is unsigned so that this definition has the same type as the
 * prototype in liburing.h; a definition and a declaration that disagree on
 * a parameter type are incompatible across translation units.
 *
 * Returns the result of syscall(2) unchanged.
 */
int io_uring_enter(unsigned int fd, unsigned int to_submit,
		   unsigned int min_complete, unsigned int flags)
{
	return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
		       flags);
}