61
src/Makefile
Normal file
61
src/Makefile
Normal file
@@ -0,0 +1,61 @@
|
||||
# Installation layout. Override on the command line, e.g. `make prefix=/usr/local`.
prefix=/usr
includedir=$(prefix)/include
libdir=$(prefix)/lib

# User-tunable optimisation/debug flags (only a default).
CFLAGS ?= -g -fomit-frame-pointer -O2
# Flags the build cannot work without. 'override' keeps them in effect even
# when the user passes CFLAGS=... on the command line; a plain '+=' would be
# ignored in that case and the shared objects would be built without -fPIC.
override CFLAGS += -Wall -I. -fPIC
SO_CFLAGS=-shared $(CFLAGS)
L_CFLAGS=$(CFLAGS)
LINK_FLAGS=
LINK_FLAGS+=$(LDFLAGS)
# Set ENABLE_SHARED=0 to build only the static archive.
ENABLE_SHARED ?= 1

# Shared library naming: soname plus minor/micro revision.
soname=liburing.so.1
minor=0
micro=1
libname=$(soname).$(minor).$(micro)
all_targets += liburing.a

ifeq ($(ENABLE_SHARED),1)
all_targets += $(libname)
endif

# 'all' is a command, not a file: a stray file named 'all' must not mask it.
.PHONY: all
all: $(all_targets)
|
||||
|
||||
# Library sources. Each one is compiled twice: %.ol for the static archive
# and %.os for the shared library.
liburing_srcs := io_uring.c syscall.c

liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs))
liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs))

# Header prerequisites. barrier.h is included by io_uring.c, so a change to
# it must trigger a rebuild just like a change to io_uring.h does.
$(liburing_objs) $(liburing_sobjs): io_uring.h barrier.h

# Object for the shared library.
%.os: %.c
	$(CC) $(SO_CFLAGS) -c -o $@ $<

# Object for the static archive.
%.ol: %.c
	$(CC) $(L_CFLAGS) -c -o $@ $<
|
||||
|
||||
# Archiver tools; can be overridden for cross builds.
AR ?= ar
RANLIB ?= ranlib

# Static archive: always rebuilt from scratch so stale members never linger.
liburing.a: $(liburing_objs)
	rm -f $@
	$(AR) r $@ $^
	$(RANLIB) $@

# Shared library: symbol visibility comes from liburing.map, which is a
# prerequisite so that editing the map relinks the library.
$(libname): $(liburing_sobjs) liburing.map
	$(CC) $(SO_CFLAGS) \
		-Wl,--version-script=liburing.map \
		-Wl,-soname=$(soname) \
		-o $@ $(liburing_sobjs) $(LINK_FLAGS)
|
||||
|
||||
# Install headers and libraries under $(prefix).
# DESTDIR (empty by default) allows staged installs for packaging.
# liburing.h is installed as well: it carries the library's prototypes and
# includes "io_uring.h", so both headers go into the same directory.
.PHONY: install
install: $(all_targets)
	install -D -m 644 io_uring.h $(DESTDIR)$(includedir)/io_uring.h
	install -D -m 644 liburing.h $(DESTDIR)$(includedir)/liburing.h
	install -D -m 644 liburing.a $(DESTDIR)$(libdir)/liburing.a
ifeq ($(ENABLE_SHARED),1)
	install -D -m 755 $(libname) $(DESTDIR)$(libdir)/$(libname)
	ln -sf $(libname) $(DESTDIR)$(libdir)/$(soname)
	ln -sf $(libname) $(DESTDIR)$(libdir)/liburing.so
endif
|
||||
|
||||
# liburing.h feeds both object flavours; the shared objects must also be
# rebuilt when it changes, not only the static ones.
$(liburing_objs) $(liburing_sobjs): liburing.h
|
||||
|
||||
# Remove build products. Declared phony so a file named 'clean' cannot
# make this target look up to date.
.PHONY: clean
clean:
	rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(soname).new
	rm -f *.so* *.a *.o
|
||||
16
src/barrier.h
Normal file
16
src/barrier.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H

/*
 * read_barrier() / write_barrier(): ordering primitives used around the
 * ring head/tail accesses in io_uring.c.
 */
#if !defined(__x86_64)
/*
 * No arch specific definitions yet: be safe and fall back to full
 * barriers. Add arch appropriate definitions as archs gain support.
 */
#define read_barrier()	__sync_synchronize()
#define write_barrier()	__sync_synchronize()
#else
/* x86-64: lfence/sfence, with a "memory" clobber for the compiler */
#define read_barrier()	__asm__ __volatile__("lfence":::"memory")
#define write_barrier()	__asm__ __volatile__("sfence":::"memory")
#endif

#endif
|
||||
193
src/io_uring.c
Normal file
193
src/io_uring.c
Normal file
@@ -0,0 +1,193 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "io_uring.h"
|
||||
#include "liburing.h"
|
||||
#include "barrier.h"
|
||||
|
||||
/*
|
||||
* Return an IO completion, waiting for it it necessary.
|
||||
*/
|
||||
int io_uring_get_completion(int fd, struct io_uring_cq *cq,
|
||||
struct io_uring_event **ev_ptr)
|
||||
{
|
||||
const unsigned mask = *cq->kring_mask;
|
||||
struct io_uring_event *ev = NULL;
|
||||
unsigned head;
|
||||
int ret;
|
||||
|
||||
head = *cq->khead;
|
||||
do {
|
||||
read_barrier();
|
||||
if (head != *cq->ktail) {
|
||||
ev = &cq->events[head & mask];
|
||||
break;
|
||||
}
|
||||
ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS);
|
||||
if (ret < 0)
|
||||
return -errno;
|
||||
} while (1);
|
||||
|
||||
if (ev) {
|
||||
*cq->khead = head + 1;
|
||||
write_barrier();
|
||||
}
|
||||
|
||||
*ev_ptr = ev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit iocbs acquired from io_uring_get_iocb() to the kernel.
|
||||
*
|
||||
* Returns number of iocbs submitted
|
||||
*/
|
||||
int io_uring_submit(int fd, struct io_uring_sq *sq)
|
||||
{
|
||||
const unsigned mask = *sq->kring_mask;
|
||||
unsigned ktail, ktail_next, submitted;
|
||||
|
||||
/*
|
||||
* If we have pending IO in the kring, submit it first
|
||||
*/
|
||||
read_barrier();
|
||||
if (*sq->khead != *sq->ktail) {
|
||||
submitted = *sq->kring_entries;
|
||||
goto submit;
|
||||
}
|
||||
|
||||
if (sq->iocb_head == sq->iocb_tail)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Fill in iocbs that we have queued up, adding them to the kernel ring
|
||||
*/
|
||||
submitted = 0;
|
||||
ktail = ktail_next = *sq->ktail;
|
||||
while (sq->iocb_head < sq->iocb_tail) {
|
||||
ktail_next++;
|
||||
read_barrier();
|
||||
if (ktail_next == *sq->khead)
|
||||
break;
|
||||
|
||||
sq->array[ktail & mask] = sq->iocb_head & mask;
|
||||
ktail = ktail_next;
|
||||
|
||||
sq->iocb_head++;
|
||||
submitted++;
|
||||
}
|
||||
|
||||
if (!submitted)
|
||||
return 0;
|
||||
|
||||
if (*sq->ktail != ktail) {
|
||||
write_barrier();
|
||||
*sq->ktail = ktail;
|
||||
write_barrier();
|
||||
}
|
||||
|
||||
submit:
|
||||
return io_uring_enter(fd, submitted, 0, IORING_ENTER_GETEVENTS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return an iocb to fill. Application must later call io_uring_submit()
|
||||
* when it's ready to tell the kernel about it. The caller may call this
|
||||
* function multiple times before calling io_uring_submit().
|
||||
*
|
||||
* Returns a vacant iocb, or NULL if we're full.
|
||||
*/
|
||||
struct io_uring_iocb *io_uring_get_iocb(struct io_uring_sq *sq)
|
||||
{
|
||||
unsigned next = sq->iocb_tail + 1;
|
||||
struct io_uring_iocb *iocb;
|
||||
|
||||
/*
|
||||
* All iocbs are used
|
||||
*/
|
||||
if (next - sq->iocb_head > *sq->kring_entries)
|
||||
return NULL;
|
||||
|
||||
iocb = &sq->iocbs[sq->iocb_tail & *sq->kring_mask];
|
||||
sq->iocb_tail = next;
|
||||
return iocb;
|
||||
}
|
||||
|
||||
static int io_uring_mmap(int fd, struct io_uring_params *p,
|
||||
struct io_uring_sq *sq, struct io_uring_cq *cq)
|
||||
{
|
||||
size_t size;
|
||||
void *ptr;
|
||||
int ret;
|
||||
|
||||
sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
|
||||
ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
|
||||
if (ptr == MAP_FAILED)
|
||||
return -errno;
|
||||
sq->khead = ptr + p->sq_off.head;
|
||||
sq->ktail = ptr + p->sq_off.tail;
|
||||
sq->kring_mask = ptr + p->sq_off.ring_mask;
|
||||
sq->kring_entries = ptr + p->sq_off.ring_entries;
|
||||
sq->kflags = ptr + p->sq_off.flags;
|
||||
sq->kdropped = ptr + p->sq_off.dropped;
|
||||
sq->array = ptr + p->sq_off.array;
|
||||
|
||||
size = p->sq_entries * sizeof(struct io_uring_iocb);
|
||||
sq->iocbs = mmap(0, size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_POPULATE, fd,
|
||||
IORING_OFF_IOCB);
|
||||
if (sq->iocbs == MAP_FAILED) {
|
||||
ret = -errno;
|
||||
err:
|
||||
munmap(sq->khead, sq->ring_sz);
|
||||
return ret;
|
||||
}
|
||||
|
||||
cq->ring_sz = p->cq_off.events + p->cq_entries * sizeof(struct io_uring_event);
|
||||
ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
|
||||
if (ptr == MAP_FAILED) {
|
||||
ret = -errno;
|
||||
munmap(sq->iocbs, p->sq_entries * sizeof(struct io_uring_iocb));
|
||||
goto err;
|
||||
}
|
||||
cq->khead = ptr + p->cq_off.head;
|
||||
cq->ktail = ptr + p->cq_off.tail;
|
||||
cq->kring_mask = ptr + p->cq_off.ring_mask;
|
||||
cq->kring_entries = ptr + p->cq_off.ring_entries;
|
||||
cq->koverflow = ptr + p->cq_off.overflow;
|
||||
cq->events = ptr + p->cq_off.events;
|
||||
return fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns -1 on error, or an 'fd' on success. On success, 'sq' and 'cq'
|
||||
* contain the necessary information to read/write to the rings.
|
||||
*/
|
||||
int io_uring_queue_init(unsigned entries, struct io_uring_params *p,
|
||||
struct iovec *iovecs, struct io_uring_sq *sq,
|
||||
struct io_uring_cq *cq)
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = io_uring_setup(entries, iovecs, p);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
memset(sq, 0, sizeof(*sq));
|
||||
memset(cq, 0, sizeof(*cq));
|
||||
return io_uring_mmap(fd, p, sq, cq);
|
||||
}
|
||||
|
||||
void io_uring_queue_exit(int fd, struct io_uring_sq *sq, struct io_uring_cq *cq)
|
||||
{
|
||||
munmap(sq->iocbs, *sq->kring_entries * sizeof(struct io_uring_iocb));
|
||||
munmap(sq->khead, sq->ring_sz);
|
||||
munmap(cq->khead, cq->ring_sz);
|
||||
close(fd);
|
||||
}
|
||||
115
src/io_uring.h
Normal file
115
src/io_uring.h
Normal file
@@ -0,0 +1,115 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
* Header file for the io_uring interface.
|
||||
*
|
||||
* Copyright (C) 2019 Jens Axboe
|
||||
* Copyright (C) 2019 Christoph Hellwig
|
||||
*/
|
||||
#ifndef LINUX_IO_URING_H
|
||||
#define LINUX_IO_URING_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
 * One IO submission entry. The array of these is what user space maps at
 * IORING_OFF_IOCB.
 */
struct io_uring_iocb {
	__u8	opcode;
	__u8	flags;
	__u16	ioprio;
	__s32	fd;
	__u64	off;
	/* padded to 64 bits so the layout does not depend on pointer size */
	union {
		void	*addr;
		__u64	__pad;
	};
	__u32	len;
	union {
		__kernel_rwf_t	rw_flags;
		__u32		__resv;
	};
};
|
||||
|
||||
/*
 * Flag bits for io_uring_setup()
 */
#define IORING_SETUP_IOPOLL	(1 << 0)	/* io_context is polled */
#define IORING_SETUP_FIXEDBUFS	(1 << 1)	/* IO buffers are fixed */
#define IORING_SETUP_SQTHREAD	(1 << 2)	/* Use SQ thread */
#define IORING_SETUP_SQWQ	(1 << 3)	/* Use SQ workqueue */
#define IORING_SETUP_SQPOLL	(1 << 4)	/* SQ thread polls */

/*
 * Operation codes
 */
#define IORING_OP_READ		1
#define IORING_OP_WRITE		2
#define IORING_OP_FSYNC		3
#define IORING_OP_FDSYNC	4
#define IORING_OP_READ_FIXED	5
#define IORING_OP_WRITE_FIXED	6
|
||||
|
||||
/*
 * One IO completion entry, as found in the completion ring
 */
struct io_uring_event {
	__u64	index;		/* what iocb this event came from */
	__s32	res;		/* result code for this event */
	__u32	flags;		/* IOEV_FLAG_* bits */
};

/*
 * Bits for io_uring_event->flags
 */
#define IOEV_FLAG_CACHEHIT	(1 << 0)	/* IO did not hit media */
|
||||
|
||||
/*
 * Magic mmap(2) offsets: the application passes one of these as the file
 * offset to select which area of the io_uring fd it maps.
 */
#define IORING_OFF_SQ_RING	0ULL
#define IORING_OFF_CQ_RING	0x8000000ULL
#define IORING_OFF_IOCB		0x10000000ULL
|
||||
|
||||
/*
 * Offsets of the submission ring fields inside the area mapped at
 * IORING_OFF_SQ_RING. Filled in for the application to use with mmap(2).
 */
struct io_sqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	flags;
	__u32	dropped;
	__u32	array;
	__u32	resv[3];
};

#define IORING_SQ_NEED_WAKEUP	(1 << 0)	/* needs io_uring_enter wakeup */

/*
 * Offsets of the completion ring fields inside the area mapped at
 * IORING_OFF_CQ_RING.
 */
struct io_cqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	overflow;
	__u32	events;
	__u32	resv[4];
};
|
||||
|
||||
/*
 * Flag bits for io_uring_enter(2)
 */
#define IORING_ENTER_GETEVENTS	(1 << 0)
|
||||
|
||||
/*
|
||||
* Passed in for io_uring_setup(2). Copied back with updated info on success
|
||||
*/
|
||||
struct io_uring_params {
|
||||
__u32 sq_entries;
|
||||
__u32 cq_entries;
|
||||
__u32 flags;
|
||||
__u16 sq_thread_cpu;
|
||||
__u16 resv[9];
|
||||
struct io_sqring_offsets sq_off;
|
||||
struct io_cqring_offsets cq_off;
|
||||
};
|
||||
|
||||
#endif
|
||||
57
src/liburing.h
Normal file
57
src/liburing.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef LIB_URING_H
|
||||
#define LIB_URING_H
|
||||
|
||||
#include <sys/uio.h>
|
||||
#include "io_uring.h"
|
||||
|
||||
/*
 * Library interface to io_uring
 */

/*
 * Submission queue state. The k* pointers and 'array' point into the
 * mmap'ed SQ ring, at the offsets taken from io_uring_params.sq_off.
 */
struct io_uring_sq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *kdropped;
	unsigned *array;
	struct io_uring_iocb *iocbs;	/* mmap'ed iocb array */

	/*
	 * Library-side counters: io_uring_get_iocb() advances the tail,
	 * io_uring_submit() advances the head.
	 */
	unsigned iocb_head;
	unsigned iocb_tail;

	size_t ring_sz;			/* size of the SQ ring mapping */
};
|
||||
|
||||
/*
 * Completion queue state. The k* pointers and 'events' point into the
 * mmap'ed CQ ring, at the offsets taken from io_uring_params.cq_off.
 */
struct io_uring_cq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *koverflow;
	struct io_uring_event *events;

	size_t ring_sz;			/* size of the CQ ring mapping */
};
|
||||
|
||||
/*
 * System calls
 *
 * Thin wrappers around the raw system calls, defined in syscall.c.
 * 'fd' is declared as int so the prototype matches the definition there
 * (the two must agree, and callers pass an int file descriptor).
 */
extern int io_uring_setup(unsigned entries, struct iovec *iovecs,
	struct io_uring_params *p);
extern int io_uring_enter(int fd, unsigned to_submit,
	unsigned min_complete, unsigned flags);
|
||||
|
||||
/*
 * Library interface
 */

/* Queue setup and teardown */
extern int io_uring_queue_init(unsigned entries, struct io_uring_params *p,
	struct iovec *iovecs, struct io_uring_sq *sq, struct io_uring_cq *cq);
extern void io_uring_queue_exit(int fd, struct io_uring_sq *sq,
	struct io_uring_cq *cq);

/* Submission side: grab iocbs to fill in, then submit them */
extern struct io_uring_iocb *io_uring_get_iocb(struct io_uring_sq *sq);
extern int io_uring_submit(int fd, struct io_uring_sq *sq);

/* Completion side */
extern int io_uring_get_completion(int fd, struct io_uring_cq *cq,
	struct io_uring_event **ev_ptr);
|
||||
|
||||
#endif
|
||||
11
src/liburing.map
Normal file
11
src/liburing.map
Normal file
@@ -0,0 +1,11 @@
|
||||
/*
 * Exported symbols of the shared library. Everything not listed under
 * 'global' is kept local to the library.
 */
LIBURING_0.1 {
	global:
		io_uring_queue_init;
		io_uring_queue_exit;
		io_uring_get_completion;
		io_uring_submit;
		io_uring_get_iocb;
	local:
		*;
};
|
||||
31
src/syscall.c
Normal file
31
src/syscall.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Will go away once libc support is there
|
||||
*/
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/uio.h>
|
||||
#include "io_uring.h"
|
||||
|
||||
/* Only x86-64 has syscall numbers filled in so far */
#if !defined(__x86_64)
#error "Arch not supported yet"
#endif

/*
 * Fallback syscall numbers, used only when the headers do not provide them.
 * NOTE(review): presumably the numbers of the kernel tree this was
 * developed against - confirm before running on another kernel.
 */
#ifndef __NR_sys_io_uring_setup
#define __NR_sys_io_uring_setup		335
#endif
#ifndef __NR_sys_io_uring_enter
#define __NR_sys_io_uring_enter		336
#endif
|
||||
|
||||
int io_uring_setup(unsigned int entries, struct iovec *iovecs,
|
||||
struct io_uring_params *p)
|
||||
{
|
||||
return syscall(__NR_sys_io_uring_setup, entries, iovecs, p);
|
||||
}
|
||||
|
||||
int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
|
||||
unsigned int flags)
|
||||
{
|
||||
return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
|
||||
flags);
|
||||
}
|
||||
Reference in New Issue
Block a user