Commit ee692a2 (parent 2bb04df)

fs,io_uring: add infrastructure for uring-cmd
file_operations->uring_cmd is a file private handler. This is somewhat similar to ioctl, but hopefully a lot more sane and useful, as it can be used to enable many io_uring capabilities for the underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't know what is in this command type; it's for the provider of ->uring_cmd() to deal with.

Co-developed-by: Kanchan Joshi <[email protected]>
Signed-off-by: Kanchan Joshi <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
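
As a rough illustration of the contract (all foo_* names below are hypothetical; only the io_uring_cmd API added by this commit is real): a handler reads its private command from ioucmd->cmd and either completes inline by returning a result, or returns -EIOCBQUEUED and finishes later via io_uring_cmd_done().

static int foo_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	/* payload lives in the SQE (or an async copy) while we run */
	const struct foo_cmd *cmd = ioucmd->cmd;

	switch (ioucmd->cmd_op) {
	case FOO_CMD_NOP:
		return 0;			/* completed inline, result 0 */
	case FOO_CMD_SUBMIT:
		if (issue_flags & IO_URING_F_NONBLOCK)
			return -EAGAIN;		/* core copies the SQE payload and retries */
		foo_hw_submit(cmd, ioucmd);	/* calls io_uring_cmd_done() when done */
		return -EIOCBQUEUED;
	default:
		return -ENOTTY;
	}
}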

4 files changed (+165, -26 lines)

fs/io_uring.c (117 additions, 18 deletions)

@@ -202,13 +202,6 @@ struct io_rings {
 	struct io_uring_cqe	cqes[]	____cacheline_aligned_in_smp;
 };
 
-enum io_uring_cmd_flags {
-	IO_URING_F_COMPLETE_DEFER	= 1,
-	IO_URING_F_UNLOCKED		= 2,
-	/* int's last bit, sign checks are usually faster than a bit test */
-	IO_URING_F_NONBLOCK		= INT_MIN,
-};
-
 struct io_mapped_ubuf {
 	u64		ubuf;
 	u64		ubuf_end;
@@ -972,6 +965,7 @@ struct io_kiocb {
 		struct io_xattr		xattr;
 		struct io_socket	sock;
 		struct io_nop		nop;
+		struct io_uring_cmd	uring_cmd;
 	};
 
 	u8				opcode;
@@ -1050,6 +1044,14 @@ struct io_cancel_data {
 	int seq;
 };
 
+/*
+ * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
+ * the following sqe if SQE128 is used.
+ */
+#define uring_cmd_pdu_size(is_sqe128)				\
+	((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -	\
+		offsetof(struct io_uring_sqe, cmd))
+
 struct io_op_def {
 	/* needs req->file assigned */
 	unsigned		needs_file : 1;
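
To make the macro's arithmetic concrete: with the canonical 64-byte SQE and 'cmd' starting at offset 48 (see the uapi change below), a normal ring leaves 16 bytes of inline payload and an SQE128 ring leaves 80. A sketch of compile-time checks one could add under those assumptions, e.g. next to the existing BUILD_BUG_ONs in io_uring_init():

	BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);	/* 1 * 64 - 48 */
	BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);	/* 2 * 64 - 48 */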
@@ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_SOCKET] = {
 		.audit_skip		= 1,
 	},
+	[IORING_OP_URING_CMD] = {
+		.needs_file		= 1,
+		.plug			= 1,
+		.needs_async_setup	= 1,
+		.async_size		= uring_cmd_pdu_size(1),
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode)
 		return "GETXATTR";
 	case IORING_OP_SOCKET:
 		return "SOCKET";
+	case IORING_OP_URING_CMD:
+		return "URING_CMD";
 	case IORING_OP_LAST:
 		return "INVALID";
 	}
@@ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req,
 	const char __user *name;
 	int ret;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->ioprio))
-		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
 
@@ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req,
 	const char __user *name;
 	int ret;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (unlikely(sqe->ioprio))
-		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
 
@@ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+{
+	req->uring_cmd.task_work_cb(&req->uring_cmd);
+}
+
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+			void (*task_work_cb)(struct io_uring_cmd *))
+{
+	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+	req->uring_cmd.task_work_cb = task_work_cb;
+	req->io_task_work.func = io_uring_cmd_work;
+	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+
+/*
+ * Called by consumers of io_uring_cmd, if they originally returned
+ * -EIOCBQUEUED upon receiving the command.
+ */
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
+{
+	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+	if (ret < 0)
+		req_set_fail(req);
+	if (req->ctx->flags & IORING_SETUP_CQE32)
+		__io_req_complete32(req, 0, ret, 0, res2, 0);
+	else
+		io_req_complete(req, ret);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_done);
+
+static int io_uring_cmd_prep_async(struct io_kiocb *req)
+{
+	size_t cmd_size;
+
+	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
+
+	memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
+	return 0;
+}
+
+static int io_uring_cmd_prep(struct io_kiocb *req,
+			     const struct io_uring_sqe *sqe)
+{
+	struct io_uring_cmd *ioucmd = &req->uring_cmd;
+
+	if (sqe->rw_flags)
+		return -EINVAL;
+	ioucmd->cmd = sqe->cmd;
+	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+	return 0;
+}
+
+static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_uring_cmd *ioucmd = &req->uring_cmd;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct file *file = req->file;
+	int ret;
+
+	if (!req->file->f_op->uring_cmd)
+		return -EOPNOTSUPP;
+
+	if (ctx->flags & IORING_SETUP_SQE128)
+		issue_flags |= IO_URING_F_SQE128;
+	if (ctx->flags & IORING_SETUP_CQE32)
+		issue_flags |= IO_URING_F_CQE32;
+	if (ctx->flags & IORING_SETUP_IOPOLL)
+		issue_flags |= IO_URING_F_IOPOLL;
+
+	if (req_has_async_data(req))
+		ioucmd->cmd = req->async_data;
+
+	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+	if (ret == -EAGAIN) {
+		if (!req_has_async_data(req)) {
+			if (io_alloc_async_data(req))
+				return -ENOMEM;
+			io_uring_cmd_prep_async(req);
+		}
+		return -EAGAIN;
+	}
+
+	if (ret != -EIOCBQUEUED)
+		io_uring_cmd_done(ioucmd, ret, 0);
+	return 0;
+}
+
 static int io_shutdown_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
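
For providers whose completions arrive in interrupt context, io_uring_cmd_complete_in_task() is the bridge: stash the result, then post the CQE from task context in the callback. A hypothetical sketch (the foo_* names are made up; only the two exported helpers are from this commit):

struct foo_cmd_state {		/* fits in the 32-byte ioucmd->pdu area */
	int	status;
};

static void foo_cmd_task_cb(struct io_uring_cmd *ioucmd)
{
	struct foo_cmd_state *state = (struct foo_cmd_state *)ioucmd->pdu;

	io_uring_cmd_done(ioucmd, state->status, 0);
}

static void foo_irq_complete(struct io_uring_cmd *ioucmd, int status)
{
	struct foo_cmd_state *state = (struct foo_cmd_state *)ioucmd->pdu;

	state->status = status;
	io_uring_cmd_complete_in_task(ioucmd, foo_cmd_task_cb);
}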
@@ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_socket *sock = &req->sock;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-	if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
+	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
 		return -EINVAL;
 
 	sock->domain = READ_ONCE(sqe->fd);
@@ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_getxattr_prep(req, sqe);
 	case IORING_OP_SOCKET:
 		return io_socket_prep(req, sqe);
+	case IORING_OP_URING_CMD:
+		return io_uring_cmd_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req)
 		return io_recvmsg_prep_async(req);
 	case IORING_OP_CONNECT:
 		return io_connect_prep_async(req);
+	case IORING_OP_URING_CMD:
+		return io_uring_cmd_prep_async(req);
 	}
 	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
 		    req->opcode);
@@ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SOCKET:
 		ret = io_socket(req, issue_flags);
 		break;
+	case IORING_OP_URING_CMD:
+		ret = io_uring_cmd(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -12699,6 +12796,8 @@ static int __init io_uring_init(void)
 
 	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
 
+	BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
+
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
 				SLAB_ACCOUNT);
 	return 0;

include/linux/fs.h (2 additions, 0 deletions)

@@ -1953,6 +1953,7 @@ struct dir_context {
 #define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
+struct io_uring_cmd;
 
 struct file_operations {
 	struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
 			   struct file *file_out, loff_t pos_out,
 			   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 } __randomize_layout;
 
 struct inode_operations {
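
A provider advertises support simply by filling in the new hook; a minimal hypothetical wiring (foo_* names are made up):

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,		/* hypothetical open handler */
	.uring_cmd	= foo_uring_cmd,	/* handler as sketched above */
};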

include/linux/io_uring.h (33 additions, 0 deletions)

@@ -5,7 +5,32 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
+enum io_uring_cmd_flags {
+	IO_URING_F_COMPLETE_DEFER	= 1,
+	IO_URING_F_UNLOCKED		= 2,
+	/* int's last bit, sign checks are usually faster than a bit test */
+	IO_URING_F_NONBLOCK		= INT_MIN,
+
+	/* ctx state flags, for URING_CMD */
+	IO_URING_F_SQE128		= 4,
+	IO_URING_F_CQE32		= 8,
+	IO_URING_F_IOPOLL		= 16,
+};
+
+struct io_uring_cmd {
+	struct file	*file;
+	const void	*cmd;
+	/* callback to defer completions to task context */
+	void (*task_work_cb)(struct io_uring_cmd *cmd);
+	u32		cmd_op;
+	u32		pad;
+	u8		pdu[32]; /* available inline for free use */
+};
+
 #if defined(CONFIG_IO_URING)
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+			void (*task_work_cb)(struct io_uring_cmd *));
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
@@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
 		__io_uring_free(tsk);
 }
 #else
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+		ssize_t ret2)
+{
+}
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+			void (*task_work_cb)(struct io_uring_cmd *))
+{
+}
 static inline struct sock *io_uring_get_socket(struct file *file)
 {
 	return NULL;
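
Handlers that depend on the wide SQE or CQE formats can test the ctx state flags that io_uring_cmd() forwards in issue_flags; a hypothetical guard (only the IO_URING_F_* flags are from this commit):

static int foo_check_ring_format(unsigned int issue_flags)
{
	if (!(issue_flags & IO_URING_F_SQE128))
		return -EOPNOTSUPP;	/* need the 80-byte command area */
	if (!(issue_flags & IO_URING_F_CQE32))
		return -EOPNOTSUPP;	/* need the extra result field */
	return 0;
}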

include/uapi/linux/io_uring.h (13 additions, 8 deletions)

@@ -22,6 +22,7 @@ struct io_uring_sqe {
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		__u32	cmd_op;
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
@@ -61,14 +62,17 @@ struct io_uring_sqe {
 		__s32	splice_fd_in;
 		__u32	file_index;
 	};
-	__u64	addr3;
-	__u64	__pad2[1];
-
-	/*
-	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
-	 * contains 64-bytes of padding, doubling the size of the SQE.
-	 */
-	__u64	__big_sqe_pad[0];
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 
 enum {
@@ -175,6 +179,7 @@ enum io_uring_op {
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
+	IORING_OP_URING_CMD,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
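
From userspace, the new opcode is driven like any other SQE. A hypothetical liburing-based sketch, assuming a liburing recent enough to handle IORING_SETUP_SQE128 rings and a driver-defined FOO_CMD_READ opcode whose payload struct fits in the 80-byte command area (all foo_* names are made up):

#include <liburing.h>
#include <string.h>

static int foo_send_cmd(int dev_fd, const struct foo_payload *p)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQE128);
	if (ret < 0)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	memset(sqe, 0, 2 * sizeof(*sqe));	/* SQE128: 128-byte SQE slot */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = dev_fd;
	sqe->cmd_op = FOO_CMD_READ;		/* private to the driver */
	memcpy(sqe->cmd, p, sizeof(*p));	/* up to 80 bytes of payload */

	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		ret = cqe->res;			/* the handler's result */
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return ret;
}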
