eventfd and signalfd
Event notification and signal delivery through file descriptors
eventfd: a counter you can poll
eventfd creates a file descriptor backed by a kernel counter. It's used for:
- Event notification between threads or processes
- Waking up epoll/select/poll from another context
- Counting occurrences (semaphore-like, but pollable)
#include <errno.h>
#include <sys/eventfd.h>
/* First argument is initval, the starting counter value (0 here). */
/* OR in EFD_SEMAPHORE as well to get semaphore mode (see below). */
const int efd_flags = EFD_NONBLOCK | EFD_CLOEXEC;
int efd = eventfd(0, efd_flags);
Writing (signaling)
Writing an 8-byte uint64_t adds to the counter:
/* Every write() of an 8-byte value adds that value to the counter. */
uint64_t value;

value = 1;
write(efd, &value, sizeof value); /* counter += 1 */

/* Several events can be signalled with a single write: */
value = 5;
write(efd, &value, sizeof value); /* counter += 5 */

/*
 * The counter can hold at most UINT64_MAX - 1. A write that would push it
 * past that limit blocks, or fails with EAGAIN when EFD_NONBLOCK is set.
 */
Reading (consuming)
Reading an 8-byte uint64_t returns the current counter and resets it to 0:
uint64_t count;
ssize_t n = read(efd, &count, sizeof(count));
/* n == 8, count == accumulated value, counter reset to 0 */
/* EFD_NONBLOCK: returns EAGAIN if counter == 0 */
EFD_SEMAPHORE mode
With EFD_SEMAPHORE, each read() decrements by 1 and returns 1 (not the full count):
int efd = eventfd(0, EFD_SEMAPHORE | EFD_NONBLOCK);
write(efd, &(uint64_t){3}, 8); /* counter = 3 */
uint64_t val;
read(efd, &val, 8); /* val=1, counter=2 */
read(efd, &val, 8); /* val=1, counter=1 */
read(efd, &val, 8); /* val=1, counter=0 */
read(efd, &val, 8); /* EAGAIN: counter==0 */
epoll integration
The key feature: eventfd integrates with epoll so you can wake a sleeping event loop:
/* Setup: register the eventfd for readability on an epoll instance. */
int epfd = epoll_create1(0);
int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
struct epoll_event ev = { .events = EPOLLIN, .data.fd = efd };
epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);

/* Waker side (another thread or a signal handler): */
write(efd, &(uint64_t){1}, 8); /* makes efd readable, so epoll_wait returns */

/* Main loop side: */
struct epoll_event events[16];
int n = epoll_wait(epfd, events, 16, -1);
for (int i = 0; i < n; i++) {
    if (events[i].data.fd != efd)
        continue;

    /* Reading consumes the pending wakeups and resets the counter. */
    uint64_t count;
    read(efd, &count, 8);
    /* Handle the event */
}
Typical uses
Thread pool wakeup: Instead of a mutex+condvar, use eventfd with a work queue:
/* Thread pool state shared between enqueue() and the worker threads. */
struct thread_pool {
    int wakeup_efd; /* eventfd: written by enqueue(), read by worker() to wait for work */
    /* ... work queue ... */
};
/*
 * Push one work item onto the pool's queue, then bump the wakeup eventfd
 * by one so a worker blocked in read() is woken.
 */
void enqueue(struct thread_pool *pool, struct work *w) {
    const uint64_t one = 1;

    work_queue_push(pool, w);
    write(pool->wakeup_efd, &one, sizeof one);
}
/*
 * Worker thread: sleep on the pool's eventfd until enqueue() signals work,
 * then process the items that were announced.
 *
 * The read() result is checked so that `count` is never used when the call
 * failed: EINTR is retried, any other error ends the worker.
 *
 * Note: without EFD_SEMAPHORE a single read() returns the whole accumulated
 * counter and resets it to 0, so one worker receives every pending wakeup.
 * With several workers on the same eventfd, create it with EFD_SEMAPHORE so
 * each read() consumes exactly one unit.
 */
void worker(struct thread_pool *pool) {
    for (;;) {
        uint64_t count;
        ssize_t n = read(pool->wakeup_efd, &count, sizeof(count)); /* blocks until work */

        if (n < 0) {
            if (errno == EINTR)
                continue; /* interrupted by a signal: just wait again */
            break;        /* unrecoverable error on the eventfd */
        }
        if (n != (ssize_t)sizeof(count))
            break;        /* eventfd reads are always 8 bytes; anything else is fatal */

        /* process 'count' items */
    }
}
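io_uring completion notification: io_uring can signal an eventfd whenever completions are posted. Register the descriptor with io_uring_register(ring_fd, IORING_REGISTER_EVENTFD, &efd, 1) (or liburing's io_uring_register_eventfd()), then add it to epoll like any other eventfd.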
Kernel implementation
/* fs/eventfd.c */
/* Per-eventfd state; the read/write handlers reach it via file->private_data. */
struct eventfd_ctx {
    struct kref kref;          /* reference count for this context */
    wait_queue_head_t wqh;     /* waiters (readers, writers, pollers); wqh.lock also protects count */
    __u64 count;               /* the counter: added to by write(), consumed by read() */
    unsigned int flags;        /* creation flags; EFD_SEMAPHORE is tested in eventfd_read() */
    int id; /* for debugging */
};
/*
 * eventfd_write - add a user-supplied 64-bit value to the counter.
 *
 * Simplified from fs/eventfd.c. The counter may hold at most ULLONG_MAX - 1.
 * write() never clamps: if the addition would exceed that maximum the caller
 * sleeps until a reader makes room, or gets -EAGAIN when the file is
 * O_NONBLOCK.
 *
 * Returns sizeof(__u64) on success, -EINVAL for a short buffer or a value of
 * ULLONG_MAX, -EFAULT if the user buffer cannot be read, -EAGAIN as described
 * above, or the negative result of an interrupted wait.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
    struct eventfd_ctx *ctx = file->private_data;
    ssize_t res;
    __u64 ucnt;

    if (count < sizeof(ucnt))
        return -EINVAL;
    if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
        return -EFAULT;
    if (ucnt == ULLONG_MAX)
        return -EINVAL;

    spin_lock_irq(&ctx->wqh.lock);
    res = -EAGAIN;
    if (ULLONG_MAX - ctx->count > ucnt) {
        res = sizeof(ucnt);
    } else if (!(file->f_flags & O_NONBLOCK)) {
        /* Sleep (lock dropped while waiting) until the value fits */
        res = wait_event_interruptible_locked_irq(ctx->wqh,
                ULLONG_MAX - ctx->count > ucnt);
        if (!res)
            res = sizeof(ucnt);
    }
    if (res > 0) {
        ctx->count += ucnt;
        /* Wake up anyone polling/reading */
        if (waitqueue_active(&ctx->wqh))
            wake_up_locked_poll(&ctx->wqh, EPOLLIN);
    }
    spin_unlock_irq(&ctx->wqh.lock);

    return res;
}
/*
 * eventfd_read - consume the counter and hand the value to userspace.
 *
 * Simplified from fs/eventfd.c. In the default mode the whole counter is
 * returned and reset to 0; with EFD_SEMAPHORE the counter is decremented by
 * one and 1 is returned.
 *
 * Returns sizeof(__u64) on success, -EINVAL if the buffer is smaller than
 * 8 bytes, -EAGAIN if the counter is 0 and the file is O_NONBLOCK, the
 * negative result of an interrupted wait, or -EFAULT if the user buffer
 * cannot be written.
 */
static ssize_t eventfd_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
{
    struct eventfd_ctx *ctx = file->private_data;
    ssize_t res;
    __u64 ucnt = 0;

    if (count < sizeof(ucnt))
        return -EINVAL;

    spin_lock_irq(&ctx->wqh.lock);
    res = -EAGAIN;
    if (ctx->count) {
        res = sizeof(ucnt);
    } else if (!(file->f_flags & O_NONBLOCK)) {
        /* Sleep until count > 0; a signal aborts the wait with an error */
        res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->count);
        if (!res)
            res = sizeof(ucnt);
    }
    if (res > 0) {
        /* Only touch the counter once it is known to be non-zero */
        if (ctx->flags & EFD_SEMAPHORE) {
            ucnt = 1;
            ctx->count--;
        } else {
            ucnt = ctx->count;
            ctx->count = 0;
        }
        /* Wake up blocked writers if count was at max */
        if (waitqueue_active(&ctx->wqh))
            wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
    }
    spin_unlock_irq(&ctx->wqh.lock);

    if (res > 0 && copy_to_user(buf, &ucnt, sizeof(ucnt)))
        return -EFAULT;

    return res;
}
signalfd: receive signals via file descriptor
Normally, signals interrupt the current execution at unpredictable points. signalfd delivers signals as readable data on a file descriptor, making signal handling compatible with epoll-driven event loops.
#include <sys/signalfd.h>
#include <signal.h>
/* Block signals from normal delivery first */
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGINT);
sigaddset(&mask, SIGTERM);
sigaddset(&mask, SIGUSR1);
sigprocmask(SIG_BLOCK, &mask, NULL);
/* Create signalfd for these signals */
int sfd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
/* Read signal info (blocks or EAGAIN with SFD_NONBLOCK) */
struct signalfd_siginfo ssi;
ssize_t n = read(sfd, &ssi, sizeof(ssi));
if (n == sizeof(ssi)) {
printf("signal %u from pid %u\n", ssi.ssi_signo, ssi.ssi_pid);
printf("uid=%u code=%d status=%d\n",
ssi.ssi_uid, ssi.ssi_code, ssi.ssi_status);
}
signalfd_siginfo fields
/* One record as returned by read() on a signalfd; one record per dequeued signal. */
struct signalfd_siginfo {
    uint32_t ssi_signo; /* Signal number */
    int32_t ssi_errno; /* Error number (usually 0) */
    int32_t ssi_code; /* Signal code (SI_USER, SI_KERNEL, etc.) */
    uint32_t ssi_pid; /* PID of sender */
    uint32_t ssi_uid; /* Real UID of sender */
    int32_t ssi_fd; /* File descriptor (SIGIO) */
    uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */
    uint32_t ssi_band; /* Band event (SIGIO) */
    uint32_t ssi_overrun; /* POSIX timer overrun count */
    uint32_t ssi_trapno; /* Trap number (hardware fault) */
    int32_t ssi_status; /* Exit status/signal (SIGCHLD) */
    int32_t ssi_int; /* Integer payload (sigqueue) */
    uint64_t ssi_ptr; /* Pointer payload (sigqueue) */
    uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */
    uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */
    uint64_t ssi_addr; /* Faulting address (hardware-generated signals such as SIGSEGV/SIGBUS) */
    /* ... further fields and padding (see signalfd(2); the structure is padded to 128 bytes) ... */
};
epoll with signalfd
The canonical pattern for a single-threaded server handling both I/O and signals:
/* Block signals at thread/process level so they only arrive via the fd */
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGINT);
sigaddset(&mask, SIGTERM);
sigaddset(&mask, SIGCHLD);
pthread_sigmask(SIG_BLOCK, &mask, NULL);

int sfd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
int epfd = epoll_create1(EPOLL_CLOEXEC);

/* Add signalfd and other fds to epoll */
struct epoll_event ev = { .events = EPOLLIN, .data.fd = sfd };
epoll_ctl(epfd, EPOLL_CTL_ADD, sfd, &ev);
/* ... add socket fds, timer fds, etc. ... */

/* Single event loop for everything */
while (running) {
    struct epoll_event events[32];
    int n = epoll_wait(epfd, events, 32, -1); /* n < 0 (e.g. EINTR): loop body is skipped */

    for (int i = 0; i < n; i++) {
        int fd = events[i].data.fd;

        if (fd != sfd) {
            handle_io(fd);
            continue;
        }

        /*
         * Drain every pending signal. sfd is non-blocking, so the loop
         * ends when read() fails with EAGAIN. Only a full-size read is
         * trusted, so ssi is never inspected after a failed read.
         */
        struct signalfd_siginfo ssi;
        while (read(sfd, &ssi, sizeof(ssi)) == (ssize_t)sizeof(ssi)) {
            if (ssi.ssi_signo == SIGTERM || ssi.ssi_signo == SIGINT)
                running = 0;
            else if (ssi.ssi_signo == SIGCHLD)
                reap_children();
        }
    }
}
signalfd vs traditional signal handling
| Approach | Thread-safe | epoll-compatible | Signal info |
|---|---|---|---|
| signal()/sigaction() | Careful with SA_RESTART | No | Limited |
| sigwaitinfo() | Yes (blocking) | No | Full siginfo |
| signalfd | Yes | Yes | Full siginfo_t |
| self-pipe trick | Yes | Yes | Signal number only |
The self-pipe trick (write a byte to a pipe in the signal handler, read from the other end in the event loop) was the traditional solution. signalfd replaces it cleanly.
signalfd update
Pass an existing sfd to update its mask:
/* Add SIGUSR2 to existing signalfd */
sigaddset(&mask, SIGUSR2);
/* The new signal must be blocked too, or it is still delivered the normal way */
pthread_sigmask(SIG_BLOCK, &mask, NULL);
signalfd(sfd, &mask, 0); /* first arg is existing fd; its mask is replaced by `mask` */
Kernel implementation
/* fs/signalfd.c */
/*
 * signalfd_read - dequeue one pending signal from the context's mask and
 * copy it to userspace as a struct signalfd_siginfo.
 *
 * Simplified from fs/signalfd.c: locking is elided and only a single record
 * is returned per call, although the buffer may have room for more.
 *
 * Returns sizeof(struct signalfd_siginfo) on success, -EINVAL if the buffer
 * cannot hold one record, -EAGAIN if nothing is pending and the file is
 * O_NONBLOCK, the negative result of an interrupted wait, or the error
 * from copying the record out.
 */
static ssize_t signalfd_read(struct file *file, char __user *buf,
                             size_t count, loff_t *ppos)
{
    struct signalfd_ctx *ctx = file->private_data;
    struct signalfd_siginfo __user *siginfo = (void __user *)buf;
    kernel_siginfo_t info;
    enum pid_type type;
    int ret;

    count /= sizeof(*siginfo);
    if (!count)
        return -EINVAL;

    for (;;) {
        /* Dequeue a pending signal matching our mask */
        ret = dequeue_signal(current, &ctx->sigmask, &info, &type);
        if (ret)
            break;

        /* No signal: block or EAGAIN */
        if (file->f_flags & O_NONBLOCK)
            return -EAGAIN;

        /* Wait for a signal in our mask; give up if the wait is interrupted */
        ret = wait_event_interruptible(current->sighand->signalfd_wqh,
                next_signal(&current->pending, &ctx->sigmask) ||
                next_signal(&current->signal->shared_pending, &ctx->sigmask));
        if (ret)
            return ret;
    }

    /* Copy siginfo to userspace signalfd_siginfo format */
    ret = signalfd_copyinfo(siginfo, &info);
    if (ret < 0)
        return ret;

    return sizeof(*siginfo);
}
timerfd: pollable timers
timerfd (covered in POSIX Timers) follows the same pattern — a file descriptor that becomes readable when a timer fires. Combined with eventfd and signalfd, it enables a complete event loop without threads:
/*
 * All three work with epoll. tfd, sfd and efd are the descriptors returned
 * by timerfd_create(), signalfd() and eventfd(); the function names
 * themselves must not be passed as the fd argument.
 */
epoll_ctl(epfd, EPOLL_CTL_ADD, tfd, &ev_timer);
epoll_ctl(epfd, EPOLL_CTL_ADD, sfd, &ev_signal);
epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev_event);
epoll_ctl(epfd, EPOLL_CTL_ADD, sock_fd, &ev_socket);
/* One epoll_wait handles timers + signals + events + I/O */
Further reading
- Completions and Wait Queues — kernel-side wait primitives
- IPC: Signals — traditional signal delivery
- POSIX Timers and timerfd — timerfd
- io_uring Architecture — io_uring + eventfd
- fs/eventfd.c, fs/signalfd.c — kernel implementations