Advanced Char Driver Operations
Linux Kernel Programming CIS 4930/COP 5641
Advanced (Manual) Sleeping
Advanced Sleeping
Uses low-level functions to effect a sleep
How a process sleeps:
1. Allocate and initialize a wait_queue_t structure (the queue element)

   DEFINE_WAIT(my_wait);

   or

   wait_queue_t my_wait;
   init_wait(&my_wait);
Advanced Sleeping
2. Add the process to the proper wait queue and mark it as being asleep
   (task state goes from TASK_RUNNING to TASK_INTERRUPTIBLE or
   TASK_UNINTERRUPTIBLE) by calling

   void prepare_to_wait(wait_queue_head_t *queue,
                        wait_queue_t *wait, int state);
Advanced Sleeping
3. Give up the processor
   Double-check the sleeping condition before going to sleep; the wakeup
   thread might have changed the condition between steps 1 and 2

   if (/* sleeping condition */)
       schedule(); /* yield the CPU */
Advanced Sleeping
4. Return from sleep
   Call finish_wait to reset the task state and remove the process from
   the wait queue; this is needed even when schedule() was never called

   void finish_wait(wait_queue_head_t *queue, wait_queue_t *wait);
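Taken together, the four steps form this pattern (a minimal sketch;
my_queue and condition are hypothetical placeholders, not scull symbols):

   DEFINE_WAIT(my_wait);                                     /* step 1 */
   prepare_to_wait(&my_queue, &my_wait, TASK_INTERRUPTIBLE); /* step 2 */
   if (!condition)                 /* recheck before actually sleeping */
       schedule();                                           /* step 3 */
   finish_wait(&my_queue, &my_wait);                         /* step 4 */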
Advanced Sleeping
scullpipe write method

/* How much space is free? One slot is always left empty, so a buffer
 * of size N holds at most N - 1 bytes; that is how a full buffer is
 * distinguished from an empty one. */
static int spacefree(struct scull_pipe *dev)
{
    if (dev->rp == dev->wp)
        return dev->buffersize - 1;
    return ((dev->rp + dev->buffersize - dev->wp) % dev->buffersize) - 1;
}
Advanced Sleeping

static ssize_t scull_p_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *f_pos)
{
    struct scull_pipe *dev = filp->private_data;
    int result;

    if (mutex_lock_interruptible(&dev->mutex))
        return -ERESTARTSYS;

    /* Wait for space for writing */
    result = scull_getwritespace(dev, filp);
    if (result)
        return result; /* scull_getwritespace called mutex_unlock(&dev->mutex) */

    /* ok, space is there, accept something */
    count = min(count, (size_t)spacefree(dev));
Advanced Sleeping

    if (dev->wp >= dev->rp)
        count = min(count, (size_t)(dev->end - dev->wp));
    else /* the write pointer has wrapped, fill up to rp - 1 */
        count = min(count, (size_t)(dev->rp - dev->wp - 1));
    if (copy_from_user(dev->wp, buf, count)) {
        mutex_unlock(&dev->mutex);
        return -EFAULT;
    }
    dev->wp += count;
    if (dev->wp == dev->end)
        dev->wp = dev->buffer; /* wrapped */
    mutex_unlock(&dev->mutex);

    /* finally, awake any blocked reader */
    wake_up_interruptible(&dev->inq);

    /* and signal asynchronous readers */
    if (dev->async_queue)
        kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
    return count;
}
Advanced Sleeping

/* Wait for space for writing; caller must hold device mutex.
 * On error the mutex will be released before returning.
 * The comments track the wait queue and the task state. */
static int scull_getwritespace(struct scull_pipe *dev, struct file *filp)
{
    while (spacefree(dev) == 0) { /* full */
        DEFINE_WAIT(wait);                /* queue: full, task: RUNNING */

        mutex_unlock(&dev->mutex);
        if (filp->f_flags & O_NONBLOCK)
            return -EAGAIN;
        prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
                                          /* task: RUNNING -> INTERRUPTIBLE */
        if (spacefree(dev) == 0)
            schedule();                   /* task: INTERRUPTIBLE, sleeping */
        finish_wait(&dev->outq, &wait);
        if (signal_pending(current))
            return -ERESTARTSYS;
        if (mutex_lock_interruptible(&dev->mutex))
            return -ERESTARTSYS;
    }
    return 0;
}
Exclusive Waits
Avoid waking up all processes waiting on a queue
Wake up only one process by calling

   void prepare_to_wait_exclusive(wait_queue_head_t *queue,
                                  wait_queue_t *wait, int state);

Sets the WQ_FLAG_EXCLUSIVE flag
Adds the queue entry to the end of the wait queue
wake_up stops after waking the first process with the flag set
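The pattern is the manual sleep from before with one call swapped
(sketch; my_queue and condition are hypothetical):

   DEFINE_WAIT(my_wait);
   prepare_to_wait_exclusive(&my_queue, &my_wait, TASK_INTERRUPTIBLE);
   if (!condition)
       schedule();
   finish_wait(&my_queue, &my_wait);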
The Details of Waking Up

/* wakes up all non-exclusive processes waiting on the queue
   (and at most one exclusive waiter) */
void wake_up(wait_queue_head_t *queue);

/* wakes up processes that perform an interruptible sleep */
void wake_up_interruptible(wait_queue_head_t *queue);

/* wake up to nr exclusive waiters */
void wake_up_nr(wait_queue_head_t *queue, int nr);
void wake_up_interruptible_nr(wait_queue_head_t *queue, int nr);

/* wake all waiters, exclusive or not */
void wake_up_all(wait_queue_head_t *queue);
void wake_up_interruptible_all(wait_queue_head_t *queue);

/* do not lose the CPU during this call
   (the awakened process does not preempt the caller) */
void wake_up_interruptible_sync(wait_queue_head_t *queue);
poll and select (and epoll)
poll and select (and epoll)
Nonblocking I/O often involves the poll, select, and epoll system calls
Allow a process to determine whether it can read or write one or more
open files without blocking
Can block a process until any of a set of file descriptors becomes
available for reading or writing
select: introduced in BSD Unix
poll: introduced in System V
epoll: added to Linux in the 2.5 development series; improves scaling
in the number of file descriptors
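As a user-space illustration (a sketch; fd is assumed to be an
already-open descriptor):

   #include <poll.h>

   struct pollfd pfd = { .fd = fd, .events = POLLIN };
   int n = poll(&pfd, 1, 5000);  /* wait up to 5 seconds */
   if (n > 0 && (pfd.revents & POLLIN)) {
       /* fd can now be read without blocking */
   }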
poll and select
All three calls are supported through the driver's poll method

   unsigned int (*poll) (struct file *filp, poll_table *wait);

1. Call poll_wait on one or more wait queues that could indicate a
   change in the poll status; if no file descriptors are ready, wait
2. Return a bit mask describing the operations that could be performed
   immediately without blocking
poll and select
poll_table is defined in <linux/poll.h>
To add a wait queue to the poll_table, call

   void poll_wait(struct file *, wait_queue_head_t *, poll_table *);

The bit mask flags are also defined in <linux/poll.h>
POLLIN: set if the device can be read without blocking
poll and select
POLLOUT: set if the device can be written without blocking
POLLRDNORM: set if "normal" data is available for reading; a readable
device returns (POLLIN | POLLRDNORM)
POLLWRNORM: same meaning as POLLOUT; a writable device returns
(POLLOUT | POLLWRNORM)
POLLPRI: high-priority data can be read without blocking
poll and select
POLLHUP: set when a process reading the device sees end-of-file
POLLERR: an error condition has occurred
POLLRDBAND: out-of-band data is available for reading (associated with
sockets)
POLLWRBAND: data with nonzero priority can be written to the device
poll and select
Example:

static unsigned int scull_p_poll(struct file *filp, poll_table *wait)
{
    struct scull_pipe *dev = filp->private_data;
    unsigned int mask = 0;

    mutex_lock(&dev->mutex);
    poll_wait(filp, &dev->inq, wait);
    poll_wait(filp, &dev->outq, wait);
    if (dev->rp != dev->wp) /* circular buffer not empty */
        mask |= POLLIN | POLLRDNORM; /* readable */
    if (spacefree(dev)) /* circular buffer not full */
        mask |= POLLOUT | POLLWRNORM; /* writable */
    mutex_unlock(&dev->mutex);
    return mask;
}
poll and select
No end-of-file support: scullpipe does not implement it
If it did:
The reader could see an end-of-file when all writers close the file
Check dev->nwriters in read and poll
Problem: a reader may open the scullpipe before any writer does, which
would require blocking within open
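A sketch of what that check might look like in scull_p_poll, assuming a
dev->nwriters count maintained by open and release (this logic is not
actually implemented in scullpipe):

   /* after computing the readable/writable bits */
   if (dev->rp == dev->wp && dev->nwriters == 0)
       mask |= POLLHUP; /* empty and no writers left: end-of-file */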
Interaction with read and write
Reading from the device:
If there is data in the input buffer, return at least one byte;
poll returns POLLIN | POLLRDNORM
If no data is available:
If O_NONBLOCK is set, read returns -EAGAIN
poll must report the device unreadable until at least one byte arrives
At end-of-file, read returns 0 and poll returns POLLHUP
Interaction with read and write
Writing to the device:
If there is space in the output buffer, accept at least one byte;
poll reports that the device is writable by returning
POLLOUT | POLLWRNORM
If the output buffer is full, write blocks
If O_NONBLOCK is set, write returns -EAGAIN
poll reports that the file is not writable
If the device cannot accept any more data, write returns -ENOSPC
Interaction with read and write
In write, never wait for data transmission before returning; otherwise,
select may block
To make sure the output buffer has actually been transmitted, use the
fsync call
Interaction with read and write
To flush pending output, implement the fsync method

   int (*fsync) (struct file *file, loff_t start, loff_t end,
                 int datasync);

Should return only when the device has been completely flushed
datasync: used by file systems, ignored by drivers
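For a driver like scull that commits data inside write itself, there is
nothing left to flush; a minimal sketch (hypothetical, not part of the
scull sources):

   static int scull_fsync(struct file *filp, loff_t start, loff_t end,
                          int datasync)
   {
       return 0; /* write already pushed everything to the device */
   }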
The Underlying Data Structure

The Underlying Data Structure
When the poll call completes, the poll_table is deallocated and all of
its wait queue entries are removed
epoll reduces this overhead of setting up and tearing down the data
structure on every I/O call
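The saving is visible in user space: the epoll structure is built once
and reused across many waits (a sketch; fd is an assumed open
descriptor):

   #include <sys/epoll.h>

   int epfd = epoll_create1(0);        /* build the structure once */
   struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
   epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);

   for (;;) {                          /* reuse it for every wait */
       struct epoll_event ready;
       if (epoll_wait(epfd, &ready, 1, -1) > 0) {
           /* ready.data.fd can be read without blocking */
       }
   }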
Asynchronous Notification
Polling is inefficient for rare events
A solution: asynchronous notification
The application receives a signal whenever data becomes available
Two steps:
1. Specify a process as the owner of the file (so that the kernel knows
   whom to notify)
2. Set the FASYNC flag in the device via the fcntl command
Asynchronous Notification
Example (user space):

/* create a signal handler */
signal(SIGIO, &input_handler);
/* make the current pid the owner of stdin */
fcntl(STDIN_FILENO, F_SETOWN, getpid());
/* obtain the current file control flags */
oflags = fcntl(STDIN_FILENO, F_GETFL);
/* set the asynchronous flag */
fcntl(STDIN_FILENO, F_SETFL, oflags | FASYNC);
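The input_handler referenced above is not shown on the slide; a minimal
sketch that echoes whatever input is pending:

   #include <unistd.h>

   void input_handler(int signo)
   {
       char buf[256];
       ssize_t n = read(STDIN_FILENO, buf, sizeof(buf)); /* data is ready */
       if (n > 0)
           write(STDOUT_FILENO, buf, n); /* echo it back */
   }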
Asynchronous Notification
Some catches:
Not all devices support asynchronous notification; it is usually
available for sockets and ttys
The application still needs to know which input file to process, so
poll or select is still needed
The Driver's Point of View
1. When F_SETOWN is invoked, a value is assigned to filp->f_owner
2. When F_SETFL is executed to change the status of FASYNC, the
   driver's fasync method is called

static int scull_p_fasync(int fd, struct file *filp, int mode)
{
    struct scull_pipe *dev = filp->private_data;
    return fasync_helper(fd, filp, mode, &dev->async_queue);
}
The Driver's Point of View
fasync_helper adds processes to, or removes them from, the asynchronous
notification list

   int fasync_helper(int fd, struct file *filp, int mode,
                     struct fasync_struct **fa);

3. When data arrives, send a SIGIO signal to all processes registered
   for asynchronous notification
Near the end of write, notify blocked readers:

   if (dev->async_queue)
       kill_fasync(&dev->async_queue, SIGIO, POLL_IN);

Similarly for read, as needed (see the sketch below)
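The read side is symmetric; once read has freed buffer space,
asynchronous writers can be told the device is writable (a sketch, not
in the scull sources):

   /* near the end of read */
   if (dev->async_queue)
       kill_fasync(&dev->async_queue, SIGIO, POLL_OUT);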
The Driver's Point of View
4. When the file is closed, remove it from the list of asynchronous
   readers in the release method:

   scull_p_fasync(-1, filp, 0);
Access Control
Access Control on a Device File
Prevents unauthorized users from using the device
Sometimes permits only one authorized user to open the device at a time
Single-Open Devices
Example: scullsingle

static atomic_t scull_s_available = ATOMIC_INIT(1);

static int scull_s_open(struct inode *inode, struct file *filp)
{
    struct scull_dev *dev = &scull_s_device;

    /* atomic_dec_and_test returns true if the decremented value is 0 */
    if (!atomic_dec_and_test(&scull_s_available)) {
        atomic_inc(&scull_s_available);
        return -EBUSY; /* already open */
    }

    /* then, everything else is the same as before */
    if ((filp->f_flags & O_ACCMODE) == O_WRONLY)
        scull_trim(dev);
    filp->private_data = dev;
    return 0; /* success */
}
Single-Open Devices
The release call marks the device idle again:

static int scull_s_release(struct inode *inode, struct file *filp)
{
    atomic_inc(&scull_s_available); /* release the device */
    return 0;
}
Restricting Access to a Single User (with Multiple Processes) at a Time
Example: sculluid
Includes the following in the open call:

    spin_lock(&scull_u_lock);
    if (scull_u_count && /* someone is using the device */
        (scull_u_owner != current->uid) &&  /* not the same user */
        (scull_u_owner != current->euid) && /* not the same effective uid (for su) */
        !capable(CAP_DAC_OVERRIDE)) {       /* no root override */
        spin_unlock(&scull_u_lock);
        return -EBUSY; /* -EPERM would confuse the user */
    }
    if (scull_u_count == 0)
        scull_u_owner = current->uid; /* grab it */
    scull_u_count++;
    spin_unlock(&scull_u_lock);
Restricting Access to a Single User (with Multiple Processes) at a Time
Includes the following in the release call:

static int scull_u_release(struct inode *inode, struct file *filp)
{
    spin_lock(&scull_u_lock);
    scull_u_count--; /* nothing else */
    spin_unlock(&scull_u_lock);
    return 0;
}
Blocking open as an Alternative to EBUSY (scullwuid)
A user might prefer to wait rather than get an error
E.g., a data communication channel

    spin_lock(&scull_w_lock);
    while (!scull_w_available()) {
        spin_unlock(&scull_w_lock);
        if (filp->f_flags & O_NONBLOCK)
            return -EAGAIN;
        if (wait_event_interruptible(scull_w_wait, scull_w_available()))
            return -ERESTARTSYS; /* tell the fs layer to handle it */
        spin_lock(&scull_w_lock); /* retest the condition under the lock */
    }
    if (scull_w_count == 0)
        scull_w_owner = current->uid; /* grab it */
    scull_w_count++;
    spin_unlock(&scull_w_lock);
Blocking open as an Alternative to EBUSY (scullwuid)
The release method wakes pending processes:

static int scull_w_release(struct inode *inode, struct file *filp)
{
    int temp;

    spin_lock(&scull_w_lock);
    scull_w_count--;
    temp = scull_w_count;
    spin_unlock(&scull_w_lock);

    if (temp == 0)
        wake_up_interruptible_sync(&scull_w_wait);
    return 0;
}
Blocking open as an Alternative to EBUSY
Might not be the right semantics for interactive users
A blocked cp vs. an immediate -EBUSY or -EPERM return
These are incompatible policies for the same device
One solution: one device node per policy
Cloning the Device on open
Allows the creation of private, virtual devices
E.g., one virtual scull device per process, keyed by the device number
of the process's controlling tty
Example: scullpriv
Cloning the Device on open

static int scull_c_open(struct inode *inode, struct file *filp)
{
    struct scull_dev *dev;
    dev_t key;

    if (!current->signal->tty) {
        PDEBUG("Process \"%s\" has no ctl tty\n", current->comm);
        return -EINVAL;
    }
    key = tty_devnum(current->signal->tty);

    /* look for a device in the list */
    spin_lock(&scull_c_lock);
    dev = scull_c_lookfor_device(key);
    spin_unlock(&scull_c_lock);

    if (!dev)
        return -ENOMEM;

    ... /* then, everything else is the same as before */
Cloning the Device on open

/* The clone-specific data structure includes a key field */
struct scull_listitem {
    struct scull_dev device;
    dev_t key;
    struct list_head list;
};

/* The list of devices, and a lock to protect it */
static LIST_HEAD(scull_c_list);
static spinlock_t scull_c_lock = SPIN_LOCK_UNLOCKED;
/* on current kernels, use DEFINE_SPINLOCK(scull_c_lock) instead */
Cloning the Device on open

/* Look for a device, or create one if missing;
 * the caller holds scull_c_lock */
static struct scull_dev *scull_c_lookfor_device(dev_t key)
{
    struct scull_listitem *lptr;

    list_for_each_entry(lptr, &scull_c_list, list) {
        if (lptr->key == key)
            return &(lptr->device);
    }

    /* not found */
    lptr = kzalloc(sizeof(struct scull_listitem), GFP_KERNEL);
    if (!lptr)
        return NULL;
Cloning the Device on open

    /* initialize the device */
    lptr->key = key;
    scull_trim(&(lptr->device));
    mutex_init(&(lptr->device.mutex));

    /* place it in the list */
    list_add(&lptr->list, &scull_c_list);

    return &(lptr->device);
}
What's going on?
[Diagram: scull_c_list is the head of the list; each scull_listitem
node embeds a struct scull_dev device, a dev_t key, and a
struct list_head { struct list_head *next; struct list_head *prev; }
that chains the nodes together.]
(put|get)_user()
copy_to_user and copy_from_user were seen previously
put_user and get_user are data transfer functions optimized for the
most-used data sizes (1, 2, 4, and 8 bytes)
If the size does not match, the compiler emits a cryptic error:
"conversion to non-scalar type requested"
#include <linux/uaccess.h>

put_user(datum, ptr)
Writes datum to the user-space address ptr
Calls access_ok()
Returns 0 on success, -EFAULT on error
(put|get)_user()
__put_user(datum, ptr)
Does not check access_ok(); can still fail if the user-space memory is
not writable

get_user(local, ptr)
Reads from the user-space address ptr and stores the retrieved value
in local
Calls access_ok()
Returns 0 on success, -EFAULT on error

__get_user(local, ptr)
Does not check access_ok(); can still fail if the user-space memory is
not readable
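A sketch showing both calls in a driver's read and write methods (the
one-byte device_state and the method names are hypothetical):

   #include <linux/fs.h>
   #include <linux/uaccess.h>

   static u8 device_state; /* hypothetical one-byte device register */

   static ssize_t state_read(struct file *filp, char __user *buf,
                             size_t count, loff_t *f_pos)
   {
       if (count == 0)
           return 0;
       if (put_user(device_state, buf)) /* checks access_ok() itself */
           return -EFAULT;
       return 1; /* one byte transferred */
   }

   static ssize_t state_write(struct file *filp, const char __user *buf,
                              size_t count, loff_t *f_pos)
   {
       if (count == 0)
           return 0;
       if (get_user(device_state, buf)) /* -EFAULT on a bad address */
           return -EFAULT;
       return 1;
   }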