Filesystem Registration and Mounting

How filesystems plug into VFS and how mount() works

Registering a filesystem type

Every filesystem must define a file_system_type and register it with VFS:

/* include/linux/fs.h */
/*
 * Describes one filesystem type to the VFS. An instance is handed to
 * register_filesystem()/unregister_filesystem(), and its name is what
 * shows up in /proc/filesystems and what mount(2) is asked for.
 */
struct file_system_type {
    const char *name;       /* type name: "ext4", "tmpfs", "proc", etc. */
    int fs_flags;           /* bitmask: FS_REQUIRES_DEV, FS_USERNS_MOUNT, etc. */
    int (*init_fs_context)(struct fs_context *); /* fill in mount context (e.g. set fc->ops) */
    void (*kill_sb)(struct super_block *);       /* unmount cleanup for one superblock */
    struct module *owner;              /* module providing this type (THIS_MODULE in the examples) */
    struct file_system_type *next;     /* linked list of all filesystems */
    struct hlist_head fs_supers;       /* all superblocks of this type */
};

/* Register at module load time */
static struct file_system_type my_fs_type = {
    .owner = THIS_MODULE,
    .name = "myfs",
    /* Called when a mount of this type is being set up. */
    .init_fs_context = my_init_fs_context,
    /* for simple in-memory fs */
    .kill_sb = kill_litter_super,
};

/* Module entry point: registers my_fs_type with the VFS and reports the result. */
static int __init myfs_init(void)
{
    int err = register_filesystem(&my_fs_type);

    return err;
}
module_init(myfs_init);

/* Module exit point: removes the registration made in myfs_init(). */
static void __exit myfs_exit(void)
{
    unregister_filesystem(&my_fs_type);
}
module_exit(myfs_exit);

After registration, the filesystem appears in /proc/filesystems:

cat /proc/filesystems
# nodev  sysfs
# nodev  tmpfs
# nodev  bdev
# nodev  proc
#        ext4
# nodev  btrfs
#        vfat

nodev means the filesystem doesn't require a block device.

The mount syscall

The mount(2) system call attaches a filesystem to the directory tree; see the mount(2) man page for the full argument list.

/* User calls: mount("/dev/sda1", "/mnt/data", "ext4", 0, "") */
SYSCALL_DEFINE5(mount, ...)
    → path_mount()
        → do_new_mount()
            1. get_fs_type("ext4")    ← find registered ext4 fs_type
            2. fs_context_for_mount() ← allocate fs_context
                 → fs_type->init_fs_context(fc)  ← filesystem fills context
            3. vfs_get_tree()
                 → fc->ops->get_tree(fc)          ← create/find superblock
                    → ext4_get_tree() → get_tree_bdev()
                        a. blkdev_get_by_path("/dev/sda1") ← open block device
                        b. sget_dev(): find or create super_block for this device
                        c. ext4_fill_super(): read superblock from disk,
                           set s_op, s_root, etc.
            4. do_new_mount_fc(): attach mount to the tree
                 → graft_tree(): insert new vfsmount at mountpoint

/* The result: a new struct mount attached to the namespace's mount tree */

Mount namespace and vfsmount

Each process belongs to a mount namespace (struct mnt_namespace) containing a tree of vfsmount objects. Mount namespaces were introduced in Linux 2.4.19 (see mount_namespaces(7)); before that, all processes shared a single global mount table. Each vfsmount represents one mounted filesystem instance in that tree:

/*
 * The part of a mount that path resolution works with: which dentry is
 * the root of the mounted tree, which superblock backs it, and the
 * per-mount flags.
 */
struct vfsmount {
    struct dentry *mnt_root;     /* root dentry of this mounted fs */
    struct super_block *mnt_sb;  /* superblock backing this mount */
    int mnt_flags;               /* per-mount flags: MNT_READONLY, MNT_NOSUID, etc. */
};

/*
 * One node in a namespace's mount tree: embeds the vfsmount and records
 * where this mount is attached relative to its parent.
 */
struct mount {
    struct vfsmount mnt;        /* embedded vfsmount for this mount */
    struct mount *mnt_parent;   /* mount that this is mounted on */
    struct dentry *mnt_mountpoint; /* dentry in parent where we're mounted */
    struct list_head mnt_child;    /* link in mnt_parent's list of children */
};

When path resolution reaches a mountpoint dentry, follow_mount() switches to the mounted filesystem's root dentry, crossing into the new vfsmount.

/proc/mounts and /proc/self/mountinfo

# All current mounts
cat /proc/mounts
# sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0
# proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0
# /dev/sda1 / ext4 rw,relatime,errors=remount-ro 0 0
# tmpfs /tmp tmpfs rw,nosuid,nodev 0 0

# Detailed format with mount IDs (used by systemd)
cat /proc/self/mountinfo
# 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext4 /dev/sdb rw,errors=remount-ro
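# Fields, left to right:
#   mount_id=36  parent_id=35  maj:min=98:0  root=/mnt1  mount_point=/mnt2
#   mount_opts=rw,noatime  optional fields=master:1 (peer group)  separator="-"
#   fstype=ext4  source=/dev/sdb  super_opts=rw,errors=remount-ro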

Implementing a simple in-memory filesystem

Here's the minimal skeleton for a new filesystem:

/*
 * Directory inode operations: creation goes through the filesystem's own
 * myfs_create()/myfs_mkdir(); lookup and removal use the generic simple_*
 * helpers.
 */
static struct inode_operations myfs_dir_iops = {
    .create = myfs_create,
    .mkdir = myfs_mkdir,
    .lookup = simple_lookup,
    .unlink = simple_unlink,
    .rmdir = simple_rmdir,
};

/*
 * Regular-file operations: every entry is a generic helper, with
 * noop_fsync for the fsync hook.
 */
static struct file_operations myfs_file_fops = {
    .llseek = generic_file_llseek,
    .read_iter = generic_file_read_iter,
    .write_iter = generic_file_write_iter,
    .mmap = generic_file_mmap,
    .fsync = noop_fsync,
};

/*
 * Address space operations (page cache).
 *
 * NOTE(review): nothing in this skeleton attaches this table to an inode's
 * mapping; presumably myfs_create() is meant to do so -- confirm.
 * NOTE(review): simple_writepage does not look like an existing libfs
 * helper, and generic_file_write_iter() appears to need .write_begin and
 * .write_end here; ramfs uses the exported ram_aops table instead. Verify
 * against the target kernel version before copying this.
 */
static struct address_space_operations myfs_aops = {
    .dirty_folio    = filemap_dirty_folio,
    .writepage      = simple_writepage,
};

/* Superblock operations: generic helpers for both statfs and inode drop. */
static struct super_operations myfs_super_ops = {
    .drop_inode = generic_delete_inode,
    .statfs = simple_statfs,
};

/*
 * Fill in the superblock (called during mount, as the callback passed to
 * get_tree_nodev()).
 *
 * Sets the size limits, magic number and super_operations on @sb, then
 * creates the root directory inode and the root dentry. @fc is unused.
 *
 * Returns 0 on success, or -ENOMEM if the root inode or the root dentry
 * cannot be allocated.
 */
static int myfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
    struct inode *root_inode;

    sb->s_maxbytes = MAX_LFS_FILESIZE;
    sb->s_blocksize = PAGE_SIZE;
    sb->s_blocksize_bits = PAGE_SHIFT;
    sb->s_magic = 0x4D594653;  /* 'MYFS' */
    sb->s_op = &myfs_super_ops;

    /* Create root inode; bail out before touching it if allocation failed */
    root_inode = new_inode(sb);
    if (!root_inode)
        return -ENOMEM;

    root_inode->i_ino = 1;
    root_inode->i_mode = S_IFDIR | 0755;
    root_inode->i_op = &myfs_dir_iops;
    root_inode->i_fop = &simple_dir_operations;
    /*
     * NOTE(review): directory inodes conventionally carry a link count of 2
     * (ramfs bumps it for directories); confirm whether this skeleton
     * should do the same.
     */

    /*
     * Create root dentry. d_make_root() releases the inode itself when it
     * fails, so no iput() is needed on the error path.
     */
    sb->s_root = d_make_root(root_inode);
    if (!sb->s_root)
        return -ENOMEM;

    return 0;
}

/*
 * get_tree hook for myfs: hands the context to get_tree_nodev() with
 * myfs_fill_super as the superblock fill callback and returns its result.
 */
static int myfs_get_tree(struct fs_context *fc)
{
    int ret = get_tree_nodev(fc, myfs_fill_super);

    return ret;
}

/* Mount-context operations for myfs; only the get_tree hook is provided. */
static const struct fs_context_operations myfs_context_ops = {
    .get_tree = myfs_get_tree
};

/*
 * init_fs_context hook for myfs: points the context at myfs_context_ops.
 * Always returns 0.
 */
static int myfs_init_fs_context(struct fs_context *fc)
{
    fc->ops = &myfs_context_ops;

    return 0;
}

/* Filesystem type descriptor tying the "myfs" name to the hooks above. */
static struct file_system_type myfs_type = {
    .owner = THIS_MODULE,
    .name = "myfs",
    .init_fs_context = myfs_init_fs_context,
    .kill_sb = kill_litter_super,
};

bind mount and move mount

# Bind mount: mount a directory at another location
mount --bind /original/path /new/path
# Creates a new vfsmount pointing to the same dentry/inode tree

# Move mount: atomically change mount location
mount --move /old/mountpoint /new/mountpoint

Bind mounts are widely used in containers to share host directories into a container's mount namespace (see mount(8) and mount_namespaces(7)).