Skip to content

Commit 070941f

Browse files
hsiangkao authored and intel-lab-lkp committed
erofs: add file-backed mount support
The need for file-backed mounts has actually been around for years: for containers and other sandbox use cases, there will be thousands (or even more) of authenticated (sub)images running on the same host, unlike OS images. Of course, all scenarios can use the same EROFS on-disk format, but bdev-backed mounts just work well for OS images since golden data is dumped into real block devices. However, it's somewhat hard for container runtimes to manage and isolate so many unnecessary virtual block devices safely and efficiently [1]: they just look like a burden to orchestrators, and file-backed mounts are indeed preferred. There were already enough attempts such as Incremental FS, the original ComposeFS and PuzzleFS acting in the same way for immutable fses. As for current EROFS users, ComposeFS, containerd and Android APEXs will directly benefit from it. On the other hand, the previous experimental feature "erofs over fscache" was once also intended to provide a similar solution (inspired by the Incremental FS discussion [2]), but the following facts show file-backed mounts will be a better approach: - Fscache infrastructure has recently been moved into the new Netfslib, which is an unexpected dependency for EROFS, although it originally claims "it could be used for caching other things such as ISO9660 filesystems too." [3] - It takes an unexpectedly long time to upstream Fscache/Cachefiles enhancements. For example, the failover feature took more than one year, and the daemonless feature is still far behind now; - Ongoing HSM "fanotify pre-content hooks" [4] together with this will perfectly supersede "erofs over fscache" in a simpler way since developers (mainly containerd folks) could leverage their existing caching mechanism entirely in userspace instead of strictly following the predefined in-kernel caching tree hierarchy. 
After "fanotify pre-content hooks" lands upstream to provide the same functionality, "erofs over fscache" will be removed (as an EROFS internal improvement, so that EROFS will not have to bother with on-demand fetching and/or caching improvements anymore). [1] containers/storage#2039 [2] https://lore.kernel.org/r/CAOQ4uxjbVxnubaPjVaGYiSwoGDTdpWbB=w_AeM6YM=zVixsUfQ@mail.gmail.com [3] https://docs.kernel.org/filesystems/caching/fscache.html [4] https://lore.kernel.org/r/[email protected] Closes: composefs/composefs#144 Signed-off-by: Gao Xiang <[email protected]>
1 parent b45adcb commit 070941f

File tree

5 files changed

+99
-43
lines changed

5 files changed

+99
-43
lines changed

fs/erofs/Kconfig

+17
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,23 @@ config EROFS_FS_SECURITY
7474

7575
If you are not using a security module, say N.
7676

77+
config EROFS_FS_BACKED_BY_FILE
78+
bool "File-backed EROFS filesystem support"
79+
depends on EROFS_FS
80+
default y
81+
help
82+
This allows EROFS to use filesystem image files directly, without
83+
the intermediation of loopback block devices or the like. It is
84+
particularly useful for container images with numerous blobs and
85+
other sandboxes, where loop devices behave intricately. It can also
86+
be used to simplify error-prone lifetime management of unnecessary
87+
virtual block devices.
88+
89+
Note that this feature, along with ongoing fanotify pre-content
90+
hooks, will eventually replace "EROFS over fscache."
91+
92+
If you don't want to enable this feature, say N.
93+
7794
config EROFS_FS_ZIP
7895
bool "EROFS Data Compression Support"
7996
depends on EROFS_FS

fs/erofs/data.c

+21-14
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
5959

6060
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
6161
{
62-
if (erofs_is_fscache_mode(sb))
63-
buf->mapping = EROFS_SB(sb)->s_fscache->inode->i_mapping;
62+
struct erofs_sb_info *sbi = EROFS_SB(sb);
63+
64+
if (erofs_is_fileio_mode(sbi))
65+
buf->mapping = file_inode(sbi->fdev)->i_mapping;
66+
else if (erofs_is_fscache_mode(sb))
67+
buf->mapping = sbi->s_fscache->inode->i_mapping;
6468
else
6569
buf->mapping = sb->s_bdev->bd_mapping;
6670
}
@@ -189,10 +193,22 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
189193
return err;
190194
}
191195

196+
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
197+
struct erofs_device_info *dif)
198+
{
199+
map->m_bdev = NULL;
200+
if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
201+
map->m_bdev = file_bdev(dif->file);
202+
map->m_daxdev = dif->dax_dev;
203+
map->m_dax_part_off = dif->dax_part_off;
204+
map->m_fscache = dif->fscache;
205+
}
206+
192207
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
193208
{
194209
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
195210
struct erofs_device_info *dif;
211+
erofs_off_t startoff, length;
196212
int id;
197213

198214
map->m_bdev = sb->s_bdev;
@@ -212,29 +228,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
212228
up_read(&devs->rwsem);
213229
return 0;
214230
}
215-
map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL;
216-
map->m_daxdev = dif->dax_dev;
217-
map->m_dax_part_off = dif->dax_part_off;
218-
map->m_fscache = dif->fscache;
231+
erofs_fill_from_devinfo(map, dif);
219232
up_read(&devs->rwsem);
220233
} else if (devs->extra_devices && !devs->flatdev) {
221234
down_read(&devs->rwsem);
222235
idr_for_each_entry(&devs->tree, dif, id) {
223-
erofs_off_t startoff, length;
224-
225236
if (!dif->mapped_blkaddr)
226237
continue;
238+
227239
startoff = erofs_pos(sb, dif->mapped_blkaddr);
228240
length = erofs_pos(sb, dif->blocks);
229-
230241
if (map->m_pa >= startoff &&
231242
map->m_pa < startoff + length) {
232243
map->m_pa -= startoff;
233-
map->m_bdev = dif->bdev_file ?
234-
file_bdev(dif->bdev_file) : NULL;
235-
map->m_daxdev = dif->dax_dev;
236-
map->m_dax_part_off = dif->dax_part_off;
237-
map->m_fscache = dif->fscache;
244+
erofs_fill_from_devinfo(map, dif);
238245
break;
239246
}
240247
}

fs/erofs/inode.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,10 @@ static int erofs_fill_inode(struct inode *inode)
258258
}
259259

260260
mapping_set_large_folios(inode->i_mapping);
261-
if (erofs_inode_is_data_compressed(vi->datalayout)) {
261+
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb))) {
262+
/* XXX: data I/Os will be implemented in the following patches */
263+
err = -EOPNOTSUPP;
264+
} else if (erofs_inode_is_data_compressed(vi->datalayout)) {
262265
#ifdef CONFIG_EROFS_FS_ZIP
263266
DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
264267
erofs_info, inode->i_sb,

fs/erofs/internal.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ typedef u32 erofs_blk_t;
4949
struct erofs_device_info {
5050
char *path;
5151
struct erofs_fscache *fscache;
52-
struct file *bdev_file;
52+
struct file *file;
5353
struct dax_device *dax_dev;
5454
u64 dax_part_off;
5555

@@ -130,6 +130,7 @@ struct erofs_sb_info {
130130

131131
struct erofs_sb_lz4_info lz4;
132132
#endif /* CONFIG_EROFS_FS_ZIP */
133+
struct file *fdev;
133134
struct inode *packed_inode;
134135
struct erofs_dev_context *devs;
135136
struct dax_device *dax_dev;
@@ -190,9 +191,15 @@ struct erofs_sb_info {
190191
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
191192
#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option)
192193

194+
static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
195+
{
196+
return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
197+
}
198+
193199
static inline bool erofs_is_fscache_mode(struct super_block *sb)
194200
{
195-
return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !sb->s_bdev;
201+
return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) &&
202+
!erofs_is_fileio_mode(EROFS_SB(sb)) && !sb->s_bdev;
196203
}
197204

198205
enum {

fs/erofs/super.c

+48-26
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <linux/fs_context.h>
1111
#include <linux/fs_parser.h>
1212
#include <linux/exportfs.h>
13+
#include <linux/backing-dev.h>
1314
#include "xattr.h"
1415

1516
#define CREATE_TRACE_POINTS
@@ -161,7 +162,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
161162
struct erofs_sb_info *sbi = EROFS_SB(sb);
162163
struct erofs_fscache *fscache;
163164
struct erofs_deviceslot *dis;
164-
struct file *bdev_file;
165+
struct file *file;
165166

166167
dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP);
167168
if (IS_ERR(dis))
@@ -183,13 +184,17 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
183184
return PTR_ERR(fscache);
184185
dif->fscache = fscache;
185186
} else if (!sbi->devs->flatdev) {
186-
bdev_file = bdev_file_open_by_path(dif->path, BLK_OPEN_READ,
187-
sb->s_type, NULL);
188-
if (IS_ERR(bdev_file))
189-
return PTR_ERR(bdev_file);
190-
dif->bdev_file = bdev_file;
191-
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(bdev_file),
192-
&dif->dax_part_off, NULL, NULL);
187+
file = erofs_is_fileio_mode(sbi) ?
188+
filp_open(dif->path, O_RDONLY, 0) :
189+
bdev_file_open_by_path(dif->path,
190+
BLK_OPEN_READ, sb->s_type, NULL);
191+
if (IS_ERR(file))
192+
return PTR_ERR(file);
193+
194+
dif->file = file;
195+
if (!erofs_is_fileio_mode(sbi))
196+
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
197+
&dif->dax_part_off, NULL, NULL);
193198
}
194199

195200
dif->blocks = le32_to_cpu(dis->blocks);
@@ -566,15 +571,16 @@ static void erofs_set_sysfs_name(struct super_block *sb)
566571
{
567572
struct erofs_sb_info *sbi = EROFS_SB(sb);
568573

569-
if (erofs_is_fscache_mode(sb)) {
570-
if (sbi->domain_id)
571-
super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id,
572-
sbi->fsid);
573-
else
574-
super_set_sysfs_name_generic(sb, "%s", sbi->fsid);
575-
return;
576-
}
577-
super_set_sysfs_name_id(sb);
574+
if (sbi->domain_id)
575+
super_set_sysfs_name_generic(sb, "%s,%s", sbi->domain_id,
576+
sbi->fsid);
577+
else if (sbi->fsid)
578+
super_set_sysfs_name_generic(sb, "%s", sbi->fsid);
579+
else if (erofs_is_fileio_mode(sbi))
580+
super_set_sysfs_name_generic(sb, "%s",
581+
bdi_dev_name(sb->s_bdi));
582+
else
583+
super_set_sysfs_name_id(sb);
578584
}
579585

580586
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
@@ -589,14 +595,15 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
589595
sb->s_op = &erofs_sops;
590596

591597
sbi->blkszbits = PAGE_SHIFT;
592-
if (erofs_is_fscache_mode(sb)) {
598+
if (!sb->s_bdev) {
593599
sb->s_blocksize = PAGE_SIZE;
594600
sb->s_blocksize_bits = PAGE_SHIFT;
595601

596-
err = erofs_fscache_register_fs(sb);
597-
if (err)
598-
return err;
599-
602+
if (erofs_is_fscache_mode(sb)) {
603+
err = erofs_fscache_register_fs(sb);
604+
if (err)
605+
return err;
606+
}
600607
err = super_setup_bdi(sb);
601608
if (err)
602609
return err;
@@ -693,11 +700,24 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
693700
static int erofs_fc_get_tree(struct fs_context *fc)
694701
{
695702
struct erofs_sb_info *sbi = fc->s_fs_info;
703+
int ret;
696704

697705
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
698706
return get_tree_nodev(fc, erofs_fc_fill_super);
699707

700-
return get_tree_bdev(fc, erofs_fc_fill_super);
708+
ret = get_tree_bdev(fc, erofs_fc_fill_super);
709+
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
710+
if (ret == -ENOTBLK) {
711+
if (!fc->source)
712+
return invalf(fc, "No source specified");
713+
sbi->fdev = filp_open(fc->source, O_RDONLY, 0);
714+
if (IS_ERR(sbi->fdev))
715+
return PTR_ERR(sbi->fdev);
716+
717+
return get_tree_nodev(fc, erofs_fc_fill_super);
718+
}
719+
#endif
720+
return ret;
701721
}
702722

703723
static int erofs_fc_reconfigure(struct fs_context *fc)
@@ -727,8 +747,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data)
727747
struct erofs_device_info *dif = ptr;
728748

729749
fs_put_dax(dif->dax_dev, NULL);
730-
if (dif->bdev_file)
731-
fput(dif->bdev_file);
750+
if (dif->file)
751+
fput(dif->file);
732752
erofs_fscache_unregister_cookie(dif->fscache);
733753
dif->fscache = NULL;
734754
kfree(dif->path);
@@ -801,6 +821,8 @@ static void erofs_kill_sb(struct super_block *sb)
801821
erofs_fscache_unregister_fs(sb);
802822
kfree(sbi->fsid);
803823
kfree(sbi->domain_id);
824+
if (sbi->fdev)
825+
fput(sbi->fdev);
804826
kfree(sbi);
805827
sb->s_fs_info = NULL;
806828
}
@@ -903,7 +925,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
903925
buf->f_namelen = EROFS_NAME_LEN;
904926

905927
if (uuid_is_null(&sb->s_uuid))
906-
buf->f_fsid = u64_to_fsid(erofs_is_fscache_mode(sb) ? 0 :
928+
buf->f_fsid = u64_to_fsid(!sb->s_bdev ? 0 :
907929
huge_encode_dev(sb->s_bdev->bd_dev));
908930
else
909931
buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);

0 commit comments

Comments
 (0)