diff --git a/ChangeLog b/ChangeLog index df460b6..7b734e2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Sat May 5 21:00:26 EEST 2007 - Theodoros V. Kalamatianos + * Release 0.0.5 + * Added the ability to use visible temporary files, as a way to avoid + the file descriptor usage limits. As a side-effect, if fuseflt is shot + in the head (kill -9, SIGSEGV and similar) the temporary files will + have to be removed manually. This mode has to be explicitly enabled in + the configuration file. + * Ran fuseflt under valgrind and eliminated most memory leaks. + * Minor documentation updates + Sat May 5 15:37:56 EEST 2007 - Theodoros V. Kalamatianos * Release 0.0.4 * Fixed broken caching code which used to cache all file descriptors, diff --git a/FAQ b/FAQ index 954e872..78a3538 100644 --- a/FAQ +++ b/FAQ @@ -45,6 +45,27 @@ To find out read the `Internals' section in the README. Then read the source code itself for details. +* Should I use large caches ? + +That's up to you. Large caches can help performance in some cases, but they are +very taxing on resources. When using invisible temporary files, fuseflt won't +let you exceed the per-process open file descriptor limit with _just_ the cache +(yes, you can still reach that limit by opening multiple files simultaneously), +but it won't protect you from reaching any other limits. + +In addition, there are no protections when using visible temporary files. When +using large (e.g. in the 10,000 range) caches within that context, you might +even hit limits imposed by the kernel or the temporary directory filesystem. +For example ext3fs with the dir_index option enabled starts to present filename +hash collisions when there are over 15,000 files in one directory. At least, the +original "ghost" temporary file code does not suffer from that problem, as the +files are deleted immediately after being created. + +So when using large caches, beware and stress-check your setup before relying +too much on it. 
Actually, that might be a good idea whether you use large caches +or not... + + * I found a bug ! Good! Now report it at ... or even better fix it and diff --git a/NEWS b/NEWS index 7119b65..4feaf6f 100644 --- a/NEWS +++ b/NEWS @@ -1,9 +1,8 @@ -0.0.4: - * Fixed broken caching code which used to cache all file descriptors, - rather than just the ones corresponding to conversion filter output. - fuseflt won't reach the open fd limit so fast now. - * Added a cache size limit and a pruning function. fuseflt should now be - far more robust when cache entries accumulate way too fast (like when - users dare try absurd and completely illogical things such as running - `find' or even `ls -l' on a large directory. Running `ls'... what are - people thinking these days?) +0.0.5: + * Added the ability to use visible temporary files, as a way to avoid + the file descriptor usage limits. As a side-effect, if fuseflt is shot + in the head (kill -9, SIGSEGV and similar) the temporary files will + have to be removed manually. This mode has to be explicitly enabled in + the configuration file. + * Ran fuseflt under valgrind and eliminated most memory leaks. + * Minor documentation updates diff --git a/README b/README index 2891c45..941534a 100644 --- a/README +++ b/README @@ -85,6 +85,14 @@ cache_max_nr: an integer that indicates the maximum number of cache entries. temp_dir: the directory where the temporary files will be created. +ghost_temp: By default fuseflt uses "ghost" temporary files, files that have + been created and then unlinked. While this method has the advantage of + not leaving files around, it does require an open file descriptor for + each temporary file. By setting ghost_temp to 0, the total usage of file + descriptors is much saner, but the temporary files become visible and if + fuseflt is killed abruptly (kill -9, segmentation faults, the end of the + universe as we know it, etc.) they will have to be removed manually. 
+ ext_in: along with a corresponding ext_out variable, it creates a filename filter, where filenames that end in the string supplied in ext_in will be changed to end in the one supplied in ext_out instead. Both variables diff --git a/fuseflt.c b/fuseflt.c index 7484d19..394ae5c 100644 --- a/fuseflt.c +++ b/fuseflt.c @@ -15,7 +15,7 @@ -#define DEBUG 1 +#define DEBUG 0 #define FDC_MAX_AGE 120 #define FDC_CHK_INT 5 @@ -120,8 +120,11 @@ static void *fdc = NULL; static int fdc_nr = 0; static int fdc_nr_max = 0; +static int fdc_ghost_tmp = 1; + static pthread_t fdc_expire_thread; static pthread_mutex_t fdc_mutex = PTHREAD_MUTEX_INITIALIZER; +static int fdc_expire_thread_stop = 0; static int fdc_max_age = FDC_MAX_AGE; static int fdc_chk_int = FDC_CHK_INT; @@ -167,8 +170,18 @@ static int fdc_open(const char *path) if (fdcres != NULL) { DBGMSG("fdc: retrieve %s", vpath); - rfd = dup((*fdcres)->fd); gettimeofday(&((*fdcres)->et), NULL); + if (fdc_ghost_tmp) { + rfd = dup((*fdcres)->fd); + } else { + rfd = open((*fdcres)->tfn, O_RDONLY); + if (rfd == -1) { + int e = errno; + DBGMSG("fdc: cache file open failure [%s:%s]", + vpath, (*fdcres)->tfn); + return -e; + } + } pthread_mutex_unlock(&fdc_mutex); @@ -189,7 +202,7 @@ static int fdc_open(const char *path) int ifd = res; char tfn[PATH_MAX + 1]; - snprintf(tfn, PATH_MAX + 1, "%s/fuseflt.XXXXXX", tmpdir); + snprintf(tfn, PATH_MAX + 1, "%s/.fuseflt.XXXXXX", tmpdir); int ofd = mkstemp(tfn); if (ofd == -1) { @@ -199,7 +212,8 @@ static int fdc_open(const char *path) return -errno; } - unlink(tfn); + if (fdc_ghost_tmp) + unlink(tfn); pid_t pid = fork(); if (pid == -1) { @@ -240,7 +254,13 @@ static int fdc_open(const char *path) } strncpy(fdcent->path, vpath, PATH_MAX); gettimeofday(&(fdcent->et), NULL); - fdcent->fd = dup(rfd); + if (fdc_ghost_tmp) { + fdcent->fd = dup(rfd); + fdcent->tfn = NULL; + } else { + fdcent->fd = -1; + fdcent->tfn = strdup(tfn); + } fcntl(fdcent->fd, FD_CLOEXEC, 1); /* Cache the file descriptor for future 
use */ @@ -251,6 +271,12 @@ static int fdc_open(const char *path) fdcres = tsearch(fdcent, &fdc, fdcentcmp); if (fdcres == NULL) { int e = errno; + if (fdc_ghost_tmp) { + close(fdcent->fd); + } else { + unlink(fdcent->tfn); + free(fdcent->tfn); + } free(fdcent); close(rfd); free(vpath); @@ -259,8 +285,14 @@ static int fdc_open(const char *path) return -e; } else if (*fdcres != fdcent) { (*fdcres)->et = fdcent->et; - close((*fdcres)->fd); - (*fdcres)->fd = fdcent->fd; + if (fdc_ghost_tmp) { + close((*fdcres)->fd); + (*fdcres)->fd = fdcent->fd; + } else { + unlink((*fdcres)->tfn); + free((*fdcres)->tfn); + (*fdcres)->tfn = fdcent->tfn; + } free(fdcent); } else { ++fdc_nr; @@ -280,7 +312,12 @@ static void fdc_walk_delete_removed(const void *p, const VISIT v, const int d) static void fdc_free(void *p) { DBGMSG("fdc: remove %s", ((fdcent_t *)p)->path); - close(((fdcent_t *)p)->fd); + if (fdc_ghost_tmp) { + close(((fdcent_t *)p)->fd); + } else { + unlink(((fdcent_t *)p)->tfn); + free(((fdcent_t *)p)->tfn); + } --fdc_nr; free(p); } @@ -296,7 +333,7 @@ static void fdc_expire_mark(const void *p, const VISIT v, const int d) static void *fdc_expire(void *arg) { - while (1) { + while (!fdc_expire_thread_stop) { sleep(fdc_chk_int); DBGMSG("fdc: expiration check [i=%i,m=%i,n=%i/%i]", @@ -498,6 +535,33 @@ static int flt_release(const char *path, struct fuse_file_info *fi) } +static void flt_arrstr_free(char **c) +{ + int i; + + for (i = 0; c[i] != NULL; i++) + free(c[i]); + + free(c); +} + +static void flt_destroy(void *p) +{ + fdc_clear(0); + + fdc_expire_thread_stop = 1; + pthread_kill(fdc_expire_thread, SIGINT); + pthread_join(fdc_expire_thread, NULL); + + flt_arrstr_free(flt_in); + flt_arrstr_free(flt_out); + flt_arrstr_free(flt_cmd); + flt_arrstr_free(ext_in); + flt_arrstr_free(ext_out); + free(src); +} + + static struct fuse_operations flt_oper = { .getattr = flt_getattr, @@ -512,6 +576,7 @@ static struct fuse_operations flt_oper = { .statfs = flt_statfs, .flush = 
flt_flush, .release = flt_release, + .destroy = flt_destroy, }; @@ -567,6 +632,7 @@ int main(int argc, char *argv[]) {NULL, '\0', "cache_max_age", CFG_INT, (void *) &fdc_max_age, 0}, {NULL, '\0', "cache_max_nr", CFG_INT, (void *) &fdc_nr_max, 0}, {NULL, '\0', "temp_dir", CFG_STR, (void *) &tmpdir, 0}, + {NULL, '\0', "ghost_temp", CFG_INT, (void *) &fdc_ghost_tmp, 0}, {NULL, '\0', "flt_in", CFG_STR+CFG_MULTI, (void *) &flt_in, 0}, {NULL, '\0', "flt_out", CFG_STR+CFG_MULTI, (void *) &flt_out, 0}, @@ -616,6 +682,8 @@ int main(int argc, char *argv[]) fprintf(stderr, "\n"); return ret < 0 ? -ret : ret; } + + cfg_free_context(con); free(arg); pargc--; @@ -642,12 +710,21 @@ int main(int argc, char *argv[]) close(cwdfd); umask(077); + /* Use `ghost' temporary files or not ? */ + if (fdc_ghost_tmp != 0) + fdc_ghost_tmp = 1; + /* Get a proper value for fdc_nr_max */ - o = sysconf(_SC_OPEN_MAX) - 64; - if (fdc_nr_max == 0) - fdc_nr_max = o / 2; - else if (fdc_nr_max > o) - fdc_nr_max = o; + if (fdc_ghost_tmp) { + o = sysconf(_SC_OPEN_MAX) - 64; + if (fdc_nr_max == 0) + fdc_nr_max = o / 2; + else if (fdc_nr_max > o) + fdc_nr_max = o; + } else { + if (fdc_nr_max == 0) + fdc_nr_max = 2048; + } /* Allow SIGUSR1 to clear the cache */ signal(SIGUSR1, fdc_clear); @@ -657,5 +734,9 @@ int main(int argc, char *argv[]) DBGMSG("temp_dir = %s", tmpdir); - return fuse_main(pargc, pargv, &flt_oper, NULL); + o = fuse_main(pargc, pargv, &flt_oper, NULL); + + free(pargv); + + return o; } diff --git a/fuseflt.conf b/fuseflt.conf index 53a3d91..a368c10 100644 --- a/fuseflt.conf +++ b/fuseflt.conf @@ -8,7 +8,10 @@ cache_max_age = 120 # cache_max_nr = 640 # Directory for temporary files -temp_dir = /tmp +# temp_dir = /tmp + +# Use "ghost" temporary files ? +# ghost_temp = 1 # File content filters