diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f6d5d8af..05fd40be4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/src/common) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(test) +ADD_SUBDIRECTORY(man) IF(ENABLE_EXPERIMENTAL) ADD_SUBDIRECTORY(experimental) diff --git a/doc/Makefile b/doc/Makefile index d7255e6bd..a9a704712 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -18,3 +18,6 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +install: + cp -f build/man/* ../man/ diff --git a/doc/README.md b/doc/README.md index 29fe3171f..075902476 100644 --- a/doc/README.md +++ b/doc/README.md @@ -3,3 +3,16 @@ Edits should be made to the `.rst` files. The documentation can be built with `make html` or `make man`. The generated files will be found in the `build` directory. + +## Man Pages + +Since there is no guarantee of Sphinx on each system, the man pages for each release are committed directly to the repo. +This can be done with: + +``` shell +cd doc +make man +make install +``` + +Be sure to update the version number in the `doc/rst/conf.py` file as well. diff --git a/doc/rst/conf.py b/doc/rst/conf.py index c0f8f74a7..64172c699 100644 --- a/doc/rst/conf.py +++ b/doc/rst/conf.py @@ -54,9 +54,9 @@ # built documents. # # The short X.Y version. -version = u'0.8' +version = u'0.9' # The full version, including alpha/beta/rc tags. -release = u'0.8' +release = u'0.9' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/doc/rst/dbz2.1.rst b/doc/rst/experimental/dbz2.1.rst similarity index 100% rename from doc/rst/dbz2.1.rst rename to doc/rst/experimental/dbz2.1.rst diff --git a/doc/rst/index.rst b/doc/rst/index.rst index 1b0696dbc..5459312ba 100644 --- a/doc/rst/index.rst +++ b/doc/rst/index.rst @@ -77,17 +77,18 @@ Man Pages :maxdepth: 1 dbcast.1 - dbz2.1 dchmod.1 dcmp.1 dcp.1 ddup.1 dfilemaker.1 dfind.1 + dreln.1 drm.1 dstripe.1 dsync.1 dwalk.1 + experimental/dbz2.1 experimental/dgrep.1 experimental/dparallel.1 experimental/dtar.1 diff --git a/experimental/dsh/dsh.c b/experimental/dsh/dsh.c index 3127f6334..bbe37ce2a 100644 --- a/experimental/dsh/dsh.c +++ b/experimental/dsh/dsh.c @@ -591,7 +591,7 @@ static void sort_scan_sort(mfu_path* origpath, uint64_t allmax, /* copy items into buffer */ uint64_t idx = 0; char* ptr = (char*) buf; - strmap_node* elem; + const strmap_node* elem; for (elem = strmap_node_first(children); elem != NULL; elem = strmap_node_next(elem)) @@ -865,7 +865,7 @@ static void summarize_children(mfu_flist flist, mfu_path* path, int print_defaul } /* free data structure allocated for each child */ - strmap_node* elem; + const strmap_node* elem; for (elem = strmap_node_first(children); elem != NULL; elem = strmap_node_next(elem)) @@ -1657,7 +1657,7 @@ static char* arg_to_regex(const char* arg) { /* count number of bytes we need */ size_t count = 2; /* for ^ and $ at ends of regex */ - char* str = arg; + char* str = (char*)arg; char* tok = strchr(str, '*'); while (tok != NULL) { count += tok - str; /* copy text leading up to * */ @@ -1675,7 +1675,7 @@ static char* arg_to_regex(const char* arg) strcpy(regex, "^"); /* replace each * with .* */ - str = arg; + str = (char*)arg; tok = strchr(str, '*'); while (tok != NULL) { strncat(regex, str, tok - str); @@ -1815,7 +1815,7 @@ int main(int argc, char** argv) paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ - char** argpaths = &argv[optind]; + const 
char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt new file mode 100644 index 000000000..db0ebb430 --- /dev/null +++ b/man/CMakeLists.txt @@ -0,0 +1,16 @@ +LIST(APPEND man_pages + dbcast.1 + dchmod.1 + dcmp.1 + dcp.1 + ddup.1 + dfilemaker.1 + dfind.1 + dreln.1 + drm.1 + dstripe.1 + dsync.1 + dwalk.1 + ) + +INSTALL(FILES ${man_pages} DESTINATION ${X_DATADIR}/man/man1) diff --git a/man/Makefile.am b/man/Makefile.am deleted file mode 100644 index 4bd7823d6..000000000 --- a/man/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -dist_man1_MANS = \ - dbcast.1 \ - dchmod.1 \ - dcmp.1 \ - dcp.1 \ - ddup.1 \ - dfilemaker.1 \ - drm.1 \ - dstripe.1 \ - dsync.1 \ - dwalk.1 diff --git a/man/dbcast.1 b/man/dbcast.1 index b828ae22b..3186b3cf3 100644 --- a/man/dbcast.1 +++ b/man/dbcast.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DBCAST" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DBCAST" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dbcast \- distributed broadcast . diff --git a/man/dchmod.1 b/man/dchmod.1 index 0d12579c8..e464b4d7a 100644 --- a/man/dchmod.1 +++ b/man/dchmod.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DCHMOD" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DCHMOD" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dchmod \- distributed tool to set permissions and group . diff --git a/man/dcmp.1 b/man/dcmp.1 index f34016708..42a73ef6f 100644 --- a/man/dcmp.1 +++ b/man/dcmp.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DCMP" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DCMP" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dcmp \- distributed compare . @@ -70,6 +70,15 @@ read, byte rate, and file rate. .UNINDENT .INDENT 0.0 .TP +.B \-l, \-\-lite +lite mode does a comparison of file modification time and size. 
If +modification time and size are the same, then the contents are assumed +to be the same. Similarly, if the modification time or size is different, +then the contents are assumed to be different. The lite mode does no comparison +of data/content in the file. +.UNINDENT +.INDENT 0.0 +.TP .B \-h, \-\-help Print the command usage, and the list of options available. .UNINDENT diff --git a/man/dcp.1 b/man/dcp.1 index c80246bd5..5aae2a8b6 100644 --- a/man/dcp.1 +++ b/man/dcp.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DCP" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DCP" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dcp \- distributed copy . diff --git a/man/ddup.1 b/man/ddup.1 index 4791e826f..30ef57543 100644 --- a/man/ddup.1 +++ b/man/ddup.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DDUP" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DDUP" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME ddup \- report files with identical content . diff --git a/man/dfilemaker.1 b/man/dfilemaker.1 index 458b770bf..ffd4c26d4 100644 --- a/man/dfilemaker.1 +++ b/man/dfilemaker.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DFILEMAKER" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DFILEMAKER" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dfilemaker \- distributed random file generation program . diff --git a/man/dfind.1 b/man/dfind.1 new file mode 100644 index 000000000..ff20fdcc3 --- /dev/null +++ b/man/dfind.1 @@ -0,0 +1,249 @@ +.\" Man page generated from reStructuredText. +. +.TH "DFIND" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" +.SH NAME +dfind \- distributed file filtering +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. 
nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBdfind [OPTION] [EXPRESSION] PATH ...\fP +.SH DESCRIPTION +.sp +Parallel MPI application to filter a list of files according to an expression. +.sp +dfind provides functionality similar to \fBfind(1)\fP\&. +.sp +The file list can be obtained by either walking one or more paths provided on the command line or through an input list. +.sp +The filtered list can be written to an output file. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-i, \-\-input FILE +Read source list from FILE. FILE must be generated by another tool +from the mpiFileUtils suite. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o, \-\-output FILE +Write the processed list to a file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Run in verbose mode. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print a brief message listing the \fBdfind(1)\fP options and usage. +.UNINDENT +.SH EXPRESSIONS +.sp +Numeric arguments can be specified as: +.INDENT 0.0 +.INDENT 3.5 +.TS +center; +|l|l|. +_ +T{ ++N +T} T{ +more than N +T} +_ +T{ +\-N +T} T{ +less than N +T} +_ +T{ +N +T} T{ +exactly N +T} +_ +.TE +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-amin N +File was last accessed N minutes ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-anewer FILE +File was last accessed more recently than FILE was modified. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-atime N +File was last accessed N days ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-cmin N +File\(aqs status was last changed N minutes ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-cnewer FILE +File\(aqs status was last changed more recently than FILE was modified. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-ctime N +File\(aqs status was last changed N days ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-gid N +File\(aqs numeric group ID is N. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-group NAME +File belongs to group NAME. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-mmin N +File\(aqs data was last modified N minutes ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-name PATTERN +Base of file name matches shell pattern PATTERN. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-path PATTERN +Full path to file matches shell pattern PATTERN. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-regex REGEX +Full path to file matches POSIX regular expression REGEX. Regular expressions processed by \fBregexec(3)\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-newer FILE +File was modified more recently than FILE. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-mtime N +File\(aqs data was last modified N days ago. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-size N +File size is N bytes. Units can be used like \(aqKB\(aq, \(aqMB\(aq, \(aqGB\(aq. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-type C +File is of type C: +.TS +center; +|l|l|. +_ +T{ +d +T} T{ +directory +T} +_ +T{ +f +T} T{ +regular file +T} +_ +T{ +l +T} T{ +symbolic link +T} +_ +.TE +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-uid N +File\(aqs numeric user ID is N. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-user NAME +File is owned by user NAME. +.UNINDENT +.SH ACTIONS +.INDENT 0.0 +.TP +.B \-\-print +Print file name to stdout. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-exec CMD ; +Execute command CMD on file. All following arguments are taken as arguments to the command until \(aq;\(aq is encountered. The string \(aq{}\(aq is replaced by the current file name. +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.IP 1. 3 +Print all files owned by user1 under given path: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-\-user user1 \-\-print /path/to/target\fP +.INDENT 0.0 +.IP 2.
3 +To find all files less than 1GB and write them to a file: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-o outfile \-\-size \-1GB /path/to/target\fP +.INDENT 0.0 +.IP 3. 3 +Filter list in infile to find all regular files not changed in the past 180 days and write new list to outfile: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-i infile \-o outfile \-\-type f \-\-mtime +180\fP +.SH SEE ALSO +.sp +The mpiFileUtils source code and all documentation may be downloaded +from <\fI\%https://github.com/hpc/mpifileutils\fP> +.SH AUTHOR +HPC +.SH COPYRIGHT +2018, LLNL/LANL/UT-Battelle/DDN +.\" Generated by docutils manpage writer. +. diff --git a/man/dreln.1 b/man/dreln.1 new file mode 100644 index 000000000..ea2b441d8 --- /dev/null +++ b/man/dreln.1 @@ -0,0 +1,107 @@ +.\" Man page generated from reStructuredText. +. +.TH "DRELN" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" +.SH NAME +dreln \- distributed relink +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBdreln [OPTION] OLDPATH NEWPATH PATH ...\fP +.SH DESCRIPTION +.sp +Parallel MPI application to recursively update symlinks within a +directory. +.sp +dreln walks the specified PATH and updates any symlink whose target +includes an absolute path to OLDPATH and replaces that symlink +with a new link whose target points to NEWPATH instead. 
+.sp +This is useful to update symlinks after migrating a large +directory from one file system to another, whose links specify +absolute paths to the original file system. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-i, \-\-input FILE +Read source list from FILE. FILE must be generated by another tool +from the mpiFileUtils suite. +.UNINDENT +.INDENT 0.0 +.TP +.B \-p, \-\-preserve +Preserve existing modification times on links. +.UNINDENT +.INDENT 0.0 +.TP +.B \-r, \-\-relative +Replace links using target paths that are relative to NEWPATH. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Run in verbose mode. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print a brief message listing the \fBdreln(1)\fP options and usage. +.UNINDENT +.SH EXAMPLES +.sp +1. To update all links under /walk/path whose targets point to /orig/path +and replace them with targets that point to /new/path: +.sp +\fBmpirun \-np 128 dreln \-v /orig/path /new/path /walk/path\fP +.sp +2. Same as above, but replace each link target with a relative path +from the link to its new target under /new/path: +.sp +\fBmpirun \-np 128 dreln \-v \-\-relative /orig/path /new/path /walk/path\fP +.INDENT 0.0 +.IP 3. 3 +One can preserve existing modification times on links: +.UNINDENT +.sp +\fBmpirun \-np 128 dreln \-v \-\-preserve /orig/path /new/path /walk/path\fP +.INDENT 0.0 +.IP 4. 3 +One can specify multiple paths to walk: +.UNINDENT +.sp +\fBmpirun \-np 128 dreln \-v /orig/path /new/path /walk/path1 /walk/path2\fP +.SH SEE ALSO +.sp +The mpiFileUtils source code and all documentation may be downloaded +from <\fI\%https://github.com/hpc/mpifileutils\fP> +.SH AUTHOR +HPC +.SH COPYRIGHT +2018, LLNL/LANL/UT-Battelle/DDN +.\" Generated by docutils manpage writer. +. diff --git a/man/drm.1 b/man/drm.1 index 5f828c2fb..5828a388c 100644 --- a/man/drm.1 +++ b/man/drm.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. .
-.TH "DRM" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DRM" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME drm \- distributed remove . @@ -92,6 +92,13 @@ them. This is useful to check list of items satisfying \-\-exclude or .UNINDENT .INDENT 0.0 .TP +.B \-\-aggressive +This option will delete files during the walk phase, and then +delete directories by level after the walk in drm. You cannot +use this option with \-\-dryrun. +.UNINDENT +.INDENT 0.0 +.TP .B \-T, \-\-traceless Delete child items without updating the mtime on their parent directory. .UNINDENT diff --git a/man/dstripe.1 b/man/dstripe.1 index b1f681b99..e83e259fa 100644 --- a/man/dstripe.1 +++ b/man/dstripe.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DSTRIPE" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DSTRIPE" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dstripe \- restripe files on underlying storage . diff --git a/man/dsync.1 b/man/dsync.1 index 3b79d8b88..60325ee71 100644 --- a/man/dsync.1 +++ b/man/dsync.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DSYNC" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DSYNC" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dsync \- synchronize directory trees . @@ -37,9 +37,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .sp Parallel MPI application to synchronize two files or two directory trees. .sp -dsync makes DEST match SRC, adding missing entries from DEST, removing -extra entries from DEST, and updating existing entries in DEST as necessary -so that SRC and DEST have identical content, ownership, timestamps, and permissions. +dsync makes DEST match SRC, adding missing entries from DEST, and updating +existing entries in DEST as necessary so that SRC and DEST have identical +content, ownership, timestamps, and permissions. .SH OPTIONS .INDENT 0.0 .TP @@ -48,14 +48,19 @@ Show differences without changing anything. 
.UNINDENT .INDENT 0.0 .TP +.B \-b, \-\-batch\-files N +Batch files into groups of up to size N during copy operation. +.UNINDENT +.INDENT 0.0 +.TP .B \-c, \-\-contents Compare files byte\-by\-byte rather than checking size and mtime to determine whether file contents are different. .UNINDENT .INDENT 0.0 .TP -.B \-N, \-\-no\-delete -Do not delete extraneous files from destination. +.B \-D, \-\-delete +Delete extraneous files from destination. .UNINDENT .INDENT 0.0 .TP diff --git a/man/dwalk.1 b/man/dwalk.1 index 8783251da..e57c5154a 100644 --- a/man/dwalk.1 +++ b/man/dwalk.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "DWALK" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "DWALK" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME dwalk \- distributed walk and list . diff --git a/man/mpifileutils.1 b/man/mpifileutils.1 index 5ad8c31fc..f3fba7be7 100644 --- a/man/mpifileutils.1 +++ b/man/mpifileutils.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "MPIFILEUTILS" "1" "Dec 19, 2018" "0.8" "mpiFileUtils" +.TH "MPIFILEUTILS" "1" "Jan 28, 2019" "0.9" "mpiFileUtils" .SH NAME mpifileutils \- mpiFileUtils Documentation . @@ -62,6 +62,8 @@ ddup \- Find duplicate files. .IP \(bu 2 dfilemaker \- Generate random files. .IP \(bu 2 +dreln \- Relink symlinks. +.IP \(bu 2 drm \- Remove files. .IP \(bu 2 dstripe \- Restripe files. @@ -75,14 +77,13 @@ dwalk \- List files. Experimental utilities are under active development. They are not considered to be production worthy, but they are available in the distribution for those interested in developing them further or to provide additional examples. To -enable experimental utilities, run configure with the enable experimental -option. +build the experimental utilities, turn on the CMake option. .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -$ ./configure \-\-enable\-experimental +$ cmake \-DENABLE_EXPERIMENTAL=ON ... 
.ft P .fi .UNINDENT @@ -129,40 +130,57 @@ $ spack install mpifileutils +lustre +experimental .UNINDENT .UNINDENT .sp -To build from a release tarball, there are two scripts: buildme_dependencies and -buildme. The buildme_dependencies script downloads and installs all the -necessary libraries. The buildme script then builds mpiFileUtils assuming the -libraries have been installed. Both scripts require that mpicc is in your path, -and that it is for an MPI library that supports at least v2.2 of the MPI -standard. Please review each buildme script, and edit if necessary. Then run -them in sequence: +To build from a release tarball, use CMake. Note that this requires the manual +installation of the dependencies. Assuming the dependencies have been placed in +an \fIinstall\fP directory the build commands are thus: .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -$ ./buildme_dependencies -$ ./buildme +$ git clone https://github.com/hpc/mpifileutils +$ mkdir build install +$ # build DTCMP and other dependencies +$ cd build +$ cmake ../mpifileutils \-DWITH_DTCMP_PREFIX=../install \-DWITH_LibCircle_PREFIX=../install \-DCMAKE_INSTALL_PREFIX=../install .ft P .fi .UNINDENT .UNINDENT .sp -To build from a clone, it may also be necessary to first run the -buildme_autotools script to obtain the required set of autotools, then use -buildme_dependencies_dev and buildme_dev: +One can also use spack to create an environment and view with the provided \fIspack.yaml\fP file. +First, make sure that you\(aqve set up spack in your shell (see \fI\%these instructions\fP). +Next, be sure that your \fI~/.spack/packages.yaml\fP is configured to ensure that spack can detect system\-provided packages. +.sp +From the root directory of mpifileutils, run the command \fIspack find\fP to determine which packages spack will install. +Next, run \fIspack concretize\fP to have spack perform dependency analysis. +Finally, run \fIspack install\fP to build the dependencies.
+.sp +There are two ways to tell CMake about the dependencies. +First, you can use \fIspack load [depname]\fP to put the installed dependency into your environment paths. +Then, at configure time, CMake will automatically detect the location of these dependencies. +Thus, the commands to build become: .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -$ ./buildme_autotools -$ ./buildme_dependencies_dev -$ ./buildme_dev +$ git clone https://github.com/hpc/mpifileutils +$ mkdir build install +$ cd mpifileutils +$ spack install +$ spack load dtcmp +$ spack load libcircle +$ spack load libarchive +$ cd ../build +$ cmake ../mpifileutils .ft P .fi .UNINDENT .UNINDENT +.sp +The other way to use spack is to create a "view" to the installed dependencies. +Details on this are coming soon. .SS Project Design Principles .sp The following principles drive design decisions in the project. @@ -331,55 +349,6 @@ To read the current striping parameters of a file on Lustre: .sp The mpiFileUtils source code and all documentation may be downloaded from <\fI\%https://github.com/hpc/mpifileutils\fP> -.SS dbz2 -.SS SYNOPSIS -.sp -\fBdbz2 [OPTIONS] [\-z|\-d] FILE\fP -.SS DESCRIPTION -.sp -Parallel MPI application to compress or decompress a file. -.SS OPTIONS -.INDENT 0.0 -.TP -.B \-d, \-\-decompress -Decompress the file -.UNINDENT -.INDENT 0.0 -.TP -.B \-z, \-\-compress -Compress the file -.UNINDENT -.INDENT 0.0 -.TP -.B \-k, \-\-keep -Keep the input file (optional). -.UNINDENT -.INDENT 0.0 -.TP -.B \-f, \-\-overwrite -Overwrite the output file, if it exists (optional). -.UNINDENT -.INDENT 0.0 -.TP -.B \-b, \-\-block SIZE -Set the compression block size, from 1 to 9. -Where 1=100kB ... and 9=900kB. Default is 9 (optional). -.UNINDENT -.INDENT 0.0 -.TP -.B \-m, \-\-memory SIZE -Limit the memory that can be used by a processs, in bytes (optional). -.UNINDENT -.INDENT 0.0 -.TP -.B \-v, \-\-verbose -Verbose output (optional). -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-debug -Show debug output (optional). 
-.UNINDENT .SS dchmod .SS SYNOPSIS .sp @@ -522,6 +491,15 @@ read, byte rate, and file rate. .UNINDENT .INDENT 0.0 .TP +.B \-l, \-\-lite +lite mode does a comparison of file modification time and size. If +modification time and size are the same, then the contents are assumed +to be the same. Similarly, if the modification time or size is different, +then the contents are assumed to be different. The lite mode does no comparison +of data/content in the file. +.UNINDENT +.INDENT 0.0 +.TP .B \-h, \-\-help Print the command usage, and the list of options available. .UNINDENT @@ -961,31 +939,19 @@ Print version information and exit. .sp The mpiFileUtils source code and all documentation may be downloaded from <\fI\%https://github.com/hpc/mpifileutils\fP> -.SS drm +.SS dfind .SS SYNOPSIS .sp -\fBdrm [OPTION] PATH...\fP +\fBdfind [OPTION] [EXPRESSION] PATH ...\fP .SS DESCRIPTION .sp -Parallel MPI application to recursively delete a directory and its -contents. +Parallel MPI application to filter a list of files according to an expression. .sp -drm is a tool for removing files recursively in parallel. -drm behaves like \fIrm \-rf\fP, but it is faster. +dfind provides functionality similar to \fBfind(1)\fP\&. .sp -\fBNOTE:\fP -.INDENT 0.0 -.INDENT 3.5 -DO NOT USE SHELL REGEX!!! -The \-\-match and \-\-exclude options use POSIX regex syntax. Because of -this make sure that the shell does not try to interpret your regex before -it gets passed to the program. You can generally use quotes around your -regex to prevent the shell from expanding. An example of this using the -\-\-match option with \-\-dryrun would be: +The file list can be obtained by either walking one or more paths provided on the command line or through an input list. .sp -\fBmpirun \-np 128 drm \-\-dryrun \-v \-\-name \-\-match \(aqfile_.*\(aq /path/to/dir/*\fP -.UNINDENT -.UNINDENT +The filtered list can be written to an output file. 
.SS OPTIONS .INDENT 0.0 .TP @@ -995,161 +961,462 @@ from the mpiFileUtils suite. .UNINDENT .INDENT 0.0 .TP -.B \-l, \-\-lite -Walk file system without stat. +.B \-o, \-\-output FILE +Write the processed list to a file. .UNINDENT .INDENT 0.0 .TP -.B \-\-exclude REGEX -Do not remove items whose full path matches REGEX, processed by \fBregexec(3)\fP\&. +.B \-v, \-\-verbose +Run in verbose mode. .UNINDENT .INDENT 0.0 .TP -.B \-\-match REGEX -Only remove items whose full path matches REGEX, processed by -\fBregexec(3)\fP\&. +.B \-h, \-\-help +Print a brief message listing the \fBdfind(1)\fP options and usage. .UNINDENT +.SS EXPRESSIONS +.sp +Numeric arguments can be specified as: .INDENT 0.0 -.TP -.B \-\-name -Change \-\-exclude and match to apply to item name rather than its -full path. +.INDENT 3.5 +.TS +center; +|l|l|. +_ +T{ ++N +T} T{ +more than N +T} +_ +T{ +\-N +T} T{ +less than N +T} +_ +T{ +N +T} T{ +exactly N +T} +_ +.TE .UNINDENT -.INDENT 0.0 -.TP -.B \-\-dryrun -Print a list of files that \fBwould\fP be deleted without deleting -them. This is useful to check list of items satisfying \-\-exclude or -\-\-match options before actually deleting anything. .UNINDENT .INDENT 0.0 .TP -.B \-T, \-\-traceless -Delete child items without updating the mtime on their parent directory. +.B \-\-amin N +File was last accessed N minutes ago. .UNINDENT .INDENT 0.0 .TP -.B \-v, \-\-verbose -Run in verbose mode. +.B \-\-anewer FILE +File was last accessed more recently than FILE was modified. .UNINDENT .INDENT 0.0 .TP -.B \-h, \-\-help -Print a brief message listing the \fBdrm(1)\fP options and usage. -.UNINDENT -.SS EXAMPLES -.INDENT 0.0 -.IP 1. 3 -To delete a directory and its contents: -.UNINDENT -.sp -\fBmpirun \-np 128 drm \-v /dir/to/delete\fP -.INDENT 0.0 -.IP 2. 3 -Delete all items (files and directories) ending with .core from -directory tree: +.B \-\-atime N +File was last accessed N days ago. 
.UNINDENT -.sp -\fBmpirun \-np 128 drm \-\-match \(aq.core$\(aq /dir/to/delete/from\fP .INDENT 0.0 -.IP 3. 3 -List items that would be deleted without removing them: +.TP +.B \-\-cmin N +File\(aqs status was last changed N minutes ago. .UNINDENT -.sp -\fBmpirun \-np 128 drm \-\-dryrun \-\-match \(aq.core$\(aq /dir/to/delete/from\fP .INDENT 0.0 -.IP 4. 3 -Delete all items named foo: +.TP +.B \-\-cnewer FILE +File\(aqs status was last changed more recently than FILE was modified. .UNINDENT -.sp -\fBmpirun \-np 128 drm \-\-name \-\-match \(aq^foo$\(aq /dir/to/delete/from\fP -.SS SEE ALSO -.sp -The mpiFileUtils source code and all documentation may be downloaded -from <\fI\%https://github.com/hpc/mpifileutils\fP> -.SS dstripe -.SS SYNOPSIS -.sp -\fBdstripe [OPTION] PATH...\fP -.SS DESCRIPTION -.sp -Parallel MPI application to restripe files. -.sp -This tool is in active development. It currently only works on Lustre. -.sp -dstripe enables one to restripe file(s) across the underlying storage -devices. One must specify a list of paths. All files in those paths can -be restriped. By default, stripe size is 1MB and stripe count is \-1 -allowing dstripe to use all available stripes. -.SS OPTIONS .INDENT 0.0 .TP -.B \-c, \-\-count STRIPE_COUNT -The number of stripes to use during file restriping. If STRIPE_COUNT -is \-1, then all available stripes are used. If STRIPE_COUNT is 0, -the lustre file system default is used. The default stripe count is -\-1. +.B \-\-ctime N +File\(aqs status was last changed N days ago. .UNINDENT .INDENT 0.0 .TP -.B \-s, \-\-size STRIPE_SIZE -The stripe size to use during file restriping. Units like "MB" and -"GB" can immediately follow the number without spaces (ex. 2MB). The -default stripe size is 1MB. +.B \-\-gid N +File\(aqs numeric group ID is N. .UNINDENT .INDENT 0.0 .TP -.B \-m, \-\-minsize SIZE -The minimum size a file must be to be a candidate for restriping. -Files smaller than SIZE will not be restriped. 
Units like "MB" and -"GB" can immediately follow the number without spaces (ex. 2MB). The -default minimum file size is 0MB. +.B \-\-group NAME +File belongs to group NAME. .UNINDENT .INDENT 0.0 .TP -.B \-r, \-\-report -Display the file size, stripe count, and stripe size of all files -found in PATH. No restriping is performed when using this option. +.B \-\-mmin N +File\(aqs data was last modified N minutes ago. .UNINDENT .INDENT 0.0 .TP -.B \-v, \-\-verbose -Run in verbose mode. +.B \-\-name PATTERN +Base of file name matches shell pattern PATTERN. .UNINDENT .INDENT 0.0 .TP -.B \-h, \-\-help -Print the command usage, and the list of options available. +.B \-\-path PATTERN +Full path to file matches shell pattern PATTERN. .UNINDENT -.SS EXAMPLES .INDENT 0.0 -.IP 1. 3 -To stripe a file on all storage devices using a 1MB stripe size: +.TP +.B \-\-regex REGEX +Full path to file matches POSIX regular expression REGEX. Regular expressions processed by \fBregexec(3)\fP\&. .UNINDENT -.sp -\fBmpirun \-np 128 dstripe \-s 1MB /path/to/file\fP .INDENT 0.0 -.IP 2. 3 -To stripe a file across 20 storage devices with a 1GB stripe size: +.TP +.B \-\-newer FILE +File was modified more recently than FILE. .UNINDENT -.sp -\fBmpirun \-np 128 dstripe \-c 20 \-s 1GB /path/to/file\fP .INDENT 0.0 -.IP 3. 3 -To restripe all files in /path/to/files/ that are at least 1GB in -size: +.TP +.B \-\-mtime N +File\(aqs data was last modified N days ago. .UNINDENT -.sp -\fBmpirun \-np 128 dstripe \-m 1GB /path/to/files/\fP .INDENT 0.0 -.IP 4. 3 -To restripe all files in /path/to/files/ across 10 storage devices -with 2MB stripe size: +.TP +.B \-\-size N +File size is N bytes. Units can be used like \(aqKB\(aq, \(aqMB\(aq, \(aqGB\(aq. .UNINDENT -.sp -\fBmpirun \-np 128 dstripe \-c 10 \-s 2MB /path/to/files/\fP .INDENT 0.0 -.IP 5. 3 +.TP +.B \-\-type C +File is of type C: +.TS +center; +|l|l|. 
+_ +T{ +d +T} T{ +directory +T} +_ +T{ +f +T} T{ +regular file +T} +_ +T{ +l +T} T{ +symbolic link +T} +_ +.TE +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-uid N +File\(aqs numeric user ID is N. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-user NAME +File is owned by user NAME. +.UNINDENT +.SS ACTIONS +.INDENT 0.0 +.TP +.B \-\-print +Print file name to stdout. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-exec CMD ; +Execute command CMD on file. All following arguments are taken as arguments to the command until \(aq;\(aq is encountered. The string \(aq{}\(aq is replaced by the current file name. +.UNINDENT +.SS EXAMPLES +.INDENT 0.0 +.IP 1. 3 +Print all files owned by user1 under given path: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-\-user user1 \-\-print /path/to/target\fP +.INDENT 0.0 +.IP 2. 3 +To find all files less than 1GB and write them to a file: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-o outfile \-\-size \-1GB /path/to/target\fP +.INDENT 0.0 +.IP 3. 3 +Filter list in infile to find all regular files not changed in the past 180 days and write new list to outfile: +.UNINDENT +.sp +\fBmpirun \-np 128 dfind \-v \-i infile \-o outfile \-\-type f \-\-mtime +180\fP +.SS SEE ALSO +.sp +The mpiFileUtils source code and all documentation may be downloaded +from <\fI\%https://github.com/hpc/mpifileutils\fP> +.SS dreln +.SS SYNOPSIS +.sp +\fBdreln [OPTION] OLDPATH NEWPATH PATH ...\fP +.SS DESCRIPTION +.sp +Parallel MPI application to recursively update symlinks within a +directory. +.sp +dreln walks the specified PATH and updates any symlink whose target +includes an absolute path to OLDPATH and replaces that symlink +with a new link whose target points to NEWPATH instead. +.sp +This is useful to update symlinks after migrating a large +directory from one file system to another, whose links specify +absolute paths to the original file system. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-i, \-\-input FILE +Read source list from FILE.
FILE must be generated by another tool +from the mpiFileUtils suite. +.UNINDENT +.INDENT 0.0 +.TP +.B \-p, \-\-preserve +Preserve existing modification times on links. +.UNINDENT +.INDENT 0.0 +.TP +.B \-r, \-\-relative +Replace links using target paths that are relative to NEWPATH. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Run in verbose mode. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print a brief message listing the \fBdreln(1)\fP options and usage. +.UNINDENT +.SS EXAMPLES +.sp +1. To update all links under /walk/path whose targets point to /orig/path +and replace them with targets that point to /new/path: +.sp +\fBmpirun \-np 128 dreln \-v /orig/path /new/path /walk/path\fP +.sp +2. Same as above, but replace each link target with a relative path +from the link to its new target under /new/path: +.sp +\fBmpirun \-np 128 dreln \-v \-\-relative /orig/path /new/path /walk/path\fP +.INDENT 0.0 +.IP 3. 3 +One can preserve existing modification times on links: +.UNINDENT +.sp +\fBmpirun \-np 128 dreln \-v \-\-preserve /orig/path /new/path /walk/path\fP +.INDENT 0.0 +.IP 4. 3 +One can specify multiple paths to walk: +.UNINDENT +.sp +\fBmpirun \-np 128 dreln \-v /orig/path /new/path /walk/path1 /walk/path2\fP +.SS SEE ALSO +.sp +The mpiFileUtils source code and all documentation may be downloaded +from <\fI\%https://github.com/hpc/mpifileutils\fP> +.SS drm +.SS SYNOPSIS +.sp +\fBdrm [OPTION] PATH...\fP +.SS DESCRIPTION +.sp +Parallel MPI application to recursively delete a directory and its +contents. +.sp +drm is a tool for removing files recursively in parallel. +drm behaves like \fIrm \-rf\fP, but it is faster. +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +DO NOT USE SHELL REGEX!!! +The \-\-match and \-\-exclude options use POSIX regex syntax. Because of +this make sure that the shell does not try to interpret your regex before +it gets passed to the program. You can generally use quotes around your +regex to prevent the shell from expanding.
An example of this using the +\-\-match option with \-\-dryrun would be: +.sp +\fBmpirun \-np 128 drm \-\-dryrun \-v \-\-name \-\-match \(aqfile_.*\(aq /path/to/dir/*\fP +.UNINDENT +.UNINDENT +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-i, \-\-input FILE +Read source list from FILE. FILE must be generated by another tool +from the mpiFileUtils suite. +.UNINDENT +.INDENT 0.0 +.TP +.B \-l, \-\-lite +Walk file system without stat. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-exclude REGEX +Do not remove items whose full path matches REGEX, processed by \fBregexec(3)\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-match REGEX +Only remove items whose full path matches REGEX, processed by +\fBregexec(3)\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-name +Change \-\-exclude and match to apply to item name rather than its +full path. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dryrun +Print a list of files that \fBwould\fP be deleted without deleting +them. This is useful to check list of items satisfying \-\-exclude or +\-\-match options before actually deleting anything. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-aggressive +This option will delete files during the walk phase, and then +delete directories by level after the walk in drm. You cannot +use this option with \-\-dryrun. +.UNINDENT +.INDENT 0.0 +.TP +.B \-T, \-\-traceless +Delete child items without updating the mtime on their parent directory. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Run in verbose mode. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print a brief message listing the \fBdrm(1)\fP options and usage. +.UNINDENT +.SS EXAMPLES +.INDENT 0.0 +.IP 1. 3 +To delete a directory and its contents: +.UNINDENT +.sp +\fBmpirun \-np 128 drm \-v /dir/to/delete\fP +.INDENT 0.0 +.IP 2. 3 +Delete all items (files and directories) ending with .core from +directory tree: +.UNINDENT +.sp +\fBmpirun \-np 128 drm \-\-match \(aq.core$\(aq /dir/to/delete/from\fP +.INDENT 0.0 +.IP 3. 
3 +List items that would be deleted without removing them: +.UNINDENT +.sp +\fBmpirun \-np 128 drm \-\-dryrun \-\-match \(aq.core$\(aq /dir/to/delete/from\fP +.INDENT 0.0 +.IP 4. 3 +Delete all items named foo: +.UNINDENT +.sp +\fBmpirun \-np 128 drm \-\-name \-\-match \(aq^foo$\(aq /dir/to/delete/from\fP +.SS SEE ALSO +.sp +The mpiFileUtils source code and all documentation may be downloaded +from <\fI\%https://github.com/hpc/mpifileutils\fP> +.SS dstripe +.SS SYNOPSIS +.sp +\fBdstripe [OPTION] PATH...\fP +.SS DESCRIPTION +.sp +Parallel MPI application to restripe files. +.sp +This tool is in active development. It currently only works on Lustre. +.sp +dstripe enables one to restripe file(s) across the underlying storage +devices. One must specify a list of paths. All files in those paths can +be restriped. By default, stripe size is 1MB and stripe count is \-1 +allowing dstripe to use all available stripes. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-c, \-\-count STRIPE_COUNT +The number of stripes to use during file restriping. If STRIPE_COUNT +is \-1, then all available stripes are used. If STRIPE_COUNT is 0, +the lustre file system default is used. The default stripe count is +\-1. +.UNINDENT +.INDENT 0.0 +.TP +.B \-s, \-\-size STRIPE_SIZE +The stripe size to use during file restriping. Units like "MB" and +"GB" can immediately follow the number without spaces (ex. 2MB). The +default stripe size is 1MB. +.UNINDENT +.INDENT 0.0 +.TP +.B \-m, \-\-minsize SIZE +The minimum size a file must be to be a candidate for restriping. +Files smaller than SIZE will not be restriped. Units like "MB" and +"GB" can immediately follow the number without spaces (ex. 2MB). The +default minimum file size is 0MB. +.UNINDENT +.INDENT 0.0 +.TP +.B \-r, \-\-report +Display the file size, stripe count, and stripe size of all files +found in PATH. No restriping is performed when using this option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Run in verbose mode. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print the command usage, and the list of options available. +.UNINDENT +.SS EXAMPLES +.INDENT 0.0 +.IP 1. 3 +To stripe a file on all storage devices using a 1MB stripe size: +.UNINDENT +.sp +\fBmpirun \-np 128 dstripe \-s 1MB /path/to/file\fP +.INDENT 0.0 +.IP 2. 3 +To stripe a file across 20 storage devices with a 1GB stripe size: +.UNINDENT +.sp +\fBmpirun \-np 128 dstripe \-c 20 \-s 1GB /path/to/file\fP +.INDENT 0.0 +.IP 3. 3 +To restripe all files in /path/to/files/ that are at least 1GB in +size: +.UNINDENT +.sp +\fBmpirun \-np 128 dstripe \-m 1GB /path/to/files/\fP +.INDENT 0.0 +.IP 4. 3 +To restripe all files in /path/to/files/ across 10 storage devices +with 2MB stripe size: +.UNINDENT +.sp +\fBmpirun \-np 128 dstripe \-c 10 \-s 2MB /path/to/files/\fP +.INDENT 0.0 +.IP 5. 3 To display the current stripe count and stripe size of all files in /path/to/files/: .UNINDENT @@ -1167,9 +1434,9 @@ from <\fI\%https://github.com/hpc/mpifileutils\fP> .sp Parallel MPI application to synchronize two files or two directory trees. .sp -dsync makes DEST match SRC, adding missing entries from DEST, removing -extra entries from DEST, and updating existing entries in DEST as necessary -so that SRC and DEST have identical content, ownership, timestamps, and permissions. +dsync makes DEST match SRC, adding missing entries from DEST, and updating +existing entries in DEST as necessary so that SRC and DEST have identical +content, ownership, timestamps, and permissions. .SS OPTIONS .INDENT 0.0 .TP @@ -1178,14 +1445,19 @@ Show differences without changing anything. .UNINDENT .INDENT 0.0 .TP +.B \-b, \-\-batch\-files N +Batch files into groups of up to size N during copy operation. +.UNINDENT +.INDENT 0.0 +.TP .B \-c, \-\-contents Compare files byte\-by\-byte rather than checking size and mtime to determine whether file contents are different. 
.UNINDENT .INDENT 0.0 .TP -.B \-N, \-\-no\-delete -Do not delete extraneous files from destination. +.B \-D, \-\-delete +Delete extraneous files from destination. .UNINDENT .INDENT 0.0 .TP @@ -1313,218 +1585,55 @@ field from the top level directory. .sp The mpiFileUtils source code and all documentation may be downloaded from <\fI\%https://github.com/hpc/mpifileutils\fP> -.SS dfind +.SS dbz2 .SS SYNOPSIS .sp -\fBdfind [OPTION] [EXPRESSION] PATH ...\fP +\fBdbz2 [OPTIONS] [\-z|\-d] FILE\fP .SS DESCRIPTION .sp -Parallel MPI application to filter a list of files according to an expression. -.sp -dfind provides functionality similar to \fBfind(1)\fP\&. -.sp -The file list can be obtained by either walking one or more paths provided on the command line or through an input list. -.sp -The filtered list can be written to an output file. +Parallel MPI application to compress or decompress a file. .SS OPTIONS .INDENT 0.0 .TP -.B \-i, \-\-input FILE -Read source list from FILE. FILE must be generated by another tool -from the mpiFileUtils suite. -.UNINDENT -.INDENT 0.0 -.TP -.B \-o, \-\-output FILE -Write the processed list to a file. -.UNINDENT -.INDENT 0.0 -.TP -.B \-v, \-\-verbose -Run in verbose mode. -.UNINDENT -.INDENT 0.0 -.TP -.B \-h, \-\-help -Print a brief message listing the \fBdfind(1)\fP options and usage. -.UNINDENT -.SS EXPRESSIONS -.sp -Numeric arguments can be specified as: -.INDENT 0.0 -.INDENT 3.5 -.TS -center; -|l|l|. -_ -T{ -+N -T} T{ -more than N -T} -_ -T{ -\-N -T} T{ -less than N -T} -_ -T{ -N -T} T{ -exactly N -T} -_ -.TE -.UNINDENT -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-amin N -File was last accessed N minutes ago. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-anewer FILE -File was last accessed more recently than FILE was modified. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-atime N -File was last accessed N days ago. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-cmin N -File\(aqs status was last changed N minutes ago. 
-.UNINDENT -.INDENT 0.0 -.TP -.B \-\-cnewer FILE -File\(aqs status was last changed more recently than FILE was modified. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-ctime N -File\(aqs status was last changed N days ago. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-gid N -File\(aqs numeric group ID is N. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-group NAME -File belongs to group NAME. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-mmin N -File\(aqs data was last modified N minutes ago. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-name PATTERN -Base of file name matches shell pattern PATTERN. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-path PATTERN -Full path to file matches shell pattern PATTERN. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-regex REGEX -Full path to file matches POSIX regular expression REGEX. Regular expressions processed by \fBregexec(3)\fP\&. -.UNINDENT -.INDENT 0.0 -.TP -.B \-\-newer FILE -File was modified more recently than FILE. +.B \-d, \-\-decompress +Decompress the file .UNINDENT .INDENT 0.0 .TP -.B \-\-mtime N -File\(aqs data was last modified N days ago. +.B \-z, \-\-compress +Compress the file .UNINDENT .INDENT 0.0 .TP -.B \-\-size N -File size is N bytes. Units can be used like \(aqKB\(aq, \(aqMB\(aq, \(aqGB\(aq. +.B \-k, \-\-keep +Keep the input file (optional). .UNINDENT .INDENT 0.0 .TP -.B \-\-type C -File is of type C: -.TS -center; -|l|l|. -_ -T{ -d -T} T{ -directory -T} -_ -T{ -f -T} T{ -regular file -T} -_ -T{ -l -T} T{ -symbolic link -T} -_ -.TE +.B \-f, \-\-overwrite +Overwrite the output file, if it exists (optional). .UNINDENT .INDENT 0.0 .TP -.B \-\-uid N -File\(aqs numeric user ID is N. +.B \-b, \-\-block SIZE +Set the compression block size, from 1 to 9. +Where 1=100kB ... and 9=900kB. Default is 9 (optional). .UNINDENT .INDENT 0.0 .TP -.B \-\-user NAME -File is owned by user NAME. +.B \-m, \-\-memory SIZE +Limit the memory that can be used by a process, in bytes (optional). .UNINDENT -.SS ACTIONS .INDENT 0.0 .TP -.B \-\-print -Print file name to stdout.
+.B \-v, \-\-verbose +Verbose output (optional). .UNINDENT .INDENT 0.0 .TP -.B \-\-exec CMD ; -Execute command CMD on file. All following arguments are taken as arguments to the command until \(aq;\(aq is encountered. The string \(aq{}\(aq is replaced by the current file name. -.UNINDENT -.SS EXAMPLES -.INDENT 0.0 -.IP 1. 3 -Print all files owner by user1 under given path: -.UNINDENT -.sp -\fBmpirun \-np 128 dfind \-v \-\-user user1 \-\-print /path/to/target\fP -.INDENT 0.0 -.IP 2. 3 -To find all files less than 1GB and write them to a file: -.UNINDENT -.sp -\fBmpirun \-np 128 dfind \-v \-o outfile \-\-size \-1GB /path/to/target\fP -.INDENT 0.0 -.IP 3. 3 -Filter list in infile to find all regular files not changed in the past 180 days and write new list to outfile: +.B \-\-debug +Show debug output (optional). .UNINDENT -.sp -\fBmpirun \-np 128 dfind \-v \-i infile \-o outfile \-\-type f \-\-mtime +180\fP -.SS SEE ALSO -.sp -The mpiFileUtils source code and all documentation may be downloaded -from <\fI\%https://github.com/hpc/mpifileutils\fP> .SS dgrep .SS SYNOPSIS .sp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 93e19e816..3be730978 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,6 +12,7 @@ ADD_SUBDIRECTORY(ddup) ADD_SUBDIRECTORY(dfilemaker) ADD_SUBDIRECTORY(dfilemaker1) ADD_SUBDIRECTORY(dfind) +ADD_SUBDIRECTORY(dreln) ADD_SUBDIRECTORY(drm) ADD_SUBDIRECTORY(dstripe) ADD_SUBDIRECTORY(dsync) diff --git a/src/common/mfu.h b/src/common/mfu.h index 00259a53b..87ac6711a 100644 --- a/src/common/mfu.h +++ b/src/common/mfu.h @@ -27,6 +27,7 @@ extern "C" { #include "mfu_param_path.h" #include "mfu_flist.h" #include "mfu_pred.h" +#include "mfu_bz2.h" #endif /* MFU_H */ diff --git a/src/common/mfu_bz2.h b/src/common/mfu_bz2.h index b97911f4c..462e814ba 100644 --- a/src/common/mfu_bz2.h +++ b/src/common/mfu_bz2.h @@ -1,9 +1,7 @@ -#ifndef MFU_DBZ2_H -#define MFU_DBZ2_H - -#include "mfu.h" +#ifndef MFU_BZ2_H +#define MFU_BZ2_H void 
mfu_compress_bz2(int b_size, const char* fname, ssize_t opts_memory); void mfu_decompress_bz2(const char* fname, const char* fname_out); -#endif /* MFU_DBZ2_H */ +#endif /* MFU_BZ2_H */ diff --git a/src/common/mfu_flist_remove.c b/src/common/mfu_flist_remove.c index 5d443389c..95437f406 100644 --- a/src/common/mfu_flist_remove.c +++ b/src/common/mfu_flist_remove.c @@ -390,13 +390,13 @@ void mfu_flist_unlink(mfu_flist flist, bool traceless) mfu_flist_array_by_depth(flist, &levels, &minlevel, &lists); mfu_flist pstatlist; uint64_t size = mfu_flist_size(flist); - const char** strings; + char** strings = NULL; /* if traceless, dump the stat of each item's pdir */ if (traceless) { uint64_t idx; - strings = (const char **) MFU_MALLOC(size * sizeof(char *)); + strings = (char **) MFU_MALLOC(size * sizeof(char *)); for (idx = 0; idx < size; idx++) { /* stat the item */ struct stat st; @@ -427,7 +427,7 @@ void mfu_flist_unlink(mfu_flist flist, bool traceless) uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes); uint64_t* group_rank = (uint64_t*) MFU_MALLOC(output_bytes); - DTCMP_Rankv_strings((int)size, strings, &groups, group_ids, group_ranks, + DTCMP_Rankv_strings((int)size, (const char**)strings, &groups, group_ids, group_ranks, group_rank, DTCMP_FLAG_NONE, MPI_COMM_WORLD); for (idx = 0; idx < size; idx++) { @@ -567,7 +567,7 @@ void mfu_flist_unlink(mfu_flist flist, bool traceless) times[0].tv_nsec = mfu_flist_file_get_atime_nsec(newlist, idx); times[1].tv_nsec = mfu_flist_file_get_mtime_nsec(newlist, idx); - if(utimensat(AT_FDCWD, pdir, times, AT_SYMLINK_NOFOLLOW) != 0) { + if(mfu_utimensat(AT_FDCWD, pdir, times, AT_SYMLINK_NOFOLLOW) != 0) { MFU_LOG(MFU_LOG_DBG, "Failed to changeback timestamps with utimesat() `%s' (errno=%d %s)", pdir, errno, strerror(errno)); diff --git a/src/common/mfu_flist_walk.c b/src/common/mfu_flist_walk.c index 5adac86d7..446c71903 100644 --- a/src/common/mfu_flist_walk.c +++ b/src/common/mfu_flist_walk.c @@ -170,7 +170,7 @@ static int 
lustre_mds_stat(int fd, char* fname, struct stat* sb) return ret; } -static void walk_lustrestat_process_dir(char* dir, CIRCLE_handle* handle) +static void walk_lustrestat_process_dir(const char* dir, CIRCLE_handle* handle) { /* TODO: may need to try these functions multiple times */ DIR* dirp = mfu_opendir(dir); @@ -443,7 +443,7 @@ static void walk_getdents_process(CIRCLE_handle* handle) * Walk directory tree using stat at top level and readdir ***************************************/ -static void walk_readdir_process_dir(char* dir, CIRCLE_handle* handle) +static void walk_readdir_process_dir(const char* dir, CIRCLE_handle* handle) { /* TODO: may need to try these functions multiple times */ DIR* dirp = mfu_opendir(dir); @@ -555,7 +555,7 @@ static void walk_readdir_create(CIRCLE_handle* handle) { uint64_t i; for (i = 0; i < CURRENT_NUM_DIRS; i++) { - char* path = CURRENT_DIRS[i]; + const char* path = CURRENT_DIRS[i]; /* stat top level item */ struct stat st; @@ -645,7 +645,7 @@ static void walk_stat_create(CIRCLE_handle* handle) for (i = 0; i < CURRENT_NUM_DIRS; i++) { /* we'll call stat on every item */ const char* path = CURRENT_DIRS[i]; - handle->enqueue(path); + handle->enqueue((char*)path); } } diff --git a/src/common/mfu_io.c b/src/common/mfu_io.c index 09cf9d81b..1eda885e8 100644 --- a/src/common/mfu_io.c +++ b/src/common/mfu_io.c @@ -1,14 +1,16 @@ -#include "mfu.h" +#include +#include +#include + +#include #include #include #include #include -#include #include -#include -#include -#include + +#include "mfu.h" #define MFU_IO_TRIES (5) #define MFU_IO_USLEEP (100) diff --git a/src/common/mfu_param_path.c b/src/common/mfu_param_path.c index 7165460cc..d4c6a48b7 100644 --- a/src/common/mfu_param_path.c +++ b/src/common/mfu_param_path.c @@ -719,7 +719,7 @@ void mfu_param_path_set_all(uint64_t num, const char** paths, mfu_param_path* pa /* unpack recv buffer into caller's params */ ptr = recvbuf; for (i = 0; i < num; i++) { - mfu_unpack_param(&ptr, ¶ms[i]); + 
mfu_unpack_param((const char**)(&ptr), ¶ms[i]); } /* Loop through the list of files &/or directories, and check the params diff --git a/src/dchmod/dchmod.c b/src/dchmod/dchmod.c index 4b085440f..8d4721301 100644 --- a/src/dchmod/dchmod.c +++ b/src/dchmod/dchmod.c @@ -145,7 +145,7 @@ int main(int argc, char** argv) paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ - char** argpaths = &argv[optind]; + const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ diff --git a/src/dcmp/dcmp.c b/src/dcmp/dcmp.c index d69192a57..97ed335a8 100644 --- a/src/dcmp/dcmp.c +++ b/src/dcmp/dcmp.c @@ -2078,7 +2078,7 @@ int main(int argc, char **argv) mfu_param_path* paths = (mfu_param_path*) MFU_MALLOC((size_t)numargs * sizeof(mfu_param_path)); /* process each path */ - const char** argpaths = &argv[optind]; + const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numargs, argpaths, paths); /* advance to next set of options */ @@ -2108,8 +2108,8 @@ int main(int argc, char **argv) const char* path2 = destpath->path; /* map files to ranks based on portion following prefix directory */ - mfu_flist flist3 = mfu_flist_remap(flist1, dcmp_map_fn, (const void*)path1); - mfu_flist flist4 = mfu_flist_remap(flist2, dcmp_map_fn, (const void*)path2); + mfu_flist flist3 = mfu_flist_remap(flist1, (mfu_flist_map_fn)dcmp_map_fn, (const void*)path1); + mfu_flist flist4 = mfu_flist_remap(flist2, (mfu_flist_map_fn)dcmp_map_fn, (const void*)path2); /* map each file name to its index and its comparison state */ strmap* map1 = dcmp_strmap_creat(flist3, path1); diff --git a/src/dcp/dcp.c b/src/dcp/dcp.c index 0ee60e2f4..263863cc5 100644 --- a/src/dcp/dcp.c +++ b/src/dcp/dcp.c @@ -24,14 +24,14 @@ static int input_flist_skip(const char* name, void *args) struct mfu_flist_skip_args *sk_args = (struct mfu_flist_skip_args *)args; /* create mfu_path 
from name */ - const mfu_path* path = mfu_path_from_str(name); + mfu_path* path = mfu_path_from_str(name); /* iterate over each source path */ int i; for (i = 0; i < sk_args->numpaths; i++) { /* create mfu_path of source path */ const char* src_name = sk_args->paths[i].path; - const mfu_path* src_path = mfu_path_from_str(src_name); + mfu_path* src_path = mfu_path_from_str(src_name); /* check whether path is contained within or equal to * source path and if so, we need to copy this file */ @@ -232,7 +232,7 @@ int main(int argc, char** argv) paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ - char** argpaths = &argv[optind]; + const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ diff --git a/src/dcp1/dcp1.c b/src/dcp1/dcp1.c index 4c84fecfe..8cbc05237 100644 --- a/src/dcp1/dcp1.c +++ b/src/dcp1/dcp1.c @@ -347,6 +347,7 @@ int main(int argc, \ }; /* Parse options */ + unsigned long long bytes; while((c = getopt_long(argc, argv, "cb:d:fhpusvk:", \ long_options, &option_index)) != -1) { switch(c) { @@ -464,21 +465,23 @@ int main(int argc, \ break; case 'k': - if (mfu_abtoull(optarg, &DCOPY_chunksize) != MFU_SUCCESS) { + if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (DCOPY_global_rank == 0) { fprintf(stderr, "Failed to convert -k: %s\n", optarg); DCOPY_exit(EXIT_FAILURE); } } + DCOPY_chunksize = (size_t)bytes; break; case 'b': - if (mfu_abtoull(optarg, &DCOPY_blocksize) != MFU_SUCCESS) { + if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (DCOPY_global_rank == 0) { fprintf(stderr, "Failed to convert -b: %s\n", optarg); DCOPY_exit(EXIT_FAILURE); } } - break; + DCOPY_blocksize = (size_t)bytes; + break; case '?': default: diff --git a/src/dreln/CMakeLists.txt b/src/dreln/CMakeLists.txt new file mode 100644 index 000000000..9ff3eca02 --- /dev/null +++ b/src/dreln/CMakeLists.txt @@ -0,0 +1 @@ +MFU_ADD_TOOL(dreln) diff --git 
a/src/dreln/Makefile.am b/src/dreln/Makefile.am deleted file mode 100644 index d161c9d72..000000000 --- a/src/dreln/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -include $(top_srcdir)/common.mk - -bin_PROGRAMS = dreln - -dreln_SOURCES = dreln.c -dreln_CPPFLAGS = -I../common/ $(MPI_CFLAGS) $(libcircle_CFLAGS) -dreln_LDFLAGS = $(MPI_CLDFLAGS) -dreln_LDADD = ../common/libmfu.la $(MPI_CLDFLAGS) $(libcircle_LIBS) diff --git a/src/dreln/dreln.c b/src/dreln/dreln.c index dfa9db53d..b6599d1a7 100644 --- a/src/dreln/dreln.c +++ b/src/dreln/dreln.c @@ -132,7 +132,7 @@ int main (int argc, char* argv[]) numpaths = argc - optind; /* process paths to be walked */ - const char** p = &argv[optind]; + const char** p = (const char**)(&argv[optind]); paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); mfu_param_path_set_all((uint64_t)numpaths, (const char**)p, paths); diff --git a/src/drm/drm.c b/src/drm/drm.c index 18b1d5804..d1ddd1a54 100644 --- a/src/drm/drm.c +++ b/src/drm/drm.c @@ -172,7 +172,7 @@ int main(int argc, char** argv) paths = (mfu_param_path*) MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path)); /* process each path */ - char** argpaths = &argv[optind]; + const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numpaths, argpaths, paths); /* advance to next set of options */ diff --git a/src/dstripe/dstripe.c b/src/dstripe/dstripe.c index ec05ee8d1..2738250f8 100644 --- a/src/dstripe/dstripe.c +++ b/src/dstripe/dstripe.c @@ -319,6 +319,7 @@ int main(int argc, char* argv[]) int verbose = 0; unsigned int numpaths = 0; mfu_param_path* paths = NULL; + unsigned long long bytes; /* default to 1MB stripe size, stripe across all OSTs, and all files are candidates */ int stripes = -1; @@ -349,23 +350,25 @@ int main(int argc, char* argv[]) break; case 's': /* stripe size in bytes */ - if (mfu_abtoull(optarg, &stripe_size) != MFU_SUCCESS) { + if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { printf("Failed to 
parse stripe size: %s\n", optarg); fflush(stdout); } MPI_Abort(MPI_COMM_WORLD, 1); } + stripe_size = (uint64_t)bytes; break; case 'm': /* min file size in bytes */ - if (mfu_abtoull(optarg, &min_size) != MFU_SUCCESS) { + if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS) { if (rank == 0) { printf("Failed to parse minimum file size: %s\n", optarg); fflush(stdout); } MPI_Abort(MPI_COMM_WORLD, 1); } + min_size = (uint64_t)bytes; break; case 'r': /* report striping info */ diff --git a/src/dsync/dsync.c b/src/dsync/dsync.c index 257c16418..c02bb7334 100644 --- a/src/dsync/dsync.c +++ b/src/dsync/dsync.c @@ -1191,7 +1191,7 @@ static int dsync_strmap_compare(mfu_flist src_list, if (!options.dry_run) { /* sync the files that are in the source and destination directories */ - tmp_rc = dsync_sync_files(src_map, dst_map, src_path, dest_path, dst_list, dst_remove_list, src_cp_list, mfu_copy_opts); + tmp_rc = dsync_sync_files(src_map, dst_map, (mfu_param_path*)src_path, (mfu_param_path*)dest_path, dst_list, dst_remove_list, src_cp_list, mfu_copy_opts); if (tmp_rc < 0) { rc = -1; } @@ -2272,7 +2272,7 @@ int main(int argc, char **argv) mfu_param_path* paths = (mfu_param_path*) MFU_MALLOC((size_t)numargs * sizeof(mfu_param_path)); /* process each path */ - const char** argpaths = &argv[optind]; + const char** argpaths = (const char**)(&argv[optind]); mfu_param_path_set_all(numargs, argpaths, paths); /* advance to next set of options */ @@ -2317,8 +2317,8 @@ int main(int argc, char **argv) const char* path2 = destpath->path; /* map files to ranks based on portion following prefix directory */ - mfu_flist flist3 = mfu_flist_remap(flist1, dsync_map_fn, (const void*)path1); - mfu_flist flist4 = mfu_flist_remap(flist2, dsync_map_fn, (const void*)path2); + mfu_flist flist3 = mfu_flist_remap(flist1, (mfu_flist_map_fn)dsync_map_fn, (const void*)path1); + mfu_flist flist4 = mfu_flist_remap(flist2, (mfu_flist_map_fn)dsync_map_fn, (const void*)path2); /* free original file lists */ 
mfu_flist_free(&flist1);