Skip to content

Commit

Permalink
Merge branch 'main' of github.com:tamatebako/libdwarfs into main
Browse files Browse the repository at this point in the history
  • Loading branch information
maxirmx committed Aug 5, 2024
2 parents 536101a + e3ec23b commit e283935
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 73 deletions.
16 changes: 7 additions & 9 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ jobs:
fail-fast: false
matrix:
env:
- { os: 'macos-12', arch: 'x86_64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '13.1' }
- { os: 'macos-14', arch: 'arm64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '14.3.1' }
- { os: 'macos-14', arch: 'arm64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '15.4' }
- { os: 'macos-12', arch: 'x86_64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '13.1' }
- { os: 'macos-14', arch: 'arm64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '14.3.1', LG_VADDR: 39 }
- { os: 'macos-14', arch: 'arm64', ASAN: 'OFF', COVERAGE: 'OFF', LOG_LEVEL: 'warn', xcode: '15.4' }

env: ${{ matrix.env }}

Expand Down Expand Up @@ -106,15 +106,13 @@ jobs:
run: brew install zlib

- name: Install packages
# Already installed: openssl, libevent, libsodium, lz4, xz
# Not installing [comparing to ubuntu as a baseline] libiberty, libunwind, libdwarf, libelf
run: brew install bison flex gnu-sed bash double-conversion boost jemalloc fmt glog
# Already installed: openssl, libevent, libsodium, lz4, xz
# Not installing [comparing to ubuntu as a baseline] libiberty, libunwind, libdwarf, libelf
run: brew install bison flex gnu-sed bash double-conversion boost jemalloc fmt glog googletest

- name: Configure
run: |
xcodebuild -find clang
xcrun --show-sdk-path
cmake -B build -DWITH_TESTS=ON -DWITH_ASAN=${{ env.ASAN }} -DWITH_COVERAGE=${{ env.COVERAGE }} -DTESTS_LOG_LEVEL=${{ env.LOG_LEVEL }}
cmake -B build -DPREFER_SYSTEM_GTEST=ON -DWITH_TESTS=ON -DWITH_ASAN=${{ env.ASAN }} -DWITH_COVERAGE=${{ env.COVERAGE }} -DTESTS_LOG_LEVEL=${{ env.LOG_LEVEL }}
- name: Build
run: cmake --build build --parallel "$CORES"
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/windows-msys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,15 @@ jobs:
- name: Run additional tests
run: tests/scripts/tests.sh

- name: Pack
run: cpack --config build/CPackConfig.cmake

- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: libdwarfs-wr
path: '*.7z'
retention-days: 1
# - name: Pack
# run: cpack --config build/CPackConfig.cmake

# - name: Upload artifacts
# uses: actions/upload-artifact@v4
# with:
# name: libdwarfs-wr
# path: '*.7z'
# retention-days: 1

publish:
name: Publish artifacts
Expand Down
47 changes: 36 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ cmake_policy(SET CMP0048 NEW)
cmake_policy(SET CMP0091 NEW)
# DOWNLOAD_EXTRACT_TIMESTAMP option default = true
cmake_policy(SET CMP0135 NEW)
# FindBoost module
# Please refer to the comment regarding 'set(Boost_NO_BOOST_CMAKE ON)' below
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.0")
cmake_policy(SET CMP0167 OLD)
endif(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.0")

include(tools/cmake-scripts/version.cmake)
determine_version("${CMAKE_CURRENT_SOURCE_DIR}" LIBDWARFS_WR)
Expand Down Expand Up @@ -131,6 +136,10 @@ endif(NOT CMAKE_BUILD_TYPE)
# Reasonable defaults
set(USE_JEMALLOC ON)
set(WITH_JEMALLOC_BUILD OFF)
set(JEMALLOC_LG_VADDR "")
set(JEMALLOC_LG_VADDR_MSG "")
set(JEMALLOC_LG_PAGE "")
set(JEMALLOC_LG_PAGE_MSG "")
set(WITH_GLOG_BUILD OFF)
set(WITH_GFLAGS_BUILD OFF)
set(WITH_DC_BUILD OFF)
Expand Down Expand Up @@ -186,10 +195,20 @@ else()
set(WITH_BROTLI_BUILD ON)
endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
elseif("${OSTYPE_TXT}" MATCHES "^darwin.*")
message(STATUS "jemalloc homebrew library is not good enough, building locally")
set(IS_DARWIN ON)
# To be removed
# set(WITH_JEMALLOC_BUILD ON)
message(STATUS "Building jemalloc library locally")
set(WITH_JEMALLOC_BUILD ON)
if(DEFINED ENV{LG_VADDR})
set(JEMALLOC_LG_VADDR --with-lg-vaddr=$ENV{LG_VADDR})
set(JEMALLOC_LG_VADDR_MSG ", lg_vaddr = $ENV{LG_VADDR}")
message(STATUS "Using LG_VADDR environment variable: ${JEMALLOC_LG_VADDR}")
endif(DEFINED ENV{LG_VADDR})
if(DEFINED ENV{LG_PAGE})
set(JEMALLOC_LG_PAGE --with-lg-page=$ENV{LG_PAGE})
set(JEMALLOC_LG_PAGE_MSG ", lg_page = $ENV{LG_PAGE}")
message(STATUS "Using LG_PAGE environment variable: ${JEMALLOC_LG_PAGE}")
endif(DEFINED ENV{LG_PAGE})

endif()
endif(MSVC)

Expand All @@ -209,9 +228,9 @@ else(NOT IS_WINDOWS AND NOT IS_MSYS)
set(RB_W32 ON)
endif(NOT IS_WINDOWS AND NOT IS_MSYS)
# -----------------------------------------------------------------------------
# - "darwin.*" will have IS_MUSL flag set
# - "darwin.*" will have IS_DARWIN flag set
# -----------------------------------------------------------------------------
# - "linux-musl.*" will have IS_DARWIN flag set
# - "linux-musl.*" will have IS_MUSL flag set
# -----------------------------------------------------------------------------
# - ... everything else ... will be treated as linux-gnu but in reality
# only Ubuntu is supported/tested
Expand All @@ -227,7 +246,7 @@ message(STATUS "Tebako build scope: ${TEBAKO_BUILD_SCOPE}")
if (IS_WINDOWS)
message(STATUS "Building for Ruby Win32: ${RB_W32}")
endif(IS_WINDOWS)
message(STATUS "Use jemalloc: ${USE_JEMALLOC}")
message(STATUS "Use jemalloc: ${USE_JEMALLOC}${JEMALLOC_LG_VADDR_MSG}${JEMALLOC_LG_PAGE_MSG}")
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS " with ASAN: ${WITH_ASAN}")
message(STATUS " with coverage: ${WITH_COVERAGE}")
Expand Down Expand Up @@ -284,7 +303,7 @@ if(NOT IS_WINDOWS)
message(STATUS "glog static library was not found, building locally")
set(WITH_GLOG_BUILD ON)
if(IS_DARWIN)
message(STATUS "building gflags locally as well because os MacOS specifics")
message(STATUS "building gflags locally as well because of MacOS specifics")
set(WITH_GFLAGS_BUILD ON)
endif(IS_DARWIN)
endif()
Expand Down Expand Up @@ -669,6 +688,7 @@ if(WITH_JEMALLOC_BUILD)
--disable-shared
--datarootdir=${DEPS}/share.dummy
--disable-syscall
${JEMALLOC_LG_VADDR} ${JEMALLOC_LG_PAGE}
SOURCE_DIR ${JEMALLOC_SOURCE_DIR}
BINARY_DIR ${JEMALLOC_BINARY_DIR}
BUILD_BYPRODUCTS ${__LIBJEMALLOC}
Expand Down Expand Up @@ -866,7 +886,10 @@ if(WITH_TESTS)
@ONLY
)

if(NOT PREFER_SYSTEM_GTEST)
if(PREFER_SYSTEM_GTEST)
find_package(GTest REQUIRED)
set(GTestMain GTest::Main)
else(PREFER_SYSTEM_GTEST)
message(STATUS "Fetching googletest")
include(FetchContent)

Expand All @@ -882,7 +905,9 @@ if(WITH_TESTS)

set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
endif(NOT PREFER_SYSTEM_GTEST)

set(GTestMain gtest_main)
endif(PREFER_SYSTEM_GTEST)

enable_testing()
include(GoogleTest)
Expand Down Expand Up @@ -1176,7 +1201,7 @@ endif(RB_W32)
list(APPEND _LIBRARIES ${Boost_LIBRARIES} ${_LIBEVENT})

if(IS_DARWIN)
list(APPEND _LIBRARIES ${_LIBZ} ${_LIBLZMA} c++ c++abi)
list(APPEND _LIBRARIES ${_LIBZ} ${_LIBLZMA} c++abi)
else(IS_DARWIN)
# libgcc_eh shall precede libunwind to avoid duplicate symbols during linking
# liblzma shall follow libunwind because it is a libunwind dependency
Expand All @@ -1197,7 +1222,7 @@ endif(RB_W32)
endif(IS_DARWIN)

target_link_libraries(wr-bin ${_LIBRARIES})
target_link_libraries(wr-tests gtest_main gtest ${_LIBRARIES})
target_link_libraries(wr-tests ${GTestMain} ${_LIBRARIES})

if(WITH_JEMALLOC_BUILD)
add_dwarfs_library(_LIBJEMALLOC ${JEMALLOC_PRJ} wr-bin)
Expand Down
47 changes: 39 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,42 @@ This is libdwarfs-wr - a wrapper for https://github.com/mhx/dwarfs core library
* C interface (as opposed to dwarfs C++ API)
* fd (file descriptor) addressing above dwarfs inode implementation

### CMake project options

* **WITH_TESTS**, default:ON -- If this option is ON, build script looks for Google test, installs INCBIN, and build google tests and static test application.
* **WITH_ASAN**, default:ON -- If this option is ON, address amd memory sanitizer tests are performed.
* **WITH_COVERAGE**, default:ON -- If this option is ON, test coverage analysis is perfornmed using codecov.
* **RB_W32**, default:OFF -- If this option is ON, the version integrated with Ruby library is built.
* **WITH_LINK_TEST**, default:ON -- If this option is ON, symbolic/hard link tests are enabled.
* **USE_TEMP_FS**, default:OFF -- If this option is ON, the data for test file system is created under /tmp. Otherwise in-source location is used.

### CMake Project Options

* **WITH_TESTS**, default: ON -- If this option is ON, the build script looks for Google Test, installs INCBIN, and builds Google tests and a static test application.
* **WITH_ASAN**, default: ON -- If this option is ON, address and memory sanitizer tests are performed.
* **WITH_COVERAGE**, default: ON -- If this option is ON, test coverage analysis is performed using Codecov.
* **RB_W32**, default: OFF -- If this option is ON, the version integrated with the Ruby library is built.
* **WITH_LINK_TEST**, default: ON -- If this option is ON, symbolic/hard link tests are enabled.
* **USE_TEMP_FS**, default: OFF -- If this option is ON, the data for the test file system is created under /tmp. Otherwise, an in-source location is used.

### jemalloc Library Build on macOS

The `libdwarfs` build script creates an additional jemalloc installation on macOS. This is done to satisfy the magic that folly applies during linking while still using a static library.
If the library is built in an emulated environment (QEMU, Rosetta, etc.), there are known issues ([jemalloc issue #1997](https://github.com/jemalloc/jemalloc/issues/1997)) where jemalloc incorrectly determines the number of significant virtual address bits (the `lg-vaddr` parameter).

These issues can be fixed by explicitly setting the `--with-lg-vaddr` parameter for the jemalloc build. We decided not to automate this since we do not feel that we can provide reasonable test coverage. Instead, our build script accepts the `LG_VADDR` environment variable and passes it to the jemalloc build as `--with-lg-vaddr=${LG_VADDR}`.

The following simple script sets `LG_VADDR`. Please note that it is provided for illustration only.

```bash
#!/bin/bash

# Check the CPU architecture
ARCH=$(uname -m)

# Check if running under Rosetta 2 emulation
if [[ "$ARCH" == "x86_64" && $(sysctl -n sysctl.proc_translated) == "1" ]]; then
echo "Running on Apple Silicon under Rosetta 2 emulation"
export LG_VADDR=39
elif [[ "$ARCH" == "arm64" ]]; then
echo "Running on Apple Silicon"
export LG_VADDR=39
else
echo "Running on Intel Silicon"
export LG_VADDR=48
fi

echo "Setting lg-vaddr to $LG_VADDR"
```
116 changes: 81 additions & 35 deletions src/tebako-fd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,26 +289,48 @@ ssize_t sync_tebako_fdtable::readv(int vfd,
const struct iovec* iov,
int iovcnt) noexcept
{
uint32_t ino;
off_t pos;
// Some specific error conditions:
// EOVERFLOW - the resulting file offset cannot be represented in an off_t.
// EINVAL - the sum of the iov_len values overflows an ssize_t value.
// EINVAL - the vector count, iovcnt, is less than zero or greater
// than the permitted maximum.

ssize_t ret = DWARFS_INVALID_FD;
auto p_fdtable = *rlock();
auto p_fd = p_fdtable->find(vfd);
if (p_fd != p_fdtable->end()) {
ret = 0;
for (int i = 0; i < iovcnt; ++i) {
ssize_t ssize =
dwarfs_inode_read(p_fd->second->st.st_ino, iov[i].iov_base,
iov[i].iov_len, p_fd->second->pos);
if (ssize > 0) {
p_fd->second->pos += ssize;
ret += ssize;
}
else {
if (ssize < 0) {
ret = DWARFS_IO_ERROR;
if (iovcnt < 0) {
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
}
else {
uint32_t ino;
off_t pos;
auto p_fdtable = *rlock();
auto p_fd = p_fdtable->find(vfd);
if (p_fd != p_fdtable->end()) {
ret = 0;
for (int i = 0; i < iovcnt; ++i) {
ssize_t ssize =
dwarfs_inode_read(p_fd->second->st.st_ino, iov[i].iov_base,
iov[i].iov_len, p_fd->second->pos);
if (ssize > 0) {
if (p_fd->second->pos > std::numeric_limits<off_t>::max() - ssize) {
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
ret = DWARFS_IO_ERROR;
break;
}
if (ret > std::numeric_limits<ssize_t>::max() - ssize) {
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
break;
}
p_fd->second->pos += ssize;
ret += ssize;
}
else {
if (ssize < 0) {
ret = DWARFS_IO_ERROR;
}
break;
}
break;
}
}
}
Expand All @@ -323,38 +345,62 @@ off_t sync_tebako_fdtable::lseek(int vfd, off_t offset, int whence) noexcept
if (p_fd != p_fdtable->end()) {
switch (whence) {
case SEEK_SET:
ret = p_fd->second->pos = offset;
if (offset < 0) {
// [EINVAL] The resulting file offset would be negative for a regular
// file, block special file, or directory.
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
}
else {
ret = p_fd->second->pos = offset;
}
break;
case SEEK_CUR:
ret = p_fd->second->pos = p_fd->second->pos + offset;
if (ret < 0) {
// [EOVERFLOW] The resulting file offset would be a value which cannot
// be represented correctly in an object of type off_t.
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
if (offset < 0 && p_fd->second->pos < -offset) {
// [EINVAL] The resulting file offset would be negative for a regular
// file, block special file, or directory.
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
}
else {
if (offset > 0 &&
p_fd->second->pos > std::numeric_limits<off_t>::max() - offset) {
// [EOVERFLOW] The resulting file offset would be a value which
// cannot be represented correctly in an object of type off_t.
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
ret = DWARFS_IO_ERROR;
}
else {
ret = p_fd->second->pos = p_fd->second->pos + offset;
}
}
break;
case SEEK_END:
ret = p_fd->second->pos = p_fd->second->st.st_size + offset;
if (ret < 0) {
// [EOVERFLOW] The resulting file offset would be a value which cannot
// be represented correctly in an object of type off_t.
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
if (offset < 0 && p_fd->second->st.st_size < -offset) {
// [EINVAL] The resulting file offset would be negative for a regular
// file, block special file, or directory.
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
}
else {
if (offset > 0 && p_fd->second->st.st_size >
std::numeric_limits<off_t>::max() - offset) {
// [EOVERFLOW] The resulting file offset would be a value which
// cannot be represented correctly in an object of type off_t.
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
ret = DWARFS_IO_ERROR;
}
else {
ret = p_fd->second->pos = p_fd->second->st.st_size + offset;
}
}
break;
default:
// [EINVAL] The whence argument is not a proper value, or the resulting
// file offset would be negative for a regular file, block special file,
// or directory.
TEBAKO_SET_LAST_ERROR(EINVAL);
ret = DWARFS_IO_ERROR;
break;
}
if (ret < 0) {
TEBAKO_SET_LAST_ERROR(EOVERFLOW);
ret = DWARFS_IO_ERROR;
}
}
return ret;
}
Expand Down
Loading

0 comments on commit e283935

Please sign in to comment.