// Copyright 2019 Bloomberg Finance L.P
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <buildboxcommon_casclient.h>
#include <buildboxcommon_digestgenerator.h>
#include <buildboxcommon_direntwrapper.h>
#include <buildboxcommon_exception.h>
#include <buildboxcommon_fileutils.h>
#include <buildboxcommon_futuregroup.h>
#include <buildboxcommon_logging.h>
#include <buildboxcommon_merklize.h>
#include <buildboxcommon_stringutils.h>
#include <buildboxcommon_timeutils.h>

#include <algorithm>
#include <array>
#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstring>
#include <dirent.h>
#include <filesystem>
#include <fnmatch.h>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <sys/stat.h>
#include <sys/types.h>
#include <system_error>
#include <ThreadPool.h>
#include <unistd.h>
#include <unordered_map>
#include <utility>
#include <vector>

/*
 * TODO: Remove the following block when homebrew (or we) stop supporting
 * macOS 13.
 *
 * The following addresses the homebrew formula build failure on macOS 13
 * after we started using std::unordered_map keyed by std::filesystem::path.
 *
 * std::unordered_map requires its key type to be hashable and comparable for
 * equality. std::filesystem::path provides operator==, but not all standard
 * library implementations provide a specialization of std::hash for it by
 * default.
 *
 * On macOS 13, Apple's libc++ implementation does not define
 * std::hash<std::filesystem::path>. Define the hash specialization for
 * std::filesystem::path to allow the code to build on macOS 13.
 */
#if defined(__APPLE__) && __clang_major__ < 15
namespace std {
// Hash support for std::filesystem::path, implemented by forwarding to
// std::filesystem::hash_value(), so that paths can be used as keys of
// unordered containers. Only compiled for Apple toolchains whose clang major
// version is below 15.
template <> struct hash<std::filesystem::path> {
    std::size_t operator()(const std::filesystem::path &p) const noexcept
    {
        return std::filesystem::hash_value(p);
    }
};
} // namespace std
#endif

namespace buildboxcommon {

namespace {
// Digest function used when the caller does not supply one: hashes the
// contents reachable through `fd` with DigestGenerator.
Digest hashFile(int fd)
{
    return DigestGenerator::hash(fd);
}

// Reject names that cannot be used as a single directory entry: the empty
// string, "." and "..". Throws std::invalid_argument for those.
void validateName(const std::string &name)
{
    const bool isCanonical = !name.empty() && name != "." && name != "..";
    if (isCanonical) {
        return;
    }

    BUILDBOXCOMMON_THROW_EXCEPTION(
        std::invalid_argument,
        "NestedDirectory: non-canonical path argument");
}

// Wrap a directory in a path
void wrapDirectory(const std::filesystem::path &path, const Directory &leaf,
                   MerklizeResult *result)
{
    // collect path components
    std::vector<std::filesystem::path::const_iterator> components;
    for (auto pathIt = path.begin(); pathIt != path.end(); ++pathIt) {
        components.push_back(pathIt);
    }

    Digest current;
    {
        const auto leafBlob = leaf.SerializeAsString();
        const auto leafDigest = DigestGenerator::hash(leafBlob);
        result->d_digestToDirectoryBlob.emplace(leafDigest, leafBlob);
        result->d_digestToDirectory.emplace(leafDigest, leaf);
        current = leafDigest;
    }

    for (auto pathIt = components.crbegin(); pathIt != components.crend();
         ++pathIt) {
        const auto &component = *pathIt;

        DirectoryNode childNode;
        childNode.set_name(component->string());
        *childNode.mutable_digest() = current;
        Directory parent;
        *parent.add_directories() = std::move(childNode);
        const auto parentBlob = parent.SerializeAsString();
        const auto parentDigest = DigestGenerator::hash(parentBlob);
        result->d_digestToDirectoryBlob.emplace(parentDigest, parentBlob);
        result->d_digestToDirectory.emplace(parentDigest, parent);
        current = parentDigest;
    }

    result->d_rootDigest = current;
}

} // namespace

// Construct a File from `path` using the default digest function.
// Delegates to the overload that takes a FileDigestFunction, passing
// `hashFile`; all other arguments are forwarded unchanged.
File::File(const char *path,
           const std::vector<std::string> &capture_properties,
           const UnixModeUpdater &unixModeUpdater,
           const std::map<std::string, std::string> &nodeProperties,
           bool allowChmodToRead)
    : File(path, hashFile, capture_properties, unixModeUpdater, nodeProperties,
           allowChmodToRead)
{
}

// Construct a File from `path` with a caller-supplied digest function.
// Delegates to the directory-fd overload with AT_FDCWD as the directory, so
// a relative `path` is interpreted relative to the current working directory.
File::File(const char *path, const FileDigestFunction &fileDigestFunc,
           const std::vector<std::string> &capture_properties,
           const UnixModeUpdater &unixModeUpdater,
           const std::map<std::string, std::string> &nodeProperties,
           bool allowChmodToRead)
    : File(AT_FDCWD, path, fileDigestFunc, capture_properties, unixModeUpdater,
           nodeProperties, allowChmodToRead)
{
}

// Try to open a regular file that lacks the owner-read permission bit by
// temporarily adding that bit, opening the file through `openFunc`, and then
// putting the original mode back.
//
// Parameters:
//   dirfd, path - location of the file, as accepted by fchmodat()
//   flags       - flags forwarded to `openFunc`
//   openFunc    - callback that performs the actual open
//
// Returns a pair of (file descriptor, original mode):
//   - If `path` is not a regular file, or it already has S_IRUSR set, nothing
//     is attempted: the descriptor is -1, the mode is empty and `errno` is
//     left as it was on entry so the caller can still report its own failure.
//   - If the retry succeeds, the descriptor is valid and the mode holds the
//     file's original permission bits.
//   - If the retry fails, the descriptor is negative, the mode is empty and
//     `errno` reflects the failed open.
//
// Throws std::system_error if the read bit cannot be added. Exceptions thrown
// by `openFunc` propagate after the original mode has been restored.
static std::pair<FileDescriptor, std::optional<mode_t>> handleUnreadableFile(
    int dirfd, const char *path, int flags,
    const std::function<FileDescriptor(int, const char *, int)> &openFunc)
{
    // The file inspection helpers below may modify errno; remember the value
    // describing the caller's failed open so it can be reinstated if no retry
    // is attempted.
    const int callerErrno = errno;

    if (FileUtils::isRegularFileNoFollow(dirfd, path)) {
        const mode_t originalMode =
            FileUtils::getFileStat(dirfd, path).st_mode & 07777;
        if ((originalMode & S_IRUSR) == 0) {
            if (fchmodat(dirfd, path, originalMode | S_IRUSR, 0) != 0) {
                BUILDBOXCOMMON_THROW_SYSTEM_EXCEPTION(
                    std::system_error, errno, std::system_category,
                    "Failed to chmod path \"" << path
                                              << "\" to add read permissions");
            }

            std::optional<mode_t> originalModeOverride;
            FileDescriptor fd(-1);
            try {
                fd = openFunc(dirfd, path, flags);
                if (fd.get() >= 0) {
                    originalModeOverride = originalMode;
                    // Best effort: restore the mode through the open
                    // descriptor
                    fchmod(fd.get(), originalMode);
                }
                else {
                    // Restoring the mode must not hide why the open failed
                    const int openErrno = errno;
                    fchmodat(dirfd, path, originalMode, 0);
                    errno = openErrno;
                }
            }
            catch (...) {
                fchmodat(dirfd, path, originalMode, 0);
                throw;
            }
            return std::make_pair(std::move(fd), originalModeOverride);
        }
    }

    errno = callerErrno;
    return std::make_pair(FileDescriptor(-1), std::nullopt);
}

// Construct a File by opening `path` relative to `dirfd` read-only (with
// O_CLOEXEC) and capturing its digest and properties via init().
//
// If the open fails with EACCES and `allowChmodToRead` is set, the open is
// retried through handleUnreadableFile(); the mode it reports is then passed
// to init() as the mode override.
//
// Throws std::system_error (built from errno) if no descriptor could be
// obtained.
File::File(int dirfd, const char *path,
           const FileDigestFunction &fileDigestFunc,
           const std::vector<std::string> &capture_properties,
           const UnixModeUpdater &unixModeUpdater,
           const std::map<std::string, std::string> &nodeProperties,
           bool allowChmodToRead)
{
    FileDescriptor fd(openat(dirfd, path, O_RDONLY | O_CLOEXEC));
    std::optional<mode_t> originalModeOverride;

    // errno is inspected right after the failed openat(), before anything
    // else gets a chance to modify it
    if (fd.get() < 0 && errno == EACCES && allowChmodToRead) {
        auto [retry_fd, mode_override] =
            handleUnreadableFile(dirfd, path, O_RDONLY | O_CLOEXEC,
                                 [](int d, const char *p, int f) {
                                     return FileDescriptor(openat(d, p, f));
                                 });
        fd = std::move(retry_fd);
        originalModeOverride = mode_override;
    }

    if (fd.get() < 0) {
        BUILDBOXCOMMON_THROW_SYSTEM_EXCEPTION(
            std::system_error, errno, std::system_category,
            "Failed to open path \"" << path << "\"");
    }
    init(fd.get(), fileDigestFunc, capture_properties, unixModeUpdater,
         nodeProperties, originalModeOverride);
}

// Construct a File from an already-open descriptor using the default digest
// function. Delegates to the overload that takes a FileDigestFunction,
// passing `hashFile`; all other arguments are forwarded unchanged.
File::File(int fd, const std::vector<std::string> &capture_properties,
           const UnixModeUpdater &unixModeUpdater,
           const std::map<std::string, std::string> &nodeProperties,
           bool allowChmodToRead, const std::optional<mode_t> &modeOverride)
    : File(fd, hashFile, capture_properties, unixModeUpdater, nodeProperties,
           allowChmodToRead, modeOverride)
{
}

// Construct a File from an already-open descriptor with a caller-supplied
// digest function by forwarding to init(). The `allowChmodToRead` parameter
// is accepted but not used by this overload.
File::File(int fd, const FileDigestFunction &fileDigestFunc,
           const std::vector<std::string> &capture_properties,
           const UnixModeUpdater &unixModeUpdater,
           const std::map<std::string, std::string> &nodeProperties,
           bool /* allowChmodToRead */,
           const std::optional<mode_t> &modeOverride)
{
    init(fd, fileDigestFunc, capture_properties, unixModeUpdater,
         nodeProperties, modeOverride);
}

// Populate this File from the open descriptor `fd`.
//
// The permission bits come from `modeOverride` when it holds a value and from
// the descriptor otherwise; they determine the executable flag. The digest is
// produced by `fileDigestFunc`. Of the names in `capture_properties`, "mtime"
// and "unix_mode" are captured (the latter after passing through
// `unixModeUpdater` when one is set); other names are skipped. Every entry of
// `nodeProperties` is appended as a node property.
void File::init(int fd, const FileDigestFunction &fileDigestFunc,
                const std::vector<std::string> &capture_properties,
                const UnixModeUpdater &unixModeUpdater,
                const std::map<std::string, std::string> &nodeProperties,
                const std::optional<mode_t> &modeOverride)
{
    const auto mode = modeOverride ? *modeOverride
                                   : (FileUtils::getUnixMode(fd) & 07777);
    d_executable = (mode & S_IXUSR) != 0;
    d_digest = fileDigestFunc(fd);

    for (const std::string &requested : capture_properties) {
        if (requested == "mtime") {
            const auto mtime = FileUtils::getFileMtime(fd);
            d_nodeProperties.mutable_mtime()->CopyFrom(
                TimeUtils::make_timestamp(mtime));
            continue;
        }

        if (requested == "unix_mode") {
            mode_t capturedMode = static_cast<mode_t>(mode);
            if (unixModeUpdater) {
                capturedMode =
                    static_cast<mode_t>(unixModeUpdater(capturedMode));
            }
            d_nodeProperties.mutable_unix_mode()->set_value(
                static_cast<uint32_t>(capturedMode));
        }
    }

    for (const auto &[propertyName, propertyValue] : nodeProperties) {
        auto *property = d_nodeProperties.add_properties();
        property->set_name(propertyName);
        property->set_value(propertyValue);
    }
}

// Build the FileNode message describing this file under the entry name
// `name`. Node properties are attached only when they serialize to a
// non-empty message.
FileNode File::to_filenode(const std::string &name) const
{
    FileNode node;
    node.set_name(name);
    node.set_is_executable(d_executable);
    *node.mutable_digest() = d_digest;

    const bool hasNodeProperties = d_nodeProperties.ByteSizeLong() != 0;
    if (hasNodeProperties) {
        node.mutable_node_properties()->CopyFrom(d_nodeProperties);
    }

    return node;
}

// Split `relativePath` at its last '/' into a parent directory and an entry
// name.
//
// `*name` receives the part after the last slash (or the whole path when
// there is no slash). `*subdir` receives the directory the entry belongs to:
// this directory when the parent part is empty, otherwise the directory
// returned by tryAddDirectory() for the parent part.
//
// Returns false if the parent directory could not be added. Throws
// std::invalid_argument (via validateName) if the entry name is not valid.
bool NestedDirectory::getSubdirAndNameForAdd(const char *relativePath,
                                             NestedDirectory **subdir,
                                             std::string *name)
{
    const std::string_view fullPath(relativePath);
    const size_t lastSlash = fullPath.rfind('/');

    if (lastSlash == std::string_view::npos) {
        // No directory part: the entry lives in the current directory
        *subdir = this;
        *name = std::string(fullPath);
    }
    else {
        *name = std::string(fullPath.substr(lastSlash + 1));

        const std::string parentPath(fullPath.substr(0, lastSlash));
        if (parentPath.empty()) {
            // Leading slash only: the entry lives in the current directory
            *subdir = this;
        }
        else {
            *subdir = tryAddDirectory(parentPath.c_str());
            if (*subdir == nullptr) {
                return false;
            }
        }
    }

    validateName(*name);

    return true;
}

bool NestedDirectory::tryAddFile(const File &file, const char *relativePath)
{
    NestedDirectory *subdir = nullptr;
    std::string name;

    if (!getSubdirAndNameForAdd(relativePath, &subdir, &name)) {
        return false;
    }

    if (subdir->d_files.count(name) > 0) {
        // Fail if existing file is different
        return file == subdir->d_files[name];
    }
    else if (subdir->d_filePaths.count(name) > 0 ||
             subdir->d_symlinks.count(name) > 0 ||
             subdir->d_subdirs->count(name) > 0) {
        // Conflict with existing file, symlink or directory
        return false;
    }

    subdir->d_files[name] = file;
    return true;
}

// Add `file` at `relativePath`; throws std::runtime_error when tryAddFile()
// reports failure.
void NestedDirectory::add(const File &file, const char *relativePath)
{
    const bool added = tryAddFile(file, relativePath);
    if (added) {
        return;
    }

    BUILDBOXCOMMON_THROW_EXCEPTION(
        std::runtime_error, "NestedDirectory: Cannot add file, path '"
                                << relativePath << "' already exists");
}

bool NestedDirectory::tryAddFilePath(const std::filesystem::path &path,
                                     const char *relativePath)
{
    NestedDirectory *subdir = nullptr;
    std::string name;

    const auto canonicalPath = std::filesystem::canonical(path);

    if (!getSubdirAndNameForAdd(relativePath, &subdir, &name)) {
        return false;
    }

    if (subdir->d_filePaths.count(name) > 0) {
        // Fail if existing file is different
        return canonicalPath == subdir->d_filePaths[name];
    }
    else if (subdir->d_files.count(name) > 0 ||
             subdir->d_symlinks.count(name) > 0 ||
             subdir->d_subdirs->count(name) > 0) {
        // Conflict with existing file, symlink or directory
        return false;
    }

    subdir->d_filePaths[name] = canonicalPath;
    return true;
}

// Register `path` at `relativePath`; throws std::runtime_error when
// tryAddFilePath() reports failure.
void NestedDirectory::addFilePath(const std::filesystem::path &path,
                                  const char *relativePath)
{
    const bool added = tryAddFilePath(path, relativePath);
    if (added) {
        return;
    }

    BUILDBOXCOMMON_THROW_EXCEPTION(
        std::runtime_error, "NestedDirectory: Cannot add file, path '"
                                << relativePath << "' already exists");
}

bool NestedDirectory::tryAddSymlink(const std::string &target,
                                    const char *relativePath)
{
    NestedDirectory *subdir = nullptr;
    std::string name;

    if (!getSubdirAndNameForAdd(relativePath, &subdir, &name)) {
        return false;
    }

    if (subdir->d_symlinks.count(name) > 0) {
        // Fail if existing symlink has a different target
        return target == subdir->d_symlinks[name];
    }
    else if (subdir->d_files.count(name) > 0 ||
             subdir->d_filePaths.count(name) > 0 ||
             subdir->d_subdirs->count(name) > 0) {
        // Conflict with existing file or directory
        return false;
    }

    subdir->d_symlinks[name] = target;
    return true;
}

// Add a symlink to `target` at `relativePath`; throws std::runtime_error
// when tryAddSymlink() reports failure.
void NestedDirectory::addSymlink(const std::string &target,
                                 const char *relativePath)
{
    const bool added = tryAddSymlink(target, relativePath);
    if (added) {
        return;
    }

    BUILDBOXCOMMON_THROW_EXCEPTION(
        std::runtime_error,
        "NestedDirectory: Cannot add symlink, path already exists");
}

// Ensure the directory `directory` (a '/'-separated path relative to this
// directory) exists, creating intermediate directories as needed.
//
// Returns a pointer to the innermost directory. A lone "/" and empty leading
// components resolve to this directory. Returns nullptr when a component is
// already present as a file, pending file path or symlink. Throws
// std::invalid_argument (via validateName) for invalid component names.
NestedDirectory *NestedDirectory::tryAddDirectory(const char *directory)
{
    const std::string_view fullPath(directory);

    // A forward slash by itself is not a valid input directory
    if (fullPath == "/") {
        return this;
    }

    // True when `entry` already exists here as something other than a
    // directory
    const auto clashesWithNonDirectory = [this](const std::string &entry) {
        return d_files.count(entry) > 0 || d_filePaths.count(entry) > 0 ||
               d_symlinks.count(entry) > 0;
    };

    const size_t firstSlash = fullPath.find('/');
    if (firstSlash == std::string_view::npos) {
        // Single component: look it up or create it right here
        const std::string leaf(fullPath);
        if (d_subdirs->count(leaf) == 0) {
            validateName(leaf);
            if (clashesWithNonDirectory(leaf)) {
                // Conflict with existing file or symlink
                return nullptr;
            }
            (*d_subdirs)[leaf] = NestedDirectory();
        }
        return &(*d_subdirs)[leaf];
    }

    // Several components: peel off the first and recurse on the remainder,
    // which is still null-terminated as it is a suffix of `directory`
    const char *remainder = fullPath.substr(firstSlash + 1).data();
    const std::string head(fullPath.substr(0, firstSlash));
    if (head.empty()) {
        return this->tryAddDirectory(remainder);
    }

    validateName(head);
    if (clashesWithNonDirectory(head)) {
        // Conflict with existing file or symlink
        return nullptr;
    }
    return (*d_subdirs)[head].tryAddDirectory(remainder);
}

void NestedDirectory::addDirectory(const char *directory)
{
    if (tryAddDirectory(directory) == nullptr) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            std::runtime_error,
            "NestedDirectory: Cannot create directory, path already exists");
    }
}

void NestedDirectory::addNodeProperty(const std::string &name,
                                      const std::string &value)
{
    auto propertyPtr = d_nodeProperties.add_properties();
    propertyPtr->set_name(name);
    propertyPtr->set_value(value);
}

// Append every entry of `nodeProperties` as a node property of this
// directory, in the map's iteration order.
void NestedDirectory::addNodeProperties(
    const std::map<std::string, std::string> &nodeProperties)
{
    for (const auto &property : nodeProperties) {
        addNodeProperty(property.first, property.second);
    }
}

// Hash all pending file paths of this directory tree: first schedule one
// task per file on a FutureGroup built from `threadPool`, then collect the
// results, recording each digest's source path in `digestToFilepaths`.
void NestedDirectory::hashFiles(ThreadPool *threadPool,
                                digest_string_map *digestToFilepaths)
{
    FutureGroup<File> pendingHashes(threadPool);

    createFileFutures(&pendingHashes);
    waitForFileFutures(digestToFilepaths);
}

// Schedule one File construction per pending file path in this directory and,
// recursively, in all subdirectories. The resulting futures are stored in
// `d_fileFutures`, keyed by the entry name.
void NestedDirectory::createFileFutures(FutureGroup<File> *futureGroup)
{
    for (const auto &pending : d_filePaths) {
        // The task owns its own copy of the source path
        auto buildFile = [sourcePath = pending.second]() {
            // This follows symlinks
            return File(sourcePath.c_str());
        };
        d_fileFutures.emplace(pending.first, futureGroup->add(buildFile));
    }

    for (auto &child : *d_subdirs) {
        child.second.createFileFutures(futureGroup);
    }
}

// Collect the results of the futures created by createFileFutures() for this
// directory and, recursively, its subdirectories. Each result is stored in
// `d_files`, and `digestToFilepaths` maps its digest to the source path. The
// pending futures and file paths are cleared afterwards.
void NestedDirectory::waitForFileFutures(digest_string_map *digestToFilepaths)
{
    for (const auto &pending : d_fileFutures) {
        const auto &merklePath = pending.first;

        d_files[merklePath] = pending.second.get();

        const auto &digest = d_files[merklePath].d_digest;
        (*digestToFilepaths)[digest] = d_filePaths[merklePath];
    }

    d_fileFutures.clear();
    d_filePaths.clear();

    for (auto &child : *d_subdirs) {
        child.second.waitForFileFutures(digestToFilepaths);
    }
}

// Resolve all pending file paths of this directory tree through the
// CaptureFiles call of `casClient` (with upload skipped).
//
// A response with PERMISSION_DENIED is replaced by an in-process File built
// from the same path; any other non-OK status is raised through
// GrpcError::throwGrpcError. Each response's digest is mapped to its path in
// `digestToFilepaths`, and the responses are then turned into files by
// processResponse().
void NestedDirectory::captureFiles(CASClient *casClient,
                                   digest_string_map *digestToFilepaths)
{
    std::vector<std::string> sourcePaths;
    collectFilePaths(&sourcePaths);

    auto captured =
        casClient->captureFiles(sourcePaths, {},
                                false /* bypass_local_cache */,
                                true /* skip_upload */);

    std::unordered_map<std::string, const CaptureFilesResponse_Response *>
        responseByPath;
    for (auto &entry : captured) {
        const auto &entryStatus = entry.status();
        const bool permissionDenied =
            entryStatus.code() == grpc::StatusCode::PERMISSION_DENIED;

        if (!permissionDenied &&
            entryStatus.code() != grpc::StatusCode::OK) {
            GrpcError::throwGrpcError(grpc::Status(
                static_cast<grpc::StatusCode>(entryStatus.code()),
                entryStatus.message()));
        }

        if (permissionDenied) {
            // Fall back to in-process hashing in case buildbox-casd
            // was unable to access the input file.
            BUILDBOX_LOG_DEBUG("LocalCAS CaptureFiles fallback for \""
                               << entry.path() << "\"");
            const File fallback(entry.path().c_str());
            entry.mutable_digest()->CopyFrom(fallback.d_digest);
            entry.set_is_executable(fallback.d_executable);
            *entry.mutable_node_properties() = fallback.d_nodeProperties;
        }

        responseByPath[entry.path()] = &entry;
        (*digestToFilepaths)[entry.digest()] = entry.path();
    }

    processResponse<CaptureFilesResponse_Response>(&responseByPath);
}

// Resolve all pending file paths of this directory tree through the
// HashFiles call of `casClient`.
//
// A response with PERMISSION_DENIED is replaced by an in-process File built
// from the same path; any other non-OK status is raised through
// GrpcError::throwGrpcError. Each response's digest is mapped to its path in
// `digestToFilepaths`, and the responses are then turned into files by
// processResponse().
void NestedDirectory::hashFiles(CASClient *casClient,
                                digest_string_map *digestToFilepaths)
{
    std::vector<std::string> sourcePaths;
    collectFilePaths(&sourcePaths);

    auto hashed = casClient->hashFiles(sourcePaths, {});

    std::unordered_map<std::string, const HashFilesResponse_Response *>
        responseByPath;
    for (auto &entry : hashed) {
        const auto &entryStatus = entry.status();
        const bool permissionDenied =
            entryStatus.code() == grpc::StatusCode::PERMISSION_DENIED;

        if (!permissionDenied &&
            entryStatus.code() != grpc::StatusCode::OK) {
            GrpcError::throwGrpcError(grpc::Status(
                static_cast<grpc::StatusCode>(entryStatus.code()),
                entryStatus.message()));
        }

        if (permissionDenied) {
            // Fall back to in-process hashing in case buildbox-casd
            // was unable to access the input file.
            BUILDBOX_LOG_DEBUG("LocalCAS HashFiles fallback for \""
                               << entry.path() << "\"");
            const File fallback(entry.path().c_str());
            entry.mutable_digest()->CopyFrom(fallback.d_digest);
            entry.set_is_executable(fallback.d_executable);
            *entry.mutable_node_properties() = fallback.d_nodeProperties;
        }

        responseByPath[entry.path()] = &entry;
        (*digestToFilepaths)[entry.digest()] = entry.path();
    }

    processResponse<HashFilesResponse_Response>(&responseByPath);
}

// Append the source path of every pending file in this directory, followed
// by those of all subdirectories (recursively), to `paths`.
void NestedDirectory::collectFilePaths(std::vector<std::string> *paths)
{
    for (const auto &[merklePath, sourcePath] : d_filePaths) {
        paths->push_back(sourcePath);
    }

    for (auto &child : *d_subdirs) {
        child.second.collectFilePaths(paths);
    }
}

// Convert every pending file path of this directory (and, recursively, of
// its subdirectories) into a File built from the matching entry of
// `pathResponseMap`, then clear the pending paths. A path without an entry in
// the map makes `at()` throw.
template <typename Response>
void NestedDirectory::processResponse(
    std::unordered_map<std::string, const Response *> *pathResponseMap)
{
    for (const auto &pending : d_filePaths) {
        const Response *response = pathResponseMap->at(pending.second);
        d_files[pending.first] =
            File(response->digest(), response->is_executable(),
                 response->node_properties());
    }
    d_filePaths.clear();

    for (auto &child : *d_subdirs) {
        child.second.processResponse<Response>(pathResponseMap);
    }
}

// Serialize this directory (files, symlinks, subdirectories and node
// properties) as a Directory message and return the digest of the serialized
// blob. Subdirectories are digested recursively. When `digestMap` is not
// null, every digest computed is mapped to its serialized blob in it.
//
// Throws std::logic_error if file paths are still waiting to be hashed.
Digest NestedDirectory::to_digest(digest_string_map *digestMap) const
{
    const bool unhashedFilesRemain = !d_filePaths.empty();
    if (unhashedFilesRemain) {
        BUILDBOXCOMMON_THROW_EXCEPTION(std::logic_error,
                                       "NestedDirectory: Files need to be "
                                       "hashed before calling `to_digest()`");
    }

    // The 'd_files' and 'd_subdirs' maps make sure everything is sorted by
    // name thus the iterators will iterate lexicographically

    Directory message;
    for (const auto &[fileName, file] : d_files) {
        *message.add_files() = file.to_filenode(fileName);
    }
    for (const auto &[linkName, linkTarget] : d_symlinks) {
        auto *link = message.add_symlinks();
        link->set_name(linkName);
        link->set_target(linkTarget);
    }
    for (const auto &[subdirName, subdir] : *d_subdirs) {
        auto *subdirNode = message.add_directories();
        subdirNode->set_name(subdirName);
        *subdirNode->mutable_digest() = subdir.to_digest(digestMap);
    }
    if (d_nodeProperties.ByteSizeLong() > 0) {
        message.mutable_node_properties()->CopyFrom(d_nodeProperties);
    }

    const auto serialized = message.SerializeAsString();
    const auto digest = DigestGenerator::hash(serialized);
    if (digestMap != nullptr) {
        (*digestMap)[digest] = serialized;
    }
    return digest;
}

// Build a Tree message for this directory: the root holds this directory's
// files, symlinks, subdirectory nodes and node properties, and the children
// hold the directories of all subtrees (each subtree's children followed by
// its root).
//
// Throws std::logic_error if file paths are still waiting to be hashed.
Tree NestedDirectory::to_tree() const
{
    const bool unhashedFilesRemain = !d_filePaths.empty();
    if (unhashedFilesRemain) {
        BUILDBOXCOMMON_THROW_EXCEPTION(std::logic_error,
                                       "NestedDirectory: Files need to be "
                                       "hashed before calling `to_tree()`");
    }

    Tree tree;
    auto *rootDirectory = tree.mutable_root();

    for (const auto &[fileName, file] : d_files) {
        *rootDirectory->add_files() = file.to_filenode(fileName);
    }
    for (const auto &[linkName, linkTarget] : d_symlinks) {
        auto *link = rootDirectory->add_symlinks();
        link->set_name(linkName);
        link->set_target(linkTarget);
    }
    for (const auto &[subdirName, subdir] : *d_subdirs) {
        const auto subtree = subdir.to_tree();

        // Flatten the subtree into the children list
        tree.mutable_children()->MergeFrom(subtree.children());
        *tree.add_children() = subtree.root();

        auto *subdirNode = rootDirectory->add_directories();
        subdirNode->set_name(subdirName);
        *subdirNode->mutable_digest() = DigestGenerator::hash(subtree.root());
    }
    if (d_nodeProperties.ByteSizeLong() > 0) {
        rootDirectory->mutable_node_properties()->CopyFrom(d_nodeProperties);
    }

    return tree;
}

// Write a human-readable listing of this directory to `out`: its name, then
// its files, symlinks (with targets) and the number of subdirectories,
// followed by the listing of each subdirectory. Entry paths are prefixed with
// `dirName` and a slash unless `dirName` is empty.
void NestedDirectory::print(std::ostream &out,
                            const std::string &dirName) const
{
    out << "directory: \"" << dirName << "\"" << std::endl;

    std::string prefix;
    if (!dirName.empty()) {
        prefix = dirName + "/";
    }

    out << d_files.size() << " files" << std::endl;
    for (const auto &fileEntry : d_files) {
        out << "    \"" << prefix + fileEntry.first << "\"" << std::endl;
    }

    out << d_symlinks.size() << " symlinks" << std::endl;
    for (const auto &linkEntry : d_symlinks) {
        out << "    \"" << prefix + linkEntry.first << "\", \""
            << linkEntry.second << "\"" << std::endl;
    }

    out << d_subdirs->size() << " sub-directories" << std::endl << std::endl;
    for (const auto &subdirEntry : *d_subdirs) {
        subdirEntry.second.print(out, prefix + subdirEntry.first);
    }
}

std::ostream &operator<<(std::ostream &out, const NestedDirectory &obj)
{
    obj.print(out);
    return out;
}

// Two patterns are equal when their pattern text and both matching flags
// agree.
bool IgnorePattern::operator==(const IgnorePattern &other) const
{
    if (d_pattern != other.d_pattern) {
        return false;
    }
    if (d_matchBasenameOnly != other.d_matchBasenameOnly) {
        return false;
    }
    return d_matchDirectoryOnly == other.d_matchDirectoryOnly;
}

// Parse one ignore-pattern line.
//
// The line is trimmed with StringUtils::trim, then classified:
//   - basename-only: the pattern contains no '/', or its first '/' is its
//     last character;
//   - directory-only: the pattern ends with '/'.
// Leading and trailing slashes are then removed from the stored pattern.
//
// All classification is done on the trimmed text, so input with surrounding
// characters that trim() removes is classified the same as the trimmed line.
//
// Throws std::invalid_argument if nothing is left after stripping.
IgnorePattern IgnorePattern::fromString(const std::string &s)
{
    std::string pattern = StringUtils::trim(s);

    const auto slashIdx = pattern.find('/');
    // If slash only occurs at the end
    const bool matchBasenameOnly =
        slashIdx == std::string::npos || slashIdx == pattern.size() - 1;
    // Must be determined before the trailing slashes are stripped below
    const bool matchDirOnly = !pattern.empty() && pattern.back() == '/';

    StringUtils::ltrim(&pattern, [](char c) { return c == '/'; });
    StringUtils::rtrim(&pattern, [](char c) { return c == '/'; });

    if (pattern.empty()) {
        BUILDBOXCOMMON_THROW_EXCEPTION(std::invalid_argument,
                                       "Invalid IgnorePattern line: " << s);
    }
    return IgnorePattern(pattern, matchBasenameOnly, matchDirOnly);
}

// Stream an IgnorePattern as
// "[pattern=..., matchBasenameOnly=..., matchDirectoryOnly=...]".
std::ostream &operator<<(std::ostream &o, const IgnorePattern &ignorePattern)
{
    o << "[pattern=" << ignorePattern.d_pattern;
    o << ", matchBasenameOnly=" << ignorePattern.d_matchBasenameOnly;
    o << ", matchDirectoryOnly=" << ignorePattern.d_matchDirectoryOnly;
    o << "]";
    return o;
}

// Construct a matcher from a path prefix (removed from paths by trimPrefix()
// before matching) and a shared list of ignore patterns. Both arguments are
// stored as given; the pattern list is shared, not copied.
IgnoreMatcher::IgnoreMatcher(
    const std::string &pathPrefix,
    const std::shared_ptr<std::vector<IgnorePattern>> &ignorePatterns)
    : d_pathPrefix(pathPrefix), d_ignorePatterns(ignorePatterns)
{
}

// Check whether `path` matches any of the ignore patterns.
//
// The path prefix is removed first (see trimPrefix()). Directory-only
// patterns are skipped unless `isDirectory` is set. Basename-only patterns
// are matched with fnmatch() against the last path component (keeping a
// trailing slash if the path had one); all other patterns are matched against
// the whole relative path with FNM_PATHNAME added to `fnmatchFlags`.
//
// Returns true on the first match, and false if nothing matches or if the
// matcher holds no pattern list.
bool IgnoreMatcher::match(const std::string &path, const int fnmatchFlags,
                          const bool isDirectory) const
{
    // A matcher without a pattern list ignores nothing
    if (d_ignorePatterns == nullptr) {
        return false;
    }

    const auto relativePath = trimPrefix(path);
    for (const auto &pattern : *d_ignorePatterns) {
        if (pattern.d_matchDirectoryOnly && !isDirectory) {
            continue;
        }

        bool matched = false;
        if (pattern.d_matchBasenameOnly) {
            std::string filename =
                FileUtils::pathBasename(relativePath.c_str());
            // preserve the last slash given it's trimmed in `pathBasename`;
            // the relative path is empty when `path` is the prefix itself,
            // in which case there is no last character to look at
            if (!relativePath.empty() && relativePath.back() == '/') {
                filename += '/';
            }
            matched = fnmatch(pattern.d_pattern.c_str(), filename.c_str(),
                              fnmatchFlags) == 0;
        }
        else {
            matched = fnmatch(pattern.d_pattern.c_str(), relativePath.c_str(),
                              fnmatchFlags | FNM_PATHNAME) == 0;
        }

        if (matched) {
            return true;
        }
    }
    return false;
}

// Remove the matcher's path prefix, and any slashes directly following it,
// from the front of `path`.
//
// `path` is returned unchanged when the prefix is empty or when `path` does
// not start with it. An occurrence of the prefix elsewhere in the path does
// not count.
std::string IgnoreMatcher::trimPrefix(const std::string &path) const
{
    const bool startsWithPrefix =
        !d_pathPrefix.empty() &&
        path.compare(0, d_pathPrefix.size(), d_pathPrefix) == 0;
    if (!startsWithPrefix) {
        return path;
    }

    size_t startIdx = d_pathPrefix.size();
    // skip through beginning slashes
    while (startIdx != path.size() && path[startIdx] == '/') {
        startIdx++;
    }
    return path.substr(startIdx);
}

// Read ignore patterns from `is`, one per line. Each line is trimmed; lines
// that are empty after trimming are skipped and the rest are parsed with
// IgnorePattern::fromString().
std::shared_ptr<std::vector<IgnorePattern>>
IgnoreMatcher::parseIgnorePatterns(std::istream &is)
{
    auto parsed = std::make_shared<std::vector<IgnorePattern>>();

    for (std::string line; std::getline(is, line);) {
        StringUtils::trim(&line);
        if (line.empty()) {
            continue;
        }
        parsed->push_back(IgnorePattern::fromString(line));
    }

    return parsed;
}

// Build ignore patterns from a list of strings. Each string is trimmed;
// entries that are empty after trimming are skipped and the rest are parsed
// with IgnorePattern::fromString().
std::shared_ptr<std::vector<IgnorePattern>>
IgnoreMatcher::parseIgnorePatterns(const std::vector<std::string> &patterns)
{
    auto parsed = std::make_shared<std::vector<IgnorePattern>>();

    for (const auto &rawPattern : patterns) {
        // Work on a copy: the input must stay untouched
        std::string line = rawPattern;
        StringUtils::trim(&line);
        if (line.empty()) {
            continue;
        }
        parsed->push_back(IgnorePattern::fromString(line));
    }

    return parsed;
}

// Two matchers are equal when their path prefixes are equal and their pattern
// lists are either both absent or both present with equal contents.
bool IgnoreMatcher::operator==(const IgnoreMatcher &other) const
{
    if (!(d_pathPrefix == other.d_pathPrefix)) {
        return false;
    }

    const bool lhsMissing = (d_ignorePatterns == nullptr);
    const bool rhsMissing = (other.d_ignorePatterns == nullptr);
    if (lhsMissing || rhsMissing) {
        // equal only if neither side has a pattern list
        return lhsMissing && rhsMissing;
    }

    return *d_ignorePatterns == *other.d_ignorePatterns;
}

// Create an updater that clears every bit set in `mask` from the mode it is
// given and returns the result.
UnixModeUpdater unixModeMaskUpdater(mode_t mask)
{
    return [mask](mode_t mode) -> mode_t {
        return static_cast<mode_t>(mode & ~mask);
    };
}

// Store the traversal configuration. The constructor only records its
// arguments; no filesystem access happens here.
//
//   followSymlinks     selects the stat/open flags used while traversing
//   captureProperties  node property names to record ("mtime" and
//                      "unix_mode" are the ones handled in this file)
//   ignoreMatcher      optional matcher for paths to skip; may be null
//   threadPool         pool handed to the FutureGroup that runs file digests
Merklizer::Merklizer(bool followSymlinks,
                     const std::vector<std::string> &captureProperties,
                     const std::shared_ptr<IgnoreMatcher> &ignoreMatcher,
                     ThreadPool *threadPool)
    : d_followSymlinks(followSymlinks),
      d_captureProperties(captureProperties),
      d_ignoreMatcher(ignoreMatcher),
      d_threadPool(threadPool)
{
}

// Build the Merkle tree of `Directory` messages for the tree rooted at
// `rootDirFd`.
//
// Steps:
//   1. topologicalSort() collects the non-ignored files, symlinks and
//      directories below the root.
//   2. visitFilesAndSymlinks() queues one digest task per file on a
//      FutureGroup and reads every symlink target.
//   3. Directories are processed in reverse of the order topologicalSort()
//      recorded them, so a directory's subdirectories already have a digest
//      in `directories` when the directory itself is assembled.
//
// Parameters:
//   rootDirFd          descriptor of the root directory; all paths are
//                      resolved relative to it
//   pathPrefix         when non-empty, prepended to each file path stored in
//                      `d_digestToPath`
//   fileDigestFunc     forwarded to visitFilesAndSymlinks()
//   unixModeUpdater    if set, applied to a directory's mode before it is
//                      stored as the "unix_mode" property; also forwarded to
//                      visitFilesAndSymlinks()
//   rootNodeProperties name/value pairs attached to the root directory only
//   allowChmodToRead   forwarded to visitFilesAndSymlinks()
//
// Returns a MerklizeResult holding the root digest plus the digest ->
// Directory, digest -> serialized blob and digest -> file path maps.
//
// `directories.at(".")` throws std::out_of_range if no entry for the root was
// produced (for example when the root could not be stat'ed during the sort).
MerklizeResult Merklizer::merklize(
    int rootDirFd, const std::string &pathPrefix,
    const FileDigestFunction &fileDigestFunc,
    const UnixModeUpdater &unixModeUpdater,
    const std::map<std::string, std::string> &rootNodeProperties,
    bool allowChmodToRead) const
{
    const auto sortedPaths = topologicalSort(rootDirFd);

    // File digests are computed as tasks on the future group; the results
    // are collected below with `.get()`.
    FutureGroup<File> futureGroup(d_threadPool);
    auto [fileFutures, symlinks] =
        visitFilesAndSymlinks(rootDirFd, sortedPaths, fileDigestFunc,
                              unixModeUpdater, &futureGroup, allowChmodToRead);
    // digest of every directory processed so far, keyed by root-relative path
    std::unordered_map<std::filesystem::path, Digest> directories;

    const auto openFlags = getOpenFlags();
    const auto statFlags = getStatFlags();
    const auto &sortedDirectories = sortedPaths.d_directories;
    MerklizeResult result;
    // build the tree in sorted bottom-up order
    for (auto directoryPath = sortedDirectories.crbegin();
         directoryPath != sortedDirectories.crend(); directoryPath++) {
        Directory directory;
        // construct children nodes
        auto dir =
            DirentWrapper(rootDirFd, directoryPath->string(), openFlags);
        while (dir.entry() != nullptr) {
            const auto dirent = dir.entry();
            const auto entryPath =
                createEntryPath(*directoryPath, dirent->d_name);
            // Advance right after the entry name has been consumed so that
            // every `continue` below still makes progress.
            dir.next();

            struct stat entryStatus = {};
            if (fstatat(rootDirFd, entryPath.c_str(), &entryStatus,
                        statFlags) != 0) {
                // entries that cannot be stat'ed are silently left out
                continue;
            }

            // Entries missing from the maps built from `sortedPaths` (for
            // instance paths the sort skipped as ignored) are left out.
            if (S_ISREG(entryStatus.st_mode)) {
                if (!fileFutures.contains(entryPath)) {
                    continue;
                }
                // blocks until the digest task for this file has completed
                auto file = fileFutures.at(entryPath).get();
                *directory.add_files() =
                    file.to_filenode(entryPath.filename().string());
                result.d_digestToPath.emplace(
                    file.d_digest,
                    pathPrefix.empty() ? entryPath : pathPrefix / entryPath);
            }
            else if (S_ISLNK(entryStatus.st_mode)) {
                if (!symlinks.contains(entryPath)) {
                    continue;
                }
                const auto &target = symlinks.at(entryPath);
                SymlinkNode symlinkNode;
                symlinkNode.set_name(entryPath.filename());
                symlinkNode.set_target(target);
                *directory.add_symlinks() = symlinkNode;
            }
            else if (S_ISDIR(entryStatus.st_mode)) {
                if (!directories.contains(entryPath)) {
                    continue;
                }
                auto subdirDigest = directories.at(entryPath);
                auto subdirNode = directory.add_directories();
                subdirNode->set_name(entryPath.filename());
                *subdirNode->mutable_digest() = subdirDigest;
            }
        }
        // set node properties
        for (const std::string &property : d_captureProperties) {
            if (property == "mtime") {
                const auto mtime = FileUtils::getFileMtime(dir.fd());
                directory.mutable_node_properties()->mutable_mtime()->CopyFrom(
                    TimeUtils::make_timestamp(mtime));
            }
            else if (property == "unix_mode") {
                // keep only the permission, setuid/setgid and sticky bits
                const auto mode = static_cast<mode_t>(
                    FileUtils::getUnixMode(dir.fd()) & 07777);
                directory.mutable_node_properties()
                    ->mutable_unix_mode()
                    ->set_value(unixModeUpdater
                                    ? static_cast<mode_t>(unixModeUpdater(
                                          static_cast<mode_t>(mode)))
                                    : static_cast<mode_t>(mode));
            }
        }

        // Add root node properties
        if (*directoryPath == ".") {
            for (const auto &[name, value] : rootNodeProperties) {
                NodeProperty property;
                property.set_name(name);
                property.set_value(value);
                *directory.mutable_node_properties()->add_properties() =
                    property;
            }
        }

        // store directory node
        // Children are sorted by name so the serialized message, and hence
        // its digest, does not depend on directory iteration order.
        std::sort(directory.mutable_files()->begin(),
                  directory.mutable_files()->end(),
                  [](const FileNode &a, const FileNode &b) {
                      return a.name() < b.name();
                  });
        std::sort(directory.mutable_symlinks()->begin(),
                  directory.mutable_symlinks()->end(),
                  [](const SymlinkNode &a, const SymlinkNode &b) {
                      return a.name() < b.name();
                  });
        std::sort(directory.mutable_directories()->begin(),
                  directory.mutable_directories()->end(),
                  [](const DirectoryNode &a, const DirectoryNode &b) {
                      return a.name() < b.name();
                  });
        auto dirMessage = directory.SerializeAsString();
        Digest dirDigest = DigestGenerator::hash(dirMessage);
        result.d_digestToDirectoryBlob.emplace(dirDigest,
                                               std::move(dirMessage));
        result.d_digestToDirectory.emplace(dirDigest, std::move(directory));
        directories[*directoryPath] = std::move(dirDigest);
    }

    result.d_rootDigest = directories.at(".");
    return result;
}

// Build an in-memory `NestedDirectory` for the tree rooted at `rootDirFd`,
// together with a map from file digest to file path.
//
// The traversal mirrors merklize(): topologicalSort() collects the paths,
// visitFilesAndSymlinks() queues the file digest tasks and reads symlink
// targets, and directories are then assembled in reverse of the recorded
// order so that subdirectories are available before their parent is built.
//
// Parameters:
//   rootDirFd          descriptor of the root directory; all paths are
//                      resolved relative to it
//   pathPrefix         when non-empty, prepended to each path stored in the
//                      returned digest -> path map
//   fileDigestFunc     forwarded to visitFilesAndSymlinks()
//   unixModeUpdater    if set, applied to a directory's mode before it is
//                      stored as the "unix_mode" property; also forwarded to
//                      visitFilesAndSymlinks()
//   rootNodeProperties properties attached to the root directory only
//   allowChmodToRead   forwarded to visitFilesAndSymlinks()
//
// Returns the root NestedDirectory and the digest -> path map.
//
// Unlike merklize(), the root is fetched with `operator[]`, so when no "."
// entry was produced a default-constructed NestedDirectory is returned rather
// than an exception being thrown.
std::pair<NestedDirectory, digest_string_map> Merklizer::makeNestedDirectory(
    int rootDirFd, const std::string &pathPrefix,
    const FileDigestFunction &fileDigestFunc,
    const UnixModeUpdater &unixModeUpdater,
    const std::map<std::string, std::string> &rootNodeProperties,
    bool allowChmodToRead) const
{
    const auto sortedPaths = topologicalSort(rootDirFd);

    // File digests are computed as tasks on the future group; the results
    // are collected below with `.get()`.
    FutureGroup<File> futureGroup(d_threadPool);
    auto [fileFutures, symlinks] =
        visitFilesAndSymlinks(rootDirFd, sortedPaths, fileDigestFunc,
                              unixModeUpdater, &futureGroup, allowChmodToRead);
    // every directory assembled so far, keyed by root-relative path; entries
    // are moved out of this map when they are attached to their parent
    std::unordered_map<std::filesystem::path, NestedDirectory> directories;

    const auto openFlags = getOpenFlags();
    const auto statFlags = getStatFlags();
    const auto &sortedDirectories = sortedPaths.d_directories;
    digest_string_map digestToPath;

    // build the tree in sorted bottom-up order
    for (auto directoryPath = sortedDirectories.crbegin();
         directoryPath != sortedDirectories.crend(); directoryPath++) {
        NestedDirectory directory;
        // construct children nodes
        auto dir =
            DirentWrapper(rootDirFd, directoryPath->string(), openFlags);
        while (dir.entry() != nullptr) {
            const auto dirent = dir.entry();
            const auto entryPath =
                createEntryPath(*directoryPath, dirent->d_name);
            // Advance right after the entry name has been consumed so that
            // every `continue` below still makes progress.
            dir.next();

            struct stat entryStatus = {};
            if (fstatat(rootDirFd, entryPath.c_str(), &entryStatus,
                        statFlags) != 0) {
                // entries that cannot be stat'ed are silently left out
                continue;
            }

            // Entries missing from the maps built from `sortedPaths` (for
            // instance paths the sort skipped as ignored) are left out.
            if (S_ISREG(entryStatus.st_mode)) {
                if (!fileFutures.contains(entryPath)) {
                    continue;
                }
                // blocks until the digest task for this file has completed
                auto file = fileFutures.at(entryPath).get();
                // record the digest before `file` is moved into d_files
                digestToPath.emplace(
                    file.d_digest,
                    pathPrefix.empty() ? entryPath : pathPrefix / entryPath);
                directory.d_files.emplace(entryPath.filename().string(),
                                          std::move(file));
            }
            else if (S_ISLNK(entryStatus.st_mode)) {
                if (!symlinks.contains(entryPath)) {
                    continue;
                }
                const auto &target = symlinks.at(entryPath);
                directory.d_symlinks.emplace(entryPath.filename().string(),
                                             target);
            }
            else if (S_ISDIR(entryStatus.st_mode)) {
                if (!directories.contains(entryPath)) {
                    continue;
                }
                directory.d_subdirs->emplace(
                    entryPath.filename().string(),
                    std::move(directories.at(entryPath)));
            }
        }
        // set node properties
        for (const std::string &property : d_captureProperties) {
            if (property == "mtime") {
                const auto mtime = FileUtils::getFileMtime(dir.fd());
                directory.d_nodeProperties.mutable_mtime()->CopyFrom(
                    TimeUtils::make_timestamp(mtime));
            }
            else if (property == "unix_mode") {
                // keep only the permission, setuid/setgid and sticky bits
                const auto mode = static_cast<mode_t>(
                    FileUtils::getUnixMode(dir.fd()) & 07777);
                const mode_t updated_mode =
                    unixModeUpdater ? static_cast<mode_t>(unixModeUpdater(
                                          static_cast<mode_t>(mode)))
                                    : static_cast<mode_t>(mode);
                directory.d_nodeProperties.mutable_unix_mode()->set_value(
                    static_cast<uint32_t>(updated_mode));
            }
        }

        // Add root node properties
        if (*directoryPath == ".") {
            directory.addNodeProperties(rootNodeProperties);
        }

        directories[*directoryPath] = std::move(directory);
    }

    return {std::move(directories["."]), std::move(digestToPath)};
}

// Flags for fstatat(): no flags when symlinks are followed, otherwise
// AT_SYMLINK_NOFOLLOW so the link itself is examined.
int Merklizer::getStatFlags() const
{
    if (d_followSymlinks) {
        return 0;
    }
    return AT_SYMLINK_NOFOLLOW;
}

// Flags for opening files and directories: always read-only and
// close-on-exec, plus O_NOFOLLOW when symlinks are not followed.
int Merklizer::getOpenFlags() const
{
    int flags = O_RDONLY | O_CLOEXEC;
    if (!d_followSymlinks) {
        flags |= O_NOFOLLOW;
    }
    return flags;
}

// Join a directory path and an entry name into a root-relative path. Entries
// directly under the root (".") are returned without a "./" in front.
std::filesystem::path
Merklizer::createEntryPath(const std::filesystem::path &parent,
                           const std::filesystem::path &entry)
{
    if (parent == ".") {
        return entry;
    }
    return parent / entry;
}

// Queue a digest task for every file in `sortedPaths` and read the target of
// every symlink in `sortedPaths`.
//
// Parameters:
//   rootDirFd        descriptor that all paths are resolved against
//   sortedPaths      files, symlinks and directories from topologicalSort();
//                    the directories part is not used here
//   fileDigestFunc   passed to the `File` constructor
//   unixModeUpdater  passed to the `File` constructor
//   futureGroup      receives one task per file via `add()`
//   allowChmodToRead passed to the `File` constructor; also enables the
//                    handleUnreadableFile() retry when an open fails with
//                    EACCES
//
// Returns a pair of maps keyed by root-relative path: the future for each
// file and the target string for each symlink.
//
// Throws std::system_error from this function when readlinkat() fails. A
// failed open is reported from inside the file task instead.
//
// NOTE(review): the task lambdas capture `fileDigestFunc` and
// `unixModeUpdater` by reference and `this` by pointer, so all three must
// stay alive until the queued tasks have run - confirm against
// FutureGroup's lifetime guarantees.
std::pair<std::unordered_map<std::filesystem::path, std::shared_future<File>>,
          std::unordered_map<std::filesystem::path, std::string>>
Merklizer::visitFilesAndSymlinks(int rootDirFd,
                                 const TopoSortedTreePaths &sortedPaths,
                                 const FileDigestFunction &fileDigestFunc,
                                 const UnixModeUpdater &unixModeUpdater,
                                 FutureGroup<File> *futureGroup,
                                 bool allowChmodToRead) const
{
    // the third member (directories) is not needed here
    const auto &[sortedFiles, sortedSymlinks, _] = sortedPaths;

    std::unordered_map<std::filesystem::path, std::shared_future<File>>
        fileFutures;
    std::unordered_map<std::filesystem::path, std::string> symlinks;
    const auto openFlags = getOpenFlags();
    // files are queued in reverse of the order they were recorded
    for (auto filePathIt = sortedFiles.crbegin();
         filePathIt != sortedFiles.crend(); filePathIt++) {
        const std::filesystem::path &filePath = *filePathIt;
        // `filePath` is captured by value: the task keeps its own copy
        auto fileLambda = [this, rootDirFd, filePath, openFlags,
                           &fileDigestFunc, &unixModeUpdater,
                           allowChmodToRead]() {
            if (d_followSymlinks) {
                // let File open the path itself, relative to rootDirFd
                return File(rootDirFd, filePath.c_str(), fileDigestFunc,
                            d_captureProperties, unixModeUpdater, {},
                            allowChmodToRead);
            }
            else {
                // open explicitly through openInRoot with `openFlags`
                // (which include O_NOFOLLOW in this branch)
                FileDescriptor fd(FileUtils::openInRoot(
                    rootDirFd, filePath.c_str(), openFlags));
                std::optional<mode_t> originalModeOverride;

                // errno is inspected immediately after the failed open
                if (fd.get() < 0 && errno == EACCES && allowChmodToRead) {
                    auto [retry_fd, mode_override] = handleUnreadableFile(
                        rootDirFd, filePath.c_str(), openFlags,
                        [](int d, const char *p, int f) {
                            return FileDescriptor(
                                FileUtils::openInRoot(d, p, f));
                        });
                    fd = std::move(retry_fd);
                    originalModeOverride = mode_override;
                }

                if (fd.get() < 0) {
                    BUILDBOXCOMMON_THROW_SYSTEM_EXCEPTION(
                        std::system_error, errno, std::system_category,
                        "Failed to open path \"" << filePath << "\"");
                }
                return File(fd.get(), fileDigestFunc, d_captureProperties,
                            unixModeUpdater, {}, allowChmodToRead,
                            originalModeOverride);
            }
        };
        fileFutures.emplace(filePath, futureGroup->add(fileLambda));
    }

    for (auto symlinkPath = sortedSymlinks.crbegin();
         symlinkPath != sortedSymlinks.crend(); symlinkPath++) {
        // readlinkat() does not NUL-terminate its output. The buffer is
        // zero-initialised and one byte is held back, so `target.data()` is
        // always a valid C string.
        std::array<char, PATH_MAX> target = {};
        const ssize_t targetSize = readlinkat(
            rootDirFd, symlinkPath->c_str(), target.data(), target.size() - 1);
        if (targetSize < 0) {
            BUILDBOXCOMMON_THROW_SYSTEM_EXCEPTION(
                std::system_error, errno, std::system_category,
                "Error reading symlink at \"" << *symlinkPath << "\"");
        }
        symlinks.emplace(*symlinkPath, target.data());
    }

    return std::make_pair(std::move(fileFutures), std::move(symlinks));
}

// Walk the tree below `rootDirFd` breadth-first and bucket every visited
// path into files, symlinks or directories.
//
// A directory is recorded before any of its descendants, so iterating
// `d_directories` in reverse visits children before their parents. Paths
// that cannot be stat'ed and paths matched by the ignore matcher (the root
// "." is never tested) are skipped, together with everything below them.
// Other file types are only logged at debug level.
Merklizer::TopoSortedTreePaths Merklizer::topologicalSort(int rootDirFd) const
{
    const auto statFlags = getStatFlags();
    const auto openDirFlags = getOpenFlags();

    TopoSortedTreePaths sortedPaths;
    std::deque<std::filesystem::path> pending;
    pending.emplace_back(".");

    while (!pending.empty()) {
        const auto current = pending.front();
        pending.pop_front();

        struct stat info = {};
        if (fstatat(rootDirFd, current.c_str(), &info, statFlags) != 0) {
            continue;
        }

        // Skip if the path is ignored
        const bool mayBeIgnored = d_ignoreMatcher != nullptr && current != ".";
        if (mayBeIgnored &&
            d_ignoreMatcher->match(current, 0, S_ISDIR(info.st_mode))) {
            continue;
        }

        if (S_ISREG(info.st_mode)) {
            sortedPaths.d_files.emplace_back(current);
            continue;
        }

        if (S_ISLNK(info.st_mode)) {
            sortedPaths.d_symlinks.emplace_back(current);
            continue;
        }

        if (!S_ISDIR(info.st_mode)) {
            BUILDBOX_LOG_DEBUG("Unsupported file type at path="
                               << current << " mode=" << info.st_mode);
            continue;
        }

        // Directory: queue every entry, then record the directory itself.
        auto listing = DirentWrapper(rootDirFd, current.c_str(), openDirFlags);
        for (; listing.entry() != nullptr; listing.next()) {
            const auto item = listing.entry();
            pending.emplace_back(createEntryPath(current, item->d_name));
        }
        sortedPaths.d_directories.emplace_back(current);
    }

    return sortedPaths;
}

// Produce a MerklizeResult in which `file` appears at `targetPath`.
//
// A leaf directory containing only the file (named after the last component
// of `targetPath`) is created and handed to wrapDirectory() together with the
// parent path of `targetPath`.
MerklizeResult Merklizer::remapFile(const File &file,
                                    const std::filesystem::path &targetPath)
{
    const auto enclosingDir = targetPath.parent_path();
    const auto leafName = targetPath.filename();

    Directory leafDir;
    *leafDir.add_files() = file.to_filenode(leafName);

    MerklizeResult remapped;
    wrapDirectory(enclosingDir, leafDir, &remapped);
    return remapped;
}

// Produce a MerklizeResult in which a symlink pointing at `linkTarget`
// appears at `targetPath`.
//
// A leaf directory containing only the symlink node (named after the last
// component of `targetPath`) is created and handed to wrapDirectory()
// together with the parent path of `targetPath`.
MerklizeResult Merklizer::remapSymlink(const std::string &linkTarget,
                                       const std::filesystem::path &targetPath)
{
    const auto enclosingDir = targetPath.parent_path();
    const auto leafName = targetPath.filename();

    // fill the node in place instead of building it separately
    Directory leafDir;
    SymlinkNode *linkNode = leafDir.add_symlinks();
    linkNode->set_name(leafName.string());
    linkNode->set_target(linkTarget);

    MerklizeResult remapped;
    wrapDirectory(enclosingDir, leafDir, &remapped);
    return remapped;
}

// Re-root an already merklized tree so that its root appears at `targetPath`.
//
// A leaf directory holding one directory node (named after the last component
// of `targetPath`, with the digest of `srcResult`'s root) is created. The
// contents of `srcResult` are moved into the returned result, which is then
// passed to wrapDirectory() together with the parent path of `targetPath`.
MerklizeResult
Merklizer::remapDirectory(MerklizeResult &&srcResult,
                          const std::filesystem::path &targetPath)
{
    const auto enclosingDir = targetPath.parent_path();
    const auto leafName = targetPath.filename();

    // The root digest must be read before `srcResult` is moved from below.
    Directory leafDir;
    DirectoryNode *childNode = leafDir.add_directories();
    childNode->set_name(leafName.string());
    *childNode->mutable_digest() = srcResult.d_rootDigest;

    MerklizeResult remapped(std::move(srcResult));
    wrapDirectory(enclosingDir, leafDir, &remapped);
    return remapped;
}

// Assemble a `Tree` message from this result: the directory stored under the
// root digest becomes the root and every other stored directory is added as a
// child.
//
// `at()` throws std::out_of_range when no directory is stored for the root
// digest.
Tree MerklizeResult::tree() const
{
    Tree assembled;
    assembled.mutable_root()->CopyFrom(d_digestToDirectory.at(d_rootDigest));

    for (const auto &entry : d_digestToDirectory) {
        if (entry.first != d_rootDigest) {
            *assembled.add_children() = entry.second;
        }
    }

    return assembled;
}

} // namespace buildboxcommon
