A multithreaded implementation of the ‘which’ command

Here is an implementation of the ‘which‘ command which can tell where programs are located.

/* SPDX-FileCopyrightText: 2021-2022 John Scott <jscott@posteo.net>
 * SPDX-License-Identifier: GPL-3.0-or-later */

/* We do not support the obsolete extension where an
 * omitted directory name is interpreted as the current
 * working directory. In $PATH = "/usr::/bin:", the lack
 * of a path in the middle or one at the end is simply ignored. */

#define _XOPEN_SOURCE 700
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <pthread.h>
#include <search.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#if defined(_POSIX_ADVISORY_INFO) && _POSIX_ADVISORY_INFO > -1
#include <sys/mman.h>
#endif

/* A NULL-terminated list of directories in PATH. */
static char **list;

/* A NULL-terminated list of the names of programs to be found.
 * Neither the list nor the strings are dynamically allocated! */
static char **progs;

/* We need a way to tell which threads are running at any given time
 * so we know which ones we can send cancellation requests to.
 * This is a boolean list indicating whether a given thread is
 * running or not. thread_is_running[0] will correspond to the
 * first child we create, thread_is_running[1] to the second,
 * and so on. */
static bool *thread_is_running;

/* This condition is broadcasted whenever a thread is about to
 * terminate. */
static pthread_cond_t thread_dying = PTHREAD_COND_INITIALIZER;

/* This mutex serves multiple purposes. It not only protects
 * thread_is_running from data races, but is also used in
 * tandem with thread_dying. This mutex also has the effect
 * that, if it's locked in the main thread, then child threads
 * will be waiting to acquire this mutex before they bail out,
 * so in effect which threads are running will be frozen. This
 * allows for safely sending cancellation requests to running threads. */
static pthread_mutex_t thread_guard = PTHREAD_MUTEX_INITIALIZER;

/* This is a list of groups we are in. Its lifetime is managed by main(). */
static gid_t *groups;
static int groupcount;

static void *reallocarray(void *p, size_t m, size_t n) {
    if(n && m > SIZE_MAX / n) {
        errno = ENOMEM;
        return NULL;
    }
    return realloc(p, m * n);
}

static int giddiff(const void *a, const void *b) {
    gid_t gid_a = *(const gid_t*)a;
    gid_t gid_b = *(const gid_t*)b;
    /* We do not simply return gid_a - gid_b, because that
     * bears the risk of overflow if gid_t is a signed type. */
    if(gid_a > gid_b) {
        return 1;
    } else if(gid_a < gid_b) {
        return -1;
    } else {
        return 0;
    }
}

static int boolcmp(const void *a, const void *b) {
    return *(const bool*)a != *(const bool*)b;
}

static void stop_running(void *i) {
    if(pthread_mutex_lock(&thread_guard)) {
        abort();
    }
    assert(thread_is_running[(intptr_t)i]);
    thread_is_running[(intptr_t)i] = false;
    if(pthread_cond_broadcast(&thread_dying) || pthread_mutex_unlock(&thread_guard)) {
        abort();
    }
}

static bool file_is_executable(const char filename[restrict static 1]) {
    struct stat st;
    if(stat(filename, &st) == -1 || !S_ISREG(st.st_mode)) {
        return false;
    }

    if(st.st_uid == geteuid()) {
        return (st.st_mode & S_IXUSR) ? true : false;
    }

    if(bsearch(&st.st_gid, groups, groupcount, sizeof(*groups), giddiff)) {
        return (st.st_mode & S_IXGRP) ? true : false;
    }

    return (st.st_mode & S_IXOTH) ? true : false;
}

/* For the n'th program, where n is type-punned from arg,
 * return a dynamically allocated pathname articulating where
 * it can be found, or NULL if that can't be done. */
static void *find_program(void *arg) {
    const int my_seq_thread_id = (intptr_t)arg;
    int k = pthread_mutex_lock(&thread_guard);
    if(k) {
        errno = k;
        perror("Failed to lock mutex");
        abort();
    }
    assert(!thread_is_running[my_seq_thread_id]);
    thread_is_running[my_seq_thread_id] = true;
    k = pthread_mutex_unlock(&thread_guard);
    if(k) {
        abort();
    }

    /* These have to be declared outside of our cleanup handler calls. */
    char *pathname;
    bool pathname_found = false;
    /* We hit no cancellation points between setting thread_is_running[j]
     * to true and pushing this handler, since pthread_mutex_unlock is not one. */
    pthread_cleanup_push(stop_running, (void*)(intptr_t)my_seq_thread_id);

    if(strchr(progs[my_seq_thread_id], '/')) {
        /* We were either given an absolute pathname, or a relative
         * path that must be evaluated with respect to the current
         * working directory, which must not use prefixes from PATH. */
        if(file_is_executable(progs[my_seq_thread_id])) {
            /* The caller expects that the string we return is dynamically allocated. */
            pathname = strdup(progs[my_seq_thread_id]);
            if(!pathname) {
                perror("Failed to duplicate string");
            }
            pthread_exit(pathname);
        } else {
            pthread_exit(NULL);
        }
    }

    /* It's not an absolute pathname; try prefixing the string
     * with strings from PATH and see what sticks. */
    const size_t filenamelen = strlen(progs[my_seq_thread_id]);
    for(char **directory = list; *directory; directory++) {
        const size_t directorylen = strlen(*directory);
        if(filenamelen > SIZE_MAX - directorylen || filenamelen + directorylen > SIZE_MAX - 2) {
            continue;
        }
        pathname = malloc(directorylen + filenamelen + 2); /* one extra byte for a /, one for the NUL */
        if(!pathname) {
            perror("Failed to allocate memory for pathname");
            pthread_exit(NULL);
        }
        pthread_cleanup_push(free, pathname);

        char *end = (char*)memcpy(pathname, *directory, directorylen) + directorylen;
        if(end[-1] != '/') {
            *end++ = '/';
        }
        memcpy(end, progs[my_seq_thread_id], filenamelen + 1);

        pathname_found = file_is_executable(pathname);
        pthread_cleanup_pop(!pathname_found); /* free(pathname)? */
        if(pathname_found) {
            break;
        }
    }

    pthread_cleanup_pop(true); /* stop_running(my_seq_thread_id) */
    pthread_exit(pathname_found ? pathname : NULL);
}

int main(int argc, char *argv[]) {
    if(!setlocale(LC_ALL, "")) {
        fputs("Failed to enable default locale\n", stderr);
        exit(EXIT_FAILURE);
    }

    int opt;
    while((opt = getopt(argc, argv, "")) != -1) {
        if(opt == '?') {
            exit(EXIT_FAILURE);
        }
    }
    argc -= optind;
    progs = argv + optind;
    if(!argc) {
        exit(EXIT_SUCCESS);
    }
#if INT_MAX > INTPTR_MAX
    if(argc - 1 > INTPTR_MAX) {
        fprintf(stderr, "Too many arguments: %s\n", strerror(EOVERFLOW));
        exit(EXIT_FAILURE);
    }
#endif

    const char *const envpath = getenv("PATH");
    char *path;
    size_t l;
    if(!envpath || !envpath[0]) {
        l = confstr(_CS_PATH, NULL, 0);
        if(!l) {
            fputs("Failed to obtain value of PATH\n", stderr);
            exit(EXIT_FAILURE);
        }

        path = aligned_alloc(sysconf(_SC_PAGESIZE), l);
        if(!path) {
            perror("Failed to allocate memory for PATH");
            exit(EXIT_FAILURE);
        }
        confstr(_CS_PATH, path, l);
    } else {
        l = strlen(envpath) + 1;
        path = aligned_alloc(sysconf(_SC_PAGESIZE), l);
        if(!path) {
            perror("Failed to duplicate string");
            exit(EXIT_FAILURE);
        }
        memcpy(path, envpath, l);
    }
#if defined(_POSIX_ADVISORY_INFO) && _POSIX_ADVISORY_INFO > -1
    int p = posix_madvise(path, l, POSIX_MADV_SEQUENTIAL);
    if(p && sysconf(_SC_ADVISORY_INFO) != -1) {
        fprintf(stderr, "Failed to advise the system on memory usage: %s\n", strerror(p));
    }
#endif

    /* The maximum number of directories in PATH is one plus
     * the number of colons, where multiple consecutive colons
     * can be treated as a single one. */
    size_t numdirs = 1;
    assert(path[0]);
    for(size_t i = 1; path[i]; i++) {
        /* In case of a set of multiple consecutive colons,
         * only count the last one. */
        if(path[i] == ':' && path[i+1] && path[i+1] != ':') {
            numdirs++;
        }
    }

    assert(numdirs < SIZE_MAX);
    list = reallocarray(NULL, numdirs + 1, sizeof(*list));
    if(!list) {
        perror("Failed to allocate memory for directory list");
        goto endpath;
    }

    char *tok = NULL;
    size_t n = 0;
    do {
        /* cppcheck-suppress[strtokCalled] there's only one thread; this is safe */
        tok = strtok(tok ? NULL : path, ":");
        assert(n <= numdirs);
        list[n++] = tok;
    } while(tok);

    pthread_t *ids = reallocarray(NULL, argc, sizeof(*ids));
    if(!ids) {
        perror("Failed to allocate memory for thread list");
        goto endlist;
    }

    if((groupcount = getgroups(0, groups)) == -1) {
        perror("Failed to get number of groups");
        goto endids;
    }
#pragma GCC diagnostic ignored "-Wsign-compare"
    if(groupcount == INT_MAX || groupcount == SIZE_MAX) {
#pragma GCC diagnostic pop
        fprintf(stderr, "Failed to create group list: %s\n", strerror(EOVERFLOW));
        goto endids;
    }

    /* We might need an extra member for the effective group ID. */
    groups = reallocarray(NULL, groupcount + 1, sizeof(*groups));
    if(!groups) {
        perror("Failed to allocate memory for group list");
        goto endids;
    }
    /* It's possible that in a TOCTTOU sort of way, the number of
     * groups we're in now is fewer than the number we were in before,
     * hence the reassignment to groupcount. */
    if((groupcount = getgroups(groupcount, groups)) == -1) {
        perror("Failed to populate group list");
        goto endgroups;
    }

    /* The group list may not include the effective group ID. */
    if(!lfind(&(gid_t){getegid()}, groups, &(size_t){groupcount}, sizeof(*groups), giddiff)) {
        groups[groupcount++] = getegid();
    }
    qsort(groups, groupcount, sizeof(*groups), giddiff);

    thread_is_running = calloc(argc, sizeof(*thread_is_running));
    if(!thread_is_running) {
        perror("Failed to allocate memory for running thread list");
        goto endgroups;
    }

    void **retval = reallocarray(NULL, argc, sizeof(*retval));
    if(!retval) {
        perror("Failed to allocate memory for thread return values");
        goto endthread_is_running;
    }

    for(int i = 0; i < argc; i++) {
        int k = pthread_mutex_lock(&thread_guard);
        if(k) {
            fprintf(stderr, "Failed to lock mutex: %s\n", strerror(k));
            abort();
        }
tryagain:
        k = pthread_create(ids + i, NULL, find_program, (void*)(intptr_t)i);
        switch(k) {
        case 0:
            k = pthread_mutex_unlock(&thread_guard);
            if(k) {
                fprintf(stderr, "Failed to unlock mutex: %s\n", strerror(k));
                abort();
            }
            break;
        case EAGAIN:
            if(lfind(&(bool){true}, thread_is_running, &(size_t){i}, sizeof(*thread_is_running), boolcmp)) {
                k = pthread_cond_wait(&thread_dying, &thread_guard);
                if(k) {
                    fprintf(stderr, "Failed to wait on condition: %s\n", strerror(k));
                    abort();
                }
                goto tryagain;
            }
        default:
            fprintf(stderr, "Failed to create thread: %s\n", strerror(k));

            for(int j = 0; j < i; j++) {
                if(thread_is_running[j]) {
                    k = pthread_cancel(ids[j]);
                    if(k) {
                        fprintf(stderr, "Failed to cancel thread: %s\n", strerror(k));
                        abort();
                    }
                }
            }
            k = pthread_mutex_unlock(&thread_guard);
            if(k) {
                fprintf(stderr, "Failed to unlock mutex: %s\n", strerror(k));
                abort();
            }

            void *threadreturn;
            for(int j = 0; j < i; j++) {
                k = pthread_join(ids[j], &threadreturn);
                if(k) {
                    fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
                    abort();
                }
                if(threadreturn != PTHREAD_CANCELED) {
                    free(threadreturn);
                }
            }
            goto endthread_guard;
        }
    }

    for(int j = 0; j < argc; j++) {
        int k = pthread_join(ids[j], retval + j);
        if(k) {
            fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
            abort();
        }
    }

    bool all_found = true;
    for(int j = 0; j < argc; j++) {
        if(retval[j]) {
            if(puts(retval[j]) == EOF) {
                perror("Failed to print filename");
                all_found = false;
            }
            free(retval[j]);
        } else {
            all_found = false;
        }
    }

    int k = pthread_mutex_destroy(&thread_guard);
    if(k) {
        fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
        abort();
    }
    k = pthread_cond_destroy(&thread_dying);
    if(k) {
        fprintf(stderr, "Failed to destroy condition variable: %s\n", strerror(k));
        abort();
    }
    free(retval);
    free(thread_is_running);
    free(groups);
    free(ids);
    free(list);
    free(path);
    exit(all_found ? EXIT_SUCCESS : EXIT_FAILURE);

endthread_guard:
    k = pthread_mutex_destroy(&thread_guard);
    if(k) {
        fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
        abort();
    }
    k = pthread_cond_destroy(&thread_dying);
    if(k) {
        fprintf(stderr, "Failed to destroy condition variable: %s\n", strerror(k));
        abort();
    }
    free(retval);
endthread_is_running:
    free(thread_is_running);
endgroups:
    free(groups);
endids:
    free(ids);
endlist:
    free(list);
endpath:
    free(path);
    exit(EXIT_FAILURE);
}

Here are some remarks in no particular order:

  • Trying to cancel a thread which isn’t actually running, even if it’s joinable and not yet joined with, is undefined behavior, so I follow Ulrich Drepper’s suggestions for dealing with this problem.
  • I assume that an arbitrary intptr_t can be cast to void* and back with the expected result, which I realize is not guaranteed by any standard. At build time, I check this is okay with the build system.
  • Perhaps ‘posix_madvise()‘ is overkill or premature optimization, but on the other hand I think it serves as a form of self-documenting code which says we traverse the directory list in order. I’d like to know if you agree.

Answer

It’s overcomplicated

Throwing threads at a problem does not necessarily make it faster, and can even make it slower. Consider that creating and destroying threads has an overhead of its own, and that your program will likely have some parts that have to run in serial, which means there is a limit to the speed up you can gain even if you could use an infinite number of threads (see Amdahl’s law). Consider benchmarking your code against /usr/bin/which with various numbers of arguments, and see where the cutoff is where a parallel version becomes faster, if at all. Use perf stat to see things like the number of instructions and cycles spent, context switches and page faults.

Even if threads make sense, you chose to do things the hard way, wanting to use thread cancellation in the error path for example, when you could just simply let all threads finish what they were doing. Follow the KISS principle.

Use a thread pool

You start one thread per command line argument. But what if I call your program with a thousand arguments? Surely that will spawn more threads than I have CPU cores, which means they are all trying to contend for resources. A better approach is to limit the number of threads to the number of cores, and distribute the workload over the threads. For example, with n threads, let each thread work on every nth element of the argument list, offset with their thread ID.

Doing this will also limit the amount of memory needed for your program to run, independent of the number of arguments.

Use access() to check for execute permissions

You spend a lot of code getting a list of groups the user belongs to, and checking the results from stat() against that list. However, you can use access() instead to do that. There are some slight differences in semantics, but assuming your version of which won’t have the setuid bit set, that should not be a problem. But you can also use faccessat(), which takes a flags argument, then you can use AT_EACCESS to make sure it uses the effective UID and GID to do its checks.

You still need to call stat() to check if the file is a regular file, because access() and friends will not distinguish between executable files and accessible directories. Since stat() is quite a heavy operation because of the amount of information it returns, you could use the non-portable statx(..., STATX_TYPE, ...) to just query the type.

Consider using the *at() functions

Instead of keeping the list of directories as strings, and building full filenames from the combination of directory names and command line arguments, consider opening all directories using open(dirname, O_PATH) and storing those filedescriptors. Then you can use those with fstatat() and faccessat(), so you don’t need to build full filenames anymore.

Unnecessary things

Perhaps posix_madvise() is overkill or premature optimization, but on the other hand I think it serves as a form of self-documenting code which says we traverse the directory list in order.

The POSIX advise functions are quite subtle, and might not always do what you think they do. Furthermore, they have an overhead of their own. Calling it for just one string that will surely fit into the L1 cache will not do anything useful. You had to jump through hoops to even be able to use it (like using aligned_alloc()), so it makes the code less readable.

Let threads print out their results

Instead of waiting for all threads to finish and then print out the results, consider letting each thread print their results themselves. Each write() is done atomically, so if you can ensure you can print a pathname and newline in a single write(), the output will always be correct, the only thing that is no longer guaranteed is the order in which things are printed.

Create a struct to hold all the state for a given filename

Instead of having several arrays holding data, like the filenames, running status, return values, and indexing them by a thread ID, consider creating a struct that holds all these things, make an array of that struct, and then you can pass a pointer to a given element of that array to the thread. This also avoids the whole issue of having to cast intptr_ts to void*s.

Attribution
Source : Link , Question Author : JohnScott , Answer Author : G. Sliepen

Leave a Comment