szaydel
10/3/2019 - 4:50 AM

Rename utility with name validation rules

/*************************************************************************\
*                  Copyright (C) Michael Kerrisk, 2019.                   *
*                                                                         *
* This program is free software. You may use, modify, and redistribute it *
* under the terms of the GNU Lesser General Public License as published   *
* by the Free Software Foundation, either version 3 or (at your option)   *
* any later version. This program is distributed without any warranty.    *
* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details.           *
\*************************************************************************/

/* error_functions.h

   Header file for error_functions.c.
*/
#ifndef ERROR_FUNCTIONS_H
#define ERROR_FUNCTIONS_H

/* Error diagnostic routines */

void errMsg(const char *format, ...);

#ifdef __GNUC__

    /* This macro stops 'gcc -Wall' complaining that "control reaches
       end of non-void function" if we use the following functions to
       terminate main() or some other non-void function. */

#define NORETURN __attribute__ ((__noreturn__))
#else
#define NORETURN
#endif

void errExit(const char *format, ...) NORETURN ;

void err_exit(const char *format, ...) NORETURN ;

void errExitEN(int errnum, const char *format, ...) NORETURN ;

void fatal(const char *format, ...) NORETURN ;

void usageErr(const char *format, ...) NORETURN ;

void cmdLineErr(const char *format, ...) NORETURN ;

#endif
/*************************************************************************\
*                  Copyright (C) Michael Kerrisk, 2019.                   *
*                                                                         *
* This program is free software. You may use, modify, and redistribute it *
* under the terms of the GNU Lesser General Public License as published   *
* by the Free Software Foundation, either version 3 or (at your option)   *
* any later version. This program is distributed without any warranty.    *
* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details.           *
\*************************************************************************/

/* get_num.h

   Header file for get_num.c.
*/
#ifndef GET_NUM_H
#define GET_NUM_H

#define GN_NONNEG       01      /* Value must be >= 0 */
#define GN_GT_0         02      /* Value must be > 0 */

                                /* By default, integers are decimal */
#define GN_ANY_BASE   0100      /* Can use any base - like strtol(3) */
#define GN_BASE_8     0200      /* Value is expressed in octal */
#define GN_BASE_16    0400      /* Value is expressed in hexadecimal */

long getLong(const char *arg, int flags, const char *name);

int getInt(const char *arg, int flags, const char *name);

#endif
#ifndef HUMAN_H
#define HUMAN_H
    // Readable aliases for the nftw() type-flag constants (FTW_*).
    // The aliases expand to names from <ftw.h>, so that header must be
    // included before any alias is used.
    //
    // NOTE(review): FILE is also the name of the stdio stream type. Once this
    // header has been included, every later FILE token is replaced by FTW_F,
    // so "FILE *" declarations will no longer compile. Include this header
    // last and do not declare stdio streams after it.
    #define DIRECTORY FTW_D
    #define DIRECTORY_NOREAD FTW_DNR
    #define FILE FTW_F
    #define DIRECTORY_CHLD_VISITED FTW_DP
    #define FILE_NOSTAT FTW_NS
    #define SYMLINK FTW_SL
    #define SYMLINK_NOFILE FTW_SLN
#endif // HUMAN_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>

// bad_head_tail reports whether a name begins or ends with a character that
// is not allowed in that position.
//
// Allowed first character: alphanumeric or '.'.
// Allowed last character:  alphanumeric, '.' or '$'.
//
// Returns true when the name is rejected (an empty name is always rejected),
// false when both ends are acceptable. str must be a valid NUL-terminated
// string.
static inline bool bad_head_tail(const char *str) {
    size_t len = strlen(str);
    if (len == 0) {
        return true;    // nothing to inspect; also keeps len - 1 from wrapping
    }

    // <ctype.h> classifiers are only defined for unsigned char values and
    // EOF, so never hand them a plain (possibly negative) char.
    unsigned char first = (unsigned char)str[0];
    unsigned char last = (unsigned char)str[len - 1];

    if (!(isalnum(first) || first == '.')) {
        return true;
    }
    if (!(isalnum(last) || last == '.' || last == '$')) {
        return true;
    }
    return false;
}

// exclude1 reports whether c must be trimmed from the head or tail of a
// name: blanks, punctuation, control characters and anything outside 7-bit
// ASCII. The range test runs first so that the <ctype.h> classifiers are
// only ever called with values they are defined for.
static inline bool exclude1(int c) {
    if (c < 0 || c > 0x7f) {
        return true;    // not ASCII
    }
    return isblank(c) || ispunct(c) || iscntrl(c);
}

// exclude2 reports whether c is one of the characters that are illegal in
// path names according to the SMB spec:  " \ / : | < > * ?
static inline bool exclude2(int c) {
    switch (c) {
    case '"': case '\\': case '/': case ':': case '|':
    case '<': case '>':  case '*': case '?':
        return true;
    default:
        return false;
    }
}

// sanitize_fname cleans a file name in place.
//
//   1. Leading and trailing characters rejected by exclude1() are trimmed.
//   2. Within what remains, characters rejected by exclude2() are dropped.
//
// The result is never longer than the input, so the rewrite happens directly
// in str; no scratch buffer is allocated. The result may be an empty string
// (for example when the name consists only of punctuation), and callers
// should check for that before using it as a file name.
//
// str must be a writable NUL-terminated string. Returns str, or NULL when
// str is NULL.
char *sanitize_fname(char *str) {
    if (str == NULL) {
        return NULL;
    }

    size_t len = strlen(str);

    // Trim the head; bounded by len so an all-excluded name stops at the end.
    size_t head = 0;
    while (head < len && exclude1((unsigned char)str[head])) {
        head++;
    }

    // Trim the tail; bounded by head so we never step in front of the string
    // (this also covers the empty string, where len - 1 would wrap around).
    size_t tail = len;
    while (tail > head && exclude1((unsigned char)str[tail - 1])) {
        tail--;
    }

    // Compact the kept characters towards the front. The write index can
    // never overtake the read index, so copying in place is safe.
    size_t out = 0;
    for (size_t in = head; in < tail; in++) {
        if (!exclude2((unsigned char)str[in])) {
            str[out++] = str[in];
        }
    }
    str[out] = '\0';
    return str;
}
#ifndef SANITIZE_H
#define SANITIZE_H
    // Rewrites the NUL-terminated name in str in place, removing characters
    // that are not allowed in a file name, and returns str. The result is
    // never longer than the input; str must be writable.
    char *sanitize_fname(char *str);
#endif // SANITIZE_H
/*************************************************************************\
*                  Copyright (C) Michael Kerrisk, 2019.                   *
*                                                                         *
* This program is free software. You may use, modify, and redistribute it *
* under the terms of the GNU Lesser General Public License as published   *
* by the Free Software Foundation, either version 3 or (at your option)   *
* any later version. This program is distributed without any warranty.    *
* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details.           *
\*************************************************************************/

/* tlpi_hdr.h

   Standard header file used by nearly all of our example programs.
*/
#ifndef TLPI_HDR_H
#define TLPI_HDR_H      /* Prevent accidental double inclusion */

#include <sys/types.h>  /* Type definitions used by many programs */
#include <stdio.h>      /* Standard I/O functions */
#include <stdlib.h>     /* Prototypes of commonly used library functions,
                           plus EXIT_SUCCESS and EXIT_FAILURE constants */
#include <unistd.h>     /* Prototypes for many system calls */
#include <errno.h>      /* Declares errno and defines error constants */
#include <string.h>     /* Commonly used string-handling functions */
#include <stdbool.h>    /* 'bool' type plus 'true' and 'false' constants */

#include "get_num.h"    /* Declares our functions for handling numeric
                           arguments (getInt(), getLong()) */

#include "error_functions.h"  /* Declares our error-handling functions */

/* Unfortunately some UNIX implementations define FALSE and TRUE -
   here we'll undefine them */

#ifdef TRUE
#undef TRUE
#endif

#ifdef FALSE
#undef FALSE
#endif

typedef enum { FALSE, TRUE } Boolean;

#define min(m,n) ((m) < (n) ? (m) : (n))
#define max(m,n) ((m) > (n) ? (m) : (n))

/* Some systems don't define 'socklen_t' */

#if defined(__sgi)
typedef int socklen_t;
#endif

#if defined(__sun)
#include <sys/file.h>           /* Has definition of FASYNC */
#endif

#if ! defined(O_ASYNC) && defined(FASYNC)
/* Some systems define FASYNC instead of O_ASYNC */
#define O_ASYNC FASYNC
#endif

#if defined(MAP_ANON) && ! defined(MAP_ANONYMOUS)
/* BSD derivatives usually have MAP_ANON, not MAP_ANONYMOUS */
#define MAP_ANONYMOUS MAP_ANON

#endif

#if ! defined(O_SYNC) && defined(O_FSYNC)
/* Some implementations have O_FSYNC instead of O_SYNC */
#define O_SYNC O_FSYNC
#endif

#if defined(__FreeBSD__)

/* FreeBSD uses these alternate names for fields in the sigval structure */

#define sival_int sigval_int
#define sival_ptr sigval_ptr
#endif

#endif
/*************************************************************************\
*                  Copyright (C) Michael Kerrisk, 2019.                   *
*                                                                         *
* This program is free software. You may use, modify, and redistribute it *
* under the terms of the GNU General Public License as published by the   *
* Free Software Foundation, either version 3 or (at your option) any      *
* later version. This program is distributed without any warranty.  See   *
* the file COPYING.gpl-v3 for details.                                    *
\*************************************************************************/

#define _GNU_SOURCE
#include <stdio.h>
#if defined(__sun)
#define _XOPEN_SOURCE 500       /* Solaris 8 needs it this way */
#else
#if ! defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600
#define _XOPEN_SOURCE 600       /* Get nftw() and S_IFSOCK declarations */
#endif
#endif

#include <stdbool.h>
#include <string.h>
#include <ftw.h>

#include "tlpi_hdr.h"
#include "human.h"
#include "sanitize.h"

/* Labels printed in brackets for each visited object; indexed by the
   type_idx value that dirTree() computes for the object. */
static const char *types[] = {
    "FILE", "DIR", "CHAR", "BLOCK", "SYM", "FIFO", "SOCK", "EPERM"
};

/* Run-time switches, set from the command line in main() */
static bool verbose = false;    /* -v: print each visited object */
static bool dryrun  = false;    /* -n: report renames without performing them */

/* usageError - print an optional message followed by the usage summary on
   stderr, then terminate with EXIT_FAILURE.

   progName  name to show in the synopsis (normally argv[0])
   msg       extra line printed before the synopsis, or NULL for none

   Does not return. */
static void
usageError(const char *progName, const char *msg)
{
    if (msg != NULL)
        fprintf(stderr, "%s\n", msg);
    /* The synopsis lists every option accepted by getopt() in main(),
       including -n, which was previously missing from this line. */
    fprintf(stderr, "Usage: %s [-d] [-m] [-n] [-p] [-v] [directory-path]\n", progName);
    fprintf(stderr, "\t-d Use FTW_DEPTH flag, without it, directories are skipped\n");
    fprintf(stderr, "\t-m Use FTW_MOUNT flag\n");
    fprintf(stderr, "\t-n Dry-run, do not actually rename\n");
    fprintf(stderr, "\t-p Use FTW_PHYS flag, do not follow symbolic links\n");
    fprintf(stderr, "\t-v Enable verbose, i.e. print name of each object\n");
    exit(EXIT_FAILURE);
}

/* dirTree - nftw() callback: report one visited object and rename it when
   sanitize_fname() changes its basename.

   pathname  path of the object as supplied by nftw()
   sbuf      stat information for the object (not examined for FTW_NS)
   type      nftw() type flag, matched through the aliases in human.h
   ftwb      supplies the basename offset (base) and the depth (level)

   Only regular files and directories reported after their children
   (FTW_DP, i.e. when -d is given) are candidates for renaming. Directories
   reported before their children, symbolic links, special files and
   objects that could not be examined are reported and left alone.

   A rename is refused when the sanitized name would be empty or when the
   target name already exists, so that an existing object is not replaced.

   Returns 0 so that nftw() continues, or -1 to stop the walk when memory
   cannot be allocated. */
static int                      /* Function called by nftw() */
dirTree(const char *pathname, const struct stat *sbuf, int type,
        struct FTW *ftwb)
{
    int type_idx;

    if (type == FTW_NS) {                  /* Could not stat() file */
        type_idx = 7;                      /* Label as "EPERM", not as a socket */
    } else {
        switch (sbuf->st_mode & S_IFMT) {
        case S_IFREG:  type_idx = 0; break;
        case S_IFDIR:  type_idx = 1; break;
        case S_IFCHR:  type_idx = 2; break;
        case S_IFBLK:  type_idx = 3; break;
        case S_IFLNK:  type_idx = 4; break;
        case S_IFIFO:  type_idx = 5; break;
        case S_IFSOCK: type_idx = 6; break;
        default:       type_idx = 7; break; /* Should never happen (on Linux) */
        }
    }

    switch (type) {
    case DIRECTORY:
        /* Pre-order visit: renaming now would invalidate the paths of the
           children still to be walked, so directories are only renamed
           when they arrive as DIRECTORY_CHLD_VISITED. */
        if (verbose)
            printf("[%s] level<%d> %s\n", types[type_idx], ftwb->level, pathname);
        return 0;
    case DIRECTORY_NOREAD:
        fprintf(stderr, "ERROR: directory not readable %s\n", pathname);
        return 0;
    case DIRECTORY_CHLD_VISITED:
        if (verbose)
            printf("[%s]\tlevel<%d> %s\n", types[type_idx], ftwb->level, pathname);
        break;
    case FILE:
        if (type_idx != 0) {                /* Not a regular file */
            if (verbose)
                printf("[%s]\tSkipping %s\n", types[type_idx], pathname);
            return 0;
        }
        if (verbose)
            printf("[%s]\t%s\n", types[type_idx], pathname);
        break;
    case FILE_NOSTAT:
        fprintf(stderr, "[%s]\tFailed to stat %s\n", types[type_idx], pathname);
        return 0;
    case SYMLINK:
        /* The message promises a skip, so actually skip the rename */
        if (verbose)
            printf("[%s]\tSkipping %s\n", types[type_idx], pathname);
        return 0;
    case SYMLINK_NOFILE:
        fprintf(stderr, "ERROR: symlink invalid %s\n", pathname);
        return 0;
    default:                                /* Unknown flag: leave it alone */
        return 0;
    }

    const char *base_name = &pathname[ftwb->base];
    char *new_name = NULL;
    int rc = 0;

    char *clean = strdup(base_name);
    if (clean == NULL) {
        perror("strdup(...)");
        return -1;
    }
    sanitize_fname(clean);                  /* Rewrites 'clean' in place */

    if (strcmp(clean, base_name) == 0)
        goto done;                          /* Name is already acceptable */

    if (clean[0] == '\0') {
        /* Nothing usable is left; the target would be the parent path */
        fprintf(stderr, "ERROR: sanitized name would be empty, not renaming %s\n",
                pathname);
        goto done;
    }

    new_name = strdup(pathname);
    if (new_name == NULL) {
        perror("strdup(...)");
        rc = -1;
        goto done;
    }
    /* Sanitizing only removes characters, so the new basename always fits
       in the space occupied by the old one. */
    strcpy(&new_name[ftwb->base], clean);

    /* rename() replaces an existing target without warning. Refuse when
       something is already there (best effort: a target created between
       this check and the rename is not detected). */
    if (access(new_name, F_OK) == 0) {
        fprintf(stderr, "ERROR: %s already exists, not renaming %s\n",
                new_name, pathname);
        goto done;
    }

    if (verbose) printf("  `-");
    printf("rename %s -> %s\n",  pathname, new_name);

    /* Actually rename the object */
    if (!dryrun && rename(pathname, new_name) != 0)
        perror("rename(...)");

done:
    free(new_name);
    free(clean);
    return rc;                              /* 0 tells nftw() to continue */
}

/* Parse the command-line options, then walk the requested directory tree
   (the current directory when no path is given), calling dirTree() for
   every object found. */
int
main(int argc, char *argv[])
{
    int walk_flags = 0;         /* FTW_* flags handed to nftw() */
    int ch;

    while ((ch = getopt(argc, argv, "dmnpv")) != -1) {
        if (ch == 'd')
            walk_flags |= FTW_DEPTH;
        else if (ch == 'm')
            walk_flags |= FTW_MOUNT;
        else if (ch == 'p')
            walk_flags |= FTW_PHYS;
        else if (ch == 'n')
            dryrun = true;      /* global switch */
        else if (ch == 'v')
            verbose = true;     /* global switch */
        else
            usageError(argv[0], NULL);
    }

    /* At most one non-option argument is accepted */
    if (optind + 1 < argc)
        usageError(argv[0], NULL);

    if (dryrun)
        printf("Dry-run mode, nothing will be changed\n");

    const char *root = (optind < argc) ? argv[optind] : ".";

    if (nftw(root, dirTree, 10, walk_flags) == -1) {
        perror("nftw");
        exit(EXIT_FAILURE);
    }
    exit(EXIT_SUCCESS);
}