blob: f9941f8dcd2b3a206e267d02873954dfeb8207ba [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
//
// Traverse the source tree, parsing all .gitignore files, and print file paths
// that are ignored by git.
// The output is suitable for the --exclude-from option of tar.
// This is useful until the --exclude-vcs-ignores option works correctly.
//
// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
// (a lot of code imported from GIT)
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
//
//---------------------------(IMPORT FROM GIT BEGIN)---------------------------
// Copied from environment.c
// When true, wildmatch() adds WM_CASEFOLD and fspathncmp() compares
// case-insensitively.
static bool ignore_case;
// Copied from git-compat-util.h
/* Sane ctype - no locale, and works with signed chars */
// Drop any definitions of the standard names that the headers may have
// provided as macros, so the table-driven versions below can reuse them.
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit
// Forward declaration; the initializer appears later in this file.
static const unsigned char sane_ctype[256];
// Character-class bits stored in each sane_ctype[] entry.
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
// Non-zero if the table entry for x has any bit of mask set. The cast to
// unsigned char keeps the index in 0..255 even when x is a negative char.
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
// hexval_table[] is defined later in this file; -1 marks a non-hex byte.
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
// 0x20 is the bit that separates ASCII upper case from lower case.
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
/*
 * Force the case of an alphabetic character.
 *
 * x:    character value.
 * high: 0x20 to produce lower case, 0 to produce upper case.
 *
 * Returns x with bit 0x20 replaced by "high" when x is classified as
 * GIT_ALPHA; any other value is returned unchanged.
 */
static inline int sane_case(int x, int high)
{
	return sane_istest(x, GIT_ALPHA) ? ((x & ~0x20) | high) : x;
}
/*
 * Test the case of a character.
 *
 * x:        character value.
 * is_lower: non-zero to test for lower case, zero to test for upper case.
 *
 * Returns 1 if x is alphabetic (GIT_ALPHA) and has the requested case,
 * 0 otherwise. Case is decided by bit 0x20 of x.
 */
static inline int sane_iscase(int x, int is_lower)
{
	const int lower_bit_set = (x & 0x20) != 0;

	if (!sane_istest(x, GIT_ALPHA))
		return 0;

	return is_lower ? lower_bit_set : !lower_bit_set;
}
// Copied from ctype.c
/*
 * One-letter aliases for the GIT_* class bits. They exist so that the
 * sane_ctype[] initializer below stays compact and readable.
 */
enum {
S = GIT_SPACE,
A = GIT_ALPHA,
D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
X = GIT_CNTRL,
U = GIT_PUNCT,
Z = GIT_CNTRL | GIT_SPACE
};
/*
 * Character-class bits for every byte value, indexed by the byte.
 * Only the first 128 entries are listed; the remaining entries are
 * zero-initialized, i.e. bytes >= 128 belong to no class.
 */
static const unsigned char sane_ctype[256] = {
X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
/* Nothing in the 128.. range */
};
// Copied from hex.c
/*
 * Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9', 'A'-'F', 'a'-'f' -> 0..15), or -1 if the byte is not a
 * hexadecimal digit. Used by the isxdigit() macro.
 */
static const signed char hexval_table[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */
0, 1, 2, 3, 4, 5, 6, 7, /* 30-37 */
8, 9, -1, -1, -1, -1, -1, -1, /* 38-3f */
-1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */
-1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 68-6f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */
-1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */
-1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */
-1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */
-1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */
-1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */
-1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */
-1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */
-1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */
};
// Copied from wildmatch.h
// Flag bits accepted by wildmatch()/dowild().
#define WM_CASEFOLD 1
#define WM_PATHNAME 2
// Result codes of wildmatch()/dowild(). WM_MATCH is 0, so callers can test
// for a match with '!'. The negative WM_ABORT_* codes are also "no match"
// results; dowild() uses them to cut short its backtracking over '*'.
#define WM_NOMATCH 1
#define WM_MATCH 0
#define WM_ABORT_ALL -1
#define WM_ABORT_TO_STARSTAR -2
// Copied from wildmatch.c
typedef unsigned char uchar;
// local modification: remove NEGATE_CLASS(2)
// True if the "len" bytes at "class" spell exactly the string literal
// "litmatch" (used to recognize POSIX class names such as "alpha").
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
&& *(class) == *(litmatch) \
&& strncmp((char*)class, litmatch, len) == 0)
// local modification: simplify macros
// Character tests used for the [:class:] bracket expressions; they map onto
// the locale-independent "sane ctype" macros defined earlier in this file.
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)
/*
 * Match pattern "p" against "text".
 *
 * flags is a bitmask:
 *   WM_CASEFOLD - upper-case characters of both the pattern and the text are
 *                 lowered before they are compared.
 *   WM_PATHNAME - '?', a single '*' and bracket expressions never match '/';
 *                 only a run of two or more '*' that forms a whole path
 *                 component may match across '/'.
 *
 * Returns WM_MATCH when the whole text matches the whole pattern, WM_NOMATCH
 * when it does not, and WM_ABORT_ALL or WM_ABORT_TO_STARSTAR when matching
 * failed in a way that lets an enclosing '*' loop stop trying (text ran out,
 * malformed bracket expression, or a single '*' reached a '/').
 *
 * The function recurses on itself to backtrack over '*'.
 */
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
{
uchar p_ch;
const uchar *pattern = p;
for ( ; (p_ch = *p) != '\0'; text++, p++) {
int matched, match_slash, negated;
uchar t_ch, prev_ch;
/* Text is exhausted but the pattern is not: only a '*' can still match. */
if ((t_ch = *text) == '\0' && p_ch != '*')
return WM_ABORT_ALL;
if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
t_ch = tolower(t_ch);
if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
p_ch = tolower(p_ch);
switch (p_ch) {
case '\\':
/* Literal match with following character. Note that the test
* in "default" handles the p[1] == '\0' failure case. */
p_ch = *++p;
/* FALLTHROUGH */
default:
if (t_ch != p_ch)
return WM_NOMATCH;
continue;
case '?':
/* Match anything but '/'. */
if ((flags & WM_PATHNAME) && t_ch == '/')
return WM_NOMATCH;
continue;
case '*':
/*
 * First decide whether this star (or run of stars) is allowed to
 * match '/' (match_slash), then try to match the rest of the pattern.
 */
if (*++p == '*') {
const uchar *prev_p = p - 2;
while (*++p == '*') {}
if (!(flags & WM_PATHNAME))
/* without WM_PATHNAME, '*' == '**' */
match_slash = 1;
else if ((prev_p < pattern || *prev_p == '/') &&
(*p == '\0' || *p == '/' ||
(p[0] == '\\' && p[1] == '/'))) {
/*
* Assuming we already match 'foo/' and are at
* <star star slash>, just assume it matches
* nothing and go ahead match the rest of the
* pattern with the remaining string. This
* helps make foo/<*><*>/bar (<> because
* otherwise it breaks C comment syntax) match
* both foo/bar and foo/a/bar.
*/
if (p[0] == '/' &&
dowild(p + 1, text, flags) == WM_MATCH)
return WM_MATCH;
match_slash = 1;
} else /* WM_PATHNAME is set */
match_slash = 0;
} else
/* without WM_PATHNAME, '*' == '**' */
match_slash = flags & WM_PATHNAME ? 0 : 1;
if (*p == '\0') {
/* Trailing "**" matches everything. Trailing "*" matches
* only if there are no more slash characters. */
if (!match_slash) {
if (strchr((char *)text, '/'))
return WM_NOMATCH;
}
return WM_MATCH;
} else if (!match_slash && *p == '/') {
/*
* _one_ asterisk followed by a slash
* with WM_PATHNAME matches the next
* directory
*/
const char *slash = strchr((char*)text, '/');
if (!slash)
return WM_NOMATCH;
text = (const uchar*)slash;
/* the slash is consumed by the top-level for loop */
break;
}
/* Try each possible start position in the text for the rest of the pattern. */
while (1) {
if (t_ch == '\0')
break;
/*
* Try to advance faster when an asterisk is
* followed by a literal. We know in this case
* that the string before the literal
* must belong to "*".
* If match_slash is false, do not look past
* the first slash as it cannot belong to '*'.
*/
if (!is_glob_special(*p)) {
p_ch = *p;
if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
p_ch = tolower(p_ch);
while ((t_ch = *text) != '\0' &&
(match_slash || t_ch != '/')) {
if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
t_ch = tolower(t_ch);
if (t_ch == p_ch)
break;
text++;
}
if (t_ch != p_ch)
return WM_NOMATCH;
}
if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
return matched;
} else if (!match_slash && t_ch == '/')
return WM_ABORT_TO_STARSTAR;
t_ch = *++text;
}
return WM_ABORT_ALL;
case '[':
/* Bracket expression: [abc], [a-z], [!...], [^...], [[:class:]]. */
p_ch = *++p;
if (p_ch == '^')
p_ch = '!';
/* Assign literal 1/0 because of "matched" comparison. */
negated = p_ch == '!' ? 1 : 0;
if (negated) {
/* Inverted character class. */
p_ch = *++p;
}
prev_ch = 0;
matched = 0;
do {
/* Pattern ended before the closing ']'. */
if (!p_ch)
return WM_ABORT_ALL;
if (p_ch == '\\') {
p_ch = *++p;
if (!p_ch)
return WM_ABORT_ALL;
if (t_ch == p_ch)
matched = 1;
} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
/* Range prev_ch-p_ch; a '-' that is first or last is a literal. */
p_ch = *++p;
if (p_ch == '\\') {
p_ch = *++p;
if (!p_ch)
return WM_ABORT_ALL;
}
if (t_ch <= p_ch && t_ch >= prev_ch)
matched = 1;
else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
uchar t_ch_upper = toupper(t_ch);
if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
matched = 1;
}
p_ch = 0; /* This makes "prev_ch" get set to 0. */
} else if (p_ch == '[' && p[1] == ':') {
const uchar *s;
int i;
/* s marks the start of the class name; p scans forward to the next ']'. */
for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
if (!p_ch)
return WM_ABORT_ALL;
/* i is the length of the name between "[:" and ":]". */
i = p - s - 1;
if (i < 0 || p[-1] != ':') {
/* Didn't find ":]", so treat like a normal set. */
p = s - 2;
p_ch = '[';
if (t_ch == p_ch)
matched = 1;
continue;
}
if (CC_EQ(s,i, "alnum")) {
if (ISALNUM(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "alpha")) {
if (ISALPHA(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "blank")) {
if (ISBLANK(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "cntrl")) {
if (ISCNTRL(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "digit")) {
if (ISDIGIT(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "graph")) {
if (ISGRAPH(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "lower")) {
if (ISLOWER(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "print")) {
if (ISPRINT(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "punct")) {
if (ISPUNCT(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "space")) {
if (ISSPACE(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "upper")) {
if (ISUPPER(t_ch))
matched = 1;
else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
matched = 1;
} else if (CC_EQ(s,i, "xdigit")) {
if (ISXDIGIT(t_ch))
matched = 1;
} else /* malformed [:class:] string */
return WM_ABORT_ALL;
p_ch = 0; /* This makes "prev_ch" get set to 0. */
} else if (t_ch == p_ch)
matched = 1;
/* Remember the current item for a possible range, then step to the next one. */
} while (prev_ch = p_ch, (p_ch = *++p) != ']');
if (matched == negated ||
((flags & WM_PATHNAME) && t_ch == '/'))
return WM_NOMATCH;
continue;
}
}
/* Pattern fully consumed: it is a match only if the text is consumed too. */
return *text ? WM_NOMATCH : WM_MATCH;
}
/*
 * Match the "pattern" against the "text" string.
 *
 * flags is a bitmask of WM_* flags; WM_CASEFOLD is added automatically when
 * the global ignore_case is set. Returns the result of dowild(), i.e.
 * WM_MATCH (0) on a match and a non-zero code otherwise.
 */
static int wildmatch(const char *pattern, const char *text, unsigned int flags)
{
	// local modification: move WM_CASEFOLD here
	const unsigned int effective = ignore_case ? (flags | WM_CASEFOLD) : flags;

	return dowild((const uchar *)pattern, (const uchar *)text, effective);
}
// Copied from dir.h
// Per-pattern flag bits computed by parse_path_pattern().
// NODIR: the pattern contains no '/' (apart from a stripped trailing one).
#define PATTERN_FLAG_NODIR 1
// ENDSWITH: the pattern is '*' followed by text without glob characters.
#define PATTERN_FLAG_ENDSWITH 4
// MUSTBEDIR: the pattern ended with '/', so it only applies to directories.
#define PATTERN_FLAG_MUSTBEDIR 8
// NEGATIVE: the pattern started with '!'.
#define PATTERN_FLAG_NEGATIVE 16
// Copied from dir.c
/*
 * Compare at most "count" bytes of two path strings, ignoring case when the
 * global ignore_case is set. Returns zero when they compare equal, like
 * strncmp().
 */
static int fspathncmp(const char *a, const char *b, size_t count)
{
	if (ignore_case)
		return strncasecmp(a, b, count);

	return strncmp(a, b, count);
}
/*
 * Return the length of the leading part of "match" that contains no glob
 * special character, i.e. the offset of the first such character or of the
 * terminating NUL, whichever comes first.
 */
static int simple_length(const char *match)
{
	int len = 0;

	while (match[len] != '\0' && !is_glob_special(match[len]))
		len++;

	return len;
}
/* Return non-zero if "string" contains no glob special character at all. */
static int no_wildcard(const char *string)
{
	const char *stop = string + simple_length(string);

	return *stop == '\0';
}
/*
 * Analyze one ignore pattern.
 *
 * pattern:       in: the raw pattern; out: the pattern with a leading '!'
 *                skipped.
 * patternlen:    out: length of the pattern, not counting a trailing '/'.
 * flags:         out: combination of PATTERN_FLAG_* bits.
 * nowildcardlen: out: length of the leading part without glob characters,
 *                capped at *patternlen.
 *
 * The string itself is not modified, so a trailing '/' is still present in
 * memory after the first *patternlen bytes.
 */
static void parse_path_pattern(const char **pattern,
			       int *patternlen,
			       unsigned *flags,
			       int *nowildcardlen)
{
	const char *str = *pattern;
	unsigned result = 0;
	size_t len;
	int literal_len;

	if (str[0] == '!') {
		result |= PATTERN_FLAG_NEGATIVE;
		str++;
	}

	len = strlen(str);
	if (len > 0 && str[len - 1] == '/') {
		result |= PATTERN_FLAG_MUSTBEDIR;
		len--;
	}

	/* No slash left in the pattern: it is matched against basenames. */
	if (memchr(str, '/', len) == NULL)
		result |= PATTERN_FLAG_NODIR;

	/*
	 * we should have excluded the trailing slash from 'str' too,
	 * but that's one more allocation. Instead just make sure
	 * the literal length does not exceed the real pattern length
	 */
	literal_len = simple_length(str);
	if ((size_t)literal_len > len)
		literal_len = len;

	if (str[0] == '*' && no_wildcard(str + 1))
		result |= PATTERN_FLAG_ENDSWITH;

	*flags = result;
	*nowildcardlen = literal_len;
	*pattern = str;
	*patternlen = len;
}
/*
 * Remove unescaped trailing spaces from "buf" in place.
 *
 * A space preceded by a backslash is kept. If the string ends with a lone
 * backslash, the buffer is left untouched.
 */
static void trim_trailing_spaces(char *buf)
{
	char *cursor = buf;
	char *run_start = NULL;	/* start of the current run of spaces, if any */

	while (*cursor != '\0') {
		if (*cursor == ' ') {
			if (run_start == NULL)
				run_start = cursor;
		} else {
			if (*cursor == '\\') {
				/* Skip the escaped character. */
				cursor++;
				if (*cursor == '\0')
					return;
			}
			run_start = NULL;
		}
		cursor++;
	}

	if (run_start != NULL)
		*run_start = '\0';
}
static int match_basename(const char *basename, int basenamelen,
const char *pattern, int prefix, int patternlen,
unsigned flags)
{
if (prefix == patternlen) {
if (patternlen == basenamelen &&
!fspathncmp(pattern, basename, basenamelen))
return 1;
} else if (flags & PATTERN_FLAG_ENDSWITH) {
/* "*literal" matching against "fooliteral" */
if (patternlen - 1 <= basenamelen &&
!fspathncmp(pattern + 1,
basename + basenamelen - (patternlen - 1),
patternlen - 1))
return 1;
} else {
// local modification: call wildmatch() directly
if (!wildmatch(pattern, basename, flags))
return 1;
}
return 0;
}
/*
 * Match a path against a pattern that contains a '/'. The pattern is
 * anchored at "base", the directory of the .gitignore it came from.
 *
 * pathname/pathlen:   the path to test and its length.
 * base/baselen:       directory the pattern is relative to; baselen does not
 *                     count a trailing slash.
 * pattern/patternlen: the pattern and its length.
 * prefix:             length of the leading part of the pattern without glob
 *                     characters.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int match_pathname(const char *pathname, int pathlen,
			  const char *base, int baselen,
			  const char *pattern, int prefix, int patternlen)
{
	// local modification: remove local variables
	const char *rest;
	int restlen;

	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (pattern[0] == '/') {
		pattern += 1;
		patternlen -= 1;
		prefix -= 1;
	}

	/*
	 * baselen does not count the trailing slash. base[] may or
	 * may not end with a trailing slash though.
	 */
	if (pathlen < baselen + 1)
		return 0;
	if (baselen && pathname[baselen] != '/')
		return 0;
	if (fspathncmp(pathname, base, baselen))
		return 0;

	// local modification: simplified because always baselen > 0
	rest = pathname + baselen + 1;
	restlen = pathlen - (baselen + 1);

	if (prefix) {
		/*
		 * if the non-wildcard part is longer than the
		 * remaining pathname, surely it cannot match.
		 */
		if (prefix > restlen)
			return 0;
		if (fspathncmp(pattern, rest, prefix))
			return 0;

		pattern += prefix;
		patternlen -= prefix;
		rest += prefix;
		restlen -= prefix;

		/*
		 * If the whole pattern did not have a wildcard,
		 * then our prefix match is all we need; we
		 * do not need to call wildmatch() at all.
		 */
		if (!patternlen && !restlen)
			return 1;
	}

	// local modification: call wildmatch() directly
	return !wildmatch(pattern, rest, WM_PATHNAME);
}
// Copied from git/utf8.c
// The three bytes EF BB BF (UTF-8 byte order mark), written in octal.
static const char utf8_bom[] = "\357\273\277";
//----------------------------(IMPORT FROM GIT END)----------------------------
/*
 * One parsed ignore pattern, allocated by add_pattern().
 */
struct pattern {
/* PATTERN_FLAG_* bits */
unsigned int flags;
/* length of the leading part of the pattern without glob characters */
int nowildcardlen;
/* length of the pattern string */
int patternlen;
/* length of the directory string stored after the pattern */
int dirlen;
/*
 * Two NUL-terminated strings back to back: the pattern itself, then the
 * directory the pattern was added for (at offset patternlen + 1).
 */
char pattern[];
};
// All currently active patterns, in the order they were added.
// nr_patterns entries are in use, alloced_patterns are allocated.
static struct pattern **pattern_list;
static int nr_patterns, alloced_patterns;
// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current/max directory level;
static int depth, max_depth;
// Enables the debug() output.
static bool debug_on;
// Destination of the ignored paths, and of the optional stat listing
// (stat_fp stays NULL when no stat output was requested).
static FILE *out_fp, *stat_fp;
// String printed in front of every path by print_path().
static char *prefix = "";
// Basename of argv[0], used in diagnostics.
static char *progname;
/*
 * Print "s" followed by the description of the current errno to stderr,
 * then terminate the program with EXIT_FAILURE. Does not return.
 */
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);

	exit(EXIT_FAILURE);
}
/*
 * Print "<progname>: error: " followed by the printf-style message to
 * stderr, then terminate the program with EXIT_FAILURE. Does not return.
 * The caller supplies the trailing newline.
 */
static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "%s: error: ", progname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(EXIT_FAILURE);
}
/*
 * Print a printf-style debug message to stderr when debug output is enabled.
 * Each message starts with "[DEBUG] " and is indented by two spaces per
 * directory level (depth).
 */
static void debug(const char *fmt, ...)
{
	va_list ap;
	int pad;

	if (!debug_on)
		return;

	fprintf(stderr, "[DEBUG] ");
	for (pad = depth * 2; pad > 0; pad--)
		fputc(' ', stderr);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}
/*
 * realloc() wrapper that never returns NULL: when realloc() fails the
 * program is terminated through perror_exit().
 */
static void *xrealloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		perror_exit(progname);

	return resized;
}
/* Allocate "size" bytes; the program is terminated if the allocation fails. */
static void *xmalloc(size_t size)
{
	void *fresh = xrealloc(NULL, size);

	return fresh;
}
// similar to last_matching_pattern_from_list() in GIT
static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
{
int i;
// Search in the reverse order because the last matching pattern wins.
for (i = nr_patterns - 1; i >= 0; i--) {
struct pattern *p = pattern_list[i];
unsigned int flags = p->flags;
const char *gitignore_dir = p->pattern + p->patternlen + 1;
bool ignored;
if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
continue;
if (flags & PATTERN_FLAG_NODIR) {
if (!match_basename(path + dirlen + 1,
pathlen - dirlen - 1,
p->pattern,
p->nowildcardlen,
p->patternlen,
p->flags))
continue;
} else {
if (!match_pathname(path, pathlen,
gitignore_dir, p->dirlen,
p->pattern,
p->nowildcardlen,
p->patternlen))
continue;
}
debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
gitignore_dir);
ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
if (ignored)
debug("Ignore: %s\n", path);
return ignored;
}
debug("%s: no match\n", path);
return false;
}
/*
 * Parse one pattern line and append it to pattern_list.
 *
 * string:     the pattern text (NUL-terminated)
 * dir/dirlen: directory the pattern belongs to, and its length
 *
 * Patterns that are empty after parsing are dropped. The list grows in
 * steps of 128 entries.
 */
static void add_pattern(const char *string, const char *dir, int dirlen)
{
	struct pattern *entry;
	char *dir_copy;
	int patternlen, nowildcardlen;
	unsigned int flags;

	parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
	if (!patternlen)
		return;

	/* Room for the header, both strings and their two NUL terminators. */
	entry = xmalloc(sizeof(*entry) + patternlen + dirlen + 2);
	entry->flags = flags;
	entry->nowildcardlen = nowildcardlen;
	entry->patternlen = patternlen;
	entry->dirlen = dirlen;

	memcpy(entry->pattern, string, patternlen);
	entry->pattern[patternlen] = '\0';

	dir_copy = entry->pattern + patternlen + 1;
	memcpy(dir_copy, dir, dirlen);
	dir_copy[dirlen] = '\0';

	debug("Add pattern: %s%s%s\n",
	      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", entry->pattern,
	      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");

	if (nr_patterns >= alloced_patterns) {
		alloced_patterns += 128;
		pattern_list = xrealloc(pattern_list,
					alloced_patterns * sizeof(*pattern_list));
	}

	pattern_list[nr_patterns] = entry;
	nr_patterns++;
}
// similar to add_patterns_from_buffer() in GIT
//
// Read "<dir>/.gitignore" and add every pattern in it to the pattern list.
//
// dir/dirlen: the directory to look in, and the length of its name
//
// A missing .gitignore is not an error. Any other failure to open or read
// the file terminates the program. Empty lines and lines starting with '#'
// are skipped, a leading UTF-8 BOM and a '\r' before the newline are
// dropped, and unescaped trailing spaces are trimmed.
static void add_patterns_from_gitignore(const char *dir, int dirlen)
{
	struct stat st;
	char path[PATH_MAX], *buf, *entry;
	size_t size, nread, i;
	int fd, pathlen;

	pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
	if (pathlen < 0 || (size_t)pathlen >= sizeof(path))
		error_exit("%s: too long path was truncated\n", path);

	fd = open(path, O_RDONLY | O_NOFOLLOW);
	if (fd < 0) {
		if (errno != ENOENT)
			perror_exit(path);
		return;
	}

	if (fstat(fd, &st) < 0)
		perror_exit(path);

	size = st.st_size;

	// One extra byte for the newline sentinel appended below.
	buf = xmalloc(size + 1);

	// read() may legitimately return fewer bytes than requested, so keep
	// reading until the whole file is in the buffer.
	nread = 0;
	while (nread < size) {
		ssize_t ret = read(fd, buf + nread, size - nread);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			perror_exit(path);
		}
		if (ret == 0)
			error_exit("%s: unexpected end of file\n", path);
		nread += ret;
	}

	// Sentinel: guarantees that the last line is terminated even if the
	// file does not end with a newline.
	buf[size] = '\n';

	if (close(fd))
		perror_exit(path);

	debug("Parse %s\n", path);

	entry = buf;

	// skip utf8 bom
	if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
		entry += strlen(utf8_bom);

	// The loop runs up to and including index "size" so that the sentinel
	// newline is seen and a final line without '\n' is not lost.
	for (i = entry - buf; i <= size; i++) {
		if (buf[i] != '\n')
			continue;

		if (entry != buf + i && entry[0] != '#') {
			// Terminate the line, dropping a '\r' of a CRLF ending.
			buf[i - (i && buf[i - 1] == '\r')] = 0;
			trim_trailing_spaces(entry);
			add_pattern(entry, dir, dirlen);
		}
		entry = buf + i + 1;
	}

	free(buf);
}
// Enter a directory level: record how many patterns are active right now,
// so that decrement_depth() can drop the ones added below this point, then
// increment the depth. The bookkeeping array grows by one slot when needed.
static void increment_depth(void)
{
	const int level = depth;

	if (level >= max_depth) {
		max_depth++;
		nr_patterns_at = xrealloc(nr_patterns_at,
					  max_depth * sizeof(*nr_patterns_at));
	}

	nr_patterns_at[level] = nr_patterns;
	depth = level + 1;
}
// Leave a directory level: decrement the depth and free every pattern that
// was added since the matching increment_depth() call.
static void decrement_depth(void)
{
	int keep;

	depth -= 1;
	assert(depth >= 0);

	keep = nr_patterns_at[depth];
	while (nr_patterns > keep) {
		nr_patterns--;
		free(pattern_list[nr_patterns]);
	}
}
/*
 * Write one ignored path to the output file, one path per line.
 *
 * The leading "./" of "path" is replaced by the global prefix.
 */
static void print_path(const char *path)
{
	const char *relative;

	// The path always starts with "./"
	assert(strlen(path) >= 2);
	relative = path + 2;

	// Replace the root directory with a preferred prefix.
	// This is useful for the tar command.
	fprintf(out_fp, "%s%s\n", prefix, relative);
}
/*
 * Write one line describing a non-ignored file to the stat output.
 *
 * path: path of the file; its leading "./" is not printed
 * st:   result of lstat() for that path
 *
 * Nothing is written when no stat output was requested, or when the file is
 * neither a regular file nor a symbolic link. Each line holds the type
 * ('l' for a symbolic link, '-' otherwise), the size, the modification time
 * in seconds, and the path.
 */
static void print_stat(const char *path, struct stat *st)
{
	if (!stat_fp)
		return;

	if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
		return;

	assert(strlen(path) >= 2);

	// off_t and time_t are not necessarily "long"; convert explicitly so
	// that the arguments always agree with the conversion specifiers.
	fprintf(stat_fp, "%c %9lld %10lld %s\n",
		S_ISLNK(st->st_mode) ? 'l' : '-',
		(long long)st->st_size,
		(long long)st->st_mtim.tv_sec,
		path + 2);
}
// Traverse the directory tree below "dir", parsing the .gitignore file of
// every directory that is entered.
//
// Paths that are ignored are passed to print_path(); the others are passed
// to print_stat(). Entries that are not a regular file, a directory or a
// symbolic link are treated as ignored.
//
// dir/dirlen: the directory to scan and the length of its name
//
// Return true if all files under the directory are ignored, false otherwise.
static bool traverse_directory(const char *dir, int dirlen)
{
	bool all_ignored = true;
	DIR *dirp;

	debug("Enter[%d]: %s\n", depth, dir);
	increment_depth();

	add_patterns_from_gitignore(dir, dirlen);

	dirp = opendir(dir);
	if (!dirp)
		perror_exit(dir);

	for (;;) {
		struct dirent *ent;
		struct stat st;
		char path[PATH_MAX];
		int pathlen;
		bool is_dir, ignored;

		// readdir() returns NULL both at the end and on error;
		// errno tells the two cases apart.
		errno = 0;
		ent = readdir(dirp);
		if (!ent) {
			if (errno)
				perror_exit(dir);
			break;
		}

		if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
			continue;

		pathlen = snprintf(path, sizeof(path), "%s/%s", dir, ent->d_name);
		if ((size_t)pathlen >= sizeof(path))
			error_exit("%s: too long path was truncated\n", path);

		if (lstat(path, &st) < 0)
			perror_exit(path);

		is_dir = S_ISDIR(st.st_mode);

		if (!S_ISREG(st.st_mode) && !is_dir && !S_ISLNK(st.st_mode))
			ignored = true;
		else if (is_ignored(path, pathlen, dirlen, is_dir))
			ignored = true;
		else if (is_dir && !S_ISLNK(st.st_mode))
			// If all the files in a directory are ignored,
			// let's ignore that directory as well. This
			// will avoid empty directories in the tarball.
			ignored = traverse_directory(path, pathlen);
		else
			ignored = false;

		if (!ignored) {
			print_stat(path, &st);
			all_ignored = false;
			continue;
		}

		print_path(path);
	}

	if (closedir(dirp))
		perror_exit(dir);

	decrement_depth();
	debug("Leave[%d]: %s\n", depth, dir);

	return all_ignored;
}
/* Print the command line help to stderr. */
static void usage(void)
{
	static const char option_help[] =
		"\n"
		"Show files that are ignored by git\n"
		"\n"
		"options:\n"
		" -d, --debug print debug messages to stderr\n"
		" -e, --exclude PATTERN add the given exclude pattern\n"
		" -h, --help show this help message and exit\n"
		" -i, --ignore-case Ignore case differences between the patterns and the files\n"
		" -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n"
		" -p, --prefix PREFIX prefix added to each path (default: empty string)\n"
		" -r, --rootdir DIR root of the source tree (default: current working directory)\n"
		" -s, --stat FILE output the file stat of non-ignored files to a file\n";

	fprintf(stderr, "usage: %s [options]\n", progname);
	fputs(option_help, stderr);
}
/*
 * Open an output stream.
 *
 * pathname: file to create or truncate, or "-" for stdout
 * fp:       receives the stream
 *
 * The program is terminated if the file cannot be opened.
 */
static void open_output(const char *pathname, FILE **fp)
{
	if (!strcmp(pathname, "-")) {
		*fp = stdout;
		return;
	}

	*fp = fopen(pathname, "w");
	if (*fp == NULL)
		perror_exit(pathname);
}
/*
 * Flush and close an output stream opened by open_output().
 *
 * pathname: name used in the error message if closing fails
 * fp:       the stream to close
 *
 * The program is terminated if the stream is in an error state or cannot
 * be closed, so that lost output does not go unnoticed.
 */
static void close_output(const char *pathname, FILE *fp)
{
	fflush(fp);

	if (ferror(fp) != 0)
		error_exit("not all data was written to the output\n");

	if (fclose(fp) != 0)
		perror_exit(pathname);
}
int main(int argc, char *argv[])
{
const char *output = "-";
const char *rootdir = ".";
const char *stat = NULL;
progname = strrchr(argv[0], '/');
if (progname)
progname++;
else
progname = argv[0];
while (1) {
static struct option long_options[] = {
{"debug", no_argument, NULL, 'd'},
{"help", no_argument, NULL, 'h'},
{"ignore-case", no_argument, NULL, 'i'},
{"output", required_argument, NULL, 'o'},
{"prefix", required_argument, NULL, 'p'},
{"rootdir", required_argument, NULL, 'r'},
{"stat", required_argument, NULL, 's'},
{"exclude", required_argument, NULL, 'x'},
{},
};
int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
if (c == -1)
break;
switch (c) {
case 'd':
debug_on = true;
break;
case 'h':
usage();
exit(0);
case 'i':
ignore_case = true;
break;
case 'o':
output = optarg;
break;
case 'p':
prefix = optarg;
break;
case 'r':
rootdir = optarg;
break;
case 's':
stat = optarg;
break;
case 'x':
add_pattern(optarg, ".", strlen("."));
break;
case '?':
usage();
/* fallthrough */
default:
exit(EXIT_FAILURE);
}
}
open_output(output, &out_fp);
if (stat && stat[0])
open_output(stat, &stat_fp);
if (chdir(rootdir))
perror_exit(rootdir);
add_pattern(".git/", ".", strlen("."));
if (traverse_directory(".", strlen(".")))
print_path("./");
assert(depth == 0);
while (nr_patterns > 0)
free(pattern_list[--nr_patterns]);
free(pattern_list);
free(nr_patterns_at);
close_output(output, out_fp);
if (stat_fp)
close_output(stat, stat_fp);
return 0;
}