From b00d0b9c8ca4b291f55bc9f2cb1de4d42e0437d6 Mon Sep 17 00:00:00 2001 From: Chris Allegretta Date: Sun, 13 Feb 2011 04:23:10 +0000 Subject: [PATCH] 2011-02-12 Chris Allegretta * Initial libmagic implementation, adapted from Eitan Adler . New nanorc entry "magic" to enable this functionality, nanorc file and man page updates. git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@4528 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 4 ++ configure.ac | 3 +- doc/man/nanorc.5 | 7 +++ doc/syntax/asm.nanorc | 1 + doc/syntax/awk.nanorc | 1 + doc/syntax/c.nanorc | 1 + doc/syntax/html.nanorc | 1 + doc/syntax/java.nanorc | 1 + doc/syntax/man.nanorc | 1 + doc/syntax/nanorc.nanorc | 2 +- doc/syntax/patch.nanorc | 1 + doc/syntax/perl.nanorc | 1 + doc/syntax/php.nanorc | 1 + doc/syntax/sh.nanorc | 1 + doc/syntax/xml.nanorc | 1 + src/color.c | 103 +++++++++++++++++++++++++++++++++------ src/nano.h | 2 + src/proto.h | 1 + src/rcfile.c | 74 ++++++++++++++++++++++++++++ 19 files changed, 189 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index a02e388a..5f436bc6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2011-02-12 Chris Allegretta + * Initial libmagic implementation, adapted from Eitan Adler . + New nanorc entry "magic" to enable this functionality, nanorc file and man page updates. + 2011-02-06 Chris Allegretta * src/*: Retire iso_me_harder_funcmap based on suggestion by This does add 20KB to nano's executable size but it gets rid of a lot of indirection diff --git a/configure.ac b/configure.ac index 4f87a2ca..1b62d6b1 100644 --- a/configure.ac +++ b/configure.ac @@ -50,7 +50,7 @@ AC_DEFINE_DIR([PKGDATADIR], [pkgdatadir], [Where data are placed to.]) dnl Checks for header files. AC_HEADER_STDC -AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h) +AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h magic.h) dnl Checks for options. 
@@ -583,6 +583,7 @@ AC_MSG_RESULT(yes), AC_MSG_RESULT(no), AC_MSG_WARN([*** Can't check for macro redefinability when cross-compiling])) +AC_CHECK_LIB(magic, magic_open) # Check for groff html support AC_MSG_CHECKING([for HTML support in groff]) diff --git a/doc/man/nanorc.5 b/doc/man/nanorc.5 index 3af57aa1..4a0fc0f0 100644 --- a/doc/man/nanorc.5 +++ b/doc/man/nanorc.5 @@ -239,6 +239,13 @@ the same as not having a syntax at all. The \fIdefault\fP syntax is special: it takes no \fIfileregex\fP, and applies to files that don't match any other syntax's \fIfileregex\fP. .TP +.B magic ["\fIregex\fP" ... ] +For the currently defined syntax, add one or more regexes which +will be compared against the \fBmagic\fP database when attempting +to determine which highlighting rules to use for a given file. This +functionality only works when \fBlibmagic\fP is installed on the +system and will be silently ignored otherwise. +.TP .B color \fIfgcolor\fP,\fIbgcolor\fP "\fIregex\fP" ... For the currently defined syntax, display all expressions matching the extended regular expression \fIregex\fP with foreground color diff --git a/doc/syntax/asm.nanorc b/doc/syntax/asm.nanorc index e94ee906..9d927604 100644 --- a/doc/syntax/asm.nanorc +++ b/doc/syntax/asm.nanorc @@ -1,6 +1,7 @@ ## Here is an example for assembler. ## syntax "asm" "\.(S|s|asm)$" +magic "[Aa]ssembl(y|er)" color red "\<[A-Z_]{2,}\>" color brightgreen "\.(data|subsection|text)" color green "\.(align|file|globl|global|hidden|section|size|type|weak)" diff --git a/doc/syntax/awk.nanorc b/doc/syntax/awk.nanorc index 696ab6ad..b84d2fdc 100644 --- a/doc/syntax/awk.nanorc +++ b/doc/syntax/awk.nanorc @@ -1,6 +1,7 @@ ## Here is an example for awk. 
## syntax "awk" "\.awk$" +magic "awk.*script text" ## records icolor brightred "\$[0-9A-Z_!@#$*?-]+" ## awk-set variables diff --git a/doc/syntax/c.nanorc b/doc/syntax/c.nanorc index b89c9d29..59f0f3b6 100644 --- a/doc/syntax/c.nanorc +++ b/doc/syntax/c.nanorc @@ -1,6 +1,7 @@ ## Here is an example for C/C++. ## syntax "c" "\.(c(c|pp|xx)?|C)$" "\.(h(h|pp|xx)?|H)$" "\.ii?$" +magic "ASCII C(\+\+)? program text" color brightred "\<[A-Z_][0-9A-Z_]+\>" color green "\<(float|double|bool|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline)\>" color green "\<((s?size)|((u_?)?int(8|16|32|64|ptr)))_t\>" diff --git a/doc/syntax/html.nanorc b/doc/syntax/html.nanorc index d31467da..c94bfec9 100644 --- a/doc/syntax/html.nanorc +++ b/doc/syntax/html.nanorc @@ -1,5 +1,6 @@ ## Here is a short example for HTML. ## syntax "html" "\.html$" +magic "HTML document text" color blue start="<" end=">" color red "&[^;[[:space:]]]*;" diff --git a/doc/syntax/java.nanorc b/doc/syntax/java.nanorc index a432f817..f45b0c94 100644 --- a/doc/syntax/java.nanorc +++ b/doc/syntax/java.nanorc @@ -1,6 +1,7 @@ ## Here is an example for Java. ## syntax "java" "\.java$" +magic "Java " color green "\<(boolean|byte|char|double|float|int|long|new|short|this|transient|void)\>" color red "\<(break|case|catch|continue|default|do|else|finally|for|if|return|switch|throw|try|while)\>" color cyan "\<(abstract|class|extends|final|implements|import|instanceof|interface|native|package|private|protected|public|static|strictfp|super|synchronized|throws|volatile)\>" diff --git a/doc/syntax/man.nanorc b/doc/syntax/man.nanorc index 85efd262..0dfa6feb 100644 --- a/doc/syntax/man.nanorc +++ b/doc/syntax/man.nanorc @@ -1,6 +1,7 @@ ## Here is an example for manpages. 
## syntax "man" "\.[1-9]x?$" +magic "troff or preprocessor input text" color green "\.(S|T)H.*$" color brightgreen "\.(S|T)H" "\.TP" color brightred "\.(BR?|I[PR]?).*$" diff --git a/doc/syntax/nanorc.nanorc b/doc/syntax/nanorc.nanorc index 19ab5332..102fc028 100644 --- a/doc/syntax/nanorc.nanorc +++ b/doc/syntax/nanorc.nanorc @@ -5,7 +5,7 @@ syntax "nanorc" "\.?nanorc$" icolor brightwhite "^[[:space:]]*((un)?set|include|syntax|i?color).*$" ## Keywords icolor brightgreen "^[[:space:]]*(set|unset)[[:space:]]+(allow_insecure_backup|autoindent|backup|backupdir|backwards|boldtext|brackets|casesensitive|const|cut|fill|historylog|matchbrackets|morespace|mouse|multibuffer|noconvert|nofollow|nohelp|nonewlines|nowrap|operatingdir|preserve|punct)\>" "^[[:space:]]*(set|unset)[[:space:]]+(quickblank|quotestr|rebinddelete|rebindkeypad|regexp|smarthome|smooth|softwrap|speller|suspend|suspendenable|tabsize|tabstospaces|tempfile|undo|view|whitespace|wordbounds)\>" -icolor green "^[[:space:]]*(set|unset|include|syntax|header)\>" +icolor green "^[[:space:]]*(set|unset|include|syntax|header|magic)\>" ## Colors icolor yellow "^[[:space:]]*i?color[[:space:]]*(bright)?(white|black|red|blue|green|yellow|magenta|cyan)?(,(white|black|red|blue|green|yellow|magenta|cyan))?\>" icolor magenta "^[[:space:]]*i?color\>" "\<(start|end)=" diff --git a/doc/syntax/patch.nanorc b/doc/syntax/patch.nanorc index a788b359..5f101186 100644 --- a/doc/syntax/patch.nanorc +++ b/doc/syntax/patch.nanorc @@ -1,6 +1,7 @@ ## Here is an example for patch files. ## syntax "patch" "\.(patch|diff)$" +magic "diff output text" color brightgreen "^\+.*" color green "^\+\+\+.*" color brightblue "^ .*" diff --git a/doc/syntax/perl.nanorc b/doc/syntax/perl.nanorc index b5a56062..21acb145 100644 --- a/doc/syntax/perl.nanorc +++ b/doc/syntax/perl.nanorc @@ -1,6 +1,7 @@ ## Here is an example for Perl. 
## syntax "perl" "\.p[lm]$" +magic "perl.*script text" header "^#!.*/perl[-0-9._]*" color red "\<(accept|alarm|atan2|bin(d|mode)|c(aller|h(dir|mod|op|own|root)|lose(dir)?|onnect|os|rypt)|d(bm(close|open)|efined|elete|ie|o|ump)|e(ach|of|val|x(ec|ists|it|p))|f(cntl|ileno|lock|ork))\>" "\<(get(c|login|peername|pgrp|ppid|priority|pwnam|(host|net|proto|serv)byname|pwuid|grgid|(host|net)byaddr|protobynumber|servbyport)|([gs]et|end)(pw|gr|host|net|proto|serv)ent|getsock(name|opt)|gmtime|goto|grep|hex|index|int|ioctl|join)\>" "\<(keys|kill|last|length|link|listen|local(time)?|log|lstat|m|mkdir|msg(ctl|get|snd|rcv)|next|oct|open(dir)?|ord|pack|pipe|pop|printf?|push|q|qq|qx|rand|re(ad(dir|link)?|cv|do|name|quire|set|turn|verse|winddir)|rindex|rmdir|s|scalar|seek(dir)?)\>" "\<(se(lect|mctl|mget|mop|nd|tpgrp|tpriority|tsockopt)|shift|shm(ctl|get|read|write)|shutdown|sin|sleep|socket(pair)?|sort|spli(ce|t)|sprintf|sqrt|srand|stat|study|substr|symlink|sys(call|read|tem|write)|tell(dir)?|time|tr(y)?|truncate|umask)\>" "\<(un(def|link|pack|shift)|utime|values|vec|wait(pid)?|wantarray|warn|write)\>" color magenta "\<(continue|else|elsif|do|for|foreach|if|unless|until|while|eq|ne|lt|gt|le|ge|cmp|x|my|sub|use|package|can|isa)\>" diff --git a/doc/syntax/php.nanorc b/doc/syntax/php.nanorc index 8ef495f6..9cf54412 100644 --- a/doc/syntax/php.nanorc +++ b/doc/syntax/php.nanorc @@ -1,6 +1,7 @@ ## Here is an example for PHP ## syntax "php" "\.php[2345s~]?$" +magic "PHP script text" ## php markings color brightgreen "(<\?(php)?|\?>)" diff --git a/doc/syntax/sh.nanorc b/doc/syntax/sh.nanorc index ecce22ec..f6601057 100644 --- a/doc/syntax/sh.nanorc +++ b/doc/syntax/sh.nanorc @@ -1,6 +1,7 @@ ## Here is an example for Bourne shell scripts. 
## syntax "sh" "\.sh$" +magic "(POSIX|Bourne.*) shell script text" header "^#!.*/(ba|k|pdk)?sh[-0-9_]*" icolor brightgreen "^[0-9A-Z_]+\(\)" color green "\<(case|do|done|elif|else|esac|exit|fi|for|function|if|in|local|read|return|select|shift|then|time|until|while)\>" diff --git a/doc/syntax/xml.nanorc b/doc/syntax/xml.nanorc index 1f68c053..9005d614 100644 --- a/doc/syntax/xml.nanorc +++ b/doc/syntax/xml.nanorc @@ -1,6 +1,7 @@ ## Here is an example for xml files. ## syntax "xml" "\.([jrs]html?|sgml?|xml|xslt?)$" +magic "XML.*document text" color green start="<" end=">" color cyan "<[^> ]+" color cyan ">" diff --git a/src/color.c b/src/color.c index 2522f630..580fcaa0 100644 --- a/src/color.c +++ b/src/color.c @@ -25,6 +25,11 @@ #include #include +#include + +#ifdef HAVE_MAGIC_H +#include +#endif #ifdef ENABLE_COLOR @@ -102,12 +107,32 @@ void color_init(void) } } +/* Free a regex we previously compiled and NULL the caller's pointer to it */ +void nfreeregex(regex_t **r) +{ + assert(r != NULL && *r != NULL); + + regfree(*r); + free(*r); + *r = NULL; +} + /* Update the color information based on the current filename. 
*/ void color_update(void) { syntaxtype *tmpsyntax; syntaxtype *defsyntax = NULL; colortype *tmpcolor, *defcolor = NULL; + exttype *e; + +/* libmagic structures */ +/* magicstring will be NULL if we fail to get magic result */ +#ifdef HAVE_LIBMAGIC + const char *magicstring = NULL; + const char *magicerr = NULL; + magic_t m; +#endif /* HAVE_LIBMAGIC */ + assert(openfile != NULL); @@ -133,13 +158,35 @@ void color_update(void) } } +#ifdef HAVE_LIBMAGIC + + if (strcmp(openfile->filename,"")) { + m = magic_open(MAGIC_SYMLINK | +#ifdef DEBUG + MAGIC_DEBUG | MAGIC_CHECK | +#endif /* DEBUG */ + MAGIC_ERROR); + if (m == NULL || magic_load(m, NULL) < 0) + fprintf(stderr, "something went wrong: %s [%s]\n", strerror(errno), openfile->filename); + else { + magicstring = magic_file(m,openfile->filename); + if (magicstring == NULL) { + magicerr = magic_error(m); + fprintf(stderr, "something went wrong: %s [%s]\n", magicerr, openfile->filename); + } +#ifdef DEBUG + fprintf(stderr, "magic string returned: %s\n", magicstring); +#endif /* DEBUG */ + } + } +#endif /* HAVE_LIBMAGIC */ + /* If we didn't specify a syntax override string, or if we did and * there was no syntax by that name, get the syntax based on the * file extension, and then look in the header. */ if (openfile->colorstrings == NULL) { for (tmpsyntax = syntaxes; tmpsyntax != NULL; tmpsyntax = tmpsyntax->next) { - exttype *e; /* If this is the default syntax, it has no associated * extensions, which we've checked for elsewhere. Skip over @@ -163,24 +210,52 @@ void color_update(void) /* Set colorstrings if we matched the extension * regex. */ - if (regexec(e->ext, openfile->filename, 0, NULL, - 0) == 0) { + if (regexec(e->ext, openfile->filename, 0, NULL, 0) == 0) { openfile->syntax = tmpsyntax; openfile->colorstrings = tmpsyntax->color; - } - - if (openfile->colorstrings != NULL) break; + } /* Decompile e->ext_regex's specified regex if we aren't * going to use it. 
*/ - if (not_compiled) { - regfree(e->ext); - free(e->ext); - e->ext = NULL; + if (not_compiled) + nfreeregex(&e->ext); + } + } + + /* Check magic if we don't yet have an answer */ +#ifdef HAVE_LIBMAGIC + if (openfile->colorstrings == NULL) { + +#ifdef DEBUG + fprintf(stderr, "No match using extension, trying libmagic...\n"); +#endif /* DEBUG */ + + for (tmpsyntax = syntaxes; tmpsyntax != NULL; + tmpsyntax = tmpsyntax->next) { + for (e = tmpsyntax->magics; e != NULL; e = e->next) { + bool not_compiled = (e->ext == NULL); + if (not_compiled) { + e->ext = (regex_t *)nmalloc(sizeof(regex_t)); + regcomp(e->ext, fixbounds(e->ext_regex), REG_EXTENDED); + } +#ifdef DEBUG + fprintf(stderr,"Matching regex \"%s\" against \"%s\"\n",e->ext_regex, magicstring); +#endif /* DEBUG */ + + if (magicstring && regexec(e->ext, magicstring, 0, NULL, 0) == 0) { + fprintf(stderr,"We matched!\n"); + openfile->syntax = tmpsyntax; + openfile->colorstrings = tmpsyntax->color; + break; + } + + if (not_compiled) + nfreeregex(&e->ext); } } } +#endif /* HAVE_LIBMAGIC */ /* If we haven't matched anything yet, try the headers */ if (openfile->colorstrings == NULL) { @@ -189,7 +264,6 @@ void color_update(void) #endif for (tmpsyntax = syntaxes; tmpsyntax != NULL; tmpsyntax = tmpsyntax->next) { - exttype *e; for (e = tmpsyntax->headers; e != NULL; e = e->next) { bool not_compiled = (e->ext == NULL); @@ -217,11 +291,8 @@ void color_update(void) /* Decompile e->ext_regex's specified regex if we aren't * going to use it. */ - if (not_compiled) { - regfree(e->ext); - free(e->ext); - e->ext = NULL; - } + if (not_compiled) + nfreeregex(&e->ext); } } } diff --git a/src/nano.h b/src/nano.h index f2f3e50c..2d5bb086 100644 --- a/src/nano.h +++ b/src/nano.h @@ -232,6 +232,8 @@ typedef struct syntaxtype { /* The list of extensions that this syntax applies to. 
*/ exttype *headers; /* Regexes to match on the 'header' (1st line) of the file */ + exttype *magics; + /* Regexes to match libmagic results */ colortype *color; /* The colors used in this syntax. */ int nmultis; diff --git a/src/proto.h b/src/proto.h index 553a2a03..df805676 100644 --- a/src/proto.h +++ b/src/proto.h @@ -547,6 +547,7 @@ char *parse_argument(char *ptr); char *parse_next_regex(char *ptr); bool nregcomp(const char *regex, int eflags); void parse_syntax(char *ptr); +void parse_magictype(char *ptr); void parse_include(char *ptr); short color_to_short(const char *colorname, bool *bright); void parse_colors(char *ptr, bool icase); diff --git a/src/rcfile.c b/src/rcfile.c index d5682b5a..af68a64f 100644 --- a/src/rcfile.c +++ b/src/rcfile.c @@ -303,6 +303,7 @@ void parse_syntax(char *ptr) endheader = NULL; endsyntax->extensions = NULL; endsyntax->headers = NULL; + endsyntax->magics = NULL; endsyntax->next = NULL; endsyntax->nmultis = 0; @@ -358,6 +359,76 @@ void parse_syntax(char *ptr) } else free(newext); } + +} + + +/* Parse the quoted magic regex strings from the line at ptr, and store + * the valid ones in the magics list of the syntax at endsyntax. */ +void parse_magictype(char *ptr) +{ +#ifdef HAVE_LIBMAGIC + const char *fileregptr = NULL; + exttype *endext = NULL; + + assert(ptr != NULL); + + if (syntaxes == NULL) { + rcfile_error( + N_("Cannot add a magic string regex without a syntax command")); + return; + } + + if (*ptr == '\0') { + rcfile_error(N_("Missing magic string name")); + return; + } + + if (*ptr != '"') { + rcfile_error( + N_("Regex strings must begin and end with a \" character")); + return; + } + +#ifdef DEBUG + fprintf(stderr, "Starting a magic type: \"%s\"\n", ptr); +#endif + + /* Now load the magic regexes into their part of the struct. */ + while (*ptr != '\0') { + exttype *newext; + /* The new magic regex structure. 
*/ + + while (*ptr != '"' && *ptr != '\0') + ptr++; + + if (*ptr == '\0') + return; + + ptr++; + + fileregptr = ptr; + ptr = parse_next_regex(ptr); + if (ptr == NULL) + break; + + newext = (exttype *)nmalloc(sizeof(exttype)); + + /* Save the regex if it's valid. */ + if (nregcomp(fileregptr, REG_NOSUB)) { + newext->ext_regex = mallocstrcpy(NULL, fileregptr); + newext->ext = NULL; + + if (endext == NULL) + endsyntax->magics = newext; + else + endext->next = newext; + endext = newext; + endext->next = NULL; + } else + free(newext); + } +#endif /* HAVE_LIBMAGIC */ } int check_bad_binding(sc *s) @@ -951,6 +1022,9 @@ void parse_rcfile(FILE *rcstream rcfile_error(N_("Syntax \"%s\" has no color commands"), endsyntax->desc); parse_syntax(ptr); + } + else if (strcasecmp(keyword, "magic") == 0) { + parse_magictype(ptr); } else if (strcasecmp(keyword, "header") == 0) parse_headers(ptr); else if (strcasecmp(keyword, "color") == 0)