2011-02-12 Chris Allegretta <chrisa@asty.org>

* Initial libmagic implementation, adapted from Eitan Adler <eitanadlerlist@gmail.com>.
          New nanorc entry "magic" to enable this functionality, nanorc file and man page updates.



git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@4528 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
This commit is contained in:
Chris Allegretta 2011-02-13 04:23:10 +00:00
parent 637daa85c5
commit b00d0b9c8c
19 changed files with 189 additions and 18 deletions

View File

@ -1,3 +1,7 @@
2011-02-12 Chris Allegretta <chrisa@asty.org>
* Initial libmagic implementation, adapted from Eitan Adler <eitanadlerlist@gmail.com>.
New nanorc entry "magic" to enable this functionality, nanorc file and man page updates.
2011-02-06 Chris Allegretta <chrisa@asty.org>
* src/*: Retire iso_me_harder_funcmap based on suggestion by <bernd.spaeth@gmx.net>
This does add 20KB to nano's executable size but it gets rid of a lot of indirection

View File

@ -50,7 +50,7 @@ AC_DEFINE_DIR([PKGDATADIR], [pkgdatadir], [Where data are placed to.])
dnl Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h)
AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h magic.h)
dnl Checks for options.
@ -583,6 +583,7 @@ AC_MSG_RESULT(yes),
AC_MSG_RESULT(no),
AC_MSG_WARN([*** Can't check for macro redefinability when cross-compiling]))
AC_CHECK_LIB(magic, magic_open)
# Check for groff html support
AC_MSG_CHECKING([for HTML support in groff])

View File

@ -239,6 +239,13 @@ the same as not having a syntax at all. The \fIdefault\fP syntax is
special: it takes no \fIfileregex\fP, and applies to files that don't
match any other syntax's \fIfileregex\fP.
.TP
.B magic "\fIregex\fP" ...
For the currently defined syntax, add one or more regexes which
will be compared against the file type description that \fBlibmagic\fP
reports for a file when attempting to determine which highlighting
rules to use for it.  At least one regex must be given.  This
functionality only works when nano has been built with \fBlibmagic\fP
support; otherwise the command is silently ignored.
.TP
.B color \fIfgcolor\fP,\fIbgcolor\fP "\fIregex\fP" ...
For the currently defined syntax, display all expressions matching
the extended regular expression \fIregex\fP with foreground color

View File

@ -1,6 +1,7 @@
## Here is an example for assembler.
##
syntax "asm" "\.(S|s|asm)$"
magic "[Aa]ssembl(y|er)"
color red "\<[A-Z_]{2,}\>"
color brightgreen "\.(data|subsection|text)"
color green "\.(align|file|globl|global|hidden|section|size|type|weak)"

View File

@ -1,6 +1,7 @@
## Here is an example for awk.
##
syntax "awk" "\.awk$"
magic "awk.*script text"
## records
icolor brightred "\$[0-9A-Z_!@#$*?-]+"
## awk-set variables

View File

@ -1,6 +1,7 @@
## Here is an example for C/C++.
##
syntax "c" "\.(c(c|pp|xx)?|C)$" "\.(h(h|pp|xx)?|H)$" "\.ii?$"
magic "ASCII C(\+\+)? program text"
color brightred "\<[A-Z_][0-9A-Z_]+\>"
color green "\<(float|double|bool|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline)\>"
color green "\<((s?size)|((u_?)?int(8|16|32|64|ptr)))_t\>"

View File

@ -1,5 +1,6 @@
## Here is a short example for HTML.
##
syntax "html" "\.html$"
magic "HTML document text"
color blue start="<" end=">"
color red "&[^;[[:space:]]]*;"

View File

@ -1,6 +1,7 @@
## Here is an example for Java.
##
syntax "java" "\.java$"
magic "Java "
color green "\<(boolean|byte|char|double|float|int|long|new|short|this|transient|void)\>"
color red "\<(break|case|catch|continue|default|do|else|finally|for|if|return|switch|throw|try|while)\>"
color cyan "\<(abstract|class|extends|final|implements|import|instanceof|interface|native|package|private|protected|public|static|strictfp|super|synchronized|throws|volatile)\>"

View File

@ -1,6 +1,7 @@
## Here is an example for manpages.
##
syntax "man" "\.[1-9]x?$"
magic "troff or preprocessor input text"
color green "\.(S|T)H.*$"
color brightgreen "\.(S|T)H" "\.TP"
color brightred "\.(BR?|I[PR]?).*$"

View File

@ -5,7 +5,7 @@ syntax "nanorc" "\.?nanorc$"
icolor brightwhite "^[[:space:]]*((un)?set|include|syntax|i?color).*$"
## Keywords
icolor brightgreen "^[[:space:]]*(set|unset)[[:space:]]+(allow_insecure_backup|autoindent|backup|backupdir|backwards|boldtext|brackets|casesensitive|const|cut|fill|historylog|matchbrackets|morespace|mouse|multibuffer|noconvert|nofollow|nohelp|nonewlines|nowrap|operatingdir|preserve|punct)\>" "^[[:space:]]*(set|unset)[[:space:]]+(quickblank|quotestr|rebinddelete|rebindkeypad|regexp|smarthome|smooth|softwrap|speller|suspend|suspendenable|tabsize|tabstospaces|tempfile|undo|view|whitespace|wordbounds)\>"
icolor green "^[[:space:]]*(set|unset|include|syntax|header)\>"
icolor green "^[[:space:]]*(set|unset|include|syntax|header|magic)\>"
## Colors
icolor yellow "^[[:space:]]*i?color[[:space:]]*(bright)?(white|black|red|blue|green|yellow|magenta|cyan)?(,(white|black|red|blue|green|yellow|magenta|cyan))?\>"
icolor magenta "^[[:space:]]*i?color\>" "\<(start|end)="

View File

@ -1,6 +1,7 @@
## Here is an example for patch files.
##
syntax "patch" "\.(patch|diff)$"
magic "diff output text"
color brightgreen "^\+.*"
color green "^\+\+\+.*"
color brightblue "^ .*"

View File

@ -1,6 +1,7 @@
## Here is an example for Perl.
##
syntax "perl" "\.p[lm]$"
magic "perl.*script text"
header "^#!.*/perl[-0-9._]*"
color red "\<(accept|alarm|atan2|bin(d|mode)|c(aller|h(dir|mod|op|own|root)|lose(dir)?|onnect|os|rypt)|d(bm(close|open)|efined|elete|ie|o|ump)|e(ach|of|val|x(ec|ists|it|p))|f(cntl|ileno|lock|ork))\>" "\<(get(c|login|peername|pgrp|ppid|priority|pwnam|(host|net|proto|serv)byname|pwuid|grgid|(host|net)byaddr|protobynumber|servbyport)|([gs]et|end)(pw|gr|host|net|proto|serv)ent|getsock(name|opt)|gmtime|goto|grep|hex|index|int|ioctl|join)\>" "\<(keys|kill|last|length|link|listen|local(time)?|log|lstat|m|mkdir|msg(ctl|get|snd|rcv)|next|oct|open(dir)?|ord|pack|pipe|pop|printf?|push|q|qq|qx|rand|re(ad(dir|link)?|cv|do|name|quire|set|turn|verse|winddir)|rindex|rmdir|s|scalar|seek(dir)?)\>" "\<(se(lect|mctl|mget|mop|nd|tpgrp|tpriority|tsockopt)|shift|shm(ctl|get|read|write)|shutdown|sin|sleep|socket(pair)?|sort|spli(ce|t)|sprintf|sqrt|srand|stat|study|substr|symlink|sys(call|read|tem|write)|tell(dir)?|time|tr(y)?|truncate|umask)\>" "\<(un(def|link|pack|shift)|utime|values|vec|wait(pid)?|wantarray|warn|write)\>"
color magenta "\<(continue|else|elsif|do|for|foreach|if|unless|until|while|eq|ne|lt|gt|le|ge|cmp|x|my|sub|use|package|can|isa)\>"

View File

@ -1,6 +1,7 @@
## Here is an example for PHP
##
syntax "php" "\.php[2345s~]?$"
magic "PHP script text"
## php markings
color brightgreen "(<\?(php)?|\?>)"

View File

@ -1,6 +1,7 @@
## Here is an example for Bourne shell scripts.
##
syntax "sh" "\.sh$"
magic "(POSIX|Bourne.*) shell script text"
header "^#!.*/(ba|k|pdk)?sh[-0-9_]*"
icolor brightgreen "^[0-9A-Z_]+\(\)"
color green "\<(case|do|done|elif|else|esac|exit|fi|for|function|if|in|local|read|return|select|shift|then|time|until|while)\>"

View File

@ -1,6 +1,7 @@
## Here is an example for xml files.
##
syntax "xml" "\.([jrs]html?|sgml?|xml|xslt?)$"
magic "XML.*document text"
color green start="<" end=">"
color cyan "<[^> ]+"
color cyan ">"

View File

@ -25,6 +25,11 @@
#include <stdio.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_MAGIC_H
#include <magic.h>
#endif
#ifdef ENABLE_COLOR
@ -102,12 +107,32 @@ void color_init(void)
}
}
/* Cleanup a regex we previously compiled */
void nfreeregex(regex_t *r)
{
assert(r != NULL);
regfree(r);
free(r);
r = NULL;
}
/* Update the color information based on the current filename. */
void color_update(void)
{
syntaxtype *tmpsyntax;
syntaxtype *defsyntax = NULL;
colortype *tmpcolor, *defcolor = NULL;
exttype *e;
/* libmagic structures */
/* magicstring will be NULL if we fail to get magic result */
#ifdef HAVE_LIBMAGIC
const char *magicstring = NULL;
const char *magicerr = NULL;
magic_t m;
#endif /* HAVE_LIBMAGIC */
assert(openfile != NULL);
@ -133,13 +158,35 @@ void color_update(void)
}
}
#ifdef HAVE_LIBMAGIC
    /* Ask libmagic for a description of the file, but only when the
     * buffer has a filename.  On success magicstring points at the
     * description; on any failure it stays NULL. */
    if (strcmp(openfile->filename, "") != 0) {
        m = magic_open(MAGIC_SYMLINK |
#ifdef DEBUG
                MAGIC_DEBUG | MAGIC_CHECK |
#endif /* DEBUG */
                MAGIC_ERROR);
        if (m == NULL || magic_load(m, NULL) < 0) {
            /* Report first: closing the handle may change errno. */
            fprintf(stderr, "something went wrong: %s [%s]\n", strerror(errno), openfile->filename);
            /* A handle whose database failed to load is of no further
             * use, so release it instead of leaking it. */
            if (m != NULL)
                magic_close(m);
        } else {
            magicstring = magic_file(m, openfile->filename);
            if (magicstring == NULL) {
                magicerr = magic_error(m);
                fprintf(stderr, "something went wrong: %s [%s]\n", magicerr, openfile->filename);
                /* Nothing was returned that depends on the handle, so
                 * it can be released right away.  (On success the
                 * handle stays open, because the returned string
                 * belongs to it.) */
                magic_close(m);
            }
#ifdef DEBUG
            /* Passing NULL for %s is undefined, so substitute a
             * placeholder when the lookup failed. */
            fprintf(stderr, "magic string returned: %s\n",
                    magicstring != NULL ? magicstring : "(null)");
#endif /* DEBUG */
        }
    }
#endif /* HAVE_LIBMAGIC */
/* If we didn't specify a syntax override string, or if we did and
* there was no syntax by that name, get the syntax based on the
* file extension, and then look in the header. */
if (openfile->colorstrings == NULL) {
for (tmpsyntax = syntaxes; tmpsyntax != NULL;
tmpsyntax = tmpsyntax->next) {
exttype *e;
/* If this is the default syntax, it has no associated
* extensions, which we've checked for elsewhere. Skip over
@ -163,24 +210,52 @@ void color_update(void)
/* Set colorstrings if we matched the extension
* regex. */
if (regexec(e->ext, openfile->filename, 0, NULL,
0) == 0) {
if (regexec(e->ext, openfile->filename, 0, NULL, 0) == 0) {
openfile->syntax = tmpsyntax;
openfile->colorstrings = tmpsyntax->color;
}
if (openfile->colorstrings != NULL)
break;
}
/* Decompile e->ext_regex's specified regex if we aren't
* going to use it. */
if (not_compiled) {
regfree(e->ext);
free(e->ext);
e->ext = NULL;
if (not_compiled)
nfreeregex(e->ext);
}
}
/* Check magic if we don't yet have an answer.  Without a libmagic
 * description there is nothing to compare against, so don't compile
 * any regexes in that case. */
#ifdef HAVE_LIBMAGIC
    if (openfile->colorstrings == NULL && magicstring != NULL) {

#ifdef DEBUG
        fprintf(stderr, "No match using extension, trying libmagic...\n");
#endif /* DEBUG */

        /* Stop at the first syntax whose magic regex matches, so that a
         * later syntax cannot override it. */
        for (tmpsyntax = syntaxes;
                tmpsyntax != NULL && openfile->colorstrings == NULL;
                tmpsyntax = tmpsyntax->next) {
            for (e = tmpsyntax->magics; e != NULL; e = e->next) {
                bool not_compiled = (e->ext == NULL);
                if (not_compiled) {
                    e->ext = (regex_t *)nmalloc(sizeof(regex_t));
                    regcomp(e->ext, fixbounds(e->ext_regex), REG_EXTENDED);
                }
#ifdef DEBUG
                fprintf(stderr,"Matching regex \"%s\" against \"%s\"\n",e->ext_regex, magicstring);
#endif /* DEBUG */

                if (regexec(e->ext, magicstring, 0, NULL, 0) == 0) {
#ifdef DEBUG
                    /* Diagnostic only: keep it out of normal runs. */
                    fprintf(stderr,"We matched!\n");
#endif /* DEBUG */
                    openfile->syntax = tmpsyntax;
                    openfile->colorstrings = tmpsyntax->color;
                    break;
                }

                /* Decompile the regex if we compiled it only for this
                 * test.  nfreeregex() cannot clear our pointer, so
                 * reset it here; otherwise the next call would treat
                 * the freed regex as still compiled. */
                if (not_compiled) {
                    nfreeregex(e->ext);
                    e->ext = NULL;
                }
            }
        }
    }

    /* magicstring is only non-NULL when the handle was opened and
     * loaded successfully, and the string belongs to that handle.  We
     * are done with both now, so release the handle. */
    if (magicstring != NULL) {
        magic_close(m);
        magicstring = NULL;
    }
#endif /* HAVE_LIBMAGIC */
/* If we haven't matched anything yet, try the headers */
if (openfile->colorstrings == NULL) {
@ -189,7 +264,6 @@ void color_update(void)
#endif
for (tmpsyntax = syntaxes; tmpsyntax != NULL;
tmpsyntax = tmpsyntax->next) {
exttype *e;
for (e = tmpsyntax->headers; e != NULL; e = e->next) {
bool not_compiled = (e->ext == NULL);
@ -217,11 +291,8 @@ void color_update(void)
/* Decompile e->ext_regex's specified regex if we aren't
* going to use it. */
if (not_compiled) {
regfree(e->ext);
free(e->ext);
e->ext = NULL;
}
if (not_compiled)
nfreeregex(e->ext);
}
}
}

View File

@ -232,6 +232,8 @@ typedef struct syntaxtype {
/* The list of extensions that this syntax applies to. */
exttype *headers;
/* Regexes to match on the 'header' (1st line) of the file */
exttype *magics;
/* Regexes to match libmagic results */
colortype *color;
/* The colors used in this syntax. */
int nmultis;

View File

@ -547,6 +547,7 @@ char *parse_argument(char *ptr);
char *parse_next_regex(char *ptr);
bool nregcomp(const char *regex, int eflags);
void parse_syntax(char *ptr);
void parse_magictype(char *ptr);
void parse_include(char *ptr);
short color_to_short(const char *colorname, bool *bright);
void parse_colors(char *ptr, bool icase);

View File

@ -303,6 +303,7 @@ void parse_syntax(char *ptr)
endheader = NULL;
endsyntax->extensions = NULL;
endsyntax->headers = NULL;
endsyntax->magics = NULL;
endsyntax->next = NULL;
endsyntax->nmultis = 0;
@ -358,6 +359,76 @@ void parse_syntax(char *ptr)
} else
free(newext);
}
}
/* Parse the quoted regexes that follow a "magic" command at ptr, and
 * append each valid one to the magics list of the syntax currently
 * being defined (endsyntax).  An error is reported through
 * rcfile_error() when no syntax has been started yet, when no regex is
 * given, or when the argument does not begin with a double quote.
 * When nano is built without libmagic support this function does
 * nothing, so "magic" lines are silently ignored. */
void parse_magictype(char *ptr)
{
#ifdef HAVE_LIBMAGIC
    const char *fileregptr = NULL;
    exttype *endext = NULL;

    assert(ptr != NULL);

    if (syntaxes == NULL) {
        rcfile_error(
                N_("Cannot add a magic string regex without a syntax command"));
        return;
    }

    if (*ptr == '\0') {
        rcfile_error(N_("Missing magic string name"));
        return;
    }

    if (*ptr != '"') {
        rcfile_error(
                N_("Regex strings must begin and end with a \" character"));
        return;
    }

#ifdef DEBUG
    fprintf(stderr, "Starting a magic type: \"%s\"\n", ptr);
#endif

    /* Start from the tail of any list built by an earlier "magic" line
     * for this syntax, so that the new regexes are appended instead of
     * replacing (and leaking) the existing ones. */
    for (endext = endsyntax->magics; endext != NULL && endext->next != NULL;
            endext = endext->next)
        ;

    /* Now load the magic regexes into their part of the struct. */
    while (*ptr != '\0') {
        exttype *newext;
            /* The new magic regex structure. */

        /* Skip ahead to the opening quote of the next regex. */
        while (*ptr != '"' && *ptr != '\0')
            ptr++;

        if (*ptr == '\0')
            return;

        ptr++;

        fileregptr = ptr;
        ptr = parse_next_regex(ptr);
        if (ptr == NULL)
            break;

        /* Only keep the regex if it compiles. */
        if (!nregcomp(fileregptr, REG_NOSUB))
            continue;

        newext = (exttype *)nmalloc(sizeof(exttype));
        newext->ext_regex = mallocstrcpy(NULL, fileregptr);
        /* The regex is compiled on demand when a file is matched. */
        newext->ext = NULL;
        newext->next = NULL;

        if (endext == NULL)
            endsyntax->magics = newext;
        else
            endext->next = newext;
        endext = newext;
    }
#endif /* HAVE_LIBMAGIC */
}
int check_bad_binding(sc *s)
@ -951,6 +1022,9 @@ void parse_rcfile(FILE *rcstream
rcfile_error(N_("Syntax \"%s\" has no color commands"),
endsyntax->desc);
parse_syntax(ptr);
}
else if (strcasecmp(keyword, "magic") == 0) {
parse_magictype(ptr);
} else if (strcasecmp(keyword, "header") == 0)
parse_headers(ptr);
else if (strcasecmp(keyword, "color") == 0)