+2011-02-12 Chris Allegretta <chrisa@asty.org>
+ * Initial libmagic implementation, adapted from work by Eitan Adler <eitanadlerlist@gmail.com>.
+ New nanorc command "magic" enables this functionality; nanorc files and man page updated.
2011-02-06 Chris Allegretta <chrisa@asty.org>
* src/*: Retire iso_me_harder_funcmap based on suggestion by <bernd.spaeth@gmx.net>
This does add 20KB to nano's executable size but it gets rid of a lot of indirection
dnl Checks for header files.
-AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h)
+AC_CHECK_HEADERS(getopt.h libintl.h limits.h regex.h sys/param.h wchar.h wctype.h stdarg.h magic.h)
dnl Checks for options.
AC_MSG_WARN([*** Can't check for macro redefinability when cross-compiling]))
+AC_CHECK_LIB(magic, magic_open)
# Check for groff html support
AC_MSG_CHECKING([for HTML support in groff])
special: it takes no \fIfileregex\fP, and applies to files that don't
match any other syntax's \fIfileregex\fP.
+.B magic ["\fIregex\fP" ... ]
+For the currently defined syntax, add one or more regexes which
+will be compared against the description that the \fBmagic\fP
+database returns for a file when attempting to determine which
+highlighting rules to use for it. These regexes are only tried
+when none of the \fIfileregex\fP expressions matched the file name.
+This functionality only works when nano has been built with
+\fBlibmagic\fP support, and will be silently ignored otherwise.
.B color \fIfgcolor\fP,\fIbgcolor\fP "\fIregex\fP" ...
For the currently defined syntax, display all expressions matching
the extended regular expression \fIregex\fP with foreground color
## Here is an example for assembler.
syntax "asm" "\.(S|s|asm)$"
+magic "[Aa]ssembl(y|er)"
color red "\<[A-Z_]{2,}\>"
color brightgreen "\.(data|subsection|text)"
color green "\.(align|file|globl|global|hidden|section|size|type|weak)"
## Here is an example for awk.
syntax "awk" "\.awk$"
+magic "awk.*script text"
## records
icolor brightred "\$[0-9A-Z_!@#$*?-]+"
## awk-set variables
## Here is an example for C/C++.
syntax "c" "\.(c(c|pp|xx)?|C)$" "\.(h(h|pp|xx)?|H)$" "\.ii?$"
+magic "ASCII C(\+\+)? program text"
color brightred "\<[A-Z_][0-9A-Z_]+\>"
color green "\<(float|double|bool|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline)\>"
color green "\<((s?size)|((u_?)?int(8|16|32|64|ptr)))_t\>"
## Here is a short example for HTML.
syntax "html" "\.html$"
+magic "HTML document text"
color blue start="<" end=">"
color red "&[^;[[:space:]]]*;"
## Here is an example for Java.
syntax "java" "\.java$"
+magic "Java "
color green "\<(boolean|byte|char|double|float|int|long|new|short|this|transient|void)\>"
color red "\<(break|case|catch|continue|default|do|else|finally|for|if|return|switch|throw|try|while)\>"
color cyan "\<(abstract|class|extends|final|implements|import|instanceof|interface|native|package|private|protected|public|static|strictfp|super|synchronized|throws|volatile)\>"
## Here is an example for manpages.
syntax "man" "\.[1-9]x?$"
+magic "troff or preprocessor input text"
color green "\.(S|T)H.*$"
color brightgreen "\.(S|T)H" "\.TP"
color brightred "\.(BR?|I[PR]?).*$"
icolor brightwhite "^[[:space:]]*((un)?set|include|syntax|i?color).*$"
## Keywords
icolor brightgreen "^[[:space:]]*(set|unset)[[:space:]]+(allow_insecure_backup|autoindent|backup|backupdir|backwards|boldtext|brackets|casesensitive|const|cut|fill|historylog|matchbrackets|morespace|mouse|multibuffer|noconvert|nofollow|nohelp|nonewlines|nowrap|operatingdir|preserve|punct)\>" "^[[:space:]]*(set|unset)[[:space:]]+(quickblank|quotestr|rebinddelete|rebindkeypad|regexp|smarthome|smooth|softwrap|speller|suspend|suspendenable|tabsize|tabstospaces|tempfile|undo|view|whitespace|wordbounds)\>"
-icolor green "^[[:space:]]*(set|unset|include|syntax|header)\>"
+icolor green "^[[:space:]]*(set|unset|include|syntax|header|magic)\>"
## Colors
icolor yellow "^[[:space:]]*i?color[[:space:]]*(bright)?(white|black|red|blue|green|yellow|magenta|cyan)?(,(white|black|red|blue|green|yellow|magenta|cyan))?\>"
icolor magenta "^[[:space:]]*i?color\>" "\<(start|end)="
## Here is an example for patch files.
syntax "patch" "\.(patch|diff)$"
+magic "diff output text"
color brightgreen "^\+.*"
color green "^\+\+\+.*"
color brightblue "^ .*"
## Here is an example for Perl.
syntax "perl" "\.p[lm]$"
+magic "perl.*script text"
header "^#!.*/perl[-0-9._]*"
color red "\<(accept|alarm|atan2|bin(d|mode)|c(aller|h(dir|mod|op|own|root)|lose(dir)?|onnect|os|rypt)|d(bm(close|open)|efined|elete|ie|o|ump)|e(ach|of|val|x(ec|ists|it|p))|f(cntl|ileno|lock|ork))\>" "\<(get(c|login|peername|pgrp|ppid|priority|pwnam|(host|net|proto|serv)byname|pwuid|grgid|(host|net)byaddr|protobynumber|servbyport)|([gs]et|end)(pw|gr|host|net|proto|serv)ent|getsock(name|opt)|gmtime|goto|grep|hex|index|int|ioctl|join)\>" "\<(keys|kill|last|length|link|listen|local(time)?|log|lstat|m|mkdir|msg(ctl|get|snd|rcv)|next|oct|open(dir)?|ord|pack|pipe|pop|printf?|push|q|qq|qx|rand|re(ad(dir|link)?|cv|do|name|quire|set|turn|verse|winddir)|rindex|rmdir|s|scalar|seek(dir)?)\>" "\<(se(lect|mctl|mget|mop|nd|tpgrp|tpriority|tsockopt)|shift|shm(ctl|get|read|write)|shutdown|sin|sleep|socket(pair)?|sort|spli(ce|t)|sprintf|sqrt|srand|stat|study|substr|symlink|sys(call|read|tem|write)|tell(dir)?|time|tr(y)?|truncate|umask)\>" "\<(un(def|link|pack|shift)|utime|values|vec|wait(pid)?|wantarray|warn|write)\>"
color magenta "\<(continue|else|elsif|do|for|foreach|if|unless|until|while|eq|ne|lt|gt|le|ge|cmp|x|my|sub|use|package|can|isa)\>"
## Here is an example for PHP
syntax "php" "\.php[2345s~]?$"
+magic "PHP script text"
## php markings
color brightgreen "(<\?(php)?|\?>)"
## Here is an example for Bourne shell scripts.
syntax "sh" "\.sh$"
+magic "(POSIX|Bourne.*) shell script text"
header "^#!.*/(ba|k|pdk)?sh[-0-9_]*"
icolor brightgreen "^[0-9A-Z_]+\(\)"
color green "\<(case|do|done|elif|else|esac|exit|fi|for|function|if|in|local|read|return|select|shift|then|time|until|while)\>"
## Here is an example for xml files.
syntax "xml" "\.([jrs]html?|sgml?|xml|xslt?)$"
+magic "XML.*document text"
color green start="<" end=">"
color cyan "<[^> ]+"
color cyan ">"
#include <stdio.h>
#include <string.h>
+#include <errno.h>
+#ifdef HAVE_MAGIC_H
+#include <magic.h>
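+/* Clean up a regex we previously compiled: regfree() its contents and
+ * free the regex_t itself.  NOTE: r is passed by value, so the final
+ * "r = NULL" does not reset the caller's pointer. */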
+void nfreeregex(regex_t *r)
+ assert(r != NULL);
+ regfree(r);
+ free(r);
+ r = NULL;
/* Update the color information based on the current filename. */
void color_update(void)
syntaxtype *tmpsyntax;
syntaxtype *defsyntax = NULL;
colortype *tmpcolor, *defcolor = NULL;
+ exttype *e;
+/* libmagic structures */
+/* magicstring will be NULL if we fail to get magic result */
+ const char *magicstring = NULL;
+ const char *magicerr = NULL;
+ magic_t m;
+#endif /* HAVE_LIBMAGIC */
assert(openfile != NULL);
+ if (strcmp(openfile->filename,"")) {
+ m = magic_open(MAGIC_SYMLINK |
+#ifdef DEBUG
+#endif /* DEBUG */
+ if (m == NULL || magic_load(m, NULL) < 0)
+ fprintf(stderr, "something went wrong: %s [%s]\n", strerror(errno), openfile->filename);
+ else {
+ magicstring = magic_file(m,openfile->filename);
+ if (magicstring == NULL) {
+ magicerr = magic_error(m);
+ fprintf(stderr, "something went wrong: %s [%s]\n", magicerr, openfile->filename);
+ }
+#ifdef DEBUG
+ fprintf(stderr, "magic string returned: %s\n", magicstring);
+#endif /* DEBUG */
+ }
+ }
+#endif /* HAVE_LIBMAGIC */
/* If we didn't specify a syntax override string, or if we did and
* there was no syntax by that name, get the syntax based on the
* file extension, and then look in the header. */
if (openfile->colorstrings == NULL) {
for (tmpsyntax = syntaxes; tmpsyntax != NULL;
tmpsyntax = tmpsyntax->next) {
- exttype *e;
/* If this is the default syntax, it has no associated
* extensions, which we've checked for elsewhere. Skip over
/* Set colorstrings if we matched the extension
* regex. */
- if (regexec(e->ext, openfile->filename, 0, NULL,
- 0) == 0) {
+ if (regexec(e->ext, openfile->filename, 0, NULL, 0) == 0) {
openfile->syntax = tmpsyntax;
openfile->colorstrings = tmpsyntax->color;
- }
- if (openfile->colorstrings != NULL)
+ }
/* Decompile e->ext_regex's specified regex if we aren't
* going to use it. */
- if (not_compiled) {
- regfree(e->ext);
- free(e->ext);
- e->ext = NULL;
+ if (not_compiled)
+ nfreeregex(e->ext);
+ }
+ }
+ /* Check magic if we don't yet have an answer */
+ if (openfile->colorstrings == NULL) {
+#ifdef DEBUG
+ fprintf(stderr, "No match using extension, trying libmagic...\n");
+#endif /* DEBUG */
+ for (tmpsyntax = syntaxes; tmpsyntax != NULL;
+ tmpsyntax = tmpsyntax->next) {
+ for (e = tmpsyntax->magics; e != NULL; e = e->next) {
+ bool not_compiled = (e->ext == NULL);
+ if (not_compiled) {
+ e->ext = (regex_t *)nmalloc(sizeof(regex_t));
+ regcomp(e->ext, fixbounds(e->ext_regex), REG_EXTENDED);
+ }
+#ifdef DEBUG
+ fprintf(stderr,"Matching regex \"%s\" against \"%s\"\n",e->ext_regex, magicstring);
+#endif /* DEBUG */
+ if (magicstring && regexec(e->ext, magicstring, 0, NULL, 0) == 0) {
+ fprintf(stderr,"We matched!\n");
+ openfile->syntax = tmpsyntax;
+ openfile->colorstrings = tmpsyntax->color;
+ break;
+ }
+ if (not_compiled)
+ nfreeregex(e->ext);
+#endif /* HAVE_LIBMAGIC */
/* If we haven't matched anything yet, try the headers */
if (openfile->colorstrings == NULL) {
for (tmpsyntax = syntaxes; tmpsyntax != NULL;
tmpsyntax = tmpsyntax->next) {
- exttype *e;
for (e = tmpsyntax->headers; e != NULL; e = e->next) {
bool not_compiled = (e->ext == NULL);
/* Decompile e->ext_regex's specified regex if we aren't
* going to use it. */
- if (not_compiled) {
- regfree(e->ext);
- free(e->ext);
- e->ext = NULL;
- }
+ if (not_compiled)
+ nfreeregex(e->ext);
/* The list of extensions that this syntax applies to. */
exttype *headers;
/* Regexes to match on the 'header' (1st line) of the file */
+ exttype *magics;
+ /* Regexes to match libmagic results */
colortype *color;
/* The colors used in this syntax. */
int nmultis;
char *parse_next_regex(char *ptr);
bool nregcomp(const char *regex, int eflags);
void parse_syntax(char *ptr);
+void parse_magictype(char *ptr);
void parse_include(char *ptr);
short color_to_short(const char *colorname, bool *bright);
void parse_colors(char *ptr, bool icase);
endheader = NULL;
endsyntax->extensions = NULL;
endsyntax->headers = NULL;
+ endsyntax->magics = NULL;
endsyntax->next = NULL;
endsyntax->nmultis = 0;
} else
+/* Parse the magic regex string(s) from the line at ptr, and add them to
+ * the list of magic regexes of the syntax currently being defined. */
+void parse_magictype(char *ptr)
+ const char *fileregptr = NULL;
+ exttype *endext = NULL;
+ assert(ptr != NULL);
+ if (syntaxes == NULL) {
+ rcfile_error(
+ N_("Cannot add a magic string regex without a syntax command"));
+ return;
+ }
+ if (*ptr == '\0') {
+ rcfile_error(N_("Missing magic string name"));
+ return;
+ }
+ if (*ptr != '"') {
+ rcfile_error(
+ N_("Regex strings must begin and end with a \" character"));
+ return;
+ }
+#ifdef DEBUG
+ fprintf(stderr, "Starting a magic type: \"%s\"\n", ptr);
+ /* Now load the magic regexes into their part of the struct. */
+ while (*ptr != '\0') {
+ exttype *newext;
+ /* The new magic regex structure. */
+ while (*ptr != '"' && *ptr != '\0')
+ ptr++;
+ if (*ptr == '\0')
+ return;
+ ptr++;
+ fileregptr = ptr;
+ ptr = parse_next_regex(ptr);
+ if (ptr == NULL)
+ break;
+ newext = (exttype *)nmalloc(sizeof(exttype));
+ /* Save the regex if it's valid. */
+ if (nregcomp(fileregptr, REG_NOSUB)) {
+ newext->ext_regex = mallocstrcpy(NULL, fileregptr);
+ newext->ext = NULL;
+ if (endext == NULL)
+ endsyntax->magics = newext;
+ else
+ endext->next = newext;
+ endext = newext;
+ endext->next = NULL;
+ } else
+ free(newext);
+ }
+#endif /* HAVE_LIBMAGIC */
int check_bad_binding(sc *s)
rcfile_error(N_("Syntax \"%s\" has no color commands"),
+ }
+ else if (strcasecmp(keyword, "magic") == 0) {
+ parse_magictype(ptr);
} else if (strcasecmp(keyword, "header") == 0)
else if (strcasecmp(keyword, "color") == 0)