NetBSD/usr.bin/mail/mime_codecs.c

748 lines
18 KiB
C

/* $NetBSD: mime_codecs.c,v 1.12 2019/10/24 18:18:00 kamil Exp $ */
/*-
* Copyright (c) 2006 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Anon Ymous.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This module contains all mime related codecs. Typically there are
* two versions: one operating on buffers and one operating on files.
* All exported routines have a "mime_" prefix. The file oriented
* routines have a "mime_f" prefix replacing the "mime_" prefix of the
* equivalent buffer based version.
*
* The file based API should be:
*
* mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
*
* XXX - currently this naming convention has not been adheared to.
*
* where the cookie is a generic way to pass arguments to the routine.
* This way these routines can be run by run_function() in mime.c.
*
* The buffer based API is not as rigid.
*/
#ifdef MIME_SUPPORT
#include <sys/cdefs.h>
#ifndef __lint__
__RCSID("$NetBSD: mime_codecs.c,v 1.12 2019/10/24 18:18:00 kamil Exp $");
#endif /* not __lint__ */
#include <assert.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <util.h>
#include "def.h"
#include "extern.h"
#include "mime_codecs.h"
#ifdef CHARSET_SUPPORT
/************************************************************************
* Core character set conversion routines.
*
*/
/*
* Fault-tolerant iconv() function.
*
* This routine was borrowed from nail-11.25/mime.c and modified. It
* tries to handle errno == EILSEQ by restarting at the next input
* byte (is this a good idea?). All other errors are handled by the
* caller.
*/
PUBLIC size_t
mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
{
size_t sz = 0;
while ((sz = iconv(cd, __UNCONST(inb), inbleft, outb, outbleft))
== (size_t)-1
&& errno == EILSEQ) {
if (*outbleft > 0) {
*(*outb)++ = '?';
(*outbleft)--;
} else {
**outb = '\0';
return E2BIG;
}
if (*inbleft > 0) {
(*inb)++;
(*inbleft)--;
} else {
**outb = '\0';
break;
}
}
return sz;
}
/*
* This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
* We don't care about the invalid character count, so don't bother
* with __iconv(). We do care about robustness, so call iconv_ft()
* above to try to recover from errors.
*/
#define INBUFSIZE 1024
#define OUTBUFSIZE (INBUFSIZE * 2)
PUBLIC void
mime_ficonv(FILE *fi, FILE *fo, void *cookie)
{
char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
const char *in;
size_t inbytes, outbytes, ret;
iconv_t cd;
/*
* NOTE: iconv_t is actually a pointer typedef, so this
* conversion is not what it appears to be!
*/
cd = (iconv_t)cookie;
while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
in = inbuf;
while (inbytes > 0) {
out = outbuf;
outbytes = OUTBUFSIZE;
ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
if (ret == (size_t)-1 && errno != E2BIG) {
if (errno != EINVAL || in == inbuf) {
/* XXX - what is proper here?
* Just copy out the remains? */
(void)fprintf(fo,
"\n\t[ iconv truncated message: %s ]\n\n",
strerror(errno));
return;
}
/*
* If here: errno == EINVAL && in != inbuf
*/
/* incomplete input character */
(void)memmove(inbuf, in, inbytes);
ret = fread(inbuf + inbytes, 1,
INBUFSIZE - inbytes, fi);
if (ret == 0) {
if (feof(fi)) {
(void)fprintf(fo,
"\n\t[ unexpected end of file; "
"the last character is "
"incomplete. ]\n\n");
return;
}
(void)fprintf(fo,
"\n\t[ fread(): %s ]\n\n",
strerror(errno));
return;
}
in = inbuf;
inbytes += ret;
}
if (outbytes < OUTBUFSIZE)
(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
}
}
/* reset the shift state of the output buffer */
outbytes = OUTBUFSIZE;
out = outbuf;
ret = iconv(cd, NULL, NULL, &out, &outbytes);
if (ret == (size_t)-1) {
(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
strerror(errno));
return;
}
if (outbytes < OUTBUFSIZE)
(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
}
#endif /* CHARSET_SUPPORT */
/************************************************************************
* Core base64 routines
*
* Defined in sec 6.8 of RFC 2045.
*/
/*
* Decode a base64 buffer.
*
* bin: buffer to hold the decoded (binary) result (see note 1).
* b64: buffer holding the encoded (base64) source.
* cnt: number of bytes in the b64 buffer to decode (see note 2).
*
* Return: the number of bytes written to the 'bin' buffer or -1 on
* error.
* NOTES:
* 1) It is the callers responsibility to ensure that bin is large
* enough to hold the result.
* 2) The b64 buffer should always contain a multiple of 4 bytes of
* data!
*/
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
static const signed char b64index[] = {
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
unsigned char *p;
const unsigned char *q, *end;
#define EQU (unsigned)-2
#define BAD (unsigned)-1
#define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])
p = (unsigned char *)bin;
q = (const unsigned char *)b64;
for (end = q + cnt; q < end; q += 4) {
unsigned a = uchar64(q[0]);
unsigned b = uchar64(q[1]);
unsigned c = uchar64(q[2]);
unsigned d = uchar64(q[3]);
if (a == BAD || a == EQU || b == BAD || b == EQU ||
c == BAD || d == BAD)
return -1;
*p++ = ((a << 2) | ((b & 0x30) >> 4));
if (c == EQU) { /* got '=' */
if (d != EQU)
return -1;
break;
}
*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
if (d == EQU) { /* got '=' */
break;
}
*p++ = (((c & 0x03) << 6) | d);
}
#undef uchar64
#undef EQU
#undef BAD
return p - (unsigned char*)bin;
}
/*
* Encode a buffer as a base64 result.
*
* b64: buffer to hold the encoded (base64) result (see note).
* bin: buffer holding the binary source.
* cnt: number of bytes in the bin buffer to encode.
*
* NOTE: it is the callers responsibility to ensure that 'b64' is
* large enough to hold the result.
*/
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
static const char b64table[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
const unsigned char *p = (const unsigned char*)bin;
ssize_t i;
for (i = cnt; i > 0; i -= 3) {
unsigned a = p[0];
unsigned b = p[1];
unsigned c = p[2];
b64[0] = b64table[a >> 2];
switch(i) {
case 1:
b64[1] = b64table[((a & 0x3) << 4)];
b64[2] = '=';
b64[3] = '=';
break;
case 2:
b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
b64[2] = b64table[((b & 0xf) << 2)];
b64[3] = '=';
break;
default:
b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
b64[2] = b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)];
b64[3] = b64table[c & 0x3f];
break;
}
p += 3;
b64 += 4;
}
}
#define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
static void
mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
{
static char b64[MIME_BASE64_LINE_MAX];
static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
size_t cnt;
char *cp;
size_t limit;
#ifdef __lint__
cookie = cookie;
#endif
limit = 0;
if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
limit = (size_t)atoi(cp);
if (limit == 0 || limit > sizeof(b64))
limit = sizeof(b64);
limit = 3 * roundup(limit, 4) / 4;
if (limit < 3)
limit = 3;
while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
mime_bintob64(b64, mem, (size_t)cnt);
(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
(void)putc('\n', fo);
}
}
static void
mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
{
char *line;
size_t len;
char *buf;
size_t buflen;
buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
buf = emalloc(buflen);
while ((line = fgetln(fi, &len)) != NULL) {
ssize_t binlen;
if (line[len-1] == '\n') /* forget the trailing newline */
len--;
/* trash trailing white space */
for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
continue;
/* skip leading white space */
for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
continue;
if (len == 0)
break;
if (3 * len > 4 * buflen) {
buflen *= 2;
buf = erealloc(buf, buflen);
}
binlen = mime_b64tobin(buf, line, len);
if (binlen <= 0) {
(void)fprintf(fo, "WARN: invalid base64 encoding\n");
break;
}
(void)fwrite(buf, 1, (size_t)binlen, fo);
}
free(buf);
if (add_lf)
(void)fputc('\n', fo);
}
/************************************************************************
* Core quoted-printable routines.
*
* Defined in sec 6.7 of RFC 2045.
*/
/*
* strtol(3), but inline and with easy error indication.
*/
static inline int
_qp_cfromhex(char const *hex)
{
/* Be robust, allow lowercase hexadecimal letters, too */
static unsigned char const atoi16[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
};
unsigned char i1, i2;
int r;
if ((i1 = (unsigned char)hex[0] - '0') >= __arraycount(atoi16) ||
(i2 = (unsigned char)hex[1] - '0') >= __arraycount(atoi16))
goto jerr;
i1 = atoi16[i1];
i2 = atoi16[i2];
if ((i1 | i2) & 0xF0)
goto jerr;
r = i1;
r <<= 4;
r += i2;
jleave:
return r;
jerr:
r = -1;
goto jleave;
}
/*
* Header specific "quoted-printable" decode!
* Differences with body QP decoding (see rfc 2047, sec 4.2):
* 1) '=' occurs _only_ when followed by two hex digits (FWS is not allowed).
* 2) Spaces can be encoded as '_' in headers for readability.
*/
static ssize_t
mime_QPh_decode(char *outbuf, size_t outlen, const char *inbuf, size_t inlen)
{
const char *p, *inend;
char *outend;
char *q;
outend = outbuf + outlen;
inend = inbuf + inlen;
q = outbuf;
for (p = inbuf; p < inend; p++) {
if (q >= outend)
return -1;
if (*p == '=') {
p++;
if (p + 1 < inend) {
int c = _qp_cfromhex(p++);
if (c < 0)
return -1;
*q++ = (char)c;
}
else
return -1;
}
else if (*p == '_') /* header's may encode ' ' as '_' */
*q++ = ' ';
else
*q++ = *p;
}
return q - outbuf;
}
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N 0 /* do not quote */
#define Q 1 /* must quote */
#define SP 2 /* white space */
#define XF 3 /* special character 'F' - maybe quoted */
#define XD 4 /* special character '.' - maybe quoted */
#define EQ Q /* '=' must be quoted */
#define TB SP /* treat '\t' as a space */
#define NL N /* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR Q /* always quote a '\r' (CR) - it occurs only in a CRLF combo */
static const signed char quotetab[] = {
Q, Q, Q, Q, Q, Q, Q, Q, Q,TB,NL, Q, Q,CR, Q, Q,
Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N,
N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N, N,
N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N,
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q,
};
int flag = *p > 0x7f ? Q : quotetab[*p];
if (flag == N)
return 0;
if (flag == Q)
return 1;
if (flag == SP)
return p + 1 < end && p[1] == '\n'; /* trailing white space */
/* The remainder are special start-of-line cases. */
if (l != 0)
return 0;
if (flag == XF) /* line may start with "From" */
return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';
if (flag == XD) /* line may consist of a single dot */
return p + 1 < end && p[1] == '\n';
errx(EXIT_FAILURE,
"mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
*p, *p, flag, l);
/* NOT REACHED */
return 0; /* appease GCC */
#undef N
#undef Q
#undef SP
#undef XX
#undef EQ
#undef TB
#undef NL
#undef CR
}
#define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
size_t l; /* length of current output line */
unsigned char *beg;
unsigned char *end;
unsigned char *p;
assert(limit <= MIME_QUOTED_LINE_MAX);
beg = (unsigned char*)line;
end = beg + len;
l = 0;
for (p = (unsigned char*)line; p < end; p++) {
if (mustquote(p, end, l)) {
if (l + 4 > limit) {
(void)fputs("=\n", fo);
l = 0;
}
(void)fprintf(fo, "=%02X", *p);
l += 3;
}
else {
if (*p == '\n') {
if (p > beg && p[-1] == '\r') {
if (l + 4 > limit)
(void)fputs("=\n", fo);
(void)fputs("=0A=", fo);
}
l = (size_t)-1;
}
else if (l + 2 > limit) {
(void)fputs("=\n", fo);
l = 0;
}
(void)putc(*p, fo);
l++;
}
}
/*
* Lines ending in a blank must escape the newline.
*/
if (len && is_WSP(p[-1]))
(void)fputs("=\n", fo);
}
static void
mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
{
char *line;
size_t len;
char *cp;
size_t limit;
#ifdef __lint__
cookie = cookie;
#endif
limit = 0;
if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
limit = (size_t)atoi(cp);
if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
limit = MIME_QUOTED_LINE_MAX;
if (limit < 4)
limit = 4;
while ((line = fgetln(fi, &len)) != NULL)
fput_quoted_line(fo, line, len, limit);
}
static void
mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
{
char *line;
size_t len;
#ifdef __lint__
cookie = cookie;
#endif
while ((line = fgetln(fi, &len)) != NULL) {
char *p;
char *end;
end = line + len;
for (p = line; p < end; p++) {
if (*p == '=') {
p++;
while (p < end && is_WSP(*p))
p++;
if (*p != '\n' && p + 1 < end) {
int c = _qp_cfromhex(p++);
if (c >= 0)
(void)fputc(c, fo);
else
(void)fputs("[?]", fo);
}
}
else
(void)fputc(*p, fo);
}
}
}
/************************************************************************
* Routines to select the codec by name.
*/
PUBLIC void
mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
{
int c;
#ifdef __lint__
cookie = cookie;
#endif
while ((c = getc(fi)) != EOF)
(void)putc(c, fo);
(void)fflush(fo);
if (ferror(fi)) {
warn("read");
rewind(fi);
return;
}
if (ferror(fo)) {
warn("write");
(void)Fclose(fo);
rewind(fi);
return;
}
}
static const struct transfer_encoding_s {
const char *name;
mime_codec_t enc;
mime_codec_t dec;
} transfer_encoding_tbl[] = {
{ MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy },
{ MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy },
{ MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy },
{ MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode },
{ MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
{ NULL, NULL, NULL },
};
PUBLIC mime_codec_t
mime_fio_encoder(const char *ename)
{
const struct transfer_encoding_s *tep = NULL;
if (ename == NULL)
return NULL;
for (tep = transfer_encoding_tbl; tep->name; tep++)
if (strcasecmp(tep->name, ename) == 0)
break;
return tep->enc;
}
PUBLIC mime_codec_t
mime_fio_decoder(const char *ename)
{
const struct transfer_encoding_s *tep = NULL;
if (ename == NULL)
return NULL;
for (tep = transfer_encoding_tbl; tep->name; tep++)
if (strcasecmp(tep->name, ename) == 0)
break;
return tep->dec;
}
/*
* Decode a RFC 2047 extended message header *encoded-word*.
* *encoding* is the corresponding character of the *encoded-word*.
*/
PUBLIC ssize_t
mime_rfc2047_decode(char encoding, char *outbuf, size_t outlen,
const char *inbuf, size_t inlen)
{
ssize_t declen = -1;
if (encoding == 'B' || encoding == 'b') {
if (outlen >= 3 * roundup(inlen, 4) / 4)
declen = mime_b64tobin(outbuf, inbuf, inlen);
} else if (encoding == 'Q' || encoding == 'q')
declen = mime_QPh_decode(outbuf, outlen, inbuf, inlen);
return declen;
}
/*
* This is for use in complete.c and mime.c to get the list of
* encoding names without exposing the transfer_encoding_tbl[]. The
* first name is returned if called with a pointer to a NULL pointer.
* Subsequent calls with the same cookie give successive names. A
* NULL return indicates the end of the list.
*/
PUBLIC const char *
mime_next_encoding_name(const void **cookie)
{
const struct transfer_encoding_s *tep;
tep = *cookie;
if (tep == NULL)
tep = transfer_encoding_tbl;
*cookie = tep->name ? &tep[1] : NULL;
return tep->name;
}
#endif /* MIME_SUPPORT */