#include "a.h"
/*
 * Convert the Latin-1 bytes in [in, e) to a newly malloc'd,
 * NUL-terminated UTF-8 string stored in *out.
 *
 * Returns the length of the converted string (not counting the NUL).
 * Returns 0 without touching *out when no byte has its top bit set
 * (nothing to convert), and 0 with *out == nil when malloc fails.
 */
int
latin1toutf(char **out, char *in, char *e)
{
	int size;
	char *src, *dst;
	Rune r;

	/* each byte with the top bit set costs one extra output byte */
	size = 0;
	src = in;
	while(src < e){
		if(*src & 0x80)
			size++;
		src++;
	}
	if(size == 0)
		return 0;

	size += e-in;
	dst = malloc(size+1);
	*out = dst;
	if(dst == nil)
		return 0;

	while(in < e){
		r = (uchar)*in;
		dst += runetochar(dst, &r);
		in++;
	}
	*dst = 0;
	return dst - *out;
}
/*
 * Value of a single hexadecimal digit character (either case).
 * Anything that is not a hex digit yields 0.
 */
int
hex2int(int x)
{
	if('0' <= x && x <= '9')
		return x - '0';
	if('A' <= x && x <= 'F')
		return 10 + (x - 'A');
	if('a' <= x && x <= 'f')
		return 10 + (x - 'a');
	return 0;
}
/*
 * Translate the bytes in [in, e) from charset to UTF using the tcs program.
 *
 * "us-ascii" and "utf-8" need no conversion: 0 is returned and *out is
 * left untouched.  "iso-8859-1" is handled in-process by latin1toutf().
 * Anything else is piped through "/bin/tcs -f charset": one child execs
 * tcs, a second child feeds it the input, and this process collects the
 * output into a malloc'd, NUL-terminated buffer stored in *out.
 *
 * Returns the number of converted bytes (> 0) with *out set to the
 * buffer, or 0 on any failure or empty output, in which case *out is nil.
 */
int
xtoutf(char *charset, char **out, char *in, char *e)
{
	char *av[4];
	int totcs[2];
	int fromtcs[2];
	int n, len, sofar;
	char *p;

	// might not need to convert
	if(cistrcmp(charset, "us-ascii") == 0 || cistrcmp(charset, "utf-8") == 0)
		return 0;
	if(cistrcmp(charset, "iso-8859-1") == 0)
		return latin1toutf(out, in, e);

	/* initial guess at the output size; grown below as needed */
	len = e-in+1;
	sofar = 0;
	*out = p = malloc(len+1);
	if(p == nil)
		return 0;

	av[0] = charset;
	av[1] = "-f";
	av[2] = charset;
	av[3] = 0;
	if(pipe(totcs) < 0)
		goto error;
	if(pipe(fromtcs) < 0){
		close(totcs[0]); close(totcs[1]);
		goto error;
	}
	switch(rfork(RFPROC|RFFDG|RFNOWAIT)){
	case -1:
		close(fromtcs[0]); close(fromtcs[1]);
		close(totcs[0]); close(totcs[1]);
		goto error;
	case 0:
		/* child 1: becomes tcs, reading totcs and writing fromtcs */
		close(fromtcs[0]); close(totcs[1]);
		dup(fromtcs[1], 1);
		dup(totcs[0], 0);
		close(fromtcs[1]); close(totcs[0]);
		dup(open("/dev/null", OWRITE), 2);
		exec("/bin/tcs", av);
		_exits(0);
	default:
		close(fromtcs[1]); close(totcs[0]);
		switch(rfork(RFPROC|RFFDG|RFNOWAIT)){
		case -1:
			close(fromtcs[0]); close(totcs[1]);
			goto error;
		case 0:
			/* child 2: writes the input to tcs so the parent can read concurrently */
			close(fromtcs[0]);
			while(in < e){
				n = write(totcs[1], in, e-in);
				if(n <= 0)
					break;
				in += n;
			}
			close(totcs[1]);
			_exits(0);
		default:
			/* parent: collect the output of tcs */
			close(totcs[1]);
			for(;;){
				n = read(fromtcs[0], &p[sofar], len-sofar);
				if(n <= 0)
					break;
				sofar += n;
				p[sofar] = 0;
				if(sofar == len){
					len += 1024;
					p = realloc(p, len+1);
					if(p == nil){
						/*
						 * *out still holds the old buffer, which the
						 * error path frees; the pipe must be closed
						 * here or the descriptor leaks.
						 */
						close(fromtcs[0]);
						goto error;
					}
					*out = p;
				}
			}
			close(fromtcs[0]);
			break;
		}
		break;
	}
	if(sofar == 0)
		goto error;
	return sofar;

error:
	free(*out);
	*out = nil;
	return 0;
}
/*
 * Decode one quoted-printable line.
 *
 * in points at the first byte of the line and e AT its last byte
 * (inclusive, not one past).  Trailing white space is dropped; a
 * trailing '=' is a soft line break, so no newline is emitted.
 * Otherwise a '\n' is appended.  The output is NUL-terminated and a
 * pointer to that NUL is returned, so calls can be chained.
 *
 * underscores are translated in 2047 headers (uscores=1)
 * but not in the body (uscores=0)
 *
 * out must have room for the line length plus a newline and a NUL.
 */
char*
decquotedline(char *out, char *in, char *e, int uscores)
{
	int c, soft;

	/* dump trailing white space */
	while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
		e--;

	/*
	 * trailing '=' means no newline.  Only look at *e if the line is
	 * not empty: for an empty or all-blank line e is now in-1, which
	 * lies before the start of the line (and possibly of the buffer).
	 */
	soft = 0;
	if(e >= in && *e == '='){
		soft = 1;
		e--;
	}

	while(in <= e){
		c = (*in++) & 0xff;
		switch(c){
		case '_':
			if(uscores){
				*out++ = ' ';
				break;
			}
			/* fall through */
		default:
			*out++ = c;
			break;
		case '=':
			/*
			 * a malformed escape without two characters left on
			 * the line is copied through literally rather than
			 * reading past e
			 */
			if(e - in < 1){
				*out++ = c;
				break;
			}
			c = hex2int(*in++)<<4;
			c |= hex2int(*in++);
			*out++ = c;
			break;
		}
	}
	if(!soft)
		*out++ = '\n';
	*out = 0;

	return out;
}
/*
 * Decode the quoted-printable text in [in, e) into out, one line at
 * a time via decquotedline().  uscores is passed through (non-zero
 * turns '_' into ' ').
 *
 * The result is NUL-terminated and always ends in a newline; the
 * return value is the number of bytes written, not counting the NUL.
 * out must have room for (e-in) + 2 bytes.
 */
int
decquoted(char *out, char *in, char *e, int uscores)
{
	char *p, *nl;

	p = out;
	/* search only inside [in, e): the input need not be NUL-terminated */
	while(in < e && (nl = memchr(in, '\n', e-in)) != nil){
		p = decquotedline(p, in, nl, uscores);
		in = nl + 1;
	}
	if(in < e)
		p = decquotedline(p, in, e-1, uscores);

	// make sure we end with a new line
	// (p == out: nothing was decoded, so there is no p[-1] to look at)
	if(p == out || *(p-1) != '\n'){
		*p++ = '\n';
		*p = 0;
	}

	return p - out;
}
/*
 * Decode one RFC 2047 encoded word, "=?charset?enc?text?=", found at
 * token and spanning len bytes, and append the result to s.
 *
 * enc may be "b" (base64) or "q" (quoted-printable with '_' for space),
 * in either case.  The decoded bytes are passed through xtoutf(); if
 * that yields nothing, the decoded bytes are appended unchanged.
 *
 * Returns 0 on success, -1 if the word is empty, the charset name or
 * text does not fit the local buffers, or the encoding is unknown.
 * Nothing is appended to s on failure.
 */
int
rfc2047convert(String *s, char *token, int len)
{
	char charset[100], decoded[1024];
	char *end, *sep, *utf;
	int cslen;

	if(len == 0)
		return -1;

	/* step inside the leading "=?" and trailing "?=" */
	end = token+len-2;
	token += 2;

	/* charset name runs up to the next '?' */
	sep = memchr(token, '?', end-token);
	if(sep == nil)
		return -1;
	cslen = sep-token;
	if(cslen >= sizeof charset)
		return -1;
	memmove(charset, token, cslen);
	charset[cslen] = 0;
	token = sep+1;

	// bail if it doesn't fit
	if(end-token > sizeof(decoded)-1)
		return -1;

	// bail if we don't understand the encoding
	if(cistrncmp(token, "b?", 2) == 0){
		token += 2;
		len = dec64((uchar*)decoded, sizeof(decoded), token, end-token);
	} else if(cistrncmp(token, "q?", 2) == 0){
		token += 2;
		len = decquoted(decoded, token, end, 1);
		/* decquoted always ends with a newline; headers don't want it */
		if(len > 0 && decoded[len-1] == '\n')
			len--;
	} else
		return -1;
	decoded[len] = 0;

	if(xtoutf(charset, &utf, decoded, decoded+len) > 0){
		s_append(s, utf);
		free(utf);
	} else
		s_append(s, decoded);
	return 0;
}
/*
 * Given that the text in [start, end) might end with the "?=" that
 * closes an RFC 2047 encoded word, look backwards for the '=' of the
 * opening "=?".
 *
 * Returns a pointer to that '=' when exactly three '?' lie between it
 * and the closing "?=" and no white space intervenes; otherwise nil.
 */
char*
rfc2047start(char *start, char *end)
{
	int quests;

	/* need at least the two bytes of "?=" or we would read before start */
	if(end - start < 2)
		return nil;
	if(*--end != '=')
		return nil;
	if(*--end != '?')
		return nil;

	quests = 0;
	while(end > start){
		end--;
		switch(*end){
		case '=':
			if(quests == 3 && *(end+1) == '?')
				return end;
			break;
		case '?':
			++quests;
			break;
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			/* can't have white space in a token */
			return nil;
		}
	}
	return nil;
}
/*
 * convert a header line
 *
 * Returns an estrdup'd copy of buff in which every RFC 2047 encoded
 * word ("=?charset?enc?text?=") has been replaced by its decoded form.
 * Words that rfc2047convert() rejects are copied through unchanged.
 * White space separating two adjacent encoded words is dropped.
 * Returns nil if buff is nil.
 */
char*
stringconvert(char *buff)
{
	String *s;
	int len;
	char *resp, *token, *p, *e;

	if(buff == nil)
		return nil;
	len = strlen(buff);

	s = s_new();
	p = buff;
	e = p+len;

	/*
	 * buff always marks the start of the text not yet appended to s.
	 * p never advances past e, so the terminating NUL is neither
	 * examined as data nor appended.
	 */
	while(p < e){
		if(*p++ != '=')
			continue;
		token = rfc2047start(buff, p);
		if(token == nil)
			continue;

		/* plain text before the word, then the word itself */
		s_nappend(s, buff, token-buff);
		if(rfc2047convert(s, token, p - token) < 0)
			s_nappend(s, token, p - token);
		buff = p;

		/* cast: isspace must not be handed a negative char */
		while(p < e && isspace((unsigned char)*p))
			p++;
		if(p+2 < e && p[0] == '=' && p[1] == '?')
			buff = p;	// paste
	}
	if(p > buff)
		s_nappend(s, buff, p-buff);

	resp = estrdup(s_to_c(s));
	s_free(s);
	return resp;
}
/*
 * from 2822 format to "natural" format
 *
 * Scans the address list in p one character at a time and builds a
 * single string of the addresses it finds, separated by single spaces.
 * s accumulates the current address; list accumulates the result.
 *
 * Returns an estrdup'd copy of the list, or nil if p is nil or no
 * address was collected.
 */
char*
addrrfc(char *p)
{
String *s, *list;
int incomment, addrdone, inanticomment, quoted;
int n;
int c;
char *resp;
if (p==nil)
return nil;
list = s_new();
s = s_new();
// quoted: the previous character was a backslash
// incomment: inside ( ... )
// inanticomment: inside < ... >
// addrdone: a < ... > section has been closed for the current address
quoted = incomment = addrdone = inanticomment = 0;
// n counts the addresses appended to list so far
n = 0;
for(; *p; p++){
c = *p;
// whitespace is ignored (and '\r' even when escaped)
if(!quoted && isspace(c) || c == '\r')
continue;
// strings are always treated as atoms
if(!quoted && c == '"'){
if(!addrdone && !incomment)
s_putc(s, c);
// copy up to and including the closing unescaped quote
for(p++; *p; p++){
if(!addrdone && !incomment)
s_putc(s, *p);
if(!quoted && *p == '"')
break;
if(*p == '\\')
quoted = 1;
else
quoted = 0;
}
// unterminated string: stop here so the outer loop's p++ does not step past the NUL
if(*p == 0)
break;
quoted = 0;
continue;
}
// ignore everything in an explicit comment
if(!quoted && c == '('){
incomment = 1;
continue;
}
if(incomment){
if(!quoted && c == ')')
incomment = 0;
quoted = 0;
continue;
}
// anticomments makes everything outside of them comments
// (text collected before the '<' is thrown away)
if(!quoted && c == '<' && !inanticomment){
inanticomment = 1;
s = s_reset(s);
continue;
}
if(!quoted && c == '>' && inanticomment){
addrdone = 1;
inanticomment = 0;
continue;
}
// commas separate addresses
if(!quoted && c == ',' && !inanticomment){
s_terminate(s);
addrdone = 0;
if(n++ != 0)
s_append(list, " ");
s_append(list, s_to_c(s));
s = s_reset(s);
continue;
}
// what's left is part of the address
s_putc(s, c);
// quoted characters are recognized only as characters
if(c == '\\')
quoted = 1;
else
quoted = 0;
}
// flush the last address, which has no trailing comma
if(*s_to_c(s) != 0){
s_terminate(s);
if(n++ != 0)
s_append(list, " ");
s_append(list, s_to_c(s));
}
s_free(s);
if(n == 0){
s_free(list);
return nil;
}
resp=estrdup(s_to_c(list));
s_free(list);
return resp;
}
/*
 * Advance q over a run of non-blank characters.  Returns a pointer
 * to the first space, tab, CR, LF or the terminating NUL.
 */
static char*
skiptext(char *q)
{
	for(; *q != '\0'; q++){
		if(*q == ' ' || *q == '\t' || *q == '\r' || *q == '\n')
			break;
	}
	return q;
}
/*
 * Advance q over a run of spaces, tabs, CRs and LFs.  Returns a
 * pointer to the first other character (possibly the terminating NUL).
 */
static char*
skipwhite(char *q)
{
	for(;; q++){
		if(*q != ' ' && *q != '\t' && *q != '\r' && *q != '\n')
			return q;
	}
}
/* lower-case three-letter month names, indexed 0 (jan) through 11 (dec) */
static char* months[] = {
	"jan", "feb", "mar",
	"apr", "may", "jun",
	"jul", "aug", "sep",
	"oct", "nov", "dec"
};
/*
 * Case-insensitive comparison of a against at most the first n bytes
 * of b.  Returns 0 when they agree over those n bytes, or when both
 * strings end together before n bytes; non-zero otherwise.
 */
static int
strcmplwr(char *a, char *b, int n)
{
	char *eb;

	eb = b+n;
	/* test the bound first so that b[n] (and a beyond it) is never read */
	while(b < eb && *a && *b){
		/* cast: tolower must not be handed a negative char */
		if(tolower((unsigned char)*a) != tolower((unsigned char)*b))
			return 1;
		a++;
		b++;
	}
	if(b == eb)
		return 0;
	return *a != *b;
}
/*
 * Parse a date string made of white-space separated fields and fill
 * in *tmp.  Fields may come in any order and are recognised by shape:
 *	hh:mm[:ss]	time of day
 *	mon...		anything starting with a three-letter month name
 *	XXT		three-letter zone, two capitals followed by 'T'
 *	+hhmm / -hhmm	numeric zone offset
 *	1..31		day of month
 *	>= 1900		year
 * Unrecognised fields are skipped.
 *
 * Returns 0 on success; -1 (leaving *tmp alone) unless month, year,
 * day, hour and minute were all found.
 */
int
strtotm(char *p, Tm *tmp)
{
	char *q, *colon;
	int i, num, hh, mm;
	Tm tm;
	int delta;

	delta = 0;
	memset(&tm, 0, sizeof(tm));
	/* -1 marks the fields that must be seen before we accept the date */
	tm.year = -1;
	tm.mon = -1;
	tm.mday = -1;
	tm.hour = -1;
	tm.min = -1;

	for(p = skipwhite(p); *p != '\0'; p = skipwhite(q)){
		q = skiptext(p);

		/* look for time in hh:mm[:ss] */
		colon = memchr(p, ':', q-p);
		if(colon != nil){
			tm.hour = strtol(p, 0, 10);
			tm.min = strtol(colon+1, 0, 10);
			colon = memchr(colon+1, ':', q-(colon+1));
			if(colon != nil)
				tm.sec = strtol(colon+1, 0, 10);
			else
				tm.sec = 0;
			continue;
		}

		/* look for month */
		for(i = 0; i < 12; i++){
			if(strcmplwr(p, months[i], 3) == 0)
				break;
		}
		if(i < 12){
			tm.mon = i;
			continue;
		}

		/* look for time zone [A-Z][A-Z]T */
		if(q-p == 3
		&& 'A' <= p[0] && p[0] <= 'Z'
		&& 'A' <= p[1] && p[1] <= 'Z'
		&& p[2] == 'T'){
			strecpy(tm.zone, tm.zone+4, p);
			continue;
		}

		/* look for a numeric zone offset, +hhmm or -hhmm */
		if((p[0] == '+' || p[0] == '-')
		&& q-p == 5 && strspn(p+1, "0123456789") == 4){
			hh = (p[1]-'0')*10 + p[2]-'0';
			mm = (p[3]-'0')*10 + p[4]-'0';
			delta = (hh*60 + mm)*60;
			if(p[0] == '-')
				delta = -delta;
			continue;
		}

		/* an all-digit field is a day of month or a year */
		if(strspn(p, "0123456789") == q-p){
			num = strtol(p, nil, 10);
			if(1 <= num && num <= 31)
				tm.mday = num;
			if(num >= 1900)
				tm.year = num-1900;
		}
	}

	if(tm.mon < 0 || tm.year < 0 || tm.hour < 0 || tm.min < 0 || tm.mday < 0)
		return -1;

	*tmp = *localtime(tm2sec(&tm)-delta);
	return 0;
}
/*
 * Classify the content transfer encoding named in m->mime.encoding.
 * Only the leading characters are compared, without regard to case.
 * Returns Ebase64, Equoted, or -1 for anything else.
 */
int
cencoding(Message *m)
{
	char *enc;

	enc = m->mime.encoding;
	if(cistrncmp(enc, "base64", 6) == 0)
		return Ebase64;
	if(cistrncmp(enc, "quoted-printable", 16) == 0)
		return Equoted;
	return -1;
}
/*
 * Decode len bytes of message body at buff according to the transfer
 * encoding of m (see cencoding()).
 *
 * On return *out points at a freshly emalloc'd buffer holding the
 * decoded data, or is nil when there is no input or the encoding is
 * neither base64 nor quoted-printable.  The return value is the
 * number of decoded bytes (0 when *out is nil).
 */
vlong
decode(Message *m, char *buff, char **out, vlong len)
{
	vlong i, dlen;
	char *x;

	if(len == 0 || buff == nil){
		*out = nil;
		return 0;
	}

	switch(cencoding(m)){
	case Ebase64:
		i = (len*3)/4+1;	// room for max chars + null
		x = emalloc(i);
		dlen = dec64((uchar*)x, i, buff, len);
		*out = x;
		break;
	case Equoted:
		x = emalloc(len+2);	// room for null and possible extra nl
		dlen = decquoted(x, buff, &buff[len], 0);
		*out = x;
		break;
	default:
		error(DEBUG,"decode(): m->mime.encoding == %s\n",m->mime.encoding);
		*out = nil;
		dlen = 0;
		break;
	}

	/* dlen is a vlong, so it needs %lld rather than %d */
	error(DEBUG,"decode(): dlen == %lld\n",dlen);
	return dlen;
}
|