/*
 * /usr/web/sources/contrib/gabidiaz/wip/mboxfs/rfc.c
 *
 * From the Plan 9 from Bell Labs sources tree:
 * /usr/web/sources/contrib/gabidiaz/wip/mboxfs/rfc.c
 */

#include "a.h"

/*
 * Convert the ISO-8859-1 bytes in [in, e) to UTF-8.
 *
 * Returns 0 without touching *out when no byte has its high bit set
 * (nothing to convert), and 0 with *out == nil when malloc fails.
 * Otherwise *out receives a freshly malloc'ed, NUL-terminated string
 * and the return value is its length in bytes.
 */
int
latin1toutf(char **out, char *in, char *e)
{
	char *src, *dst;
	int size;
	Rune r;

	/* one extra output byte is reserved for every high-bit input byte */
	size = 0;
	src = in;
	while(src < e){
		if(*src & 0x80)
			size++;
		src++;
	}
	if(size == 0)
		return 0;

	size += e-in;
	dst = malloc(size+1);
	*out = dst;
	if(dst == nil)
		return 0;

	/* each input byte is taken as a rune of the same value */
	for(src = in; src < e; src++){
		r = (uchar)*src;
		dst += runetochar(dst, &r);
	}
	*dst = 0;
	return dst - *out;
}


/*
 * Value of the hexadecimal digit x (either case).
 * Anything that is not a hex digit yields 0.
 */
int
hex2int(int x)
{
	int v;

	v = 0;
	if('0' <= x && x <= '9')
		v = x - '0';
	else if('A' <= x && x <= 'F')
		v = 10 + (x - 'A');
	else if('a' <= x && x <= 'f')
		v = 10 + (x - 'a');
	return v;
}

// translate any thing using the tcs program
//
// Converts the bytes in [in, e) from charset to UTF-8.
// Returns 0 without touching *out for us-ascii and utf-8 (no conversion
// needed), delegates iso-8859-1 to latin1toutf, and otherwise pipes the
// text through /bin/tcs -f charset.  On success *out is a malloc'ed,
// NUL-terminated buffer and the byte count is returned; on any failure
// (or empty output) *out is nil and 0 is returned.
int
xtoutf(char *charset, char **out, char *in, char *e)
{
	char *av[4];
	int totcs[2];
	int fromtcs[2];
	int n, len, sofar;
	char *p, *np;

	// might not need to convert
	if(cistrcmp(charset, "us-ascii") == 0 || cistrcmp(charset, "utf-8") == 0)
		return 0;
	if(cistrcmp(charset, "iso-8859-1") == 0)
		return latin1toutf(out, in, e);

	len = e-in+1;
	sofar = 0;
	*out = p = malloc(len+1);
	if(p == nil)
		return 0;

	av[0] = charset;
	av[1] = "-f";
	av[2] = charset;
	av[3] = 0;
	if(pipe(totcs) < 0)
		goto error;
	if(pipe(fromtcs) < 0){
		close(totcs[0]); close(totcs[1]);
		goto error;
	}
	switch(rfork(RFPROC|RFFDG|RFNOWAIT)){
	case -1:
		close(fromtcs[0]); close(fromtcs[1]);
		close(totcs[0]); close(totcs[1]);
		goto error;
	case 0:
		// first child: becomes tcs, reading totcs and writing fromtcs
		close(fromtcs[0]); close(totcs[1]);
		dup(fromtcs[1], 1);
		dup(totcs[0], 0);
		close(fromtcs[1]); close(totcs[0]);
		dup(open("/dev/null", OWRITE), 2);
		exec("/bin/tcs", av);
		_exits(0);
	default:
		close(fromtcs[1]); close(totcs[0]);
		switch(rfork(RFPROC|RFFDG|RFNOWAIT)){
		case -1:
			close(fromtcs[0]); close(totcs[1]);
			goto error;
		case 0:
			// second child: feeds the input to tcs so the parent
			// can read the output concurrently
			close(fromtcs[0]);
			while(in < e){
				n = write(totcs[1], in, e-in);
				if(n <= 0)
					break;
				in += n;
			}
			close(totcs[1]);
			_exits(0);
		default:
			// parent: collect the converted text, growing as needed
			close(totcs[1]);
			for(;;){
				n = read(fromtcs[0], &p[sofar], len-sofar);
				if(n <= 0)
					break;
				sofar += n;
				p[sofar] = 0;
				if(sofar == len){
					len += 1024;
					np = realloc(p, len+1);
					if(np == nil){
						// the old buffer is still owned by *out and is
						// freed at error:; do not leak the pipe end
						close(fromtcs[0]);
						goto error;
					}
					*out = p = np;
				}
			}
			close(fromtcs[0]);
			break;
		}
		break;
	}
	if(sofar == 0)
		goto error;
	return sofar;

error:
	free(*out);
	*out = nil;
	return 0;
}

// underscores are translated in 2047 headers (uscores=1) 
// but not in the body (uscores=0)
//
// Decode one quoted-printable line.  in points at the first byte and
// e at the LAST byte of the line (inclusive).  Trailing white space is
// dropped; a trailing '=' is a soft line break and suppresses the
// newline that is otherwise appended.  The decoded text is written to
// out and NUL-terminated; the return value points at that NUL.
char*
decquotedline(char *out, char *in, char *e, int uscores)
{
	int c, soft;

	/* dump trailing white space */
	while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
		e--;

	/*
	 * trailing '=' means no newline.  Only look at *e when something
	 * is left: on a blank line e is now in-1, outside the line.
	 */
	soft = 0;
	if(e >= in && *e == '='){
		soft = 1;
		e--;
	}

	while(in <= e){
		c = (*in++) & 0xff;
		switch(c){
		case '_':
			if(uscores){
				*out++ = ' ';
				break;
			}
			/* fall through */
		default:
			*out++ = c;
			break;
		case '=':
			/*
			 * an escape needs two hex digits inside the line;
			 * a truncated one is copied literally instead of
			 * reading past e.
			 */
			if(e - in < 1){
				*out++ = c;
				break;
			}
			c = hex2int(*in++)<<4;
			c |= hex2int(*in++);
			*out++ = c;
			break;
		}
	}
	if(!soft)
		*out++ = '\n';
	*out = 0;

	return out;
}

/*
 * Decode the quoted-printable text in [in, e) into out, one line at a
 * time via decquotedline.  uscores is passed through unchanged.
 * The output always ends with a newline followed by a NUL; the return
 * value is the number of bytes written, not counting the NUL.
 * out must have room for the input length plus two bytes.
 */
int
decquoted(char *out, char *in, char *e, int uscores)
{
	char *p, *nl;

	p = out;
	/* bounded search: the input need not be NUL-terminated at e */
	while(in < e && (nl = memchr(in, '\n', e-in)) != nil){
		p = decquotedline(p, in, nl, uscores);
		in = nl + 1;
	}
	if(in < e)
		p = decquotedline(p, in, e-1, uscores);

	// make sure we end with a new line
	// (p == out when nothing was produced; don't peek before the buffer)
	if(p == out || *(p-1) != '\n'){
		*p++ = '\n';
		*p = 0;
	}

	return p - out;
}

/*
 * Decode one RFC 2047 encoded word, =?charset?enc?text?=, of len bytes
 * starting at token, and append the result to s.
 *
 * enc is "b" (base64) or "q" (quoted-printable), in either case.  The
 * decoded bytes are converted with xtoutf; if that yields nothing the
 * decoded bytes are appended as they are.
 *
 * Returns 0 on success and -1 when the token is too short, the charset
 * name or the text does not fit the local buffers, the encoding is not
 * recognised, or base64 decoding fails.  Nothing is appended on -1.
 */
int
rfc2047convert(String *s, char *token, int len)
{
	char charset[100], decoded[1024], *e, *x;
	int l;

	/* need at least the leading "=?" and trailing "?=" */
	if(len < 4)
		return -1;

	e = token+len-2;
	token += 2;

	x = memchr(token, '?', e-token);
	if(x == nil || (l=x-token) >= sizeof charset)
		return -1;
	memmove(charset, token, l);
	charset[l] = 0;

	token = x+1;

	// bail if it doesn't fit 
	if(e-token > sizeof(decoded)-1)
		return -1;

	// bail if we don't understand the encoding
	if(cistrncmp(token, "b?", 2) == 0){
		token += 2;
		len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
		/* a negative count must not be used as an index or a length */
		if(len < 0)
			return -1;
		decoded[len] = 0;
	} else if(cistrncmp(token, "q?", 2) == 0){
		token += 2;
		len = decquoted(decoded, token, e, 1);
		/* decquoted always ends with a newline; headers don't want it */
		if(len > 0 && decoded[len-1] == '\n')
			len--;
		decoded[len] = 0;
	} else
		return -1;

	if(xtoutf(charset, &x, decoded, decoded+len) <= 0)
		s_append(s, decoded);
	else {
		s_append(s, x);
		free(x);
	}
	return 0;
}

/*
 * Given end pointing just past a '=' and start at the beginning of the
 * text that may be searched, decide whether that '=' closes an RFC 2047
 * encoded word ("...?=").  If so, return a pointer to the '=' of the
 * opening "=?"; otherwise return nil.
 *
 * Scanning backwards from the closing "?=", the opener is the first
 * '=' followed by '?' that is reached after exactly three '?' have
 * been seen.  White space anywhere in between disqualifies the token.
 */
char*
rfc2047start(char *start, char *end)
{
	int quests;

	/* the closing "?=" must lie inside [start, end) */
	if(end - start < 2)
		return nil;

	if(*--end != '=')
		return nil;
	if(*--end != '?')
		return nil;

	quests = 0;
	for(end--; end >= start; end--){
		switch(*end){
		case '=':
			if(quests == 3 && *(end+1) == '?')
				return end;
			break;
		case '?':
			++quests;
			break;
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			/* can't have white space in a token */
			return nil;
		}
	}
	return nil;
}

// convert a header line
//
// Returns an estrdup'ed copy of buff in which every RFC 2047 encoded
// word that rfc2047convert accepts has been replaced by its decoded
// text; words it rejects are copied through unchanged.  White space
// between two adjacent encoded words is dropped.  Returns nil when
// buff is nil.
char*
stringconvert(char *buff)
{
	String *acc;
	char *cur, *lim, *tok, *copy;

	if(buff == nil)
		return nil;

	acc = s_new();
	cur = buff;
	lim = buff + strlen(buff);

	// buff always marks the start of the text not yet copied to acc
	while(cur < lim){
		for(;;){
			// only a '=' can close an encoded word
			if(*cur++ != '=')
				break;
			tok = rfc2047start(buff, cur);
			if(tok == nil)
				break;

			// plain text before the word, then the word itself
			s_nappend(acc, buff, tok-buff);
			if(rfc2047convert(acc, tok, cur-tok) < 0)
				s_nappend(acc, tok, cur-tok);
			buff = cur;

			while(cur < lim && isspace(*cur))
				cur++;
			if(cur+2 < lim && cur[0] == '=' && cur[1] == '?')
				buff = cur;	// paste
		}
	}
	if(cur > buff)
		s_nappend(acc, buff, cur-buff);

	copy = estrdup(s_to_c(acc));
	s_free(acc);
	return copy;
}

/*
 * from 2822 format to "natural" format
 *
 * Scans the address list in p and returns an estrdup'ed string holding
 * the addresses found, separated by single spaces.  Returns nil when p
 * is nil or when no address was collected.
 *
 * For each comma-separated entry: parenthesised comments are skipped,
 * unquoted white space is dropped, and if a <...> part is present its
 * contents replace whatever was collected before the '<'.
 */

char*
addrrfc(char *p)
{
	String *s, *list;
	int incomment, addrdone, inanticomment, quoted;
	int n;
	int c;
	char *resp;

	if (p==nil)
		return nil;

	/* list accumulates the result; s holds the address being built */
	list = s_new();
	s = s_new();
	quoted = incomment = addrdone = inanticomment = 0;
	n = 0;	/* number of addresses appended to list so far */
	for(; *p; p++){
		c = *p;

		// whitespace is ignored
		// ('\r' is skipped even when it follows a backslash)
		if(!quoted && isspace(c) || c == '\r')
			continue;

		// strings are always treated as atoms
		if(!quoted && c == '"'){
			if(!addrdone && !incomment)
				s_putc(s, c);
			// copy through the closing quote, honouring \" escapes
			for(p++; *p; p++){
				if(!addrdone && !incomment)
					s_putc(s, *p);
				if(!quoted && *p == '"')
					break;
				if(*p == '\\')
					quoted = 1;
				else
					quoted = 0;
			}
			// unterminated string: stop scanning altogether
			if(*p == 0)
				break;
			quoted = 0;
			continue;
		}

		// ignore everything in an explicit comment
		if(!quoted && c == '('){
			incomment = 1;
			continue;
		}
		if(incomment){
			if(!quoted && c == ')')
				incomment = 0;
			quoted = 0;
			continue;
		}

		// anticomments makes everything outside of them comments
		if(!quoted && c == '<' && !inanticomment){
			inanticomment = 1;
			// discard what was collected before the '<'
			s = s_reset(s);
			continue;
		}
		if(!quoted && c == '>' && inanticomment){
			addrdone = 1;
			inanticomment = 0;
			continue;
		}

		// commas separate addresses
		if(!quoted && c == ',' && !inanticomment){
			s_terminate(s);
			addrdone = 0;
			if(n++ != 0)
				s_append(list, " ");
			s_append(list, s_to_c(s));
			s = s_reset(s);
			continue;
		}

		// what's left is part of the address
		s_putc(s, c);

		// quoted characters are recognized only as characters
		if(c == '\\')
			quoted = 1;
		else
			quoted = 0;

	}

	// flush the last address, which has no comma after it
	if(*s_to_c(s) != 0){
		s_terminate(s);
		if(n++ != 0)
			s_append(list, " ");
		s_append(list, s_to_c(s));
	}
	s_free(s);

	if(n == 0){
		s_free(list);
		return nil;
	}

	resp=estrdup(s_to_c(list));
	s_free(list);
	return resp;
}


/*
 * Return a pointer to the first space, tab, CR, LF or NUL at or
 * after q, i.e. the end of the current white-space-delimited token.
 */
static char*
skiptext(char *q)
{
	for(;;){
		switch(*q){
		case '\0':
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return q;
		}
		q++;
	}
}

/*
 * Return a pointer to the first character at or after q that is not
 * a space, tab, CR or LF (possibly the terminating NUL).
 */
static char*
skipwhite(char *q)
{
	for(;; q++){
		switch(*q){
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			continue;
		}
		return q;
	}
}

/*
 * Lower-case three-letter month abbreviations.  strtotm compares the
 * start of each token against these and stores the matching index
 * (0 for "jan" through 11 for "dec") in tm.mon.
 */
static char* months[] = {
	"jan", "feb", "mar", "apr",
	"may", "jun", "jul", "aug", 
	"sep", "oct", "nov", "dec"
};

/*
 * Case-insensitive comparison of a against at most the first n bytes
 * of b.  Returns 0 when all n bytes of b match the start of a, or when
 * both strings end together before n bytes; returns 1 otherwise.
 */
static int
strcmplwr(char *a, char *b, int n)
{
	char *eb;

	eb = b+n;
	/* check the bound first so b[n] is never read */
	while(b < eb && *a && *b){
		/* go through unsigned char: tolower wants a non-negative value */
		if(tolower((unsigned char)*a) != tolower((unsigned char)*b))
			return 1;
		a++;
		b++;
	}
	if(b==eb)
		return 0;
	return *a != *b;
}

/*
 * Parse a date string into *tmp.
 *
 * The string is split into white-space-separated tokens, which may
 * come in any order.  Each token is classified as, in this order:
 *   - a time, hh:mm or hh:mm:ss (any token containing ':')
 *   - a month, if it starts with a three-letter month abbreviation
 *   - a zone name, three characters of the form [A-Z][A-Z]T
 *   - a numeric offset, a sign followed by exactly four digits
 *   - a number: 1..31 is taken as day of month, >= 1900 as the year
 * Anything else is ignored.
 *
 * Returns -1 if month, year, hour, minute or day of month is missing.
 * Otherwise stores localtime(tm2sec(&tm) - offset) in *tmp, where
 * offset is the numeric offset in seconds, and returns 0.
 */
int
strtotm(char *p, Tm *tmp)
{
	char *tok, *end, *colon;
	int mon, num, hh, mm, delta;
	Tm tm;

	memset(&tm, 0, sizeof(tm));
	tm.year = -1;
	tm.mon = -1;
	tm.mday = -1;
	tm.hour = -1;
	tm.min = -1;
	delta = 0;

	p = skipwhite(p);
	while(*p != '\0'){
		/* isolate the token and line up the next one */
		tok = p;
		end = skiptext(tok);
		p = skipwhite(end);

		/* look for time in hh:mm[:ss] */
		colon = memchr(tok, ':', end-tok);
		if(colon != nil){
			tm.hour = strtol(tok, 0, 10);
			tm.min = strtol(colon+1, 0, 10);
			colon = memchr(colon+1, ':', end-(colon+1));
			if(colon != nil)
				tm.sec = strtol(colon+1, 0, 10);
			else
				tm.sec = 0;
			continue;
		}

		/* look for month */
		for(mon = 0; mon < 12; mon++)
			if(strcmplwr(tok, months[mon], 3) == 0)
				break;
		if(mon < 12){
			tm.mon = mon;
			continue;
		}

		/* look for time zone [A-Z][A-Z]T */
		if(end-tok == 3
		&& tok[0] >= 'A' && tok[0] <= 'Z'
		&& tok[1] >= 'A' && tok[1] <= 'Z'
		&& tok[2] == 'T'){
			strecpy(tm.zone, tm.zone+4, tok);
			continue;
		}

		/* look for numeric offset [+-]hhmm, kept in seconds */
		if((tok[0] == '+' || tok[0] == '-')
		&& end-tok == 5 && strspn(tok+1, "0123456789") == 4){
			hh = (tok[1]-'0')*10 + (tok[2]-'0');
			mm = (tok[3]-'0')*10 + (tok[4]-'0');
			delta = (hh*60 + mm)*60;
			if(tok[0] == '-')
				delta = -delta;
			continue;
		}

		/* an all-digit token is a day of month or a year */
		if(strspn(tok, "0123456789") == end-tok){
			num = strtol(tok, nil, 10);
			if(1 <= num && num <= 31)
				tm.mday = num;
			if(num >= 1900)
				tm.year = num-1900;
		}
	}

	if(tm.mon < 0 || tm.year < 0 || tm.mday < 0
	|| tm.hour < 0 || tm.min < 0)
		return -1;

	*tmp = *localtime(tm2sec(&tm)-delta);
	return 0;
}

/*
 * Classify the transfer encoding named in m->mime.encoding.
 * Returns Ebase64 if it starts with "base64", Equoted if it starts
 * with "quoted-printable" (both case-insensitive), otherwise -1.
 */
int
cencoding(Message *m)
{
	char *enc;

	enc = m->mime.encoding;
	if(cistrncmp(enc, "base64", 6) == 0)
		return Ebase64;
	if(cistrncmp(enc, "quoted-printable", 16) == 0)
		return Equoted;
	return -1;
}

/*
 * Decode len bytes at buff according to the transfer encoding of m
 * (see cencoding) into a buffer obtained from emalloc.
 *
 * On return *out is that buffer, or nil when there is nothing to
 * decode (len == 0 or buff == nil) or the encoding is not base64 or
 * quoted-printable.  The return value is the decoded length as
 * reported by dec64 or decquoted, or 0 in the nil cases.
 * NOTE(review): a negative result from dec64 is passed through as is,
 * with *out still set - confirm callers check for it.
 */
vlong
decode(Message *m, char *buff, char **out, vlong len)
{
	vlong i, dlen;
	char *x;

	if(len == 0 || buff == nil){
		*out = nil;
		return 0;
	}

	switch(cencoding(m)){
	case Ebase64:
		i = (len*3)/4+1;	// room for max chars + null
		x = emalloc(i);
		dlen = dec64((uchar*)x, i, buff, len);
		// actually write the null the extra byte was reserved for
		if(dlen >= 0 && dlen < i)
			x[dlen] = 0;
		*out = x;
		break;
	case Equoted:
		x = emalloc(len+2);	// room for null and possible extra nl
		dlen = decquoted(x, buff, &buff[len], 0);
		*out = x;
		break;
	default:
		error(DEBUG,"decode(): m->mime.encoding == %s\n",m->mime.encoding);
		*out = nil;
		dlen = 0;
		break;
	}
	// dlen is a vlong, so it needs the ll flag
	error(DEBUG,"decode(): dlen == %lld\n",dlen);
	return dlen;

}
/* (Return to Plan 9 Home Page) */