/*******************************************************************************
 *  The Elm Mail System  -  $Revision: 1.4 $   $State: Exp $
 *
 *                      Copyright (c) 1988-1995 USENET Community Trust
 *                      Copyright (c) 1986,1987 Dave Taylor
 *******************************************************************************
 * Bug reports, patches, comments, suggestions should be sent to:
 *
 *      Bill Pemberton, Elm Coordinator
 *      flash@virginia.edu
 *
 *******************************************************************************
 * $Log: parsarpmbox.c,v $
 * Revision 1.4  1996/05/09  15:51:09  wfp5p
 * Alpha 10
 *
 * Revision 1.3  1995/09/29  17:41:25  wfp5p
 * Alpha 8 (Chip's big changes)
 *
 * Revision 1.2  1995/09/11  15:18:56  wfp5p
 * Alpha 7
 *
 * Revision 1.1  1995/07/18  18:59:50  wfp5p
 * Alpha 6
 *
 *
 *
 ******************************************************************************/

/*
 * parse_arpa_mailbox() parses RFC-822 "mailbox" specifications into address
 * and fullname components.  A "mailbox" is the formal name of the RFC-822
 * lexical element that corresponds to what we normally might call an
 * "address".  (RFC-822 uses the term "address" to describe something else.)
 *
 * A "mailbox" can be in one of two formats:
 *
 *      addr-spec
 *              such as:  joe@acme.com (Joe User)
 *
 * or:
 *
 *      [phrase] "<" [route] addr-spec ">"
 *              such as:  Joe User <joe@acme.com>
 *
 * We invent the names "bare addr-spec" to describe the first form and
 * "angle addr-spec" to describe the second.
 *
 * Synopsis:
 *
 *      int parse_arpa_mailbox(buf, ret_addr, len_addr,
 *              ret_name, len_name, next_field);
 *      char *buf, *ret_addr, *ret_name, **next_field;
 *      int len_addr, len_name;
 *
 * This routine takes a comma-delimited list of mailbox specifications
 * pointed to by "buf", and breaks the next mailbox specification in the
 * list into the address and fullname components.  It is NONdestructive
 * to the buffer.
 *
 * The return code will be 0 for success, -1 for failure.  All bets are
 * off if the mailbox specification is poorly formed (i.e. syntax errors).
 * We might catch the problem and return -1.  Or we might indicate success
 * and return nonsense values.  Other error conditions are discussed
 * below.
 *
 * If "ret_addr" is not NULL, then it points to a buffer where the
 * extracted address is stored and the "len_addr" indicates the size of
 * the buffer.  If we cannot locate a non-empty address or if it is too
 * large to fit into the buffer (including the '\0' terminator), then an
 * error is returned and an empty string is stored in the buffer.
 *
 * If "ret_name" is not NULL, then it points to a buffer where the
 * extracted fullname is stored and the "len_name" indicates the size of
 * the buffer.  If we cannot locate a non-empty fullname or if it is too
 * large to fit into the buffer, then an empty string is stored in the
 * buffer.  We need to discard the fullname rather than truncating it
 * because truncation could result in an illegal value (e.g. unbalanced
 * quotes).  As an added little glitch, if the fullname value is fully
 * enclosed in double-quotes (and with no interior double-quotes or
 * parentheses), then the quotes will be stripped.  The buffer always
 * holds a terminated string on return, but its content is only
 * meaningful when 0 is returned.
 *
 * If the "next_field" pointer is not NULL, it will be set to point to
 * the beginning of the next mailbox specification in the list.  It will
 * point to the '\0' string terminator when the list is complete.  This
 * update occurs even if an error code is returned, thus address parsing
 * may continue with the next mailbox in the list.  If the "buf" contains
 * a single address, the "next_field" result should be checked upon return
 * to ensure it points to the '\0' string terminator.
 */

#include "elm_defs.h"

static int fullname_is_quoted P_((const char *, int));
static int parse_bare_addrspec P_((const char *, char *, int,
        char *, int, char **));
static int parse_angle_addrspec P_((const char *, char *, int,
        char *, int, char **));


/*
 * Split the next mailbox in "buf" into its address and fullname.
 *
 * Decides which of the two mailbox formats is present, hands the work to
 * the matching parser, and on failure still advances "*next_field" past
 * the offending mailbox so the caller can continue with the list.
 *
 * Returns 0 on success, -1 on failure.
 */
int parse_arpa_mailbox(buf, ret_addr, len_addr, ret_name, len_name, next_field)
const char *buf;
char *ret_addr, *ret_name, **next_field;
int len_addr, len_name;
{
    register const char *s;
    int rc;

    /*
     * Start out with empty results.  The parsers can bail out before
     * they have stored (or terminated) anything, and the buffers are
     * formatted with "%s" in the debug trace below regardless of the
     * outcome, so they must always hold a valid string.
     */
    if (ret_addr != NULL && len_addr > 0)
        *ret_addr = '\0';
    if (ret_name != NULL && len_name > 0)
        *ret_name = '\0';

    /*
     * Take a quick look through the buffer to determine the format.
     * A '<' seen before the end of this mailbox means "angle addr-spec".
     */
    for (s = buf ; *s != '\0' && *s != '<' && *s != ',' ; s += rfc822_toklen(s))
        ;

    /*
     * Handle as the appropriate format.
     */
    if (*s == '<') {
        rc = parse_angle_addrspec(buf, ret_addr, len_addr,
                ret_name, len_name, next_field);
    } else {
        rc = parse_bare_addrspec(buf, ret_addr, len_addr,
                ret_name, len_name, next_field);
    }

    if (rc < 0) {

        /*
         * The bare addr-spec parser may have copied part of an address
         * without terminating it.  Throw the fragment away.
         */
        if (ret_addr != NULL && len_addr > 0)
            *ret_addr = '\0';

        /*
         * In the case of an error, advance to next mailbox field.
         */
        if (next_field != NULL) {
            for (s = buf ; *s != '\0' && *s != ',' ; s += rfc822_toklen(s))
                ;
            if (*s == ',')
                ++s;
            *next_field = (char *) s;
        }
    }

    dprint(5, (debugfile, "parse_arpa_mailbox - addr=\"%s\" name=\"%s\"\n",
        (ret_addr != NULL ? ret_addr : "(null)"),
        (ret_name != NULL ? ret_name : "(null)")));
    return rc;
}


/*
 * Return TRUE if the first "fn_len" characters of "fn_str" are enclosed
 * in double-quotes AND it is safe to strip the quotes.
 *
 * Stripping is considered unsafe when the interior contains another
 * double-quote or a parenthesis, since the bare text would then read as
 * something other than a single quoted string.
 */
static int fullname_is_quoted(fn_str, fn_len)
const char *fn_str;
int fn_len;
{
    int i;

    /* need at least the two enclosing quote characters */
    if (fn_len < 2 || fn_str[0] != '"' || fn_str[fn_len-1] != '"')
        return FALSE;

    /* examine only the characters between the enclosing quotes */
    for (i = 1 ; i < fn_len-1 ; ++i) {
        switch (fn_str[i]) {
        case '"':
        case '(':
        case ')':
            return FALSE;
        }
    }

    return TRUE;
}


/*
 * Parse a mailbox spec in the format:  addr-spec
 *
 * The address is every non-space, non-comment token up to the next ','
 * or the end of the string, concatenated together.  The fullname is the
 * content of the right-most (parenthesized comment), if there is one.
 *
 * Returns 0 on success with "*next_field" (if not NULL) updated, or -1
 * if no address was found or it would not fit in "ret_addr".  On failure
 * "*next_field" is NOT updated and "ret_addr" may be left unterminated;
 * parse_arpa_mailbox() takes care of both.
 */
static int parse_bare_addrspec(buf, ret_addr, len_addr, ret_name, len_name, next_field)
register const char *buf;
char *ret_addr, *ret_name, **next_field;
int len_addr, len_name;
{
    const char *n_ptr;          /* pointer to (user name) into "buf"    */
    int n_len;                  /* length of text pointed to by "n_ptr" */
    register char *a_ptr;       /* pointer into "ret_addr"              */
    int a_size;                 /* space remaining in "ret_addr"        */
    register int tlen;          /* length of current token              */
    int got_addr;               /* indicates an address was found       */

    /*
     * Initialize pointer into address storage, and reserve space for
     * the '\0' terminator.
     */
    a_ptr = ret_addr;
    a_size = (len_addr - 1);

    /*
     * We will set "n_ptr" to the right-most occurrence of (parens)
     * encountered when scanning the buffer.  We go back later and
     * extract this into the "ret_name" buffer.
     */
    n_ptr = NULL;
    n_len = 0; /* to keep "gcc -Wall" from whining */

    /*
     * We will set this TRUE when we discover there really is an addr here.
     */
    got_addr = FALSE;

    /*
     * Discard leading space.  The cast keeps isspace() well defined for
     * characters with the high bit set on signed-char platforms.
     */
    while (isspace((unsigned char) *buf))
        ++buf;

    /*
     * Scan through the field, copying out the address elements.
     */
    while (*buf != '\0' && *buf != ',') {

        tlen = rfc822_toklen(buf);

        if (isspace((unsigned char) *buf)) {

            /*
             * Discard whitespace.
             */
            ; /* nop */

        } else if (*buf == '(') { /*)*/

            /*
             * Save info so we can go back later and extract
             * the right-most comment with (parens) stripped.
             */
            n_ptr = buf+1;
            n_len = tlen-2;

        } else {

            /*
             * This is a portion of the address.  "a_size" already
             * excludes the terminator, so a token of exactly "a_size"
             * characters still fits.
             */
            if (a_ptr != NULL) {
                if (tlen > a_size)
                    return -1;
                (void) strncpy(a_ptr, buf, tlen);
                a_ptr += tlen;
                a_size -= tlen;
            }
            got_addr = TRUE;

        }

        buf += tlen;

    }

    /*
     * Make sure we extracted a valid address and terminate the string.
     */
    if (!got_addr)
        return -1;
    if (a_ptr != NULL)
        *a_ptr = '\0';

    /*
     * If there is a fullname comment then save it off, else set the result
     * to an empty string.  Also return an empty string if the buffer isn't
     * big enough.  That's because if we only stored off a portion, we
     * could end up with something like unbalanced quotes.
     */
    if (ret_name != NULL) {
        if (n_ptr == NULL) {
            *ret_name = '\0';
        } else {
            if (*n_ptr == '"' && fullname_is_quoted(n_ptr, n_len)) {
                ++n_ptr;
                n_len -= 2;
            }
            if (n_len > 0 && n_len < len_name)
                (void) strfcpy(ret_name, n_ptr, n_len+1);
            else
                *ret_name = '\0';
        }
    }

    /*
     * We should be at the end of the mailbox field.
     */
    if (*buf != '\0' && *buf != ',')
        return -1;

    /*
     * Save off pointer to next mailbox field.
     */
    if (next_field != NULL)
        *next_field = (char *) buf + (*buf == ',' ? 1 : 0);

    return 0;
}


/*
 * Parse a mailbox spec in the format:  [phrase] "<" [route] addr-spec ">"
 *
 * The fullname is the phrase ahead of the '<' (trailing whitespace
 * removed) and the address is everything between the angle brackets.
 * Only whitespace and (comments) may follow the closing '>'.
 *
 * Returns 0 on success with "*next_field" (if not NULL) updated, or -1
 * on a syntax error, an empty address, or an address too large for
 * "ret_addr".  On failure "*next_field" is NOT updated; the caller,
 * parse_arpa_mailbox(), takes care of that.
 */
static int parse_angle_addrspec(buf, ret_addr, len_addr, ret_name, len_name, next_field)
register const char *buf;
char *ret_addr, *ret_name, **next_field;
int len_addr, len_name;
{
    const char *beg_field, *end_field;
    register int tlen;
    int w;

    /*
     * Discard leading space.  The cast keeps isspace() well defined for
     * characters with the high bit set on signed-char platforms.
     */
    while (isspace((unsigned char) *buf))
        ++buf;

    /*
     * Locate the front and back of the fullname portion.
     * "end_field" actually points one beyond the end of the field.
     */
    beg_field = end_field = buf;
    while (*buf != '<' && *buf != '\0' && *buf != ',') {
        tlen = rfc822_toklen(buf);
        /*
         * By updating "end_field" only on non-space tokens we ensure
         * that when we copy beg_field->end_field trailing whitespace
         * will be elided.
         */
        if (!isspace((unsigned char) *buf))
            end_field = buf + tlen;
        buf += tlen;
    }
    if (*buf != '<')
        return -1;

    /*
     * If there is a fullname field then save it off, else set the result
     * to an empty string.  Also return an empty string if the buffer isn't
     * big enough.  That's because if we only stored off a portion, we
     * could end up with something like unbalanced quotes.
     */
    if (ret_name != NULL) {
        w = end_field - beg_field;
        if (w <= 0 || w >= len_name)
            *ret_name = '\0';
        else {
            if (*beg_field == '"' && fullname_is_quoted(beg_field, w)) {
                ++beg_field;
                w -= 2;
            }
            (void) strfcpy(ret_name, beg_field, w+1);
        }
    }

    /*
     * Locate the front and back of the address field.
     * "end_field" actually points one beyond the end of the field.
     */
    beg_field = ++buf;
    while (*buf != '>' && *buf != '\0')
        buf += rfc822_toklen(buf);
    if (*buf != '>')
        return -1;
    end_field = buf;

    /*
     * Calculate the length of the address and save off the result.
     * The address needs "w" characters plus the '\0' terminator, so it
     * only fits when "w" is strictly less than the buffer size.  This is
     * the same test applied to the fullname above.
     * NOTE(review): assumes strfcpy(dst, src, n) stores at most n-1
     * characters followed by a terminator -- confirm against its source.
     */
    if ((w = end_field - beg_field) <= 0)
        return -1;
    if (ret_addr != NULL) {
        if (w >= len_addr)
            return -1;
        (void) strfcpy(ret_addr, beg_field, w+1);
    }

    /*
     * There shouldn't be anything but comments and whitespace left.
     */
    ++buf;
    while (isspace((unsigned char) *buf) || *buf == '(') /*)*/
        buf += rfc822_toklen(buf);
    if (*buf != '\0' && *buf != ',')
        return -1;

    /*
     * Save off pointer to next mailbox field.
     */
    if (next_field != NULL)
        *next_field = (char *) buf + (*buf == ',' ? 1 : 0);

    return 0;
}


#ifdef _TEST
/*
 * Stand-alone test driver:  reads address lists from stdin, one list per
 * line, and prints the address/fullname pair parsed from each mailbox.
 */
int debug = 0;
FILE *debugfile;        /* set in main(); "stderr" need not be a constant */

int main()
{
    char buf[256], abuf[128], nbuf[128], *cf, *nf;
    int rc;

    debugfile = stderr;

    fputs("Enter address list, one per line. EOF to terminate\n", stderr);
    while (fgets(buf, sizeof(buf), stdin) != NULL) {

        /* fgets() keeps the newline; cut the line off there */
        for (cf = buf ; *cf != '\0' && *cf != '\n' ; ++cf)
            ;
        *cf = '\0';

        cf = buf;
        while (*cf != '\0') {
            rc = parse_arpa_mailbox(cf, abuf, sizeof(abuf),
                    nbuf, sizeof(nbuf), &nf);
            if (rc < 0) {
                /* show just the mailbox that failed, not the whole line */
                printf("illegal mailbox: %.*s\n", (int) (nf-cf), cf);
            } else {
                printf("addr=|%s| name=|%s|\n", abuf, nbuf);
            }
            cf = nf;
        }
    }

    return 0;
}
#endif /*_TEST*/