static char rcsid[] = "@(#)$Id: rfc822tlen.c,v 1.3 1995/09/29 17:41:35 wfp5p Exp $"; /******************************************************************************* * The Elm Mail System - $Revision: 1.3 $ $State: Exp $ * * Copyright (c) 1988-1995 USENET Community Trust ******************************************************************************* * Bug reports, patches, comments, suggestions should be sent to: * * Bill Pemberton, Elm Coordinator * flash@virginia.edu * ******************************************************************************* * $Log: rfc822tlen.c,v $ * Revision 1.3 1995/09/29 17:41:35 wfp5p * Alpha 8 (Chip's big changes) * * Revision 1.2 1995/09/11 15:18:58 wfp5p * Alpha 7 * * Revision 1.1.1.1 1995/04/19 20:38:33 wfp5p * Initial import of elm 2.4 PL0 as base for elm 2.5. * ******************************************************************************/ #include "elm_defs.h" /* * rfc822_toklen(str) - Returns length of RFC-822 token that starts at "str". * * We understand the following tokens: * * linear-white-space * "quoted string" * [dom.ain.lit.eral] * (comment) * \c (quoted character) * control characters * special characters (other chars with semantic meaning in addresses) * atom (strings of alphanumerics and non-special/non-control chars) * * This routine is a profiling hot spot. To speed things up, a lookup * table is used to classify the character types. The table is initialized * the first time this routine is called. * * At this time, this routine does not do any error handling, and will * process defective tokens (e.g. no closing paren or quote). Grep * for ERROR to see the places where error handling should be added if * it ever is necessary. */ #define charlen(s) ((s)[0] == '\\' && (s)[1] != '\0' ? 2 : 1) /* * Assuming headers only contain 7-bit US-ASCII, which * should be true for the structured address fields. */ static char chtab[0200]; static int first_time = 1; #define CH_EOS 0 /* \0 - we should not see this! */ #define CH_ATOM 1 /* char that can be part of an atom */ #define CH_SPACE 2 /* linear white space character */ #define CH_COMMENT 3 /* ( char - comment */ #define CH_QSTR 4 /* " char - quoted string */ #define CH_QCHAR 5 /* \ char - quoted character */ #define CH_DOMLIT 6 /* [ char - domain literal */ #define CH_SPECIAL 7 /* some other char with special meaning */ #define CH_CTL 8 /* a non-printing control character */ #define chtype(c) (chtab[(c) & 0177]) int rfc822_toklen(str) register const char *str; { const char *str0; int depth; if (first_time) { int i = 0; /* most chars in the range 001 - 037 are control chars */ while (i < 040) chtab[i++] = CH_CTL; /* most char in the range 040 - 0177 are "atom" chars */ while (i < 0200) chtab[i++] = CH_ATOM; chtab[0] = CH_EOS; /* mark whitespace chars */ chtab[' '] = CH_SPACE; chtab['\t'] = CH_SPACE; chtab['\r'] = CH_SPACE; chtab['\n'] = CH_SPACE; /* mark special chars that require further lexical processing */ chtab['"'] = CH_QSTR; chtab['('] = CH_COMMENT; chtab['['] = CH_DOMLIT; chtab['\\'] = CH_QCHAR; /* mark remaining chars that are special in address fields */ chtab[')'] = CH_SPECIAL; chtab['<'] = CH_SPECIAL; chtab['>'] = CH_SPECIAL; chtab['@'] = CH_SPECIAL; chtab[','] = CH_SPECIAL; chtab[';'] = CH_SPECIAL; chtab[':'] = CH_SPECIAL; chtab['.'] = CH_SPECIAL; chtab[']'] = CH_SPECIAL; first_time = 0; } str0 = str; switch (chtype(*str)) { case CH_ATOM: do { ++str; } while (chtype(*str) == CH_ATOM); return (str-str0); case CH_SPACE: do { ++str; } while (chtype(*str) == CH_SPACE); return (str-str0); case CH_COMMENT: ++str; depth = 0; while (*str != '\0' && (*str != ')' || depth > 0)) { switch (*str) { case '(': ++str; ++depth; break; case ')': ++str; --depth; break; default: str += charlen(str); break; } } if (*str == ')') ++str; else ; /* ERROR - unterminated paren */ return (str-str0); case CH_QSTR: ++str; while (*str != '\0' && *str != '"') str += charlen(str); if (*str == '"') ++str; else ; /* ERROR - unterminated quote */ return (str-str0); case CH_QCHAR: if (str[1] != '\0') return 2; return 1; /* ERROR - string ends with backslash */ case CH_DOMLIT: ++str; while (*str != '\0' && *str != ']') str += charlen(str); if (*str == ']') ++str; else ; /* ERROR - unterminated domain literal */ return (str-str0); case CH_EOS: return 0; /* ERROR - we should not see this */ /* case CH_SPECIAL: */ /* case CH_CTL: */ default: return 1; } /*NOTREACHED*/ } #ifdef _TEST main() { char buf[1024], *bp; int len; for (;;) { fputs("\nstr> ", stdout); fflush(stdout); if (gets(buf) == NULL) { putchar('\n'); break; } bp = buf; while (*bp != '\0') { len = rfc822_toklen(bp); printf("len %4d |%.*s|\n", len, len, bp); bp += len; } } exit(0); } #endif