lib/parseaddr.c - cyrus-imapd (upstream/3.4.3)

Tree @upstream/3.4.3 (Download .tar.gz)

parseaddr.c @upstream/3.4.3 — raw · history · blame

/* parseaddr.c -- RFC 822 address parser
 *
 * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The name "Carnegie Mellon University" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For permission or any legal
 *    details, please contact
 *      Carnegie Mellon University
 *      Center for Technology Transfer and Enterprise Creation
 *      4615 Forbes Avenue
 *      Suite 302
 *      Pittsburgh, PA  15213
 *      (412) 268-7393, fax: (412) 268-7395
 *      innovation@andrew.cmu.edu
 *
 * 4. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by Computing Services
 *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
 *
 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "parseaddr.h"
#include "xmalloc.h"
#include "util.h"

static const char unknown_user[] = "unknown-user";
static const char unspecified_domain[] = "unspecified-domain";

static void parseaddr_append(struct address ***addrpp, const char *name,
                             const char *route, const char *mailbox,
                             const char *domain, char **freemep, int invalid);
static int parseaddr_phrase (char **inp, char **phrasep, const char *specials);
static int parseaddr_domain (char **inp, char **domainp, char **commmentp, int *invalid);
static int parseaddr_route (char **inp, char **routep);

/*
 * Parse an address list in 's', appending address structures to
 * the list pointed to by 'addrp'.
 */
EXPORTED void parseaddr_list(const char *str, struct address **addrp)
{
    char *s;
    int ingroup = 0;
    char *freeme;
    int tok = ' ', invalid = 0;
    char *phrase, *route, *mailbox, *domain, *comment;

    /* Skip down to the tail */
    while (*addrp) {
        addrp = &(*addrp)->next;
    }

    s = freeme = xstrdup(str);

    while (tok) {
        tok = parseaddr_phrase(&s, &phrase, ingroup ? ",@<;" : ",@<:");
        switch (tok) {
        case ',':
        case '\0':
        case ';':
            if (*phrase) {
                parseaddr_append(&addrp, 0, 0, phrase, "", &freeme, invalid);
            }
            if (tok == ';') {
                parseaddr_append(&addrp, 0, 0, 0, 0, &freeme, invalid);
                ingroup = 0;
            }
            continue;

        case ':':
            parseaddr_append(&addrp, 0, 0, phrase, 0, &freeme, invalid);
            ingroup++;
            continue;

        case '@':
            tok = parseaddr_domain(&s, &domain, &comment, &invalid);
            parseaddr_append(&addrp, comment, 0, phrase, domain, &freeme, invalid);
            if (tok == ';') {
                parseaddr_append(&addrp, 0, 0, 0, 0, &freeme, invalid);
                ingroup = 0;
            }
            continue;

        case '<':
            tok = parseaddr_phrase(&s, &mailbox, "@>");
            if (tok == '@') {
                route = 0;
                if (!*mailbox) {
                    *--s = '@';
                    tok = parseaddr_route(&s, &route);
                    if (tok != ':') {
                        parseaddr_append(&addrp, phrase, route, "", "", &freeme, invalid);
                        while (tok && tok != '>') tok = *s++;
                        continue;
                    }
                    tok = parseaddr_phrase(&s, &mailbox, "@>");
                    if (tok != '@') {
                        parseaddr_append(&addrp, phrase, route, mailbox, "",
                                         &freeme, invalid);
                        continue;
                    }
                }
                tok = parseaddr_domain(&s, &domain, 0, &invalid);
                parseaddr_append(&addrp, phrase, route, mailbox, domain,
                                 &freeme, invalid);
                while (tok && tok != '>') tok = *s++;
                continue; /* effectively auto-inserts a comma */
            }
            else {
                parseaddr_append(&addrp, phrase, 0, mailbox, "", &freeme, invalid);
            }
        }
    }
    if (ingroup) parseaddr_append(&addrp, 0, 0, 0, 0, &freeme, invalid);

    if (freeme) free(freeme);
}

/*
 * Free the address list 'addr'
 */
EXPORTED void parseaddr_free(struct address *addr)
{
    struct address *next;

    while (addr) {
        if (addr->freeme) free(addr->freeme);
        next = addr->next;
        free((char *)addr);
        addr = next;
    }
}

/*
 * Helper function to append a new address structure to and address list.
 */
static void parseaddr_append(struct address ***addrpp, const char *name,
                             const char *route, const char *mailbox,
                             const char *domain, char **freemep, int invalid)
{
    struct address *newaddr;

    newaddr = (struct address *)xmalloc(sizeof(struct address));
    if (name && *name) {
        newaddr->name = name;
    }
    else {
        newaddr->name = 0;
    }

    if (route && *route) {
        newaddr->route = route;
    }
    else {
        newaddr->route = 0;
    }

    newaddr->mailbox = mailbox;

    if (domain && !*domain) {
        domain = unspecified_domain;
    }
    newaddr->domain = domain;

    newaddr->next = 0;
    newaddr->freeme = *freemep;
    *freemep = 0;

    newaddr->invalid = invalid;

    **addrpp = newaddr;
    *addrpp = &newaddr->next;
}

/* Macro to skip white space and RFC 822 comments */

#define SKIPWHITESPACE(s) \
{ \
    int _c, _comment = 0; \
 \
    while ((_c = *(s))) { \
        if (_c == '(') { \
            _comment = 1; \
            (s)++; \
            while ((_comment && (_c = *(s)))) { \
                (s)++; \
                if (_c == '\\' && *(s)) (s)++; \
                else if (_c == '(') _comment++; \
                else if (_c == ')') _comment--; \
            } \
            (s)--; \
        } \
        else if (!Uisspace(_c)) break; \
        (s)++; \
    } \
}

/*
 * Parse an RFC 822 "phrase", stopping at 'specials'
 */
static int parseaddr_phrase(char **inp, char **phrasep, const char *specials)
{
    int c;
    char *src = *inp;
    char *dst;

    SKIPWHITESPACE(src);

    *phrasep = dst = src;

    for (;;) {
        c = *src++;
        if (c == '"') {
            while ((c = *src)) {
                src++;
                if (c == '\\' && *src == '\r' && *(src+1) == '\n') {
                    /* Ignore quote right in front of CR+LF. There's no
                     * point in accepting lone CR or stray LF, and there's
                     * clients out there that produce these bogus addresses. */
                    c = *src;
                    src++;
                }
                if (c == '\r' && *src == '\n') {
                    /* CR+LF combination */
                    src++;
                    if (*src == ' ' || *src == '\t') {
                        /* CR+LF+WSP - folded header field,
                         * unfold it by skipping ONLY the CR+LF */
                        continue;
                    }
                    /* otherwise we have CR+LF at the end of a header
                     * field, which means we have an unbalanced " */
                    goto fail;
                }
                else if (iscntrl(c)) {
                    if (c == '\r' || c == '\n')
                        c = ' '; // replace CR and LF with space
                    else if (c != '\t')
                        continue; // else ignore anything but TAB
                }
                if (c == '"') break;        /* end of quoted string */
                if (c == '\\') {
                    if (!(c = *src)) goto fail;
                    src++;
                }
                *dst++ = c;
            }
            if (c != '"') goto fail;        /* unbalanced " */
        }
        else if (Uisspace(c) || c == '(') {
            src--;
            SKIPWHITESPACE(src);
            *dst++ = ' ';
        }
        else if (!c || strchr(specials, c)) {
            if (dst > *phrasep && dst[-1] == ' ') dst--;
            *dst = '\0';
            *inp = src;
            return c;
        }
        else {
            *dst++ = c;
        }
    }

fail:
    /* simulate end-of-string */
    *phrasep = "";
    return 0;
}

/*
 * Parse a domain.  If 'commentp' is non-nil, parses any trailing comment.
 * If the domain is invalid, set invalid to non-zero.
 */
static int parseaddr_domain(char **inp, char **domainp, char **commentp, int *invalid)
{
    int c;
    char *src = *inp;
    char *dst;
    char *cdst;
    int comment;

    if (commentp) *commentp = 0;
    SKIPWHITESPACE(src);

    *domainp = dst = src;

    for (;;) {
        c = *src++;
        if (Uisalnum(c) || c == '-' || c == '[' || c == ']' || c == ':') {
            *dst++ = c;
            if (commentp) *commentp = 0;
        }
        else if (c == '.') {
            if (dst > *domainp && dst[-1] != '.') *dst++ = c;
            if (commentp) *commentp = 0;
        }
        else if (c == '(') {
            if (commentp) {
                *commentp = cdst = src;
                comment = 1;
                while (comment && (c = *src)) {
                    src++;
                    if (c == '(') comment++;
                    else if (c == ')') comment--;
                    else if (c == '\\' && (c = *src)) src++;

                    if (comment) *cdst++ = c;
                }
                *cdst = '\0';
            }
            else {
                src--;
                SKIPWHITESPACE(src);
            }
        }
        else if (c == '@') {
            /* This domain name is garbage. Continue eating up the characters
             * until we get to a sane state. */
            *invalid = 1;
            *dst++ = c;
            if (commentp) *commentp = 0;
        }
        else if (!Uisspace(c)) {
            if (dst > *domainp && dst[-1] == '.') dst--;
            *dst = '\0';
            *inp = src;
            return c;
        }
    }
}

/*
 * Parse a source route (at-domain-list)
 */
static int parseaddr_route(char **inp, char **routep)
{
    int c;
    char *src = *inp;
    char *dst;

    SKIPWHITESPACE(src);

    *routep = dst = src;

    for (;;) {
        c = *src++;
        if (Uisalnum(c) || c == '-' || c == '[' || c == ']' ||
            c == ',' || c == '@') {
            *dst++ = c;
        }
        else if (c == '.') {
            if (dst > *routep && dst[-1] != '.') *dst++ = c;
        }
        else if (Uisspace(c) || c == '(') {
            src--;
            SKIPWHITESPACE(src);
        }
        else {
            while (dst > *routep &&
                   (dst[-1] == '.' || dst[-1] == ',' || dst[-1] == '@')) dst--;
            *dst = '\0';
            *inp = src;
            return c;
        }
    }
}

EXPORTED char *address_get_all(const struct address *a, int canon_domain)
{
    char *s = NULL;

    if (a->mailbox || a->domain) {
        const char *m = a->mailbox ? a->mailbox : unknown_user;
        const char *d = a->domain ? a->domain : unspecified_domain;
        s = strconcat(m, "@", d, (char *)NULL);
        if (canon_domain)
            lcase(s + strlen(m) + 1);
    }

    return s;
}

EXPORTED char *address_get_localpart(const struct address *a)
{
    return xstrdupnull(a->mailbox);
}

EXPORTED char *address_get_domain(const struct address *a, int canon_domain)
{
    char *s = NULL;

    if (a->domain) {
        s = xstrdup(a->domain);
        if (canon_domain)
            lcase(s);
    }

    return s;
}

EXPORTED char *address_get_user(const struct address *a)
{
    char *s = NULL;

    if (a->mailbox) {
        char *p = strchr(a->mailbox, '+');
        int len = p ? p - a->mailbox : (int)strlen(a->mailbox);
        s = xstrndup(a->mailbox, len);
    }

    return s;
}

EXPORTED char *address_get_detail(const struct address *a)
{
    char *s = NULL;

    if (a->mailbox) {
        char *p = strchr(a->mailbox, '+');
        s = p ? xstrdup(p + 1) : NULL;
    }

    return s;
}

/*
 * Address iterator interface
 */

EXPORTED void address_itr_init(struct address_itr *ai, const char *str,
                               int reverse_path)
{
    memset(ai, 0, sizeof(*ai));
    if (!*str && reverse_path) {
        /* Null reverse-path */
        ai->addrlist = (struct address *)xzmalloc(sizeof(struct address));
    }
    else parseaddr_list(str, &ai->addrlist);
    ai->anext = ai->addrlist;
}

EXPORTED const struct address *address_itr_next(struct address_itr *ai)
{
    struct address *a;
    if (ai->anext == NULL)
        return NULL;
    a = ai->anext;
    ai->anext = ai->anext->next;
    return a;
}

EXPORTED void address_itr_fini(struct address_itr *ai)
{
    parseaddr_free(ai->addrlist);
    memset(ai, 0, sizeof(*ai));
}


/*
 * Convenience function to return a single canonicalised address.
 */
EXPORTED char *address_canonicalise(const char *str)
{
    struct address *addrlist = NULL;
    char *s = NULL;

    parseaddr_list(str, &addrlist);
    if (addrlist)
        s = address_get_all(addrlist, 1);
    parseaddr_free(addrlist);

    return s;
}