Codebase list cyrus-imapd / debian/3.0.10-1 lib / charset.h
debian/3.0.10-1

Tree @debian/3.0.10-1 (Download .tar.gz)

charset.h @debian/3.0.10-1raw · history · blame

/*
 * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The name "Carnegie Mellon University" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For permission or any legal
 *    details, please contact
 *      Carnegie Mellon University
 *      Center for Technology Transfer and Enterprise Creation
 *      4615 Forbes Avenue
 *      Suite 302
 *      Pittsburgh, PA  15213
 *      (412) 268-7393, fax: (412) 268-7395
 *      innovation@andrew.cmu.edu
 *
 * 4. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by Computing Services
 *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
 *
 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef INCLUDED_CHARSET_H
#define INCLUDED_CHARSET_H

/*
 * Identifiers passed as the `int encoding` argument of the functions
 * declared in this header.
 * NOTE(review): presumably these are MIME content-transfer-encodings
 * (QP = quoted-printable) -- confirm against the implementation.
 */
#define ENCODING_NONE      0
#define ENCODING_QP        1
#define ENCODING_BASE64    2
#define ENCODING_UNKNOWN   255

/*
 * Single-bit option values; each occupies its own bit so several can be
 * OR-ed together into one int.
 * NOTE(review): presumably these are what the `int flags` parameters of
 * the functions in this header expect -- confirm against the implementation.
 * Bit 7 (0x080) is not assigned in this header.
 */
#define CHARSET_SKIPDIACRIT    0x001
#define CHARSET_SKIPSPACE      0x002
#define CHARSET_MERGESPACE     0x004
#define CHARSET_SKIPHTML       0x008
#define CHARSET_SNIPPET        0x010
#define CHARSET_UNFOLD_SKIPWS  0x020
#define CHARSET_MIME_UTF8      0x040
#define CHARSET_ESCAPEHTML     0x100

/* The null charset handle.
 * NOTE(review): presumably what the charset lookup functions return when
 * no matching charset exists -- confirm against the implementation. */
#define CHARSET_UNKNOWN_CHARSET (NULL)

#include "unicode/ucnv.h"

#include "util.h"

/* Compiled search pattern.  In this header it is only ever handled through
 * pointers: charset_compilepat() returns a comp_pat * and charset_freepat()
 * accepts one.
 * NOTE(review): the underlying `int` looks like a placeholder that keeps
 * the real representation private -- confirm against the implementation. */
typedef int comp_pat;
/*
 * Charset identifies a character encoding.  It is a pointer to the
 * struct charset_converter type, whose layout is not exposed here.
 *
 * Use charset_lookupname to create an instance (charset_lookupnumid also
 * returns a charset_t), and release it using charset_free.  Note that
 * charset_free takes the address of the handle, not the handle itself.
 *
 * Caveats:
 * * Two instances for the same character encoding are not pointer-equal.
 *   Use string comparison of the charset_name to test for equality.
 * * Instances are not safe to use for two simultaneous conversions. It is safe
 *   (and recommended) to reuse an instance for consecutive conversions.
 */
typedef struct charset_converter* charset_t;

/* Look up an encoding by its textual name.
 * NOTE(review): presumably returns one of the ENCODING_* constants, with
 * ENCODING_UNKNOWN for an unrecognised name -- confirm against the
 * implementation. */
extern int encoding_lookupname(const char *name);

/* Return a textual name for an encoding identifier.
 * The result is a const string; callers must not modify it.
 * NOTE(review): presumably the inverse of encoding_lookupname() for the
 * ENCODING_* constants -- confirm against the implementation. */
extern const char *encoding_name(int encoding);

/*
 * String conversion helpers.  None of these takes a length, so `s` is
 * presumably a NUL-terminated string, and each returns its result as a
 * char * instead of writing into a caller-supplied buffer.
 *
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller -- confirm against the implementation.
 * NOTE(review): the previous comment here spoke of ensuring up to
 * MAXTRANSLATION times expansion into `buf`; no prototype below takes a
 * `buf`, so that remark looks stale -- confirm before relying on it.
 */
extern char *charset_convert(const char *s, charset_t charset, int flags);
extern char *charset_decode_mimeheader(const char *s, int flags);
extern char *charset_parse_mimeheader(const char *s, int flags);
extern char *charset_utf8_to_searchform(const char *s, int flags);


/* Obtain a charset handle, either by name or by numeric id.
 * NOTE(review): presumably CHARSET_UNKNOWN_CHARSET is returned when nothing
 * matches -- confirm against the implementation. */
extern charset_t charset_lookupname(const char *name);
extern charset_t charset_lookupnumid(int id);
/* Release a charset handle.  Takes the address of the handle.
 * NOTE(review): presumably so the handle can be reset after release --
 * confirm against the implementation. */
extern void charset_free(charset_t *charset);

extern const char *charset_name(charset_t);
/* Build a comp_pat from the string `s`, and dispose of one.
 * NOTE(review): presumably every pattern returned by charset_compilepat()
 * must eventually be passed to charset_freepat() -- confirm against the
 * implementation. */
extern comp_pat *charset_compilepat(const char *s);
extern void charset_freepat(comp_pat *pat);
/* Search `len` bytes at `s` using `substr` and its compiled pattern `pat`.
 * NOTE(review): presumably `pat` is the result of charset_compilepat() on
 * `substr`, and the return value is non-zero on a match -- confirm against
 * the implementation. */
extern int charset_searchstring(const char *substr, comp_pat *pat,
                                const char *s, size_t len, int flags);
/* As charset_searchstring(), but the `len` bytes at `msg_base` are
 * additionally described by a charset handle and an `encoding` value.
 * NOTE(review): presumably the data is decoded and converted before it is
 * searched, and `encoding` is one of the ENCODING_* constants -- confirm
 * against the implementation. */
extern int charset_searchfile(const char *substr, comp_pat *pat,
                              const char *msg_base, size_t len,
                              charset_t charset, int encoding, int flags);
/* Decode `len` bytes at `msg_base` according to `encoding`.  Output is
 * reported through the return value, `*retval` and `*outlen`.
 * NOTE(review): presumably `*retval` receives any buffer the caller must
 * free, and the returned pointer may alias `msg_base` when nothing needs
 * decoding -- confirm against the implementation. */
extern const char *charset_decode_mimebody(const char *msg_base, size_t len,
                                           int encoding, char **retval,
                                           size_t *outlen);
/* Encode `len` bytes at `msg_base`.  `retval` is a caller-supplied char *,
 * and `outlen` / `outlines` are output parameters.
 * NOTE(review): presumably the encoding is base64, `retval` is the output
 * buffer, and `*outlines` receives a line count -- confirm against the
 * implementation. */
extern char *charset_encode_mimebody(const char *msg_base, size_t len,
                                     char *retval, size_t *outlen,
                                     int *outlines);
/* NOTE(review): presumably quoted-printable encodes the input and returns
 * a newly allocated buffer whose length is stored in `*outlen` -- confirm
 * against the implementation. */
extern char *charset_qpencode_mimebody(const char *msg_base, size_t len,
                                       size_t *outlen);
/* Convert `len` bytes at `msg_base`, described by a charset handle and an
 * `encoding` value, to a char * result.
 * NOTE(review): judging by the names the targets are UTF-8 and IMAP
 * modified UTF-7 respectively, and the result is presumably owned by the
 * caller -- confirm against the implementation. */
extern char *charset_to_utf8(const char *msg_base, size_t len, charset_t charset, int encoding);
extern char *charset_to_imaputf7(const char *msg_base, size_t len, charset_t charset, int encoding);

/* Search the header string `s` using `substr` and its compiled pattern.
 * NOTE(review): presumably `s` is MIME-decoded before searching and the
 * return value is non-zero on a match -- confirm against the
 * implementation. */
extern int charset_search_mimeheader(const char *substr, comp_pat *pat, const char *s, int flags);

/* NOTE(review): presumably returns a newly allocated MIME-encoded copy of
 * the `len` bytes at `header` -- confirm against the implementation. */
extern char *charset_encode_mimeheader(const char *header, size_t len);

/* NOTE(review): presumably returns a newly allocated copy of the `len`
 * bytes at `s` with header line folding removed; CHARSET_UNFOLD_SKIPWS
 * looks like the relevant flag -- confirm against the implementation. */
extern char *charset_unfold(const char *s, size_t len, int flags);

/* Decode `len` bytes at `src` according to `encoding`, delivering the
 * output through the struct buf `dst`.
 * NOTE(review): the meaning of the int result (presumably 0 on success) is
 * not visible here -- confirm against the implementation. */
extern int charset_decode(struct buf *dst, const char *src, size_t len, int encoding);

/* Extract the body text held in 'data', convert its text to the
   canonical form for searching, and pass the converted text down in a
   series of invocations of the callback 'cb'.  Each invocation receives
   a piece of text together with the caller's 'rock' pointer, unchanged.
   This is called by index_getsearchtext to extract the MIME body parts.

   'charset' and 'encoding' describe how 'data' is encoded; 'subtype'
   and 'flags' are further inputs.
   NOTE(review): 'subtype' is presumably the MIME subtype of the part
   (e.g. to treat HTML specially) and the meaning of the int result is
   not visible here -- confirm against the implementation.
   NOTE(review): the previous comment referred to a message denoted by
   'uid'; no such parameter exists in this prototype. */
extern int charset_extract(void (*cb)(const struct buf *text, void *rock),
                           void *rock,
                           const struct buf *data,
                           charset_t charset, int encoding,
                           const char *subtype, int flags);

#endif /* INCLUDED_CHARSET_H */