Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F120834002
charset.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
6 KB
Referenced Files
None
Subscribers
None
charset.h
View Options
/*
* Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The name "Carnegie Mellon University" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For permission or any legal
* details, please contact
* Carnegie Mellon University
* Center for Technology Transfer and Enterprise Creation
* 4615 Forbes Avenue
* Suite 302
* Pittsburgh, PA 15213
* (412) 268-7393, fax: (412) 268-7395
* innovation@andrew.cmu.edu
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Computing Services
* at Carnegie Mellon University (http://www.cmu.edu/computing/)."
*
* CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef INCLUDED_CHARSET_H
#define INCLUDED_CHARSET_H
/* Encoding identifiers.
 * NOTE(review): presumably the values returned by encoding_lookupname() and
 * accepted by the 'encoding' parameters in this header -- confirm. */
#define ENCODING_NONE 0
#define ENCODING_QP 1
#define ENCODING_BASE64 2
#define ENCODING_UNKNOWN 255
/* CHARSET_* flags: each is a distinct single bit, so they can be combined
 * with bitwise OR.  Bit 7 (1<<7) is not assigned in this list.
 * NOTE(review): presumably these are what the 'flags' parameters in this
 * header accept -- confirm which functions honour which bits. */
#define CHARSET_SKIPDIACRIT (1<<0)
#define CHARSET_SKIPSPACE (1<<1)
#define CHARSET_MERGESPACE (1<<2)
#define CHARSET_SKIPHTML (1<<3)
#define CHARSET_KEEPCASE (1<<4)
#define CHARSET_UNFOLD_SKIPWS (1<<5)
#define CHARSET_MIME_UTF8 (1<<6)
#define CHARSET_ESCAPEHTML (1<<8)
#define CHARSET_KEEPHTML (1<<9)
#define CHARSET_TRIMWS (1<<10)
/* A NULL charset handle.
 * NOTE(review): presumably what the charset lookup functions return for an
 * unrecognised charset -- confirm. */
#define CHARSET_UNKNOWN_CHARSET (NULL)
#include <stddef.h>
#include "util.h"
/* Pattern type: charset_compilepat() returns a comp_pat pointer and
 * charset_freepat() accepts one.  Declared here as a plain int; every use
 * in this header is through a comp_pat pointer. */
typedef int comp_pat;
/*
 * charset_t identifies a character encoding.  It is a pointer to
 * struct charset_converter, which is not defined in this header.
 * Create an instance with charset_lookupname() and release it with
 * charset_free().
 *
 * Caveats:
 *  - Two instances for the same character encoding are not pointer-equal;
 *    test for equality by string comparison of charset_canon_name().
 *  - An instance is not safe to use for two simultaneous conversions.
 *    Reusing one instance for consecutive conversions is safe (and
 *    recommended).
 */
typedef struct charset_converter *charset_t;
/* Look up an encoding by its name.
 * NOTE(review): the int result is presumably one of the ENCODING_*
 * constants -- confirm against the implementation. */
extern int encoding_lookupname(const char *name);
/* Return the name of an encoding as a const string.
 * NOTE(review): 'encoding' is presumably one of the ENCODING_* constants;
 * the result for other values is not visible from this header. */
extern const char *encoding_name(int encoding);
/* Convert the string s using 'charset' and 'flags'.
 * The note originally attached to this declaration reads: "ensure up to
 * MAXTRANSLATION times expansion into buf".
 * NOTE(review): neither MAXTRANSLATION nor a 'buf' argument appears in this
 * prototype, so that note may be stale -- confirm against the implementation.
 * NOTE(review): 'flags' is presumably a mask of CHARSET_* bits. */
extern char *charset_convert(const char *s, charset_t charset, int flags);
/* Takes a header string s and a flags word; returns a char *.
 * NOTE(review): judging by the name, this decodes a MIME header value;
 * 'flags' is presumably a mask of CHARSET_* bits, and the non-const result
 * is presumably owned by the caller -- confirm. */
extern char *charset_decode_mimeheader(const char *s, int flags);
/* Takes a header string s and a flags word; returns a char *.
 * NOTE(review): same signature as charset_decode_mimeheader(); how the two
 * differ is not visible from this header -- confirm in the implementation. */
extern char *charset_parse_mimeheader(const char *s, int flags);
/* Takes a string s and a struct buf pointer 'language'; returns a char *.
 * NOTE(review): 'language' is a non-const buffer, so it is presumably an
 * output that receives a language tag parsed from s -- confirm. */
extern char *charset_parse_mimexvalue(const char *s, struct buf *language);
/* Takes a string s and a 'language' string; returns a char *.
 * NOTE(review): by its name this is the encoding counterpart of
 * charset_parse_mimexvalue(); whether 'language' may be NULL is not
 * visible here -- confirm. */
extern char *charset_encode_mimexvalue(const char *s, const char *language);
/* Takes a string s and a flags word; returns a char *.
 * NOTE(review): by its name, s is expected to be UTF-8 and the result is
 * the form used for searching; 'flags' is presumably a mask of CHARSET_*
 * bits -- confirm. */
extern char *charset_utf8_to_searchform(const char *s, int flags);
/* Normalize the zero-terminated UTF-8 string s to Unicode NFC
 * normal form.
 *
 * Does not enforce CR LF line endings or the omission of control
 * characters as defined in RFC 5198.
 *
 * See also http://www.unicode.org/reports/tr15/ and RFC 5198. */
extern char *charset_utf8_normalize(const char *s);
/* Create a charset_t instance from a charset name.  Release the result
 * with charset_free().
 * NOTE(review): the result for an unrecognised name is presumably
 * CHARSET_UNKNOWN_CHARSET -- confirm. */
extern charset_t charset_lookupname(const char *name);
/* Takes an integer id and returns a charset_t.
 * NOTE(review): the numbering scheme for 'id' is not visible from this
 * header -- confirm against the implementation. */
extern charset_t charset_lookupnumid(int id);
/* Release a charset_t instance.  Takes the address of the handle.
 * NOTE(review): passing a pointer to the handle presumably lets the
 * implementation reset it after release -- confirm. */
extern void charset_free(charset_t *charset);
/* Return the canonical charset name. */
extern
const
char
*
charset_canon_name
(
charset_t
);
/* Returns the name as provided in lookupname, if any.
* Falls back to returning the canonical name. */
extern
const
char
*
charset_alias_name
(
charset_t
);
/* Takes a string s and returns a comp_pat pointer.
 * NOTE(review): by its name this compiles s into a search pattern, to be
 * released with charset_freepat() -- confirm. */
extern comp_pat *charset_compilepat(const char *s);
/* Takes a comp_pat pointer and returns nothing.
 * NOTE(review): by its name this releases a pattern obtained from
 * charset_compilepat() -- confirm. */
extern void charset_freepat(comp_pat *pat);
/* Takes a substring, a pattern, a text s with explicit length len, and a
 * flags word; returns an int.
 * NOTE(review): presumably searches s for substr/pat and returns non-zero
 * on a match; how substr and pat relate is not visible here -- confirm. */
extern int charset_searchstring(const char *substr, comp_pat *pat,
                                const char *s, size_t len, int flags);
/* Takes a substring, a pattern, a message buffer msg_base with length len,
 * the buffer's charset and encoding, and a flags word; returns an int.
 * NOTE(review): presumably decodes msg_base per 'charset'/'encoding' and
 * searches it, returning non-zero on a match -- confirm. */
extern int charset_searchfile(const char *substr, comp_pat *pat,
                              const char *msg_base, size_t len,
                              charset_t charset, int encoding, int flags);
/* Takes a message buffer msg_base with length len and an encoding, plus two
 * out-pointers, 'retval' and 'outlen'; returns a const char *.
 * NOTE(review): the const result together with a char ** 'retval' suggests
 * the result may alias msg_base when nothing needs decoding, with *retval
 * holding any newly allocated buffer -- confirm before relying on it. */
extern const char *charset_decode_mimebody(const char *msg_base, size_t len,
                                           int encoding, char **retval,
                                           size_t *outlen);
/* Takes a message buffer msg_base with length len, a char buffer 'retval',
 * out-pointers 'outlen' and 'outlines', and an int 'wrap'; returns a char *.
 * NOTE(review): 'retval' is a plain char * here (not char ** as in
 * charset_decode_mimebody), so it is presumably a caller-supplied output
 * buffer; the required size and the meaning of 'wrap' are not visible from
 * this header -- confirm. */
extern char *charset_encode_mimebody(const char *msg_base, size_t len,
                                     char *retval, size_t *outlen,
                                     int *outlines, int wrap);
/* Takes a message buffer msg_base with length len, an int 'force_quote',
 * and an out-pointer 'outlen'; returns a char *.
 * NOTE(review): by its name this produces a quoted-printable encoding of
 * the body; the exact effect of 'force_quote' is not visible here --
 * confirm. */
extern char *charset_qpencode_mimebody(const char *msg_base, size_t len,
                                       int force_quote, size_t *outlen);
/* Takes a message buffer msg_base with length len, its charset, and its
 * encoding; returns a char *.
 * NOTE(review): by its name the result is the content converted to UTF-8,
 * presumably a new string owned by the caller -- confirm. */
extern char *charset_to_utf8(const char *msg_base, size_t len,
                             charset_t charset, int encoding);
/* Takes a message buffer msg_base with length len, its charset, and its
 * encoding; returns a char *.
 * NOTE(review): same parameter list as charset_to_utf8(); by its name the
 * result is in IMAP's modified UTF-7 -- confirm. */
extern char *charset_to_imaputf7(const char *msg_base, size_t len,
                                 charset_t charset, int encoding);
/* Takes a substring, a pattern, a header string s, and a flags word;
 * returns an int.
 * NOTE(review): presumably decodes the MIME header s and searches it,
 * returning non-zero on a match -- confirm. */
extern int charset_search_mimeheader(const char *substr, comp_pat *pat,
                                     const char *s, int flags);
/* Takes a header buffer with explicit length len and an int 'force_quote';
 * returns a char *.
 * NOTE(review): by its name this produces a MIME-encoded header value; the
 * exact effect of 'force_quote' is not visible here -- confirm. */
extern char *charset_encode_mimeheader(const char *header, size_t len,
                                       int force_quote);
/* Takes a header string and returns a char *.
 * NOTE(review): unlike charset_encode_mimeheader() this takes no length,
 * so 'header' is presumably NUL-terminated -- confirm. */
extern char *charset_encode_mimephrase(const char *header);
/* Takes a string s with explicit length len and a flags word; returns a
 * char *.
 * NOTE(review): by its name this undoes header line folding;
 * CHARSET_UNFOLD_SKIPWS is presumably one of the accepted flags --
 * confirm. */
extern char *charset_unfold(const char *s, size_t len, int flags);
/* Takes a destination struct buf, a source buffer src with length len, and
 * an encoding; returns an int.
 * NOTE(review): presumably decodes src per 'encoding' into dst and returns
 * a status code; which value means success is not visible here --
 * confirm. */
extern int charset_decode(struct buf *dst, const char *src, size_t len,
                          int encoding);
/* Extract the body text for the message denoted by 'uid', convert its
 * text to the canonical form for searching, and pass the converted text
 * down in a series of invocations of the callback 'cb'.  This is
 * called by index_getsearchtext to extract the MIME body parts.
 *
 * 'rock' is handed to 'cb' as its second argument type suggests.
 * NOTE(review): this prototype has no 'uid' parameter; the input here is
 * the 'data' buffer, so the reference to 'uid' may be historical --
 * confirm. */
extern int charset_extract(void (*cb)(const struct buf *text, void *rock),
                           void *rock,
                           const struct buf *data,
                           charset_t charset, int encoding,
                           const char *subtype, int flags);
/* Extract plain text from HTML, converting <p> and <br>
 * to newlines and trimming space left by HTML-only lines.
 * NOTE(review): this is the only declaration in this header marked
 * EXPORTED rather than extern, and EXPORTED is not defined in this
 * header -- confirm that is intentional. */
EXPORTED char *charset_extract_plain(const char *html);
/* Character tallies returned by charset_count_validutf8(): one counter
 * each for valid, replacement and invalid UTF-8 characters. */
struct char_counts {
    size_t valid;
    size_t replacement;
    size_t invalid;
};
/* Count the number of valid, invalid and replacement UTF-8 characters
 * in the first INT32_MAX bytes of data.  The three tallies are returned
 * by value in a struct char_counts. */
extern struct char_counts charset_count_validutf8(const char *data,
                                                  size_t datalen);
#endif
/* INCLUDED_CHARSET_H */
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Fri, Apr 24, 12:57 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
18868278
Default Alt Text
charset.h (6 KB)
Attached To
Mode
R111 cyrus-imapd
Attached
Detach File
Event Timeline