diff --git a/kmime/kmime_util.cpp b/kmime/kmime_util.cpp index 84340924e..8848c49d4 100644 --- a/kmime/kmime_util.cpp +++ b/kmime/kmime_util.cpp @@ -1,514 +1,527 @@ /* kmime_util.cpp KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "kmime_util.h" #include "kmime_util_p.h" #include "kmime_header_parsing.h" #include #include // for strcasestr #include #include #include #include #include #include #include #include #include #include #include using namespace KMime; namespace KMime { /* Caches of the charset and language names handed out by cachedCharset() and cachedLanguage(); entries are stored upper-cased. */ QList c_harsetCache; QList l_anguageCache; /* Returns the cache entry that equals name ignoring case (qstricmp); on a miss the upper-cased name is appended to the cache and that new entry is returned. */ QByteArray cachedCharset( const QByteArray &name ) { foreach ( const QByteArray& charset, c_harsetCache ) { if ( qstricmp( name.data(), charset.data() ) == 0 ) { return charset; } } c_harsetCache.append( name.toUpper() ); //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); return c_harsetCache.last(); } /* Same lookup-or-insert scheme as cachedCharset(), operating on the language cache. */ QByteArray cachedLanguage( const QByteArray &name ) { foreach ( const QByteArray& language, l_anguageCache ) { if ( qstricmp( name.data(), language.data() ) == 0 ) { return language; } } l_anguageCache.append( name.toUpper() ); //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); return l_anguageCache.last(); } bool isUsAscii( const QString &s ) { uint sLength = s.length(); for ( uint i=0; i@[\] /* Character-class bitmaps: bit (0x80 >> c%8) of byte c/8 is set when the 7-bit character c belongs to the class; each row of four bytes covers 32 characters. */ const uchar specialsMap[16] = { 0x00, 0x00, 0x00, 0x00, // CTLs 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 0x00, 0x00, 0x00, 0x00 // '`' ... DEL }; // "(),:;<>@[\]/=? const uchar tSpecialsMap[16] = { 0x00, 0x00, 0x00, 0x00, // CTLs 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 0x00, 0x00, 0x00, 0x00 // '`' ... DEL }; // all except specials, CTLs, SPACE. const uchar aTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x5F, 0x35, 0xFF, 0xC5, 0x7F, 0xFF, 0xFF, 0xE3, 0xFF, 0xFF, 0xFF, 0xFE }; // all except tspecials, CTLs, SPACE. 
const uchar tTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x5F, 0x36, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE3, 0xFF, 0xFF, 0xFF, 0xFE }; // none except a-zA-Z0-9!*+-/ const uchar eTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x40, 0x35, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE0, 0x7F, 0xFF, 0xFF, 0xE0 }; QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS ) { QByteArray result; QByteArray spaceBuffer; const char *scursor = src.constData(); const char *send = scursor + src.length(); bool onlySpacesSinceLastWord = false; while ( scursor != send ) { // space if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { spaceBuffer += *scursor++; continue; } // possible start of an encoded word if ( *scursor == '=' ) { QByteArray language; QString decoded; ++scursor; const char *start = scursor; if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { result += decoded.toUtf8(); onlySpacesSinceLastWord = true; spaceBuffer.clear(); } else { if ( onlySpacesSinceLastWord ) { result += spaceBuffer; onlySpacesSinceLastWord = false; } result += '='; scursor = start; // reset cursor after parsing failure } continue; } else { // unencoded data if ( onlySpacesSinceLastWord ) { result += spaceBuffer; onlySpacesSinceLastWord = false; } result += *scursor; ++scursor; } } return QString::fromUtf8(result); } QString decodeRFC2047String( const QByteArray &src ) { QByteArray usedCS; return decodeRFC2047String( src, usedCS, "utf-8", false ); } QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, bool addressHeader, bool allow8BitHeaders ) { QByteArray encoded8Bit, result, usedCS; int start=0, end=0; bool nonAscii=false, ok=true, useQEncoding=false; QTextCodec *codec=0; usedCS = charset; codec = KGlobal::charsets()->codecForName( usedCS, ok ); if ( !ok ) { //no codec available => try local8Bit and hope the best ;-) usedCS = KGlobal::locale()->encoding(); codec = 
KGlobal::charsets()->codecForName( usedCS, ok ); } if ( usedCS.contains( "8859-" ) ) { // iso-8859-x charsets are "Q"-encoded; every other charset keeps the default "B"-encoding useQEncoding = true; } /* NOTE(review): codec is dereferenced without a null check; presumably the fallback lookup can fail as well -- confirm */ encoded8Bit = codec->fromUnicode( src ); if ( allow8BitHeaders ) { return encoded8Bit; } uint encoded8BitLength = encoded8Bit.length(); for ( unsigned int i=0; i@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) { end = start; // non us-ascii char found, now we determine where to stop encoding nonAscii = true; break; } } if ( nonAscii ) { while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { // we encode complete words end++; } for ( int x=end; x@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) { end = encoded8Bit.length(); // we found another non-ascii word /* NOTE(review): end already equals encoded8Bit.length() here, so the loop below never iterates */ while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { // we encode complete words end++; } } } result = encoded8Bit.left( start ) + "=?" + usedCS; if ( useQEncoding ) { result += "?Q?"; char c, hexcode;// "Q"-encoding implementation described in RFC 2047 for ( int i=start; i= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers ( ( c >= '0' ) && ( c <= '9' ) ) ) { result += c; } else { result += '='; // "stolen" from KMail ;-) /* each nibble becomes one hex digit: 48 is '0', and adding 7 moves 58..63 onto 'A'..'F' */ hexcode = ((c & 0xF0) >> 4) + 48; if ( hexcode >= 58 ) { hexcode += 7; } result += hexcode; hexcode = (c & 0x0F) + 48; if ( hexcode >= 58 ) { hexcode += 7; } result += hexcode; } } } } else { result += "?B?" 
+ encoded8Bit.mid( start, end - start ).toBase64(); } result +="?="; result += encoded8Bit.right( encoded8Bit.length() - end ); } else { result = encoded8Bit; } return result; } QByteArray uniqueString() { static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; time_t now; char p[11]; int pos, ran; unsigned int timeval; p[10] = '\0'; now = time( 0 ); ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0)); timeval = (now / ran) + getpid(); for ( int i=0; i<10; i++ ) { pos = (int) (61.0*rand() / (RAND_MAX + 1.0)); //kDebug(5320) << pos; p[i] = chars[pos]; } QByteArray ret; ret.setNum( timeval ); ret += '.'; ret += p; return ret; } QByteArray multiPartBoundary() { return "nextPart" + uniqueString(); } QByteArray unfoldHeader( const QByteArray &header ) { QByteArray result; int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { foldBegin = foldEnd = foldMid; // find the first space before the line-break while ( foldBegin > 0 ) { if ( !QChar( header[foldBegin - 1] ).isSpace() ) { break; } --foldBegin; } // find the first non-space after the line-break while ( foldEnd <= header.length() - 1 ) { if ( !QChar( header[foldEnd] ).isSpace() ) { break; } ++foldEnd; } result += header.mid( pos, foldBegin - pos ); if ( foldEnd < header.length() -1 ) result += ' '; pos = foldEnd; } result += header.mid( pos, header.length() - pos ); return result; } int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) { QByteArray n = name; n.append( ':' ); int begin = -1; if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { begin = 0; } else { n.prepend('\n'); const char *p = strcasestr( src.constData(), n.constData() ); if ( !p ) { begin = -1; } else { begin = p - src.constData(); ++begin; } } if ( begin > -1) { //there is a header with the given name dataBegin = begin + name.length() + 1; //skip the name // skip the usual space after the 
colon if ( src.at( dataBegin ) == ' ' ) { ++dataBegin; } end = dataBegin; int len = src.length() - 1; if ( folded ) *folded = false; if ( src.at(end) != '\n' ) { // check if the header is not empty while ( true ) { end = src.indexOf( '\n', end + 1 ); if ( end == -1 || end == len || ( src[end+1] != ' ' && src[end+1] != '\t' ) ) { //break if we reach the end of the string, honor folded lines break; } else { if ( folded ) *folded = true; } } } if ( end < 0 ) { end = len + 1; //take the rest of the string } return begin; } else { dataBegin = -1; return -1; //header not found } } QByteArray extractHeader( const QByteArray &src, const QByteArray &name ) { int begin, end; bool folded; indexOfHeader( src, name, end, begin, &folded ); if ( begin >= 0 ) { if ( !folded ) { return src.mid( begin, end - begin ); } else { QByteArray hdrValue = src.mid( begin, end - begin ); return unfoldHeader( hdrValue ); } } else { return QByteArray(); //header not found } } QList extractHeaders( const QByteArray &src, const QByteArray &name ) { int begin, end; bool folded; QList result; QByteArray copySrc( src ); indexOfHeader( copySrc, name, end, begin, &folded ); while ( begin >= 0 ) { if ( !folded ) { result.append( copySrc.mid( begin, end - begin ) ); } else { QByteArray hdrValue = copySrc.mid( begin, end - begin ); result.append( unfoldHeader( hdrValue ) ); } // get the next one, a tiny bit ugly, but we don't want the previous to be found again... 
copySrc = copySrc.mid( end ); indexOfHeader( copySrc, name, end, begin, &folded ); } return result; } void removeHeader( QByteArray &header, const QByteArray &name ) { int begin, end, dummy; begin = indexOfHeader( header, name, end, dummy ); if ( begin >= 0 ) { header.remove( begin, end - begin + 1 ); } } QByteArray CRLFtoLF( const QByteArray &s ) { QByteArray ret = s; ret.replace( "\r\n", "\n" ); return ret; } QByteArray LFtoCRLF( const QByteArray &s ) { QByteArray ret = s; ret.replace( "\n", "\r\n" ); return ret; } namespace { template < typename T > void removeQuotesGeneric( T & str ) { bool inQuote = false; for ( int i = 0; i < str.length(); ++i ) { if ( str[i] == '"' ) { str.remove( i, 1 ); i--; inQuote = !inQuote; } else { if ( inQuote && ( str[i] == '\\' ) ) { str.remove( i, 1 ); } } } } } void removeQuots( QByteArray &str ) { removeQuotesGeneric( str ); } void removeQuots( QString &str ) { removeQuotesGeneric( str ); } void addQuotes( QByteArray &str, bool forceQuotes ) { bool needsQuotes=false; for ( int i=0; i < str.length(); i++ ) { if ( strchr("()<>@,.;:[]=\\\"", str[i] ) != 0 ) { needsQuotes = true; } if ( str[i] == '\\' || str[i] == '\"' ) { str.insert( i, '\\' ); i++; } } if ( needsQuotes || forceQuotes ) { str.insert( 0, '\"' ); str.append( "\"" ); } } } // namespace KMime diff --git a/kmime/kmime_util.h b/kmime/kmime_util.h index 759609241..3346614c5 100644 --- a/kmime/kmime_util.h +++ b/kmime/kmime_util.h @@ -1,248 +1,256 @@ /* -*- c++ -*- kmime_util.h KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __KMIME_UTIL_H__ #define __KMIME_UTIL_H__ #include #include "kmime_export.h" +#include "kmime_headers.h" namespace KMime { /** Consult the charset cache. Only used for reducing mem usage by keeping strings in a common repository. @param name the charset name to look up; the comparison ignores case. @return the cached, upper-cased spelling of @p name. */ KMIME_EXPORT extern QByteArray cachedCharset( const QByteArray &name ); /** Consult the language cache. Only used for reducing mem usage by keeping strings in a common repository. @param name the language name to look up; the comparison ignores case. @return the cached, upper-cased spelling of @p name. */ KMIME_EXPORT extern QByteArray cachedLanguage( const QByteArray &name ); /** Checks whether @p s contains any non-us-ascii characters. @param s the string to check. */ KMIME_EXPORT extern bool isUsAscii( const QString &s ); +/** + Returns a user-visible string for a contentEncoding, for example + "quoted-printable" for CEquPr. + TODO should they be i18n'ed? 
+*/ +KMIME_EXPORT extern QString nameForEncoding( KMime::Headers::contentEncoding enc ); + //@cond PRIVATE extern const uchar specialsMap[16]; extern const uchar tSpecialsMap[16]; extern const uchar aTextMap[16]; extern const uchar tTextMap[16]; extern const uchar eTextMap[16]; /* Returns true if bit (0x80 >> ch%8) of map[ch/8] is set; always false for ch >= 128. */ inline bool isOfSet( const uchar map[16], unsigned char ch ) { return ( ch < 128 ) && ( map[ ch/8 ] & 0x80 >> ch%8 ); } /* Membership tests for the individual character-class bitmaps. */ inline bool isSpecial( char ch ) { return isOfSet( specialsMap, ch ); } inline bool isTSpecial( char ch ) { return isOfSet( tSpecialsMap, ch ); } inline bool isAText( char ch ) { return isOfSet( aTextMap, ch ); } inline bool isTText( char ch ) { return isOfSet( tTextMap, ch ); } inline bool isEText( char ch ) { return isOfSet( eTextMap, ch ); } //@endcond /** Decodes string @p src according to RFC 2047, i.e. the construct =?charset?[qb]?encoded?= @param src source string. @param usedCS the detected charset is returned here. @param defaultCS the charset to use in case the detected one isn't known to us. @param forceCS force the use of the default charset. @return the decoded string. */ KMIME_EXPORT extern QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS = QByteArray(), bool forceCS = false ); /** Decodes string @p src according to RFC 2047 (i.e. the =?charset?[qb]?encoded?= construct), using "utf-8" as the default charset. @param src source string. @return the decoded string. */ KMIME_EXPORT extern QString decodeRFC2047String( const QByteArray &src ); /** Encodes string @p src according to RFC 2047 using charset @p charset. @param src source string. @param charset charset to use. @param addressHeader if this flag is true, all special chars like <,>,[,],... will be encoded, too. @param allow8bitHeaders if this flag is true, 8Bit headers are allowed. @return the encoded string. 
*/ KMIME_EXPORT extern QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, bool addressHeader=false, bool allow8bitHeaders=false ); /** Uses current time, pid and random numbers to construct a string that aims to be unique on a per-host basis (i.e. for the local part of a message-id or for multipart boundaries). @return the unique string. @see multiPartBoundary */ KMIME_EXPORT extern QByteArray uniqueString(); /** Constructs a random string (sans leading/trailing "--") that can be used as a multipart delimiter (i.e. as @p boundary parameter to a multipart/... content-type). @return the randomized string. @see uniqueString */ KMIME_EXPORT extern QByteArray multiPartBoundary(); /** Unfolds the given header if necessary. @param header The header to unfold. @return the unfolded header. */ KMIME_EXPORT extern QByteArray unfoldHeader( const QByteArray &header ); /** Tries to extract the header with name @p name from the string @p src, unfolding it if necessary. @param src the source string. @param name the name of the header to search for. @return the value of the first instance of the header @p name in @p src or a null QByteArray if no such header was found. */ KMIME_EXPORT extern QByteArray extractHeader( const QByteArray &src, const QByteArray &name ); /** Tries to extract the headers with name @p name from the string @p src, unfolding them if necessary. @param src the source string. @param name the name of the header to search for. @return the values of all instances of the header @p name in @p src @since 4.2 */ KMIME_EXPORT extern QList extractHeaders( const QByteArray &src, const QByteArray &name ); /** Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF). This function is expensive and should be used only if the mail will be stored locally. All decode functions can cope with both line endings. 
@param s source string containing CRLF's @return the string with every CRLF replaced by LF @see CRLFtoLF(const char*) LFtoCRLF */ KMIME_EXPORT extern QByteArray CRLFtoLF( const QByteArray &s ); /** Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF). This function is expensive and should be used only if the mail will be stored locally. All decode functions can cope with both line endings. @param s source string containing CRLF's @return the string with every CRLF replaced by LF @see CRLFtoLF(const QByteArray&) LFtoCRLF */ KMIME_EXPORT extern QByteArray CRLFtoLF( const char *s ); /** Converts all occurrences of "\n" (LF) in @p s to "\r\n" (CRLF). This function is expensive and should be used only if the mail will be transmitted as an RFC822 message later. All decode functions can cope with and all encode functions can optionally produce both line endings, which is much faster. @param s source string containing LF's @return the string with every LF replaced by CRLF @see CRLFtoLF(const QByteArray&) */ KMIME_EXPORT extern QByteArray LFtoCRLF( const QByteArray &s ); /** Removes quote (DQUOTE) characters and decodes "quoted-pairs" (i.e. backslash-escaped characters). @param str the string to work on. @see addQuotes */ KMIME_EXPORT extern void removeQuots( QByteArray &str ); /** Removes quote (DQUOTE) characters and decodes "quoted-pairs" (i.e. backslash-escaped characters). @param str the string to work on. @see addQuotes */ KMIME_EXPORT extern void removeQuots( QString &str ); /** Converts the given string into a quoted-string if the string contains any special characters (i.e. one of ()<>@,.;:[]=\"). @param str us-ascii string to work on. @param forceQuotes if @p true, always add quote characters. 
*/ KMIME_EXPORT extern void addQuotes( QByteArray &str, bool forceQuotes ); } // namespace KMime #endif /* __KMIME_UTIL_H__ */ diff --git a/kmime/kmime_util_p.h b/kmime/kmime_util_p.h index 1ddfd4fd4..d25b5f6fd 100644 --- a/kmime/kmime_util_p.h +++ b/kmime/kmime_util_p.h @@ -1,47 +1,48 @@ /* Copyright (c) 2007 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef KMIME_UTIL_P_H #define KMIME_UTIL_P_H // @cond PRIVATE /* Internal helper functions. Not part of the public API. */ namespace KMime { /** Finds the first header of type @p name in @p src. @param src the string to search. @param name the header name, without the trailing colon. @param end The end index of the header; not modified if the header is not found. @param dataBegin begin of the data part of the header, -1 if not found. @param folded if non-null, set to true if the header is folded into multiple lines and to false if it is not; not modified if the header is not found. @returns the begin index of the header, -1 if not found. */ extern int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded = 0 ); /** Removes the first occurrence of the header @p name from @p head. */ +// This is used in zero places at the moment. extern void removeHeader( QByteArray &head, const QByteArray &name ); } // @endcond #endif