diff --git a/kmime/kmime_header_parsing.cpp b/kmime/kmime_header_parsing.cpp index 7f628dd59..5b292062f 100644 --- a/kmime/kmime_header_parsing.cpp +++ b/kmime/kmime_header_parsing.cpp @@ -1,2035 +1,2038 @@ /* -*- c++ -*- kmime_header_parsing.cpp KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001-2002 Marc Mutz This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "kmime_header_parsing.h" #include "kmime_codecs.h" #include "kmime_util.h" #include "kmime_dateformatter.h" #include "kmime_warning.h" #include #include #include #include #include #include #include // for isdigit #include using namespace KMime; using namespace KMime::Types; namespace KMime { namespace Types { // QUrl::fromAce is extremely expensive, so only use it when necessary. // Fortunately, the presence of IDNA is readily detected with a substring match... 
static inline QString QUrl_fromAce_wrapper( const QString & domain ) { if ( domain.contains( QLatin1String( "xn--" ) ) ) return QUrl::fromAce( domain.toLatin1() ); else return domain; } static QString addr_spec_as_string( const AddrSpec & as, bool pretty ) { if ( as.isEmpty() ) { return QString(); } bool needsQuotes = false; QString result; result.reserve( as.localPart.length() + as.domain.length() + 1 ); for ( int i = 0 ; i < as.localPart.length() ; ++i ) { const char ch = as.localPart[i].toLatin1(); if ( ch == '.' || isAText( ch ) ) { result += ch; } else { needsQuotes = true; if ( ch == '\\' || ch == '"' ) { result += '\\'; } result += ch; } } const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ; if ( needsQuotes ) { return '"' + result + "\"@" + dom; } else { return result + '@' + dom; } } QString AddrSpec::asString() const { return addr_spec_as_string( *this, false ); } QString AddrSpec::asPrettyString() const { return addr_spec_as_string( *this, true ); } bool AddrSpec::isEmpty() const { return localPart.isEmpty() && domain.isEmpty(); } QByteArray Mailbox::address() const { return mAddrSpec.asString().toLatin1(); } AddrSpec Mailbox::addrSpec() const { return mAddrSpec; } QString Mailbox::name() const { return mDisplayName; } void Mailbox::setAddress( const AddrSpec &addr ) { mAddrSpec = addr; } void Mailbox::setAddress( const QByteArray &addr ) { const char *cursor = addr.constData(); if ( !HeaderParsing::parseAngleAddr( cursor, cursor + addr.length(), mAddrSpec ) ) { if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(), mAddrSpec ) ) { kWarning() << "Invalid address"; return; } } } void Mailbox::setName( const QString &name ) { mDisplayName = name; } void Mailbox::setNameFrom7Bit( const QByteArray &name, const QByteArray &defaultCharset ) { QByteArray cs; mDisplayName = decodeRFC2047String( name, cs, defaultCharset, false ); } bool Mailbox::hasAddress() const { return !mAddrSpec.isEmpty(); } bool Mailbox::hasName() const 
{ return !mDisplayName.isEmpty(); } QString Mailbox::prettyAddress() const { if ( !hasName() ) { return address(); } QString s = name(); if ( hasAddress() ) { s += QLatin1String(" <") + address() + QLatin1Char('>'); } return s; } void Mailbox::fromUnicodeString( const QString &s ) { from7BitString( encodeRFC2047String( s, "utf-8", false ) ); } void Mailbox::from7BitString( const QByteArray &s ) { const char *cursor = s.constData(); HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this ); } QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const { if ( !hasName() ) { return address(); } QByteArray rv; if ( isUsAscii( name() ) ) { QByteArray tmp = name().toLatin1(); addQuotes( tmp, false ); rv += tmp; } else { rv += encodeRFC2047String( name(), encCharset, true ); } if ( hasAddress() ) { rv += " <" + address() + '>'; } return rv; } } // namespace Types namespace HeaderParsing { // parse the encoded-word (scursor points to after the initial '=') bool parseEncodedWord( const char* &scursor, const char * const send, QString &result, QByteArray &language, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS ) { // make sure the caller already did a bit of the work. assert( *(scursor-1) == '=' ); // // STEP 1: // scan for the charset/language portion of the encoded-word // char ch = *scursor++; if ( ch != '?' ) { // kDebug(5320) << "first"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // remember start of charset (ie. just after the initial "=?") and // language (just after the first '*') fields: const char * charsetStart = scursor; const char * languageStart = 0; // find delimiting '?' (and the '*' separating charset and language // tags, if any): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?') { break; } else if ( *scursor == '*' && languageStart == 0 ) { languageStart = scursor + 1; } } // not found? can't be an encoded-word! if ( scursor == send || *scursor != '?' 
) { // kDebug(5320) << "second"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the language information, if any (if languageStart is 0, // language will be null, too): QByteArray maybeLanguage( languageStart, scursor - languageStart ); // extract charset information (keep in mind: the size given to the // ctor is one off due to the \0 terminator): QByteArray maybeCharset( charsetStart, ( languageStart ? languageStart - 1 : scursor ) - charsetStart ); // // STEP 2: // scan for the encoding portion of the encoded-word // // remember start of encoding (just _after_ the second '?'): scursor++; const char * encodingStart = scursor; // find next '?' (ending the encoding tag): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { break; } } // not found? Can't be an encoded-word! if ( scursor == send || *scursor != '?' ) { // kDebug(5320) << "third"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the encoding information: QByteArray maybeEncoding( encodingStart, scursor - encodingStart ); // kDebug(5320) << "parseEncodedWord: found charset == \"" << maybeCharset // << "\"; language == \"" << maybeLanguage // << "\"; encoding == \"" << maybeEncoding << "\""; // // STEP 3: // scan for encoded-text portion of encoded-word // // remember start of encoded-text (just after the third '?'): scursor++; const char * encodedTextStart = scursor; - // find next '?' (ending the encoded-text): + // find the '?=' sequence (ending the encoded-text): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { - break; + if ( scursor + 1 != send ) { + if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore + KMIME_WARN << "Stray '?' in q-encoded word, ignoring this."; + continue; + } + else { // yep, found a '?=' sequence + scursor += 2; + break; + } + } + else { // The '?' is the last char, but we need a '=' after it! 
+ KMIME_WARN_PREMATURE_END_OF( EncodedWord ); + return false; + } } } - // not found? Can't be an encoded-word! - // ### maybe evaluate it nonetheless if the rest is OK? - if ( scursor == send || *scursor != '?' ) { - // kDebug(5320) << "fourth"; + if ( *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' || + scursor < encodedTextStart + 2 ) { KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } - scursor++; - // check for trailing '=': - if ( scursor == send || *scursor != '=' ) { - // kDebug(5320) << "fifth"; - KMIME_WARN_PREMATURE_END_OF( EncodedWord ); - return false; - } - scursor++; // set end sentinel for encoded-text: const char * const encodedTextEnd = scursor - 2; // // STEP 4: // setup decoders for the transfer encoding and the charset // // try if there's a codec for the encoding found: Codec * codec = Codec::codecForName( maybeEncoding ); if ( !codec ) { KMIME_WARN_UNKNOWN( Encoding, maybeEncoding ); return false; } // get an instance of a corresponding decoder: Decoder * dec = codec->makeDecoder(); assert( dec ); // try if there's a (text)codec for the charset found: bool matchOK = false; QTextCodec *textCodec = 0; if ( forceCS || maybeCharset.isEmpty() ) { textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK ); usedCS = cachedCharset( defaultCS ); } else { textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK ); if ( !matchOK ) { //no suitable codec found => use default charset textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK ); usedCS = cachedCharset( defaultCS ); } else { usedCS = cachedCharset( maybeCharset ); } } if ( !matchOK || !textCodec ) { KMIME_WARN_UNKNOWN( Charset, maybeCharset ); delete dec; return false; }; // kDebug(5320) << "mimeName(): \"" << textCodec->name() << "\""; // allocate a temporary buffer to store the 8bit text: int encodedTextLength = encodedTextEnd - encodedTextStart; QByteArray buffer; buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) ); QByteArray::Iterator 
bit = buffer.begin(); QByteArray::ConstIterator bend = buffer.end(); // // STEP 5: // do the actual decoding // if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) ) { KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor(" << encodedTextLength << ")\nresult may be truncated"; } result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() ); // kDebug(5320) << "result now: \"" << result << "\""; // cleanup: delete dec; language = maybeLanguage; return true; } static inline void eatWhiteSpace( const char* &scursor, const char * const send ) { while ( scursor != send && ( *scursor == ' ' || *scursor == '\n' || *scursor == '\t' || *scursor == '\r' ) ) scursor++; } bool parseAtom( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseAtom( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char *start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isAText( ch ) ) { // AText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. 
KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or special - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } bool parseToken( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseToken( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char * start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isTText( ch ) ) { // TText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or tspecial - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } #define READ_ch_OR_FAIL if ( scursor == send ) { \ KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \ return false; \ } else { \ ch = *scursor++; \ } // known issues: // // - doesn't handle quoted CRLF bool parseGenericQuotedString( const char* &scursor, const char * const send, QString &result, bool isCRLF, const char openChar, const char closeChar ) { char ch; // We are in a quoted-string or domain-literal or comment and the // cursor points to the first char after the openChar. // We will apply unfolding and quoted-pair removal. // We return when we either encounter the end or unescaped openChar // or closeChar. 
assert( *(scursor-1) == openChar || *(scursor-1) == closeChar ); while ( scursor != send ) { ch = *scursor++; if ( ch == closeChar || ch == openChar ) { // end of quoted-string or another opening char: // let caller decide what to do. return true; } switch( ch ) { case '\\': // quoted-pair // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 READ_ch_OR_FAIL; KMIME_WARN_IF_8BIT( ch ); result += QChar( ch ); break; case '\r': // ### // The case of lonely '\r' is easy to solve, as they're // not part of Unix Line-ending conventions. // But I see a problem if we are given Unix-native // line-ending-mails, where we cannot determine anymore // whether a given '\n' was part of a CRLF or was occurring // on it's own. READ_ch_OR_FAIL; if ( ch != '\n' ) { // CR on it's own... KMIME_WARN_LONE( CR ); result += QChar('\r'); scursor--; // points to after the '\r' again } else { // CRLF encountered. // lookahead: check for folding READ_ch_OR_FAIL; if ( ch == ' ' || ch == '\t' ) { // correct folding; // position cursor behind the CRLF WSP (unfolding) // and add the WSP to the result result += QChar( ch ); } else { // this is the "shouldn't happen"-case. There is a CRLF // inside a quoted-string without it being part of FWS. // We take it verbatim. KMIME_WARN_NON_FOLDING( CRLF ); result += "\r\n"; // the cursor is decremented again, so's we need not // duplicate the whole switch here. "ch" could've been // everything (incl. openChar or closeChar). scursor--; } } break; case '\n': // Note: CRLF has been handled above already! // ### LF needs special treatment, depending on whether isCRLF // is true (we can be sure a lonely '\n' was meant this way) or // false ('\n' alone could have meant LF or CRLF in the original // message. This parser assumes CRLF iff the LF is followed by // either WSP (folding) or NULL (premature end of quoted-string; // Should be fixed, since NULL is allowed as per rfc822). 
READ_ch_OR_FAIL; if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { // folding // correct folding result += QChar( ch ); } else { // non-folding KMIME_WARN_LONE( LF ); result += QChar('\n'); // pos is decremented, so's we need not duplicate the whole // switch here. ch could've been everything (incl. <">, "\"). scursor--; } break; default: KMIME_WARN_IF_8BIT( ch ); result += QChar( ch ); } } return false; } // known issues: // // - doesn't handle encoded-word inside comments. bool parseComment( const char* &scursor, const char * const send, QString &result, bool isCRLF, bool reallySave ) { int commentNestingDepth = 1; const char *afterLastClosingParenPos = 0; QString maybeCmnt; const char *oldscursor = scursor; assert( *(scursor-1) == '(' ); while ( commentNestingDepth ) { QString cmntPart; if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { assert( *(scursor-1) == ')' || *(scursor-1) == '(' ); // see the kdoc for above function for the possible conditions // we have to check: switch ( *(scursor-1) ) { case ')': if ( reallySave ) { // add the chunk that's now surely inside the comment. result += maybeCmnt; result += cmntPart; if ( commentNestingDepth > 1 ) { // don't add the outermost ')'... result += QChar(')'); } maybeCmnt.clear(); } afterLastClosingParenPos = scursor; --commentNestingDepth; break; case '(': if ( reallySave ) { // don't add to "result" yet, because we might find that we // are already outside the (broken) comment... maybeCmnt += cmntPart; maybeCmnt += QChar('('); } ++commentNestingDepth; break; default: assert( 0 ); } // switch } else { // !parseGenericQuotedString, ie. premature end if ( afterLastClosingParenPos ) { scursor = afterLastClosingParenPos; } else { scursor = oldscursor; } return false; } } // while return true; } // known issues: none. 
bool parsePhrase( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None; QString tmp; QByteArray lang, charset; const char *successfullyParsed = 0; // only used by the encoded-word branch const char *oldscursor; // used to suppress whitespace between adjacent encoded-words // (rfc2047, 6.2): bool lastWasEncodedWord = false; while ( scursor != send ) { char ch = *scursor++; switch ( ch ) { case '.': // broken, but allow for intorop's sake if ( found == None ) { --scursor; return false; } else { if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) { result += ". "; } else { result += '.'; } successfullyParsed = scursor; } break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { successfullyParsed = scursor; assert( *(scursor-1) == '"' ); switch ( found ) { case None: found = QuotedString; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QChar(' '); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { // premature end of quoted string. // What to do? Return leading '"' as special? Return as quoted-string? // We do the latter if we already found something, else signal failure. 
if ( found == None ) { return false; } else { result += QChar(' '); // rfc822, 3.4.4 result += tmp; return true; } } break; case '(': // comment // parse it, but ignore content: tmp.clear(); if ( parseComment( scursor, send, tmp, isCRLF, false /*don't bother with the content*/ ) ) { successfullyParsed = scursor; lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } break; case '=': // encoded-word tmp.clear(); oldscursor = scursor; lang.clear(); charset.clear(); if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = EncodedWord; break; case Phrase: case EncodedWord: case Atom: case QuotedString: if ( !lastWasEncodedWord ) { result += QChar(' '); // rfc822, 3.4.4 } found = Phrase; break; default: assert( 0 ); } lastWasEncodedWord = true; result += tmp; break; } else { // parse as atom: scursor = oldscursor; } // fall though... default: //atom tmp.clear(); scursor--; if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = Atom; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QChar(' '); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } } eatWhiteSpace( scursor, send ); } return found != None; } bool parseDotAtom( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); // always points to just after the last atom parsed: const char *successfullyParsed; QString tmp; if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { return false; } result += tmp; successfullyParsed = scursor; while ( scursor != send ) { // end of header or no '.' 
-> return if ( scursor == send || *scursor != '.' ) { return true; } scursor++; // eat '.' if ( scursor == send || !isAText( *scursor ) ) { // end of header or no AText, but this time following a '.'!: // reset cursor to just after last successfully parsed char and // return: scursor = successfullyParsed; return true; } // try to parse the next atom: QString maybeAtom; if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { scursor = successfullyParsed; return true; } result += QChar('.'); result += maybeAtom; successfullyParsed = scursor; } scursor = successfullyParsed; return true; } void eatCFWS( const char* &scursor, const char * const send, bool isCRLF ) { QString dummy; while ( scursor != send ) { const char *oldscursor = scursor; char ch = *scursor++; switch( ch ) { case ' ': case '\t': // whitespace case '\r': case '\n': // folding continue; case '(': // comment if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) { continue; } scursor = oldscursor; return; default: scursor = oldscursor; return; } } } bool parseDomain( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // domain := dot-atom / domain-literal / atom *("." atom) // // equivalent to: // domain = dot-atom / domain-literal, // since parseDotAtom does allow CFWS between atoms and dots if ( *scursor == '[' ) { // domain-literal: QString maybeDomainLiteral; // eat '[': scursor++; while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, isCRLF, '[', ']' ) ) { if ( scursor == send ) { // end of header: check for closing ']': if ( *(scursor-1) == ']' ) { // OK, last char was ']': result = maybeDomainLiteral; return true; } else { // not OK, domain-literal wasn't closed: return false; } } // we hit openChar in parseGenericQuotedString. 
// include it in maybeDomainLiteral and keep on parsing: if ( *(scursor-1) == '[' ) { maybeDomainLiteral += QChar('['); continue; } // OK, real end of domain-literal: result = maybeDomainLiteral; return true; } } else { // dot-atom: QString maybeDotAtom; if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { result = maybeDotAtom; return true; } } return false; } bool parseObsRoute( const char* &scursor, const char* const send, QStringList &result, bool isCRLF, bool save ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // empty entry: if ( *scursor == ',' ) { scursor++; if ( save ) { result.append( QString() ); } continue; } // empty entry ending the list: if ( *scursor == ':' ) { scursor++; if ( save ) { result.append( QString() ); } return true; } // each non-empty entry must begin with '@': if ( *scursor != '@' ) { return false; } else { scursor++; } QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } if ( save ) { result.append( maybeDomain ); } // eat the following (optional) comma: eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == ':' ) { scursor++; return true; } if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddrSpec( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // // STEP 1: // local-part := dot-atom / quoted-string / word *("." word) // // this is equivalent to: // local-part := word *("." 
word) QString maybeLocalPart; QString tmp; while ( scursor != send ) { // first, eat any whitespace eatCFWS( scursor, send, isCRLF ); char ch = *scursor++; switch ( ch ) { case '.': // dot maybeLocalPart += QChar('.'); break; case '@': goto SAW_AT_SIGN; break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { maybeLocalPart += tmp; } else { return false; } break; default: // atom scursor--; // re-set scursor to point to ch again tmp.clear(); if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { maybeLocalPart += tmp; } else { return false; // parseAtom can only fail if the first char is non-atext. } break; } } return false; // // STEP 2: // domain // SAW_AT_SIGN: assert( *(scursor-1) == '@' ); QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } result.localPart = maybeLocalPart; result.domain = maybeDomain; return true; } bool parseAngleAddr( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // first, we need an opening angle bracket: eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '<' ) { return false; } scursor++; // eat '<' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == '@' || *scursor == ',' ) { // obs-route: parse, but ignore: KMIME_WARN << "obsolete source route found! 
ignoring."; QStringList dummy; if ( !parseObsRoute( scursor, send, dummy, isCRLF, false /* don't save */ ) ) { return false; } // angle-addr isn't complete until after the '>': if ( scursor == send ) { return false; } } // parse addr-spec: AddrSpec maybeAddrSpec; if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '>' ) { return false; } scursor++; result = maybeAddrSpec; return true; } bool parseMailbox( const char* &scursor, const char * const send, Mailbox &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } AddrSpec maybeAddrSpec; QString maybeDisplayName; // first, try if it's a vanilla addr-spec: const char * oldscursor = scursor; if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { result.setAddress( maybeAddrSpec ); // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor == '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } result.setNameFrom7Bit( maybeDisplayName.toLatin1() ); return true; } scursor = oldscursor; // second, see if there's a display-name: if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { // failed: reset cursor, note absent display-name maybeDisplayName.clear(); scursor = oldscursor; } else { // succeeded: eat CFWS eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } } // third, parse the angle-addr: if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } if ( maybeDisplayName.isNull() ) { // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor == '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } } result.setName( maybeDisplayName ); result.setAddress( 
maybeAddrSpec ); return true; } bool parseGroup( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] // // equivalent to: // group := display-name ":" [ obs-mbox-list ] ";" eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // get display-name: QString maybeDisplayName; if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { return false; } // get ":": eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != ':' ) { return false; } result.displayName = maybeDisplayName; // get obs-mbox-list (may contain empty entries): scursor++; while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // empty entry: if ( *scursor == ',' ) { scursor++; continue; } // empty entry ending the list: if ( *scursor == ';' ) { scursor++; return true; } Mailbox maybeMailbox; if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { return false; } result.mailboxList.append( maybeMailbox ); eatCFWS( scursor, send, isCRLF ); // premature end: if ( scursor == send ) { return false; } // regular end of the list: if ( *scursor == ';' ) { scursor++; return true; } // eat regular list entry separator: if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddress( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // address := mailbox / group eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // first try if it's a single mailbox: Mailbox maybeMailbox; const char * oldscursor = scursor; if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { // yes, it is: result.displayName.clear(); result.mailboxList.append( maybeMailbox ); return true; } scursor = oldscursor; Address maybeAddress; // no, it's not a single mailbox. 
Try if it's a group: if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) { return false; } result = maybeAddress; return true; } bool parseAddressList( const char* &scursor, const char * const send, AddressList &result, bool isCRLF ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // empty entry: ignore: if ( *scursor == ',' ) { scursor++; continue; } // broken clients might use ';' as list delimiter, accept that as well if ( *scursor == ';' ) { scursor++; continue; } // parse one entry Address maybeAddress; if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) { return false; } result.append( maybeAddress ); eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // comma separating entries: eat it. if ( *scursor == ',' ) { scursor++; } } return true; } static QString asterisk = QString::fromLatin1( "*0*", 1 ); static QString asteriskZero = QString::fromLatin1( "*0*", 2 ); //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 ); bool parseParameter( const char* &scursor, const char * const send, QPair &result, bool isCRLF ) { // parameter = regular-parameter / extended-parameter // regular-parameter = regular-parameter-name "=" value // extended-parameter = // value = token / quoted-string // // note that rfc2231 handling is out of the scope of this function. // Therefore we return the attribute as QString and the value as // (start,length) tupel if we see that the value is encoded // (trailing asterisk), for parseParameterList to decode... eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // parse the parameter name: // QString maybeAttribute; if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) { return false; } eatCFWS( scursor, send, isCRLF ); // premature end: not OK (haven't seen '=' yet). 
if ( scursor == send || *scursor != '=' ) { return false; } scursor++; // eat '=' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { // don't choke on attribute=, meaning the value was omitted: if ( maybeAttribute.endsWith( asterisk ) ) { KMIME_WARN << "attribute ends with \"*\", but value is empty!" "Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return true; } const char * oldscursor = scursor; // // parse the parameter value: // QStringOrQPair maybeValue; if ( *scursor == '"' ) { // value is a quoted-string: scursor++; if ( maybeAttribute.endsWith( asterisk ) ) { // attributes ending with "*" designate extended-parameters, // which cannot have quoted-strings as values. So we remove the // trailing "*" to not confuse upper layers. KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!" "Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! } } else { // value is a token: if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! } } result = qMakePair( maybeAttribute.toLower(), maybeValue ); return true; } bool parseRawParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { // we use parseParameter() consecutively to obtain a map of raw // attributes to raw values. "Raw" here means that we don't do // rfc2231 decoding and concatenation. This is left to // parseParameterList(), which will call this function. 
// // The main reason for making this chunk of code a separate // (private) method is that we can deal with broken parameters // _here_ and leave the rfc2231 handling solely to // parseParameterList(), which will still be enough work. while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // empty entry ending the list: OK. if ( scursor == send ) { return true; } // empty list entry: ignore. if ( *scursor == ';' ) { scursor++; continue; } QPair maybeParameter; if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { // we need to do a bit of work if the attribute is not // NULL. These are the cases marked with "needs further // processing" in parseParameter(). Specifically, parsing of the // token or the quoted-string, which should represent the value, // failed. We take the easy way out and simply search for the // next ';' to start parsing again. (Another option would be to // take the text between '=' and ';' as value) if ( maybeParameter.first.isNull() ) { return false; } while ( scursor != send ) { if ( *scursor++ == ';' ) { goto IS_SEMICOLON; } } // scursor == send case: end of list. return true; IS_SEMICOLON: // *scursor == ';' case: parse next entry. continue; } // successful parsing brings us here: result.insert( maybeParameter.first, maybeParameter.second ); eatCFWS( scursor, send, isCRLF ); // end of header: ends list. if ( scursor == send ) { return true; } // regular separator: eat it. 
if ( *scursor == ';' ) { scursor++; } } return true; } static void decodeRFC2231Value( Codec* &rfc2231Codec, QTextCodec* &textcodec, bool isContinuation, QString &value, QPair &source ) { // // parse the raw value into (charset,language,text): // const char * decBegin = source.first; const char * decCursor = decBegin; const char * decEnd = decCursor + source.second; if ( !isContinuation ) { // find the first single quote while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { // there wasn't a single single quote at all! // take the whole value to be in latin-1: KMIME_WARN << "No charset in extended-initial-value." "Assuming \"iso-8859-1\"."; value += QString::fromLatin1( decBegin, source.second ); return; } QByteArray charset( decBegin, decCursor - decBegin ); const char * oldDecCursor = ++decCursor; // find the second single quote (we ignore the language tag): while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { KMIME_WARN << "No language in extended-initial-value." 
"Trying to recover."; decCursor = oldDecCursor; } else { decCursor++; } // decCursor now points to the start of the // "extended-other-values": // // get the decoders: // bool matchOK = false; textcodec = KGlobal::charsets()->codecForName( charset, matchOK ); if ( !matchOK ) { textcodec = 0; KMIME_WARN_UNKNOWN( Charset, charset ); } } if ( !rfc2231Codec ) { rfc2231Codec = Codec::codecForName("x-kmime-rfc2231"); assert( rfc2231Codec ); } if ( !textcodec ) { value += QString::fromLatin1( decCursor, decEnd - decCursor ); return; } Decoder * dec = rfc2231Codec->makeDecoder(); assert( dec ); // // do the decoding: // QByteArray buffer; buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); QByteArray::Iterator bit = buffer.begin(); QByteArray::ConstIterator bend = buffer.end(); if ( !dec->decode( decCursor, decEnd, bit, bend ) ) { KMIME_WARN << rfc2231Codec->name() << "codec lies about its maxDecodedSizeFor()" << endl << "result may be truncated"; } value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); // kDebug(5320) << "value now: \"" << value << "\""; // cleanup: delete dec; } // known issues: // - permutes rfc2231 continuations when the total number of parts // exceeds 10 (other-sections then becomes *xy, ie. two digits) bool parseParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { // parse the list into raw attribute-value pairs: QMap rawParameterList; if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) { return false; } if ( rawParameterList.isEmpty() ) { return true; } // decode rfc 2231 continuations and alternate charset encoding: // NOTE: this code assumes that what QMapIterator delivers is sorted // by the key! 
Codec * rfc2231Codec = 0; QTextCodec * textcodec = 0; QString attribute; QString value; enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode; QMap::Iterator it, end = rawParameterList.end(); for ( it = rawParameterList.begin() ; it != end ; ++it ) { if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { // // new attribute: // // store the last attribute/value pair in the result map now: if ( !attribute.isNull() ) { result.insert( attribute, value ); } // and extract the information from the new raw attribute: value.clear(); attribute = it.key(); mode = NoMode; // is the value encoded? if ( attribute.endsWith( asterisk ) ) { attribute.truncate( attribute.length() - 1 ); mode = (Modes) ((int) mode | Encoded); } // is the value continued? if ( attribute.endsWith( asteriskZero ) ) { attribute.truncate( attribute.length() - 2 ); mode = (Modes) ((int) mode | Continued); } // // decode if necessary: // if ( mode & Encoded ) { decodeRFC2231Value( rfc2231Codec, textcodec, false, /* isn't continuation */ value, (*it).qpair ); } else { // not encoded. 
if ( (*it).qpair.first ) { value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); } else { value += (*it).qstring; } } // // shortcut-processing when the value isn't encoded: // if ( !(mode & Continued) ) { // save result already: result.insert( attribute, value ); // force begin of a new attribute: attribute.clear(); } } else { // it.key().startsWith( attribute ) // // continuation // // ignore the section and trust QMap to have sorted the keys: if ( it.key().endsWith( asterisk ) ) { // encoded decodeRFC2231Value( rfc2231Codec, textcodec, true, /* is continuation */ value, (*it).qpair ); } else { // not encoded if ( (*it).qpair.first ) { value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); } else { value += (*it).qstring; } } } } // write last attr/value pair: if ( !attribute.isNull() ) { result.insert( attribute, value ); } return true; } static const char * const stdDayNames[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; static bool parseDayName( const char* &scursor, const char * const send ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( int i = 0 ; i < stdDayNamesLen ; ++i ) { if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { scursor += 3; // kDebug(5320) << "found" << stdDayNames[i]; return true; } } return false; } static const char * const stdMonthNames[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static const int stdMonthNamesLen = sizeof stdMonthNames / sizeof *stdMonthNames; static bool parseMonthName( const char* &scursor, const char * const send, int &result ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( result = 0 ; result < stdMonthNamesLen ; ++result ) { if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { scursor += 3; return true; } } // not found: return false; } static const struct { const char * tzName; long int 
secsEastOfGMT; } timeZones[] = { // rfc 822 timezones: { "GMT", 0 }, { "UT", 0 }, { "EDT", -4*3600 }, { "EST", -5*3600 }, { "MST", -5*3600 }, { "CST", -6*3600 }, { "MDT", -6*3600 }, { "MST", -7*3600 }, { "PDT", -7*3600 }, { "PST", -8*3600 }, // common, non-rfc-822 zones: { "CET", 1*3600 }, { "MET", 1*3600 }, { "UTC", 0 }, { "CEST", 2*3600 }, { "BST", 1*3600 }, // rfc 822 military timezones: { "Z", 0 }, { "A", -1*3600 }, { "B", -2*3600 }, { "C", -3*3600 }, { "D", -4*3600 }, { "E", -5*3600 }, { "F", -6*3600 }, { "G", -7*3600 }, { "H", -8*3600 }, { "I", -9*3600 }, // J is not used! { "K", -10*3600 }, { "L", -11*3600 }, { "M", -12*3600 }, { "N", 1*3600 }, { "O", 2*3600 }, { "P", 3*3600 }, { "Q", 4*3600 }, { "R", 5*3600 }, { "S", 6*3600 }, { "T", 7*3600 }, { "U", 8*3600 }, { "V", 9*3600 }, { "W", 10*3600 }, { "X", 11*3600 }, { "Y", 12*3600 }, }; static const int timeZonesLen = sizeof timeZones / sizeof *timeZones; static bool parseAlphaNumericTimeZone( const char* &scursor, const char * const send, long int &secsEastOfGMT, bool &timeZoneKnown ) { QPair maybeTimeZone( 0, 0 ); if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) { return false; } for ( int i = 0 ; i < timeZonesLen ; ++i ) { if ( qstrnicmp( timeZones[i].tzName, maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { scursor += maybeTimeZone.second; secsEastOfGMT = timeZones[i].secsEastOfGMT; timeZoneKnown = true; return true; } } // don't choke just because we don't happen to know the time zone KMIME_WARN_UNKNOWN( time zone, QByteArray( maybeTimeZone.first, maybeTimeZone.second ) ); secsEastOfGMT = 0; timeZoneKnown = false; return true; } // parse a number and return the number of digits parsed: int parseDigits( const char* &scursor, const char * const send, int &result ) { result = 0; int digits = 0; for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) { result *= 10; result += int( *scursor - '0' ); } return digits; } static bool parseTimeOfDay( const char* &scursor, const 
char * const send, int &hour, int &min, int &sec, bool isCRLF=false ) { // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] // // 2DIGIT representing "hour": // if ( !parseDigits( scursor, send, hour ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != ':' ) { return false; } scursor++; // eat ':' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // 2DIGIT representing "minute": // if ( !parseDigits( scursor, send, min ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return true; // seconds are optional } // // let's see if we have a 2DIGIT representing "second": // if ( *scursor == ':' ) { // yepp, there are seconds: scursor++; // eat ':' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( !parseDigits( scursor, send, sec ) ) { return false; } } else { sec = 0; } return true; } bool parseTime( const char* &scursor, const char * send, int &hour, int &min, int &sec, long int &secsEastOfGMT, bool &timeZoneKnown, bool isCRLF ) { // time := time-of-day CFWS ( zone / obs-zone ) // // obs-zone := "UT" / "GMT" / // "EST" / "EDT" / ; -0500 / -0400 // "CST" / "CDT" / ; -0600 / -0500 // "MST" / "MDT" / ; -0700 / -0600 // "PST" / "PDT" / ; -0800 / -0700 // "A"-"I" / "a"-"i" / // "K"-"Z" / "k"-"z" eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { timeZoneKnown = false; secsEastOfGMT = 0; return true; // allow missing timezone } timeZoneKnown = true; if ( *scursor == '+' || *scursor == '-' ) { // remember and eat '-'/'+': const char sign = *scursor++; // numerical timezone: int maybeTimeZone; if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) { return false; } secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); if ( sign == '-' ) { secsEastOfGMT 
*= -1; if ( secsEastOfGMT == 0 ) { timeZoneKnown = false; // -0000 means indetermined tz } } } else { // maybe alphanumeric timezone: if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) { return false; } } return true; } bool parseDateTime( const char* &scursor, const char * const send, KDateTime &result, bool isCRLF ) { // Parsing date-time; strict mode: // // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date // time // // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" result = KDateTime(); QDateTime maybeDateTime; eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // let's see if there's a day-of-week: // if ( parseDayName( scursor, send ) ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // day-name should be followed by ',' but we treat it as optional: if ( *scursor == ',' ) { scursor++; // eat ',' eatCFWS( scursor, send, isCRLF ); } } // // 1*2DIGIT representing "day" (of month): // int maybeDay; if ( !parseDigits( scursor, send, maybeDay ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // month-name: // int maybeMonth = 0; if ( !parseMonthName( scursor, send, maybeMonth ) ) { return false; } if ( scursor == send ) { return false; } assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); ++maybeMonth; // 0-11 -> 1-12 eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // 2*DIGIT representing "year": // int maybeYear; if ( !parseDigits( scursor, send, maybeYear ) ) { return false; } // RFC 2822 4.3 processing: if ( maybeYear < 50 ) { maybeYear += 2000; } else if ( maybeYear < 1000 ) { maybeYear += 1900; } // else keep as is if ( maybeYear < 1900 ) { return false; // rfc2822, 3.3 } eatCFWS( scursor, 
send, isCRLF ); if ( scursor == send ) { return false; } maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) ); // // time // int maybeHour, maybeMinute, maybeSecond; long int secsEastOfGMT; bool timeZoneKnown = true; if ( !parseTime( scursor, send, maybeHour, maybeMinute, maybeSecond, secsEastOfGMT, timeZoneKnown, isCRLF ) ) { return false; } maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) ); if ( !maybeDateTime.isValid() ) return false; result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) ); if ( !result.isValid() ) return false; return true; } } // namespace HeaderParsing } // namespace KMime diff --git a/kmime/kmime_warning.h b/kmime/kmime_warning.h index 00c331652..620a4800f 100644 --- a/kmime/kmime_warning.h +++ b/kmime/kmime_warning.h @@ -1,60 +1,60 @@ /* kmime_warning.h KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001-2002 Marc Mutz See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #ifndef KMIME_WARNING_H #define KMIME_WARNING_H #ifndef KMIME_NO_WARNING # include -# define KMIME_WARN kWarning(5100) << "Tokenizer Warning: " +# define KMIME_WARN kWarning(5100) << "Tokenizer Warning:" # define KMIME_WARN_UNKNOWN(x,y) KMIME_WARN << "unknown " #x ": \"" \ << y << "\""; # define KMIME_WARN_UNKNOWN_ENCODING KMIME_WARN << "unknown encoding in " \ "RFC 2047 encoded-word (only know 'q' and 'b')"; # define KMIME_WARN_UNKNOWN_CHARSET(c) KMIME_WARN << "unknown charset \"" \ << c << "\" in RFC 2047 encoded-word"; # define KMIME_WARN_8BIT(ch) KMIME_WARN \ - << "8Bit character '" << QString(QChar(ch)) << "'" << endl + << "8Bit character '" << QString(QChar(ch)) << "'" # define KMIME_WARN_IF_8BIT(ch) if ( (unsigned char)(ch) > 127 ) \ { KMIME_WARN_8BIT(ch); } # define KMIME_WARN_PREMATURE_END_OF(x) KMIME_WARN \ - << "Premature end of " #x << endl -# define KMIME_WARN_LONE(x) KMIME_WARN << "Lonely " #x " character" << endl -# define KMIME_WARN_NON_FOLDING(x) KMIME_WARN << "Non-folding " #x << endl + << "Premature end of " #x +# define KMIME_WARN_LONE(x) KMIME_WARN << "Lonely " #x " character" +# define KMIME_WARN_NON_FOLDING(x) KMIME_WARN << "Non-folding " #x # define KMIME_WARN_CTL_OUTSIDE_QS(x) KMIME_WARN << "Control character " \ - #x " outside quoted-string" << endl + #x " outside quoted-string" # define KMIME_WARN_INVALID_X_IN_Y(X,Y) KMIME_WARN << "Invalid character '" \ QString(QChar(X)) << "' in " #Y; # define KMIME_WARN_TOO_LONG(x) KMIME_WARN << #x \ " too long or missing delimiter"; #else # define KMIME_NOP do {} while (0) # define KMIME_WARN_8BIT(ch) KMIME_NOP # define KMIME_WARN_IF_8BIT(ch) KMIME_NOP # define KMIME_WARN_PREMATURE_END_OF(x) KMIME_NOP # define KMIME_WARN_LONE(x) KMIME_NOP # define KMIME_WARN_NON_FOLDING(x) KMIME_NOP # define KMIME_WARN_CTL_OUTSIDE_QS(x) KMIME_NOP #endif #endif diff --git a/kmime/tests/headertest.cpp b/kmime/tests/headertest.cpp index 515ca1240..099180532 100644 --- a/kmime/tests/headertest.cpp +++ 
b/kmime/tests/headertest.cpp @@ -1,724 +1,756 @@ /* Copyright (c) 2006 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "headertest.h" #include #include using namespace KMime; using namespace KMime::Headers; using namespace KMime::Headers::Generics; // the following test cases are taken from KDE mailinglists, bug reports, RFC 2045, // RFC 2183 and RFC 2822, Appendix A QTEST_KDEMAIN( HeaderTest, NoGUI ) void HeaderTest::testIdentHeader() { // empty header Headers::Generics::Ident* h = new Headers::Generics::Ident(); QVERIFY( h->isEmpty() ); // parse single identifier h->from7BitString( QByteArray( "<1162746587.784559.5038.nullmailer@svn.kde.org>" ) ); QCOMPARE( h->identifiers().count(), 1 ); QCOMPARE( h->identifiers().first(), QByteArray( "1162746587.784559.5038.nullmailer@svn.kde.org" ) ); QCOMPARE( h->asUnicodeString(), QString("<1162746587.784559.5038.nullmailer@svn.kde.org>") ); QVERIFY( !h->isEmpty() ); // clearing a header h->clear(); QVERIFY( h->isEmpty() ); QVERIFY( h->identifiers().isEmpty() ); delete h; // parse multiple identifiers h = new Headers::Generics::Ident(); h->from7BitString( QByteArray( "<1234@local.machine.example> <3456@example.net>" ) ); QCOMPARE( h->identifiers().count(), 2 ); QList ids = h->identifiers(); QCOMPARE( ids.takeFirst(), QByteArray( 
"1234@local.machine.example" ) ); QCOMPARE( ids.first(), QByteArray( "3456@example.net" ) ); delete h; // parse multiple identifiers with folded headers h = new Headers::Generics::Ident(); h->from7BitString( QByteArray( "<1234@local.machine.example>\n <3456@example.net>" ) ); QCOMPARE( h->identifiers().count(), 2 ); ids = h->identifiers(); QCOMPARE( ids.takeFirst(), QByteArray( "1234@local.machine.example" ) ); QCOMPARE( ids.first(), QByteArray( "3456@example.net" ) ); // appending of new identifiers (with and without angle-brackets) h->appendIdentifier( "" ); h->appendIdentifier( "78910@example.net" ); QCOMPARE( h->identifiers().count(), 4 ); // assemble the final header QCOMPARE( h->as7BitString( false ), QByteArray("<1234@local.machine.example> <3456@example.net> <78910@example.net>") ); } void HeaderTest::testAddressListHeader() { // empty header Headers::Generics::AddressList *h = new Headers::Generics::AddressList(); QVERIFY( h->isEmpty() ); // parse single simple address h->from7BitString( "joe@where.test" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray("joe@where.test") ); QCOMPARE( h->displayNames().count(), 1 ); QCOMPARE( h->displayNames().first(), QString() ); QCOMPARE( h->prettyAddresses().count(), 1 ); QCOMPARE( h->prettyAddresses().first(), QString("joe@where.test") ); // clearing a header h->clear(); QVERIFY( h->isEmpty() ); delete h; // parsing and re-assembling a single address with display name h = new Headers::Generics::AddressList(); h->from7BitString( "Pete " ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray( "pete@silly.example" ) ); QCOMPARE( h->displayNames().first(), QString("Pete") ); QCOMPARE( h->prettyAddresses().first(), QString("Pete ") ); QCOMPARE( h->as7BitString( false ), QByteArray("Pete ") ); delete h; // parsing a single address with legacy comment style display name h = new Headers::Generics::AddressList(); h->from7BitString( 
"jdoe@machine.example (John Doe)" ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray( "jdoe@machine.example" ) ); QCOMPARE( h->displayNames().first(), QString("John Doe") ); QCOMPARE( h->prettyAddresses().first(), QString("John Doe ") ); delete h; // parsing and re-assembling list of diffrent addresses h = new Headers::Generics::AddressList(); h->from7BitString( "Mary Smith , jdoe@example.org, Who? " ); QCOMPARE( h->addresses().count(), 3 ); QStringList names = h->displayNames(); QCOMPARE( names.takeFirst(), QString("Mary Smith") ); QCOMPARE( names.takeFirst(), QString() ); QCOMPARE( names.takeFirst(), QString("Who?") ); QCOMPARE( h->as7BitString( false ), QByteArray("Mary Smith , jdoe@example.org, Who? ") ); delete h; // same again with some interessting quoting h = new Headers::Generics::AddressList(); h->from7BitString( "\"Joe Q. Public\" , , \"Giant; \\\"Big\\\" Box\" " ); QCOMPARE( h->addresses().count(), 3 ); names = h->displayNames(); QCOMPARE( names.takeFirst(), QString("Joe Q. Public") ); QCOMPARE( names.takeFirst(), QString() ); QCOMPARE( names.takeFirst(), QString("Giant; \"Big\" Box") ); QCOMPARE( h->as7BitString( false ), QByteArray("\"Joe Q. 
Public\" , boss@nil.test, \"Giant; \\\"Big\\\" Box\" ") ); delete h; // a display name with non-latin1 content h = new Headers::Generics::AddressList(); h->from7BitString( "Ingo =?iso-8859-15?q?Kl=F6cker?= " ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray( "kloecker@kde.org" ) ); QCOMPARE( h->displayNames().first(), QString::fromUtf8("Ingo Klöcker") ); QCOMPARE( h->asUnicodeString(), QString::fromUtf8("Ingo Klöcker ") ); QCOMPARE( h->as7BitString( false ), QByteArray("Ingo =?ISO-8859-1?Q?Kl=F6cker?= ") ); delete h; // again, this time legacy style h = new Headers::Generics::AddressList(); h->from7BitString( "kloecker@kde.org (Ingo =?iso-8859-15?q?Kl=F6cker?=)" ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray( "kloecker@kde.org" ) ); QCOMPARE( h->displayNames().first(), QString::fromUtf8("Ingo Klöcker") ); delete h; // parsing a empty group h = new Headers::Generics::AddressList(); h->from7BitString( "Undisclosed recipients:;" ); QCOMPARE( h->addresses().count(), 0 ); delete h; // parsing and re-assembling a address list with a group h = new Headers::Generics::AddressList(); h->from7BitString( "A Group:Chris Jones ,joe@where.test,John ;" ); QCOMPARE( h->addresses().count(), 3 ); names = h->displayNames(); QCOMPARE( names.takeFirst(), QString("Chris Jones") ); QCOMPARE( names.takeFirst(), QString() ); QCOMPARE( names.takeFirst(), QString("John") ); QCOMPARE( h->as7BitString( false ), QByteArray("Chris Jones , joe@where.test, John ") ); delete h; // modifying a header h = new Headers::Generics::AddressList(); h->from7BitString( "John " ); h->addAddress( "", QString::fromUtf8("Ingo Klöcker") ); h->addAddress( "c@a.test" ); QCOMPARE( h->addresses().count(), 3 ); QCOMPARE( h->asUnicodeString(), QString::fromUtf8("John , Ingo Klöcker , c@a.test") ); QCOMPARE( h->as7BitString( false ), QByteArray("John , Ingo =?ISO-8859-1?Q?Kl=F6cker?= , c@a.test") ); delete h; // parsing from utf-8 h = new 
Headers::Generics::AddressList(); h->fromUnicodeString( QString::fromUtf8("Ingo Klöcker "), "utf-8" ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray( "kloecker@kde.org" ) ); QCOMPARE( h->displayNames().first(), QString::fromUtf8("Ingo Klöcker") ); delete h; // based on bug #137033, a header broken in various ways: ';' as list separator, // unquoted '.' in display name h = new Headers::Generics::AddressList(); h->from7BitString( "Vice@censored.serverkompetenz.net,\n President@mail2.censored.net;\"Int\\\\\\\\\\\\\\\\\\\\'l\" Lotto Commission. " ); QCOMPARE( h->addresses().count(), 3 ); names = h->displayNames(); QCOMPARE( names.takeFirst(), QString() ); QCOMPARE( names.takeFirst(), QString() ); // there is an wrong ' ' after the name, but since the header is completely // broken we can be happy it parses at all... QCOMPARE( names.takeFirst(), QString("Int\\\\\\\\\\'l Lotto Commission. ") ); QList addrs = h->addresses(); QCOMPARE( addrs.takeFirst(), QByteArray("Vice@censored.serverkompetenz.net") ); QCOMPARE( addrs.takeFirst(), QByteArray("President@mail2.censored.net") ); QCOMPARE( addrs.takeFirst(), QByteArray("censored@yahoo.fr") ); delete h; // based on bug #102010, a display name containing '<' h = new Headers::Generics::AddressList( 0, QByteArray("\"|") ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray("censored@censored.dy") ); QCOMPARE( h->displayNames().first(), QString("|as7BitString( false ), QByteArray("\"|") ); // based on bug #93790 (legacy display name with nested comments) h = new Headers::Generics::AddressList( 0, QByteArray("first.name@domain.tld (first name (nickname))") ); QCOMPARE( h->displayNames().count(), 1 ); QCOMPARE( h->displayNames().first(), QString("first name (nickname)") ); QCOMPARE( h->as7BitString( false ), QByteArray("\"first name (nickname)\" ") ); delete h; // rfc 2047 encoding in quoted name (which is not allowed there) h = new 
Headers::Generics::AddressList(); h->from7BitString( QByteArray( "\"Ingo =?iso-8859-15?q?Kl=F6cker?=\" " ) ); QCOMPARE( h->mailboxes().count(), 1 ); QCOMPARE( h->asUnicodeString(), QString::fromUtf8( "Ingo =?iso-8859-15?q?Kl=F6cker?= " ) ); delete h; } void HeaderTest::testMailboxListHeader() { // empty header Headers::Generics::MailboxList *h = new Headers::Generics::MailboxList(); QVERIFY( h->isEmpty() ); // parse single simple address h->from7BitString( "joe_smith@where.test" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->mailboxes().count(), 1 ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray("joe_smith@where.test") ); QCOMPARE( h->displayNames().count(), 1 ); QCOMPARE( h->displayNames().first(), QString() ); QCOMPARE( h->prettyAddresses().count(), 1 ); QCOMPARE( h->prettyAddresses().first(), QString("joe_smith@where.test") ); // https://bugzilla.novell.com/show_bug.cgi?id=421057 (but apparently this was not the cause of the bug) h->from7BitString( "fr...@ce.sco (Francesco)" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->mailboxes().count(), 1 ); QCOMPARE( h->prettyAddresses().first(), QString("Francesco ") ); delete h; } void HeaderTest::testSingleMailboxHeader() { // empty header Headers::Generics::SingleMailbox *h = new Headers::Generics::SingleMailbox(); QVERIFY( h->isEmpty() ); // parse single simple address h->from7BitString( "joe_smith@where.test" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->addresses().count(), 1 ); QCOMPARE( h->addresses().first(), QByteArray("joe_smith@where.test") ); QCOMPARE( h->displayNames().count(), 1 ); QCOMPARE( h->displayNames().first(), QString() ); QCOMPARE( h->prettyAddresses().count(), 1 ); QCOMPARE( h->prettyAddresses().first(), QString("joe_smith@where.test") ); delete h; } void HeaderTest::testMailCopiesToHeader() { Headers::MailCopiesTo *h; // empty header h = new Headers::MailCopiesTo(); QVERIFY( h->isEmpty() ); QVERIFY( !h->alwaysCopy() ); QVERIFY( !h->neverCopy() ); // set to always copy to 
poster h->setAlwaysCopy(); QVERIFY( !h->isEmpty() ); QVERIFY( h->alwaysCopy() ); QVERIFY( !h->neverCopy() ); QCOMPARE( h->as7BitString(), QByteArray( "Mail-Copies-To: poster" ) ); // set to never copy h->setNeverCopy(); QVERIFY( !h->isEmpty() ); QVERIFY( !h->alwaysCopy() ); QVERIFY( h->neverCopy() ); QCOMPARE( h->as7BitString(), QByteArray( "Mail-Copies-To: nobody" ) ); // clear header h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse copy to poster h = new MailCopiesTo( 0, "always" ); QVERIFY( h->addresses().isEmpty() ); QVERIFY( !h->isEmpty() ); QVERIFY( h->alwaysCopy() ); delete h; // parse never copy h = new MailCopiesTo( 0, "never" ); QVERIFY( h->addresses().isEmpty() ); QVERIFY( !h->isEmpty() ); QVERIFY( h->neverCopy() ); delete h; // parse address h = new MailCopiesTo( 0, "vkrause@kde.org" ); QVERIFY( !h->addresses().isEmpty() ); QVERIFY( h->alwaysCopy() ); QVERIFY( !h->neverCopy() ); QCOMPARE( h->as7BitString(), QByteArray( "Mail-Copies-To: vkrause@kde.org" ) ); delete h; } void HeaderTest::testParametrizedHeader() { Parametrized *h; // empty header h = new Parametrized(); QVERIFY( h->isEmpty() ); // add a parameter h->setParameter( "filename", "bla.jpg" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->parameter( "filename" ), QString( "bla.jpg" ) ); QCOMPARE( h->as7BitString( false ), QByteArray( "filename=\"bla.jpg\"" ) ); // clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a parameter list h = new Parametrized( 0, "filename=genome.jpeg;\n modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"" ); QCOMPARE( h->parameter( "filename" ), QString( "genome.jpeg" ) ); QCOMPARE( h->parameter( "modification-date" ), QString( "Wed, 12 Feb 1997 16:29:51 -0500" ) ); QCOMPARE( h->as7BitString( false ), QByteArray( "filename=\"genome.jpeg\"; modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"" ) ); delete h; // quoting of whitespaces in parameter value h = new Parametrized(); h->setParameter( "boundary", "simple boundary" ); QCOMPARE( 
h->as7BitString( false ), QByteArray( "boundary=\"simple boundary\"" ) ); delete h; // TODO: test RFC 2047 encoded values // TODO: test case-insensitive key-names } void HeaderTest::testContentDispositionHeader() { ContentDisposition *h; // empty header h = new ContentDisposition(); QVERIFY( h->isEmpty() ); // set some values h->setFilename( "test.jpg" ); QVERIFY( h->isEmpty() ); QVERIFY( h->as7BitString( false ).isEmpty() ); h->setDisposition( CDattachment ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString( false ), QByteArray( "attachment; filename=\"test.jpg\"" ) ); delete h; // parse parameter-less header h = new ContentDisposition( 0, "inline" ); QCOMPARE( h->disposition(), CDinline ); QVERIFY( h->filename().isEmpty() ); QCOMPARE( h->as7BitString( true ), QByteArray( "Content-Disposition: inline" ) ); delete h; // parse header with parameter h = new ContentDisposition( 0, "attachment; filename=genome.jpeg;\n modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"); QCOMPARE( h->disposition(), CDattachment ); QCOMPARE( h->filename(), QString( "genome.jpeg" ) ); delete h; // TODO: test for case-insensitive disposition value } void HeaderTest::testContentTypeHeader() { ContentType* h; // empty header h = new ContentType(); QVERIFY( h->isEmpty() ); // Empty content-type means text/plain (RFC 2045 §5.2) QVERIFY( h->isPlainText() ); QVERIFY( h->isText() ); // set a mimetype h->setMimeType( "text/plain" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->mimeType(), QByteArray( "text/plain" ) ); QCOMPARE( h->mediaType(), QByteArray("text") ); QCOMPARE( h->subType(), QByteArray("plain") ); QVERIFY( h->isText() ); QVERIFY( h->isPlainText() ); QVERIFY( !h->isMultipart() ); QVERIFY( !h->isPartial() ); QVERIFY( h->isMediatype( "text" ) ); QVERIFY( h->isSubtype( "plain" ) ); QCOMPARE( h->as7BitString( true ), QByteArray( "Content-Type: text/plain" ) ); // add some parameters h->setId( "bla" ); h->setCharset( "us-ascii" ); QCOMPARE( h->as7BitString( false ), QByteArray( 
"text/plain; charset=\"us-ascii\"; id=\"bla\"" ) ); // clear header h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a complete header h = new ContentType( 0, "text/plain; charset=us-ascii (Plain text)" ); QVERIFY( h->isPlainText() ); QCOMPARE( h->charset(), QByteArray( "us-ascii" ) ); delete h; // bug #136631 (name with rfc 2231 style parameter wrapping) h = new ContentType( 0, "text/plain;\n name*0=\"PIN_Brief_box1@xx.xxx.censored_Konfigkarte.confi\";\n name*1=\"guration.txt\"" ); QVERIFY( h->isPlainText() ); QCOMPARE( h->name(), QString( "PIN_Brief_box1@xx.xxx.censored_Konfigkarte.configuration.txt" ) ); delete h; } void HeaderTest::testTokenHeader() { Token *h; // empty header h = new Token(); QVERIFY( h->isEmpty() ); // set a token h->setToken( "bla" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString( false ), QByteArray( "bla" ) ); // clear it again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a header h = new Token( 0, "value (comment)" ); QCOMPARE( h->token(), QByteArray("value") ); QCOMPARE( h->as7BitString( false ), QByteArray("value") ); delete h; } void HeaderTest::testContentTransferEncoding() { ContentTransferEncoding *h; // empty header h = new ContentTransferEncoding(); QVERIFY( h->isEmpty() ); // set an encoding h->setEncoding( CEbinary ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString( true ), QByteArray("Content-Transfer-Encoding: binary") ); // clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a header h = new ContentTransferEncoding( 0, "(comment) base64" ); QCOMPARE( h->encoding(), CEbase64 ); QCOMPARE( h->as7BitString( false ), QByteArray("base64") ); delete h; } void HeaderTest::testPhraseListHeader() { PhraseList *h; // empty header h = new PhraseList(); QVERIFY( h->isEmpty() ); delete h; // parse a simple phrase list h = new PhraseList( 0, "foo,\n bar" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->phrases().count(), 2 ); QStringList phrases = h->phrases(); QCOMPARE( phrases.takeFirst(), QString( 
"foo" ) ); QCOMPARE( phrases.takeFirst(), QString( "bar" ) ); QCOMPARE( h->as7BitString( false ), QByteArray("foo, bar") ); // clear header h->clear(); QVERIFY( h->isEmpty() ); delete h; // TODO: encoded/quoted phrases } void HeaderTest::testDotAtomHeader() { DotAtom *h; // empty header h = new DotAtom; QVERIFY( h->isEmpty() ); // parse a simple dot atom h->from7BitString( "1.0 (mime version)" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->asUnicodeString(), QString( "1.0" ) ); // clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // TODO: more complex atoms } void HeaderTest::testDateHeader() { Date *h; // empty header h = new Date(); QVERIFY( h->isEmpty() ); // parse a simple date h->from7BitString( "Fri, 21 Nov 1997 09:55:06 -0600" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->dateTime().date(), QDate( 1997, 11, 21 ) ); QCOMPARE( h->dateTime().time(), QTime( 9, 55, 6 ) ); QCOMPARE( h->dateTime().utcOffset(), -6 * 3600 ); QCOMPARE( h->as7BitString(), QByteArray( "Date: Fri, 21 Nov 1997 09:55:06 -0600" ) ); // clear it again h->clear(); QVERIFY( h->isEmpty() ); delete h; // white spaces and comment (from RFC 2822, Appendix A.5) h = new Date( 0, "Thu,\n 13\n Feb\n 1969\n 23:32\n -0330 (Newfoundland Time)" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->dateTime().date(), QDate( 1969, 2, 13 ) ); QCOMPARE( h->dateTime().time(), QTime( 23, 32 ) ); QCOMPARE( h->dateTime().utcOffset(), -12600 ); QCOMPARE( h->as7BitString( false ), QByteArray( "Thu, 13 Feb 1969 23:32 -0330" ) ); delete h; // obsolete date format (from RFC 2822, Appendix A.6.2) h = new Date( 0, "21 Nov 97 09:55:06 GMT" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->dateTime().date(), QDate( 1997, 11, 21 ) ); QCOMPARE( h->dateTime().time(), QTime( 9, 55, 6 ) ); QCOMPARE( h->dateTime().utcOffset(), 0 ); delete h; // obsolete whitespaces and commnets (from RFC 2822, Appendix A.6.3) h = new Date( 0, "Fri, 21 Nov 1997 09(comment): 55 : 06 -0600" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->dateTime().date(), QDate( 1997, 11, 
21 ) ); QCOMPARE( h->dateTime().time(), QTime( 9, 55, 6 ) ); QCOMPARE( h->dateTime().utcOffset(), -6 * 3600 ); delete h; } void HeaderTest::testLinesHeader() { Lines *h; // empty header h = new Lines(); QVERIFY( h->isEmpty() ); QVERIFY( h->as7BitString().isEmpty() ); // set some content h->setNumberOfLines( 5 ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString(), QByteArray( "Lines: 5" ) ); // clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse header with comment h = new Lines( 0, "(this is a comment) 10 (and yet another comment)" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->numberOfLines(), 10 ); delete h; } void HeaderTest::testNewsgroupsHeader() { Newsgroups *h; // empty header h = new Newsgroups(); QVERIFY( h->isEmpty() ); QVERIFY( h->as7BitString().isEmpty() ); // set newsgroups QList groups; groups << "gmane.comp.kde.devel.core" << "gmane.comp.kde.devel.buildsystem"; h->setGroups( groups ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString(), QByteArray( "Newsgroups: gmane.comp.kde.devel.core,gmane.comp.kde.devel.buildsystem" ) ); // and clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a header h = new Newsgroups( 0, "gmane.comp.kde.devel.core,gmane.comp.kde.devel.buildsystem" ); groups = h->groups(); QCOMPARE( groups.count(), 2 ); QCOMPARE( groups.takeFirst(), QByteArray("gmane.comp.kde.devel.core") ); QCOMPARE( groups.takeFirst(), QByteArray("gmane.comp.kde.devel.buildsystem") ); delete h; // same again, this time with whitespaces and comments h = new Newsgroups(); h->from7BitString( "(comment) gmane.comp.kde.devel.core (second comment),\n gmane.comp.kde.devel.buildsystem (that all)" ); groups = h->groups(); QCOMPARE( groups.count(), 2 ); QCOMPARE( groups.takeFirst(), QByteArray("gmane.comp.kde.devel.core") ); QCOMPARE( groups.takeFirst(), QByteArray("gmane.comp.kde.devel.buildsystem") ); delete h; } void HeaderTest::testControlHeader() { Control *h; // empty header h = new Control(); QVERIFY( h->isEmpty() ); QVERIFY( 
h->as7BitString().isEmpty() ); // set some content h->setCancel( "" ); QVERIFY( !h->isEmpty() ); QVERIFY( h->isCancel() ); QCOMPARE( h->as7BitString(), QByteArray( "Control: cancel " ) ); // clear again h->clear(); QVERIFY( h->isEmpty() ); delete h; // parse a control header h = new Control( 0, "cancel " ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->parameter(), QByteArray("") ); QVERIFY( h->isCancel() ); QCOMPARE( h->controlType(), QByteArray("cancel") ); delete h; } void HeaderTest::testReturnPath() { ReturnPath *h; h = new ReturnPath(); QVERIFY( h->isEmpty() ); QVERIFY( h->as7BitString().isEmpty() ); h->from7BitString( "" ); QVERIFY( !h->isEmpty() ); QCOMPARE( h->as7BitString( true ), QByteArray( "Return-Path: " ) ); delete h; } void HeaderTest::noAbstractHeaders() { From* h2 = new From(); delete h2; Sender* h3 = new Sender(); delete h3; To* h4 = new To(); delete h4; Cc* h5 = new Cc(); delete h5; Bcc* h6 = new Bcc(); delete h6; ReplyTo* h7 = new ReplyTo(); delete h7; Keywords* h8 = new Keywords(); delete h8; MIMEVersion* h9 = new MIMEVersion(); delete h9; MessageID* h10 = new MessageID(); delete h10; ContentID* h11 = new ContentID(); delete h11; Supersedes* h12 = new Supersedes(); delete h12; InReplyTo* h13 = new InReplyTo(); delete h13; References* h14 = new References(); delete h14; Generic* h15 = new Generic(); delete h15; Subject* h16 = new Subject(); delete h16; Organization* h17 = new Organization(); delete h17; ContentDescription* h18 = new ContentDescription(); delete h18; FollowUpTo* h22 = new FollowUpTo(); delete h22; UserAgent* h24 = new UserAgent(); delete h24; } +void HeaderTest::testInvalidButOkQEncoding() +{ + // A stray '?' inside the encoded text should not confuse the parser: the word below ends in "??=" and must still decode. NOTE(review): the literal is split before "?=", presumably so the compiler does not read "??=" as a trigraph - keep the split + Subject subject; + subject.from7BitString( "=?us-ascii?q?Why?_Why_do_some_clients_violate_the_RFC?" "?=" ); + QCOMPARE( subject.as7BitString( false ), QByteArray( "Why? Why do some clients violate the RFC?" 
) ); +} + +void HeaderTest::testInvalidQEncoding_data() +{ + QTest::addColumn("encodedWord"); + + // All examples below should not be treated as invalid encoded strings, since the '?=' is missing + QTest::newRow("") << QString( "=?us-ascii?q?Why?_Why_do_some_clients_violate_the_RFC??" ); + QTest::newRow("") << QString( "=?us-ascii?q?Why?_Why_do_some_clients_violate_the_RFC?" ); + QTest::newRow("") << QString( "=?us-ascii?q?Why?_Why_do_some_clients_violate_the_RFC" ); +} + +void HeaderTest::testInvalidQEncoding() +{ + using namespace HeaderParsing; + QFETCH( QString,encodedWord ); + + const char *data = encodedWord.toAscii().data(); + const char *start = data + 1; + const char *end = data + strlen( data ); + QString result; + QByteArray language; + QByteArray usedCS; + QVERIFY( !parseEncodedWord( start, end, result, language, usedCS ) ); +} + #include "headertest.moc" diff --git a/kmime/tests/headertest.h b/kmime/tests/headertest.h index 1ba3e9a24..760fd6932 100644 --- a/kmime/tests/headertest.h +++ b/kmime/tests/headertest.h @@ -1,53 +1,56 @@ /* Copyright (c) 2006 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #ifndef KMIME_HEADERTEST_H #define KMIME_HEADERTEST_H #include class HeaderTest : public QObject { Q_OBJECT private Q_SLOTS: void testIdentHeader(); void testAddressListHeader(); void testMailboxListHeader(); void testSingleMailboxHeader(); void testMailCopiesToHeader(); void testParametrizedHeader(); void testContentDispositionHeader(); void testContentTypeHeader(); void testTokenHeader(); void testContentTransferEncoding(); void testPhraseListHeader(); void testDotAtomHeader(); void testDateHeader(); void testLinesHeader(); void testNewsgroupsHeader(); void testControlHeader(); void testReturnPath(); + void testInvalidButOkQEncoding(); + void testInvalidQEncoding(); + void testInvalidQEncoding_data(); // makes sure we don't accidently have an abstract header class that's not // meant to be abstract void noAbstractHeaders(); }; #endif diff --git a/kmime/tests/rfc2047test.cpp b/kmime/tests/rfc2047test.cpp index 27ff7e16f..b49b10d7b 100644 --- a/kmime/tests/rfc2047test.cpp +++ b/kmime/tests/rfc2047test.cpp @@ -1,108 +1,111 @@ /* Copyright (c) 2006 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include "rfc2047test.h" #include "rfc2047test.moc" #include using namespace KMime; QTEST_KDEMAIN( RFC2047Test, NoGUI ) void RFC2047Test::testRFC2047decode() { QByteArray encCharset; // empty QCOMPARE( KMime::decodeRFC2047String( QByteArray(), encCharset, "utf-8", false ), QString() ); // identity QCOMPARE( KMime::decodeRFC2047String( "bla", encCharset, "utf-8", false ), QString( "bla" ) ); // utf-8 QCOMPARE( KMime::decodeRFC2047String( "=?utf-8?q?Ingo=20Kl=C3=B6cker?= ", encCharset, "utf-8", false ), QString::fromUtf8( "Ingo Klöcker " ) ); QCOMPARE( KMime::decodeRFC2047String( "=?utf-8?q?Ingo=20Kl=C3=B6cker?= ", encCharset, "iso8859-1", false ), QString::fromUtf8( "Ingo Klöcker " ) ); QCOMPARE( KMime::decodeRFC2047String( "=?utf-8?q?Ingo=20Kl=C3=B6cker?=", encCharset, "utf-8", false ), QString::fromUtf8( "Ingo Klöcker" ) ); QCOMPARE( encCharset, QByteArray( "UTF-8" ) ); // whitespaces between two encoded words QCOMPARE( KMime::decodeRFC2047String( "=?utf-8?q?Ingo=20Kl=C3=B6cker?= =?utf-8?q?Ingo=20Kl=C3=B6cker?=", encCharset, "utf-8", false ), QString::fromUtf8( "Ingo KlöckerIngo Klöcker" ) ); QCOMPARE( decodeRFC2047String( "=?utf-8?q?Ingo=20Kl=C3=B6cker?= foo =?utf-8?q?Ingo=20Kl=C3=B6cker?=", encCharset ), QString::fromUtf8( "Ingo Klöcker foo Ingo Klöcker" ) ); // iso-8859-x QCOMPARE( KMime::decodeRFC2047String( "=?ISO-8859-1?Q?Andr=E9s_Ot=F3n?=", encCharset, "utf-8", false ), QString::fromUtf8( "Andrés Otón" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-1" ) ); QCOMPARE( KMime::decodeRFC2047String( "=?iso-8859-2?q?Rafa=B3_Rzepecki?=", encCharset, "utf-8", false ), QString::fromUtf8( "Rafał Rzepecki" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-2" ) ); QCOMPARE( KMime::decodeRFC2047String( "=?iso-8859-9?Q?S=2E=C7a=F0lar?= Onur", encCharset, "utf-8", false ), QString::fromUtf8( "S.Çağlar Onur" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-9" ) ); QCOMPARE( KMime::decodeRFC2047String( "Rafael =?iso-8859-15?q?Rodr=EDguez?=", encCharset, "utf-8", 
false ), QString::fromUtf8( "Rafael Rodríguez" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-15" ) ); // wrong charset + charset overwrite QCOMPARE( KMime::decodeRFC2047String( "=?iso-8859-1?q?Ingo=20Kl=C3=B6cker?=", encCharset, "utf-8", true ), QString::fromUtf8( "Ingo Klöcker" ) ); // language parameter according to RFC 2231, section 5 QCOMPARE( decodeRFC2047String( "From: =?US-ASCII*EN?Q?Keith_Moore?= ", encCharset ), QString::fromUtf8( "From: Keith Moore " ) ); QCOMPARE( encCharset, QByteArray( "US-ASCII" ) ); // broken qp endoding (using lowercase) QCOMPARE( decodeRFC2047String( "Subject: =?iso-8859-1?Q?Belangrijk=3a=20Verhuizing=20FTP=20server?=", encCharset ), QString::fromUtf8( "Subject: Belangrijk: Verhuizing FTP server" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-1" ) ); // mixed charsets, based on bug 125542 but pasted from above instead since I'm unable to enter those asian symbols QCOMPARE( decodeRFC2047String( "Subject: =?utf-8?q?Ingo=20Kl=C3=B6cker?= unencoded words =?iso-8859-9?Q?S=2E=C7a=F0lar?=", encCharset ), QString::fromUtf8( "Subject: Ingo Klöcker unencoded words S.Çağlar" ) ); QCOMPARE( encCharset, QByteArray( "ISO-8859-9" ) ); + + // Small data + QCOMPARE( decodeRFC2047String( "=?iso-8859-1?Q?c?=", encCharset ), QString::fromUtf8("c") ); } void RFC2047Test::testInvalidDecode() { QByteArray encCharset; // invalid / incomplete encoded data QCOMPARE( decodeRFC2047String( "=", encCharset ), QString::fromUtf8("=") ); QCOMPARE( decodeRFC2047String( "=?", encCharset ), QString::fromUtf8("=?") ); QCOMPARE( decodeRFC2047String( "=?a?b?=", encCharset ), QString::fromUtf8("=?a?b?=") ); QCOMPARE( decodeRFC2047String( "=?a?b?c?", encCharset ), QString::fromUtf8("=?a?b?c?") ); QCOMPARE( decodeRFC2047String( "=?a??c?=", encCharset ), QString::fromUtf8("=?a??c?=") ); } void RFC2047Test::testRFC2047encode() { // empty QCOMPARE( KMime::encodeRFC2047String( QString(), "utf-8" ), QByteArray() ); // identity QCOMPARE( KMime::encodeRFC2047String( "bla", 
"utf-8" ), QByteArray( "bla" ) ); // utf-8 // expected value is probably wrong, libkmime will chose 'B' instead of 'Q' encoding QEXPECT_FAIL( "", "libkmime will chose 'B' instead of 'Q' encoding", Continue ); QCOMPARE( KMime::encodeRFC2047String( QString::fromUtf8( "Ingo Klöcker " ), "utf-8" ).constData(), "=?utf-8?q?Ingo=20Kl=C3=B6cker?= " ); }