diff --git a/kmime/kmime_content.cpp b/kmime/kmime_content.cpp index a2e41b176..d6a0c0a49 100644 --- a/kmime/kmime_content.cpp +++ b/kmime/kmime_content.cpp @@ -1,1173 +1,1179 @@ /* kmime_content.cpp KMime, the KDE Internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. See file AUTHORS for details Copyright (c) 2006 Volker Krause Copyright (c) 2009 Constantin Berzan This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /** @file This file is part of the API for handling @ref MIME data and defines the Content class. @brief Defines the Content class. 
@authors the KMime authors (see AUTHORS file), Volker Krause \ */ #include "kmime_content.h" #include "kmime_content_p.h" #include "kmime_codecs.h" #include "kmime_message.h" #include "kmime_header_parsing.h" #include "kmime_header_parsing_p.h" #include "kmime_parsers.h" #include "kmime_util_p.h" #include #include #include #include #include #include #include #include #include using namespace KMime; namespace KMime { Content::Content() : d_ptr( new ContentPrivate( this ) ) { } Content::Content( Content *parent ) : d_ptr( new ContentPrivate( this ) ) { d_ptr->parent = parent; } Content::Content( const QByteArray &h, const QByteArray &b ) : d_ptr( new ContentPrivate( this ) ) { d_ptr->head = h; d_ptr->body = b; } Content::Content( const QByteArray &h, const QByteArray &b, Content *parent ) : d_ptr( new ContentPrivate( this ) ) { d_ptr->head = h; d_ptr->body = b; d_ptr->parent = parent; } Content::Content( ContentPrivate *d ) : d_ptr( d ) { } Content::~Content() { qDeleteAll( h_eaders ); h_eaders.clear(); delete d_ptr; d_ptr = 0; } bool Content::hasContent() const { return !d_ptr->head.isEmpty() || !d_ptr->body.isEmpty() || !d_ptr->contents().isEmpty(); } void Content::setContent( const QList &l ) { Q_D( Content ); //qDebug("Content::setContent( const QList &l ) : start"); d->head.clear(); d->body.clear(); //usage of textstreams is much faster than simply appending the strings QTextStream hts( &( d->head ), QIODevice::WriteOnly ); QTextStream bts( &( d->body ), QIODevice::WriteOnly ); hts.setCodec( "ISO 8859-1" ); bts.setCodec( "ISO 8859-1" ); bool isHead = true; foreach ( const QByteArray& line, l ) { if ( isHead && line.isEmpty() ) { isHead = false; continue; } if ( isHead ) { hts << line << "\n"; } else { bts << line << "\n"; } } //qDebug("Content::setContent( const QList & l ) : finished"); } void Content::setContent( const QByteArray &s ) { Q_D( Content ); KMime::HeaderParsing::extractHeaderAndBody( s, d->head, d->body ); } QByteArray Content::head() const { 
return d_ptr->head; } void Content::setHead( const QByteArray &head ) { d_ptr->head = head; if ( !head.endsWith( '\n' ) ) { d_ptr->head += '\n'; } } QByteArray Content::body() const { return d_ptr->body; } void Content::setBody( const QByteArray &body ) { d_ptr->body = body; } QByteArray Content::preamble() const { return d_ptr->preamble; } void Content::setPreamble( const QByteArray &preamble ) { d_ptr->preamble = preamble; } QByteArray Content::epilogue() const { return d_ptr->epilogue; } void Content::setEpilogue( const QByteArray &epilogue ) { d_ptr->epilogue = epilogue; } void Content::parse() { Q_D( Content ); // Clean up old headers and parse them again. qDeleteAll( h_eaders ); h_eaders.clear(); h_eaders = HeaderParsing::parseHeaders( d->head ); foreach ( Headers::Base *h, h_eaders ) { h->setParent( this ); } // If we are frozen, save the body as-is. This is done because parsing // changes the content (it loses preambles and epilogues, converts uuencode->mime, etc.) if ( d->frozen ) { d->frozenBody = d->body; } // Clean up old sub-Contents and parse them again. qDeleteAll( d->multipartContents ); d->multipartContents.clear(); d->clearBodyMessage(); Headers::ContentType *ct = contentType(); if ( ct->isText() ) { // This content is either text, or of unknown type. if ( d->parseUuencoded() ) { // This is actually uuencoded content generated by broken software. } else if ( d->parseYenc() ) { // This is actually yenc content generated by broken software. } else { // This is just plain text. } } else if ( ct->isMultipart() ) { // This content claims to be MIME multipart. if ( d->parseMultipart() ) { // This is actual MIME multipart content. } else { // Parsing failed; treat this content as "text/plain". 
ct->setMimeType( "text/plain" ); ct->setCharset( "US-ASCII" ); } } else { // This content is something else, like an encapsulated message or a binary attachment // or something like that if ( bodyIsMessage() ) { d->bodyAsMessage = Message::Ptr( new Message ); d->bodyAsMessage->setContent( d->body ); d->bodyAsMessage->setFrozen( d->frozen ); d->bodyAsMessage->parse(); d->bodyAsMessage->d_ptr->parent = this; // Clear the body, as it is now represented by d->bodyAsMessage. This is the same behavior // as with multipart contents, since parseMultipart() clears the body as well d->body.clear(); } } } bool Content::isFrozen() const { return d_ptr->frozen; } void Content::setFrozen( bool frozen ) { d_ptr->frozen = frozen; } void Content::assemble() { Q_D( Content ); if ( d->frozen ) { return; } d->head = assembleHeaders(); foreach ( Content *c, contents() ) { c->assemble(); } } QByteArray Content::assembleHeaders() { QByteArray newHead; foreach ( const Headers::Base *h, h_eaders ) { if ( !h->isEmpty() ) { newHead += h->as7BitString() + '\n'; } } return newHead; } void Content::clear() { Q_D( Content ); qDeleteAll( h_eaders ); h_eaders.clear(); clearContents(); d->head.clear(); d->body.clear(); } void Content::clearContents( bool del ) { Q_D( Content ); if ( del ) { qDeleteAll( d->multipartContents ); } d->multipartContents.clear(); d->clearBodyMessage(); } QByteArray Content::encodedContent( bool useCrLf ) { - Q_D( Content ); - QByteArray e; - - // Head. - e = d->head; - e += '\n'; - e += encodedBody(); + QByteArray encodedContentData = head(); // return value; initialise with the head data + const QByteArray encodedBodyData = encodedBody(); + + /* Make sure, that head and body have at least two newlines as seperator, otherwise add one. 
+ * If we have enough newlines as sperator, than we should not change the number of newlines + * to not break digital signatures + */ + if (!encodedContentData.endsWith("\n\n") && + !encodedBodyData.startsWith("\n\n") && + !(encodedContentData.endsWith("\n") && encodedBodyData.startsWith("\n"))){ + encodedContentData += '\n'; + } + encodedContentData += encodedBodyData; if ( useCrLf ) { - return LFtoCRLF( e ); + return LFtoCRLF( encodedContentData ); } else { - return e; + return encodedContentData; } } QByteArray Content::encodedBody() { Q_D( Content ); QByteArray e; // Body. if ( d->frozen ) { // This Content is frozen. if ( d->frozenBody.isEmpty() ) { // This Content has never been parsed. e += d->body; } else { // Use the body as it was before parsing. e += d->frozenBody; } } else if ( bodyIsMessage() && d->bodyAsMessage ) { // This is an encapsulated message // No encoding needed, as the ContentTransferEncoding can only be 7bit // for encapsulated messages e += d->bodyAsMessage->encodedContent(); } else if ( !d->body.isEmpty() ) { // This is a single-part Content. Headers::ContentTransferEncoding *enc = contentTransferEncoding(); if ( enc->needToEncode() ) { if ( enc->encoding() == Headers::CEquPr ) { e += KCodecs::quotedPrintableEncode( d->body, false ); } else { e += KCodecs::base64Encode( d->body, true ); e += '\n'; } } else { e += d->body; } } if ( !d->frozen && !d->multipartContents.isEmpty() ) { // This is a multipart Content. Headers::ContentType *ct=contentType(); QByteArray boundary = "\n--" + ct->boundary(); if ( !d->preamble.isEmpty() ) { e += d->preamble; } //add all (encoded) contents separated by boundaries foreach ( Content *c, d->multipartContents ) { e += boundary + '\n'; e += c->encodedContent( false ); // don't convert LFs here, we do that later!!!!! 
} //finally append the closing boundary e += boundary+"--\n"; if ( !d->epilogue.isEmpty() ) { e += d->epilogue; } } return e; } QByteArray Content::decodedContent() { QByteArray ret; Headers::ContentTransferEncoding *ec=contentTransferEncoding(); bool removeTrailingNewline=false; if ( d_ptr->body.length() == 0 ) { return ret; } if ( ec->decoded() ) { ret = d_ptr->body; //Laurent Fix bug #311267 //removeTrailingNewline = true; } else { switch ( ec->encoding() ) { case Headers::CEbase64 : { KMime::Codec *codec = KMime::Codec::codecForName( "base64" ); Q_ASSERT( codec ); ret.resize( codec->maxDecodedSizeFor( d_ptr->body.size() ) ); KMime::Decoder* decoder = codec->makeDecoder(); QByteArray::const_iterator inputIt = d_ptr->body.constBegin(); QByteArray::iterator resultIt = ret.begin(); decoder->decode( inputIt, d_ptr->body.constEnd(), resultIt, ret.end() ); ret.truncate( resultIt - ret.begin() ); break; } case Headers::CEquPr : ret = KCodecs::quotedPrintableDecode( d_ptr->body ); removeTrailingNewline = true; break; case Headers::CEuuenc : KCodecs::uudecode( d_ptr->body, ret ); break; case Headers::CEbinary : ret = d_ptr->body; removeTrailingNewline = false; break; default : ret = d_ptr->body; removeTrailingNewline = true; } } if ( removeTrailingNewline && ( ret.size() > 0 ) && ( ret[ret.size() - 1] == '\n' ) ) { ret.resize( ret.size() - 1 ); } return ret; } QString Content::decodedText( bool trimText, bool removeTrailingNewlines ) { if ( !decodeText() ) { //this is not a text content !! 
return QString(); } bool ok = true; QTextCodec *codec = KGlobal::charsets()->codecForName( QLatin1String( contentType()->charset() ), ok ); if ( !ok || codec == NULL ) { // no suitable codec found => try local settings and hope the best ;-) codec = KGlobal::locale()->codecForEncoding(); QByteArray chset = KGlobal::locale()->encoding(); contentType()->setCharset( chset ); } QString s = codec->toUnicode( d_ptr->body.data(), d_ptr->body.length() ); if ( trimText || removeTrailingNewlines ) { int i; for ( i = s.length() - 1; i >= 0; --i ) { if ( trimText ) { if ( !s[i].isSpace() ) { break; } } else { if ( s[i] != QLatin1Char( '\n' ) ) { break; } } } s.truncate( i + 1 ); } else { if ( s.right( 1 ) == QLatin1String( "\n" ) ) { s.truncate( s.length() - 1 ); // remove trailing new-line } } return s; } void Content::fromUnicodeString( const QString &s ) { bool ok = true; QTextCodec *codec = KGlobal::charsets()->codecForName( QLatin1String( contentType()->charset() ), ok ); if ( !ok ) { // no suitable codec found => try local settings and hope the best ;-) codec = KGlobal::locale()->codecForEncoding(); QByteArray chset = KGlobal::locale()->encoding(); contentType()->setCharset( chset ); } d_ptr->body = codec->fromUnicode( s ); contentTransferEncoding()->setDecoded( true ); //text is always decoded } Content *Content::textContent() { Content *ret=0; //return the first content with mimetype=text/* if ( contentType()->isText() ) { ret = this; } else { foreach ( Content *c, d_ptr->contents() ) { if ( ( ret = c->textContent() ) != 0 ) { break; } } } return ret; } Content::List Content::attachments( bool incAlternatives ) { List attachments; if ( d_ptr->contents().isEmpty() ) { attachments.append( this ); } else { foreach ( Content *c, d_ptr->contents() ) { if ( !incAlternatives && c->contentType()->category() == Headers::CCalternativePart ) { continue; } else { attachments += c->attachments( incAlternatives ); } } } if ( isTopLevel() ) { Content *text = textContent(); if ( text ) 
// Adds @p c as a sub-Content of this Content, at the front of the list of
// sub-Contents when @p prepend is true and at the end otherwise.
//
// If this Content has no sub-Contents yet and its Content-Type is not multipart,
// it is first converted to "multipart/mixed": its MIME headers and its body are
// moved into a newly created sub-Content, which becomes the first part.
void Content::addContent( Content *c, bool prepend )
{
  Q_D( Content );

  // This method makes no sense for encapsulated messages
  Q_ASSERT( !bodyIsMessage() );

  // If this message is single-part; make it multipart first.
  if( d->multipartContents.isEmpty() && !contentType()->isMultipart() ) {
    // The current body will be our first sub-Content.
    Content *main = new Content( this );

    // Move the MIME headers to the newly created sub-Content.
    // NOTE: The other headers (RFC5322 headers like From:, To:, as well as X-headers
    // are not moved to the subcontent; they remain with the top-level content.
    // The iterator is only advanced manually: erase() already returns the
    // position following the removed element.
    for ( Headers::Base::List::iterator it = h_eaders.begin();
          it != h_eaders.end(); ) {
      if ( (*it)->isMimeHeader() ) {
        // Add to new content.
        main->setHeader( *it );
        // Remove from this content.
        it = h_eaders.erase( it );
      } else {
        ++it;
      }
    }

    // Adjust the Content-Type of the newly created sub-Content.
    main->contentType()->setCategory( Headers::CCmixedPart );

    // Move the body to the new subcontent.
    main->setBody( d->body );
    d->body.clear();

    // Add the subcontent.
    d->multipartContents.append( main );

    // Convert this content to "multipart/mixed".
    Headers::ContentType *ct = contentType();
    ct->setMimeType( "multipart/mixed" );
    ct->setBoundary( multiPartBoundary() );
    ct->setCategory( Headers::CCcontainer );
    contentTransferEncoding()->clear(); // 7Bit, decoded.
  }

  // Add the new content.
  if( prepend ) {
    d->multipartContents.prepend( c );
  } else {
    d->multipartContents.append( c );
  }

  // c is already in our list at this point, so setParent() only has to detach
  // it from a previous parent and record the new one.
  if( c->parent() != this ) {
    // If the content was part of something else, this will remove it from there.
    c->setParent( this );
  }
}
// Should be covered by the above assert already, though. Q_ASSERT( !bodyIsMessage() ); d->multipartContents.removeAll( c ); if ( del ) { delete c; } else { c->d_ptr->parent = 0; } // If only one content is left, turn this content into a single-part. if( d->multipartContents.count() == 1 ) { Content *main = d->multipartContents.first(); // Move all headers from the old subcontent to ourselves. // NOTE: This also sets the new Content-Type. foreach( Headers::Base *h, main->h_eaders ) { setHeader( h ); // Will remove the old one if present. } main->h_eaders.clear(); // Move the body. d->body = main->body(); // Delete the old subcontent. delete main; d->multipartContents.clear(); } } void Content::changeEncoding( Headers::contentEncoding e ) { // This method makes no sense for encapsulated messages, they are always 7bit // encoded. Q_ASSERT( !bodyIsMessage() ); Headers::ContentTransferEncoding *enc = contentTransferEncoding(); if( enc->encoding() == e ) { // Nothing to do. return; } if( decodeText() ) { // This is textual content. Textual content is stored decoded. Q_ASSERT( enc->decoded() ); enc->setEncoding( e ); } else { // This is non-textual content. Re-encode it. if( e == Headers::CEbase64 ) { d_ptr->body = KCodecs::base64Encode( decodedContent(), true ); d_ptr->body.append( "\n" ); enc->setEncoding( e ); enc->setDecoded( false ); } else { // It only makes sense to convert binary stuff to base64. Q_ASSERT( false ); } } } void Content::toStream( QTextStream &ts, bool scrambleFromLines ) { QByteArray ret = encodedContent( false ); if ( scrambleFromLines ) { // FIXME Why are only From lines with a preceding empty line considered? // And, of course, all lines starting with >*From have to be escaped // because otherwise the transformation is not revertable. 
ret.replace( "\n\nFrom ", "\n\n>From "); } ts << ret; } Headers::Generic *Content::getNextHeader( QByteArray &head ) { return d_ptr->nextHeader( head ); } Headers::Generic *Content::nextHeader( QByteArray &head ) { return d_ptr->nextHeader( head ); } Headers::Generic *ContentPrivate::nextHeader( QByteArray &_head ) { Headers::Base *header = HeaderParsing::extractFirstHeader( _head ); if ( !header ) { return 0; } // Convert it from the real class to Generic. Headers::Generic *ret = new Headers::Generic( header->type(), q_ptr ); ret->from7BitString( header->as7BitString() ); return ret; } Headers::Base *Content::getHeaderByType( const char *type ) { return headerByType( type ); } Headers::Base *Content::headerByType( const char *type ) { Q_ASSERT( type && *type ); foreach( Headers::Base *h, h_eaders ) { if( h->is( type ) ) { return h; // Found. } } return 0; // Not found. } Headers::Base::List Content::headersByType( const char *type ) { Q_ASSERT( type && *type ); Headers::Base::List result; foreach( Headers::Base *h, h_eaders ) { if( h->is( type ) ) { result << h; } } return result; } void Content::setHeader( Headers::Base *h ) { Q_ASSERT( h ); removeHeader( h->type() ); appendHeader( h ); } void Content::appendHeader( Headers::Base *h ) { h_eaders.append( h ); h->setParent( this ); } void Content::prependHeader( Headers::Base *h ) { h_eaders.prepend( h ); h->setParent( this ); } bool Content::removeHeader( const char *type ) { for ( Headers::Base::List::iterator it = h_eaders.begin(); it != h_eaders.end(); ++it ) if ( (*it)->is(type) ) { delete (*it); h_eaders.erase( it ); return true; } return false; } bool Content::hasHeader( const char *type ) { return headerByType( type ) != 0; } int Content::size() { int ret = d_ptr->body.length(); if ( contentTransferEncoding()->encoding() == Headers::CEbase64 ) { KMime::Codec *codec = KMime::Codec::codecForName( "base64" ); return codec->maxEncodedSizeFor(ret); } // Not handling quoted-printable here since that requires 
actually // converting the content, and that is O(size_of_content). // For quoted-printable, this is only an approximate size. return ret; } int Content::storageSize() const { const Q_D( Content ); int s = d->head.size(); if ( d->contents().isEmpty() ) { s += d->body.size(); } else { // FIXME: This should take into account the boundary headers that are added in // encodedContent! foreach ( Content *c, d->contents() ) { s += c->storageSize(); } } return s; } int Content::lineCount() const { const Q_D( Content ); int ret = 0; if ( !isTopLevel() ) { ret += d->head.count( '\n' ); } ret += d->body.count( '\n' ); foreach ( Content *c, d->contents() ) { ret += c->lineCount(); } return ret; } QByteArray Content::rawHeader( const char *name ) const { return KMime::extractHeader( d_ptr->head, name ); } QList Content::rawHeaders( const char *name ) const { return KMime::extractHeaders( d_ptr->head, name ); } bool Content::decodeText() { Q_D( Content ); Headers::ContentTransferEncoding *enc = contentTransferEncoding(); if ( !contentType()->isText() ) { return false; //non textual data cannot be decoded here => use decodedContent() instead } if ( enc->decoded() ) { return true; //nothing to do } switch( enc->encoding() ) { case Headers::CEbase64 : d->body = KCodecs::base64Decode( d->body ); - d->body.append( "\n" ); break; case Headers::CEquPr : d->body = KCodecs::quotedPrintableDecode( d->body ); break; case Headers::CEuuenc : d->body = KCodecs::uudecode( d->body ); - d->body.append( "\n" ); break; case Headers::CEbinary : // nothing to decode - d->body.append( "\n" ); default : break; } + if (!d->body.endsWith("\n")) { + d->body.append( "\n" ); + } enc->setDecoded( true ); return true; } QByteArray Content::defaultCharset() const { return d_ptr->defaultCS; } void Content::setDefaultCharset( const QByteArray &cs ) { d_ptr->defaultCS = KMime::cachedCharset( cs ); foreach ( Content *c, d_ptr->contents() ) { c->setDefaultCharset( cs ); } // reparse the part and its sub-parts in 
order // to clear cached header values parse(); } bool Content::forceDefaultCharset() const { return d_ptr->forceDefaultCS; } void Content::setForceDefaultCharset( bool b ) { d_ptr->forceDefaultCS = b; foreach ( Content *c, d_ptr->contents() ) { c->setForceDefaultCharset( b ); } // reparse the part and its sub-parts in order // to clear cached header values parse(); } Content * KMime::Content::content( const ContentIndex &index ) const { if ( !index.isValid() ) { return const_cast( this ); } ContentIndex idx = index; unsigned int i = idx.pop() - 1; // one-based -> zero-based index if ( i < (unsigned int)d_ptr->contents().size() ) { return d_ptr->contents()[i]->content( idx ); } else { return 0; } } ContentIndex KMime::Content::indexForContent( Content * content ) const { int i = d_ptr->contents().indexOf( content ); if ( i >= 0 ) { ContentIndex ci; ci.push( i + 1 ); // zero-based -> one-based index return ci; } // not found, we need to search recursively for ( int i = 0; i < d_ptr->contents().size(); ++i ) { ContentIndex ci = d_ptr->contents()[i]->indexForContent( content ); if ( ci.isValid() ) { // found it ci.push( i + 1 ); // zero-based -> one-based index return ci; } } return ContentIndex(); // not found } bool Content::isTopLevel() const { return d_ptr->parent == 0; } void Content::setParent( Content *parent ) { // Make sure the Content is only in the contents list of one parent object Content *oldParent = d_ptr->parent; if ( oldParent ) { if ( !oldParent->contents().isEmpty() && oldParent->contents().contains( this ) ) { oldParent->removeContent( this ); } } d_ptr->parent = parent; if ( parent ) { if ( !parent->contents().isEmpty() && !parent->contents().contains( this ) ) { parent->addContent( this ); } } } Content *Content::parent() const { return d_ptr->parent; } Content *Content::topLevel() const { Content *top = const_cast(this); Content *c = parent(); while ( c ) { top = c; c = c->parent(); } return top; } ContentIndex Content::index() const { Content* 
top = topLevel(); if ( top ) { return top->indexForContent( const_cast(this) ); } return indexForContent( const_cast(this) ); } Message::Ptr Content::bodyAsMessage() const { if ( bodyIsMessage() && d_ptr->bodyAsMessage ) { return d_ptr->bodyAsMessage; } else { return Message::Ptr(); } } bool Content::bodyIsMessage() const { // Use const_case here to work around API issue that neither header() nor hasHeader() are // const, even though they should be return const_cast( this )->header( false ) && const_cast( this )->header( true ) ->mimeType().toLower() == "message/rfc822"; } // @cond PRIVATE #define kmime_mk_header_accessor( type, method ) \ Headers::type *Content::method( bool create ) { \ return header( create ); \ } kmime_mk_header_accessor( ContentType, contentType ) kmime_mk_header_accessor( ContentTransferEncoding, contentTransferEncoding ) kmime_mk_header_accessor( ContentDisposition, contentDisposition ) kmime_mk_header_accessor( ContentDescription, contentDescription ) kmime_mk_header_accessor( ContentLocation, contentLocation ) kmime_mk_header_accessor( ContentID, contentID ) #undef kmime_mk_header_accessor // @endcond void ContentPrivate::clearBodyMessage() { bodyAsMessage.reset(); } Content::List ContentPrivate::contents() const { Q_ASSERT( multipartContents.isEmpty() || !bodyAsMessage ); if ( bodyAsMessage ) return Content::List() << bodyAsMessage.get(); else return multipartContents; } bool ContentPrivate::parseUuencoded() { Q_Q( Content ); Parser::UUEncoded uup( body, KMime::extractHeader( head, "Subject" ) ); if( !uup.parse() ) { return false; // Parsing failed. } Headers::ContentType *ct = q->contentType(); ct->clear(); if( uup.isPartial() ) { // This seems to be only a part of the message, so we treat it as "message/partial". 
ct->setMimeType( "message/partial" ); //ct->setId( uniqueString() ); not needed yet ct->setPartialParams( uup.partialCount(), uup.partialNumber() ); q->contentTransferEncoding()->setEncoding( Headers::CE7Bit ); } else { // This is a complete message, so treat it as "multipart/mixed". body.clear(); ct->setMimeType( "multipart/mixed" ); ct->setBoundary( multiPartBoundary() ); ct->setCategory( Headers::CCcontainer ); q->contentTransferEncoding()->clear(); // 7Bit, decoded. // Add the plain text part first. Q_ASSERT( multipartContents.count() == 0 ); { Content *c = new Content( q ); c->contentType()->setMimeType( "text/plain" ); c->contentTransferEncoding()->setEncoding( Headers::CE7Bit ); c->setBody( uup.textPart() ); multipartContents.append( c ); } // Now add each of the binary parts as sub-Contents. for( int i = 0; i < uup.binaryParts().count(); ++i ) { Content *c = new Content( q ); c->contentType()->setMimeType( uup.mimeTypes().at( i ) ); c->contentType()->setName( QLatin1String( uup.filenames().at( i ) ), QByteArray( /*charset*/ ) ); c->contentTransferEncoding()->setEncoding( Headers::CEuuenc ); c->contentTransferEncoding()->setDecoded( false ); c->contentDisposition()->setDisposition( Headers::CDattachment ); c->contentDisposition()->setFilename( QLatin1String( uup.filenames().at( i ) ) ); c->setBody( uup.binaryParts().at( i ) ); c->changeEncoding( Headers::CEbase64 ); // Convert to base64. multipartContents.append( c ); } } return true; // Parsing successful. } bool ContentPrivate::parseYenc() { Q_Q( Content ); Parser::YENCEncoded yenc( body ); if ( !yenc.parse() ) { return false; // Parsing failed. } Headers::ContentType *ct = q->contentType(); ct->clear(); if ( yenc.isPartial() ) { // Assume there is exactly one decoded part. Treat this as "message/partial". 
ct->setMimeType( "message/partial" ); //ct->setId( uniqueString() ); not needed yet ct->setPartialParams( yenc.partialCount(), yenc.partialNumber() ); q->contentTransferEncoding()->setEncoding( Headers::CEbinary ); q->changeEncoding( Headers::CEbase64 ); // Convert to base64. } else { // This is a complete message, so treat it as "multipart/mixed". body.clear(); ct->setMimeType( "multipart/mixed" ); ct->setBoundary( multiPartBoundary() ); ct->setCategory( Headers::CCcontainer ); q->contentTransferEncoding()->clear(); // 7Bit, decoded. // Add the plain text part first. Q_ASSERT( multipartContents.count() == 0 ); { Content *c = new Content( q ); c->contentType()->setMimeType( "text/plain" ); c->contentTransferEncoding()->setEncoding( Headers::CE7Bit ); c->setBody( yenc.textPart() ); multipartContents.append( c ); } // Now add each of the binary parts as sub-Contents. for ( int i=0; icontentType()->setMimeType( yenc.mimeTypes().at( i ) ); c->contentType()->setName( QLatin1String( yenc.filenames().at( i ) ), QByteArray( /*charset*/ ) ); c->contentTransferEncoding()->setEncoding( Headers::CEbinary ); c->contentDisposition()->setDisposition( Headers::CDattachment ); c->contentDisposition()->setFilename( QLatin1String( yenc.filenames().at( i ) ) ); c->setBody( yenc.binaryParts().at( i ) ); // Yenc bodies are binary. c->changeEncoding( Headers::CEbase64 ); // Convert to base64. multipartContents.append( c ); } } return true; // Parsing successful. } bool ContentPrivate::parseMultipart() { Q_Q( Content ); const Headers::ContentType *ct = q->contentType(); const QByteArray boundary = ct->boundary(); if ( boundary.isEmpty() ) { return false; // Parsing failed; invalid multipart content. } Parser::MultiPart mpp( body, boundary ); if ( !mpp.parse() ) { return false; // Parsing failed. } preamble = mpp.preamble(); epilogue = mpp.epilouge(); // Determine the category of the subparts (used in attachments()). 
Headers::contentCategory cat; if ( ct->isSubtype( "alternative" ) ) { cat = Headers::CCalternativePart; } else { cat = Headers::CCmixedPart; // Default to "mixed". } // Create a sub-Content for every part. Q_ASSERT( multipartContents.isEmpty() ); body.clear(); QList parts = mpp.parts(); foreach ( const QByteArray &part, mpp.parts() ) { Content *c = new Content( q ); c->setContent( part ); c->setFrozen( frozen ); c->parse(); c->contentType()->setCategory( cat ); multipartContents.append( c ); } return true; // Parsing successful. } } // namespace KMime diff --git a/kmime/kmime_header_parsing.cpp b/kmime/kmime_header_parsing.cpp index 9cc5e7508..8acd8e4bb 100644 --- a/kmime/kmime_header_parsing.cpp +++ b/kmime/kmime_header_parsing.cpp @@ -1,2274 +1,2277 @@ /* -*- c++ -*- kmime_header_parsing.cpp KMime, the KDE Internet mail/usenet news message library. Copyright (c) 2001-2002 Marc Mutz This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "kmime_header_parsing.h" #include "kmime_codecs.h" #include "kmime_headerfactory_p.h" #include "kmime_headers.h" #include "kmime_util.h" #include "kmime_util_p.h" #include "kmime_dateformatter.h" #include "kmime_warning.h" #include #include #include #include #include #include #include // for isdigit #include using namespace KMime; using namespace KMime::Types; namespace KMime { namespace Types { // QUrl::fromAce is extremely expensive, so only use it when necessary. // Fortunately, the presence of IDNA is readily detected with a substring match... static inline QString QUrl_fromAce_wrapper( const QString & domain ) { if ( domain.contains( QLatin1String( "xn--" ) ) ) { return QUrl::fromAce( domain.toLatin1() ); } else { return domain; } } static QString addr_spec_as_string( const AddrSpec & as, bool pretty ) { if ( as.isEmpty() ) { return QString(); } static QChar dotChar = QLatin1Char( '.' ); static QChar backslashChar = QLatin1Char( '\\' ); static QChar quoteChar = QLatin1Char( '"' ); bool needsQuotes = false; QString result; result.reserve( as.localPart.length() + as.domain.length() + 1 ); for ( int i = 0 ; i < as.localPart.length() ; ++i ) { const QChar ch = as.localPart.at( i ); if ( ch == dotChar || isAText( ch.toLatin1() ) ) { result += ch; } else { needsQuotes = true; if ( ch == backslashChar || ch == quoteChar ) { result += backslashChar; } result += ch; } } const QString dom = pretty ? 
QUrl_fromAce_wrapper( as.domain ) : as.domain ; if ( needsQuotes ) { result = quoteChar + result + quoteChar; } if ( dom.isEmpty() ) { return result; } else { result += QLatin1Char( '@' ); result += dom; return result; } } QString AddrSpec::asString() const { return addr_spec_as_string( *this, false ); } QString AddrSpec::asPrettyString() const { return addr_spec_as_string( *this, true ); } bool AddrSpec::isEmpty() const { return localPart.isEmpty() && domain.isEmpty(); } QByteArray Mailbox::address() const { QByteArray result; const QString asString = addr_spec_as_string( mAddrSpec, false ); if ( !asString.isEmpty() ) { result = asString.toLatin1(); } return result; //return mAddrSpec.asString().toLatin1(); } AddrSpec Mailbox::addrSpec() const { return mAddrSpec; } QString Mailbox::name() const { return mDisplayName; } void Mailbox::setAddress( const AddrSpec &addr ) { mAddrSpec = addr; } void Mailbox::setAddress( const QByteArray &addr ) { const char *cursor = addr.constData(); if ( !HeaderParsing::parseAngleAddr( cursor, cursor + addr.length(), mAddrSpec ) ) { if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(), mAddrSpec ) ) { kWarning() << "Invalid address"; return; } } } void Mailbox::setName( const QString &name ) { mDisplayName = removeBidiControlChars( name ); } void Mailbox::setNameFrom7Bit( const QByteArray &name, const QByteArray &defaultCharset ) { QByteArray cs; setName( decodeRFC2047String( name, cs, defaultCharset, false ) ); } bool Mailbox::hasAddress() const { return !mAddrSpec.isEmpty(); } bool Mailbox::hasName() const { return !mDisplayName.isEmpty(); } QString Mailbox::prettyAddress() const { return prettyAddress( QuoteNever ); } QString Mailbox::prettyAddress( Quoting quoting ) const { if ( !hasName() ) { return QLatin1String( address() ); } QString s = name(); if ( quoting != QuoteNever ) { addQuotes( s, quoting == QuoteAlways /*bool force*/ ); } if ( hasAddress() ) { s += QLatin1String( " <" ) + QLatin1String( address() ) + 
QLatin1Char( '>' ); } return s; } void Mailbox::fromUnicodeString( const QString &s ) { from7BitString( encodeRFC2047Sentence( s, "utf-8" ) ); } void Mailbox::from7BitString( const QByteArray &s ) { const char *cursor = s.constData(); HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this ); } QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const { if ( !hasName() ) { return address(); } QByteArray rv; if ( isUsAscii( name() ) ) { QByteArray tmp = name().toLatin1(); addQuotes( tmp, false ); rv += tmp; } else { rv += encodeRFC2047String( name(), encCharset, true ); } if ( hasAddress() ) { rv += " <" + address() + '>'; } return rv; } } // namespace Types namespace HeaderParsing { // parse the encoded-word (scursor points to after the initial '=') bool parseEncodedWord( const char* &scursor, const char * const send, QString &result, QByteArray &language, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS ) { // make sure the caller already did a bit of the work. assert( *( scursor - 1 ) == '=' ); // // STEP 1: // scan for the charset/language portion of the encoded-word // char ch = *scursor++; if ( ch != '?' ) { // kDebug() << "first"; //KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // remember start of charset (ie. just after the initial "=?") and // language (just after the first '*') fields: const char * charsetStart = scursor; const char * languageStart = 0; // find delimiting '?' (and the '*' separating charset and language // tags, if any): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { break; } else if ( *scursor == '*' && languageStart == 0 ) { languageStart = scursor + 1; } } // not found? can't be an encoded-word! if ( scursor == send || *scursor != '?' 
) { // kDebug() << "second"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the language information, if any (if languageStart is 0, // language will be null, too): QByteArray maybeLanguage( languageStart, scursor - languageStart ); // extract charset information (keep in mind: the size given to the // ctor is one off due to the \0 terminator): QByteArray maybeCharset( charsetStart, ( languageStart ? languageStart - 1 : scursor ) - charsetStart ); // // STEP 2: // scan for the encoding portion of the encoded-word // // remember start of encoding (just _after_ the second '?'): scursor++; const char * encodingStart = scursor; // find next '?' (ending the encoding tag): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { break; } } // not found? Can't be an encoded-word! if ( scursor == send || *scursor != '?' ) { // kDebug() << "third"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the encoding information: QByteArray maybeEncoding( encodingStart, scursor - encodingStart ); // kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset // << "\"; language == \"" << maybeLanguage // << "\"; encoding == \"" << maybeEncoding << "\""; // // STEP 3: // scan for encoded-text portion of encoded-word // // remember start of encoded-text (just after the third '?'): scursor++; const char * encodedTextStart = scursor; // find the '?=' sequence (ending the encoded-text): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { if ( scursor + 1 != send ) { if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore KMIME_WARN << "Stray '?' in q-encoded word, ignoring this."; continue; } else { // yep, found a '?=' sequence scursor += 2; break; } } else { // The '?' is the last char, but we need a '=' after it! KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } } } if ( *( scursor - 2 ) != '?' 
|| *( scursor - 1 ) != '=' || scursor < encodedTextStart + 2 ) { KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // set end sentinel for encoded-text: const char * const encodedTextEnd = scursor - 2; // // STEP 4: // setup decoders for the transfer encoding and the charset // // try if there's a codec for the encoding found: Codec * codec = Codec::codecForName( maybeEncoding ); if ( !codec ) { KMIME_WARN_UNKNOWN( Encoding, maybeEncoding ); return false; } // get an instance of a corresponding decoder: Decoder * dec = codec->makeDecoder(); assert( dec ); // try if there's a (text)codec for the charset found: bool matchOK = false; QTextCodec *textCodec = 0; if ( forceCS || maybeCharset.isEmpty() ) { textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); usedCS = cachedCharset( defaultCS ); } else { textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK ); if ( !matchOK ) { //no suitable codec found => use default charset textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); usedCS = cachedCharset( defaultCS ); } else { usedCS = cachedCharset( maybeCharset ); } } if ( !matchOK || !textCodec ) { KMIME_WARN_UNKNOWN( Charset, maybeCharset ); delete dec; return false; }; // kDebug() << "mimeName(): \"" << textCodec->name() << "\""; // allocate a temporary buffer to store the 8bit text: int encodedTextLength = encodedTextEnd - encodedTextStart; QByteArray buffer; buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) ); char *bbegin = buffer.data(); char *bend = bbegin + buffer.length(); // // STEP 5: // do the actual decoding // if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) { KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor(" << encodedTextLength << ")\nresult may be truncated"; } result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() ); // kDebug() << "result now: \"" << result << "\""; // cleanup: 
delete dec; language = maybeLanguage; return true; } static inline void eatWhiteSpace( const char* &scursor, const char * const send ) { while ( scursor != send && ( *scursor == ' ' || *scursor == '\n' || *scursor == '\t' || *scursor == '\r' ) ) scursor++; } bool parseAtom( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseAtom( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char *start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isAText( ch ) ) { // AText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or special - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } // FIXME: Remove this and the other parseToken() method. add a new one where "result" is a // QByteArray. bool parseToken( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseToken( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char * start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isTText( ch ) ) { // TText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. 
KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or tspecial - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } #define READ_ch_OR_FAIL if ( scursor == send ) { \ KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \ return false; \ } else { \ ch = *scursor++; \ } // known issues: // // - doesn't handle quoted CRLF // FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't // know about encodings yet! bool parseGenericQuotedString( const char* &scursor, const char * const send, QString &result, bool isCRLF, const char openChar, const char closeChar ) { char ch; // We are in a quoted-string or domain-literal or comment and the // cursor points to the first char after the openChar. // We will apply unfolding and quoted-pair removal. // We return when we either encounter the end or unescaped openChar // or closeChar. assert( *( scursor - 1 ) == openChar || *( scursor - 1 ) == closeChar ); while ( scursor != send ) { ch = *scursor++; if ( ch == closeChar || ch == openChar ) { // end of quoted-string or another opening char: // let caller decide what to do. return true; } switch ( ch ) { case '\\': // quoted-pair // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 READ_ch_OR_FAIL; KMIME_WARN_IF_8BIT( ch ); result += QLatin1Char( ch ); break; case '\r': // ### // The case of lonely '\r' is easy to solve, as they're // not part of Unix Line-ending conventions. // But I see a problem if we are given Unix-native // line-ending-mails, where we cannot determine anymore // whether a given '\n' was part of a CRLF or was occurring // on it's own. READ_ch_OR_FAIL; if ( ch != '\n' ) { // CR on it's own... KMIME_WARN_LONE( CR ); result += QLatin1Char( '\r' ); scursor--; // points to after the '\r' again } else { // CRLF encountered. 
// lookahead: check for folding READ_ch_OR_FAIL; if ( ch == ' ' || ch == '\t' ) { // correct folding; // position cursor behind the CRLF WSP (unfolding) // and add the WSP to the result result += QLatin1Char( ch ); } else { // this is the "shouldn't happen"-case. There is a CRLF // inside a quoted-string without it being part of FWS. // We take it verbatim. KMIME_WARN_NON_FOLDING( CRLF ); result += QLatin1String( "\r\n" ); // the cursor is decremented again, so's we need not // duplicate the whole switch here. "ch" could've been // everything (incl. openChar or closeChar). scursor--; } } break; case '\n': // Note: CRLF has been handled above already! // ### LF needs special treatment, depending on whether isCRLF // is true (we can be sure a lonely '\n' was meant this way) or // false ('\n' alone could have meant LF or CRLF in the original // message. This parser assumes CRLF iff the LF is followed by // either WSP (folding) or NULL (premature end of quoted-string; // Should be fixed, since NULL is allowed as per rfc822). READ_ch_OR_FAIL; if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { // folding // correct folding result += QLatin1Char( ch ); } else { // non-folding KMIME_WARN_LONE( LF ); result += QLatin1Char( '\n' ); // pos is decremented, so's we need not duplicate the whole // switch here. ch could've been everything (incl. <">, "\"). scursor--; } break; case '=': { // ### Work around broken clients that send encoded words in quoted-strings // For example, older KMail versions. if ( scursor == send ) { break; } const char *oldscursor = scursor; QString tmp; QByteArray lang, charset; if ( *scursor++ == '?' ) { --scursor; if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { result += tmp; break; } else { scursor = oldscursor; } } else { scursor = oldscursor; } // fall through } default: KMIME_WARN_IF_8BIT( ch ); result += QLatin1Char( ch ); } } return false; } // known issues: // // - doesn't handle encoded-word inside comments. 
bool parseComment( const char* &scursor, const char * const send, QString &result, bool isCRLF, bool reallySave ) { int commentNestingDepth = 1; const char *afterLastClosingParenPos = 0; QString maybeCmnt; const char *oldscursor = scursor; assert( *( scursor - 1 ) == '(' ); while ( commentNestingDepth ) { QString cmntPart; if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { assert( *( scursor - 1 ) == ')' || *( scursor - 1 ) == '(' ); // see the kdoc for above function for the possible conditions // we have to check: switch ( *( scursor - 1 ) ) { case ')': if ( reallySave ) { // add the chunk that's now surely inside the comment. result += maybeCmnt; result += cmntPart; if ( commentNestingDepth > 1 ) { // don't add the outermost ')'... result += QLatin1Char( ')' ); } maybeCmnt.clear(); } afterLastClosingParenPos = scursor; --commentNestingDepth; break; case '(': if ( reallySave ) { // don't add to "result" yet, because we might find that we // are already outside the (broken) comment... maybeCmnt += cmntPart; maybeCmnt += QLatin1Char( '(' ); } ++commentNestingDepth; break; default: assert( 0 ); } // switch } else { // !parseGenericQuotedString, ie. premature end if ( afterLastClosingParenPos ) { scursor = afterLastClosingParenPos; } else { scursor = oldscursor; } return false; } } // while return true; } // known issues: none. 
bool parsePhrase( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None; QString tmp; QByteArray lang, charset; const char *successfullyParsed = 0; // only used by the encoded-word branch const char *oldscursor; // used to suppress whitespace between adjacent encoded-words // (rfc2047, 6.2): bool lastWasEncodedWord = false; while ( scursor != send ) { char ch = *scursor++; switch ( ch ) { case '.': // broken, but allow for intorop's sake if ( found == None ) { --scursor; return false; } else { if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) { result += QLatin1String( ". " ); } else { result += QLatin1Char( '.' ); } successfullyParsed = scursor; } break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { successfullyParsed = scursor; assert( *( scursor - 1 ) == '"' ); switch ( found ) { case None: found = QuotedString; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QLatin1Char( ' ' ); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { // premature end of quoted string. // What to do? Return leading '"' as special? Return as quoted-string? // We do the latter if we already found something, else signal failure. 
if ( found == None ) { return false; } else { result += QLatin1Char( ' ' ); // rfc822, 3.4.4 result += tmp; return true; } } break; case '(': // comment // parse it, but ignore content: tmp.clear(); if ( parseComment( scursor, send, tmp, isCRLF, false /*don't bother with the content*/ ) ) { successfullyParsed = scursor; lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } break; case '=': // encoded-word tmp.clear(); oldscursor = scursor; lang.clear(); charset.clear(); if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = EncodedWord; break; case Phrase: case EncodedWord: case Atom: case QuotedString: if ( !lastWasEncodedWord ) { result += QLatin1Char( ' ' ); // rfc822, 3.4.4 } found = Phrase; break; default: assert( 0 ); } lastWasEncodedWord = true; result += tmp; break; } else { // parse as atom: scursor = oldscursor; } // fall though... default: //atom tmp.clear(); scursor--; if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = Atom; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QLatin1Char( ' ' ); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } } eatWhiteSpace( scursor, send ); } return found != None; } // FIXME: This should probably by QByteArray &result instead? 
bool parseDotAtom( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); // always points to just after the last atom parsed: const char *successfullyParsed; QString tmp; if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { return false; } result += tmp; successfullyParsed = scursor; while ( scursor != send ) { // end of header or no '.' -> return if ( scursor == send || *scursor != '.' ) { return true; } scursor++; // eat '.' if ( scursor == send || !isAText( *scursor ) ) { // end of header or no AText, but this time following a '.'!: // reset cursor to just after last successfully parsed char and // return: scursor = successfullyParsed; return true; } // try to parse the next atom: QString maybeAtom; if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { scursor = successfullyParsed; return true; } result += QLatin1Char( '.' ); result += maybeAtom; successfullyParsed = scursor; } scursor = successfullyParsed; return true; } void eatCFWS( const char* &scursor, const char * const send, bool isCRLF ) { QString dummy; while ( scursor != send ) { const char *oldscursor = scursor; char ch = *scursor++; switch ( ch ) { case ' ': case '\t': // whitespace case '\r': case '\n': // folding continue; case '(': // comment if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) { continue; } scursor = oldscursor; return; default: scursor = oldscursor; return; } } } bool parseDomain( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // domain := dot-atom / domain-literal / atom *("." 
atom) // // equivalent to: // domain = dot-atom / domain-literal, // since parseDotAtom does allow CFWS between atoms and dots if ( *scursor == '[' ) { // domain-literal: QString maybeDomainLiteral; // eat '[': scursor++; while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, isCRLF, '[', ']' ) ) { if ( scursor == send ) { // end of header: check for closing ']': if ( *( scursor - 1 ) == ']' ) { // OK, last char was ']': result = maybeDomainLiteral; return true; } else { // not OK, domain-literal wasn't closed: return false; } } // we hit openChar in parseGenericQuotedString. // include it in maybeDomainLiteral and keep on parsing: if ( *( scursor - 1 ) == '[' ) { maybeDomainLiteral += QLatin1Char( '[' ); continue; } // OK, real end of domain-literal: result = maybeDomainLiteral; return true; } } else { // dot-atom: QString maybeDotAtom; if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { result = maybeDotAtom; // Domain may end with '.', if so preserve it' if ( scursor != send && *scursor == '.' ) { result += QLatin1Char( '.' 
); scursor++; } return true; } } return false; } bool parseObsRoute( const char* &scursor, const char* const send, QStringList &result, bool isCRLF, bool save ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // empty entry: if ( *scursor == ',' ) { scursor++; if ( save ) { result.append( QString() ); } continue; } // empty entry ending the list: if ( *scursor == ':' ) { scursor++; if ( save ) { result.append( QString() ); } return true; } // each non-empty entry must begin with '@': if ( *scursor != '@' ) { return false; } else { scursor++; } QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } if ( save ) { result.append( maybeDomain ); } // eat the following (optional) comma: eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == ':' ) { scursor++; return true; } if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddrSpec( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // // STEP 1: // local-part := dot-atom / quoted-string / word *("." word) // // this is equivalent to: // local-part := word *("." word) QString maybeLocalPart; QString tmp; while ( scursor != send ) { // first, eat any whitespace eatCFWS( scursor, send, isCRLF ); char ch = *scursor++; switch ( ch ) { case '.': // dot maybeLocalPart += QLatin1Char( '.' ); break; case '@': goto SAW_AT_SIGN; break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { maybeLocalPart += tmp; } else { return false; } break; default: // atom scursor--; // re-set scursor to point to ch again tmp.clear(); if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { maybeLocalPart += tmp; } else { return false; // parseAtom can only fail if the first char is non-atext. 
} break; } } return false; // // STEP 2: // domain // SAW_AT_SIGN: assert( *( scursor - 1 ) == '@' ); QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } result.localPart = maybeLocalPart; result.domain = maybeDomain; return true; } bool parseAngleAddr( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // first, we need an opening angle bracket: eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '<' ) { return false; } scursor++; // eat '<' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == '@' || *scursor == ',' ) { // obs-route: parse, but ignore: KMIME_WARN << "obsolete source route found! ignoring."; QStringList dummy; if ( !parseObsRoute( scursor, send, dummy, isCRLF, false /* don't save */ ) ) { return false; } // angle-addr isn't complete until after the '>': if ( scursor == send ) { return false; } } // parse addr-spec: AddrSpec maybeAddrSpec; if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '>' ) { return false; } scursor++; result = maybeAddrSpec; return true; } static QString stripQuotes( const QString &input ) { const QLatin1Char quotes( '"' ); if ( input.startsWith( quotes ) && input.endsWith( quotes ) ) { QString stripped( input.mid( 1, input.size() - 2 ) ); return stripped; } else { return input; } } bool parseMailbox( const char* &scursor, const char * const send, Mailbox &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } AddrSpec maybeAddrSpec; QString maybeDisplayName; // first, try if it's a vanilla addr-spec: const char * oldscursor = scursor; if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { result.setAddress( maybeAddrSpec ); // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor 
== '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } result.setName( stripQuotes( maybeDisplayName ) ); return true; } scursor = oldscursor; // second, see if there's a display-name: if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { // failed: reset cursor, note absent display-name maybeDisplayName.clear(); scursor = oldscursor; } else { // succeeded: eat CFWS if (*scursor == '@') { maybeDisplayName += QLatin1Char('@'); scursor++; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } } // third, parse the angle-addr: if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } if ( maybeDisplayName.isNull() ) { // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor == '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } } result.setName( stripQuotes( maybeDisplayName ) ); result.setAddress( maybeAddrSpec ); return true; } bool parseGroup( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] // // equivalent to: // group := display-name ":" [ obs-mbox-list ] ";" eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // get display-name: QString maybeDisplayName; if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { return false; } // get ":": eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != ':' ) { return false; } // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that // automatically calls removeBidiControlChars result.displayName = removeBidiControlChars( maybeDisplayName ); // get obs-mbox-list (may contain empty entries): scursor++; while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // 
empty entry: if ( *scursor == ',' ) { scursor++; continue; } // empty entry ending the list: if ( *scursor == ';' ) { scursor++; return true; } Mailbox maybeMailbox; if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { return false; } result.mailboxList.append( maybeMailbox ); eatCFWS( scursor, send, isCRLF ); // premature end: if ( scursor == send ) { return false; } // regular end of the list: if ( *scursor == ';' ) { scursor++; return true; } // eat regular list entry separator: if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddress( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // address := mailbox / group eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // first try if it's a single mailbox: Mailbox maybeMailbox; const char * oldscursor = scursor; if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { // yes, it is: result.displayName.clear(); result.mailboxList.append( maybeMailbox ); return true; } scursor = oldscursor; Address maybeAddress; // no, it's not a single mailbox. Try if it's a group: if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) { return false; } result = maybeAddress; return true; } bool parseAddressList( const char* &scursor, const char * const send, AddressList &result, bool isCRLF ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // empty entry: ignore: if ( *scursor == ',' ) { scursor++; continue; } // broken clients might use ';' as list delimiter, accept that as well if ( *scursor == ';' ) { scursor++; continue; } // parse one entry Address maybeAddress; if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) { return false; } result.append( maybeAddress ); eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // comma separating entries: eat it. 
if ( *scursor == ',' ) { scursor++; } } return true; } static QString asterisk = QString::fromLatin1( "*0*", 1 ); static QString asteriskZero = QString::fromLatin1( "*0*", 2 ); //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 ); // FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work // on byte arrays, not strings! The result parameter should be a simple // QPair, which is the attribute name and the value. bool parseParameter( const char* &scursor, const char * const send, QPair &result, bool isCRLF ) { // parameter = regular-parameter / extended-parameter // regular-parameter = regular-parameter-name "=" value // extended-parameter = // value = token / quoted-string // // note that rfc2231 handling is out of the scope of this function. // Therefore we return the attribute as QString and the value as // (start,length) tupel if we see that the value is encoded // (trailing asterisk), for parseParameterList to decode... eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // parse the parameter name: // // FIXME: maybeAttribute should be a QByteArray QString maybeAttribute; if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) { return false; } eatCFWS( scursor, send, isCRLF ); // premature end: not OK (haven't seen '=' yet). if ( scursor == send || *scursor != '=' ) { return false; } scursor++; // eat '=' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { // don't choke on attribute=, meaning the value was omitted: if ( maybeAttribute.endsWith( asterisk ) ) { KMIME_WARN << "attribute ends with \"*\", but value is empty!" 
"Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return true; } const char * oldscursor = scursor; // // parse the parameter value: // QStringOrQPair maybeValue; if ( *scursor == '"' ) { // value is a quoted-string: scursor++; if ( maybeAttribute.endsWith( asterisk ) ) { // attributes ending with "*" designate extended-parameters, // which cannot have quoted-strings as values. So we remove the // trailing "*" to not confuse upper layers. KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!" "Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! } } else { // value is a token: if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! } } result = qMakePair( maybeAttribute.toLower(), maybeValue ); return true; } // FIXME: Get rid of QStringOrQPair: Use a simply QMap for "result" // instead! bool parseRawParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { // we use parseParameter() consecutively to obtain a map of raw // attributes to raw values. "Raw" here means that we don't do // rfc2231 decoding and concatenation. This is left to // parseParameterList(), which will call this function. // // The main reason for making this chunk of code a separate // (private) method is that we can deal with broken parameters // _here_ and leave the rfc2231 handling solely to // parseParameterList(), which will still be enough work. 
while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // empty entry ending the list: OK. if ( scursor == send ) { return true; } // empty list entry: ignore. if ( *scursor == ';' ) { scursor++; continue; } QPair maybeParameter; if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { // we need to do a bit of work if the attribute is not // NULL. These are the cases marked with "needs further // processing" in parseParameter(). Specifically, parsing of the // token or the quoted-string, which should represent the value, // failed. We take the easy way out and simply search for the // next ';' to start parsing again. (Another option would be to // take the text between '=' and ';' as value) if ( maybeParameter.first.isNull() ) { return false; } while ( scursor != send ) { if ( *scursor++ == ';' ) { goto IS_SEMICOLON; } } // scursor == send case: end of list. return true; IS_SEMICOLON: // *scursor == ';' case: parse next entry. continue; } // successful parsing brings us here: result.insert( maybeParameter.first, maybeParameter.second ); eatCFWS( scursor, send, isCRLF ); // end of header: ends list. if ( scursor == send ) { return true; } // regular separator: eat it. if ( *scursor == ';' ) { scursor++; } } return true; } static void decodeRFC2231Value( Codec* &rfc2231Codec, QTextCodec* &textcodec, bool isContinuation, QString &value, QPair &source, QByteArray& charset ) { // // parse the raw value into (charset,language,text): // const char * decBegin = source.first; const char * decCursor = decBegin; const char * decEnd = decCursor + source.second; if ( !isContinuation ) { // find the first single quote while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { // there wasn't a single single quote at all! // take the whole value to be in latin-1: KMIME_WARN << "No charset in extended-initial-value." 
"Assuming \"iso-8859-1\"."; value += QString::fromLatin1( decBegin, source.second ); return; } charset = QByteArray( decBegin, decCursor - decBegin ); const char * oldDecCursor = ++decCursor; // find the second single quote (we ignore the language tag): while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { KMIME_WARN << "No language in extended-initial-value." "Trying to recover."; decCursor = oldDecCursor; } else { decCursor++; } // decCursor now points to the start of the // "extended-other-values": // // get the decoders: // bool matchOK = false; textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK ); if ( !matchOK ) { textcodec = 0; KMIME_WARN_UNKNOWN( Charset, charset ); } } if ( !rfc2231Codec ) { rfc2231Codec = Codec::codecForName( "x-kmime-rfc2231" ); assert( rfc2231Codec ); } if ( !textcodec ) { value += QString::fromLatin1( decCursor, decEnd - decCursor ); return; } Decoder * dec = rfc2231Codec->makeDecoder(); assert( dec ); // // do the decoding: // QByteArray buffer; buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); QByteArray::Iterator bit = buffer.begin(); QByteArray::ConstIterator bend = buffer.end(); if ( !dec->decode( decCursor, decEnd, bit, bend ) ) { KMIME_WARN << rfc2231Codec->name() << "codec lies about its maxDecodedSizeFor()" << endl << "result may be truncated"; } value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); // kDebug() << "value now: \"" << value << "\""; // cleanup: delete dec; } // known issues: // - permutes rfc2231 continuations when the total number of parts // exceeds 10 (other-sections then becomes *xy, ie. 
two digits) bool parseParameterListWithCharset( const char* &scursor, const char * const send, QMap &result, QByteArray& charset, bool isCRLF ) { // parse the list into raw attribute-value pairs: QMap rawParameterList; if ( !parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) { return false; } if ( rawParameterList.isEmpty() ) { return true; } // decode rfc 2231 continuations and alternate charset encoding: // NOTE: this code assumes that what QMapIterator delivers is sorted // by the key! Codec * rfc2231Codec = 0; QTextCodec * textcodec = 0; QString attribute; QString value; enum Mode { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 }; enum EncodingMode { NoEncoding, RFC2047, RFC2231 }; QMap::Iterator it, end = rawParameterList.end(); for ( it = rawParameterList.begin() ; it != end ; ++it ) { if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { // // new attribute: // // store the last attribute/value pair in the result map now: if ( !attribute.isNull() ) { result.insert( attribute, value ); } // and extract the information from the new raw attribute: value.clear(); attribute = it.key(); int mode = NoMode; EncodingMode encodingMode = NoEncoding; // is the value rfc2331-encoded? if ( attribute.endsWith( asterisk ) ) { attribute.truncate( attribute.length() - 1 ); mode |= Encoded; encodingMode = RFC2231; } // is the value rfc2047-encoded? if ( !( *it ).qstring.isNull() && ( *it ).qstring.contains( QLatin1String( "=?" ) ) ) { mode |= Encoded; encodingMode = RFC2047; } // is the value continued? if ( attribute.endsWith( asteriskZero ) ) { attribute.truncate( attribute.length() - 2 ); mode |= Continued; } // // decode if necessary: // if ( mode & Encoded ) { if ( encodingMode == RFC2231 ) { decodeRFC2231Value( rfc2231Codec, textcodec, false, /* isn't continuation */ value, ( *it ).qpair, charset ); } else if ( encodingMode == RFC2047 ) { value += decodeRFC2047String( ( *it ).qstring.toLatin1(), charset ); } } else { // not encoded. 
if ( ( *it ).qpair.first ) { value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second ); } else { value += ( *it ).qstring; } } // // shortcut-processing when the value isn't encoded: // if ( !( mode & Continued ) ) { // save result already: result.insert( attribute, value ); // force begin of a new attribute: attribute.clear(); } } else { // it.key().startsWith( attribute ) // // continuation // // ignore the section and trust QMap to have sorted the keys: if ( it.key().endsWith( asterisk ) ) { // encoded decodeRFC2231Value( rfc2231Codec, textcodec, true, /* is continuation */ value, ( *it ).qpair, charset ); } else { // not encoded if ( ( *it ).qpair.first ) { value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second ); } else { value += ( *it ).qstring; } } } } // write last attr/value pair: if ( !attribute.isNull() ) { result.insert( attribute, value ); } return true; } bool parseParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { QByteArray charset; return parseParameterListWithCharset( scursor, send, result, charset, isCRLF ); } static const char * const stdDayNames[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; static bool parseDayName( const char* &scursor, const char * const send ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( int i = 0 ; i < stdDayNamesLen ; ++i ) { if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { scursor += 3; // kDebug() << "found" << stdDayNames[i]; return true; } } return false; } static const char * const stdMonthNames[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static const int stdMonthNamesLen = sizeof stdMonthNames / sizeof *stdMonthNames; static bool parseMonthName( const char* &scursor, const char * const send, int &result ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( result = 
// Table of alphabetic time zone names and their offsets from GMT, in
// seconds east of GMT (zones west of Greenwich are negative).
// Each name must appear only once: an earlier duplicate entry hides any
// later one with the same name.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },   // was a second "MST" entry, which made MST resolve to -5h
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// number of entries in timeZones
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
/**
  Parses a run of decimal digits starting at @p scursor.

  @param scursor  current parse position; left on the first character that
                  is not a digit (or on @p send).
  @param send     end of the input.
  @param result   receives the parsed number; 0 if there were no digits.
  @return the number of digits consumed (0 if none).

  NOTE(review): there is no overflow guard; a digit run too long for an
  int still overflows @p result.
*/
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  result = 0;
  int digits = 0;
  // isdigit() is only defined for values representable as unsigned char
  // (and EOF), so 8-bit input bytes must not be passed as a plain,
  // possibly negative, char.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
scursor == send ) || isdigit( *scursor ) ) { timeZoneKnown = false; secsEastOfGMT = 0; return true; // allow missing timezone } timeZoneKnown = true; if ( *scursor == '+' || *scursor == '-' ) { // remember and eat '-'/'+': const char sign = *scursor++; // numerical timezone: int maybeTimeZone; if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) { return false; } secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); if ( sign == '-' ) { secsEastOfGMT *= -1; if ( secsEastOfGMT == 0 ) { timeZoneKnown = false; // -0000 means indetermined tz } } } else { // maybe alphanumeric timezone: if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) { return false; } } return true; } bool parseDateTime( const char* &scursor, const char * const send, KDateTime &result, bool isCRLF ) { // Parsing date-time; strict mode: // // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date // time // // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" result = KDateTime(); QDateTime maybeDateTime; eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // let's see if there's a day-of-week: // if ( parseDayName( scursor, send ) ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // day-name should be followed by ',' but we treat it as optional: if ( *scursor == ',' ) { scursor++; // eat ',' eatCFWS( scursor, send, isCRLF ); } } int maybeMonth = -1; bool asctimeFormat = false; // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993 if ( !isdigit( *scursor ) && parseMonthName( scursor, send, maybeMonth ) ) { asctimeFormat = true; eatCFWS( scursor, send, isCRLF ); } // // 1*2DIGIT representing "day" (of month): // int maybeDay; if ( !parseDigits( scursor, send, maybeDay ) ) { return false; } eatCFWS( 
scursor, send, isCRLF ); if ( scursor == send ) { return false; } // ignore ","; bug 54098 if ( *scursor == ',' ) { scursor++; } // // month-name: // if ( !asctimeFormat && !parseMonthName( scursor, send, maybeMonth ) ) { return false; } if ( scursor == send ) { return false; } assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); ++maybeMonth; // 0-11 -> 1-12 eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS") bool timeAfterYear = true; if ( ( send - scursor > 3 ) && ( ( scursor[1] == ':' ) || ( scursor[2] == ':' ) ) ) { timeAfterYear = false; // first read time, then year } // // 2*DIGIT representing "year": // int maybeYear = 0; if ( timeAfterYear && !parseDigits( scursor, send, maybeYear ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // time // int maybeHour, maybeMinute, maybeSecond; long int secsEastOfGMT; bool timeZoneKnown = true; if ( !parseTime( scursor, send, maybeHour, maybeMinute, maybeSecond, secsEastOfGMT, timeZoneKnown, isCRLF ) ) { return false; } // in asctime() the year follows the time if ( !timeAfterYear ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( !parseDigits( scursor, send, maybeYear ) ) { return false; } } // RFC 2822 4.3 processing: if ( maybeYear < 50 ) { maybeYear += 2000; } else if ( maybeYear < 1000 ) { maybeYear += 1900; } // else keep as is if ( maybeYear < 1900 ) { return false; // rfc2822, 3.3 } maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) ); maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) ); if ( !maybeDateTime.isValid() ) { return false; } result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) ); if ( !result.isValid() ) { return false; } return true; } Headers::Base *extractFirstHeader( QByteArray &head ) { int endOfFieldBody = 0; bool folded = false; Headers::Base *header = 
0; int startOfFieldBody = head.indexOf( ':' ); const int endOfFieldHeader = startOfFieldBody; if ( startOfFieldBody > -1 ) { //there is another header startOfFieldBody++; //skip the ':' if ( head[startOfFieldBody] == ' ' ) { // skip the space after the ':', if there startOfFieldBody++; } endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded ); QByteArray rawType = head.left( endOfFieldHeader ); QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody ); if ( folded ) { rawFieldBody = unfoldHeader( rawFieldBody ); } // We might get an invalid mail without a field name, don't crash on that. if ( !rawType.isEmpty() ) { header = HeaderFactory::self()->createHeader( rawType ); } if ( !header ) { //kWarning() << "Returning Generic header of type" << rawType; header = new Headers::Generic( rawType.constData() ); } header->from7BitString( rawFieldBody ); head.remove( 0, endOfFieldBody + 1 ); } else { head.clear(); } return header; } void extractHeaderAndBody( const QByteArray &content, QByteArray &header, QByteArray &body ) { header.clear(); body.clear(); // empty header if ( content.startsWith( '\n' ) ) { body = content.right( content.length() - 1 ); return; } int pos = content.indexOf( "\n\n", 0 ); if ( pos > -1 ) { header = content.left( ++pos ); //header *must* end with "\n" !! 
- body = content.mid( pos + 1, content.length() - pos - 1 ); + body = content.mid(pos+1); + if (body.startsWith("\n")) { + body = "\n"+body; + } } else { header = content; } } Headers::Base::List parseHeaders( const QByteArray &head ) { Headers::Base::List ret; Headers::Base *h; QByteArray copy = head; while ( ( h = extractFirstHeader( copy ) ) ) { ret << h; } return ret; } } // namespace HeaderParsing } // namespace KMime diff --git a/kmime/tests/auto/contenttest.cpp b/kmime/tests/auto/contenttest.cpp index 772260c8f..fc54e92a6 100644 --- a/kmime/tests/auto/contenttest.cpp +++ b/kmime/tests/auto/contenttest.cpp @@ -1,741 +1,740 @@ /* Copyright (c) 2006 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "contenttest.h" #include #include #include #include #include using namespace KMime; QTEST_KDEMAIN( ContentTest, NoGUI ) void ContentTest::testGetHeaderInstance( ) { // stuff that looks trivial but breaks if you mess with virtual method signatures (see r534381) Headers::From *myfrom = new Headers::From(); QCOMPARE( myfrom->type(), "From" ); Headers::Base *mybase = myfrom; QCOMPARE( mybase->type(), "From" ); // getHeaderInstance() is protected, so we need to test it via KMime::Message Message *c = new Message(); Headers::From *f1 = c->from( true ); Headers::From *f2 = c->from( true ); QCOMPARE( f1, f2 ); delete c; } void ContentTest::testHeaderAddRemove() { // Add a Content-Description header to a content. Content *c = new Content; QVERIFY( !c->contentDescription( false ) ); c->contentDescription()->from7BitString( "description" ); // The content must now have the header. QVERIFY( c->contentDescription( false ) ); QCOMPARE( c->contentDescription()->as7BitString( false ), QByteArray( "description" ) ); // The content's head must also have the header. Save the head. c->assemble(); QByteArray head = c->head(); // Clear the content. It must now forget the cached header. c->clear(); QVERIFY( c->head().isEmpty() ); QVERIFY( !c->contentDescription( false ) ); // Put the head back. It must now remember the header. c->setHead( head ); QVERIFY( !c->contentDescription( false ) ); c->parse(); QVERIFY( c->contentDescription( false ) ); c->contentDescription()->from7BitString( "description" ); // Now remove the header explicitly. bool ret = c->removeHeader( "Content-Description" ); QVERIFY( ret ); // The content must have forgotten the header now. QVERIFY( !c->contentDescription( false ) ); // And after assembly the header should stay gone. 
c->assemble(); QVERIFY( c->head().isEmpty() ); QVERIFY( !c->contentDescription( false ) ); } void ContentTest::testHeaderAppendPrepend() { Content *c = new Content; QByteArray d1( "Resent-From: test1@example.com" ); QByteArray d2( "Resent-From: test2@example.com" ); Headers::Generic *h1 = new Headers::Generic( "Resent-From", 0, "test1@example.com" ); Headers::Generic *h2 = new Headers::Generic( "Resent-From", 0, "test2@example.com" ); c->appendHeader( h1 ); c->appendHeader( h2 ); c->assemble(); QByteArray head = d1 + '\n' + d2 + '\n'; QCOMPARE( c->head(), head ); QByteArray d3( "Resent-From: test3@example.com" ); Headers::Generic *h3 = new Headers::Generic( "Resent-From", 0, "test3@example.com" ); c->prependHeader( h3 ); c->assemble(); head.prepend( d3 + '\n' ); QCOMPARE( c->head(), head ); } void ContentTest::testImplicitMultipartGeneration() { Content *c1 = new Content(); c1->contentType()->from7BitString( "text/plain" ); c1->setBody( "textpart" ); Content *c2 = new Content(); c2->contentType()->from7BitString( "text/html" ); c2->setBody( "htmlpart" ); c1->addContent( c2 ); // c1 implicitly converted into a multipart/mixed node. QVERIFY( c1->contentType( false ) ); QCOMPARE( c1->contentType()->mimeType(), QByteArray( "multipart/mixed" ) ); QVERIFY( c1->body().isEmpty() ); QCOMPARE( c1->contents().count(), 2 ); Content *c = c1->contents().at( 0 ); // Former c1. QVERIFY( c->contentType( false ) ); QCOMPARE( c->contentType()->mimeType(), QByteArray( "text/plain" ) ); QCOMPARE( c->body(), QByteArray( "textpart" ) ); QCOMPARE( c1->contents().at( 1 ), c2 ); // Now remove c2. c1 should be converted back to a text/plain content. c1->removeContent( c2, false ); QVERIFY( c1->contents().isEmpty() ); QVERIFY( c1->contentType( false ) ); QCOMPARE( c1->contentType()->mimeType(), QByteArray( "text/plain" ) ); QCOMPARE( c1->body(), QByteArray( "textpart" ) ); // c2 should not have been touched. 
QVERIFY( c2->contents().isEmpty() ); QVERIFY( c2->contentType( false ) ); QCOMPARE( c2->contentType()->mimeType(), QByteArray( "text/html" ) ); QCOMPARE( c2->body(), QByteArray( "htmlpart" ) ); // Clean up. delete c1; delete c2; } void ContentTest::testExplicitMultipartGeneration() { Content *c1 = new Content(); c1->contentType()->from7BitString( "multipart/mixed" ); Content *c2 = new Content(); c2->contentType()->from7BitString( "text/plain" ); c2->setBody( "textpart" ); Content *c3 = new Content(); c3->contentType()->from7BitString( "text/html" ); c3->setBody( "htmlpart" ); c1->addContent( c2 ); c1->addContent( c3 ); // c1 should not have been changed. QCOMPARE( c1->contentType()->mimeType(), QByteArray( "multipart/mixed" ) ); QVERIFY( c1->body().isEmpty() ); QCOMPARE( c1->contents().count(), 2 ); QCOMPARE( c1->contents().at( 0 ), c2 ); QCOMPARE( c1->contents().at( 1 ), c3 ); // Removing c3 should turn c1 into a single-part content containing the data of c2. c1->removeContent( c3, false ); QCOMPARE( c1->contentType()->mimeType(), QByteArray( "text/plain" ) ); QCOMPARE( c1->contents().count(), 0 ); QCOMPARE( c1->body(), QByteArray( "textpart" ) ); // Clean up. delete c1; // c2 was deleted when c1 turned itself single-part. 
delete c3; } void ContentTest::testSetContent() { Content *c = new Content(); QVERIFY( !c->hasContent() ); // head and body present c->setContent( "head1\nhead2\n\nbody1\n\nbody2\n" ); QVERIFY( c->hasContent() ); QCOMPARE( c->head(), QByteArray( "head1\nhead2\n" ) ); QCOMPARE( c->body(), QByteArray( "body1\n\nbody2\n" ) ); QList list; list << "head1" << "head2" << "" << "body1" << "" << "body2"; c->setContent( list ); QVERIFY( c->hasContent() ); QCOMPARE( c->head(), QByteArray( "head1\nhead2\n" ) ); QCOMPARE( c->body(), QByteArray( "body1\n\nbody2\n" ) ); // ### the final \n is questionable // empty content c->setContent( QByteArray() ); QVERIFY( !c->hasContent() ); QVERIFY( c->head().isEmpty() ); QVERIFY( c->body().isEmpty() ); // empty head c->setContent( "\nbody1\n\nbody2\n" ); QVERIFY( c->hasContent() ); QVERIFY( c->head().isEmpty() ); QCOMPARE( c->body(), QByteArray( "body1\n\nbody2\n" ) ); list.clear(); list << "" << "body1" << "" << "body2"; c->setContent( list ); QVERIFY( c->hasContent() ); QVERIFY( c->head().isEmpty() ); QCOMPARE( c->body(), QByteArray( "body1\n\nbody2\n" ) ); // empty body c->setContent( "head1\nhead2\n\n" ); QVERIFY( c->hasContent() ); QCOMPARE( c->head(), QByteArray( "head1\nhead2\n" ) ); QVERIFY( c->body().isEmpty() ); list.clear(); list << "head1" << "head2" << ""; c->setContent( list ); QVERIFY( c->hasContent() ); QCOMPARE( c->head(), QByteArray( "head1\nhead2\n" ) ); QVERIFY( c->body().isEmpty() ); } void ContentTest::testEncodedContent() { // Example taken from RFC 2046, section 5.1.1. // Removed "preamble" and "epilogue", which KMime loses. 
QByteArray data = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-type: text/plain; charset=us-ascii\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n"; Message *msg = new Message; msg->setContent( data ); msg->parse(); // Test that multiple calls do not corrupt anything. QByteArray encc = msg->encodedContent(); //kDebug() << "original data" << data; //kDebug() << "encodedContent" << encc; QCOMPARE( msg->encodedContent(), data ); QCOMPARE( msg->encodedContent(), data ); QCOMPARE( msg->encodedContent(), data ); delete msg; // RFC 2822 3.5: lines are limited to 1000 characters (998 + CRLF) // (bug #187345) msg = new Message(); data = "Subject:" "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test test test test test test test test test test test test test test test " "test test test test test test 
test test test test test test test test test test test test test test" "\n" "References: " " " " " " " " " " " " " " " " " " " " " "\n\n" "body\n"; msg->setContent( data ); QByteArray content = msg->encodedContent( true /* use CRLF */ ); QStringList lines = QString::fromLatin1( content ).split( "\r\n" ); foreach ( const QString &line, lines ) { QEXPECT_FAIL( "", "KMime does not fold lines longer than 998 characters", Continue ); QVERIFY( line.length() < 998 && !line.isEmpty() && line != "body" ); // The test should be (after the expected failure disappears): //QVERIFY( line.length() < 998 ); } delete msg; } void ContentTest::testDecodedContent() { Content *c = new Content(); c->setBody( '\0' ); QVERIFY( c->decodedContent() == QByteArray() ); c->setBody( QByteArray() ); QVERIFY( c->decodedContent() == QByteArray() ); c->setBody( " " ); QVERIFY( c->decodedContent() == QByteArray( " " ) ); } void ContentTest::testMultipleHeaderExtraction() { QByteArray data = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)\n" "Subject: Sample message\n" "Received: from ktown.kde.org ([192.168.100.1])\n" "Received: from dev1.kde.org ([192.168.100.2])\n" "\t by ktown.kde.org ([192.168.100.1])\n" "Received: from dev2.kde.org ([192.168.100.3])\n" " by ktown.kde.org ([192.168.100.1])\n"; Message *msg = new Message(); msg->setContent( data ); // FAILS identically to ContentTest::testMultipartMixed // QCOMPARE( msg->encodedContent(), data ); msg->parse(); QList result = msg->headersByType( "Received" ); QCOMPARE( result.count(), 3 ); QCOMPARE( result[0]->asUnicodeString(), QString( "from ktown.kde.org ([192.168.100.1])" ) ); QCOMPARE( result[1]->asUnicodeString(), QString( "from dev1.kde.org ([192.168.100.2]) by ktown.kde.org ([192.168.100.1])" ) ); QCOMPARE( result[2]->asUnicodeString(), QString( "from dev2.kde.org ([192.168.100.3]) by ktown.kde.org ([192.168.100.1])" ) ); } void ContentTest::testMultipartMixed() { // example taken from RFC 
2046, section 5.1.1. QByteArray data = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" "This is the preamble. It is to be ignored, though it\n" "is a handy place for composition agents to include an\n" "explanatory note to non-MIME conformant readers.\n" "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-type: text/plain; charset=us-ascii\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n" "\n" "This is the epilogue. It is also to be ignored.\n"; QByteArray part1 = "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak."; QByteArray part2 = "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n"; // What we expect KMime to parse the above data into. QByteArray parsedWithPreambleAndEpilogue = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-Type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" "This is the preamble. It is to be ignored, though it\n" "is a handy place for composition agents to include an\n" "explanatory note to non-MIME conformant readers.\n" "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-Type: text/plain; charset=\"us-ascii\"\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n" "\n" "This is the epilogue. It is also to be ignored.\n"; // What we expect KMime to assemble the above data into. 
QByteArray assembled = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-Type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" - "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-Type: text/plain; charset=\"us-ascii\"\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n"; // test parsing Message *msg = new Message(); msg->setContent( data ); QCOMPARE( msg->encodedContent(), data ); msg->parse(); QVERIFY( msg->contentType()->isMultipart() ); Content::List list = msg->contents(); QCOMPARE( list.count(), 2 ); Content *c = list.takeFirst(); QCOMPARE( c->body(), part1 ); c = list.takeFirst(); QCOMPARE( c->body(), part2 ); // assemble again msg->assemble(); //kDebug() << "expected assembled content" << parsedWithPreambleAndEpilogue; //kDebug() << "actual new encoded content" << msg->encodedContent(); QCOMPARE( msg->encodedContent(), parsedWithPreambleAndEpilogue ); delete msg; // assembling from scratch // (The headers have to be in the same order, as we compare with the above assembled.) msg = new Message(); msg->from()->from7BitString( "Nathaniel Borenstein " ); msg->to()->from7BitString( "Ned Freed " ); msg->date()->from7BitString( "Sun, 21 Mar 1993 23:56:48 -0800 (PST)" ); msg->subject()->from7BitString( "Sample message" ); // HACK to make MIME-Version appear before Content-Type, as in the expected message. 
msg->setHeader( new Headers::MIMEVersion( msg, "1.234" ) ); msg->setBody( part1 ); c = new Content(); c->setBody( part2 ); c->contentType()->setMimeType( "text/plain" ); c->contentType()->setCharset( "us-ascii" ); msg->addContent( c ); msg->contentType()->setBoundary( "simple boundary" ); list = msg->contents(); QCOMPARE( list.count(), 2 ); c = list.takeFirst(); QCOMPARE( c->body(), part1 ); c = list.takeFirst(); QCOMPARE( c->body(), part2 ); msg->assemble(); QByteArray encc = msg->encodedContent(); //kDebug() << "expected assembled content" << assembled; //kDebug() << "actual encoded content" << encc; QCOMPARE( msg->encodedContent(), assembled ); } void ContentTest::testParsingUuencoded() { const QByteArray body = "This is a test message that should appears as a text/plain part\n" "once this message is parsed and convert to a MIME tree.\n" "\n" "\n"; const QString imageName = "Name of the encoded file (oxygen 22x22 kde.png)"; const QByteArray imageBase64 = "\n" "iVBORw0KGgoAAAANSUhEUgAAABYAAAAWCAYAAADEtGw7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n" "AAADdgAAA3YBfdWCzAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAU4SURB\n" "VBgZjcFbiF1XGcDx/7fW2pdznTOXNJlJc4WWVK3RqiC2FOyTiGCs+lKkohWTIl5QWrAp9ckLKpYi\n" "KFjwodgXoRDMk1VECj7UpkIqUZuCTUycyWXOzJxz9tnXtdfnxNqnvvj7iaryNhGxgPBOAh/gLa+y\n" "S3kn3dXyP6KqyEe+1Rm6tSc6nYVHO+loOXYR1hisFYRAIOBljtecyPaItEMkXeK4S2QTVAxVOZ1t\n" "TzaeG6//9fTWuR9MnOxyx7/xzaWjB548cvgAUeyJbGDYj9mzPGJl1GdpocOwlxCCMs1qtrKSrZ2c\n" "ze0Z126O2ZkWSJoO0rDylUabREROOsCoG3z58JEDrK4NIFQMBz0WBl2G3ZReGiNi+debO6gKC3sH\n" "DAcxNu6QpF1GiwtsTzMm04wrVyeY7upngEcdYIy4pSgVJtmMYb+HmBiVGE9Eo47ZdsHJj3eJnOHp\n" "M3P6exbIJxmffr/ibMK58zN+M4nwlGCTPmAMu8QYKasCFYd1CWoSgkT4YGmCoWggTRLiOKH0UFTK\n" "A8csdx0ZcnBfl/PXIuJ+j253gBED3CEGDluxVtqgGBcTJCKIZboxJq9bssozLxqiKMJZS1G3LIct\n" "7nvfAs5FvPDSjHlnEbER3f4AsUZYG1rD2t3GGIu4GIhosUSzCd9/5HZOvKtldnmd7evbRM7hnEOz\n" "CV/8xCrOWv52qeKVGx0CBpUIF3cwxsLwdmtYPGSMtaLW0WIIKuStIXLCh9+9wE++fgfWV4jwX489\n" 
"fJQkMswr5ee/26EMgaaFVoW6VsRaGXWWrFnqWyPWSV0rrULlA7M45dd/uEHZwOlfvMGW6yAiiAhr\n" "KwkgGIEiL8jrmryuqWpPWbWItYTlNWvauGeNs8xLT9W2FFXDdGPMwb0pz569wsUqpqgbQADhmecu\n" "IgK91HHqY7cxz0um85zxrKAVMNYSbGKNtqkIhtB6xptTvntiyJnv3MVH71niT3+fUHvQ1vC2F1+v\n" "efHPm9xy33sXubtXsj3NaJqKNjSA0KePEVsqKEE9dZWTOBCUtg1sZoamhrYFVQWUphV+dPYml67l\n" "3PLtz99Jr8zxdYn3NSJKRoYxhQZ2+aZCteWhZy7yydOvceHNOXeuWNRbQmMIIaCqGGJcOuL0s5fJ\n" "S8+gY3j8U4fQ2hPqEg0BqQnCsUcGg7XjNxZXV1MbJQx6I1ZW9vPge4QHPrjM47/cwXZ6VFmBaEsy\n" "6GPqgqEtqJqWsmq4OpmT+Sl1XTHdHIemeG3ZML3RBu+1rkp8mROahqiYceL+RQ7eZvnewwusyoRh\n" "f8hgtMywmfPUQ0Oe+sI+WlJ0tIrrJjR1SdMUBO/Z2fhn61g/68PRe7UqC4JraDo1h3oVsW1440rD\n" "718uOfXgiL1LEIKiOsI5IY0CT36uzxO/KvF1TV3MqX1D8F6Z/8U7QEPr1WCpyzlVVXJuo+WrP7xE\n" "ke5neeUA55+/ytNfSxAnPPazEnVdPntvweV/52R5oK4KqiqnqhtQr1y50jpAQ1PmvbTfG493mE62\n" "oYV/+CWGgzFN8EQm5vo4RxWmLKBty09/65nPC6bZDjuTLeZZhrMJWs8rdjkghOmlF3x57NTy4hrX\n" "b65T5zl1WVAWc7LuhDTpcvLHFcYY6E7xTUNZ5eT5jFm2w3S6RWRT9oz2cXX9lT8Cragqsv9DK93F\n" "48/3995zf7e/J41dhDMWawQkoNriTYbXnMj2ibRLJF3iuEtkE1SEfL7VXLv00qs3Xz/zpWp84YKo\n" "KreIiANGwH5AAOH/o7xlE7gOeN31H1IDp2dl3tAoAAAAAElFTkSuQmCC\n" ; const QByteArray uuencodedMsg = "Path: news.example.net!not-for-mail\n" "From: Coin coin \n" "Newsgroups: test.kmime.uuencoded\n" "Subject: Kmime test\n" "Date: Thu, 14 Apr 2005 20:12:47 -0700\n" "Message-ID: \n" "X-Newsreader: Forte Agent 2.0/32.640\n" "Lines: 1283\n" "Organization: Ament\n" "Xref: news.example.net test.kmime.uuencoded:4584\n" "\n" "This is a test message that should appears as a text/plain part\n" "once this message is parsed and convert to a MIME tree.\n" "\n" "begin 644 Name of the encoded file (oxygen 22x22 kde.png)\n" "MB5!.1PT*&@H````-24A$4@```!8````6\"`8```#$M&P[````!'-\"250(\"`@(\n" "M?`ADB`````EP2%ES```#=@```W8!?=6\"S````!ET15AT4V]F='=A^U1FZM2*X2V035`Q5.9UM3S:>&Z__]?36N1],G.QRQ[_QS:6C!YX\\R)\n" "M;C]FS/&)EU&=IH<.PEQ\"\",LUJMK*2K9VT9UVZ.V9D62)H.TK#RE4:;\n" 
"M1$1..L\"H&WSY\\)$#K*X-(%0,!ST6!EV&W91>&B-B^=>;.Z@*\"WL'#`8[NIG@$<=8(RXI2@5)MF,8;^'F!B5&$]$HX[9=L')\n" "MCW>)G.'I,W/Z>Q;()QF??K_B;,*Y\\S-^,XGPE&\"3/F`,N\\08*:L\"%8=U\"6H2\n" "M@D3X8&F\"H6@@31+B.*'T4%3*`\\O;1,[AG$.S\"5_\\\n" "MQ\"K.6OYVJ>*5&QT\"!I4(%W>_VZ$,@::%5H6Z5L1:&766K%GJ6R/625TKK4+E\n" "M`[,XY==_N$'9P.E?O,&6ZR`BB`AK*PD@&($B+\\CKFKRNJ6I/6;6(M83E-6O:\n" "MN&>-L\\Q+3]6V%%7#=&/,P;TISYZ]PL4JIJ@;0`#AF>?'/F]QRWWL7N;M7LCW-:)J*-C2`T*>/$5LJ*$$]=963.!\"4M@UL9H:FAK8%\n" "M5064IA5^=/8FEZ[EW/+MS]])K\\SQ=8GW-2)*1H8QA09V^:9\"M>6A9R[RR=.O\n" "M<>'-.7>N6-1;0F,((:\"J&&)<.N+TLY?)2\\^@8WC\\4X?0VA/J$@T!J0G\"L4<&\n" "M@[7C-Q975U,;)0QZ(U96]O/@>X0'/KC,X[_3YC%FVPW2Z1613\n" "M]HSV<77]E3\\\"K:@JLO]#*]W%X\\_W]]YS?[>_)XU=A#,6:P0DH-KB38;7G,CV\n" "MB;1+)%WBN$MD$U2$?+[57+OTTJLW7S_SI6I\\X8*H*K>(B`-&P'Y``.'_H[QE\n" ";$[@.>-WU'U(#IV=EWM`H`````$E%3D2N0F\"\"\n" "`\n" "end\n" "\n"; Message *msg = new Message(); msg->setContent( uuencodedMsg ); msg->parse(); Content::List contents = msg->contents(); // text + image QCOMPARE( contents.size(), 2 ); Content *c = 0; // Check the first text part c = contents.at( 0 ); QVERIFY( c->contentType()->isPlainText() ); QCOMPARE( c->body(), body ); // Check the image part c = contents.at( 1 ); QVERIFY( !c->contentType()->isText() ); QCOMPARE( c->contentType()->name(), imageName ); // The uuencoded content as been recoded as base64 QCOMPARE( c->encodedContent(), imageBase64 ); delete msg; } void ContentTest::testParent() { Content *c1 = new Content(); c1->contentType()->from7BitString( "multipart/mixed" ); Content *c2 = new Content(); c2->contentType()->from7BitString( "text/plain" ); c2->setBody( "textpart" ); Content *c3 = new Content(); c3->contentType()->from7BitString( "text/html" ); c3->setBody( "htmlpart" ); Content *c4 = new Content(); c4->contentType()->from7BitString( "text/html" ); c4->setBody( "htmlpart2" ); Content *c5 = new Content(); c5->contentType()->from7BitString( "multipart/mixed" ); //c2 doesn't have a parent yet QCOMPARE( c2->parent(), 
(Content*)( 0L ) ); c1->addContent( c2 ); c1->addContent( c3 ); c1->addContent( c4 ); // c1 is the parent of those QCOMPARE( c2->parent(), c1 ); QCOMPARE( c3->parent(), c1 ); //test removal c1->removeContent( c2, false ); QCOMPARE( c2->parent(), (Content*)( 0L ) ); QCOMPARE( c1->contents().at( 0 ), c3 ); //check if the content is moved correctly to another parent c5->addContent( c4 ); QCOMPARE( c4->parent(), c5 ); QCOMPARE( c1->contents().count(), 0 ); //yes, it should be 0 QCOMPARE( c5->contents().at( 0 ), c4 ); // example taken from RFC 2046, section 5.1.1. QByteArray data = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" "This is the preamble. It is to be ignored, though it\n" "is a handy place for composition agents to include an\n" "explanatory note to non-MIME conformant readers.\n" "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-type: text/plain; charset=us-ascii\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n" "\n" "This is the epilogue. It is also to be ignored.\n"; // test parsing Message *msg = new Message(); msg->setContent( data ); msg->parse(); QCOMPARE( msg->parent(), (Content*)( 0L )); QCOMPARE( msg->contents().at( 0 )->parent(), msg ); QCOMPARE( msg->contents().at( 1 )->parent(), msg ); delete msg; } void ContentTest::testFreezing() { // Example taken from RFC 2046, section 5.1.1. QByteArray data = "From: Nathaniel Borenstein \n" "To: Ned Freed \n" "Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)\n" "Subject: Sample message\n" "MIME-Version: 1.0\n" "Content-type: multipart/mixed; boundary=\"simple boundary\"\n" "\n" "This is the preamble. 
It is to be ignored, though it\n" "is a handy place for composition agents to include an\n" "explanatory note to non-MIME conformant readers.\n" "\n" "--simple boundary\n" "\n" "This is implicitly typed plain US-ASCII text.\n" "It does NOT end with a linebreak.\n" "--simple boundary\n" "Content-type: text/plain; charset=us-ascii\n" "\n" "This is explicitly typed plain US-ASCII text.\n" "It DOES end with a linebreak.\n" "\n" "--simple boundary--\n" "\n" "This is the epilogue. It is also to be ignored.\n"; Message *msg = new Message; msg->setContent( data ); msg->setFrozen( true ); // The data should be untouched before parsing. //kDebug() << "original data" << data; //kDebug() << "data from message" << msg->encodedContent(); QCOMPARE( msg->encodedContent(), data ); // The data should remain untouched after parsing. msg->parse(); QVERIFY( msg->contentType()->isMultipart() ); QCOMPARE( msg->contents().count(), 2 ); QCOMPARE( msg->encodedContent(), data ); // Calling assemble() should not alter the data. msg->assemble(); QCOMPARE( msg->encodedContent(), data ); } diff --git a/kmime/tests/auto/messagetest.cpp b/kmime/tests/auto/messagetest.cpp index f17891aa4..d98d5e8a5 100644 --- a/kmime/tests/auto/messagetest.cpp +++ b/kmime/tests/auto/messagetest.cpp @@ -1,603 +1,658 @@ /* Copyright (c) 2007 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. 
If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "messagetest.h" #include #include using namespace KMime; QTEST_KDEMAIN( MessageTest, NoGUI ) void MessageTest::testMainBodyPart() { Message *msg = new Message(); Message *msg2 = new Message(); Content *text = new Content(); text->contentType()->setMimeType( "text/plain" ); Content *html = new Content(); html->contentType()->setMimeType( "text/html" ); // empty message QCOMPARE( msg->mainBodyPart(), msg ); QCOMPARE( msg->mainBodyPart( "text/plain" ), (Content*)0 ); // non-multipart msg->contentType()->setMimeType( "text/html" ); QCOMPARE( msg->mainBodyPart(), msg ); QCOMPARE( msg->mainBodyPart( "text/plain" ), (Content*)0 ); QCOMPARE( msg->mainBodyPart( "text/html" ), msg ); // multipart/mixed msg2->contentType()->setMimeType( "multipart/mixed" ); msg2->addContent( text ); msg2->addContent( html ); QCOMPARE( msg2->mainBodyPart(), text ); QCOMPARE( msg2->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg2->mainBodyPart( "text/html" ), (Content*)0 ); // Careful with removing content here. If we remove one of the two contents // (by adding it to another message), the multipart will automatically be // converted to a single-part, deleting the other content! 
msg2->clearContents( false ); // mulitpart/alternative msg->contentType()->setMimeType( "multipart/alternative" ); msg->addContent( html ); msg->addContent( text ); QCOMPARE( msg->mainBodyPart(), html ); QCOMPARE( msg->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg->mainBodyPart( "text/html" ), html ); // mulitpart/alternative inside multipart/mixed Message* msg3 = new Message(); msg3->contentType()->setMimeType( "multipart/mixed" ); msg3->addContent( msg ); Content *attach = new Content(); attach->contentType()->setMimeType( "text/plain" ); QCOMPARE( msg3->mainBodyPart(), html ); QCOMPARE( msg3->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg3->mainBodyPart( "text/html" ), html ); } void MessageTest::testBrunosMultiAssembleBug() { QByteArray data = "From: Sender \n" "Subject: Sample message\n" "To: Receiver \n" "Date: Sat, 04 Aug 2007 12:44 +0200\n" "MIME-Version: 1.0\n" "Content-Type: text/plain\n" "X-Foo: bla\n" "X-Bla: foo\n" "\n" "body"; Message *msg = new Message; msg->setContent( data ); msg->parse(); msg->assemble(); QCOMPARE( msg->encodedContent(), data ); msg->inReplyTo(); msg->assemble(); QCOMPARE( msg->encodedContent(), data ); delete msg; } void MessageTest::testWillsAndTillsCrash() { QByteArray deadlyMail = "From: censored@yahoogroups.com\n" "To: censored@yahoogroups.com\n" "Sender: censored@yahoogroups.com\n" "MIME-Version: 1.0\n" "Date: 29 Jan 2006 23:58:21 -0000\n" "Subject: [censored] Birthday Reminder\n" "Reply-To: censored@yahoogroups.com\n" "Content-Type: multipart/alternative;\n boundary=\"YCalReminder=cNM4SNTGA4Cg1MVLaPpqNF1138579098\"\n" "X-Length: 9594\n" "X-UID: 6161\n" "Status: RO\n" "X-Status: OC\n" "X-KMail-EncryptionState:\n" "X-KMail-SignatureState:\n" "X-KMail-MDN-Sent:\n\n"; KMime::Message *msg = new KMime::Message; msg->setContent( deadlyMail ); msg->parse(); QVERIFY( !msg->date()->isEmpty() ); QCOMPARE( msg->subject()->as7BitString( false ), QByteArray( "[censored] Birthday Reminder" ) ); QCOMPARE( 
msg->from()->mailboxes().count(), 1 ); QCOMPARE( msg->sender()->mailboxes().count(), 1 ); QCOMPARE( msg->replyTo()->mailboxes().count(), 1 ); QCOMPARE( msg->to()->mailboxes().count(), 1 ); QCOMPARE( msg->cc()->mailboxes().count(), 0 ); QCOMPARE( msg->bcc()->mailboxes().count(), 0 ); QCOMPARE( msg->inReplyTo()->identifiers().count(), 0 ); QCOMPARE( msg->messageID()->identifiers().count(), 0 ); delete msg; } void MessageTest::testDavidsParseCrash() { KMime::Message::Ptr mail = readAndParseMail( QLatin1String( "dfaure-crash.mbox" ) ); QCOMPARE( mail->to()->asUnicodeString().toLatin1().data(), "frank@domain.com" ); } void MessageTest::testHeaderFieldWithoutSpace() { // Headers without a space, like the CC header here, are allowed according to // the examples in RFC2822, Appendix A5 QString mail = "From:\n" "To: heinz@test.de\n" "Cc:moritz@test.de\n" "Subject: Test\n" "X-Mailer:"; KMime::Message msg; msg.setContent( mail.toLatin1() ); msg.parse(); QCOMPARE( msg.to()->asUnicodeString(), QString( "heinz@test.de" ) ); QCOMPARE( msg.from()->asUnicodeString(), QString() ); QCOMPARE( msg.cc()->asUnicodeString(), QString( "moritz@test.de" ) ); QCOMPARE( msg.subject()->asUnicodeString(), QString( "Test" ) ); QVERIFY( msg.hasHeader( "X-Mailer" ) ); QVERIFY( msg.headerByType( "X-Mailer" )->asUnicodeString().isEmpty() ); } void MessageTest::testWronglyFoldedHeaders() { // The first subject line here doesn't contain anything. This is invalid, // however there are some mailers out there that produce those messages. 
QString mail = "Subject:\n" " Hello\n" " World\n" "To: \n" " test@test.de\n\n" ""; KMime::Message msg; msg.setContent( mail.toLatin1() ); msg.parse(); QCOMPARE( msg.subject()->asUnicodeString(), QString( "Hello World" ) ); QCOMPARE( msg.body().data(), "" ); QCOMPARE( msg.to()->asUnicodeString(), QString( "test@test.de" ) ); } void MessageTest::missingHeadersTest() { // Test that the message body is OK even though some headers are missing KMime::Message msg; QString body = "Hi Donald, look at those nice pictures I found!\n"; QString content = "From: georgebush@whitehouse.org\n" "To: donaldrumsfeld@whitehouse.org\n" "Subject: Cute Kittens\n" "\n" + body; msg.setContent( content.toLatin1() ); msg.parse(); msg.assemble(); QCOMPARE( body, QString::fromLatin1( msg.body() ) ); // Now create a new message, based on the content of the first one. // The body of the new message should still be the same. // (there was a bug that caused missing mandatory headers to be // added as a empty newline, which caused parts of the header to // leak into the body) KMime::Message msg2; msg2.setContent( msg.encodedContent() ); msg2.parse(); msg2.assemble(); QCOMPARE( body, QString::fromLatin1( msg2.body() ) ); } void MessageTest::testBug219749() { // Test that the message body is OK even though some headers are missing KMime::Message msg; const QString content = "Content-Type: MULTIPART/MIXED;\n" " BOUNDARY=\"0-1804289383-1260384639=:52580\"\n" "\n" "--0-1804289383-1260384639=:52580\n" "Content-Type: TEXT/plain; CHARSET=UTF-8\n" "\n" "--0-1804289383-1260384639=:52580\n" "Content-Type: APPLICATION/octet-stream\n" "Content-Transfer-Encoding: BASE64\n" "Content-ID: \n" "Content-Disposition: ATTACHMENT; FILENAME=\"jaselka 1.docx\"\n" "\n" "UEsDBBQABgAIAAAAIQDd/JU3ZgEAACAFAAATAAgCW0NvbnRlbnRfVHlwZXNd\n" "SUwAAAAA\n" "\n" "--0-1804289383-1260384639=:52580--\n"; msg.setContent( content.toLatin1() ); msg.parse(); QCOMPARE( msg.contents().size(), 2 ); KMime::Content *attachment = msg.contents()[1]; 
QCOMPARE( attachment->contentType( false )->mediaType().data(), "application" ); QCOMPARE( attachment->contentType( false )->subType().data(), "octet-stream" ); QCOMPARE( attachment->contentID()->identifier().data(), "jaselka1.docx4AECA1F9@9230725.3CDBB752" ); QCOMPARE( attachment->contentID()->as7BitString( false ).data(), "" ); Headers::ContentDisposition *cd = attachment->contentDisposition( false ); QVERIFY( cd ); QCOMPARE( cd->filename(), QString( "jaselka 1.docx" ) ); } void MessageTest::testBidiSpoofing() { const QString RLO( QChar( 0x202E ) ); const QString PDF( QChar( 0x202C ) ); const QByteArray senderAndRLO = encodeRFC2047String( "Sender" + RLO + " ", "utf-8" ); // The display name of the "From" has an RLO, make sure the KMime parser balances it QByteArray data = "From: " + senderAndRLO + "\n" "\n" "Body"; KMime::Message msg; msg.setContent( data ); msg.parse(); // Test adjusted for taking into account that KMIME now removes bidi control chars // instead of adding PDF chars, because of broken KHTML. //const QString expectedDisplayName = "\"Sender" + RLO + PDF + "\""; const QString expectedDisplayName = "Sender"; const QString expectedMailbox = expectedDisplayName + " "; QCOMPARE( msg.from()->addresses().count(), 1 ); QCOMPARE( msg.from()->asUnicodeString(), expectedMailbox ); QCOMPARE( msg.from()->displayNames().first(), expectedDisplayName ); QCOMPARE( msg.from()->mailboxes().first().name(), expectedDisplayName ); QCOMPARE( msg.from()->mailboxes().first().address().data(), "sender@test.org" ); } // Test to see if header fields of mails with an UTF-16 body are properly read // and written. 
// See also https://issues.kolab.org/issue3707 void MessageTest::testUtf16() { QByteArray data = "From: foo@bar.com\n" "Subject: UTF-16 Test\n" "MIME-Version: 1.0\n" "Content-Type: Text/Plain;\n" " charset=\"utf-16\"\n" "Content-Transfer-Encoding: base64\n" "\n" "//5UAGgAaQBzACAAaQBzACAAVQBUAEYALQAxADYAIABUAGUAeAB0AC4ACgAKAAo"; KMime::Message msg; msg.setContent( data ); msg.parse(); QCOMPARE( msg.from()->asUnicodeString(), QString( "foo@bar.com" ) ); QCOMPARE( msg.subject()->asUnicodeString(), QString( "UTF-16 Test" ) ); QCOMPARE( msg.decodedText( false, true ), QString( "This is UTF-16 Text." ) ); // Add a new To header, for testings KMime::Headers::To *to = new KMime::Headers::To( &msg ); KMime::Types::Mailbox address; address.setAddress( "test@test.de" ); address.setName( "Fränz Töster" ); to->addAddress( address ); msg.appendHeader( to ); msg.assemble(); QByteArray newData = "From: foo@bar.com\n" "Subject: UTF-16 Test\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=\"utf-16\"\n" "Content-Transfer-Encoding: base64\n" "To: =?ISO-8859-1?Q?Fr=C3=A4nz_T=C3=B6ster?= \n" "\n" "//5UAGgAaQBzACAAaQBzACAAVQBUAEYALQAxADYAIABUAGUAeAB0AC4ACgAKAAoACg==\n"; QCOMPARE( msg.encodedContent().data(), newData.data() ); } void MessageTest::testDecodedText() { QByteArray data = "Subject: Test\n" "\n" "Testing Whitespace \n \n \n\n\n"; KMime::Message msg; msg.setContent( data ); msg.parse(); QCOMPARE( msg.decodedText( true, false ), QString( "Testing Whitespace" ) ); QCOMPARE( msg.decodedText( true, true ), QString( "Testing Whitespace" ) ); QCOMPARE( msg.decodedText( false, true ), QString( "Testing Whitespace \n \n " ) ); QByteArray data2 = "Subject: Test\n" "\n" "Testing Whitespace \n \n \n\n\n "; KMime::Message msg2; msg2.setContent( data2 ); msg2.parse(); QCOMPARE( msg2.decodedText( true, false ), QString( "Testing Whitespace" ) ); QCOMPARE( msg2.decodedText( true, true ), QString( "Testing Whitespace" ) ); QCOMPARE( msg2.decodedText( false, true ), QString( "Testing 
Whitespace \n \n \n\n\n " ) ); } void MessageTest::testInlineImages() { QByteArray data = "From: \n" "To: kde@kde.org\n" "Subject: Inline Image (unsigned)\n" "Date: Wed, 23 Dec 2009 14:00:59 +0100\n" "MIME-Version: 1.0\n" "Content-Type: multipart/related;\n" " boundary=\"Boundary-02=_LShMLJyjC7zqmVP\"\n" "Content-Transfer-Encoding: 7bit\n" "\n" "\n" "--Boundary-02=_LShMLJyjC7zqmVP\n" "Content-Type: multipart/alternative;\n" " boundary=\"Boundary-01=_LShMLzAUPqE38S8\"\n" "Content-Transfer-Encoding: 7bit\n" "Content-Disposition: inline\n" "\n" "--Boundary-01=_LShMLzAUPqE38S8\n" "Content-Type: text/plain;\n" " charset=\"us-ascii\"\n" "Content-Transfer-Encoding: 7bit\n" "\n" "First line\n" "\n" "\n" "Image above\n" "\n" "Last line\n" "\n" "--Boundary-01=_LShMLzAUPqE38S8\n" "Content-Type: text/html;\n" " charset=\"us-ascii\"\n" "Content-Transfer-Encoding: 7bit\n" "\n" "Line 1\n" "--Boundary-01=_LShMLzAUPqE38S8--\n" "\n" "--Boundary-02=_LShMLJyjC7zqmVP\n" "Content-Type: image/png;\n" " name=\"inlineimage.png\"\n" "Content-Transfer-Encoding: base64\n" "Content-Id: <740439759>\n" "\n" "jxrG/ha/VB+rODav6/d5i1US6Za/YEMvtm2SgJC/CXVFiD3UFSH2UFeE2ENdEWIPdUWIPdQVIfZQ\n" "V4TYQ10RYg91RYg91BUh9lBXhNhDXRFiD3VFiD3UFSH2UFeE2ENdEWIPdUWIPdQVIfZQV4TYQ10R\n" "Yg91RYg91BUh9lBX5E+Tz6Vty1HSx+NR++UuCOqKEHv+Ax0Y5U59+AHBAAAAAElFTkSuQmCC\n" "\n" "--Boundary-02=_LShMLJyjC7zqmVP--"; KMime::Message msg; msg.setContent( data ); msg.parse(); QCOMPARE( msg.contents().size(), 2 ); QCOMPARE( msg.contents()[0]->contentType()->isMultipart(), true ); QCOMPARE( msg.contents()[0]->contentType()->subType().data(), "alternative" ); QCOMPARE( msg.contents()[1]->contentType()->isImage(), true ); QCOMPARE( msg.contents()[1]->contentType()->name(), QString( "inlineimage.png" ) ); QCOMPARE( msg.contents()[1]->contentID()->identifier().data(), "740439759" ); QCOMPARE( msg.contents()[1]->contentID()->as7BitString( false ).data(), "<740439759>" ); } void MessageTest::testIssue3908() { KMime::Message::Ptr msg = 
readAndParseMail( "issue3908.mbox" ); QCOMPARE( msg->contents().size(), 2 ); KMime::Content *attachment = msg->contents().at( 1 ); QVERIFY( attachment ); QVERIFY( attachment->contentDescription( false ) ); QCOMPARE( attachment->contentDescription()->asUnicodeString(), QString::fromUtf8( "Kontact oder auch KDE-PIM ist der Groupware-Client aus der KDE Software Compilation 4.Eine der Besonderheiten von Kontact " "gegenüber anderen Groupware-Clients ist, dass die Teil-Programme auch weiterhin unabhängig von Kontact gestartet werden " "können. So spielt es zum Beispiel keine Rolle für das Arbeiten mit KMail, ob es mal allein oder mal im Rahmen von Kontact " "gestartet wird: Die Mails und die persönlichen Einstellungen bleiben stets erhalten.Auch sieht Kontact eine modulare " "Anbindung der Programme vor, wodurch sich auch in Zukunft weitere Module entwickeln und anfügen lassen, ohne Kontact " "dafür zu ändern. Dies bietet die Möglichkeit, auch privat entwickelte Module einzubinden und so die Groupware grundlegend " "eigenen Bedürfnissen anzupassen." ) ); } void MessageTest::testIssue3914() { // This loads a mail which has a content-disposition of which the filename parameter is empty. // Check that the parser doesn't choke on this. 
KMime::Message::Ptr msg = readAndParseMail( "broken-content-disposition.mbox" ); QCOMPARE( msg->subject()->as7BitString().data(), "Subject: Fwd: test broken mail" ); QCOMPARE( msg->contents().size(), 2 ); KMime::Content *attachedMail = msg->contents().at( 1 ); QCOMPARE( attachedMail->contentType()->mimeType().data(), "message/rfc822" ); QVERIFY( attachedMail->contentDisposition( false ) ); QVERIFY( attachedMail->contentDisposition()->hasParameter( "filename" ) ); QVERIFY( attachedMail->contentDisposition()->parameter( "filename" ).isEmpty() ); } void MessageTest::testBug223509() { KMime::Message::Ptr msg = readAndParseMail( "encoding-crash.mbox" ); QCOMPARE( msg->subject()->as7BitString().data(), "Subject: Blub" ); QCOMPARE( msg->contents().size(), 0 ); QCOMPARE( msg->contentTransferEncoding()->encoding(), KMime::Headers::CEbinary ); - QCOMPARE( msg->decodedText().toLatin1().data(), "Bla Bla Bla\n" ); + QCOMPARE( msg->decodedText().toLatin1().data(), "Bla Bla Bla" ); + QCOMPARE(msg->encodedBody().data(), "Bla Bla Bla\n"); // encodedContent() was crashing in this bug because of an invalid assert QVERIFY( !msg->encodedContent().isEmpty() ); // Make sure that the encodedContent() is sane, by parsing it again. 
KMime::Message msg2; msg2.setContent( msg->encodedContent() ); msg2.parse(); + QCOMPARE(msg2.encodedContent(), msg->encodedContent()); QCOMPARE( msg2.subject()->as7BitString().data(), "Subject: Blub" ); QCOMPARE( msg2.contents().size(), 0 ); QCOMPARE( msg2.contentTransferEncoding()->encoding(), KMime::Headers::CEbinary ); - QEXPECT_FAIL( "", "KMime adds an additional newline", Continue ); - QCOMPARE( msg2.decodedText().toLatin1().data(), "Bla Bla Bla\n" ); + QCOMPARE( msg2.decodedText().toLatin1().data(), "Bla Bla Bla" ); QCOMPARE( msg2.decodedText( true, true /* remove newlines at end */ ).toLatin1().data(), "Bla Bla Bla" ); } void MessageTest::testEncapsulatedMessages() { // // First, test some basic properties to check that the parsing was correct // KMime::Message::Ptr msg = readAndParseMail( "simple-encapsulated.mbox" ); QCOMPARE( msg->contentType()->mimeType().data(), "multipart/mixed" ); QCOMPARE( msg->contents().size(), 2 ); QVERIFY( msg->isTopLevel() ); KMime::Content * const textContent = msg->contents().at( 0 ); QCOMPARE( textContent->contentType()->mimeType().data(), "text/plain" ); QVERIFY( textContent->contents().isEmpty() ); QVERIFY( !textContent->bodyIsMessage() ); QVERIFY( !textContent->bodyAsMessage() ); QVERIFY( !textContent->isTopLevel() ); QCOMPARE( textContent->decodedText( true, true ), QString( "Hi Hans!\nLook at this interesting mail I forwarded to you!" 
) ); QCOMPARE( textContent->index().toString().toLatin1().data(), "1" ); KMime::Content * messageContent = msg->contents().at( 1 ); QCOMPARE( messageContent->contentType()->mimeType().data(), "message/rfc822" ); QVERIFY( messageContent->body().isEmpty() ); QCOMPARE( messageContent->contents().count(), 1 ); QVERIFY( messageContent->bodyIsMessage() ); QVERIFY( messageContent->bodyAsMessage().get() ); QVERIFY( !messageContent->isTopLevel() ); QCOMPARE( messageContent->index().toString().toLatin1().data(), "2" ); KMime::Message::Ptr encapsulated = messageContent->bodyAsMessage(); QCOMPARE( encapsulated->contents().size(), 0 ); QCOMPARE( encapsulated->contentType()->mimeType().data(), "text/plain" ); QVERIFY( !encapsulated->bodyIsMessage() ); QVERIFY( !encapsulated->bodyAsMessage() ); QCOMPARE( encapsulated->subject()->as7BitString( false ).data(), "Foo" ); QCOMPARE( encapsulated->decodedText( false, false ), QString( "This is the encapsulated message body." ) ); QCOMPARE( encapsulated.get(), messageContent->bodyAsMessage().get() ); QCOMPARE( encapsulated.get(), messageContent->contents().first() ); QCOMPARE( encapsulated->parent(), messageContent ); QVERIFY( !encapsulated->isTopLevel() ); QCOMPARE( encapsulated->topLevel(), msg.get() ); QCOMPARE( encapsulated->index().toString().toLatin1().data(), "2.1" ); // Now test some misc functions QCOMPARE( msg->storageSize(), msg->head().size() + textContent->storageSize() + messageContent->storageSize() ); QCOMPARE( messageContent->storageSize(), messageContent->head().size() + encapsulated->storageSize() ); // Now change some properties on the encapsulated message encapsulated->subject()->fromUnicodeString( QString( "New subject" ), "us-ascii" ); encapsulated->fromUnicodeString( QString( "New body string." 
) ); // Since we didn't assemble the encapsulated message yet, it should still have the old headers QVERIFY( encapsulated->encodedContent().contains( "Foo" ) ); QVERIFY( !encapsulated->encodedContent().contains( "New subject" ) ); // Now assemble the container message msg->assemble(); // Assembling the container message should have assembled the encapsulated message as well. QVERIFY( !encapsulated->encodedContent().contains( "Foo" ) ); QVERIFY( encapsulated->encodedContent().contains( "New subject" ) ); QCOMPARE( encapsulated->body().data(), "New body string." ); QVERIFY( msg->encodedContent().contains( encapsulated->body() ) ); QCOMPARE( msg->contentType()->mimeType().data(), "multipart/mixed" ); QCOMPARE( msg->contents().size(), 2 ); messageContent = msg->contents().at( 1 ); QCOMPARE( messageContent->contentType()->mimeType().data(), "message/rfc822" ); QVERIFY( encapsulated.get() == messageContent->bodyAsMessage().get() ); // Setting a new body and then parsing it should discard the encapsulated message messageContent->contentType()->setMimeType( "text/plain" ); messageContent->assemble(); messageContent->setBody( "Some new body" ); messageContent->parse(); QVERIFY( !messageContent->bodyIsMessage() ); QVERIFY( !messageContent->bodyAsMessage() ); QCOMPARE( messageContent->contents().size(), 0 ); } void MessageTest::testOutlookAttachmentNaming() { KMime::setUseOutlookAttachmentEncoding( true ); // Try and decode KMime::Message::Ptr msg = readAndParseMail( "outlook-attachment.mbox" ); QVERIFY( msg->attachments().count() == 1 ); KMime::Content *attachment = msg->contents()[1]; QCOMPARE( attachment->contentType( false )->mediaType().data(), "text" ); QCOMPARE( attachment->contentType( false )->subType().data(), "x-patch" ); Headers::ContentDisposition *cd = attachment->contentDisposition( false ); QVERIFY( cd ); QCOMPARE( cd->filename(), QString::fromUtf8( "å.diff" ) ); // Try and encode attachment->clear();// = new Content(); 
attachment->contentDisposition()->setDisposition( Headers::CDattachment ); attachment->contentDisposition()->setFilename( QString::fromUtf8( "å.diff" ) ); attachment->assemble(); kDebug() << "got:" << attachment->contentDisposition()->as7BitString( false ); QCOMPARE( attachment->contentDisposition()->as7BitString( false ), QByteArray( "attachment; filename=\"=?ISO-8859-1?Q?=E5=2Ediff?=\"" ) ); KMime::setUseOutlookAttachmentEncoding( false ); } +void MessageTest::testEncryptedMails() +{ + KMime::Message::Ptr msg = readAndParseMail("x-pkcs7.mbox"); + QVERIFY(msg->attachments().count() == 1); + QVERIFY(KMime::isEncrypted(msg.get()) == true); + QVERIFY(KMime::isInvitation(msg.get()) == false); + QVERIFY(KMime::isSigned(msg.get()) == false); +} + +void MessageTest::testCopyFlags() +{ + { + KMime::Message::Ptr msg = readAndParseMail("x-pkcs7.mbox"); + + Akonadi::Item item; + Akonadi::MessageFlags::copyMessageFlags(*msg, item); + + QVERIFY(item.hasFlag(Akonadi::MessageFlags::Signed) == false); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::Encrypted) == true); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::HasInvitation) == false); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::HasAttachment) == false); + } + + { + KMime::Message::Ptr msg = readAndParseMail("signed.mbox"); + + Akonadi::Item item; + Akonadi::MessageFlags::copyMessageFlags(*msg, item); + + QVERIFY(item.hasFlag(Akonadi::MessageFlags::Signed) == true); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::Encrypted) == false); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::HasInvitation) == true); + QVERIFY(item.hasFlag(Akonadi::MessageFlags::HasAttachment) == true); + } +} + +void MessageTest::testReturnSameMail() +{ + KMime::Message::Ptr msg = readAndParseMail("dontchangemail.mbox"); + QFile file(TEST_DATA_DIR"/mails/dontchangemail.mbox"); + const bool ok = file.open(QIODevice::ReadOnly); + if (!ok) { + qWarning() << file.fileName() << "not found"; + } + Q_ASSERT(ok); + QByteArray fileContent = file.readAll(); + 
QCOMPARE(msg->encodedContent(), fileContent); + QCOMPARE(msg->decodedText(), QLatin1String("")); + KMime::Message msg2; + msg2.setContent(msg->encodedContent()); + msg2.parse(); + QCOMPARE(msg2.encodedContent(), fileContent); +} + KMime::Message::Ptr MessageTest::readAndParseMail( const QString &mailFile ) const { QFile file( TEST_DATA_DIR"/mails/" + mailFile ); const bool ok = file.open( QIODevice::ReadOnly ); if ( !ok ) { qWarning() << file.fileName() << "not found"; } Q_ASSERT( ok ); const QByteArray data = KMime::CRLFtoLF( file.readAll() ); Q_ASSERT( !data.isEmpty() ); KMime::Message::Ptr msg( new KMime::Message ); msg->setContent( data ); msg->parse(); return msg; } diff --git a/kmime/tests/auto/messagetest.h b/kmime/tests/auto/messagetest.h index 6efac03fb..e7c5a4ae1 100644 --- a/kmime/tests/auto/messagetest.h +++ b/kmime/tests/auto/messagetest.h @@ -1,54 +1,57 @@ /* Copyright (c) 2007 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #ifndef MESSAGE_TEST_H #define MESSAGE_TEST_H #include #include #include class MessageTest : public QObject { Q_OBJECT private Q_SLOTS: void testMainBodyPart(); void testBrunosMultiAssembleBug(); void testWillsAndTillsCrash(); void testDavidsParseCrash(); void testHeaderFieldWithoutSpace(); void testWronglyFoldedHeaders(); void missingHeadersTest(); void testBug219749(); void testBidiSpoofing(); void testUtf16(); void testDecodedText(); void testInlineImages(); void testIssue3908(); void testIssue3914(); void testBug223509(); void testEncapsulatedMessages(); void testOutlookAttachmentNaming(); + void testEncryptedMails(); + void testCopyFlags(); + void testReturnSameMail(); private: KMime::Message::Ptr readAndParseMail( const QString &mailFile ) const; }; #endif diff --git a/kmime/tests/data/mails/dontchangemail.mbox b/kmime/tests/data/mails/dontchangemail.mbox new file mode 100644 index 000000000..e68b1459f --- /dev/null +++ b/kmime/tests/data/mails/dontchangemail.mbox @@ -0,0 +1,49 @@ +From: test@test.de +To: a@b.de +Subject: test +Mime-Version: 1.0 +Content-Type: multipart/signed; micalg=PGP-SHA1; + boundary="Sig_/bf9NRhujKrpGsHEluCGG1Ux"; protocol="application/pgp-signature" + +--Sig_/bf9NRhujKrpGsHEluCGG1Ux +Content-Type: multipart/mixed; boundary="MP_/H9EtWvl.za1Pp+XaxTBdQH8" + +--MP_/H9EtWvl.za1Pp+XaxTBdQH8 +Content-Type: text/plain; charset=US-ASCII +Content-Transfer-Encoding: quoted-printable +Content-Disposition: inline + +test + +--MP_/H9EtWvl.za1Pp+XaxTBdQH8 +Content-Type: text/plain +Content-Transfer-Encoding: quoted-printable +Content-Disposition: attachment; filename=poc.txt + +hdsswkrwerifjekdjvsze3diocfgh + +--MP_/H9EtWvl.za1Pp+XaxTBdQH8-- + +--Sig_/bf9NRhujKrpGsHEluCGG1Ux +Content-Type: application/pgp-signature; name=signature.asc +Content-Disposition: attachment; filename=signature.asc + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v2.0.22 (GNU/Linux) + +iQIcBAEBAgAGBQJTOSvMAAoJELtUlLP5ziidaY0P/1fhefScUXUoKB52HXx+uRZY 
+PAXw0RwIHbOG2V6dcKcC697/XG0xjRItpSkYuK0SzldFVrwarAs8RWqADfqYtL3z +40LRdUjldAP1PNOLyrBRdJv0TMNwu3waKZ2yug3d48dbmdlt5LU3PUEgY88odClg +0mP3oWtAp+gzINwJu3B3pW07kdcvyR165w8MnF0hKyXw9mEMrV0WB3pvUbg76Fym +LsG8haTu+U/qVGSs1NuUEXCPCFxqYo0MPjjjTG3i7yvF/9iBsHTM2kIgbJB5DbGN +HOy0yOxj1AI8wHNKxG+ylM9jYDAholKXbHJBQhI5LE1pVvdZG7svva1xCl7w0752 +C5bL866CJosH9a3qSJkCnJK20yEvJPFhDsA4HcTln1vo4oh9cNON9mYaheHCLCXP +N9cMmNMroC2nMW65IkOIkqUncaMRiH58Ykb/EWS9iPqXzyya7T8mCJed5kgb39Iw +IL1zK6S378nn9qDisGW51Lp4mkiabLMnE62XPoSDwR4zHG8ubd3uGmV1p1RvkETJ +ciWW31cXtEV80LtnaZzwtKrbqQ/nsvIAv5F6d+iQsh74npFgZJPbS9RS3JjUIHk7 +wnglbXaRq7POR7SssNZAM0x8ylFJ/XCgYKkj2w4WYw437IcNGDdpDXWzZ0CT19oC +Z/tmc6Ahb8f3J+8Uj+VU +=/zVi +-----END PGP SIGNATURE----- + +--Sig_/bf9NRhujKrpGsHEluCGG1Ux--