diff --git a/kmime/kmime_charfreq.cpp b/kmime/kmime_charfreq.cpp
index 6880a2417..053a3f8b1 100644
--- a/kmime/kmime_charfreq.cpp
+++ b/kmime/kmime_charfreq.cpp
@@ -1,252 +1,278 @@
 /*
     kmime_charfreq.cpp

     KMime, the KDE internet mail/usenet news message library.
     Copyright (c) 2001-2002 Marc Mutz

     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
     License as published by the Free Software Foundation; either
     version 2 of the License, or (at your option) any later version.

     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
     Library General Public License for more details.

     You should have received a copy of the GNU Library General Public License
     along with this library; see the file COPYING.LIB. If not, write to
     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     Boston, MA 02110-1301, USA.
 */

 /**
   @file
   This file is part of the API for handling MIME data and
   defines the CharFreq class.

   @brief
   Defines the CharFreq class.

   @authors Marc Mutz \
 */

 #include "kmime_charfreq.h"

 using namespace KMime;

 /**
  * Private class that helps to provide binary compatibility between releases.
  * @internal
  */
 //@cond PRIVATE
 //class KMime::CharFreq::Private
 //{
 //  public:
 //};
 //@endcond

 CharFreq::CharFreq( const QByteArray &buf )
   : mNUL( 0 ),
     mCTL( 0 ),
     mCR( 0 ), mLF( 0 ),
     mCRLF( 0 ),
     mPrintable( 0 ),
     mEightBit( 0 ),
     mTotal( 0 ),
     mLineMin( 0xffffffff ),
     mLineMax( 0 ),
     mTrailingWS( false ),
     mLeadingFrom( false )
 {
   if ( !buf.isEmpty() ) {
     count( buf.data(), buf.size() );
   }
 }

 CharFreq::CharFreq( const char *buf, size_t len )
   : mNUL( 0 ),
     mCTL( 0 ),
     mCR( 0 ), mLF( 0 ),
     mCRLF( 0 ),
     mPrintable( 0 ),
     mEightBit( 0 ),
     mTotal( 0 ),
     mLineMin( 0xffffffff ),
     mLineMax( 0 ),
     mTrailingWS( false ),
     mLeadingFrom( false )
 {
   if ( buf && len > 0 ) {
     count( buf, len );
   }
 }

 //@cond PRIVATE
 static inline bool isWS( char ch )
 {
   return ( ch == '\t' || ch == ' ' );
 }
 //@endcond

+/**
+ * Scans @p len bytes starting at @p it and updates the per-character
+ * counters, the shortest/longest line lengths and the trailing-whitespace
+ * and leading-"From " flags.
+ *
+ * Line lengths exclude the terminating LF or CRLF; the text after the last
+ * line break is treated as a final line even when it is empty.
+ */
 void CharFreq::count( const char *it, size_t len )
 {
   const char *end = it + len;
   uint currentLineLength = 0;
   // initialize the prevChar with LF so that From_ detection works w/o
   // special-casing:
   char prevChar = '\n';
   char prevPrevChar = 0;

   for ( ; it != end ; ++it ) {
     ++currentLineLength;
     switch ( *it ) {
     case '\0': ++mNUL; break;
     case '\r': ++mCR; break;
     case '\n': ++mLF;
       if ( prevChar == '\r' ) {
         --currentLineLength; ++mCRLF;
       }
-      if ( currentLineLength >= mLineMax ) {
+      // currentLineLength still includes the LF here, so the line itself is
+      // currentLineLength-1 long. Compare that value: testing the raw counter
+      // with ">=" let a line one character shorter than the current maximum
+      // lower mLineMax.
+      if ( currentLineLength - 1 > mLineMax ) {
         mLineMax = currentLineLength-1;
       }
       if ( currentLineLength <= mLineMin ) {
         mLineMin = currentLineLength-1;
       }
       if ( !mTrailingWS ) {
         if ( isWS( prevChar ) ||
              ( prevChar == '\r' && isWS( prevPrevChar ) ) ) {
           mTrailingWS = true;
         }
       }
       currentLineLength = 0;
       break;
     case 'F': // check for lines starting with From_ if not found already:
       if ( !mLeadingFrom ) {
         if ( prevChar == '\n' && end - it >= 5 &&
              !qstrncmp( "From ", it, 5 ) ) {
           mLeadingFrom = true;
         }
       }
       ++mPrintable;
       break;
     default:
     {
       uchar c = *it;
       if ( c == '\t' || ( c >= ' ' && c <= '~' ) ) {
         ++mPrintable;
       } else if ( c == 127 || c < ' ' ) {
         ++mCTL;
       } else {
         ++mEightBit;
       }
     }
     }
     prevPrevChar = prevChar;
     prevChar = *it;
   }

   // consider the length of the last line
   if ( currentLineLength >= mLineMax ) {
     mLineMax = currentLineLength;
   }
   if ( currentLineLength <= mLineMin ) {
     mLineMin = currentLineLength;
   }

   // check whether the last character is tab or space
   if ( isWS( prevChar ) ) {
     mTrailingWS = true;
   }

   mTotal = len;
 }

 bool CharFreq::isEightBitData() const
 {
   return type() == EightBitData;
 }

 bool CharFreq::isEightBitText() const
 {
   return type() == EightBitText;
 }

 bool CharFreq::isSevenBitData() const
 {
   return type() == SevenBitData;
 }

 bool CharFreq::isSevenBitText() const
 {
   return type() == SevenBitText;
 }

 bool CharFreq::hasTrailingWhitespace() const
 {
   return mTrailingWS;
 }

 bool CharFreq::hasLeadingFrom() const
 {
   return mLeadingFrom;
 }

+/**
+ * Classifies the counted data.
+ *
+ * Any NUL makes it Binary. Otherwise the result is the 8bit or 7bit variant
+ * (depending on whether 8bit characters were seen) of "Data" when a line is
+ * longer than 988 characters, a CR occurs outside a CRLF pair, bare LF and
+ * CRLF line endings are mixed, or more than 20% of the characters are
+ * control codes, and of "Text" otherwise. Input that uses only bare LF line
+ * endings (no CRLF at all) still counts as text.
+ */
 CharFreq::Type CharFreq::type() const
 {
 #if 0
   qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
           "CR: %d; LF: %d; CRLF: %d;\n"
           "lineMin: %d; lineMax: %d;\n"
           "printable: %d; eightBit: %d;\n"
           "trailing whitespace: %s;\n"
           "leading 'From ': %s;\n",
           total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
           printable, eightBit,
           mTrailingWS ? "yes" : "no" ,
           mLeadingFrom ? "yes" : "no" );
 #endif
   if ( mNUL ) { // must be binary
     return Binary;
   }

   // doesn't contain NUL's:
   if ( mEightBit ) {
+    // NOTE(review): the limit tested is 988 although the closing comment of
+    // this function (and RFC 5322) speak of 998 characters - confirm which
+    // value is intended.
     if ( mLineMax > 988 ) {
       return EightBitData; // not allowed in 8bit
     }
-    if ( mLF != mCRLF || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
+    // same rule as the 7bit branch below: LF-only line endings are fine
+    if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
       return EightBitData;
     }
     return EightBitText;
   }

   // doesn't contain NUL's, nor 8bit chars:
   if ( mLineMax > 988 ) {
     return SevenBitData;
   }
-  if ( mLF != mCRLF || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
+  if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
     return SevenBitData;
   }

   // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
   return SevenBitText;
 }

 float CharFreq::printableRatio() const
 {
   if ( mTotal ) {
     return float(mPrintable) / float(mTotal);
   } else {
     return 0;
   }
 }

 float CharFreq::controlCodesRatio() const
 {
   if ( mTotal ) {
     return float(mCTL) / float(mTotal);
   } else {
     return 0;
   }
 }
diff --git a/kmime/tests/kmime_charfreq_test.cpp b/kmime/tests/kmime_charfreq_test.cpp
index
fc6d9299b..4067821c1 100644
--- a/kmime/tests/kmime_charfreq_test.cpp
+++ b/kmime/tests/kmime_charfreq_test.cpp
@@ -1,154 +1,163 @@
 /*
     Copyright (c) 2009 Constantin Berzan

     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
     License as published by the Free Software Foundation; either
     version 2 of the License, or (at your option) any later version.

     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
     Library General Public License for more details.

     You should have received a copy of the GNU Library General Public License
     along with this library; see the file COPYING.LIB. If not, write to
     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     Boston, MA 02110-1301, USA.
 */

 #include "kmime_charfreq_test.h"

 #include
 #include
 #include

 using namespace KMime;

 QTEST_KDEMAIN( KMimeCharFreqTest, NoGUI )

 void KMimeCharFreqTest::test8bitData()
 {
   {
     // If it has NUL then it's Binary (equivalent to EightBitData in CharFreq).
     QByteArray data( "123" );
     data += char( 0 );
     data += "test";
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::Binary );
   }

   {
     // If it has lines longer than 998, it's EightBitData.
     QByteArray data;
     for( int i = 0; i < 999; i++ ) {
       data += char( 169 );
     }
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::EightBitData );
   }

   {
     // If #CR != #CRLF then it's EightBitData.
     QByteArray data( "©line1\r\nline2\r" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::EightBitData );
   }

   {
     // If #LF != #CRLF then it's EightBitData.
     QByteArray data( "©line1\r\nline2\n" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::EightBitData );
   }

   {
     // If it has a lot of control chars, it's EightBitData.
     QByteArray data( "©test\a\a\a\a\a\a\a" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::EightBitData );
   }
 }

 void KMimeCharFreqTest::test8bitText()
 {
   {
     // If it has no NULs, few CTLs, no stray CRs or LFs, it's EightBitText.
     QByteArray data( "©beware the beast but enjoy the feast he offers...\r\n" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::EightBitText );
   }
 }

 void KMimeCharFreqTest::test7bitData()
 {
   {
     // If it has lines longer than 998, it's SevenBitData.
     QByteArray data;
     for( int i = 0; i < 999; i++ ) {
       data += 'a';
     }
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::SevenBitData );
   }

   {
     // If #CR != #CRLF then it's SevenBitData.
     QByteArray data( "line1\r\nline2\r" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::SevenBitData );
   }

   {
     // If #LF != #CRLF then it's SevenBitData.
     QByteArray data( "line1\r\nline2\n" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::SevenBitData );
   }

+  {
+    // If the text uses only bare LF line endings (no CRLF at all),
+    // it is still SevenBitText.
+    QByteArray data( "line1\nline2\n" );
+    kDebug() << data;
+    CharFreq cf( data );
+    QCOMPARE( cf.type(), CharFreq::SevenBitText );
+  }
+
   {
     // If it has a lot of control chars, it's SevenBitData.
     QByteArray data( "test\a\a\a\a\a\a\a" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::SevenBitData );
   }
 }

 void KMimeCharFreqTest::test7bitText()
 {
   {
     // If it has no NULs, few CTLs, no stray CRs or LFs, it's SevenBitText.
     QByteArray data( "beware the beast but enjoy the feast he offers...\r\n" );
     kDebug() << data;
     CharFreq cf( data );
     QCOMPARE( cf.type(), CharFreq::SevenBitText );
   }
 }

 void KMimeCharFreqTest::testTrailingWhitespace()
 {
   QByteArray data( "test " );
   kDebug() << data;
   CharFreq cf( data );
   QVERIFY( cf.hasTrailingWhitespace() );
 }

 void KMimeCharFreqTest::testLeadingFrom()
 {
   QByteArray data( "From here thither" );
   kDebug() << data;
   CharFreq cf( data );
   QVERIFY( cf.hasLeadingFrom() );
 }

 #include "kmime_charfreq_test.moc"