diff --git a/kpimutils/linklocator.cpp b/kpimutils/linklocator.cpp index 8b5a18dea..a98057597 100644 --- a/kpimutils/linklocator.cpp +++ b/kpimutils/linklocator.cpp @@ -1,458 +1,452 @@ /* Copyright (c) 2002 Dave Corrie This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /** @file This file is part of the KDEPIM Utilities library and provides the LinkLocator class. @brief Identifies URLs and email addresses embedded in plaintext. @author Dave Corrie \ */ #include "linklocator.h" #include #include #include #include #include #if KDE_IS_VERSION( 4, 0, 95 ) #include #endif #include #include #include #include #include using namespace KPIMUtils; /** Private class that helps to provide binary compatibility between releases. @internal */ //@cond PRIVATE class KPIMUtils::LinkLocator::Private { public: int mMaxUrlLen; int mMaxAddressLen; }; //@endcond #if KDE_IS_VERSION( 4, 0, 95 ) // Use a static for this as calls to the KEmoticons constructor are expensive. K_GLOBAL_STATIC( KEmoticons, sEmoticons ) #endif LinkLocator::LinkLocator( const QString &text, int pos ) : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private ) { d->mMaxUrlLen = 4096; d->mMaxAddressLen = 255; // If you change either of the above values for maxUrlLen or // maxAddressLen, then please also update the documentation for // setMaxUrlLen()/setMaxAddressLen() in the header file AND the // default values used for the maxUrlLen/maxAddressLen parameters // of convertToHtml(). } LinkLocator::~LinkLocator() { delete d; } void LinkLocator::setMaxUrlLen( int length ) { d->mMaxUrlLen = length; } int LinkLocator::maxUrlLen() const { return d->mMaxUrlLen; } void LinkLocator::setMaxAddressLen( int length ) { d->mMaxAddressLen = length; } int LinkLocator::maxAddressLen() const { return d->mMaxAddressLen; } QString LinkLocator::getUrl() { QString url; if ( atUrl() ) { // for reference: rfc1738: // Thus, only alphanumerics, the special characters "$-_.+!*'(),", and // reserved characters used for their reserved purposes may be used // unencoded within a URL. // NOTE: this implementation is not RFC conforming int start = mPos; while ( mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' && QString( "<>[]" ).indexOf( mText[mPos] ) == -1 ) { ++mPos; } // some URLs really end with: # / & - _ const QString allowedSpecialChars = QString( "#/&-_" ); while ( mPos > start && mText[mPos-1].isPunct() && allowedSpecialChars.indexOf( mText[mPos-1] ) == -1 ) { --mPos; } url = mText.mid( start, mPos - start ); if ( isEmptyUrl(url) || mPos - start > maxUrlLen() ) { mPos = start; url = ""; } else { --mPos; } } return url; } // keep this in sync with KMMainWin::slotUrlClicked() bool LinkLocator::atUrl() const { // the following characters are allowed in a dot-atom (RFC 2822): // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); // the character directly before the URL must not be a letter, a number or // any other character allowed in a dot-atom (RFC 2822). if ( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() || ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) { return false; } QChar ch = mText[mPos]; return ( ch == 'h' && ( mText.mid( mPos, 7 ) == "http://" || mText.mid( mPos, 8 ) == "https://" ) ) || ( ch == 'v' && mText.mid( mPos, 6 ) == "vnc://" ) || ( ch == 'f' && ( mText.mid( mPos, 7 ) == "fish://" || mText.mid( mPos, 6 ) == "ftp://" || mText.mid( mPos, 7 ) == "ftps://" ) ) || ( ch == 's' && ( mText.mid( mPos, 7 ) == "sftp://" || mText.mid( mPos, 6 ) == "smb://" ) ) || ( ch == 'm' && mText.mid( mPos, 7 ) == "mailto:" ) || ( ch == 'w' && mText.mid( mPos, 4 ) == "www." ) || ( ch == 'f' && ( mText.mid( mPos, 4 ) == "ftp." || mText.mid( mPos, 7 ) == "file://" ) ) || ( ch == 'n' && mText.mid( mPos, 5 ) == "news:" ); } bool LinkLocator::isEmptyUrl( const QString &url ) const { return url.isEmpty() || url == "http://" || url == "https://" || url == "fish://" || url == "ftp://" || url == "ftps://" || url == "sftp://" || url == "smb://" || url == "vnc://" || url == "mailto" || url == "www" || url == "ftp" || url == "news" || url == "news://"; } QString LinkLocator::getEmailAddress() { QString address; if ( mText[mPos] == '@' ) { // the following characters are allowed in a dot-atom (RFC 2822): // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); // determine the local part of the email address int start = mPos - 1; while ( start >= 0 && mText[start].unicode() < 128 && ( mText[start].isLetterOrNumber() || mText[start] == '@' || // allow @ to find invalid email addresses allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) { if ( mText[start] == '@' ) { return QString(); // local part contains '@' -> no email address } --start; } ++start; // we assume that an email address starts with a letter or a digit while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) { ++start; } if ( start == mPos ) { return QString(); // local part is empty -> no email address } // determine the domain part of the email address int dotPos = INT_MAX; int end = mPos + 1; while ( end < (int)mText.length() && ( mText[end].isLetterOrNumber() || mText[end] == '@' || // allow @ to find invalid email addresses mText[end] == '.' || mText[end] == '-' ) ) { if ( mText[end] == '@' ) { return QString(); // domain part contains '@' -> no email address } if ( mText[end] == '.' ) { dotPos = qMin( dotPos, end ); // remember index of first dot in domain } ++end; } // we assume that an email address ends with a letter or a digit while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) { --end; } if ( end == mPos ) { return QString(); // domain part is empty -> no email address } if ( dotPos >= end ) { return QString(); // domain part doesn't contain a dot } if ( end - start > maxAddressLen() ) { return QString(); // too long -> most likely no email address } address = mText.mid( start, end - start ); mPos = end - 1; } return address; } QString LinkLocator::convertToHtml( const QString &plainText, int flags, int maxUrlLen, int maxAddressLen ) { LinkLocator locator( plainText ); locator.setMaxUrlLen( maxUrlLen ); locator.setMaxAddressLen( maxAddressLen ); QString str; QString result( (QChar*)0, (int)locator.mText.length() * 2 ); QChar ch; int x; bool startOfLine = true; QString emoticon; for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++ ) { ch = locator.mText[locator.mPos]; if ( flags & PreserveSpaces ) { if ( ch == ' ' ) { - if ( startOfLine ) { - startOfLine = false; - } - - // The first space gets replaced by a normal space, the following ones by non-breaking spaces. - // The exception is if the first space is also the last character in this line, then we want - // a non-breaking space only (bug 204101) - // We can't make all spaces non-breaking, as then wordwrap wouldn't work anymore. if ( locator.mPos + 1 < locator.mText.length() ) { - if ( locator.mText[locator.mPos + 1] == '\n' ) { - // The first space in the sequence is the last space in the line, make in - // non-breaking or KHTML will not show it - result += " "; + if ( locator.mText[locator.mPos + 1] != ' ' ) { + + // A single space, make it breaking if not at the start or end of the line + const bool endOfLine = locator.mText[locator.mPos + 1] == '\n'; + if ( !startOfLine && !endOfLine ) + result += ' '; + else + result += " "; } else { - // This is the first space in a sequence of at least one, make it breaking - result += ' '; + + // Whitespace of more than one space, make it all non-breaking + while( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) { + result += " "; + locator.mPos++; + x++; + } + + // We incremented once to often, undo that + locator.mPos--; + x--; } } else { - // Space is the last char in the whole text, so it will be non-breaking + // Last space in the text, it is non-breaking result += " "; } - // Ok, we dealt with the first space now, all following spaces will be handled here - // and converted to non-breaking spaces - locator.mPos++; - x++; - while( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) { - result += " "; - locator.mPos++; - x++; + if ( startOfLine ) { + startOfLine = false; } - - // We incremented mPos and x once too much, so reverse that here - locator.mPos--; - x--; - continue; } else if ( ch == '\t' ) { do { result += " "; x++; } while ( ( x & 7 ) != 0 ); x--; startOfLine = false; continue; } } if ( ch == '\n' ) { result += "
\n"; // Keep the \n, so apps can figure out the quoting levels correctly. startOfLine = true; x = -1; continue; } startOfLine = false; if ( ch == '&' ) { result += "&"; } else if ( ch == '"' ) { result += """; } else if ( ch == '<' ) { result += "<"; } else if ( ch == '>' ) { result += ">"; } else { const int start = locator.mPos; if ( !( flags & IgnoreUrls ) ) { str = locator.getUrl(); if ( !str.isEmpty() ) { QString hyperlink; if ( str.left( 4 ) == "www." ) { hyperlink = "http://" + str; } else if ( str.left( 4 ) == "ftp." ) { hyperlink = "ftp://" + str; } else { hyperlink = str; } str = str.replace( '&', "&" ); result += "" + str + ""; x += locator.mPos - start; continue; } str = locator.getEmailAddress(); if ( !str.isEmpty() ) { // len is the length of the local part int len = str.indexOf( '@' ); QString localPart = str.left( len ); // remove the local part from the result (as '&'s have been expanded to // & we have to take care of the 4 additional characters per '&') result.truncate( result.length() - len - ( localPart.count( '&' ) * 4 ) ); x -= len; result += "" + str + ""; x += str.length() - 1; continue; } } if ( flags & HighlightText ) { str = locator.highlightedText(); if ( !str.isEmpty() ) { result += str; x += locator.mPos - start; continue; } } result += ch; } } #if KDE_IS_VERSION( 4, 0, 95 ) if ( flags & ReplaceSmileys ) { QStringList exclude; exclude << "(c)" << "(C)" << ">:-(" << ">:(" << "(B)" << "(b)" << "(P)" << "(p)"; exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)"; exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)"; exclude << "(g)" << "(F)" << "(f)" << "(H)"; exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)"; static QString cachedEmoticonsThemeName; if ( cachedEmoticonsThemeName.isEmpty() ) { cachedEmoticonsThemeName = KEmoticons::currentThemeName(); } result = sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons( result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude ); } #endif return result; } QString LinkLocator::pngToDataUrl( const QString &iconPath ) { if ( iconPath.isEmpty() ) { return QString(); } QFile pngFile( iconPath ); if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) { return QString(); } QByteArray ba = pngFile.readAll(); pngFile.close(); return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() ); } QString LinkLocator::highlightedText() { // formating symbols must be prepended with a whitespace if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) { return QString(); } const QChar ch = mText[mPos]; if ( ch != '/' && ch != '*' && ch != '_' ) { return QString(); } QRegExp re = QRegExp( QString( "\\%1([0-9A-Za-z]+)\\%2" ).arg( ch ).arg( ch ) ); if ( re.indexIn( mText, mPos ) == mPos ) { int length = re.matchedLength(); // there must be a whitespace after the closing formating symbol if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) { return QString(); } mPos += length - 1; switch ( ch.toLatin1() ) { case '*': return "" + re.cap( 1 ) + ""; case '_': return "" + re.cap( 1 ) + ""; case '/': return "" + re.cap( 1 ) + ""; } } return QString(); } diff --git a/kpimutils/tests/testlinklocator.cpp b/kpimutils/tests/testlinklocator.cpp index b8b6a9c50..da913a583 100644 --- a/kpimutils/tests/testlinklocator.cpp +++ b/kpimutils/tests/testlinklocator.cpp @@ -1,226 +1,230 @@ /* This file is part of the kpimutils library. Copyright (C) 2005 Ingo Kloecker Copyright (C) 2007 Allen Winter This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "testlinklocator.h" #include "testlinklocator.moc" QTEST_KDEMAIN( LinkLocatorTest, NoGUI ) #include "kpimutils/linklocator.h" using namespace KPIMUtils; void LinkLocatorTest::testGetEmailAddress() { // empty input const QString emptyQString; LinkLocator ll1( emptyQString, 0 ); QVERIFY( ll1.getEmailAddress().isEmpty() ); // no '@' at scan position LinkLocator ll2( "foo@bar.baz", 0 ); QVERIFY( ll2.getEmailAddress().isEmpty() ); // '@' in local part LinkLocator ll3( "foo@bar@bar.baz", 7 ); QVERIFY( ll3.getEmailAddress().isEmpty() ); // empty local part LinkLocator ll4( "@bar.baz", 0 ); QVERIFY( ll4.getEmailAddress().isEmpty() ); LinkLocator ll5( ".@bar.baz", 1 ); QVERIFY( ll5.getEmailAddress().isEmpty() ); LinkLocator ll6( " @bar.baz", 1 ); QVERIFY( ll6.getEmailAddress().isEmpty() ); LinkLocator ll7( ".!#$%&'*+-/=?^_`{|}~@bar.baz", strlen( ".!#$%&'*+-/=?^_`{|}~" ) ); QVERIFY( ll7.getEmailAddress().isEmpty() ); // allowed special chars in local part of address LinkLocator ll8( "a.!#$%&'*+-/=?^_`{|}~@bar.baz", strlen( "a.!#$%&'*+-/=?^_`{|}~" ) ); QVERIFY( ll8.getEmailAddress() == "a.!#$%&'*+-/=?^_`{|}~@bar.baz" ); // '@' in domain part LinkLocator ll9 ( "foo@bar@bar.baz", 3 ); QVERIFY( ll9.getEmailAddress().isEmpty() ); // domain part without dot LinkLocator lla( "foo@bar", 3 ); QVERIFY( lla.getEmailAddress().isEmpty() ); LinkLocator llb( "foo@bar.", 3 ); QVERIFY( llb.getEmailAddress().isEmpty() ); LinkLocator llc( ".foo@bar", 4 ); QVERIFY( llc.getEmailAddress().isEmpty() ); LinkLocator lld( "foo@bar ", 3 ); QVERIFY( lld.getEmailAddress().isEmpty() ); LinkLocator lle( " foo@bar", 4 ); QVERIFY( lle.getEmailAddress().isEmpty() ); LinkLocator llf( "foo@bar-bar", 3 ); QVERIFY( llf.getEmailAddress().isEmpty() ); // empty domain part LinkLocator llg( "foo@", 3 ); QVERIFY( llg.getEmailAddress().isEmpty() ); LinkLocator llh( "foo@.", 3 ); QVERIFY( llh.getEmailAddress().isEmpty() ); LinkLocator lli( "foo@-", 3 ); QVERIFY( lli.getEmailAddress().isEmpty() ); // simple address LinkLocator llj( "foo@bar.baz", 3 ); QVERIFY( llj.getEmailAddress() == "foo@bar.baz" ); LinkLocator llk( "foo@bar.baz.", 3 ); QVERIFY( llk.getEmailAddress() == "foo@bar.baz" ); LinkLocator lll( ".foo@bar.baz", 4 ); QVERIFY( lll.getEmailAddress() == "foo@bar.baz" ); LinkLocator llm( "foo@bar.baz-", 3 ); QVERIFY( llm.getEmailAddress() == "foo@bar.baz" ); LinkLocator lln( "-foo@bar.baz", 4 ); QVERIFY( lln.getEmailAddress() == "foo@bar.baz" ); LinkLocator llo( "foo@bar.baz ", 3 ); QVERIFY( llo.getEmailAddress() == "foo@bar.baz" ); LinkLocator llp( " foo@bar.baz", 4 ); QVERIFY( llp.getEmailAddress() == "foo@bar.baz" ); LinkLocator llq( "foo@bar-bar.baz", 3 ); QVERIFY( llq.getEmailAddress() == "foo@bar-bar.baz" ); } void LinkLocatorTest::testGetUrl() { QStringList brackets; brackets << "" << ""; // no brackets brackets << "(" << ")"; brackets << "<" << ">"; brackets << "[" << "]"; brackets << "" << ""; for (int i = 0; i < brackets.count(); i += 2) testGetUrl2(brackets[i], brackets[i+1]); } void LinkLocatorTest::testGetUrl2(const QString &left, const QString &right) { QStringList schemas; schemas << "http://"; schemas << "https://"; schemas << "vnc://"; schemas << "fish://"; schemas << "ftp://"; schemas << "ftps://"; schemas << "sftp://"; schemas << "smb://"; schemas << "file://"; QStringList urls; urls << "www.kde.org"; urls << "user@www.kde.org"; urls << "user:pass@www.kde.org"; urls << "user:pass@www.kde.org:1234"; urls << "user:pass@www.kde.org:1234/sub/path"; urls << "user:pass@www.kde.org:1234/sub/path?a=1"; urls << "user:pass@www.kde.org:1234/sub/path?a=1#anchor"; urls << "user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor"; urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"; foreach (QString schema, schemas) { foreach (QString url, urls) { QString test(left + schema + url + right); LinkLocator ll(test, left.length()); QString gotUrl = ll.getUrl(); bool ok = ( gotUrl == (schema + url) ); //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (schema + url); QVERIFY2( ok, qPrintable(test) ); } } QStringList urlsWithoutSchema; urlsWithoutSchema << ".kde.org"; urlsWithoutSchema << ".kde.org:1234/sub/path"; urlsWithoutSchema << ".kde.org:1234/sub/path?a=1"; urlsWithoutSchema << ".kde.org:1234/sub/path?a=1#anchor"; urlsWithoutSchema << ".kde.org:1234/sub/path/special(123)?a=1#anchor"; urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"; QStringList starts; starts << "www" << "ftp" << "news:www"; foreach (QString start, starts) { foreach (QString url, urlsWithoutSchema) { QString test(left + start + url + right); LinkLocator ll(test, left.length()); QString gotUrl = ll.getUrl(); bool ok = ( gotUrl == (start + url) ); //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (start + url); QVERIFY2( ok, qPrintable(test) ); } } // mailto { QString addr = "mailto:test@kde.org"; QString test(left + addr + right); LinkLocator ll(test, left.length()); QString gotUrl = ll.getUrl(); bool ok = ( gotUrl == addr ); //qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << addr; QVERIFY2( ok, qPrintable(test) ); } } void LinkLocatorTest::testHtmlConvert_data() { QTest::addColumn("plainText"); QTest::addColumn("flags"); QTest::addColumn("htmlText"); - QTest::newRow( "" ) << "foo" << 0 << "foo"; - QTest::newRow( "" ) << " foo " << 0 << " foo "; + //QTest::newRow( "" ) << "foo" << 0 << "foo"; + //QTest::newRow( "" ) << " foo " << 0 << " foo "; // Linker error when using PreserveSpaces, therefore the hardcoded 0x01 - QTest::newRow( "" ) << " foo" << 0x01 << " foo"; - QTest::newRow( "" ) << " foo" << 0x01 << "  foo"; - QTest::newRow( "" ) << " foo " << 0x01 << "  foo  "; - QTest::newRow( "" ) << " foo " << 0x01 << "  foo "; + QTest::newRow( "" ) << " foo" << 0x01 << " foo"; + QTest::newRow( "" ) << " foo" << 0x01 << "  foo"; + QTest::newRow( "" ) << " foo " << 0x01 << "  foo  "; + QTest::newRow( "" ) << " foo " << 0x01 << "  foo "; QTest::newRow( "" ) << "bla bla bla bla bla" << 0x01 << "bla bla bla bla bla"; QTest::newRow( "" ) << "bla bla bla \n bla bla bla " << 0x01 - << "bla bla bla 
\n  bla bla bla "; + << "bla bla bla 
\n  bla bla bla "; + QTest::newRow( "" ) << "bla bla bla" << 0x01 + << "bla bla  bla"; + QTest::newRow( "" ) << " bla bla \n bla bla a\n bla bla " << 0x01 + << " bla bla 
\n bla bla a
\n  bla bla "; } void LinkLocatorTest::testHtmlConvert() { QFETCH(QString, plainText); QFETCH(int, flags); QFETCH(QString, htmlText); QString actualHtml = LinkLocator::convertToHtml( plainText, flags ); QCOMPARE( actualHtml, htmlText ); }