diff --git a/kpimutils/linklocator.cpp b/kpimutils/linklocator.cpp index 22cb4fb73..86496ee27 100644 --- a/kpimutils/linklocator.cpp +++ b/kpimutils/linklocator.cpp @@ -1,430 +1,434 @@ /* Copyright (c) 2002 Dave Corrie This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /** @file This file is part of the KDEPIM Utilities library and provides the LinkLocator class. @brief Identifies URLs and email addresses embedded in plaintext. @author Dave Corrie \ */ #include "linklocator.h" #include #include #include #include #include #if KDE_IS_VERSION( 4, 0, 95 ) #include #endif #include #include #include #include #include using namespace KPIMUtils; /** Private class that helps to provide binary compatibility between releases. It stores the two length limits that the LinkLocator constructor initializes (4096 and 255) and that getUrl() and getEmailAddress() check through maxUrlLen() and maxAddressLen(). @internal */ //@cond PRIVATE class KPIMUtils::LinkLocator::Private { public: int mMaxUrlLen; /* longest URL that getUrl() will return */ int mMaxAddressLen; /* longest address that getEmailAddress() will return */ }; //@endcond #if KDE_IS_VERSION( 4, 0, 95 ) // Use a static for this as calls to the KEmoticons constructor are expensive.
K_GLOBAL_STATIC( KEmoticons, sEmoticons ) #endif /* Creates a locator over text with the scan position at pos; the URL and address length limits start at 4096 and 255. */ LinkLocator::LinkLocator( const QString &text, int pos ) : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private ) { d->mMaxUrlLen = 4096; d->mMaxAddressLen = 255; // If you change either of the above values for maxUrlLen or // maxAddressLen, then please also update the documentation for // setMaxUrlLen()/setMaxAddressLen() in the header file AND the // default values used for the maxUrlLen/maxAddressLen parameters // of convertToHtml(). } LinkLocator::~LinkLocator() { delete d; } void LinkLocator::setMaxUrlLen( int length ) { d->mMaxUrlLen = length; } int LinkLocator::maxUrlLen() const { return d->mMaxUrlLen; } void LinkLocator::setMaxAddressLen( int length ) { d->mMaxAddressLen = length; } int LinkLocator::maxAddressLen() const { return d->mMaxAddressLen; } /* Returns the URL that begins at mPos. The result is an empty string if atUrl() is false, if the match is only a bare prefix (isEmptyUrl()), or if it is longer than maxUrlLen(). The URL runs up to the first character that is not greater than ' ', is a double quote, or is one of the excluded bracket characters; trailing punctuation other than # / & - _ is then dropped. On success mPos is left on the last character of the URL; otherwise mPos keeps its original value. */ QString LinkLocator::getUrl() { QString url; if ( atUrl() ) { - // handle cases like this: http://foobar.org/ + // for reference: rfc1738: + // Thus, only alphanumerics, the special characters "$-_.+!*'(),", and + // reserved characters used for their reserved purposes may be used + // unencoded within a URL. + // NOTE: this implementation is not RFC conforming int start = mPos; while ( mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' && - QString( "<>()[]" ).indexOf( mText[mPos] ) == -1 ) { + QString( "<>[]" ).indexOf( mText[mPos] ) == -1 ) { ++mPos; } - /* some URLs really end with: # / & - _ */ + // some URLs really end with: # / & - _ const QString allowedSpecialChars = QString( "#/&-_" ); while ( mPos > start && mText[mPos-1].isPunct() && allowedSpecialChars.indexOf( mText[mPos-1] ) == -1 ) { --mPos; } url = mText.mid( start, mPos - start ); if ( isEmptyUrl(url) || mPos - start > maxUrlLen() ) { mPos = start; url = ""; } else { --mPos; } } return url; } /* Returns true if one of the recognized URL prefixes starts at mPos and the character before mPos, if there is one, is neither a letter, a digit nor a dot-atom special character. */ // keep this in sync with KMMainWin::slotUrlClicked() bool LinkLocator::atUrl() const { // the following characters are allowed in a dot-atom (RFC 2822): // a-z A-Z 0-9 . !
# $ % & ' * + - / = ? ^ _ ` { | } ~ const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); // the character directly before the URL must not be a letter, a number or // any other character allowed in a dot-atom (RFC 2822). if ( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() || ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) { return false; } QChar ch = mText[mPos]; return ( ch == 'h' && ( mText.mid( mPos, 7 ) == "http://" || mText.mid( mPos, 8 ) == "https://" ) ) || ( ch == 'v' && mText.mid( mPos, 6 ) == "vnc://" ) || ( ch == 'f' && ( mText.mid( mPos, 7 ) == "fish://" || mText.mid( mPos, 6 ) == "ftp://" || mText.mid( mPos, 7 ) == "ftps://" ) ) || ( ch == 's' && ( mText.mid( mPos, 7 ) == "sftp://" || mText.mid( mPos, 6 ) == "smb://" ) ) || ( ch == 'm' && mText.mid( mPos, 7 ) == "mailto:" ) || ( ch == 'w' && mText.mid( mPos, 4 ) == "www." ) || ( ch == 'f' && ( mText.mid( mPos, 4 ) == "ftp." || mText.mid( mPos, 7 ) == "file://" ) ) || ( ch == 'n' && mText.mid( mPos, 5 ) == "news:" ); } /* Returns true if url is empty or is nothing more than one of the prefixes that atUrl() recognizes. The prefixes are compared in the form getUrl() produces after it has dropped trailing punctuation, which is why "mailto", "www", "ftp" and "news" appear without their final ':' or '.'. "file://" is listed because atUrl() accepts it; without the entry a bare "file://" would be reported as a URL while every other bare "scheme://" prefix is rejected. */ bool LinkLocator::isEmptyUrl( const QString &url ) const { return url.isEmpty() || url == "http://" || url == "https://" || url == "fish://" || url == "ftp://" || url == "ftps://" || url == "sftp://" || url == "smb://" || url == "vnc://" || url == "file://" || url == "mailto" || url == "www" || url == "ftp" || url == "news" || url == "news://"; } /* Returns the email address surrounding the '@' at mPos, or an empty string if mText[mPos] is not '@' or the text around it does not pass the local-part, domain and length checks below. When an address is returned, mPos is moved to its last character. */ QString LinkLocator::getEmailAddress() { QString address; if ( mText[mPos] == '@' ) { // the following characters are allowed in a dot-atom (RFC 2822): // a-z A-Z 0-9 . ! # $ % & ' * + - / = ?
^ _ ` { | } ~ const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); // determine the local part of the email address /* walk backwards from the character before '@'; only code points below 128 are accepted */ int start = mPos - 1; while ( start >= 0 && mText[start].unicode() < 128 && ( mText[start].isLetterOrNumber() || mText[start] == '@' || // allow @ to find invalid email addresses allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) { if ( mText[start] == '@' ) { return QString(); // local part contains '@' -> no email address } --start; } ++start; /* the loop stopped one position before the local part; step onto its first character */ // we assume that an email address starts with a letter or a digit while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) { ++start; } if ( start == mPos ) { return QString(); // local part is empty -> no email address } // determine the domain part of the email address int dotPos = INT_MAX; /* stays INT_MAX until a '.' is found in the domain part */ int end = mPos + 1; /* exclusive bound: one past the last accepted character */ while ( end < (int)mText.length() && ( mText[end].isLetterOrNumber() || mText[end] == '@' || // allow @ to find invalid email addresses mText[end] == '.' || mText[end] == '-' ) ) { if ( mText[end] == '@' ) { return QString(); // domain part contains '@' -> no email address } if ( mText[end] == '.'
) { dotPos = qMin( dotPos, end ); // remember index of first dot in domain } ++end; } // we assume that an email address ends with a letter or a digit while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) { --end; } if ( end == mPos ) { return QString(); // domain part is empty -> no email address } if ( dotPos >= end ) { return QString(); // domain part doesn't contain a dot } if ( end - start > maxAddressLen() ) { return QString(); // too long -> most likely no email address } address = mText.mid( start, end - start ); mPos = end - 1; } return address; } /* Builds an HTML rendering of plainText by walking it with a LinkLocator whose limits are set to maxUrlLen and maxAddressLen; x tracks the column within the current line. With PreserveSpaces, runs of spaces and tabs are expanded explicitly. A newline resets the column and the start-of-line state. Unless IgnoreUrls is set, text recognized by getUrl() or getEmailAddress() goes through the link branches, where a "www." or "ftp." prefix gets "http://" or "ftp://" prepended for the hyperlink target. With HighlightText, highlightedText() is tried as well. On KDE 4.0.95 or later, ReplaceSmileys finally passes the result, together with the exclude list, to parseEmoticons() of the theme whose name is cached from KEmoticons::currentThemeName() on first use. NOTE(review): several string literals below look as if HTML entities and tags were stripped when this text was extracted (the branches for '&', '"', '<' and '>' append the character unchanged, and the link branches wrap str in empty strings) -- confirm against the repository. */ QString LinkLocator::convertToHtml( const QString &plainText, int flags, int maxUrlLen, int maxAddressLen ) { LinkLocator locator( plainText ); locator.setMaxUrlLen( maxUrlLen ); locator.setMaxAddressLen( maxAddressLen ); QString str; QString result( (QChar*)0, (int)locator.mText.length() * 2 ); QChar ch; int x; bool startOfLine = true; QString emoticon; for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++ ) { ch = locator.mText[locator.mPos]; if ( flags & PreserveSpaces ) { if ( ch == ' ' ) { if ( startOfLine ) { result += " "; locator.mPos++, x++; startOfLine = false; } while ( locator.mText[locator.mPos] == ' ' ) { result += ' '; locator.mPos++, x++; if ( locator.mText[locator.mPos] == ' ' ) { result += " "; locator.mPos++, x++; } } locator.mPos--, x--; continue; } else if ( ch == '\t' ) { /* pad until the column x reaches the next multiple of 8 */ do { result += " "; x++; } while ( ( x & 7 ) != 0 ); x--; startOfLine = false; continue; } } if ( ch == '\n' ) { result += "
\n"; // Keep the \n, so apps can figure out the quoting levels correctly. startOfLine = true; x = -1; continue; } startOfLine = false; if ( ch == '&' ) { result += "&"; } else if ( ch == '"' ) { result += """; } else if ( ch == '<' ) { result += "<"; } else if ( ch == '>' ) { result += ">"; } else { const int start = locator.mPos; if ( !( flags & IgnoreUrls ) ) { str = locator.getUrl(); if ( !str.isEmpty() ) { QString hyperlink; if ( str.left( 4 ) == "www." ) { hyperlink = "http://" + str; } else if ( str.left( 4 ) == "ftp." ) { hyperlink = "ftp://" + str; } else { hyperlink = str; } str = str.replace( '&', "&" ); result += "" + str + ""; x += locator.mPos - start; continue; } str = locator.getEmailAddress(); if ( !str.isEmpty() ) { // len is the length of the local part int len = str.indexOf( '@' ); QString localPart = str.left( len ); // remove the local part from the result (as '&'s have been expanded to // &amp; we have to take care of the 4 additional characters per '&') result.truncate( result.length() - len - ( localPart.count( '&' ) * 4 ) ); x -= len; result += "" + str + ""; x += str.length() - 1; continue; } } if ( flags & HighlightText ) { str = locator.highlightedText(); if ( !str.isEmpty() ) { result += str; x += locator.mPos - start; continue; } } result += ch; } } #if KDE_IS_VERSION( 4, 0, 95 ) if ( flags & ReplaceSmileys ) { QStringList exclude; exclude << "(c)" << "(C)" << ">:-(" << ">:(" << "(B)" << "(b)" << "(P)" << "(p)"; exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)"; exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)"; exclude << "(g)" << "(F)" << "(f)" << "(H)"; exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)"; static QString cachedEmoticonsThemeName; if ( cachedEmoticonsThemeName.isEmpty() ) { cachedEmoticonsThemeName = KEmoticons::currentThemeName(); } result = sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons( result,
KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude ); } #endif return result; } /* Reads the file at iconPath and returns its contents as a "data:image/png;base64,..." URL. Returns an empty string if iconPath is empty or the file cannot be opened for reading. */ QString LinkLocator::pngToDataUrl( const QString &iconPath ) { if ( iconPath.isEmpty() ) { return QString(); } QFile pngFile( iconPath ); if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) { return QString(); } QByteArray ba = pngFile.readAll(); pngFile.close(); return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() ); } /* Recognizes a word of letters and digits enclosed in a pair of '*', '_' or '/' characters that starts at mPos. The opening symbol must be at position 0 or follow whitespace, and the closing symbol must be followed by whitespace or the end of the text. On a match mPos is moved to the closing symbol and the captured word is returned with the strings selected by the switch around it; otherwise an empty string is returned and mPos is unchanged. NOTE(review): the strings around re.cap( 1 ) are empty in this copy and look as if markup was stripped during extraction -- confirm against the repository. */ QString LinkLocator::highlightedText() { // formatting symbols must be preceded by whitespace if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) { return QString(); } const QChar ch = mText[mPos]; if ( ch != '/' && ch != '*' && ch != '_' ) { return QString(); } QRegExp re = QRegExp( QString( "\\%1([0-9A-Za-z]+)\\%2" ).arg( ch ).arg( ch ) ); if ( re.indexIn( mText, mPos ) == mPos ) { int length = re.matchedLength(); // there must be whitespace after the closing formatting symbol if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) { return QString(); } mPos += length - 1; switch ( ch.toLatin1() ) { case '*': return "" + re.cap( 1 ) + ""; case '_': return "" + re.cap( 1 ) + ""; case '/': return "" + re.cap( 1 ) + ""; } } return QString(); } diff --git a/kpimutils/tests/testlinklocator.cpp b/kpimutils/tests/testlinklocator.cpp index 46489f69d..258a85fb2 100644 --- a/kpimutils/tests/testlinklocator.cpp +++ b/kpimutils/tests/testlinklocator.cpp @@ -1,106 +1,196 @@ /* This file is part of the kpimutils library. Copyright (C) 2005 Ingo Kloecker Copyright (C) 2007 Allen Winter This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include +#include #include "testlinklocator.h" #include "testlinklocator.moc" QTEST_KDEMAIN( LinkLocatorTest, NoGUI ) #include "kpimutils/linklocator.h" using namespace KPIMUtils; /* Checks LinkLocator::getEmailAddress() against inputs with a missing or invalid local part, a missing or invalid domain part, and well-formed addresses. The second constructor argument is the scan position; apart from the first two cases it is the index of an '@' in the text. */ void LinkLocatorTest::testGetEmailAddress() { // empty input const QString emptyQString; LinkLocator ll1( emptyQString, 0 ); QVERIFY( ll1.getEmailAddress().isEmpty() ); // no '@' at scan position LinkLocator ll2( "foo@bar.baz", 0 ); QVERIFY( ll2.getEmailAddress().isEmpty() ); // '@' in local part LinkLocator ll3( "foo@bar@bar.baz", 7 ); QVERIFY( ll3.getEmailAddress().isEmpty() ); // empty local part LinkLocator ll4( "@bar.baz", 0 ); QVERIFY( ll4.getEmailAddress().isEmpty() ); LinkLocator ll5( ".@bar.baz", 1 ); QVERIFY( ll5.getEmailAddress().isEmpty() ); LinkLocator ll6( " @bar.baz", 1 ); QVERIFY( ll6.getEmailAddress().isEmpty() ); LinkLocator ll7( ".!#$%&'*+-/=?^_`{|}~@bar.baz", strlen( ".!#$%&'*+-/=?^_`{|}~" ) ); QVERIFY( ll7.getEmailAddress().isEmpty() ); // allowed special chars in local part of address LinkLocator ll8( "a.!#$%&'*+-/=?^_`{|}~@bar.baz", strlen( "a.!#$%&'*+-/=?^_`{|}~" ) ); QVERIFY( ll8.getEmailAddress() == "a.!#$%&'*+-/=?^_`{|}~@bar.baz" ); // '@' in domain part LinkLocator ll9 ( "foo@bar@bar.baz", 3 ); QVERIFY( ll9.getEmailAddress().isEmpty() ); // domain part without dot LinkLocator lla( "foo@bar", 3 ); QVERIFY( lla.getEmailAddress().isEmpty() ); LinkLocator llb( "foo@bar.", 3 ); QVERIFY( llb.getEmailAddress().isEmpty() ); LinkLocator llc( ".foo@bar", 4 ); QVERIFY( llc.getEmailAddress().isEmpty() ); LinkLocator lld( "foo@bar ", 3 ); QVERIFY( lld.getEmailAddress().isEmpty() ); LinkLocator lle( " foo@bar", 4 ); QVERIFY( lle.getEmailAddress().isEmpty() ); LinkLocator llf(
"foo@bar-bar", 3 ); QVERIFY( llf.getEmailAddress().isEmpty() ); // empty domain part LinkLocator llg( "foo@", 3 ); QVERIFY( llg.getEmailAddress().isEmpty() ); LinkLocator llh( "foo@.", 3 ); QVERIFY( llh.getEmailAddress().isEmpty() ); LinkLocator lli( "foo@-", 3 ); QVERIFY( lli.getEmailAddress().isEmpty() ); // simple address LinkLocator llj( "foo@bar.baz", 3 ); QVERIFY( llj.getEmailAddress() == "foo@bar.baz" ); LinkLocator llk( "foo@bar.baz.", 3 ); QVERIFY( llk.getEmailAddress() == "foo@bar.baz" ); LinkLocator lll( ".foo@bar.baz", 4 ); QVERIFY( lll.getEmailAddress() == "foo@bar.baz" ); LinkLocator llm( "foo@bar.baz-", 3 ); QVERIFY( llm.getEmailAddress() == "foo@bar.baz" ); LinkLocator lln( "-foo@bar.baz", 4 ); QVERIFY( lln.getEmailAddress() == "foo@bar.baz" ); LinkLocator llo( "foo@bar.baz ", 3 ); QVERIFY( llo.getEmailAddress() == "foo@bar.baz" ); LinkLocator llp( " foo@bar.baz", 4 ); QVERIFY( llp.getEmailAddress() == "foo@bar.baz" ); LinkLocator llq( "foo@bar-bar.baz", 3 ); QVERIFY( llq.getEmailAddress() == "foo@bar-bar.baz" ); } +/* Runs testGetUrl2() once for every (left, right) delimiter pair stored in brackets. */ void LinkLocatorTest::testGetUrl() +{ + QStringList brackets; + brackets << "" << ""; // no brackets + brackets << "(" << ")"; + brackets << "<" << ">"; + brackets << "[" << "]"; + brackets << "" << ""; /* NOTE(review): this pair repeats the "no brackets" case; it may have been a markup-style pair that was lost in extraction -- confirm against the repository */ + + for (int i = 0; i < brackets.count(); i += 2) + testGetUrl2(brackets[i], brackets[i+1]); +} + +/* Wraps each candidate URL in left and right, starts a LinkLocator at index left.length() and verifies that getUrl() returns exactly the URL without the delimiters. Covers every scheme in schemas combined with every entry of urls, the prefix-only forms built from starts, and one mailto: address. */ void LinkLocatorTest::testGetUrl2(const QString &left, const QString &right) +{ + QStringList schemas; + schemas << "http://"; + schemas << "https://"; + schemas << "vnc://"; + schemas << "fish://"; + schemas << "ftp://"; + schemas << "ftps://"; + schemas << "sftp://"; + schemas << "smb://"; + schemas << "file://"; + + QStringList urls; + urls << "www.kde.org"; + urls << "user@www.kde.org"; + urls << "user:pass@www.kde.org"; + urls << "user:pass@www.kde.org:1234"; + urls << "user:pass@www.kde.org:1234/sub/path"; + urls << "user:pass@www.kde.org:1234/sub/path?a=1"; + urls << "user:pass@www.kde.org:1234/sub/path?a=1#anchor"; + urls <<
"user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor"; + urls << "user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"; + + foreach (QString schema, schemas) + { + foreach (QString url, urls) + { + QString test(left + schema + url + right); + LinkLocator ll(test, left.length()); + QString gotUrl = ll.getUrl(); + + bool ok = ( gotUrl == (schema + url) ); + qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (schema + url); + QVERIFY2( ok, qPrintable(test) ); + } + } + + /* the entries below are appended to each element of starts, giving URLs that begin with "www.", "ftp." or "news:" */ QStringList urlsWithoutSchema; + urlsWithoutSchema << ".kde.org"; + urlsWithoutSchema << ".kde.org:1234/sub/path"; + urlsWithoutSchema << ".kde.org:1234/sub/path?a=1"; + urlsWithoutSchema << ".kde.org:1234/sub/path?a=1#anchor"; + urlsWithoutSchema << ".kde.org:1234/sub/path/special(123)?a=1#anchor"; + urlsWithoutSchema << ".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"; + + QStringList starts; + starts << "www" << "ftp" << "news:www"; + + foreach (QString start, starts) + { + foreach (QString url, urlsWithoutSchema) + { + QString test(left + start + url + right); + LinkLocator ll(test, left.length()); + QString gotUrl = ll.getUrl(); + + bool ok = ( gotUrl == (start + url) ); + qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << (start + url); + QVERIFY2( ok, qPrintable(test) ); + } + } + + // mailto + { + QString addr = "mailto:test@kde.org"; + QString test(left + addr + right); + LinkLocator ll(test, left.length()); + + QString gotUrl = ll.getUrl(); + + bool ok = ( gotUrl == addr ); + qDebug() << "check:" << (ok ? "OK" : "NOK") << test << "=>" << addr; + QVERIFY2( ok, qPrintable(test) ); + } +} diff --git a/kpimutils/tests/testlinklocator.h b/kpimutils/tests/testlinklocator.h index fa68bfd87..65544eeab 100644 --- a/kpimutils/tests/testlinklocator.h +++ b/kpimutils/tests/testlinklocator.h @@ -1,34 +1,38 @@ /* This file is part of the kpimutils library.
Copyright (c) 2007 Allen Winter This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef TESTLINKLOCATOR_H #define TESTLINKLOCATOR_H #include /* Test class for KPIMUtils::LinkLocator. testGetEmailAddress() and testGetUrl() are declared as private slots; testGetUrl2() is an ordinary private helper. */ class LinkLocatorTest : public QObject { Q_OBJECT private Q_SLOTS: void testGetEmailAddress(); + void testGetUrl(); + + private: + /* called by testGetUrl() once per (left, right) delimiter pair */ void testGetUrl2(const QString &left, const QString &right); }; #endif