• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.10.0 API Reference
  • KDE Home
  • Contact Us
 

kpimutils

  • kpimutils
linklocator.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2002 Dave Corrie <kde@davecorrie.com>
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Library General Public
6  License as published by the Free Software Foundation; either
7  version 2 of the License, or (at your option) any later version.
8 
9  This library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Library General Public License for more details.
13 
14  You should have received a copy of the GNU Library General Public License
15  along with this library; see the file COPYING.LIB. If not, write to
16  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  Boston, MA 02110-1301, USA.
18 */
29 #include "linklocator.h"
30 
31 #include <KEmoticons>
32 
33 #include <QtCore/QCoreApplication>
34 #include <QtCore/QFile>
35 #include <QtCore/QRegExp>
36 #include <QTextDocument>
37 
38 #include <climits>
39 
40 using namespace KPIMUtils;
41 
46 //@cond PRIVATE
47 class KPIMUtils::LinkLocator::Private
48 {
49  public:
50  int mMaxUrlLen;
51  int mMaxAddressLen;
52 };
53 //@endcond
54 
55 // Use a static for this as calls to the KEmoticons constructor are expensive.
56 K_GLOBAL_STATIC( KEmoticons, sEmoticons )
57 
58 LinkLocator::LinkLocator( const QString &text, int pos )
59  : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private )
60 {
61  d->mMaxUrlLen = 4096;
62  d->mMaxAddressLen = 255;
63 
64  // If you change either of the above values for maxUrlLen or
65  // maxAddressLen, then please also update the documentation for
66  // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
67  // default values used for the maxUrlLen/maxAddressLen parameters
68  // of convertToHtml().
69 }
70 
71 LinkLocator::~LinkLocator()
72 {
73  delete d;
74 }
75 
76 void LinkLocator::setMaxUrlLen( int length )
77 {
78  d->mMaxUrlLen = length;
79 }
80 
81 int LinkLocator::maxUrlLen() const
82 {
83  return d->mMaxUrlLen;
84 }
85 
86 void LinkLocator::setMaxAddressLen( int length )
87 {
88  d->mMaxAddressLen = length;
89 }
90 
91 int LinkLocator::maxAddressLen() const
92 {
93  return d->mMaxAddressLen;
94 }
95 
96 QString LinkLocator::getUrl()
97 {
98  QString url;
99  if ( atUrl() ) {
100  // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
101  // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
102  // be allowed and should be ignored when the URI is extracted.
103 
104  // This implementation follows this recommendation and
105  // allows the URL to be enclosed within different kind of brackets/quotes
106  // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
107  // the URL ends with the first whitespace
108  // Also, if the URL is enclosed in brackets, the URL itself is not allowed
109  // to contain the closing bracket, as this would be detected as the end of the URL
110 
111  QChar beforeUrl, afterUrl;
112 
113  // detect if the url has been surrounded by brackets or quotes
114  if ( mPos > 0 ) {
115  beforeUrl = mText[mPos - 1];
116 
117  if ( beforeUrl == '(' ) {
118  afterUrl = ')';
119  } else if ( beforeUrl == '[' ) {
120  afterUrl = ']';
121  } else if ( beforeUrl == '<' ) {
122  afterUrl = '>';
123  } else if ( beforeUrl == '>' ) { // for e.g. <link>http://.....</link>
124  afterUrl = '<';
125  } else if ( beforeUrl == '"' ) {
126  afterUrl = '"';
127  }
128  }
129 
130  url.reserve( maxUrlLen() ); // avoid allocs
131  int start = mPos;
132  while ( ( mPos < (int)mText.length() ) &&
133  ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
134  ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
135  ( !afterUrl.isNull() && mText[mPos] != afterUrl ) ) ) {
136  if ( !mText[mPos].isSpace() ) { // skip whitespace
137  url.append( mText[mPos] );
138  if ( url.length() > maxUrlLen() ) {
139  break;
140  }
141  }
142 
143  mPos++;
144  }
145 
146  if ( isEmptyUrl( url ) || ( url.length() > maxUrlLen() ) ) {
147  mPos = start;
148  url = "";
149  } else {
150  --mPos;
151  }
152  }
153 
154  // HACK: This is actually against the RFC. However, most people don't properly escape the URL in
155  // their text with "" or <>. That leads to people writing an url, followed immediatley by
156  // a dot to finish the sentence. That would lead the parser to include the dot in the url,
157  // even though that is not wanted. So work around that here.
158  // Most real-life URLs hopefully don't end with dots or commas.
159  if ( url.length() > 1 ) {
160  QList<QChar> wordBoundaries;
161  wordBoundaries << '.' << ',' << ':' << '!' << '?';
162  if ( wordBoundaries.contains( url.at( url.length() - 1 ) ) ) {
163  url.chop( 1 );
164  --mPos;
165  }
166  }
167 
168  return url;
169 }
170 
171 // keep this in sync with KMMainWin::slotUrlClicked()
172 bool LinkLocator::atUrl() const
173 {
174  // the following characters are allowed in a dot-atom (RFC 2822):
175  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
176  const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
177 
178  // the character directly before the URL must not be a letter, a number or
179  // any other character allowed in a dot-atom (RFC 2822).
180  if ( ( mPos > 0 ) &&
181  ( mText[mPos-1].isLetterOrNumber() ||
182  ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) {
183  return false;
184  }
185 
186  QChar ch = mText[mPos];
187  return
188  ( ch == 'h' && ( mText.mid( mPos, 7 ) == QLatin1String( "http://" ) ||
189  mText.mid( mPos, 8 ) == QLatin1String( "https://" ) ) ) ||
190  ( ch == 'v' && mText.mid( mPos, 6 ) == QLatin1String( "vnc://" ) ) ||
191  ( ch == 'f' && ( mText.mid( mPos, 7 ) == QLatin1String( "fish://" ) ||
192  mText.mid( mPos, 6 ) == QLatin1String( "ftp://" ) ||
193  mText.mid( mPos, 7 ) == QLatin1String( "ftps://" ) ) ) ||
194  ( ch == 's' && ( mText.mid( mPos, 7 ) == QLatin1String( "sftp://" ) ||
195  mText.mid( mPos, 6 ) == QLatin1String( "smb://" ) ) ) ||
196  ( ch == 'm' && mText.mid( mPos, 7 ) == QLatin1String( "mailto:" ) ) ||
197  ( ch == 'w' && mText.mid( mPos, 4 ) == QLatin1String( "www." ) ) ||
198  ( ch == 'f' && ( mText.mid( mPos, 4 ) == QLatin1String( "ftp." ) ||
199  mText.mid( mPos, 7 ) == QLatin1String( "file://" ) ) )||
200  ( ch == 'n' && mText.mid( mPos, 5 ) == QLatin1String( "news:" ) );
201 }
202 
203 bool LinkLocator::isEmptyUrl( const QString &url ) const
204 {
205  return url.isEmpty() ||
206  url == QLatin1String( "http://" ) ||
207  url == QLatin1String( "https://" ) ||
208  url == QLatin1String( "fish://" ) ||
209  url == QLatin1String( "ftp://" ) ||
210  url == QLatin1String( "ftps://" ) ||
211  url == QLatin1String( "sftp://" ) ||
212  url == QLatin1String( "smb://" ) ||
213  url == QLatin1String( "vnc://" ) ||
214  url == QLatin1String( "mailto" ) ||
215  url == QLatin1String( "www" ) ||
216  url == QLatin1String( "ftp" ) ||
217  url == QLatin1String( "news" ) ||
218  url == QLatin1String( "news://" );
219 }
220 
221 QString LinkLocator::getEmailAddress()
222 {
223  QString address;
224 
225  if ( mText[mPos] == '@' ) {
226  // the following characters are allowed in a dot-atom (RFC 2822):
227  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
228  const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
229 
230  // determine the local part of the email address
231  int start = mPos - 1;
232  while ( start >= 0 && mText[start].unicode() < 128 &&
233  ( mText[start].isLetterOrNumber() ||
234  mText[start] == '@' || // allow @ to find invalid email addresses
235  allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) {
236  if ( mText[start] == '@' ) {
237  return QString(); // local part contains '@' -> no email address
238  }
239  --start;
240  }
241  ++start;
242  // we assume that an email address starts with a letter or a digit
243  while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) {
244  ++start;
245  }
246  if ( start == mPos ) {
247  return QString(); // local part is empty -> no email address
248  }
249 
250  // determine the domain part of the email address
251  int dotPos = INT_MAX;
252  int end = mPos + 1;
253  while ( end < (int)mText.length() &&
254  ( mText[end].isLetterOrNumber() ||
255  mText[end] == '@' || // allow @ to find invalid email addresses
256  mText[end] == '.' ||
257  mText[end] == '-' ) ) {
258  if ( mText[end] == '@' ) {
259  return QString(); // domain part contains '@' -> no email address
260  }
261  if ( mText[end] == '.' ) {
262  dotPos = qMin( dotPos, end ); // remember index of first dot in domain
263  }
264  ++end;
265  }
266  // we assume that an email address ends with a letter or a digit
267  while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) {
268  --end;
269  }
270  if ( end == mPos ) {
271  return QString(); // domain part is empty -> no email address
272  }
273  if ( dotPos >= end ) {
274  return QString(); // domain part doesn't contain a dot
275  }
276 
277  if ( end - start > maxAddressLen() ) {
278  return QString(); // too long -> most likely no email address
279  }
280  address = mText.mid( start, end - start );
281 
282  mPos = end - 1;
283  }
284  return address;
285 }
286 
287 QString LinkLocator::convertToHtml( const QString &plainText, int flags,
288  int maxUrlLen, int maxAddressLen )
289 {
290  LinkLocator locator( plainText );
291  locator.setMaxUrlLen( maxUrlLen );
292  locator.setMaxAddressLen( maxAddressLen );
293 
294  QString str;
295  QString result( (QChar*)0, (int)locator.mText.length() * 2 );
296  QChar ch;
297  int x;
298  bool startOfLine = true;
299  QString emoticon;
300 
301  for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length();
302  locator.mPos++, x++ ) {
303  ch = locator.mText[locator.mPos];
304  if ( flags & PreserveSpaces ) {
305  if ( ch == ' ' ) {
306  if ( locator.mPos + 1 < locator.mText.length() ) {
307  if ( locator.mText[locator.mPos + 1] != ' ' ) {
308 
309  // A single space, make it breaking if not at the start or end of the line
310  const bool endOfLine = locator.mText[locator.mPos + 1] == '\n';
311  if ( !startOfLine && !endOfLine ) {
312  result += ' ';
313  } else {
314  result += "&nbsp;";
315  }
316  } else {
317 
318  // Whitespace of more than one space, make it all non-breaking
319  while ( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) {
320  result += "&nbsp;";
321  locator.mPos++;
322  x++;
323  }
324 
325  // We incremented once to often, undo that
326  locator.mPos--;
327  x--;
328  }
329  } else {
330  // Last space in the text, it is non-breaking
331  result += "&nbsp;";
332  }
333 
334  if ( startOfLine ) {
335  startOfLine = false;
336  }
337  continue;
338  } else if ( ch == '\t' ) {
339  do {
340  result += "&nbsp;";
341  x++;
342  } while ( ( x & 7 ) != 0 );
343  x--;
344  startOfLine = false;
345  continue;
346  }
347  }
348  if ( ch == '\n' ) {
349  result += "<br />\n"; // Keep the \n, so apps can figure out the quoting levels correctly.
350  startOfLine = true;
351  x = -1;
352  continue;
353  }
354 
355  startOfLine = false;
356  if ( ch == '&' ) {
357  result += "&amp;";
358  } else if ( ch == '"' ) {
359  result += "&quot;";
360  } else if ( ch == '<' ) {
361  result += "&lt;";
362  } else if ( ch == '>' ) {
363  result += "&gt;";
364  } else {
365  const int start = locator.mPos;
366  if ( !( flags & IgnoreUrls ) ) {
367  str = locator.getUrl();
368  if ( !str.isEmpty() ) {
369  QString hyperlink;
370  if ( str.left( 4 ) == "www." ) {
371  hyperlink = "http://" + str;
372  } else if ( str.left( 4 ) == "ftp." ) {
373  hyperlink = "ftp://" + str;
374  } else {
375  hyperlink = str;
376  }
377 
378  result += "<a href=\"" + hyperlink + "\">" + Qt::escape( str ) + "</a>";
379  x += locator.mPos - start;
380  continue;
381  }
382  str = locator.getEmailAddress();
383  if ( !str.isEmpty() ) {
384  // len is the length of the local part
385  int len = str.indexOf( '@' );
386  QString localPart = str.left( len );
387 
388  // remove the local part from the result (as '&'s have been expanded to
389  // &amp; we have to take care of the 4 additional characters per '&')
390  result.truncate( result.length() -
391  len - ( localPart.count( '&' ) * 4 ) );
392  x -= len;
393 
394  result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
395  x += str.length() - 1;
396  continue;
397  }
398  }
399  if ( flags & HighlightText ) {
400  str = locator.highlightedText();
401  if ( !str.isEmpty() ) {
402  result += str;
403  x += locator.mPos - start;
404  continue;
405  }
406  }
407  result += ch;
408  }
409  }
410 
411  if ( flags & ReplaceSmileys ) {
412  QStringList exclude;
413  exclude << "(c)" << "(C)" << "&gt;:-(" << "&gt;:(" << "(B)" << "(b)" << "(P)" << "(p)";
414  exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)";
415  exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)";
416  exclude << "(g)" << "(F)" << "(f)" << "(H)";
417  exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)";
418  static QString cachedEmoticonsThemeName;
419  if ( cachedEmoticonsThemeName.isEmpty() ) {
420  cachedEmoticonsThemeName = KEmoticons::currentThemeName();
421  }
422  result =
423  sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons(
424  result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude );
425  }
426 
427  return result;
428 }
429 
430 QString LinkLocator::pngToDataUrl( const QString &iconPath )
431 {
432  if ( iconPath.isEmpty() ) {
433  return QString();
434  }
435 
436  QFile pngFile( iconPath );
437  if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) {
438  return QString();
439  }
440 
441  QByteArray ba = pngFile.readAll();
442  pngFile.close();
443  return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() );
444 }
445 
446 QString LinkLocator::highlightedText()
447 {
448  // formating symbols must be prepended with a whitespace
449  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) {
450  return QString();
451  }
452 
453  const QChar ch = mText[mPos];
454  if ( ch != '/' && ch != '*' && ch != '_' && ch != '-' ) {
455  return QString();
456  }
457 
458  QRegExp re =
459  QRegExp( QString( "\\%1((\\w+)([\\s-']\\w+)*( ?[,.:\\?!;])?)\\%2" ).arg( ch ).arg( ch ) );
460  re.setMinimal( true );
461  if ( re.indexIn( mText, mPos ) == mPos ) {
462  int length = re.matchedLength();
463  // there must be a whitespace after the closing formating symbol
464  if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) {
465  return QString();
466  }
467  mPos += length - 1;
468  switch ( ch.toLatin1() ) {
469  case '*':
470  return "<b>*" + re.cap( 1 ) + "*</b>";
471  case '_':
472  return "<u>_" + re.cap( 1 ) + "_</u>";
473  case '/':
474  return "<i>/" + re.cap( 1 ) + "/</i>";
475  case '-':
476  return "<strike>-" + re.cap( 1 ) + "-</strike>";
477  }
478  }
479  return QString();
480 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2013 The KDE developers.
Generated on Fri Mar 8 2013 21:48:51 by doxygen 1.8.3.1 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

kpimutils

Skip menu "kpimutils"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Modules

kdepimlibs-4.10.0 API Reference

Skip menu "kdepimlibs-4.10.0 API Reference"
  • akonadi
  •   contact
  •   kmime
  •   socialutils
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal