libtdepim

linklocator.cpp
1
23#include "linklocator.h"
24#include "pimemoticons.h"
25#include <tdeglobal.h>
26#include <tdestandarddirs.h>
27#include <kstaticdeleter.h>
28#include <kmdcodec.h>
29#include <kdebug.h>
30
31#include <tqstylesheet.h>
32#include <tqfile.h>
33#include <tqregexp.h>
34
35#include <limits.h>
36
// Shared table mapping a smiley's text to the base name of its emoticon image.
// Created and filled by the first LinkLocator that gets constructed.
37TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
// Shared cache mapping a smiley's text to the HTML generated for it by
// getEmoticon(). Created (empty) by the first LinkLocator that gets constructed.
38TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;
39
// The two maps above are handed to these deleters via setObject() in the
// constructor.
// NOTE(review): presumably KStaticDeleter frees them at application shutdown
// -- confirm against the KStaticDeleter documentation.
40static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
41static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
42
43LinkLocator::LinkLocator(const TQString& text, int pos)
44 : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
45{
46 // If you change either of the above values for maxUrlLen or
47 // maxAddressLen, then please also update the documentation for
48 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
49 // default values used for the maxUrlLen/maxAddressLen parameters
50 // of convertToHtml().
51
52 if ( !s_smileyEmoticonNameMap ) {
53 smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
54 new TQMap<TQString, TQString>() );
55 for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
56 TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
57 imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
58 s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
59 }
60 }
61
62 if ( !s_smileyEmoticonHTMLCache )
63 smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
64 new TQMap<TQString, TQString>() );
65}
66
68{
69 mMaxUrlLen = length;
70}
71
73{
74 return mMaxUrlLen;
75}
76
78{
79 mMaxAddressLen = length;
80}
81
83{
84 return mMaxAddressLen;
85}
86
88{
89 TQString url;
90 if(atUrl())
91 {
92 // handle cases like this: <link>http://foobar.org/</link>
93 int start = mPos;
94 while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
95 TQString("<>()[]").find(mText[mPos]) == -1)
96 {
97 ++mPos;
98 }
99 /* some URLs really end with: # / & - _ */
100 const TQString allowedSpecialChars = TQString("#/&-_");
101 while(mPos > start && mText[mPos-1].isPunct() &&
102 allowedSpecialChars.find(mText[mPos-1]) == -1 )
103 {
104 --mPos;
105 }
106
107 url = mText.mid(start, mPos - start);
108 if(isEmptyUrl(url) || mPos - start > maxUrlLen())
109 {
110 mPos = start;
111 url = "";
112 }
113 else
114 {
115 --mPos;
116 }
117 }
118 return url;
119}
120
121// keep this in sync with KMMainWin::slotUrlClicked()
122bool LinkLocator::atUrl() const
123{
124 // the following characters are allowed in a dot-atom (RFC 2822):
125 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
126 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
127
128 // the character directly before the URL must not be a letter, a number or
129 // any other character allowed in a dot-atom (RFC 2822).
130 if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
131 ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
132 return false;
133
134 TQChar ch = mText[mPos];
135 return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
136 mText.mid(mPos, 8) == "https://") ) ||
137 (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
138 (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
139 mText.mid(mPos, 6) == "ftp://" ||
140 mText.mid(mPos, 7) == "ftps://") ) ||
141 (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
142 mText.mid(mPos, 6) == "smb://") ) ||
143 (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
144 (ch=='w' && mText.mid(mPos, 4) == "www.") ||
145 (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
146 (ch=='n' && mText.mid(mPos, 5) == "news:");
147 // note: no "file:" for security reasons
148}
149
150bool LinkLocator::isEmptyUrl(const TQString& url)
151{
152 return url.isEmpty() ||
153 url == "http://" ||
154 url == "https://" ||
155 url == "fish://" ||
156 url == "ftp://" ||
157 url == "ftps://" ||
158 url == "sftp://" ||
159 url == "smb://" ||
160 url == "vnc://" ||
161 url == "mailto" ||
162 url == "www" ||
163 url == "ftp" ||
164 url == "news" ||
165 url == "news://";
166}
167
169{
170 TQString address;
171
172 if ( mText[mPos] == '@' ) {
173 // the following characters are allowed in a dot-atom (RFC 2822):
174 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
175 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
176
177 // determine the local part of the email address
178 int start = mPos - 1;
179 while ( start >= 0 && mText[start].unicode() < 128 &&
180 ( mText[start].isLetterOrNumber() ||
181 mText[start] == '@' || // allow @ to find invalid email addresses
182 allowedSpecialChars.find( mText[start] ) != -1 ) ) {
183 if ( mText[start] == '@' )
184 return TQString(); // local part contains '@' -> no email address
185 --start;
186 }
187 ++start;
188 // we assume that an email address starts with a letter or a digit
189 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
190 ++start;
191 if ( start == mPos )
192 return TQString(); // local part is empty -> no email address
193
194 // determine the domain part of the email address
195 int dotPos = INT_MAX;
196 int end = mPos + 1;
197 while ( end < (int)mText.length() &&
198 ( mText[end].isLetterOrNumber() ||
199 mText[end] == '@' || // allow @ to find invalid email addresses
200 mText[end] == '.' ||
201 mText[end] == '-' ) ) {
202 if ( mText[end] == '@' )
203 return TQString(); // domain part contains '@' -> no email address
204 if ( mText[end] == '.' )
205 dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
206 ++end;
207 }
208 // we assume that an email address ends with a letter or a digit
209 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
210 --end;
211 if ( end == mPos )
212 return TQString(); // domain part is empty -> no email address
213 if ( dotPos >= end )
214 return TQString(); // domain part doesn't contain a dot
215
216 if ( end - start > maxAddressLen() )
217 return TQString(); // too long -> most likely no email address
218 address = mText.mid( start, end - start );
219
220 mPos = end - 1;
221 }
222 return address;
223}
224
225TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
226 int maxUrlLen, int maxAddressLen)
227{
228 LinkLocator locator(plainText);
229 locator.setMaxUrlLen(maxUrlLen);
231
232 TQString str;
233 TQString result((TQChar*)0, (int)locator.mText.length() * 2);
234 TQChar ch;
235 int x;
236 bool startOfLine = true;
237 TQString emoticon;
238
239 for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
240 {
241 ch = locator.mText[locator.mPos];
242 if ( flags & PreserveSpaces )
243 {
244 if (ch==' ')
245 {
246 if (startOfLine) {
247 result += "&nbsp;";
248 locator.mPos++, x++;
249 startOfLine = false;
250 }
251 while (locator.mText[locator.mPos] == ' ')
252 {
253 result += " ";
254 locator.mPos++, x++;
255 if (locator.mText[locator.mPos] == ' ') {
256 result += "&nbsp;";
257 locator.mPos++, x++;
258 }
259 }
260 locator.mPos--, x--;
261 continue;
262 }
263 else if (ch=='\t')
264 {
265 do
266 {
267 result += "&nbsp;";
268 x++;
269 }
270 while((x&7) != 0);
271 x--;
272 startOfLine = false;
273 continue;
274 }
275 }
276 if (ch=='\n')
277 {
278 result += "<br />";
279 startOfLine = true;
280 x = -1;
281 continue;
282 }
283
284 startOfLine = false;
285 if (ch=='&')
286 result += "&amp;";
287 else if (ch=='"')
288 result += "&quot;";
289 else if (ch=='<')
290 result += "&lt;";
291 else if (ch=='>')
292 result += "&gt;";
293 else
294 {
295 const int start = locator.mPos;
296 if ( !(flags & IgnoreUrls) ) {
297 str = locator.getUrl();
298 if (!str.isEmpty())
299 {
300 TQString hyperlink;
301 if(str.left(4) == "www.")
302 hyperlink = "http://" + str;
303 else if(str.left(4) == "ftp.")
304 hyperlink = "ftp://" + str;
305 else
306 hyperlink = str;
307
308 str = str.replace('&', "&amp;");
309 result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
310 x += locator.mPos - start;
311 continue;
312 }
313 str = locator.getEmailAddress();
314 if(!str.isEmpty())
315 {
316 // len is the length of the local part
317 int len = str.find('@');
318 TQString localPart = str.left(len);
319
320 // remove the local part from the result (as '&'s have been expanded to
321 // &amp; we have to take care of the 4 additional characters per '&')
322 result.truncate(result.length() - len - (localPart.contains('&')*4));
323 x -= len;
324
325 result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
326 x += str.length() - 1;
327 continue;
328 }
329 }
330 if ( flags & ReplaceSmileys ) {
331 str = locator.getEmoticon();
332 if ( ! str.isEmpty() ) {
333 result += str;
334 x += locator.mPos - start;
335 continue;
336 }
337 }
338 if ( flags & HighlightText ) {
339 str = locator.highlightedText();
340 if ( !str.isEmpty() ) {
341 result += str;
342 x += locator.mPos - start;
343 continue;
344 }
345 }
346 result += ch;
347 }
348 }
349
350 return result;
351}
352
353TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
354{
355 if ( iconPath.isEmpty() )
356 return TQString();
357
358 TQFile pngFile( iconPath );
359 if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
360 return TQString();
361
362 TQByteArray ba = pngFile.readAll();
363 pngFile.close();
364 return TQString::fromLatin1("data:image/png;base64,%1")
365 .arg( KCodecs::base64Encode( ba ).data() );
366}
367
368
369TQString LinkLocator::getEmoticon()
370{
371 // smileys have to be prepended by whitespace
372 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
373 return TQString();
374
375 // since smileys start with ':', ';', '(' or '8' short circuit method
376 const TQChar ch = mText[mPos];
377 if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
378 return TQString();
379
380 // find the end of the smiley (a smiley is at most 4 chars long and ends at
381 // lineend or whitespace)
382 const int MinSmileyLen = 2;
383 const int MaxSmileyLen = 4;
384 int smileyLen = 1;
385 while ( ( smileyLen <= MaxSmileyLen ) &&
386 ( mPos+smileyLen < (int)mText.length() ) &&
387 !mText[mPos+smileyLen].isSpace() )
388 smileyLen++;
389 if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
390 return TQString();
391
392 const TQString smiley = mText.mid( mPos, smileyLen );
393 if ( !s_smileyEmoticonNameMap->contains( smiley ) )
394 return TQString(); // that's not a (known) smiley
395
396 TQString htmlRep;
397 if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
398 htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
399 }
400 else {
401 const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
402 const TQString iconPath = locate( "emoticons",
403 EmotIcons::theme() +
404 TQString::fromLatin1( "/" ) +
405 imageName + TQString::fromLatin1(".png") );
406 const TQString dataUrl = pngToDataUrl( iconPath );
407 if ( dataUrl.isEmpty() ) {
408 htmlRep = TQString();
409 }
410 else {
411 // create an image tag (the text in attribute alt is used
412 // for copy & paste) representing the smiley
413 htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
414 "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
415 .arg( dataUrl,
416 TQStyleSheet::escape( smiley ),
417 TQStyleSheet::escape( smiley ) );
418 }
419 s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
420 }
421
422 if ( !htmlRep.isEmpty() )
423 mPos += smileyLen - 1;
424
425 return htmlRep;
426}
427
428TQString LinkLocator::highlightedText()
429{
430 // formating symbols must be prepended with a whitespace
431 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
432 return TQString();
433
434 const TQChar ch = mText[mPos];
435 if ( ch != '/' && ch != '*' && ch != '_' )
436 return TQString();
437
438 TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
439 if ( re.search( mText, mPos ) == mPos ) {
440 uint length = re.matchedLength();
441 // there must be a whitespace after the closing formating symbol
442 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
443 return TQString();
444 mPos += length - 1;
445 switch ( ch.latin1() ) {
446 case '*':
447 return "<b>" + re.cap( 1 ) + "</b>";
448 case '_':
449 return "<u>" + re.cap( 1 ) + "</u>";
450 case '/':
451 return "<i>" + re.cap( 1 ) + "</i>";
452 }
453 }
454 return TQString();
455}
456
LinkLocator assists in identifying sections of text that can usefully be converted into hyperlinks in HTML.
Definition: linklocator.h:42
int maxAddressLen() const
Definition: linklocator.cpp:82
TQString getEmailAddress()
Attempts to grab an email address.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
Definition: linklocator.cpp:67
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
Definition: linklocator.cpp:77
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
int mPos
The current scan position.
Definition: linklocator.h:161
int maxUrlLen() const
Definition: linklocator.cpp:72
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point.
Definition: linklocator.cpp:43
TQString mText
The plaintext string being scanned for URLs and email addresses.
Definition: linklocator.h:157
TQString getUrl()
Attempts to grab a URL starting at the current scan position.
Definition: linklocator.cpp:87