• Skip to content
  • Skip to link menu
KDE 4.0 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KIMAP Library

rfccodecs.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  *
00003  *   rfccodecs.cpp - handler for various rfc/mime encodings
00004  *   Copyright (C) 2000 s.carstens@gmx.de
00005  *
00006  *   This library is free software; you can redistribute it and/or
00007  *   modify it under the terms of the GNU Library General Public
00008  *   License as published by the Free Software Foundation; either
00009  *   version 2 of the License, or (at your option) any later version.
00010  *
00011  *   This library is distributed in the hope that it will be useful,
00012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *   Library General Public License for more details.
00015  *
00016  *   You should have received a copy of the GNU Library General Public License
00017  *   along with this library; see the file COPYING.LIB.  If not, write to
00018  *   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *   Boston, MA 02110-1301, USA.
00020  *
00021  *********************************************************************/
00033 #include "rfccodecs.h"
00034 
00035 #include <ctype.h>
00036 #include <sys/types.h>
00037 
00038 #include <stdio.h>
00039 #include <stdlib.h>
00040 
00041 #include <QtCore/QTextCodec>
00042 #include <QtCore/QBuffer>
00043 #include <QtCore/QRegExp>
00044 #include <QtCore/QByteArray>
00045 #include <QtCore/QLatin1Char>
00046 #include <kcodecs.h>
00047 
00048 using namespace KIMAP;
00049 
00050 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
00051 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
00052 
00053 //@cond PRIVATE
00054 static const unsigned char base64chars[] = /* 6-bit value -> character of the folder-name base64 alphabet (note the final "+,") */
00055   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
00056 #define UNDEFINED 64 /* decode-table entry for bytes that are not in base64chars */
00057 #define MAXLINE  76 /* not referenced by the code in this listing */
00058 static const char especials[17] = "()<>@,;:\"/[]?.= "; /* 16 characters (plus NUL) that the RFC 2047/2231 encoders below hex-escape */
00059 
00060 /* UTF16 definitions: constants used to split/join surrogate pairs */
00061 #define UTF16MASK       0x03FFUL /* low 10 bits carried by one surrogate */
00062 #define UTF16SHIFT      10 /* bit offset of the high surrogate's payload */
00063 #define UTF16BASE       0x10000UL /* code points from here on are split into two UTF-16 units */
00064 #define UTF16HIGHSTART  0xD800UL /* first high surrogate */
00065 #define UTF16HIGHEND    0xDBFFUL /* last high surrogate */
00066 #define UTF16LOSTART    0xDC00UL /* first low surrogate */
00067 #define UTF16LOEND      0xDFFFUL /* last low surrogate */
00068 //@endcond
00069 
00070 //-----------------------------------------------------------------------------
00071 QString KIMAP::decodeImapFolderName( const QString &inSrc )
00072 {
00073   unsigned char c, i, bitcount;
00074   unsigned long ucs4, utf16, bitbuf;
00075   unsigned char base64[256], utf8[6];
00076   unsigned int srcPtr = 0;
00077   QByteArray dst;
00078   QByteArray src = inSrc.toAscii ();
00079   uint srcLen = inSrc.length();
00080 
00081   /* initialize modified base64 decoding table */
00082   memset( base64, UNDEFINED, sizeof( base64 ) );
00083   for ( i = 0; i < sizeof( base64chars ); ++i ) {
00084     base64[(int)base64chars[i]] = i;
00085   }
00086 
00087   /* loop until end of string */
00088   while ( srcPtr < srcLen ) {
00089     c = src[srcPtr++];
00090     /* deal with literal characters and &- */
00091     if ( c != '&' || src[srcPtr] == '-' ) {
00092       /* encode literally */
00093       dst += c;
00094       /* skip over the '-' if this is an &- sequence */
00095       if ( c == '&' ) {
00096         srcPtr++;
00097       }
00098     } else {
00099       /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
00100       bitbuf = 0;
00101       bitcount = 0;
00102       ucs4 = 0;
00103       while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
00104         ++srcPtr;
00105         bitbuf = ( bitbuf << 6 ) | c;
00106         bitcount += 6;
00107         /* enough bits for a UTF-16 character? */
00108         if ( bitcount >= 16 ) {
00109           bitcount -= 16;
00110           utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
00111           /* convert UTF16 to UCS4 */
00112           if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
00113             ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
00114             continue;
00115           } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
00116             ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
00117           } else {
00118             ucs4 = utf16;
00119           }
00120           /* convert UTF-16 range of UCS4 to UTF-8 */
00121           if ( ucs4 <= 0x7fUL ) {
00122             utf8[0] = ucs4;
00123             i = 1;
00124           } else if ( ucs4 <= 0x7ffUL ) {
00125             utf8[0] = 0xc0 | ( ucs4 >> 6 );
00126             utf8[1] = 0x80 | ( ucs4 & 0x3f );
00127             i = 2;
00128           } else if ( ucs4 <= 0xffffUL ) {
00129             utf8[0] = 0xe0 | ( ucs4 >> 12 );
00130             utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00131             utf8[2] = 0x80 | ( ucs4 & 0x3f );
00132             i = 3;
00133           } else {
00134             utf8[0] = 0xf0 | ( ucs4 >> 18 );
00135             utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
00136             utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00137             utf8[3] = 0x80 | ( ucs4 & 0x3f );
00138             i = 4;
00139           }
00140           /* copy it */
00141           for ( c = 0; c < i; ++c ) {
00142             dst += utf8[c];
00143           }
00144         }
00145       }
00146       /* skip over trailing '-' in modified UTF-7 encoding */
00147       if ( src[srcPtr] == '-' ) {
00148         ++srcPtr;
00149       }
00150     }
00151   }
00152   return QString::fromUtf8( dst.data () );
00153 }
00154 
00155 //-----------------------------------------------------------------------------
00156 QString KIMAP::quoteIMAP( const QString &src )
00157 {
00158   uint len = src.length();
00159   QString result;
00160   result.reserve( 2 * len );
00161   for ( unsigned int i = 0; i < len; i++ ) {
00162     if ( src[i] == '"' || src[i] == '\\' ) {
00163       result += '\\';
00164     }
00165     result += src[i];
00166   }
00167   //result.squeeze(); - unnecessary and slow
00168   return result;
00169 }
00170 
00171 //-----------------------------------------------------------------------------
00172 QString KIMAP::encodeImapFolderName( const QString &inSrc )
00173 {
00174   unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
00175   unsigned int ucs4, bitbuf;
00176   QByteArray src = inSrc.toUtf8 ();
00177   QString dst;
00178 
00179   int srcPtr = 0;
00180   utf7mode = 0;
00181   utf8total = 0;
00182   bitstogo = 0;
00183   utf8pos = 0;
00184   bitbuf = 0;
00185   ucs4 = 0;
00186   while ( srcPtr < src.length () ) {
00187     c = (unsigned char)src[srcPtr++];
00188     /* normal character? */
00189     if ( c >= ' ' && c <= '~' ) {
00190       /* switch out of UTF-7 mode */
00191       if ( utf7mode ) {
00192         if ( bitstogo ) {
00193           dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00194           bitstogo = 0;
00195         }
00196         dst += '-';
00197         utf7mode = 0;
00198       }
00199       dst += c;
00200       /* encode '&' as '&-' */
00201       if ( c == '&' ) {
00202         dst += '-';
00203       }
00204       continue;
00205     }
00206     /* switch to UTF-7 mode */
00207     if ( !utf7mode ) {
00208       dst += '&';
00209       utf7mode = 1;
00210     }
00211     /* Encode US-ASCII characters as themselves */
00212     if ( c < 0x80 ) {
00213       ucs4 = c;
00214       utf8total = 1;
00215     } else if ( utf8total ) {
00216       /* save UTF8 bits into UCS4 */
00217       ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
00218       if ( ++utf8pos < utf8total ) {
00219         continue;
00220       }
00221     } else {
00222       utf8pos = 1;
00223       if ( c < 0xE0 ) {
00224         utf8total = 2;
00225         ucs4 = c & 0x1F;
00226       } else if ( c < 0xF0 ) {
00227         utf8total = 3;
00228         ucs4 = c & 0x0F;
00229       } else {
00230         /* NOTE: can't convert UTF8 sequences longer than 4 */
00231         utf8total = 4;
00232         ucs4 = c & 0x03;
00233       }
00234       continue;
00235     }
00236     /* loop to split ucs4 into two utf16 chars if necessary */
00237     utf8total = 0;
00238     do
00239     {
00240       if ( ucs4 >= UTF16BASE ) {
00241         ucs4 -= UTF16BASE;
00242         bitbuf =
00243           ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
00244         ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
00245         utf16flag = 1;
00246       } else {
00247         bitbuf = ( bitbuf << 16 ) | ucs4;
00248         utf16flag = 0;
00249       }
00250       bitstogo += 16;
00251       /* spew out base64 */
00252       while ( bitstogo >= 6 ) {
00253         bitstogo -= 6;
00254         dst +=
00255           base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
00256       }
00257     }
00258     while ( utf16flag );
00259   }
00260   /* if in UTF-7 mode, finish in ASCII */
00261   if ( utf7mode ) {
00262     if ( bitstogo ) {
00263       dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00264     }
00265     dst += '-';
00266   }
00267   return quoteIMAP( dst );
00268 }
00269 
00270 //-----------------------------------------------------------------------------
00271 QTextCodec *KIMAP::codecForName( const QString &str )
00272 {
00273   if ( str.isEmpty () ) {
00274     return 0;
00275   }
00276   return QTextCodec::codecForName ( str.toLower ().
00277                                     replace ( "windows", "cp" ).toLatin1 () );
00278 }
00279 
00280 //-----------------------------------------------------------------------------
00281 const QString KIMAP::decodeRFC2047String( const QString &str )
00282 {
00283   QString throw_away;
00284 
00285   return decodeRFC2047String( str, throw_away );
00286 }
00287 
00288 //-----------------------------------------------------------------------------
00289 const QString KIMAP::decodeRFC2047String( const QString &str,
00290                                           QString &charset )
00291 {
00292   QString throw_away;
00293 
00294   return decodeRFC2047String( str, charset, throw_away );
00295 }
00296 
00297 //-----------------------------------------------------------------------------
00298 const QString KIMAP::decodeRFC2047String( const QString &str,
00299                                           QString &charset,
00300                                           QString &language )
00301 {
00302   //do we have a rfc string
00303   if ( !str.contains( "=?" ) ) {
00304     return str;
00305   }
00306 
00307   // FIXME get rid of the conversion?
00308   QByteArray aStr = str.toAscii ();  // QString.length() means Unicode chars
00309   QByteArray result;
00310   char *pos, *beg, *end, *mid = 0;
00311   QByteArray cstr;
00312   char encoding = 0, ch;
00313   bool valid;
00314   const int maxLen = 200;
00315   int i;
00316 
00317 //  result.truncate(aStr.length());
00318   for ( pos = aStr.data (); *pos; pos++ ) {
00319     if ( pos[0] != '=' || pos[1] != '?' ) {
00320       result += *pos;
00321       continue;
00322     }
00323     beg = pos + 2;
00324     end = beg;
00325     valid = true;
00326     // parse charset name
00327     for ( i = 2, pos += 2;
00328           i < maxLen &&
00329               ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) );
00330           i++ )
00331       pos++;
00332     if ( *pos != '?' || i < 4 || i >= maxLen ) {
00333       valid = false;
00334     } else {
00335       charset = QByteArray( beg, i - 1 );  // -2 + 1 for the zero
00336       int pt = charset.lastIndexOf( '*' );
00337       if ( pt != -1 ) {
00338         // save language for later usage
00339         language = charset.right( charset.length () - pt - 1 );
00340 
00341         // tie off language as defined in rfc2047
00342         charset.truncate( pt );
00343       }
00344       // get encoding and check delimiting question marks
00345       encoding = toupper( pos[1] );
00346       if ( pos[2] != '?' ||
00347            ( encoding != 'Q' && encoding != 'B' &&
00348              encoding != 'q' && encoding != 'b' ) ) {
00349         valid = false;
00350       }
00351       pos += 3;
00352       i += 3;
00353 //  kDebug(7116) << "KIMAP::decodeRFC2047String - charset" << charset
00354 //               << "- language" << language << "-'" << pos << "'";
00355     }
00356     if ( valid ) {
00357       mid = pos;
00358       // search for end of encoded part
00359       while ( i < maxLen && *pos && !( *pos == '?' && *(pos + 1) == '=' ) ) {
00360         i++;
00361         pos++;
00362       }
00363       end = pos + 2;//end now points to the first char after the encoded string
00364       if ( i >= maxLen || !*pos ) {
00365         valid = false;
00366       }
00367     }
00368     if ( valid ) {
00369       ch = *pos;
00370       *pos = '\0';
00371       cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) );
00372       if ( encoding == 'Q' ) {
00373         // decode quoted printable text
00374         for ( i = cstr.length () - 1; i >= 0; i-- ) {
00375           if ( cstr[i] == '_' ) {
00376             cstr[i] = ' ';
00377           }
00378         }
00379 //    kDebug(7116) << "KIMAP::decodeRFC2047String - before QP '"
00380 //    << cstr << "'";
00381         cstr = KCodecs::quotedPrintableDecode( cstr );
00382 //    kDebug(7116) << "KIMAP::decodeRFC2047String - after QP '"
00383 //    << cstr << "'";
00384       } else {
00385         // decode base64 text
00386         cstr = QByteArray::fromBase64( cstr );
00387       }
00388       *pos = ch;
00389       int len = cstr.length();
00390       for ( i = 0; i < len; i++ ) {
00391         result += cstr[i];
00392       }
00393 
00394       pos = end - 1;
00395     } else {
00396 //    kDebug(7116) << "KIMAP::decodeRFC2047String - invalid";
00397       //result += "=?";
00398       //pos = beg -1; // because pos gets increased shortly afterwards
00399       pos = beg - 2;
00400       result += *pos++;
00401       result += *pos;
00402     }
00403   }
00404   if ( !charset.isEmpty () ) {
00405     QTextCodec *aCodec = codecForName( charset.toAscii () );
00406     if ( aCodec ) {
00407 //    kDebug(7116) << "Codec is" << aCodec->name();
00408       return aCodec->toUnicode( result );
00409     }
00410   }
00411   return result;
00412 }
00413 
00414 //-----------------------------------------------------------------------------
00415 const QString KIMAP::encodeRFC2047String( const QString &str )
00416 {
00417   if ( str.isEmpty () ) {
00418     return str;
00419   }
00420 
00421   const signed char *latin =
00422     reinterpret_cast<const signed char *>
00423     ( str.toLatin1().data() ), *l, *start, *stop;
00424   char hexcode;
00425   int numQuotes, i;
00426   int rptr = 0;
00427   // My stats show this number results in 12 resize() out of 73,000
00428   int resultLen = 3 * str.length() / 2;
00429   QByteArray result( resultLen, '\0' );
00430 
00431   while ( *latin ) {
00432     l = latin;
00433     start = latin;
00434     while ( *l ) {
00435       if ( *l == 32 ) {
00436         start = l + 1;
00437       }
00438       if ( *l < 0 ) {
00439         break;
00440       }
00441       l++;
00442     }
00443     if ( *l ) {
00444       numQuotes = 1;
00445       while ( *l ) {
00446         /* The encoded word must be limited to 75 character */
00447         for ( i = 0; i < 16; i++ ) {
00448           if ( *l == especials[i] ) {
00449             numQuotes++;
00450           }
00451         }
00452         if ( *l < 0 ) {
00453           numQuotes++;
00454         }
00455         /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
00456         if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
00457           break;
00458         }
00459         l++;
00460       }
00461       if ( *l ) {
00462         stop = l - 1;
00463         while ( stop >= start && *stop != 32 ) {
00464           stop--;
00465         }
00466         if ( stop <= start ) {
00467           stop = l;
00468         }
00469       } else {
00470         stop = l;
00471       }
00472       if ( resultLen - rptr - 1 <= start -  latin + 1 + 16 ) {
00473         // =?iso-88...
00474         resultLen += ( start - latin + 1 ) * 2 + 20; // more space
00475     result.resize( resultLen );
00476       }
00477       while ( latin < start ) {
00478         result[rptr++] = *latin;
00479         latin++;
00480       }
00481       result.replace( rptr, 15, "=?iso-8859-1?q?" );
00482       rptr += 15;
00483       if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) {
00484         resultLen += ( stop - latin + 1 ) * 4 + 20; // more space
00485     result.resize( resultLen );
00486       }
00487       while ( latin < stop ) {
00488         // can add up to 3 chars/iteration
00489         numQuotes = 0;
00490         for ( i = 0; i < 16; i++ ) {
00491           if ( *latin == especials[i] ) {
00492             numQuotes = 1;
00493           }
00494         }
00495         if ( *latin < 0 ) {
00496           numQuotes = 1;
00497         }
00498         if ( numQuotes ) {
00499           result[rptr++] = '=';
00500           hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
00501           if ( hexcode >= 58 ) {
00502             hexcode += 7;
00503           }
00504           result[rptr++] = hexcode;
00505           hexcode = ( *latin & 0x0F ) + 48;
00506           if ( hexcode >= 58 ) {
00507             hexcode += 7;
00508           }
00509           result[rptr++] = hexcode;
00510         } else {
00511           result[rptr++] = *latin;
00512         }
00513         latin++;
00514       }
00515       result[rptr++] = '?';
00516       result[rptr++] = '=';
00517     } else {
00518       while ( *latin ) {
00519         if ( rptr == resultLen - 1 ) {
00520           resultLen += 30;
00521           result.resize( resultLen );
00522         }
00523         result[rptr++] = *latin;
00524         latin++;
00525       }
00526     }
00527   }
00528   result[rptr] = 0;
00529   //free (latinStart);
00530   return result;
00531 }
00532 
00533 //-----------------------------------------------------------------------------
00534 const QString KIMAP::encodeRFC2231String( const QString &str )
00535 {
00536   if ( str.isEmpty () ) {
00537     return str;
00538   }
00539 
00540   signed char *latin = (signed char *) calloc (1, str.length () + 1);
00541   char *latin_us = (char *)latin;
00542   strcpy( latin_us, str.toLatin1 () );
00543   signed char *l = latin;
00544   char hexcode;
00545   int i;
00546   bool quote;
00547   while ( *l ) {
00548     if ( *l < 0 ) {
00549       break;
00550     }
00551     l++;
00552   }
00553   if ( !*l ) {
00554     free( latin );
00555     return str;
00556   }
00557   QByteArray result;
00558   l = latin;
00559   while ( *l ) {
00560     quote = *l < 0;
00561     for ( i = 0; i < 16; i++ ) {
00562       if ( *l == especials[i] ) {
00563         quote = true;
00564       }
00565     }
00566     if ( quote ) {
00567       result += '%';
00568       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00569       if ( hexcode >= 58 ) {
00570         hexcode += 7;
00571       }
00572       result += hexcode;
00573       hexcode = ( *l & 0x0F ) + 48;
00574       if ( hexcode >= 58 ) {
00575         hexcode += 7;
00576       }
00577       result += hexcode;
00578     } else {
00579       result += *l;
00580     }
00581     l++;
00582   }
00583   free( latin );
00584   return result;
00585 }
00586 
00587 //-----------------------------------------------------------------------------
00588 const QString KIMAP::decodeRFC2231String( const QString &str )
00589 {
00590   int p = str.indexOf ( '\'' );
00591 
00592   //see if it is an rfc string
00593   if ( p < 0 ) {
00594     return str;
00595   }
00596 
00597   int l = str.lastIndexOf( '\'' );
00598 
00599   //second is language
00600   if ( p >= l ) {
00601     return str;
00602   }
00603 
00604   //first is charset or empty
00605   QString charset = str.left ( p );
00606   QString st = str.mid ( l + 1 );
00607   QString language = str.mid ( p + 1, l - p - 1 );
00608 
00609   //kDebug(7116) << "Charset:" << charset << "Language:" << language;
00610 
00611   char ch, ch2;
00612   p = 0;
00613   while ( p < (int) st.length () ) {
00614     if ( st.at( p ) == 37 ) {
00615       ch = st.at( p + 1 ).toLatin1 () - 48;
00616       if ( ch > 16 ) {
00617         ch -= 7;
00618       }
00619       ch2 = st.at( p + 2 ).toLatin1 () - 48;
00620       if ( ch2 > 16 ) {
00621         ch2 -= 7;
00622       }
00623       st.replace( p, 1, ch * 16 + ch2 );
00624       st.remove ( p + 1, 2 );
00625     }
00626     p++;
00627   }
00628   return st;
00629 }

KIMAP Library

Skip menu "KIMAP Library"
  • Main Page
  • File List
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • kabc
  • kblog
  • kcal
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.5.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal