• Skip to content
  • Skip to link menu
KDE 4.0 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     This library is free software; you can redistribute it and/or
00008     modify it under the terms of the GNU Library General Public
00009     License as published by the Free Software Foundation; either
00010     version 2 of the License, or (at your option) any later version.
00011 
00012     This library is distributed in the hope that it will be useful,
00013     but WITHOUT ANY WARRANTY; without even the implied warranty of
00014     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015     Library General Public License for more details.
00016 
00017     You should have received a copy of the GNU Library General Public License
00018     along with this library; see the file COPYING.LIB.  If not, write to
00019     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020     Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "kmime_header_parsing.h"
00024 
00025 #include "kmime_codecs.h"
00026 #include "kmime_util.h"
00027 #include "kmime_dateformatter.h"
00028 #include "kmime_warning.h"
00029 
00030 #include <kglobal.h>
00031 #include <kcharsets.h>
00032 
00033 #include <QtCore/QTextCodec>
00034 #include <QtCore/QMap>
00035 #include <QtCore/QStringList>
00036 
00037 #include <ctype.h> // for isdigit
00038 #include <cassert>
00039 
00040 using namespace KMime;
00041 using namespace KMime::Types;
00042 
00043 namespace KMime {
00044 
00045 namespace Types {
00046 
00047 QString AddrSpec::asString() const
00048 {
00049   bool needsQuotes = false;
00050   QString result;
00051   for ( int i = 0 ; i < localPart.length() ; ++i ) {
00052     const char ch = localPart[i].toLatin1();
00053     if ( ch == '.' || isAText( ch ) ) {
00054       result += ch;
00055     } else {
00056       needsQuotes = true;
00057       if ( ch == '\\' || ch == '"' ) {
00058         result += '\\';
00059       }
00060       result += ch;
00061     }
00062   }
00063   if ( needsQuotes ) {
00064     return '"' + result + "\"@" + domain;
00065   } else {
00066     return result + '@' + domain;
00067   }
00068 }
00069 
00070 bool AddrSpec::isEmpty() const
00071 {
00072   return localPart.isEmpty() && domain.isEmpty();
00073 }
00074 
00075 QByteArray Mailbox::address() const
00076 {
00077   return mAddrSpec.asString().toLatin1();
00078 }
00079 
00080 AddrSpec Mailbox::addrSpec() const
00081 {
00082   return mAddrSpec;
00083 }
00084 
00085 QString Mailbox::name() const
00086 {
00087   return mDisplayName;
00088 }
00089 
00090 void Mailbox::setAddress( const AddrSpec &addr )
00091 {
00092   mAddrSpec = addr;
00093 }
00094 
00095 void Mailbox::setAddress( const QByteArray &addr )
00096 {
00097   const char *cursor = addr.constData();
00098   if ( !HeaderParsing::parseAngleAddr( cursor,
00099                                        cursor + addr.length(), mAddrSpec ) ) {
00100     if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
00101                                         mAddrSpec ) ) {
00102       kWarning() << "Invalid address";
00103       return;
00104     }
00105   }
00106 }
00107 
00108 void Mailbox::setName( const QString &name )
00109 {
00110   mDisplayName = name;
00111 }
00112 
00113 void Mailbox::setNameFrom7Bit( const QByteArray &name,
00114                                const QByteArray &defaultCharset )
00115 {
00116   QByteArray cs;
00117   mDisplayName = decodeRFC2047String( name, cs, defaultCharset, false );
00118 }
00119 
00120 bool Mailbox::hasAddress() const
00121 {
00122   return !mAddrSpec.isEmpty();
00123 }
00124 
00125 bool Mailbox::hasName() const
00126 {
00127   return !mDisplayName.isEmpty();
00128 }
00129 
00130 QString Mailbox::prettyAddress() const
00131 {
00132   if ( !hasName() ) {
00133     return address();
00134   }
00135   QString s = name();
00136   if ( hasAddress() ) {
00137     s += QLatin1String(" <") + address() + QLatin1Char('>');
00138   }
00139   return s;
00140 }
00141 
00142 void Mailbox::fromUnicodeString( const QString &s )
00143 {
00144   from7BitString( encodeRFC2047String( s, "utf-8", false ) );
00145 }
00146 
00147 void Mailbox::from7BitString( const QByteArray &s )
00148 {
00149   const char *cursor = s.constData();
00150   HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
00151 }
00152 
00153 QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
00154 {
00155   if ( !hasName() ) {
00156     return address();
00157   }
00158   QByteArray rv;
00159   if ( isUsAscii( name() ) ) {
00160     QByteArray tmp = name().toLatin1();
00161     addQuotes( tmp, false );
00162     rv += tmp;
00163   } else {
00164     rv += encodeRFC2047String( name(), encCharset, true );
00165   }
00166   if ( hasAddress() ) {
00167     rv += " <" + address() + '>';
00168   }
00169   return rv;
00170 }
00171 
00172 } // namespace Types
00173 
00174 namespace HeaderParsing {
00175 
00176 // parse the encoded-word (scursor points to after the initial '=')
00177 bool parseEncodedWord( const char* &scursor, const char * const send,
00178                        QString &result, QByteArray &language,
00179                        QByteArray &usedCS, const QByteArray &defaultCS,
00180                        bool forceCS )
00181 {
00182   // make sure the caller already did a bit of the work.
00183   assert( *(scursor-1) == '=' );
00184 
00185   //
00186   // STEP 1:
00187   // scan for the charset/language portion of the encoded-word
00188   //
00189 
00190   char ch = *scursor++;
00191 
00192   if ( ch != '?' ) {
00193     kDebug(5320) << "first";
00194     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00195     return false;
00196   }
00197 
00198   // remember start of charset (ie. just after the initial "=?") and
00199   // language (just after the first '*') fields:
00200   const char * charsetStart = scursor;
00201   const char * languageStart = 0;
00202 
00203   // find delimiting '?' (and the '*' separating charset and language
00204   // tags, if any):
00205   for ( ; scursor != send ; scursor++ ) {
00206     if ( *scursor == '?') {
00207       break;
00208     } else if ( *scursor == '*' && languageStart == 0 ) {
00209       languageStart = scursor + 1;
00210     }
00211   }
00212 
00213   // not found? can't be an encoded-word!
00214   if ( scursor == send || *scursor != '?' ) {
00215     kDebug(5320) << "second";
00216     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00217     return false;
00218   }
00219 
00220   // extract the language information, if any (if languageStart is 0,
00221   // language will be null, too):
00222   QByteArray maybeLanguage( languageStart, scursor - languageStart );
00223   // extract charset information (keep in mind: the size given to the
00224   // ctor is one off due to the \0 terminator):
00225   QByteArray maybeCharset( charsetStart,
00226                            ( languageStart ? languageStart - 1 : scursor ) - charsetStart );
00227 
00228   //
00229   // STEP 2:
00230   // scan for the encoding portion of the encoded-word
00231   //
00232 
00233   // remember start of encoding (just _after_ the second '?'):
00234   scursor++;
00235   const char * encodingStart = scursor;
00236 
00237   // find next '?' (ending the encoding tag):
00238   for ( ; scursor != send ; scursor++ ) {
00239     if ( *scursor == '?' ) {
00240       break;
00241     }
00242   }
00243 
00244   // not found? Can't be an encoded-word!
00245   if ( scursor == send || *scursor != '?' ) {
00246     kDebug(5320) << "third";
00247     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00248     return false;
00249   }
00250 
00251   // extract the encoding information:
00252   QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
00253 
00254   kDebug(5320) << "parseEncodedWord: found charset == \"" << maybeCharset
00255            << "\"; language == \"" << maybeLanguage
00256            << "\"; encoding == \"" << maybeEncoding << "\"";
00257 
00258   //
00259   // STEP 3:
00260   // scan for encoded-text portion of encoded-word
00261   //
00262 
00263   // remember start of encoded-text (just after the third '?'):
00264   scursor++;
00265   const char * encodedTextStart = scursor;
00266 
00267   // find next '?' (ending the encoded-text):
00268   for ( ; scursor != send ; scursor++ ) {
00269     if ( *scursor == '?' ) {
00270       break;
00271     }
00272   }
00273 
00274   // not found? Can't be an encoded-word!
00275   // ### maybe evaluate it nonetheless if the rest is OK?
00276   if ( scursor == send || *scursor != '?' ) {
00277     kDebug(5320) << "fourth";
00278     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00279     return false;
00280   }
00281   scursor++;
00282   // check for trailing '=':
00283   if ( scursor == send || *scursor != '=' ) {
00284     kDebug(5320) << "fifth";
00285     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00286     return false;
00287   }
00288   scursor++;
00289 
00290   // set end sentinel for encoded-text:
00291   const char * const encodedTextEnd = scursor - 2;
00292 
00293   //
00294   // STEP 4:
00295   // setup decoders for the transfer encoding and the charset
00296   //
00297 
00298   // try if there's a codec for the encoding found:
00299   Codec * codec = Codec::codecForName( maybeEncoding );
00300   if ( !codec ) {
00301     KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
00302     return false;
00303   }
00304 
00305   // get an instance of a corresponding decoder:
00306   Decoder * dec = codec->makeDecoder();
00307   assert( dec );
00308 
00309   // try if there's a (text)codec for the charset found:
00310   bool matchOK = false;
00311   QTextCodec *textCodec = 0;
00312   if ( forceCS || maybeCharset.isEmpty() ) {
00313     textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00314     usedCS = cachedCharset( defaultCS );
00315   } else {
00316     textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00317     if ( !matchOK ) {  //no suitable codec found => use default charset
00318       textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00319       usedCS = cachedCharset( defaultCS );
00320     } else {
00321       usedCS = cachedCharset( maybeCharset );
00322     }
00323   }
00324 
00325   if ( !matchOK || !textCodec ) {
00326     KMIME_WARN_UNKNOWN( Charset, maybeCharset );
00327     delete dec;
00328     return false;
00329   };
00330 
00331   kDebug(5320) << "mimeName(): \"" << textCodec->name() << "\"";
00332 
00333   // allocate a temporary buffer to store the 8bit text:
00334   int encodedTextLength = encodedTextEnd - encodedTextStart;
00335   QByteArray buffer;
00336   buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
00337   QByteArray::Iterator bit = buffer.begin();
00338   QByteArray::ConstIterator bend = buffer.end();
00339 
00340   //
00341   // STEP 5:
00342   // do the actual decoding
00343   //
00344 
00345   if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) ) {
00346     KMIME_WARN << codec->name() << "codec lies about it's maxDecodedSizeFor("
00347                << encodedTextLength << ")\nresult may be truncated";
00348   }
00349 
00350   result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
00351 
00352   kDebug(5320) << "result now: \"" << result << "\"";
00353   // cleanup:
00354   delete dec;
00355   language = maybeLanguage;
00356 
00357   return true;
00358 }
00359 
// Advances scursor past any run of space, tab, CR and LF characters,
// stopping at the first other character or at send.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      continue; // still whitespace: keep going
    default:
      return;
    }
  }
}
00367 
00368 bool parseAtom( const char * &scursor, const char * const send,
00369                 QString &result, bool allow8Bit )
00370 {
00371   QPair<const char*,int> maybeResult;
00372 
00373   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00374     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00375     return true;
00376   }
00377 
00378   return false;
00379 }
00380 
00381 bool parseAtom( const char * &scursor, const char * const send,
00382                 QPair<const char*,int> &result, bool allow8Bit )
00383 {
00384   bool success = false;
00385   const char *start = scursor;
00386 
00387   while ( scursor != send ) {
00388     signed char ch = *scursor++;
00389     if ( ch > 0 && isAText( ch ) ) {
00390       // AText: OK
00391       success = true;
00392     } else if ( allow8Bit && ch < 0 ) {
00393       // 8bit char: not OK, but be tolerant.
00394       KMIME_WARN_8BIT( ch );
00395       success = true;
00396     } else {
00397       // CTL or special - marking the end of the atom:
00398       // re-set sursor to point to the offending
00399       // char and return:
00400       scursor--;
00401       break;
00402     }
00403   }
00404   result.first = start;
00405   result.second = scursor - start;
00406   return success;
00407 }
00408 
00409 bool parseToken( const char * &scursor, const char * const send,
00410                  QString &result, bool allow8Bit )
00411 {
00412   QPair<const char*,int> maybeResult;
00413 
00414   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00415     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00416     return true;
00417   }
00418 
00419   return false;
00420 }
00421 
00422 bool parseToken( const char * &scursor, const char * const send,
00423                  QPair<const char*,int> &result, bool allow8Bit )
00424 {
00425   bool success = false;
00426   const char * start = scursor;
00427 
00428   while ( scursor != send ) {
00429     signed char ch = *scursor++;
00430     if ( ch > 0 && isTText( ch ) ) {
00431       // TText: OK
00432       success = true;
00433     } else if ( allow8Bit && ch < 0 ) {
00434       // 8bit char: not OK, but be tolerant.
00435       KMIME_WARN_8BIT( ch );
00436       success = true;
00437     } else {
00438       // CTL or tspecial - marking the end of the atom:
00439       // re-set sursor to point to the offending
00440       // char and return:
00441       scursor--;
00442       break;
00443     }
00444   }
00445   result.first = start;
00446   result.second = scursor - start;
00447   return success;
00448 }
00449 
// Helper for parseGenericQuotedString(): fetches the next input character
// into the local "ch" and advances scursor. If the input is exhausted it
// emits a premature-end warning and makes the enclosing function return
// false.
00450 #define READ_ch_OR_FAIL if ( scursor == send ) {        \
00451     KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
00452     return false;                                       \
00453   } else {                                              \
00454     ch = *scursor++;                                    \
00455   }
00456 
00457 // known issues:
00458 //
00459 // - doesn't handle quoted CRLF
00460 
// Parses the body of a construct delimited by openChar/closeChar
// (quoted-string, domain-literal or comment), appending the unfolded and
// unescaped text to result.
//
// scursor   in/out: must point just after an openChar or closeChar
//           (asserted). On a true return it points just after the
//           delimiter that ended the scan.
// send      one past the last readable character.
// isCRLF    when true, a bare LF is never treated as the start of folding.
//
// Returns true as soon as an unescaped openChar or closeChar is consumed;
// the caller inspects *(scursor-1) to see which one it was. Returns false
// if the input ends first; result then holds what was collected so far.
00461 bool parseGenericQuotedString( const char* &scursor, const char * const send,
00462                                QString &result, bool isCRLF,
00463                                const char openChar, const char closeChar )
00464 {
00465   char ch;
00466   // We are in a quoted-string or domain-literal or comment and the
00467   // cursor points to the first char after the openChar.
00468   // We will apply unfolding and quoted-pair removal.
00469   // We return when we either encounter the end or unescaped openChar
00470   // or closeChar.
00471 
00472   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00473 
00474   while ( scursor != send ) {
00475     ch = *scursor++;
00476 
00477     if ( ch == closeChar || ch == openChar ) {
00478       // end of quoted-string or another opening char:
00479       // let caller decide what to do.
00480       return true;
00481     }
00482 
00483     switch( ch ) {
00484     case '\\':      // quoted-pair
00485       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
      // The character after the backslash is taken literally, even if it
      // is openChar or closeChar.
00486       READ_ch_OR_FAIL;
00487       KMIME_WARN_IF_8BIT( ch );
00488       result += QChar( ch );
00489       break;
00490     case '\r':
00491       // ###
00492       // The case of lonely '\r' is easy to solve, as they're
00493       // not part of Unix Line-ending conventions.
00494       // But I see a problem if we are given Unix-native
00495       // line-ending-mails, where we cannot determine anymore
00496       // whether a given '\n' was part of a CRLF or was occurring
00497       // on its own.
00498       READ_ch_OR_FAIL;
00499       if ( ch != '\n' ) {
00500         // CR on its own...
00501         KMIME_WARN_LONE( CR );
00502         result += QChar('\r');
00503         scursor--; // points to after the '\r' again
00504       } else {
00505         // CRLF encountered.
00506         // lookahead: check for folding
00507         READ_ch_OR_FAIL;
00508         if ( ch == ' ' || ch == '\t' ) {
00509           // correct folding;
00510           // position cursor behind the CRLF WSP (unfolding)
00511           // and add the WSP to the result
00512           result += QChar( ch );
00513         } else {
00514           // this is the "shouldn't happen"-case. There is a CRLF
00515           // inside a quoted-string without it being part of FWS.
00516           // We take it verbatim.
00517           KMIME_WARN_NON_FOLDING( CRLF );
00518           result += "\r\n";
00519           // the cursor is decremented again, so we need not
00520           // duplicate the whole switch here. "ch" could've been
00521           // anything (incl. openChar or closeChar).
00522           scursor--;
00523         }
00524       }
00525       break;
00526     case '\n':
00527       // Note: CRLF has been handled above already!
00528       // ### LF needs special treatment, depending on whether isCRLF
00529       // is true (we can be sure a lonely '\n' was meant this way) or
00530       // false ('\n' alone could have meant LF or CRLF in the original
00531       // message. This parser assumes CRLF iff the LF is followed by
00532       // either WSP (folding) or NULL (premature end of quoted-string;
00533       // Should be fixed, since NULL is allowed as per rfc822).
00534       READ_ch_OR_FAIL;
00535       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00536         // folding
00537         // correct folding
00538         result += QChar( ch );
00539       } else {
00540         // non-folding
00541         KMIME_WARN_LONE( LF );
00542         result += QChar('\n');
00543         // pos is decremented, so we need not duplicate the whole
00544         // switch here. ch could've been anything (incl. <">, "\").
00545         scursor--;
00546       }
00547       break;
00548     default:
      // ordinary character: copy it through.
00549       KMIME_WARN_IF_8BIT( ch );
00550       result += QChar( ch );
00551     }
00552   }
00553 
  // ran out of input before seeing a delimiter.
00554   return false;
00555 }
00556 
00557 // known issues:
00558 //
00559 // - doesn't handle encoded-word inside comments.
00560 
00561 bool parseComment( const char* &scursor, const char * const send,
00562                    QString &result, bool isCRLF, bool reallySave )
00563 {
00564   int commentNestingDepth = 1;
00565   const char *afterLastClosingParenPos = 0;
00566   QString maybeCmnt;
00567   const char *oldscursor = scursor;
00568 
00569   assert( *(scursor-1) == '(' );
00570 
00571   while ( commentNestingDepth ) {
00572     QString cmntPart;
00573     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00574       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00575       // see the kdoc for above function for the possible conditions
00576       // we have to check:
00577       switch ( *(scursor-1) ) {
00578       case ')':
00579         if ( reallySave ) {
00580           // add the chunk that's now surely inside the comment.
00581           result += maybeCmnt;
00582           result += cmntPart;
00583           if ( commentNestingDepth > 1 ) {
00584             // don't add the outermost ')'...
00585             result += QChar(')');
00586           }
00587           maybeCmnt.clear();
00588         }
00589         afterLastClosingParenPos = scursor;
00590         --commentNestingDepth;
00591         break;
00592       case '(':
00593         if ( reallySave ) {
00594           // don't add to "result" yet, because we might find that we
00595           // are already outside the (broken) comment...
00596           maybeCmnt += cmntPart;
00597           maybeCmnt += QChar('(');
00598         }
00599         ++commentNestingDepth;
00600         break;
00601       default: assert( 0 );
00602       } // switch
00603     } else {
00604       // !parseGenericQuotedString, ie. premature end
00605       if ( afterLastClosingParenPos ) {
00606         scursor = afterLastClosingParenPos;
00607       } else {
00608         scursor = oldscursor;
00609       }
00610       return false;
00611     }
00612   } // while
00613 
00614   return true;
00615 }
00616 
00617 // known issues: none.
00618 
// Parses a phrase: a sequence of atoms, quoted-strings and encoded-words,
// optionally separated by whitespace and comments. Comments are skipped.
//
// The words are appended to result, separated by single spaces, except
// that no space is inserted between two directly adjacent encoded-words.
// A '.' after at least one word is accepted and appended.
//
// Returns true if at least one word was parsed. When parsing stops on
// something that cannot continue the phrase, scursor is reset to just
// after the last successfully parsed element. The exception is an
// unterminated quoted-string after earlier words: its text is still
// appended and scursor is left where the scan ended.
// Returns false if nothing was parsed.
00619 bool parsePhrase( const char* &scursor, const char * const send,
00620                   QString &result, bool isCRLF )
00621 {
  // kind of what has been collected so far; "Phrase" means more than one word.
00622   enum {
00623     None, Phrase, Atom, EncodedWord, QuotedString
00624   } found = None;
00625 
00626   QString tmp;
00627   QByteArray lang, charset;
00628   const char *successfullyParsed = 0;
00629   // only used by the encoded-word branch
00630   const char *oldscursor;
00631   // used to suppress whitespace between adjacent encoded-words
00632   // (rfc2047, 6.2):
00633   bool lastWasEncodedWord = false;
00634 
00635   while ( scursor != send ) {
00636     char ch = *scursor++;
00637     switch ( ch ) {
00638     case '.': // broken, but allow for interop's sake
00639       if ( found == None ) {
        // a phrase may not start with '.': un-read it and give up.
00640         --scursor;
00641         return false;
00642       } else {
00643         if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
00644           result += ". ";
00645         } else {
00646           result += '.';
00647         }
00648         successfullyParsed = scursor;
00649       }
00650       break;
00651     case '"': // quoted-string
00652       tmp.clear();
00653       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00654         successfullyParsed = scursor;
00655         assert( *(scursor-1) == '"' );
00656         switch ( found ) {
00657         case None:
00658           found = QuotedString;
00659           break;
00660         case Phrase:
00661         case Atom:
00662         case EncodedWord:
00663         case QuotedString:
00664           found = Phrase;
00665           result += QChar(' '); // rfc822, 3.4.4
00666           break;
00667         default:
00668           assert( 0 );
00669         }
00670         lastWasEncodedWord = false;
00671         result += tmp;
00672       } else {
00673         // premature end of quoted string.
00674         // What to do? Return leading '"' as special? Return as quoted-string?
00675         // We do the latter if we already found something, else signal failure.
00676         if ( found == None ) {
00677           return false;
00678         } else {
00679           result += QChar(' '); // rfc822, 3.4.4
00680           result += tmp;
00681           return true;
00682         }
00683       }
00684       break;
00685     case '(': // comment
00686       // parse it, but ignore content:
00687       tmp.clear();
00688       if ( parseComment( scursor, send, tmp, isCRLF,
00689                          false /*don't bother with the content*/ ) ) {
00690         successfullyParsed = scursor;
00691         lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00692       } else {
00693         if ( found == None ) {
00694           return false;
00695         } else {
00696           scursor = successfullyParsed;
00697           return true;
00698         }
00699       }
00700       break;
00701     case '=': // encoded-word
00702       tmp.clear();
      // remember the position after '=' so we can retry as an atom.
00703       oldscursor = scursor;
00704       lang.clear();
00705       charset.clear();
00706       if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
00707         successfullyParsed = scursor;
00708         switch ( found ) {
00709         case None:
00710           found = EncodedWord;
00711           break;
00712         case Phrase:
00713         case EncodedWord:
00714         case Atom:
00715         case QuotedString:
00716           if ( !lastWasEncodedWord ) {
00717             result += QChar(' '); // rfc822, 3.4.4
00718           }
00719           found = Phrase;
00720           break;
00721         default: assert( 0 );
00722         }
00723         lastWasEncodedWord = true;
00724         result += tmp;
00725         break;
00726       } else {
00727         // parse as atom:
00728         scursor = oldscursor;
00729       }
00730       // fall through...
00731 
00732     default: //atom
00733       tmp.clear();
      // step back onto the character just read so the atom parser sees it.
00734       scursor--;
00735       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00736         successfullyParsed = scursor;
00737         switch ( found ) {
00738         case None:
00739           found = Atom;
00740           break;
00741         case Phrase:
00742         case Atom:
00743         case EncodedWord:
00744         case QuotedString:
00745           found = Phrase;
00746           result += QChar(' '); // rfc822, 3.4.4
00747           break;
00748         default:
00749           assert( 0 );
00750         }
00751         lastWasEncodedWord = false;
00752         result += tmp;
00753       } else {
00754         if ( found == None ) {
00755           return false;
00756         } else {
00757           scursor = successfullyParsed;
00758           return true;
00759         }
00760       }
00761     }
    // skip whitespace between words before looking at the next one.
00762     eatWhiteSpace( scursor, send );
00763   }
00764 
00765   return found != None;
00766 }
00767 
00768 bool parseDotAtom( const char* &scursor, const char * const send,
00769                    QString &result, bool isCRLF )
00770 {
00771   eatCFWS( scursor, send, isCRLF );
00772 
00773   // always points to just after the last atom parsed:
00774   const char *successfullyParsed;
00775 
00776   QString tmp;
00777   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
00778     return false;
00779   }
00780   result += tmp;
00781   successfullyParsed = scursor;
00782 
00783   while ( scursor != send ) {
00784 
00785     // end of header or no '.' -> return
00786     if ( scursor == send || *scursor != '.' ) {
00787       return true;
00788     }
00789     scursor++; // eat '.'
00790 
00791     if ( scursor == send || !isAText( *scursor ) ) {
00792       // end of header or no AText, but this time following a '.'!:
00793       // reset cursor to just after last successfully parsed char and
00794       // return:
00795       scursor = successfullyParsed;
00796       return true;
00797     }
00798 
00799     // try to parse the next atom:
00800     QString maybeAtom;
00801     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00802       scursor = successfullyParsed;
00803       return true;
00804     }
00805 
00806     result += QChar('.');
00807     result += maybeAtom;
00808     successfullyParsed = scursor;
00809   }
00810 
00811   scursor = successfullyParsed;
00812   return true;
00813 }
00814 
00815 void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
00816 {
00817   QString dummy;
00818 
00819   while ( scursor != send ) {
00820     const char *oldscursor = scursor;
00821 
00822     char ch = *scursor++;
00823 
00824     switch( ch ) {
00825     case ' ':
00826     case '\t': // whitespace
00827     case '\r':
00828     case '\n': // folding
00829       continue;
00830 
00831     case '(': // comment
00832       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
00833         continue;
00834       }
00835       scursor = oldscursor;
00836       return;
00837 
00838     default:
00839       scursor = oldscursor;
00840       return;
00841     }
00842   }
00843 }
00844 
00845 bool parseDomain( const char* &scursor, const char * const send,
00846                   QString &result, bool isCRLF )
00847 {
00848   eatCFWS( scursor, send, isCRLF );
00849   if ( scursor == send ) {
00850     return false;
00851   }
00852 
00853   // domain := dot-atom / domain-literal / atom *("." atom)
00854   //
00855   // equivalent to:
00856   // domain = dot-atom / domain-literal,
00857   // since parseDotAtom does allow CFWS between atoms and dots
00858 
00859   if ( *scursor == '[' ) {
00860     // domain-literal:
00861     QString maybeDomainLiteral;
00862     // eat '[':
00863     scursor++;
00864     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00865                                       isCRLF, '[', ']' ) ) {
00866       if ( scursor == send ) {
00867         // end of header: check for closing ']':
00868         if ( *(scursor-1) == ']' ) {
00869           // OK, last char was ']':
00870           result = maybeDomainLiteral;
00871           return true;
00872         } else {
00873           // not OK, domain-literal wasn't closed:
00874           return false;
00875         }
00876       }
00877       // we hit openChar in parseGenericQuotedString.
00878       // include it in maybeDomainLiteral and keep on parsing:
00879       if ( *(scursor-1) == '[' ) {
00880         maybeDomainLiteral += QChar('[');
00881         continue;
00882       }
00883       // OK, real end of domain-literal:
00884       result = maybeDomainLiteral;
00885       return true;
00886     }
00887   } else {
00888     // dot-atom:
00889     QString maybeDotAtom;
00890     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00891       result = maybeDotAtom;
00892       return true;
00893     }
00894   }
00895   return false;
00896 }
00897 
00898 bool parseObsRoute( const char* &scursor, const char* const send,
00899                     QStringList &result, bool isCRLF, bool save )
00900 {
00901   while ( scursor != send ) {
00902     eatCFWS( scursor, send, isCRLF );
00903     if ( scursor == send ) {
00904       return false;
00905     }
00906 
00907     // empty entry:
00908     if ( *scursor == ',' ) {
00909       scursor++;
00910       if ( save ) {
00911         result.append( QString() );
00912       }
00913       continue;
00914     }
00915 
00916     // empty entry ending the list:
00917     if ( *scursor == ':' ) {
00918       scursor++;
00919       if ( save ) {
00920         result.append( QString() );
00921       }
00922       return true;
00923     }
00924 
00925     // each non-empty entry must begin with '@':
00926     if ( *scursor != '@' ) {
00927       return false;
00928     } else {
00929       scursor++;
00930     }
00931 
00932     QString maybeDomain;
00933     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
00934       return false;
00935     }
00936     if ( save ) {
00937       result.append( maybeDomain );
00938     }
00939 
00940     // eat the following (optional) comma:
00941     eatCFWS( scursor, send, isCRLF );
00942     if ( scursor == send ) {
00943       return false;
00944     }
00945     if ( *scursor == ':' ) {
00946       scursor++;
00947       return true;
00948     }
00949     if ( *scursor == ',' ) {
00950       scursor++;
00951     }
00952   }
00953 
00954   return false;
00955 }
00956 
00957 bool parseAddrSpec( const char* &scursor, const char * const send,
00958                     AddrSpec &result, bool isCRLF )
00959 {
00960   //
00961   // STEP 1:
00962   // local-part := dot-atom / quoted-string / word *("." word)
00963   //
00964   // this is equivalent to:
00965   // local-part := word *("." word)
00966 
00967   QString maybeLocalPart;
00968   QString tmp;
00969 
00970   while ( scursor != send ) {
00971     // first, eat any whitespace
00972     eatCFWS( scursor, send, isCRLF );
00973 
00974     char ch = *scursor++;
00975     switch ( ch ) {
00976     case '.': // dot
00977       maybeLocalPart += QChar('.');
00978       break;
00979 
00980     case '@':
00981       goto SAW_AT_SIGN;
00982       break;
00983 
00984     case '"': // quoted-string
00985       tmp.clear();
00986       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00987         maybeLocalPart += tmp;
00988       } else {
00989         return false;
00990       }
00991       break;
00992 
00993     default: // atom
00994       scursor--; // re-set scursor to point to ch again
00995       tmp.clear();
00996       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
00997         maybeLocalPart += tmp;
00998       } else {
00999         return false; // parseAtom can only fail if the first char is non-atext.
01000       }
01001       break;
01002     }
01003   }
01004 
01005   return false;
01006 
01007   //
01008   // STEP 2:
01009   // domain
01010   //
01011 
01012 SAW_AT_SIGN:
01013 
01014   assert( *(scursor-1) == '@' );
01015 
01016   QString maybeDomain;
01017   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
01018     return false;
01019   }
01020 
01021   result.localPart = maybeLocalPart;
01022   result.domain = maybeDomain;
01023 
01024   return true;
01025 }
01026 
01027 bool parseAngleAddr( const char* &scursor, const char * const send,
01028                      AddrSpec &result, bool isCRLF )
01029 {
01030   // first, we need an opening angle bracket:
01031   eatCFWS( scursor, send, isCRLF );
01032   if ( scursor == send || *scursor != '<' ) {
01033     return false;
01034   }
01035   scursor++; // eat '<'
01036 
01037   eatCFWS( scursor, send, isCRLF );
01038   if ( scursor == send ) {
01039     return false;
01040   }
01041 
01042   if ( *scursor == '@' || *scursor == ',' ) {
01043     // obs-route: parse, but ignore:
01044     KMIME_WARN << "obsolete source route found! ignoring.";
01045     QStringList dummy;
01046     if ( !parseObsRoute( scursor, send, dummy,
01047                          isCRLF, false /* don't save */ ) ) {
01048       return false;
01049     }
01050     // angle-addr isn't complete until after the '>':
01051     if ( scursor == send ) {
01052       return false;
01053     }
01054   }
01055 
01056   // parse addr-spec:
01057   AddrSpec maybeAddrSpec;
01058   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01059     return false;
01060   }
01061 
01062   eatCFWS( scursor, send, isCRLF );
01063   if ( scursor == send || *scursor != '>' ) {
01064     return false;
01065   }
01066   scursor++;
01067 
01068   result = maybeAddrSpec;
01069   return true;
01070 
01071 }
01072 
01073 bool parseMailbox( const char* &scursor, const char * const send,
01074                    Mailbox &result, bool isCRLF )
01075 {
01076   eatCFWS( scursor, send, isCRLF );
01077   if ( scursor == send ) {
01078     return false;
01079   }
01080 
01081   AddrSpec maybeAddrSpec;
01082   QString maybeDisplayName;
01083 
01084   // first, try if it's a vanilla addr-spec:
01085   const char * oldscursor = scursor;
01086   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01087     result.setAddress( maybeAddrSpec );
01088     // check for the obsolete form of display-name (as comment):
01089     eatWhiteSpace( scursor, send );
01090     if ( scursor != send && *scursor == '(' ) {
01091       scursor++;
01092       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01093         return false;
01094       }
01095     }
01096     result.setNameFrom7Bit( maybeDisplayName.toLatin1() );
01097     return true;
01098   }
01099   scursor = oldscursor;
01100 
01101   // second, see if there's a display-name:
01102   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01103     // failed: reset cursor, note absent display-name
01104     maybeDisplayName.clear();
01105     scursor = oldscursor;
01106   } else {
01107     // succeeded: eat CFWS
01108     eatCFWS( scursor, send, isCRLF );
01109     if ( scursor == send ) {
01110       return false;
01111     }
01112   }
01113 
01114   // third, parse the angle-addr:
01115   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
01116     return false;
01117   }
01118 
01119   if ( maybeDisplayName.isNull() ) {
01120     // check for the obsolete form of display-name (as comment):
01121     eatWhiteSpace( scursor, send );
01122     if ( scursor != send && *scursor == '(' ) {
01123       scursor++;
01124       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01125         return false;
01126       }
01127     }
01128   }
01129 
01130   result.setName( maybeDisplayName );
01131   result.setAddress( maybeAddrSpec );
01132   return true;
01133 }
01134 
01135 bool parseGroup( const char* &scursor, const char * const send,
01136                  Address &result, bool isCRLF )
01137 {
01138   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
01139   //
01140   // equivalent to:
01141   // group   := display-name ":" [ obs-mbox-list ] ";"
01142 
01143   eatCFWS( scursor, send, isCRLF );
01144   if ( scursor == send ) {
01145     return false;
01146   }
01147 
01148   // get display-name:
01149   QString maybeDisplayName;
01150   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01151     return false;
01152   }
01153 
01154   // get ":":
01155   eatCFWS( scursor, send, isCRLF );
01156   if ( scursor == send || *scursor != ':' ) {
01157     return false;
01158   }
01159 
01160   result.displayName = maybeDisplayName;
01161 
01162   // get obs-mbox-list (may contain empty entries):
01163   scursor++;
01164   while ( scursor != send ) {
01165     eatCFWS( scursor, send, isCRLF );
01166     if ( scursor == send ) {
01167       return false;
01168     }
01169 
01170     // empty entry:
01171     if ( *scursor == ',' ) {
01172       scursor++;
01173       continue;
01174     }
01175 
01176     // empty entry ending the list:
01177     if ( *scursor == ';' ) {
01178       scursor++;
01179       return true;
01180     }
01181 
01182     Mailbox maybeMailbox;
01183     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01184       return false;
01185     }
01186     result.mailboxList.append( maybeMailbox );
01187 
01188     eatCFWS( scursor, send, isCRLF );
01189     // premature end:
01190     if ( scursor == send ) {
01191       return false;
01192     }
01193     // regular end of the list:
01194     if ( *scursor == ';' ) {
01195       scursor++;
01196       return true;
01197     }
01198     // eat regular list entry separator:
01199     if ( *scursor == ',' ) {
01200       scursor++;
01201     }
01202   }
01203   return false;
01204 }
01205 
01206 bool parseAddress( const char* &scursor, const char * const send,
01207                    Address &result, bool isCRLF )
01208 {
01209   // address       := mailbox / group
01210 
01211   eatCFWS( scursor, send, isCRLF );
01212   if ( scursor == send ) {
01213     return false;
01214   }
01215 
01216   // first try if it's a single mailbox:
01217   Mailbox maybeMailbox;
01218   const char * oldscursor = scursor;
01219   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01220     // yes, it is:
01221     result.displayName.clear();
01222     result.mailboxList.append( maybeMailbox );
01223     return true;
01224   }
01225   scursor = oldscursor;
01226 
01227   Address maybeAddress;
01228 
01229   // no, it's not a single mailbox. Try if it's a group:
01230   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
01231     return false;
01232   }
01233 
01234   result = maybeAddress;
01235   return true;
01236 }
01237 
01238 bool parseAddressList( const char* &scursor, const char * const send,
01239                        AddressList &result, bool isCRLF )
01240 {
01241   while ( scursor != send ) {
01242     eatCFWS( scursor, send, isCRLF );
01243     // end of header: this is OK.
01244     if ( scursor == send ) {
01245       return true;
01246     }
01247     // empty entry: ignore:
01248     if ( *scursor == ',' ) {
01249       scursor++;
01250       continue;
01251     }
01252     // broken clients might use ';' as list delimiter, accept that as well
01253     if ( *scursor == ';' ) {
01254       scursor++;
01255       continue;
01256     }
01257 
01258     // parse one entry
01259     Address maybeAddress;
01260     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
01261       return false;
01262     }
01263     result.append( maybeAddress );
01264 
01265     eatCFWS( scursor, send, isCRLF );
01266     // end of header: this is OK.
01267     if ( scursor == send ) {
01268       return true;
01269     }
01270     // comma separating entries: eat it.
01271     if ( *scursor == ',' ) {
01272       scursor++;
01273     }
01274   }
01275   return true;
01276 }
01277 
01278 static QString asterisk = QString::fromLatin1( "*0*", 1 );
01279 static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
01280 //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );
01281 
01282 bool parseParameter( const char* &scursor, const char * const send,
01283                      QPair<QString,QStringOrQPair> &result, bool isCRLF )
01284 {
01285   // parameter = regular-parameter / extended-parameter
01286   // regular-parameter = regular-parameter-name "=" value
01287   // extended-parameter =
01288   // value = token / quoted-string
01289   //
01290   // note that rfc2231 handling is out of the scope of this function.
01291   // Therefore we return the attribute as QString and the value as
01292   // (start,length) tupel if we see that the value is encoded
01293   // (trailing asterisk), for parseParameterList to decode...
01294 
01295   eatCFWS( scursor, send, isCRLF );
01296   if ( scursor == send ) {
01297     return false;
01298   }
01299 
01300   //
01301   // parse the parameter name:
01302   //
01303   QString maybeAttribute;
01304   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
01305     return false;
01306   }
01307 
01308   eatCFWS( scursor, send, isCRLF );
01309   // premature end: not OK (haven't seen '=' yet).
01310   if ( scursor == send || *scursor != '=' ) {
01311     return false;
01312   }
01313   scursor++; // eat '='
01314 
01315   eatCFWS( scursor, send, isCRLF );
01316   if ( scursor == send ) {
01317     // don't choke on attribute=, meaning the value was omitted:
01318     if ( maybeAttribute.endsWith( asterisk ) ) {
01319       KMIME_WARN << "attribute ends with \"*\", but value is empty!"
01320         "Chopping away \"*\".";
01321       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01322     }
01323     result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01324     return true;
01325   }
01326 
01327   const char * oldscursor = scursor;
01328 
01329   //
01330   // parse the parameter value:
01331   //
01332   QStringOrQPair maybeValue;
01333   if ( *scursor == '"' ) {
01334     // value is a quoted-string:
01335     scursor++;
01336     if ( maybeAttribute.endsWith( asterisk ) ) {
01337       // attributes ending with "*" designate extended-parameters,
01338       // which cannot have quoted-strings as values. So we remove the
01339       // trailing "*" to not confuse upper layers.
01340       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
01341         "Chopping away \"*\".";
01342       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01343     }
01344 
01345     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01346       scursor = oldscursor;
01347       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01348       return false; // this case needs further processing by upper layers!!
01349     }
01350   } else {
01351     // value is a token:
01352     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01353       scursor = oldscursor;
01354       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01355       return false; // this case needs further processing by upper layers!!
01356     }
01357   }
01358 
01359   result = qMakePair( maybeAttribute.toLower(), maybeValue );
01360   return true;
01361 }
01362 
01363 bool parseRawParameterList( const char* &scursor, const char * const send,
01364                             QMap<QString,QStringOrQPair> &result,
01365                             bool isCRLF )
01366 {
01367   // we use parseParameter() consecutively to obtain a map of raw
01368   // attributes to raw values. "Raw" here means that we don't do
01369   // rfc2231 decoding and concatenation. This is left to
01370   // parseParameterList(), which will call this function.
01371   //
01372   // The main reason for making this chunk of code a separate
01373   // (private) method is that we can deal with broken parameters
01374   // _here_ and leave the rfc2231 handling solely to
01375   // parseParameterList(), which will still be enough work.
01376 
01377   while ( scursor != send ) {
01378     eatCFWS( scursor, send, isCRLF );
01379     // empty entry ending the list: OK.
01380     if ( scursor == send ) {
01381       return true;
01382     }
01383     // empty list entry: ignore.
01384     if ( *scursor == ';' ) {
01385       scursor++;
01386       continue;
01387     }
01388 
01389     QPair<QString,QStringOrQPair> maybeParameter;
01390     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01391       // we need to do a bit of work if the attribute is not
01392       // NULL. These are the cases marked with "needs further
01393       // processing" in parseParameter(). Specifically, parsing of the
01394       // token or the quoted-string, which should represent the value,
01395       // failed. We take the easy way out and simply search for the
01396       // next ';' to start parsing again. (Another option would be to
01397       // take the text between '=' and ';' as value)
01398       if ( maybeParameter.first.isNull() ) {
01399         return false;
01400       }
01401       while ( scursor != send ) {
01402         if ( *scursor++ == ';' ) {
01403           goto IS_SEMICOLON;
01404         }
01405       }
01406       // scursor == send case: end of list.
01407       return true;
01408     IS_SEMICOLON:
01409       // *scursor == ';' case: parse next entry.
01410       continue;
01411     }
01412     // successful parsing brings us here:
01413     result.insert( maybeParameter.first, maybeParameter.second );
01414 
01415     eatCFWS( scursor, send, isCRLF );
01416     // end of header: ends list.
01417     if ( scursor == send ) {
01418       return true;
01419     }
01420     // regular separator: eat it.
01421     if ( *scursor == ';' ) {
01422       scursor++;
01423     }
01424   }
01425   return true;
01426 }
01427 
01428 static void decodeRFC2231Value( Codec* &rfc2231Codec,
01429                                 QTextCodec* &textcodec,
01430                                 bool isContinuation, QString &value,
01431                                 QPair<const char*,int> &source )
01432 {
01433   //
01434   // parse the raw value into (charset,language,text):
01435   //
01436 
01437   const char * decBegin = source.first;
01438   const char * decCursor = decBegin;
01439   const char * decEnd = decCursor + source.second;
01440 
01441   if ( !isContinuation ) {
01442     // find the first single quote
01443     while ( decCursor != decEnd ) {
01444       if ( *decCursor == '\'' ) {
01445         break;
01446       } else {
01447         decCursor++;
01448       }
01449     }
01450 
01451     if ( decCursor == decEnd ) {
01452       // there wasn't a single single quote at all!
01453       // take the whole value to be in latin-1:
01454       KMIME_WARN << "No charset in extended-initial-value."
01455         "Assuming \"iso-8859-1\".";
01456       value += QString::fromLatin1( decBegin, source.second );
01457       return;
01458     }
01459 
01460     QByteArray charset( decBegin, decCursor - decBegin );
01461 
01462     const char * oldDecCursor = ++decCursor;
01463     // find the second single quote (we ignore the language tag):
01464     while ( decCursor != decEnd ) {
01465       if ( *decCursor == '\'' ) {
01466         break;
01467       } else {
01468         decCursor++;
01469       }
01470     }
01471     if ( decCursor == decEnd ) {
01472       KMIME_WARN << "No language in extended-initial-value."
01473         "Trying to recover.";
01474       decCursor = oldDecCursor;
01475     } else {
01476       decCursor++;
01477     }
01478 
01479     // decCursor now points to the start of the
01480     // "extended-other-values":
01481 
01482     //
01483     // get the decoders:
01484     //
01485 
01486     bool matchOK = false;
01487     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01488     if ( !matchOK ) {
01489       textcodec = 0;
01490       KMIME_WARN_UNKNOWN( Charset, charset );
01491     }
01492   }
01493 
01494   if ( !rfc2231Codec ) {
01495     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01496     assert( rfc2231Codec );
01497   }
01498 
01499   if ( !textcodec ) {
01500     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01501     return;
01502   }
01503 
01504   Decoder * dec = rfc2231Codec->makeDecoder();
01505   assert( dec );
01506 
01507   //
01508   // do the decoding:
01509   //
01510 
01511   QByteArray buffer;
01512   buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01513   QByteArray::Iterator bit = buffer.begin();
01514   QByteArray::ConstIterator bend = buffer.end();
01515 
01516   if ( !dec->decode( decCursor, decEnd, bit, bend ) ) {
01517     KMIME_WARN << rfc2231Codec->name()
01518                << "codec lies about it's maxDecodedSizeFor()" << endl
01519                << "result may be truncated";
01520   }
01521 
01522   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01523 
01524   kDebug(5320) << "value now: \"" << value << "\"";
01525   // cleanup:
01526   delete dec;
01527 }
01528 
01529 // known issues:
01530 //  - permutes rfc2231 continuations when the total number of parts
01531 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01532 
01533 bool parseParameterList( const char* &scursor, const char * const send,
01534                          QMap<QString,QString> &result, bool isCRLF )
01535 {
01536   // parse the list into raw attribute-value pairs:
01537   QMap<QString,QStringOrQPair> rawParameterList;
01538   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
01539     return false;
01540   }
01541 
01542   if ( rawParameterList.isEmpty() ) {
01543     return true;
01544   }
01545 
01546   // decode rfc 2231 continuations and alternate charset encoding:
01547 
01548   // NOTE: this code assumes that what QMapIterator delivers is sorted
01549   // by the key!
01550 
01551   Codec * rfc2231Codec = 0;
01552   QTextCodec * textcodec = 0;
01553   QString attribute;
01554   QString value;
01555   enum Modes {
01556     NoMode = 0x0, Continued = 0x1, Encoded = 0x2
01557   } mode;
01558 
01559   QMap<QString,QStringOrQPair>::Iterator it, end = rawParameterList.end();
01560 
01561   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01562     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01563       //
01564       // new attribute:
01565       //
01566 
01567       // store the last attribute/value pair in the result map now:
01568       if ( !attribute.isNull() ) {
01569         result.insert( attribute, value );
01570       }
01571       // and extract the information from the new raw attribute:
01572       value.clear();
01573       attribute = it.key();
01574       mode = NoMode;
01575       // is the value encoded?
01576       if ( attribute.endsWith( asterisk ) ) {
01577         attribute.truncate( attribute.length() - 1 );
01578         mode = (Modes) ((int) mode | Encoded);
01579       }
01580       // is the value continued?
01581       if ( attribute.endsWith( asteriskZero ) ) {
01582         attribute.truncate( attribute.length() - 2 );
01583         mode = (Modes) ((int) mode | Continued);
01584       }
01585       //
01586       // decode if necessary:
01587       //
01588       if ( mode & Encoded ) {
01589         decodeRFC2231Value( rfc2231Codec, textcodec,
01590                             false, /* isn't continuation */
01591                             value, (*it).qpair );
01592       } else {
01593         // not encoded.
01594         if ( (*it).qpair.first ) {
01595           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01596         } else {
01597           value += (*it).qstring;
01598         }
01599       }
01600 
01601       //
01602       // shortcut-processing when the value isn't encoded:
01603       //
01604 
01605       if ( !(mode & Continued) ) {
01606         // save result already:
01607         result.insert( attribute, value );
01608         // force begin of a new attribute:
01609         attribute.clear();
01610       }
01611     } else { // it.key().startsWith( attribute )
01612       //
01613       // continuation
01614       //
01615 
01616       // ignore the section and trust QMap to have sorted the keys:
01617       if ( it.key().endsWith( asterisk ) ) {
01618         // encoded
01619         decodeRFC2231Value( rfc2231Codec, textcodec,
01620                             true, /* is continuation */
01621                             value, (*it).qpair );
01622       } else {
01623         // not encoded
01624         if ( (*it).qpair.first ) {
01625           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01626         } else {
01627           value += (*it).qstring;
01628         }
01629       }
01630     }
01631   }
01632 
01633   // write last attr/value pair:
01634   if ( !attribute.isNull() ) {
01635     result.insert( attribute, value );
01636   }
01637 
01638   return true;
01639 }
01640 
// The three-letter day names recognised by parseDayName().
static const char * const stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// Number of entries in stdDayNames.
static const int stdDayNamesLen = sizeof( stdDayNames ) / sizeof( stdDayNames[0] );
01645 
01646 static bool parseDayName( const char* &scursor, const char * const send )
01647 {
01648   // check bounds:
01649   if ( send - scursor < 3 ) {
01650     return false;
01651   }
01652 
01653   for ( int i = 0 ; i < stdDayNamesLen ; ++i ) {
01654     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01655       scursor += 3;
01656       // kDebug(5320) << "found" << stdDayNames[i];
01657       return true;
01658     }
01659   }
01660 
01661   return false;
01662 }
01663 
// The three-letter month names recognised by parseMonthName(), in
// calendar order (index 0 is January).
static const char * const stdMonthNames[] = {
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec"
};
// Number of entries in stdMonthNames.
static const int stdMonthNamesLen =
  sizeof( stdMonthNames ) / sizeof( stdMonthNames[0] );
01670 
01671 static bool parseMonthName( const char* &scursor, const char * const send,
01672                             int &result )
01673 {
01674   // check bounds:
01675   if ( send - scursor < 3 ) {
01676     return false;
01677   }
01678 
01679   for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
01680     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01681       scursor += 3;
01682       return true;
01683     }
01684   }
01685 
01686   // not found:
01687   return false;
01688 }
01689 
// Table of alphabetic time zone names and their offsets, in seconds east
// of GMT. It is searched front to back and the first matching entry wins,
// so every name must appear only once.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 }, // was listed as a second "MST", which made MST -5h
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// Number of entries in timeZones.
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01740 
01741 static bool parseAlphaNumericTimeZone( const char* &scursor,
01742                                        const char * const send,
01743                                        long int &secsEastOfGMT,
01744                                        bool &timeZoneKnown )
01745 {
01746   QPair<const char*,int> maybeTimeZone( 0, 0 );
01747   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
01748     return false;
01749   }
01750   for ( int i = 0 ; i < timeZonesLen ; ++i ) {
01751     if ( qstrnicmp( timeZones[i].tzName,
01752                     maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01753       scursor += maybeTimeZone.second;
01754       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01755       timeZoneKnown = true;
01756       return true;
01757     }
01758   }
01759 
01760   // don't choke just because we don't happen to know the time zone
01761   KMIME_WARN_UNKNOWN( time zone,
01762                       QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
01763   secsEastOfGMT = 0;
01764   timeZoneKnown = false;
01765   return true;
01766 }
01767 
// Parses a run of decimal digits starting at scursor.
//
// scursor  current read position; advanced past the digits consumed
// send     end of the input
// result   value of the digits read; 0 when there are none
//
// Returns the number of digits consumed (0 if scursor does not point at
// a digit or the input is exhausted).
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  result = 0;
  int digits = 0;
  // isdigit() is only defined for values representable as unsigned char
  // (and EOF); a plain char holding an 8-bit character may be negative,
  // hence the cast.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
01779 
01780 static bool parseTimeOfDay( const char* &scursor, const char * const send,
01781                             int &hour, int &min, int &sec, bool isCRLF=false )
01782 {
01783   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01784 
01785   //
01786   // 2DIGIT representing "hour":
01787   //
01788   if ( !parseDigits( scursor, send, hour ) ) {
01789     return false;
01790   }
01791 
01792   eatCFWS( scursor, send, isCRLF );
01793   if ( scursor == send || *scursor != ':' ) {
01794     return false;
01795   }
01796   scursor++; // eat ':'
01797 
01798   eatCFWS( scursor, send, isCRLF );
01799   if ( scursor == send ) {
01800     return false;
01801   }
01802 
01803   //
01804   // 2DIGIT representing "minute":
01805   //
01806   if ( !parseDigits( scursor, send, min ) ) {
01807     return false;
01808   }
01809 
01810   eatCFWS( scursor, send, isCRLF );
01811   if ( scursor == send ) {
01812     return true; // seconds are optional
01813   }
01814 
01815   //
01816   // let's see if we have a 2DIGIT representing "second":
01817   //
01818   if ( *scursor == ':' ) {
01819     // yepp, there are seconds:
01820     scursor++; // eat ':'
01821     eatCFWS( scursor, send, isCRLF );
01822     if ( scursor == send ) {
01823       return false;
01824     }
01825 
01826     if ( !parseDigits( scursor, send, sec ) ) {
01827       return false;
01828     }
01829   } else {
01830     sec = 0;
01831   }
01832 
01833   return true;
01834 }
01835 
01836 bool parseTime( const char* &scursor, const char * send,
01837                 int &hour, int &min, int &sec, long int &secsEastOfGMT,
01838                 bool &timeZoneKnown, bool isCRLF )
01839 {
01840   // time := time-of-day CFWS ( zone / obs-zone )
01841   //
01842   // obs-zone    := "UT" / "GMT" /
01843   //                "EST" / "EDT" / ; -0500 / -0400
01844   //                "CST" / "CDT" / ; -0600 / -0500
01845   //                "MST" / "MDT" / ; -0700 / -0600
01846   //                "PST" / "PDT" / ; -0800 / -0700
01847   //                "A"-"I" / "a"-"i" /
01848   //                "K"-"Z" / "k"-"z"
01849 
01850   eatCFWS( scursor, send, isCRLF );
01851   if ( scursor == send ) {
01852     return false;
01853   }
01854 
01855   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
01856     return false;
01857   }
01858 
01859   eatCFWS( scursor, send, isCRLF );
01860   if ( scursor == send ) {
01861     timeZoneKnown = false;
01862     secsEastOfGMT = 0;
01863     return true; // allow missing timezone
01864   }
01865 
01866   timeZoneKnown = true;
01867   if ( *scursor == '+' || *scursor == '-' ) {
01868     // remember and eat '-'/'+':
01869     const char sign = *scursor++;
01870     // numerical timezone:
01871     int maybeTimeZone;
01872     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
01873       return false;
01874     }
01875     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01876     if ( sign == '-' ) {
01877       secsEastOfGMT *= -1;
01878       if ( secsEastOfGMT == 0 ) {
01879         timeZoneKnown = false; // -0000 means indetermined tz
01880       }
01881     }
01882   } else {
01883     // maybe alphanumeric timezone:
01884     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
01885       return false;
01886     }
01887   }
01888   return true;
01889 }
01890 
01891 bool parseDateTime( const char* &scursor, const char * const send,
01892                     KDateTime &result, bool isCRLF )
01893 {
01894   // Parsing date-time; strict mode:
01895   //
01896   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
01897   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
01898   //                time
01899   //
01900   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
01901   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
01902   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
01903 
01904   result = KDateTime();
01905   QDateTime maybeDateTime;
01906 
01907   eatCFWS( scursor, send, isCRLF );
01908   if ( scursor == send ) {
01909     return false;
01910   }
01911 
01912   //
01913   // let's see if there's a day-of-week:
01914   //
01915   if ( parseDayName( scursor, send ) ) {
01916     eatCFWS( scursor, send, isCRLF );
01917     if ( scursor == send ) {
01918       return false;
01919     }
01920     // day-name should be followed by ',' but we treat it as optional:
01921     if ( *scursor == ',' ) {
01922       scursor++; // eat ','
01923       eatCFWS( scursor, send, isCRLF );
01924     }
01925   }
01926 
01927   //
01928   // 1*2DIGIT representing "day" (of month):
01929   //
01930   int maybeDay;
01931   if ( !parseDigits( scursor, send, maybeDay ) ) {
01932     return false;
01933   }
01934 
01935   eatCFWS( scursor, send, isCRLF );
01936   if ( scursor == send ) {
01937     return false;
01938   }
01939 
01940   //
01941   // month-name:
01942   //
01943   int maybeMonth = 0;
01944   if ( !parseMonthName( scursor, send, maybeMonth ) ) {
01945     return false;
01946   }
01947   if ( scursor == send ) {
01948     return false;
01949   }
01950   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01951   ++maybeMonth; // 0-11 -> 1-12
01952 
01953   eatCFWS( scursor, send, isCRLF );
01954   if ( scursor == send ) {
01955     return false;
01956   }
01957 
01958   //
01959   // 2*DIGIT representing "year":
01960   //
01961   int maybeYear;
01962   if ( !parseDigits( scursor, send, maybeYear ) ) {
01963     return false;
01964   }
01965   // RFC 2822 4.3 processing:
01966   if ( maybeYear < 50 ) {
01967     maybeYear += 2000;
01968   } else if ( maybeYear < 1000 ) {
01969     maybeYear += 1900;
01970   }
01971   // else keep as is
01972   if ( maybeYear < 1900 ) {
01973     return false; // rfc2822, 3.3
01974   }
01975 
01976   eatCFWS( scursor, send, isCRLF );
01977   if ( scursor == send ) {
01978     return false;
01979   }
01980 
01981   maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
01982 
01983   //
01984   // time
01985   //
01986   int maybeHour, maybeMinute, maybeSecond;
01987   long int secsEastOfGMT;
01988   bool timeZoneKnown = true;
01989 
01990   if ( !parseTime( scursor, send,
01991                    maybeHour, maybeMinute, maybeSecond,
01992                    secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
01993     return false;
01994   }
01995 
01996   maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
01997   if ( !maybeDateTime.isValid() )
01998     return false;
01999 
02000   result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
02001   if ( !result.isValid() )
02002     return false;
02003   return true;
02004 }
02005 
02006 } // namespace HeaderParsing
02007 
02008 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • kabc
  • kblog
  • kcal
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.5.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal