kdecore Library API Documentation

kurl.cpp

00001 /*
00002     Copyright (C) 1999 Torben Weis <weis@kde.org>
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017     Boston, MA 02111-1307, USA.
00018 */
00019 
00020 #include "kurl.h"
00021 
00022 #ifndef KDE_QT_ONLY
00023 #include <kdebug.h>
00024 #include <kglobal.h>
00025 #include <kidna.h>
00026 #include <kprotocolinfo.h>
00027 #endif
00028 
00029 #include <stdio.h>
00030 #include <assert.h>
00031 #include <ctype.h>
00032 #include <stdlib.h>
00033 #include <unistd.h>
00034 
00035 #include <qurl.h>
00036 #include <qdir.h>
00037 #include <qstringlist.h>
00038 #include <qregexp.h>
00039 #include <qstylesheet.h>
00040 #include <qmap.h>
00041 #include <qtextcodec.h>
00042 #include <qmutex.h>
00043 
// Protocol name assigned to URLs that start with '/' and to URLs that are
// still without a protocol once parsing has succeeded.
00044 static const QString fileProt = "file";
00045 
00046 static QTextCodec * codecForHint( int encoding_hint /* not 0 ! */ )
00047 {
00048     return QTextCodec::codecForMib( encoding_hint );
00049 }
00050 
00051 static QString encode( const QString& segment, bool encode_slash, int encoding_hint )
00052 {
00053   const char *encode_string;
00054   if (encode_slash)
00055     encode_string = "<>#@\"&%?={}|^~[]\'`\\:+/";
00056   else
00057     encode_string = "<>#@\"&%?={}|^~[]\'`\\:+";
00058 
00059   QCString local;
00060   if (encoding_hint==0)
00061     local = segment.local8Bit();
00062   else
00063   {
00064       QTextCodec * textCodec = codecForHint( encoding_hint );
00065       if (!textCodec)
00066           local = segment.local8Bit();
00067       else
00068           local = textCodec->fromUnicode( segment );
00069   }
00070 
00071   int old_length = local.length();
00072 
00073   if ( !old_length )
00074     return segment.isNull() ? QString::null : QString(""); // differentiate null and empty
00075 
00076   // a worst case approximation
00077   QChar *new_segment = new QChar[ old_length * 3 + 1 ];
00078   int new_length = 0;
00079 
00080   for ( int i = 0; i < old_length; i++ )
00081   {
00082     // 'unsave' and 'reserved' characters
00083     // according to RFC 1738,
00084     // 2.2. URL Character Encoding Issues (pp. 3-4)
00085     // WABA: Added non-ascii
00086     unsigned char character = local[i];
00087     if ( (character <= 32) || (character >= 127) ||
00088          strchr(encode_string, character) )
00089     {
00090       new_segment[ new_length++ ] = '%';
00091 
00092       unsigned int c = character / 16;
00093       c += (c > 9) ? ('A' - 10) : '0';
00094       new_segment[ new_length++ ] = c;
00095 
00096       c = character % 16;
00097       c += (c > 9) ? ('A' - 10) : '0';
00098       new_segment[ new_length++ ] = c;
00099 
00100     }
00101     else
00102       new_segment[ new_length++ ] = local[i];
00103   }
00104 
00105   QString result = QString(new_segment, new_length);
00106   delete [] new_segment;
00107   return result;
00108 }
00109 
00110 static QString encodeHost( const QString& segment, bool encode_slash, int encoding_hint )
00111 {
00112   // Hostnames are encoded differently
00113   // we use the IDNA transformation instead
00114 
00115   // Note: when merging qt-addon, use QResolver::domainToAscii here
00116 #ifndef KDE_QT_ONLY
00117   Q_UNUSED( encode_slash );
00118   Q_UNUSED( encoding_hint );
00119   return KIDNA::toAscii(segment);
00120 #else
00121   return encode(segment, encode_slash, encoding_hint);
00122 #endif
00123 }
00124 
/**
 * Converts one hexadecimal digit to its numeric value.
 *
 * @param _char character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F')
 * @return 0..15 for a hex digit, -1 for anything else
 */
static int hex2int( unsigned int _char )
{
  if ( _char >= '0' && _char <= '9' )
    return _char - '0';

  if ( _char >= 'a' && _char <= 'f' )
    return 10 + ( _char - 'a' );

  if ( _char >= 'A' && _char <= 'F' )
    return 10 + ( _char - 'A' );

  return -1;
}
00135 
00136 // WABA: The result of lazy_encode isn't usable for a URL which
00137 // needs to satisfy RFC requirements. However, the following
00138 // operation will make it usable again:
00139 //      encode(decode(...))
00140 //
00141 // As a result one can see that url.prettyURL() does not result in
00142 // an RFC compliant URL but that the following sequence does:
00143 //      KURL(url.prettyURL()).url()
00144 
00145 
00146 static QString lazy_encode( const QString& segment )
00147 {
00148   int old_length = segment.length();
00149 
00150   if ( !old_length )
00151     return QString::null;
00152 
00153   // a worst case approximation
00154   QChar *new_segment = new QChar[ old_length * 3 + 1 ];
00155   int new_length = 0;
00156 
00157   for ( int i = 0; i < old_length; i++ )
00158   {
00159     unsigned int character = segment[i].unicode(); // Don't use latin1()
00160                                                    // It returns 0 for non-latin1 values
00161     // Small set of really ambiguous chars
00162     if ((character < 32) ||  // Low ASCII
00163         ((character == '%') && // The escape character itself
00164            (i+2 < old_length) && // But only if part of a valid escape sequence!
00165           (hex2int(segment[i+1].unicode())!= -1) &&
00166           (hex2int(segment[i+2].unicode())!= -1)) ||
00167         (character == '?') || // Start of query delimiter
00168         (character == '@') || // Username delimiter
00169         (character == '#') || // Start of reference delimiter
00170         ((character == 32) && (i+1 == old_length))) // A trailing space
00171     {
00172       new_segment[ new_length++ ] = '%';
00173 
00174       unsigned int c = character / 16;
00175       c += (c > 9) ? ('A' - 10) : '0';
00176       new_segment[ new_length++ ] = c;
00177 
00178       c = character % 16;
00179       c += (c > 9) ? ('A' - 10) : '0';
00180       new_segment[ new_length++ ] = c;
00181     }
00182     else
00183     new_segment[ new_length++ ] = segment[i];
00184   }
00185 
00186   QString result = QString(new_segment, new_length);
00187   delete [] new_segment;
00188   return result;
00189 }
00190 
/**
 * Decodes the %XX escapes of a URL segment and, in the same pass, builds a
 * normalized encoded form of it.
 *
 * @param segment        the (possibly escaped) text to process
 * @param decoded        out: set to QString::null on entry; receives the
 *                       unescaped text only when @p updateDecoded is true
 * @param encoded        out: set to @p segment on entry; for non-empty input
 *                       it is replaced by the re-encoded text, in which valid
 *                       %XX sequences are kept as written while stray '%',
 *                       bytes <= ' ' and bytes > 127 are written as %XX
 * @param encoding_hint  MIB number of the codec to use; with 0, or when no
 *                       codec is found for it, the locale codec is used
 * @param updateDecoded  when true the decoded text is produced and processing
 *                       stops at the first %00
 */
00191 static void decode( const QString& segment, QString &decoded, QString &encoded, int encoding_hint=0, bool updateDecoded = true )
00192 {
00193   decoded = QString::null;
00194   encoded = segment;
00195 
00196   int old_length = segment.length();
00197   if ( !old_length )
00198     return;
00199 
  // Select the codec: the hinted one if available, the locale codec otherwise.
00200   QTextCodec *textCodec = 0;
00201   if (encoding_hint)
00202       textCodec = codecForHint( encoding_hint );
00203 
00204   if (!textCodec)
00205       textCodec = QTextCodec::codecForLocale();
00206 
00207   QCString csegment = textCodec->fromUnicode(segment);
00208   // Check that the conversion round-trips, i.e. the codec can represent the segment
00209   if (textCodec->toUnicode(csegment) != segment)
00210   {
00211       // It cannot: redo the conversion with another codec
00212       textCodec = codecForHint( 106 ); // Fall back to utf-8
00213       csegment = textCodec->fromUnicode(segment);
00214   } 
  // From here on old_length counts bytes of the 8-bit form, not characters.
00215   old_length = csegment.length();
00216 
  // new_length indexes new_segment (decoded bytes),
  // new_length2 indexes new_usegment (re-encoded characters).
00217   int new_length = 0;
00218   int new_length2 = 0;
00219 
00220   // make a copy of the old one
00221   char *new_segment = new char[ old_length + 1 ];
  // Sized for the worst case where every byte becomes a three character escape.
00222   QChar *new_usegment = new QChar[ old_length * 3 + 1 ];
00223 
00224   int i = 0;
00225   while( i < old_length )
00226   {
00227     bool bReencode = false;
00228     unsigned char character = csegment[ i++ ];
00229     if ((character <= ' ') || (character > 127))
00230        bReencode = true;
00231 
    // Tentatively copy the byte; this is undone below if it must be re-encoded.
00232     new_usegment [ new_length2++ ] = character;
00233     if (character == '%' )
00234     {
      // i already points at the first character after the '%'.
00235       int a = i+1 < old_length ? hex2int( csegment[i] ) : -1;
00236       int b = i+1 < old_length ? hex2int( csegment[i+1] ) : -1;
00237       if ((a == -1) || (b == -1)) // Only replace if sequence is valid
00238       {
00239          // Contains stray %, make sure to re-encode!
00240          bReencode = true;
00241       }
00242       else
00243       {
00244          // Valid %xx sequence
00245          character = a * 16 + b; // Replace with value of %dd
00246          if (!character && updateDecoded)
00247             break; // Stop at %00
00248 
         // Keep the two hex digits exactly as they were written.
00249          new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
00250          new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
00251       }
00252     }
00253     if (bReencode)
00254     {
      // Drop the tentative copy made above and write the byte as %XX instead.
00255       new_length2--;
00256       new_usegment [ new_length2++ ] = '%';
00257 
00258       unsigned int c = character / 16;
00259       c += (c > 9) ? ('A' - 10) : '0';
00260       new_usegment[ new_length2++ ] = c;
00261 
00262       c = character % 16;
00263       c += (c > 9) ? ('A' - 10) : '0';
00264       new_usegment[ new_length2++ ] = c;
00265     }
00266 
    // The decoded byte: either the raw byte or the value of the %XX sequence.
00267     new_segment [ new_length++ ] = character;
00268   }
00269   new_segment [ new_length ] = 0;
00270 
00271   encoded = QString( new_usegment, new_length2);
00272 
00273   // Convert the decoded bytes back to unicode with the selected codec
00274   if (updateDecoded)
00275   {
00276      QByteArray array;
00277      array.setRawData(new_segment, new_length);
00278      decoded = textCodec->toUnicode( array, new_length );
00279      array.resetRawData(new_segment, new_length);
     // Round-trip check: if the codec does not reproduce the decoded bytes,
     // interpret them in the local 8-bit encoding instead.
00280      QCString validate = textCodec->fromUnicode(decoded);
00281 
00282      if (strcmp(validate.data(), new_segment) != 0)
00283      {
00284         decoded = QString::fromLocal8Bit(new_segment, new_length);
00285      }
00286   }
00287 
00288   delete [] new_segment;
00289   delete [] new_usegment;
00290 }
00291 
00292 static QString decode(const QString &segment, int encoding_hint = 0)
00293 {
00294   QString result;
00295   QString tmp;
00296   decode(segment, result, tmp, encoding_hint);
00297   return result;
00298 }
00299 
/**
 * Normalizes an absolute path: removes "." entries and resolves ".."
 * entries against the entries that precede them.
 *
 * @param _path              path to clean
 * @param cleanDirSeparator  when true, empty entries (produced by repeated
 *                           '/') are dropped as well
 * @param decodeDots         when true, "%2e" / "%2E" are replaced by '.'
 *                           before the path is cleaned
 * @return QString::null for an empty path; the path unchanged when it does
 *         not start with '/'; otherwise the cleaned path, which is "/" when
 *         nothing is left. A trailing '/' is kept when the input ended in
 *         '/' or "/.".
 */
00300 static QString cleanpath(const QString &_path, bool cleanDirSeparator, bool decodeDots)
00301 {
00302   if (_path.isEmpty()) return QString::null;
00303   
00304   if (_path[0] != '/')
00305      return _path; // Don't mangle mailto-style URLs
00306   
00307   QString path = _path;
00308 
00309   int len = path.length();
00310 
00311   if (decodeDots)
00312   {
00313 #ifndef KDE_QT_ONLY
00314      static const QString &encodedDot = KGlobal::staticQString("%2e");
00315 #else
00316      QString encodedDot("%2e");
00317 #endif
     // NOTE(review): the third argument (false) presumably makes this search
     // case-insensitive, so it would detect both spellings - confirm.
00318      if (path.find(encodedDot, 0, false) != -1)
00319      {
00320 #ifndef KDE_QT_ONLY
00321         static const QString &encodedDOT = KGlobal::staticQString("%2E"); // Uppercase!
00322 #else
00323         QString encodedDOT("%2E");
00324 #endif
00325         path.replace(encodedDot, ".");
00326         path.replace(encodedDOT, ".");
00327         len = path.length();
00328      }
00329   }
00330 
  // Remember whether the result has to end with a slash: the input ended in '/' or "/.".
00331   bool slash = (len && path[len-1] == '/') ||
00332                (len > 1 && path[len-2] == '/' && path[len-1] == '.');
00333 
00334   // The following code cleans up directory path much like
00335   // QDir::cleanDirPath() except it can be made to ignore multiple
00336   // directory separators by setting the flag to false.  That fixes
00337   // bug# 15044, mail.altavista.com and other similar brain-dead server
00338   // implementations that do not follow what has been specified in
00339   // RFC 2396!! (dA)
00340   QString result;
00341   int cdUp, orig_pos, pos;
00342 
  // Walk the path backwards, one "/entry" at a time. cdUp counts the ".."
  // entries seen so far that still have to cancel an entry further left.
00343   cdUp = 0;
00344   pos = orig_pos = len;
00345   while ( pos && (pos = path.findRev('/',--pos)) != -1 )
00346   {
    // From here on len is the length of the current entry, without its leading '/'.
00347     len = orig_pos - pos - 1;
00348     if ( len == 2 && path[pos+1] == '.' && path[pos+2] == '.' )
00349       cdUp++;
00350     else
00351     {
00352       // Ignore any occurrences of '.'
00353       // This includes entries that simply do not make sense like /..../
      // Empty entries are skipped only when cleanDirSeparator is set.
00354       if ( (len || !cleanDirSeparator) &&
00355            (len != 1 || path[pos+1] != '.' ) )
00356       {
          // Keep the entry unless a pending ".." consumes it.
00357           if ( !cdUp )
00358               result.prepend(path.mid(pos, len+1));
00359           else
00360               cdUp--;
00361       }
00362     }
00363     orig_pos = pos;
00364   }
00365 
00366   if ( result.isEmpty() )
00367     result = "/";
00368   else if ( slash && result[result.length()-1] != '/' )
00369        result.append('/');
00370 
00371   return result;
00372 }
00373 
00374 bool KURL::isRelativeURL(const QString &_url)
00375 {
00376   int len = _url.length();
00377   if (!len) return true; // Very short relative URL.
00378   const QChar *str = _url.unicode();
00379 
00380   // Absolute URL must start with alpha-character
00381   if (!isalpha(str[0].latin1()))
00382      return true; // Relative URL
00383 
00384   for(int i = 1; i < len; i++)
00385   {
00386      char c = str[i].latin1(); // Note: non-latin1 chars return 0!
00387      if (c == ':')
00388         return false; // Absolute URL
00389 
00390      // Protocol part may only contain alpha, digit, + or -
00391      if (!isalpha(c) && !isdigit(c) && (c != '+') && (c != '-'))
00392         return true; // Relative URL
00393   }
00394   // URL did not contain ':'
00395   return true; // Relative URL
00396 }
00397 
00398 KURL::List::List(const KURL &url)
00399 {
00400     append( url );
00401 }
00402 
00403 KURL::List::List(const QStringList &list)
00404 {
00405   for (QStringList::ConstIterator it = list.begin();
00406        it != list.end();
00407        it++)
00408     {
00409       append( KURL(*it) );
00410     }
00411 }
00412 
00413 QStringList KURL::List::toStringList() const
00414 {
00415   QStringList lst;
00416    for( KURL::List::ConstIterator it = begin();
00417         it != end();
00418         it++)
00419    {
00420       lst.append( (*it).url() );
00421    }
00422    return lst;
00423 }
00424 
00425 
00426 KURL::KURL()
00427 {
00428   reset();
00429 }
00430 
00431 KURL::~KURL()
00432 {
00433 }
00434 
00435 
00436 KURL::KURL( const QString &url, int encoding_hint )
00437 {
00438   reset();
00439   parse( url, encoding_hint );
00440 }
00441 
00442 KURL::KURL( const char * url, int encoding_hint )
00443 {
00444   reset();
00445   parse( QString::fromLatin1(url), encoding_hint );
00446 }
00447 
00448 KURL::KURL( const QCString& url, int encoding_hint )
00449 {
00450   reset();
00451   parse( QString::fromLatin1(url), encoding_hint );
00452 }
00453 
00454 KURL::KURL( const KURL& _u )
00455 {
00456   *this = _u;
00457 }
00458 
00459 QDataStream & operator<< (QDataStream & s, const KURL & a)
00460 {
00461   QString QueryForWire=a.m_strQuery_encoded;
00462   if (!a.m_strQuery_encoded.isNull())
00463     QueryForWire.prepend("?");
00464 
00465     s << a.m_strProtocol << a.m_strUser << a.m_strPass << a.m_strHost
00466       << a.m_strPath << a.m_strPath_encoded << QueryForWire << a.m_strRef_encoded
00467       << Q_INT8(a.m_bIsMalformed ? 1 : 0) << a.m_iPort;
00468     return s;
00469 }
00470 
00471 QDataStream & operator>> (QDataStream & s, KURL & a)
00472 {
00473     Q_INT8 malf;
00474     QString QueryFromWire;
00475     s >> a.m_strProtocol >> a.m_strUser >> a.m_strPass >> a.m_strHost
00476       >> a.m_strPath >> a.m_strPath_encoded >> QueryFromWire >> a.m_strRef_encoded
00477       >> malf >> a.m_iPort;
00478     a.m_bIsMalformed = (malf != 0);
00479 
00480     if ( QueryFromWire.isEmpty() )
00481       a.m_strQuery_encoded = QString::null;
00482     else
00483       a.m_strQuery_encoded = QueryFromWire.mid(1);
00484 
00485     a.m_iUriMode = KURL::uriModeForProtocol( a.m_strProtocol );
00486 
00487     return s;
00488 }
00489 
00490 #ifndef QT_NO_NETWORKPROTOCOL
00491 KURL::KURL( const QUrl &u )
00492 {
00493   *this = u;
00494 }
00495 #endif
00496 
/**
 * Creates a URL by resolving @p _rel_url against the base URL @p _u.
 *
 * Cases, in the order they are tested:
 *  - the base has sub-URLs: the relative URL is resolved against the last
 *    sub-URL and the list is joined again;
 *  - the relative URL is empty: the result is a copy of the base;
 *  - it starts with '#': copy of the base with the HTML reference replaced;
 *  - it is relative according to isRelativeURL(): the base's query and
 *    reference are dropped, its path is adjusted, and base + relative text is
 *    parsed again and cleaned;
 *  - otherwise it is absolute and parsed on its own; user and password of
 *    the base are kept if host and protocol match and no user was given.
 *
 * @param _u             base URL
 * @param _rel_url       relative (or absolute) URL text
 * @param encoding_hint  MIB number of the encoding used for decoding/parsing
 */
00497 KURL::KURL( const KURL& _u, const QString& _rel_url, int encoding_hint )
00498 {
00499   if (_u.hasSubURL()) // Operate on the last suburl, not the first
00500   {
00501     KURL::List lst = split( _u );
00502     KURL u(lst.last(), _rel_url, encoding_hint);
00503     lst.remove( lst.last() );
00504     lst.append( u );
00505     *this = join( lst );
00506     return;
00507   }
00508   // WORKAROUND THE RFC 1606 LOOPHOLE THAT ALLOWS
00509   // http:/index.html AS A VALID SYNTAX FOR RELATIVE
00510   // URLS. ( RFC 2396 section 5.2 item # 3 )
  // NOTE(review): "RFC 1606" above is probably a mistyped RFC number - confirm.
  // If the relative text starts with the base's protocol followed by ':' but
  // not by "://", the "protocol:" prefix is stripped so that the remainder is
  // handled as a relative URL.
00511   QString rUrl = _rel_url;
00512   int len = _u.m_strProtocol.length();
00513   if ( !_u.m_strHost.isEmpty() && !rUrl.isEmpty() &&
00514        rUrl.find( _u.m_strProtocol, 0, false ) == 0 &&
00515        rUrl[len] == ':' && (rUrl[len+1] != '/' ||
00516        (rUrl[len+1] == '/' && rUrl[len+2] != '/')) )
00517   {
00518     rUrl.remove( 0, rUrl.find( ':' ) + 1 );
00519   }
00520 
00521   if ( rUrl.isEmpty() )
00522   {
00523     *this = _u;
00524   }
00525   else if ( rUrl[0] == '#' )
00526   {
00527     *this = _u;
00528     QString ref = decode(rUrl.mid(1), encoding_hint);
00529     if ( ref.isNull() )
00530         ref = ""; // we know there was an (empty) html ref, we saw the '#'
00531     setHTMLRef( ref );
00532   }
00533   else if ( isRelativeURL( rUrl) )
00534   {
00535     *this = _u;
00536     m_strQuery_encoded = QString::null;
00537     m_strRef_encoded = QString::null;
00538     if ( rUrl[0] == '/')
00539     {
        // Absolute path: the base path is discarded; a leading "//" also
        // discards the base host.
00540         if ((rUrl.length() > 1) && (rUrl[1] == '/'))
00541         {
00542            m_strHost = QString::null;
00543         }
00544         m_strPath = QString::null;
00545         m_strPath_encoded = QString::null;
00546     }
00547     else if ( rUrl[0] != '?' )
00548     {
       // Relative path: keep the base path up to and including its last '/'.
00549        int pos = m_strPath.findRev( '/' );
00550        if (pos >= 0)
00551           m_strPath.truncate(pos);
00552        m_strPath += '/';
00553        if (!m_strPath_encoded.isEmpty())
00554        {
00555           pos = m_strPath_encoded.findRev( '/' );
00556           if (pos >= 0)
00557              m_strPath_encoded.truncate(pos);
00558           m_strPath_encoded += '/';
00559        }
00560     }
00561     else
00562     {
       // Query only: the base path is kept, but it must not be empty.
00563        if ( m_strPath.isEmpty() )
00564           m_strPath = '/';
00565     }
    // Re-parse the adjusted base followed by the relative text.
00566     KURL tmp( url() + rUrl, encoding_hint);
00567     *this = tmp;
00568     cleanPath(false);
00569   }
00570   else
00571   {
00572     KURL tmp( rUrl, encoding_hint);
00573     *this = tmp;
00574     // Preserve userinfo if applicable.
00575     if (!_u.m_strUser.isEmpty() && m_strUser.isEmpty() && (_u.m_strHost == m_strHost) && (_u.m_strProtocol == m_strProtocol))
00576     {
00577        m_strUser = _u.m_strUser;
00578        m_strPass = _u.m_strPass;
00579     }
00580     cleanPath(false);
00581   }
00582 }
00583 
00584 void KURL::reset()
00585 {
00586   m_strProtocol = QString::null;
00587   m_strUser = QString::null;
00588   m_strPass = QString::null;
00589   m_strHost = QString::null;
00590   m_strPath = QString::null;
00591   m_strPath_encoded = QString::null;
00592   m_strQuery_encoded = QString::null;
00593   m_strRef_encoded = QString::null;
00594   m_bIsMalformed = true;
00595   m_iPort = 0;
00596   m_iUriMode = Auto;
00597 }
00598 
00599 bool KURL::isEmpty() const
00600 {
00601   return (m_strPath.isEmpty() && m_strProtocol.isEmpty());
00602 }
00603 
00604 void KURL::parse( const QString& _url, int encoding_hint )
00605 {
00606     if ( _url.isEmpty() || m_iUriMode == Invalid )
00607     {
00608     m_strProtocol = _url;
00609     m_iUriMode = Invalid;
00610     return;
00611     }
00612 
00613     const QChar* buf = _url.unicode();
00614     const QChar* orig = buf;
00615     uint len = _url.length();
00616     uint pos = 0;
00617 
00618     // Node 1: Accept alpha or slash
00619     QChar x = buf[pos++];
00620     if ( x == '/' )
00621     {
00622     // A slash means we immediately proceed to parse it as a file URL.
00623     m_iUriMode = URL;
00624     m_strProtocol = fileProt;
00625     parseURL( _url, encoding_hint );
00626     return;
00627     }
00628     if ( !isalpha( (int)x ) )
00629     goto NodeErr;
00630 
00631     // Node 2: Accept any amount of (alpha|digit|'+'|'-')
00632     // '.' is not currently accepted, because current KURL may be confused.
00633     // Proceed with :// :/ or :
00634     while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00635              buf[pos] == '+' || buf[pos] == '-')) pos++;
00636 
00637     if (pos < len && buf[pos] == ':' )
00638     {
00639     m_strProtocol = QString( orig, pos ).lower();
00640     if ( m_iUriMode == Auto )
00641         m_iUriMode = uriModeForProtocol( m_strProtocol );
00642     // Proceed to correct parse function.
00643     switch ( m_iUriMode )
00644   {
00645     case RawURI:
00646         parseRawURI( _url );
00647         return;
00648     case Mailto:
00649         parseMailto( _url );
00650         return;
00651     case URL:
00652         parseURL( _url, encoding_hint );
00653         return;
00654     default:
00655         // Unknown URI mode results in an invalid URI.
00656         break;
00657     }
00658     }
00659 
00660 NodeErr:
00661     reset();
00662     m_strProtocol = _url;
00663     m_iUriMode = Invalid;
00664 }
00665 
00666 void KURL::parseRawURI( const QString& _url, int encoding_hint )
00667 {
00668     uint len = _url.length();
00669     const QChar* buf = _url.unicode();
00670 
00671     uint pos = 0;
00672 
00673     // Accept any amount of (alpha|digit|'+'|'-')
00674     // '.' is not currently accepted, because current KURL may be confused.
00675     // Proceed with :
00676     while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00677              buf[pos] == '+' || buf[pos] == '-')) pos++;
00678 
00679     // Note that m_strProtocol is already set here, so we just skip over the protocol.
00680     if (pos < len && buf[pos] == ':' )
00681     pos++;
00682     else {
00683     reset();
00684     m_strProtocol = _url;
00685     m_iUriMode = Invalid;
00686     return;
00687     }
00688 
00689     if ( pos == len )
00690     m_strPath = QString::null;
00691     else
00692     m_strPath = decode( QString( buf + pos, len - pos ), encoding_hint );
00693 
00694     m_bIsMalformed = false;
00695 
00696     return;
00697 }
00698 
00699 void KURL::parseMailto( const QString& _url, int encoding_hint )
00700 {
00701     parseURL( _url, encoding_hint);
00702     if ( m_bIsMalformed )
00703     return;
00704     QRegExp mailre("(.+@)(.+)");
00705     if ( mailre.exactMatch( m_strPath ) )
00706     {
00707 #ifndef KDE_QT_ONLY
00708     QString host = KIDNA::toUnicode( mailre.cap( 2 ) );
00709     if (host.isEmpty())
00710         host = mailre.cap( 2 ).lower();
00711 #else
00712     QString host = mailre.cap( 2 ).lower();
00713 #endif
00714     m_strPath = mailre.cap( 1 ) + host;
00715   }
00716 }
00717 
00718 void KURL::parseURL( const QString& _url, int encoding_hint )
00719 {
00720   QString port;
00721   bool badHostName = false;
00722   int start = 0;
00723   uint len = _url.length();
00724   const QChar* buf = _url.unicode();
00725 
00726   QChar delim;
00727   QString tmp;
00728 
00729   uint pos = 0;
00730 
00731   // Node 1: Accept alpha or slash
00732   QChar x = buf[pos++];
00733   if ( x == '/' )
00734     goto Node9;
00735   if ( !isalpha( (int)x ) )
00736     goto NodeErr;
00737 
00738   // Node 2: Accept any amount of (alpha|digit|'+'|'-')
00739   // '.' is not currently accepted, because current KURL may be confused.
00740   // Proceed with :// :/ or :
00741   while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00742           buf[pos] == '+' || buf[pos] == '-')) pos++;
00743 
00744   // Note that m_strProtocol is already set here, so we just skip over the protocol.
00745   if ( pos+2 < len && buf[pos] == ':' && buf[pos+1] == '/' && buf[pos+2] == '/' )
00746     {
00747       pos += 3;
00748     }
00749   else if (pos+1 < len && buf[pos] == ':' ) // Need to always compare length()-1 otherwise KURL passes "http:" as legal!!
00750     {
00751       pos++;
00752       start = pos;
00753       goto Node9;
00754     }
00755   else
00756     goto NodeErr;
00757 
00758   //Node 3: We need at least one character here
00759   if ( pos == len )
00760       goto NodeErr;
00761   start = pos;
00762 
00763   // Node 4: Accept any amount of characters.
00764   if (buf[pos] == '[')     // An IPv6 host follows.
00765       goto Node8;
00766   // Terminate on / or @ or ? or # or " or ; or <
00767   x = buf[pos];
00768   while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') )
00769   {
00770      if ((x == '\"') || (x == ';') || (x == '<'))
00771         badHostName = true;
00772      if (++pos == len)
00773         break;
00774      x = buf[pos];
00775   }
00776   if ( pos == len )
00777     {
00778       if (badHostName)
00779          goto NodeErr;
00780 
00781       setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00782       goto NodeOk;
00783     }
00784   if ( x == '@' )
00785     {
00786       m_strUser = decode(QString( buf + start, pos - start ), encoding_hint);
00787       pos++;
00788       goto Node7;
00789     }
00790   else if ( (x == '/') || (x == '?') || (x == '#'))
00791     {
00792       if (badHostName)
00793          goto NodeErr;
00794 
00795       setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00796       start = pos;
00797       goto Node9;
00798     }
00799   else if ( x != ':' )
00800     goto NodeErr;
00801   m_strUser = decode(QString( buf + start, pos - start ), encoding_hint);
00802   pos++;
00803 
00804   // Node 5: We need at least one character
00805   if ( pos == len )
00806     goto NodeErr;
00807   start = pos++;
00808 
00809   // Node 6: Read everything until @, /, ? or #
00810   while( (pos < len) &&
00811         (buf[pos] != '@') &&
00812         (buf[pos] != '/') &&
00813         (buf[pos] != '?') &&
00814         (buf[pos] != '#')) pos++;
00815   // If we now have a '@' the ':' seperates user and password.
00816   // Otherwise it seperates host and port.
00817   if ( (pos == len) || (buf[pos] != '@') )
00818     {
00819       // Ok the : was used to separate host and port
00820       if (badHostName)
00821          goto NodeErr;
00822       setHost(m_strUser);
00823       m_strUser = QString::null;
00824       QString tmp( buf + start, pos - start );
00825       char *endptr;
00826       m_iPort = (unsigned short int)strtol(tmp.ascii(), &endptr, 10);
00827       if ((pos == len) && (strlen(endptr) == 0))
00828         goto NodeOk;
00829       // there is more after the digits
00830       pos -= strlen(endptr);
00831       if ((buf[pos] != '@') &&
00832           (buf[pos] != '/') &&
00833           (buf[pos] != '?') &&
00834           (buf[pos] != '#'))
00835         goto NodeErr;
00836 
00837       start = pos++;
00838       goto Node9;
00839     }
00840   m_strPass = decode(QString( buf + start, pos - start), encoding_hint);
00841   pos++;
00842 
00843   // Node 7: We need at least one character
00844  Node7:
00845   if ( pos == len )
00846     goto NodeErr;
00847 
00848  Node8:
00849   if (buf[pos] == '[')
00850   {
00851     // IPv6 address
00852     start = ++pos; // Skip '['
00853 
00854     if (pos == len)
00855     {
00856        badHostName = true;
00857        goto NodeErr;
00858     }
00859     // Node 8b: Read everything until ] or terminate
00860     badHostName = false;
00861     x = buf[pos];
00862     while( (x != ']') )
00863     {
00864        if ((x == '\"') || (x == ';') || (x == '<'))
00865           badHostName = true;
00866        if (++pos == len)
00867        {
00868           badHostName = true;
00869           break;
00870        }
00871        x = buf[pos];
00872     }
00873     if (badHostName)
00874        goto NodeErr;
00875     setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00876     if (pos < len) pos++; // Skip ']'
00877     if (pos == len)
00878        goto NodeOk;
00879   }
00880   else
00881   {
00882     // Non IPv6 address
00883     start = pos;
00884 
00885     // Node 8b: Read everything until / : or terminate
00886     badHostName = false;
00887     x = buf[pos];
00888     while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') )
00889     {
00890        if ((x == '\"') || (x == ';') || (x == '<'))
00891           badHostName = true;
00892        if (++pos == len)
00893           break;
00894        x = buf[pos];
00895     }
00896     if (badHostName)
00897        goto NodeErr;
00898     if ( pos == len )
00899     {
00900        setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00901        goto NodeOk;
00902     }
00903     setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00904   }
00905   x = buf[pos];
00906   if ( x == '/' )
00907     {
00908       start = pos++;
00909       goto Node9;
00910     }
00911   else if ( x != ':' )
00912     goto NodeErr;
00913   pos++;
00914 
00915   // Node 8a: Accept at least one digit
00916   if ( pos == len )
00917     goto NodeErr;
00918   start = pos;
00919   if ( !isdigit( buf[pos++] ) )
00920     goto NodeErr;
00921 
00922   // Node 8b: Accept any amount of digits
00923   while( pos < len && isdigit( buf[pos] ) ) pos++;
00924   port = QString( buf + start, pos - start );
00925   m_iPort = port.toUShort();
00926   if ( pos == len )
00927     goto NodeOk;
00928   start = pos++;
00929 
00930  Node9: // parse path until query or reference reached
00931 
00932   while( pos < len && buf[pos] != '#' && buf[pos]!='?' ) pos++;
00933 
00934   tmp = QString( buf + start, pos - start );
00935   //kdDebug(126)<<" setting encoded path&query to:"<<tmp<<endl;
00936   setEncodedPath( tmp, encoding_hint );
00937 
00938   if ( pos == len )
00939       goto NodeOk;
00940 
00941  //Node10: // parse query or reference depending on what comes first
00942   delim = (buf[pos++]=='#'?'?':'#');
00943 
00944   start = pos;
00945 
00946   while(pos < len && buf[pos]!=delim ) pos++;
00947 
00948   tmp = QString(buf + start, pos - start);
00949   if (delim=='#')
00950       _setQuery(tmp, encoding_hint);
00951   else
00952       m_strRef_encoded = tmp;
00953 
00954   if (pos == len)
00955       goto NodeOk;
00956 
00957  //Node11: // feed the rest into the remaining variable
00958   tmp = QString( buf + pos + 1, len - pos - 1);
00959   if (delim == '#')
00960       m_strRef_encoded = tmp;
00961   else
00962       _setQuery(tmp, encoding_hint);
00963 
00964  NodeOk:
00965   //kdDebug(126)<<"parsing finished. m_strProtocol="<<m_strProtocol<<" m_strHost="<<m_strHost<<" m_strPath="<<m_strPath<<endl;
00966   m_bIsMalformed = false; // Valid URL
00967 
00968   //kdDebug()<<"Prot="<<m_strProtocol<<"\nUser="<<m_strUser<<"\nPass="<<m_strPass<<"\nHost="<<m_strHost<<"\nPath="<<m_strPath<<"\nQuery="<<m_strQuery_encoded<<"\nRef="<<m_strRef_encoded<<"\nPort="<<m_iPort<<endl;
00969   if (m_strProtocol.isEmpty())
00970   {
00971     m_iUriMode = URL;
00972     m_strProtocol = fileProt;
00973   }
00974   return;
00975 
00976  NodeErr:
00977 //  kdDebug(126) << "KURL couldn't parse URL \"" << _url << "\"" << endl;
00978   reset();
00979   m_strProtocol = _url;
00980   m_iUriMode = Invalid;
00981 }
00982 
00983 KURL& KURL::operator=( const QString& _url )
00984 {
00985   reset();
00986   parse( _url );
00987 
00988   return *this;
00989 }
00990 
00991 KURL& KURL::operator=( const char * _url )
00992 {
00993   reset();
00994   parse( QString::fromLatin1(_url) );
00995 
00996   return *this;
00997 }
00998 
00999 #ifndef QT_NO_NETWORKPROTOCOL
01000 KURL& KURL::operator=( const QUrl & u )
01001 {
01002   m_strProtocol = u.protocol();
01003   m_iUriMode = Auto;
01004   m_strUser = u.user();
01005   m_strPass = u.password();
01006   m_strHost = u.host();
01007   m_strPath = u.path( false );
01008   m_strPath_encoded = QString::null;
01009   m_strQuery_encoded = u.query();
01010   m_strRef_encoded = u.ref();
01011   m_bIsMalformed = !u.isValid();
01012   m_iPort = u.port();
01013 
01014   return *this;
01015 }
01016 #endif
01017 
01018 KURL& KURL::operator=( const KURL& _u )
01019 {
01020   m_strProtocol = _u.m_strProtocol;
01021   m_strUser = _u.m_strUser;
01022   m_strPass = _u.m_strPass;
01023   m_strHost = _u.m_strHost;
01024   m_strPath = _u.m_strPath;
01025   m_strPath_encoded = _u.m_strPath_encoded;
01026   m_strQuery_encoded = _u.m_strQuery_encoded;
01027   m_strRef_encoded = _u.m_strRef_encoded;
01028   m_bIsMalformed = _u.m_bIsMalformed;
01029   m_iPort = _u.m_iPort;
01030   m_iUriMode = _u.m_iUriMode;
01031 
01032   return *this;
01033 }
01034 
01035 bool KURL::operator<( const KURL& _u) const
01036 {
01037   int i;
01038   if (!_u.isValid())
01039   {
01040      if (!isValid())
01041      {
01042         i = m_strProtocol.compare(_u.m_strProtocol);
01043         return (i < 0);
01044      }
01045      return false;
01046   }
01047   if (!isValid())
01048      return true;
01049   
01050   i = m_strProtocol.compare(_u.m_strProtocol);
01051   if (i) return (i < 0);
01052 
01053   i = m_strHost.compare(_u.m_strHost);
01054   if (i) return (i < 0);
01055 
01056   if (m_iPort != _u.m_iPort) return (m_iPort < _u.m_iPort);
01057 
01058   i = m_strPath.compare(_u.m_strPath);
01059   if (i) return (i < 0);
01060 
01061   i = m_strQuery_encoded.compare(_u.m_strQuery_encoded);
01062   if (i) return (i < 0);
01063 
01064   i = m_strRef_encoded.compare(_u.m_strRef_encoded);
01065   if (i) return (i < 0);
01066 
01067   i = m_strUser.compare(_u.m_strUser);
01068   if (i) return (i < 0);
01069   
01070   i = m_strPass.compare(_u.m_strPass);
01071   if (i) return (i < 0);
01072 
01073   return false;    
01074 }
01075 
01076 bool KURL::operator==( const KURL& _u ) const
01077 {
01078   if ( !isValid() || !_u.isValid() )
01079     return false;
01080 
01081   if ( m_strProtocol == _u.m_strProtocol &&
01082        m_strUser == _u.m_strUser &&
01083        m_strPass == _u.m_strPass &&
01084        m_strHost == _u.m_strHost &&
01085        m_strPath == _u.m_strPath &&
01086        // The encoded path may be null, but the URLs are still equal (David)
01087        ( m_strPath_encoded.isNull() || _u.m_strPath_encoded.isNull() ||
01088          m_strPath_encoded == _u.m_strPath_encoded ) &&
01089        m_strQuery_encoded == _u.m_strQuery_encoded &&
01090        m_strRef_encoded == _u.m_strRef_encoded &&
01091        m_iPort == _u.m_iPort )
01092   {
01093     return true;
01094   }
01095 
01096   return false;
01097 }
01098 
01099 bool KURL::operator==( const QString& _u ) const
01100 {
01101   KURL u( _u );
01102   return ( *this == u );
01103 }
01104 
01105 bool KURL::cmp( const KURL &u, bool ignore_trailing ) const
01106 {
01107   return equals( u, ignore_trailing );
01108 }
01109 
01110 bool KURL::equals( const KURL &_u, bool ignore_trailing ) const
01111 {
01112   if ( !isValid() || !_u.isValid() )
01113     return false;
01114 
01115   if ( ignore_trailing )
01116   {
01117     QString path1 = path(1);
01118     QString path2 = _u.path(1);
01119     if ( path1 != path2 )
01120       return false;
01121 
01122     if ( m_strProtocol == _u.m_strProtocol &&
01123          m_strUser == _u.m_strUser &&
01124          m_strPass == _u.m_strPass &&
01125          m_strHost == _u.m_strHost &&
01126          m_strQuery_encoded == _u.m_strQuery_encoded &&
01127          m_strRef_encoded == _u.m_strRef_encoded &&
01128          m_iPort == _u.m_iPort )
01129       return true;
01130 
01131     return false;
01132   }
01133 
01134   return ( *this == _u );
01135 }
01136 
01137 bool KURL::isParentOf( const KURL& _u ) const
01138 {
01139   if ( !isValid() || !_u.isValid() )
01140     return false;
01141 
01142   if ( m_strProtocol == _u.m_strProtocol &&
01143        m_strUser == _u.m_strUser &&
01144        m_strPass == _u.m_strPass &&
01145        m_strHost == _u.m_strHost &&
01146        m_strQuery_encoded == _u.m_strQuery_encoded &&
01147        m_strRef_encoded == _u.m_strRef_encoded &&
01148        m_iPort == _u.m_iPort )
01149   {
01150     if ( path().isEmpty() || _u.path().isEmpty() )
01151         return false; // can't work with implicit paths
01152 
01153     QString p1( cleanpath( path(), true, false ) );
01154     if ( p1[p1.length()-1] != '/' )
01155         p1 += '/';
01156     QString p2( cleanpath( _u.path(), true, false ) );
01157     if ( p2[p2.length()-1] != '/' )
01158         p2 += '/';
01159 
01160     //kdDebug(126) << "p1=" << p1 << endl;
01161     //kdDebug(126) << "p2=" << p2 << endl;
01162     //kdDebug(126) << "p1.length()=" << p1.length() << endl;
01163     //kdDebug(126) << "p2.left(!$)=" << p2.left( p1.length() ) << endl;
01164     return p2.startsWith( p1 );
01165   }
01166   return false;
01167 }
01168 
01169 void KURL::setFileName( const QString& _txt )
01170 {
01171   m_strRef_encoded = QString::null;
01172   int i = 0;
01173   while( _txt[i] == '/' ) ++i;
01174   QString tmp;
01175   if ( i )
01176     tmp = _txt.mid( i );
01177   else
01178     tmp = _txt;
01179 
01180   QString path = m_strPath_encoded.isEmpty() ? m_strPath : m_strPath_encoded;
01181   if ( path.isEmpty() )
01182     path = "/";
01183   else
01184   {
01185     int lastSlash = path.findRev( '/' );
01186     if ( lastSlash == -1)
01187     {
01188       // The first character is not a '/' ???
01189       // This looks strange ...
01190       path = "/";
01191     }
01192     else if ( path.right(1) != "/" )
01193       path.truncate( lastSlash+1 ); // keep the "/"
01194   }
01195   if (m_strPath_encoded.isEmpty())
01196   {
01197      path += tmp;
01198      setPath( path );
01199   }
01200   else
01201   {
01202      path += encode_string(tmp);
01203      setEncodedPath( path );
01204   }
01205   cleanPath();
01206 }
01207 
01208 void KURL::cleanPath( bool cleanDirSeparator ) // taken from the old KURL
01209 {
01210   if (m_iUriMode != URL) return;
01211   m_strPath = cleanpath(m_strPath, cleanDirSeparator, false);
01212   // WABA: Is this safe when "/../" is encoded with %?
01213   m_strPath_encoded = cleanpath(m_strPath_encoded, cleanDirSeparator, true);
01214 }
01215 
01216 static QString trailingSlash( int _trailing, const QString &path )
01217 {
01218   QString result = path;
01219 
01220   if ( _trailing == 0 )
01221     return result;
01222   else if ( _trailing == 1 )
01223   {
01224     int len = result.length();
01225     if ( (len == 0) || (result[ len - 1 ] != '/') )
01226       result += "/";
01227     return result;
01228   }
01229   else if ( _trailing == -1 )
01230   {
01231     if ( result == "/" )
01232       return result;
01233     int len = result.length();
01234     if ( (len != 0) && (result[ len - 1 ] == '/') )
01235       result.truncate( len - 1 );
01236     return result;
01237   }
01238   else {
01239     assert( 0 );
01240     return QString::null;
01241   }
01242 }
01243 
01244 void KURL::adjustPath( int _trailing )
01245 {
01246   if (!m_strPath_encoded.isEmpty())
01247   {
01248      m_strPath_encoded = trailingSlash( _trailing, m_strPath_encoded );
01249   }
01250   m_strPath = trailingSlash( _trailing, m_strPath );
01251 }
01252 
01253 
01254 QString KURL::encodedPathAndQuery( int _trailing, bool _no_empty_path, int encoding_hint ) const
01255 {
01256   QString tmp;
01257   if (!m_strPath_encoded.isEmpty() && encoding_hint == 0)
01258   {
01259      tmp = trailingSlash( _trailing, m_strPath_encoded );
01260   }
01261   else
01262   {
01263      tmp = path( _trailing );
01264      if ( _no_empty_path && tmp.isEmpty() )
01265         tmp = "/";
01266      tmp = encode( tmp, false, encoding_hint );
01267   }
01268 
01269   // TODO apply encoding_hint to the query
01270   if (!m_strQuery_encoded.isNull())
01271       tmp += '?' + m_strQuery_encoded;
01272   return tmp;
01273 }
01274 
01275 void KURL::setEncodedPath( const QString& _txt, int encoding_hint )
01276 {
01277   m_strPath_encoded = _txt;
01278 
01279   decode( m_strPath_encoded, m_strPath, m_strPath_encoded, encoding_hint );
01280   // Throw away encoding for local files, makes file-operations faster.
01281   if (m_strProtocol == fileProt)
01282      m_strPath_encoded = QString::null;
01283 
01284   if ( m_iUriMode == Auto )
01285     m_iUriMode = URL;
01286 }
01287 
01288 
01289 void KURL::setEncodedPathAndQuery( const QString& _txt, int encoding_hint )
01290 {
01291   int pos = _txt.find( '?' );
01292   if ( pos == -1 )
01293   {
01294     setEncodedPath(_txt, encoding_hint);
01295     m_strQuery_encoded = QString::null;
01296   }
01297   else
01298   {
01299     setEncodedPath(_txt.left( pos ), encoding_hint);
01300     _setQuery(_txt.right(_txt.length() - pos - 1), encoding_hint);
01301   }
01302 }
01303 
01304 QString KURL::path( int _trailing ) const
01305 {
01306   return trailingSlash( _trailing, path() );
01307 }
01308 
01309 bool KURL::isLocalFile() const
01310 {
01311   if ( (m_strProtocol != fileProt ) || hasSubURL() )
01312      return false;
01313      
01314   if (m_strHost.isEmpty() || (m_strHost == "localhost"))
01315      return true;
01316      
01317   char hostname[ 256 ];
01318   hostname[ 0 ] = '\0';
01319   if (!gethostname( hostname, 255 ))
01320      hostname[sizeof(hostname)-1] = '\0';
01321      
01322   for(char *p = hostname; *p; p++)
01323      *p = tolower(*p);
01324      
01325   return (m_strHost == hostname);
01326 }
01327 
01328 void KURL::setFileEncoding(const QString &encoding)
01329 {
01330   if (!isLocalFile())
01331      return;
01332 
01333   QString q = query();
01334 
01335   if (!q.isEmpty() && (q[0] == '?'))
01336      q = q.mid(1);
01337 
01338   QStringList args = QStringList::split('&', q);
01339   for(QStringList::Iterator it = args.begin();
01340       it != args.end();)
01341   {
01342       QString s = decode_string(*it);
01343       if (s.startsWith("charset="))
01344          it = args.erase(it);
01345       else
01346          ++it;
01347   }
01348   if (!encoding.isEmpty())
01349      args.append("charset="+encode_string(encoding));
01350 
01351   if (args.isEmpty())
01352      _setQuery(QString::null);
01353   else
01354      _setQuery(args.join("&"));
01355 }
01356 
01357 QString KURL::fileEncoding() const
01358 {
01359   if (!isLocalFile())
01360      return QString::null;
01361 
01362   QString q = query();
01363 
01364   if (q.isEmpty())
01365      return QString::null;
01366 
01367   if (q[0] == '?')
01368      q = q.mid(1);
01369 
01370   QStringList args = QStringList::split('&', q);
01371   for(QStringList::ConstIterator it = args.begin();
01372       it != args.end();
01373       ++it)
01374   {
01375       QString s = decode_string(*it);
01376       if (s.startsWith("charset="))
01377          return s.mid(8);
01378   }
01379   return QString::null;
01380 }
01381 
01382 bool KURL::hasSubURL() const
01383 {
01384   if ( m_strProtocol.isEmpty() || m_bIsMalformed )
01385     return false;
01386   if (m_strRef_encoded.isEmpty())
01387      return false;
01388   if (m_strRef_encoded.startsWith("gzip:"))
01389      return true;
01390   if (m_strRef_encoded.startsWith("bzip:"))
01391      return true;
01392   if (m_strRef_encoded.startsWith("bzip2:"))
01393      return true;
01394   if (m_strRef_encoded.startsWith("tar:"))
01395      return true;
01396   if (m_strRef_encoded.startsWith("ar:"))
01397      return true;
01398   if (m_strRef_encoded.startsWith("zip:"))
01399      return true;
01400   if ( m_strProtocol == "error" ) // anything that starts with error: has suburls
01401      return true;
01402   return false;
01403 }
01404 
01405 QString KURL::url( int _trailing, int encoding_hint ) const
01406 {
01407   if( m_bIsMalformed )
01408   {
01409     // Return the whole url even when the url is
01410     // malformed.  Under such conditions the url
01411     // is stored in m_strProtocol.
01412     return m_strProtocol;
01413   }
01414 
01415   QString u = m_strProtocol;
01416   if (!u.isEmpty())
01417     u += ":";
01418 
01419   if ( hasHost() )
01420   {
01421     u += "//";
01422     if ( hasUser() )
01423     {
01424       u += encode(m_strUser, true, encoding_hint);
01425       if ( hasPass() )
01426       {
01427         u += ":";
01428         u += encode(m_strPass, true, encoding_hint);
01429       }
01430       u += "@";
    }
    if ( m_iUriMode == URL )
    {
      bool IPv6 = (m_strHost.find(':') != -1);
      if (IPv6)
        u += '[' + m_strHost + ']';
      else
        u += encodeHost(m_strHost, true, encoding_hint);
      if ( m_iPort != 0 ) {
        QString buffer;
        buffer.sprintf( ":%u", m_iPort );
01431         u += buffer;
01432       }
01433     }
01434     else
01435     {
01436       u += m_strHost;
01437     }
01438   }
01439 
01440   if ( m_iUriMode == URL || m_iUriMode == Mailto )
01441     u += encodedPathAndQuery( _trailing, false, encoding_hint );
01442   else
01443     u += m_strPath;
01444 
01445   if ( hasRef() )
01446   {
01447     u += "#";
01448     u += m_strRef_encoded;
01449   }
01450 
01451   return u;
01452 }
01453 
01454 QString KURL::prettyURL( int _trailing ) const
01455 {
01456   if( m_bIsMalformed )
01457   {
01458     // Return the whole url even when the url is
01459     // malformed.  Under such conditions the url
01460     // is stored in m_strProtocol.
01461     return m_strProtocol;
01462   }
01463 
01464   QString u = m_strProtocol;
01465   if (!u.isEmpty())
01466      u += ":";
01467 
01468   if ( hasHost() )
01469   {
01470     u += "//";
01471     if ( hasUser() )
01472     {
01473       u += lazy_encode(m_strUser);
01474       // Don't show password!
01475       u += "@";
    }
    if ( m_iUriMode == URL )
    {
    bool IPv6 = (m_strHost.find(':') != -1);
    if (IPv6)
    {
       u += '[' + m_strHost + ']';
    }
    else
    {
       u += lazy_encode(m_strHost);
    }
    }
    else
    {
      u += lazy_encode(m_strHost);
    }
    if ( m_iPort != 0 ) {
      QString buffer;
      buffer.sprintf( ":%u", m_iPort );
01476       u += buffer;
01477     }
01478   }
01479 
01480   u += trailingSlash( _trailing, lazy_encode( m_strPath ) );
01481   if (!m_strQuery_encoded.isNull())
01482       u += '?' + m_strQuery_encoded;
01483 
01484   if ( hasRef() )
01485   {
01486     u += "#";
01487     u += m_strRef_encoded;
01488   }
01489 
01490   return u;
01491 }
01492 
01493 QString KURL::prettyURL( int _trailing, AdjustementFlags _flags) const
01494 {
01495     QString u = prettyURL(_trailing);
01496     if (_flags & StripFileProtocol && u.startsWith("file:"))
01497         u.remove(0, 5);
01498     return u;
01499 }
01500 
01501 QString KURL::htmlURL() const
01502 {
01503   return QStyleSheet::escape(prettyURL());
01504 }
01505 
01506 KURL::List KURL::split( const KURL& _url )
01507 {
01508   QString ref;
01509   KURL::List lst;
01510   KURL url = _url;
01511 
01512   while(true)
01513   {
01514      KURL u = url;
01515      u.m_strRef_encoded = QString::null;
01516      lst.append(u);
01517      if (url.hasSubURL())
01518      {
01519         url = KURL(url.m_strRef_encoded);
01520      }
01521      else
01522      {
01523         ref = url.m_strRef_encoded;
01524         break;
01525      }
01526   }
01527 
01528   // Set HTML ref in all URLs.
01529   KURL::List::Iterator it;
01530   for( it = lst.begin() ; it != lst.end(); ++it )
01531   {
01532      (*it).m_strRef_encoded = ref;
01533   }
01534 
01535   return lst;
01536 }
01537 
01538 KURL::List KURL::split( const QString& _url )
01539 {
01540   return split(KURL(_url));
01541 }
01542 
01543 KURL KURL::join( const KURL::List & lst )
01544 {
01545   if (lst.isEmpty()) return KURL();
01546   KURL tmp;
01547 
01548   KURL::List::ConstIterator first = lst.fromLast();
01549   for( KURL::List::ConstIterator it = first; it != lst.end(); --it )
01550   {
01551      KURL u(*it);
01552      if (it != first)
01553      {
01554         if (!u.m_strRef_encoded) u.m_strRef_encoded = tmp.url();
01555         else u.m_strRef_encoded += "#" + tmp.url(); // Support more than one suburl thingy
01556      }
01557      tmp = u;
01558   }
01559 
01560   return tmp;
01561 }
01562 
01563 QString KURL::fileName( bool _strip_trailing_slash ) const
01564 {
01565   QString fname;
01566   if (hasSubURL()) { // If we have a suburl, then return the filename from there
01567     KURL::List list = KURL::split(*this);
01568     KURL::List::Iterator it = list.fromLast();
01569     return (*it).fileName(_strip_trailing_slash);
01570   }
01571   const QString &path = m_strPath;
01572 
01573   int len = path.length();
01574   if ( len == 0 )
01575     return fname;
01576 
01577   if ( _strip_trailing_slash )
01578   {
01579     while ( len >= 1 && path[ len - 1 ] == '/' )
01580       len--;
01581   }
01582   else if ( path[ len - 1 ] == '/' )
01583     return fname;
01584 
01585   // Does the path only consist of '/' characters ?
01586   if ( len == 1 && path[ 0 ] == '/' )
01587     return fname;
01588 
01589   // Skip last n slashes
01590   int n = 1;
01591   if (!m_strPath_encoded.isEmpty())
01592   {
01593      // This is hairy, we need the last unencoded slash.
01594      // Count in the encoded string how many encoded slashes follow the last
01595      // unencoded one.
01596      int i = m_strPath_encoded.findRev( '/', len - 1 );
01597      QString fileName_encoded = m_strPath_encoded.mid(i+1);
01598      n += fileName_encoded.contains("%2f", false);
01599   }
01600   int i = len;
01601   do {
01602     i = path.findRev( '/', i - 1 );
01603   }
01604   while (--n && (i > 0));
01605 
01606   // If ( i == -1 ) => the first character is not a '/'
01607   // So it's some URL like file:blah.tgz, return the whole path
01608   if ( i == -1 ) {
01609     if ( len == (int)path.length() )
01610       fname = path;
01611     else
01612       // Might get here if _strip_trailing_slash is true
01613       fname = path.left( len );
01614   }
01615   else
01616   {
01617      fname = path.mid( i + 1, len - i - 1 ); // TO CHECK
01618   }
01619   return fname;
01620 }
01621 
01622 void KURL::addPath( const QString& _txt )
01623 {
01624   if (hasSubURL())
01625   {
01626      KURL::List lst = split( *this );
01627      KURL &u = lst.last();
01628      u.addPath(_txt);
01629      *this = join( lst );
01630      return;
01631   }
01632   
01633   m_strPath_encoded = QString::null;
01634 
01635   if ( _txt.isEmpty() )
01636     return;
01637 
01638   int i = 0;
01639   int len = m_strPath.length();
01640   // NB: avoid three '/' when building a new path from nothing
01641   if ( len == 0 ) {
01642     while( _txt[i] == '/' ) ++i;
01643   }
01644   // Add the trailing '/' if it is missing
01645   else if ( _txt[0] != '/' && ( len == 0 || m_strPath[ len - 1 ] != '/' ) )
01646     m_strPath += "/";
01647 
01648   // No double '/' characters
01649   i = 0;
01650   if ( len != 0 && m_strPath[ len - 1 ] == '/' )
01651   {
01652     while( _txt[i] == '/' )
01653       ++i;
01654   }
01655 
01656   m_strPath += _txt.mid( i );
01657 }
01658 
01659 QString KURL::directory( bool _strip_trailing_slash_from_result,
01660                          bool _ignore_trailing_slash_in_path ) const
01661 {
01662   QString result = m_strPath_encoded.isEmpty() ? m_strPath : m_strPath_encoded;
01663   if ( _ignore_trailing_slash_in_path )
01664     result = trailingSlash( -1, result );
01665 
01666   if ( result.isEmpty() || result == "/" )
01667     return result;
01668 
01669   int i = result.findRev( "/" );
01670   // If ( i == -1 ) => the first character is not a '/'
01671   // So it's some URL like file:blah.tgz, with no path
01672   if ( i == -1 )
01673     return QString::null;
01674 
01675   if ( i == 0 )
01676   {
01677     result = "/";
01678     return result;
01679   }
01680 
01681   if ( _strip_trailing_slash_from_result )
01682     result = result.left( i );
01683   else
01684     result = result.left( i + 1 );
01685 
01686   if (!m_strPath_encoded.isEmpty())
01687     result = decode(result);
01688 
01689   return result;
01690 }
01691 
01692 
01693 bool KURL::cd( const QString& _dir )
01694 {
01695   if ( _dir.isEmpty() || m_bIsMalformed )
01696     return false;
01697 
01698   if (hasSubURL())
01699   {
01700      KURL::List lst = split( *this );
01701      KURL &u = lst.last();
01702      u.cd(_dir);
01703      *this = join( lst );
01704      return true;
01705   }
01706 
01707   // absolute path ?
01708   if ( _dir[0] == '/' )
01709   {
01710     m_strPath_encoded = QString::null;
01711     m_strPath = _dir;
01712     setHTMLRef( QString::null );
01713     m_strQuery_encoded = QString::null;
01714     return true;
01715   }
01716 
01717   // Users home directory on the local disk ?
01718   if ( ( _dir[0] == '~' ) && ( m_strProtocol == fileProt ))
01719   {
01720     m_strPath_encoded = QString::null;
01721     m_strPath = QDir::homeDirPath();
01722     m_strPath += "/";
01723     m_strPath += _dir.right(m_strPath.length() - 1);
01724     setHTMLRef( QString::null );
01725     m_strQuery_encoded = QString::null;
01726     return true;
01727   }
01728 
01729   // relative path
01730   // we always work on the past of the first url.
01731   // Sub URLs are not touched.
01732 
01733   // append '/' if necessary
01734   QString p = path(1);
01735   p += _dir;
01736   p = cleanpath( p, true, false );
01737   setPath( p );
01738 
01739   setHTMLRef( QString::null );
01740   m_strQuery_encoded = QString::null;
01741 
01742   return true;
01743 }
01744 
01745 KURL KURL::upURL( ) const
01746 {
01747   if (!query().isEmpty())
01748   {
01749      KURL u(*this);
01750      u._setQuery(QString::null);
01751      return u;
01752   };
01753 
01754   if (!hasSubURL())
01755   {
01756      KURL u(*this);
01757      u.cd("../");
01758      return u;
01759   }
01760 
01761   // We have a subURL.
01762   KURL::List lst = split( *this );
01763   if (lst.isEmpty())
01764       return KURL(); // Huh?
01765   while (true)
01766   {
01767      KURL &u = lst.last();
01768      QString old = u.path();
01769      u.cd("../");
01770      if (u.path() != old)
01771          break; // Finshed.
01772      if (lst.count() == 1)
01773          break; // Finished.
01774      lst.remove(lst.fromLast());
01775   }
01776   return join( lst );
01777 }
01778 
01779 QString KURL::htmlRef() const
01780 {
01781   if ( !hasSubURL() )
01782   {
01783     return decode( ref() );
01784   }
01785 
01786   List lst = split( *this );
01787   return decode( (*lst.begin()).ref() );
01788 }
01789 
01790 QString KURL::encodedHtmlRef() const
01791 {
01792   if ( !hasSubURL() )
01793   {
01794     return ref();
01795   }
01796 
01797   List lst = split( *this );
01798   return (*lst.begin()).ref();
01799 }
01800 
01801 void KURL::setHTMLRef( const QString& _ref )
01802 {
01803   if ( !hasSubURL() )
01804   {
01805     m_strRef_encoded = encode( _ref, true, 0 /*?*/);
01806     return;
01807   }
01808 
01809   List lst = split( *this );
01810 
01811   (*lst.begin()).setRef( encode( _ref, true, 0 /*?*/) );
01812 
01813   *this = join( lst );
01814 }
01815 
01816 bool KURL::hasHTMLRef() const
01817 {
01818   if ( !hasSubURL() )
01819   {
01820     return hasRef();
01821   }
01822 
01823   List lst = split( *this );
01824   return (*lst.begin()).hasRef();
01825 }
01826 
01827 void
01828 KURL::setProtocol( const QString& _txt )
01829 {
01830    m_strProtocol = _txt;
01831    if ( m_iUriMode == Auto ) m_iUriMode = uriModeForProtocol( m_strProtocol );
01832    m_bIsMalformed = false;
01833 }
01834 
01835 void
01836 KURL::setUser( const QString& _txt )
01837 {
01838    m_strUser = _txt;
01839 }
01840 
01841 void
01842 KURL::setPass( const QString& _txt )
01843 {
01844    m_strPass = _txt;
01845 }
01846 
01847 void
01848 KURL::setHost( const QString& _txt )
01849 {
01850   if ( m_iUriMode == Auto )
01851     m_iUriMode = URL;
01852   switch ( m_iUriMode )
01853   {
01854   case URL:
01855 #ifndef KDE_QT_ONLY
01856    m_strHost = KIDNA::toUnicode(_txt);
01857    if (m_strHost.isEmpty())
01858       m_strHost = _txt.lower(); // Probably an invalid hostname, but...
01859 #else
01860    m_strHost = _txt.lower();
01861 #endif
01862     break;
01863   default:
01864     m_strHost = _txt;
01865     break;
01866   }
01867 }
01868 
01869 void
01870 KURL::setPort( unsigned short int _p )
01871 {
01872    m_iPort = _p;
01873 }
01874 
01875 void KURL::setPath( const QString & path )
01876 {
01877   if (isEmpty())
01878     m_bIsMalformed = false;
01879   if (m_strProtocol.isEmpty())
01880   {
01881     m_strProtocol = fileProt;
01882   }
01883   m_strPath = path;
01884   m_strPath_encoded = QString::null;
01885   if ( m_iUriMode == Auto )
01886     m_iUriMode = URL;
01887 }
01888 
01889 void KURL::setDirectory( const QString &dir)
01890 {
01891   if ( dir.endsWith("/"))
01892      setPath(dir);
01893   else
01894      setPath(dir+"/");
01895 }
01896 
01897 void KURL::setQuery( const QString &_txt, int encoding_hint)
01898 {
01899    if (_txt[0] == '?')
01900       _setQuery( _txt.mid(1), encoding_hint );
01901    else
01902       _setQuery( _txt, encoding_hint );
01903 }
01904 
01905 // This is a private function that expects a query without '?'
01906 void KURL::_setQuery( const QString &_txt, int encoding_hint)
01907 {
01908    m_strQuery_encoded = _txt;
01909    if (!_txt.length())
01910       return;
01911 
01912    int l = m_strQuery_encoded.length();
01913    int i = 0;
01914    QString result;
01915    while (i < l)
01916    {
01917       int s = i;
01918       // Re-encode. Break encoded string up according to the reserved
01919       // characters '&:;=/?' and re-encode part by part.
01920       while(i < l)
01921       {
01922          char c = m_strQuery_encoded[i].latin1();
01923          if ((c == '&') || (c == ':') || (c == ';') ||
01924              (c == '=') || (c == '/') || (c == '?'))
01925             break;
01926          i++;
01927       }
01928       if (i > s)
01929       {
01930          QString tmp = m_strQuery_encoded.mid(s, i-s);
01931          QString newTmp;
01932          decode( tmp, newTmp, tmp, encoding_hint, false );
01933          result += tmp;
01934       }
01935       if (i < l)
01936       {
01937          result += m_strQuery_encoded[i];
01938          i++;
01939       }
01940    }
01941    m_strQuery_encoded = result;
01942 }
01943 
01944 QString KURL::query() const
01945 {
01946     if (m_strQuery_encoded.isNull())
01947         return QString::null;
01948     return '?'+m_strQuery_encoded;
01949 }
01950 
01951 QString KURL::decode_string(const QString &str, int encoding_hint)
01952 {
01953    return decode(str, encoding_hint);
01954 }
01955 
01956 QString KURL::encode_string(const QString &str, int encoding_hint)
01957 {
01958    return encode(str, false, encoding_hint);
01959 }
01960 
01961 QString KURL::encode_string_no_slash(const QString &str, int encoding_hint)
01962 {
01963    return encode(str, true, encoding_hint);
01964 }
01965 
01966 bool urlcmp( const QString& _url1, const QString& _url2 )
01967 {
01968   // Both empty ?
01969   if ( _url1.isEmpty() && _url2.isEmpty() )
01970     return true;
01971   // Only one empty ?
01972   if ( _url1.isEmpty() || _url2.isEmpty() )
01973     return false;
01974 
01975   KURL::List list1 = KURL::split( _url1 );
01976   KURL::List list2 = KURL::split( _url2 );
01977 
01978   // Malformed ?
01979   if ( list1.isEmpty() || list2.isEmpty() )
01980     return false;
01981 
01982   return ( list1 == list2 );
01983 }
01984 
01985 bool urlcmp( const QString& _url1, const QString& _url2, bool _ignore_trailing, bool _ignore_ref )
01986 {
01987   // Both empty ?
01988   if ( _url1.isEmpty() && _url2.isEmpty() )
01989     return true;
01990   // Only one empty ?
01991   if ( _url1.isEmpty() || _url2.isEmpty() )
01992     return false;
01993 
01994   KURL::List list1 = KURL::split( _url1 );
01995   KURL::List list2 = KURL::split( _url2 );
01996 
01997   // Malformed ?
01998   if ( list1.isEmpty() || list2.isEmpty() )
01999     return false;
02000 
02001   unsigned int size = list1.count();
02002   if ( list2.count() != size )
02003     return false;
02004 
02005   if ( _ignore_ref )
02006   {
02007     (*list1.begin()).setRef(QString::null);
02008     (*list2.begin()).setRef(QString::null);
02009   }
02010 
02011   KURL::List::Iterator it1 = list1.begin();
02012   KURL::List::Iterator it2 = list2.begin();
02013   for( ; it1 != list1.end() ; ++it1, ++it2 )
02014     if ( !(*it1).equals( *it2, _ignore_trailing ) )
02015       return false;
02016 
02017   return true;
02018 }
02019 
02020 QMap< QString, QString > KURL::queryItems( int options ) const {
02021   return queryItems(options, 0);
02022 }
02023 
02024 QMap< QString, QString > KURL::queryItems( int options, int encoding_hint ) const {
02025   if ( m_strQuery_encoded.isEmpty() )
02026     return QMap<QString,QString>();
02027 
02028   QMap< QString, QString > result;
02029   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02030   for ( QStringList::const_iterator it = items.begin() ; it != items.end() ; ++it ) {
02031     int equal_pos = (*it).find( '=' );
02032     if ( equal_pos > 0 ) { // = is not the first char...
02033       QString name = (*it).left( equal_pos );
02034       if ( options & CaseInsensitiveKeys )
02035     name = name.lower();
02036       QString value = (*it).mid( equal_pos + 1 );
02037       if ( value.isEmpty() )
02038     result.insert( name, QString::fromLatin1("") );
02039       else {
02040     // ### why is decoding name not necessary?
02041     value.replace( '+', ' ' ); // + in queries means space
02042     result.insert( name, decode_string( value, encoding_hint ) );
02043       }
02044     } else if ( equal_pos < 0 ) { // no =
02045       QString name = (*it);
02046       if ( options & CaseInsensitiveKeys )
02047     name = name.lower();
02048       result.insert( name, QString::null );
02049     }
02050   }
02051 
02052   return result;
02053 }
02054 
02055 QString KURL::queryItem( const QString& _item ) const
02056 {
02057   return queryItem( _item, 0 );
02058 }
02059 
02060 QString KURL::queryItem( const QString& _item, int encoding_hint ) const
02061 {
02062   QString item = _item + '=';
02063   if ( m_strQuery_encoded.length() <= 1 )
02064     return QString::null;
02065 
02066   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02067   unsigned int _len = item.length();
02068   for ( QStringList::ConstIterator it = items.begin(); it != items.end(); ++it )
02069   {
02070     if ( (*it).startsWith( item ) )
02071     {
02072       if ( (*it).length() > _len )
02073       {
02074         QString str = (*it).mid( _len );
02075         str.replace( '+', ' ' ); // + in queries means space.
02076         return decode_string( str, encoding_hint );
02077       }
02078       else // empty value
02079         return QString::fromLatin1("");
02080     }
02081   }
02082 
02083   return QString::null;
02084 }
02085 
02086 void KURL::removeQueryItem( const QString& _item )
02087 {
02088   QString item = _item + '=';
02089   if ( m_strQuery_encoded.length() <= 1 )
02090     return;
02091 
02092   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02093   for ( QStringList::Iterator it = items.begin(); it != items.end(); )
02094   {
02095     if ( (*it).startsWith( item ) || (*it == _item) )
02096     {
02097       QStringList::Iterator deleteIt = it;
02098       ++it;
02099       items.remove(deleteIt);
02100     }
02101     else
02102     {
02103        ++it;
02104     }
02105   }
02106   m_strQuery_encoded = items.join( "&" );
02107 }
02108 
02109 void KURL::addQueryItem( const QString& _item, const QString& _value, int encoding_hint )
02110 {
02111   QString item = _item + '=';
02112   QString value = encode( _value, true, encoding_hint );
02113 
02114   if (!m_strQuery_encoded.isEmpty())
02115      m_strQuery_encoded += '&';
02116   m_strQuery_encoded += item + value;
02117 }
02118 
02119 // static
02120 KURL KURL::fromPathOrURL( const QString& text )
02121 {
02122     if ( text.isEmpty() )
02123         return KURL();
02124     
02125     KURL url;
02126     if ( text[0] == '/' )
02127         url.setPath( text );
02128     else
02129         url = text;
02130 
02131     return url;
02132 }
02133 
02134 static QString _relativePath(const QString &base_dir, const QString &path, bool &isParent)
02135 {
02136    QString _base_dir(QDir::cleanDirPath(base_dir));
02137    QString _path(QDir::cleanDirPath(path.isEmpty() || (path[0] != '/') ? _base_dir+"/"+path : path));
02138 
02139    if (_base_dir.isEmpty())
02140       return _path;
02141 
02142    if (_base_dir[_base_dir.length()-1] != '/')
02143       _base_dir.append('/');
02144 
02145    QStringList list1 = QStringList::split('/', _base_dir);
02146    QStringList list2 = QStringList::split('/', _path);
02147                                                                    
02148    // Find where they meet
02149    uint level = 0;
02150    uint maxLevel = QMIN(list1.count(), list2.count());
02151    while((level < maxLevel) && (list1[level] == list2[level])) level++;
02152   
02153    QString result;                                                                       
02154    // Need to go down out of the first path to the common branch.
02155    for(uint i = level; i < list1.count(); i++)
02156       result.append("../");
02157 
02158    // Now up up from the common branch to the second path.
02159    for(uint i = level; i < list2.count(); i++)
02160       result.append(list2[i]).append("/");
02161 
02162    if ((level < list2.count()) && (path[path.length()-1] != '/'))
02163       result.truncate(result.length()-1);
02164 
02165    isParent = (level == list1.count());
02166 
02167    return result;
02168 }
02169 
02170 QString KURL::relativePath(const QString &base_dir, const QString &path, bool *isParent)
02171 {
02172    bool parent;
02173    QString result = _relativePath(base_dir, path, parent);
02174    if (parent)
02175       result.prepend("./");
02176       
02177    if (isParent)
02178       *isParent = parent;
02179    
02180    return result;
02181 }
02182 
02183 
02184 QString KURL::relativeURL(const KURL &base_url, const KURL &url, int encoding_hint)
02185 {
02186    if ((url.protocol() != base_url.protocol()) ||
02187        (url.host() != base_url.host()) ||
02188        (url.port() && url.port() != base_url.port()) ||
02189        (url.hasUser() && url.user() != base_url.user()) ||
02190        (url.hasPass() && url.pass() != base_url.pass()))
02191    {
02192       return url.url(0, encoding_hint);
02193    }
02194 
02195    QString relURL;
02196    
02197    if ((base_url.path() != url.path()) || (base_url.query() != url.query()))
02198    {
02199       bool dummy;
02200       QString basePath = base_url.directory(false, false);
02201       relURL = encode( _relativePath(basePath, url.path(), dummy), false, encoding_hint);
02202       relURL += url.query();
02203    }
02204 
02205    if ( url.hasRef() )
02206    {
02207       relURL += "#";
02208       relURL += url.ref();
02209    }
02210 
02211    if ( relURL.isEmpty() )
02212       return "./";
02213 
02214    return relURL;
02215 }
02216 
02217 int KURL::uriMode() const
02218 {
02219   return m_iUriMode;
02220 }
02221 
02222 KURL::URIMode KURL::uriModeForProtocol(const QString& protocol)
02223 {
02224 #ifndef KDE_QT_ONLY
02225     KURL::URIMode mode = Auto;
02226     if (KGlobal::_instance)
02227         mode = KProtocolInfo::uriParseMode(protocol);
02228     if (mode == Auto ) {
02229 #else
02230         KURL::URIMode mode = Auto;
02231 #endif
02232     if ( protocol == "ed2k" || protocol == "sig2dat" || protocol == "slsk" || protocol == "data" ) mode = RawURI;
02233     else if ( protocol == "mailto" ) mode = Mailto;
02234     else mode = URL;
02235 #ifndef KDE_QT_ONLY
02236     }
02237 #endif
02238     return mode;
02239 }
02240 
KDE Logo
This file is part of the documentation for kdecore Library Version 3.2.1.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Thu Mar 4 22:43:33 2004 by doxygen 1.3.6-20040222 written by Dimitri van Heesch, © 1997-2003