System documentation of the GNU Image-Finding Tool

Main Page | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members

merge_sort_streams.h

00001 /* -*- mode: c++ -*- 
00002  */
00003 /* 
00004    
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva, University of Bayreuth
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License
00019     along with this program; if not, write to the Free Software
00020     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 
00022 */
00023 #ifndef _MERGE_SORT
00024 #define _MERGE_SORT
00025 
00026 //#warning FIXME: this needs to be added to the GIFT distro. It speeds up the generation
00027 //#warning of inverted files considerably
00028 //(done)
00029 
00030 #include <memory> // auto_ptr
00031 #include <algorithm> // using swap
00032 #include <fstream>   // file access
00033 #include <cassert>
00034 
00035 using namespace std;
00036 
00037 //#define STREAMPOS_T fstream::pos_type
00038 
00041 template<typename T>
00042 class CStreamPos{
00044   T mContent;
00045 public:
00046   CStreamPos(T inContent):mContent(inContent){
00047     
00048   }
00049 
00050   CStreamPos(long int inContent):mContent(inContent){
00051     
00052   }
00053 
00054   operator T()const{
00055     return mContent;
00056   }
00057   operator long int()const{
00058     return mContent;
00059   }
00060 
00061   CStreamPos<T>& operator++ (){
00062     mContent=mContent+static_cast<T>(1);
00063     return *this;
00064   }
00065   CStreamPos<T> operator++ (int){
00066     CStreamPos<T> lReturnValue=*this;
00067     mContent=mContent+static_cast<T>(1);
00068     return lReturnValue;
00069   }
00070 
00071   CStreamPos<T> operator% (int inMod)const{
00072     return mContent % inMod;
00073   }
00074   CStreamPos<T> operator/ (int inMod)const{
00075     return mContent / inMod;
00076   }
00077   bool operator< (CStreamPos<T>& inThan)const{
00078     return this->mContent < inThan.mContent;
00079   }
00080   bool operator! ()const{
00081     return !(this->mContent);
00082   }
00083   CStreamPos<T> operator+ (CStreamPos<T>& inSummand){
00084     return CStreamPos<T>(mContent + inSummand.mContent);
00085   }
00086 };
00087 
00088 #if __GNUC__==2
00089 #define STREAMPOS_T long long int
00090 #else
00091 #define STREAMPOS_T CStreamPos<fstream::pos_type>
00092 #endif
00093 // ex long long int
00094 
00105 template<class T>
00106 void merge_streams(istream& in1, const STREAMPOS_T inCount1,
00107                    istream& in2, const STREAMPOS_T inCount2,
00108                    ostream& out,
00109                    int inNumberOfPageElements=1){
00110 
00111   {
00112 //     cout << "Merging: " 
00113 //       << inCount1
00114 //       << ","
00115 //       << inCount2
00116 //       << endl;
00117     const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00118 
00119 
00120 
00121     if(!(inCount1)){// if there is nothing to merge
00122       for(STREAMPOS_T i=0;//...copy
00123           i<inCount2;
00124           i++){
00125         T l2;
00126         in2.read((char*)&l2,
00127                  sizeof(l2));
00128         assert(in2);
00129 
00130         out.write((char*)&l2,
00131                   sizeof(l2));
00132         assert(out);
00133       }
00134       return;//and return
00135     }
00136     if(!inCount2){//if there is nothing to merge
00137       for(STREAMPOS_T i=0;//...copy
00138           i<inCount1;
00139           i++){
00140         T l1;
00141         in1.read((char*)&l1,
00142                  sizeof(l1));
00143         assert(in1);
00144         out.write((char*)&l1,
00145                   sizeof(l1));
00146         assert(out);
00147       }
00148       return;//and return
00149     }
00150     
00151     STREAMPOS_T lI1(0);
00152     STREAMPOS_T lI2(0);
00153     
00154     //read the first record from both files
00155     T l1;
00156     in1.read((char*)&l1,
00157              sizeof(l1));
00158     assert(in1);
00159     T l2;
00160     in2.read((char*)&l2,
00161              sizeof(l2));
00162     assert(in2);
00163     
00164     while((lI1<inCount1)
00165           &&(lI2<inCount2)){
00166       if(l1<l2){
00167         //
00168         out.write((char*)&l1,
00169                   sizeof(l1));
00170         assert(out);
00171 
00172         //
00173         lI1++;
00174         if(lI1<inCount1){
00175           in1.read((char*)&l1,
00176                    sizeof(l1));
00177           assert(in1);
00178         }
00179       }else{
00180         //
00181         out.write((char*)&l2,
00182                   sizeof(l2));
00183         //
00184         lI2++;
00185         if(lI2<inCount2){
00186           in2.read((char*)&l2,
00187                    sizeof(l2));
00188           assert(in2);
00189         }
00190       }
00191     }
00192     while(lI1<inCount1){
00193       //
00194       out.write((char*)&l1,
00195                 sizeof(l1));
00196       //
00197       lI1++;
00198       if(lI1<inCount1){
00199         in1.read((char*)&l1,
00200                  sizeof(l1));
00201         assert(in1);
00202       }
00203     }
00204     while(lI2<inCount2){
00205       //
00206       out.write((char*)&l2,
00207                 sizeof(l2));
00208       assert(out);
00209       //
00210       lI2++;
00211       if(lI2<inCount2){
00212         in2.read((char*)&l2,
00213                  sizeof(l2));
00214         assert(in2);
00215       }
00216     }
00217   }
00218 }
00219 template<typename T>
00220 void first_level_quicksort(int inNumberOfPageElements,
00221                            const char* inFileToBeSortedName,
00222                            const char* inTemporaryName){
00223 
00224   cout << "Starting quicksort: "
00225        << inNumberOfPageElements 
00226        << " elements per page." << endl
00227        << "Sorting files " << inFileToBeSortedName << endl
00228        << "to            " << inTemporaryName << endl;
00229   cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
00230   auto_ptr<T> lPage(new T[inNumberOfPageElements]);
00231 
00232   cout << "H" << flush;  
00233 
00234   const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00235 
00236   cout << "I" << flush;
00237 
00238   STREAMPOS_T lFileSize(0);
00239   ifstream lToBeSorted1(inFileToBeSortedName);
00240   assert(lToBeSorted1);
00241   lToBeSorted1.seekg(0,
00242                     ios::end);
00243   lFileSize=lToBeSorted1.tellg();
00244   lToBeSorted1.clear();
00245   lToBeSorted1.seekg(0,
00246                     ios::beg);
00247   cout << "E" << flush;
00248 
00249   ofstream lTemporary(inTemporaryName);
00250   assert(lTemporary);
00251   cout << "R" << flush;
00252 
00253   STREAMPOS_T lSum(0);
00254 
00255   T* lBegin(lPage.get());
00256   T* lEnd(lPage.get());
00257   
00258   cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl;
00259   while((lSum<lFileSize)
00260         && lToBeSorted1){
00261 
00262     cout << "." << flush;
00263 
00264     int lRead(0);
00265     if(lSum+lPageSize < lFileSize){
00266       lToBeSorted1.read((char*)lPage.get(),
00267                         lPageSize);
00268       lRead=lPageSize;
00269     }else{
00270       lToBeSorted1.read((char*)lPage.get(),
00271                         lFileSize-lSum);      
00272       lRead=lFileSize-lSum;
00273     }
00274     if(lRead){
00275       lEnd=lBegin+(lRead)/sizeof(T);
00276       sort(lBegin,lEnd);
00277       lTemporary.write((char*)lPage.get(),lRead);
00278       assert(lTemporary);
00279     }
00280   }
00281   cout << "." << endl;
00282 
00283 }
00284 
00295 template<class T>
00296 char* merge_sort_streams(const char* inFileToBeSortedName,
00297                         const char* inTemporaryName,
00298                         int inNumberOfPageElements=(1 << 20)
00299                         ){
00300 
00301   const char* lFileToBeSortedName(inFileToBeSortedName);
00302   const char* lTemporaryName(inTemporaryName);
00303 
00304   STREAMPOS_T lFileSize(0);
00305   ifstream lToBeSorted1(inFileToBeSortedName);
00306   lToBeSorted1.seekg(0,
00307                     ios::end);
00308   lFileSize=lToBeSorted1.tellg();
00309   lToBeSorted1.close();
00310   
00311   ofstream lTemporary;
00312   ifstream lToBeSorted2;
00313  
00314 #ifdef first_level_quick
00315   first_level_quicksort<T>(inNumberOfPageElements,
00316                            inFileToBeSortedName,
00317                            inTemporaryName);
00318   swap(lFileToBeSortedName,
00319        lTemporaryName);
00320 #else 
00321   cout << "STARTING mit MERGESIZE1" << endl;
00322   inNumberOfPageElements=1;
00323 #endif
00324   STREAMPOS_T lCount(0);
00325   for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements);
00326       (iMergeSize < lFileSize)
00327         || !(lCount%2)
00328         // ||(lCount%2) makes sure that we will get 
00329         // the result in inFileToBeSorted
00330         // the ! is, because we have have already
00331         // the quicksort sorting pass behind us
00332         ;
00333       (iMergeSize = iMergeSize << 1),
00334         (lCount=lCount+static_cast<STREAMPOS_T>(1))){
00335     
00336     cout << "MERGESORT MergeSize " 
00337          << iMergeSize 
00338          << endl;
00339     
00340     lToBeSorted1.open(lFileToBeSortedName);
00341     lToBeSorted1.clear();
00342 
00343     lToBeSorted2.open(lFileToBeSortedName);
00344     lToBeSorted2.clear();
00345 
00346     lTemporary.open(lTemporaryName);
00347     lTemporary.clear();
00348     
00349     
00350     for(STREAMPOS_T i(0);
00351         i<lFileSize;
00352         i = i + iMergeSize*2){
00353       lToBeSorted1.seekg(i);
00354 
00355       if(!lToBeSorted1){
00356         cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg("
00357              << static_cast<long int>(i)
00358              << ")"
00359              << endl;
00360       }
00361       
00362       assert(lToBeSorted1);
00363 
00364       if(i+iMergeSize<lFileSize){
00365         lToBeSorted2.seekg(i+iMergeSize);
00366         assert(lToBeSorted2);
00367       }
00368 
00369       STREAMPOS_T lMergeSize1=lFileSize-i;
00370       if(lFileSize < i){// underflow
00371         lMergeSize1=0;//correct underflow
00372       }
00373       STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
00374       if(lFileSize < i + iMergeSize){// underflow of lMergeSize2
00375         lMergeSize2=0;//correct underflow
00376       }
00377       
00378       // lToBeSorted1.clear();
00379       // lToBeSorted2.clear();
00380       
00381       if(lMergeSize1>iMergeSize){
00382         lMergeSize1=iMergeSize;
00383       }
00384       if(lMergeSize2>iMergeSize){
00385         lMergeSize2=iMergeSize;
00386       }
00387 
00388 #if __GNUC__==2
00389       merge_streams<T>(lToBeSorted1,
00390                        lMergeSize1/sizeof(T),
00391                        lToBeSorted2,
00392                        lMergeSize2/sizeof(T),
00393                        lTemporary,
00394                        inNumberOfPageElements
00395                        );
00396 #else
00397       merge_streams<T>(lToBeSorted1,
00398                        lMergeSize1.operator/(sizeof(T)),
00399                        lToBeSorted2,
00400                        lMergeSize2.operator/(sizeof(T)),
00401                        lTemporary,
00402                        inNumberOfPageElements
00403                        );
00404 #endif
00405     }
00406     
00407     lTemporary.close();
00408     lToBeSorted1.close();
00409     lToBeSorted2.close();
00410     swap(lFileToBeSortedName,
00411          lTemporaryName);
00412     cout << "endmerge" << endl;
00413   }
00414   return strdup(lFileToBeSortedName);
00415 }
00416 #endif

Need for discussion? Want to contribute? Contact
help-gift@gnu.org Generated using Doxygen