System documentation of the GNU Image-Finding Tool

Main Page | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members

CAcIFFileSystem.h

00001 /* -*- mode: c++ -*- 
00002 */
00003 /* 
00004 
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License
00019     along with this program; if not, write to the Free Software
00020     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 
00022 */
00023 // -*- mode: c++ -*-
00024 
00025 
00026 class CXMLElement;
00027 
00028 /*
00029 *
00030 * 
00031 *  This class manages the access to the inverted file as well 
00032 *    as its generation
00033 *
00034 *
00035 *
00036 * modification history:
00037 *
00038 * WM   1099 changed documentation format
00039 *           completed documentation
00040 * HM 090399 created the documentation
00041 * WM   1098 created the file
00042 *
00043 *
00044 *
00045 * compiler defines used:
00046 *
00047 *
00048 */
00049 
00050 #ifndef _CACIFFILESYSTEM
00051 #define _CACIFFILESYSTEM
00052 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
00053 #include <string>
00054 #include "libMRML/include/TID.h"
00055 #include "libMRML/include/CSelfDestroyPointer.h"
00056 #include "libMRML/include/CArraySelfDestroyPointer.h"
00057 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
00058 #include "libMRML/include/CMutex.h" // multi threading
00059 //#include "CCollectionFrequencyList.h"
00060 #include "libGIFTAcInvertedFile/include/CADIHash.h"
00061 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
00062 #include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
00063 #include <iostream>
00064 #include <fstream>
00065 #include <map>
00066 #include <vector>
00067 #ifdef HAS_HASH_MAP
00068 #include <hash_map>
00069 #define HASH_MAP hash_map
00070 #else
00071 #define HASH_MAP map
00072 #endif
00073 #include <functional>
00074 #include <algorithm>
00075 
00076 #include "libMRML/include/CMagic.h"
00077 
00078 
00079 typedef TID TFeatureID ; // feature IDs are an alias of TID (declared in libMRML/include/TID.h)
00080 
00091 class CAcIFFileSystem:public CAcInvertedFile{  // per the file header: manages access to the inverted file as well as its generation
00092 
00093 protected:
00095   CMutex mMutex; // mutex for multi-threaded use (see the CMutex.h include); what it guards is not visible in this header
00101   CSelfDestroyPointer<CAcURL2FTS> mURL2FTS; // presumably the URL <-> feature-file accessor this class delegates to -- TODO confirm
00103   TID mMaximumFeatureID; // presumably the value reported by getMaximumFeatureID() -- TODO confirm
00106 #ifndef V295
00107   string mInvertedFileBuffer; // default build (V295 not defined): the buffer is a string
00108 #else
00109   CArraySelfDestroyPointer<char> mInvertedFileBuffer; // V295 build: the buffer is a CArraySelfDestroyPointer<char>
00110 #endif
00111 
00113   string mTemporaryIndexingFileBase; // presumably a base name for temporary files used during indexing -- TODO confirm
00115   mutable CSelfDestroyPointer<istream> mInvertedFile; // mutable: may be changed from const member functions
00116 
00118   mutable ifstream mOffsetFile; // mutable: may be changed from const member functions
00119 
00121   ifstream mFeatureDescriptionFile; // input stream; presumably opened on mFeatureDescriptionFileName -- TODO confirm
00122 
00124   string mInvertedFileName; // presumably the path of the inverted file -- TODO confirm
00125 
00127   string mOffsetFileName; // presumably the path of the offset file -- TODO confirm
00128 
00130   string mFeatureDescriptionFileName; // presumably the path of the feature description file -- TODO confirm
00131 
00133   typedef HASH_MAP<TID,unsigned int> CIDToOffset;//new hash -- HASH_MAP is hash_map if HAS_HASH_MAP is defined, otherwise map
00135   CIDToOffset mIDToOffset; // maps a TID to an unsigned int; presumably feature ID -> offset in the inverted file -- TODO confirm
00136 
00138   mutable HASH_MAP<TID,double> mFeatureToCollectionFrequency;//new hash -- mutable: may be changed from const member functions
00139 
00143   HASH_MAP<TID,unsigned int> mFeatureDescription;//new hash_ -- presumably what getFeatureDescription() looks up; TODO confirm
00144 
00148   CADIHash mDocumentInformation; // presumably per-document data behind the DIDTo...() accessors -- TODO confirm
00150 
00153   void writeOffsetFileElement(TID inFeatureID, // presumably writes one (feature ID, position) entry to the given stream -- confirm against the implementation
00154                               int inPosition,
00155                               ostream& inOpenOffsetFile);
00157   CDocumentFrequencyList* getFeatureFile(string inFileName)const; // NOTE(review): returns a raw pointer; ownership is not visible here -- confirm who deletes it
00158 public:
00160   bool operator()()const; // function-call operator returning bool; meaning of the result is not visible here
00161 
00188   CAcIFFileSystem(const CXMLElement& inCollectionElement); // NOTE(review): single-argument constructor is not explicit, so a CXMLElement converts implicitly
00190   bool init(bool); // unnamed bool parameter; its meaning and the meaning of the return value are not visible here
00191 
00193   ~CAcIFFileSystem(); // not marked virtual here; NOTE(review): confirm the base class destructor is virtual
00194   
00196   string IDToURL(TID inID)const; // presumably the URL of the document with the given ID -- TODO confirm
00197 
00201   CDocumentFrequencyList* FeatureToList(TFeatureID)const; // NOTE(review): raw pointer return; ownership not visible here
00202 
00204   CDocumentFrequencyList* URLToFeatureList(string inURL)const; // NOTE(review): raw pointer return; ownership not visible here
00205 
00207   CDocumentFrequencyList* DIDToFeatureList(TID inDID)const; // NOTE(review): raw pointer return; ownership not visible here
00208 
00210 
00211 
00215   double FeatureToCollectionFrequency(TFeatureID)const; // presumably served from mFeatureToCollectionFrequency -- TODO confirm
00216 
00218   unsigned int getFeatureDescription(TID inFeatureID)const; // presumably served from mFeatureDescription -- TODO confirm
00220 
00224   double DIDToMaxDocumentFrequency(TID)const; // per-document value; presumably read from mDocumentInformation -- TODO confirm
00225 
00227   double DIDToDFSquareSum(TID)const; // per-document value; presumably read from mDocumentInformation -- TODO confirm
00228 
00230   double DIDToSquareDFLogICFSum(TID)const; // per-document value; presumably read from mDocumentInformation -- TODO confirm
00232 
00233   /*@name Inverted File Generation and Consistency Checking*/
00235 
00243   bool generateInvertedFile(); // non-const; meaning of the bool result is not visible here (presumably success) -- TODO confirm
00244 
00252   bool newGenerateInvertedFile(); // NOTE(review): how this differs from generateInvertedFile() is not visible in this header
00253 
00256   bool checkConsistency(); // non-const; meaning of the bool result is not visible here
00257 
00264   bool findWithinStream(TID inFeatureID, // presumably searches the inverted file stream for this (feature, document, frequency) triple -- TODO confirm
00265                         TID inDocumentID,
00266                         double inDocumentFrequency)const;
00267   
00269 
00275   virtual pair<bool,TID> URLToID(const string& inURL)const; // returns (bool, TID); presumably the bool flags whether the URL was found -- TODO confirm
00276   
00278   void getAllIDs(list<TID>&)const; // result is delivered through the list reference parameter
00281   void getAllAccessorElements(list<CAccessorElement>&)const; // result is delivered through the list reference parameter
00286   void getRandomIDs(list<TID>&, // presumably fills the list with up to the requested number of IDs -- TODO confirm
00287                     list<TID>::size_type)const;
00296   void getRandomAccessorElements(list<CAccessorElement>& outResult, // presumably fills outResult with up to inSize elements -- TODO confirm
00297                                   list<CAccessorElement>::size_type inSize)const;
00299   int size()const; // NOTE(review): what is counted is not visible here (presumably documents in the collection)
00301 
00302   TID getMaximumFeatureID()const; // presumably returns mMaximumFeatureID -- TODO confirm
00310   list<TID>* getAllFeatureIDs()const; // NOTE(review): returns a raw pointer to a list; ownership not visible here -- confirm who deletes it
00316   virtual pair<bool,CAccessorElement> IDToAccessorElement(TID inID)const; // returns (bool, element); presumably the bool flags whether the ID was found -- TODO confirm
00318   operator bool()const; // NOTE(review): non-explicit conversion to bool; its relation to operator()() is not visible here
00319 
00320 };
00321 
00322 #endif

Need for discussion? Want to contribute? Contact
help-gift@gnu.org. Generated using Doxygen.