00001 /* -*- mode: c++ -*- 00002 */ 00003 /* 00004 00005 GIFT, a flexible content based image retrieval system. 00006 Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program; if not, write to the Free Software 00020 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 00022 */ 00023 // -*- mode: c++ -*- 00024 00025 00026 class CXMLElement; 00027 00047 #ifndef _CINVERTEDFILEACCESSOR 00048 #define _CINVERTEDFILEACCESSOR 00049 #include "libGIFTAcInvertedFile/include/uses-declarations.h" 00050 #include <string> 00051 #include "libMRML/include/TID.h" 00052 #include "libMRML/include/CSelfDestroyPointer.h" 00053 #include "libMRML/include/CArraySelfDestroyPointer.h" 00054 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h" 00055 #include "CCollectionFrequencyList.h" 00056 #include "libGIFTAcInvertedFile/include/CADIHash.h" 00057 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h" 00058 #include <iostream> 00059 #include <fstream> 00060 #include <map> 00061 #include <vector> 00062 #ifdef HAS_HASH_MAP 00063 #include <hash_map> 00064 #else 00065 #define hash_map map 00066 #endif 00067 #include <functional> 00068 #include <algorithm> 00069 00070 #include "libMRML/include/CMagic.h" 00071 00072 00073 typedef TID TFeatureID ; 00074 00081 class CAcInvertedFile:public CAcURL2FTS{ 00082 00083 protected: 00085 TID mMaximumFeatureID; 00088 CArraySelfDestroyPointer<char> mInvertedFileBuffer; 00090 mutable CSelfDestroyPointer<istream> mInvertedFile; 00091 00093 mutable ifstream mOffsetFile; 00094 00096 ifstream mFeatureDescriptionFile; 00097 00099 string mInvertedFileName; 00100 00102 string mOffsetFileName; 00103 00105 string mFeatureDescriptionFileName; 00106 00108 typedef hash_map<TID,unsigned int> CIDToOffset;//new hash 00110 CIDToOffset mIDToOffset; 00111 00113 mutable hash_map<TID,double> mFeatureToCollectionFrequency;//new hash 00114 00118 hash_map<TID,unsigned int> mFeatureDescription;//new hash_ 00119 00123 CADIHash mDocumentInformation; 00125 00128 void writeOffsetFileElement(TID inFeatureID, 00129 int inPosition, 00130 ostream& inOpenOffsetFile); 00132 CDocumentFrequencyList* getFeatureFile(string inFileName)const; 00133 public: 00135 bool operator()()const; 00136 00151 CAcInvertedFile(const CXMLElement& inCollectionElement); 00153 bool init(bool); 00154 00156 ~CAcInvertedFile(); 00157 00159 string IDToURL(TID inID)const; 00160 00162 TID URLToID(const string& inURL)const; 00163 00167 CDocumentFrequencyList* FeatureToList(TFeatureID)const; 00168 00170 CDocumentFrequencyList* URLToFeatureList(string inURL)const; 00171 00173 CDocumentFrequencyList* DIDToFeatureList(TID inDID)const; 00174 00176 00177 00181 double FeatureToCollectionFrequency(TFeatureID)const; 00182 00184 unsigned int getFeatureDescription(TID inFeatureID)const; 00186 00190 double DIDToMaxDocumentFrequency(TID)const; 00191 00193 double DIDToDFSquareSum(TID)const; 00194 00196 double DIDToSquareDFLogICFSum(TID)const; 00198 00199 /*@name Inverted File Generation and Consistency Checking*/ 00201 00209 bool generateInvertedFile(); 00210 00218 bool newGenerateInvertedFile(); 00219 00222 bool checkConsistency(); 00223 00227 bool findWithinStream(TID inFeatureID, 00228 TID inDocumentID, 00229 double inDocumentFrequency)const; 00230 00232 00234 TID getMaximumFeatureID()const; 00242 list<TID>* getAllFeatureIDs()const; 00243 }; 00244 00245 #endif