System documentation of the GNU Image-Finding Tool

Main Page | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members

CAcSQLInvertedFile.h

00001 /* -*- mode: c++ -*- 
00002 */
00003 /* 
00004 
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License
00019     along with this program; if not, write to the Free Software
00020     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 
00022 */
00023 // -*- mode: c++ -*-
00024 
00025 
00026 class CXMLElement; // forward declaration: this header only uses CXMLElement by const reference (CAcInvertedFile constructor)
00027 
00047 #ifndef _CINVERTEDFILEACCESSOR
00048 #define _CINVERTEDFILEACCESSOR
00049 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
00050 #include <string>
00051 #include "libMRML/include/TID.h"
00052 #include "libMRML/include/CSelfDestroyPointer.h"
00053 #include "libMRML/include/CArraySelfDestroyPointer.h"
00054 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
00055 #include "CCollectionFrequencyList.h"
00056 #include "libGIFTAcInvertedFile/include/CADIHash.h"
00057 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
00058 #include <iostream>
00059 #include <fstream>
00060 #include <map>
00061 #include <vector>
00062 #ifdef HAS_HASH_MAP
00063 #include <hash_map>
00064 #else
00065 #define hash_map map
00066 #endif
00067 #include <functional>
00068 #include <algorithm>
00069 
00070 #include "libMRML/include/CMagic.h"
00071 
00072 
00073 typedef TID TFeatureID ; // feature IDs are plain TIDs; the alias only marks parameters that identify a feature
00074 
00081 class CAcInvertedFile:public CAcURL2FTS{  
00082 // Accessor derived from CAcURL2FTS. Its members name three files: an inverted file, an offset file and a feature description file.
00083 protected:
00085   TID mMaximumFeatureID; // presumably the largest feature ID known to this accessor (cf. getMaximumFeatureID()) -- TODO confirm
00088   CArraySelfDestroyPointer<char> mInvertedFileBuffer; // char array held via CArraySelfDestroyPointer; presumably an in-memory buffer for the inverted file -- confirm
00090   mutable CSelfDestroyPointer<istream> mInvertedFile; // input stream held via CSelfDestroyPointer; mutable, so const member functions may modify it
00091 
00093   mutable ifstream mOffsetFile; // input file stream; mutable, so const member functions may modify it
00094 
00096   ifstream mFeatureDescriptionFile; // input file stream (not mutable, unlike the two streams above)
00097 
00099   string mInvertedFileName; // file name; presumably the path mInvertedFile is opened on -- confirm
00100 
00102   string mOffsetFileName; // file name; presumably the path mOffsetFile is opened on -- confirm
00103 
00105   string mFeatureDescriptionFileName; // file name; presumably the path mFeatureDescriptionFile is opened on -- confirm
00106 
00108   typedef hash_map<TID,unsigned int> CIDToOffset;//new hash -- maps a TID to an unsigned int; hash_map is #defined to map when HAS_HASH_MAP is not set
00110   CIDToOffset mIDToOffset; // presumably feature ID -> offset into the inverted file (cf. writeOffsetFileElement) -- confirm
00111 
00113   mutable hash_map<TID,double> mFeatureToCollectionFrequency;//new hash -- TID -> double; mutable, so const member functions may modify it
00114 
00118   hash_map<TID,unsigned int> mFeatureDescription;//new hash_ -- TID -> unsigned int; presumably backs getFeatureDescription() -- confirm
00119 
00123   CADIHash mDocumentInformation; // NOTE(review): presumably the per-document data behind the DIDTo...() accessors -- confirm
00125 // Takes a feature ID, a position and an already open output stream; presumably writes one offset-file entry -- confirm in the implementation.
00128   void writeOffsetFileElement(TID inFeatureID,
00129                               int inPosition,
00130                               ostream& inOpenOffsetFile);
00132   CDocumentFrequencyList* getFeatureFile(string inFileName)const; // returns a raw CDocumentFrequencyList pointer for a file name; ownership is not visible from this header
00133 public:
00135   bool operator()()const; // nullary const call operator returning bool; NOTE(review): presumably reports whether the accessor is usable -- confirm
00136 
00151   CAcInvertedFile(const CXMLElement& inCollectionElement); // constructs the accessor from an XML element (by its name, the one describing the collection)
00153   bool init(bool); // the bool parameter is unnamed; its meaning and the meaning of the result are not visible here -- see the implementation
00154 
00156   ~CAcInvertedFile(); // destructor (non-trivial: declared explicitly, defined elsewhere)
00157   
00159   string IDToURL(TID inID)const; // takes an ID, returns a string (by name: the URL for that ID)
00160 
00162   TID URLToID(const string& inURL)const; // takes a URL string, returns a TID (by name: the inverse of IDToURL)
00163   
00167   CDocumentFrequencyList* FeatureToList(TFeatureID)const; // feature ID -> raw CDocumentFrequencyList pointer; ownership is not visible from this header
00168 
00170   CDocumentFrequencyList* URLToFeatureList(string inURL)const; // URL (taken by value) -> raw CDocumentFrequencyList pointer; ownership is not visible from this header
00171 
00173   CDocumentFrequencyList* DIDToFeatureList(TID inDID)const; // document ID -> raw CDocumentFrequencyList pointer; ownership is not visible from this header
00174 
00176 
00177 
00181   double FeatureToCollectionFrequency(TFeatureID)const; // feature ID -> double; presumably served from mFeatureToCollectionFrequency -- confirm
00182 
00184   unsigned int getFeatureDescription(TID inFeatureID)const; // feature ID -> unsigned int; presumably served from mFeatureDescription -- confirm
00186 
00190   double DIDToMaxDocumentFrequency(TID)const; // document ID -> double (by name: the maximum document frequency for that document)
00191 
00193   double DIDToDFSquareSum(TID)const; // document ID -> double (by name: a sum of squared document frequencies) -- exact formula not visible here
00194 
00196   double DIDToSquareDFLogICFSum(TID)const; // document ID -> double (by name: a squared DF * log ICF sum) -- exact formula not visible here
00198 
00199   /*@name Inverted File Generation and Consistency Checking*/
00201 
00209   bool generateInvertedFile(); // non-const; returns bool, presumably true on success -- confirm
00210 
00218   bool newGenerateInvertedFile(); // non-const; NOTE(review): how this differs from generateInvertedFile() is not visible from this header
00219 
00222   bool checkConsistency(); // non-const; returns bool, presumably true when the files are consistent -- confirm
00223 // NOTE(review): presumably checks whether the (feature, document, frequency) triple occurs in the inverted file stream -- confirm.
00227   bool findWithinStream(TID inFeatureID,
00228                         TID inDocumentID,
00229                         double inDocumentFrequency)const;
00230   
00232 
00234   TID getMaximumFeatureID()const; // returns a TID; presumably mMaximumFeatureID -- confirm
00242   list<TID>* getAllFeatureIDs()const; // returns a raw pointer to a list of TIDs; ownership is not visible here. NOTE(review): <list> is not included directly by this header
00243 };
00244 
00245 #endif

Need to discuss something? Want to contribute? Contact
help-gift@gnu.org. Generated using Doxygen.