OpenTREP Logo  0.08.01
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
opentrep-indexer.cpp
Go to the documentation of this file.
// STL
#include <cassert>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
// Boost (Extended STL)
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/program_options.hpp>
// OpenTREP
// NOTE(review): the three includes flagged "restored" below were absent from
// the listing this file was recovered from (its numbered lines 13, 17 and 18
// were skipped). They are needed for OPENTREP::OPENTREP_Service, the
// OPENTREP::DEFAULT_OPENTREP_* constants and
// OPENTREP::parseAndDisplayConnectionString() -- confirm the header paths
// against the installed OpenTREP headers.
#include <opentrep/OPENTREP_Service.hpp> // restored
#include <opentrep/Location.hpp>
#include <opentrep/DBType.hpp>
#include <opentrep/basic/BasConst_OPENTREP_Service.hpp> // restored
#include <opentrep/basic/Utilities.hpp> // restored
#include <opentrep/config/opentrep-paths.hpp>
20
21
// //////// Type definitions ///////
/**
 * List of words (e.g., the tokens of a travel request).
 */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/**
 * Default name of the log file, used when the "log" option is not given.
 */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME ("opentrep-indexer.log");

/**
 * Default value of the "noniata" option (whether the POR not referenced
 * by IATA are included).
 *
 * NOTE(review): the definition was missing from the recovered listing; the
 * value 'false' (only IATA-referenced POR) is reconstructed -- confirm
 * against the upstream file.
 */
const bool K_OPENTREP_DEFAULT_POR_INCLUDING = false;

/**
 * Status returned by readConfiguration() when the program only had to
 * print some information (help, version, prefix) and must stop without
 * indexing. It has to differ from 0, which is the normal status.
 *
 * NOTE(review): the definition was missing from the recovered listing; the
 * value 99 is reconstructed -- confirm against the upstream file.
 */
const int K_OPENTREP_EARLY_RETURN_STATUS = 99;
31
45
46
47// ///////// Parsing of Options & Configuration /////////
50
52int readConfiguration (int argc, char* argv[],
53 std::string& ioPORFilepath,
54 std::string& ioXapianDBFilepath,
55 std::string& ioSQLDBTypeString,
56 std::string& ioSQLDBConnectionString,
57 unsigned short& ioDeploymentNumber,
58 bool& ioIncludeNonIATAPOR,
59 bool& ioIndexPORInXapian,
60 bool& ioAddPORInDB,
61 std::string& ioLogFilename,
62 std::ostringstream& oStr) {
63
64 // Declare a group of options that will be allowed only on command line
65 boost::program_options::options_description generic ("Generic options");
66 generic.add_options()
67 ("prefix", "print installation prefix")
68 ("version,v", "print version string")
69 ("help,h", "produce help message");
70
71 // Declare a group of options that will be allowed both on command
72 // line and in config file
73 boost::program_options::options_description config ("Configuration");
74 config.add_options()
75 ("porfile,p",
76 boost::program_options::value< std::string >(&ioPORFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_POR_FILEPATH),
77 "POR file-path (e.g., optd_por_public.csv)")
78 ("xapiandb,d",
79 boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
80 "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
81 ("sqldbtype,t",
82 boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
83 "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, pg for PostgreSQL, mysql for MariaDB/MySQL)")
84 ("sqldbconx,s",
85 boost::program_options::value< std::string >(&ioSQLDBConnectionString),
86 "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, \"dbname=trep_trep user=trep password=trep\" for PostgreSQL, \"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
87 ("deploymentnb,m",
88 boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
89 "Deployment number (from to N, where N=1 normally)")
90 ("noniata,n",
91 boost::program_options::value<bool>(&ioIncludeNonIATAPOR)->default_value(K_OPENTREP_DEFAULT_POR_INCLUDING),
92 "Whether or not to include POR not referenced by IATA (0 = only IATA-referenced POR, 1 = all POR are included)")
93 ("xapianindex,x",
94 boost::program_options::value<bool>(&ioIndexPORInXapian)->default_value(OPENTREP::DEFAULT_OPENTREP_INDEX_IN_XAPIAN),
95 "Whether or not to index the POR in Xapian (0 = do not touch the Xapian index, 1 = re-index all the POR in Xapian)")
96 ("dbadd,a",
97 boost::program_options::value<bool>(&ioAddPORInDB)->default_value(OPENTREP::DEFAULT_OPENTREP_ADD_IN_DB),
98 "Whether or not to add and index the POR in the SQL-based database (0 = do not touch the SQL-based database, 1 = add and re-index all the POR in the SQL-based database)")
99 ("log,l",
100 boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
101 "Filepath for the logs")
102 ;
103
104 // Hidden options, will be allowed both on command line and
105 // in config file, but will not be shown to the user.
106 boost::program_options::options_description hidden ("Hidden options");
107 hidden.add_options()
108 ("copyright",
109 boost::program_options::value< std::vector<std::string> >(),
110 "Show the copyright (license)");
111
112 boost::program_options::options_description cmdline_options;
113 cmdline_options.add(generic).add(config).add(hidden);
114
115 boost::program_options::options_description config_file_options;
116 config_file_options.add(config).add(hidden);
117
118 boost::program_options::options_description visible ("Allowed options");
119 visible.add(generic).add(config);
120
121 boost::program_options::positional_options_description p;
122 p.add ("copyright", -1);
123
124 boost::program_options::variables_map vm;
125 boost::program_options::
126 store (boost::program_options::command_line_parser (argc, argv).
127 options (cmdline_options).positional(p).run(), vm);
128
129 std::ifstream ifs ("opentrep-indexer.cfg");
130 boost::program_options::store (parse_config_file (ifs, config_file_options),
131 vm);
132 boost::program_options::notify (vm);
133
134 if (vm.count ("help")) {
135 std::cout << visible << std::endl;
137 }
138
139 if (vm.count ("version")) {
140 std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
142 }
143
144 if (vm.count ("prefix")) {
145 std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
147 }
148
149 if (vm.count ("porfile")) {
150 ioPORFilepath = vm["porfile"].as< std::string >();
151 oStr << "POR file-path is: " << ioPORFilepath << std::endl;
152 }
153
154 if (vm.count ("deploymentnb")) {
155 ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
156 oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
157 }
158
159 if (vm.count ("xapiandb")) {
160 ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
161 oStr << "Xapian index/database filepath is: " << ioXapianDBFilepath
162 << ioDeploymentNumber << std::endl;
163 }
164
165 // Parse the SQL database type, if any is given
166 if (vm.count ("sqldbtype")) {
167 ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
168 oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
169 }
170
182 const OPENTREP::DBType lDBType (ioSQLDBTypeString);
183 if (lDBType == OPENTREP::DBType::NODB) {
184 ioAddPORInDB = false;
185 ioSQLDBConnectionString = "";
186
187 } else if (lDBType == OPENTREP::DBType::SQLITE3) {
188 ioAddPORInDB = true;
189 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
190
191 } else if (lDBType == OPENTREP::DBType::PG) {
192 ioAddPORInDB = true;
193 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_PG_CONN_STRING;
194
195 } else if (lDBType == OPENTREP::DBType::MYSQL) {
196 ioAddPORInDB = true;
197 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
198 }
199
200 // Set the SQL database connection string, if any is given
201 if (vm.count ("sqldbconx")) {
202 ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
203 }
204
205 // Reporting of the SQL database connection string
206 if (lDBType == OPENTREP::DBType::SQLITE3
207 || lDBType == OPENTREP::DBType::PG
208 || lDBType == OPENTREP::DBType::MYSQL) {
209 const std::string& lSQLDBConnString =
211 ioSQLDBConnectionString,
212 ioDeploymentNumber);
213 //
214 oStr << "SQL database connection string is: " << lSQLDBConnString
215 << std::endl;
216 }
217
218 oStr << "Are non-IATA-referenced POR included? " << ioIncludeNonIATAPOR
219 << std::endl;
220
221 oStr << "Index the POR in Xapian? " << ioIndexPORInXapian << std::endl;
222
223 oStr << "Add and re-index the POR in the SQL-based database? " << ioAddPORInDB
224 << std::endl;
225
226 if (vm.count ("log")) {
227 ioLogFilename = vm["log"].as< std::string >();
228 oStr << "Log filename is: " << ioLogFilename << std::endl;
229 }
230
231 return 0;
232}
233
234
235// /////////////// M A I N /////////////////
236int main (int argc, char* argv[]) {
237
238 // Output log File
239 std::string lLogFilename;
240
241 // File-path of POR (points of reference)
242 std::string lPORFilepathStr;
243
244 // Xapian database name (directory of the index)
245 std::string lXapianDBNameStr;
246
247 // SQL database type
248 std::string lSQLDBTypeStr;
249
250 // SQL database connection string
251 std::string lSQLDBConnectionStr;
252
253 // Deployment number/version
254 OPENTREP::DeploymentNumber_T lDeploymentNumber;
255
256 // Whether or not to include non-IATA-referenced POR
257 OPENTREP::shouldIndexNonIATAPOR_T lIncludeNonIATAPOR;
258
259 // Whether or not to index the POR in Xapian
260 OPENTREP::shouldIndexPORInXapian_T lShouldIndexPORInXapian;
261
262 // Whether or not to insert the POR in the SQL database
263 OPENTREP::shouldAddPORInSQLDB_T lShouldAddPORInSQLDB;
264
265 // Log stream for the introduction part
266 std::ostringstream oIntroStr;
267
268 // Call the command-line option parser
269 const int lOptionParserStatus =
270 readConfiguration (argc, argv, lPORFilepathStr, lXapianDBNameStr,
271 lSQLDBTypeStr, lSQLDBConnectionStr, lDeploymentNumber,
272 lIncludeNonIATAPOR, lShouldIndexPORInXapian,
273 lShouldAddPORInSQLDB, lLogFilename, oIntroStr);
274
275 if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
276 return 0;
277 }
278
279 // Set the log parameters
280 std::ofstream logOutputFile;
281 // open and clean the log outputfile
282 logOutputFile.open (lLogFilename.c_str());
283 logOutputFile.clear();
284
285 //
286 oIntroStr << "Parsing and indexing the OpenTravelData POR data file (into "
287 << "Xapian and/or SQL databases) may take a few tens of minutes "
288 << "on some architectures (and a few minutes on fastest ones)..."
289 << std::endl;
290 std::cout << oIntroStr.str();
291
292 // DEBUG
293 // Get the current time in UTC Timezone
294 boost::posix_time::ptime lTimeUTC =
295 boost::posix_time::second_clock::universal_time();
296 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
297 << __LINE__ << "]:Parameters:" << std::endl
298 << oIntroStr.str() << std::endl;
299
300 // Initialise the context
301 const OPENTREP::PORFilePath_T lPORFilepath (lPORFilepathStr);
302 const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
303 const OPENTREP::DBType lDBType (lSQLDBTypeStr);
304 const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
305 OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lPORFilepath,
306 lXapianDBName, lDBType,
307 lSQLDBConnStr, lDeploymentNumber,
308 lIncludeNonIATAPOR,
309 lShouldIndexPORInXapian,
310 lShouldAddPORInSQLDB);
311
312 // Launch the indexation
313 const OPENTREP::NbOfDBEntries_T lNbOfEntries =
314 opentrepService.insertIntoDBAndXapian();
315
316 //
317 std::ostringstream oStr;
318 oStr << lNbOfEntries << " entries have been processed" << std::endl;
319 std::cout << oStr.str();
320
321 // Get the current time in UTC Timezone
322 lTimeUTC = boost::posix_time::second_clock::universal_time();
323 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
324 << __LINE__ << "]:" << oStr.str() << std::endl;
325
326 // Close the Log outputFile
327 logOutputFile.close();
328
329 return 0;
330}
Interface for the OPENTREP Services.
NbOfDBEntries_T insertIntoDBAndXapian()
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
const std::string DEFAULT_OPENTREP_PG_CONN_STRING
bool shouldAddPORInSQLDB_T
unsigned int NbOfDBEntries_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
const bool DEFAULT_OPENTREP_INDEX_IN_XAPIAN
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
bool shouldIndexPORInXapian_T
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
const bool DEFAULT_OPENTREP_ADD_IN_DB
const std::string DEFAULT_OPENTREP_POR_FILEPATH
bool shouldIndexNonIATAPOR_T
const int K_OPENTREP_EARLY_RETURN_STATUS
const bool K_OPENTREP_DEFAULT_POR_INCLUDING
int main(int argc, char *argv[])
int readConfiguration(int argc, char *argv[], std::string &ioPORFilepath, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, bool &ioIncludeNonIATAPOR, bool &ioIndexPORInXapian, bool &ioAddPORInDB, std::string &ioLogFilename, std::ostringstream &oStr)
std::vector< std::string > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
Enumeration of database types.
Definition DBType.hpp:17