/* Copyright (C) 2006 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
// Takes a query and runs it, without a GUI, printing the results to stdout

#ifndef TEST_RECOLLQ
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/stat.h>
#include <limits.h>

#include <iostream>
#include <list>
#include <string>
using namespace std;

#include "rcldb.h"
#include "rclquery.h"
#include "rclconfig.h"
#include "pathut.h"
#include "rclinit.h"
#include "debuglog.h"
#include "wasastringtoquery.h"
#include "wasatorcl.h"
#include "internfile.h"
#include "wipedir.h"
#include "transcode.h"
#include "textsplit.h"
#include "smallut.h"
#include "base64.h"

// Convert the document described by idoc to text with a FileInterner set up
// in FIF_forPreview mode, and write the result to stdout. If the conversion
// fails, a diagnostic line showing the document url and ipath is written to
// stdout instead. The function returns true in both cases.
bool dump_contents(RclConfig *rclconfig, TempDir& tmpdir, Rcl::Doc& idoc)
{
    FileInterner converter(idoc, rclconfig, tmpdir,
                           FileInterner::FIF_forPreview);
    Rcl::Doc extracted;
    // Work on a copy of the ipath: the original value is still needed for
    // the failure message below.
    string subpath(idoc.ipath);

    if (!converter.internfile(extracted, subpath)) {
        cout << "Cant turn to text:" << idoc.url << " | " << idoc.ipath << endl;
        return true;
    }

    cout << extracted.text << endl;
    return true;
}

// Print one result line made of the requested field values. Each value is
// base64-encoded and followed by a single space; the line is terminated by
// endl. The special field name "abstract" is not read from doc.meta: its
// value is built by calling rcldb.makeDocAbstract() for the current query.
void output_fields(const vector<string>fields, Rcl::Doc& doc,
		   Rcl::Query& query, Rcl::Db& rcldb)
{
    vector<string>::const_iterator cur = fields.begin();
    const vector<string>::const_iterator last = fields.end();

    while (cur != last) {
        const string& name = *cur;
        string encoded;

        if (name == "abstract") {
            string text;
            rcldb.makeDocAbstract(doc, &query, text);
            base64_encode(text, encoded);
        } else {
            base64_encode(doc.meta[name], encoded);
        }

        cout << encoded << " ";
        ++cur;
    }

    cout << endl;
}

// Program name as invoked; set from argv[0] at the start of recollq() and
// printed as a prefix by Usage().
static char *thisprog;
// Help text printed by Usage(). It describes the query syntax and the
// command line options handled by the option switch in recollq().
static char usage [] =
" -P: Show the date span for all the documents present in the index\n"
" [-o|-a|-f] [-q] <query string>\n"
" Runs a recoll query and displays result lines. \n"
"  Default: will interpret the argument(s) as a xesam query string\n"
"    query may be like: \n"
"    implicit AND, Exclusion, field spec:    t1 -t2 title:t3\n"
"    OR has priority: t1 OR t2 t3 OR t4 means (t1 OR t2) AND (t3 OR t4)\n"
"    Phrase: \"t1 t2\" (needs additional quoting on cmd line)\n"
"  -o Emulate the GUI simple search in ANY TERM mode\n"
"  -a Emulate the GUI simple search in ALL TERMS mode\n"
"  -f Emulate the GUI simple search in filename mode\n"
"  -q is just ignored (compatibility with the recoll GUI command line)\n"
"Common options:\n"
"    -c <configdir> : specify config directory, overriding $RECOLL_CONFDIR\n"
"    -d also dump file contents\n"
"    -n [first-]<cnt> define the result slice. The default value for [first]\n"
"       is 0. Without the option, the default max count is 2000.\n"
"       Use n=0 for no limit\n"
"    -b : basic. Just output urls, no mime types or titles\n"
"    -Q : no result lines, just the processed query and result count\n"
"    -m : dump the whole document meta[] array for each result\n"
"    -A : output the document abstracts\n"
"    -S fld : sort by field <fld>\n"
"    -D : sort descending\n"
"    -i <dbdir> : additional index, several can be given\n"
"    -e use url encoding (%xx) for urls\n"
"    -F <field name list> : output exactly these fields for each result.\n"
"       The field values are encoded in base64, output in one line and \n"
"       separated by one space character. This is the recommended format \n"
"       for use by other programs. Use a normal query with option -m to \n"
"       see the field names.\n"
;
// Write the program name followed by the usage text to stderr, then
// terminate the process with exit status 1. Does not return.
static void
Usage(void)
{
    cerr << thisprog;
    cerr << ": usage:" << endl;
    cerr << usage;
    exit(1);
}

// CAUTION: keep these options compatible with the options of recoll:
// -q, -t and -l are accepted and ignored
// -a/f/o -c have the same meaning
// -h is not used

// Bit set of the command line options seen so far. Each OPT_x value below is
// a distinct single bit which the option switch in recollq() ORs into
// op_flags when the corresponding -x option letter is found.
static int     op_flags;
#define OPT_A     0x1
#define OPT_a     0x2
#define OPT_b     0x4
#define OPT_c     0x8
#define OPT_D     0x10
#define OPT_d     0x20
#define OPT_f     0x40
#define OPT_i     0x80
// OPT_l is recorded by the option switch but not tested in recollq()
#define OPT_l     0x100
#define OPT_m     0x200
#define OPT_n     0x400
#define OPT_o     0x800
#define OPT_P     0x1000
#define OPT_Q     0x2000
// OPT_q is recorded by the option switch but not tested in recollq()
#define OPT_q     0x4000
#define OPT_S     0x8000
#define OPT_s     0x10000
// OPT_t is recorded by the option switch but not tested in recollq()
#define OPT_t     0x20000
#define OPT_e     0x40000
#define OPT_F     0x80000

/**
 * Command line query entry point: parse the options, open the index, run
 * the query and print the results on stdout.
 *
 * @param cfp  receives the configuration object returned by recollinit().
 * @param argc argument count, argv[0] being the program name.
 * @param argv argument vector. Options come first; the remaining words are
 *             joined with single spaces to form the query string.
 * @return 0 on success, 1 if the query string can't be interpreted or if the
 *         temporary directory needed by -d is not usable. Usage errors,
 *         configuration failure and index open failure terminate the
 *         process through exit(1); -P prints the year span and calls exit()
 *         directly.
 */
int recollq(RclConfig **cfp, int argc, char **argv)
{
    string a_config;
    string sortfield;
    string stemlang("english");
    list<string> extra_dbs;
    string sf;
    vector<string> fields;

    int firstres = 0;
    int maxcount = 2000;
    thisprog = argv[0];
    argc--; argv++;

    // Option parsing. Several flag letters may be grouped in one argument.
    // Options taking a value consume the next argument and jump to b1 to
    // skip the rest of the current one.
    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv))
            /* Case of a lone "-" argument, as in "adb - core" */
            Usage();
        while (**argv)
            switch (*(*argv)++) {
            case 'A':   op_flags |= OPT_A; break;
            case 'a':   op_flags |= OPT_a; break;
            case 'b':   op_flags |= OPT_b; break;
            case 'c':   op_flags |= OPT_c; if (argc < 2)  Usage();
                a_config = *(++argv);
                argc--; goto b1;
            case 'd':   op_flags |= OPT_d; break;
            case 'D':   op_flags |= OPT_D; break;
            case 'e':   op_flags |= OPT_e; break;
            case 'f':   op_flags |= OPT_f; break;
            case 'F':   op_flags |= OPT_F; if (argc < 2)  Usage();
                sf = *(++argv);
                argc--; goto b1;
            case 'i':   op_flags |= OPT_i; if (argc < 2)  Usage();
                extra_dbs.push_back(*(++argv));
                argc--; goto b1;
            case 'l':   op_flags |= OPT_l; break;
            case 'm':   op_flags |= OPT_m; break;
            case 'n':   op_flags |= OPT_n; if (argc < 2)  Usage();
            {
                // Value is either "<cnt>" or "<first>-<cnt>". A missing
                // <cnt> after the dash keeps the default max count.
                string rescnt = *(++argv);
                string::size_type dash = rescnt.find("-");
                if (dash != string::npos) {
                    firstres = atoi(rescnt.substr(0, dash).c_str());
                    if (dash < rescnt.size()-1) {
                        maxcount = atoi(rescnt.substr(dash+1).c_str());
                    }
                } else {
                    maxcount = atoi(rescnt.c_str());
                }
                // A zero or negative count means no limit
                if (maxcount <= 0) maxcount = INT_MAX;
                // Keep the slice start sane: the bound computations below
                // rely on it not being negative.
                if (firstres < 0) firstres = 0;
            }
            argc--; goto b1;
            case 'o':   op_flags |= OPT_o; break;
            case 'P':   op_flags |= OPT_P; break;
            case 'q':   op_flags |= OPT_q; break;
            case 'Q':   op_flags |= OPT_Q; break;
            case 'S':   op_flags |= OPT_S; if (argc < 2)  Usage();
                sortfield = *(++argv);
                argc--; goto b1;
            case 's':   op_flags |= OPT_s; if (argc < 2)  Usage();
                stemlang = *(++argv);
                argc--; goto b1;
            case 't':   op_flags |= OPT_t; break;
            default: Usage();   break;
            }
    b1: argc--; argv++;
    }

    string reason;
    *cfp = recollinit(0, 0, reason, &a_config);
    RclConfig *rclconfig = *cfp;
    if (!rclconfig || !rclconfig->ok()) {
        fprintf(stderr, "Recoll init failed: %s\n", reason.c_str());
        exit(1);
    }

    // A query string is mandatory, except for -P which takes none.
    if (argc < 1 && !(op_flags & OPT_P)) {
        Usage();
    }
    if (op_flags & OPT_F) {
        // -F defines the whole output format: it can't be combined with
        // the other output selection options.
        if (op_flags & (OPT_b|OPT_d|OPT_Q|OPT_m|OPT_A))
            Usage();
        stringToStrings(sf, fields);
    }
    Rcl::Db rcldb(rclconfig);
    if (!extra_dbs.empty()) {
        for (list<string>::iterator it = extra_dbs.begin();
             it != extra_dbs.end(); ++it) {
            if (!rcldb.addQueryDb(*it)) {
                cerr << "Can't add index: " << *it << endl;
                exit(1);
            }
        }
    }

    if (!rcldb.open(Rcl::Db::DbRO)) {
        cerr << "Cant open database in " << rclconfig->getDbDir() <<
            " reason: " << rcldb.getReason() << endl;
        exit(1);
    }

    // -P: only print the year span of the index, then exit.
    if (op_flags & OPT_P) {
        int minyear, maxyear;
        if (!rcldb.maxYearSpan(&minyear, &maxyear)) {
            cerr << "maxYearSpan failed: " << rcldb.getReason() << endl;
            exit(1);
        } else {
            cout << "Min year " << minyear << " Max year " << maxyear << endl;
            exit(0);
        }
    }

    // Build the query string from all the remaining arguments. There is at
    // least one at this point: the argc check above exits when there is
    // none, and the -P case never gets here.
    string qs = *argv++;argc--;
    while (argc > 0) {
        qs += string(" ") + *argv++;argc--;
    }

    // Transcode the query from the configured default charset to UTF-8.
    {
        string uq;
        string charset = rclconfig->getDefCharset(true);
        int ercnt;
        if (!transcode(qs, uq, charset, "UTF-8", &ercnt)) {
            fprintf(stderr, "Can't convert command line args to utf-8\n");
            exit(1);
        } else if (ercnt) {
            fprintf(stderr, "%d errors while converting arguments from %s "
                    "to utf-8\n", ercnt, charset.c_str());
        }
        qs = uq;
    }

    Rcl::SearchData *sd = 0;

    if (op_flags & (OPT_a|OPT_o|OPT_f)) {
        // -a / -o / -f: build a single-clause search by hand
        sd = new Rcl::SearchData(Rcl::SCLT_OR);
        Rcl::SearchDataClause *clp = 0;
        if (op_flags & OPT_f) {
            clp = new Rcl::SearchDataClauseFilename(qs);
        } else {
            // If there is no white space inside the query, then the user
            // certainly means it as a phrase.
            bool isreallyaphrase = false;
            if (!TextSplit::hasVisibleWhite(qs))
                isreallyaphrase = true;
            clp = isreallyaphrase ?
                new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, qs, 0) :
                new Rcl::SearchDataClauseSimple((op_flags & OPT_o)?
                                                Rcl::SCLT_OR : Rcl::SCLT_AND,
                                                qs);
        }
        sd->addClause(clp);
    } else {
        // Default: hand the string to the query language parser
        sd = wasaStringToRcl(rclconfig, qs, reason);
    }

    if (!sd) {
        cerr << "Query string interpretation failed: " << reason << endl;
        return 1;
    }
    sd->setStemlang(stemlang);

    // sd is handed over to the reference-counted holder here.
    RefCntr<Rcl::SearchData> rq(sd);
    Rcl::Query query(&rcldb);
    if (op_flags & OPT_S) {
        query.setSortBy(sortfield, (op_flags & OPT_D) ? false : true);
    }
    Chrono chron;
    query.setQuery(rq);
    int cnt = query.getResCnt();
    if (!(op_flags & OPT_b)) {
        cout << "Recoll query: " << rq->getDescription() << endl;
        if (firstres == 0) {
            if (cnt <= maxcount)
                cout << cnt << " results" << endl;
            else
                cout << cnt << " results (printing  " << maxcount << " max):"
                     << endl;
        } else {
            // Number of results in the requested slice: what remains after
            // skipping firstres entries, capped by maxcount. Computed
            // without adding firstres to maxcount, which may be INT_MAX.
            int remaining = cnt > firstres ? cnt - firstres : 0;
            int atmost = remaining < maxcount ? remaining : maxcount;
            cout << "Printing at most " << atmost <<
                " results from first " << firstres << endl;
        }
    }
    if (op_flags & OPT_Q) {
        // -Q: no result lines wanted
        cout << "Query setup took " << chron.millis() << " mS" << endl;
        return(0);
    }

    // End of the result slice (exclusive). maxcount is INT_MAX when no
    // limit was requested, so clamp instead of overflowing the addition.
    const int lastres = (maxcount > INT_MAX - firstres) ?
        INT_MAX : firstres + maxcount;

    for (int i = firstres; i < lastres; i++) {
        Rcl::Doc doc;
        // getDoc() failing is taken as the end of the result list
        if (!query.getDoc(i, doc))
            break;

        if (op_flags & OPT_F) {
            output_fields(fields, doc, query, rcldb);
            continue;
        }

        if (op_flags & OPT_e)
            doc.url = url_encode(doc.url);

        if (op_flags & OPT_b) {
            cout << doc.url << endl;
        } else {
            // Show the title, or the file name if there is no title
            string titleorfn = doc.meta[Rcl::Doc::keytt];
            if (titleorfn.empty())
                titleorfn = doc.meta[Rcl::Doc::keyfn];

            cout
                << doc.mimetype << "\t"
                << "[" << doc.url << "]" << "\t"
                << "[" << titleorfn << "]" << "\t"
                << doc.fbytes << "\tbytes" << "\t"
                <<  endl;
            if (op_flags & OPT_m) {
                for (map<string,string>::const_iterator it = doc.meta.begin();
                     it != doc.meta.end(); ++it) {
                    cout << it->first << " = " << it->second << endl;
                }
            }
            if (op_flags & OPT_A) {
                string abstract;
                if (rcldb.makeDocAbstract(doc, &query, abstract)) {
                    cout << "ABSTRACT" << endl;
                    cout << abstract << endl;
                    cout << "/ABSTRACT" << endl;
                }
            }
        }
        if (op_flags & OPT_d) {
            // One temporary directory, created on first use and shared by
            // all the documents dumped during this run.
            static TempDir tmpdir;
            if (!tmpdir.ok()) {
                cerr << "Can't create temporary directory: " <<
                    tmpdir.getreason() << endl;
                return(1);
            }
            dump_contents(rclconfig, tmpdir, doc);
        }
    }

    return 0;
}

#else // TEST_RECOLLQ The test driver is actually the useful program...
#include <stdlib.h>

#include "rclconfig.h"
#include "recollq.h"

// Configuration object pointer, filled in by recollq() through its first
// parameter.
static RclConfig *rclconfig;

// Program entry point: all the work is done by recollq(), whose return
// value becomes the process exit status.
int main(int argc, char **argv)
{
    const int status = recollq(&rclconfig, argc, argv);
    return status;
}
#endif // TEST_RECOLLQ
