/***********************************************************************
 Merge Index for IISA (prototype)
                                   Copyright (C) 1998, Takuya NAKAYAMA
***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sufary.h"
#include "ix_code.h"
#include "strop.h"

/*
 * ID offsets of the input indexes, allocated and filled in main().
 * startid[0] is 0 and startid[k+1] is startid[k] plus (the #LAST_ID value
 * of the k-th input + 1).  startid[k] is added to every ID taken from the
 * k-th input; the element after the last input is written out as the
 * #LAST_ID of the merged .num file.
 */
unsigned int *startid;

/*
 * Print a short usage summary to stderr and terminate the program with
 * exit status 1.  This function does not return.
 */
void print_help (void)
{
    fprintf(stderr,
            "usage: [-o OUTPUT] INDEX [INDEX ...]\n"
            "  INDEX      base name of an input index; INDEX.num, INDEX.ix\n"
            "             and INDEX.ix.ary are read\n"
            "  -o OUTPUT  base name of the output (default: MGIND)\n");
    exit(1);
}

/*
 * Write the merged file list "<ofname>.num".
 *
 * The first line is "#LAST_ID=<startid[ifile_num]>".  After it, every
 * "<name> <id>" line of each input "<argv[i]>.num" (i = fname0 .. argc-1)
 * is copied with startid[k] added to its id, where k is the position of the
 * input on the command line (0 for argv[fname0]).  "#LAST_ID=" lines of the
 * inputs are dropped, and lines that do not parse as "<name> <id>" are
 * skipped.
 *
 * Exits with status 1 if a file cannot be opened or the output cannot be
 * written completely.
 *
 * NOTE(review): main() treats an input's #LAST_ID as (number of files - 1),
 * while the value written here is the accumulated number of files; confirm
 * whether "total - 1" was intended.
 */
void output_num_file(char* ofname, int ifile_num, int fname0,
                     int argc, char** argv)
{
    int i, j, id;
    FILE *fp, *nfp;
    char *oname, *iname;
    char buf[10000], fname[10000];

    oname = str_append(ofname, ".num");
    if ((nfp = fopen(oname, "w+")) == NULL) {
        fprintf (stderr, "cannot open file %s.\n", oname);
        exit(1);
    }

    fprintf (nfp, "#LAST_ID=%u\n", startid[ifile_num]);

    for (i = fname0, j = 0; i < argc; i++, j++) {
        /* open *.num */
        iname = str_append(argv[i], ".num");
        if ((fp = fopen(iname, "r")) == NULL) {
            fprintf (stderr, "cannot open file %s.\n", iname);
            exit(1);
        }
        free(iname);

        while (fgets(buf, sizeof buf, fp) != NULL) {
            if (strncmp(buf, "#LAST_ID=", 9) == 0) continue;
            /* the field width keeps the name inside fname[]; a line that
               does not yield both fields must not re-emit stale values */
            if (sscanf(buf, "%9999s %d", fname, &id) != 2) continue;
            id += startid[j];
            fprintf(nfp, "%s %d\n", fname, id);
        }
        fclose(fp);
    }

    /* fclose() flushes the stream, so a failed write shows up here */
    if (fclose(nfp) != 0) {
        fprintf (stderr, "cannot write file %s.\n", oname);
        exit(1);
    }
    free(oname);
}


/*
 * Starting at `ptr`, scan forward through the array mapped at sa->arymap
 * (viewed as an array of long text offsets) and return the first slot whose
 * offset is 0 or whose preceding text byte in sa->txtmap is 0x00.
 *
 * The scan stops before the slot at arymap + arraysize - 1; NULL is
 * returned when `ptr` is already at or past that slot, or when no matching
 * slot is found before it.
 */
long* blptr (SUFARY* sa, long* ptr)
{
    long *limit = (long*)sa->arymap + sa->arraysize - 1;

    for (; ptr < limit; ptr++) {
        long off = *ptr;

        /* offset 0 has no preceding byte, so it is tested on its own */
        if (off == 0 || sa->txtmap[off - 1] == 0x00)
            return ptr;
    }
    return NULL;
}

/*
 * Merge the entries of the `fnum` opened indexes in `sa` and print the
 * merged index to stdout, one line per key:
 *
 *     <key> <count> <file-id> <score> <file-id> <score> ...
 *
 * Each input is walked through its array with blptr(); the key strings the
 * walk yields are compared with strcmp(), and all inputs currently
 * positioned on the smallest key contribute to the same output line.  File
 * IDs taken from input i are shifted by startid[i].  As soon as at most one
 * input still has entries, the remaining entries are written out without
 * further comparison.
 *
 * `ofname` is not used by this function.
 */
void merge_process (SUFARY** sa, int fnum, char* ofname)
{
    int i, j, rslt, mincheck[fnum], chknum;
    long *arypt[fnum];
    char *str[fnum], *minstr;
    int fnum_org = fnum;
    IxCode     code;
    IxCodeInfo info[fnum];
    int idnum;

    (void)ofname;

    /* position every input on its first entry; an input for which blptr()
       finds no entry is treated as already exhausted */
    for (i = 0; i < fnum_org; i++) {
        arypt[i] = blptr(sa[i], (long*)sa[i]->arymap);
        if (arypt[i] == NULL) {
            str[i] = NULL;
            fnum--;
        } else {
            str[i] = sa[i]->txtmap + (*arypt[i]);
        }
    }

    /*  merge sort */
    while (fnum > 1) {
        /* find the smallest current key; afterwards mincheck[i] == chknum
           holds exactly for the inputs positioned on that key */
        minstr = str[0];
        chknum = 1;
        for (i = 0; i < fnum_org; i++) {
            if (str[i] == NULL) {
                mincheck[i] = 0; continue;
            }
            if (minstr == NULL) {
                minstr = str[i]; mincheck[i] = ++chknum;
                continue;
            }
            rslt = strcmp(minstr, str[i]);
            if (rslt > 0) {
                minstr = str[i];
                mincheck[i] = ++chknum;
            } else if (rslt == 0) {
                mincheck[i] = chknum;
            } else {
                mincheck[i] = 0;
            }
        }
        printf("%s ", minstr);

        /* decode the entry of every input holding the smallest key and
           advance that input.  Every info[i] is written on each pass, even
           when inputs run out during the pass, so the output loop below
           never sees a stale (already freed) entry and equal keys are
           always merged into one line. */
        idnum = 0;
        for (i = 0; i < fnum_org; i++) {
            if (mincheck[i] < chknum) {
                info[i].num = 0;
                continue;
            }
            code.buf = sa[i]->txtmap + (*arypt[i]);
            info[i] = ix_decode(code);
            idnum += info[i].num;

            arypt[i]++;
            if ((arypt[i] = blptr(sa[i], arypt[i])) == NULL) {
                fnum--;
                str[i] = NULL;
            } else {
                str[i] = sa[i]->txtmap + (*arypt[i]);
            }
        }
        printf("%d", idnum);
        for (i = 0; i < fnum_org; i++) {
            if (info[i].num < 1) continue;
            for (j = 0; j < info[i].num; j++) {
                printf(" %d %d",
                       info[i].files[j]+startid[i],
                       info[i].score[j]);
            }
            free(info[i].files);
            free(info[i].score);
        }
        printf("\n");
    }

    /* at most one input still has entries: copy them through */
    for (i = 0; i < fnum_org; i++)
        while (arypt[i] != NULL) {
            code.buf = sa[i]->txtmap + (*arypt[i]);
            info[0] = ix_decode(code);
            printf("%s %d", info[0].str, info[0].num);
            for (j = 0; j < info[0].num; j++) {
                printf(" %d %d",
                       info[0].files[j]+startid[i],
                       info[0].score[j]);
            }
            printf("\n");
            free(info[0].files);
            free(info[0].score);
            arypt[i]++;
            arypt[i] = blptr(sa[i], arypt[i]);
        }
}

/*
 * Entry point.
 *
 * Command line: [-o OUTPUT] INDEX [INDEX ...]
 *
 * For every INDEX the "#LAST_ID=" line of INDEX.num is read to build the
 * cumulative ID offsets in startid[], and INDEX.ix / INDEX.ix.ary are
 * opened with sa_openfiles().  The merged index is then printed by
 * merge_process() and the merged OUTPUT.num is written by
 * output_num_file().  OUTPUT defaults to "MGIND".
 *
 * Exits with status 1 on a usage error, an allocation failure, a file that
 * cannot be opened, or a .num file without a "#LAST_ID=" line; exits with
 * status 0 otherwise.
 */
int main (int argc, char** argv)
{
    SUFARY **sa;
    int    i, check, ifile_num, j, nfiles;
    FILE   *fp;
    char   *str1, *str2, buf[10000];
    int    fname0;
    char   *ofname = "MGIND";

    /* parse arguments */
    for (i = 1; i < argc; i++) {
        if (argv[i][0] != '-') {
            break;
        }
        switch (argv[i][1]) {
          case 'o':
            /* -o needs a value; without this check argv[argc] (NULL)
               would become the output name */
            if (i + 1 >= argc)
                print_help();
            i++;
            ofname = argv[i];
            break;
          default:
            print_help();
        }
    }
    if (i >= argc)
        print_help();
    else
        fprintf(stderr,"Output: %s.(ix|ix.ary|num)\n", ofname);

    ifile_num = argc-i;
    sa = malloc(sizeof *sa * ifile_num);
    /* one offset per input plus one trailing slot for the grand total */
    startid = malloc(sizeof *startid * (ifile_num + 1));
    if (sa == NULL || startid == NULL) {
        fprintf (stderr, "out of memory.\n");
        exit(1);
    }
    startid[0] = 0;

    j = 0; fname0 = i;
    for (; i < argc; i++, j++) {
        /* open *.num */
        str1 = str_append(argv[i], ".num");
        if ((fp = fopen(str1, "r")) == NULL) {
            fprintf (stderr, "cannot open file %s.\n", str1);
            exit(1);
        }
        free(str1);
        check = 0;
        while (fgets(buf, sizeof buf, fp) != NULL) {
            if (strncmp(buf, "#LAST_ID=", 9) == 0) {
                /* IDs of this input run from 0 to LAST_ID */
                nfiles = atoi(&(buf[9])) + 1;
                fprintf(stderr, "[%s] %d files ", argv[i], nfiles);
                startid[j+1] = startid[j] + nfiles;
                fprintf(stderr, "(%u)\n", startid[j]);
                check = 1;
                break;
            }
        }
        fclose(fp);
        if (check == 0) {
            fprintf (stderr, "cannot find string \"#LAST_ID=\" in %s",
                     argv[i]);
            exit(1);
        }
        /* open *.ix and *.ix.ary */
        str1 = str_append(argv[i], ".ix");
        str2 = str_append(argv[i], ".ix.ary");
        if  ((sa[j] = sa_openfiles(str1, str2)) == NULL) {
            fprintf (stderr, "cannot open file %s or %s.\n", str1, str2);
            exit(1);
        }
        free(str1); free(str2);
    }

    merge_process(sa, ifile_num, ofname);

    output_num_file(ofname, ifile_num, fname0, argc, argv);

    exit(0);
}
