/*
** Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
** ==========================================================================
**
** These functions should be prototyped in the Lynx version of HTML.c.
*/
#include "HTUtils.h"
#include "tcp.h"

#include "HTML.h"

#include "LYGlobalDefs.h"
#include "LYUtils.h"

#include "LYexit.h"
#include "LYLeaks.h"

/*
 * Release x with free() when it is non-NULL, then reset it to NULL.
 * The expansion is a bare `if' statement and evaluates x more than
 * once, so pass a simple lvalue and take care when placing it
 * directly ahead of an `else'.
 */
#define FREE(x) if (x) {free(x); x=NULL;}

/* Declare the character sets and variables.
 * There may be an unlimited number of sets.
 * The sets will be referenced using the
 * LYCharSets array in LYCharSets.c.
 *
 * Of these, the functions visible in this file only read
 * p_entity_values, indexing it with the same position as
 * HTML_dtd.entity_names to fetch an entity's translation.
 */
extern char ** LYCharSets[];
extern char * LYchar_set_names[];
extern CONST char * LYEntityNames[];
extern char ** p_entity_values;
extern int current_char_set;


/*
**  This function converts HTML entities within a string to
**  their translations in p_entity_values.  It also handles
**  decimal escapes (&#d, &#dd or &#ddd followed by a
**  non-alphanumeric character): values below 160 are stored as
**  the byte itself, values 160-255 are mapped to an entity name
**  with HTMLGetEntityName() and translated like a named entity
**  (or stored raw when HTUseRawLatin is set), and values that are
**  out of range are copied through unchanged.
**
**  The string is converted in place, on the assumption that
**  the conversion strings will not be longer than the entity
**  strings, such that the overall string will never grow.
**  This assumption is true for the current LYCharSets arrays.
**  Make sure it stays true! - FM
**
**  str		The NUL-terminated string to convert; may be NULL
**		or empty, in which case it is returned untouched.
**  plain_space	When set, no-break, em and en spaces (as entities,
**		as &#160;, or as the raw bytes 1, 2 and 160) are
**		written as an ordinary space.
**
**  Returns str.
**
**  The character that ends an entity is consumed together with
**  the entity unless it is '&' or '<' (or '>' for a decimal
**  escape below 160), in which case it is left in place to be
**  processed next.  A named entity that runs up to the end of
**  the string is also handled: the terminating NUL is never
**  stepped over.
**  NOTE(review): the "&#160" + plain_space branch always consumes
**  the character after the digits, even '&' or '<' - confirm
**  whether that is intended.
*/

PUBLIC char * LYUnEscapeEntities ARGS2(
	char *,	str,
	BOOLEAN, plain_space)
{
    char * p = str;	/* Read position. */
    char * q = str;	/* Write position; never ahead of p. */
    char * cp;
    char cpe;
    int len, value;
    int high, low, diff, i;
    extern BOOLEAN HTUseRawLatin;

    if (!str || *str == '\0')
        return str;

    while (*p) {
        if (*p == '&') {
            p++;
            len = strlen(p);
            /*
             * The len tests guarantee that the character following
             * the digits is not the string's terminating NUL.
             * Characters are cast to unsigned char because the
             * ctype functions are undefined for negative values.
             */
            if (*p == '#' && len > 2 && isdigit((unsigned char)*(p+1)) &&
                (!isalnum((unsigned char)*(p+2)) ||
                 (len > 3 && isdigit((unsigned char)*(p+2)) &&
                  (!isalnum((unsigned char)*(p+3)) ||
                   (len > 4 && isdigit((unsigned char)*(p+3)) &&
                    !isalnum((unsigned char)*(p+4))))))) {
                /*
                 * It's decimal escaped.  Temporarily terminate the
                 * digits so atoi() sees only them; cpe remembers
                 * the character that was overwritten.
                 */
                cp = ++p;
                while (isdigit((unsigned char)*p))
                    p++;
                cpe = *p;
                *p++ = '\0';
                value = atoi(cp);
                if (value == 160 && plain_space) {
                    *q++ = ' ';
                } else if (value > 255 ||
                           (value < 32 &&
                            value != 9 && value != 10 && value != 13) ||
                           (value > 126 && value < 160)) {
                    /*
                     * Illegal value.  Copy the escape through
                     * unchanged, including its terminator.
                     */
                    *q++ = '&';
                    *q++ = '#';
                    for (p = cp; *p; p++) {
                        *q++ = *p;
                    }
                    *q++ = cpe;
                    p++;
                } else if (value < 160 ||
                           HTUseRawLatin) {	 /* No conversion needed. */
                    *q++ = (unsigned char)value;
                    if (cpe == '&' || cpe == '<' || cpe == '>') {
                        /* Put the terminator back and reprocess it. */
                        p--;
                        *p = cpe;
                    }
                } else {	    /* Convert and handle as named entity */
                    CONST char * name;
                    value -= 160;
                    name = HTMLGetEntityName(value);
                    for (low = 0, high = HTML_dtd.number_of_entities;
                         high > low;
                         diff < 0 ? (low = i+1) : (high = i)) {
                        /* Binary search */
                        i = (low + (high-low)/2);
                        diff = strcmp(HTML_dtd.entity_names[i], name);
                        if (diff == 0) {
                            /*
                             * Found the entity.  Assume that the length
                             * of the value does not exceed the length of
                             * the raw entity, so that the overall string
                             * does not need to grow.  Make sure this stays
                             * true in the LYCharSets arrays. - FM
                             */
                            int j;
                            for (j = 0; p_entity_values[i][j]; j++)
                                *q++ = (unsigned char)(p_entity_values[i][j]);
                            break;
                        }
                    }
                    if (cpe == '&' || cpe == '<') {
                        /* Put the terminator back and reprocess it. */
                        p--;
                        *p = cpe;
                    }
                }
            } else if (isalnum((unsigned char)*p)) {
                /*
                 * Check for an HTML entity.  The name is terminated
                 * temporarily for strcmp(); cpe holds the character
                 * that is restored afterwards.
                 */
                cp = p;
                while (isalnum((unsigned char)*cp))
                    cp++;
                cpe = *cp;
                *cp = '\0';
                diff = 1;	/* "Not found" if the table is empty. */
                for (low = 0, high = HTML_dtd.number_of_entities;
                     high > low;
                     diff < 0 ? (low = i+1) : (high = i)) {
                    /* Binary search */
                    i = (low + (high-low)/2);
                    diff = strcmp(HTML_dtd.entity_names[i], p);
                    if (diff == 0) {
                        /*
                         * Found the entity.  Assume that the length
                         * of the value does not exceed the length of
                         * the raw entity, so that the overall string
                         * does not need to grow.  Make sure this stays
                         * true in the LYCharSets arrays. - FM
                         */
                        int j;
                        if (plain_space &&
                            (!strcmp("nbsp", HTML_dtd.entity_names[i]) ||
                             !strcmp("emsp", HTML_dtd.entity_names[i]) ||
                             !strcmp("ensp", HTML_dtd.entity_names[i])))
                            *q++ = ' ';
                        else
                            for (j = 0; p_entity_values[i][j]; j++)
                                *q++ = (unsigned char)(p_entity_values[i][j]);
                        *cp = cpe;
                        /*
                         * Resume at the terminator when it starts new
                         * markup, and also when it is the end of the
                         * string: stepping over the NUL would run the
                         * scan off the end of the buffer.
                         */
                        if (*cp == '&' || *cp == '<' || *cp == '\0')
                            p = cp;
                        else
                            p = (cp+1);
                        break;
                    }
                }
                *cp = cpe;
                if (diff != 0) {
                    /*
                     * Entity not found.  Emit the ampersand; the name
                     * itself is copied by the following iterations.
                     */
                    *q++ = '&';
                }

            } else {
                /*
                 * It's a raw ampersand.
                 */
                *q++ = '&';
            }
        } else {
            if (plain_space &&
                (*p == 1 || *p == 2 || ((unsigned char)*p) == 160)) {
                *q++ = ' ';
                p++;
            } else {
                *q++ = *p++;
            }
        }
    }

    *q = '\0';
    return str;
}

/*
**  This function reallocates an allocated string
**  with any 8-bit characters (>160) converted to
**  their HTML entity names and then translated
**  for the current character set. - FM
**
**  The byte 160 itself is replaced by the value 1, which
**  LYUnEscapeEntities() treats as a space when plain_space
**  is set.  Characters above 160 are left alone when
**  HTUseRawLatin is set.  A character whose entity name is
**  not present in HTML_dtd.entity_names is dropped.
**
**  str	Address of the string pointer.  Nothing is done when
**	*str is NULL or empty.  Otherwise *str is replaced by a
**	string built with StrAllocCopy()/StrAllocCat(), and the
**	original buffer is handed to free_and_clear().
**	NOTE(review): free_and_clear() is not defined in the
**	visible source; presumably it frees the buffer and
**	NULLs the pointer - confirm.
*/

PUBLIC void LYExpandString ARGS1(
	char **, str)
{
    char *p = *str;	/* Original buffer. */
    char *q = *str;	/* Start of the segment not yet appended. */
    CONST char *name;
    int i, j, value, high, low, diff;
    extern BOOLEAN HTUseRawLatin;

    if (!p || *p == '\0')
        return;

    *str = NULL;
    StrAllocCopy(*str, "");

    for (i = 0; p[i]; i++) {
        if (((unsigned char)p[i]) == 160) {
            /*
             * Store the marker in place (this used to be a
             * comparison, "p[i] == 1", which had no effect).  The
             * byte stays inside the pending segment and is
             * appended with it later.
             */
            p[i] = 1;
        } else if (((unsigned char)p[i]) > 160 && !HTUseRawLatin) {
            value = (int)(((unsigned char)p[i]) - 160);
            /*
             * Cut the pending segment at this character, append it,
             * and restart the segment just after the character.
             */
            p[i] = '\0';
            StrAllocCat(*str, q);
            q = &p[i+1];
            name = HTMLGetEntityName(value);
            for (low = 0, high = HTML_dtd.number_of_entities;
                 high > low;
                 diff < 0 ? (low = j+1) : (high = j)) {
                /* Binary search */
                j = (low + (high-low)/2);
                diff = strcmp(HTML_dtd.entity_names[j], name);
                if (diff == 0) {
                    StrAllocCat(*str, p_entity_values[j]);
                    break;
                }
            }
        }
    }
    /* Append whatever follows the last converted character. */
    StrAllocCat(*str, q);
    free_and_clear(&p);
    return;
}

/*
** Build the label for an OL TYPE="A" item: " A." for 1 (and
** anything lower) through " Z.", then "AA." to "ZZ.", then
** "AAA." to "ZZZ." (18278).  Larger sequence numbers all
** yield "ZZZ.".  The result lives in a static buffer that is
** overwritten by the next call. - FM
*/
PUBLIC char *LYUppercaseA_OL_String ARGS1(
	int, seqnum)
{
    static char OLstring[8];
    int first, second, last;

    if (seqnum <= 1) {
	strcpy(OLstring, " A.");
    } else if (seqnum >= 18279) {
	strcpy(OLstring, "ZZZ.");
    } else {
	/* Rightmost letter as 1..26; adding 64 gives 'A'..'Z'. */
	last = seqnum - ((seqnum - 1) / 26) * 26;
	if (seqnum < 27) {
	    sprintf(OLstring, " %c.", last + 64);
	} else if (seqnum < 703) {
	    first = (seqnum - 1) / 26;
	    sprintf(OLstring, "%c%c.", first + 64, last + 64);
	} else {
	    first = (seqnum - 27) / 676;
	    second = (seqnum - first * 676 - 1) / 26;
	    sprintf(OLstring, "%c%c%c.", first + 64, second + 64, last + 64);
	}
    }
    return OLstring;
}

/*
** Build the label for an OL TYPE="a" item: " a." for 1 (and
** anything lower) through " z.", then "aa." to "zz.", then
** "aaa." to "zzz." (18278).  Larger sequence numbers all
** yield "zzz.".  The result lives in a static buffer that is
** overwritten by the next call. - FM
*/
PUBLIC char *LYLowercaseA_OL_String ARGS1(
	int, seqnum)
{
    static char OLstring[8];
    int lead, middle, tail;

    if (seqnum <= 1) {
	strcpy(OLstring, " a.");
    } else if (seqnum >= 18279) {
	strcpy(OLstring, "zzz.");
    } else {
	/* Rightmost letter as 1..26; adding 96 gives 'a'..'z'. */
	tail = seqnum - ((seqnum - 1) / 26) * 26;
	if (seqnum < 27) {
	    sprintf(OLstring, " %c.", tail + 96);
	} else if (seqnum < 703) {
	    lead = (seqnum - 1) / 26;
	    sprintf(OLstring, "%c%c.", lead + 96, tail + 96);
	} else {
	    lead = (seqnum - 27) / 676;
	    middle = (seqnum - lead * 676 - 1) / 26;
	    sprintf(OLstring, "%c%c%c.", lead + 96, middle + 96, tail + 96);
	}
    }
    return OLstring;
}

/*
** This function returns OL TYPE="I" strings in the
** range of " I." (1) to "MMM." (3000). - FM
**
** seqnum	The item number.  Values of 3000 and above all give
**		"MMM."; values below 1 give just ".".
**
** The values that need a single numeral (1, 5, 10, 50, 100, 500
** and 1000) are returned with a leading space; every other value
** is returned without padding.  The result lives in a static
** buffer that is overwritten by the next call.
*/
PUBLIC char *LYUppercaseI_OL_String ARGS1(
	int, seqnum)
{
    /*
     * The longest result is "MMDCCCLXXXVIII." (2888): 15 characters
     * plus the terminating NUL.  The former 8-byte buffer was
     * overrun from 38 ("XXXVIII.") onwards.
     */
    static char OLstring[20];
    int Arabic = seqnum;

    if (Arabic >= 3000) {
        strcpy(OLstring, "MMM.");
        return OLstring;
    }

    switch (Arabic) {
    case 1:
        strcpy(OLstring, " I.");
        return OLstring;
    case 5:
        strcpy(OLstring, " V.");
        return OLstring;
    case 10:
        strcpy(OLstring, " X.");
        return OLstring;
    case 50:
        strcpy(OLstring, " L.");
        return OLstring;
    case 100:
        strcpy(OLstring, " C.");
        return OLstring;
    case 500:
        strcpy(OLstring, " D.");
        return OLstring;
    case 1000:
        strcpy(OLstring, " M.");
        return OLstring;
    default:
        OLstring[0] = '\0';
        break;
    }

    while (Arabic >= 1000) {
        strcat(OLstring, "M");
        Arabic -= 1000;
    }

    if (Arabic >= 900) {
        strcat(OLstring, "CM");
        Arabic -= 900;
    }

    /* Below 900 here, so at most 399 remains after a "D". */
    if (Arabic >= 500) {
        strcat(OLstring, "D");
        Arabic -= 500;
    }

    if (Arabic >= 400) {
        strcat(OLstring, "CD");
        Arabic -= 400;
    }

    while (Arabic >= 100) {
        strcat(OLstring, "C");
        Arabic -= 100;
    }

    if (Arabic >= 90) {
        strcat(OLstring, "XC");
        Arabic -= 90;
    }

    /* Below 90 here, so at most 39 remains after an "L". */
    if (Arabic >= 50) {
        strcat(OLstring, "L");
        Arabic -= 50;
    }

    if (Arabic >= 40) {
        strcat(OLstring, "XL");
        Arabic -= 40;
    }

    /*
     * Stop at 10 rather than 0: the final ten is written by the
     * switch below together with the trailing period.
     */
    while (Arabic > 10) {
        strcat(OLstring, "X");
        Arabic -= 10;
    }

    switch (Arabic) {
    case 1:
        strcat(OLstring, "I.");
        break;
    case 2:
        strcat(OLstring, "II.");
        break;
    case 3:
        strcat(OLstring, "III.");
        break;
    case 4:
        strcat(OLstring, "IV.");
        break;
    case 5:
        strcat(OLstring, "V.");
        break;
    case 6:
        strcat(OLstring, "VI.");
        break;
    case 7:
        strcat(OLstring, "VII.");
        break;
    case 8:
        strcat(OLstring, "VIII.");
        break;
    case 9:
        strcat(OLstring, "IX.");
        break;
    case 10:
        strcat(OLstring, "X.");
        break;
    default:
        /* Nothing left (or seqnum was below 1): just the period. */
        strcat(OLstring, ".");
        break;
    }

    return OLstring;
}

/*
** This function returns OL TYPE="i" strings in the
** range of " i." (1) to "mmm." (3000). - FM
**
** seqnum	The item number.  Values of 3000 and above all give
**		"mmm."; values below 1 give just ".".
**
** The values that need a single numeral (1, 5, 10, 50, 100, 500
** and 1000) are returned with a leading space; every other value
** is returned without padding.  The result lives in a static
** buffer that is overwritten by the next call.
*/
PUBLIC char *LYLowercaseI_OL_String ARGS1(
	int, seqnum)
{
    /*
     * The longest result is "mmdccclxxxviii." (2888): 15 characters
     * plus the terminating NUL.  The former 8-byte buffer was
     * overrun from 38 ("xxxviii.") onwards.
     */
    static char OLstring[20];
    int Arabic = seqnum;

    if (Arabic >= 3000) {
        strcpy(OLstring, "mmm.");
        return OLstring;
    }

    switch (Arabic) {
    case 1:
        strcpy(OLstring, " i.");
        return OLstring;
    case 5:
        strcpy(OLstring, " v.");
        return OLstring;
    case 10:
        strcpy(OLstring, " x.");
        return OLstring;
    case 50:
        strcpy(OLstring, " l.");
        return OLstring;
    case 100:
        strcpy(OLstring, " c.");
        return OLstring;
    case 500:
        strcpy(OLstring, " d.");
        return OLstring;
    case 1000:
        strcpy(OLstring, " m.");
        return OLstring;
    default:
        OLstring[0] = '\0';
        break;
    }

    while (Arabic >= 1000) {
        strcat(OLstring, "m");
        Arabic -= 1000;
    }

    if (Arabic >= 900) {
        strcat(OLstring, "cm");
        Arabic -= 900;
    }

    /* Below 900 here, so at most 399 remains after a "d". */
    if (Arabic >= 500) {
        strcat(OLstring, "d");
        Arabic -= 500;
    }

    if (Arabic >= 400) {
        strcat(OLstring, "cd");
        Arabic -= 400;
    }

    while (Arabic >= 100) {
        strcat(OLstring, "c");
        Arabic -= 100;
    }

    if (Arabic >= 90) {
        strcat(OLstring, "xc");
        Arabic -= 90;
    }

    /* Below 90 here, so at most 39 remains after an "l". */
    if (Arabic >= 50) {
        strcat(OLstring, "l");
        Arabic -= 50;
    }

    if (Arabic >= 40) {
        strcat(OLstring, "xl");
        Arabic -= 40;
    }

    /*
     * Stop at 10 rather than 0: the final ten is written by the
     * switch below together with the trailing period.
     */
    while (Arabic > 10) {
        strcat(OLstring, "x");
        Arabic -= 10;
    }

    switch (Arabic) {
    case 1:
        strcat(OLstring, "i.");
        break;
    case 2:
        strcat(OLstring, "ii.");
        break;
    case 3:
        strcat(OLstring, "iii.");
        break;
    case 4:
        strcat(OLstring, "iv.");
        break;
    case 5:
        strcat(OLstring, "v.");
        break;
    case 6:
        strcat(OLstring, "vi.");
        break;
    case 7:
        strcat(OLstring, "vii.");
        break;
    case 8:
        strcat(OLstring, "viii.");
        break;
    case 9:
        strcat(OLstring, "ix.");
        break;
    case 10:
        strcat(OLstring, "x.");
        break;
    default:
        /* Nothing left (or seqnum was below 1): just the period. */
        strcat(OLstring, ".");
        break;
    }

    return OLstring;
}

