/***************************************************************************
                             html.c - HTML tools
                             -------------------
                     (C) 2002 by the Everybuddy team
                            www.everybuddy.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>

#include "html.h"

eb_encoded_data html_encoding[] = {
  {'<', "&lt;"}, {'>', "&gt;"}, {'&', "&amp;"}, {'"', "&quot;"}, {'\0', NULL}
};

/*
 * Convert the NUL-terminated string s to lower case, in place.
 * Exists because strcasecmp() isn't ANSI - argh!
 * A NULL s is ignored.
 */
void eb_html_strlower(char * s)
{
  char * p;

  if(s==NULL) { return; }

  for(p=s; *p!='\0'; p++)
  {
    // tolower() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative, so convert before the call
    *p=(char)tolower((unsigned char)*p);
  }
}

/*
 * Find the attribute named pname inside a tag and return a copy of its value.
 *
 * string - the text of a tag starting just after the tag name, e.g.
 *          ` href="x">...` for `<a href="x">`.  Scanning stops at the first
 *          '>' found outside a double-quoted value, or at the end of string.
 * pname  - attribute name to look for; compared case-insensitively.
 *
 * A value may be double-quoted (copied up to the closing quote) or bare
 * (copied up to the next whitespace or '>').  Whitespace is allowed on
 * either side of the '='.
 *
 * Returns a malloc()ed, NUL-terminated copy of the value which the caller
 * must free(), or NULL if the attribute is absent, has no "=value" part, or
 * memory could not be allocated.
 */
char * eb_html_get_param(char * string, char * pname)
{
  int tname=0; // index at which the attribute name being scanned starts
  int pos=0;
  int plen=strlen(pname);

  while(1)
  {
    if(string[pos]=='"')
    {
      // step over a quoted value so that nothing inside it can be mistaken
      // for an attribute name; pos ends on the closing quote or the NUL
      for(pos++; string[pos]!='"' && string[pos]!='\0'; pos++);
      if(string[pos]=='"') { tname=pos+1; }
    }

    if(string[pos]=='>' || string[pos]=='\0') { return NULL; } // not found

    if(isspace((unsigned char)string[pos]) || string[pos]=='=')
    {
      // string[tname..pos) is a complete name; is it the one we want?
      if(pos-tname==plen && !strncasecmp(string+tname, pname, plen))
      {
        int vpos=pos;

        for(; isspace((unsigned char)string[vpos]); vpos++);

        if(string[vpos]=='=')
        {
          int len;
          char * retval;

          vpos++; // move past the '='
          for(; isspace((unsigned char)string[vpos]); vpos++);

          if(string[vpos]=='"')
          {
            vpos++;
            for(len=0; string[vpos+len]!='"' && string[vpos+len]!='\0'; len++);
          } else {
            for(len=0; string[vpos+len]!='\0' && string[vpos+len]!='>'
                    && !isspace((unsigned char)string[vpos+len]); len++);
          }

          retval=(char *)malloc(len+1);
          if(retval==NULL) { return NULL; }
          memcpy(retval, string+vpos, len);
          retval[len]='\0';
          return retval;
        }

        // The name matched but no '=' follows it.  Resume exactly at the
        // character that ended the whitespace so that a following '>', '"'
        // or attribute name is examined in full rather than skipped.
        tname=vpos;
        pos=vpos;
        continue;
      }

      tname=pos+1;
    }

    pos++;
  }
}

eb_html_item * eb_html_parse_plain(char * string)
{
  int a, pos=0;
  eb_html_item * item=(eb_html_item *)malloc(sizeof(eb_html_item));
  item->type=EB_HTML_BODY;
  item->contents=NULL;
  
  for(a=0; string[a]!='\0'; a++)
  {
    int b;
    for(b=0; html_encoding[b].real!='\0'; b++)
    {
      if(string[a]==html_encoding[b].real)
      {
        eb_html_item * i;
        if(pos!=a)
        {
          char * c=(char *)malloc(a-pos+1);
          i=(eb_html_item *)malloc(sizeof(eb_html_item));
          i->type=EB_HTML_TEXT;
          i->contents=NULL;
          i->data=c;
          strncpy(c, string+pos, a-pos);
          c[a-pos]='\0';
          
          item->contents=e_list_append(item->contents, i);
        }
      
        i=(eb_html_item *)malloc(sizeof(eb_html_item));
        i->type=EB_HTML_ENCODED;
        i->data=&(html_encoding[b]);
        i->contents=NULL;
        
        item->contents=e_list_append(item->contents, i);
        
        pos=a+1;
        break;
      }
    }
  }
  
  return item;
}

eb_html_item * eb_html_parse_item(char * string, int * pos, int * attribs)
{
  eb_html_item * item=NULL;
  int len, a;

  if(string[0]=='\0') { return NULL; }

  if(string[0]=='<') // it's a tag
  {
    char * buf;
    int buflen;
    for(len=1; string[len]!='>' && string[len]!='\0' && string[len]!='<'; len++);

    if(string[len]=='>') // if this was a normal end-of-tag...
    {
      buf=(char *)malloc(len);

      for(a=1; isspace(string[a]) && a<len; a++); // ignore padding, eg "<  font..."

      buflen=0;
      for(; !isspace(string[a]) && (a==1 || string[a]!='/') && a<len; a++) // copy the tag name into buf
      {
        buf[buflen++]=string[a];
      }
      buf[buflen]='\0';

      eb_html_strlower(buf); // convert to lowercase, because strcasecmp() isn't ANSI, so I can't use it

      if(!strcmp(buf, "a")) // it's a link
      {
        eb_link_data * data=(eb_link_data *)malloc(sizeof(eb_link_data));

        item=(eb_html_item *)malloc(sizeof(eb_html_item));
        item->type=EB_HTML_LINK;
        item->data=data;
        item->contents=NULL;

        data->target=eb_html_get_param(string+a, "href");
      }

      if(!strcmp(buf, "b") || !strcmp(buf, "u") || !strcmp(buf, "i")
       || !strcmp(buf, "/b") || !strcmp(buf, "/u") || !strcmp(buf, "/i"))
      {
        eb_attrib_data * data;
        int attrib=0;
        char c;

        c=buf[0];
        if(c=='/') { c=buf[1]; }

        switch(c)
        {
          case('i') : { attrib=EB_HTML_ITALIC; break; }
          case('b') : { attrib=EB_HTML_BOLD; break; }
          case('u') : { attrib=EB_HTML_ULINE; break; }
        }

        if((attribs[attrib] && buf[0]=='/')
         || (!attribs[attrib] && buf[0]!='/'))
        {
          data=(eb_attrib_data *)malloc(sizeof(eb_attrib_data));
          data->attrib=attrib;
          data->state=(buf[0]!='/');
          attribs[attrib]=data->state;
          item=(eb_html_item *)malloc(sizeof(eb_html_item));
          item->type=EB_HTML_ATTRIB;
          item->data=data;
          item->contents=NULL;
        }

        free(buf);
        *pos=len+1;
        return item;
      }

      if(!strcmp(buf, "br") || !strcmp(buf, "p") || !strcmp(buf, "/p"))
      {
        free(buf);

        item=(eb_html_item *)malloc(sizeof(eb_html_item));
        item->type=EB_HTML_NEWLINE;
        item->data=NULL;
        item->contents=NULL;

        *pos=len+1;
        return item;
      }

      if(!strcmp(buf, "font"))
      {
        eb_font_data * data=(eb_font_data *)malloc(sizeof(eb_font_data));
        char * col;
        char * size;

        item=(eb_html_item *)malloc(sizeof(eb_html_item));
        item->type=EB_HTML_FONT;
        item->contents=NULL;
        item->data=data;

        data->face=eb_html_get_param(string+a, "face");
        data->size=0;
        data->size_abs=0;
        col=eb_html_get_param(string+a, "color");
        if(col==NULL)
        {
          data->col=0;
        } else {
          data->col=1;
          if(col[0]=='#' && strlen(col)==7)
          {
            char tmp[3];
            tmp[2]='\0';

            tmp[0]=col[1];
            tmp[1]=col[2];
            data->r=(unsigned char)strtol(tmp, NULL, 16);
            tmp[0]=col[3];
            tmp[1]=col[4];
            data->g=(unsigned char)strtol(tmp, NULL, 16);
            tmp[0]=col[5];
            tmp[1]=col[6];
            data->b=(unsigned char)strtol(tmp, NULL, 16);
          } else {
            data->r=data->g=data->b=0;
            if(!strcmp(col, "red"))
            { data->r=255; }
            if(!strcmp(col, "green"))
            { data->g=255; }
            if(!strcmp(col, "blue"))
            { data->b=255; }
            if(!strcmp(col, "white"))
            { data->r=data->g=data->b=255; }
          }
          free(col);
        }

        size=eb_html_get_param(string+a, "size");
        if(size!=NULL)
        {
          data->size=atoi(size);
          if(size[0]!='+' && size[0]!='-' && data->size!=0)
          { data->size_abs=1; }
          free(size);
        }
      }

      // and finally, stuff to ignore

      if(!strcmp(buf, "html") || !strcmp(buf, "body") || !strcmp(buf, "/html") || !strcmp(buf, "/body"))
      {
        free(buf);
        *pos=len+1;
        return NULL;
      }

      if(item!=NULL)
      {
        *pos=len+1;
        while(1) // All the content inside this tag
        {
          int ipos=0;
          eb_html_item * ti=eb_html_parse_item(string+*pos, &ipos, attribs);
          *pos+=ipos;
          if(ti==NULL)
          {
            if(ipos==0) { break; }
          } else {
            item->contents=e_list_append(item->contents, ti);
          }

          if(strlen(string) - *pos >= buflen+3)
          {
            char * s=string+*pos;

            if(s[0]=='<' && s[1]=='/' && !strncasecmp(s+2, buf, buflen) && s[buflen+2]=='>')
            {
              *pos+=buflen+3;
              break;
            }
          }
        }

        free(buf);
        return item;
      } else {
        free(buf);
        // if we didn't recognise this tag, treat it as text - control will
        // now cascade to the text handling code
      }
    }
  }

  if(string[0]=='<') // ...but we got this far, so it's no recognised tag
  {
    item=(eb_html_item *)malloc(sizeof(eb_html_item));
    item->type=EB_HTML_ENCODED;
    item->contents=NULL;
    item->data=&(html_encoding[0]);
    
    *pos=1;
    return item;
  }

  if(string[0]=='&')
  {
    int a;

    for(a=0; html_encoding[a].real!='\0'; a++)
    {
      if(!strncmp(html_encoding[a].encoded, string, strlen(html_encoding[a].encoded)))
      {
        item=(eb_html_item *)malloc(sizeof(eb_html_item));
        item->type=EB_HTML_ENCODED;
        item->data=&(html_encoding[a]);
        item->contents=NULL;

        *pos=strlen(html_encoding[a].encoded);
        return item;
      }
    }

    // if we got here, we didn't recognise the entity
    item=(eb_html_item *)malloc(sizeof(eb_html_item));
    item->type=EB_HTML_ENCODED;
    item->data=&(html_encoding[2]);
    item->contents=NULL;

    *pos=1;
    return item;
  }

  if(string[0]=='\n')
  {
    item=(eb_html_item *)malloc(sizeof(eb_html_item));
    item->type=EB_HTML_NEWLINE;
    item->data=NULL;
    item->contents=NULL;

    *pos=1;
    return item;
  }

  // otherwise, it's text

  item=(eb_html_item *)malloc(sizeof(eb_html_item));
  item->type=EB_HTML_TEXT;
  item->contents=NULL;

  for(len=1; string[len]!='\0' && string[len]!='<' && string[len]!='&' && string[len]!='\n'; len++);

  item->data=malloc(len+1);
  strncpy((char *)item->data, string, len);
  ((char *)item->data)[len]='\0';

  *pos=len;
  return item;
}

eb_html_item * eb_html_parse(char * string)
{
  int pos=0, ipos=0, a;
  int attribs[3]={0,0,0};

  eb_html_item * item=(eb_html_item *)malloc(sizeof(eb_html_item));

  item->type=EB_HTML_BODY;
  item->data=NULL;
  item->contents=NULL;

  while(1)
  {
    eb_html_item * ti;
    ipos=0;
    ti=eb_html_parse_item(string+pos, &ipos, attribs);
    pos+=ipos;
    if(ti==NULL)
    {
      if(ipos==0) { break; }
    } else {
      item->contents=e_list_append(item->contents, ti);
    }
  }

  for(a=0; a<3; a++)
  {
    if(attribs[a])
    {
      eb_html_item * ti=(eb_html_item *)malloc(sizeof(eb_html_item));
      eb_attrib_data * data=(eb_attrib_data *)malloc(sizeof(eb_attrib_data));
      ti->type=EB_HTML_ATTRIB;
      ti->data=data;
      ti->contents=NULL;
      data->state=0;
      data->attrib=a;

      item->contents=e_list_append(item->contents, ti);
    }
  }

  return item;
}

/*
 * Return a number of characters (not counting the terminating NUL) budgeted
 * for rendering item and everything below it.
 *
 * Each item type gets a fixed allowance, plus the length of any string it
 * carries (text, font face, link target); the budgets of all children are
 * added on top.  An unknown type is reported on stdout and given 4096.
 */
int eb_html_required_space(eb_html_item * item)
{
  int needed;
  EList * child;

  if(item->type==EB_HTML_BODY)
  {
    needed=0;
  }
  else if(item->type==EB_HTML_TEXT)
  {
    needed=strlen((char *)item->data);
  }
  else if(item->type==EB_HTML_NEWLINE)
  {
    needed=5;
  }
  else if(item->type==EB_HTML_FONT)
  {
    eb_font_data * font=(eb_font_data *)item->data;

    needed=16;

    if(font->face!=NULL)
    { needed+=10+strlen(font->face); }

    if(font->col)
    { needed+=20; }

    if(font->size)
    { needed+=32; } // so we don't overflow if someone does something stupid
  }
  else if(item->type==EB_HTML_LINK)
  {
    char * href=((eb_link_data *)item->data)->target;

    needed=16;
    if(href!=NULL)
    { needed+=strlen(href); }
  }
  else if(item->type==EB_HTML_ENCODED)
  {
    needed=16;
  }
  else if(item->type==EB_HTML_ATTRIB)
  {
    needed=4;
  }
  else
  {
    printf("HEEELP! Unknown HTML entity, budgeting 4k to it...\n");
    needed=4096;
  }

  child=item->contents;
  while(child!=NULL)
  {
    needed+=eb_html_required_space((eb_html_item *)child->data);
    child=child->next;
  }

  return needed;
}

/*
 * Render an item tree back to HTML text.
 *
 * The output buffer is sized with eb_html_required_space().  The item's own
 * opening markup is written first, then each child rendered recursively,
 * then the closing markup for links and fonts.  A link with no target
 * renders only its contents.
 *
 * Returns a malloc()ed string; the caller frees it.
 */
char * eb_html_render(eb_html_item * item)
{
  char * out=(char *)malloc(eb_html_required_space(item)+1);
  EList * child;

  out[0]='\0';

  // opening markup for this item
  if(item->type==EB_HTML_TEXT)
  {
    strcpy(out, (char *)item->data);
  }
  else if(item->type==EB_HTML_NEWLINE)
  {
    strcpy(out, "<br>\n");
  }
  else if(item->type==EB_HTML_LINK)
  {
    char * href=((eb_link_data *)item->data)->target;

    if(href!=NULL)
    { sprintf(out, "<a href=\"%s\">", href); }
  }
  else if(item->type==EB_HTML_FONT)
  {
    eb_font_data * font=(eb_font_data *)item->data;
    char * tail=out; // next position to write within the tag

    strcpy(tail, "<font");
    tail+=strlen(tail);

    if(font->face!=NULL)
    { tail+=sprintf(tail, " face=\"%s\"", font->face); }

    if(font->col)
    { tail+=sprintf(tail, " color=\"#%02x%02x%02x\"", font->r, font->g, font->b); }

    if(font->size!=0)
    {
      strcpy(tail, " size=\"");
      tail+=strlen(tail);

      // relative, non-negative sizes carry an explicit plus sign
      if(!font->size_abs && font->size>=0)
      { *tail++='+'; }

      tail+=sprintf(tail, "%d\"", font->size);
    }

    strcpy(tail, ">");
  }
  else if(item->type==EB_HTML_ENCODED)
  {
    strcpy(out, ((eb_encoded_data *)item->data)->encoded);
  }
  else if(item->type==EB_HTML_ATTRIB)
  {
    eb_attrib_data * attr=(eb_attrib_data *)item->data;
    char * w=out;

    *w++='<';
    if(attr->state==0) { *w++='/'; } // switching the attribute off
    switch(attr->attrib)
    {
      case(EB_HTML_BOLD) : { *w++='b'; break; }
      case(EB_HTML_ITALIC) : { *w++='i'; break; }
      case(EB_HTML_ULINE) : { *w++='u'; break; }
    }
    *w++='>';
    *w='\0';
  }

  // children, in order
  for(child=item->contents; child!=NULL; child=child->next)
  {
    char * piece=eb_html_render((eb_html_item *)child->data);
    strcat(out, piece);
    free(piece);
  }

  // closing markup
  if(item->type==EB_HTML_LINK)
  {
    if(((eb_link_data *)item->data)->target!=NULL)
    { strcat(out, "</a>"); }
  }
  else if(item->type==EB_HTML_FONT)
  {
    strcat(out, "</font>");
  }

  return out;
}

/*
 * Render an item tree as plain text.
 *
 * Text items contribute their text, newline items a '\n', and encoded items
 * the single real character they stand for; every other item type
 * contributes nothing itself.  Children are rendered recursively and
 * appended in order.  The buffer is sized with eb_html_required_space().
 *
 * Returns a malloc()ed string; the caller frees it.
 */
char * eb_html_render_plain(eb_html_item * item)
{
  char * out=(char *)malloc(eb_html_required_space(item)+1);
  EList * child;

  out[0]='\0';

  switch(item->type)
  {
    case(EB_HTML_TEXT) :
    {
      strcpy(out, (char *)item->data);
      break;
    }
    case(EB_HTML_NEWLINE) :
    {
      out[0]='\n';
      out[1]='\0';
      break;
    }
    case(EB_HTML_ENCODED) :
    {
      out[0]=((eb_encoded_data *)item->data)->real;
      out[1]='\0';
      break;
    }
    default : { break; }
  }

  child=item->contents;
  while(child!=NULL)
  {
    char * piece=eb_html_render_plain((eb_html_item *)child->data);
    strcat(out, piece);
    free(piece);
    child=child->next;
  }

  return out;
}

/*
 * Free an item tree: the item's own data, every child (recursively), the
 * child list, and the item itself.
 *
 * Text and attribute items free their data pointer; link and font items
 * also free the target / face string they hold.  For any other type the
 * data pointer is left alone.
 */
void eb_html_destroy(eb_html_item * item)
{
  EList * node;

  if(item->type==EB_HTML_TEXT)
  {
    free(item->data);
  }
  else if(item->type==EB_HTML_LINK)
  {
    eb_link_data * link=(eb_link_data *)item->data;

    free(link->target); // free(NULL) is a no-op
    free(link);
  }
  else if(item->type==EB_HTML_FONT)
  {
    eb_font_data * font=(eb_font_data *)item->data;

    free(font->face); // free(NULL) is a no-op
    free(font);
  }
  else if(item->type==EB_HTML_ATTRIB)
  {
    free((eb_attrib_data *)item->data);
  }

  node=item->contents;
  while(node!=NULL)
  {
    eb_html_destroy((eb_html_item *)node->data);
    node=node->next;
  }

  e_list_free(item->contents);
  free(item);
}

