/*#define DEBUGGING*/
/*****************************************************************

  (c) Rob Hartill  1994


  This program searches a database, in which each record appears
  on a separate line, for records which match a list of requirements
  which is read from standard input. 

  The length of the input is supplied to the program by an environment
  variable called CONTENT_LENGTH.

  The input is an HTTP POSTed request consisting of a series of items,
  at most one item per field, each with the following format:
      field_number|search_type|search_item[/search_item]*=on&
                                                         ^^^
               the '=on' is attached by the WWW browser -+++

      search_type is a numeric description of the type of search to
      be performed, e.g. substrings ANDed
                         substrings ORed
                         numeric/numeric ranges
                         century/century ranges

  Nothing in this file needs to be altered, all user (re)configurable
  features are accessed via the config.h file.

*****************************************************************/

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "config.h"

/* Forward declarations for the functions defined further down this file.
   They are old-style (no parameter lists) to match the K&R definitions.
*/
void grep_fields();
char *in_lowercase();
void decode_escape_sequences();
void decode_slash_and_bar();
void plus_to_space();
void lowercase();
int get_first_numeric();   /* was missing: grep_fields calls it before its
                              definition, which relied on an implicit
                              declaration */


void main()
  {
  search_field search_on_field[FIELDS_PER_RECORD + 1];
      /* holds the lists of substrings to search for, for each field */


  int combine_list_method,   /* ANDING or ORING of the substring lists
                                 with each other
                             */

      item,                  /* Index to the nth substring of a list 
                             */
      substring_list,        /* The input will consist of substring lists 
                                 this is used to index them 
                             */
      content_length;        /* The length in bytes of the input query.
                             */

  char *next_field,          /* Points the the next '&' delimited string in 
                                 the input.
                             */
       *unprocessed_query,   /* Will hold the whole request which is 
                                 waiting to be read from "stdin" 
                             */
       *token;               /* Used to point to the next '&' delimited
                                string from the input.
                             */


  content_length = atoi(getenv("CONTENT_LENGTH"));

  printf("Content-type: text/html\n\n<TITLE>%s</TITLE>",URL_TITLE);

  /* Reserve some memory to hold the incoming POSTed request
  */
  unprocessed_query = (char *) malloc(content_length+2);

  if (unprocessed_query == NULL)
     {  
     printf("Unable to reserve enough memory to parse the request<br>\n");
     return;
     }


  /* Read the input query.
  */
  fgets(unprocessed_query, content_length+1, stdin);

  /* Add a '&' field delimiter for good measure. */
  strcat(unprocessed_query,"&");

  /* The query may contain escape sequences whose "%" prefix has itself
      been escaped, so we'll need to decode twice, and convert spaces to
      pluses in between, so that genuine pluses aren't changed to space
      at a later stage.
  */
  decode_slash_and_bar(unprocessed_query);


#ifdef DEBUGGING
  printf("---%s---(%d)<br>\n",unprocessed_query,content_length);
#endif

  substring_list = 0;   /* Let's read each '&' delimited query list, one
                             at a time
                        */

  token = strtok(unprocessed_query, "&");
  while ( token != NULL )
    {
    next_field = token + strlen(token) + 1;  /* Remember how far into the
                                                 query we've processed. 
                                             */

    /* One of two special entries will exist, which refer to the
        logical ANDinf or ORing of the matching fields.
    */ 
    if (strcmp(token,"ANDING=on") == 0)
       {
        combine_list_method = ANDING;
        token = strtok(next_field, "&\0");
        break;
       }
    if (strcmp(token,"ANDING=off") == 0)
       {
        combine_list_method = ORING;
        token = strtok(next_field, "&\0");
        break;
       }

    /* Not an ANDING=on or ORING=on, so must be a search list.
        First read the field number 
    */
    token = strtok(token, "|" );

    if (token == NULL)
       {
       printf("Error. Missing field number for entry #<b>%d</b>.<br>This shouldn't happen, please report this to %s<p>\n", substring_list+1, DATABASE_MAINTAINER);
       return;
       }

    search_on_field[substring_list].field_num = atoi(token);

    if  (search_on_field[substring_list].field_num <0 ||
         search_on_field[substring_list].field_num >FIELDS_PER_RECORD)
       {
       printf("Error. Field #<b>%s</b> for entry #<b>%s</b> is out of the acceptable range.<p>This shouldn't happen, please report this to %s<p>\n", token, substring_list+1, DATABASE_MAINTAINER);
       return;
       }

    /* To get this far, we've successfully read the field number,
       now we need to know the type of search we are to perform on
       this field
    */
    token = strtok( NULL, "|" );
    if (token == NULL)
       {
       printf("Error. Missing search type for entry #<b>%d</b>.<br>This shouldn't happen, please report this to %s<p>\n", substring_list+1, DATABASE_MAINTAINER);
       return;
       }

    /* Okay we have a search type, but is it valid ?
    */
    search_on_field[substring_list].method = atoi(token);
    if  (search_on_field[substring_list].method != ANDING &&
         search_on_field[substring_list].method != ORING &&
         search_on_field[substring_list].method != YEARS &&
         search_on_field[substring_list].method != CENTURIES)
       {
       printf("Error. Unkown search type <b>%s</b> for entry #<b>%d<b><br>. This shouldn't happen, please report this to %s<p>\n", token, substring_list+1, DATABASE_MAINTAINER);
       return;
       }


    /* So we now now what type of search we are going to perform, and on 
       which field. We now need to know what we are going to search for.
    */
    item = 0;
    do
        {
        /* Each search item will be delimited by a "/", lets get
            the next one.
        */
        token = strtok( NULL, "/" );
        
        /* if there wasn't a problem, and the item to search for isn't
            "=on" which is a FORMS tag, we can add this search item
            to the list of search items for this field.
        */
        if (token != NULL && strcmp(token, "=on") != 0)
          {
          /* Remember where this search item is located in the input
              string.
          */
          decode_escape_sequences(token);
          plus_to_space(token);
          decode_escape_sequences(token);

          lowercase(token);
          search_on_field[substring_list].search_items[item] = token;
          } 
        else 
          search_on_field[substring_list].search_items[item] = NULL;

        item++;

        } while (token != NULL && substring_list < MAX_SUBSTRINGS_PER_FIELD-1);

        substring_list++;

        token = strtok(next_field, "&\0");

    } /* Now for the next query list */

  search_on_field[substring_list].field_num = 0; /* end marker */

  
  /* Okay we now know what to search for, how to search for it, and where
      to search, so why not do the search ?.
  */
  printf(HEADER_BLURB);   
  grep_fields(search_on_field, combine_list_method, DATABASE_FILENAME);
  printf(FOOTER_BLURB);

  }



/* This 'grep' function searches through a text database, containing one
    record per line, and any number of fields per record.

   For each record, we check each search list for a match, giving up
    when it's pointless continueing, or when we're certain the record
     is a match.

   Output from this function is formated according to OUTPUT_FORMAT &
    OUTPUT_DATA which are defined in the config.h file.
*/
void grep_fields(the_search_fields, search_method, database_name)

  search_field the_search_fields[];  /* what to search for and were */
  char         *database_name;        /* database filename           */
  int           search_method;        /* search method and/or/range  */

  {
  int fields_in_each_record,/* number of fields per record detected */
      field,                /* an index to a field in a record      */
      search_item,          /* an index to a search list            */
      search_for,           /* an index into a search list          */
      anding,               /* true if anding list items            */
      oring,                /* true if oring list items             */
      year_range,           /* true if list is a year/range         */
      century_range,        /* true if list is a centure/range      */
      match,                /* true if a search was successful      */
      hits;                 /* the number of matching records found */

  FILE *database;           /* the database we're searching         */
  char *field_number[FIELDS_PER_RECORD];  
                            /* an array of pointers to a record's fields */
  char *pos_in_record,      /* movable pointer within a record */
       *new_pos_in_record,  /* another pointer */
       record[MAX_RECORD_LENGTH];
                            /* holds a record as we check it   */

  search_field    *current_search; /* the list currently being searched for */

  int current_record_hi_year, current_record_low_year, low_year, hi_year;

  hits = 0; /* A counter of the number of matching record 
            */

  /* open the database for reading 
  */
  database = fopen(database_name, "r");
  if (database == NULL)
     {
     printf("Error. Unable to open file <b>%s</b>.<br> Please notify <b>%s</b><p>\n",
         database_name, DATABASE_MAINTAINER);
     return;
    }

  /* for each record in the database, check to see if it's one we want
  */
  while (fgets(record, MAX_RECORD_LENGTH, database) != NULL)
     {
     /* read all the fields 
     */
     field = 0;
     pos_in_record = &record[0];
     while (*pos_in_record != '\n')  /* while there are more fields 
                                     */
       {
       field_number[field++] = pos_in_record;  /* save position of this field 
                                               */
       if ((new_pos_in_record = strchr(pos_in_record, FIELD_DELIMITER)) != NULL)
           {
           *new_pos_in_record = '\0';
              /* swaps the delimiter for an end of string marker 
              */
       
            pos_in_record = new_pos_in_record + 1; /* go to next field 
                                                   */
            }
       else
          pos_in_record = strchr(pos_in_record, '\n');
              /* last field, jump to end of record */
      }
    *pos_in_record = '\0';  /* mark end of last field */

    fields_in_each_record = field;   /* remember the size of each record */
   
    /* we now have one database record in field_number[]           */
    /* now perform the substring searching on the relavent fields  */

    search_item = match = 0;

    /* for each search list (per field) */
    current_search = &the_search_fields[search_item];

    while ( current_search->field_num != 0 )
       {
       search_for = 0;
      
       field = current_search->field_num;
           /* check the nth field */

       /* what's the search method ?  
       */
       anding         = (current_search->method == ANDING);
       oring          = (current_search->method == ORING);
       year_range     = (current_search->method == YEARS);
       century_range  = (current_search->method == CENTURIES);

       /* for each item in the search list 
       */
       while ( search_for < MAX_SUBSTRINGS_PER_FIELD && current_search->search_items[search_for] != NULL )
           {
           if ( anding || oring )
               match = 
                   (strstr(
                      in_lowercase(field_number[field-1]),
                      current_search->search_items[search_for]
                    ) != NULL);

           if ( year_range ) /* Are we looking for a year ?
                             */
               {
               /* Examine the year field of the current record, it
                   should be  xxxx-yyyy or just xxxx, so if we
                   only find xxxx, can make yyyy = xxxx
               */
               current_record_hi_year = get_first_numeric(strchr(field_number[field-1],'-'));

               current_record_low_year = get_first_numeric(field_number[field-1]);

               if (current_record_hi_year == 0) 
                  current_record_hi_year = current_record_low_year;

          
               /* Similar (same?) format as the current record's year field,
                   so get a high and a low year.
               */
               low_year =
                     get_first_numeric(current_search->search_items[search_for]);

               hi_year = get_first_numeric(
                        strchr(current_search->search_items[search_for],'-'));

               if ( hi_year == 0 ) hi_year = low_year;

               /* There's a match with this record, if the two ranges
                   of years intersect.
               */
               match = (
                        ( (current_record_hi_year >= low_year) &&
                          (current_record_hi_year <= hi_year) )
                        ||
                        ( (current_record_low_year >= low_year) &&
                          (current_record_low_year <= hi_year) )
                        );

               }

           if ( century_range ) /* searching for a century/range of centuries 
                                */
               {

                current_record_hi_year = get_first_numeric(strchr(field_number[field-1],'-'));

                current_record_low_year = get_first_numeric(field_number[field-1]);

                if (   (strstr(field_number[field-1],"BC") != NULL
                     && strchr(field_number[field-1],'-') == NULL
                       )
                   || (
                      strstr(field_number[field-1],"BC") != NULL &&     
                      strchr(field_number[field-1],'-') != NULL &&

                      strstr(field_number[field-1],"BC") <     
                      strchr(field_number[field-1],'-')
                      )
                   )
                   current_record_low_year *= -1;

               if (strstr(field_number[field-1],"BC") != NULL
                  && strchr(field_number[field-1],'-') != NULL 
                  && strstr(field_number[field-1],"BC") >
                     strchr(field_number[field-1],'-')
                  )
                  current_record_hi_year *= -1;

               if (current_record_hi_year == 0) 
                   current_record_hi_year = current_record_low_year;

               low_year =
                 get_first_numeric(current_search->search_items[search_for]);

               hi_year = get_first_numeric(
                    strchr(current_search->search_items[search_for],'-'));

               if (  ( strstr(current_search->search_items[search_for],"bc") != NULL
                   && strchr(current_search->search_items[search_for],'-') == NULL 
                     )
                  ||  (
                      strstr(current_search->search_items[search_for],"bc")!=NULL &&
                      strchr(current_search->search_items[search_for],'-')!=NULL &&

                      strstr(current_search->search_items[search_for],"bc") <
                      strchr(current_search->search_items[search_for],'-')
                      )
                   )
                   current_record_low_year *= -1;

              if (strstr(current_search->search_items[search_for],"bc") != NULL
                 && strchr(current_search->search_items[search_for],'-') != NULL 
                 &&  strstr(current_search->search_items[search_for],"bc") >
                     strchr(current_search->search_items[search_for],'-')
                 )
                 current_record_hi_year *= -1;

              if ( hi_year == 0 ) hi_year = low_year;


              match = (
                      ( (current_record_hi_year >= low_year) &&
                        (current_record_hi_year <= hi_year) )
                      ||
                      ( (current_record_low_year >= low_year) &&
                        (current_record_low_year <= hi_year) )
                      );
              }
          
          /* Check to see if there's any point/need to carry on checking
              other search items for this field.
          */
          if ( !match && anding)  break;
          if (  match && oring)   break;
          if (  match && year_range) break; 
          if (  match && century_range) break; 

          search_for++;  /* check next list item */
          }

       /* do we have a match so far */
       /* if we're 'anding', and there was no match, give up now */
       if ( search_method == ANDING && !match ) break;

       /* if we're 'oring' and there was a match, we have success */
       if ( search_method == ORING && match) break;

       search_item++;  /* now for the next list */
       current_search = &the_search_fields[search_item];
       }

   /* if we have a match, output the record */
   if ( match )
       {
       printf(OUTPUT_FORMAT,OUTPUT_DATA);
       hits++;
       }
   
   /* next record from the database please */
   }

   /* If the search was unsuccesful, tell the poor user. */
   if ( hits == 0 ) printf(NO_HITS_MESSAGE);

   fclose(database);
   
   }



/**************************************************************
 in_lowercase returns a copy of a string, converted into
   lowercase.

 The copy lives in a buffer owned by this function, every call
   overwrites the result of the previous one.  The buffer holds
   MAX_FIELD_LENGTH bytes, a longer string is cut short so that
   it fits instead of being written past the end of the buffer.
***************************************************************/
char *in_lowercase(to_convert)
char *to_convert;
   {
   static char converted[MAX_FIELD_LENGTH];
   size_t indx;

   /* always leave room for the end of string marker */
   for (indx = 0;
        indx + 1 < sizeof converted && to_convert[indx] != '\0';
        indx++)
     {
     /* tolower() needs a value in the range of unsigned char */
     converted[indx] = (char) tolower((unsigned char) to_convert[indx]);
     }
   converted[indx] = '\0';

   return &converted[0];
   }

/**************************************************************
 lowercase converts THE string it is given into lowercase,
   in place.  Nothing is returned.
***************************************************************/
void lowercase(to_convert)
char *to_convert;
   {
   char *cursor;

   for (cursor = to_convert; *cursor != '\0'; cursor++)
     {
     /* tolower() needs a value in the range of unsigned char, a plain
        char may be negative once escape sequences have been decoded */
     *cursor = (char) tolower((unsigned char) *cursor);
     }
   }


/**************************************************************
 decode_escape_sequences takes a string as input, and translates
   HTTP escape sequences, format %hh, into the character with
   that code.  The string is changed in place, it can only get
   shorter.

 The hex digits may be upper or lower case.  A '%' which is not
   followed by two hex digits is left as it is.
***************************************************************/
void decode_escape_sequences(request)
  char *request;
  {
  char *reader = request;   /* next character to examine         */
  char *writer = request;   /* where the next decoded char goes  */
  char hex_num[3];

  while ( *reader != '\0' )
     {
     /* The second digit is only looked at when the first one is a hex
        digit, so the end of the string is never read past. */
     if ( reader[0] == '%' &&
          isxdigit((unsigned char) reader[1]) &&
          isxdigit((unsigned char) reader[2]) )
        {
        hex_num[0] = reader[1];
        hex_num[1] = reader[2];
        hex_num[2] = '\0';

        *writer++ = (char) strtol(hex_num, (char **)NULL, 16);
        reader += 3;
        }
     else
        *writer++ = *reader++;
     }
  *writer = '\0';
  }


/**************************************************************
 plus_to_space replaces every '+' in the string it is given by
   a ' ', in place.  (In a submitted form a '+' stands for a
   space.)
***************************************************************/
void plus_to_space(string)
char *string;
  {
  char *cursor;

  for (cursor = string; *cursor != '\0'; cursor++)
     {
     if (*cursor == '+')
        *cursor = ' ';
     }
  }


/**************************************************************
 get_first_numeric takes a string as input and returns the value
  of the first run of digits it finds therein.  A sign in front
  of the digits is not taken into account.  The result is 0 for
  a NULL pointer and for a string without any digit.
***************************************************************/
int get_first_numeric(from_string)
  char *from_string;
  {
  char *cursor;

  if (from_string == NULL)
     return 0;

  /* step over everything in front of the first digit */
  cursor = from_string;
  while (*cursor != '\0' && (*cursor < '0' || *cursor > '9'))
     cursor++;

  return atoi(cursor);
  }


/**************************************************************
 decode_slash_and_bar takes a string as input, and translates
   the escape sequences %2F and %7C into '/' and '|', the two
   characters which delimit the parts of a query entry.  Every
   other escape sequence is left untouched.  The string is
   changed in place, it can only get shorter.

 The hex digits may be upper or lower case (%2f, %7c).
***************************************************************/
void decode_slash_and_bar(request)
  char *request;
  {
  char *reader = request;   /* next character to examine         */
  char *writer = request;   /* where the next output char goes   */

  while ( *reader != '\0' )
     {
     /* The third character is only looked at when the second one
        matched, so the end of the string is never read past. */
     if ( reader[0] == '%' && reader[1] == '2' &&
          (reader[2] == 'F' || reader[2] == 'f') )
        {
        *writer++ = '/';
        reader += 3;
        }
     else if ( reader[0] == '%' && reader[1] == '7' &&
               (reader[2] == 'C' || reader[2] == 'c') )
        {
        *writer++ = '|';
        reader += 3;
        }
     else
        *writer++ = *reader++;
     }
  *writer = '\0';
  }

