#define DEBUGGING /***************************************************************** (c) Rob Hartill 1994 This program searches a database, in which each record appears on a separate line, for records which match a list of requirements which is read from standard input. The length of the input is supplied to the program by an environment variable called CONTENT_LENGTH. The input is a HTTP POSTed request consisting of a series of items, at most one item per field, with the following format, field_number|search_type|search_item[/search_item]*=on& ^^^ the '=on' is attached by the WWW browser -+++ search_type is a numeric description of the type of search to be performed, e.g. substrings ANDed substrings ORed numeric/numeric ranges century/century ranges Nothing in this file needs to be altered, all user (re)configurable features are accessed via the config.h file. *****************************************************************/ #include #include #include #include "config.h" void grep_fields(); char *in_lowercase(); void decode_escape_sequences(); void decode_slash_and_bar(); void plus_to_space(); void lowercase(); void main() { search_field search_on_field[FIELDS_PER_RECORD + 1+5]; /* holds the lists of substrings to search for, for each field */ int combine_list_method, /* ANDING or ORING of the substring lists with each other */ item, /* Index to the nth substring of a list */ substring_list, /* The input will consist of substring lists this is used to index them */ content_length; /* The length in bytes of the input query. */ char *next_field, /* Points the the next '&' delimited string in the input. */ *unprocessed_query, /* Will hold the whole request which is waiting to be read from "stdin" */ *token; /* Used to point to the next '&' delimited string from the input. 
*/ if ( strcmp(getenv("REQUEST_METHOD"), "POST") == 0 ) content_length = atoi(getenv("CONTENT_LENGTH")); else content_length = strlen(getenv("QUERY_STRING")); printf("Content-type: text/html\n\n%s",URL_TITLE); /* Reserve some memory to hold the incoming POSTed request */ unprocessed_query = (char *) malloc(content_length+2); if (unprocessed_query == NULL) { printf("Unable to reserve enough memory to parse the request
\n"); return; } /* Read the input query. */ if ( strcmp(getenv("REQUEST_METHOD"), "POST") == 0 ) fgets(unprocessed_query, content_length+1, stdin); else strcpy(unprocessed_query, getenv("QUERY_STRING")); /* Add a '&' field delimiter for good measure. */ strcat(unprocessed_query,"&"); /* The query may contain escape sequences whose "%" prefix has itself been escaped, so we'll need to decode twice, and convert spaces to pluses in between, so that genuine pluses aren't changed to space at a later stage. */ decode_slash_and_bar(unprocessed_query); #ifdef DEBUGGING printf("%
\n",unprocessed_query); #endif substring_list = 0; /* Let's read each '&' delimited query list, one at a time */ token = strtok(unprocessed_query, "&"); while ( token != NULL ) { next_field = token + strlen(token) + 1; /* Remember how far into the query we've processed. */ /* One of two special entries will exist, which refer to the logical ANDinf or ORing of the matching fields. */ if (strcmp(token,"ANDING=on") == 0) { combine_list_method = ANDING; token = strtok(next_field, "&\0"); break; } if (strcmp(token,"ANDING=off") == 0) { combine_list_method = ORING; token = strtok(next_field, "&\0"); break; } /* Not an ANDING=on or ORING=on, so must be a search list. First read the field number */ token = strtok(token, "|" ); if (token == NULL) { printf("Error. Missing field number for entry #%d.
This shouldn't happen, please report this to %s

\n", substring_list+1, DATABASE_MAINTAINER); return; } search_on_field[substring_list].field_num = atoi(token); if (search_on_field[substring_list].field_num <0 || search_on_field[substring_list].field_num >FIELDS_PER_RECORD) { printf("Error. Field #%s for entry #%s is out of the acceptable range.

This shouldn't happen, please report this to %s

\n", token, substring_list+1, DATABASE_MAINTAINER); return; } /* To get this far, we've successfully read the field number, now we need to know the type of search we are to perform on this field */ token = strtok( NULL, "|" ); if (token == NULL) { printf("Error. Missing search type for entry #%d.
This shouldn't happen, please report this to %s

\n", substring_list+1, DATABASE_MAINTAINER); return; } /* Okay we have a search type, but is it valid ? */ search_on_field[substring_list].method = atoi(token); if (search_on_field[substring_list].method != ANDING && search_on_field[substring_list].method != ORING && search_on_field[substring_list].method != YEARS && search_on_field[substring_list].method != CENTURIES) { printf("Error. Unkown search type %s for entry #%d
. This shouldn't happen, please report this to %s

\n", token, substring_list+1, DATABASE_MAINTAINER); return; } /* So we now now what type of search we are going to perform, and on which field. We now need to know what we are going to search for. */ item = 0; do { /* Each search item will be delimited by a "/", lets get the next one. */ token = strtok( NULL, "/" ); /* if there wasn't a problem, and the item to search for isn't "=on" which is a FORMS tag, we can add this search item to the list of search items for this field. */ if (token != NULL && strcmp(token, "=on") != 0) { /* Remember where this search item is located in the input string. */ decode_escape_sequences(token); plus_to_space(token); decode_escape_sequences(token); lowercase(token); search_on_field[substring_list].search_items[item] = token; } else search_on_field[substring_list].search_items[item] = NULL; item++; } while (token != NULL && substring_list < MAX_SUBSTRINGS_PER_FIELD-1); substring_list++; token = strtok(next_field, "&\0"); } /* Now for the next query list */ search_on_field[substring_list].field_num = 0; /* end marker */ /* Okay we now know what to search for, how to search for it, and where to search, so why not do the search ?. */ printf(HEADER_BLURB); grep_fields(search_on_field, combine_list_method, DATABASE_FILENAME); printf(FOOTER_BLURB); } /* This 'grep' function searches through a text database, containing one record per line, and any number of fields per record. For each record, we check each search list for a match, giving up when it's pointless continueing, or when we're certain the record is a match. Output from this function is formated according to OUTPUT_FORMAT & OUTPUT_DATA which are defined in the config.h file. 
*/ void grep_fields(the_search_fields, search_method, database_name) search_field the_search_fields[]; /* what to search for and were */ char *database_name; /* database filename */ int search_method; /* search method and/or/range */ { int fields_in_each_record,/* number of fields per record detected */ field, /* an index to a field in a record */ search_item, /* an index to a search list */ search_for, /* an index into a search list */ anding, /* true if anding list items */ oring, /* true if oring list items */ year_range, /* true if list is a year/range */ century_range, /* true if list is a centure/range */ match, /* true if a search was successful */ hits; /* the number of matching records found */ FILE *database; /* the database we're searching */ char *field_number[FIELDS_PER_RECORD+5]; /* an array of pointers to a record's fields */ char *pos_in_record, /* movable pointer within a record */ *new_pos_in_record, /* another pointer */ record[MAX_RECORD_LENGTH], /* holds a record as we check it */ *current_item, /* the current item being searched for */ *current_field, /* the current field being checked */ *from_y, *to_y; /* for choping up centurie ranges */ search_field *current_search; /* the list currently being searched for */ int current_record_hi_year, current_record_low_year, low_year, hi_year; hits = 0; /* A counter of the number of matching record */ /* open the database for reading */ database = fopen(database_name, "r"); if (database == NULL) { printf("Error. Unable to open file %s.
Please notify %s

\n", database_name, DATABASE_MAINTAINER); return; } /* for each record in the database, check to see if it's one we want */ while (fgets(record, MAX_RECORD_LENGTH, database) != NULL) { /* read all the fields */ field = 0; pos_in_record = &record[0]; while (*pos_in_record != '\n') /* while there are more fields */ { field_number[field++] = pos_in_record; /* save position of this field */ if ((new_pos_in_record = strchr(pos_in_record, FIELD_DELIMITER)) != NULL) { *new_pos_in_record = '\0'; /* swaps the delimiter for an end of string marker */ pos_in_record = new_pos_in_record + 1; /* go to next field */ } else pos_in_record = strchr(pos_in_record, '\n'); /* last field, jump to end of record */ } *pos_in_record = '\0'; /* mark end of last field */ fields_in_each_record = field; /* remember the size of each record */ /* we now have one database record in field_number[] */ /* now perform the substring searching on the relavent fields */ search_item = match = 0; /* for each search list (per field) */ current_search = &the_search_fields[search_item]; while ( current_search->field_num != 0 ) { search_for = 0; field = current_search->field_num; /* check the nth field */ current_field = field_number[field-1]; /* what's the search method ? */ anding = (current_search->method == ANDING); oring = (current_search->method == ORING); year_range = (current_search->method == YEARS); century_range = (current_search->method == CENTURIES); /* for each item in the search list */ while ( search_for < MAX_SUBSTRINGS_PER_FIELD && current_search->search_items[search_for] != NULL ) { current_item = current_search->search_items[search_for]; if ( anding || oring ) match = (strstr( in_lowercase(current_field), current_item ) != NULL); if ( year_range ) /* Are we looking for a year ? 
*/ { /* Examine the year field of the current record, it should be xxxx-yyyy or just xxxx, so if we only find xxxx, can make yyyy = xxxx */ from_y = current_field; if (from_y != NULL) to_y = strchr(from_y,'-'); else to_y = NULL; if (to_y != NULL) { *to_y = '\0'; to_y++; } current_record_low_year = get_first_numeric(from_y,"BC",-9999); current_record_hi_year = get_first_numeric(to_y,"BC",-9999); if (to_y != NULL) *(to_y-1) = '-'; if (current_record_hi_year == -9999) current_record_hi_year = current_record_low_year; from_y = current_item; if (from_y != NULL) to_y = strchr(from_y,'-'); else to_y = NULL; if (to_y != NULL) { *to_y = '\0'; to_y++; } /* Similar (same?) format as the current record's year field, so get a high and a low year. */ low_year = get_first_numeric(from_y,"bc",-9999); hi_year = get_first_numeric(to_y,"bc",-9999); if (to_y != NULL) *(to_y-1) = '-'; if ( hi_year == -9999 ) hi_year = low_year; /* There's a match with this record, if the two ranges of years intersect. */ match = ( ( (current_record_hi_year >= low_year) && (current_record_hi_year <= hi_year) ) || ( (current_record_low_year >= low_year) && (current_record_low_year <= hi_year) ) ); } if ( century_range ) /* searching for a century/range of centuries */ { from_y = current_field; if (from_y != NULL) to_y = strchr(from_y,'-'); else to_y = NULL; if (to_y != NULL) { *to_y = '\0'; to_y++; } current_record_low_year = get_first_numeric(from_y,"BC",-9999); current_record_hi_year = get_first_numeric(to_y,"BC",-9999); if (to_y != NULL) *(to_y-1) = '-'; if (current_record_hi_year == -9999) current_record_hi_year = current_record_low_year; /*****************************************************/ from_y = current_item; if (from_y != NULL) to_y = strchr(from_y,'-'); else to_y = NULL; if (to_y != NULL) { *to_y = '\0'; to_y++; } low_year = get_first_numeric(from_y,"bc",-9999); hi_year = get_first_numeric(to_y,"bc",-9999); if (to_y != NULL) *(to_y-1) = '-'; if ( hi_year == -9999 ) hi_year = low_year; match 
= ( ( (current_record_hi_year >= low_year) && (current_record_hi_year <= hi_year) ) || ( (current_record_low_year >= low_year) && (current_record_low_year <= hi_year) ) ); } /* printf("

  • %d %d %d %d\n",current_record_low_year, current_record_hi_year, low_year, hi_year); */ /* Check to see if there's any point/need to carry on checking other search items for this field. */ if ( !match && anding) break; if ( match && oring) break; if ( match && year_range) break; if ( match && century_range) break; search_for++; /* check next list item */ } /* do we have a match so far */ /* if we're 'anding', and there was no match, give up now */ if ( search_method == ANDING && !match ) break; /* if we're 'oring' and there was a match, we have success */ if ( search_method == ORING && match) break; search_item++; /* now for the next list */ current_search = &the_search_fields[search_item]; } /* if we have a match, output the record */ if ( match ) { printf(OUTPUT_FORMAT,OUTPUT_DATA); hits++; } /* next record from the database please */ } /* If the search was unsuccesful, tell the poor user. */ if ( hits == 0 ) printf(NO_HITS_MESSAGE); fclose(database); } /************************************************************** in_lowercase returns a string, converted into lowercase. ***************************************************************/ char *in_lowercase(to_convert) char *to_convert; { int indx = 0; static char converted[MAX_FIELD_LENGTH]; strcpy(converted, to_convert); while (converted[indx] != '\0') { converted[indx] = tolower(converted[indx]); indx++; } return &converted[0]; } /************************************************************** lowercase returns THE string, converted into lowercase. 
/***************************************************************/

/* lowercase: converts to_convert to lower case in place.
   A NULL pointer is ignored. */
void lowercase(char *to_convert)
{
    char *p;

    if (to_convert == NULL)
        return;

    for (p = to_convert; *p != '\0'; p++) {
        /* tolower() needs a value representable as unsigned char */
        *p = (char) tolower((unsigned char) *p);
    }
}

/**************************************************************
 decode_escape_sequences takes a string as input, and translates
 HTTP escape sequences, format %hh, into plain characters, in place.

 Both upper and lower case hex digits are accepted.  A '%' which is
 not followed by two hex digits is copied through unchanged.
***************************************************************/
void decode_escape_sequences(char *request)
{
    char *in = request;    /* next character to examine */
    char *out = request;   /* where the next decoded character goes */
    char hex_num[3];

    while (*in != '\0') {
        /* in[2] is only looked at when in[1] is a hex digit, i.e. not
           the end of the string */
        if (in[0] == '%' &&
            isxdigit((unsigned char) in[1]) &&
            isxdigit((unsigned char) in[2])) {
            hex_num[0] = in[1];
            hex_num[1] = in[2];
            hex_num[2] = '\0';
            *out++ = (char) strtol(hex_num, (char **) NULL, 16);
            in += 3;
        } else {
            *out++ = *in++;
        }
    }
    *out = '\0';
}

/**************************************************************
 plus_to_space takes a string as input, and translates '+' into ' ',
 in place.  Note that '+' in HTTP requests is a field separator.
***************************************************************/
void plus_to_space(char *string)
{
    char *p;

    for (p = string; *p != '\0'; p++) {
        if (*p == '+')
            *p = ' ';
    }
}

/**************************************************************
 get_first_numeric takes a string as input and returns the first
 numeric integer it finds therein.

   from_string    the text to examine, may be NULL
   negate_string  if this text occurs anywhere in from_string the
                  result is negated (used for "BC" years)
   null_return    returned when from_string is NULL or has no digits
***************************************************************/
int get_first_numeric(char *from_string, char *negate_string, int null_return)
{
    size_t first_digit;
    int value;

    if (from_string == NULL)
        return null_return;

    first_digit = strcspn(from_string, "0123456789");
    if (from_string[first_digit] == '\0')
        return null_return;          /* no digit anywhere */

    value = atoi(from_string + first_digit);
    if (negate_string != NULL && strstr(from_string, negate_string) != NULL)
        value = -value;

    return value;
}

/**************************************************************
 decode_slash_and_bar takes a string as input, and translates only
 the HTTP escape sequences for '/' (%2F) and '|' (%7C) into plain
 characters, in place.  Every other escape sequence is left alone.
 The hex digit may be upper or lower case.
***************************************************************/
void decode_slash_and_bar(char *request)
{
    char *in = request;    /* next character to examine */
    char *out = request;   /* where the next character goes */

    while (*in != '\0') {
        if (in[0] == '%' && in[1] == '2' && (in[2] == 'F' || in[2] == 'f')) {
            *out++ = (char) 0x2F;    /* '/' */
            in += 3;
        } else if (in[0] == '%' && in[1] == '7' && (in[2] == 'C' || in[2] == 'c')) {
            *out++ = (char) 0x7C;    /* '|' */
            in += 3;
        } else {
            *out++ = *in++;
        }
    }
    *out = '\0';
}