Logo Search packages:      
Sourcecode: texfam version File versions  Download package

HTWAIS.c

/*                                                     HTWAIS.c
**    WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
**
**    (c) COPYRIGHT MIT 1995.
**    Please first read the full copyright statement in the file COPYRIGH.
**    @(#) $Id: HTWAIS.c,v 2.56 1998/05/04 19:37:40 frystyk Exp $
**
**    This module allows a WWW server or client to read data from a
**    remote  WAIS server, and provide that data to a WWW client in
**    hypertext form. Source files, once retrieved, are stored and used
**    to provide information about the index when that is accessed.
**
** Authors
**    BK    Brewster Kahle, Thinking Machines, <Brewster@think.com>
**    TBL   Tim Berners-Lee, CERN <timbl@w3.org>
**
** Contributors
**    QL    QingLong, Yggdrasil Inc., <qinglong@Yggdrasil.com>
**
** History
**       Sep 91   TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
**       Feb 91   TBL Generated HTML cleaned up a bit (quotes, escaping)
**                    Refers to lists of sources. 
**       Mar 93   TBL   Lib 2.0 compatible module made.     
**       May 95       CHJ modified for freeWAIS-0.5
**       Jun 97   QL  modified for w3c-libwww-5.0a.
**       Mar 98   QL  modified for w3c-libwww-5.1i.
**
** Bugs
**    Uses C stream i/o to read and write sockets, which won't work
**    on VMS TCP systems.
**
**    Should cache connections.
**
**    ANSI C only as written
**
** Bugs fixed
**      NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
**
** WAIS comments:
**
**    1.    Separate directories for different system's .o would help
**    2.    Document ids are rather long!
**
** WWW Address mapping convention:
**
**    /servername/database/type/length/document-id
**
**    /servername/database?word+word+word
*/
/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   Brewster@think.com
*/


/* WAIS address written in this module's "/servername:port/database" form.
** NOTE(review): no use of DIRECTORY is visible in this part of the file -
** confirm it is still referenced before relying on it. */
#define DIRECTORY "/cnidr.org:210/directory-of-servers"

/* Size in bytes of the buffer in which WWW_from_WAIS() assembles the
** printable (URL) form of a document identifier. */
#define BIG 1024  /* identifier size limit  @@@@@ */

/* Library include files */
#include "wwwsys.h"
#include "WWWUtil.h"
#include "WWWCore.h"
#include "WWWHTML.h"
#include "HTReqMan.h"
 
#ifdef HAVE_WAIS_WAIS_H
#include "wais/wais.h"
#else
#ifdef HAVE_WAIS_H
#include "wais.h"
#else
#ifdef WAIS_INCLUDE
#include WAIS_INCLUDE
#else
#include "wais/wais.h"
#endif
#endif
#endif

/*                From WAIS
**                ---------
*/
/* Each value below is #undef'ed first so that any earlier definition
** (presumably from the WAIS headers - confirm) is replaced by the value
** this module wants. */

/* Size in bytes of the request and response message buffers that
** HTWAISEvent() obtains with s_malloc(). */
#undef MAX_MESSAGE_LEN
#define MAX_MESSAGE_LEN 100000
#undef CHARS_PER_PAGE
#define CHARS_PER_PAGE   10000 /* number of chars retrieved in each request */
/* NOTE(review): no use of WAISSEARCH_DATE is visible in this part of the
** file; its meaning cannot be confirmed from here. */
#undef WAISSEARCH_DATE
#define WAISSEARCH_DATE "Fri Jul 19 1991"
/*                FROM WWW
**                --------
*/
#define      BUFFER_SIZE 4096 /* Arbitrary size for efficiency */
/* Size of the scratch buffer display_search_response() formats text into */
#define LINE_BUFFER_SIZE 2048

/* Character that introduces a two-digit hexadecimal escape in the URL form
** of a document id (written by WWW_from_WAIS, read by WAIS_from_WWW) */
#define HEX_ESCAPE '%'

/* Defined in another translation unit; not referenced in the part of the
** file visible here. */
extern FILE * logfile;            /* Log file output */

/* Passed as the last argument of generate_search_apdu() when a search
** request is built in HTWAISEvent(). */
PRIVATE int HTMaxWAISLines = 200; /* Max number of entries from a search */


/* Hypertext object building machinery.
**
** Every macro dispatches through the method table (`isa') of a variable
** that must be called `target' and be in scope where the macro is used.
** Macro parameters are parenthesised so that any expression may be passed.
*/
#define PUTC(c)     ((*target->isa->put_character)(target, (c)))
#define PUTS(s)     ((*target->isa->put_string)(target, (s)))
#define START(e)    ((*target->isa->start_element)(target, (e), 0, 0))
#define END(e)      ((*target->isa->end_element)(target, (e)))
#define FREE_TARGET ((*target->isa->_free)(target))



/*
 * Type definitions and global variables etc. local to this module
 */


/*
 * States of the WAIS load machine.  Negative values are final states;
 * zero and above are intermediate states.  Only the first enumerator is
 * given explicitly - the others follow consecutively, so the numeric
 * values are -2, -1, 0, 1, ... 7.
 */
typedef enum _HTWAISState {
    HTWAIS_ERROR = -2,          /* -2 */
    HTWAIS_OK,                  /* -1 */
    HTWAIS_BEGIN,               /*  0 */
    HTWAIS_PARSING_URL,         /*  1 */
    HTWAIS_NEED_CONNECTION,     /*  2 */
    HTWAIS_NEED_REQUEST,        /*  3 */
    HTWAIS_NEED_RESPONSE,       /*  4 */
    HTWAIS_PARSING_RESPONSE,    /*  5 */
    HTWAIS_FETCH_DOCUMENT,      /*  6 */
    HTWAIS_CLEANUP              /*  7 */
} HTWAISState;


/*
 * Context structure of this module.  HTWAISEvent() receives it as its
 * event parameter and HTWAISCleanup() releases it (it is obtained there
 * through HTNet_context()).
 */
typedef struct _wais_info {
    BOOL         as_gate;           /* Client is using us as gateway */
    HTWAISState  state;             /* Current state */
    int          result;            /* Result to report to the after filter */
    HTNet       *net;               /* Net object */
    FILE        *connection;        /* From connect_to_server(); may be NULL */
    char        *names;             /* Copy of arg to be hacked up */
    char        *basetitle;         /* Database name cut at ".src" */
    char        *wais_database;     /* Name of current database (unescaped) */
    char        *www_database;      /* Same name escaped; points into names */
    char        *request_message;   /* MAX_MESSAGE_LEN bytes from s_malloc */
    char        *response_message;  /* MAX_MESSAGE_LEN bytes from s_malloc */
} wais_info;


/* Layout of a stream object as seen by this module */
struct _HTStream {
    const HTStreamClass *isa;       /* Method table */
    HTStream            *target;
    HTRequest           *request;
    wais_info           *wais;
    int                  status;
    /* ... */
};


/* Input stream object: this module declares only its method table */
struct _HTInputStream {
    const HTInputStreamClass *isa;
};


/* Structured (markup) stream object; accessed here via PUTC/PUTS/START/END */
struct _HTStructured {
    const HTStructuredClass *isa;   /* Method table */
    /* ... */
};


/* ------------------------------------------------------------------------- */
/*                      Auxiliary Functions                      */
/* ------------------------------------------------------------------------- */


/*                                              HTshowDiags
 */
/* modified from Jonny G's version in ui/question.c */

/*
 * Write every diagnostic record that carries additional information to
 * `target' as "Diagnostic code is <DIAG> <ADDINFO>\n".
 *
 *    target      stream the text is written to
 *    d           NULL-terminated array of diagnostic records; must not
 *                itself be NULL
 */
void HTshowDiags (
      HTStream *        target,
      diagnosticRecord **     d)
{
  diagnosticRecord ** rec;

  for (rec = d; *rec != NULL; rec++) {
    /* Records without additional info produce no output at all */
    if ((*rec)->ADDINFO == NULL)
      continue;

    PUTS("Diagnostic code is ");
    PUTS((*rec)->DIAG);
    PUTC(' ');
    PUTS((*rec)->ADDINFO);
    PUTC('\n');
  }
}

/*    Matrix of allowed characters in filenames
**    -----------------------------------------
*/

/* acceptable[c] is YES when byte value c may appear unescaped */
PRIVATE BOOL acceptable[256];
/* Becomes YES once init_acceptable() has filled the table */
PRIVATE BOOL acceptable_inited = NO;

/*
 * Fill the `acceptable' table: letters, digits and "./-_$" are marked
 * YES, every other byte value NO.  Calls after the first return at once.
 */
PRIVATE void init_acceptable (void)
{
 static const char safe_chars[] =
         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
 unsigned int idx;

 if (acceptable_inited == YES) return;

 /* Start from "nothing is acceptable" ... */
 for (idx = 0; idx < 256; idx++)
   acceptable[idx] = NO;

 /* ... then switch on the entries for the characters listed above */
 for (idx = 0; safe_chars[idx] != '\0'; idx++)
   acceptable[(unsigned int) safe_chars[idx]] = YES;

 acceptable_inited = YES;
}


/*    Transform file identifier into WWW address
**    ------------------------------------------
**
**    The identifier is the leading part of `file' up to (not including)
**    the first character that does not compare greater than a space
**    (assumes ASCII encoding).  The result is "file://" followed by that
**    identifier with the first ':' removed (a colon is expected after the
**    host name).
**
** On entry,
**    file        identifier text; NULL is treated as an error
** On exit,
**    returns           nil if error (NULL input or no memory)
**                pointer to malloced string (must be freed) if ok
*/
char * WWW_from_archie  (char * file)
{
    char * end;
    char * result;
    char * colon;
    size_t len;

    if (!file) return NULL;               /* Nothing to transform */

    for (end = file; *end > ' '; end++)   /* assumes ASCII encoding */
        ;
    len = (size_t)(end - file);

    if ((result = (char  *) HT_MALLOC(10 + len)) == NULL)
        HT_OUTOFMEM("result ");
    if (!result) return result;           /* Malloc error */

    memcpy(result, "file://", 7);
    memcpy(result + 7, file, len);
    result[7 + len] = '\0';

    colon = strchr(result + 7, ':');      /* Expect colon after host */
    if (colon) {
        /* Close the gap: move the tail, terminator included, down by one */
        memmove(colon, colon + 1, strlen(colon + 1) + 1);
    }
    return result;
} /* WWW_from_archie */


/*    Transform document identifier into URL
**    --------------------------------------
**
** Bugs: A static buffer of finite size is used!
**    The format of the docid MUST be good!
**
** On exit,
**    returns           nil if error
**                pointer to malloced string (must be freed) if ok
*/
PRIVATE char hex [17] = "0123456789ABCDEF";

PRIVATE char * WWW_from_WAIS (any * docid)

{
    static unsigned char buf[BIG];
    char num[10];
    unsigned char * q = buf;
    char * p = (docid->bytes);
    int i, l;
    if (PROT_TRACE) {
      char *p;
      HTTrace("HTLoadWAIS.. id (%d bytes) is ", (int)docid->size);
      for(p=docid->bytes; p<docid->bytes+docid->size; p++) {
          if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */
            HTTrace("%c", *p);
          else
            HTTrace("<%x>", (unsigned)*p);
      }
      HTTrace("\n");
    }  
    for (p=docid->bytes; (p<docid->bytes+docid->size) && (q<&buf[BIG]);) {
      if (PROT_TRACE)
          HTTrace("............ Record type %d, length %d\n",
                (unsigned char) p[0], (unsigned char) p[1]);
      sprintf(num, "%d", (int)*p);
      memcpy(q, num, strlen(num));
      q += strlen(num);
      p++;
      *q++ = '=';       /* Separate */
      l = (int)((unsigned char)*p);
      p++;
      if (l > 127)
          {
            l = (l - 128) * 128;
            l = l + (int)((unsigned char)*p);
            p++;
          }
      
      for (i = 0; i < l; i++, p++)
          {
            if (!acceptable[(unsigned char)*p]) 
                {
                  *q++ = HEX_ESCAPE;
                  *q++ = hex[((unsigned char)*p) >> 4];
                  *q++ = hex[((unsigned char)*p) & 15];
                }
            else *q++ = (unsigned char)*p;
          }
      *q++= ';';        /* Terminate field */
#ifdef OLD_CODE
        if (*p>10) {
          if (PROT_TRACE)
            HTTrace("WAIS........ DOCID record type of %d!\n", *p);
          return 0;
      }
      {     /* Bug fix -- allow any byte value 15 Apr 93 */
          unsigned int i = (unsigned) *p++;
          
          if (i > 99) {
            *q++ = (i/100) + '0';
            i = i % 100;
          }
          if (i > 9) {
            *q++ = (i/10) + '0';
            i = i % 10;
          }
          *q++ = i + '0';     /* Record type */
      }
      *q++ = '=';       /* Separate */
      l = *p++;         /* Length */
      for(i=0; i<l; i++, p++){
          if (!acceptable[(int)*p]) {
            *q++ = HEX_ESCAPE;      /* Means hex commming */
            *q++ = hex[(*p) >> 4];
            *q++ = hex[(*p) & 15];
          }
          else *q++ = *p;
      }
      *q++= ';';        /* Terminate field */
#endif /* OLD_CODE */
    }
    *q++ = 0;                 /* Terminate string */
    if (PROT_TRACE) HTTrace("HTLoadWAIS.. WWW form of id: %s\n", buf); 
    {
        char *result;
      if ((result = (char *) HT_MALLOC((int) strlen((char *) buf)+1))==NULL)
          HT_OUTOFMEM("WWW_from_WAIS");
      strcpy(result, (char *) buf);
      return result;
    }
} /* WWW_from_WAIS */


/*    Transform URL into WAIS document identifier
**    -------------------------------------------
**
**    Inverse of WWW_from_WAIS: every "<type>=<data>;" field of docname
**    becomes one record made of a type byte, a length (one byte for up to
**    127 data bytes, two bytes otherwise) and the data with its
**    HEX_ESCAPE sequences decoded.
**
** On entry,
**    docname           points to valid name produced originally by
**                WWW_from_WAIS
** On exit,
**    returns           docid if ok, 0 if docname is malformed (missing '='
**                or ';', incomplete hex escape, field longer than
**                16383 bytes) or memory ran out
**    docid->size is valid: the number of bytes of the complete records
**                written (0 when nothing was converted)
**    docid->bytes      is malloced and must later be freed.  This also
**                holds when 0 is returned because docname is
**                malformed; it is NULL only when docname is NULL
**                or no memory could be obtained.
*/
PRIVATE any * WAIS_from_WWW  (any * docid, char * docname)
{
    char *z;      /* Output pointer */
    char *p;      /* Start of the current field */
    char *q;      /* Position of "=" */
    char *s;      /* Position of semicolon */
    char *d;      /* Walks over the data of the current field */
    long len;     /* Decoded length of the current field */
    size_t namelen;

    if (!docid) return 0;
    docid->size = 0;
    docid->bytes = NULL;
    if (!docname) return 0;

    if (PROT_TRACE)
      HTTrace("HTLoadWAIS.. WWW id (to become WAIS id): %s\n",
            docname);

    /* A field of R characters yields at most R+1 bytes, and R+1 only when
    ** it holds more than 127 data bytes, so this bound cannot be exceeded. */
    namelen = strlen(docname);
    if ((docid->bytes = (char *) HT_MALLOC(namelen + namelen/128 + 32)) == NULL)
      HT_OUTOFMEM("docid->bytes");
    if (!docid->bytes) return 0;    /* Malloc error */
    z = docid->bytes;

    for (p = docname; *p; p = s + 1) {
      q = strchr(p, '=');
      if (!q)
          return 0;     /* Bad! No '=' */
      s = strchr(q, ';');
      if (!s)
          return 0;     /* Bad! No ';' */

      /* First pass: check the escapes and measure the decoded data.
      ** Nothing is written yet, and the scan never goes beyond `s'. */
      len = 0;
      for (d = q + 1; d < s; len++) {
          if (*d == HEX_ESCAPE) {
            if (s - d < 3)
                return 0;     /* Escape cut short by the ';' */
            d += 3;
          } else {
            d++;
          }
      }
      if (len > 127 * 128 + 127)
          return 0;     /* Does not fit the two byte length */

      /* Record type: strtol stops by itself at the '=' */
      *z++ = (char) strtol(p, NULL, 10);

      /* Record length */
      if (len > 127) {
          *z++ = (char)(128 + len / 128);
          *z++ = (char)(len % 128);
      } else {
          *z++ = (char)len;
      }

      /* Second pass: record data */
      for (d = q + 1; d < s; ) {
          if (*d == HEX_ESCAPE) {
            unsigned int b = HTAsciiHexToChar(d[1]);
            *z++ = (b<<4) + HTAsciiHexToChar(d[2]);
            d += 3;
          } else {
            *z++ = *d++;      /* Record */
          }
      }

      /* Only complete records are counted */
      docid->size = z - docid->bytes;
    }

    if (PROT_TRACE) {
      HTTrace("WAIS........ id (%d bytes) is ", (int)docid->size);
      for (d = docid->bytes; d < docid->bytes + docid->size; d++) {
          if ((*d >= ' ') && (*d <= '~')) /* Assume ASCII! */
            HTTrace("%c", *d);
          else
            HTTrace("<%x>", (unsigned)(unsigned char)*d);
      }
      HTTrace("\n");
    }
    return docid;       /* Ok */

} /* WAIS_from_WWW */


/*    Send a plain text record to the client          output_text_record()
**    --------------------------------------
**
** On entry,
**    target                  stream the text is written to
**    record                  text record; nothing is written when it or
**                      its DocumentText is NULL
**    quote_string_quotes     not used by this function
**    binary                  when set, the whole text is passed on in a
**                      single put_block call without any filtering
**
**    In text mode an ESC byte (27) starts a marker that is dropped
**    together with the bytes following it: one byte when the next byte is
**    '(' or ')', four bytes otherwise.  CR and LF are both sent as '\n';
**    TAB and printable characters are passed on, anything else is
**    dropped.
*/

PRIVATE void output_text_record (
    HTStream *                target,
    WAISDocumentText *        record,
    boolean             quote_string_quotes,
    boolean                    binary)
{
  long count;
  long size;

  if (!record || !record->DocumentText) return;

  if (binary) {
    (*target->isa->put_block)(target,
                        record->DocumentText->bytes,
                        record->DocumentText->size);
    return;
  }

  size = (long) record->DocumentText->size;
  for (count = 0; count < size; count++) {
    long ch = (unsigned char)record->DocumentText->bytes[count];
    if (ch == 27) {     /* What is this in for? Tim */
      /* then we have an escape code */
      if (count + 1 >= size)
          break;                  /* ESC is the last byte: nothing follows */

      /* if the next letter is '(' or ')', then ignore two letters */
      if ('(' == record->DocumentText->bytes[count + 1] ||
          ')' == record->DocumentText->bytes[count + 1])
          count += 1;             /* it is a term marker */
      else
          count += 4;             /* it is a paragraph marker */
    } else if (ch == '\n' || ch == '\r') {
      PUTC('\n');
    } else if ((ch == '\t') || isprint((int)ch)) {
      PUTC(ch);
    }
  }
} /* output text record */



/*    Format A Search response for the client         display_search_response
**    ---------------------------------------
** modified from tracy shen's version in wutil.c
** displays either a text record or a set of headlines.
**
** On entry,
**    target            structured stream the HTML is written to
**    response    search response to display
**    database    name of the database searched; a name containing
**                "archie" makes every headline be treated as a file
**                identifier (see WWW_from_archie)
**    keywords    the words searched for (used in the introduction only)
**
**    A summary line is written, then a MENU with one LI per document
**    header: score, number of lines and the headline.  The headline is an
**    anchor to "<database>/<types>/<length>/<doc id>", or to the headline
**    itself when the first type of the document is "URL".  Short header,
**    long header, headline and code records are only announced; text
**    records are written out through output_text_record().
**
**    Strings of unbounded length are never formatted into the fixed size
**    line buffer: they are either written piece by piece or measured
**    first.  A document whose address would not fit is listed without an
**    anchor.
*/
void display_search_response (HTStructured *          target,
                        SearchResponseAPDU *    response,
                        char *                  database,
                        char *                  keywords)
{
  WAISSearchResponse  *info;
  long i, k;
  char line[LINE_BUFFER_SIZE];      /* For building strings to display */

  BOOL archie =  strstr(database, "archie")!=0; /* Special handling */

  if (PROT_TRACE) HTTrace("WAIS........ Displaying search response\n");

  /* "Index <db> contains the following <n> item(s) relevant to '<kw>'."
  ** Only the part of bounded length goes through the line buffer. */
  PUTS("Index ");
  PUTS(database);
  sprintf(line, " contains the following %d item%s relevant to '",
        (int)(response->NumberOfRecordsReturned),
        response->NumberOfRecordsReturned ==1 ? "" : "s");
  PUTS(line);
  PUTS(keywords);
  PUTS("'.\n");
  PUTS("The first figure for each entry is its relative score, ");
  PUTS("the second the number of lines in the item.");
  START(HTML_MENU);

  if ( response->DatabaseDiagnosticRecords != 0 ) {
    info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords;
    i = 0;

    if (info->Diagnostics != NULL)
      HTshowDiags((HTStream*)target, info->Diagnostics);

    if ( info->DocHeaders != 0 ) {
      for (k = 0; info->DocHeaders[k] != 0; k++) {
        WAISDocumentHeader* head = info->DocHeaders[k];
        char * headline = trim_junk(head->Headline);
        any * docid = head->DocumentID;
        i++;

        if (!headline) headline = "";     /* Be robust */

        if (PROT_TRACE)
          HTTrace("HTWAIS:  %2ld: Score: %4ld, lines:%4ld '%s'\n", i,
                (long int)(head->Score),
                (long int)(head->Lines),
                headline);

        START(HTML_LI);
        sprintf(line, "%4ld  %4ld  ",
              (long int)(head->Score),
              (long int)(head->Lines));
        PUTS(line);

        if (archie) {
          char * www_name = WWW_from_archie(headline);
          if (www_name) {
            HTStartAnchor(target, NULL, www_name);
            PUTS(headline);
            END(HTML_A);
            HT_FREE(www_name);
          } else {
            PUTS(headline);
            PUTS(" (bad file name)");
          }
        } else { /* Not archie */
          /* Make a printable string out of the document id. */
          char * docname = WWW_from_WAIS(docid);
          if (docname) {
            char * dbname = HTEscape(database, URL_XPALPHAS);
            char types_array[1000];
            const char * href = NULL;
            /* Types may be an empty list: look at Types[0] only if set */
            BOOL is_url = (head->Types && head->Types[0] &&
                       !strcmp(head->Types[0], "URL"));   /* NT, Sep 93 */

            /* Comma separated list of the escaped types */
            types_array[0] = 0;
            if (head->Types) {
              int t;
              for (t = 0; head->Types[t]; t++) {
                char * type_escaped = HTEscape (head->Types[t], URL_XALPHAS);
                size_t needed;
                if (!type_escaped) break;
                needed = strlen(types_array) + (t ? 1 : 0)
                       + strlen(type_escaped) + 1;
                if (needed > sizeof(types_array)) {
                  /* No room left: keep the types collected so far */
                  HT_FREE(type_escaped);
                  break;
                }
                if (t)
                  strcat (types_array, ",");
                strcat (types_array, type_escaped);
                HT_FREE(type_escaped);
              }
              if (PROT_TRACE)
                HTTrace("WAIS........ Types_array `%s\'\n",
                      types_array);
            } else {
              strcat (types_array, "TEXT");
            }

            if (is_url) {
              href = headline;
            } else if (dbname &&
                   strlen(dbname) + strlen(types_array) + strlen(docname)
                   + 24 < sizeof(line)) {
              /* 24 covers the three '/', the length and the NUL */
              sprintf(line, "%s/%s/%d/%s",
                    dbname,
                    types_array,
                    (int)(head->DocumentLength),
                    docname);
              href = line;
            }

            if (href) {
              HTStartAnchor(target, NULL, href);
              PUTS(headline);
              END(HTML_A);
            } else {
              PUTS(headline);
              PUTS(" (address too long)");
            }
            HT_FREE(dbname);
            HT_FREE(docname);
          } else {
            PUTS("(bad doc id)");
          }
        }
      } /* next document header */
    } /* if there were any document headers */

    /* The loops below step k themselves: the announcements are written
    ** once per record instead of forever for the first one. */
    if ( info->ShortHeaders != 0 ) {
      for (k = 0; info->ShortHeaders[k] != 0; k++) {
        i++;
        PUTS( "(Short Header record, can't display)");
      }
    }
    if ( info->LongHeaders != 0 ) {
      for (k = 0; info->LongHeaders[k] != 0; k++) {
        i++;
        PUTS( "\nLong Header record, can't display\n");
      }
    }
    if ( info->Text != 0 ) {
      for (k = 0; info->Text[k] != 0; k++) {
        i++;
        PUTS( "\nText record\n");
        output_text_record((HTStream*)target, info->Text[k], false, false);
      }
    }
    if ( info->Headlines != 0 ) {
      for (k = 0; info->Headlines[k] != 0; k++) {
        i++;
        PUTS( "\nHeadline record, can't display\n");
      }
    }
    if ( info->Codes != 0 ) {
      for (k = 0; info->Codes[k] != 0; k++) {
        i++;
        PUTS( "\nCode record, can't display\n");
      }
    }
  }                     /* Loop: display user info */
  END(HTML_MENU);
  PUTC('\n');
}


/*    HTWAISCleanup
**    -----------
**      This function closes the connection and frees memory.
**
**    The stream carrying data to the network is released first (aborted
**    when status is HT_INTERRUPTED, freed otherwise) unless the request is
**    a destination, in which case the destination is removed instead.
**    Then the WAIS context attached to the net object is torn down, a
**    fatal error naming the unescaped physical address is added when
**    status is negative, and the net object is deleted.
**
**      Returns YES on OK, else NO (NO only when req is NULL)
*/
PRIVATE int HTWAISCleanup (HTRequest *req, int status)
{
 HTNet* net;
 HTStream* input;
 wais_info* ctx;

 if (!req) return NO;

 net = HTRequest_net(req);
 input = HTRequest_inputStream(req);

 /* Free stream with data TO network */
 if (HTRequest_isDestination(req)) {
    HTRequest_removeDestination(req);
 } else if (input) {
    if (status == HT_INTERRUPTED) {
       (*input->isa->abort)(input, NULL);
    } else {
       (*input->isa->_free)(input);
    }
    HTRequest_setInputStream(req, NULL);
 }

 /* Without a net object there is nothing more to release */
 if (!net) return YES;

 ctx = (wais_info*)HTNet_context(net);
 if (ctx != NULL) {
    ctx->state = HTWAIS_CLEANUP;

    if (ctx->connection) {
       close_connection_to_server(ctx->connection);
    }
    if (ctx->wais_database) {
       HT_FREE(ctx->wais_database);
    }
    if (ctx->request_message) {
       s_free(ctx->request_message);
    }
    if (ctx->response_message) {
       s_free(ctx->response_message);
    }

    /* www_database is not freed: the event handler makes it point into
    ** names */
    HT_FREE(ctx->names);
    HT_FREE(ctx->basetitle);

    if (status < 0) {
       HTParentAnchor* anchor = HTRequest_anchor(req);   /* Be robust */
       char* arg = anchor ? HTAnchor_physical(anchor) : NULL;
       char* unescaped = NULL;
       void* err_par;
       unsigned int err_par_length;

       if (arg) {
          StrAllocCopy(unescaped, arg);
          HTUnEscape(unescaped);
       }

       /* Report the address if there is one, a placeholder otherwise */
       err_par = unescaped ? (void*)unescaped : (void*)"???";
       err_par_length = unescaped ? (unsigned int)(strlen(unescaped)) : 3;

       HTRequest_addError(req, ERR_FATAL, NO, HTERR_INTERNAL,
                    err_par, err_par_length, "HTLoadWAIS");
       if (unescaped) {
          HT_FREE(unescaped);
       }
    }

    /* Free and remove our own context structure for wais */
    HT_FREE(ctx);
    HTNet_setContext(net, NULL);
 }

 /* Remove the request object */
 HTNet_delete(net, status);

 return YES;
}




/*    Load Document from WAIS Server                        HTLoadWAIS()
**    ------------------------------
**
** On entry,
**    request           This is the request structure
** On exit,
**    returns           <0          Error has occurred
**                HT_LOADED   OK
*/

/* Capacity, not counting the terminator, of the keywords buffer declared
** in HTWAISEvent() */
#define MAX_KEYWORDS_LENGTH 4000
/* NOTE(review): no use of the three limits below is visible in this part
** of the file - confirm they are still needed. */
#define MAX_SERVER_LENGTH   1000
#define MAX_DATABASE_LENGTH 1000
#define MAX_SERVICE_LENGTH  1000

PRIVATE int HTWAISEvent(SOCKET soc, void * pVoid, HTEventType type)
{
 wais_info* theWAISinfo;  /* Specific protocol information */
 HTNet* net;              /* Generic protocol information */
 HTRequest* request;
 HTParentAnchor* anchor;
 const char * arg;
 HTStream* sink;
 HTFormat format_out;
#if 0
 static const char * error_header = "<h1>Access error</h1>\n<p>The following error occured in accesing a WAIS server:\n</p>\n";
#endif
 char* key;               /* pointer to keywords in URL */
 long request_buffer_length;  /* how of the request is left */
 SearchResponseAPDU* retrieval_response = 0;
 char keywords[MAX_KEYWORDS_LENGTH + 1];
 char* server_name = NULL;
 char* service;
 char* docname = NULL;
 char* doctype = NULL;
 long document_length = -1;
 BOOL ok = NO;
 int status = HT_ERROR;

#if 0
 FILE* connection = 0;
 char* names;           /* Copy of arg to be hacked up */
 char* basetitle = NULL;
 char* wais_database = NULL;    /* name of current database */
 char*  www_database = NULL;    /* Same name escaped */
 char*  request_message = NULL; /* arbitrary message limit */
 char* response_message = NULL; /* arbitrary message limit */
#endif
    
#if 0
 extern FILE * connect_to_server();
#endif


 if ((theWAISinfo = (wais_info*)pVoid))   /* Be robust */
   {
    if ((net = theWAISinfo->net) == NULL) return HT_ERROR;
    if ((request = HTNet_request(net)) == NULL) return HT_ERROR;
    if ((anchor = HTRequest_anchor(request)))
      arg = HTAnchor_physical(anchor);
     else
      return HT_ERROR;

    sink = HTRequest_outputStream(request);
    format_out = HTRequest_outputFormat(request);
   }
  else
   return HT_ERROR;
    

 if (type == HTEvent_BEGIN)
   {
    theWAISinfo->state  = HTWAIS_BEGIN;
    theWAISinfo->result = HT_ERROR;
   }
  else
   if (type == HTEvent_CLOSE)
     {
     /* Interrupted */
      char interrupted[] = "request interruption";

      HTRequest_addError(request, ERR_FATAL, NO, HTERR_INTERRUPTED,
                   (void*)interrupted, (unsigned int)strlen(interrupted),
                   "HTLoadWAIS");
      HTWAISCleanup(request, HT_INTERRUPTED);
      return HT_OK;
     }
    else
     if (type == HTEvent_END)
       {
      HTWAISCleanup(request, (theWAISinfo ? theWAISinfo->result : HT_ERROR));
      return HT_OK;
       }


 /*   Decipher and check syntax of WWW address:
 **   ----------------------------------------
 **
 **   First we remove the "wais:" if it was spcified.  920110
 */

 if (!acceptable_inited) init_acceptable();
 theWAISinfo->state = HTWAIS_PARSING_URL;
 if (PROT_TRACE) HTTrace("HTLoadWAIS.. Looking for \"%s\"\n", arg);

 theWAISinfo->names = HTParse(arg, "",
                        PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION);
 key = strchr(theWAISinfo->names, '?');

 if (key)
   {
    char* p;

    *key++ = 0;   /* Split off keywords */

    for (p=key; *p; p++) if (*p == '+') *p = ' ';
    HTUnEscape(key);
   }

 if (theWAISinfo->names[0] == '/')
   {
    server_name = theWAISinfo->names+1;

      /* Accept one or two */
    if ((theWAISinfo->as_gate = (*server_name == '/'))) server_name++;

    if ((theWAISinfo->www_database = strchr(server_name, '/')))
      {
       *(theWAISinfo->www_database)++ = 0;  /* Separate database name */
       doctype = strchr(theWAISinfo->www_database, '/');

       if (key)
       ok = YES;  /* Don't need doc details */
        else
       if (doctype)
         {  /* If not search parse doc details */
          char* doclength;

          *doctype++ = 0;     /* Separate rest of doc address */
          doclength = strchr(doctype, '/');
          if (doclength)
            {
             *doclength++ = 0;
             document_length = atol(doclength);
             if (document_length)
             {
              docname = strchr(doclength, '/');
              if (docname)
                {
                 *docname++ = 0;
                 ok = YES;    /* To avoid a goto! */
                } /* if docname */
             } /* if document_length valid */
            } /* if doclength */
         }
        else
         { /* no doctype?  Assume index required */
          if (!key) key = "";
          ok = YES;
         } /* if doctype */
      } /* if database */
   }
     
 if (!ok)
   {
    char *unescaped = NULL;

    StrAllocCopy(unescaped, arg);
    HTUnEscape(unescaped);
    HTRequest_addError(request, ERR_FATAL, NO, HTERR_BAD_REQUEST,
                   (void *) unescaped, (int) strlen(unescaped),
                   "HTLoadWAIS");
    HT_FREE(unescaped);
    HT_FREE(theWAISinfo->names);
    return -1;
   }
    
 if (PROT_TRACE) HTTrace("HTLoadWAIS.. URL Parsed OK\n");
 theWAISinfo->state = HTWAIS_NEED_CONNECTION;

 if ((service = strchr(theWAISinfo->names, ':')))
   *service++ = 0;
  else
   service = "210";

 if ((server_name ? server_name[0] : 0))
   {
    boolean do_need_to_connect_to_server = true;

    if (key) if ((*key) == 0) do_need_to_connect_to_server = false;

    if (do_need_to_connect_to_server)
      {
       if ((theWAISinfo->connection = connect_to_server(server_name,
                                          atoi(service)))
         == NULL)
       {
        char* host = HTParse(arg, "", PARSE_HOST);

        if (PROT_TRACE)
          HTTrace("HTLoadWAIS.."
                " Can't open connection to %s via service %s.\n",
                server_name, service);

        HTRequest_addError(request, ERR_FATAL, NO, HTERR_WAIS_NO_CONNECT,
                       (void *) host, (int) strlen(host), "HTLoadWAIS");
        theWAISinfo->result = HT_ERROR;
        HTWAISCleanup(request, status);
        return status;
       }
      }
   }
  else
   theWAISinfo->connection = NULL;

 StrAllocCopy(theWAISinfo->wais_database, theWAISinfo->www_database);
 HTUnEscape(theWAISinfo->wais_database);


 /* Make title name without the .src */
 {
  char *srcstr;

  StrAllocCopy(theWAISinfo->basetitle, theWAISinfo->wais_database);
  if ((srcstr = strstr(theWAISinfo->basetitle, ".src")) != NULL)
    *srcstr = '\0';
 }
    

 /* This below fixed size stuff is terrible */
 if ((theWAISinfo->request_message =
                       (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char)))
     == NULL)
   HT_OUTOFMEM("WAIS request message");

 if ((theWAISinfo->response_message =
                       (char*)s_malloc((size_t)MAX_MESSAGE_LEN * sizeof(char)))
     == NULL)
   HT_OUTOFMEM("WAIS response message");


 /*   If keyword search is performed but there are no keywords,
 **   the user has followed a link to the index itself.
 **     It would be appropriate at this point to send him the .SRC file - how?
 */

 if (key)
   {
    if (*key)
      {
      /*
       *   S E A R C H   (nonempty key (*key != 0))
       */
       char *p;
       HTStructured* target;

       theWAISinfo->state = HTWAIS_NEED_RESPONSE;

       strncpy(keywords, key, MAX_KEYWORDS_LENGTH);
       while ((p = strchr(keywords,'+'))) *p = ' ';

       /* Send advance title to get something fast to the other end */

       target = HTMLGenerator(request, NULL, WWW_HTML, format_out, sink);

       START(HTML_HTML);
       START(HTML_HEAD);
       START(HTML_TITLE);
       PUTS(keywords);
       PUTS(" in ");
       PUTS(theWAISinfo->basetitle);
       END(HTML_TITLE);
       END(HTML_HEAD);

       START(HTML_BODY);
       START(HTML_H1);
       PUTS("WAIS Search of \"");
       PUTS(keywords);
       PUTS("\" in ");
       PUTS(theWAISinfo->basetitle);
       END(HTML_H1);

       START(HTML_ISINDEX);

       request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */
       if (PROT_TRACE)
       HTTrace("HTLoadWAIS.. Search for `%s' in `%s'\n",
             keywords, theWAISinfo->wais_database);

       if (generate_search_apdu(theWAISinfo->request_message + HEADER_LENGTH, 
                        &request_buffer_length, 
                        keywords, theWAISinfo->wais_database, NULL,
                        HTMaxWAISLines) == NULL)
       {
        if (PROT_TRACE)
          HTTrace("WAIS Search. Too many lines in response\n");

        HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW, 
                       NULL, 0, "HTLoadWAIS");
       }

       if (!interpret_message(theWAISinfo->request_message, 
                        MAX_MESSAGE_LEN - request_buffer_length, 
                        theWAISinfo->response_message,
                        MAX_MESSAGE_LEN,
                        theWAISinfo->connection,
                        false /* true verbose */
                        ))
       {
        if (PROT_TRACE)
          HTTrace("WAIS Search. Too many lines in response\n");

        HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW, 
                       NULL, 0, "HTLoadWAIS");
       }
        else
       {    /* returned message ok */
        SearchResponseAPDU  *query_response = 0;

        readSearchResponseAPDU(&query_response,
                        theWAISinfo->response_message + HEADER_LENGTH);
        display_search_response(target, 
                          query_response, theWAISinfo->wais_database,
                          keywords);
        if (query_response->DatabaseDiagnosticRecords)
          freeWAISSearchResponse(query_response->DatabaseDiagnosticRecords);

        freeSearchResponseAPDU(query_response);
       }    /* returned message not too large */

       END(HTML_BODY);
       END(HTML_HTML);
       FREE_TARGET;

       HTAnchor_setFormat(anchor, WWW_HTML);

       theWAISinfo->result = status = HT_LOADED;
      }
     else
      {
      /*
       *   I N D E X   (key is empty (*key = 0))
       */
#ifdef CACHE_FILE_PREFIX
       char filename[256];
       FILE * fp;
#endif
       HTStructured* target = HTMLGenerator(request, NULL,
                                  WWW_HTML, format_out, sink);

       theWAISinfo->state = HTWAIS_NEED_REQUEST;

       {
      START(HTML_HTML);
      START(HTML_HEAD);
      START(HTML_TITLE);
      PUTS(theWAISinfo->basetitle);
      PUTS(" Index");
      END(HTML_TITLE);
      END(HTML_HEAD);

      START(HTML_BODY);
      START(HTML_H1);
      PUTS("WAIS Index: ");
      PUTS(theWAISinfo->basetitle);
      END(HTML_H1);
       }

       START(HTML_ISINDEX);

       /* If we have seen a source file for this database, use that: */

#ifdef CACHE_FILE_PREFIX
       sprintf(filename,
             "%sWSRC-%s:%s:%.100s.txt",
             CACHE_FILE_PREFIX,
             server_name, service, theWAISinfo->www_database);

       fp = fopen(filename, "r");   /* Have we found this already? */
       if (PROT_TRACE)
       HTTrace("HTLoadWAIS.. Description of server %s %s.\n",
             filename,
             fp ? "exists already" : "does NOT exist!");

       if (fp)
       {
        int c;

        START(HTML_PRE);   /* Preformatted description */
        while((c=getc(fp)) != EOF) PUTC(c);   /* Transfer file */
        END(HTML_PRE);
        fclose(fp);
       }
#endif

       END(HTML_BODY);
       END(HTML_HTML);
       FREE_TARGET;
      }

    HTAnchor_setFormat(anchor, WWW_HTML);

    theWAISinfo->result = status = HT_LOADED;
   }
  else
   {/* document rather than search */
   /*
    *   D O C U M E N T    F E T C H   (no key (key == NULL))
    */
    boolean binary = true;     /* how to transfer stuff coming over */
    HTStream* target;
    HTAtom* document_type_atom = HTAtom_for("application/octet-stream");
    long count;
    any   doc_chunk;
    any * docid = &doc_chunk;

    theWAISinfo->state = HTWAIS_FETCH_DOCUMENT;
    if (PROT_TRACE)
      HTTrace("HTLoadWAIS.. Retrieve document `%s'\n"
            "............ type `%s' length %ld\n",
            (docname ? docname : "unknown"),
            (doctype ? doctype : "unknown"),
            document_length);

    if (doctype)
      {
       if (strcmp(doctype, "WSRC") == 0)
       {
        document_type_atom = HTAtom_for("application/x-wais-source");
        binary = false;
       }
        else
       if (strcmp(doctype, "TEXT") == 0)
         {
          document_type_atom = WWW_UNKNOWN;
          binary = false;
         }
        else
         if (strcmp(doctype, "HTML") == 0)
           {
            document_type_atom = WWW_HTML;
            binary = false;
           }
          else
           if (strcmp(doctype, "GIF") == 0) document_type_atom = WWW_GIF;
      }

    HTAnchor_setFormat(anchor, document_type_atom);

    /* Guess on TEXT format as it might be HTML */
    if ((target = HTStreamStack(HTAnchor_format(anchor),
                        HTRequest_outputFormat(request),
                        HTRequest_outputStream(request),
                        request, YES))
      == NULL)
      {
       theWAISinfo->result = HT_ERROR;
       status = -1;
       HTWAISCleanup(request, status);
       return status;
      }

    /* Decode hex or literal format for document ID */
    WAIS_from_WWW(docid, docname);

    /* Loop over slices of the document */
    for (count = 0; count * CHARS_PER_PAGE < document_length; count++)
      {
       char *type = s_strdup(doctype);

       request_buffer_length = MAX_MESSAGE_LEN;       /* Amount left */
       if (PROT_TRACE) HTTrace("HTLoadWAIS.. Slice number %ld\n", count);
       if (generate_retrieval_apdu(theWAISinfo->request_message + HEADER_LENGTH,
                           &request_buffer_length, 
                           docid, CT_byte,
                           count * CHARS_PER_PAGE,
                           HTMIN((count + 1) * CHARS_PER_PAGE,
                               document_length),
                             type,
                           theWAISinfo->wais_database) == 0)
       {
        HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW, 
                       NULL, 0, "HTLoadWAIS");
       }

       HT_FREE(type);

       /* Actually do the transaction given by request_message */   
       if (interpret_message(theWAISinfo->request_message, 
                       MAX_MESSAGE_LEN - request_buffer_length, 
                       theWAISinfo->response_message,
                       MAX_MESSAGE_LEN,
                       theWAISinfo->connection,
                       false /* true verbose */ 
                       )
         == 0)
       {
        HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_OVERFLOW, 
                       NULL, 0, "HTLoadWAIS");
       }

       /* Parse the result which came back into memory. */
       readSearchResponseAPDU(&retrieval_response, 
                        theWAISinfo->response_message + HEADER_LENGTH);
       {
      WAISSearchResponse* searchres = (WAISSearchResponse*)retrieval_response->DatabaseDiagnosticRecords;

      if (!searchres->Text)
        {
         if (searchres->Diagnostics &&
             *searchres->Diagnostics &&
             (*searchres->Diagnostics)->ADDINFO)
           {
            char *errmsg = (*searchres->Diagnostics)->ADDINFO;

            HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
                         (void *) errmsg, (int) strlen(errmsg),
                         "HTLoadWAIS");
           }
          else
           {
            HTRequest_addError(request, ERR_WARN, NO, HTERR_WAIS_MODULE,
                         NULL, 0, "HTLoadWAIS");
           }

         (*target->isa->_free)(target);
         HTRequest_setOutputStream(request, NULL);
         HT_FREE(docid->bytes);
         freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords); 
         freeSearchResponseAPDU(retrieval_response);
         theWAISinfo->result = HT_OK;
         HTWAISCleanup(request, status);
        }
       else
        {
         output_text_record(target, *searchres->Text,
                        false, binary);
         freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords);
         freeSearchResponseAPDU(retrieval_response);
        } /* If text existed */
       }
      } /* Loop over slices */

    (*target->isa->_free)(target);
    HTRequest_setOutputStream(request, NULL);
    HT_FREE(docid->bytes);

    theWAISinfo->result = status = HT_LOADED;
   }
 /* End ``if (key)'' */

 HTWAISCleanup(request, status);

 return status;
}


/*
**    HTLoadWAIS - entry point for loading a document through WAIS
**
**    Fetches the anchor and the net object belonging to `request',
**    makes sure a wais_info context is attached to that net object
**    (creating and registering one on the first call) and then hands
**    over to HTWAISEvent() with HTEvent_BEGIN.
**
**    On entry:
**          soc         socket, passed on unchanged to HTWAISEvent()
**          request     the request to serve
**
**    Returns:
**          HT_ERROR    if `request' is NULL, or if it has no anchor
**                      or no net object;
**          otherwise   whatever HTWAISEvent() returns.
*/
PUBLIC int HTLoadWAIS (SOCKET soc, HTRequest* request)
{
 wais_info* wais;         /* WAIS specific protocol information */
 HTNet* net;              /* Generic protocol information */
 HTParentAnchor* anchor;

 /* Nothing can be done without a request, its anchor and its net object */
 if (!request)
   return HT_ERROR;

 anchor = HTRequest_anchor(request);
 if (!anchor)
   return HT_ERROR;

 net = HTRequest_net(request);
 if (!net)
   return HT_ERROR;

 if (PROT_TRACE)
   HTTrace("HTWAIS...... Looking for `%s\'\n", HTAnchor_physical(anchor));

 /*
  * Re-use the context already bound to the net object, if any.
  * Otherwise set up a new one here: this is logically the
  * HTWAIS_BEGIN state, but it has to happen outside the state
  * machine because the state machine needs the structure to exist.
  */
 wais = (wais_info*)HTNet_context(net);
 if (!wais)
   {
    wais = (wais_info*)HT_CALLOC(1, sizeof(wais_info));
    if (!wais)
      HT_OUTOFMEM("HTLoadWAIS");   /* NOTE(review): presumably does not return - confirm */

    /* Bind the new context to the net object; event callbacks get it too */
    HTNet_setEventCallback(net, HTWAISEvent);
    HTNet_setEventParam(net, wais);
    HTNet_setContext(net, wais);

    /* Result stays HT_ERROR until the state machine says otherwise */
    wais->net    = net;
    wais->result = HT_ERROR;
    wais->state  = HTWAIS_BEGIN;
   }

 /* Kick off the state machine (original note: the event type is ignored) */
 return HTWAISEvent(soc, wais, HTEvent_BEGIN);
}

/* Generated by Doxygen 1.6.0 */