Access Manager

/*
**	(c) COPYRIGHT MIT 1995.
**	Please first read the full copyright statement in the file COPYRIGH.
*/
This module keeps a list of valid protocol (naming scheme) specifiers with associated access code. It allows documents to be loaded given various combinations of parameters. New access protocols may be registered at any time.

This module is implemented by HTAccess.c, and it is a part of the Library of Common Code.

The module contains a lot of stuff but the main topics are:

#ifndef HTACCESS_H
#define HTACCESS_H

#include "HTList.h"
#include "HTChunk.h"

Global Flags

Flags and variables which may be set to control the Library

Redirections

The maximum number of redirections is 10 by default. This prevents the library from going into an infinite loop, which is kind of nice :-)
/* Maximum number of redirections allowed; guards against redirection loops. */
extern int HTMaxRedirections;

Other Flags

/*
** Global flags and variables that control the Library. The last two are
** relevant when the Library is running as part of cern_httpd.
*/
extern char * HTClientHost;		/* Name or number of telnetting host */
extern BOOL HTSecure;			/* Disable security holes? */

extern char * HTImServer;		/* If I'm cern_httpd */
extern BOOL HTImProxy;			/* If I'm cern_httpd as a proxy */

Initializing and Terminating the Library

These two functions initialize memory and settings for the Library, and clean up memory kept by the Library when the application is about to exit. It is highly recommended that they are used!
/* Set up the memory and settings used by the Library. */
extern BOOL HTLibInit NOPARAMS;
/* Release the memory kept by the Library; call when the application exits. */
extern BOOL HTLibTerminate NOPARAMS;

Method Management

These are the valid methods, see HTTP Methods.

NOTE: the anchor list of allowed methods is now a bitflag, not a list.

/*
** HTTP methods known to the Library. Every valid method has a bit of its
** own, so a set of methods can be kept in one integer mask.
*/
typedef enum {
    METHOD_INVALID = 0,
    METHOD_GET     = 1 << 0,
    METHOD_HEAD    = 1 << 1,
    METHOD_POST    = 1 << 2,
    METHOD_PUT     = 1 << 3,
    METHOD_DELETE  = 1 << 4,
    METHOD_LINK    = 1 << 5,
    METHOD_UNLINK  = 1 << 6
} HTMethod;

Get Method Enumeration

Gives the enumeration value of the method as a function of the (char *) name.
/* Map a method name (as a string) to the corresponding HTMethod value. */
extern HTMethod HTMethod_enum PARAMS((CONST char * name));

Get Method String

The reverse of HTMethod_enum()
/* Map an HTMethod value back to its name; the reverse of HTMethod_enum(). */
extern CONST char * HTMethod_name PARAMS((HTMethod method));

This section might be moved to the Access Authentication Module

Match Template Against Filename

/* extern						HTAA_templateMatch()
**		STRING COMPARISON FUNCTION FOR FILE NAMES
**		   WITH ONE WILDCARD * IN THE TEMPLATE
** NOTE:
**	This is essentially the same code as in HTRules.c, but it
**	cannot be used because it is embedded in between other code.
**	(In fact, HTRules.c should use this routine, but then this
**	 routine would have to be more sophisticated... why is life
**	 sometimes so hard...)
**
** ON ENTRY:
**	tmplate		is a template string to match the file name
**			against; it may contain a single wildcard
**			character * which matches zero or more
**			arbitrary characters.
**	filename	is the filename (or pathname) to be matched
**			against the template.
**
** ON EXIT:
**	returns		YES, if filename matches the template.
**			NO, otherwise.
*/
extern BOOL HTAA_templateMatch PARAMS((CONST char * tmplate, 
				       CONST char * filename));

The following have to be defined in advance of the other include files because of circular references.
/*
** Forward declarations. They must come before the includes that follow
** because those headers and this one refer to each other.
*/
typedef struct _HTRequest HTRequest;
typedef struct _HTNetInfo HTNetInfo;

/*
** Callback to a protocol module.
** The callback receives the request being handled and returns an int.
*/
typedef int (*HTLoadCallBack)	PARAMS((HTRequest *	req));

#include "HTAnchor.h"
#include "HTFormat.h"
#include "HTAAUtil.h"		/* HTAAScheme, HTAAFailReason */
#include "HTAABrow.h"		/* HTAASetup */

General HTTP Header Mask

This mask enables and disables which automatically generated HTTP headers are to be sent in a request. If the bit is not turned on, the header is not sent. Extra headers can be generated by initializing the ExtraHeaders field. The first set is general headers and the second is request headers.
/*
** Bit mask selecting which automatically generated HTTP headers are sent
** in a request. A header is sent only when its bit is set.
*/
typedef enum _HeaderEnum {
    /* General headers */
    HT_DATE		= 0x1,
    HT_MESSAGE_ID	= 0x2,
    HT_MIME		= 0x4,				     /* MIME-Version */

    /* Request headers */
    HT_ACCEPT		= 0x8,
    HT_FROM		= 0x10,
    HT_PRAGMA		= 0x20,
    HT_REFERER		= 0x40,
    HT_USER_AGENT	= 0x80
} HeaderEnum;

/*
** Default value of the header mask.
** The expansion is parenthesized so that it stays a single operand when the
** macro is used inside a larger expression (e.g. `~DEFAULT_HEADERS' or
** `mask & DEFAULT_HEADERS'); the flags are distinct bits, so OR-ing them
** yields the same value as the former sum.
*/
#define DEFAULT_HEADERS (HT_ACCEPT | HT_REFERER | HT_USER_AGENT)

Entity Header Mask

The entity headers contain information about the object sent in the HTTP transaction. See the Anchor module, for the storage of entity headers. This flag defines which headers are to be sent in a request together with an entity body (the O stands for object)
/*
** Bit mask selecting which entity headers are sent in a request that
** carries an entity body.
** NOTE(review): the value 0x100 is not assigned to any header; the existing
** values are kept unchanged so that stored masks stay compatible.
*/
typedef enum _EntityHeaderEnum {
    HT_ALLOW		= 0x1,
    HT_CONTENT_ENCODING	= 0x2,
    HT_CONTENT_LANGUAGE	= 0x4,
    HT_CONTENT_LENGTH	= 0x8,
    HT_CTE		= 0x10,			/* Content-Transfer-Encoding */
    HT_CONTENT_TYPE	= 0x20,
    HT_DERIVED_FROM	= 0x40,
    HT_EXPIRES		= 0x80,
    HT_LAST_MODIFIED	= 0x200,
    HT_LINK		= 0x400,
    HT_TITLE		= 0x800,
    HT_URI		= 0x1000,
    HT_VERSION		= 0x2000
} EntityHeaderEnum;

/*
** Default value of the entity header mask.
** The expansion is parenthesized so that it stays a single operand when the
** macro is used inside a larger expression; the flags are distinct bits, so
** OR-ing them yields the same value as the former sum.
*/
#define DEFAULT_ENTITY_HEADERS \
	(HT_CONTENT_TYPE | HT_CONTENT_LENGTH | HT_DERIVED_FROM | HT_VERSION)

Default WWW Addresses

These control the home page selection. To mess with these for normal browses is asking for user confusion.
/*
** Default addresses and paths. Every value except LOGICAL_DEFAULT is wrapped
** in #ifndef, so it can be overridden by defining the macro before this
** point (for example on the compiler command line).
*/

/* NOTE(review): presumably the name of the environment variable that holds
** the home page address -- confirm against HTHomeAnchor(). */
#define LOGICAL_DEFAULT "WWW_HOME"	      /* Defined to be the home page */

#ifndef PERSONAL_DEFAULT
#define PERSONAL_DEFAULT "WWW/default.html"		/* in home directory */
#endif

/* If the home page isn't found, use this file: */
#ifndef LAST_RESORT
#define LAST_RESORT	"http://www.w3.org/"
#endif

/* If one telnets to an access point it will look in this file for home page */
#ifndef REMOTE_POINTER
#define REMOTE_POINTER  "/etc/www-remote.url"		    /* can't be file */
#endif

/* and if that fails it will use this. */
#ifndef REMOTE_ADDRESS
#define REMOTE_ADDRESS  "http://www.w3.org/remote.html"   /* can't be file */
#endif

/* Default log file name */
#ifndef DEFAULT_LOGFILE
#define DEFAULT_LOGFILE		"www-log"
#endif

/* System-wide default page on the local file system */
#ifndef LOCAL_DEFAULT_FILE
#define LOCAL_DEFAULT_FILE "/usr/local/lib/WWW/default.html"
#endif

/* This is the default cache directory: */
#ifndef HT_CACHE_ROOT
#define HT_CACHE_ROOT		"/tmp"
#endif

/* The default directory for "save locally" and "save and execute" files: */
#ifndef HT_TMP_ROOT
#define HT_TMP_ROOT		"/tmp"
#endif

Protocol Specific Information

This structure contains information about the socket number, the input buffer for reading from the network, etc. The structure is used throughout the protocol modules and is the reference point for introducing multi-threaded execution into the library, see specifications on Multiple Threads.
/*
** Action associated with a socket; kept in the `action' member of the
** HTNetInfo structure.
*/
typedef enum _SocAction {
    SOC_INVALID   = -1,
    SOC_WRITE     = 0,			/* By default ready to write */
    SOC_READ      = 1,
    SOC_INTERRUPT = 2
} SocAction;

/*
** Per-connection network state shared by the protocol modules: the socket,
** its address, the input buffer, and a link back to the owning request.
*/
struct _HTNetInfo {
    SOCKFD		sockfd;				/* Socket descriptor */
    SockA 		sock_addr;		/* SockA is defined in tcp.h */
    HTInputSocket *	isoc;				     /* Input buffer */
    SocAction		action;			/* Result of the select call */
    HTStream *		target;				    /* Target stream */
    int 		addressCount;	     /* Attempts if multi-homed host */
    time_t		connecttime;		 /* Used on multihomed hosts */
    struct _HTRequest *	request;	   /* Link back to request structure */
};
Note: The addressCount variable is used to count the number of attempts to connect to a multi-homed host so we know when to stop trying new IP addresses.

The Request structure

When a request is handled, all kinds of things about it need to be passed along. These are all put into a HTRequest structure. This is the most essential structure in the library. It contains two main categories of information regarding a request: application dependent information set by the caller, and library dependent information set by the Library. Applications using the Library should never use the internal library dependent information. It's only because we don't have real classes that we can't hide it.

Note: If you reuse the request structure for more than one request then make sure that the request is re-initialized, so that no `old' data is reused, see functions to manipulate HTRequest Structure. The library handles its own internal information from request to request but the information set by the caller is untouched.

The elements of the request structure are as follows:

struct _HTRequest {

Application Dependent - Set by the caller of HTAccess

    HTMethod	method;
An enum used to specify the HTTP method used for the actual request. The default value is GET.

HTTP Header Information

    HTList * 	conversions;
NULL, or a local list of specific conversions which the format manager can do in order to fulfill the request. It typically points to a list set up on initialisation time for example by HTInit(). There is also a global list of conversions which contains a generic set of possible conversions.
    HTList *	encodings;
The list of encodings acceptable in the output stream.
    HTList *	languages;
The list of (human) language values acceptable in the response. The default is all languages.
    HeaderEnum		HeaderMask;
    EntityHeaderEnum	EntityMask;
These two masks defines which headers to include in a HTTP request (or any other MIME-like protocol). the first mask defines what autogenerated HTTP header fields should be sent in the HTTP request. The default value is DEFAULT_HEADERS. The second defines what entity headers should be sent along with an entity (using PUT or POST).
    HTParentAnchor *parentAnchor;
If this parameter is set then a `Referer: <parent address> can be generated in the request to the server, see Referer field in a HTTP Request
   char * ExtraHeaders;
Extra header information can be send along with a request using this variable. The text is sent as is so it must be preformatted with <CRLF> line terminators.

Streams From Network to Application

    HTStream *	output_stream; 
The output stream is to be used to put data down to as they come in from the network and back to the application. The default value is NULL which means that the stream goes to the user (display).
    HTAtom * 	output_format;
The desired format of the output stream. This can be used to get unconverted data etc. from the library. If NULL, then WWW_PRESENT is default value.
    HTStream*	error_stream;
All object bodies sent from the server with status codes different from 200 OK will be put down this stream. This can be used as a debug window etc. If the value is NULL (default) then the stream used is HTBlackHole.
    HTAtom * 	error_format;
The desired format of the error stream. This can be used to get unconverted data etc. from the library. The default value if WWW_HTML as a character based only has one WWW_PRESENT.

Streams From Application to Network

    HTStream *	input_stream; 
The input stream is to be used by the PostCallBack function to put data out on the network. The user should not initialize this field.
    HTAtom * 	input_format;
The desired format of the output stream. This can be used to upload converted data to a remote server. If NULL, then WWW_SOURCE is default value.
    int (*PostCallBack)		PARAMS((struct _HTRequest *	request,
					HTStream *		target));
The call back function which is called when the current request is ready for sending (posting) the data object. The request is the current request so that the application knows which post we are handling. The function must have the same return values as the other Load functions.

Other Flags

    BOOL BlockingIO;
    BOOL ForceReload;
This flag can be set to override if a protocol module is registered as using non-blocking IO.
    BOOL (* callback ) PARAMS((struct _HTRequest* request,
						void *param));
A function to be called back in the event that a file has been saved to disk by HTSaveAndCallBack for example.
    void *	context;
An arbitrary pointer passed to HTAccess and passed back as a parameter to the callback.

Library Dependent - Set by Library

None of the bits below may be looked at by a WWW application. The Library handles the cleanup by itself.
    HTParentAnchor*	anchor;
The anchor for the object in question. Set immediately by HTAcesss. Used by the protocol and parsing modules. Valid thoughout the access.
    HTChildAnchor *	childAnchor;	/* For element within the object  */
The anchor for the sub object if any. The object builder should ensure that is is selected, highlighted, etc when the object is loaded.
    struct _HTRequest *	CopyRequest;
We need to know if we have a remote request sending data along with this request.
    void *	using_cache;
    BOOL	using_proxy;
Pointer to cache element if cache hit anfd if using proxy
    BOOL	error_block;		/* YES if stream has been used	  */
    HTList *	error_stack;		/* List of errors		  */
These two fields are used by the error reporting system to keep a stack of messages.
    HTNetInfo *	net_info;		/* Information about socket etc. */
    int		redirections;		/* Number of redirections */
    time_t	retry_after;		/* Absolut time for a retry */
Protocol specific information, socket number etc.
    char *	redirect;		/* Location or URI */
    char *	WWWAAScheme;		/* WWW-Authenticate scheme */
    char *	WWWAARealm;		/* WWW-Authenticate realm */
    char *	WWWprotection;		/* WWW-Protection-Template */
Information taken from the MIME header specifically oriented towards the request (not the object itself)
    char *	authorization;		/* Authorization: field		  */
    HTAAScheme	scheme;			/* Authentication scheme used	  */
    HTInputSocket *	isoc;		/* InputSocket object for reading */
#if 0
    HTAtom *	content_type;		/* Content-Type:		  */
    HTAtom *	content_language;	/* Language			  */
    HTAtom *	content_encoding;	/* Encoding			  */
    int		content_length;		/* Content-Length:		  */
#endif
These header fields are only used by the server and will be removed at some point.
    HTList *	valid_schemes;		/* Valid auth.schemes		  */
    HTAssocList **	scheme_specifics;/* Scheme-specific parameters	  */
    char *	authenticate;		/* WWW-authenticate: field */
    char *	prot_template;		/* WWW-Protection-Template: field */
    HTAASetup *	setup;			/* Doc protection info		  */
    HTAARealm *	realm;			/* Password realm		  */
    char *	dialog_msg;		/* Authentication prompt (client) */
These fields are used by the HTTP access authentication used by a client application.

Windows Specific Information

#ifdef _WINDOWS 
	HWND		hwnd;		/* Windows handle for MSWindows   */
	unsigned long   winMsg;         /* msg number of Windows eloop    */
#endif /* _WINDOWS */
};

Functions to Manipulate a HTRequest Structure

Just to make things easier especially for clients, here are some functions to manipulate the request structure:

Create blank request

This request has defaults in -- in most cases it will need some information added before being passed to HTAccess, but it will work as is for a simple request.
/*
** Create a new request structure with default values filled in. It works
** as is for a simple request; most callers add information before loading.
*/
extern HTRequest * HTRequest_new NOPARAMS;

Delete request structure

Frees also conversion list hanging from req->conversions.
/* Delete a request structure; also frees the list in req->conversions. */
extern void HTRequest_delete PARAMS((HTRequest * req));

Clear a request structure

Clears a request structure so that it can be reused. The only thing that differs from using free/new is that the list of conversions is kept.

NOTE: It is NOT recommended to reuse a request structure!!!

/*
** Clear a request structure so that it can be reused. Unlike deleting and
** creating a new one, the list of conversions is kept.
*/
extern void HTRequest_clear PARAMS((HTRequest * req));

Functions for Loading a Document

There are several different ways of loading a document. However, the major difference between them is whether the document is referenced by a relative URL, an absolute URL, or an anchor.

NOTE: From release 3.0 of the Library, the return codes from the loading functions are no longer BOOL, that is YES or NO. Instead they have been replaced with the following set of return codes defined in the Utility module:
HT_WOULD_BLOCK
An I/O operation would block
HT_ERROR
An error has occurred
HT_LOADED
Success
HT_NO_DATA
Success, but no document loaded. This might be the situation when a telnet session is started etc.
HT_RETRY
The remote server is down but will serve documents from the calendar time indicated in HTRequest->retry_after.
However, a general rule about the return codes is that ERRORS have a negative value whereas SUCCESS has a positive value.

There are also some functions to help the client getting started with the first URI.

Load a document from relative URL

/*
** Load a document from a relative URL.
**	relative_name	the relative URL of the document
**	here		anchor of the current document (presumably the base
**			that relative_name is resolved against -- confirm)
**	request		the request structure to use
** Returns one of the load status codes (HT_LOADED, HT_ERROR, ...).
*/
extern int HTLoadRelative	PARAMS((CONST char * 	relative_name,
					HTParentAnchor*	here,
					HTRequest *	request));

Load a document from absolute URL

/*
** Load a document from the absolute URL `addr' using `request'.
** Returns one of the load status codes (HT_LOADED, HT_ERROR, ...).
*/
extern int HTLoadAbsolute	PARAMS((CONST char *	addr,
					HTRequest *	request));

Load a document from absolute name to a stream

/*
** Load a document from the absolute name `addr' to a stream.
** NOTE(review): the meaning of `filter' is not described in this header --
** confirm against HTAccess.c.
** Returns one of the load status codes (HT_LOADED, HT_ERROR, ...).
*/
extern int HTLoadToStream	PARAMS((CONST char * 	addr,
					BOOL 		filter,
					HTRequest *	request));

Load a document from anchor

The anchor parameter may be a child anchor. The anchor in the request is set to the parent anchor. The recursive function keeps the error stack in the request structure so that no information is lost having more than one call. See also HTBindAnchor().
/*
** Load the document that anchor `a' refers to. `a' may be a child anchor;
** the anchor in the request is set to the parent anchor.
*/
extern int HTLoadAnchor		PARAMS((HTAnchor  * 	a,
					HTRequest *	request));
/*
** Same as above, but keeps the error stack in the request structure so
** that no information is lost when there is more than one call.
*/
extern int HTLoadAnchorRecursive PARAMS((HTAnchor *	a,
					HTRequest *	request));

Load a Document

These are two internal routines for loading a document which has an address AND a matching anchor. (The public routines are called with one OR the other.) This is recursively called from file load module to try ftp (though this will be obsolete in the next major release).

If keep_error_stack is YES then the error (or info) stack is not cleared from the previous call.

/*
** Internal routine: load a document that has both an address and a
** matching anchor. If keep_error_stack is YES, the error (or info) stack
** from the previous call is not cleared.
*/
extern int HTLoad		PARAMS((HTRequest * request,
					BOOL keep_error_stack));
/*
** Internal routine paired with HTLoad().
** NOTE(review): presumably finishes a load given its final `status' --
** confirm against HTAccess.c.
*/
extern BOOL HTLoadTerminate	PARAMS((HTRequest * request, int status));

Search Using Relative URL

Performs a search on word given by the user. Adds the search words to the end of the current address and attempts to open the new address.
/*
** Search on the words given by the user: `keywords' are added to the end
** of the current address (anchor `here') and the new address is opened.
*/
extern int HTSearch		PARAMS((CONST char *	keywords,
					HTParentAnchor*	here,
					HTRequest *	request));

Search using Absolute URL

Performs a keyword search on word given by the user. Adds the keyword to the end of the current address and attempts to open the new address.
/*
** Keyword search using an absolute URL: `keywords' are added to the end of
** the address and the new address is opened.
** NOTE(review): `indexname' is presumably the absolute address of the
** index being searched -- confirm against HTAccess.c.
*/
extern int HTSearchAbsolute	PARAMS((CONST char *	keywords,
					CONST char * 	indexname,
					HTRequest *	request));

Help Function for Clients to get started

These functions help the client to load the first document. They are not mandatory to use - but they make life easier!

Bind an anchor to a request structure without loading

/* Bind `anchor' to the request structure without loading anything. */
extern BOOL HTBindAnchor PARAMS((HTAnchor *anchor, HTRequest *request));

Generate the Anchor for the Home Page

As it involves file access, this should only be done once when the program first runs. This is a default algorithm using the WWW_HOME environment variable.
/*
** Generate the anchor for the home page, by default using the WWW_HOME
** environment variable. Involves file access, so call it only once when
** the program first runs.
*/
extern HTParentAnchor * HTHomeAnchor NOPARAMS;

Find Related Name

Creates a local file URI that can be used as a relative name when calling HTParse() to expand a relative file name to an absolute one.

The code for this routine originates from the Line Mode Browser and was moved here by howcome@dxcern.cern.ch in order for all clients to take advantage.

/*
** Create a local file URI that can be used as the related name when calling
** HTParse() to expand a relative file name into an absolute one.
** NOTE(review): ownership of the returned string is not stated here --
** confirm whether the caller must free it.
*/
extern char *  HTFindRelatedName NOPARAMS;

Functions for Posting a Document

NOTE: The Posting functions are used to send a data object along with the request. The functions have the same set of return codes as for the Load Functions.

Get a Save Stream

On Entry,

request->anchor
is a valid anchor which has previously been loaded

On exit,

returns
0 if error else a stream to save the object to.
/*
** Get a stream to save an object to.
** On entry, request->anchor must be a valid anchor that has previously
** been loaded. Returns 0 on error, otherwise the save stream.
*/
extern HTStream * HTSaveStream	PARAMS((HTRequest * request));

Copy an Anchor

Fetch the URL (possibly local file URL) and send it using either PUT or POST directly to the remote destination using HTTP, that is remote copy of object O from A to B where A might be the host of the application. The caller can decide the exact method used and which HTTP header fields to transmit by setting the user fields in the destination request structure.
/*
** Remote copy: fetch the source URL (possibly a local file URL) and send it
** to the remote destination over HTTP using PUT or POST. The method and the
** HTTP header fields to transmit are chosen through the user fields of the
** destination request structure.
*/
extern int HTCopyAnchor		PARAMS((HTAnchor *	src_anchor,
					HTRequest *	src_req,
					HTParentAnchor *dest_anchor,
					HTRequest *	dest_req));

Upload an Anchor

Send the contents (in hyperdoc) of the source anchor using either PUT or POST to the remote destination using HTTP. The caller can decide the exact method used and which HTTP header fields to transmit by setting the user fields in the request structure. Format conversion can be made on the fly by setting the input_format field in the destination request structure. If the content-length is unknown (-1) then a content-length counter is automatically put into the stream pipe.
/*
** Upload the contents of the source anchor to the remote destination over
** HTTP using PUT or POST, as selected in the destination request.
*/
extern int HTUploadAnchor	PARAMS((HTAnchor *	src_anchor,
					HTParentAnchor *dest_anchor,
					HTRequest *	dest_req));

Access Method Registration

An access method is defined by an HTProtocol structure which points to the routines for performing the various logical operations on an object: in HTTP terms, GET, PUT, and POST. The access methods supported in the Library are initialized automatically using the private function HTAccessInit() if HT_NO_INIT is not defined.

Each of these routines takes as a parameter a request structure containing details of the request. When the protocol class routine is called, the anchor element in the request is already valid (made valid by HTAccess).

/* Whether a protocol module is registered as using blocking I/O or not. */
typedef enum _HTSocBlock {
    SOC_BLOCK     = 0,
    SOC_NON_BLOCK = 1
} HTSocBlock;

/*
** Description of one access method: its name, its I/O mode, and the
** routines that perform the logical operations on an object. Each routine
** receives the request structure describing the request.
*/
typedef struct _HTProtocol {
    char *	name;		/* presumably the naming scheme -- confirm */
    HTSocBlock	block;	
    int		(*load)		PARAMS((HTRequest *	request));
    HTStream*	(*saveStream)	PARAMS((HTRequest *	request));
    HTStream* 	(*postStream)	PARAMS((HTRequest *	request,
					HTParentAnchor* postTo));
} HTProtocol;

/* Register an access method described by `protocol'. */
extern BOOL HTRegisterProtocol PARAMS((HTProtocol * protocol));
/* NOTE(review): presumably removes all registered protocols -- confirm. */
extern void HTDisposeProtocols NOPARAMS;

Uses Protocol Blocking IO

A small function to make life easier. Returns YES or NO. If the Library is run in NON-INTERACTIVE MODE then the function always returns YES.
/*
** Tell whether blocking I/O is used for `request'. Returns YES or NO;
** always YES when the Library is run in non-interactive mode.
*/
extern BOOL HTProtocolBlocking	PARAMS((HTRequest *	request));
end
#endif /* HTACCESS_H */
end of HTAccess