/* Copyright (c) 2003, WebThing Ltd Author: Nick Kew This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* mod_xmlns Adds XML Namespace processing to the Apache Webserver http://apache.webthing.com/mod_xmlns/ This module doesn't directly do anything. It's a harness for plugging in XMLNS processors. A processor for XHTML ( xmlns="http://www.w3.org/1999/xhtml") is bundled, and others are available from WebThing - see the webpage for details. You can also develop new processors to handle other namespaces or provide alternative behaviour - see below. */ /* Note to Users You are requested to register as a user, at http://apache.webthing.com/registration.html This entitles you to support from the developer. I'm unlikely to reply to help/support requests from non-registered users, unless you're paying and/or offering constructive feedback such as bug reports or sensible suggestions for further development. It also makes a small contribution to the effort that's gone into developing this work. */ /* Note to Developers and Distributors You are encouraged to develop namespace processors to work with this module. Please let me know if you do! A good starting-point would be to look at the simple processor for XHTML bundled with this module. */ /* Portability I'm not sure how portable this is. It should work cleanly on Unix/Linux systems. Since it uses POSIX regexps directly (not through APR), it may not compile on non-posix-compliant system. Hacking it to be fully portable should be very straightforward, */ /* This is one of the SAX Parser family of output filter modules from http://apache.webthing.com/ SAX Filter modules for XML (and XHTML): mod_xmlns: Free, pre-stable Simple harness for XML Namespace processing mod_publisher_xml: Work in progress Harness for XML Namespace processing, DTD and Entity manipulation, tight control of outgoing markup, templated publishing. SAX Filter modules for HTML (and XHTML): mod_accessibility: Commercial HTML cleanup and correction, data discovery, user empowerment and presentation options. mod_publisher_html: Work in progress Extensive HTML manipulation: templating, substitutions, variables, SSI. mod_proxy_html: Free, stable Rewriting of HTML links, specifically for use in a reverse proxy. Other markup processing modules for Apache from WebThing: XSLT output filter (free, stable) SAX and DOM APIs (free, experimental) XMTP and XMLRPC filter (free, experimental) XML and HTML Validation (free, stable) HTML Accessibility Analysis (commercial) */ #include /* apache */ #include #include #include #include #include #include #include module AP_MODULE_DECLARE_DATA xmlns_module ; const char* NULLPREFIX = "" ; #define PREFIX (prefix?prefix:NULLPREFIX) #include "mod_xmlns.h" typedef struct xmlns_svr { xmlns* namespaces ; } xmlns_svr ; typedef struct nslist { struct nslist* next ; const XML_Char* prefix ; const XML_Char* uri ; } nslist ; static void* cr_svr_cfg(apr_pool_t* pool, server_rec* x) { return apr_pcalloc(pool, sizeof(xmlns_svr) ) ; } /* Namespace-related stuff: config and callbacks */ static const char* load_namespace(cmd_parms* cmd, void* cfg, const char* symname, const char* filename) { xmlns_svr* svr = ap_get_module_config( cmd->server->module_config, &xmlns_module) ; apr_dso_handle_t* dlhandle = 0; apr_dso_handle_sym_t symbol ; if ( apr_dso_load(&dlhandle, filename, cmd->pool) != APR_SUCCESS ) { char* buf = apr_palloc(cmd->pool, 256) ; return apr_dso_error(dlhandle, buf, 256) ; } if ( apr_dso_sym(&symbol, dlhandle, symname) != APR_SUCCESS ) { char* buf = apr_palloc(cmd->pool, 256) ; apr_dso_error(dlhandle, buf, 256) ; apr_dso_unload(dlhandle) ; return buf ; } ((xmlns*)symbol)->next = svr->namespaces ; svr->namespaces = (xmlns*) symbol ; apr_pool_cleanup_register(cmd->pool, dlhandle, (void*)apr_dso_unload, apr_pool_cleanup_null) ; return NULL ; } int parsename(const XML_Char* name, parsedname* p) { char* sp = strchr(name, ' ') ; int nparts ; if ( sp ) { p->ns = name ; p->nslen = ( sp - name ) ; p->elt = name + p->nslen + 1 ; sp = strchr(p->elt, ' ') ; if ( sp ) { p->eltlen = ( sp - p->elt ) ; p->prefix = p->elt + p->eltlen + 1 ; p->prefixlen = strlen(p->prefix) ; nparts = 3 ; } else { p->eltlen = strlen(p->elt) ; p->prefix = (void*) ( p->prefixlen = 0 ) ; nparts = 2 ; } } else { p->elt = name ; p->eltlen = strlen(name) ; p->prefix = p->ns = (void*) ( p->prefixlen = p->nslen = 0 ) ; nparts = 1 ; } return nparts ; } void xstartElement(void* ctx, const XML_Char* name, const XML_Char** atts) { // PUTS(name) ; parsedname name3 ; int nparts = parsename(name, &name3) ; apr_hash_index_t* index ; if ( nparts > 1 ) { /* see if we have an alternative handler registered for namespace */ xmlns_active* p = apr_hash_get(CTX->activens, name3.prefix?name3.prefix:NULLPREFIX, APR_HASH_KEY_STRING) ; if ( p && p->ns && p->ns->StartElement && p->ns->StartElement(ctx, &name3, atts) ) return ; } /* Default: either no handler, or it returned 0 */ ap_fputc(F, BB, '<') ; if ( nparts == 3 ) { ap_fwrite(F, BB, name3.prefix, name3.prefixlen) ; ap_fputc(F, BB, ':') ; } ap_fwrite(F, BB, name3.elt, name3.eltlen) ; for ( index = apr_hash_first(CTX->f->r->pool, CTX->activens) ; index ; index = apr_hash_next(index) ) { const void* prefix ; void* rec ; apr_ssize_t len ; apr_hash_this(index, &prefix, &len, &rec) ; if ( ! ((xmlns_active*)rec)->printed ) { if ( prefix && strlen((const char*)prefix) ) if ( ((xmlns_active*)rec)->ns ) ap_fputstrs(F, BB, " xmlns:", (const char*)prefix , "=\"", ((xmlns_active*)rec)->ns->xmlns, "\"", NULL) ; else { ap_fputstrs(F, BB, " xmlns:", (const char*)prefix , "=\"", NULL) ; ap_fwrite(F, BB, name3.ns, name3.nslen) ; ap_fputc(F, BB, '"') ; } else if ( ((xmlns_active*)rec)->ns ) ap_fputstrs(F, BB, " xmlns=\"", ((xmlns_active*)rec)->ns->xmlns, "\"", NULL) ; else { ap_fputs(F, BB, " xmlns=\"") ; ap_fwrite(F, BB, name3.ns, name3.nslen) ; ap_fputc(F, BB, '"') ; } ((xmlns_active*)rec)->printed = 1 ; } } if ( atts ) { const XML_Char** a ; for ( a = atts ; *a ; a += 2 ) { ap_fputstrs(F, BB, " ", a[0], "=\"", a[1], "\"", NULL) ; } } ap_fputc(F, BB, '>') ; } void xendElement(void* ctx, const XML_Char* name) { parsedname name3 ; int nparts = parsename(name, &name3) ; if ( nparts > 1 ) { /* see if we have an alternative handler registered for namespace */ xmlns_active* p = apr_hash_get(CTX->activens, name3.prefix?name3.prefix:NULLPREFIX, APR_HASH_KEY_STRING) ; if ( p && p->ns && p->ns->EndElement && p->ns->EndElement(ctx, &name3) ) return ; } ap_fputs(F, BB, "') ; } static void xstartNamespaceDecl(void* ctx, const XML_Char *prefix, const XML_Char *uri) { xmlns* ns ; xmlns_active* rec = apr_pcalloc(CTX->f->r->pool, sizeof(xmlns_active)) ; rec->prev = apr_hash_get(CTX->activens, PREFIX, APR_HASH_KEY_STRING) ; apr_hash_set(CTX->activens, PREFIX, APR_HASH_KEY_STRING, rec) ; for ( ns = CTX->svr->namespaces ; ns ; ns = ns->next ) if ( ! strcmp(ns->xmlns, uri ) ) { rec->ns = ns ; if ( ns->StartNamespace ) { ns->StartNamespace(ctx, rec) ; } break ; } } static void xendNamespaceDecl(void* ctx, const XML_Char *prefix) { xmlns_active* p = apr_hash_get(CTX->activens, PREFIX, APR_HASH_KEY_STRING) ; if ( p ) { if ( p->ns && p->ns->EndNamespace ) p->ns->EndNamespace(ctx, p) ; apr_hash_set(CTX->activens, PREFIX, APR_HASH_KEY_STRING, p->prev) ; } } void xdefault(void* ctx, const XML_Char* buf, int len) { ap_fwrite(F, BB, buf, len) ; /* escape not required */ } #define CALLBACK(fn,handler) if ( handler ) fn ( parser , handler ) XML_Parser attachCallbacks(XML_Parser parser ) { /* Option for default to complain ? */ CALLBACK(XML_SetDefaultHandler, xdefault) ; /* LoadNamespace mech for replacing these */ CALLBACK(XML_SetStartElementHandler, xstartElement) ; CALLBACK(XML_SetEndElementHandler, xendElement) ; CALLBACK(XML_SetStartNamespaceDeclHandler, xstartNamespaceDecl) ; CALLBACK(XML_SetEndNamespaceDeclHandler, xendNamespaceDecl) ; return parser ; } char* ctype2encoding(apr_pool_t* pool, const char* in) { char* x ; char* ptr ; char* ctype ; if ( ! in ) return 0 ; ctype = strdup(in) ; for ( ptr = ctype ; *ptr; ++ptr) if ( isupper(*ptr) ) *ptr = tolower(*ptr) ; if ( ptr = strstr(ctype, "charset=") , ptr > 0 ) { ptr += 8 ; // jump over "charset=" and chop anything that follows charset if ( x = strpbrk(ptr, " ;") , x != NULL ) *x = 0 ; } x = ptr ? apr_pstrdup(pool, ptr) : 0 ; free (ctype ) ; return x ; } static int xmlns_filter_init(ap_filter_t* f) { saxctxt* fctx ; char* enc = ctype2encoding(f->r->pool, f->r->content_type) ; /* remove content-length filter */ ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ; ap_filter_t* ff = f->next ; do { ap_filter_t* fnext = ff->next ; if ( ff->frec == clf ) ap_remove_output_filter(ff) ; ff = fnext ; } while ( ff ) ; fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ; fctx->svr = ap_get_module_config(f->r->server->module_config, &xmlns_module) ; fctx->f = f ; fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; fctx->activens = apr_hash_make(f->r->pool) ; /* chunked encoding enables HTTP keepalive */ if ( f->r->proto_num >= 1001 ) { if ( ! f->r->main && ! f->r->prev ) f->r->chunked = 1 ; } fctx->parser = XML_ParserCreateNS(enc, ' ') ; attachCallbacks(fctx->parser) ; XML_SetReturnNSTriplet(fctx->parser, 1) ; XML_SetUserData(fctx->parser, fctx) ; return OK ; } static int xmlns_filter(ap_filter_t* f, apr_bucket_brigade* bb) { apr_bucket* b ; const char* buf = 0 ; apr_size_t bytes = 0 ; saxctxt* ctxt = (saxctxt*)f->ctx ; if ( ! ctxt ) return ap_pass_brigade(f->next, bb) ; for ( b = APR_BRIGADE_FIRST(bb) ; b != APR_BRIGADE_SENTINEL(bb) ; b = APR_BUCKET_NEXT(b) ) { if ( APR_BUCKET_IS_EOS(b) ) { if ( XML_Parse(ctxt->parser, buf, 0, 1) != XML_STATUS_OK ) { enum XML_Error err = XML_GetErrorCode(ctxt->parser) ; const XML_LChar* msg = XML_ErrorString(err) ; ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Endparse Error %d: %s", err, msg) ; } APR_BRIGADE_INSERT_TAIL(ctxt->bb, apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ; ap_pass_brigade(ctxt->f->next, ctxt->bb) ; XML_ParserFree(ctxt->parser) ; } else if ( APR_BUCKET_IS_FLUSH(b) ) { APR_BRIGADE_INSERT_TAIL(ctxt->bb, apr_bucket_flush_create(ctxt->bb->bucket_alloc) ) ; } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS ) { if ( XML_Parse(ctxt->parser, buf, bytes, 0) != XML_STATUS_OK ) { enum XML_Error err = XML_GetErrorCode(ctxt->parser) ; const XML_LChar* msg = XML_ErrorString(err) ; ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Parse Error %d: %s", err, msg) ; } } else { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ; } } apr_brigade_destroy(bb) ; return APR_SUCCESS ; } static const command_rec xmlns_cmds[] = { AP_INIT_TAKE2("LoadNamespace", load_namespace, NULL, RSRC_CONF, NULL) , { NULL } } ; static void xmlns_hooks(apr_pool_t* p) { ap_register_output_filter("xmlns", xmlns_filter, xmlns_filter_init, AP_FTYPE_RESOURCE) ; } module AP_MODULE_DECLARE_DATA xmlns_module = { STANDARD20_MODULE_STUFF, NULL, NULL, cr_svr_cfg, NULL, xmlns_cmds, xmlns_hooks } ;