/* Copyright (c) 2003, WebThing Ltd Author: Nick Kew This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ //#define apr_socket_send apr_send //#define apr_socket_connect apr_connect //#define apr_socket_recv apr_recv #ifndef HTTPCLIENT #define HTTPCLIENT #define ApacheHTTP HTTPClient #include #include #if 1 #include #include #include #else #include #endif #include //#define BUFLEN 4096 #define BUFLEN 8192 #define TIMEOUT 3000000 class HTTPClient { apr_pool_t* pool ; const char* args ; apr_socket_t* sock ; char* ctype ; char* enc ; size_t bytes ; int code ; bool is_open ; char buf[BUFLEN] ; apr_table_t* resp_headers ; size_t offs ; size_t count ; size_t clen ; size_t redirects ; typedef enum { CONTENT_OK, REDIRECT_OK, ERROR_OK, BAD_HTTP } resp_type ; resp_type interpret_headers() { if ( ( code >= 300 ) && ( code < 400 ) ) { const char* loc = header("Location") ; if ( loc && *loc ) return REDIRECT_OK ; } const char* len = header("Content-Length") ; if ( len ) for ( const char* p = len; isdigit(*p); ++p ) clen = 10 * clen + (*p - '0') ; const char* tp = header("Content-Type") ; if ( tp ) { char* sep = strchr(tp, ';') ; ctype = apr_pstrndup(pool, tp, (sep-tp)) ; regex_t* encrx = ap_pregcomp(pool, "charset[ \t\r\n]*=[ \t\r\n]*[\"']?([A-Za-z0-9_-]+)", REG_ICASE|REG_EXTENDED) ; regmatch_t match[2] ; if ( ap_regexec(encrx, tp, 2, match, 0) == 0 ) enc = apr_pstrndup(pool, tp+match[1].rm_so, match[1].rm_eo - match[1].rm_so) ; ap_pregfree(pool, encrx) ; if ( ( code >= 200 ) && ( code < 300 ) ) return CONTENT_OK ; else return ERROR_OK ; } else { return BAD_HTTP ; } } bool parse_status(BasicWriter& out) { unsigned int i; code = 0 ; char* p = buf ; enum { BEFORE, HTTP_VERSION, SP1, STATUS, SP2, REASON } state = BEFORE ; for ( offs = i = 0 ; ( i < bytes) && (offs == 0) ; ++i, ++p) switch ( state ) { case BEFORE: if ( ! isspace(*p) ) state = HTTP_VERSION ; break ; case HTTP_VERSION: if ( isspace(*p) ) state = SP1 ; break ; case SP1: if ( isdigit(*p) ) { code = (*p - '0') ; state = STATUS ; } break ; case STATUS: if ( isdigit(*p) ) code = 10*code + (*p - '0') ; else state = SP2 ; break ; case SP2: if ( !isspace(*p) ) state = REASON ; //fallthrough case REASON: if ( ( *p == '\r' ) && ( *(p+1) == '\n' ) ) offs = (i + 2) ; else if ( *p == '\n' ) { out.puts("Malformed HTTP response (your server's line-endings are not compatible with HTTP). I'll try error-correction.") ; offs = (i + 1 ) ; } break ; } if ( ( offs <= 0 ) || (offs >= bytes ) ) return false ; else return true ; } bool parse_header() { char* key = 0 ; char* val ; if ( resp_headers ) apr_table_clear(resp_headers) ; // explicit clear to deal with redirects else resp_headers = apr_table_make(pool, 12) ; for ( char* p = buf+offs; offs < bytes ; offs = (p - buf) ) { char* eol = strpbrk(p, "\r\n") ; char* colon ; if ( eol == p ) { // end of headers p = eol + ((*eol == '\r') ? 2 : 1) ; offs = ( p - buf ) ; break ; } if ( colon = strchr(p, ':') , colon) { key = apr_pstrndup(pool, p, (colon - p) ) ; for (p = colon+1; isspace(*p); ++p) ; val = apr_pstrndup(pool, p, (eol - p) ) ; apr_table_set(resp_headers, key, val) ; } else if ( key ) { val = apr_pstrndup(pool, p, (eol - p) ) ; apr_table_merge(resp_headers, key, val) ; } p = eol+ ((*eol == '\r') ? 2 : 1) ; } if ( offs > bytes ) return false ; else return true ; } const HTTPClient& send(const char* buf) const { apr_size_t len = strlen(buf) ; apr_socket_send(sock, buf, &len) ; return *this ; } void send_hdr(const char* hdr) const { char* val = getArg(pool, args, hdr) ; if ( val && strlen(val) ) { ap_unescape_url(val) ; send(hdr).send(": ").send(val).send("\r\n") ; } } void send_request(apr_uri_t& uri, BasicWriter& out) const { //const char* method = "GET" ; //send(method).send(" ").send(uri.path) ; send("GET ").send(uri.path?uri.path:"/") ; if ( uri.query ) send("?").send(uri.query) ; send(" HTTP/1.0\r\n" "Connection: Close\r\n" "User-Agent: Page Valet/4.1pre5\r\n" "Host: ").send(uri.hostname).send("\r\n") ; send_hdr("Accept") ; send_hdr("Accept-Charset") ; send_hdr("Accept-Language") ; send_hdr("Cookie") ; const char* name = getArg(pool, args, "username") ; const char* pass = getArg(pool, args, "password") ; if ( name && pass ) { char* buf = apr_pstrcat(pool, name, ":", pass, NULL) ; // char* dst = 0 ; if ( strlen(buf) < 64 ) { char dst[128] ; apr_base64_encode(dst, buf, strlen(buf)) ; send("Authorization: Basic ").send(dst).send("\r\n") ; } else out.puts("Username or password too long - ignored.") ; } send("\r\n") ; } bool open1(const char* url, BasicWriter& out) { regex_t* http_url = ap_pregcomp(pool, "http://[A-Za-z\\..-_]+", REG_ICASE|REG_NOMATCH) ; if ( ap_regexec(http_url, url, 0, 0, 0) != 0 ) { out.puts("Bad URL: ").escape(url).puts(" (only HTTP supported)\n") ; ap_pregfree(pool, http_url) ; return false ; } ap_pregfree(pool, http_url) ; apr_uri_t uri ; apr_sockaddr_t* sa = 0 ; if ( apr_uri_parse(pool, url, &uri) != APR_SUCCESS ) { out.puts("Bad URL: ").escape(url).puts("\n") ; return 0 ; } if ( ! uri.port ) uri.port = 80 ; //const char* req = req_headers(uri) ; if ( apr_socket_create(&sock, PF_INET, SOCK_STREAM, pool) != APR_SUCCESS ) return false ; if ( apr_socket_timeout_set(sock, TIMEOUT) != APR_SUCCESS ) return false ; if ( apr_sockaddr_info_get(&sa, uri.hostname, APR_UNSPEC, uri.port, APR_IPV4_ADDR_OK, pool) != APR_SUCCESS ) return false ; int err ; if ( err = apr_socket_connect(sock, sa), err != APR_SUCCESS ) { out.puts("Error ").puti(err).puts(" in connect\n") ; return false ; } send_request(uri, out) ; /* apr_size_t len = strlen(req) ; if ( apr_socket_send(sock, req, &len ) != APR_SUCCESS ) return false ; */ #ifdef SIMPLE_READ bytes = BUFLEN ; if ( apr_socket_recv( sock, buf, &bytes ) != APR_SUCCESS ) { out.puts("Can't talk to ").escape(url).puts("\n") ; return false ; } #else bytes = 0 ; apr_status_t recv_status ; do { size_t to_read = BUFLEN - bytes ; recv_status = apr_socket_recv( sock, buf + bytes, &to_read ) ; bytes += to_read ; } while ( ( recv_status == APR_SUCCESS ) && ( bytes < BUFLEN ) ) ; if ( APR_STATUS_IS_TIMEUP(recv_status) ) { out.puts("Timeout reading response from ").escape(url) .puts("") ; } #endif if ( ! parse_status(out) || ! parse_header() ) { out.puts("Bad response from ").escape(url).puts("\n") ; return false ; } return true ; } #if 0 bool parseHead(void* http, char* tp) { int noenc = 1 ; char* sep = strchr(tp, ';') ; ctype = apr_pstrndup(pool, tp, (sep-tp)) ; if ( sep ) { regex_t* encrx = ap_pregcomp(pool, "charset[ \t\r\n]*=[ \t\r\n]*[\"']?([A-Za-z0-9_-]+)", REG_ICASE|REG_EXTENDED) ; regmatch_t match[2] ; if ( noenc = ap_regexec(encrx, sep, 2, match, 0) , !noenc ) set_encoding(apr_pstrndup(pool, sep+match[1].rm_so, match[1].rm_eo - match[1].rm_so) ) ; ap_pregfree(pool, encrx) ; } code = xmlNanoHTTPReturnCode(http) ; return !noenc ; } #endif public: HTTPClient(request_rec* r) : pool(r->pool), args(r->args) , sock(0), ctype(0), enc(0), bytes(0), is_open(true) , resp_headers(0), offs(0) , count(0), clen(0), redirects(0) { } ~HTTPClient() { close() ; } void close() { if ( is_open ) { #if 0 if ( http ) xmlNanoHTTPClose(http) ; #endif if ( sock ) apr_socket_close(sock) ; is_open = false ; } } bool open(const char* url, BasicWriter& w) { if ( ++redirects >= 4 ) { w.puts("Too many redirects - bailing out") ; close() ; return false ; } if ( ! open1(url, w) ) { w.puts("Error accessing ").escape(url) .puts(" - aborting.\n") ; close() ; return false ; } switch ( interpret_headers() ) { char* newurl ; case ERROR_OK: w.puts("HTTP Error ").puti(code) .puts(" - validating error document") ; // fallthrough case CONTENT_OK: w.puts("").escape(url).puts("") ; return true ; case REDIRECT_OK: newurl = apr_pstrdup(pool, header("Location") ) ; w.puts("").escape(url).puts(" redirected us to ") .escape(newurl).puts("") ; return open(newurl, w) ; default: return false ; } return is_open ; } size_t read(char** b) { if ( ( offs <= 0 ) || ( offs >= bytes ) ) { bytes = BUFLEN ; if ( ( clen > 0 ) && ( clen - count < BUFLEN ) ) bytes = clen - count ; apr_status_t s = apr_socket_recv( sock, buf, &bytes) ; if ( APR_STATUS_IS_EOF( s ) || (bytes == 0) ) close() ; *b = buf ; } else { bytes -= offs ; *b = buf + offs ; //memmove(buf, buf+offs, bytes) ; } count += bytes ; offs = 0 ; /* if ( ! bytes ) close() ; */ return bytes ; #if 0 if (!is_open) return 0 ; //bytes = xmlNanoHTTPRead(http, buf, BUFLEN) ; *b = buf ; if ( ! bytes ) close() ; return bytes ; #endif } const bool isopen() const { return is_open ; } const char* encoding() const { return enc ; } const char* content_type() const { return ctype ; } const size_t content_length() const { return clen ; } const size_t length() const { return count ; } const char* header(const char* key) const { return apr_table_get(resp_headers, key) ; } const int status() const { return code ; } void headers(apr_table_do_callback_fn_t fn, BasicWriter& out) const { out.puts("") ; apr_table_do ( fn, (void*) &out, resp_headers, 0 ) ; out.puts("") ; } /* should move this to OpenSP? */ #if 0 void set_encoding(char* x) { enc = x ; putenv("SP_CHARSET_FIXED=1") ; putenv(apr_pstrcat(pool, "SP_ENCODING=", enc, NULL)) ; } #define SUPPORTED_ENCODINGS "ascii,us-ascii,utf-8,utf-16,ucs-2,iso-10646-ucs-2,ucs-4,iso-10646-ucs-4,utf-32,unicode,euc-jp,euc-kr,euc-cn,cn-gb,gb2312,sjis,shift_jis,big5,cn-big5,iso-8859-1,iso-8859-2,iso-8859-3,iso-8859-4,iso-8859-5,iso-8859-6,iso-8859-7,iso-8859-8,iso-8859-9,iso-8859-15,koi8-r,koi8,xml" const bool supported_encoding() const { if ( ! enc || strlen(enc) < 3 ) return false ; for ( char* x = enc; *x; ++x ) if ( isupper(*x) ) *x = tolower(*x) ; if ( ! strstr(SUPPORTED_ENCODINGS, enc) ) return false ; else return true ; } #endif } ; #endif