diff --git a/xcap/authentication.py b/xcap/authentication.py
index 413f793..e48ec8e 100644
--- a/xcap/authentication.py
+++ b/xcap/authentication.py
@@ -1,366 +1,366 @@
"""XCAP authentication module"""

# XXX this module should be either renamed or refactored as it does more than just auth.

from hashlib import md5
from zope.interface import Interface, implements
from twisted.internet import defer
from twisted.python import failure
from twisted.cred import credentials, portal, checkers, error as credError
-from twisted.web2 import http, server, stream, responsecode, http_headers
-from twisted.web2.auth.wrapper import HTTPAuthResource, UnauthorizedResponse
from application.configuration.datatypes import NetworkRangeList
from application.configuration import ConfigSection, ConfigSetting

import struct
import socket
import urlparse

import xcap
from xcap.datatypes import XCAPRootURI
from xcap.appusage import getApplicationForURI, namespaces, public_get_applications
from xcap.errors import ResourceNotFound
from xcap.uri import XCAPUser, XCAPUri
-from twisted.web2.auth import basic, digest
+from xcap.web import http, server, stream, responsecode, http_headers
+from xcap.web.auth import basic, digest
+from xcap.web.auth.wrapper import HTTPAuthResource, UnauthorizedResponse

# body of 404 error message to render when user requests xcap-root
# it's html, because XCAP root is often published on the web.
# NOTE: there are no plans to convert other error messages to html.
# Since a web-browser is not the primary tool for accessing XCAP server, text/plain
# is easier for clients to present to user/save to logs/etc.
WELCOME = ('<html><head><title>Not Found</title></head>'
           '<body><h1>Not Found</h1>'
           'XCAP server does not serve anything '
           'directly under XCAP Root URL. You have to be more specific.'
           '<br><br>'
           '<address>OpenXCAP/%s</address>'
           '</body></html>') % xcap.__version__


class AuthenticationConfig(ConfigSection):
    __cfgfile__ = xcap.__cfgfile__
    __section__ = 'Authentication'

    default_realm = ConfigSetting(type=str, value=None)
    trusted_peers = ConfigSetting(type=NetworkRangeList, value=NetworkRangeList('none'))


class ServerConfig(ConfigSection):
    __cfgfile__ = xcap.__cfgfile__
    __section__ = 'Server'

    root = ConfigSetting(type=XCAPRootURI, value=None)


def generateWWWAuthenticate(headers):
    _generated = []
    for seq in headers:
        scheme, challenge = seq[0], seq[1]

        # If we're going to parse out to something other than a dict
        # we need to be able to generate from something other than a dict

        try:
            l = []
            for k, v in dict(challenge).iteritems():
                l.append("%s=%s" % (k, k in ("algorithm", "stale") and v or http_headers.quoteString(v)))

            _generated.append("%s %s" % (scheme, ", ".join(l)))
        except ValueError:
            _generated.append("%s %s" % (scheme, challenge))

    return _generated

http_headers.generator_response_headers["WWW-Authenticate"] = (generateWWWAuthenticate,)
http_headers.DefaultHTTPHandler.updateGenerators(http_headers.generator_response_headers)
del generateWWWAuthenticate
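For clarity, here is roughly what the generator above emits for a digest challenge. This standalone sketch (not part of the patch) re-implements the quoting rule inline; http_headers.quoteString is assumed to produce ordinary backslash-escaped double quoting:

    def format_challenge(scheme, challenge):
        # quote every parameter value except 'algorithm' and 'stale',
        # mirroring generateWWWAuthenticate() above
        parts = []
        for k, v in sorted(challenge.items()):
            if k not in ('algorithm', 'stale'):
                v = '"%s"' % v.replace('"', '\\"')
            parts.append('%s=%s' % (k, v))
        return '%s %s' % (scheme, ', '.join(parts))

    print format_challenge('digest', {'realm': 'example.com',
                                      'nonce': 'abc123',
                                      'qop': 'auth',
                                      'algorithm': 'MD5'})
    # digest algorithm=MD5, nonce="abc123", qop="auth", realm="example.com"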

def parseNodeURI(node_uri, default_realm):
    """Parses the given Node URI, containing the XCAP root, document selector,
       and node selector, and returns an XCAPUri instance if successful."""
    xcap_root = None
    for uri in ServerConfig.root.uris:
        if node_uri.startswith(uri):
            xcap_root = uri
            break
    if xcap_root is None:
        raise ResourceNotFound("XCAP root not found for URI: %s" % node_uri)
    resource_selector = node_uri[len(xcap_root):]
    if not resource_selector or resource_selector == '/':
        raise ResourceNotFound(WELCOME, http_headers.MimeType("text", "html"))
    r = XCAPUri(xcap_root, resource_selector, namespaces)
    if r.user.domain is None:
        r.user.domain = default_realm
    return r


class ITrustedPeerCredentials(credentials.ICredentials):

    def checkPeer(self, trusted_peers):
        pass


class TrustedPeerCredentials(object):
    implements(ITrustedPeerCredentials)

    def __init__(self, peer):
        self.peer = peer

    def checkPeer(self, trusted_peers):
        for range in trusted_peers:
            if struct.unpack('!L', socket.inet_aton(self.peer))[0] & range[1] == range[0]:
                return True
        return False
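The bitmask comparison in checkPeer is easier to read with concrete numbers. A small sketch, assuming NetworkRangeList yields (network, netmask) pairs packed as 32-bit integers as below:

    import socket, struct

    def ip_to_int(ip):
        return struct.unpack('!L', socket.inet_aton(ip))[0]

    # an assumed (network, netmask) pair for "10.0.0.0/24"
    network = ip_to_int('10.0.0.0')
    netmask = ip_to_int('255.255.255.0')

    print ip_to_int('10.0.0.42') & netmask == network    # True: inside the range
    print ip_to_int('10.0.1.42') & netmask == network    # False: outside the range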
avatar.""" implements(IAuthUser) class TrustedPeer(str): """Trusted peer avatar.""" implements(ITrustedPeer) class PublicGetApplication(str): """Public get application avatar.""" implements(IPublicGetApplication) ## realm class XCAPAuthRealm(object): """XCAP authentication realm. Receives an avatar ID (a string identifying the user) and a list of interfaces the avatar needs to support. It returns an avatar that encapsulates data about that user.""" implements(portal.IRealm) def requestAvatar(self, avatarId, mind, *interfaces): if IAuthUser in interfaces: return IAuthUser, AuthUser(avatarId) elif ITrustedPeer in interfaces: return ITrustedPeer, TrustedPeer(avatarId) elif IPublicGetApplication in interfaces: return IPublicGetApplication, PublicGetApplication(avatarId) raise NotImplementedError("Only IAuthUser and ITrustedPeer interfaces are supported") def get_cred(request, default_realm): auth = request.headers.getHeader('authorization') if auth: typ, data = auth if typ == 'basic': return data.decode('base64').split(':', 1)[0], default_realm elif typ == 'digest': raise NotImplementedError return None, default_realm ## authentication wrapper for XCAP resources class XCAPAuthResource(HTTPAuthResource): def allowedMethods(self): return 'GET', 'PUT', 'DELETE' def _updateRealm(self, realm): """Updates the realm of the attached credential factories.""" for factory in self.credentialFactories.values(): factory.realm = realm def authenticate(self, request): """Authenticates an XCAP request.""" parsed_url = urlparse.urlparse(request.uri) if request.port in (80, 443): uri = request.scheme + "://" + request.host + parsed_url.path else: uri = request.scheme + "://" + request.host + ":" + str(request.port) + parsed_url.path if parsed_url.query: uri += "?%s" % parsed_url.query xcap_uri = parseNodeURI(uri, AuthenticationConfig.default_realm) request.xcap_uri = xcap_uri if xcap_uri.doc_selector.context=='global': return defer.succeed(self.wrappedResource) ## For each request the authentication realm must be ## dinamically deducted from the XCAP request URI realm = xcap_uri.user.domain if realm is None: raise ResourceNotFound('Unknown domain (the domain part of "username@domain" is required because this server has no default domain)') if not xcap_uri.user.username: # for 'global' requests there's no username@domain in the URI, # so we will use username and domain from Authorization header xcap_uri.user.username, xcap_uri.user.domain = get_cred(request, AuthenticationConfig.default_realm) self._updateRealm(realm) # If we receive a GET to a 'public GET application' we will not authenticate it if request.method == "GET" and public_get_applications.has_key(xcap_uri.application_id): return self.portal.login(PublicGetApplicationCredentials(), None, IPublicGetApplication ).addCallbacks(self._loginSucceeded, self._publicGetApplicationLoginFailed, (request,), None, (request,), None) remote_addr = request.remoteAddr.host if AuthenticationConfig.trusted_peers: return self.portal.login(TrustedPeerCredentials(remote_addr), None, ITrustedPeer ).addCallbacks(self._loginSucceeded, self._trustedPeerLoginFailed, (request,), None, (request,), None) return HTTPAuthResource.authenticate(self, request) def _trustedPeerLoginFailed(self, result, request): """If the peer is not trusted, fallback to HTTP basic/digest authentication.""" return HTTPAuthResource.authenticate(self, request) def _publicGetApplicationLoginFailed(self, result, request): return HTTPAuthResource.authenticate(self, request) def _loginSucceeded(self, avatar, 

## authentication wrapper for XCAP resources

class XCAPAuthResource(HTTPAuthResource):

    def allowedMethods(self):
        return 'GET', 'PUT', 'DELETE'

    def _updateRealm(self, realm):
        """Updates the realm of the attached credential factories."""
        for factory in self.credentialFactories.values():
            factory.realm = realm

    def authenticate(self, request):
        """Authenticates an XCAP request."""
        parsed_url = urlparse.urlparse(request.uri)
        if request.port in (80, 443):
            uri = request.scheme + "://" + request.host + parsed_url.path
        else:
            uri = request.scheme + "://" + request.host + ":" + str(request.port) + parsed_url.path

        if parsed_url.query:
            uri += "?%s" % parsed_url.query

        xcap_uri = parseNodeURI(uri, AuthenticationConfig.default_realm)
        request.xcap_uri = xcap_uri
        if xcap_uri.doc_selector.context == 'global':
            return defer.succeed(self.wrappedResource)

        ## For each request the authentication realm must be
        ## dynamically deduced from the XCAP request URI
        realm = xcap_uri.user.domain

        if realm is None:
            raise ResourceNotFound('Unknown domain (the domain part of "username@domain" is required because this server has no default domain)')

        if not xcap_uri.user.username:
            # for 'global' requests there's no username@domain in the URI,
            # so we will use username and domain from the Authorization header
            xcap_uri.user.username, xcap_uri.user.domain = get_cred(request, AuthenticationConfig.default_realm)

        self._updateRealm(realm)

        # If we receive a GET to a 'public GET application' we will not authenticate it
        if request.method == "GET" and public_get_applications.has_key(xcap_uri.application_id):
            return self.portal.login(PublicGetApplicationCredentials(),
                                     None,
                                     IPublicGetApplication
                                     ).addCallbacks(self._loginSucceeded,
                                                    self._publicGetApplicationLoginFailed,
                                                    (request,), None,
                                                    (request,), None)

        remote_addr = request.remoteAddr.host

        if AuthenticationConfig.trusted_peers:
            return self.portal.login(TrustedPeerCredentials(remote_addr),
                                     None,
                                     ITrustedPeer
                                     ).addCallbacks(self._loginSucceeded,
                                                    self._trustedPeerLoginFailed,
                                                    (request,), None,
                                                    (request,), None)

        return HTTPAuthResource.authenticate(self, request)

    def _trustedPeerLoginFailed(self, result, request):
        """If the peer is not trusted, fall back to HTTP basic/digest authentication."""
        return HTTPAuthResource.authenticate(self, request)

    def _publicGetApplicationLoginFailed(self, result, request):
        return HTTPAuthResource.authenticate(self, request)

    def _loginSucceeded(self, avatar, request):
        """Authorizes an XCAP request after it has been authenticated."""
        interface, avatar_id = avatar  ## the avatar is the authenticated XCAP User
        xcap_uri = request.xcap_uri

        application = getApplicationForURI(xcap_uri)

        if not application:
            raise ResourceNotFound

        if interface is IAuthUser and application.is_authorized(XCAPUser.parse(avatar_id), xcap_uri):
            return HTTPAuthResource._loginSucceeded(self, avatar, request)
        elif interface is ITrustedPeer or interface is IPublicGetApplication:
            return HTTPAuthResource._loginSucceeded(self, avatar, request)
        else:
            return failure.Failure(
                      http.HTTPError(
                        UnauthorizedResponse(
                          self.credentialFactories,
                          request.remoteAddr)))

    def locateChild(self, request, seg):
        """
        Authenticate the request then return the C{self.wrappedResource}
        and the unmodified segments. We're not using path location, we want
        to fall back to the renderHTTP() call.
        """
        #return self.authenticate(request), seg
        return self, server.StopTraversal

    def renderHTTP(self, request):
        """
        Authenticate the request then return the result of calling renderHTTP
        on C{self.wrappedResource}
        """
        if request.method not in self.allowedMethods():
            response = http.Response(responsecode.NOT_ALLOWED)
            response.headers.setHeader("allow", self.allowedMethods())
            return response

        def _renderResource(resource):
            return resource.renderHTTP(request)

        def _finished_reading(ignore, result):
            data = ''.join(result)
            request.attachment = data
            d = self.authenticate(request)
            d.addCallback(_renderResource)
            return d

        if request.method in ('PUT', 'DELETE'):
            # we need to authenticate the request after all of the attachment stream
            # has been read
            # QQQ DELETE doesn't have any attachments, does it? nor does GET.
            # QQQ Reading attachment when there isn't one won't hurt, will it?
            # QQQ So why don't we just do it all the time for all requests?
            data = []
            d = stream.readStream(request.stream, data.append)
            d.addCallback(_finished_reading, data)
            return d
        else:
            d = self.authenticate(request)
            d.addCallback(_renderResource)
            return d
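The authenticate() method above rebuilds the request URI and hands it to parseNodeURI(). To make the realm deduction concrete, a hedged sketch of how an XCAP node URI decomposes, using hypothetical names (the real parsing is done by xcap.uri.XCAPUri; '~~' is the document/node selector separator from RFC 4825). The realm is the domain part of the users/<XUI> segment, here example.com:

    # hypothetical request, assuming an XCAP root of https://xcap.example.com/xcap-root
    uri = ('https://xcap.example.com/xcap-root'
           '/resource-lists/users/sip:alice@example.com/index/~~/resource-lists/list[@name="friends"]')

    xcap_root = 'https://xcap.example.com/xcap-root'
    resource_selector = uri[len(xcap_root):]
    document_selector, _, node_selector = resource_selector.partition('/~~/')

    print document_selector   # /resource-lists/users/sip:alice@example.com/index
    print node_selector       # resource-lists/list[@name="friends"]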

class BasicCredentials(credentials.UsernamePassword):
    """Custom Basic Credentials, which support both plain and hashed checks."""

    implements(credentials.IUsernamePassword, digest.IUsernameDigestHash)

    def __init__(self, username, password, realm):
        credentials.UsernamePassword.__init__(self, username, password)
        self.realm = realm

    @property
    def hash(self):
        return md5('{0.username}:{0.realm}:{0.password}'.format(self)).hexdigest()

    def checkHash(self, digestHash):
        return digestHash == self.hash


class BasicCredentialFactory(basic.BasicCredentialFactory):

    def decode(self, response, request):
        credential = super(BasicCredentialFactory, self).decode(response, request)
        return BasicCredentials(credential.username, credential.password, self.realm)


class DigestCredentialFactory(digest.DigestCredentialFactory):

    def generateOpaque(self, nonce, clientip):
        return super(DigestCredentialFactory, self).generateOpaque(nonce=nonce, clientip=clientip or '')

    def verifyOpaque(self, opaque, nonce, clientip):
        return super(DigestCredentialFactory, self).verifyOpaque(opaque=opaque, nonce=nonce, clientip=clientip or '')
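The hash property above is the H(A1) value used by HTTP Digest, which is what lets one stored credential serve both schemes: a backend that keeps only md5(username:realm:password) can still verify a plaintext Basic login. A short sketch with a hypothetical account, assuming BasicCredentials is importable as defined above:

    from hashlib import md5

    username, realm, password = 'alice', 'example.com', 's3cret'
    ha1 = md5('%s:%s:%s' % (username, realm, password)).hexdigest()

    # the hashed check succeeds against the digest-style stored value
    creds = BasicCredentials('alice', 's3cret', 'example.com')
    print creds.checkHash(ha1)    # True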

diff --git a/xcap/errors.py b/xcap/errors.py
index 2cfdad0..b1a7633 100644
--- a/xcap/errors.py
+++ b/xcap/errors.py
@@ -1,138 +1,138 @@
"""XCAP errors module"""

from xml.sax.saxutils import quoteattr
-from twisted.web2 import http_headers
-from twisted.web2.http import Response, HTTPError
+from xcap.web import http_headers
+from xcap.web.http import Response, HTTPError

__all__ = ['XCAPError', 'ResourceNotFound',
           'NotWellFormedError', 'SchemaValidationError', 'NotUTF8Error',
           'NotXMLAtrributeValueError', 'NotXMLFragmentError',
           'CannotInsertError', 'CannotDeleteError', 'NoParentError',
           'UniquenessFailureError', 'ConstraintFailureError']


class ResourceNotFound(HTTPError):
    def __init__(self, msg="", content_type=None):
        self.msg = msg
        response = Response(404, stream=msg)
        if content_type is None:
            content_type = http_headers.MimeType("text", "plain")
        response.headers.setHeader("content-type", content_type)
        HTTPError.__init__(self, response)

    def __str__(self):
        return self.msg


class XCAPError(HTTPError):
    code = 409
    namespace = "urn:ietf:params:xml:ns:xcap-error"
    tag = "undefined"
    phrase = ''

    def __init__(self, phrase=None, comment=''):
        if phrase is not None:
            self.phrase = phrase
        if comment:
            self.comment = '<!--\n' + str(comment).replace('-->', '--&gt;') + '\n-->'
        else:
            self.comment = ''
        self.response = XMLErrorResponse(self.code, self.build_xml_output())
        HTTPError.__init__(self, self.response)

    def build_xml_output(self):
        return """<?xml version="1.0" encoding="UTF-8"?>
<xcap-error xmlns="%s">%s</xcap-error>""" % (self.namespace, self.format_my_tag())

    def format_my_body(self):
        return ''

    def format_my_phrase(self):
        if self.phrase:
            return ' phrase=%s' % quoteattr(self.phrase)
        else:
            return ''

    def format_my_tag(self):
        phrase_attr = self.format_my_phrase()
        body = self.format_my_body()
        if body or self.comment:
            return '<%s%s>%s%s</%s>' % (self.tag, phrase_attr, self.comment, body, self.tag)
        else:
            return '<%s%s/>' % (self.tag, phrase_attr)

    def __str__(self):
        try:
            return self.format_my_tag()
        except:
            return ''


class XMLErrorResponse(Response):
    """
    A L{Response} object which simply contains a status code and a description
    of what happened.
    """
    def __init__(self, code, output):
        """
        @param code: a response code in L{responsecode.RESPONSES}.
        @param output: the body to be attached to the response
        """
        output = output.encode("utf-8")

        mime_params = {"charset": "utf-8"}

        Response.__init__(self, code=code, stream=output)

        ## Its MIME type, registered by this specification, is "application/xcap-error+xml".
        self.headers.setHeader("content-type", http_headers.MimeType("application", "xcap-error+xml", mime_params))


class SchemaValidationError(XCAPError):
    tag = "schema-validation-error"


class NotXMLFragmentError(XCAPError):
    tag = "not-xml-frag"


class CannotInsertError(XCAPError):
    tag = "cannot-insert"


class CannotDeleteError(XCAPError):
    tag = "cannot-delete"


class NotXMLAtrributeValueError(XCAPError):
    tag = "not-xml-att-value"


class NotWellFormedError(XCAPError):
    tag = "not-well-formed"


class ConstraintFailureError(XCAPError):
    tag = "constraint-failure"


class NotUTF8Error(XCAPError):
    tag = "not-utf-8"


class NoParentError(XCAPError):
    tag = "no-parent"

    def __init__(self, phrase='', ancestor='', comment=''):
        self.ancestor = ancestor
        XCAPError.__init__(self, phrase, comment)

    def format_my_body(self):
        if self.ancestor:
            return "<ancestor>%s</ancestor>" % self.ancestor
        else:
            return ""


class UniquenessFailureError(XCAPError):
    tag = "uniqueness-failure"

    def __init__(self, **kwargs):
        self.exists = kwargs.pop('exists')
        XCAPError.__init__(self, **kwargs)

    def format_my_body(self):
        return "<exists field=%s/>" % quoteattr(self.exists)

diff --git a/xcap/resource.py b/xcap/resource.py
index 8db3fb6..a392413 100644
--- a/xcap/resource.py
+++ b/xcap/resource.py
@@ -1,128 +1,128 @@
"""XCAP resources module"""

-from twisted.web2 import http, resource, responsecode
-from twisted.web2.http_headers import ETag, MimeType
-from twisted.web2.static import MetaDataMixin
+from xcap.web import http, resource, responsecode
+from xcap.web.http_headers import ETag, MimeType
+from xcap.web.static import MetaDataMixin


class XCAPResource(resource.Resource, resource.LeafResource, MetaDataMixin):

    def __init__(self, xcap_uri, application):
        self.xcap_uri = xcap_uri
        self.application = application
        self.e_tag = None

    def checkPreconditions(self, request):
        ## don't let renderHTTP automatically check preconditions, we'll do this ourselves
        return None

    def checkEtag(self, request, etag, exists=True):
        http.checkPreconditions(request, etag=ETag(etag), entityExists=exists)

    def renderHTTP(self, request):
        d = resource.Resource.renderHTTP(self, request)
        d.addCallback(self.sendResponse)
        return d

    def setHeaders(self, response):
        ## Don't provide additional resource information to error responses,
        ## this is already done by the responses in the errors module
        if response.code < 400:
            for (header, value) in (
                ("etag", self.etag()),
                ("content-type", self.contentType())
            ):
                if value is not None:
                    response.headers.setHeader(header, value)
        return response

    def sendResponse(self, response):
        if response.etag:
            self.e_tag = ETag(response.etag)
        response = http.Response(response.code, stream=response.data)
        return self.setHeaders(response)

    def etag(self):
        return self.e_tag or None


class XCAPDocument(XCAPResource):

    def http_GET(self, request):
        d = self.application.get_document(self.xcap_uri, lambda e: self.checkEtag(request, e))
        return d

    def http_PUT(self, request):
        application = self.application
        document = request.attachment
        return application.put_document(self.xcap_uri, document, lambda e, exists=True: self.checkEtag(request, e, exists))

    def http_DELETE(self, request):
        d = self.application.delete_document(self.xcap_uri, lambda e: self.checkEtag(request, e))
        return d

    def contentType(self):
        try:
            return MimeType.fromString(self.application.mime_type)
except TypeError: return None class XCAPElement(XCAPResource): content_type = MimeType.fromString("application/xcap-el+xml") def http_GET(self, request): d = self.application.get_element(self.xcap_uri, lambda e: self.checkEtag(request, e)) return d def http_DELETE(self, request): d = self.application.delete_element(self.xcap_uri, lambda e: self.checkEtag(request, e)) return d def http_PUT(self, request): content_type = request.headers.getHeader('content-type') if not content_type or content_type != self.content_type: raise http.HTTPError(responsecode.UNSUPPORTED_MEDIA_TYPE) element = request.attachment d = self.application.put_element(self.xcap_uri, element, lambda e: self.checkEtag(request, e)) return d def contentType(self): return self.content_type class XCAPAttribute(XCAPResource): content_type = MimeType.fromString("application/xcap-att+xml") def contentType(self): return self.content_type def http_GET(self, request): d = self.application.get_attribute(self.xcap_uri, lambda e: self.checkEtag(request, e)) return d def http_DELETE(self, request): d = self.application.delete_attribute(self.xcap_uri, lambda e: self.checkEtag(request, e)) return d def http_PUT(self, request): content_type = request.headers.getHeader('content-type') if not content_type or content_type != self.content_type: raise http.HTTPError(responsecode.UNSUPPORTED_MEDIA_TYPE) attribute = request.attachment d = self.application.put_attribute(self.xcap_uri, attribute, lambda e: self.checkEtag(request, e)) return d class XCAPNamespaceBinding(XCAPResource): content_type = MimeType.fromString("application/xcap-ns+xml") def contentType(self): return self.content_type def http_GET(self, request): d = self.application.get_ns_bindings(self.xcap_uri, lambda e: self.checkEtag(request, e)) return d diff --git a/xcap/server.py b/xcap/server.py index 2259d7f..2788cba 100644 --- a/xcap/server.py +++ b/xcap/server.py @@ -1,177 +1,177 @@ """HTTP handling for the XCAP server""" from __future__ import absolute_import import resource as _resource import sys from application.configuration.datatypes import IPAddress, NetworkRangeList from application.configuration import ConfigSection, ConfigSetting from application import log from twisted.internet import reactor -from twisted.web2 import channel, resource, http, responsecode, server from twisted.cred.portal import Portal import xcap from xcap import authentication from xcap.datatypes import XCAPRootURI from xcap.appusage import getApplicationForURI, Backend from xcap.resource import XCAPDocument, XCAPElement, XCAPAttribute, XCAPNamespaceBinding from xcap.logutil import web_logger from xcap.tls import Certificate, PrivateKey +from xcap.web import channel, resource, http, responsecode, server from xcap.xpath import AttributeSelector, NamespaceSelector server.VERSION = "OpenXCAP/%s" % xcap.__version__ class AuthenticationConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Authentication' type = 'digest' cleartext_passwords = True default_realm = ConfigSetting(type=str, value=None) trusted_peers = ConfigSetting(type=NetworkRangeList, value=NetworkRangeList('none')) class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' address = ConfigSetting(type=IPAddress, value='0.0.0.0') root = ConfigSetting(type=XCAPRootURI, value=None) backend = ConfigSetting(type=Backend, value=None) class TLSConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'TLS' certificate = ConfigSetting(type=Certificate, value=None) private_key = 
ConfigSetting(type=PrivateKey, value=None) if ServerConfig.root is None: log.critical('The XCAP root URI is not defined') sys.exit(1) if ServerConfig.backend is None: log.critical('OpenXCAP needs a backend to be specified in order to run') sys.exit(1) # Increase the system limit for the maximum number of open file descriptors try: _resource.setrlimit(_resource.RLIMIT_NOFILE, (99999, 99999)) except ValueError: log.warning('Could not raise open file descriptor limit') class XCAPRoot(resource.Resource, resource.LeafResource): addSlash = True def allowedMethods(self): # not used , but methods were already checked by XCAPAuthResource return ('GET', 'PUT', 'DELETE') def resourceForURI(self, xcap_uri): application = getApplicationForURI(xcap_uri) if not xcap_uri.node_selector: return XCAPDocument(xcap_uri, application) else: terminal_selector = xcap_uri.node_selector.terminal_selector if isinstance(terminal_selector, AttributeSelector): return XCAPAttribute(xcap_uri, application) elif isinstance(terminal_selector, NamespaceSelector): return XCAPNamespaceBinding(xcap_uri, application) else: return XCAPElement(xcap_uri, application) def renderHTTP(self, request): application = getApplicationForURI(request.xcap_uri) if not application: return http.Response(responsecode.NOT_FOUND, stream="Application not supported") resource = self.resourceForURI(request.xcap_uri) return resource.renderHTTP(request) class Request(server.Request): def writeResponse(self, response): web_logger.log_access(request=self, response=response) return server.Request.writeResponse(self, response) class HTTPChannel(channel.http.HTTPChannel): inputTimeOut = 30 def __init__(self): channel.http.HTTPChannel.__init__(self) # if connection wasn't completed for 30 seconds, terminate it, # this avoids having lingering TCP connections which don't complete # the TLS handshake self.setTimeout(30) def timeoutConnection(self): if self.transport: log.info('Timing out client: {}'.format(self.transport.getPeer())) channel.http.HTTPChannel.timeoutConnection(self) class HTTPFactory(channel.HTTPFactory): noisy = False protocol = HTTPChannel class XCAPSite(server.Site): def __call__(self, *args, **kwargs): return Request(site=self, *args, **kwargs) class XCAPServer(object): def __init__(self): portal = Portal(authentication.XCAPAuthRealm()) if AuthenticationConfig.cleartext_passwords: http_checker = ServerConfig.backend.PlainPasswordChecker() else: http_checker = ServerConfig.backend.HashPasswordChecker() portal.registerChecker(http_checker) trusted_peers = AuthenticationConfig.trusted_peers portal.registerChecker(authentication.TrustedPeerChecker(trusted_peers)) portal.registerChecker(authentication.PublicGetApplicationChecker()) auth_type = AuthenticationConfig.type if auth_type == 'basic': credential_factory = authentication.BasicCredentialFactory(auth_type) elif auth_type == 'digest': credential_factory = authentication.DigestCredentialFactory('MD5', auth_type) else: raise ValueError('Invalid authentication type: %r. Please check the configuration.' 
% auth_type) root = authentication.XCAPAuthResource(XCAPRoot(), (credential_factory,), portal, (authentication.IAuthUser,)) self.site = XCAPSite(root) def _start_https(self, reactor): from gnutls.interfaces.twisted import X509Credentials from gnutls.connection import TLSContext, TLSContextServerOptions cert, pKey = TLSConfig.certificate, TLSConfig.private_key if cert is None or pKey is None: log.critical('The TLS certificate/key could not be loaded') sys.exit(1) credentials = X509Credentials(cert, pKey) tls_context = TLSContext(credentials, server_options=TLSContextServerOptions(certificate_request=None)) reactor.listenTLS(ServerConfig.root.port, HTTPFactory(self.site), tls_context, interface=ServerConfig.address) log.info('TLS started') def start(self): log.info('Listening on: %s:%d' % (ServerConfig.address, ServerConfig.root.port)) log.info('XCAP root: %s' % ServerConfig.root) if ServerConfig.root.startswith('https'): self._start_https(reactor) else: reactor.listenTCP(ServerConfig.root.port, HTTPFactory(self.site), interface=ServerConfig.address) reactor.run(installSignalHandlers=ServerConfig.backend.installSignalHandlers) diff --git a/xcap/web/__init__.py b/xcap/web/__init__.py new file mode 100644 index 0000000..31a9d18 --- /dev/null +++ b/xcap/web/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2001-2004 Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" + +Twisted Web2: a better Twisted Web Server. + +""" + +from xcap.web._version import version +__version__ = version.short() diff --git a/xcap/web/_version.py b/xcap/web/_version.py new file mode 100644 index 0000000..e63ee75 --- /dev/null +++ b/xcap/web/_version.py @@ -0,0 +1,3 @@ +# This is an auto-generated file. Do not edit it. +from twisted.python import versions +version = versions.Version('xcap.web', 8, 1, 0) diff --git a/xcap/web/auth/__init__.py b/xcap/web/auth/__init__.py new file mode 100644 index 0000000..2a6344b --- /dev/null +++ b/xcap/web/auth/__init__.py @@ -0,0 +1,3 @@ +""" +Client and server implementations of http authentication +""" diff --git a/xcap/web/auth/basic.py b/xcap/web/auth/basic.py new file mode 100644 index 0000000..e5152f4 --- /dev/null +++ b/xcap/web/auth/basic.py @@ -0,0 +1,32 @@ + +from twisted.cred import credentials, error +from xcap.web.auth.interfaces import ICredentialFactory + +from zope.interface import implements + +class BasicCredentialFactory(object): + """ + Credential Factory for HTTP Basic Authentication + """ + + implements(ICredentialFactory) + + scheme = 'basic' + + def __init__(self, realm): + self.realm = realm + + def getChallenge(self, peer): + return {'realm': self.realm} + + def decode(self, response, request): + try: + creds = (response + '===').decode('base64') + except: + raise error.LoginFailed('Invalid credentials') + + creds = creds.split(':', 1) + if len(creds) == 2: + return credentials.UsernamePassword(*creds) + else: + raise error.LoginFailed('Invalid credentials') diff --git a/xcap/web/auth/digest.py b/xcap/web/auth/digest.py new file mode 100644 index 0000000..992e88a --- /dev/null +++ b/xcap/web/auth/digest.py @@ -0,0 +1,349 @@ +# Copyright (c) 2006-2008 Twisted Matrix Laboratories. 
+ +""" +Implementation of RFC2617: HTTP Digest Authentication + +http://www.faqs.org/rfcs/rfc2617.html +""" +import sys +import time +import random + +from hashlib import md5, sha1 +from twisted.cred import credentials, error +from zope.interface import implements, Interface + +from xcap.web.auth.interfaces import ICredentialFactory + +# The digest math + +algorithms = { + 'md5': md5, + 'md5-sess': md5, + 'sha': sha1, +} + +# DigestCalcHA1 +def calcHA1( + pszAlg, + pszUserName, + pszRealm, + pszPassword, + pszNonce, + pszCNonce, + preHA1=None +): + """ + @param pszAlg: The name of the algorithm to use to calculate the digest. + Currently supported are md5 md5-sess and sha. + + @param pszUserName: The username + @param pszRealm: The realm + @param pszPassword: The password + @param pszNonce: The nonce + @param pszCNonce: The cnonce + + @param preHA1: If available this is a str containing a previously + calculated HA1 as a hex string. If this is given then the values for + pszUserName, pszRealm, and pszPassword are ignored. + """ + + if (preHA1 and (pszUserName or pszRealm or pszPassword)): + raise TypeError(("preHA1 is incompatible with the pszUserName, " + "pszRealm, and pszPassword arguments")) + + if preHA1 is None: + # We need to calculate the HA1 from the username:realm:password + m = algorithms[pszAlg]() + m.update(pszUserName) + m.update(":") + m.update(pszRealm) + m.update(":") + m.update(pszPassword) + HA1 = m.digest() + else: + # We were given a username:realm:password + HA1 = preHA1.decode('hex') + + if pszAlg == "md5-sess": + m = algorithms[pszAlg]() + m.update(HA1) + m.update(":") + m.update(pszNonce) + m.update(":") + m.update(pszCNonce) + HA1 = m.digest() + + return HA1.encode('hex') + +# DigestCalcResponse +def calcResponse( + HA1, + algo, + pszNonce, + pszNonceCount, + pszCNonce, + pszQop, + pszMethod, + pszDigestUri, + pszHEntity, +): + m = algorithms[algo]() + m.update(pszMethod) + m.update(":") + m.update(pszDigestUri) + if pszQop == "auth-int": + m.update(":") + m.update(pszHEntity) + HA2 = m.digest().encode('hex') + + m = algorithms[algo]() + m.update(HA1) + m.update(":") + m.update(pszNonce) + m.update(":") + if pszNonceCount and pszCNonce: # pszQop: + m.update(pszNonceCount) + m.update(":") + m.update(pszCNonce) + m.update(":") + m.update(pszQop) + m.update(":") + m.update(HA2) + respHash = m.digest().encode('hex') + return respHash + + +class IUsernameDigestHash(Interface): + """ + This credential is used when a CredentialChecker has access to the hash + of the username:realm:password as in an Apache .htdigest file. + """ + def checkHash(self, digestHash): + """ + @param digestHash: The hashed username:realm:password to check against. + + @return: a deferred which becomes, or a boolean indicating if the + hash matches. 
+ """ + + +class DigestedCredentials: + """Yet Another Simple HTTP Digest authentication scheme""" + + implements(credentials.IUsernameHashedPassword, + IUsernameDigestHash) + + def __init__(self, username, method, realm, fields): + self.username = username + self.method = method + self.realm = realm + self.fields = fields + + def checkPassword(self, password): + response = self.fields.get('response') + uri = self.fields.get('uri') + nonce = self.fields.get('nonce') + cnonce = self.fields.get('cnonce') + nc = self.fields.get('nc') + algo = self.fields.get('algorithm', 'md5').lower() + qop = self.fields.get('qop', 'auth') + + expected = calcResponse( + calcHA1(algo, self.username, self.realm, password, nonce, cnonce), + algo, nonce, nc, cnonce, qop, self.method, uri, None + ) + + return expected == response + + def checkHash(self, digestHash): + response = self.fields.get('response') + uri = self.fields.get('uri') + nonce = self.fields.get('nonce') + cnonce = self.fields.get('cnonce') + nc = self.fields.get('nc') + algo = self.fields.get('algorithm', 'md5').lower() + qop = self.fields.get('qop', 'auth') + + expected = calcResponse( + calcHA1(algo, None, None, None, nonce, cnonce, preHA1=digestHash), + algo, nonce, nc, cnonce, qop, self.method, uri, None + ) + + return expected == response + + +class DigestCredentialFactory(object): + """ + Support for RFC2617 HTTP Digest Authentication + + @cvar CHALLENGE_LIFETIME_SECS: The number of seconds for which an + opaque should be valid. + + @ivar privateKey: A random string used for generating the secure opaque. + """ + + implements(ICredentialFactory) + + CHALLENGE_LIFETIME_SECS = 15 * 60 # 15 minutes + + scheme = "digest" + + def __init__(self, algorithm, realm): + """ + @type algorithm: C{str} + @param algorithm: case insensitive string that specifies + the hash algorithm used, should be either, md5, md5-sess + or sha + + @type realm: C{str} + @param realm: case sensitive string that specifies the realm + portion of the challenge + """ + self.algorithm = algorithm + self.realm = realm + + c = tuple([random.randrange(sys.maxint) for _ in range(3)]) + + self.privateKey = '%d%d%d' % c + + def generateNonce(self): + c = tuple([random.randrange(sys.maxint) for _ in range(3)]) + c = '%d%d%d' % c + return c + + def _getTime(self): + """ + Parameterize the time based seed used in generateOpaque + so we can deterministically unittest it's behavior. + """ + return time.time() + + def generateOpaque(self, nonce, clientip): + """ + Generate an opaque to be returned to the client. + This should be a unique string that can be returned to us and verified. + """ + + # Now, what we do is encode the nonce, client ip and a timestamp + # in the opaque value with a suitable digest + key = "%s,%s,%s" % (nonce, clientip, str(int(self._getTime()))) + digest = md5(key + self.privateKey).hexdigest() + ekey = key.encode('base64') + return "%s-%s" % (digest, ekey.strip('\n')) + + def verifyOpaque(self, opaque, nonce, clientip): + """ + Given the opaque and nonce from the request, as well as the clientip + that made the request, verify that the opaque was generated by us. + And that it's not too old. + + @param opaque: The opaque value from the Digest response + @param nonce: The nonce value from the Digest response + @param clientip: The remote IP address of the client making the request + + @return: C{True} if the opaque was successfully verified. + + @raise error.LoginFailed: if C{opaque} could not be parsed or + contained the wrong values. 
+ """ + + # First split the digest from the key + opaqueParts = opaque.split('-') + if len(opaqueParts) != 2: + raise error.LoginFailed('Invalid response, invalid opaque value') + + # Verify the key + key = opaqueParts[1].decode('base64') + keyParts = key.split(',') + + if len(keyParts) != 3: + raise error.LoginFailed('Invalid response, invalid opaque value') + + if keyParts[0] != nonce: + raise error.LoginFailed( + 'Invalid response, incompatible opaque/nonce values') + + if keyParts[1] != clientip: + raise error.LoginFailed( + 'Invalid response, incompatible opaque/client values') + + if (int(self._getTime()) - int(keyParts[2]) > + DigestCredentialFactory.CHALLENGE_LIFETIME_SECS): + + raise error.LoginFailed( + 'Invalid response, incompatible opaque/nonce too old') + + # Verify the digest + digest = md5(key + self.privateKey).hexdigest() + if digest != opaqueParts[0]: + raise error.LoginFailed('Invalid response, invalid opaque value') + + return True + + def getChallenge(self, peer): + """ + Generate the challenge for use in the WWW-Authenticate header + + @param peer: The L{IAddress} of the requesting client. + + @return: The C{dict} that can be used to generate a WWW-Authenticate + header. + """ + + c = self.generateNonce() + o = self.generateOpaque(c, peer.host) + + return {'nonce': c, + 'opaque': o, + 'qop': 'auth', + 'algorithm': self.algorithm, + 'realm': self.realm} + + def decode(self, response, request): + """ + Decode the given response and attempt to generate a + L{DigestedCredentials} from it. + + @type response: C{str} + @param response: A string of comma seperated key=value pairs + + @type request: L{xcap.web.server.Request} + @param request: the request being processed + + @return: L{DigestedCredentials} + + @raise: L{error.LoginFailed} if the response does not contain a + username, a nonce, an opaque, or if the opaque is invalid. + """ + def unq(s): + if s[0] == s[-1] == '"': + return s[1:-1] + return s + response = ' '.join(response.splitlines()) + parts = response.split(',') + + auth = {} + + for (k, v) in [p.split('=', 1) for p in parts]: + auth[k.strip()] = unq(v.strip()) + + username = auth.get('username') + if not username: + raise error.LoginFailed('Invalid response, no username given.') + + if 'opaque' not in auth: + raise error.LoginFailed('Invalid response, no opaque given.') + + if 'nonce' not in auth: + raise error.LoginFailed('Invalid response, no nonce given.') + + # Now verify the nonce/opaque values for this client + if self.verifyOpaque(auth.get('opaque'), + auth.get('nonce'), + request.remoteAddr.host): + + return DigestedCredentials(username, + request.method, + self.realm, + auth) diff --git a/xcap/web/auth/interfaces.py b/xcap/web/auth/interfaces.py new file mode 100644 index 0000000..6a2e89c --- /dev/null +++ b/xcap/web/auth/interfaces.py @@ -0,0 +1,59 @@ +from zope.interface import Interface, Attribute + +class ICredentialFactory(Interface): + """ + A credential factory provides state between stages in HTTP + authentication. It is ultimately in charge of creating an + ICredential for the specified scheme, that will be used by + cred to complete authentication. + """ + scheme = Attribute(("string indicating the authentication scheme " + "this factory is associated with.")) + + def getChallenge(peer): + """ + Generate a challenge the client may respond to. 
+ + @type peer: L{twisted.internet.interfaces.IAddress} + @param peer: The client's address + + @rtype: C{dict} + @return: dictionary of challenge arguments + """ + + def decode(response, request): + """ + Create a credentials object from the given response. + May raise twisted.cred.error.LoginFailed if the response is invalid. + + @type response: C{str} + @param response: scheme specific response string + + @type request: L{xcap.web.server.Request} + @param request: the request being processed + + @return: ICredentials + """ + + +class IAuthenticatedRequest(Interface): + """ + A request that has been authenticated with the use of Cred, + and holds a reference to the avatar returned by portal.login + """ + + avatarInterface = Attribute(("The credential interface implemented by " + "the avatar")) + + avatar = Attribute("The application specific avatar returned by " + "the application's realm") + + +class IHTTPUser(Interface): + """ + A generic interface that can implemented by an avatar to provide + access to the username used when authenticating. + """ + + username = Attribute(("A string representing the username portion of " + "the credentials used for authentication")) \ No newline at end of file diff --git a/xcap/web/auth/wrapper.py b/xcap/web/auth/wrapper.py new file mode 100644 index 0000000..e05e25c --- /dev/null +++ b/xcap/web/auth/wrapper.py @@ -0,0 +1,200 @@ + +""" +Wrapper Resources for rfc2617 HTTP Auth. +""" +from zope.interface import implements, directlyProvides +from twisted.cred import error, credentials +from twisted.python import failure +from xcap.web import responsecode +from xcap.web import http +from xcap.web import iweb +from xcap.web.auth.interfaces import IAuthenticatedRequest + +class UnauthorizedResponse(http.StatusResponse): + """A specialized response class for generating www-authenticate headers + from the given L{CredentialFactory} instances + """ + + def __init__(self, factories, remoteAddr=None): + """ + @param factories: A L{dict} of {'scheme': ICredentialFactory} + + @param remoteAddr: An L{IAddress} for the connecting client. + """ + + super(UnauthorizedResponse, self).__init__( + responsecode.UNAUTHORIZED, + "You are not authorized to access this resource.") + + authHeaders = [] + for factory in factories.itervalues(): + authHeaders.append((factory.scheme, + factory.getChallenge(remoteAddr))) + + self.headers.setHeader('www-authenticate', authHeaders) + + +class HTTPAuthResource(object): + """I wrap a resource to prevent it being accessed unless the authentication + can be completed using the credential factory, portal, and interfaces + specified. + """ + + implements(iweb.IResource) + + def __init__(self, wrappedResource, credentialFactories, + portal, interfaces): + """ + @param wrappedResource: A L{xcap.web.iweb.IResource} to be returned + from locateChild and render upon successful + authentication. + + @param credentialFactories: A list of instances that implement + L{ICredentialFactory}. + @type credentialFactories: L{list} + + @param portal: Portal to handle logins for this resource. + @type portal: L{twisted.cred.portal.Portal} + + @param interfaces: the interfaces that are allowed to log in via the + given portal + @type interfaces: L{tuple} + """ + + self.wrappedResource = wrappedResource + + self.credentialFactories = dict([(factory.scheme, factory) + for factory in credentialFactories]) + self.portal = portal + self.interfaces = interfaces + + def _loginSucceeded(self, avatar, request): + """ + Callback for successful login. 
+
+        @param avatar: A tuple of the form (interface, avatar) as
+            returned by your realm.
+
+        @param request: L{IRequest} that encapsulates this auth
+            attempt.
+
+        @return: the IResource in C{self.wrappedResource}
+        """
+        request.avatarInterface, request.avatar = avatar
+
+        directlyProvides(request, IAuthenticatedRequest)
+
+        def _addAuthenticateHeaders(request, response):
+            """
+            A response filter that adds www-authenticate headers
+            to an outgoing response if its code is UNAUTHORIZED (401)
+            and it does not already have them.
+            """
+            if response.code == responsecode.UNAUTHORIZED:
+                if not response.headers.hasHeader('www-authenticate'):
+                    newResp = UnauthorizedResponse(self.credentialFactories,
+                                                   request.remoteAddr)
+
+                    response.headers.setHeader(
+                        'www-authenticate',
+                        newResp.headers.getHeader('www-authenticate'))
+
+            return response
+
+        _addAuthenticateHeaders.handleErrors = True
+
+        request.addResponseFilter(_addAuthenticateHeaders)
+
+        return self.wrappedResource
+
+    def _loginFailed(self, result, request):
+        """
+        Errback for failed login.
+
+        @param result: L{Failure} returned by portal.login
+
+        @param request: L{IRequest} that encapsulates this auth
+            attempt.
+
+        @return: A L{Failure} containing an L{HTTPError} containing the
+            L{UnauthorizedResponse} if C{result} is an L{UnauthorizedLogin}
+            or L{UnhandledCredentials} error
+        """
+        result.trap(error.UnauthorizedLogin, error.UnhandledCredentials)
+
+        return failure.Failure(
+            http.HTTPError(
+                UnauthorizedResponse(
+                    self.credentialFactories,
+                    request.remoteAddr)))
+
+    def login(self, factory, response, request):
+        """
+        @param factory: An L{ICredentialFactory} that understands the given
+            response.
+
+        @param response: The client's authentication response as a string.
+
+        @param request: The request that prompted this authentication attempt.
+
+        @return: A L{Deferred} that fires with the wrappedResource on success
+            or a failure containing an L{UnauthorizedResponse}
+        """
+        try:
+            creds = factory.decode(response, request)
+        except error.LoginFailed:
+            raise http.HTTPError(UnauthorizedResponse(
+                self.credentialFactories,
+                request.remoteAddr))
+
+        return self.portal.login(creds, None, *self.interfaces
+                                 ).addCallbacks(self._loginSucceeded,
+                                                self._loginFailed,
+                                                (request,), None,
+                                                (request,), None)
+
+    def authenticate(self, request):
+        """
+        Attempt to authenticate the given request
+
+        @param request: An L{IRequest} to be authenticated.
+        """
+        authHeader = request.headers.getHeader('authorization')
+
+        if authHeader is None:
+            return self.portal.login(credentials.Anonymous(),
+                                     None,
+                                     *self.interfaces
+                                     ).addCallbacks(self._loginSucceeded,
+                                                    self._loginFailed,
+                                                    (request,), None,
+                                                    (request,), None)
+
+        elif authHeader[0] not in self.credentialFactories:
+            raise http.HTTPError(UnauthorizedResponse(
+                self.credentialFactories,
+                request.remoteAddr))
+        else:
+            return self.login(self.credentialFactories[authHeader[0]],
+                              authHeader[1], request)
+
+    def locateChild(self, request, seg):
+        """
+        Authenticate the request then return the C{self.wrappedResource}
+        and the unmodified segments.
+ """ + return self.authenticate(request), seg + + def renderHTTP(self, request): + """ + Authenticate the request then return the result of calling renderHTTP + on C{self.wrappedResource} + """ + def _renderResource(resource): + return resource.renderHTTP(request) + + d = self.authenticate(request) + d.addCallback(_renderResource) + + return d diff --git a/xcap/web/channel/__init__.py b/xcap/web/channel/__init__.py new file mode 100644 index 0000000..92766f3 --- /dev/null +++ b/xcap/web/channel/__init__.py @@ -0,0 +1,8 @@ +# See LICENSE for details. + +""" +Various backend channel implementations for web. +""" +from xcap.web.channel.http import HTTPFactory + +__all__ = ['HTTPFactory'] diff --git a/xcap/web/channel/http.py b/xcap/web/channel/http.py new file mode 100644 index 0000000..8a8c95d --- /dev/null +++ b/xcap/web/channel/http.py @@ -0,0 +1,898 @@ + +import socket +import warnings + +from cStringIO import StringIO +from twisted.internet import interfaces, protocol, reactor +from twisted.protocols import policies, basic +from twisted.python import log +from zope.interface import implements + +from xcap.web import responsecode +from xcap.web import http_headers +from xcap.web import http + +PERSIST_NO_PIPELINE, PERSIST_PIPELINE = (1,2) + +_cachedHostNames = {} +def _cachedGetHostByAddr(hostaddr): + hostname = _cachedHostNames.get(hostaddr) + if hostname is None: + try: + hostname = socket.gethostbyaddr(hostaddr)[0] + except socket.herror: + hostname = hostaddr + _cachedHostNames[hostaddr]=hostname + return hostname + +class StringTransport(object): + """ + I am a StringIO wrapper that conforms for the transport API. I support + the 'writeSequence' method. + """ + def __init__(self): + self.s = StringIO() + def writeSequence(self, seq): + self.s.write(''.join(seq)) + def __getattr__(self, attr): + return getattr(self.__dict__['s'], attr) + +class AbortedException(Exception): + pass + + +class HTTPParser(object): + """This class handles the parsing side of HTTP processing. With a suitable + subclass, it can parse either the client side or the server side of the + connection. + """ + + # Class config: + parseCloseAsEnd = False + + # Instance vars + chunkedIn = False + headerlen = 0 + length = None + inHeaders = None + partialHeader = '' + connHeaders = None + finishedReading = False + + channel = None + + # For subclassing... + # Needs attributes: + # version + + # Needs functions: + # createRequest() + # processRequest() + # _abortWithError() + # handleContentChunk(data) + # handleContentComplete() + + # Needs functions to exist on .channel + # channel.maxHeaderLength + # channel.requestReadFinished(self) + # channel.setReadPersistent(self, persistent) + # (from LineReceiver): + # channel.setRawMode() + # channel.setLineMode(extraneous) + # channel.pauseProducing() + # channel.resumeProducing() + # channel.stopProducing() + + + def __init__(self, channel): + self.inHeaders = http_headers.Headers() + self.channel = channel + + def lineReceived(self, line): + if self.chunkedIn: + # Parsing a chunked input + if self.chunkedIn == 1: + # First we get a line like "chunk-size [';' chunk-extension]" + # (where chunk extension is just random crap as far as we're concerned) + # RFC says to ignore any extensions you don't recognize -- that's all of them. + chunksize = line.split(';', 1)[0] + try: + self.length = int(chunksize, 16) + except: + self._abortWithError(responsecode.BAD_REQUEST, "Invalid chunk size, not a hex number: %s!" 
% chunksize) + if self.length < 0: + self._abortWithError(responsecode.BAD_REQUEST, "Invalid chunk size, negative.") + + if self.length == 0: + # We're done, parse the trailers line + self.chunkedIn = 3 + else: + # Read self.length bytes of raw data + self.channel.setRawMode() + elif self.chunkedIn == 2: + # After we got data bytes of the appropriate length, we end up here, + # waiting for the CRLF, then go back to get the next chunk size. + if line != '': + self._abortWithError(responsecode.BAD_REQUEST, "Excess %d bytes sent in chunk transfer mode" % len(line)) + self.chunkedIn = 1 + elif self.chunkedIn == 3: + # TODO: support Trailers (maybe! but maybe not!) + + # After getting the final "0" chunk we're here, and we *EAT MERCILESSLY* + # any trailer headers sent, and wait for the blank line to terminate the + # request. + if line == '': + self.allContentReceived() + # END of chunk handling + elif line == '': + # Empty line => End of headers + if self.partialHeader: + self.headerReceived(self.partialHeader) + self.partialHeader = '' + self.allHeadersReceived() # can set chunkedIn + self.createRequest() + if self.chunkedIn: + # stay in linemode waiting for chunk header + pass + elif self.length == 0: + # no content expected + self.allContentReceived() + else: + # await raw data as content + self.channel.setRawMode() + # Should I do self.pauseProducing() here? + self.processRequest() + else: + self.headerlen += len(line) + if self.headerlen > self.channel.maxHeaderLength: + self._abortWithError(responsecode.BAD_REQUEST, 'Headers too long.') + + if line[0] in ' \t': + # Append a header continuation + self.partialHeader += line + else: + if self.partialHeader: + self.headerReceived(self.partialHeader) + self.partialHeader = line + + def rawDataReceived(self, data): + """Handle incoming content.""" + datalen = len(data) + if datalen < self.length: + self.handleContentChunk(data) + self.length = self.length - datalen + else: + self.handleContentChunk(data[:self.length]) + extraneous = data[self.length:] + channel = self.channel # could go away from allContentReceived. + if not self.chunkedIn: + self.allContentReceived() + else: + # NOTE: in chunked mode, self.length is the size of the current chunk, + # so we still have more to read. + self.chunkedIn = 2 # Read next chunksize + + channel.setLineMode(extraneous) + + def headerReceived(self, line): + """Store this header away. Check for too much header data + (> channel.maxHeaderLength) and abort the connection if so. + """ + nameval = line.split(':', 1) + if len(nameval) != 2: + self._abortWithError(responsecode.BAD_REQUEST, "No ':' in header.") + + name, val = nameval + val = val.lstrip(' \t') + self.inHeaders.addRawHeader(name, val) + + + def allHeadersReceived(self): + # Split off connection-related headers + connHeaders = self.splitConnectionHeaders() + + # Set connection parameters from headers + self.setConnectionParams(connHeaders) + self.connHeaders = connHeaders + + def allContentReceived(self): + self.finishedReading = True + self.channel.requestReadFinished(self) + self.handleContentComplete() + + + def splitConnectionHeaders(self): + """ + Split off connection control headers from normal headers. + + The normal headers are then passed on to user-level code, while the + connection headers are stashed in .connHeaders and used for things like + request/response framing. 
+ + This corresponds roughly with the HTTP RFC's description of 'hop-by-hop' + vs 'end-to-end' headers in RFC2616 S13.5.1, with the following + exceptions: + + * proxy-authenticate and proxy-authorization are not treated as + connection headers. + + * content-length is, as it is intimiately related with low-level HTTP + parsing, and is made available to user-level code via the stream + length, rather than a header value. (except for HEAD responses, in + which case it is NOT used by low-level HTTP parsing, and IS kept in + the normal headers. + """ + + def move(name): + h = inHeaders.getRawHeaders(name, None) + if h is not None: + inHeaders.removeHeader(name) + connHeaders.setRawHeaders(name, h) + + # NOTE: According to HTTP spec, we're supposed to eat the + # 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that + # doesn't sound like a good idea to me, because it makes it impossible + # to have a non-authenticating transparent proxy in front of an + # authenticating proxy. An authenticating proxy can eat them itself. + # + # 'Proxy-Connection' is an undocumented HTTP 1.0 abomination. + connHeaderNames = ['content-length', 'connection', 'keep-alive', 'te', + 'trailers', 'transfer-encoding', 'upgrade', + 'proxy-connection'] + inHeaders = self.inHeaders + connHeaders = http_headers.Headers() + + move('connection') + if self.version < (1,1): + # Remove all headers mentioned in Connection, because a HTTP 1.0 + # proxy might have erroneously forwarded it from a 1.1 client. + for name in connHeaders.getHeader('connection', ()): + if inHeaders.hasHeader(name): + inHeaders.removeHeader(name) + else: + # Otherwise, just add the headers listed to the list of those to move + connHeaderNames.extend(connHeaders.getHeader('connection', ())) + + # If the request was HEAD, self.length has been set to 0 by + # HTTPClientRequest.submit; in this case, Content-Length should + # be treated as a response header, not a connection header. + + # Note: this assumes the invariant that .length will always be None + # coming into this function, unless this is a HEAD request. + if self.length is not None: + connHeaderNames.remove('content-length') + + for headername in connHeaderNames: + move(headername) + + return connHeaders + + def setConnectionParams(self, connHeaders): + # Figure out persistent connection stuff + if self.version >= (1,1): + if 'close' in connHeaders.getHeader('connection', ()): + readPersistent = False + else: + readPersistent = PERSIST_PIPELINE + elif 'keep-alive' in connHeaders.getHeader('connection', ()): + readPersistent = PERSIST_NO_PIPELINE + else: + readPersistent = False + + + # Okay, now implement section 4.4 Message Length to determine + # how to find the end of the incoming HTTP message. + transferEncoding = connHeaders.getHeader('transfer-encoding') + + if transferEncoding: + if transferEncoding[-1] == 'chunked': + # Chunked + self.chunkedIn = 1 + # Cut off the chunked encoding (cause it's special) + transferEncoding = transferEncoding[:-1] + elif not self.parseCloseAsEnd: + # Would close on end of connection, except this can't happen for + # client->server data. (Well..it could actually, since TCP has half-close + # but the HTTP spec says it can't, so we'll pretend it's right.) + self._abortWithError(responsecode.BAD_REQUEST, "Transfer-Encoding received without chunked in last position.") + + # TODO: support gzip/etc encodings. + # FOR NOW: report an error if the client uses any encodings. + # They shouldn't, because we didn't send a TE: header saying it's okay. 
+ if transferEncoding: + self._abortWithError(responsecode.NOT_IMPLEMENTED, "Transfer-Encoding %s not supported." % transferEncoding) + else: + # No transfer-coding. + self.chunkedIn = 0 + if self.parseCloseAsEnd: + # If no Content-Length, then it's indeterminate length data + # (unless the responsecode was one of the special no body ones) + # Also note that for HEAD requests, connHeaders won't have + # content-length even if the response did. + if self.code in http.NO_BODY_CODES: + self.length = 0 + else: + self.length = connHeaders.getHeader('content-length', self.length) + + # If it's an indeterminate stream without transfer encoding, it must be + # the last request. + if self.length is None: + readPersistent = False + else: + # If no Content-Length either, assume no content. + self.length = connHeaders.getHeader('content-length', 0) + + # Set the calculated persistence + self.channel.setReadPersistent(readPersistent) + + def abortParse(self): + # If we're erroring out while still reading the request + if not self.finishedReading: + self.finishedReading = True + self.channel.setReadPersistent(False) + self.channel.requestReadFinished(self) + + # producer interface + def pauseProducing(self): + if not self.finishedReading: + self.channel.pauseProducing() + + def resumeProducing(self): + if not self.finishedReading: + self.channel.resumeProducing() + + def stopProducing(self): + if not self.finishedReading: + self.channel.stopProducing() + +class HTTPChannelRequest(HTTPParser): + """This class handles the state and parsing for one HTTP request. + It is responsible for all the low-level connection oriented behavior. + Thus, it takes care of keep-alive, de-chunking, etc., and passes + the non-connection headers on to the user-level Request object.""" + + command = path = version = None + queued = 0 + request = None + + out_version = "HTTP/1.1" + + def __init__(self, channel, queued=0): + HTTPParser.__init__(self, channel) + self.queued=queued + + # Buffer writes to a string until we're first in line + # to write a response + if queued: + self.transport = StringTransport() + else: + self.transport = self.channel.transport + + # set the version to a fallback for error generation + self.version = (1,0) + + + def gotInitialLine(self, initialLine): + parts = initialLine.split() + + # Parse the initial request line + if len(parts) != 3: + if len(parts) == 1: + parts.append('/') + if len(parts) == 2 and parts[1][0] == '/': + parts.append('HTTP/0.9') + else: + self._abortWithError(responsecode.BAD_REQUEST, 'Bad request line: %s' % initialLine) + + self.command, self.path, strversion = parts + try: + protovers = http.parseVersion(strversion) + if protovers[0] != 'http': + raise ValueError() + except ValueError: + self._abortWithError(responsecode.BAD_REQUEST, "Unknown protocol: %s" % strversion) + + self.version = protovers[1:3] + + # Ensure HTTP 0 or HTTP 1. + if self.version[0] > 1: + self._abortWithError(responsecode.HTTP_VERSION_NOT_SUPPORTED, 'Only HTTP 0.9 and HTTP 1.x are supported.') + + if self.version[0] == 0: + # simulate end of headers, as HTTP 0 doesn't have headers. 
+ self.lineReceived('') + + def lineLengthExceeded(self, line, wasFirst=False): + code = wasFirst and responsecode.REQUEST_URI_TOO_LONG or responsecode.BAD_REQUEST + self._abortWithError(code, 'Header line too long.') + + def createRequest(self): + self.request = self.channel.requestFactory(self, self.command, self.path, self.version, self.length, self.inHeaders) + del self.inHeaders + + def processRequest(self): + self.request.process() + + def handleContentChunk(self, data): + self.request.handleContentChunk(data) + + def handleContentComplete(self): + self.request.handleContentComplete() + +############## HTTPChannelRequest *RESPONSE* methods ############# + producer = None + chunkedOut = False + finished = False + + ##### Request Callbacks ##### + def writeIntermediateResponse(self, code, headers=None): + if self.version >= (1,1): + self._writeHeaders(code, headers, False) + + def writeHeaders(self, code, headers): + self._writeHeaders(code, headers, True) + + def _writeHeaders(self, code, headers, addConnectionHeaders): + # HTTP 0.9 doesn't have headers. + if self.version[0] == 0: + return + + l = [] + code_message = responsecode.RESPONSES.get(code, "Unknown Status") + + l.append('%s %s %s\r\n' % (self.out_version, code, + code_message)) + if headers is not None: + for name, valuelist in headers.getAllRawHeaders(): + for value in valuelist: + l.append("%s: %s\r\n" % (name, value)) + + if addConnectionHeaders: + # if we don't have a content length, we send data in + # chunked mode, so that we can support persistent connections. + if (headers.getHeader('content-length') is None and + self.command != "HEAD" and code not in http.NO_BODY_CODES): + if self.version >= (1,1): + l.append("%s: %s\r\n" % ('Transfer-Encoding', 'chunked')) + self.chunkedOut = True + else: + # Cannot use persistent connections if we can't do chunking + self.channel.dropQueuedRequests() + + if self.channel.isLastRequest(self): + l.append("%s: %s\r\n" % ('Connection', 'close')) + elif self.version < (1,1): + l.append("%s: %s\r\n" % ('Connection', 'Keep-Alive')) + + l.append("\r\n") + self.transport.writeSequence(l) + + + def write(self, data): + if not data: + return + elif self.chunkedOut: + self.transport.writeSequence(("%X\r\n" % len(data), data, "\r\n")) + else: + self.transport.write(data) + + def finish(self): + """We are finished writing data.""" + if self.finished: + warnings.warn("Warning! request.finish called twice.", stacklevel=2) + return + + if self.chunkedOut: + # write last chunk and closing CRLF + self.transport.write("0\r\n\r\n") + + self.finished = True + if not self.queued: + self._cleanup() + + + def abortConnection(self, closeWrite=True): + """Abort the HTTP connection because of some kind of unrecoverable + error. If closeWrite=False, then only abort reading, but leave + the writing side alone. This is mostly for internal use by + the HTTP request parsing logic, so that it can call an error + page generator. + + Otherwise, completely shut down the connection. 
+ """ + self.abortParse() + if closeWrite: + if self.producer: + self.producer.stopProducing() + self.unregisterProducer() + + self.finished = True + if self.queued: + self.transport.reset() + self.transport.truncate() + else: + self._cleanup() + + def getHostInfo(self): + t=self.channel.transport + secure = interfaces.ISSLTransport(t, None) is not None + host = t.getHost() + host.host = _cachedGetHostByAddr(host.host) + return host, secure + + def getRemoteHost(self): + return self.channel.transport.getPeer() + + ##### End Request Callbacks ##### + + def _abortWithError(self, errorcode, text=''): + """Handle low level protocol errors.""" + headers = http_headers.Headers() + headers.setHeader('content-length', len(text)+1) + + self.abortConnection(closeWrite=False) + self.writeHeaders(errorcode, headers) + self.write(text) + self.write("\n") + self.finish() + raise AbortedException + + def _cleanup(self): + """Called when have finished responding and are no longer queued.""" + if self.producer: + log.err(RuntimeError("Producer was not unregistered for %s" % self)) + self.unregisterProducer() + self.channel.requestWriteFinished(self) + del self.transport + + # methods for channel - end users should not use these + + def noLongerQueued(self): + """Notify the object that it is no longer queued. + + We start writing whatever data we have to the transport, etc. + + This method is not intended for users. + """ + if not self.queued: + raise RuntimeError, "noLongerQueued() got called unnecessarily." + + self.queued = 0 + + # set transport to real one and send any buffer data + data = self.transport.getvalue() + self.transport = self.channel.transport + if data: + self.transport.write(data) + + # if we have producer, register it with transport + if (self.producer is not None) and not self.finished: + self.transport.registerProducer(self.producer, True) + + # if we're finished, clean up + if self.finished: + self._cleanup() + + + # consumer interface + def registerProducer(self, producer, streaming): + """Register a producer. + """ + + if self.producer: + raise ValueError, "registering producer %s before previous one (%s) was unregistered" % (producer, self.producer) + + self.producer = producer + + if self.queued: + producer.pauseProducing() + else: + self.transport.registerProducer(producer, streaming) + + def unregisterProducer(self): + """Unregister the producer.""" + if not self.queued: + self.transport.unregisterProducer() + self.producer = None + + def connectionLost(self, reason): + """connection was lost""" + if self.queued and self.producer: + self.producer.stopProducing() + self.producer = None + if self.request: + self.request.connectionLost(reason) + +class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin, object): + """A receiver for HTTP requests. Handles splitting up the connection + for the multiple HTTPChannelRequests that may be in progress on this + channel. + + @ivar timeOut: number of seconds to wait before terminating an + idle connection. + + @ivar maxPipeline: number of outstanding in-progress requests + to allow before pausing the input. + + @ivar maxHeaderLength: number of bytes of header to accept from + the client. + + """ + + implements(interfaces.IHalfCloseableProtocol) + + ## Configuration parameters. Set in instances or subclasses. + + # How many simultaneous requests to handle. 
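+    # For instance, with maxPipeline = 4 a client that pipelines ten GETs
+    # has only four parsed and queued at once; reading resumes as earlier
+    # responses finish (see requestReadFinished/_startNextRequest below).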
+ maxPipeline = 4 + + # Timeout when between two requests + betweenRequestsTimeOut = 15 + # Timeout between lines or bytes while reading a request + inputTimeOut = 60 * 4 + + # maximum length of headers (10KiB) + maxHeaderLength = 10240 + + # Allow persistent connections? + allowPersistentConnections = True + + # ChannelRequest + chanRequestFactory = HTTPChannelRequest + requestFactory = http.Request + + + _first_line = 2 + readPersistent = PERSIST_PIPELINE + + _readLost = False + _writeLost = False + + _lingerTimer = None + chanRequest = None + + def _callLater(self, secs, fun): + reactor.callLater(secs, fun) + + def __init__(self): + # the request queue + self.requests = [] + + def connectionMade(self): + self.setTimeout(self.inputTimeOut) + self.factory.outstandingRequests+=1 + + def lineReceived(self, line): + if self._first_line: + self.setTimeout(self.inputTimeOut) + # if this connection is not persistent, drop any data which + # the client (illegally) sent after the last request. + if not self.readPersistent: + self.dataReceived = self.lineReceived = lambda *args: None + return + + # IE sends an extraneous empty line (\r\n) after a POST request; + # eat up such a line, but only ONCE + if not line and self._first_line == 1: + self._first_line = 2 + return + + self._first_line = 0 + + if not self.allowPersistentConnections: + # Don't allow a second request + self.readPersistent = False + + try: + self.chanRequest = self.chanRequestFactory(self, len(self.requests)) + self.requests.append(self.chanRequest) + self.chanRequest.gotInitialLine(line) + except AbortedException: + pass + else: + try: + self.chanRequest.lineReceived(line) + except AbortedException: + pass + + def lineLengthExceeded(self, line): + if self._first_line: + # Fabricate a request object to respond to the line length violation. + self.chanRequest = self.chanRequestFactory(self, + len(self.requests)) + self.requests.append(self.chanRequest) + self.chanRequest.gotInitialLine("GET fake HTTP/1.0") + try: + self.chanRequest.lineLengthExceeded(line, self._first_line) + except AbortedException: + pass + + def rawDataReceived(self, data): + self.setTimeout(self.inputTimeOut) + try: + self.chanRequest.rawDataReceived(data) + except AbortedException: + pass + + def requestReadFinished(self, request): + if(self.readPersistent is PERSIST_NO_PIPELINE or + len(self.requests) >= self.maxPipeline): + self.pauseProducing() + + # reset state variables + self._first_line = 1 + self.chanRequest = None + self.setLineMode() + + # Disable the idle timeout, in case this request takes a long + # time to finish generating output. + if len(self.requests) > 0: + self.setTimeout(None) + + def _startNextRequest(self): + # notify next request, if present, it can start writing + del self.requests[0] + + if self._writeLost: + self.transport.loseConnection() + elif self.requests: + self.requests[0].noLongerQueued() + + # resume reading if allowed to + if(not self._readLost and + self.readPersistent is not PERSIST_NO_PIPELINE and + len(self.requests) < self.maxPipeline): + self.resumeProducing() + elif self._readLost: + # No more incoming data, they already closed! 
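+            # With the read side gone and no queued requests left, closing
+            # our half completes the teardown.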
+ self.transport.loseConnection() + else: + # no requests in queue, resume reading + self.setTimeout(self.betweenRequestsTimeOut) + self.resumeProducing() + + def setReadPersistent(self, persistent): + if self.readPersistent: + # only allow it to be set if it's not currently False + self.readPersistent = persistent + + def dropQueuedRequests(self): + """Called when a response is written that forces a connection close.""" + self.readPersistent = False + # Tell all requests but first to abort. + for request in self.requests[1:]: + request.connectionLost(None) + del self.requests[1:] + + def isLastRequest(self, request): + # Is this channel handling the last possible request + return not self.readPersistent and self.requests[-1] == request + + def requestWriteFinished(self, request): + """Called by first request in queue when it is done.""" + if request != self.requests[0]: raise TypeError + + # Don't del because we haven't finished cleanup, so, + # don't want queue len to be 0 yet. + self.requests[0] = None + + if self.readPersistent or len(self.requests) > 1: + # Do this in the next reactor loop so as to + # not cause huge call stacks with fast + # incoming requests. + self._callLater(0, self._startNextRequest) + else: + self.lingeringClose() + + def timeoutConnection(self): + #log.msg("Timing out client: %s" % str(self.transport.getPeer())) + policies.TimeoutMixin.timeoutConnection(self) + + def lingeringClose(self): + """ + This is a bit complicated. This process is necessary to ensure proper + workingness when HTTP pipelining is in use. + + Here is what it wants to do: + + 1. Finish writing any buffered data, then close our write side. + While doing so, read and discard any incoming data. + + 2. When that happens (writeConnectionLost called), wait up to 20 + seconds for the remote end to close their write side (our read + side). + + 3. + - If they do (readConnectionLost called), close the socket, + and cancel the timeout. + + - If that doesn't happen, the timer fires, and makes the + socket close anyways. + """ + + # Close write half + self.transport.loseWriteConnection() + + # Throw out any incoming data + self.dataReceived = self.lineReceived = lambda *args: None + self.transport.resumeProducing() + + def writeConnectionLost(self): + # Okay, all data has been written + # In 20 seconds, actually close the socket + self._lingerTimer = reactor.callLater(20, self._lingerClose) + self._writeLost = True + + def _lingerClose(self): + self._lingerTimer = None + self.transport.loseConnection() + + def readConnectionLost(self): + """Read connection lost""" + # If in the lingering-close state, lose the socket. + if self._lingerTimer: + self._lingerTimer.cancel() + self._lingerTimer = None + self.transport.loseConnection() + return + + # If between requests, drop connection + # when all current requests have written their data. + self._readLost = True + if not self.requests: + # No requests in progress, lose now. + self.transport.loseConnection() + + # If currently in the process of reading a request, this is + # probably a client abort, so lose the connection. + if self.chanRequest: + self.transport.loseConnection() + + def connectionLost(self, reason): + self.factory.outstandingRequests-=1 + + self._writeLost = True + self.readConnectionLost() + self.setTimeout(None) + + # Tell all requests to abort. 
+        for request in self.requests:
+            if request is not None:
+                request.connectionLost(reason)
+
+class OverloadedServerProtocol(protocol.Protocol):
+    def connectionMade(self):
+        self.transport.write("HTTP/1.0 503 Service Unavailable\r\n"
+                             "Content-Type: text/html\r\n"
+                             "Connection: close\r\n\r\n"
+                             "<html><head><title>503 Service Unavailable</title></head>"
+                             "<body><h1>Service Unavailable</h1>"
+                             "The server is currently overloaded, "
+                             "please try again later.</body></html>")
+        self.transport.loseConnection()
+
+class HTTPFactory(protocol.ServerFactory):
+    """Factory for HTTP server."""
+
+    protocol = HTTPChannel
+
+    protocolArgs = None
+
+    outstandingRequests = 0
+
+    def __init__(self, requestFactory, maxRequests=600, **kwargs):
+        self.maxRequests=maxRequests
+        self.protocolArgs = kwargs
+        self.protocolArgs['requestFactory']=requestFactory
+
+    def buildProtocol(self, addr):
+        if self.outstandingRequests >= self.maxRequests:
+            return OverloadedServerProtocol()
+
+        p = protocol.ServerFactory.buildProtocol(self, addr)
+
+        for arg,value in self.protocolArgs.iteritems():
+            setattr(p, arg, value)
+        return p
+
+__all__ = ['HTTPFactory', ]
diff --git a/xcap/web/compat.py b/xcap/web/compat.py
new file mode 100644
index 0000000..75bd95b
--- /dev/null
+++ b/xcap/web/compat.py
@@ -0,0 +1,447 @@
+from __future__ import generators
+
+from urllib import quote, string
+
+import UserDict, math, time
+from cStringIO import StringIO
+
+from xcap.web import http_headers, iweb, stream, responsecode
+from twisted.internet import defer, address
+from twisted.python import components
+from twisted.spread import pb
+
+from zope.interface import implements
+
+class HeaderAdapter(UserDict.DictMixin):
+    def __init__(self, headers):
+        self._headers = headers
+
+    def __getitem__(self, name):
+        raw = self._headers.getRawHeaders(name)
+        if raw is None:
+            raise KeyError(name)
+        return ', '.join(raw)
+
+    def __setitem__(self, name, value):
+        self._headers.setRawHeaders(name, [value])
+
+    def __delitem__(self, name):
+        if not self._headers.hasHeader(name):
+            raise KeyError(name)
+        self._headers.removeHeader(name)
+
+    def iteritems(self):
+        for k,v in self._headers.getAllRawHeaders():
+            yield k, ', '.join(v)
+
+    def keys(self):
+        return [k for k, _ in self.iteritems()]
+
+    def __iter__(self):
+        for k, _ in self.iteritems():
+            yield k
+
+    def has_key(self, name):
+        return self._headers.hasHeader(name)
+
+def makeOldRequestAdapter(original):
+    # Cache the adapter. Replace this with a better generalized
+    # mechanism when one becomes available.
+ if not hasattr(original, '_oldRequest'): + original._oldRequest = OldRequestAdapter(original) + return original._oldRequest + +def _addressToTuple(addr): + if isinstance(addr, address.IPv4Address): + return ('INET', addr.host, addr.port) + elif isinstance(addr, address.UNIXAddress): + return ('UNIX', addr.name) + else: + return tuple(addr) + +class OldRequestAdapter(pb.Copyable, components.Componentized, object): + """Adapt old requests to new request + """ + implements(iweb.IOldRequest) + + def _getFrom(where, name): + def _get(self): + return getattr(getattr(self, where), name) + return property(_get) + + def _getsetFrom(where, name): + def _get(self): + return getattr(getattr(self, where), name) + def _set(self, new): + setattr(getattr(self, where), name, new) + def _del(self): + delattr(getattr(self, where), name) + return property(_get, _set, _del) + + def _getsetHeaders(where): + def _get(self): + headers = getattr(self, where).headers + return HeaderAdapter(headers) + + def _set(self, newheaders): + headers = http_headers.Headers() + for n,v in newheaders.items(): + headers.setRawHeaders(n, (v,)) + newheaders = headers + getattr(self, where).headers = newheaders + + return property(_get, _set) + + + code = _getsetFrom('response', 'code') + code_message = "" + + method = _getsetFrom('request', 'method') + uri = _getsetFrom('request', 'uri') + def _getClientproto(self): + return "HTTP/%d.%d" % self.request.clientproto + clientproto = property(_getClientproto) + + received_headers = _getsetHeaders('request') + headers = _getsetHeaders('response') + path = _getsetFrom('request', 'path') + + # cookies = # Do I need this? + # received_cookies = # Do I need this? + content = StringIO() #### FIXME + args = _getsetFrom('request', 'args') + # stack = # WTF is stack? + prepath = _getsetFrom('request', 'prepath') + postpath = _getsetFrom('request', 'postpath') + + def _getClient(self): + return "WTF" + client = property(_getClient) + + def _getHost(self): + return address.IPv4Address("TCP", self.request.host, self.request.port) + host = property(_getHost) + + def __init__(self, request): + from xcap.web import http + components.Componentized.__init__(self) + self.request = request + self.response = http.Response(stream=stream.ProducerStream()) + # This deferred will be fired by the first call to write on OldRequestAdapter + # and will cause the headers to be output. + self.deferredResponse = defer.Deferred() + + def getStateToCopyFor(self, issuer): + # This is for distrib compatibility + x = {} + + x['prepath'] = self.prepath + x['postpath'] = self.postpath + x['method'] = self.method + x['uri'] = self.uri + + x['clientproto'] = self.clientproto + self.content.seek(0, 0) + x['content_data'] = self.content.read() + x['remote'] = pb.ViewPoint(issuer, self) + + x['host'] = _addressToTuple(self.request.chanRequest.channel.transport.getHost()) + x['client'] = _addressToTuple(self.request.chanRequest.channel.transport.getPeer()) + + return x + + def getTypeToCopy(self): + # lie to PB so the ResourcePublisher doesn't have to know xcap.web + # exists which is good because xcap.web doesn't exist. 
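+        # (Presumably: distrib peers only know how to unjelly twisted.web
+        # request classes, so advertise that class name for the copy.)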
+ return 'twisted.web.server.Request' + + def registerProducer(self, producer, streaming): + self.response.stream.registerProducer(producer, streaming) + + def unregisterProducer(self): + self.response.stream.unregisterProducer() + + def finish(self): + if self.deferredResponse is not None: + d = self.deferredResponse + self.deferredResponse = None + d.callback(self.response) + self.response.stream.finish() + + def write(self, data): + if self.deferredResponse is not None: + d = self.deferredResponse + self.deferredResponse = None + d.callback(self.response) + self.response.stream.write(data) + + def getHeader(self, name): + raw = self.request.headers.getRawHeaders(name) + if raw is None: + return None + return ', '.join(raw) + + def setHeader(self, name, value): + """Set an outgoing HTTP header. + """ + self.response.headers.setRawHeaders(name, [value]) + + def setResponseCode(self, code, message=None): + # message ignored + self.response.code = code + + def setLastModified(self, when): + # Never returns CACHED -- can it and still be compliant? + when = long(math.ceil(when)) + self.response.headers.setHeader('last-modified', when) + return None + + def setETag(self, etag): + self.response.headers.setRawHeaders('etag', [etag]) + return None + + def getAllHeaders(self): + return dict(self.headers.iteritems()) + + def getRequestHostname(self): + return self.request.host + + + def getCookie(self, key): + for cookie in self.request.headers.getHeader('cookie', ()): + if cookie.name == key: + return cookie.value + + return None + + def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None): + if expires is None and max_age is not None: + expires=max_age-time.time() + cookie = http_headers.Cookie(k,v, expires=expires, domain=domain, path=path, comment=comment, secure=secure) + self.response.headers.setHeader('set-cookie', self.request.headers.getHeader('set-cookie', ())+(cookie,)) + + def notifyFinish(self): + ### FIXME + return None +# return self.request.notifyFinish() + + def getHost(self): + return self.host + + def setHost(self, host, port, ssl=0): + self.request.host = host + self.request.port = port + self.request.scheme = ssl and 'https' or 'http' + + def isSecure(self): + return self.request.scheme == 'https' + + def getClientIP(self): + if isinstance(self.request.chanRequest.getRemoteHost(), address.IPv4Address): + return self.client.host + else: + return None + return self.request.chanRequest.getRemoteHost() + return "127.0.0.1" + + def getClient(self): + return "127.0.0.1" + +### FIXME: + def getUser(self): + return "" + + def getPassword(self): + return "" + +# Identical to original methods -- hopefully these don't have to change + def sibLink(self, name): + "Return the text that links to a sibling of the requested resource." + if self.postpath: + return (len(self.postpath)*"../") + name + else: + return name + + def childLink(self, name): + "Return the text that links to a child of the requested resource." + lpp = len(self.postpath) + if lpp > 1: + return ((lpp-1)*"../") + name + elif lpp == 1: + return name + else: # lpp == 0 + if len(self.prepath) and self.prepath[-1]: + return self.prepath[-1] + '/' + name + else: + return name + + def redirect(self, url): + """Utility function that does a redirect. + + The request should have finish() called after this. 
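+
+        A typical (hypothetical) caller:
+
+            request.redirect('http://example.com/moved')
+            request.finish()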
+ """ + self.setResponseCode(responsecode.FOUND) + self.setHeader("location", url) + + def prePathURL(self): + port = self.getHost().port + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostport = '' + else: + hostport = ':%d' % port + return quote('http%s://%s%s/%s' % ( + self.isSecure() and 's' or '', + self.getRequestHostname(), + hostport, + string.join(self.prepath, '/')), "/:") + +# def URLPath(self): +# from twisted.python import urlpath +# return urlpath.URLPath.fromRequest(self) + +# But nevow wants it to look like this... :( + def URLPath(self): + from nevow import url + return url.URL.fromContext(self) + + def rememberRootURL(self, url=None): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + if url is None: + url = self.prePathURL() + # remove one segment + self.appRootURL = url[:url.rindex("/")] + else: + self.appRootURL = url + + def getRootURL(self): + """ + Get a previously-remembered URL. + """ + return self.appRootURL + + + session = None + + def getSession(self, sessionInterface = None): + # Session management + if not self.session: + # FIXME: make sitepath be something + cookiename = string.join(['TWISTED_SESSION'] + self.sitepath, "_") + sessionCookie = self.getCookie(cookiename) + if sessionCookie: + try: + self.session = self.site.getSession(sessionCookie) + except KeyError: + pass + # if it still hasn't been set, fix it up. + if not self.session: + self.session = self.site.makeSession() + self.addCookie(cookiename, self.session.uid, path='/') + self.session.touch() + if sessionInterface: + return self.session.getComponent(sessionInterface) + return self.session + + +class OldNevowResourceAdapter(object): + implements(iweb.IResource) + + def __init__(self, original): + # Can't use self.__original= because of __setattr__. + self.__dict__['_OldNevowResourceAdapter__original']=original + + def __getattr__(self, name): + return getattr(self.__original, name) + + def __setattr__(self, name, value): + setattr(self.__original, name, value) + + def __delattr__(self, name): + delattr(self.__original, name) + + def locateChild(self, ctx, segments): + from xcap.web.server import parsePOSTData + request = iweb.IRequest(ctx) + if request.method == "POST": + return parsePOSTData(request).addCallback( + lambda x: self.__original.locateChild(ctx, segments)) + return self.__original.locateChild(ctx, segments) + + def renderHTTP(self, ctx): + from xcap.web.server import parsePOSTData + request = iweb.IRequest(ctx) + if request.method == "POST": + return parsePOSTData(request).addCallback(self.__reallyRender, ctx) + return self.__reallyRender(None, ctx) + + def __reallyRender(self, ignored, ctx): + # This deferred will be called when our resource is _finished_ + # writing, and will make sure we write the rest of our data + # and finish the connection. + defer.maybeDeferred(self.__original.renderHTTP, ctx).addCallback(self.__finish, ctx) + + # Sometimes the __original.renderHTTP will write() before we + # even get this far, and we don't want to return + # oldRequest.deferred if it's already been set to None. 
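+        # (write() fires deferredResponse and sets it to None, so a None here
+        # means the headers are already out and the response object itself is
+        # the right thing to hand back.)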
+ oldRequest = iweb.IOldRequest(ctx) + if oldRequest.deferredResponse is None: + return oldRequest.response + return oldRequest.deferredResponse + + def __finish(self, data, ctx): + oldRequest = iweb.IOldRequest(ctx) + oldRequest.write(data) + oldRequest.finish() + + +class OldResourceAdapter(object): + implements(iweb.IOldNevowResource) + + def __init__(self, original): + self.original = original + + def __repr__(self): + return "<%s @ 0x%x adapting %r>" % (self.__class__.__name__, id(self), self.original) + + def locateChild(self, req, segments): + import server + request = iweb.IOldRequest(req) + if self.original.isLeaf: + return self, server.StopTraversal + name = segments[0] + if name == '': + res = self + else: + request.prepath.append(request.postpath.pop(0)) + res = self.original.getChildWithDefault(name, request) + request.postpath.insert(0, request.prepath.pop()) + + if isinstance(res, defer.Deferred): + return res.addCallback(lambda res: (res, segments[1:])) + + return res, segments[1:] + + def _handle_NOT_DONE_YET(self, data, request): + from twisted.web.server import NOT_DONE_YET + if data == NOT_DONE_YET: + # Return a deferred that will never fire, so the finish + # callback doesn't happen. This is because, when returning + # NOT_DONE_YET, the page is responsible for calling finish. + return defer.Deferred() + else: + return data + + def renderHTTP(self, req): + request = iweb.IOldRequest(req) + result = defer.maybeDeferred(self.original.render, request).addCallback( + self._handle_NOT_DONE_YET, request) + return result + +__all__ = [] diff --git a/xcap/web/dirlist.py b/xcap/web/dirlist.py new file mode 100644 index 0000000..0aff7be --- /dev/null +++ b/xcap/web/dirlist.py @@ -0,0 +1,119 @@ +# Copyright (c) 2001-2004 Twisted Matrix Laboratories. +# See LICENSE for details. 
+
+"""Directory listing."""
+
+# system imports
+import os
+import urllib
+import stat
+import time
+
+# twisted imports
+from xcap.web import iweb, resource, http, http_headers
+
+def formatFileSize(size):
+    if size < 1024:
+        return '%i' % size
+    elif size < (1024**2):
+        return '%iK' % (size / 1024)
+    elif size < (1024**3):
+        return '%iM' % (size / (1024**2))
+    else:
+        return '%iG' % (size / (1024**3))
+
+class DirectoryLister(resource.Resource):
+    def __init__(self, pathname, dirs=None,
+                 contentTypes={},
+                 contentEncodings={},
+                 defaultType='text/html'):
+        self.contentTypes = contentTypes
+        self.contentEncodings = contentEncodings
+        self.defaultType = defaultType
+        # dirs allows usage of the File to specify what gets listed
+        self.dirs = dirs
+        self.path = pathname
+        resource.Resource.__init__(self)
+
+    def data_listing(self, request, data):
+        if self.dirs is None:
+            directory = os.listdir(self.path)
+            directory.sort()
+        else:
+            directory = self.dirs
+
+        files = []
+
+        for path in directory:
+            url = urllib.quote(path, '/')
+            fullpath = os.path.join(self.path, path)
+            try:
+                st = os.stat(fullpath)
+            except OSError:
+                continue
+            if stat.S_ISDIR(st.st_mode):
+                url = url + '/'
+                files.append({
+                    'link': url,
+                    'linktext': path + "/",
+                    'size': '',
+                    'type': '-',
+                    'lastmod': time.strftime("%Y-%b-%d %H:%M", time.localtime(st.st_mtime))
+                })
+            else:
+                from xcap.web.static import getTypeAndEncoding
+                mimetype, encoding = getTypeAndEncoding(
+                    path,
+                    self.contentTypes, self.contentEncodings, self.defaultType)
+
+                filesize = st.st_size
+                files.append({
+                    'link': url,
+                    'linktext': path,
+                    'size': formatFileSize(filesize),
+                    'type': mimetype,
+                    'lastmod': time.strftime("%Y-%b-%d %H:%M", time.localtime(st.st_mtime))
+                })
+
+        return files
+
+    def __repr__(self):
+        return '<DirectoryLister of %r>' % self.path
+
+    __str__ = __repr__
+
+
+    def render(self, request):
+        title = "Directory listing for %s" % urllib.unquote(request.path)
+
+        s= """<html><head><title>%s</title></head>
+<body><h1>%s</h1>""" % (title,title)
+        s+="<table>"
+        s+="<tr><th>Filename</th><th>Size</th><th>Last Modified</th><th>File Type</th></tr>"
+        even = False
+        for row in self.data_listing(request, None):
+            s+='<tr class="%s">' % (even and 'even' or 'odd',)
+            s+='<td><a href="%(link)s">%(linktext)s</a></td><td align="right">%(size)s</td><td>%(lastmod)s</td><td>%(type)s</td></tr>' % row
+            even = not even
+
+        s+="</table></body></html>"
+        response = http.Response(200, {}, s)
+        response.headers.setHeader("content-type", http_headers.MimeType('text', 'html'))
+        return response
+
+__all__ = ['DirectoryLister']
diff --git a/xcap/web/error.py b/xcap/web/error.py
new file mode 100644
index 0000000..2a9b41b
--- /dev/null
+++ b/xcap/web/error.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2001-2004 Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+"""
+Default error output filter for xcap.web.
+"""
+
+from xcap.web import stream, http_headers
+from xcap.web.responsecode import *
+
+# 300 - Should include entity with choices
+# 301 -
+# 304 - Must include Date, ETag, Content-Location, Expires, Cache-Control, Vary.
+#
+# 401 - Must include WWW-Authenticate.
+# 405 - Must include Allow.
+# 406 - Should include entity describing allowable characteristics
+# 407 - Must include Proxy-Authenticate
+# 413 - May include Retry-After
+# 416 - Should include Content-Range
+# 503 - Should include Retry-After
+
+ERROR_MESSAGES = {
+    # 300
+    # no MULTIPLE_CHOICES
+    MOVED_PERMANENTLY: 'The document has permanently moved <a href="%(location)s">here</a>.',
+    FOUND: 'The document has temporarily moved <a href="%(location)s">here</a>.',
+    SEE_OTHER: 'The results are available <a href="%(location)s">here</a>.',
+    # no NOT_MODIFIED
+    USE_PROXY: "Access to this resource must be through the proxy %(location)s.",
+    # 306 unused
+    TEMPORARY_REDIRECT: 'The document has temporarily moved <a href="%(location)s">here</a>.',
+
+    # 400
+    BAD_REQUEST: "Your browser sent an invalid request.",
+    UNAUTHORIZED: "You are not authorized to view the resource at %(uri)s. Perhaps you entered a wrong password, or perhaps your browser doesn't support authentication.",
+    PAYMENT_REQUIRED: "Payment Required (useful result code, this...).",
+    FORBIDDEN: "You don't have permission to access %(uri)s.",
+    NOT_FOUND: "The resource %(uri)s cannot be found.",
+    NOT_ALLOWED: "The requested method %(method)s is not supported by %(uri)s.",
+    NOT_ACCEPTABLE: "No representation of %(uri)s that is acceptable to your client could be found.",
+    PROXY_AUTH_REQUIRED: "You are not authorized to view the resource at %(uri)s. Perhaps you entered a wrong password, or perhaps your browser doesn't support authentication.",
+    REQUEST_TIMEOUT: "Server timed out waiting for your client to finish sending the HTTP request.",
+    CONFLICT: "Conflict (?)",
+    GONE: "The resource %(uri)s has been permanently removed.",
+    LENGTH_REQUIRED: "The resource %(uri)s requires a Content-Length header.",
+    PRECONDITION_FAILED: "A precondition evaluated to false.",
+    REQUEST_ENTITY_TOO_LARGE: "The provided request entity data is longer than the maximum for the method %(method)s at %(uri)s.",
+    REQUEST_URI_TOO_LONG: "The request URL is longer than the maximum on this server.",
+    UNSUPPORTED_MEDIA_TYPE: "The provided request data has a format not understood by the resource at %(uri)s.",
+    REQUESTED_RANGE_NOT_SATISFIABLE: "None of the ranges given in the Range request header are satisfiable by the resource %(uri)s.",
+    EXPECTATION_FAILED: "The server does not support one of the expectations given in the Expect header.",
+
+    # 500
+    INTERNAL_SERVER_ERROR: "An internal error occurred trying to process your request. Sorry.",
+    NOT_IMPLEMENTED: "Some functionality requested is not implemented on this server.",
+    BAD_GATEWAY: "An upstream server returned an invalid response.",
+    SERVICE_UNAVAILABLE: "This server cannot service your request because it is overloaded.",
+    GATEWAY_TIMEOUT: "An upstream server is not responding.",
+    HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported.",
+    INSUFFICIENT_STORAGE_SPACE: "There is insufficient storage space available to perform that request.",
+    NOT_EXTENDED: "This server does not support a mandatory extension requested."
+}
+
+# Is there a good place to keep this function?
+def _escape(original):
+    if original is None:
+        return None
+    return original.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
+
+def defaultErrorHandler(request, response):
+    if response.stream is not None:
+        # Already got an error message
+        return response
+    if response.code < 300:
+        # We only do error messages
+        return response
+
+    message = ERROR_MESSAGES.get(response.code, None)
+    if message is None:
+        # No message specified for that code
+        return response
+
+    message = message % {
+        'uri':_escape(request.uri),
+        'location':_escape(response.headers.getHeader('location')),
+        'method':_escape(request.method)
+        }
+
+    title = RESPONSES.get(response.code, "")
+    body = ("<html><head><title>%d %s</title></head>"
+            "<body><h1>%s</h1>%s</body></html>") % (
+        response.code, title, title, message)
+
+    response.headers.setHeader("content-type", http_headers.MimeType('text', 'html'))
+    response.stream = stream.MemoryStream(body)
+
+    return response
+defaultErrorHandler.handleErrors = True
+
+
+__all__ = ['defaultErrorHandler',]
diff --git a/xcap/web/fileupload.py b/xcap/web/fileupload.py
new file mode 100644
index 0000000..0e7e794
--- /dev/null
+++ b/xcap/web/fileupload.py
@@ -0,0 +1,374 @@
+from __future__ import generators
+
+import re
+from zope.interface import implements
+import urllib
+import tempfile
+
+from twisted.internet import defer
+from xcap.web.stream import IStream, FileStream, BufferedStream, readStream
+from xcap.web.stream import generatorToStream, readAndDiscard
+from xcap.web import http_headers
+from cStringIO import StringIO
+
+###################################
+#####  Multipart MIME Reader  #####
+###################################
+
+class MimeFormatError(Exception):
+    pass
+
+# parseContentDispositionFormData is absolutely horrible, but as
+# browsers don't seem to believe in sensible quoting rules, it's
+# really the only way to handle the header.  (Quotes can be in the
+# filename, unescaped)
+cd_regexp = re.compile(
+    ' *form-data; *name="([^"]*)"(?:; *filename="(.*)")?$',
+    re.IGNORECASE)
+
+def parseContentDispositionFormData(value):
+    match = cd_regexp.match(value)
+    if not match:
+        # Error parsing.
+        raise ValueError("Unknown content-disposition format.")
+    name=match.group(1)
+    filename=match.group(2)
+    return name, filename
+
+
+#@defer.deferredGenerator
+def _readHeaders(stream):
+    """Read the MIME headers. Assumes we've just finished reading in the
+    boundary string."""
+
+    ctype = fieldname = filename = None
+    headers = []
+
+    # Now read headers
+    while 1:
+        line = stream.readline(size=1024)
+        if isinstance(line, defer.Deferred):
+            line = defer.waitForDeferred(line)
+            yield line
+            line = line.getResult()
+        #print "GOT", line
+        if not line.endswith('\r\n'):
+            if line == "":
+                raise MimeFormatError("Unexpected end of stream.")
+            else:
+                raise MimeFormatError("Header line too long")
+
+        line = line[:-2] # strip \r\n
+        if line == "":
+            break # End of headers
+
+        parts = line.split(':', 1)
+        if len(parts) != 2:
+            raise MimeFormatError("Header did not have a :")
+        name, value = parts
+        name = name.lower()
+        headers.append((name, value))
+
+        if name == "content-type":
+            ctype = http_headers.parseContentType(http_headers.tokenize((value,), foldCase=False))
+        elif name == "content-disposition":
+            fieldname, filename = parseContentDispositionFormData(value)
+
+    if ctype is None:
+        ctype = http_headers.MimeType('application', 'octet-stream')
+    if fieldname is None:
+        raise MimeFormatError('Content-disposition invalid or omitted.')
+
+    # End of headers, return (field name, content-type, filename)
+    yield fieldname, filename, ctype
+    return
+_readHeaders = defer.deferredGenerator(_readHeaders)
+
+
+class _BoundaryWatchingStream(object):
+    def __init__(self, stream, boundary):
+        self.stream = stream
+        self.boundary = boundary
+        self.data = ''
+        self.deferred = defer.Deferred()
+
+    length = None # unknown
+    def read(self):
+        if self.stream is None:
+            if self.deferred is not None:
+                deferred = self.deferred
+                self.deferred = None
+                deferred.callback(None)
+            return None
+        newdata = self.stream.read()
+        if isinstance(newdata, defer.Deferred):
+            return newdata.addCallbacks(self._gotRead, self._gotError)
+        return self._gotRead(newdata)
+
+    def _gotRead(self, newdata):
+        if not newdata:
+            raise
MimeFormatError("Unexpected EOF") + # BLECH, converting buffer back into string. + self.data += str(newdata) + data = self.data + boundary = self.boundary + off = data.find(boundary) + + if off == -1: + # No full boundary, check for the first character + off = data.rfind(boundary[0], max(0, len(data)-len(boundary))) + if off != -1: + # We could have a partial boundary, store it for next time + self.data = data[off:] + return data[:off] + else: + self.data = '' + return data + else: + self.stream.pushback(data[off+len(boundary):]) + self.stream = None + return data[:off] + + def _gotError(self, err): + # Propogate error back to MultipartMimeStream also + if self.deferred is not None: + deferred = self.deferred + self.deferred = None + deferred.errback(err) + return err + + def close(self): + # Assume error will be raised again and handled by MMS? + readAndDiscard(self).addErrback(lambda _: None) + +class MultipartMimeStream(object): + implements(IStream) + def __init__(self, stream, boundary): + self.stream = BufferedStream(stream) + self.boundary = "--"+boundary + self.first = True + + def read(self): + """ + Return a deferred which will fire with a tuple of: + (fieldname, filename, ctype, dataStream) + or None when all done. + + Format errors will be sent to the errback. + + Returns None when all done. + + IMPORTANT: you *must* exhaust dataStream returned by this call + before calling .read() again! + """ + if self.first: + self.first = False + d = self._readFirstBoundary() + else: + d = self._readBoundaryLine() + d.addCallback(self._doReadHeaders) + d.addCallback(self._gotHeaders) + return d + + def _readFirstBoundary(self): + #print "_readFirstBoundary" + line = self.stream.readline(size=1024) + if isinstance(line, defer.Deferred): + line = defer.waitForDeferred(line) + yield line + line = line.getResult() + if line != self.boundary + '\r\n': + raise MimeFormatError("Extra data before first boundary: %r looking for: %r" % (line, self.boundary + '\r\n')) + + self.boundary = "\r\n"+self.boundary + yield True + return + _readFirstBoundary = defer.deferredGenerator(_readFirstBoundary) + + def _readBoundaryLine(self): + #print "_readBoundaryLine" + line = self.stream.readline(size=1024) + if isinstance(line, defer.Deferred): + line = defer.waitForDeferred(line) + yield line + line = line.getResult() + + if line == "--\r\n": + # THE END! + yield False + return + elif line != "\r\n": + raise MimeFormatError("Unexpected data on same line as boundary: %r" % (line,)) + yield True + return + _readBoundaryLine = defer.deferredGenerator(_readBoundaryLine) + + def _doReadHeaders(self, morefields): + #print "_doReadHeaders", morefields + if not morefields: + return None + return _readHeaders(self.stream) + + def _gotHeaders(self, headers): + if headers is None: + return None + bws = _BoundaryWatchingStream(self.stream, self.boundary) + self.deferred = bws.deferred + ret=list(headers) + ret.append(bws) + return tuple(ret) + + +def readIntoFile(stream, outFile, maxlen): + """Read the stream into a file, but not if it's longer than maxlen. + Returns Deferred which will be triggered on finish. + """ + curlen = [0] + def done(_): + return _ + def write(data): + curlen[0] += len(data) + if curlen[0] > maxlen: + raise MimeFormatError("Maximum length of %d bytes exceeded." 
% + maxlen) + + outFile.write(data) + return readStream(stream, write).addBoth(done) + +#@defer.deferredGenerator +def parseMultipartFormData(stream, boundary, + maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024): + # If the stream length is known to be too large upfront, abort immediately + + if stream.length is not None and stream.length > maxSize: + raise MimeFormatError("Maximum length of %d bytes exceeded." % + maxSize) + + mms = MultipartMimeStream(stream, boundary) + numFields = 0 + args = {} + files = {} + + while 1: + datas = mms.read() + if isinstance(datas, defer.Deferred): + datas = defer.waitForDeferred(datas) + yield datas + datas = datas.getResult() + if datas is None: + break + + numFields+=1 + if numFields == maxFields: + raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields) + + # Parse data + fieldname, filename, ctype, stream = datas + if filename is None: + # Not a file + outfile = StringIO() + maxBuf = min(maxSize, maxMem) + else: + outfile = tempfile.NamedTemporaryFile() + maxBuf = maxSize + x = readIntoFile(stream, outfile, maxBuf) + if isinstance(x, defer.Deferred): + x = defer.waitForDeferred(x) + yield x + x = x.getResult() + if filename is None: + # Is a normal form field + outfile.seek(0) + data = outfile.read() + args.setdefault(fieldname, []).append(data) + maxMem -= len(data) + maxSize -= len(data) + else: + # Is a file upload + maxSize -= outfile.tell() + outfile.seek(0) + files.setdefault(fieldname, []).append((filename, ctype, outfile)) + + + yield args, files + return +parseMultipartFormData = defer.deferredGenerator(parseMultipartFormData) + +################################### +##### x-www-urlencoded reader ##### +################################### + + +def parse_urlencoded_stream(input, maxMem=100*1024, + keep_blank_values=False, strict_parsing=False): + lastdata = '' + still_going=1 + + while still_going: + try: + yield input.wait + data = input.next() + except StopIteration: + pairs = [lastdata] + still_going=0 + else: + maxMem -= len(data) + if maxMem < 0: + raise MimeFormatError("Maximum length of %d bytes exceeded." 
% + maxMem) + pairs = str(data).split('&') + pairs[0] = lastdata + pairs[0] + lastdata=pairs.pop() + + for name_value in pairs: + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise MimeFormatError("bad query field: %s") % `name_value` + continue + if len(nv[1]) or keep_blank_values: + name = urllib.unquote(nv[0].replace('+', ' ')) + value = urllib.unquote(nv[1].replace('+', ' ')) + yield name, value +parse_urlencoded_stream = generatorToStream(parse_urlencoded_stream) + +def parse_urlencoded(stream, maxMem=100*1024, maxFields=1024, + keep_blank_values=False, strict_parsing=False): + d = {} + numFields = 0 + + s=parse_urlencoded_stream(stream, maxMem, keep_blank_values, strict_parsing) + + while 1: + datas = s.read() + if isinstance(datas, defer.Deferred): + datas = defer.waitForDeferred(datas) + yield datas + datas = datas.getResult() + if datas is None: + break + name, value = datas + + numFields += 1 + if numFields == maxFields: + raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields) + + if name in d: + d[name].append(value) + else: + d[name] = [value] + yield d + return +parse_urlencoded = defer.deferredGenerator(parse_urlencoded) + + +if __name__ == '__main__': + d = parseMultipartFormData( + FileStream(open("upload.txt")), "----------0xKhTmLbOuNdArY") + from twisted.python import log + d.addErrback(log.err) + def pr(s): + print s + d.addCallback(pr) + +__all__ = ['parseMultipartFormData', 'parse_urlencoded', 'parse_urlencoded_stream', 'MultipartMimeStream', 'MimeFormatError'] diff --git a/xcap/web/filter/__init__.py b/xcap/web/filter/__init__.py new file mode 100644 index 0000000..b13fbdd --- /dev/null +++ b/xcap/web/filter/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2001-2004 Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Output filters. +""" diff --git a/xcap/web/filter/gzip.py b/xcap/web/filter/gzip.py new file mode 100644 index 0000000..566f514 --- /dev/null +++ b/xcap/web/filter/gzip.py @@ -0,0 +1,79 @@ +from __future__ import generators +import struct +import zlib +from xcap.web import stream + +# TODO: ungzip (can any browsers actually generate gzipped +# upload data?) But it's necessary for client anyways. + +def gzipStream(input, compressLevel=6): + crc, size = zlib.crc32(''), 0 + # magic header, compression method, no flags + header = '\037\213\010\000' + # timestamp + header += struct.pack('= size: + end = size - 1 + + if start >= size: + raise UnsatisfiableRangeRequest + + return start,end + +def makeUnsatisfiable(request, oldresponse): + if request.headers.hasHeader('if-range'): + return oldresponse # Return resource instead of error + response = http.Response(responsecode.REQUESTED_RANGE_NOT_SATISFIABLE) + response.headers.setHeader("content-range", ('bytes', None, None, oldresponse.stream.length)) + return response + +def makeSegment(inputStream, lastOffset, start, end): + offset = start - lastOffset + length = end + 1 - start + + if offset != 0: + before, inputStream = inputStream.split(offset) + before.close() + return inputStream.split(length) + +def rangefilter(request, oldresponse): + if oldresponse.stream is None: + return oldresponse + size = oldresponse.stream.length + if size is None: + # Does not deal with indeterminate length outputs + return oldresponse + + oldresponse.headers.setHeader('accept-ranges',('bytes',)) + + rangespec = request.headers.getHeader('range') + + # If we've got a range header and the If-Range header check passes, and + # the range type is bytes, do a partial response. 
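+    # For example (a sketch with assumed header values): a request carrying
+    #     Range: bytes=0-99,200-299
+    #     If-Range: "abc123"
+    # yields 206 Partial Content only while the resource's ETag still
+    # matches; otherwise the original 200 response passes through unchanged.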
+ if (rangespec is not None and http.checkIfRange(request, oldresponse) and + rangespec[0] == 'bytes'): + # If it's a single range, return a simple response + if len(rangespec[1]) == 1: + try: + start,end = canonicalizeRange(rangespec[1][0], size) + except UnsatisfiableRangeRequest: + return makeUnsatisfiable(request, oldresponse) + + response = http.Response(responsecode.PARTIAL_CONTENT, oldresponse.headers) + response.headers.setHeader('content-range',('bytes',start, end, size)) + + content, after = makeSegment(oldresponse.stream, 0, start, end) + after.close() + response.stream = content + return response + else: + # Return a multipart/byteranges response + lastOffset = -1 + offsetList = [] + for arange in rangespec[1]: + try: + start,end = canonicalizeRange(arange, size) + except UnsatisfiableRangeRequest: + continue + if start <= lastOffset: + # Stupid client asking for out-of-order or overlapping ranges, PUNT! + return oldresponse + offsetList.append((start,end)) + lastOffset = end + + if not offsetList: + return makeUnsatisfiable(request, oldresponse) + + content_type = oldresponse.headers.getRawHeaders('content-type') + boundary = "%x%x" % (int(time.time()*1000000), os.getpid()) + response = http.Response(responsecode.PARTIAL_CONTENT, oldresponse.headers) + + response.headers.setHeader('content-type', + http_headers.MimeType('multipart', 'byteranges', + [('boundary', boundary)])) + response.stream = out = stream.CompoundStream() + + + lastOffset = 0 + origStream = oldresponse.stream + + headerString = "\r\n--%s" % boundary + if len(content_type) == 1: + headerString+='\r\nContent-Type: %s' % content_type[0] + headerString+="\r\nContent-Range: %s\r\n\r\n" + + for start,end in offsetList: + out.addStream(headerString % + http_headers.generateContentRange(('bytes', start, end, size))) + + content, origStream = makeSegment(origStream, lastOffset, start, end) + lastOffset = end + 1 + out.addStream(content) + origStream.close() + out.addStream("\r\n--%s--\r\n" % boundary) + return response + else: + return oldresponse + + +__all__ = ['rangefilter'] diff --git a/xcap/web/http.py b/xcap/web/http.py new file mode 100644 index 0000000..0eef031 --- /dev/null +++ b/xcap/web/http.py @@ -0,0 +1,473 @@ +# Copyright (c) 2001-2004 Twisted Matrix Laboratories. +# See LICENSE for details. + +"""HyperText Transfer Protocol implementation. + +The second coming. + +Maintainer: U{James Y Knight } + +""" +# import traceback; log.msg(''.join(traceback.format_stack())) + +# system imports +import socket +import time +import cgi + +# twisted imports +from twisted.internet import interfaces, error +from twisted.python import log, components +from zope.interface import implements + +# sibling imports +from xcap.web import responsecode +from xcap.web import http_headers +from xcap.web import iweb +from xcap.web import stream +from xcap.web.stream import IByteStream + +defaultPortForScheme = {'http': 80, 'https':443, 'ftp':21} + +def splitHostPort(scheme, hostport): + """Split the host in "host:port" format into host and port fields. + If port was not specified, use the default for the given scheme, if + known. Returns a tuple of (hostname, portnumber).""" + + # Split hostport into host and port + hostport = hostport.split(':', 1) + try: + if len(hostport) == 2: + return hostport[0], int(hostport[1]) + except ValueError: + pass + return hostport[0], defaultPortForScheme.get(scheme, 0) + + +def parseVersion(strversion): + """Parse version strings of the form Protocol '/' Major '.' Minor. E.g. 'HTTP/1.1'. 
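+    For example, parseVersion('HTTP/1.1') returns ('http', 1, 1).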
+ Returns (protocol, major, minor). + Will raise ValueError on bad syntax.""" + + proto, strversion = strversion.split('/') + major, minor = strversion.split('.') + major, minor = int(major), int(minor) + if major < 0 or minor < 0: + raise ValueError("negative number") + return (proto.lower(), major, minor) + + +class HTTPError(Exception): + def __init__(self, codeOrResponse): + """An Exception for propagating HTTP Error Responses. + + @param codeOrResponse: The numeric HTTP code or a complete http.Response + object. + @type codeOrResponse: C{int} or L{http.Response} + """ + Exception.__init__(self) + self.response = iweb.IResponse(codeOrResponse) + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, self.response) + + +class Response(object): + """An object representing an HTTP Response to be sent to the client. + """ + implements(iweb.IResponse) + + code = responsecode.OK + headers = None + stream = None + + def __init__(self, code=None, headers=None, stream=None): + """ + @param code: The HTTP status code for this Response + @type code: C{int} + + @param headers: Headers to be sent to the client. + @type headers: C{dict}, L{xcap.web.http_headers.Headers}, or + C{None} + + @param stream: Content body to send to the HTTP client + @type stream: L{xcap.web.stream.IByteStream} + """ + + if code is not None: + self.code = int(code) + + if headers is not None: + if isinstance(headers, dict): + headers = http_headers.Headers(headers) + self.headers=headers + else: + self.headers = http_headers.Headers() + + if stream is not None: + self.stream = IByteStream(stream) + + def __repr__(self): + if self.stream is None: + streamlen = None + else: + streamlen = self.stream.length + + return "<%s.%s code=%d, streamlen=%s>" % (self.__module__, self.__class__.__name__, self.code, streamlen) + + +class StatusResponse (Response): + """ + A L{Response} object which simply contains a status code and a description of + what happened. + """ + def __init__(self, code, description, title=None): + """ + @param code: a response code in L{responsecode.RESPONSES}. + @param description: a string description. + @param title: the message title. If not specified or C{None}, defaults + to C{responsecode.RESPONSES[code]}. + """ + if title is None: + title = cgi.escape(responsecode.RESPONSES[code]) + + output = "".join(( + "", + "", + "%s" % (title,), + "", + "", + "
<h1>%s</h1>" % (title,),
+            "<p>%s</p>
" % (cgi.escape(description),), + "", + "", + )) + + if type(output) == unicode: + output = output.encode("utf-8") + mime_params = {"charset": "utf-8"} + else: + mime_params = {} + + super(StatusResponse, self).__init__(code=code, stream=output) + + self.headers.setHeader("content-type", http_headers.MimeType("text", "html", mime_params)) + + self.description = description + + def __repr__(self): + return "<%s %s %s>" % (self.__class__.__name__, self.code, self.description) + + +class RedirectResponse (StatusResponse): + """ + A L{Response} object that contains a redirect to another network location. + """ + def __init__(self, location): + """ + @param location: the URI to redirect to. + """ + super(RedirectResponse, self).__init__( + responsecode.MOVED_PERMANENTLY, + "Document moved to %s." % (location,) + ) + + self.headers.setHeader("location", location) + + +def NotModifiedResponse(oldResponse=None): + if oldResponse is not None: + headers=http_headers.Headers() + for header in ( + # Required from sec 10.3.5: + 'date', 'etag', 'content-location', 'expires', + 'cache-control', 'vary', + # Others: + 'server', 'proxy-authenticate', 'www-authenticate', 'warning'): + value = oldResponse.headers.getRawHeaders(header) + if value is not None: + headers.setRawHeaders(header, value) + else: + headers = None + return Response(code=responsecode.NOT_MODIFIED, headers=headers) + + +def checkPreconditions(request, response=None, entityExists=True, etag=None, lastModified=None): + """Check to see if this request passes the conditional checks specified + by the client. May raise an HTTPError with result codes L{NOT_MODIFIED} + or L{PRECONDITION_FAILED}, as appropriate. + + This function is called automatically as an output filter for GET and + HEAD requests. With GET/HEAD, it is not important for the precondition + check to occur before doing the action, as the method is non-destructive. + + However, if you are implementing other request methods, like PUT + for your resource, you will need to call this after determining + the etag and last-modified time of the existing resource but + before actually doing the requested action. In that case, + + This examines the appropriate request headers for conditionals, + (If-Modified-Since, If-Unmodified-Since, If-Match, If-None-Match, + or If-Range), compares with the etag and last and + and then sets the response code as necessary. + + @param response: This should be provided for GET/HEAD methods. If + it is specified, the etag and lastModified arguments will + be retrieved automatically from the response headers and + shouldn't be separately specified. Not providing the + response with a GET request may cause the emitted + "Not Modified" responses to be non-conformant. + + @param entityExists: Set to False if the entity in question doesn't + yet exist. Necessary for PUT support with 'If-None-Match: *'. + + @param etag: The etag of the resource to check against, or None. + + @param lastModified: The last modified date of the resource to check + against, or None. + + @raise: HTTPError: Raised when the preconditions fail, in order to + abort processing and emit an error page. 
+ + """ + if response: + assert etag is None and lastModified is None + # if the code is some sort of error code, don't do anything + if not ((response.code >= 200 and response.code <= 299) + or response.code == responsecode.PRECONDITION_FAILED): + return False + etag = response.headers.getHeader("etag") + lastModified = response.headers.getHeader("last-modified") + + def matchETag(tags, allowWeak): + if entityExists and '*' in tags: + return True + if etag is None: + return False + return ((allowWeak or not etag.weak) and + ([etagmatch for etagmatch in tags if etag.match(etagmatch, strongCompare=not allowWeak)])) + + # First check if-match/if-unmodified-since + # If either one fails, we return PRECONDITION_FAILED + match = request.headers.getHeader("if-match") + if match: + if not matchETag(match, False): + raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource does not have a matching ETag.")) + + unmod_since = request.headers.getHeader("if-unmodified-since") + if unmod_since: + if not lastModified or lastModified > unmod_since: + raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has changed.")) + + # Now check if-none-match/if-modified-since. + # This bit is tricky, because of the requirements when both IMS and INM + # are present. In that case, you can't return a failure code + # unless *both* checks think it failed. + # Also, if the INM check succeeds, ignore IMS, because INM is treated + # as more reliable. + + # I hope I got the logic right here...the RFC is quite poorly written + # in this area. Someone might want to verify the testcase against + # RFC wording. + + # If IMS header is later than current time, ignore it. + notModified = None + ims = request.headers.getHeader('if-modified-since') + if ims: + notModified = (ims < time.time() and lastModified and lastModified <= ims) + + inm = request.headers.getHeader("if-none-match") + if inm: + if request.method in ("HEAD", "GET"): + # If it's a range request, don't allow a weak ETag, as that + # would break. + canBeWeak = not request.headers.hasHeader('Range') + if notModified != False and matchETag(inm, canBeWeak): + raise HTTPError(NotModifiedResponse(response)) + else: + if notModified != False and matchETag(inm, False): + raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has a matching ETag.")) + else: + if notModified == True: + if request.method in ("HEAD", "GET"): + raise HTTPError(NotModifiedResponse(response)) + else: + # S14.25 doesn't actually say what to do for a failing IMS on + # non-GET methods. But Precondition Failed makes sense to me. + raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has not changed.")) + +def checkIfRange(request, response): + """Checks for the If-Range header, and if it exists, checks if the + test passes. 
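+    The If-Range value is either an entity tag, compared strongly against
+    the response's ETag, or an HTTP-date, compared against Last-Modified.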
Returns true if the server should return partial data.""" + + ifrange = request.headers.getHeader("if-range") + + if ifrange is None: + return True + if isinstance(ifrange, http_headers.ETag): + return ifrange.match(response.headers.getHeader("etag"), strongCompare=True) + else: + return ifrange == response.headers.getHeader("last-modified") + + +class _NotifyingProducerStream(stream.ProducerStream): + doStartReading = None + + def __init__(self, length=None, doStartReading=None): + stream.ProducerStream.__init__(self, length=length) + self.doStartReading = doStartReading + + def read(self): + if self.doStartReading is not None: + doStartReading = self.doStartReading + self.doStartReading = None + doStartReading() + + return stream.ProducerStream.read(self) + + def write(self, data): + self.doStartReading = None + stream.ProducerStream.write(self, data) + + def finish(self): + self.doStartReading = None + stream.ProducerStream.finish(self) + + +# response codes that must have empty bodies +NO_BODY_CODES = (responsecode.NO_CONTENT, responsecode.NOT_MODIFIED) + +class Request(object): + """A HTTP request. + + Subclasses should override the process() method to determine how + the request will be processed. + + @ivar method: The HTTP method that was used. + @ivar uri: The full URI that was requested (includes arguments). + @ivar headers: All received headers + @ivar clientproto: client HTTP version + @ivar stream: incoming data stream. + """ + + implements(iweb.IRequest, interfaces.IConsumer) + + known_expects = ('100-continue',) + + def __init__(self, chanRequest, command, path, version, contentLength, headers): + """ + @param chanRequest: the channel request we're associated with. + """ + self.chanRequest = chanRequest + self.method = command + self.uri = path + self.clientproto = version + + self.headers = headers + + if '100-continue' in self.headers.getHeader('expect', ()): + doStartReading = self._sendContinue + else: + doStartReading = None + self.stream = _NotifyingProducerStream(contentLength, doStartReading) + self.stream.registerProducer(self.chanRequest, True) + + def checkExpect(self): + """Ensure there are no expectations that cannot be met. + Checks Expect header against self.known_expects.""" + expects = self.headers.getHeader('expect', ()) + for expect in expects: + if expect not in self.known_expects: + raise HTTPError(responsecode.EXPECTATION_FAILED) + + def process(self): + """Called by channel to let you process the request. + + Can be overridden by a subclass to do something useful.""" + pass + + def handleContentChunk(self, data): + """Callback from channel when a piece of data has been received. + Puts the data in .stream""" + self.stream.write(data) + + def handleContentComplete(self): + """Callback from channel when all data has been received. """ + self.stream.unregisterProducer() + self.stream.finish() + + def connectionLost(self, reason): + """connection was lost""" + pass + + def __repr__(self): + return '<%s %s %s>'% (self.method, self.uri, self.clientproto) + + def _sendContinue(self): + self.chanRequest.writeIntermediateResponse(responsecode.CONTINUE) + + def _finished(self, x): + """We are finished writing data.""" + self.chanRequest.finish() + + def _error(self, reason): + if reason.check(error.ConnectionLost): + log.msg("Request error: " + reason.getErrorMessage()) + else: + log.err(reason) + # Only bother with cleanup on errors other than lost connection. + self.chanRequest.abortConnection() + + def writeResponse(self, response): + """ + Write a response. 
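+
+        The content-length header is filled in from the stream length when
+        known, and for HEAD requests or body-less response codes the stream
+        is closed without a body being written (see the code below).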
+        """
+        if self.stream.doStartReading is not None:
+            # Expect: 100-continue was requested, but 100 response has not been
+            # sent, and there's a possibility that data is still waiting to be
+            # sent.
+            #
+            # Ideally this means the remote side will not send any data.
+            # However, because of compatibility requirements, it might timeout,
+            # and decide to do so anyways at the same time we're sending back
+            # this response. Thus, the read state is unknown after this.
+            # We must close the connection.
+            self.chanRequest.channel.setReadPersistent(False)
+            # Nothing more will be read
+            self.chanRequest.allContentReceived()
+
+        if response.code != responsecode.NOT_MODIFIED:
+            # Not modified response is *special* and doesn't get a content-length.
+            if response.stream is None:
+                response.headers.setHeader('content-length', 0)
+            elif response.stream.length is not None:
+                response.headers.setHeader('content-length', response.stream.length)
+        self.chanRequest.writeHeaders(response.code, response.headers)
+
+        # if this is a "HEAD" request, or a special response code,
+        # don't return any data.
+        if self.method == "HEAD" or response.code in NO_BODY_CODES:
+            if response.stream is not None:
+                response.stream.close()
+            self._finished(None)
+            return
+
+        d = stream.StreamProducer(response.stream).beginProducing(self.chanRequest)
+        d.addCallback(self._finished).addErrback(self._error)
+
+
+from xcap.web import compat
+components.registerAdapter(compat.makeOldRequestAdapter, iweb.IRequest, iweb.IOldRequest)
+components.registerAdapter(compat.OldNevowResourceAdapter, iweb.IOldNevowResource, iweb.IResource)
+components.registerAdapter(Response, int, iweb.IResponse)
+
+try:
+    # If twisted.web is installed, add an adapter for it
+    from twisted.web import resource
+except ImportError:
+    pass
+else:
+    components.registerAdapter(compat.OldResourceAdapter, resource.IResource, iweb.IOldNevowResource)
+
+__all__ = ['HTTPError', 'NotModifiedResponse', 'Request', 'Response', 'checkIfRange', 'checkPreconditions', 'defaultPortForScheme', 'parseVersion', 'splitHostPort']
+
diff --git a/xcap/web/http_headers.py b/xcap/web/http_headers.py
new file mode 100644
index 0000000..8965658
--- /dev/null
+++ b/xcap/web/http_headers.py
@@ -0,0 +1,1538 @@
+from __future__ import generators
+
+import types, time
+from calendar import timegm
+import base64
+import re
+
+def dashCapitalize(s):
+    ''' Capitalize a string, making sure to treat - as a word separator '''
+    return '-'.join([ x.capitalize() for x in s.split('-')])
+
+# datetime parsing and formatting
+weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+weekdayname_lower = [name.lower() for name in weekdayname]
+monthname = [None,
+             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+monthname_lower = [name and name.lower() for name in monthname]
+
+# HTTP Header parsing API
+
+header_case_mapping = {}
+
+def casemappingify(d):
+    global header_case_mapping
+    newd = dict([(key.lower(),key) for key in d.keys()])
+    header_case_mapping.update(newd)
+
+def lowerify(d):
+    return dict([(key.lower(),value) for key,value in d.items()])
+
+
+class HeaderHandler(object):
+    """HeaderHandler manages header generating and parsing functions.
+    """
+    HTTPParsers = {}
+    HTTPGenerators = {}
+
+    def __init__(self, parsers=None, generators=None):
+        """
+        @param parsers: A map of header names to parsing functions.
+        @type parsers: L{dict}
+
+        @param generators: A map of header names to generating functions.
+ @type generators: L{dict} + """ + + if parsers: + self.HTTPParsers.update(parsers) + if generators: + self.HTTPGenerators.update(generators) + + def parse(self, name, header): + """ + Parse the given header based on its given name. + + @param name: The header name to parse. + @type name: C{str} + + @param header: A list of unparsed headers. + @type header: C{list} of C{str} + + @return: The return value is the parsed header representation, + it is dependent on the header. See the HTTP Headers document. + """ + parser = self.HTTPParsers.get(name, None) + if parser is None: + raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,)) + + try: + for p in parser: + # print "Parsing %s: %s(%s)" % (name, repr(p), repr(h)) + header = p(header) + # if isinstance(h, types.GeneratorType): + # h=list(h) + except ValueError,v: + # print v + header=None + + return header + + def generate(self, name, header): + """ + Generate the given header based on its given name. + + @param name: The header name to generate. + @type name: C{str} + + @param header: A parsed header, such as the output of + L{HeaderHandler}.parse. + + @return: C{list} of C{str} each representing a generated HTTP header. + """ + generator = self.HTTPGenerators.get(name, None) + + if generator is None: + # print self.generators + raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,)) + + for g in generator: + header = g(header) + + #self._raw_headers[name] = h + return header + + def updateParsers(self, parsers): + """Update en masse the parser maps. + + @param parsers: Map of header names to parser chains. + @type parsers: C{dict} + """ + casemappingify(parsers) + self.HTTPParsers.update(lowerify(parsers)) + + def addParser(self, name, value): + """Add an individual parser chain for the given header. + + @param name: Name of the header to add + @type name: C{str} + + @param value: The parser chain + @type value: C{str} + """ + self.updateParsers({name: value}) + + def updateGenerators(self, generators): + """Update en masse the generator maps. + + @param parsers: Map of header names to generator chains. + @type parsers: C{dict} + """ + casemappingify(generators) + self.HTTPGenerators.update(lowerify(generators)) + + def addGenerators(self, name, value): + """Add an individual generator chain for the given header. + + @param name: Name of the header to add + @type name: C{str} + + @param value: The generator chain + @type value: C{str} + """ + self.updateGenerators({name: value}) + + def update(self, parsers, generators): + """Conveniently update parsers and generators all at once. + """ + self.updateParsers(parsers) + self.updateGenerators(generators) + + +DefaultHTTPHandler = HeaderHandler() + + +## HTTP DateTime parser +def parseDateTime(dateString): + """Convert an HTTP date string (one of three formats) to seconds since epoch.""" + parts = dateString.split() + + if not parts[0][0:3].lower() in weekdayname_lower: + # Weekday is stupid. Might have been omitted. + try: + return parseDateTime("Sun, "+dateString) + except ValueError: + # Guess not. 
+            pass
+
+    partlen = len(parts)
+    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
+        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
+        # (Note: "GMT" is literal, not a variable timezone)
+        # (also handles without "GMT")
+        # This is the normal format
+        day = parts[1]
+        month = parts[2]
+        year = parts[3]
+        time = parts[4]
+    elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
+        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
+        # (Note: "GMT" is literal, not a variable timezone)
+        # (also handles without "GMT")
+        # Two digit year, yucko.
+        day, month, year = parts[1].split('-')
+        time = parts[2]
+        year=int(year)
+        if year < 69:
+            year = year + 2000
+        elif year < 100:
+            year = year + 1900
+    elif len(parts) == 5:
+        # 3rd date format: Sun Nov  6 08:49:37 1994
+        # ANSI C asctime() format.
+        day = parts[2]
+        month = parts[1]
+        year = parts[4]
+        time = parts[3]
+    else:
+        raise ValueError("Unknown datetime format %r" % dateString)
+
+    day = int(day)
+    month = int(monthname_lower.index(month.lower()))
+    year = int(year)
+    hour, min, sec = map(int, time.split(':'))
+    return int(timegm((year, month, day, hour, min, sec)))
+
+
+##### HTTP tokenizer
+class Token(str):
+    __slots__=[]
+    tokens = {}
+    def __new__(self, char):
+        token = Token.tokens.get(char)
+        if token is None:
+            Token.tokens[char] = token = str.__new__(self, char)
+        return token
+
+    def __repr__(self):
+        return "Token(%s)" % str.__repr__(self)
+
+
+http_tokens = " \t\"()<>@,;:\\/[]?={}"
+http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"
+
+def tokenize(header, foldCase=True):
+    """Tokenize a string according to normal HTTP header parsing rules.
+
+    In particular:
+     - Whitespace is irrelevant and eaten next to special separator tokens.
+       Its existence (but not amount) is important between character strings.
+     - Quoted string support including embedded backslashes.
+     - Case is insignificant (and thus lowercased), except in quoted strings.
+       (unless foldCase=False)
+     - Multiple headers are concatenated with ','
+
+    NOTE: not all headers can be parsed with this function.
+
+    Takes a raw header value (list of strings), and
+    Returns a generator of strings and Token class instances.
+    """
+    tokens=http_tokens
+    ctls=http_ctls
+
+    string = ",".join(header)
+    list = []
+    start = 0
+    cur = 0
+    quoted = False
+    qpair = False
+    inSpaces = -1
+    qstring = None
+
+    for x in string:
+        if quoted:
+            if qpair:
+                qpair = False
+                qstring = qstring+string[start:cur-1]+x
+                start = cur+1
+            elif x == '\\':
+                qpair = True
+            elif x == '"':
+                quoted = False
+                yield qstring+string[start:cur]
+                qstring=None
+                start = cur+1
+        elif x in tokens:
+            if start != cur:
+                if foldCase:
+                    yield string[start:cur].lower()
+                else:
+                    yield string[start:cur]
+
+            start = cur+1
+            if x == '"':
+                quoted = True
+                qstring = ""
+                inSpaces = False
+            elif x in " \t":
+                if inSpaces is False:
+                    inSpaces = True
+            else:
+                inSpaces = -1
+                yield Token(x)
+        elif x in ctls:
+            raise ValueError("Invalid control character: %d in header" % ord(x))
+        else:
+            if inSpaces is True:
+                yield Token(' ')
+                inSpaces = False
+
+            inSpaces = False
+        cur = cur+1
+
+    if qpair:
+        raise ValueError, "Missing character after '\\'"
+    if quoted:
+        raise ValueError, "Missing end quote"
+
+    if start != cur:
+        if foldCase:
+            yield string[start:cur].lower()
+        else:
+            yield string[start:cur]
+
+def split(seq, delim):
+    """The same as str.split but works on arbitrary sequences.
+ Too bad it's not builtin to python!""" + + cur = [] + for item in seq: + if item == delim: + yield cur + cur = [] + else: + cur.append(item) + yield cur + +# def find(seq, *args): +# """The same as seq.index but returns -1 if not found, instead +# Too bad it's not builtin to python!""" +# try: +# return seq.index(value, *args) +# except ValueError: +# return -1 + + +def filterTokens(seq): + """Filter out instances of Token, leaving only a list of strings. + + Used instead of a more specific parsing method (e.g. splitting on commas) + when only strings are expected, so as to be a little lenient. + + Apache does it this way and has some comments about broken clients which + forget commas (?), so I'm doing it the same way. It shouldn't + hurt anything, in any case. + """ + + l=[] + for x in seq: + if not isinstance(x, Token): + l.append(x) + return l + +##### parser utilities: +def checkSingleToken(tokens): + if len(tokens) != 1: + raise ValueError, "Expected single token, not %s." % (tokens,) + return tokens[0] + +def parseKeyValue(val): + if len(val) == 1: + return val[0],None + elif len(val) == 3 and val[1] == Token('='): + return val[0],val[2] + raise ValueError, "Expected key or key=value, but got %s." % (val,) + +def parseArgs(field): + args=split(field, Token(';')) + val = args.next() + args = [parseKeyValue(arg) for arg in args] + return val,args + +def listParser(fun): + """Return a function which applies 'fun' to every element in the + comma-separated list""" + def listParserHelper(tokens): + fields = split(tokens, Token(',')) + for field in fields: + if len(field) != 0: + yield fun(field) + + return listParserHelper + +def last(seq): + """Return seq[-1]""" + + return seq[-1] + +##### Generation utilities +def quoteString(s): + return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"') + +def listGenerator(fun): + """Return a function which applies 'fun' to every element in + the given list, then joins the result with generateList""" + def listGeneratorHelper(l): + return generateList([fun(e) for e in l]) + + return listGeneratorHelper + +def generateList(seq): + return ", ".join(seq) + +def singleHeader(item): + return [item] + +def generateKeyValues(kvs): + l = [] + # print kvs + for k,v in kvs: + if v is None: + l.append('%s' % k) + else: + l.append('%s=%s' % (k,v)) + return ";".join(l) + + +class MimeType(object): + def fromString(klass, mimeTypeString): + """Generate a MimeType object from the given string. + + @param mimeTypeString: The mimetype to parse + + @return: L{MimeType} + """ + return DefaultHTTPHandler.parse('content-type', [mimeTypeString]) + + fromString = classmethod(fromString) + + def __init__(self, mediaType, mediaSubtype, params={}, **kwargs): + """ + @type mediaType: C{str} + + @type mediaSubtype: C{str} + + @type params: C{dict} + """ + self.mediaType = mediaType + self.mediaSubtype = mediaSubtype + self.params = dict(params) + + if kwargs: + self.params.update(kwargs) + + def __eq__(self, other): + if not isinstance(other, MimeType): return NotImplemented + return (self.mediaType == other.mediaType and + self.mediaSubtype == other.mediaSubtype and + self.params == other.params) + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params) + + def __hash__(self): + return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.iteritems())) + +##### Specific header parsers. 
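+
+# A quick note (added illustration, not part of the original module) on how
+# the parsers below are reached in practice: each header's chain, registered
+# at the bottom of this file, is applied in order by HeaderHandler.parse, e.g.
+#
+#   DefaultHTTPHandler.parse('content-length', ['42'])
+#       => 42
+#   DefaultHTTPHandler.parse('content-type', ['text/html; charset=utf-8'])
+#       => MimeType('text', 'html', {'charset': 'utf-8'})
+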
+def parseAccept(field):
+    type,args = parseArgs(field)
+
+    if len(type) != 3 or type[1] != Token('/'):
+        raise ValueError, "MIME Type "+str(type)+" invalid."
+
+    # okay, this spec is screwy. A 'q' parameter is used as the separator
+    # between MIME parameters and (as yet undefined) additional HTTP
+    # parameters.
+
+    num = 0
+    for arg in args:
+        if arg[0] == 'q':
+            mimeparams=tuple(args[0:num])
+            params=args[num:]
+            break
+        num = num + 1
+    else:
+        mimeparams=tuple(args)
+        params=[]
+
+    # Default values for parameters:
+    qval = 1.0
+
+    # Parse accept parameters:
+    for param in params:
+        if param[0] =='q':
+            qval = float(param[1])
+        else:
+            # Warn? ignored parameter.
+            pass
+
+    ret = MimeType(type[0],type[2],mimeparams),qval
+    return ret
+
+def parseAcceptQvalue(field):
+    type,args=parseArgs(field)
+
+    type = checkSingleToken(type)
+
+    qvalue = 1.0 # Default qvalue is 1
+    for arg in args:
+        if arg[0] == 'q':
+            qvalue = float(arg[1])
+    return type,qvalue
+
+def addDefaultCharset(charsets):
+    if charsets.get('*') is None and charsets.get('iso-8859-1') is None:
+        charsets['iso-8859-1'] = 1.0
+    return charsets
+
+def addDefaultEncoding(encodings):
+    if encodings.get('*') is None and encodings.get('identity') is None:
+        # RFC doesn't specify a default value for identity, only that it
+        # "is acceptable" if not mentioned. Thus, give it a very low qvalue.
+        encodings['identity'] = .0001
+    return encodings
+
+
+def parseContentType(header):
+    # Case folding is disabled for this header, because of use of
+    # Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf
+    # So, we need to explicitly .lower() the type/subtype and arg keys.
+
+    type,args = parseArgs(header)
+
+    if len(type) != 3 or type[1] != Token('/'):
+        raise ValueError, "MIME Type "+str(type)+" invalid."
+
+    args = [(kv[0].lower(), kv[1]) for kv in args]
+
+    return MimeType(type[0].lower(), type[2].lower(), tuple(args))
+
+def parseContentMD5(header):
+    try:
+        return base64.decodestring(header)
+    except Exception,e:
+        raise ValueError(e)
+
+def parseContentRange(header):
+    """Parse a content-range header into (kind, start, end, realLength).
+
+    realLength might be None if real length is not known ('*').
+    start and end might be None if start,end unspecified (for response code 416)
+    """
+    kind, other = header.strip().split()
+    if kind.lower() != "bytes":
+        raise ValueError("a range of type %r is not supported" % (kind,))
+    startend, realLength = other.split("/")
+    if startend.strip() == '*':
+        start,end=None,None
+    else:
+        start, end = map(int, startend.split("-"))
+    if realLength == "*":
+        realLength = None
+    else:
+        realLength = int(realLength)
+    return (kind, start, end, realLength)
+
+def parseExpect(field):
+    type,args=parseArgs(field)
+
+    type=parseKeyValue(type)
+    return (type[0], (lambda *args:args)(type[1], *args))
+
+def parseExpires(header):
+    # """HTTP/1.1 clients and caches MUST treat other invalid date formats,
+    #    especially including the value 0, as in the past (i.e., "already expired")."""
+
+    try:
+        return parseDateTime(header)
+    except ValueError:
+        return 0
+
+def parseIfModifiedSince(header):
+    # Ancient versions of netscape and *current* versions of MSIE send
+    #   If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123
+    # which is blatantly RFC-violating and not documented anywhere
+    # except bug-trackers for web frameworks.
+
+    # So, we'll just strip off everything after a ';'.
+ return parseDateTime(header.split(';', 1)[0]) + +def parseIfRange(headers): + try: + return ETag.parse(tokenize(headers)) + except ValueError: + return parseDateTime(last(headers)) + +def parseRange(range): + range = list(range) + if len(range) < 3 or range[1] != Token('='): + raise ValueError("Invalid range header format: %s" %(range,)) + + type=range[0] + if type != 'bytes': + raise ValueError("Unknown range unit: %s." % (type,)) + rangeset=split(range[2:], Token(',')) + ranges = [] + + for byterangespec in rangeset: + if len(byterangespec) != 1: + raise ValueError("Invalid range header format: %s" % (range,)) + start,end=byterangespec[0].split('-') + + if not start and not end: + raise ValueError("Invalid range header format: %s" % (range,)) + + if start: + start = int(start) + else: + start = None + + if end: + end = int(end) + else: + end = None + + if start and end and start > end: + raise ValueError("Invalid range header, start > end: %s" % (range,)) + ranges.append((start,end)) + return type,ranges + +def parseRetryAfter(header): + try: + # delta seconds + return time.time() + int(header) + except ValueError: + # or datetime + return parseDateTime(header) + +# WWW-Authenticate and Authorization + +def parseWWWAuthenticate(tokenized): + headers = [] + + tokenList = list(tokenized) + + while tokenList: + scheme = tokenList.pop(0) + challenge = {} + last = None + kvChallenge = False + + while tokenList: + token = tokenList.pop(0) + if token == Token('='): + kvChallenge = True + challenge[last] = tokenList.pop(0) + last = None + + elif token == Token(','): + if kvChallenge: + if len(tokenList) > 1 and tokenList[1] != Token('='): + break + + else: + break + + else: + last = token + + if last and scheme and not challenge and not kvChallenge: + challenge = last + last = None + + headers.append((scheme, challenge)) + + if last and last not in (Token('='), Token(',')): + if headers[-1] == (scheme, challenge): + scheme = last + challenge = {} + headers.append((scheme, challenge)) + + return headers + +def parseAuthorization(header): + scheme, rest = header.split(' ', 1) + # this header isn't tokenized because it may eat characters + # in the unquoted base64 encoded credentials + return scheme.lower(), rest + +#### Header generators +def generateAccept(accept): + mimeType,q = accept + + out="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype) + if mimeType.params: + out+=';'+generateKeyValues(mimeType.params.iteritems()) + + if q != 1.0: + out+=(';q=%.3f' % (q,)).rstrip('0').rstrip('.') + + return out + +def removeDefaultEncoding(seq): + for item in seq: + if item[0] != 'identity' or item[1] != .0001: + yield item + +def generateAcceptQvalue(keyvalue): + if keyvalue[1] == 1.0: + return "%s" % keyvalue[0:1] + else: + return ("%s;q=%.3f" % keyvalue).rstrip('0').rstrip('.') + +def parseCacheControl(kv): + k, v = parseKeyValue(kv) + if k == 'max-age' or k == 'min-fresh' or k == 's-maxage': + # Required integer argument + if v is None: + v = 0 + else: + v = int(v) + elif k == 'max-stale': + # Optional integer argument + if v is not None: + v = int(v) + elif k == 'private' or k == 'no-cache': + # Optional list argument + if v is not None: + v = [field.strip().lower() for field in v.split(',')] + return k, v + +def generateCacheControl((k, v)): + if v is None: + return str(k) + else: + if k == 'no-cache' or k == 'private': + # quoted list of values + v = quoteString(generateList( + [header_case_mapping.get(name) or dashCapitalize(name) for name in v])) + return '%s=%s' % (k,v) + +def 
generateContentRange(tup): + """tup is (type, start, end, len) + len can be None. + """ + type, start, end, len = tup + if len == None: + len = '*' + else: + len = int(len) + if start == None and end == None: + startend = '*' + else: + startend = '%d-%d' % (start, end) + + return '%s %s/%s' % (type, startend, len) + +def generateDateTime(secSinceEpoch): + """Convert seconds since epoch to HTTP datetime string.""" + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(secSinceEpoch) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + weekdayname[wd], + day, monthname[month], year, + hh, mm, ss) + return s + +def generateExpect(item): + if item[1][0] is None: + out = '%s' % (item[0],) + else: + out = '%s=%s' % (item[0], item[1][0]) + if len(item[1]) > 1: + out += ';'+generateKeyValues(item[1][1:]) + return out + +def generateRange(range): + def noneOr(s): + if s is None: + return '' + return s + + type,ranges=range + + if type != 'bytes': + raise ValueError("Unknown range unit: "+type+".") + + return (type+'='+ + ','.join(['%s-%s' % (noneOr(startend[0]), noneOr(startend[1])) + for startend in ranges])) + +def generateRetryAfter(when): + # always generate delta seconds format + return str(int(when - time.time())) + +def generateContentType(mimeType): + out="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype) + if mimeType.params: + out+=';'+generateKeyValues(mimeType.params.iteritems()) + return out + +def generateIfRange(dateOrETag): + if isinstance(dateOrETag, ETag): + return dateOrETag.generate() + else: + return generateDateTime(dateOrETag) + +# WWW-Authenticate and Authorization + +def generateWWWAuthenticate(headers): + _generated = [] + for seq in headers: + scheme, challenge = seq[0], seq[1] + + # If we're going to parse out to something other than a dict + # we need to be able to generate from something other than a dict + + try: + l = [] + for k,v in dict(challenge).iteritems(): + l.append("%s=%s" % (k, quoteString(v))) + + _generated.append("%s %s" % (scheme, ", ".join(l))) + except ValueError: + _generated.append("%s %s" % (scheme, challenge)) + + return _generated + +def generateAuthorization(seq): + return [' '.join(seq)] + + +#### +class ETag(object): + def __init__(self, tag, weak=False): + self.tag = str(tag) + self.weak = weak + + def match(self, other, strongCompare): + # Sec 13.3. + # The strong comparison function: in order to be considered equal, both + # validators MUST be identical in every way, and both MUST NOT be weak. + # + # The weak comparison function: in order to be considered equal, both + # validators MUST be identical in every way, but either or both of + # them MAY be tagged as "weak" without affecting the result. 
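+        #
+        # Illustrative note (added): ETag('x').match(ETag('x', weak=True),
+        # strongCompare=True) is False, while the same pair matches when
+        # strongCompare=False.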
+ + if not isinstance(other, ETag) or other.tag != self.tag: + return False + + if strongCompare and (other.weak or self.weak): + return False + return True + + def __eq__(self, other): + return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "Etag(%r, weak=%r)" % (self.tag, self.weak) + + def parse(tokens): + tokens=tuple(tokens) + if len(tokens) == 1 and not isinstance(tokens[0], Token): + return ETag(tokens[0]) + + if(len(tokens) == 3 and tokens[0] == "w" + and tokens[1] == Token('/')): + return ETag(tokens[2], weak=True) + + raise ValueError("Invalid ETag.") + + parse=staticmethod(parse) + + def generate(self): + if self.weak: + return 'W/'+quoteString(self.tag) + else: + return quoteString(self.tag) + +def parseStarOrETag(tokens): + tokens=tuple(tokens) + if tokens == ('*',): + return '*' + else: + return ETag.parse(tokens) + +def generateStarOrETag(etag): + if etag=='*': + return etag + else: + return etag.generate() + +#### Cookies. Blech! +class Cookie(object): + # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires', 'discard', 'secure', 'comment', 'commenturl', 'version'] + + def __init__(self, name, value, path=None, domain=None, ports=None, expires=None, discard=False, secure=False, comment=None, commenturl=None, version=0): + self.name=name + self.value=value + self.path=path + self.domain=domain + self.ports=ports + self.expires=expires + self.discard=discard + self.secure=secure + self.comment=comment + self.commenturl=commenturl + self.version=version + + def __repr__(self): + s="Cookie(%r=%r" % (self.name, self.value) + if self.path is not None: s+=", path=%r" % (self.path,) + if self.domain is not None: s+=", domain=%r" % (self.domain,) + if self.ports is not None: s+=", ports=%r" % (self.ports,) + if self.expires is not None: s+=", expires=%r" % (self.expires,) + if self.secure is not False: s+=", secure=%r" % (self.secure,) + if self.comment is not None: s+=", comment=%r" % (self.comment,) + if self.commenturl is not None: s+=", commenturl=%r" % (self.commenturl,) + if self.version != 0: s+=", version=%r" % (self.version,) + s+=")" + return s + + def __eq__(self, other): + return (isinstance(other, Cookie) and + other.path == self.path and + other.domain == self.domain and + other.ports == self.ports and + other.expires == self.expires and + other.secure == self.secure and + other.comment == self.comment and + other.commenturl == self.commenturl and + other.version == self.version) + + def __ne__(self, other): + return not self.__eq__(other) + + +def parseCookie(headers): + """Bleargh, the cookie spec sucks. + This surely needs interoperability testing. + There are two specs that are supported: + Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html + Version 1) http://www.faqs.org/rfcs/rfc2965.html + """ + + cookies = [] + # There can't really be multiple cookie headers according to RFC, because + # if multiple headers are allowed, they must be joinable with ",". + # Neither new RFC2965 cookies nor old netscape cookies are. 
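+    #
+    # For illustration (added examples): a V0 header looks like
+    #   CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001
+    # while a V1 header announces itself with a leading $Version, e.g.
+    #   $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme"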
+
+    header = ';'.join(headers)
+    if header[0:8].lower() == "$version":
+        # RFC2965 cookie
+        h=tokenize([header], foldCase=False)
+        r_cookies = split(h, Token(','))
+        for r_cookie in r_cookies:
+            last_cookie = None
+            rr_cookies = split(r_cookie, Token(';'))
+            for cookie in rr_cookies:
+                nameval = tuple(split(cookie, Token('=')))
+                if len(nameval) == 2:
+                    (name,), (value,) = nameval
+                else:
+                    (name,), = nameval
+                    value = None
+
+                name=name.lower()
+                if name == '$version':
+                    continue
+                if name[0] == '$':
+                    if last_cookie is not None:
+                        if name == '$path':
+                            last_cookie.path=value
+                        elif name == '$domain':
+                            last_cookie.domain=value
+                        elif name == '$port':
+                            if value is None:
+                                last_cookie.ports = ()
+                            else:
+                                last_cookie.ports=tuple([int(s) for s in value.split(',')])
+                else:
+                    last_cookie = Cookie(name, value, version=1)
+                    cookies.append(last_cookie)
+    else:
+        # Oldstyle cookies don't do quoted strings or anything sensible.
+        # All characters are valid for names except ';' and '=', and all
+        # characters are valid for values except ';'. Spaces are stripped,
+        # however.
+        r_cookies = header.split(';')
+        for r_cookie in r_cookies:
+            name,value = r_cookie.split('=', 1)
+            name=name.strip(' \t')
+            value=value.strip(' \t')
+
+            cookies.append(Cookie(name, value))
+
+    return cookies
+
+cookie_validname = "[^"+re.escape(http_tokens+http_ctls)+"]*$"
+cookie_validname_re = re.compile(cookie_validname)
+cookie_validvalue = cookie_validname+'|"([^"]|\\\\")*"$'
+cookie_validvalue_re = re.compile(cookie_validvalue)
+
+def generateCookie(cookies):
+    # There's a fundamental problem with the two cookie specifications.
+    # They both use the "Cookie" header, and the RFC Cookie header only allows
+    # one version to be specified. Thus, when you have a collection of V0 and
+    # V1 cookies, you have to either send them all as V0 or send them all as
+    # V1.
+
+    # I choose to send them all as V1.
+
+    # You might think converting a V0 cookie to a V1 cookie would be lossless,
+    # but you'd be wrong. If you do the conversion, and a V0 parser tries to
+    # read the cookie, it will see a modified form of the cookie, in cases
+    # where quotes must be added to conform to proper V1 syntax.
+    # (as a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y")
+
+    # However, that is what we will do, anyways. It has a high probability of
+    # breaking applications that only handle oldstyle cookies, where some other
+    # application set a newstyle cookie that is applicable over the site
+    # (or host), AND where the oldstyle cookie uses a value which is invalid
+    # syntax in a newstyle cookie.
+
+    # Also, the cookie name *cannot* be quoted in V1, so some cookies just
+    # cannot be converted at all. (e.g. "Cookie: phpAds_capAd[32]=2"). These
+    # are just discarded during conversion.
+
+    # As this is an unsolvable problem, I will pretend I can just say
+    # OH WELL, don't do that, or else upgrade your old applications to have
+    # newstyle cookie parsers.
+
+    # I will note offhandedly that there are *many* sites which send V0 cookies
+    # that are not valid V1 cookie syntax. About 20% for my cookies file.
+    # However, they do not generally mix them with V1 cookies, so this isn't
+    # an issue, at least right now. I have not tested to see how many of those
+    # webapps support RFC2965 V1 cookies. I suspect not many.
+
+    max_version = max([cookie.version for cookie in cookies])
+
+    if max_version == 0:
+        # no quoting or anything.
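+        # e.g. (added illustration): two V0 cookies are emitted as
+        #   name1=value1;name2=value2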
+ return ';'.join(["%s=%s" % (cookie.name, cookie.value) for cookie in cookies]) + else: + str_cookies = ['$Version="1"'] + for cookie in cookies: + if cookie.version == 0: + # Version 0 cookie: we make sure the name and value are valid + # V1 syntax. + + # If they are, we use them as is. This means in *most* cases, + # the cookie will look literally the same on output as it did + # on input. + # If it isn't a valid name, ignore the cookie. + # If it isn't a valid value, quote it and hope for the best on + # the other side. + + if cookie_validname_re.match(cookie.name) is None: + continue + + value=cookie.value + if cookie_validvalue_re.match(cookie.value) is None: + value = quoteString(value) + + str_cookies.append("%s=%s" % (cookie.name, value)) + else: + # V1 cookie, nice and easy + str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value))) + + if cookie.path: + str_cookies.append("$Path=%s" % quoteString(cookie.path)) + if cookie.domain: + str_cookies.append("$Domain=%s" % quoteString(cookie.domain)) + if cookie.ports is not None: + if len(cookie.ports) == 0: + str_cookies.append("$Port") + else: + str_cookies.append("$Port=%s" % quoteString(",".join([str(x) for x in cookie.ports]))) + return ';'.join(str_cookies) + +def parseSetCookie(headers): + setCookies = [] + for header in headers: + try: + parts = header.split(';') + l = [] + + for part in parts: + namevalue = part.split('=',1) + if len(namevalue) == 1: + name=namevalue[0] + value=None + else: + name,value=namevalue + value=value.strip(' \t') + + name=name.strip(' \t') + + l.append((name, value)) + + setCookies.append(makeCookieFromList(l, True)) + except ValueError: + # If we can't parse one Set-Cookie, ignore it, + # but not the rest of Set-Cookies. + pass + return setCookies + +def parseSetCookie2(toks): + outCookies = [] + for cookie in [[parseKeyValue(x) for x in split(y, Token(';'))] + for y in split(toks, Token(','))]: + try: + outCookies.append(makeCookieFromList(cookie, False)) + except ValueError: + # Again, if we can't handle one cookie -- ignore it. + pass + return outCookies + +def makeCookieFromList(tup, netscapeFormat): + name, value = tup[0] + if name is None or value is None: + raise ValueError("Cookie has missing name or value") + if name.startswith("$"): + raise ValueError("Invalid cookie name: %r, starts with '$'." 
% name) + cookie = Cookie(name, value) + hadMaxAge = False + + for name,value in tup[1:]: + name = name.lower() + + if value is None: + if name in ("discard", "secure"): + # Boolean attrs + value = True + elif name != "port": + # Can be either boolean or explicit + continue + + if name in ("comment", "commenturl", "discard", "domain", "path", "secure"): + # simple cases + setattr(cookie, name, value) + elif name == "expires" and not hadMaxAge: + if netscapeFormat and value[0] == '"' and value[-1] == '"': + value = value[1:-1] + cookie.expires = parseDateTime(value) + elif name == "max-age": + hadMaxAge = True + cookie.expires = int(value) + time.time() + elif name == "port": + if value is None: + cookie.ports = () + else: + if netscapeFormat and value[0] == '"' and value[-1] == '"': + value = value[1:-1] + cookie.ports = tuple([int(s) for s in value.split(',')]) + elif name == "version": + cookie.version = int(value) + + return cookie + + +def generateSetCookie(cookies): + setCookies = [] + for cookie in cookies: + out = ["%s=%s" % (cookie.name, cookie.value)] + if cookie.expires: + out.append("expires=%s" % generateDateTime(cookie.expires)) + if cookie.path: + out.append("path=%s" % cookie.path) + if cookie.domain: + out.append("domain=%s" % cookie.domain) + if cookie.secure: + out.append("secure") + + setCookies.append('; '.join(out)) + return setCookies + +def generateSetCookie2(cookies): + setCookies = [] + for cookie in cookies: + out = ["%s=%s" % (cookie.name, quoteString(cookie.value))] + if cookie.comment: + out.append("Comment=%s" % quoteString(cookie.comment)) + if cookie.commenturl: + out.append("CommentURL=%s" % quoteString(cookie.commenturl)) + if cookie.discard: + out.append("Discard") + if cookie.domain: + out.append("Domain=%s" % quoteString(cookie.domain)) + if cookie.expires: + out.append("Max-Age=%s" % (cookie.expires - time.time())) + if cookie.path: + out.append("Path=%s" % quoteString(cookie.path)) + if cookie.ports is not None: + if len(cookie.ports) == 0: + out.append("Port") + else: + out.append("Port=%s" % quoteString(",".join([str(x) for x in cookie.ports]))) + if cookie.secure: + out.append("Secure") + out.append('Version="1"') + setCookies.append('; '.join(out)) + return setCookies + +def parseDepth(depth): + if depth not in ("0", "1", "infinity"): + raise ValueError("Invalid depth header value: %s" % (depth,)) + return depth + +def parseOverWrite(overwrite): + if overwrite == "F": + return False + elif overwrite == "T": + return True + raise ValueError("Invalid overwrite header value: %s" % (overwrite,)) + +def generateOverWrite(overwrite): + if overwrite: + return "T" + else: + return "F" + +##### Random stuff that looks useful. +# def sortMimeQuality(s): +# def sorter(item1, item2): +# if item1[0] == '*': +# if item2[0] == '*': +# return 0 + + +# def sortQuality(s): +# def sorter(item1, item2): +# if item1[1] < item2[1]: +# return -1 +# if item1[1] < item2[1]: +# return 1 +# if item1[0] == item2[0]: +# return 0 + + +# def getMimeQuality(mimeType, accepts): +# type,args = parseArgs(mimeType) +# type=type.split(Token('/')) +# if len(type) != 2: +# raise ValueError, "MIME Type "+s+" invalid." 
+
+#     for accept in accepts:
+#         accept,acceptQual=accept
+#         acceptType=accept[0:1]
+#         acceptArgs=accept[2]
+
+#         if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and
+#             (args == acceptArgs or len(acceptArgs) == 0)):
+#             return acceptQual
+
+# def getQuality(type, accepts):
+#     qual = accepts.get(type)
+#     if qual is not None:
+#         return qual
+
+#     return accepts.get('*')
+
+# Headers object
+class __RecalcNeeded(object):
+    def __repr__(self):
+        return "<RecalcNeeded>"
+
+_RecalcNeeded = __RecalcNeeded()
+
+class Headers(object):
+    """This class stores the HTTP headers as both a parsed representation and
+    the raw string representation. It converts between the two on demand."""
+
+    def __init__(self, headers=None, rawHeaders=None, handler=DefaultHTTPHandler):
+        self._raw_headers = {}
+        self._headers = {}
+        self.handler = handler
+        if headers is not None:
+            for key, value in headers.iteritems():
+                self.setHeader(key, value)
+        if rawHeaders is not None:
+            for key, value in rawHeaders.iteritems():
+                self.setRawHeaders(key, value)
+
+    def _setRawHeaders(self, headers):
+        self._raw_headers = headers
+        self._headers = {}
+
+    def _toParsed(self, name):
+        r = self._raw_headers.get(name, None)
+        h = self.handler.parse(name, r)
+        if h is not None:
+            self._headers[name] = h
+        return h
+
+    def _toRaw(self, name):
+        h = self._headers.get(name, None)
+        r = self.handler.generate(name, h)
+        if r is not None:
+            self._raw_headers[name] = r
+        return r
+
+    def hasHeader(self, name):
+        """Does a header with the given name exist?"""
+        name=name.lower()
+        return self._raw_headers.has_key(name)
+
+    def getRawHeaders(self, name, default=None):
+        """Returns a list of headers matching the given name as the raw string given."""
+
+        name=name.lower()
+        raw_header = self._raw_headers.get(name, default)
+        if raw_header is not _RecalcNeeded:
+            return raw_header
+
+        return self._toRaw(name)
+
+    def getHeader(self, name, default=None):
+        """Returns the parsed representation of the given header.
+        The exact form of the return value depends on the header in question.
+
+        If no parser for the header exists, raise ValueError.
+
+        If the header doesn't exist, return default (or None if not specified)
+        """
+        name=name.lower()
+        parsed = self._headers.get(name, default)
+        if parsed is not _RecalcNeeded:
+            return parsed
+        return self._toParsed(name)
+
+    def setRawHeaders(self, name, value):
+        """Sets the raw representation of the given header.
+        Value should be a list of strings, each being one header of the
+        given name.
+        """
+        name=name.lower()
+        self._raw_headers[name] = value
+        self._headers[name] = _RecalcNeeded
+
+    def setHeader(self, name, value):
+        """Sets the parsed representation of the given header.
+        Value should be a list of objects whose exact form depends
+        on the header in question.
+        """
+        name=name.lower()
+        self._raw_headers[name] = _RecalcNeeded
+        self._headers[name] = value
+
+    def addRawHeader(self, name, value):
+        """
+        Add a raw value to a header that may or may not already exist.
+        If it exists, add it as a separate header to output; do not
+        replace anything.
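+
+        For example (added illustration)::
+
+            headers.addRawHeader('via', '1.1 proxy.example.org')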
+        """
+        name=name.lower()
+        raw_header = self._raw_headers.get(name)
+        if raw_header is None:
+            # No header yet
+            raw_header = []
+            self._raw_headers[name] = raw_header
+        elif raw_header is _RecalcNeeded:
+            raw_header = self._toRaw(name)
+
+        raw_header.append(value)
+        self._headers[name] = _RecalcNeeded
+
+    def removeHeader(self, name):
+        """Removes the header named."""
+
+        name=name.lower()
+        if self._raw_headers.has_key(name):
+            del self._raw_headers[name]
+            del self._headers[name]
+
+    def __repr__(self):
+        return '<Headers: Raw: %s Parsed: %s>'% (self._raw_headers, self._headers)
+
+    def canonicalNameCaps(self, name):
+        """Return the name with the canonical capitalization, if known,
+        otherwise, Caps-After-Dashes"""
+        return header_case_mapping.get(name) or dashCapitalize(name)
+
+    def getAllRawHeaders(self):
+        """Return an iterator of key,value pairs of all headers
+        contained in this object, as strings. The keys are capitalized
+        in canonical capitalization."""
+        for k,v in self._raw_headers.iteritems():
+            if v is _RecalcNeeded:
+                v = self._toRaw(k)
+            yield self.canonicalNameCaps(k), v
+
+    def makeImmutable(self):
+        """Make this header set immutable. All mutating operations will
+        raise an exception."""
+        self.setHeader = self.setRawHeaders = self.removeHeader = self._mutateRaise
+
+    def _mutateRaise(self, *args):
+        raise AttributeError("This header object is immutable as the headers have already been sent.")
+
+
+"""The following dicts are all mappings of header to list of operations
+   to perform. The first operation should generally be 'tokenize' if the
+   header can be parsed according to the normal tokenization rules. If
+   it cannot, generally the first thing you want to do is take only the
+   last instance of the header (in case it was sent multiple times, which
+   is strictly an error, but we're nice.).
+   """
+
+iteritems = lambda x: x.iteritems()
+
+
+parser_general_headers = {
+    'Cache-Control':(tokenize, listParser(parseCacheControl), dict),
+    'Connection':(tokenize,filterTokens),
+    'Date':(last,parseDateTime),
+#    'Pragma':tokenize
+#    'Trailer':tokenize
+    'Transfer-Encoding':(tokenize,filterTokens),
+#    'Upgrade':tokenize
+#    'Via':tokenize,stripComment
+#    'Warning':tokenize
+}
+
+generator_general_headers = {
+    'Cache-Control':(iteritems, listGenerator(generateCacheControl), singleHeader),
+    'Connection':(generateList,singleHeader),
+    'Date':(generateDateTime,singleHeader),
+#    'Pragma':
+#    'Trailer':
+    'Transfer-Encoding':(generateList,singleHeader),
+#    'Upgrade':
+#    'Via':
+#    'Warning':
+}
+
+parser_request_headers = {
+    'Accept': (tokenize, listParser(parseAccept), dict),
+    'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset),
+    'Accept-Encoding':(tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding),
+    'Accept-Language':(tokenize, listParser(parseAcceptQvalue), dict),
+    'Authorization': (last, parseAuthorization),
+    'Cookie':(parseCookie,),
+    'Expect':(tokenize, listParser(parseExpect), dict),
+    'From':(last,),
+    'Host':(last,),
+    'If-Match':(tokenize, listParser(parseStarOrETag), list),
+    'If-Modified-Since':(last, parseIfModifiedSince),
+    'If-None-Match':(tokenize, listParser(parseStarOrETag), list),
+    'If-Range':(parseIfRange,),
+    'If-Unmodified-Since':(last,parseDateTime),
+    'Max-Forwards':(last,int),
+#    'Proxy-Authorization':str, # what is "credentials"
+    'Range':(tokenize, parseRange),
+    'Referer':(last,str), # TODO: URI object?
+ 'TE':(tokenize, listParser(parseAcceptQvalue), dict), + 'User-Agent':(last,str), +} + +generator_request_headers = { + 'Accept': (iteritems,listGenerator(generateAccept),singleHeader), + 'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),singleHeader), + 'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue),singleHeader), + 'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),singleHeader), + 'Authorization': (generateAuthorization,), # what is "credentials" + 'Cookie':(generateCookie,singleHeader), + 'Expect':(iteritems, listGenerator(generateExpect), singleHeader), + 'From':(str,singleHeader), + 'Host':(str,singleHeader), + 'If-Match':(listGenerator(generateStarOrETag), singleHeader), + 'If-Modified-Since':(generateDateTime,singleHeader), + 'If-None-Match':(listGenerator(generateStarOrETag), singleHeader), + 'If-Range':(generateIfRange, singleHeader), + 'If-Unmodified-Since':(generateDateTime,singleHeader), + 'Max-Forwards':(str, singleHeader), +# 'Proxy-Authorization':str, # what is "credentials" + 'Range':(generateRange,singleHeader), + 'Referer':(str,singleHeader), + 'TE': (iteritems, listGenerator(generateAcceptQvalue),singleHeader), + 'User-Agent':(str,singleHeader), +} + +parser_response_headers = { + 'Accept-Ranges':(tokenize, filterTokens), + 'Age':(last,int), + 'ETag':(tokenize, ETag.parse), + 'Location':(last,), # TODO: URI object? +# 'Proxy-Authenticate' + 'Retry-After':(last, parseRetryAfter), + 'Server':(last,), + 'Set-Cookie':(parseSetCookie,), + 'Set-Cookie2':(tokenize, parseSetCookie2), + 'Vary':(tokenize, filterTokens), + 'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False), + parseWWWAuthenticate,) +} + +generator_response_headers = { + 'Accept-Ranges':(generateList, singleHeader), + 'Age':(str, singleHeader), + 'ETag':(ETag.generate, singleHeader), + 'Location':(str, singleHeader), +# 'Proxy-Authenticate' + 'Retry-After':(generateRetryAfter, singleHeader), + 'Server':(str, singleHeader), + 'Set-Cookie':(generateSetCookie,), + 'Set-Cookie2':(generateSetCookie2,), + 'Vary':(generateList, singleHeader), + 'WWW-Authenticate':(generateWWWAuthenticate,) +} + +parser_entity_headers = { + 'Allow':(lambda str:tokenize(str, foldCase=False), filterTokens), + 'Content-Encoding':(tokenize, filterTokens), + 'Content-Language':(tokenize, filterTokens), + 'Content-Length':(last, int), + 'Content-Location':(last,), # TODO: URI object? 
+    'Content-MD5':(last, parseContentMD5),
+    'Content-Range':(last, parseContentRange),
+    'Content-Type':(lambda str:tokenize(str, foldCase=False), parseContentType),
+    'Expires':(last, parseExpires),
+    'Last-Modified':(last, parseDateTime),
+    }
+
+generator_entity_headers = {
+    'Allow':(generateList, singleHeader),
+    'Content-Encoding':(generateList, singleHeader),
+    'Content-Language':(generateList, singleHeader),
+    'Content-Length':(str, singleHeader),
+    'Content-Location':(str, singleHeader),
+    'Content-MD5':(base64.encodestring, lambda x: x.strip("\n"), singleHeader),
+    'Content-Range':(generateContentRange, singleHeader),
+    'Content-Type':(generateContentType, singleHeader),
+    'Expires':(generateDateTime, singleHeader),
+    'Last-Modified':(generateDateTime, singleHeader),
+    }
+
+DefaultHTTPHandler.updateParsers(parser_general_headers)
+DefaultHTTPHandler.updateParsers(parser_request_headers)
+DefaultHTTPHandler.updateParsers(parser_response_headers)
+DefaultHTTPHandler.updateParsers(parser_entity_headers)
+
+DefaultHTTPHandler.updateGenerators(generator_general_headers)
+DefaultHTTPHandler.updateGenerators(generator_request_headers)
+DefaultHTTPHandler.updateGenerators(generator_response_headers)
+DefaultHTTPHandler.updateGenerators(generator_entity_headers)
+
+
+# casemappingify(DefaultHTTPParsers)
+# casemappingify(DefaultHTTPGenerators)
+
+# lowerify(DefaultHTTPParsers)
+# lowerify(DefaultHTTPGenerators)
diff --git a/xcap/web/iweb.py b/xcap/web/iweb.py
new file mode 100644
index 0000000..b949db8
--- /dev/null
+++ b/xcap/web/iweb.py
@@ -0,0 +1,378 @@
+
+"""
+   I contain the interfaces for several web related objects including IRequest
+   and IResource. I am based heavily on ideas from nevow.inevow
+"""
+
+from zope.interface import Attribute, Interface, interface
+
+# server.py interfaces
+class IResource(Interface):
+    """
+    An HTTP resource.
+
+    I serve 2 main purposes: one is to provide a standard representation for
+    what the HTTP specification calls an 'entity', and the other is to provide
+    a mechanism for mapping URLs to content.
+    """
+
+    def locateChild(req, segments):
+        """Locate another object which can be adapted to IResource.
+
+        @return: A 2-tuple of (resource, remaining-path-segments),
+                 or a deferred which will fire the above.
+
+                 Causes the object publishing machinery to continue on
+                 with specified resource and segments, calling the
+                 appropriate method on the specified resource.
+
+                 If you return (self, L{server.StopTraversal}), this
+                 instructs web to immediately stop the lookup stage,
+                 and switch to the rendering stage, leaving the
+                 remaining path alone for your render function to
+                 handle.
+        """
+
+    def renderHTTP(req):
+        """Return an IResponse or a deferred which will fire an
+        IResponse. This response will be written to the web browser
+        which initiated the request.
+        """
+
+# Is there a better way to do this than this funky extra class?
+_default = object()
+class SpecialAdaptInterfaceClass(interface.InterfaceClass):
+    # A special adapter for IResource to handle the extra step of adapting
+    # from IOldNevowResource-providing resources.
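+    #
+    # That is (explanatory note, added): IResource(foo) first tries ordinary
+    # zope.interface adaptation; failing that, it adapts foo to
+    # IOldNevowResource and wraps that result back into an IResource.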
+ def __call__(self, other, alternate=_default): + result = super(SpecialAdaptInterfaceClass, self).__call__(other, alternate) + if result is not alternate: + return result + + result = IOldNevowResource(other, alternate) + if result is not alternate: + result = IResource(result) + return result + if alternate is not _default: + return alternate + raise TypeError('Could not adapt', other, self) +IResource.__class__ = SpecialAdaptInterfaceClass + +class IOldNevowResource(Interface): + # Shared interface with inevow.IResource + """ + I am a web resource. + """ + + def locateChild(ctx, segments): + """Locate another object which can be adapted to IResource + Return a tuple of resource, path segments + """ + + def renderHTTP(ctx): + """Return a string or a deferred which will fire a string. This string + will be written to the web browser which initiated this request. + + Unlike iweb.IResource, this expects the incoming data to have already been read + and parsed into request.args and request.content, and expects to return a + string instead of a response object. + """ + +class ICanHandleException(Interface): + # Shared interface with inevow.ICanHandleException + def renderHTTP_exception(request, failure): + """Render an exception to the given request object. + """ + + def renderInlineException(request, reason): + """Return stan representing the exception, to be printed in the page, + not replacing the page.""" + + +# http.py interfaces +class IResponse(Interface): + """I'm a response.""" + code = Attribute("The HTTP response code") + headers = Attribute("A http_headers.Headers instance of headers to send") + stream = Attribute("A stream.IByteStream of outgoing data, or else None.") + +class IRequest(Interface): + """I'm a request for a web resource + """ + + method = Attribute("The HTTP method from the request line, e.g. GET") + uri = Attribute("The raw URI from the request line. May or may not include host.") + clientproto = Attribute("Protocol from the request line, e.g. HTTP/1.1") + + headers = Attribute("A http_headers.Headers instance of incoming headers.") + stream = Attribute("A stream.IByteStream of incoming data.") + + def writeResponse(response): + """Write an IResponse object to the client""" + + chanRequest = Attribute("The ChannelRequest. I wonder if this is public really?") + +class IOldRequest(Interface): + # Shared interface with inevow.ICurrentSegments + """An old HTTP request. + + Subclasses should override the process() method to determine how + the request will be processed. + + @ivar method: The HTTP method that was used. + @ivar uri: The full URI that was requested (includes arguments). + @ivar path: The path only (arguments not included). + @ivar args: All of the arguments, including URL and POST arguments. + @type args: A mapping of strings (the argument names) to lists of values. + i.e., ?foo=bar&foo=baz&quux=spam results in + {'foo': ['bar', 'baz'], 'quux': ['spam']}. + @ivar received_headers: All received headers + """ + # Methods for received request + def getHeader(key): + """Get a header that was sent from the network. + """ + + def getCookie(key): + """Get a cookie that was sent from the network. + """ + + + def getAllHeaders(): + """Return dictionary of all headers the request received.""" + + def getRequestHostname(): + """Get the hostname that the user passed in to the request. + + This will either use the Host: header (if it is available) or the + host we are listening on if the header is unavailable. 
+ """ + + def getHost(): + """Get my originally requesting transport's host. + + Don't rely on the 'transport' attribute, since Request objects may be + copied remotely. For information on this method's return value, see + twisted.internet.tcp.Port. + """ + + def getClientIP(): + pass + def getClient(): + pass + def getUser(): + pass + def getPassword(): + pass + def isSecure(): + pass + + def getSession(sessionInterface = None): + pass + + def URLPath(): + pass + + def prePathURL(): + pass + + def rememberRootURL(): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + + def getRootURL(): + """ + Get a previously-remembered URL. + """ + + # Methods for outgoing request + def finish(): + """We are finished writing data.""" + + def write(data): + """ + Write some data as a result of an HTTP request. The first + time this is called, it writes out response data. + """ + + def addCookie(k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None): + """Set an outgoing HTTP cookie. + + In general, you should consider using sessions instead of cookies, see + twisted.web.server.Request.getSession and the + twisted.web.server.Session class for details. + """ + + def setResponseCode(code, message=None): + """Set the HTTP response code. + """ + + def setHeader(k, v): + """Set an outgoing HTTP header. + """ + + def redirect(url): + """Utility function that does a redirect. + + The request should have finish() called after this. + """ + + def setLastModified(when): + """Set the X{Last-Modified} time for the response to this request. + + If I am called more than once, I ignore attempts to set + Last-Modified earlier, only replacing the Last-Modified time + if it is to a later value. + + If I am a conditional request, I may modify my response code + to L{NOT_MODIFIED} if appropriate for the time given. + + @param when: The last time the resource being returned was + modified, in seconds since the epoch. + @type when: number + @return: If I am a X{If-Modified-Since} conditional request and + the time given is not newer than the condition, I return + L{http.CACHED} to indicate that you should write no + body. Otherwise, I return a false value. + """ + + def setETag(etag): + """Set an X{entity tag} for the outgoing response. + + That's \"entity tag\" as in the HTTP/1.1 X{ETag} header, \"used + for comparing two or more entities from the same requested + resource.\" + + If I am a conditional request, I may modify my response code + to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate + for the tag given. + + @param etag: The entity tag for the resource being returned. + @type etag: string + @return: If I am a X{If-None-Match} conditional request and + the tag matches one in the request, I return + L{http.CACHED} to indicate that you should write + no body. Otherwise, I return a false value. + """ + + def setHost(host, port, ssl=0): + """Change the host and port the request thinks it's using. + + This method is useful for working with reverse HTTP proxies (e.g. + both Squid and Apache's mod_proxy can do this), when the address + the HTTP client is using is different than the one we're listening on. + + For example, Apache may be listening on https://www.example.com, and then + forwarding requests to http://localhost:8080, but we don't want HTML produced + by Twisted to say 'http://localhost:8080', they should say 'https://www.example.com', + so we do:: + + request.setHost('www.example.com', 443, ssl=1) + + This method is experimental. 
+ """ + +class IChanRequestCallbacks(Interface): + """The bits that are required of a Request for interfacing with a + IChanRequest object""" + + def __init__(chanRequest, command, path, version, contentLength, inHeaders): + """Create a new Request object. + @param chanRequest: the IChanRequest object creating this request + @param command: the HTTP command e.g. GET + @param path: the HTTP path e.g. /foo/bar.html + @param version: the parsed HTTP version e.g. (1,1) + @param contentLength: how much data to expect, or None if unknown + @param inHeaders: the request headers""" + + def process(): + """Process the request. Called as soon as it's possibly reasonable to + return a response. handleContentComplete may or may not have been called already.""" + + def handleContentChunk(data): + """Called when a piece of incoming data has been received.""" + + def handleContentComplete(): + """Called when the incoming data stream is finished.""" + + def connectionLost(reason): + """Called if the connection was lost.""" + + +class IChanRequest(Interface): + def writeIntermediateResponse(code, headers=None): + """Write a non-terminating response. + + Intermediate responses cannot contain data. + If the channel does not support intermediate responses, do nothing. + + @ivar code: The response code. Should be in the 1xx range. + @type code: int + @ivar headers: the headers to send in the response + @type headers: C{twisted.web.http_headers.Headers} + """ + pass + + def writeHeaders(code, headers): + """Write a final response. + + @param code: The response code. Should not be in the 1xx range. + @type code: int + @param headers: the headers to send in the response. They will be augmented + with any connection-oriented headers as necessary for the protocol. + @type headers: C{twisted.web.http_headers.Headers} + """ + pass + + def write(data): + """Write some data. + + @param data: the data bytes + @type data: str + """ + pass + + def finish(): + """Finish the request, and clean up the connection if necessary. + """ + pass + + def abortConnection(): + """Forcibly abort the connection without cleanly closing. + Use if, for example, you can't write all the data you promised. + """ + pass + + def registerProducer(producer, streaming): + """Register a producer with the standard API.""" + pass + + def unregisterProducer(): + """Unregister a producer.""" + pass + + def getHostInfo(): + """Returns a tuple of (address, socket user connected to, + boolean, was it secure). Note that this should not necsessarily + always return the actual local socket information from + twisted. E.g. in a CGI, it should use the variables coming + from the invoking script. + """ + + def getRemoteHost(): + """Returns an address of the remote host. + + Like getHostInfo, this information may come from the real + socket, or may come from additional information, depending on + the transport. + """ + + persistent = Attribute("""Whether this request supports HTTP connection persistence. May be set to False. Should not be set to other values.""") + + +class ISite(Interface): + pass + +__all__ = ['ICanHandleException', 'IChanRequest', 'IChanRequestCallbacks', 'IOldNevowResource', 'IOldRequest', 'IRequest', 'IResource', 'IResponse', 'ISite'] diff --git a/xcap/web/resource.py b/xcap/web/resource.py new file mode 100644 index 0000000..961d808 --- /dev/null +++ b/xcap/web/resource.py @@ -0,0 +1,294 @@ +# Copyright (c) 2001-2007 Twisted Matrix Laboratories. +# See LICENSE for details. 
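+
+# Usage sketch (illustrative; HelloResource is a hypothetical subclass):
+# RenderMixin below dispatches request.method to a matching http_METHOD
+# handler, so a minimal resource only defines the methods it supports:
+#
+#     class HelloResource(Resource):
+#         def http_GET(self, request):
+#             return http.Response(responsecode.OK, {}, stream="hello")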
+
+"""
+I hold the lowest-level L{Resource} class and related mix-in classes.
+"""
+
+# System Imports
+from zope.interface import implements
+
+from xcap.web import iweb, http, server, responsecode
+
+class RenderMixin(object):
+    """
+    Mix-in class for L{iweb.IResource} which provides a dispatch mechanism for
+    handling HTTP methods.
+    """
+    def allowedMethods(self):
+        """
+        @return: A tuple of HTTP methods that are allowed to be invoked on this resource.
+        """
+        if not hasattr(self, "_allowed_methods"):
+            self._allowed_methods = tuple([name[5:] for name in dir(self) if name.startswith('http_')])
+        return self._allowed_methods
+
+    def checkPreconditions(self, request):
+        """
+        Checks all preconditions imposed by this resource upon a request made
+        against it.
+        @param request: the request to process.
+        @raise http.HTTPError: if any precondition fails.
+        @return: C{None} or a deferred whose callback value is C{request}.
+        """
+        #
+        # http.checkPreconditions() gets called by the server after every
+        # GET or HEAD request.
+        #
+        # For other methods, we need to know to bail out before request
+        # processing, especially for methods that modify server state (eg. PUT).
+        # We also would like to do so even for methods that don't, if those
+        # methods might be expensive to process. We're assuming that GET and
+        # HEAD are not expensive.
+        #
+        if request.method not in ("GET", "HEAD"):
+            http.checkPreconditions(request)
+
+        # Check per-method preconditions
+        method = getattr(self, "preconditions_" + request.method, None)
+        if method:
+            return method(request)
+
+    def renderHTTP(self, request):
+        """
+        See L{iweb.IResource.renderHTTP}.
+
+        This implementation will dispatch the given C{request} to another method
+        of C{self} named C{http_}METHOD, where METHOD is the HTTP method used by
+        C{request} (eg. C{http_GET}, C{http_POST}, etc.).
+
+        Generally, a subclass should implement those methods instead of
+        overriding this one.
+
+        C{http_*} methods are expected to provide the same interface and return
+        the same results as L{iweb.IResource}C{.renderHTTP} (and therefore this
+        method).
+
+        C{etag} and C{last-modified} are added to the response returned by the
+        C{http_*} method, if known.
+
+        If an appropriate C{http_*} method is not found, a
+        L{responsecode.NOT_ALLOWED}-status response is returned, with an
+        appropriate C{allow} header.
+
+        @param request: the request to process.
+        @return: an object adaptable to L{iweb.IResponse}.
+        """
+        method = getattr(self, "http_" + request.method, None)
+        if not method:
+            response = http.Response(responsecode.NOT_ALLOWED)
+            response.headers.setHeader("allow", self.allowedMethods())
+            return response
+
+        d = self.checkPreconditions(request)
+        if d is None:
+            return method(request)
+        else:
+            return d.addCallback(lambda _: method(request))
+
+    def http_OPTIONS(self, request):
+        """
+        Respond to an OPTIONS request.
+        @param request: the request to process.
+        @return: an object adaptable to L{iweb.IResponse}.
+        """
+        response = http.Response(responsecode.OK)
+        response.headers.setHeader("allow", self.allowedMethods())
+        return response
+
+    def http_TRACE(self, request):
+        """
+        Respond to a TRACE request.
+        @param request: the request to process.
+        @return: an object adaptable to L{iweb.IResponse}.
+        """
+        return server.doTrace(request)
+
+    def http_HEAD(self, request):
+        """
+        Respond to a HEAD request.
+        @param request: the request to process.
+        @return: an object adaptable to L{iweb.IResponse}.
+ """ + return self.http_GET(request) + + def http_GET(self, request): + """ + Respond to a GET request. + + This implementation validates that the request body is empty and then + dispatches the given C{request} to L{render} and returns its result. + + @param request: the request to process. + @return: an object adaptable to L{iweb.IResponse}. + """ + if request.stream.length != 0: + return responsecode.REQUEST_ENTITY_TOO_LARGE + + return self.render(request) + + def render(self, request): + """ + Subclasses should implement this method to do page rendering. + See L{http_GET}. + @param request: the request to process. + @return: an object adaptable to L{iweb.IResponse}. + """ + raise NotImplementedError("Subclass must implement render method.") + +class Resource(RenderMixin): + """ + An L{iweb.IResource} implementation with some convenient mechanisms for + locating children. + """ + implements(iweb.IResource) + + addSlash = False + + def locateChild(self, request, segments): + """ + Locates a child resource of this resource. + @param request: the request to process. + @param segments: a sequence of URL path segments. + @return: a tuple of C{(child, segments)} containing the child + of this resource which matches one or more of the given C{segments} in + sequence, and a list of remaining segments. + """ + w = getattr(self, 'child_%s' % (segments[0], ), None) + + if w: + r = iweb.IResource(w, None) + if r: + return r, segments[1:] + return w(request), segments[1:] + + factory = getattr(self, 'childFactory', None) + if factory is not None: + r = factory(request, segments[0]) + if r: + return r, segments[1:] + + return None, [] + + def child_(self, request): + """ + This method locates a child with a trailing C{"/"} in the URL. + @param request: the request to process. + """ + if self.addSlash and len(request.postpath) == 1: + return self + return None + + def putChild(self, path, child): + """ + Register a static child. + + This implementation registers children by assigning them to attributes + with a C{child_} prefix. C{resource.putChild("foo", child)} is + therefore same as C{o.child_foo = child}. + + @param path: the name of the child to register. You almost certainly + don't want C{"/"} in C{path}. If you want to add a "directory" + resource (e.g. C{/foo/}) specify C{path} as C{""}. + @param child: an object adaptable to L{iweb.IResource}. + """ + setattr(self, 'child_%s' % (path, ), child) + + def http_GET(self, request): + if self.addSlash and request.prepath[-1] != '': + # If this is a directory-ish resource... + return http.RedirectResponse(request.unparseURL(path=request.path+'/')) + + return super(Resource, self).http_GET(request) + + +class PostableResource(Resource): + """ + A L{Resource} capable of handling the POST request method. + + @cvar maxMem: maximum memory used during the parsing of the data. + @type maxMem: C{int} + @cvar maxFields: maximum number of form fields allowed. + @type maxFields: C{int} + @cvar maxSize: maximum size of the whole post allowed. + @type maxSize: C{int} + """ + maxMem = 100 * 1024 + maxFields = 1024 + maxSize = 10 * 1024 * 1024 + + def http_POST(self, request): + """ + Respond to a POST request. + Reads and parses the incoming body data then calls L{render}. + + @param request: the request to process. + @return: an object adaptable to L{iweb.IResponse}. 
+ """ + return server.parsePOSTData(request, + self.maxMem, self.maxFields, self.maxSize + ).addCallback(lambda res: self.render(request)) + + +class LeafResource(RenderMixin): + """ + A L{Resource} with no children. + """ + implements(iweb.IResource) + + def locateChild(self, request, segments): + return self, server.StopTraversal + +class RedirectResource(LeafResource): + """ + A L{LeafResource} which always performs a redirect. + """ + implements(iweb.IResource) + + def __init__(self, *args, **kwargs): + """ + Parameters are URL components and are the same as those for + L{urlparse.urlunparse}. URL components which are not specified will + default to the corresponding component of the URL of the request being + redirected. + """ + self._args = args + self._kwargs = kwargs + + def renderHTTP(self, request): + return http.RedirectResponse(request.unparseURL(*self._args, **self._kwargs)) + +class WrapperResource(object): + """ + An L{iweb.IResource} implementation which wraps a L{RenderMixin} instance + and provides a hook in which a subclass can implement logic that is called + before request processing on the contained L{Resource}. + """ + implements(iweb.IResource) + + def __init__(self, resource): + self.resource=resource + + def hook(self, request): + """ + Override this method in order to do something before passing control on + to the wrapped resource's C{renderHTTP} and C{locateChild} methods. + @return: None or a L{Deferred}. If a deferred object is + returned, it's value is ignored, but C{renderHTTP} and + C{locateChild} are chained onto the deferred as callbacks. + """ + raise NotImplementedError() + + def locateChild(self, request, segments): + x = self.hook(request) + if x is not None: + return x.addCallback(lambda data: (self.resource, segments)) + return self.resource, segments + + def renderHTTP(self, request): + x = self.hook(request) + if x is not None: + return x.addCallback(lambda data: self.resource) + return self.resource + + +__all__ = ['RenderMixin', 'Resource', 'PostableResource', 'LeafResource', 'WrapperResource'] diff --git a/xcap/web/responsecode.py b/xcap/web/responsecode.py new file mode 100644 index 0000000..942266d --- /dev/null +++ b/xcap/web/responsecode.py @@ -0,0 +1,114 @@ +# Copyright (c) 2001-2004 Twisted Matrix Laboratories. +# See LICENSE for details. 
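+
+# Usage sketch (illustrative): this module defines symbolic HTTP status codes
+# and a RESPONSES map from numeric code to reason phrase, e.g.:
+#
+#     from xcap.web import responsecode
+#     responsecode.RESPONSES[responsecode.NOT_FOUND]    # "Not Found"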
+ +CONTINUE = 100 +SWITCHING = 101 + +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 + +MULTIPLE_CHOICE = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 + +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTH_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 +REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 +UNPROCESSABLE_ENTITY = 422 # RFC 2518 +LOCKED = 423 # RFC 2518 +FAILED_DEPENDENCY = 424 # RFC 2518 + +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE_SPACE = 507 +NOT_EXTENDED = 510 + +RESPONSES = { + # 100 + CONTINUE: "Continue", + SWITCHING: "Switching Protocols", + + # 200 + OK: "OK", + CREATED: "Created", + ACCEPTED: "Accepted", + NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information", + NO_CONTENT: "No Content", + RESET_CONTENT: "Reset Content.", + PARTIAL_CONTENT: "Partial Content", + MULTI_STATUS: "Multi-Status", + + # 300 + MULTIPLE_CHOICE: "Multiple Choices", + MOVED_PERMANENTLY: "Moved Permanently", + FOUND: "Found", + SEE_OTHER: "See Other", + NOT_MODIFIED: "Not Modified", + USE_PROXY: "Use Proxy", + # 306 unused + TEMPORARY_REDIRECT: "Temporary Redirect", + + # 400 + BAD_REQUEST: "Bad Request", + UNAUTHORIZED: "Unauthorized", + PAYMENT_REQUIRED: "Payment Required", + FORBIDDEN: "Forbidden", + NOT_FOUND: "Not Found", + NOT_ALLOWED: "Method Not Allowed", + NOT_ACCEPTABLE: "Not Acceptable", + PROXY_AUTH_REQUIRED: "Proxy Authentication Required", + REQUEST_TIMEOUT: "Request Time-out", + CONFLICT: "Conflict", + GONE: "Gone", + LENGTH_REQUIRED: "Length Required", + PRECONDITION_FAILED: "Precondition Failed", + REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large", + REQUEST_URI_TOO_LONG: "Request-URI Too Long", + UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type", + REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable", + EXPECTATION_FAILED: "Expectation Failed", + UNPROCESSABLE_ENTITY: "Unprocessable Entity", + LOCKED: "Locked", + FAILED_DEPENDENCY: "Failed Dependency", + + # 500 + INTERNAL_SERVER_ERROR: "Internal Server Error", + NOT_IMPLEMENTED: "Not Implemented", + BAD_GATEWAY: "Bad Gateway", + SERVICE_UNAVAILABLE: "Service Unavailable", + GATEWAY_TIMEOUT: "Gateway Time-out", + HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported", + INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space", + NOT_EXTENDED: "Not Extended" + } + +# No __all__ necessary -- everything is exported diff --git a/xcap/web/server.py b/xcap/web/server.py new file mode 100644 index 0000000..99cdc79 --- /dev/null +++ b/xcap/web/server.py @@ -0,0 +1,575 @@ +# Copyright (c) 2001-2008 Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +This is a web-server which integrates with the twisted.internet +infrastructure. 
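+
+In sketch form (illustrative, not a full deployment): a L{Site} wraps the
+root resource and constructs a L{Request} for each incoming request::
+
+    site = Site(rootResource)    # rootResource: any iweb.IResource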
+""" + +# System Imports +import cgi, time, urlparse +from urllib import quote, unquote +from urlparse import urlsplit + +import weakref + +from zope.interface import implements +# Twisted Imports +from twisted.internet import defer +from twisted.python import log, failure + +# Sibling Imports +from xcap.web import http, iweb, fileupload, responsecode +from xcap.web import http_headers +from xcap.web.filter.range import rangefilter +from xcap.web import error + +from xcap.web import version as web_version +from twisted import __version__ as twisted_version + +VERSION = "Twisted/%s TwistedWeb/%s" % (twisted_version, web_version) +_errorMarker = object() + + +def defaultHeadersFilter(request, response): + if not response.headers.hasHeader('server'): + response.headers.setHeader('server', VERSION) + if not response.headers.hasHeader('date'): + response.headers.setHeader('date', time.time()) + return response +defaultHeadersFilter.handleErrors = True + +def preconditionfilter(request, response): + if request.method in ("GET", "HEAD"): + http.checkPreconditions(request, response) + return response + +def doTrace(request): + request = iweb.IRequest(request) + txt = "%s %s HTTP/%d.%d\r\n" % (request.method, request.uri, + request.clientproto[0], request.clientproto[1]) + + l=[] + for name, valuelist in request.headers.getAllRawHeaders(): + for value in valuelist: + l.append("%s: %s\r\n" % (name, value)) + txt += ''.join(l) + + return http.Response( + responsecode.OK, + {'content-type': http_headers.MimeType('message', 'http')}, + txt) + + +def parsePOSTData(request, maxMem=100*1024, maxFields=1024, + maxSize=10*1024*1024): + """ + Parse data of a POST request. + + @param request: the request to parse. + @type request: L{xcap.web.http.Request}. + @param maxMem: maximum memory used during the parsing of the data. + @type maxMem: C{int} + @param maxFields: maximum number of form fields allowed. + @type maxFields: C{int} + @param maxSize: maximum size of file upload allowed. + @type maxSize: C{int} + + @return: a deferred that will fire when the parsing is done. The deferred + itself doesn't hold a return value, the request is modified directly. 
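+
+    A typical call site looks like this sketch (C{render} is assumed to be
+    defined by the calling resource, as in L{PostableResource})::
+
+        def http_POST(self, request):
+            d = parsePOSTData(request)
+            return d.addCallback(lambda _: self.render(request))
+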
+ @rtype: C{defer.Deferred} + """ + if request.stream.length == 0: + return defer.succeed(None) + + parser = None + ctype = request.headers.getHeader('content-type') + + if ctype is None: + return defer.succeed(None) + + def updateArgs(data): + args = data + request.args.update(args) + + def updateArgsAndFiles(data): + args, files = data + request.args.update(args) + request.files.update(files) + + def error(f): + f.trap(fileupload.MimeFormatError) + raise http.HTTPError( + http.StatusResponse(responsecode.BAD_REQUEST, str(f.value))) + + if (ctype.mediaType == 'application' + and ctype.mediaSubtype == 'x-www-form-urlencoded'): + d = fileupload.parse_urlencoded(request.stream) + d.addCallbacks(updateArgs, error) + return d + elif (ctype.mediaType == 'multipart' + and ctype.mediaSubtype == 'form-data'): + boundary = ctype.params.get('boundary') + if boundary is None: + return defer.fail(http.HTTPError( + http.StatusResponse( + responsecode.BAD_REQUEST, + "Boundary not specified in Content-Type."))) + d = fileupload.parseMultipartFormData(request.stream, boundary, + maxMem, maxFields, maxSize) + d.addCallbacks(updateArgsAndFiles, error) + return d + else: + return defer.fail(http.HTTPError( + http.StatusResponse( + responsecode.BAD_REQUEST, + "Invalid content-type: %s/%s" % ( + ctype.mediaType, ctype.mediaSubtype)))) + + +class StopTraversal(object): + """ + Indicates to Request._handleSegment that it should stop handling + path segments. + """ + pass + + +class Request(http.Request): + """ + vars: + site + + remoteAddr + + scheme + host + port + path + params + querystring + + args + files + + prepath + postpath + + @ivar path: The path only (arguments not included). + @ivar args: All of the arguments, including URL and POST arguments. + @type args: A mapping of strings (the argument names) to lists of values. + i.e., ?foo=bar&foo=baz&quux=spam results in + {'foo': ['bar', 'baz'], 'quux': ['spam']}. + + """ + implements(iweb.IRequest) + + site = None + _initialprepath = None + responseFilters = [rangefilter, preconditionfilter, + error.defaultErrorHandler, defaultHeadersFilter] + + def __init__(self, *args, **kw): + if kw.has_key('site'): + self.site = kw['site'] + del kw['site'] + if kw.has_key('prepathuri'): + self._initialprepath = kw['prepathuri'] + del kw['prepathuri'] + + # Copy response filters from the class + self.responseFilters = self.responseFilters[:] + self.files = {} + self.resources = [] + http.Request.__init__(self, *args, **kw) + + def addResponseFilter(self, f, atEnd=False): + if atEnd: + self.responseFilters.append(f) + else: + self.responseFilters.insert(0, f) + + def unparseURL(self, scheme=None, host=None, port=None, + path=None, params=None, querystring=None, fragment=None): + """Turn the request path into a url string. For any pieces of + the url that are not specified, use the value from the + request. 
The arguments have the same meaning as the same named
+        attributes of Request."""
+
+        if scheme is None: scheme = self.scheme
+        if host is None: host = self.host
+        if port is None: port = self.port
+        if path is None: path = self.path
+        if params is None: params = self.params
+        if querystring is None: querystring = self.querystring
+        if fragment is None: fragment = ''
+
+        if port == http.defaultPortForScheme.get(scheme, 0):
+            hostport = host
+        else:
+            hostport = host + ':' + str(port)
+
+        return urlparse.urlunparse((
+            scheme, hostport, path,
+            params, querystring, fragment))
+
+    def _parseURL(self):
+        if self.uri[0] == '/':
+            # Can't use urlparse for request_uri because urlparse
+            # wants to be given an absolute or relative URI, not just
+            # an abs_path, and thus gets '//foo' wrong.
+            self.scheme = self.host = self.path = self.params = self.querystring = ''
+            if '?' in self.uri:
+                self.path, self.querystring = self.uri.split('?', 1)
+            else:
+                self.path = self.uri
+            if ';' in self.path:
+                self.path, self.params = self.path.split(';', 1)
+        else:
+            # It is an absolute uri, use standard urlparse
+            (self.scheme, self.host, self.path,
+             self.params, self.querystring, fragment) = urlparse.urlparse(self.uri)
+
+        if self.querystring:
+            self.args = cgi.parse_qs(self.querystring, True)
+        else:
+            self.args = {}
+
+        path = map(unquote, self.path[1:].split('/'))
+        if self._initialprepath:
+            # We were given an initial prepath -- this is for supporting
+            # CGI-ish applications where part of the path has already
+            # been processed
+            prepath = map(unquote, self._initialprepath[1:].split('/'))
+
+            if path[:len(prepath)] == prepath:
+                self.prepath = prepath
+                self.postpath = path[len(prepath):]
+            else:
+                self.prepath = []
+                self.postpath = path
+        else:
+            self.prepath = []
+            self.postpath = path
+        #print "_parseURL", self.uri, (self.uri, self.scheme, self.host, self.path, self.params, self.querystring)
+
+    def _fixupURLParts(self):
+        hostaddr, secure = self.chanRequest.getHostInfo()
+        if not self.scheme:
+            self.scheme = ('http', 'https')[secure]
+
+        if self.host:
+            self.host, self.port = http.splitHostPort(self.scheme, self.host)
+        else:
+            # If GET line wasn't an absolute URL
+            host = self.headers.getHeader('host')
+            if host:
+                self.host, self.port = http.splitHostPort(self.scheme, host)
+            else:
+                # When no hostname specified anywhere, either raise an
+                # error, or use the interface hostname, depending on
+                # protocol version
+                if self.clientproto >= (1,1):
+                    raise http.HTTPError(responsecode.BAD_REQUEST)
+                self.host = hostaddr.host
+                self.port = hostaddr.port
+
+
+    def process(self):
+        "Process a request."
+        try:
+            self.checkExpect()
+            resp = self.preprocessRequest()
+            if resp is not None:
+                self._cbFinishRender(resp).addErrback(self._processingFailed)
+                return
+            self._parseURL()
+            self._fixupURLParts()
+            self.remoteAddr = self.chanRequest.getRemoteHost()
+        except:
+            self._processingFailed(failure.Failure())
+            return
+
+        d = defer.Deferred()
+        d.addCallback(self._getChild, self.site.resource, self.postpath)
+        d.addCallback(lambda res, req: res.renderHTTP(req), self)
+        d.addCallback(self._cbFinishRender)
+        d.addErrback(self._processingFailed)
+        d.callback(None)
+
+    def preprocessRequest(self):
+        """Do any request processing that doesn't follow the normal
+        resource lookup procedure. "OPTIONS *" is handled here, for
+        example.
+
This would also be the place to do any CONNECT + processing.""" + + if self.method == "OPTIONS" and self.uri == "*": + response = http.Response(responsecode.OK) + response.headers.setHeader('allow', ('GET', 'HEAD', 'OPTIONS', 'TRACE')) + return response + # This is where CONNECT would go if we wanted it + return None + + def _getChild(self, _, res, path, updatepaths=True): + """Call res.locateChild, and pass the result on to _handleSegment.""" + + self.resources.append(res) + + if not path: + return res + + result = res.locateChild(self, path) + if isinstance(result, defer.Deferred): + return result.addCallback(self._handleSegment, res, path, updatepaths) + else: + return self._handleSegment(result, res, path, updatepaths) + + def _handleSegment(self, result, res, path, updatepaths): + """Handle the result of a locateChild call done in _getChild.""" + + newres, newpath = result + # If the child resource is None then display a error page + if newres is None: + raise http.HTTPError(responsecode.NOT_FOUND) + + # If we got a deferred then we need to call back later, once the + # child is actually available. + if isinstance(newres, defer.Deferred): + return newres.addCallback( + lambda actualRes: self._handleSegment( + (actualRes, newpath), res, path, updatepaths) + ) + + if path: + url = quote("/" + "/".join(path)) + else: + url = "/" + + if newpath is StopTraversal: + # We need to rethink how to do this. + #if newres is res: + self._rememberResource(res, url) + return res + #else: + # raise ValueError("locateChild must not return StopTraversal with a resource other than self.") + + newres = iweb.IResource(newres) + if newres is res: + assert not newpath is path, "URL traversal cycle detected when attempting to locateChild %r from resource %r." % (path, res) + assert len(newpath) < len(path), "Infinite loop impending..." + + if updatepaths: + # We found a Resource... update the request.prepath and postpath + for x in xrange(len(path) - len(newpath)): + self.prepath.append(self.postpath.pop(0)) + + child = self._getChild(None, newres, newpath, updatepaths=updatepaths) + self._rememberResource(child, url) + + return child + + _urlsByResource = weakref.WeakKeyDictionary() + + def _rememberResource(self, resource, url): + """ + Remember the URL of a visited resource. + """ + self._urlsByResource[resource] = url + return resource + + def urlForResource(self, resource): + """ + Looks up the URL of the given resource if this resource was found while + processing this request. Specifically, this includes the requested + resource, and resources looked up via L{locateResource}. + + Note that a resource may be found at multiple URIs; if the same resource + is visited at more than one location while processing this request, + this method will return one of those URLs, but which one is not defined, + nor whether the same URL is returned in subsequent calls. + + @param resource: the resource to find a URI for. This resource must + have been obtained from the request (ie. via its C{uri} attribute, or + through its C{locateResource} or C{locateChildResource} methods). + @return: a valid URL for C{resource} in this request. + @raise NoURLForResourceError: if C{resource} has no URL in this request + (because it was not obtained from the request). + """ + resource = self._urlsByResource.get(resource, None) + if resource is None: + raise NoURLForResourceError(resource) + return resource + + def locateResource(self, url): + """ + Looks up the resource with the given URL. 
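+
+        For example (sketch), C{request.locateResource("/users/joe")} starts
+        at the site's root resource, leaves C{prepath} and C{postpath}
+        untouched, and returns a Deferred firing with the resource or
+        C{None}.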
+ @param uri: The URL of the desired resource. + @return: a L{Deferred} resulting in the L{IResource} at the + given URL or C{None} if no such resource can be located. + @raise HTTPError: If C{url} is not a URL on the site that this + request is being applied to. The contained response will + have a status code of L{responsecode.BAD_GATEWAY}. + @raise HTTPError: If C{url} contains a query or fragment. + The contained response will have a status code of + L{responsecode.BAD_REQUEST}. + """ + if url is None: return None + + # + # Parse the URL + # + (scheme, host, path, query, fragment) = urlsplit(url) + + if query or fragment: + raise http.HTTPError(http.StatusResponse( + responsecode.BAD_REQUEST, + "URL may not contain a query or fragment: %s" % (url,) + )) + + # The caller shouldn't be asking a request on one server to lookup a + # resource on some other server. + if (scheme and scheme != self.scheme) or (host and host != self.headers.getHeader("host")): + raise http.HTTPError(http.StatusResponse( + responsecode.BAD_GATEWAY, + "URL is not on this site (%s://%s/): %s" % (scheme, self.headers.getHeader("host"), url) + )) + + segments = path.split("/") + assert segments[0] == "", "URL path didn't begin with '/': %s" % (path,) + segments = map(unquote, segments[1:]) + + def notFound(f): + f.trap(http.HTTPError) + if f.value.response.code != responsecode.NOT_FOUND: + return f + return None + + d = defer.maybeDeferred(self._getChild, None, self.site.resource, segments, updatepaths=False) + d.addCallback(self._rememberResource, path) + d.addErrback(notFound) + return d + + def locateChildResource(self, parent, childName): + """ + Looks up the child resource with the given name given the parent + resource. This is similar to locateResource(), but doesn't have to + start the lookup from the root resource, so it is potentially faster. + @param parent: the parent of the resource being looked up. This resource + must have been obtained from the request (ie. via its C{uri} attribute, + or through its C{locateResource} or C{locateChildResource} methods). + @param childName: the name of the child of C{parent} to looked up. + to C{parent}. + @return: a L{Deferred} resulting in the L{IResource} at the + given URL or C{None} if no such resource can be located. + @raise NoURLForResourceError: if C{resource} was not obtained from the + request. + """ + if parent is None or childName is None: + return None + + assert "/" not in childName, "Child name may not contain '/': %s" % (childName,) + + parentURL = self.urlForResource(parent) + if not parentURL.endswith("/"): + parentURL += "/" + url = parentURL + quote(childName) + + segment = childName + + def notFound(f): + f.trap(http.HTTPError) + if f.value.response.code != responsecode.NOT_FOUND: + return f + return None + + d = defer.maybeDeferred(self._getChild, None, parent, [segment], updatepaths=False) + d.addCallback(self._rememberResource, url) + d.addErrback(notFound) + return d + + def _processingFailed(self, reason): + if reason.check(http.HTTPError) is not None: + # If the exception was an HTTPError, leave it alone + d = defer.succeed(reason.value.response) + else: + # Otherwise, it was a random exception, so give a + # ICanHandleException implementer a chance to render the page. 
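+            # The Request itself provides renderHTTP_exception() (defined
+            # below), so adapting with a default of self makes that the
+            # fallback when no more specific handler is registered.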
+            def _processingFailed_inner(reason):
+                handler = iweb.ICanHandleException(self, self)
+                return handler.renderHTTP_exception(self, reason)
+            d = defer.maybeDeferred(_processingFailed_inner, reason)
+
+        d.addCallback(self._cbFinishRender)
+        d.addErrback(self._processingReallyFailed, reason)
+        return d
+
+    def _processingReallyFailed(self, reason, origReason):
+        log.msg("Exception rendering error page:", isErr=1)
+        log.err(reason)
+        log.msg("Original exception:", isErr=1)
+        log.err(origReason)
+
+        body = ("<html><head><title>Internal Server Error</title></head>"
+                "<body><h1>Internal Server Error</h1>An error occurred rendering "
+                "the requested page. Additionally, an error occurred rendering "
+                "the error page.</body></html>")
+
+        response = http.Response(
+            responsecode.INTERNAL_SERVER_ERROR,
+            {'content-type': http_headers.MimeType('text','html')},
+            body)
+        self.writeResponse(response)
+
+    def _cbFinishRender(self, result):
+        def filterit(response, f):
+            if (hasattr(f, 'handleErrors') or
+                (response.code >= 200 and response.code < 300)):
+                return f(self, response)
+            else:
+                return response
+
+        response = iweb.IResponse(result, None)
+        if response:
+            d = defer.Deferred()
+            for f in self.responseFilters:
+                d.addCallback(filterit, f)
+            d.addCallback(self.writeResponse)
+            d.callback(response)
+            return d
+
+        resource = iweb.IResource(result, None)
+        if resource:
+            self.resources.append(resource)
+            d = defer.maybeDeferred(resource.renderHTTP, self)
+            d.addCallback(self._cbFinishRender)
+            return d
+
+        raise TypeError("html is not a resource or a response")
+
+    def renderHTTP_exception(self, req, reason):
+        log.msg("Exception rendering:", isErr=1)
+        log.err(reason)
+
+        body = ("<html><head><title>Internal Server Error</title></head>"
+                "<body><h1>Internal Server Error</h1>An error occurred rendering "
+                "the requested page. More information is available in the server "
+                "log.</body></html>")
+
+        return http.Response(
+            responsecode.INTERNAL_SERVER_ERROR,
+            {'content-type': http_headers.MimeType('text','html')},
+            body)
+
+class Site(object):
+    def __init__(self, resource):
+        """Initialize.
+        """
+        self.resource = iweb.IResource(resource)
+
+    def __call__(self, *args, **kwargs):
+        return Request(site=self, *args, **kwargs)
+
+
+class NoURLForResourceError(RuntimeError):
+    def __init__(self, resource):
+        RuntimeError.__init__(self, "Resource %r has no URL in this request." % (resource,))
+        self.resource = resource
+
+
+__all__ = ['Request', 'Site', 'StopTraversal', 'VERSION', 'defaultHeadersFilter', 'doTrace', 'parsePOSTData', 'preconditionfilter', 'NoURLForResourceError']
diff --git a/xcap/web/static.py b/xcap/web/static.py
new file mode 100644
index 0000000..bc1067f
--- /dev/null
+++ b/xcap/web/static.py
@@ -0,0 +1,597 @@
+# Copyright (c) 2001-2008 Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+
+"""
+I deal with static resources.
+"""
+
+# System Imports
+import os, time, stat
+import tempfile
+
+# Sibling Imports
+from xcap.web import http_headers, resource
+from xcap.web import http, iweb, stream, responsecode, server, dirlist
+
+# Twisted Imports
+from twisted.python import filepath
+from twisted.internet.defer import maybeDeferred
+from zope.interface import implements
+
+class MetaDataMixin(object):
+    """
+    Mix-in class for L{iweb.IResource} which provides methods for accessing resource
+    metadata specified by HTTP.
+    """
+    def etag(self):
+        """
+        @return: The current etag for the resource if available, None otherwise.
+        """
+        return None
+
+    def lastModified(self):
+        """
+        @return: The last modified time of the resource if available, None otherwise.
+        """
+        return None
+
+    def creationDate(self):
+        """
+        @return: The creation date of the resource if available, None otherwise.
+        """
+        return None
+
+    def contentLength(self):
+        """
+        @return: The size in bytes of the resource if available, None otherwise.
+        """
+        return None
+
+    def contentType(self):
+        """
+        @return: The MIME type of the resource if available, None otherwise.
+        """
+        return None
+
+    def contentEncoding(self):
+        """
+        @return: The encoding of the resource if available, None otherwise.
+        """
+        return None
+
+    def displayName(self):
+        """
+        @return: The display name of the resource if available, None otherwise.
+        """
+        return None
+
+    def exists(self):
+        """
+        @return: True if the resource exists on the server, False otherwise.
+        """
+        return True
+
+class StaticRenderMixin(resource.RenderMixin, MetaDataMixin):
+    def checkPreconditions(self, request):
+        # This code replaces the code in resource.RenderMixin
+        if request.method not in ("GET", "HEAD"):
+            http.checkPreconditions(
+                request,
+                entityExists = self.exists(),
+                etag = self.etag(),
+                lastModified = self.lastModified(),
+            )
+
+        # Check per-method preconditions
+        method = getattr(self, "preconditions_" + request.method, None)
+        if method:
+            return method(request)
+
+    def renderHTTP(self, request):
+        """
+        See L{resource.RenderMixin.renderHTTP}.
+
+        This implementation automatically sets some headers on the response
+        based on data available from L{MetaDataMixin} methods.
+ """ + def setHeaders(response): + response = iweb.IResponse(response) + + # Don't provide additional resource information to error responses + if response.code < 400: + # Content-* headers refer to the response content, not + # (necessarily) to the resource content, so they depend on the + # request method, and therefore can't be set here. + for (header, value) in ( + ("etag", self.etag()), + ("last-modified", self.lastModified()), + ): + if value is not None: + response.headers.setHeader(header, value) + + return response + + def onError(f): + # If we get an HTTPError, run its response through setHeaders() as + # well. + f.trap(http.HTTPError) + return setHeaders(f.value.response) + + d = maybeDeferred(super(StaticRenderMixin, self).renderHTTP, request) + return d.addCallbacks(setHeaders, onError) + +class Data(resource.Resource): + """ + This is a static, in-memory resource. + """ + def __init__(self, data, type): + self.data = data + self.type = http_headers.MimeType.fromString(type) + self.created_time = time.time() + + def etag(self): + lastModified = self.lastModified() + return http_headers.ETag("%X-%X" % (lastModified, hash(self.data)), + weak=(time.time() - lastModified <= 1)) + + def lastModified(self): + return self.creationDate() + + def creationDate(self): + return self.created_time + + def contentLength(self): + return len(self.data) + + def contentType(self): + return self.type + + def render(self, req): + return http.Response( + responsecode.OK, + http_headers.Headers({'content-type': self.contentType()}), + stream=self.data) + + +class File(StaticRenderMixin): + """ + File is a resource that represents a plain non-interpreted file + (although it can look for an extension like .rpy or .cgi and hand the + file to a processor for interpretation if you wish). Its constructor + takes a file path. + + Alternatively, you can give a directory path to the constructor. In this + case the resource will represent that directory, and its children will + be files underneath that directory. This provides access to an entire + filesystem tree with a single Resource. + + If you map the URL 'http://server/FILE' to a resource created as + File('/tmp'), then http://server/FILE/ will return an HTML-formatted + listing of the /tmp/ directory, and http://server/FILE/foo/bar.html will + return the contents of /tmp/foo/bar.html . + """ + implements(iweb.IResource) + + def _getContentTypes(self): + if not hasattr(File, "_sharedContentTypes"): + File._sharedContentTypes = loadMimeTypes() + return File._sharedContentTypes + + contentTypes = property(_getContentTypes) + + contentEncodings = { + ".gz" : "gzip", + ".bz2": "bzip2" + } + + processors = {} + + indexNames = ["index", "index.html", "index.htm", "index.trp", "index.rpy"] + + type = None + + def __init__(self, path, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None): + """Create a file with the given path. 
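+
+        For example (sketch; the paths are illustrative), C{File('/var/www')}
+        serves that directory tree, while C{File('/var/www/index.html')}
+        serves the single file.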
+ """ + super(File, self).__init__() + + self.putChildren = {} + self.fp = filepath.FilePath(path) + # Remove the dots from the path to split + self.defaultType = defaultType + self.ignoredExts = list(ignoredExts) + if processors is not None: + self.processors = dict([ + (key.lower(), value) + for key, value in processors.items() + ]) + + if indexNames is not None: + self.indexNames = indexNames + + def exists(self): + return self.fp.exists() + + def etag(self): + if not self.fp.exists(): return None + + st = self.fp.statinfo + + # + # Mark ETag as weak if it was modified more recently than we can + # measure and report, as it could be modified again in that span + # and we then wouldn't know to provide a new ETag. + # + weak = (time.time() - st.st_mtime <= 1) + + return http_headers.ETag( + "%X-%X-%X" % (st.st_ino, st.st_size, st.st_mtime), + weak=weak + ) + + def lastModified(self): + if self.fp.exists(): + return self.fp.getmtime() + else: + return None + + def creationDate(self): + if self.fp.exists(): + return self.fp.getmtime() + else: + return None + + def contentLength(self): + if self.fp.exists(): + if self.fp.isfile(): + return self.fp.getsize() + else: + # Computing this would require rendering the resource; let's + # punt instead. + return None + else: + return None + + def _initTypeAndEncoding(self): + self._type, self._encoding = getTypeAndEncoding( + self.fp.basename(), + self.contentTypes, + self.contentEncodings, + self.defaultType + ) + + # Handle cases not covered by getTypeAndEncoding() + if self.fp.isdir(): self._type = "httpd/unix-directory" + + def contentType(self): + if not hasattr(self, "_type"): + self._initTypeAndEncoding() + return http_headers.MimeType.fromString(self._type) + + def contentEncoding(self): + if not hasattr(self, "_encoding"): + self._initTypeAndEncoding() + return self._encoding + + def displayName(self): + if self.fp.exists(): + return self.fp.basename() + else: + return None + + def ignoreExt(self, ext): + """Ignore the given extension. + + Serve file.ext if file is requested + """ + self.ignoredExts.append(ext) + + def directoryListing(self): + return dirlist.DirectoryLister(self.fp.path, + self.listChildren(), + self.contentTypes, + self.contentEncodings, + self.defaultType) + + def putChild(self, name, child): + """ + Register a child with the given name with this resource. + @param name: the name of the child (a URI path segment) + @param child: the child to register + """ + self.putChildren[name] = child + + def getChild(self, name): + """ + Look up a child resource. + @return: the child of this resource with the given name. + """ + if name == "": + return self + + child = self.putChildren.get(name, None) + if child: return child + + child_fp = self.fp.child(name) + if child_fp.exists(): + return self.createSimilarFile(child_fp.path) + else: + return None + + def listChildren(self): + """ + @return: a sequence of the names of all known children of this resource. + """ + children = self.putChildren.keys() + if self.fp.isdir(): + children += [c for c in self.fp.listdir() if c not in children] + return children + + def locateChild(self, req, segments): + """ + See L{IResource}C{.locateChild}. + """ + # If getChild() finds a child resource, return it + child = self.getChild(segments[0]) + if child is not None: return (child, segments[1:]) + + # If we're not backed by a directory, we have no children. + # But check for existance first; we might be a collection resource + # that the request wants created. 
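+        # restat(False) refreshes the cached stat information without
+        # raising if the file has disappeared since the last check.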
+ self.fp.restat(False) + if self.fp.exists() and not self.fp.isdir(): return (None, ()) + + # OK, we need to return a child corresponding to the first segment + path = segments[0] + + if path: + fpath = self.fp.child(path) + else: + # Request is for a directory (collection) resource + return (self, server.StopTraversal) + + # Don't run processors on directories - if someone wants their own + # customized directory rendering, subclass File instead. + if fpath.isfile(): + processor = self.processors.get(fpath.splitext()[1].lower()) + if processor: + return ( + processor(fpath.path), + segments[1:]) + + elif not fpath.exists(): + sibling_fpath = fpath.siblingExtensionSearch(*self.ignoredExts) + if sibling_fpath is not None: + fpath = sibling_fpath + + return self.createSimilarFile(fpath.path), segments[1:] + + def renderHTTP(self, req): + self.fp.restat(False) + return super(File, self).renderHTTP(req) + + def render(self, req): + """You know what you doing.""" + if not self.fp.exists(): + return responsecode.NOT_FOUND + + if self.fp.isdir(): + if req.uri[-1] != "/": + # Redirect to include trailing '/' in URI + return http.RedirectResponse(req.unparseURL(path=req.path+'/')) + else: + ifp = self.fp.childSearchPreauth(*self.indexNames) + if ifp: + # Render from the index file + standin = self.createSimilarFile(ifp.path) + else: + # Render from a DirectoryLister + standin = dirlist.DirectoryLister( + self.fp.path, + self.listChildren(), + self.contentTypes, + self.contentEncodings, + self.defaultType + ) + return standin.render(req) + + try: + f = self.fp.open() + except IOError, e: + import errno + if e[0] == errno.EACCES: + return responsecode.FORBIDDEN + elif e[0] == errno.ENOENT: + return responsecode.NOT_FOUND + else: + raise + + response = http.Response() + response.stream = stream.FileStream(f, 0, self.fp.getsize()) + + for (header, value) in ( + ("content-type", self.contentType()), + ("content-encoding", self.contentEncoding()), + ): + if value is not None: + response.headers.setHeader(header, value) + + return response + + def createSimilarFile(self, path): + return self.__class__(path, self.defaultType, self.ignoredExts, + self.processors, self.indexNames[:]) + + +class FileSaver(resource.PostableResource): + allowedTypes = (http_headers.MimeType('text', 'plain'), + http_headers.MimeType('text', 'html'), + http_headers.MimeType('text', 'css')) + + def __init__(self, destination, expectedFields=[], allowedTypes=None, maxBytes=1000000, permissions=0644): + self.destination = destination + self.allowedTypes = allowedTypes or self.allowedTypes + self.maxBytes = maxBytes + self.expectedFields = expectedFields + self.permissions = permissions + + def makeUniqueName(self, filename): + """Called when a unique filename is needed. + + filename is the name of the file as given by the client. + + Returns the fully qualified path of the file to create. The + file must not yet exist. + """ + + return tempfile.mktemp(suffix=os.path.splitext(filename)[1], dir=self.destination) + + def isSafeToWrite(self, filename, mimetype, filestream): + """Returns True if it's "safe" to write this file, + otherwise it raises an exception. 
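+
+        Subclasses may override this to add their own checks; the default
+        implementation below enforces C{maxBytes} and C{allowedTypes}.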
+        """
+
+        if filestream.length > self.maxBytes:
+            raise IOError("%s: File exceeds maximum length (%d > %d)" % (filename,
+                                                                         filestream.length,
+                                                                         self.maxBytes))
+
+        if mimetype not in self.allowedTypes:
+            raise IOError("%s: File type not allowed %s" % (filename, mimetype))
+
+        return True
+
+    def writeFile(self, filename, mimetype, fileobject):
+        """Does the I/O dirty work after it calls isSafeToWrite to make
+        sure it's safe to write this file.
+        """
+        filestream = stream.FileStream(fileobject)
+
+        if self.isSafeToWrite(filename, mimetype, filestream):
+            outname = self.makeUniqueName(filename)
+
+            flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0)
+
+            fileobject = os.fdopen(os.open(outname, flags, self.permissions), 'wb', 0)
+
+            stream.readIntoFile(filestream, fileobject)
+
+            return outname
+
+    def render(self, req):
+        content = ["<html><body>"]
+
+        if req.files:
+            for fieldName in req.files:
+                if fieldName in self.expectedFields:
+                    for finfo in req.files[fieldName]:
+                        try:
+                            outname = self.writeFile(*finfo)
+                            content.append("Saved file %s<br />" % outname)
+                        except IOError, err:
+                            content.append(str(err) + "<br />")
+                else:
+                    content.append("%s is not a valid field" % fieldName)
+
+        else:
+            content.append("No files given")
+
+        content.append("</body></html>")
+
+        return http.Response(responsecode.OK, {}, stream='\n'.join(content))
+
+
+# FIXME: hi there I am a broken class
+# """I contain AsIsProcessor, which serves files 'As Is'
+#    Inspired by Apache's mod_asis
+# """
+#
+# class ASISProcessor:
+#     implements(iweb.IResource)
+#
+#     def __init__(self, path):
+#         self.path = path
+#
+#     def renderHTTP(self, request):
+#         request.startedWriting = 1
+#         return File(self.path)
+#
+#     def locateChild(self, request):
+#         return None, ()
+
+##
+# Utilities
+##
+
+dangerousPathError = http.HTTPError(responsecode.NOT_FOUND) #"Invalid request URL."
+
+def isDangerous(path):
+    return path == '..' or '/' in path or os.sep in path
+
+def addSlash(request):
+    return "http%s://%s%s/" % (
+        request.isSecure() and 's' or '',
+        request.getHeader("host"),
+        (request.uri.split('?')[0]))
+
+def loadMimeTypes(mimetype_locations=['/etc/mime.types']):
+    """
+    Multiple file locations containing mime-types can be passed as a list.
+    The files will be sourced in that order, overriding mime-types from the
+    files sourced beforehand, but only if a new entry explicitly overrides
+    the current entry.
+    """
+    import mimetypes
+    # Grab Python's built-in mimetypes dictionary.
+    contentTypes = mimetypes.types_map
+    # Update Python's semi-erroneous dictionary with a few of the
+    # usual suspects.
+    contentTypes.update(
+        {
+            '.conf': 'text/plain',
+            '.diff': 'text/plain',
+            '.exe': 'application/x-executable',
+            '.flac': 'audio/x-flac',
+            '.java': 'text/plain',
+            '.ogg': 'application/ogg',
+            '.oz': 'text/x-oz',
+            '.swf': 'application/x-shockwave-flash',
+            '.tgz': 'application/x-gtar',
+            '.wml': 'text/vnd.wap.wml',
+            '.xul': 'application/vnd.mozilla.xul+xml',
+            '.py': 'text/plain',
+            '.patch': 'text/plain',
+        }
+    )
+    # Users can override these mime-types by loading them from configuration
+    # files (this defaults to ['/etc/mime.types']).
+    for location in mimetype_locations:
+        if os.path.exists(location):
+            contentTypes.update(mimetypes.read_mime_types(location))
+
+    return contentTypes
+
+def getTypeAndEncoding(filename, types, encodings, defaultType):
+    p, ext = os.path.splitext(filename)
+    ext = ext.lower()
+    if encodings.has_key(ext):
+        enc = encodings[ext]
+        ext = os.path.splitext(p)[1].lower()
+    else:
+        enc = None
+    type = types.get(ext, defaultType)
+    return type, enc
+
+##
+# Test code
+##
+
+if __name__ == '__builtin__':
+    # Running from twistd -y
+    from twisted.application import service, strports
+    from xcap.web import server
+    res = File('/')
+    application = service.Application("demo")
+    s = strports.service('8080', server.Site(res))
+    s.setServiceParent(application)
diff --git a/xcap/web/stream.py b/xcap/web/stream.py
new file mode 100644
index 0000000..3b39eb8
--- /dev/null
+++ b/xcap/web/stream.py
@@ -0,0 +1,1082 @@
+
+"""
+The stream module provides a simple abstraction of streaming
+data. While Twisted already has some provisions for handling this in
+its Producer/Consumer model, the rather complex interactions between
+producer and consumer makes it difficult to implement something like
+the CompoundStream object. Thus, this API.
+
+The IStream interface is very simple. It consists of two methods:
+read, and close. The read method should either return some data, None
+if there is no data left to read, or a Deferred. Close frees up any
+underlying resources and causes read to return None forevermore.
+
+IByteStream adds a bit more to the API:
+1) read is required to return objects conforming to the buffer interface.
+2) .length, which may be either an integer number of bytes remaining, or
+None if unknown
+3) .split(position). Split takes a position, and splits the
+stream in two pieces, returning the two new streams. Using the
+original stream after calling split is not allowed.
+
+There are two builtin source stream classes: FileStream and
+MemoryStream. The first produces data from a file object, the second
+from a buffer in memory. Any number of these can be combined into one
+stream with the CompoundStream object. Then, to interface with other
+parts of Twisted, there are two transceivers: StreamProducer and
+ProducerStream. The first takes a stream and turns it into an
+IPushProducer, which will write to a consumer. The second is a
+consumer which is a stream, so that other producers can write to it.
+"""
+
+from __future__ import generators
+
+import copy, os, types, sys
+from zope.interface import Interface, Attribute, implements
+from twisted.internet.defer import Deferred
+from twisted.internet import interfaces as ti_interfaces, defer, reactor, protocol, error as ti_error
+from twisted.python import components, log
+from twisted.python.failure import Failure
+
+# Python 2.4.2 (only) has a broken mmap that leaks a fd every time you call it.
+if sys.version_info[0:3] != (2,4,2):
+    try:
+        import mmap
+    except ImportError:
+        mmap = None
+else:
+    mmap = None
+
+##############################
+####      Interfaces      ####
+##############################
+
+class IStream(Interface):
+    """A stream of arbitrary data."""
+
+    def read():
+        """Read some data.
+
+        Returns some object representing the data.
+        If there is no more data available, returns None.
+        Can also return a Deferred resulting in one of the above.
+
+        Errors may be indicated by exception or by a Deferred of a Failure.
+        """
+
+    def close():
+        """Prematurely close. Should also cause further reads to
+        return None."""
+
+class IByteStream(IStream):
+    """A stream which is of bytes."""
+
+    length = Attribute("""How much data is in this stream. Can be None if unknown.""")
+
+    def read():
+        """Read some data.
+
+        Returns an object conforming to the buffer interface, or
+        if there is no more data available, returns None.
+        Can also return a Deferred resulting in one of the above.
+
+        Errors may be indicated by exception or by a Deferred of a Failure.
+        """
+    def split(point):
+        """Split this stream into two, at byte position 'point'.
+
+        Returns a tuple of (before, after). After calling split, no other
+        methods should be called on this stream. Doing so will have undefined
+        behavior.
+
+        If you cannot implement split easily, you may implement it as::
+
+            return fallbackSplit(self, point)
+        """
+
+    def close():
+        """Prematurely close this stream. Should also cause further reads to
+        return None. Additionally, .length should be set to 0.
+        """
+
+class ISendfileableStream(Interface):
+    def read(sendfile=False):
+        """
+        Read some data.
+        If sendfile == False, returns an object conforming to the buffer
+        interface, or else a Deferred.
+
+        If sendfile == True, returns either the above, or a SendfileBuffer.
+ """ + +class SimpleStream(object): + """Superclass of simple streams with a single buffer and a offset and length + into that buffer.""" + implements(IByteStream) + + length = None + start = None + + def read(self): + return None + + def close(self): + self.length = 0 + + def split(self, point): + if self.length is not None: + if point > self.length: + raise ValueError("split point (%d) > length (%d)" % (point, self.length)) + b = copy.copy(self) + self.length = point + if b.length is not None: + b.length -= point + b.start += point + return (self, b) + +############################## +#### FileStream #### +############################## + +# maximum mmap size +MMAP_LIMIT = 4*1024*1024 +# minimum mmap size +MMAP_THRESHOLD = 8*1024 + +# maximum sendfile length +SENDFILE_LIMIT = 16777216 +# minimum sendfile size +SENDFILE_THRESHOLD = 256 + +def mmapwrapper(*args, **kwargs): + """ + Python's mmap call sucks and ommitted the "offset" argument for no + discernable reason. Replace this with a mmap module that has offset. + """ + + offset = kwargs.get('offset', None) + if offset in [None, 0]: + if 'offset' in kwargs: + del kwargs['offset'] + else: + raise mmap.error("mmap: Python sucks and does not support offset.") + return mmap.mmap(*args, **kwargs) + +class FileStream(SimpleStream): + implements(ISendfileableStream) + """A stream that reads data from a file. File must be a normal + file that supports seek, (e.g. not a pipe or device or socket).""" + # 65K, minus some slack + CHUNK_SIZE = 2 ** 2 ** 2 ** 2 - 32 + + f = None + def __init__(self, f, start=0, length=None, useMMap=bool(mmap)): + """ + Create the stream from file f. If you specify start and length, + use only that portion of the file. + """ + self.f = f + self.start = start + if length is None: + self.length = os.fstat(f.fileno()).st_size + else: + self.length = length + self.useMMap = useMMap + + def read(self, sendfile=False): + if self.f is None: + return None + + length = self.length + if length == 0: + self.f = None + return None + + if sendfile and length > SENDFILE_THRESHOLD: + # XXX: Yay using non-existent sendfile support! + # FIXME: if we return a SendfileBuffer, and then sendfile + # fails, then what? Or, what if file is too short? + readSize = min(length, SENDFILE_LIMIT) + res = SendfileBuffer(self.f, self.start, readSize) + self.length -= readSize + self.start += readSize + return res + + if self.useMMap and length > MMAP_THRESHOLD: + readSize = min(length, MMAP_LIMIT) + try: + res = mmapwrapper(self.f.fileno(), readSize, + access=mmap.ACCESS_READ, offset=self.start) + #madvise(res, MADV_SEQUENTIAL) + self.length -= readSize + self.start += readSize + return res + except mmap.error: + pass + + # Fall back to standard read. + readSize = min(length, self.CHUNK_SIZE) + + self.f.seek(self.start) + b = self.f.read(readSize) + bytesRead = len(b) + if not bytesRead: + raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length)) + else: + self.length -= bytesRead + self.start += bytesRead + return b + + def close(self): + self.f = None + SimpleStream.close(self) + +components.registerAdapter(FileStream, file, IByteStream) + +############################## +#### MemoryStream #### +############################## + +class MemoryStream(SimpleStream): + """A stream that reads data from a buffer object.""" + def __init__(self, mem, start=0, length=None): + """ + Create the stream from buffer object mem. If you specify start and length, + use only that portion of the buffer. 
+        """
+        self.mem = mem
+        self.start = start
+        if length is None:
+            self.length = len(mem) - start
+        else:
+            if len(mem) < start + length:
+                raise ValueError("len(mem) < start + length")
+            self.length = length
+
+    def read(self):
+        if self.mem is None:
+            return None
+        if self.length == 0:
+            result = None
+        else:
+            result = buffer(self.mem, self.start, self.length)
+        self.mem = None
+        self.length = 0
+        return result
+
+    def close(self):
+        self.mem = None
+        SimpleStream.close(self)
+
+components.registerAdapter(MemoryStream, str, IByteStream)
+components.registerAdapter(MemoryStream, types.BufferType, IByteStream)
+
+##############################
+####    CompoundStream    ####
+##############################
+
+class CompoundStream(object):
+    """A stream which is composed of many other streams.
+
+    Call addStream to add substreams.
+    """
+
+    implements(IByteStream, ISendfileableStream)
+    deferred = None
+    length = 0
+
+    def __init__(self, buckets=()):
+        self.buckets = [IByteStream(s) for s in buckets]
+
+    def addStream(self, bucket):
+        """Add a stream to the output"""
+        bucket = IByteStream(bucket)
+        self.buckets.append(bucket)
+        if self.length is not None:
+            if bucket.length is None:
+                self.length = None
+            else:
+                self.length += bucket.length
+
+    def read(self, sendfile=False):
+        if self.deferred is not None:
+            raise RuntimeError("Call to read while read is already outstanding")
+
+        if not self.buckets:
+            return None
+
+        if sendfile and ISendfileableStream.providedBy(self.buckets[0]):
+            try:
+                result = self.buckets[0].read(sendfile)
+            except:
+                return self._gotFailure(Failure())
+        else:
+            try:
+                result = self.buckets[0].read()
+            except:
+                return self._gotFailure(Failure())
+
+        if isinstance(result, Deferred):
+            self.deferred = result
+            result.addCallbacks(self._gotRead, self._gotFailure, (sendfile,))
+            return result
+
+        return self._gotRead(result, sendfile)
+
+    def _gotFailure(self, f):
+        self.deferred = None
+        del self.buckets[0]
+        self.close()
+        return f
+
+    def _gotRead(self, result, sendfile):
+        self.deferred = None
+        if result is None:
+            del self.buckets[0]
+            # Next bucket
+            return self.read(sendfile)
+
+        if self.length is not None:
+            self.length -= len(result)
+        return result
+
+    def split(self, point):
+        origPoint = point
+        # 'num' is the index of the bucket currently being examined.
+        for num, bucket in enumerate(self.buckets):
+            if point == 0:
+                b = CompoundStream()
+                b.buckets = self.buckets[num:]
+                del self.buckets[num:]
+                return self, b
+
+            if bucket.length is None:
+                # Indeterminate length bucket.
+                # give up and use fallback splitter.
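+                # (fallbackSplit defers the work: it returns a
+                # TruncaterStream/PostTruncaterStream pair which perform
+                # the split lazily, as the data is actually read.)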
+                return fallbackSplit(self, origPoint)
+
+            if point < bucket.length:
+                before, after = bucket.split(point)
+                b = CompoundStream()
+                b.buckets = self.buckets[num:]
+                b.buckets[0] = after
+
+                del self.buckets[num+1:]
+                self.buckets[num] = before
+                return self, b
+
+            point -= bucket.length
+
+    def close(self):
+        for bucket in self.buckets:
+            bucket.close()
+        self.buckets = []
+        self.length = 0
+
+
+##############################
+####      readStream      ####
+##############################
+
+class _StreamReader(object):
+    """Process a stream's data using callbacks for data and stream finish."""
+
+    def __init__(self, stream, gotDataCallback):
+        self.stream = stream
+        self.gotDataCallback = gotDataCallback
+        self.result = Deferred()
+
+    def run(self):
+        # self.result may be del'd in _read()
+        result = self.result
+        self._read()
+        return result
+
+    def _read(self):
+        try:
+            result = self.stream.read()
+        except:
+            self._gotError(Failure())
+            return
+        if isinstance(result, Deferred):
+            result.addCallbacks(self._gotData, self._gotError)
+        else:
+            self._gotData(result)
+
+    def _gotError(self, failure):
+        result = self.result
+        del self.result, self.gotDataCallback, self.stream
+        result.errback(failure)
+
+    def _gotData(self, data):
+        if data is None:
+            result = self.result
+            del self.result, self.gotDataCallback, self.stream
+            result.callback(None)
+            return
+        try:
+            self.gotDataCallback(data)
+        except:
+            self._gotError(Failure())
+            return
+        reactor.callLater(0, self._read)
+
+def readStream(stream, gotDataCallback):
+    """Pass a stream's data to a callback.
+
+    Returns Deferred which will be triggered on finish. Errors in
+    reading the stream or in processing it will be returned via this
+    Deferred.
+    """
+    return _StreamReader(stream, gotDataCallback).run()
+
+
+def readAndDiscard(stream):
+    """Read all the data from the given stream, and throw it out.
+
+    Returns Deferred which will be triggered on finish.
+    """
+    return readStream(stream, lambda _: None)
+
+def readIntoFile(stream, outFile):
+    """Read a stream and write it into a file.
+
+    Returns Deferred which will be triggered on finish.
+    """
+    def done(_):
+        outFile.close()
+        return _
+    return readStream(stream, outFile.write).addBoth(done)
+
+def connectStream(inputStream, factory):
+    """Connect a protocol constructed from a factory to stream.
+
+    Returns an output stream from the protocol.
+
+    The protocol's transport will have a finish() method it should
+    call when done writing.
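+
+    A minimal sketch (SomeProtocol is illustrative, not part of this
+    module; it receives the input via dataReceived and writes its output
+    with self.transport.write, calling self.transport.finish() when
+    done)::
+
+        from twisted.internet.protocol import Factory
+
+        factory = Factory()
+        factory.protocol = SomeProtocol
+        outputStream = connectStream(inputStream, factory)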
+    """
+    # XXX deal better with addresses
+    p = factory.buildProtocol(None)
+    out = ProducerStream()
+    out.disconnecting = False # XXX for LineReceiver suckage
+    p.makeConnection(out)
+    readStream(inputStream, lambda _: p.dataReceived(_)).addCallbacks(
+        lambda _: p.connectionLost(ti_error.ConnectionDone()), lambda _: p.connectionLost(_))
+    return out
+
+##############################
+####    fallbackSplit     ####
+##############################
+
+def fallbackSplit(stream, point):
+    after = PostTruncaterStream(stream, point)
+    before = TruncaterStream(stream, point, after)
+    return (before, after)
+
+class TruncaterStream(object):
+    def __init__(self, stream, point, postTruncater):
+        self.stream = stream
+        self.length = point
+        self.postTruncater = postTruncater
+
+    def read(self):
+        if self.length == 0:
+            if self.postTruncater is not None:
+                postTruncater = self.postTruncater
+                self.postTruncater = None
+                postTruncater.sendInitialSegment(self.stream.read())
+            self.stream = None
+            return None
+
+        result = self.stream.read()
+        if isinstance(result, Deferred):
+            return result.addCallback(self._gotRead)
+        else:
+            return self._gotRead(result)
+
+    def _gotRead(self, data):
+        if data is None:
+            raise ValueError("Ran out of data for a split of an indeterminate length source")
+        if self.length >= len(data):
+            self.length -= len(data)
+            return data
+        else:
+            before = buffer(data, 0, self.length)
+            after = buffer(data, self.length)
+            self.length = 0
+            if self.postTruncater is not None:
+                postTruncater = self.postTruncater
+                self.postTruncater = None
+                postTruncater.sendInitialSegment(after)
+            self.stream = None
+            return before
+
+    def split(self, point):
+        if point > self.length:
+            raise ValueError("split point (%d) > length (%d)" % (point, self.length))
+
+        post = PostTruncaterStream(self.stream, point)
+        trunc = TruncaterStream(post, self.length - point, self.postTruncater)
+        self.length = point
+        self.postTruncater = post
+        return self, trunc
+
+    def close(self):
+        if self.postTruncater is not None:
+            self.postTruncater.notifyClosed(self)
+        else:
+            # Nothing cares about the rest of the stream
+            self.stream.close()
+            self.stream = None
+            self.length = 0
+
+
+class PostTruncaterStream(object):
+    deferred = None
+    sentInitialSegment = False
+    truncaterClosed = None
+    closed = False
+
+    length = None
+    def __init__(self, stream, point):
+        self.stream = stream
+        self.deferred = Deferred()
+        if stream.length is not None:
+            self.length = stream.length - point
+
+    def read(self):
+        if not self.sentInitialSegment:
+            self.sentInitialSegment = True
+            if self.truncaterClosed is not None:
+                readAndDiscard(self.truncaterClosed)
+                self.truncaterClosed = None
+            return self.deferred
+
+        return self.stream.read()
+
+    def split(self, point):
+        return fallbackSplit(self, point)
+
+    def close(self):
+        self.closed = True
+        if self.truncaterClosed is not None:
+            # have first half close itself
+            self.truncaterClosed.postTruncater = None
+            self.truncaterClosed.close()
+        elif self.sentInitialSegment:
+            # first half already finished up
+            self.stream.close()
+
+        self.deferred = None
+
+    # Callbacks from TruncaterStream
+    def sendInitialSegment(self, data):
+        if self.closed:
+            # First half finished, we don't want data.
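+            # (The reader of the second half already called close(), so
+            # the initial segment is discarded and the underlying stream
+            # shut down.)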
+            self.stream.close()
+            self.stream = None
+        if self.deferred is not None:
+            if isinstance(data, Deferred):
+                data.chainDeferred(self.deferred)
+            else:
+                self.deferred.callback(data)
+
+    def notifyClosed(self, truncater):
+        if self.closed:
+            # we are closed, have first half really close
+            truncater.postTruncater = None
+            truncater.close()
+        elif self.sentInitialSegment:
+            # We are trying to read, read up first half
+            readAndDiscard(truncater)
+        else:
+            # Idle, store closed info.
+            self.truncaterClosed = truncater
+
+########################################
+####  ProducerStream/StreamProducer ####
+########################################
+
+class ProducerStream(object):
+    """Turns producers into an IByteStream.
+    Thus, implements IConsumer and IByteStream."""
+
+    implements(IByteStream, ti_interfaces.IConsumer)
+    length = None
+    closed = False
+    failed = False
+    producer = None
+    producerPaused = False
+    deferred = None
+
+    bufferSize = 5
+
+    def __init__(self, length=None):
+        self.buffer = []
+        self.length = length
+
+    # IByteStream implementation
+    def read(self):
+        if self.buffer:
+            return self.buffer.pop(0)
+        elif self.closed:
+            self.length = 0
+            if self.failed:
+                f = self.failure
+                del self.failure
+                return defer.fail(f)
+            return None
+        else:
+            deferred = self.deferred = Deferred()
+            if self.producer is not None and (not self.streamingProducer
+                                              or self.producerPaused):
+                self.producerPaused = False
+                self.producer.resumeProducing()
+
+            return deferred
+
+    def split(self, point):
+        return fallbackSplit(self, point)
+
+    def close(self):
+        """Called by reader of stream when it is done reading."""
+        self.buffer = []
+        self.closed = True
+        if self.producer is not None:
+            self.producer.stopProducing()
+            self.producer = None
+        self.deferred = None
+
+    # IConsumer implementation
+    def write(self, data):
+        if self.closed:
+            return
+
+        if self.deferred:
+            deferred = self.deferred
+            self.deferred = None
+            deferred.callback(data)
+        else:
+            self.buffer.append(data)
+            if (self.producer is not None and self.streamingProducer
+                    and len(self.buffer) > self.bufferSize):
+                self.producer.pauseProducing()
+                self.producerPaused = True
+
+    def finish(self, failure=None):
+        """Called by producer when it is done.
+
+        If the optional failure argument is passed a Failure instance,
+        the stream will return it as errback on next Deferred.
+        """
+        self.closed = True
+        if not self.buffer:
+            self.length = 0
+        if self.deferred is not None:
+            deferred = self.deferred
+            self.deferred = None
+            if failure is not None:
+                self.failed = True
+                deferred.errback(failure)
+            else:
+                deferred.callback(None)
+        else:
+            if failure is not None:
+                self.failed = True
+                self.failure = failure
+
+    def registerProducer(self, producer, streaming):
+        if self.producer is not None:
+            raise RuntimeError("Cannot register producer %s, because producer %s was never unregistered."
+                               % (producer, self.producer))
+
+        if self.closed:
+            producer.stopProducing()
+        else:
+            self.producer = producer
+            self.streamingProducer = streaming
+            if not streaming:
+                producer.resumeProducing()
+
+    def unregisterProducer(self):
+        self.producer = None
+
+class StreamProducer(object):
+    """A push producer which gets its data by reading a stream."""
+    implements(ti_interfaces.IPushProducer)
+
+    deferred = None
+    finishedCallback = None
+    paused = False
+    consumer = None
+
+    def __init__(self, stream, enforceStr=True):
+        self.stream = stream
+        self.enforceStr = enforceStr
+
+    def beginProducing(self, consumer):
+        if self.stream is None:
+            return defer.succeed(None)
+
+        self.consumer = consumer
+        finishedCallback = self.finishedCallback = Deferred()
+        self.consumer.registerProducer(self, True)
+        self.resumeProducing()
+        return finishedCallback
+
+    def resumeProducing(self):
+        self.paused = False
+        if self.deferred is not None:
+            return
+
+        try:
+            data = self.stream.read()
+        except:
+            self.stopProducing(Failure())
+            return
+
+        if isinstance(data, Deferred):
+            self.deferred = data.addCallbacks(self._doWrite, self.stopProducing)
+        else:
+            self._doWrite(data)
+
+    def _doWrite(self, data):
+        if self.consumer is None:
+            return
+        if data is None:
+            # The end.
+            if self.consumer is not None:
+                self.consumer.unregisterProducer()
+            if self.finishedCallback is not None:
+                self.finishedCallback.callback(None)
+            self.finishedCallback = self.deferred = self.consumer = self.stream = None
+            return
+
+        self.deferred = None
+        if self.enforceStr:
+            # XXX: sucks that we have to do this. make transport.write(buffer) work!
+            data = str(buffer(data))
+        self.consumer.write(data)
+
+        if not self.paused:
+            self.resumeProducing()
+
+    def pauseProducing(self):
+        self.paused = True
+
+    def stopProducing(self, failure=ti_error.ConnectionLost()):
+        if self.consumer is not None:
+            self.consumer.unregisterProducer()
+        if self.finishedCallback is not None:
+            if failure is not None:
+                self.finishedCallback.errback(failure)
+            else:
+                self.finishedCallback.callback(None)
+            self.finishedCallback = None
+        self.paused = True
+        if self.stream is not None:
+            self.stream.close()
+
+        self.finishedCallback = self.deferred = self.consumer = self.stream = None
+
+##############################
+####   ProcessStreamer    ####
+##############################
+
+class _ProcessStreamerProtocol(protocol.ProcessProtocol):
+
+    def __init__(self, inputStream, outStream, errStream):
+        self.inputStream = inputStream
+        self.outStream = outStream
+        self.errStream = errStream
+        self.resultDeferred = defer.Deferred()
+
+    def connectionMade(self):
+        p = StreamProducer(self.inputStream)
+        # if the process stopped reading from the input stream,
+        # this is not an error condition, so it oughtn't result
+        # in a ConnectionLost() from the input stream:
+        p.stopProducing = lambda err=None: StreamProducer.stopProducing(p, err)
+
+        d = p.beginProducing(self.transport)
+        d.addCallbacks(lambda _: self.transport.closeStdin(),
+                       self._inputError)
+
+    def _inputError(self, f):
+        log.msg("Error in input stream for %r" % self.transport)
+        log.err(f)
+        self.transport.closeStdin()
+
+    def outReceived(self, data):
+        self.outStream.write(data)
+
+    def errReceived(self, data):
+        self.errStream.write(data)
+
+    def outConnectionLost(self):
+        self.outStream.finish()
+
+    def errConnectionLost(self):
+        self.errStream.finish()
+
+    def processEnded(self, reason):
+        self.resultDeferred.errback(reason)
+        del self.resultDeferred
+
+
+class ProcessStreamer(object):
+    """Runs a process hooked up to streams.
+
+    Requires an input stream; the public 'outStream' and 'errStream'
+    attributes are ProducerStream instances carrying the stdout and
+    stderr of the process.
+    """
+
+    def __init__(self, inputStream, program, args, env={}):
+        self.outStream = ProducerStream()
+        self.errStream = ProducerStream()
+        self._protocol = _ProcessStreamerProtocol(IByteStream(inputStream), self.outStream, self.errStream)
+        self._program = program
+        self._args = args
+        self._env = env
+
+    def run(self):
+        """Run the process.
+
+        Returns Deferred which will eventually errback with ProcessTerminated
+        for a non-clean (exit code > 0) exit, or fire its callback when the
+        process exits cleanly.
+        """
+        # XXX what happens if spawn fails?
+        reactor.spawnProcess(self._protocol, self._program, self._args, env=self._env)
+        del self._env
+        return self._protocol.resultDeferred.addErrback(lambda _: _.trap(ti_error.ProcessDone))
+
+##############################
+####  generatorToStream   ####
+##############################
+
+class _StreamIterator(object):
+    done = False
+
+    def __iter__(self):
+        return self
+    def next(self):
+        if self.done:
+            raise StopIteration
+        return self.value
+    wait = object()
+
+class _IteratorStream(object):
+    length = None
+
+    def __init__(self, fun, stream, args, kwargs):
+        self._stream = stream
+        self._streamIterator = _StreamIterator()
+        self._gen = fun(self._streamIterator, *args, **kwargs)
+
+    def read(self):
+        try:
+            val = self._gen.next()
+        except StopIteration:
+            return None
+        else:
+            if val is _StreamIterator.wait:
+                newdata = self._stream.read()
+                if isinstance(newdata, defer.Deferred):
+                    return newdata.addCallback(self._gotRead)
+                else:
+                    return self._gotRead(newdata)
+            return val
+
+    def _gotRead(self, data):
+        if data is None:
+            self._streamIterator.done = True
+        else:
+            self._streamIterator.value = data
+        return self.read()
+
+    def close(self):
+        self._stream.close()
+        del self._gen, self._stream, self._streamIterator
+
+    def split(self, point):
+        # Required by the IByteStream signature: delegate to the generic
+        # fallback splitter.
+        return fallbackSplit(self, point)
+
+def generatorToStream(fun):
+    """Converts a generator function into a stream.
+
+    The function should take an iterator as its first argument,
+    which will be converted *from* a stream by this wrapper, and
+    yield items which are turned *into* the results from the
+    stream's 'read' call.
+
+    One important point: before every call to input.next(), you
+    *MUST* do a "yield input.wait" first. Yielding this magic value
+    takes care of ensuring that the input is not a deferred before
+    you see it.
+
+    >>> from xcap.web import stream
+    >>> from string import maketrans
+    >>> alphabet = 'abcdefghijklmnopqrstuvwxyz'
+    >>>
+    >>> def encrypt(input, key):
+    ...     code = alphabet[key:] + alphabet[:key]
+    ...     translator = maketrans(alphabet+alphabet.upper(), code+code.upper())
+    ...     yield input.wait
+    ...     for s in input:
+    ...         yield str(s).translate(translator)
+    ...         yield input.wait
+    ...
+    >>> encrypt = stream.generatorToStream(encrypt)
+    >>>
+    >>> plaintextStream = stream.MemoryStream('SampleSampleSample')
+    >>> encryptedStream = encrypt(plaintextStream, 13)
+    >>> encryptedStream.read()
+    'FnzcyrFnzcyrFnzcyr'
+    >>>
+    >>> plaintextStream = stream.MemoryStream('SampleSampleSample')
+    >>> encryptedStream = encrypt(plaintextStream, 13)
+    >>> evenMoreEncryptedStream = encrypt(encryptedStream, 13)
+    >>> evenMoreEncryptedStream.read()
+    'SampleSampleSample'
+
+    """
+    def generatorToStream_inner(stream, *args, **kwargs):
+        return _IteratorStream(fun, stream, args, kwargs)
+    return generatorToStream_inner
+
+
+##############################
+####    BufferedStream    ####
+##############################
+
+class BufferedStream(object):
+    """A stream which buffers its data to provide operations like
+    readline and readExactly."""
+
+    data = ""
+    def __init__(self, stream):
+        self.stream = stream
+
+    def _readUntil(self, f):
+        """Internal helper function which repeatedly calls f each time
+        after more data has been received, until it returns non-None."""
+        while True:
+            r = f()
+            if r is not None:
+                yield r; return
+
+            newdata = self.stream.read()
+            if isinstance(newdata, defer.Deferred):
+                newdata = defer.waitForDeferred(newdata)
+                yield newdata; newdata = newdata.getResult()
+
+            if newdata is None:
+                # End Of File
+                newdata = self.data
+                self.data = ''
+                yield newdata; return
+            self.data += str(newdata)
+    _readUntil = defer.deferredGenerator(_readUntil)
+
+    def readExactly(self, size=None):
+        """Read exactly size bytes of data, or, if size is None, read
+        the entire stream into a string."""
+        if size is not None and size < 0:
+            raise ValueError("readExactly: size cannot be negative: %s" % (size, ))
+
+        def gotdata():
+            data = self.data
+            if size is not None and len(data) >= size:
+                pre, post = data[:size], data[size:]
+                self.data = post
+                return pre
+        return self._readUntil(gotdata)
+
+
+    def readline(self, delimiter='\r\n', size=None):
+        """
+        Read a line of data from the stream, bounded by
+        delimiter. The delimiter is included in the return value.
+
+        If size is specified, read and return at most that many bytes,
+        even if the delimiter has not yet been reached. If the size
+        limit falls within a delimiter, the rest of the delimiter, and
+        the next line will be returned together.
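+
+        Example (MemoryStream reads complete synchronously, so the
+        returned Deferred already carries its result)::
+
+            b = BufferedStream(MemoryStream('one\r\ntwo\r\n'))
+            b.readline()    # a Deferred firing with 'one\r\n'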
+        """
+        if size is not None and size < 0:
+            raise ValueError("readline: size cannot be negative: %s" % (size, ))
+
+        def gotdata():
+            data = self.data
+            if size is not None:
+                splitpoint = data.find(delimiter, 0, size)
+                if splitpoint == -1:
+                    if len(data) >= size:
+                        splitpoint = size
+                else:
+                    splitpoint += len(delimiter)
+            else:
+                splitpoint = data.find(delimiter)
+                if splitpoint != -1:
+                    splitpoint += len(delimiter)
+
+            if splitpoint != -1:
+                pre = data[:splitpoint]
+                self.data = data[splitpoint:]
+                return pre
+        return self._readUntil(gotdata)
+
+    def pushback(self, pushed):
+        """Push data back into the buffer."""
+
+        self.data = pushed + self.data
+
+    def read(self):
+        data = self.data
+        if data:
+            self.data = ""
+            return data
+        return self.stream.read()
+
+    def _len(self):
+        l = self.stream.length
+        if l is None:
+            return None
+        return l + len(self.data)
+
+    length = property(_len)
+
+    def split(self, offset):
+        off = offset - len(self.data)
+
+        pre, post = self.stream.split(max(0, off))
+        pre = BufferedStream(pre)
+        post = BufferedStream(post)
+        if off < 0:
+            # The split point falls inside the buffered data: the first
+            # 'offset' bytes of the buffer belong to the first half, the
+            # rest to the second half.
+            pre.data = self.data[:offset]
+            post.data = self.data[offset:]
+        else:
+            pre.data = self.data
+
+        return pre, post
+
+
+def substream(stream, start, end):
+    if start > end:
+        raise ValueError("start position must be less than end position %r"
+                         % ((start, end),))
+    stream = stream.split(start)[1]
+    return stream.split(end - start)[0]
+
+
+
+__all__ = ['IStream', 'IByteStream', 'FileStream', 'MemoryStream', 'CompoundStream',
+           'readAndDiscard', 'fallbackSplit', 'ProducerStream', 'StreamProducer',
+           'BufferedStream', 'readStream', 'ProcessStreamer', 'readIntoFile',
+           'generatorToStream']
+
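+
+# Example (illustrative, not executed): composing streams and feeding
+# them to a consumer.  'transport' stands for any IConsumer, e.g. a
+# connected TCP transport; the file path is hypothetical.
+#
+#   s = CompoundStream()
+#   s.addStream(MemoryStream("header\r\n\r\n"))
+#   s.addStream(open("/some/path", "rb"))  # adapted to FileStream
+#   d = StreamProducer(s).beginProducing(transport)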