757 lines
29 KiB
Python
757 lines
29 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2009 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
"""A class to serve pages from zip files and use memcache for performance.
|
|
|
|
This contains a class and a function to create an anonymous instance of the
|
|
class to serve HTTP GET requests. Memcache is used to increase response speed
|
|
and lower processing cycles used in serving. Credit to Guido van Rossum and
|
|
his implementation of zipserve which served as a reference as I wrote this.
|
|
|
|
MemcachedZipHandler: Class that serves request
|
|
create_handler: method to create instance of MemcachedZipHandler
|
|
"""
|
|
|
|
__author__ = 'jmatt@google.com (Justin Mattson)'
|
|
|
|
import email.Utils
|
|
import logging
|
|
import mimetypes
|
|
import re
|
|
import sys
|
|
import time
|
|
import yaml
|
|
import zipfile
|
|
|
|
from google.appengine.api import memcache
|
|
from google.appengine.ext import webapp
|
|
from google.appengine.ext.webapp import util
|
|
from time import localtime, strftime
|
|
|
|
def create_handler(zip_files, max_age=None, public=None):
    """Factory method to create a configured MemcachedZipHandler subclass.

    Args:
      zip_files: A non-empty list of zip file names, or a list of lists whose
          first member is the zip file name (see
          MemcachedZipHandler.MapFileToArchive for how an optional second
          member is used). Plain names are wrapped into one-element lists.
          The caller's list is not modified.
      max_age: The maximum client-side cache lifetime. When None, the
          MemcachedZipHandler.MAX_AGE default is used.
      public: Whether this should be declared public in the client-side cache.
          When None, the MemcachedZipHandler.PUBLIC default is used.

    Returns:
      A MemcachedZipHandler subclass (a class, not an instance) whose get()
      serves from zip_files.

    Raises:
      ValueError: if the zip_files argument is empty or is not a list
    """
    if not zip_files or not isinstance(zip_files, list):
        raise ValueError('File name arguments must be a list')

    # Normalise to the list-of-lists format without touching the input list.
    archives = []
    for entry in zip_files:
        if isinstance(entry, list):
            archives.append(entry)
        else:
            archives.append([entry])

    class HandlerWrapper(MemcachedZipHandler):
        """MemcachedZipHandler bound to the archives given to create_handler.

        A subclass is returned (rather than an instance) so that the
        configuration travels with the handler class itself.
        """

        def get(self, name):
            self.zipfilenames = archives
            self.TrueGet(name)

    # Override the caching defaults on the subclass so that
    # SetCachingHeaders, which reads self.MAX_AGE / self.PUBLIC, sees them.
    # (Previously these were assigned to throwaway locals inside get().)
    if max_age is not None:
        HandlerWrapper.MAX_AGE = max_age
    if public is not None:
        HandlerWrapper.PUBLIC = public

    return HandlerWrapper
|
|
|
|
|
|
class MemcachedZipHandler(webapp.RequestHandler):
    """Handles get requests for a given URL.

    Serves a GET request from a series of zip files. As files are served they
    are put into memcache, which is much faster than retrieving them from the
    zip source file again. It also uses considerably fewer CPU cycles.

    Instances expect a `zipfilenames` attribute (a list of lists whose first
    member is a zip file name) to be set before TrueGet is called.
    """
    zipfile_cache = {}                # class cache of source zip files
    MAX_AGE = 600                     # max client-side cache lifetime; emitted
                                      # as the Cache-Control max-age value
    PUBLIC = True                     # public cache setting
    CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
    NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URL
    REDIRECT_PREFIX = 'redirect://'   # memcache key prefix for redirect data
    REDIRECT_FILE = 'redirects.yaml'  # Name of file that contains redirect table
    REDIRECT_SRC = 'src'              # Name of the 'source' attribute for a
                                      # redirect table entry
    REDIRECT_DST = 'dst'              # Name of the 'destination' attribute for
                                      # a redirect table entry
    REDIRECT_TYPE = 'type'            # Name of the 'type' attribute for a
                                      # redirect table entry
    REDIRECT_TYPE_PERM = 'permanent'  # Redirect 'type' string indicating a 301
                                      # redirect should be served
    REDIRECT_TYPE_TEMP = 'temporary'  # Redirect 'type' string indicating a 302
                                      # redirect should be served
    intlString = 'intl/'              # URL path prefix of localized content
    # Language codes accepted after the intl/ prefix.
    validLangs = ['en', 'de', 'es', 'fr','it','ja','ko','ru','zh-CN','zh-cn','zh-TW','zh-tw']
|
|
|
|
def TrueGet(self, reqUri):
    """The top-level entry point to serving requests.

    Called 'True' get because it does the work when called from the wrapper
    class' get method. The request is served from an intl/<lang>/... tree
    when the URL and the user's language cookie agree, and falls through to
    the default-language tree otherwise.

    Args:
      reqUri: URL requested (relative, without a leading slash)

    Returns:
      None
    """
    # The language preference lives in a cookie. Without one, use 'en' and
    # remember that the response has to set the cookie.
    try:
        langName = self.request.cookies['android_developer_pref_lang']
        resetLangCookie = False
    except KeyError:
        langName = 'en'
        resetLangCookie = True
    logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

    # Split "intl/<lang>/<rest>" into its parts. contentUri is the URL with
    # any intl/<lang>/ prefix removed.
    isValidIntl = False
    isStripped = False
    contentUri = reqUri
    pieces = reqUri.split("/", 2)
    isIntl = len(pieces) > 2 and (pieces[0] == "intl")
    if isIntl:
        urlLangName, contentUri = pieces[1], pieces[2]
        isValidIntl = urlLangName in self.validLangs
        logging.info(' Content URI is [%s]...', contentUri)
        if isValidIntl and (langName != urlLangName or langName == 'en'):
            # The URL language disagrees with the cookie, or the target
            # language is en: drop the intl/<lang> prefix. The request is
            # redirected to the preferred-language URL further down.
            reqUri = contentUri
            isStripped = True
            isValidIntl = False
            isIntl = False

    # Manual redirects from redirects.yaml are applied before any other
    # mutation (a directory may be redirected without /index.html being
    # appended first).
    if self.ProcessManualRedirects(contentUri, langName, isIntl):
        return

    # Directory-style URLs are redirected to their index.html.
    if self.PreprocessUrl(reqUri, langName):
        return

    if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
        # 'Clean' request: answer with the URL as requested. A False result
        # means the document is missing from the intl/<lang> tree, so retry
        # with the default-tree URL, which serves it or produces the 404.
        served = self.CreateResponse(reqUri, langName, isValidIntl,
                                     resetLangCookie)
        if not served:
            self.CreateResponse(contentUri, langName, False, resetLangCookie)
        return

    if isIntl:
        # Invalid intl request: pass it through so it gets its 404.
        self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
        return

    # Non-clean (usually non-intl) URL: redirect into the tree of the
    # preferred language; the follow-up request is then handled as clean.
    self.RedirToIntl(reqUri, self.intlString, langName)
|
|
|
|
def ProcessManualRedirects(self, contentUri, langName, isIntl):
    """Compute any manual redirects for a request and execute them.

    This allows content authors to manually define a set of regex rules
    which, when matched, will cause an HTTP redirect to be performed.

    Redirect rules are read by ComputeManualRedirectUrl (from the file named
    by REDIRECT_FILE). The computed result is stored in memcache under
    REDIRECT_PREFIX + contentUri, including the "no redirect" result.

    International URIs are assumed to mirror the redirects for non-intl
    requests: the rule is looked up with the intl prefix removed and the
    prefix is put back on the redirect target.

    Args:
      contentUri: The relative URI (without leading slash) that was
          requested. This should NOT contain an intl-prefix.
      langName: The requested language.
      isIntl: True if contentUri originally contained an intl prefix.

    Returns:
      boolean: True if a redirect has been set, False otherwise.

    Raises:
      ValueError: if the matched rule carries an unknown redirect type.
    """
    memcache_key = self.REDIRECT_PREFIX + contentUri
    redirect_data = memcache.get(memcache_key)
    if redirect_data is None:
        logging.info('Redirect cache miss. Computing new redirect data.\n'
                     'Memcache Key: %s', memcache_key)
        redirect_data = self.ComputeManualRedirectUrl(contentUri)
        memcache.set(memcache_key, redirect_data)
    targetUri = redirect_data[0]
    redirectType = redirect_data[1]

    if redirectType is None:
        # No redirect necessary
        return False

    if redirectType == self.REDIRECT_TYPE_PERM:
        permanent = True
    elif redirectType == self.REDIRECT_TYPE_TEMP:
        permanent = False
    else:
        # Raise a real exception: the previous `raise (msg, value)` form is
        # not a valid exception and only produced an unrelated TypeError.
        logging.error('Invalid redirect type: %s', redirectType)
        raise ValueError('Invalid redirect type: %s' % (redirectType,))

    # If this is an international URL, prepend intl path to minimize
    # number of redirects
    if isIntl:
        targetUri = '/%s%s%s' % (self.intlString, langName, targetUri)

    if permanent:
        logging.info('Sending permanent redirect: %s', targetUri)
    else:
        logging.info('Sending temporary redirect: %s', targetUri)
    self.redirect(targetUri, permanent=permanent)
    return True
|
|
|
|
def ComputeManualRedirectUrl(self, uri):
    """Read redirects file and evaluate redirect rules for a given URI.

    Args:
      uri: The relative URI (without leading slash) for which redirect data
          should be computed. No special handling of intl URIs is performed
          at this level.

    Returns:
      tuple: The computed redirect data. This tuple has two parts:
        redirect_uri: The new URI that should be used. If no redirect rule
            matches, this is the input with a leading slash added; if the
            redirects file cannot be opened, it is the input unchanged.
        redirect_type: The rule's 'type' value ('permanent' when the rule
            does not specify one), or None if no redirect should be
            performed.

    Raises:
      ValueError: if a rule is malformed (missing keys, invalid regex or
          replacement template).
    """
    # Redirects are defined in the file named by REDIRECT_FILE.
    try:
        f = open(self.REDIRECT_FILE)
        try:
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # The file is read from the application's own directory; if it
            # can ever come from a less trusted source, switch to
            # yaml.safe_load.
            data = yaml.load(f)
        finally:
            # Close the file even when the YAML parser raises.
            f.close()
    except IOError:
        e = sys.exc_info()[1]
        logging.warning('Error opening redirect file (%s): %s',
                        self.REDIRECT_FILE, e.strerror)
        return (uri, None)

    # The incoming path is missing a leading slash. However, many parts of
    # the redirect system require leading slashes to distinguish between
    # relative and absolute redirects. So, to compensate for this, we'll add
    # a leading slash here as well.
    uri = '/' + uri

    # Check to make sure we actually got a usable mapping out of the file.
    if data is None:
        logging.warning('Redirect file (%s) not valid YAML.',
                        self.REDIRECT_FILE)
    elif not isinstance(data, dict) or 'redirects' not in data:
        logging.warning('Redirect file (%s) not properly formatted -- no '
                        '\'redirects:\' header.', self.REDIRECT_FILE)
    elif hasattr(data['redirects'], '__iter__'):
        # Iterate through redirect data, try to find a redirect that matches.
        for redirect in data['redirects']:
            try:
                pattern = redirect[self.REDIRECT_SRC]
                # Note: re.match only matches at the beginning of the
                # string, so the rule must match from the start of the URI.
                if not re.match(pattern, uri):
                    continue
                # Match found. Apply redirect rule.
                redirect_uri = re.sub('^' + pattern,
                                      redirect[self.REDIRECT_DST], uri)
                # Default redirect type, if unspecified, is permanent.
                redirect_type = redirect.get(self.REDIRECT_TYPE,
                                             self.REDIRECT_TYPE_PERM)
            except Exception:
                e = sys.exc_info()[1]
                # Report the whole rule: indexing it again here could fail
                # for the same reason the rule failed in the first place.
                raise ValueError('Error while processing redirect rule.\n'
                                 'Rule: %r\n'
                                 'Error: %s' % (redirect, e))
            logging.info('Redirect rule matched.\n'
                         'Rule: %s\n'
                         'Src: %s\n'
                         'Dst: %s',
                         pattern, uri, redirect_uri)
            return (redirect_uri, redirect_type)

    # No redirect found, return URL (with the added leading slash)
    return (uri, None)
|
|
|
|
def isCleanUrl(self, name, langName, isValidIntl, isStripped):
    """Determine whether to pass an incoming url straight to processing.

    A URL is 'clean' when any of the following holds:
      * the language is 'en' and no intl prefix was stripped from the URL,
      * the URL carries a valid intl/<lang> prefix,
      * the URL does not contain '.html',
      * there is neither a valid intl prefix nor a language.

    Args:
      name: The incoming URL
      langName: The language taken from the user's preference
      isValidIntl: Whether the URL has a valid intl/<lang> prefix
      isStripped: Whether an intl/<lang> prefix was removed from the URL

    Returns:
      boolean: Whether the URL should be sent straight to processing
    """
    # Always return a real boolean; previously the negative case fell off
    # the end of the function and returned None.
    return bool((langName == 'en' and not isStripped)
                or isValidIntl
                or '.html' not in name
                or (not isValidIntl and not langName))
|
|
|
|
def PreprocessUrl(self, name, langName):
    """Any preprocessing work on the URL when it comes in.

    Redirects requests for a directory (or for the domain itself) to the
    index.html file in that directory. A URL counts as a directory when it
    is empty, ends in '/', or its last path segment contains no '.'.
    Subclasses should override this method to do different preprocessing.

    Args:
      name: The incoming URL
      langName: The requested language (not used here)

    Returns:
      True if the request was redirected to '/index.html'.
      Otherwise False.
    """
    # A last segment without a dot is taken to be a directory name.
    if name and not name.endswith('/'):
        last_segment = name.rsplit('/', 1)[-1]
        if '.' not in last_segment:
            name = name + '/'

    # Anything that still names a file is left for normal processing.
    if name and not name.endswith('/'):
        return False

    # A directory or the domain itself: redirect to its index.html.
    self.redirect('/' + name + 'index.html', False)
    return True
|
|
|
|
def RedirToIntl(self, name, intlString, langName):
    """Redirect an incoming request to the appropriate intl uri.

    For non-en langName, builds the intl/lang string from a
    base (en) string and redirects (302) the request to look for
    a version of the file in langName, forwarding the query string
    when there is one. For en langName, simply redirects to the
    uri as given (intl/nn already removed by the caller).

    Args:
      name: The incoming, preprocessed URL
      intlString: The intl path prefix (e.g. 'intl/')
      langName: The language to redirect to

    Returns:
      The lang-specific URL
    """
    if langName == 'en':
        # NOTE(review): the query string is not forwarded on this path;
        # kept as-is, confirm whether that is intended.
        target = name
    else:
        target = ''.join([intlString, langName, '/', name])
        query = self.request.query_string
        # Only add the separator when there is something to forward, so the
        # redirect target does not end in a dangling '?'.
        if query:
            target = '%s?%s' % (target, query)
    uri = '/' + target
    logging.info('-->REDIRECTING %s to %s', name, uri)
    self.redirect(uri, False)
    return uri
|
|
|
|
def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
    """Process the url and form a response, if appropriate.

    Attempts to retrieve the requested file (name) from cache,
    negative cache, or store (zip) and form the response.
    For intl requests that are not found (in the localized tree),
    returns False rather than forming a response, so that
    the request can be retried with the base url (this is the
    fallthrough to default language).

    For requests that are found, forms the headers and
    adds the content to the response entity. When resetLangCookie is
    set, the language cookie is also (re)set to langName.

    The Content-Type comes from mimetypes.guess_type; for names it cannot
    classify, a small built-in table is consulted and anything still
    unknown is served as 'application/octet-stream' (previously such files
    produced an empty response).

    Args:
      name: The incoming, preprocessed URL
      langName: The language id. Used as necessary to reset the
          language cookie in the response.
      isValidIntl: Whether the request is for a language-specific url
      resetLangCookie: Whether the response should reset the
          language cookie to 'langName'

    Returns:
      True: A response (content or 404) was created for the request
      False: No response was created.
    """
    logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
                 name, langName, isValidIntl, resetLangCookie)

    # see if we have the page in the memcache
    resp_data = self.GetFromCache(name)
    if resp_data is None:
        logging.info(' Cache miss for %s', name)
        if self.GetFromNegativeCache(name) is not None:
            # found it in negative cache
            self.Write404Error()
            return True

        resp_data = self.GetFromStore(name)
        if resp_data is None:
            if isValidIntl:
                # couldn't find the intl doc. Let the caller fall through to
                # English.
                return False
            logging.info(' Adding %s to negative cache, serving 404', name)
            self.StoreInNegativeCache(name)
            self.Write404Error()
            return True
        self.StoreOrUpdateInCache(name, resp_data)

    # found content from cache or store
    logging.info('FOUND CLEAN')
    if resetLangCookie:
        logging.info(' Resetting android_developer_pref_lang cookie to [%s]',
                     langName)
        # Expire roughly ten years from now.
        expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
        self.response.headers.add_header(
            'Set-Cookie',
            'android_developer_pref_lang=%s; path=/; expires=%s' %
            (langName,
             strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))

    # revalidate html files -- workaround for cache inconsistencies for
    # negotiated responses
    mustRevalidate = '.html' in name
    if mustRevalidate:
        self.response.headers.add_header('Vary', 'Cookie')

    content_type, _ = mimetypes.guess_type(name)
    if not content_type:
        if name == 'favicon.ico':
            content_type = 'image/x-icon'
        else:
            # Types the mimetypes module may not know, by file suffix.
            fallback_types = (
                ('.psd', 'application/octet-stream'),
                ('.svg', 'image/svg+xml'),
                ('.mp4', 'video/mp4'),
                ('.webm', 'video/webm'),
                ('.ogv', 'video/ogg'),
            )
            # Default for anything else, so found content is always sent.
            content_type = 'application/octet-stream'
            for suffix, fallback in fallback_types:
                if name.endswith(suffix):
                    content_type = fallback
                    break

    self.response.headers['Content-Type'] = content_type
    self.SetCachingHeaders(mustRevalidate)
    self.response.out.write(resp_data)
    return True
|
|
|
|
def GetFromStore(self, file_path):
|
|
"""Retrieve file from zip files.
|
|
|
|
Get the file from the source, it must not have been in the memcache. If
|
|
possible, we'll use the zip file index to quickly locate where the file
|
|
should be found. (See MapToFileArchive documentation for assumptions about
|
|
file ordering.) If we don't have an index or don't find the file where the
|
|
index says we should, look through all the zip files to find it.
|
|
|
|
Args:
|
|
file_path: the file that we're looking for
|
|
|
|
Returns:
|
|
The contents of the requested file
|
|
"""
|
|
resp_data = None
|
|
file_itr = iter(self.zipfilenames)
|
|
|
|
# decode any escape characters in the URI
|
|
# Note: We are currenty just looking for '@' (%40)
|
|
file_path = file_path.replace('%40', '@')
|
|
|
|
# check the index, if we have one, to see what archive the file is in
|
|
archive_name = self.MapFileToArchive(file_path)
|
|
if not archive_name:
|
|
archive_name = file_itr.next()[0]
|
|
|
|
while resp_data is None and archive_name:
|
|
zip_archive = self.LoadZipFile(archive_name)
|
|
if zip_archive:
|
|
|
|
# we expect some lookups will fail, and that's okay, 404s will deal
|
|
# with that
|
|
try:
|
|
resp_data = zip_archive.read(file_path)
|
|
except (KeyError, RuntimeError), err:
|
|
# no op
|
|
x = False
|
|
if resp_data is not None:
|
|
logging.info('%s read from %s', file_path, archive_name)
|
|
|
|
try:
|
|
archive_name = file_itr.next()[0]
|
|
except (StopIteration), err:
|
|
archive_name = False
|
|
|
|
return resp_data
|
|
|
|
def LoadZipFile(self, zipfilename):
|
|
"""Convenience method to load zip file.
|
|
|
|
Just a convenience method to load the zip file from the data store. This is
|
|
useful if we ever want to change data stores and also as a means of
|
|
dependency injection for testing. This method will look at our file cache
|
|
first, and then load and cache the file if there's a cache miss
|
|
|
|
Args:
|
|
zipfilename: the name of the zip file to load
|
|
|
|
Returns:
|
|
The zip file requested, or None if there is an I/O error
|
|
"""
|
|
zip_archive = None
|
|
zip_archive = self.zipfile_cache.get(zipfilename)
|
|
if zip_archive is None:
|
|
try:
|
|
zip_archive = zipfile.ZipFile(zipfilename)
|
|
self.zipfile_cache[zipfilename] = zip_archive
|
|
except (IOError, RuntimeError), err:
|
|
logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
|
|
err))
|
|
return zip_archive
|
|
|
|
def MapFileToArchive(self, file_path):
|
|
"""Given a file name, determine what archive it should be in.
|
|
|
|
This method makes two critical assumptions.
|
|
(1) The zip files passed as an argument to the handler, if concatenated
|
|
in that same order, would result in a total ordering
|
|
of all the files. See (2) for ordering type.
|
|
(2) Upper case letters before lower case letters. The traversal of a
|
|
directory tree is depth first. A parent directory's files are added
|
|
before the files of any child directories
|
|
|
|
Args:
|
|
file_path: the file to be mapped to an archive
|
|
|
|
Returns:
|
|
The name of the archive where we expect the file to be
|
|
"""
|
|
num_archives = len(self.zipfilenames)
|
|
while num_archives > 0:
|
|
target = self.zipfilenames[num_archives - 1]
|
|
if len(target) > 1:
|
|
if self.CompareFilenames(target[1], file_path) >= 0:
|
|
return target[0]
|
|
num_archives -= 1
|
|
|
|
return None
|
|
|
|
def CompareFilenames(self, file1, file2):
    """Determines whether file1 is lexicographically 'before' file2.

    WARNING: This method assumes that paths are output depth-first,
    with parent directories' files stored before children's.

    The paths are compared segment by segment ('/'-separated). At the first
    differing segment, the path for which that segment is the last one is
    'before' the other when the paths differ in depth (files come before
    directories); otherwise the segments are compared alphabetically. A
    path that is a proper prefix of the other is 'before' it.

    Args:
      file1: the first file path
      file2: the second file path

    Returns:
      A positive number if file1 is before file2
      A negative number if file2 is before file1
      0 if filenames are the same
    """
    f1_segments = file1.split('/')
    f2_segments = file2.split('/')
    len1 = len(f1_segments)
    len2 = len(f2_segments)
    shorter = min(len1, len2)

    # Skip the common leading segments.
    segment_ptr = 0
    while (segment_ptr < shorter and
           f1_segments[segment_ptr] == f2_segments[segment_ptr]):
        segment_ptr += 1

    if len1 == len2:
        # we fell off the end, the paths must be the same
        if segment_ptr == len1:
            return 0
    elif segment_ptr + 1 >= shorter:
        # The paths part ways at (or past) the last segment of the shorter
        # one, which is therefore first because files come before
        # directories. Using >= also covers a path that is a proper prefix
        # of a much deeper one, which used to raise IndexError below.
        return len2 - len1

    # Both paths have a segment here and the segments differ.
    if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
        return 1
    return -1
|
|
|
|
def SetCachingHeaders(self, revalidate):
    """Set the Cache-Control header on the response.

    The header lists 'public' (only when self.PUBLIC is set), then
    'max-age=<self.MAX_AGE>', then 'must-revalidate' (only when requested).

    Args:
      revalidate: Whether to add the must-revalidate directive
    """
    directives = ['max-age=%d' % self.MAX_AGE]
    if self.PUBLIC:
        directives.insert(0, 'public')
    if revalidate:
        directives.append('must-revalidate')
    self.response.headers['Cache-Control'] = ', '.join(directives)
|
|
|
|
def GetFromCache(self, filename):
    """Look a file up in memcache.

    Args:
      filename: The URL of the file to return

    Returns:
      Whatever memcache holds under CACHE_PREFIX + filename (the content
      stored by StoreOrUpdateInCache)
    """
    cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
    return memcache.get(cache_key)
|
|
|
|
def StoreOrUpdateInCache(self, filename, data):
    """Store data in the cache.

    Store a piece of data in the memcache under CACHE_PREFIX + filename,
    overwriting any previous value. Memcache has a maximum item size of
    1*10^6 bytes. If the data is too large, fail, but log the failure.
    Future work will consider compressing the data before storing or
    chunking it.

    Args:
      filename: the name of the file to store
      data: the data of the file

    Returns:
      None
    """
    try:
        # set() stores the value whether or not the key exists, replacing
        # the previous add()-then-replace() pair with a single call.
        memcache.set('%s%s' % (self.CACHE_PREFIX, filename), data)
    except ValueError:
        err = sys.exc_info()[1]
        logging.warning('Data size too large to cache\n%s', err)
|
|
|
|
def Write404Error(self):
    """Output a simple 404 response: status 404 plus a small HTML body."""
    self.error(404)
    body = ('<html><head><title>404: Not Found</title></head>'
            '<body><b><h2>Error 404</h2><br/>'
            'File not found</b></body></html>')
    self.response.out.write(body)
|
|
|
|
def StoreInNegativeCache(self, filename):
    """Record in memcache that a URL does not exist.

    The marker value -1 is added under NEG_CACHE_PREFIX + filename.

    Future work should consider setting a maximum negative cache size to
    prevent it from negatively impacting the real cache.

    Args:
      filename: URL to add to negative cache

    Returns:
      None
    """
    negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
    memcache.add(negative_key, -1)
|
|
|
|
def GetFromNegativeCache(self, filename):
    """Retrieve from negative cache.

    Args:
      filename: URL to retrieve

    Returns:
      Whatever memcache holds under NEG_CACHE_PREFIX + filename: the marker
      written by StoreInNegativeCache (-1) for a known-missing URL.
    """
    negative_key = '%s%s' % (self.NEG_CACHE_PREFIX, filename)
    return memcache.get(negative_key)
|
|
|
|
def main():
    """Run a WSGI application that routes requests to MemcachedZipHandler."""
    # NOTE(review): the route captures two groups and targets the base
    # handler, which defines TrueGet but no get() in this file -- confirm
    # this entry point is still meant to be used directly.
    routes = [('/([^/]+)/(.*)', MemcachedZipHandler)]
    application = webapp.WSGIApplication(routes)
    util.run_wsgi_app(application)


if __name__ == '__main__':
    main()
|