Adding manual redirect support to developer.android.com.

Internal bug: 2347145

Change-Id: I0cdcec8a23704ab80878e8cc781b735fd2173011
This commit is contained in:
Trevor Johns
2010-09-01 03:34:05 -07:00
parent e52271aff9
commit 662b8f1c9a
2 changed files with 277 additions and 73 deletions

View File

@@ -31,7 +31,10 @@ __author__ = 'jmatt@google.com (Justin Mattson)'
import email.Utils import email.Utils
import logging import logging
import mimetypes import mimetypes
import re
import sys
import time import time
import yaml
import zipfile import zipfile
from google.appengine.api import memcache from google.appengine.api import memcache
@@ -94,9 +97,21 @@ class MemcachedZipHandler(webapp.RequestHandler):
PUBLIC = True # public cache setting PUBLIC = True # public cache setting
CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL
REDIRECT_PREFIX = 'redirect://' # memcache key prefix for redirect data
REDIRECT_FILE = 'redirects.yaml' # Name of file that contains redirect table
REDIRECT_SRC = 'src' # Name of the 'source' attribute for a
# redirect table entry
REDIRECT_DST = 'dst' # Name of the 'destination' attribute for
# a redirect table entry
REDIRECT_TYPE = 'type' # Name of the 'type' attribute for a
# redirect table entry
REDIRECT_TYPE_PERM = 'permanent' # Redirect 'type' string indicating a 301
# redirect should be served
REDIRECT_TYPE_TEMP = 'temporary' # Redirect 'type' string indicating a 302
# redirect should be served
intlString = 'intl/' intlString = 'intl/'
validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW'] validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']
def TrueGet(self, reqUri): def TrueGet(self, reqUri):
"""The top-level entry point to serving requests. """The top-level entry point to serving requests.
@@ -118,7 +133,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
isStripped = False isStripped = False
# Try to retrieve the user's lang pref from the cookie. If there is no # Try to retrieve the user's lang pref from the cookie. If there is no
# lang pref cookie in the request, add set-cookie to the response with the # lang pref cookie in the request, add set-cookie to the response with the
# default value of 'en'. # default value of 'en'.
try: try:
langName = self.request.cookies['android_developer_pref_lang'] langName = self.request.cookies['android_developer_pref_lang']
@@ -127,64 +142,201 @@ class MemcachedZipHandler(webapp.RequestHandler):
#logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName) #logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie) logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
# Do some prep for handling intl requests. Parse the url and validate
# the intl/lang substring, extract the url lang code (urlLangName) and the
# the uri that follows the intl/lang substring(contentUri)
sections = reqUri.split("/", 2)
isIntl = len(sections) > 2 and (sections[0] == "intl")
if isIntl:
isValidIntl = sections[1] in self.validLangs
urlLangName = sections[1]
contentUri = sections[2]
logging.info(' Content URI is [%s]...', contentUri)
if isValidIntl:
if (langName != urlLangName) or (langName == 'en'):
# if the lang code in the request is different from that in
# the cookie, or if the target lang is en, strip the
# intl/nn substring. It will later be redirected to
# the user's preferred language url.
# logging.info(' Handling a MISMATCHED intl request')
reqUri = contentUri
isStripped = True
isValidIntl = False
isIntl = False
#logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
#else:
# logging.info('INTL PREP no need to reset langName')
else:
contentUri = reqUri
# Apply manual redirects from redirects.yaml. This occurs before any
# other mutations are performed, to avoid odd redirect behavior
# (For example, a user may want to redirect a directory without having
# /index.html appended.)
did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
if did_redirect:
return
# Preprocess the req url. If it references a directory or the domain itself, # Preprocess the req url. If it references a directory or the domain itself,
# append '/index.html' to the url and 302 redirect. Otherwise, continue # append '/index.html' to the url and 302 redirect. Otherwise, continue
# processing the request below. # processing the request below.
name = self.PreprocessUrl(reqUri, langName) did_redirect = self.PreprocessUrl(reqUri, langName)
if name: if did_redirect:
# Do some prep for handling intl requests. Parse the url and validate return
# the intl/lang substring, extract the url lang code (urlLangName) and the
# the uri that follows the intl/lang substring(contentUri)
sections = name.split("/", 2)
contentUri = 0
isIntl = len(sections) > 1 and (sections[0] == "intl")
if isIntl:
isValidIntl = sections[1] in self.validLangs
if isValidIntl:
urlLangName = sections[1]
contentUri = sections[2]
logging.info(' Content URI is [%s]...', contentUri)
if (urlLangName != langName) or (langName == 'en'):
# if the lang code in the request is different from that in
# the cookie, or if the target lang is en, strip the
# intl/nn substring. It will later be redirected to
# the user's preferred language url.
# logging.info(' Handling a MISMATCHED intl request')
name = contentUri
isStripped = True
isValidIntl = False
isIntl = False
# Send for processing # Send for processing
if self.isCleanUrl(name, langName, isValidIntl, isStripped): if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
# handle a 'clean' request. # handle a 'clean' request.
# Try to form a response using the actual request url. # Try to form a response using the actual request url.
# logging.info(' Request being handled as clean: [%s]', name) # logging.info(' Request being handled as clean: [%s]', name)
if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie): if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
# If CreateResponse returns False, there was no such document # If CreateResponse returns False, there was no such document
# in the intl/lang tree. Before going to 404, see if there is an # in the intl/lang tree. Before going to 404, see if there is an
# English-language version of the doc in the default # English-language version of the doc in the default
# default tree and return it, else go to 404. # default tree and return it, else go to 404.
self.CreateResponse(contentUri, langName, False, resetLangCookie) self.CreateResponse(contentUri, langName, False, resetLangCookie)
elif isIntl: elif isIntl:
# handle the case where we need to pass through an invalid intl req # handle the case where we need to pass through an invalid intl req
# for processing (so as to get 404 as appropriate). This is needed # for processing (so as to get 404 as appropriate). This is needed
# because intl urls are passed through clean and retried in English, # because intl urls are passed through clean and retried in English,
# if necessary. # if necessary.
# logging.info(' Handling an invalid intl request...') # logging.info(' Handling an invalid intl request...')
self.CreateResponse(name, langName, isValidIntl, resetLangCookie) self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
else: else:
# handle the case where we have a non-clean url (usually a non-intl # handle the case where we have a non-clean url (usually a non-intl
# url) that we need to interpret in the context of any lang pref # url) that we need to interpret in the context of any lang pref
# that is set. Prepend an intl/lang string to the request url and # that is set. Prepend an intl/lang string to the request url and
# send it as a 302 redirect. After the redirect, the subsequent # send it as a 302 redirect. After the redirect, the subsequent
# request will be handled as a clean url. # request will be handled as a clean url.
self.RedirToIntl(name, self.intlString, langName) self.RedirToIntl(reqUri, self.intlString, langName)
def ProcessManualRedirects(self, contentUri, langName, isIntl):
  """Compute any manual redirects for a request and execute them.

  This allows content authors to manually define a set of regex rules which,
  when matched, will cause an HTTP redirect to be performed.

  Redirect rules are typically stored in a file named redirects.yaml. See the
  comments in that file for more information about formatting.

  Redirect computations are stored in memcache for performance.

  Note that international URIs are handled automatically, and are assumed to
  mirror redirects for non-intl requests.

  Args:
    contentUri: The relative URI (without leading slash) that was requested.
        This should NOT contain an intl-prefix, if otherwise present.
    langName: The requested language.
    isIntl: True if contentUri originally contained an intl prefix.

  Returns:
    boolean: True if a redirect has been set, False otherwise.

  Raises:
    ValueError: The matching rule specifies a redirect type other than
        REDIRECT_TYPE_PERM or REDIRECT_TYPE_TEMP.
  """
  # Redirect data is stored in memcache for performance
  memcache_key = self.REDIRECT_PREFIX + contentUri
  redirect_data = memcache.get(memcache_key)
  if redirect_data is None:
    logging.info('Redirect cache miss. Computing new redirect data.\n'
                 'Memcache Key: %s', memcache_key)
    redirect_data = self.ComputeManualRedirectUrl(contentUri)
    memcache.set(memcache_key, redirect_data)
  redirect_uri = redirect_data[0]
  redirect_type = redirect_data[1]

  if redirect_type is None:
    # No redirect necessary
    return False

  if redirect_type == self.REDIRECT_TYPE_PERM:
    permanent = True
  elif redirect_type == self.REDIRECT_TYPE_TEMP:
    permanent = False
  else:
    # Invalid redirect type. Raise a real exception object; raising a tuple
    # or a plain string is not a valid exception.
    logging.error('Invalid redirect type: %s', redirect_type)
    raise ValueError('Invalid redirect type: %s' % (redirect_type,))

  # If this is an international URL, prepend intl path to minimize the
  # number of redirects. Only site-relative targets (leading slash) may be
  # prefixed; an absolute target such as 'http://...' must be left intact.
  if isIntl and redirect_uri.startswith('/'):
    redirect_uri = '/%s%s%s' % (self.intlString, langName, redirect_uri)

  # redirect_type is exactly 'permanent' or 'temporary' at this point.
  logging.info('Sending %s redirect: %s', redirect_type, redirect_uri)
  self.redirect(redirect_uri, permanent=permanent)
  return True
def ComputeManualRedirectUrl(self, uri):
  """Read redirects file and evaluate redirect rules for a given URI.

  Rules are tried in file order and the first rule whose 'src' regex matches
  at the beginning of '/' + uri wins.

  Args:
    uri: The relative URI (without leading slash) for which redirect data
        should be computed. No special handling of intl URIs is performed
        at this level.

  Returns:
    tuple: The computed redirect data. This tuple has two parts:
      redirect_uri: The new URI that should be used. (If no redirect rule is
          found, the original input to 'uri' is returned.)
      redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
          for an HTTP 302 redirect, or None if no redirect should be
          performed. The value is taken verbatim from the rule's 'type'
          attribute and is not validated here.

  Raises:
    ValueError: A redirect rule could not be evaluated (for example, it has
        no 'src' or 'dst' attribute, or its regex is invalid).
  """
  # Redirects are defined in a file named redirects.yaml.
  try:
    f = open(self.REDIRECT_FILE)
    try:
      # NOTE(review): yaml.load can construct arbitrary objects. That is
      # acceptable only while the redirect file is trusted content; if it
      # can ever come from an untrusted source, use yaml.safe_load instead.
      data = yaml.load(f)
    finally:
      # Close the file even if parsing fails.
      f.close()
  except IOError:
    e = sys.exc_info()[1]
    logging.warning('Error opening redirect file (%s): %s',
                    self.REDIRECT_FILE, e.strerror)
    return (uri, None)

  # Check to make sure we actually got an iterable list out of the YAML file
  rules = ()
  if data is None:
    logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
                    'YAML.')
  elif not isinstance(data, dict) or 'redirects' not in data:
    logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
                    'properly formatted -- no \'redirects:\' header.')
  elif hasattr(data['redirects'], '__iter__'):
    rules = data['redirects']

  # The incoming path is missing a leading slash. However, many parts of the
  # redirect system require leading slashes to distinguish between relative
  # and absolute redirects. So, to compensate for this, we'll add a leading
  # slash before matching.
  rooted_uri = '/' + uri

  # Iterate through redirect data, try to find a redirect that matches.
  for redirect in rules:
    try:
      # Note: re.match only matches at the beginning of the string, which
      # gives every rule an implied '^'.
      match = re.match(redirect[self.REDIRECT_SRC], rooted_uri)
      if match is None:
        continue
      # Match found. Apply redirect rule: substitute the matched prefix with
      # the expanded destination template and keep the unmatched remainder.
      # Reusing the match object keeps the rewrite anchored exactly where
      # the test above matched, even for patterns with alternation.
      redirect_uri = (match.expand(redirect[self.REDIRECT_DST]) +
                      rooted_uri[match.end():])
      if self.REDIRECT_TYPE in redirect:
        redirect_type = redirect[self.REDIRECT_TYPE]
      else:
        # Default redirect type, if unspecified
        redirect_type = self.REDIRECT_TYPE_PERM
    except Exception:
      e = sys.exc_info()[1]
      # Report the whole rule: its 'src' attribute may be what is missing.
      raise ValueError('Error while processing redirect rule.\n'
                       'Rule: %r\n'
                       'Error: %s' % (redirect, e))
    logging.info('Redirect rule matched.\n'
                 'Rule: %s\n'
                 'Src: %s\n'
                 'Dst: %s',
                 redirect[self.REDIRECT_SRC], rooted_uri, redirect_uri)
    return (redirect_uri, redirect_type)

  # No redirect found, return URL unchanged
  return (uri, None)
def isCleanUrl(self, name, langName, isValidIntl, isStripped): def isCleanUrl(self, name, langName, isValidIntl, isStripped):
"""Determine whether to pass an incoming url straight to processing. """Determine whether to pass an incoming url straight to processing.
Args: Args:
name: The incoming URL name: The incoming URL
@@ -208,9 +360,10 @@ class MemcachedZipHandler(webapp.RequestHandler):
name: The incoming URL name: The incoming URL
Returns: Returns:
False if the request was redirected to '/index.html', or True if the request was redirected to '/index.html'.
The processed URL, otherwise Otherewise False.
""" """
# determine if this is a request for a directory # determine if this is a request for a directory
final_path_segment = name final_path_segment = name
final_slash_offset = name.rfind('/') final_slash_offset = name.rfind('/')
@@ -224,16 +377,16 @@ class MemcachedZipHandler(webapp.RequestHandler):
uri = ''.join(['/', name, 'index.html']) uri = ''.join(['/', name, 'index.html'])
# logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName) # logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
self.redirect(uri, False) self.redirect(uri, False)
return False return True
else: else:
return name return False
def RedirToIntl(self, name, intlString, langName): def RedirToIntl(self, name, intlString, langName):
"""Redirect an incoming request to the appropriate intl uri. """Redirect an incoming request to the appropriate intl uri.
For non-en langName, builds the intl/lang string from a For non-en langName, builds the intl/lang string from a
base (en) string and redirects (302) the request to look for base (en) string and redirects (302) the request to look for
a version of the file in langName. For en langName, simply a version of the file in langName. For en langName, simply
redirects a stripped uri string (intl/nn removed). redirects a stripped uri string (intl/nn removed).
Args: Args:
@@ -247,25 +400,25 @@ class MemcachedZipHandler(webapp.RequestHandler):
else: else:
builtIntlLangUri = name builtIntlLangUri = name
uri = ''.join(['/', builtIntlLangUri]) uri = ''.join(['/', builtIntlLangUri])
logging.info('-->>REDIRECTING %s to %s', name, uri) logging.info('-->REDIRECTING %s to %s', name, uri)
self.redirect(uri, False) self.redirect(uri, False)
return uri return uri
def CreateResponse(self, name, langName, isValidIntl, resetLangCookie): def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
"""Process the url and form a response, if appropriate. """Process the url and form a response, if appropriate.
Attempts to retrieve the requested file (name) from cache, Attempts to retrieve the requested file (name) from cache,
negative cache, or store (zip) and form the response. negative cache, or store (zip) and form the response.
For intl requests that are not found (in the localized tree), For intl requests that are not found (in the localized tree),
returns False rather than forming a response, so that returns False rather than forming a response, so that
the request can be retried with the base url (this is the the request can be retried with the base url (this is the
fallthrough to default language). fallthrough to default language).
For requests that are found, forms the headers and For requests that are found, forms the headers and
adds the content to the response entity. If the request was adds the content to the response entity. If the request was
for an intl (localized) url, also resets the language cookie for an intl (localized) url, also resets the language cookie
to the language specified in the url if needed, to ensure that to the language specified in the url if needed, to ensure that
the client language and response data remain harmonious. the client language and response data remain harmonious.
Args: Args:
name: The incoming, preprocessed URL name: The incoming, preprocessed URL
@@ -281,7 +434,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
False: No response was created. False: No response was created.
""" """
# see if we have the page in the memcache # see if we have the page in the memcache
logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]', logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
name, langName, isValidIntl, resetLangCookie) name, langName, isValidIntl, resetLangCookie)
resp_data = self.GetFromCache(name) resp_data = self.GetFromCache(name)
if resp_data is None: if resp_data is None:
@@ -314,12 +467,12 @@ class MemcachedZipHandler(webapp.RequestHandler):
logging.info(' Resetting android_developer_pref_lang cookie to [%s]', logging.info(' Resetting android_developer_pref_lang cookie to [%s]',
langName) langName)
expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10 expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
self.response.headers.add_header('Set-Cookie', self.response.headers.add_header('Set-Cookie',
'android_developer_pref_lang=%s; path=/; expires=%s' % 'android_developer_pref_lang=%s; path=/; expires=%s' %
(langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate)))) (langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))
mustRevalidate = False mustRevalidate = False
if ('.html' in name): if ('.html' in name):
# revalidate html files -- workaround for cache inconsistencies for # revalidate html files -- workaround for cache inconsistencies for
# negotiated responses # negotiated responses
mustRevalidate = True mustRevalidate = True
#logging.info(' Adding [Vary: Cookie] to response...') #logging.info(' Adding [Vary: Cookie] to response...')
@@ -391,7 +544,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
x = False x = False
if resp_data is not None: if resp_data is not None:
logging.info('%s read from %s', file_path, archive_name) logging.info('%s read from %s', file_path, archive_name)
try: try:
archive_name = file_itr.next()[0] archive_name = file_itr.next()[0]
except (StopIteration), err: except (StopIteration), err:
@@ -459,7 +612,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
We say that file1 is lexigraphically before file2 if the last non-matching We say that file1 is lexigraphically before file2 if the last non-matching
path segment of file1 is alphabetically before file2. path segment of file1 is alphabetically before file2.
Args: Args:
file1: the first file path file1: the first file path
file2: the second file path file2: the second file path

View File

@@ -0,0 +1,51 @@
# Redirect file.
# This file contains the list of rewrite rules that are applied when serving
# pages.
#
# Each redirect has four parts:
#
# - src: The path to redirect. This is a regex rule prefixed with an implied
# '^'. Unless you're doing something advanced, your path should start with
# a '/' character.
#
# - dst: The path to redirect to. If the path begins with a slash,
# it is considered a relative redirect. Otherwise, it is an absolute
# redirect (and should probably begin with http:// or https://). You may use
# capturing groups to preserve part of the source path. To reference a
# capturing group, use \N, where N is the (1-based) index of the desired group.
#
# - type: Either 'permanent' or 'temporary', depending on whether you want an
# HTTP 301 or HTTP 302 redirect, respectively. See RFC 2616 for the
# difference between these:
#
# http://tools.ietf.org/html/rfc2616
#
# If you don't specify a type, 'permanent' will be used by default. Note that
# this is different from the Apache convention (which uses 'temporary' by
# default.)
#
# - comment: Currently ignored by the computer, but useful for humans.
#
# Example:
#
# redirects:
# - src: /foo
# dst: /bar
# # Redirect /foo to /bar. This will also redirect foo/ and
# # foo/test.html. Note that the redirect type is optional. This will be
# # treated as a permanent redirect.
#
# - src: /(.+droid(/.*)?)$
# dst: /droids/\1
# type: permanent
# # Redirect /android to /droids/android and /bugdroid to
# # /droids/bugdroid. However, it will not redirect /droid or
# # /bugdroids.
#
# - src: /google
# dst: http://www.google.com
# type: temporary
# # This is an example of a redirect to an absolute URI.
#
redirects: