Adding manual redirect support to developer.android.com.

Internal bug: 2347145

Change-Id: I0cdcec8a23704ab80878e8cc781b735fd2173011
This commit is contained in:
Trevor Johns
2010-09-01 03:34:05 -07:00
parent e52271aff9
commit 662b8f1c9a
2 changed files with 277 additions and 73 deletions

View File

@@ -31,7 +31,10 @@ __author__ = 'jmatt@google.com (Justin Mattson)'
import email.Utils
import logging
import mimetypes
import re
import sys
import time
import yaml
import zipfile
from google.appengine.api import memcache
@@ -94,6 +97,18 @@ class MemcachedZipHandler(webapp.RequestHandler):
PUBLIC = True # public cache setting
CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL
REDIRECT_PREFIX = 'redirect://' # memcache key prefix for redirect data
REDIRECT_FILE = 'redirects.yaml' # Name of file that contains redirect table
REDIRECT_SRC = 'src' # Name of the 'source' attribute for a
# redirect table entry
REDIRECT_DST = 'dst' # Name of the 'destination' attribute for
# a redirect table entry
REDIRECT_TYPE = 'type' # Name of the 'type' attribute for a
# redirect table entry
REDIRECT_TYPE_PERM = 'permanent' # Redirect 'type' string indicating a 301
# redirect should be served
REDIRECT_TYPE_TEMP = 'temporary' # Redirect 'type'string indicate a 302
# Redirect should be served
intlString = 'intl/'
validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']
@@ -127,40 +142,54 @@ class MemcachedZipHandler(webapp.RequestHandler):
#logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
# Preprocess the req url. If it references a directory or the domain itself,
# append '/index.html' to the url and 302 redirect. Otherwise, continue
# processing the request below.
name = self.PreprocessUrl(reqUri, langName)
if name:
# Do some prep for handling intl requests. Parse the url and validate
# the intl/lang substring, extract the url lang code (urlLangName) and the
# the uri that follows the intl/lang substring(contentUri)
sections = name.split("/", 2)
contentUri = 0
isIntl = len(sections) > 1 and (sections[0] == "intl")
sections = reqUri.split("/", 2)
isIntl = len(sections) > 2 and (sections[0] == "intl")
if isIntl:
isValidIntl = sections[1] in self.validLangs
if isValidIntl:
urlLangName = sections[1]
contentUri = sections[2]
logging.info(' Content URI is [%s]...', contentUri)
if (urlLangName != langName) or (langName == 'en'):
if isValidIntl:
if (langName != urlLangName) or (langName == 'en'):
# if the lang code in the request is different from that in
# the cookie, or if the target lang is en, strip the
# intl/nn substring. It will later be redirected to
# the user's preferred language url.
# logging.info(' Handling a MISMATCHED intl request')
name = contentUri
reqUri = contentUri
isStripped = True
isValidIntl = False
isIntl = False
#logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
#else:
# logging.info('INTL PREP no need to reset langName')
else:
contentUri = reqUri
# Apply manual redirects from redirects.yaml. This occurs before any
# other mutations are performed, to avoid odd redirect behavior
# (For example, a user may want to redirect a directory without having
# /index.html appended.)
did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
if did_redirect:
return
# Preprocess the req url. If it references a directory or the domain itself,
# append '/index.html' to the url and 302 redirect. Otherwise, continue
# processing the request below.
did_redirect = self.PreprocessUrl(reqUri, langName)
if did_redirect:
return
# Send for processing
if self.isCleanUrl(name, langName, isValidIntl, isStripped):
if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
# handle a 'clean' request.
# Try to form a response using the actual request url.
# logging.info(' Request being handled as clean: [%s]', name)
if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie):
if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
# If CreateResponse returns False, there was no such document
# in the intl/lang tree. Before going to 404, see if there is an
# English-language version of the doc in the default
@@ -173,7 +202,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
# because intl urls are passed through clean and retried in English,
# if necessary.
# logging.info(' Handling an invalid intl request...')
self.CreateResponse(name, langName, isValidIntl, resetLangCookie)
self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
else:
# handle the case where we have a non-clean url (usually a non-intl
@@ -181,7 +210,130 @@ class MemcachedZipHandler(webapp.RequestHandler):
# that is set. Prepend an intl/lang string to the request url and
# send it as a 302 redirect. After the redirect, the subsequent
# request will be handled as a clean url.
self.RedirToIntl(name, self.intlString, langName)
self.RedirToIntl(reqUri, self.intlString, langName)
def ProcessManualRedirects(self, contentUri, langName, isIntl):
"""Compute any manual redirects for a request and execute them.
This allows content authors to manually define a set of regex rules which,
when matched, will cause an HTTP redirect to be performed.
Redirect rules are typically stored in a file named redirects.yaml. See the
comments in that file for more information about formatting.
Redirect computations are stored in memcache for performance.
Note that international URIs are handled automatically, and are assumed to
mirror redirects for non-intl requests.
Args:
contentUri: The relative URI (without leading slash) that was requested.
This should NOT contain an intl-prefix, if otherwise present.
langName: The requested language.
isIntl: True if contentUri originally contained an intl prefix.
Results:
boolean: True if a redirect has been set, False otherwise.
"""
# Redirect data is stored in memcache for performance
memcache_key = self.REDIRECT_PREFIX + contentUri
redirect_data = memcache.get(memcache_key)
if redirect_data is None:
logging.info('Redirect cache miss. Computing new redirect data.\n'
'Memcache Key: ' + memcache_key)
redirect_data = self.ComputeManualRedirectUrl(contentUri)
memcache.set(memcache_key, redirect_data)
contentUri = redirect_data[0]
redirectType = redirect_data[1]
# If this is an international URL, prepend intl path to minimize
# number of redirects
if isIntl:
contentUri = '/%s%s%s' % (self.intlString, langName, contentUri)
if redirectType is None:
# No redirect necessary
return False
elif redirectType == self.REDIRECT_TYPE_PERM:
logging.info('Sending permanent redirect: ' + contentUri);
self.redirect(contentUri, permanent=True)
return True
elif redirectType == self.REDIRECT_TYPE_TEMP:
logging.info('Sending temporary redirect: ' + contentUri);
self.redirect(contentUri, permanent=False)
return True
else:
# Invalid redirect type
logging.error('Invalid redirect type: %s', redirectType)
raise ('Invalid redirect type: %s', redirectType)
def ComputeManualRedirectUrl(self, uri):
"""Read redirects file and evaluate redirect rules for a given URI.
Args:
uri: The relative URI (without leading slash) for which redirect data
should be computed. No special handling of intl URIs is pefromed
at this level.
Returns:
tuple: The computed redirect data. This tuple has two parts:
redirect_uri: The new URI that should be used. (If no redirect rule is
found, the original input to 'uri' will be returned.
redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
for an HTTP 302 redirect, or None if no redirect should be performed.
"""
# Redircts are defined in a file named redirects.yaml.
try:
f = open(self.REDIRECT_FILE)
data = yaml.load(f)
f.close()
except IOError, e:
logging.warning('Error opening redirect file (' + self.REDIRECT_FILE +
'): ' + e.strerror)
return (uri, None)
# The incoming path is missing a leading slash. However, many parts of the
# redirect system require leading slashes to distinguish between relative
# and absolute redirects. So, to compensate for this, we'll add a leading
# slash here as well.
uri = '/' + uri
# Check to make sure we actually got an iterable list out of the YAML file
if data is None:
logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
'YAML.')
elif 'redirects' not in data:
logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
'properly formatted -- no \'redirects:\' header.')
elif hasattr(data['redirects'], '__iter__'):
# Iterate through redirect data, try to find a redirect that matches.
for redirect in data['redirects']:
# Note: re.search adds an implied '^' to the beginning of the regex
# This means that the regex must match from the beginning of the
# string.
try:
if re.match(redirect[self.REDIRECT_SRC], uri):
# Match found. Apply redirect rule.
redirect_uri = re.sub('^' + redirect[self.REDIRECT_SRC],
redirect[self.REDIRECT_DST], uri)
logging.info('Redirect rule matched.\n'
'Rule: %s\n'
'Src: %s\n'
'Dst: %s',
redirect[self.REDIRECT_SRC], uri, redirect_uri)
if self.REDIRECT_TYPE in redirect:
redirect_type = redirect[self.REDIRECT_TYPE]
else:
# Default redirect type, if unspecified
redirect_type = self.REDIRECT_TYPE_PERM
return (redirect_uri, redirect_type)
except:
e = sys.exc_info()[1]
raise ('Error while processing redirect rule.\n'
'Rule: %s\n'
'Error: %s' % (redirect[self.REDIRECT_SRC], e))
# No redirect found, return URL unchanged
return (uri, None)
def isCleanUrl(self, name, langName, isValidIntl, isStripped):
"""Determine whether to pass an incoming url straight to processing.
@@ -208,9 +360,10 @@ class MemcachedZipHandler(webapp.RequestHandler):
name: The incoming URL
Returns:
False if the request was redirected to '/index.html', or
The processed URL, otherwise
True if the request was redirected to '/index.html'.
Otherewise False.
"""
# determine if this is a request for a directory
final_path_segment = name
final_slash_offset = name.rfind('/')
@@ -224,9 +377,9 @@ class MemcachedZipHandler(webapp.RequestHandler):
uri = ''.join(['/', name, 'index.html'])
# logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
self.redirect(uri, False)
return False
return True
else:
return name
return False
def RedirToIntl(self, name, intlString, langName):
"""Redirect an incoming request to the appropriate intl uri.
@@ -247,7 +400,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
else:
builtIntlLangUri = name
uri = ''.join(['/', builtIntlLangUri])
logging.info('-->>REDIRECTING %s to %s', name, uri)
logging.info('-->REDIRECTING %s to %s', name, uri)
self.redirect(uri, False)
return uri

View File

@@ -0,0 +1,51 @@
# Redirect file.
# This file contains the list of rewrite rules that are applied when serving
# pages.
#
# Each redirect has four parts:
#
# - src: The path to redirect. This is a regex rule prefixed with an implied
# '^'. Unless you're doing something advanced, your path should start with
# '/' character.
#
# - dst: The path to redirect to. If the path begins with a slash,
# it is considered a relative redirect. Otherwise, it is an absolute
# redirct (and should probably begin with http: or http://). You may use
# capturing groups to preserve part of the source path. To referece a
# capturing group, use \N, where N is the (1-based) index of desired group.
#
# - type: Either 'permanent' or 'temporary', depending on whether you want an
# HTTP 301 or HTTP 302 redirect, respectiviely. See RFC 2616 for the
# difference between these:
#
# http://tools.ietf.org/html/rfc2616
#
# If you don't specify a type, 'permanent' will be used by default. Note that
# this is different from the Apache convention (which uses 'temporary' by
# default.)
#
# - comment: Currently ignored by the computer, but useful for humans.
#
# Example:
#
# redirects:
# - src: /foo
# dst: /bar
# # Redirect /foo to /bar. This will also redirect foo/ and
# # foo/test.html. Note that the redirect type is optional. This will be
# # treated as a permanent redirect.
#
# - src: /(.+droid(/.*)?)$
# dst: /droids/\1
# type: permanent
# # Redirect /android to /droids/android and /bugdroid to
# # /droids/bugdroid. However, it will not redirect /droid or
# # /bugdroids.
#
# - src: /google
# dst: http://www.google.com
# type: temporary
# # This is an example of a redirect to an absolute URI.
#
redirects: