Adding manual redirect support to developer.android.com.
Internal bug: 2347145 Change-Id: I0cdcec8a23704ab80878e8cc781b735fd2173011
This commit is contained in:
@@ -31,7 +31,10 @@ __author__ = 'jmatt@google.com (Justin Mattson)'
|
|||||||
import email.Utils
|
import email.Utils
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import yaml
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
from google.appengine.api import memcache
|
from google.appengine.api import memcache
|
||||||
@@ -94,6 +97,18 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
PUBLIC = True # public cache setting
|
PUBLIC = True # public cache setting
|
||||||
CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
|
CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
|
||||||
NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL
|
NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL
|
||||||
|
REDIRECT_PREFIX = 'redirect://' # memcache key prefix for redirect data
|
||||||
|
REDIRECT_FILE = 'redirects.yaml' # Name of file that contains redirect table
|
||||||
|
REDIRECT_SRC = 'src' # Name of the 'source' attribute for a
|
||||||
|
# redirect table entry
|
||||||
|
REDIRECT_DST = 'dst' # Name of the 'destination' attribute for
|
||||||
|
# a redirect table entry
|
||||||
|
REDIRECT_TYPE = 'type' # Name of the 'type' attribute for a
|
||||||
|
# redirect table entry
|
||||||
|
REDIRECT_TYPE_PERM = 'permanent' # Redirect 'type' string indicating a 301
|
||||||
|
# redirect should be served
|
||||||
|
REDIRECT_TYPE_TEMP = 'temporary' # Redirect 'type'string indicate a 302
|
||||||
|
# Redirect should be served
|
||||||
intlString = 'intl/'
|
intlString = 'intl/'
|
||||||
validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']
|
validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']
|
||||||
|
|
||||||
@@ -127,40 +142,54 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
#logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
|
#logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
|
||||||
logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
|
logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
|
||||||
|
|
||||||
# Preprocess the req url. If it references a directory or the domain itself,
|
|
||||||
# append '/index.html' to the url and 302 redirect. Otherwise, continue
|
|
||||||
# processing the request below.
|
|
||||||
name = self.PreprocessUrl(reqUri, langName)
|
|
||||||
if name:
|
|
||||||
# Do some prep for handling intl requests. Parse the url and validate
|
# Do some prep for handling intl requests. Parse the url and validate
|
||||||
# the intl/lang substring, extract the url lang code (urlLangName) and the
|
# the intl/lang substring, extract the url lang code (urlLangName) and the
|
||||||
# the uri that follows the intl/lang substring(contentUri)
|
# the uri that follows the intl/lang substring(contentUri)
|
||||||
sections = name.split("/", 2)
|
sections = reqUri.split("/", 2)
|
||||||
contentUri = 0
|
isIntl = len(sections) > 2 and (sections[0] == "intl")
|
||||||
isIntl = len(sections) > 1 and (sections[0] == "intl")
|
|
||||||
if isIntl:
|
if isIntl:
|
||||||
isValidIntl = sections[1] in self.validLangs
|
isValidIntl = sections[1] in self.validLangs
|
||||||
if isValidIntl:
|
|
||||||
urlLangName = sections[1]
|
urlLangName = sections[1]
|
||||||
contentUri = sections[2]
|
contentUri = sections[2]
|
||||||
logging.info(' Content URI is [%s]...', contentUri)
|
logging.info(' Content URI is [%s]...', contentUri)
|
||||||
if (urlLangName != langName) or (langName == 'en'):
|
if isValidIntl:
|
||||||
|
if (langName != urlLangName) or (langName == 'en'):
|
||||||
# if the lang code in the request is different from that in
|
# if the lang code in the request is different from that in
|
||||||
# the cookie, or if the target lang is en, strip the
|
# the cookie, or if the target lang is en, strip the
|
||||||
# intl/nn substring. It will later be redirected to
|
# intl/nn substring. It will later be redirected to
|
||||||
# the user's preferred language url.
|
# the user's preferred language url.
|
||||||
# logging.info(' Handling a MISMATCHED intl request')
|
# logging.info(' Handling a MISMATCHED intl request')
|
||||||
name = contentUri
|
reqUri = contentUri
|
||||||
isStripped = True
|
isStripped = True
|
||||||
isValidIntl = False
|
isValidIntl = False
|
||||||
isIntl = False
|
isIntl = False
|
||||||
|
#logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
|
||||||
|
#else:
|
||||||
|
# logging.info('INTL PREP no need to reset langName')
|
||||||
|
else:
|
||||||
|
contentUri = reqUri
|
||||||
|
|
||||||
|
# Apply manual redirects from redirects.yaml. This occurs before any
|
||||||
|
# other mutations are performed, to avoid odd redirect behavior
|
||||||
|
# (For example, a user may want to redirect a directory without having
|
||||||
|
# /index.html appended.)
|
||||||
|
did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
|
||||||
|
if did_redirect:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Preprocess the req url. If it references a directory or the domain itself,
|
||||||
|
# append '/index.html' to the url and 302 redirect. Otherwise, continue
|
||||||
|
# processing the request below.
|
||||||
|
did_redirect = self.PreprocessUrl(reqUri, langName)
|
||||||
|
if did_redirect:
|
||||||
|
return
|
||||||
|
|
||||||
# Send for processing
|
# Send for processing
|
||||||
if self.isCleanUrl(name, langName, isValidIntl, isStripped):
|
if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
|
||||||
# handle a 'clean' request.
|
# handle a 'clean' request.
|
||||||
# Try to form a response using the actual request url.
|
# Try to form a response using the actual request url.
|
||||||
# logging.info(' Request being handled as clean: [%s]', name)
|
# logging.info(' Request being handled as clean: [%s]', name)
|
||||||
if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie):
|
if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
|
||||||
# If CreateResponse returns False, there was no such document
|
# If CreateResponse returns False, there was no such document
|
||||||
# in the intl/lang tree. Before going to 404, see if there is an
|
# in the intl/lang tree. Before going to 404, see if there is an
|
||||||
# English-language version of the doc in the default
|
# English-language version of the doc in the default
|
||||||
@@ -173,7 +202,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
# because intl urls are passed through clean and retried in English,
|
# because intl urls are passed through clean and retried in English,
|
||||||
# if necessary.
|
# if necessary.
|
||||||
# logging.info(' Handling an invalid intl request...')
|
# logging.info(' Handling an invalid intl request...')
|
||||||
self.CreateResponse(name, langName, isValidIntl, resetLangCookie)
|
self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# handle the case where we have a non-clean url (usually a non-intl
|
# handle the case where we have a non-clean url (usually a non-intl
|
||||||
@@ -181,7 +210,130 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
# that is set. Prepend an intl/lang string to the request url and
|
# that is set. Prepend an intl/lang string to the request url and
|
||||||
# send it as a 302 redirect. After the redirect, the subsequent
|
# send it as a 302 redirect. After the redirect, the subsequent
|
||||||
# request will be handled as a clean url.
|
# request will be handled as a clean url.
|
||||||
self.RedirToIntl(name, self.intlString, langName)
|
self.RedirToIntl(reqUri, self.intlString, langName)
|
||||||
|
|
||||||
|
def ProcessManualRedirects(self, contentUri, langName, isIntl):
  """Compute any manual redirect for a request and execute it.

  This allows content authors to manually define a set of regex rules which,
  when matched, will cause an HTTP redirect to be performed.

  The (destination, type) pair computed by ComputeManualRedirectUrl is
  stored in memcache under REDIRECT_PREFIX + contentUri, so the rules are
  only evaluated on a cache miss.

  Args:
    contentUri: The relative URI (without leading slash) that was requested.
        This should NOT contain an intl prefix.
    langName: The requested language.
    isIntl: True if the original request URI contained an intl prefix.

  Returns:
    boolean: True if a redirect has been sent, False otherwise.

  Raises:
    ValueError: If the matched rule specifies a redirect type other than
        REDIRECT_TYPE_PERM or REDIRECT_TYPE_TEMP.
  """
  memcache_key = self.REDIRECT_PREFIX + contentUri
  redirect_data = memcache.get(memcache_key)
  if redirect_data is None:
    logging.info('Redirect cache miss. Computing new redirect data.\n'
                 'Memcache Key: %s', memcache_key)
    redirect_data = self.ComputeManualRedirectUrl(contentUri)
    memcache.set(memcache_key, redirect_data)
  redirectUri = redirect_data[0]
  redirectType = redirect_data[1]

  if redirectType is None:
    # No rule matched (or the rule table could not be read).
    return False

  if redirectType == self.REDIRECT_TYPE_PERM:
    permanent = True
  elif redirectType == self.REDIRECT_TYPE_TEMP:
    permanent = False
  else:
    logging.error('Invalid redirect type: %s', redirectType)
    # Raise a real exception object; raising a tuple or a plain string is
    # not a valid way to signal an error.
    raise ValueError('Invalid redirect type: %s' % redirectType)

  # For international requests, prepend the intl path so the client does not
  # need a second redirect. Only site-relative destinations (leading slash)
  # get the prefix; an absolute destination such as 'http://...' must be
  # sent unchanged.
  if isIntl and redirectUri.startswith('/'):
    redirectUri = '/%s%s%s' % (self.intlString, langName, redirectUri)

  logging.info('Sending %s redirect: %s', redirectType, redirectUri)
  self.redirect(redirectUri, permanent=permanent)
  return True
|
||||||
|
|
||||||
|
def ComputeManualRedirectUrl(self, uri):
  """Read the redirects file and evaluate redirect rules for a given URI.

  Rules are tried in file order and the first rule whose 'src' regex matches
  at the beginning of '/' + uri wins.

  Args:
    uri: The relative URI (without leading slash) for which redirect data
        should be computed. No special handling of intl URIs is performed
        at this level.

  Returns:
    tuple: The computed redirect data. This tuple has two parts:
      redirect_uri: The new URI that should be used. If no redirect rule is
          found, the original input to 'uri' is returned unchanged.
      redirect_type: The rule's 'type' value ('permanent' when the rule does
          not specify one), or None if no redirect should be performed.

  Raises:
    ValueError: If a rule is malformed (missing keys, invalid regex, bad
        group reference in 'dst').
  """
  # Redirects are defined in a file named by REDIRECT_FILE.
  try:
    f = open(self.REDIRECT_FILE)
    try:
      # NOTE(review): yaml.load can construct arbitrary Python objects. The
      # redirect file ships with the app, but yaml.safe_load would be the
      # safer choice if it is available in the deployed PyYAML.
      data = yaml.load(f)
    finally:
      # Close the file even when YAML parsing raises.
      f.close()
  except IOError:
    e = sys.exc_info()[1]
    logging.warning('Error opening redirect file (%s): %s',
                    self.REDIRECT_FILE, e.strerror)
    return (uri, None)

  # Check that we actually got a usable rule list out of the YAML file.
  if data is None:
    logging.warning('Redirect file (%s) not valid YAML.', self.REDIRECT_FILE)
    return (uri, None)
  if not isinstance(data, dict) or 'redirects' not in data:
    logging.warning('Redirect file (%s) not properly formatted -- no '
                    '\'redirects:\' header.', self.REDIRECT_FILE)
    return (uri, None)
  rules = data['redirects']
  if not isinstance(rules, (list, tuple)):
    # An empty 'redirects:' section loads as None; nothing to evaluate.
    return (uri, None)

  # The incoming path is missing a leading slash. However, rules use a
  # leading slash to distinguish between relative and absolute redirects,
  # so match against the slash-prefixed path.
  path = '/' + uri

  for redirect in rules:
    try:
      # re.match only matches at the beginning of the string, which gives
      # every rule an implied '^'.
      match = re.match(redirect[self.REDIRECT_SRC], path)
      if match is None:
        continue
      # Replace exactly the span that matched. Prefixing the pattern with
      # '^' and calling re.sub would anchor only the first alternative of a
      # pattern such as 'a|b'.
      redirect_uri = (match.expand(redirect[self.REDIRECT_DST]) +
                      path[match.end():])
    except Exception:
      e = sys.exc_info()[1]
      raise ValueError('Error while processing redirect rule.\n'
                       'Rule: %s\n'
                       'Error: %s' % (redirect, e))

    logging.info('Redirect rule matched.\n'
                 'Rule: %s\n'
                 'Src: %s\n'
                 'Dst: %s',
                 redirect[self.REDIRECT_SRC], path, redirect_uri)
    # Default redirect type, if unspecified, is permanent.
    redirect_type = redirect.get(self.REDIRECT_TYPE, self.REDIRECT_TYPE_PERM)
    return (redirect_uri, redirect_type)

  # No redirect found, return the URI exactly as it was passed in.
  return (uri, None)
|
||||||
|
|
||||||
def isCleanUrl(self, name, langName, isValidIntl, isStripped):
|
def isCleanUrl(self, name, langName, isValidIntl, isStripped):
|
||||||
"""Determine whether to pass an incoming url straight to processing.
|
"""Determine whether to pass an incoming url straight to processing.
|
||||||
@@ -208,9 +360,10 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
name: The incoming URL
|
name: The incoming URL
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
False if the request was redirected to '/index.html', or
|
True if the request was redirected to '/index.html'.
|
||||||
The processed URL, otherwise
|
Otherwise False.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# determine if this is a request for a directory
|
# determine if this is a request for a directory
|
||||||
final_path_segment = name
|
final_path_segment = name
|
||||||
final_slash_offset = name.rfind('/')
|
final_slash_offset = name.rfind('/')
|
||||||
@@ -224,9 +377,9 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
uri = ''.join(['/', name, 'index.html'])
|
uri = ''.join(['/', name, 'index.html'])
|
||||||
# logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
|
# logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
|
||||||
self.redirect(uri, False)
|
self.redirect(uri, False)
|
||||||
return False
|
return True
|
||||||
else:
|
else:
|
||||||
return name
|
return False
|
||||||
|
|
||||||
def RedirToIntl(self, name, intlString, langName):
|
def RedirToIntl(self, name, intlString, langName):
|
||||||
"""Redirect an incoming request to the appropriate intl uri.
|
"""Redirect an incoming request to the appropriate intl uri.
|
||||||
@@ -247,7 +400,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
|||||||
else:
|
else:
|
||||||
builtIntlLangUri = name
|
builtIntlLangUri = name
|
||||||
uri = ''.join(['/', builtIntlLangUri])
|
uri = ''.join(['/', builtIntlLangUri])
|
||||||
logging.info('-->>REDIRECTING %s to %s', name, uri)
|
logging.info('-->REDIRECTING %s to %s', name, uri)
|
||||||
self.redirect(uri, False)
|
self.redirect(uri, False)
|
||||||
return uri
|
return uri
|
||||||
|
|
||||||
|
|||||||
51
scripts/app_engine_server/redirects.yaml
Normal file
51
scripts/app_engine_server/redirects.yaml
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# Redirect file.
|
||||||
|
# This file contains the list of rewrite rules that are applied when serving
|
||||||
|
# pages.
|
||||||
|
#
|
||||||
|
# Each redirect has four parts:
|
||||||
|
#
|
||||||
|
# - src: The path to redirect. This is a regex rule prefixed with an implied
|
||||||
|
# '^'. Unless you're doing something advanced, your path should start with
|
||||||
|
# a '/' character.
|
||||||
|
#
|
||||||
|
# - dst: The path to redirect to. If the path begins with a slash,
|
||||||
|
# it is considered a relative redirect. Otherwise, it is an absolute
|
||||||
|
# redirect (and should probably begin with http:// or https://). You may use
|
||||||
|
# capturing groups to preserve part of the source path. To reference a
|
||||||
|
# capturing group, use \N, where N is the (1-based) index of desired group.
|
||||||
|
#
|
||||||
|
# - type: Either 'permanent' or 'temporary', depending on whether you want an
|
||||||
|
# HTTP 301 or HTTP 302 redirect, respectively. See RFC 2616 for the
|
||||||
|
# difference between these:
|
||||||
|
#
|
||||||
|
# http://tools.ietf.org/html/rfc2616
|
||||||
|
#
|
||||||
|
# If you don't specify a type, 'permanent' will be used by default. Note that
|
||||||
|
# this is different from the Apache convention (which uses 'temporary' by
|
||||||
|
# default.)
|
||||||
|
#
|
||||||
|
# - comment: Currently ignored by the computer, but useful for humans.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# redirects:
|
||||||
|
# - src: /foo
|
||||||
|
# dst: /bar
|
||||||
|
# # Redirect /foo to /bar. This will also redirect foo/ and
|
||||||
|
# # foo/test.html. Note that the redirect type is optional. This will be
|
||||||
|
# # treated as a permanent redirect.
|
||||||
|
#
|
||||||
|
# - src: /(.+droid(/.*)?)$
|
||||||
|
# dst: /droids/\1
|
||||||
|
# type: permanent
|
||||||
|
# # Redirect /android to /droids/android and /bugdroid to
|
||||||
|
# # /droids/bugdroid. However, it will not redirect /droid or
|
||||||
|
# # /bugdroids.
|
||||||
|
#
|
||||||
|
# - src: /google
|
||||||
|
# dst: http://www.google.com
|
||||||
|
# type: temporary
|
||||||
|
# # This is an example of a redirect to an absolute URI.
|
||||||
|
#
|
||||||
|
|
||||||
|
redirects:
|
||||||
Reference in New Issue
Block a user