Adding manual redirect support to developer.android.com.
Internal bug: 2347145 Change-Id: I0cdcec8a23704ab80878e8cc781b735fd2173011
This commit is contained in:
@@ -31,7 +31,10 @@ __author__ = 'jmatt@google.com (Justin Mattson)'
|
||||
import email.Utils
|
||||
import logging
|
||||
import mimetypes
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import yaml
|
||||
import zipfile
|
||||
|
||||
from google.appengine.api import memcache
|
||||
@@ -94,6 +97,18 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
PUBLIC = True # public cache setting
|
||||
CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs
|
||||
NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existent URLs
|
||||
REDIRECT_PREFIX = 'redirect://' # memcache key prefix for redirect data
|
||||
REDIRECT_FILE = 'redirects.yaml' # Name of file that contains redirect table
|
||||
REDIRECT_SRC = 'src' # Name of the 'source' attribute for a
|
||||
# redirect table entry
|
||||
REDIRECT_DST = 'dst' # Name of the 'destination' attribute for
|
||||
# a redirect table entry
|
||||
REDIRECT_TYPE = 'type' # Name of the 'type' attribute for a
|
||||
# redirect table entry
|
||||
REDIRECT_TYPE_PERM = 'permanent' # Redirect 'type' string indicating a 301
|
||||
# redirect should be served
|
||||
REDIRECT_TYPE_TEMP = 'temporary' # Redirect 'type' string indicating a 302
|
||||
# Redirect should be served
|
||||
intlString = 'intl/'
|
||||
validLangs = ['en', 'de', 'es', 'fr','it','ja','zh-CN','zh-TW']
|
||||
|
||||
@@ -127,40 +142,54 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
#logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
|
||||
logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
|
||||
|
||||
# Preprocess the req url. If it references a directory or the domain itself,
|
||||
# append '/index.html' to the url and 302 redirect. Otherwise, continue
|
||||
# processing the request below.
|
||||
name = self.PreprocessUrl(reqUri, langName)
|
||||
if name:
|
||||
# Do some prep for handling intl requests. Parse the url and validate
|
||||
# the intl/lang substring, extract the url lang code (urlLangName) and the
|
||||
# uri that follows the intl/lang substring (contentUri)
|
||||
sections = name.split("/", 2)
|
||||
contentUri = 0
|
||||
isIntl = len(sections) > 1 and (sections[0] == "intl")
|
||||
sections = reqUri.split("/", 2)
|
||||
isIntl = len(sections) > 2 and (sections[0] == "intl")
|
||||
if isIntl:
|
||||
isValidIntl = sections[1] in self.validLangs
|
||||
if isValidIntl:
|
||||
urlLangName = sections[1]
|
||||
contentUri = sections[2]
|
||||
logging.info(' Content URI is [%s]...', contentUri)
|
||||
if (urlLangName != langName) or (langName == 'en'):
|
||||
if isValidIntl:
|
||||
if (langName != urlLangName) or (langName == 'en'):
|
||||
# if the lang code in the request is different from that in
|
||||
# the cookie, or if the target lang is en, strip the
|
||||
# intl/nn substring. It will later be redirected to
|
||||
# the user's preferred language url.
|
||||
# logging.info(' Handling a MISMATCHED intl request')
|
||||
name = contentUri
|
||||
reqUri = contentUri
|
||||
isStripped = True
|
||||
isValidIntl = False
|
||||
isIntl = False
|
||||
#logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
|
||||
#else:
|
||||
# logging.info('INTL PREP no need to reset langName')
|
||||
else:
|
||||
contentUri = reqUri
|
||||
|
||||
# Apply manual redirects from redirects.yaml. This occurs before any
|
||||
# other mutations are performed, to avoid odd redirect behavior
|
||||
# (For example, a user may want to redirect a directory without having
|
||||
# /index.html appended.)
|
||||
did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
|
||||
if did_redirect:
|
||||
return
|
||||
|
||||
# Preprocess the req url. If it references a directory or the domain itself,
|
||||
# append '/index.html' to the url and 302 redirect. Otherwise, continue
|
||||
# processing the request below.
|
||||
did_redirect = self.PreprocessUrl(reqUri, langName)
|
||||
if did_redirect:
|
||||
return
|
||||
|
||||
# Send for processing
|
||||
if self.isCleanUrl(name, langName, isValidIntl, isStripped):
|
||||
if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
|
||||
# handle a 'clean' request.
|
||||
# Try to form a response using the actual request url.
|
||||
# logging.info(' Request being handled as clean: [%s]', name)
|
||||
if not self.CreateResponse(name, langName, isValidIntl, resetLangCookie):
|
||||
if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
|
||||
# If CreateResponse returns False, there was no such document
|
||||
# in the intl/lang tree. Before going to 404, see if there is an
|
||||
# English-language version of the doc in the default
|
||||
@@ -173,7 +202,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
# because intl urls are passed through clean and retried in English,
|
||||
# if necessary.
|
||||
# logging.info(' Handling an invalid intl request...')
|
||||
self.CreateResponse(name, langName, isValidIntl, resetLangCookie)
|
||||
self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
|
||||
|
||||
else:
|
||||
# handle the case where we have a non-clean url (usually a non-intl
|
||||
@@ -181,7 +210,130 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
# that is set. Prepend an intl/lang string to the request url and
|
||||
# send it as a 302 redirect. After the redirect, the subsequent
|
||||
# request will be handled as a clean url.
|
||||
self.RedirToIntl(name, self.intlString, langName)
|
||||
self.RedirToIntl(reqUri, self.intlString, langName)
|
||||
|
||||
def ProcessManualRedirects(self, contentUri, langName, isIntl):
  """Compute any manual redirects for a request and execute them.

  This allows content authors to manually define a set of regex rules which,
  when matched, will cause an HTTP redirect to be performed.

  Redirect rules are typically stored in a file named redirects.yaml. See the
  comments in that file for more information about formatting.

  Redirect computations are stored in memcache (keyed by REDIRECT_PREFIX plus
  the content URI) so the rule table is not re-evaluated on every request.

  Note that international URIs are handled automatically, and are assumed to
  mirror redirects for non-intl requests.

  Args:
    contentUri: The relative URI (without leading slash) that was requested.
        This should NOT contain an intl-prefix, if otherwise present.
    langName: The requested language.
    isIntl: True if contentUri originally contained an intl prefix.

  Returns:
    boolean: True if a redirect has been set, False otherwise.

  Raises:
    ValueError: If the matching rule specifies a redirect type other than
        REDIRECT_TYPE_PERM or REDIRECT_TYPE_TEMP.
  """
  # Redirect data is stored in memcache for performance
  memcache_key = self.REDIRECT_PREFIX + contentUri
  redirect_data = memcache.get(memcache_key)
  if redirect_data is None:
    logging.info('Redirect cache miss. Computing new redirect data.\n'
                 'Memcache Key: %s', memcache_key)
    redirect_data = self.ComputeManualRedirectUrl(contentUri)
    memcache.set(memcache_key, redirect_data)
  redirect_uri = redirect_data[0]
  redirect_type = redirect_data[1]

  if redirect_type is None:
    # No redirect necessary
    return False

  if redirect_type == self.REDIRECT_TYPE_PERM:
    is_permanent = True
  elif redirect_type == self.REDIRECT_TYPE_TEMP:
    is_permanent = False
  else:
    # Invalid redirect type. Raise a real exception object; raising a tuple
    # or a plain string is not a valid way to signal an error.
    logging.error('Invalid redirect type: %s', redirect_type)
    raise ValueError('Invalid redirect type: %s' % redirect_type)

  # If this is an international URL, prepend the intl path to minimize the
  # number of redirects. Only site-relative destinations (a single leading
  # slash) get the prefix; an absolute destination such as
  # 'http://www.google.com' must be passed through untouched.
  if (isIntl and redirect_uri.startswith('/')
      and not redirect_uri.startswith('//')):
    redirect_uri = '/%s%s%s' % (self.intlString, langName, redirect_uri)

  if is_permanent:
    logging.info('Sending permanent redirect: %s', redirect_uri)
  else:
    logging.info('Sending temporary redirect: %s', redirect_uri)
  self.redirect(redirect_uri, permanent=is_permanent)
  return True
|
||||
|
||||
def ComputeManualRedirectUrl(self, uri):
  """Read redirects file and evaluate redirect rules for a given URI.

  Rules are tried in file order and the first rule whose 'src' regex matches
  the start of '/' + uri wins.

  Args:
    uri: The relative URI (without leading slash) for which redirect data
        should be computed. No special handling of intl URIs is performed
        at this level.

  Returns:
    tuple: The computed redirect data. This tuple has two parts:
      redirect_uri: The new URI that should be used. (If no redirect rule is
          found, or the redirect file cannot be opened, the original input
          to 'uri' is returned unchanged.)
      redirect_type: The rule's 'type' value (expected to be 'permanent' for
          an HTTP 301 redirect or 'temporary' for an HTTP 302 redirect;
          'permanent' if the rule has no type), or None if no redirect should
          be performed.

  Raises:
    ValueError: If evaluating a redirect rule fails (for example a malformed
        regex, or a rule that lacks the 'src' or 'dst' attribute).
  """
  # Redirects are defined in a file named redirects.yaml.
  try:
    f = open(self.REDIRECT_FILE)
    try:
      # NOTE(review): yaml.load can construct arbitrary objects. The file is
      # presumably trusted, deployed content; consider yaml.safe_load.
      data = yaml.load(f)
    finally:
      # Close the handle even when the YAML parser raises.
      f.close()
  except IOError:
    # sys.exc_info() keeps this handler valid on both old and new Pythons.
    e = sys.exc_info()[1]
    logging.warning('Error opening redirect file (%s): %s',
                    self.REDIRECT_FILE, e.strerror)
    return (uri, None)

  # The incoming path is missing a leading slash. However, many parts of the
  # redirect system require leading slashes to distinguish between relative
  # and absolute redirects. So, to compensate for this, rules are matched
  # against a slash-prefixed copy of the path.
  path = '/' + uri

  # Check to make sure we actually got an iterable list out of the YAML file
  if data is None:
    logging.warning('Redirect file (%s) not valid YAML.', self.REDIRECT_FILE)
  elif 'redirects' not in data:
    logging.warning('Redirect file (%s) not properly formatted -- no '
                    '\'redirects:\' header.', self.REDIRECT_FILE)
  elif hasattr(data['redirects'], '__iter__'):
    # Iterate through redirect data, try to find a redirect that matches.
    for redirect in data['redirects']:
      try:
        # Note: re.match only matches at the beginning of the string, so
        # every rule behaves as if it were prefixed with an implied '^'.
        match = re.match(redirect[self.REDIRECT_SRC], path)
        if match:
          # Match found. Apply redirect rule: expand the destination
          # template (resolving \N group references) and keep whatever
          # follows the matched prefix. Working from the match object
          # guarantees only the matched prefix is rewritten, even when the
          # rule contains a top-level alternation.
          redirect_uri = (match.expand(redirect[self.REDIRECT_DST]) +
                          path[match.end():])
          logging.info('Redirect rule matched.\n'
                       'Rule: %s\n'
                       'Src: %s\n'
                       'Dst: %s',
                       redirect[self.REDIRECT_SRC], path, redirect_uri)
          if self.REDIRECT_TYPE in redirect:
            redirect_type = redirect[self.REDIRECT_TYPE]
          else:
            # Default redirect type, if unspecified
            redirect_type = self.REDIRECT_TYPE_PERM
          return (redirect_uri, redirect_type)
      except Exception:
        e = sys.exc_info()[1]
        # Report the whole rule: indexing it again for 'src' could itself
        # fail if that attribute is what was missing.
        raise ValueError('Error while processing redirect rule.\n'
                         'Rule: %r\n'
                         'Error: %s' % (redirect, e))

  # No redirect found, return URL unchanged
  return (uri, None)
|
||||
|
||||
def isCleanUrl(self, name, langName, isValidIntl, isStripped):
|
||||
"""Determine whether to pass an incoming url straight to processing.
|
||||
@@ -208,9 +360,10 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
name: The incoming URL
|
||||
|
||||
Returns:
|
||||
False if the request was redirected to '/index.html', or
|
||||
The processed URL, otherwise
|
||||
True if the request was redirected to '/index.html'.
|
||||
Otherwise False.
|
||||
"""
|
||||
|
||||
# determine if this is a request for a directory
|
||||
final_path_segment = name
|
||||
final_slash_offset = name.rfind('/')
|
||||
@@ -224,9 +377,9 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
uri = ''.join(['/', name, 'index.html'])
|
||||
# logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
|
||||
self.redirect(uri, False)
|
||||
return False
|
||||
return True
|
||||
else:
|
||||
return name
|
||||
return False
|
||||
|
||||
def RedirToIntl(self, name, intlString, langName):
|
||||
"""Redirect an incoming request to the appropriate intl uri.
|
||||
@@ -247,7 +400,7 @@ class MemcachedZipHandler(webapp.RequestHandler):
|
||||
else:
|
||||
builtIntlLangUri = name
|
||||
uri = ''.join(['/', builtIntlLangUri])
|
||||
logging.info('-->>REDIRECTING %s to %s', name, uri)
|
||||
logging.info('-->REDIRECTING %s to %s', name, uri)
|
||||
self.redirect(uri, False)
|
||||
return uri
|
||||
|
||||
|
||||
51
scripts/app_engine_server/redirects.yaml
Normal file
51
scripts/app_engine_server/redirects.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
# Redirect file.
|
||||
# This file contains the list of rewrite rules that are applied when serving
|
||||
# pages.
|
||||
#
|
||||
# Each redirect has four parts:
|
||||
#
|
||||
# - src: The path to redirect. This is a regex rule prefixed with an implied
|
||||
# '^'. Unless you're doing something advanced, your path should start with
|
||||
# '/' character.
|
||||
#
|
||||
# - dst: The path to redirect to. If the path begins with a slash,
|
||||
# it is considered a relative redirect. Otherwise, it is an absolute
|
||||
# redirect (and should probably begin with http:// or https://). You may use
|
||||
# capturing groups to preserve part of the source path. To reference a
|
||||
# capturing group, use \N, where N is the (1-based) index of desired group.
|
||||
#
|
||||
# - type: Either 'permanent' or 'temporary', depending on whether you want an
|
||||
# HTTP 301 or HTTP 302 redirect, respectively. See RFC 2616 for the
|
||||
# difference between these:
|
||||
#
|
||||
# http://tools.ietf.org/html/rfc2616
|
||||
#
|
||||
# If you don't specify a type, 'permanent' will be used by default. Note that
|
||||
# this is different from the Apache convention (which uses 'temporary' by
|
||||
# default.)
|
||||
#
|
||||
# - comment: Currently ignored by the computer, but useful for humans.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# redirects:
|
||||
# - src: /foo
|
||||
# dst: /bar
|
||||
# # Redirect /foo to /bar. This will also redirect foo/ and
|
||||
# # foo/test.html. Note that the redirect type is optional. This will be
|
||||
# # treated as a permanent redirect.
|
||||
#
|
||||
# - src: /(.+droid(/.*)?)$
|
||||
# dst: /droids/\1
|
||||
# type: permanent
|
||||
# # Redirect /android to /droids/android and /bugdroid to
|
||||
# # /droids/bugdroid. However, it will not redirect /droid or
|
||||
# # /bugdroids.
|
||||
#
|
||||
# - src: /google
|
||||
# dst: http://www.google.com
|
||||
# type: temporary
|
||||
# # This is an example of a redirect to an absolute URI.
|
||||
#
|
||||
|
||||
redirects:
|
||||
Reference in New Issue
Block a user