They were moved into sdk/scripts when sdk was split from development. Change-Id: I8404ae5fdeb9060adb76357f29b42c4c8e2054ee
367 lines
13 KiB
Python
Executable File
367 lines
13 KiB
Python
Executable File
#!/usr/bin/python2.4
|
|
#
|
|
# Copyright (C) 2008 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
"""Module to compress directories into a series of zip files.
|
|
|
|
This module will take a directory and compress all its contents, including
|
|
child directories into a series of zip files named N.zip where 'N' ranges from
|
|
0 to infinity. The zip files will all be below a certain specified maximum
|
|
threshold.
|
|
|
|
The directory is compressed with a depth first traversal, each directory's
|
|
file contents being compressed as it is visited, before the compression of any
|
|
child directory's contents. In this way the files within an archive are ordered
|
|
and the archives themselves are ordered.
|
|
|
|
The class also constructs a 'main.py' file intended for use with Google App
|
|
Engine with a custom App Engine program not currently distributed with this
|
|
code base. The custom App Engine runtime can leverage the index files written
|
|
out by this class to more quickly locate which zip file to serve a given URL
|
|
from.
|
|
"""
|
|
|
|
__author__ = 'jmatt@google.com (Justin Mattson)'
|
|
|
|
import optparse
|
|
import os
|
|
import stat
|
|
import sys
|
|
import zipfile
|
|
import divide_and_compress_constants
|
|
|
|
|
|
def CreateOptionsParser():
  """Builds the command line parser for this script.

  Returns:
    An optparse.OptionParser that accepts the -s/--sourcefiles,
    -d/--destination, -f/--filesize and -n/--nocompress options.
  """
  destination_help = ('Where to put the archive files, this should not be'
                      ' a child of where the source files exist.')
  filesize_help = ('Maximum size of archive files. A number followed by '
                   'a magnitude indicator either "B", "K", "M", or "G". '
                   'Examples:\n 1000000B == one million BYTES\n'
                   ' 1.2M == one point two MEGABYTES\n'
                   ' 1M == 1048576 BYTES')
  nocompress_help = ('Whether the archive files should be compressed, or '
                     'just a concatenation of the source files')

  # Each entry is (flag names, keyword settings) for one add_option call;
  # the order here is the order the options appear in the help output.
  option_specs = [
      (('-s', '--sourcefiles'),
       {'dest': 'sourcefiles',
        'default': None,
        'help': 'The directory containing the files to compress'}),
      (('-d', '--destination'),
       {'dest': 'destination',
        'default': None,
        'help': destination_help}),
      (('-f', '--filesize'),
       {'dest': 'filesize',
        'default': '1M',
        'help': filesize_help}),
      # Passing -n stores False into options.compress (default is True).
      (('-n', '--nocompress'),
       {'action': 'store_false',
        'dest': 'compress',
        'default': True,
        'help': nocompress_help}),
  ]

  parser = optparse.OptionParser()
  for flags, settings in option_specs:
    parser.add_option(*flags, **settings)
  return parser
|
|
|
|
|
|
def VerifyArguments(options, parser):
  """Checks that the mandatory command line options were supplied.

  If either the source or the destination option is None, or is missing from
  the options object altogether, the parser's help text is printed and the
  process exits with status -1.

  Args:
    options: The parsed command line options.
    parser: The parser that produced the options; used to print the help.
  """
  for required in ('sourcefiles', 'destination'):
    # A missing attribute is treated exactly like an unset (None) option.
    if getattr(options, required, None) is None:
      parser.print_help()
      sys.exit(-1)
|
|
|
|
|
|
def ParseSize(size_str):
  """Parse the file size argument from a string to a number of bytes.

  The string is a decimal number immediately followed by a one letter
  magnitude indicator: "B" (bytes), "K" (x1024), "M" (x1048576) or
  "G" (x1073741824). The indicator is accepted in either case. Fractional
  results are truncated towards zero.

  Args:
    size_str: The string representation of the file size, e.g. '1.2M'.

  Returns:
    The file size in bytes, as an integer.

  Raises:
    ValueError: Raises an error if the numeric or qualifier portions of the
        file size argument is invalid, or if the size is negative or not a
        finite number.
  """
  if len(size_str) < 2:
    raise ValueError(('filesize argument not understood, please include'
                      ' a numeric value and magnitude indicator'))

  multipliers = {'B': 1, 'K': 1024, 'M': 1048576, 'G': 1073741824}
  magnitude = size_str[-1].upper()
  if magnitude not in multipliers:
    raise ValueError(('filesize magnitude indicator not valid, must be "B",'
                      '"K","M", or "G"'))

  # float() itself raises ValueError for a malformed numeric portion.
  numeral = float(size_str[:-1]) * multipliers[magnitude]

  # NaN is the only value that is not equal to itself.
  if numeral != numeral:
    raise ValueError('filesize numeric value must be a finite number')
  try:
    size = int(numeral)
  except OverflowError:
    # int() of an infinite float raises OverflowError; report it with the
    # exception type this function documents.
    raise ValueError('filesize numeric value must be a finite number')

  if size < 0:
    raise ValueError('filesize must not be negative')
  return size
|
|
|
|
|
|
class DirectoryZipper(object):
  """Class to compress a directory and all its sub-directories.

  Files are packed into archives named '0.zip', '1.zip', ... inside the
  output directory. Whenever adding a file pushes the current archive over
  the maximum size, that file is taken back out and becomes the first member
  of the next archive. The first member of every archive is recorded in the
  index file ('main.py' in the output directory).
  """

  def __init__(self, output_path, base_dir, archive_size, enable_compression):
    """DirectoryZipper constructor.

    Args:
      output_path: A string, the path to write the archives and index file to.
      base_dir: A string, the directory to compress.
      archive_size: A number, the maximum size, in bytes, of a single
          archive file.
      enable_compression: A boolean, whether or not compression should be
          enabled, if disabled, the files will be written into an
          uncompressed zip.
    """
    self.output_dir = output_path
    self.current_archive = '0.zip'
    self.base_path = base_dir
    self.max_size = archive_size
    self.compress = enable_compression

    # Set index_fp to None, because we don't know what it will be yet.
    self.index_fp = None

  def StartCompress(self):
    """Start compress of the directory.

    This will start the compression process and write the archives to the
    specified output directory. It will also produce a 'main.py' file in the
    output directory that maps from archive to the first file it contains,
    wrapped in the preamble and endpiece taken from
    divide_and_compress_constants.
    """
    self.index_fp = open(os.path.join(self.output_dir, 'main.py'), 'w')
    try:
      self.index_fp.write(divide_and_compress_constants.file_preamble)
      # os.walk replaces os.path.walk, which no longer exists in Python 3.
      # Both visit a directory before any of its children.
      for dir_path, dir_names, file_names in os.walk(self.base_path):
        self.CompressDirectory(1, dir_path, dir_names + file_names)
        # Sorting in place makes os.walk descend into the sub-directories in
        # alphabetical order, so the archives stay ordered.
        dir_names.sort()
      self.index_fp.write(divide_and_compress_constants.file_endpiece)
    finally:
      # Never leave the index file handle open, even if compression fails.
      self.index_fp.close()

  def RemoveLastFile(self, archive_path=None):
    """Removes the last item in the archive.

    This removes the last item in the archive by reading the items out of the
    archive, adding them to a new archive, deleting the old archive, and
    moving the new archive to the location of the old archive.

    Args:
      archive_path: Path to the archive to modify. This archive should not be
          open elsewhere, since it will need to be deleted. Defaults to the
          current archive in the output directory.

    Returns:
      None. The rewritten archive is closed before this method returns.
    """
    if archive_path is None:
      archive_path = os.path.join(self.output_dir, self.current_archive)

    # Move the old file and create a new one at its old location.
    root, ext = os.path.splitext(archive_path)
    old_archive = ''.join([root, '-old', ext])
    os.rename(archive_path, old_archive)

    # By default, store uncompressed.
    compress_bit = zipfile.ZIP_STORED
    if self.compress:
      compress_bit = zipfile.ZIP_DEFLATED

    old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
    try:
      new_fp = self.OpenZipFileAtPath(archive_path,
                                      mode='w',
                                      compress=compress_bit)
      try:
        # Copy every member of the old archive except the last one.
        for zip_member in old_fp.infolist()[:-1]:
          new_fp.writestr(zip_member, old_fp.read(zip_member.filename))
      finally:
        new_fp.close()
    finally:
      old_fp.close()

    os.unlink(old_archive)

  def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
    """Opens the zip file at path.

    This method is mainly for testing purposes, eg dependency injection.

    Args:
      path: A string, the path of the zip file to open.
      mode: The zipfile mode ('r', 'w' or 'a'). When None, an existing file
          is opened for appending and a missing one is created.
      compress: The zipfile compression constant, ignored when reading.

    Returns:
      An open zipfile.ZipFile object; the caller must close it.
    """
    if mode is None:
      if os.path.exists(path):
        mode = 'a'
      else:
        mode = 'w'

    if mode == 'r':
      return zipfile.ZipFile(path, mode)
    return zipfile.ZipFile(path, mode, compress)

  def CompressDirectory(self, unused_id, dir_path, dir_contents):
    """Method to compress the given directory.

    This method compresses the directory 'dir_path'. It will add to an existing
    zip file that still has space and create new ones as necessary to keep zip
    file sizes under the maximum specified size. This also writes out the
    mapping of files to archives to the self.index_fp file descriptor.
    Entries that are not regular files (directories, symbolic links) are
    skipped.

    Args:
      unused_id: A placeholder kept for compatibility with the os.path.walk
          callback signature; this is not used by this method.
      dir_path: A string, the path to the directory to compress.
      dir_contents: A list of directory contents to be compressed. It is
          sorted in place so the files are processed alphabetically.
    """
    dir_contents.sort()

    compress_bit = zipfile.ZIP_DEFLATED
    if not self.compress:
      compress_bit = zipfile.ZIP_STORED

    # Zip all files in this directory, adding to existing archives and creating
    # as necessary.
    for filename in dir_contents:
      target_file = os.path.join(dir_path, filename)
      if not os.path.isfile(target_file):
        continue

      # Keep trying the same file until it is stored or given up on.
      while True:
        if not self.AddFileToArchive(target_file, compress_bit):
          # Nothing was written (e.g. a symbolic link to a file), so there is
          # no archive change to validate and no index record to emit.
          break

        if self.ArchiveIsValid():
          # Write an index record if necessary.
          self.WriteIndexRecord()
          break

        # The new file made the archive too large.
        # IF fixing fails, the last added file was too large, skip it
        # ELSE the current archive filled normally, make a new one and try
        # adding the file again.
        if not self.FixArchive('SIZE'):
          break
        archive_number = int(os.path.splitext(self.current_archive)[0])
        self.current_archive = '%i.zip' % (archive_number + 1)

  def WriteIndexRecord(self):
    """Write an index record to the index file.

    Only write an index record if this is the first file to go into archive.

    Returns:
      True if an archive record is written, False if it isn't.
    """
    archive = self.OpenZipFileAtPath(
        os.path.join(self.output_dir, self.current_archive), 'r')
    try:
      archive_index = archive.infolist()
    finally:
      archive.close()

    if len(archive_index) != 1:
      return False
    self.index_fp.write(
        '[\'%s\', \'%s\'],\n' % (self.current_archive,
                                 archive_index[0].filename))
    return True

  def FixArchive(self, problem):
    """Make the archive compliant.

    Args:
      problem: A string naming the reason the archive is invalid. Only 'SIZE'
          is handled.

    Returns:
      Whether the file(s) removed to fix the archive could conceivably be
      in an archive, but for some reason can't be added to this one: False
      when the archive held a single file that is too large on its own (the
      archive is deleted), True when the last file was removed from a fuller
      archive. None if the problem is not recognized.
    """
    if problem != 'SIZE':
      return None

    archive_path = os.path.join(self.output_dir, self.current_archive)
    archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
    try:
      members = archive_obj.infolist()
    finally:
      archive_obj.close()

    # IF there is a single file, that means its too large to compress,
    # delete the created archive
    # ELSE do normal finalization.
    if len(members) == 1:
      print('WARNING: %s%s is too large to store.' % (
          self.base_path, members[0].filename))
      os.unlink(archive_path)
      return False

    self.RemoveLastFile(archive_path)
    print('Final archive size for %s is %i' % (
        self.current_archive, os.path.getsize(archive_path)))
    return True

  def AddFileToArchive(self, filepath, compress_bit):
    """Add the file at filepath to the current archive.

    The member name inside the archive is filepath with the leading
    base directory prefix removed.

    Args:
      filepath: A string, the path of the file to add.
      compress_bit: The zipfile compression constant (zipfile.ZIP_STORED or
          zipfile.ZIP_DEFLATED) to use when adding the file.

    Returns:
      True if the file could be added (typically because this is a file) or
      False if it couldn't be added (typically because its a directory or a
      symbolic link).
    """
    if not os.path.isfile(filepath) or os.path.islink(filepath):
      return False

    if os.path.getsize(filepath) > 1048576:
      print('Warning: %s is potentially too large to serve on GAE' % filepath)

    curr_archive_path = os.path.join(self.output_dir, self.current_archive)
    archive = self.OpenZipFileAtPath(curr_archive_path,
                                     compress=compress_bit)
    try:
      # Add the file to the archive.
      archive.write(filepath, filepath[len(self.base_path):])
    finally:
      archive.close()
    return True

  def ArchiveIsValid(self):
    """Check whether the archive is valid.

    Currently this only checks whether the archive is under the required size.
    The thought is that eventually this will do additional validation.

    Returns:
      True if the archive is valid, False if its not.
    """
    archive_path = os.path.join(self.output_dir, self.current_archive)
    return os.path.getsize(archive_path) <= self.max_size
|
|
|
|
|
|
def main(argv):
  """Runs the tool: parses and checks the arguments, then compresses.

  Args:
    argv: The full argument vector; the program name at index 0 is skipped.
  """
  parser = CreateOptionsParser()
  # parse_args returns (options, leftover positional args); only the options
  # are used.
  options = parser.parse_args(args=argv[1:])[0]
  VerifyArguments(options, parser)

  max_archive_bytes = ParseSize(options.filesize)
  zipper = DirectoryZipper(options.destination, options.sourcefiles,
                           max_archive_bytes, options.compress)
  zipper.StartCompress()


# Only run the tool when executed as a script, not when imported.
if __name__ == '__main__':
  main(sys.argv)
|