Merge changes I61fd754e,I71bdc5e2,I1371b9ca

* changes:
  vndk-def: Ignore dex strings w/ invalid surrogates
  vndk-def: DexFileReader should return ModifiedUTF8
  vndk-def: Add command that dumps dex strings
This commit is contained in:
Logan Chien
2018-06-21 02:00:56 +00:00
committed by Gerrit Code Review
2 changed files with 84 additions and 27 deletions

View File

@@ -11,7 +11,7 @@ import zipfile
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from compat import TemporaryDirectory from compat import TemporaryDirectory
from vndk_definition_tool import DexFileReader from vndk_definition_tool import DexFileReader, UnicodeSurrogateDecodeError
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
INPUT_DIR = os.path.join(SCRIPT_DIR, 'testdata', 'test_dex_file') INPUT_DIR = os.path.join(SCRIPT_DIR, 'testdata', 'test_dex_file')
@@ -42,6 +42,28 @@ class ModifiedUTF8Test(unittest.TestCase):
b'\xed\xa0\x81\xed\xb0\x80'.decode('mutf-8')) b'\xed\xa0\x81\xed\xb0\x80'.decode('mutf-8'))
def test_decode(self):
    # Every entry is a malformed surrogate sequence; decoding each of them
    # with the 'mutf-8' codec must raise UnicodeSurrogateDecodeError.
    malformed_inputs = (
        # Low surrogate does not come after high surrogate
        b'\xed\xa0\x81\x40',
        # Low surrogate without prior high surrogate
        b'\xed\xb0\x80\x40',
        # Unexpected end after high surrogate
        b'\xed\xa0\x81',
        # Unexpected end after low surrogate
        b'\xed\xb0\x80',
        # Out-of-order surrogate
        b'\xed\xb0\x80\xed\xa0\x81',
    )

    for encoded in malformed_inputs:
        with self.assertRaises(UnicodeSurrogateDecodeError):
            encoded.decode('mutf-8')
class DexFileTest(unittest.TestCase): class DexFileTest(unittest.TestCase):
def _assemble_smali(self, dest, source): def _assemble_smali(self, dest, source):
"""Assemble a smali source file. Skip the test if the smali command is """Assemble a smali source file. Skip the test if the smali command is
@@ -77,8 +99,8 @@ class DexFileTest(unittest.TestCase):
strs = set(DexFileReader.enumerate_dex_strings_buf(buf)) strs = set(DexFileReader.enumerate_dex_strings_buf(buf))
self.assertIn('hello', strs) self.assertIn(b'hello', strs)
self.assertIn('world', strs) self.assertIn(b'world', strs)
def test_enumerate_dex_strings_apk(self): def test_enumerate_dex_strings_apk(self):
@@ -96,10 +118,10 @@ class DexFileTest(unittest.TestCase):
strs = set(DexFileReader.enumerate_dex_strings_apk(zip_file)) strs = set(DexFileReader.enumerate_dex_strings_apk(zip_file))
self.assertIn('hello', strs) self.assertIn(b'hello', strs)
self.assertIn('world', strs) self.assertIn(b'world', strs)
self.assertIn('foo', strs) self.assertIn(b'foo', strs)
self.assertIn('bar', strs) self.assertIn(b'bar', strs)
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -82,6 +82,10 @@ except ImportError:
# Modified UTF-8 Encoder and Decoder # Modified UTF-8 Encoder and Decoder
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
class UnicodeSurrogateDecodeError(UnicodeDecodeError):
    """Decode error raised for malformed surrogate sequences.

    The Modified UTF-8 decoder raises this for an invalid high surrogate, an
    invalid low surrogate, an illegal surrogate, or input that ends right
    after a surrogate.  It derives from UnicodeDecodeError, so handlers
    written for the base class keep catching it, while callers that only want
    to skip strings with broken surrogates can catch this subclass alone.
    """
def encode_mutf8(input, errors='strict'): def encode_mutf8(input, errors='strict'):
i = 0 i = 0
res = io.BytesIO() res = io.BytesIO()
@@ -130,6 +134,9 @@ def decode_mutf8(input, errors='strict'):
def raise_error(start, reason): def raise_error(start, reason):
raise UnicodeDecodeError('mutf-8', input, start, i + 1, reason) raise UnicodeDecodeError('mutf-8', input, start, i + 1, reason)
def raise_surrogate_error(start, reason):
raise UnicodeSurrogateDecodeError('mutf-8', input, start, i + 1, reason)
for i, byte in enumerate_bytes(input): for i, byte in enumerate_bytes(input):
if (byte & 0x80) == 0x00: if (byte & 0x80) == 0x00:
if num_next > 0: if num_next > 0:
@@ -160,14 +167,15 @@ def decode_mutf8(input, errors='strict'):
if num_next == 0: if num_next == 0:
if code >= 0xd800 and code <= 0xdbff: # High surrogate if code >= 0xd800 and code <= 0xdbff: # High surrogate
if code_surrogate is not None: if code_surrogate is not None:
raise_error(start_surrogate, 'invalid high surrogate') raise_surrogate_error(
start_surrogate, 'invalid high surrogate')
code_surrogate = code code_surrogate = code
start_surrogate = start start_surrogate = start
continue continue
if code >= 0xdc00 and code <= 0xdfff: # Low surrogate if code >= 0xdc00 and code <= 0xdfff: # Low surrogate
if code_surrogate is None: if code_surrogate is None:
raise_error(start, 'invalid low surrogate') raise_surrogate_error(start, 'invalid low surrogate')
code = ((code_surrogate & 0x3f) << 10) | (code & 0x3f) + 0x10000 code = ((code_surrogate & 0x3f) << 10) | (code & 0x3f) + 0x10000
code_surrogate = None code_surrogate = None
start_surrogate = None start_surrogate = None
@@ -177,7 +185,7 @@ def decode_mutf8(input, errors='strict'):
code_surrogate = None code_surrogate = None
start_surrogate = None start_surrogate = None
else: else:
raise_error(start_surrogate, 'illegal surrogate') raise_surrogate_error(start_surrogate, 'illegal surrogate')
res.write(create_chr(code)) res.write(create_chr(code))
@@ -185,7 +193,7 @@ def decode_mutf8(input, errors='strict'):
if num_next > 0: if num_next > 0:
raise_error(start, 'unexpected end') raise_error(start, 'unexpected end')
if code_surrogate is not None: if code_surrogate is not None:
raise_error(start_surrogate, 'unexpected end') raise_surrogate_error(start_surrogate, 'unexpected end')
return (res.getvalue(), i) return (res.getvalue(), i)
@@ -799,15 +807,7 @@ class DexFileReader(object):
@classmethod @classmethod
def extract_dex_string(cls, buf, offset=0): def extract_dex_string(cls, buf, offset=0):
end = buf.find(b'\0', offset) end = buf.find(b'\0', offset)
res = buf[offset:] if end == -1 else buf[offset:end] return buf[offset:] if end == -1 else buf[offset:end]
return res.decode('mutf-8', 'ignore')
if sys.version_info < (3,):
_extract_dex_string = extract_dex_string
@classmethod
def extract_dex_string(cls, buf, offset=0):
return cls._extract_dex_string(buf, offset).encode('utf-8')
@classmethod @classmethod
@@ -1051,6 +1051,15 @@ class DexFileReader(object):
return cls.enumerate_dex_strings_vdex_buf(vdex_file.read()) return cls.enumerate_dex_strings_vdex_buf(vdex_file.read())
@classmethod
def enumerate_dex_strings(cls, path):
    """Enumerate the dex strings stored in the file at `path`.

    The file is probed with `is_zipfile()` first and `is_vdex_file()`
    second; the first probe that matches selects the enumerator.

    Args:
        path: Path to the input file.

    Returns:
        The value returned by `enumerate_dex_strings_apk()` (zip input) or
        `enumerate_dex_strings_vdex()` (vdex input), or None when neither
        probe matches.  Callers must check for None before iterating.
    """
    # Dispatch through `cls`, exactly like the probes above, so that a
    # subclass overriding one of the enumerators is honoured instead of
    # being bypassed by a hard-coded class name.
    if cls.is_zipfile(path):
        return cls.enumerate_dex_strings_apk(path)
    if cls.is_vdex_file(path):
        return cls.enumerate_dex_strings_vdex(path)
    return None
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# TaggedDict # TaggedDict
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
@@ -2688,12 +2697,16 @@ def scan_apk_dep(graph, system_dirs, vendor_dirs):
for ap, path in _enumerate_paths(system_dirs, vendor_dirs): for ap, path in _enumerate_paths(system_dirs, vendor_dirs):
# Read the dex file from various file formats # Read the dex file from various file formats
try: try:
if DexFileReader.is_zipfile(path): dex_string_iter = DexFileReader.enumerate_dex_strings(path)
strs = set(DexFileReader.enumerate_dex_strings_apk(path)) if dex_string_iter is None:
elif DexFileReader.is_vdex_file(path):
strs = set(DexFileReader.enumerate_dex_strings_vdex(path))
else:
continue continue
strings = set()
for string in dex_string_iter:
try:
strings.add(string.decode('mutf-8'))
except UnicodeSurrogateDecodeError:
pass
except FileNotFoundError: except FileNotFoundError:
continue continue
except: except:
@@ -2701,12 +2714,12 @@ def scan_apk_dep(graph, system_dirs, vendor_dirs):
raise raise
# Skip the file that does not call System.loadLibrary() # Skip the file that does not call System.loadLibrary()
if 'loadLibrary' not in strs: if 'loadLibrary' not in strings:
continue continue
# Collect libraries from string tables # Collect libraries from string tables
libs = set() libs = set()
for string in strs: for string in strings:
try: try:
libs.update(libnames[string]) libs.update(libnames[string])
except KeyError: except KeyError:
@@ -3647,6 +3660,27 @@ class CheckDepCommand(CheckDepCommandBase):
return 0 if num_errors == 0 else 1 return 0 if num_errors == 0 else 1
class DumpDexStringCommand(Command):
    """Sub-command `dump-dex-string`: print the strings found in a dex file."""

    def __init__(self):
        super(DumpDexStringCommand, self).__init__(
            'dump-dex-string',
            help='Dump string literals defined in a dex file')

    def add_argparser_options(self, parser):
        """Register the positional `dex_file` argument on `parser`."""
        super(DumpDexStringCommand, self).add_argparser_options(parser)

        parser.add_argument('dex_file', help='path to an input dex file')

    def main(self, args):
        """Print every string enumerated from `args.dex_file`.

        Returns:
            0 on success, or 1 when the input file is in a format that
            `DexFileReader.enumerate_dex_strings()` does not recognize.
        """
        import sys

        dex_strings = DexFileReader.enumerate_dex_strings(args.dex_file)

        # enumerate_dex_strings() returns None for a file that is neither a
        # zip archive nor a vdex file.  Iterating None would raise a
        # TypeError, so report the problem instead.
        if dex_strings is None:
            sys.stderr.write(
                'error: {}: unsupported file format\n'.format(args.dex_file))
            return 1

        for string in dex_strings:
            try:
                print(string)
            except (UnicodeEncodeError, UnicodeDecodeError):
                # Fall back to an escaped representation when the string
                # cannot be written to the output stream as-is.
                print(repr(string))
        return 0
class CheckEligibleListCommand(CheckDepCommandBase): class CheckEligibleListCommand(CheckDepCommandBase):
def __init__(self): def __init__(self):
super(CheckEligibleListCommand, self).__init__( super(CheckEligibleListCommand, self).__init__(
@@ -3814,6 +3848,7 @@ def main():
register_subcmd(CheckDepCommand()) register_subcmd(CheckDepCommand())
register_subcmd(CheckEligibleListCommand()) register_subcmd(CheckEligibleListCommand())
register_subcmd(DepGraphCommand()) register_subcmd(DepGraphCommand())
register_subcmd(DumpDexStringCommand())
args = parser.parse_args() args = parser.parse_args()
if not args.subcmd: if not args.subcmd: