Merge changes I61fd754e,I71bdc5e2,I1371b9ca
* changes: (1) vndk-def: Ignore dex strings w/ invalid surrogates; (2) vndk-def: DexFileReader should return ModifiedUTF8; (3) vndk-def: Add command that dumps dex strings
This commit is contained in:
@@ -11,7 +11,7 @@ import zipfile
|
|||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from compat import TemporaryDirectory
|
from compat import TemporaryDirectory
|
||||||
from vndk_definition_tool import DexFileReader
|
from vndk_definition_tool import DexFileReader, UnicodeSurrogateDecodeError
|
||||||
|
|
||||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
INPUT_DIR = os.path.join(SCRIPT_DIR, 'testdata', 'test_dex_file')
|
INPUT_DIR = os.path.join(SCRIPT_DIR, 'testdata', 'test_dex_file')
|
||||||
@@ -42,6 +42,28 @@ class ModifiedUTF8Test(unittest.TestCase):
|
|||||||
b'\xed\xa0\x81\xed\xb0\x80'.decode('mutf-8'))
|
b'\xed\xa0\x81\xed\xb0\x80'.decode('mutf-8'))
|
||||||
|
|
||||||
|
|
||||||
|
def test_decode(self):
    """Check that malformed surrogate sequences are rejected by 'mutf-8'.

    Every byte string listed below is decoded with the 'mutf-8' codec and
    must raise UnicodeSurrogateDecodeError.
    """
    # NOTE(review): the context just above this method already contains
    # decode assertions; if that method is also named `test_decode`, this
    # definition shadows it and those assertions never run -- confirm.
    malformed_inputs = (
        # Low surrogate does not come after high surrogate
        b'\xed\xa0\x81\x40',
        # Low surrogate without prior high surrogate
        b'\xed\xb0\x80\x40',
        # Unexpected end after high surrogate
        b'\xed\xa0\x81',
        # Unexpected end after low surrogate
        b'\xed\xb0\x80',
        # Out-of-order surrogate
        b'\xed\xb0\x80\xed\xa0\x81',
    )

    for data in malformed_inputs:
        with self.assertRaises(UnicodeSurrogateDecodeError):
            data.decode('mutf-8')
|
||||||
|
|
||||||
|
|
||||||
class DexFileTest(unittest.TestCase):
|
class DexFileTest(unittest.TestCase):
|
||||||
def _assemble_smali(self, dest, source):
|
def _assemble_smali(self, dest, source):
|
||||||
"""Assemble a smali source file. Skip the test if the smali command is
|
"""Assemble a smali source file. Skip the test if the smali command is
|
||||||
@@ -77,8 +99,8 @@ class DexFileTest(unittest.TestCase):
|
|||||||
|
|
||||||
strs = set(DexFileReader.enumerate_dex_strings_buf(buf))
|
strs = set(DexFileReader.enumerate_dex_strings_buf(buf))
|
||||||
|
|
||||||
self.assertIn('hello', strs)
|
self.assertIn(b'hello', strs)
|
||||||
self.assertIn('world', strs)
|
self.assertIn(b'world', strs)
|
||||||
|
|
||||||
|
|
||||||
def test_enumerate_dex_strings_apk(self):
|
def test_enumerate_dex_strings_apk(self):
|
||||||
@@ -96,10 +118,10 @@ class DexFileTest(unittest.TestCase):
|
|||||||
|
|
||||||
strs = set(DexFileReader.enumerate_dex_strings_apk(zip_file))
|
strs = set(DexFileReader.enumerate_dex_strings_apk(zip_file))
|
||||||
|
|
||||||
self.assertIn('hello', strs)
|
self.assertIn(b'hello', strs)
|
||||||
self.assertIn('world', strs)
|
self.assertIn(b'world', strs)
|
||||||
self.assertIn('foo', strs)
|
self.assertIn(b'foo', strs)
|
||||||
self.assertIn('bar', strs)
|
self.assertIn(b'bar', strs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
@@ -82,6 +82,10 @@ except ImportError:
|
|||||||
# Modified UTF-8 Encoder and Decoder
|
# Modified UTF-8 Encoder and Decoder
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class UnicodeSurrogateDecodeError(UnicodeDecodeError):
    """Decode error raised for malformed surrogate sequences.

    It subclasses UnicodeDecodeError without adding behavior, so callers can
    catch surrogate problems separately from other decode failures.
    """
|
||||||
|
|
||||||
|
|
||||||
def encode_mutf8(input, errors='strict'):
|
def encode_mutf8(input, errors='strict'):
|
||||||
i = 0
|
i = 0
|
||||||
res = io.BytesIO()
|
res = io.BytesIO()
|
||||||
@@ -130,6 +134,9 @@ def decode_mutf8(input, errors='strict'):
|
|||||||
def raise_error(start, reason):
|
def raise_error(start, reason):
|
||||||
raise UnicodeDecodeError('mutf-8', input, start, i + 1, reason)
|
raise UnicodeDecodeError('mutf-8', input, start, i + 1, reason)
|
||||||
|
|
||||||
|
def raise_surrogate_error(start, reason):
|
||||||
|
raise UnicodeSurrogateDecodeError('mutf-8', input, start, i + 1, reason)
|
||||||
|
|
||||||
for i, byte in enumerate_bytes(input):
|
for i, byte in enumerate_bytes(input):
|
||||||
if (byte & 0x80) == 0x00:
|
if (byte & 0x80) == 0x00:
|
||||||
if num_next > 0:
|
if num_next > 0:
|
||||||
@@ -160,14 +167,15 @@ def decode_mutf8(input, errors='strict'):
|
|||||||
if num_next == 0:
|
if num_next == 0:
|
||||||
if code >= 0xd800 and code <= 0xdbff: # High surrogate
|
if code >= 0xd800 and code <= 0xdbff: # High surrogate
|
||||||
if code_surrogate is not None:
|
if code_surrogate is not None:
|
||||||
raise_error(start_surrogate, 'invalid high surrogate')
|
raise_surrogate_error(
|
||||||
|
start_surrogate, 'invalid high surrogate')
|
||||||
code_surrogate = code
|
code_surrogate = code
|
||||||
start_surrogate = start
|
start_surrogate = start
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if code >= 0xdc00 and code <= 0xdfff: # Low surrogate
|
if code >= 0xdc00 and code <= 0xdfff: # Low surrogate
|
||||||
if code_surrogate is None:
|
if code_surrogate is None:
|
||||||
raise_error(start, 'invalid low surrogate')
|
raise_surrogate_error(start, 'invalid low surrogate')
|
||||||
code = ((code_surrogate & 0x3f) << 10) | (code & 0x3f) + 0x10000
|
code = ((code_surrogate & 0x3f) << 10) | (code & 0x3f) + 0x10000
|
||||||
code_surrogate = None
|
code_surrogate = None
|
||||||
start_surrogate = None
|
start_surrogate = None
|
||||||
@@ -177,7 +185,7 @@ def decode_mutf8(input, errors='strict'):
|
|||||||
code_surrogate = None
|
code_surrogate = None
|
||||||
start_surrogate = None
|
start_surrogate = None
|
||||||
else:
|
else:
|
||||||
raise_error(start_surrogate, 'illegal surrogate')
|
raise_surrogate_error(start_surrogate, 'illegal surrogate')
|
||||||
|
|
||||||
res.write(create_chr(code))
|
res.write(create_chr(code))
|
||||||
|
|
||||||
@@ -185,7 +193,7 @@ def decode_mutf8(input, errors='strict'):
|
|||||||
if num_next > 0:
|
if num_next > 0:
|
||||||
raise_error(start, 'unexpected end')
|
raise_error(start, 'unexpected end')
|
||||||
if code_surrogate is not None:
|
if code_surrogate is not None:
|
||||||
raise_error(start_surrogate, 'unexpected end')
|
raise_surrogate_error(start_surrogate, 'unexpected end')
|
||||||
|
|
||||||
return (res.getvalue(), i)
|
return (res.getvalue(), i)
|
||||||
|
|
||||||
@@ -799,15 +807,7 @@ class DexFileReader(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def extract_dex_string(cls, buf, offset=0):
|
def extract_dex_string(cls, buf, offset=0):
|
||||||
end = buf.find(b'\0', offset)
|
end = buf.find(b'\0', offset)
|
||||||
res = buf[offset:] if end == -1 else buf[offset:end]
|
return buf[offset:] if end == -1 else buf[offset:end]
|
||||||
return res.decode('mutf-8', 'ignore')
|
|
||||||
|
|
||||||
if sys.version_info < (3,):
|
|
||||||
_extract_dex_string = extract_dex_string
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def extract_dex_string(cls, buf, offset=0):
|
|
||||||
return cls._extract_dex_string(buf, offset).encode('utf-8')
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -1051,6 +1051,15 @@ class DexFileReader(object):
|
|||||||
return cls.enumerate_dex_strings_vdex_buf(vdex_file.read())
|
return cls.enumerate_dex_strings_vdex_buf(vdex_file.read())
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def enumerate_dex_strings(cls, path):
    """Enumerate the dex strings stored in the file at `path`.

    The file type is probed with is_zipfile() and then is_vdex_file(), and
    the matching enumerator is called through `cls` so that subclass
    overrides are honored, as the neighboring class methods already do.

    Args:
        path: path to the file to inspect.

    Returns:
        The result of enumerate_dex_strings_apk(path) for zip files, the
        result of enumerate_dex_strings_vdex(path) for vdex files, or None
        when the file is neither.  Callers must check for None before
        iterating.
    """
    if cls.is_zipfile(path):
        return cls.enumerate_dex_strings_apk(path)
    if cls.is_vdex_file(path):
        return cls.enumerate_dex_strings_vdex(path)
    return None
|
||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# TaggedDict
|
# TaggedDict
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
@@ -2688,12 +2697,16 @@ def scan_apk_dep(graph, system_dirs, vendor_dirs):
|
|||||||
for ap, path in _enumerate_paths(system_dirs, vendor_dirs):
|
for ap, path in _enumerate_paths(system_dirs, vendor_dirs):
|
||||||
# Read the dex file from various file formats
|
# Read the dex file from various file formats
|
||||||
try:
|
try:
|
||||||
if DexFileReader.is_zipfile(path):
|
dex_string_iter = DexFileReader.enumerate_dex_strings(path)
|
||||||
strs = set(DexFileReader.enumerate_dex_strings_apk(path))
|
if dex_string_iter is None:
|
||||||
elif DexFileReader.is_vdex_file(path):
|
|
||||||
strs = set(DexFileReader.enumerate_dex_strings_vdex(path))
|
|
||||||
else:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
strings = set()
|
||||||
|
for string in dex_string_iter:
|
||||||
|
try:
|
||||||
|
strings.add(string.decode('mutf-8'))
|
||||||
|
except UnicodeSurrogateDecodeError:
|
||||||
|
pass
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
continue
|
continue
|
||||||
except:
|
except:
|
||||||
@@ -2701,12 +2714,12 @@ def scan_apk_dep(graph, system_dirs, vendor_dirs):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
# Skip the file that does not call System.loadLibrary()
|
# Skip the file that does not call System.loadLibrary()
|
||||||
if 'loadLibrary' not in strs:
|
if 'loadLibrary' not in strings:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Collect libraries from string tables
|
# Collect libraries from string tables
|
||||||
libs = set()
|
libs = set()
|
||||||
for string in strs:
|
for string in strings:
|
||||||
try:
|
try:
|
||||||
libs.update(libnames[string])
|
libs.update(libnames[string])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@@ -3647,6 +3660,27 @@ class CheckDepCommand(CheckDepCommandBase):
|
|||||||
return 0 if num_errors == 0 else 1
|
return 0 if num_errors == 0 else 1
|
||||||
|
|
||||||
|
|
||||||
|
class DumpDexStringCommand(Command):
    """Sub-command `dump-dex-string`: print the strings found in a dex file."""

    def __init__(self):
        super(DumpDexStringCommand, self).__init__(
            'dump-dex-string',
            help='Dump string literals defined in a dex file')

    def add_argparser_options(self, parser):
        """Register the positional `dex_file` argument on `parser`."""
        super(DumpDexStringCommand, self).add_argparser_options(parser)

        parser.add_argument('dex_file', help='path to an input dex file')

    def main(self, args):
        """Print every string enumerated from `args.dex_file`.

        Returns:
            0 on success; 1 when the input is neither a zip file nor a vdex
            file, i.e. DexFileReader.enumerate_dex_strings() returned None.
        """
        import sys  # local import keeps this block self-contained

        dex_strings = DexFileReader.enumerate_dex_strings(args.dex_file)
        if dex_strings is None:
            # enumerate_dex_strings() signals an unsupported file with None;
            # iterating it would raise TypeError, so report and bail out.
            sys.stderr.write(
                'error: {}: not a zip or vdex file\n'.format(args.dex_file))
            return 1

        for string in dex_strings:
            try:
                print(string)
            except (UnicodeEncodeError, UnicodeDecodeError):
                # Fall back to an escaped form when the output stream cannot
                # represent the string.
                print(repr(string))
        return 0
|
||||||
|
|
||||||
|
|
||||||
class CheckEligibleListCommand(CheckDepCommandBase):
|
class CheckEligibleListCommand(CheckDepCommandBase):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(CheckEligibleListCommand, self).__init__(
|
super(CheckEligibleListCommand, self).__init__(
|
||||||
@@ -3814,6 +3848,7 @@ def main():
|
|||||||
register_subcmd(CheckDepCommand())
|
register_subcmd(CheckDepCommand())
|
||||||
register_subcmd(CheckEligibleListCommand())
|
register_subcmd(CheckEligibleListCommand())
|
||||||
register_subcmd(DepGraphCommand())
|
register_subcmd(DepGraphCommand())
|
||||||
|
register_subcmd(DumpDexStringCommand())
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if not args.subcmd:
|
if not args.subcmd:
|
||||||
|
|||||||
Reference in New Issue
Block a user