From 45f7d2aaf53e130b19016161f62bd660e3cd5637 Mon Sep 17 00:00:00 2001 From: Ahaan Ugale Date: Sun, 14 Mar 2021 23:07:00 -0700 Subject: [PATCH] Update HotwordDetectionService prototype. - Added permission requirements for communicating with system enrollment app - Added priv-app permission build target 'com.example.android.voiceinteractor.xml' - Updated README usage directions - Added log messages when reading audio from AudioRecord - Changed AudioRecord read calls to request all of the desired data in a single .read() call - Refactor AudioRecord buffer settings to top of service files in Duration type - Compute bytesPerSecond based on EventPayload AudioFormat passed by SoundTrigger HAL - Fixed keyphrase used by VoiceInteractionService to "X Google" - Added trace events for tracking the time to read audio in VIS after a trusted onDetected() callback. Bug: 168305377 Test: manual Change-Id: I6c1db7f70ddd62d21c0bb7eba663fe9503d96ae3 --- samples/VoiceInteractionService/Android.bp | 7 + .../AndroidManifest.xml | 2 + samples/VoiceInteractionService/README.md | 29 ++--- .../com.example.android.voiceinteractor.xml | 2 + .../android/voiceinteractor/AudioUtils.java | 12 +- .../SampleHotwordDetectionService.java | 88 ++++++------- .../SampleVoiceInteractionService.java | 122 ++++++++++-------- 7 files changed, 134 insertions(+), 128 deletions(-) diff --git a/samples/VoiceInteractionService/Android.bp b/samples/VoiceInteractionService/Android.bp index 23c9b5836..13974f275 100644 --- a/samples/VoiceInteractionService/Android.bp +++ b/samples/VoiceInteractionService/Android.bp @@ -13,3 +13,10 @@ android_app { "androidx.annotation_annotation", ], } + +prebuilt_etc { + name: "com.example.android.voiceinteractor.xml", + src: "com.example.android.voiceinteractor.xml", + sub_dir: "permissions", + filename_from_src: true, +} diff --git a/samples/VoiceInteractionService/AndroidManifest.xml b/samples/VoiceInteractionService/AndroidManifest.xml index 37d7993f4..56c959081 100755 --- a/samples/VoiceInteractionService/AndroidManifest.xml +++ b/samples/VoiceInteractionService/AndroidManifest.xml @@ -2,6 +2,7 @@ package="com.example.android.voiceinteractor"> + @@ -31,5 +32,6 @@ + diff --git a/samples/VoiceInteractionService/README.md b/samples/VoiceInteractionService/README.md index 7d8dbe5f5..a1a4ccdb7 100644 --- a/samples/VoiceInteractionService/README.md +++ b/samples/VoiceInteractionService/README.md @@ -2,23 +2,18 @@ setup: 1. Set the KEYPHRASE constant in SampleVoiceInteractionService.java to something the device's default assistant supports. 2. m -j SampleVoiceInteractor -3. adb pull ./system/etc/permissions/privapp-permissions-platform.xml -4. Add: - - - -5. adb remount -6. adb push privapp-permissions-platform.xml /system/etc/permissions/privapp-permissions-platform.xml -7. adb shell mkdir /system/priv-app/SampleVoiceInteractor -8. adb push out/target/product/$TARGET_PRODUCT/system/priv-app/SampleVoiceInteractor/SampleVoiceInteractor.apk /system/priv-app/SampleVoiceInteractor/ -9. adb reboot -10. Go to the sample app info/settings. -11. Tap on Permissions and grant Mic access. -12. Reboot. -13. Set the sample app as the assistant. -14. Check for this in the logs to make sure it worked: +4. adb root; adb remount +5. adb push development/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml /system/etc/permissions/com.example.android.voiceinteractor.xml +6. adb shell mkdir /system/priv-app/SampleVoiceInteractor +7. adb push out/target/product/$TARGET_PRODUCT/system/priv-app/SampleVoiceInteractor/SampleVoiceInteractor.apk /system/priv-app/SampleVoiceInteractor/ +8. adb reboot +9. Go to the sample app info/settings. +10. Tap on Permissions and grant Mic access. +11. Reboot. +12. Set the "Digital assistant app" to "Sample Voice Interactor" in the Android settings +13. Check for this in the logs to make sure it worked: com.example.android.voiceinteractor I/VIS: onAvailabilityChanged: 2 -15. If it didn't, check if the pregrant worked: +14. If it didn't, check if the pregrant worked: adb shell dumpsys package com.example.android.voiceinteractor | grep CAPTURE_AUDIO_HOTWORD Iterating: @@ -42,4 +37,4 @@ Debugging: * uncomment LOG_NDEBUG lines at the top in AudioFlinger.cpp, Threads.cpp, Tracks.cpp, AudioPolicyInterfaceImpl.cpp, AudioPolicyService.cpp * Use this logcat filter: - com.example.android.voiceinteractor|AlwaysOnHotword|SoundTrigger|RecordingActivityMonitor|soundtrigger|AudioPolicyManager|AudioFlinger|AudioPolicyIntefaceImpl|AudioPolicyService \ No newline at end of file + com.example.android.voiceinteractor|AlwaysOnHotword|SoundTrigger|RecordingActivityMonitor|soundtrigger|AudioPolicyManager|AudioFlinger|AudioPolicyIntefaceImpl|AudioPolicyService|VIS|SHotwordDetectionSrvc|Hotword-AudioUtils \ No newline at end of file diff --git a/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml b/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml index b3b87f5b4..ea3647c92 100644 --- a/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml +++ b/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml @@ -18,5 +18,7 @@ + + diff --git a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/AudioUtils.java b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/AudioUtils.java index 9a9f8cc4a..d517de2d3 100644 --- a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/AudioUtils.java +++ b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/AudioUtils.java @@ -22,13 +22,16 @@ import android.util.Log; import java.util.Arrays; public class AudioUtils { - private static String TAG = "Hotword-AudioUtils"; + private static final String TAG = "Hotword-AudioUtils"; static int read(AudioRecord record, int bytesPerSecond, float secondsToRead, byte[] buffer) { + Log.i(TAG, "read(): bytesPerSecond=" + bytesPerSecond + + ", secondsToRead=" + secondsToRead + ", bufferSize=" + buffer.length); int numBytes = 0; int nextSecondToSample = 0; while (true) { - int bytesRead = record.read(buffer, numBytes, numBytes + 1024); + int bytesRead = record.read(buffer, numBytes, Math.round(bytesPerSecond * secondsToRead)); + Log.i(TAG, "AudioRecord.read offset=" + numBytes + ", size=" + Math.round(bytesPerSecond * secondsToRead)); numBytes += bytesRead; if (bytesRead <= 0) { @@ -44,8 +47,9 @@ public class AudioUtils { (bytesPerSecond * curSecond) + 10))); nextSecondToSample++; } - if (numBytes * 1.0 / bytesPerSecond >= secondsToRead) { - Log.i(TAG, "recorded enough. stopping."); + if ((numBytes * 1.0 / bytesPerSecond) >= secondsToRead) { + Log.i(TAG, "recorded enough. stopping. bytesRead=" + numBytes + + ", secondsRead=" + (numBytes * 1.0 / bytesPerSecond)); break; } } diff --git a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleHotwordDetectionService.java b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleHotwordDetectionService.java index 91764a6c9..e0b122df8 100644 --- a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleHotwordDetectionService.java +++ b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleHotwordDetectionService.java @@ -17,7 +17,6 @@ package com.example.android.voiceinteractor; import android.media.AudioAttributes; -import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.Handler; @@ -35,13 +34,40 @@ import androidx.annotation.Nullable; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.time.Duration; import java.util.function.IntConsumer; public class SampleHotwordDetectionService extends HotwordDetectionService { static final String TAG = "SHotwordDetectionSrvc"; - // Number of bytes per sample of audio (which is a short). - private static final int BYTES_PER_SAMPLE = 2; + // AudioRecord config + private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(1); + private static final Duration DSP_AUDIO_READ_DURATION = Duration.ofSeconds(5); + private static final Duration AUDIO_RECORD_RELEASE_TIMEOUT = Duration.ofSeconds(10); + + private static AudioRecord createAudioRecord(AlwaysOnHotwordDetector.EventPayload eventPayload, + int bytesPerSecond, + int sessionId) { + int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond, + AUDIO_RECORD_BUFFER_DURATION.getSeconds()); + Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize + + ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond)); + return new AudioRecord.Builder() + .setAudioAttributes( + new AudioAttributes.Builder() + .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) + // TODO see what happens if this is too small + .build()) + .setAudioFormat(eventPayload.getCaptureAudioFormat()) + .setBufferSizeInBytes(audioRecordBufferSize) + .setSessionId(sessionId) + .setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis()) + .build(); + } + + private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) { + return (int) (bytesPerSecond * bufferLengthSeconds); + } @Override public void onUpdateState(@Nullable PersistableBundle options, @@ -58,10 +84,11 @@ public class SampleHotwordDetectionService extends HotwordDetectionService { @NonNull AlwaysOnHotwordDetector.EventPayload eventPayload, long timeoutMillis, @NonNull Callback callback) { - Log.d(TAG, "onDetect (Hardware trigger)"); + Log.d(TAG, "onDetect (Hardware trigger): " + eventPayload); int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate(); - int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate; + int bytesPerSecond = + eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate; Integer captureSession = 0; try { @@ -81,9 +108,11 @@ public class SampleHotwordDetectionService extends HotwordDetectionService { return; } - byte[] buffer = new byte[bytesPerSecond * 10]; + byte[] buffer = new byte[bytesPerSecond * (int) DSP_AUDIO_READ_DURATION.getSeconds()]; + Log.d(TAG, "starting read: bytesPerSecond=" + bytesPerSecond + + ", totalBufferSize=" + buffer.length); record.startRecording(); - AudioUtils.read(record, bytesPerSecond, .75f, buffer); + AudioUtils.read(record, bytesPerSecond, DSP_AUDIO_READ_DURATION.getSeconds(), buffer); callback.onDetected( new HotwordDetectedResult.Builder() @@ -95,7 +124,7 @@ public class SampleHotwordDetectionService extends HotwordDetectionService { Log.i(TAG, "Releasing audio record"); record.stop(); record.release(); - }, 5000); + }, AUDIO_RECORD_RELEASE_TIMEOUT.toMillis()); } private int getKeyphraseId(AlwaysOnHotwordDetector.EventPayload payload) { @@ -108,48 +137,7 @@ public class SampleHotwordDetectionService extends HotwordDetectionService { @Override public void onDetect(@NonNull Callback callback) { - int sampleRate = 16000; - int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate; - AudioRecord record = new AudioRecord.Builder() - .setAudioAttributes(new AudioAttributes.Builder() - .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD).build()) - .setAudioFormat( - new AudioFormat.Builder() - .setChannelMask(AudioFormat.CHANNEL_IN_MONO) - .setEncoding(AudioFormat.ENCODING_DEFAULT) - .setSampleRate(sampleRate) - .build()) - .setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 15)) - .setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis()) - .build(); - - if (record.getState() != AudioRecord.STATE_INITIALIZED) { - Log.w(TAG, "Failed to initialize AudioRecord"); - record.release(); - } - record.startRecording(); - byte[] buffer = new byte[bytesPerSecond * 10]; - int numBytes = AudioUtils.read(record, bytesPerSecond, .75f, buffer); - } - - private static AudioRecord createAudioRecord(AlwaysOnHotwordDetector.EventPayload eventPayload, - int bytesPerSecond, - int sessionId) { - return new AudioRecord.Builder() - .setAudioAttributes( - new AudioAttributes.Builder() - .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) - // TODO see what happens if this is too small - .build()) - .setAudioFormat(eventPayload.getCaptureAudioFormat()) - .setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 1)) - .setSessionId(sessionId) - .setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis()) - .build(); - } - - private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) { - return (int) (bytesPerSecond * bufferLengthSeconds); + Log.w(TAG, "onDetect called for microphone trigger"); } } diff --git a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleVoiceInteractionService.java b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleVoiceInteractionService.java index 017ca46d5..c244e7dd4 100644 --- a/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleVoiceInteractionService.java +++ b/samples/VoiceInteractionService/src/com/example/android/voiceinteractor/SampleVoiceInteractionService.java @@ -24,41 +24,62 @@ import android.content.ComponentName; import android.content.Intent; import android.media.AudioAttributes; import android.media.AudioFormat; -import android.media.AudioManager; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.Binder; import android.os.Bundle; import android.os.IBinder; +import android.os.Trace; import android.service.voice.AlwaysOnHotwordDetector; import android.service.voice.AlwaysOnHotwordDetector.EventPayload; import android.service.voice.HotwordDetector; +import android.service.voice.HotwordDetector.IllegalDetectorStateException; import android.service.voice.HotwordRejectedResult; import android.service.voice.VoiceInteractionService; import android.util.Log; import androidx.annotation.NonNull; -import androidx.annotation.Nullable; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Arrays; +import java.time.Duration; import java.util.Locale; public class SampleVoiceInteractionService extends VoiceInteractionService { + public static final String DSP_MODEL_KEYPHRASE = "Test Keyphrase"; private static final String TAG = "VIS"; - // Number of bytes per sample of audio (which is a short). - private static final int BYTES_PER_SAMPLE = 2; - public static final String KEYPHRASE = "X Android"; + // AudioRecord config + private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(5); + private static final Duration AUDIO_READ_DURATION = Duration.ofSeconds(5); + + // DSP model config + private static final Locale DSP_MODEL_LOCALE = Locale.US; private final IBinder binder = new LocalBinder(); - public class LocalBinder extends Binder { - SampleVoiceInteractionService getService() { - // Return this instance of LocalService so clients can call public methods - return SampleVoiceInteractionService.this; - } + HotwordDetector mDetector; + Callback mCallback; + Bundle mData = new Bundle(); + AudioFormat mAudioFormat; + EventPayload mLastPayload; + + private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) { + int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond, + AUDIO_RECORD_BUFFER_DURATION.getSeconds()); + Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize + + ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond)); + return new AudioRecord.Builder() + .setAudioAttributes( + new AudioAttributes.Builder() + .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) + .build()) + .setAudioFormat(eventPayload.getCaptureAudioFormat()) + .setBufferSizeInBytes(audioRecordBufferSize) + .setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent()) + .build(); + } + + private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) { + return (int) (bytesPerSecond * bufferLengthSeconds); } @Override @@ -69,19 +90,13 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { return super.onBind(intent); } - HotwordDetector mDetector; - Callback mCallback; - - Bundle mData = new Bundle(); - AudioFormat mAudioFormat; - EventPayload mLastPayload; - @Override public void onReady() { super.onReady(); Log.i(TAG, "onReady"); mCallback = new Callback(); - mDetector = createAlwaysOnHotwordDetector(KEYPHRASE, Locale.US, null, null, mCallback); + mDetector = createAlwaysOnHotwordDetector(DSP_MODEL_KEYPHRASE, DSP_MODEL_LOCALE, null, null, + mCallback); } @Override @@ -90,6 +105,13 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { Log.i(TAG, "onShutdown"); } + public class LocalBinder extends Binder { + SampleVoiceInteractionService getService() { + // Return this instance of LocalService so clients can call public methods + return SampleVoiceInteractionService.this; + } + } + class Callback extends AlwaysOnHotwordDetector.Callback { private boolean mAvailable = false; @@ -110,7 +132,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { Intent enrollIntent = null; try { enrollIntent = ((AlwaysOnHotwordDetector) mDetector).createEnrollIntent(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } if (enrollIntent == null) { @@ -131,14 +153,16 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { public void onRejected(@NonNull HotwordRejectedResult result) { try { mDetector.startRecognition(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } } @Override public void onDetected(@NonNull EventPayload eventPayload) { + Trace.beginAsyncSection("SampleVoiceInteractionService.onDetected", 0); onDetected(eventPayload, false); + Trace.endAsyncSection("SampleVoiceInteractionService.onDetected", 0); } public void onDetected(@NonNull EventPayload eventPayload, boolean generateSessionId) { @@ -150,7 +174,10 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { eventPayload.getCaptureAudioFormat().getEncoding())); int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate(); - int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate; + int bytesPerSecond = + eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate; + + Trace.beginAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1); // For Non-trusted: // Integer captureSession = 0; @@ -163,20 +190,25 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { // int sessionId = generateSessionId ? // AudioManager.AUDIO_SESSION_ID_GENERATE : captureSession; // AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond, sessionId); + AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond); if (record.getState() != AudioRecord.STATE_INITIALIZED) { + Trace.endAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1); + Trace.setCounter("SampleVoiceInteractionService AudioRecord.STATE_INITIALIZED", + record.getState()); Log.e(TAG, "Failed to init first AudioRecord."); try { mDetector.startRecognition(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } return; } - byte[] buffer = new byte[bytesPerSecond * 6]; + byte[] buffer = new byte[bytesPerSecond * (int) AUDIO_READ_DURATION.getSeconds()]; record.startRecording(); - int numBytes = AudioUtils.read(record, bytesPerSecond, 5, buffer); + int numBytes = AudioUtils.read(record, bytesPerSecond, AUDIO_READ_DURATION.getSeconds(), + buffer); // try { // Thread.sleep(2000); @@ -185,6 +217,9 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { // throw new RuntimeException(e); // } + + Trace.endAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1); + Trace.setCounter("SampleVoiceInteractionService Read Complete", numBytes); record.stop(); record.release(); @@ -195,7 +230,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { try { mDetector.startRecognition(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } } @@ -205,7 +240,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { Log.i(TAG, "onError"); try { mDetector.startRecognition(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } } @@ -227,37 +262,10 @@ public class SampleVoiceInteractionService extends VoiceInteractionService { if (mAvailable) { try { mDetector.startRecognition(); - } catch (HotwordDetector.IllegalDetectorStateException e) { + } catch (IllegalDetectorStateException e) { e.printStackTrace(); } } } } - - private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) { - return new AudioRecord.Builder() - .setAudioAttributes( - new AudioAttributes.Builder() - .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) - .build()) - .setAudioFormat(eventPayload.getCaptureAudioFormat()) - .setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 2)) - .setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent()) - .build(); - } - - private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond, - int sessionId) { - return new AudioRecord( - new AudioAttributes.Builder() - .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD) - .build(), - eventPayload.getCaptureAudioFormat(), - getBufferSizeInBytes(bytesPerSecond, 2), - sessionId); - } - - private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) { - return (int) (bytesPerSecond * bufferLengthSeconds); - } }