Update HotwordDetectionService prototype.

- Added permission requirements for communicating with system enrollment
  app
- Added priv-app permission build target
  'com.example.android.voiceinteractor.xml'
- Updated README usage directions
- Added log messages when reading audio from AudioRecord
- Changed AudioRecord read calls to request all of the desired data in a
  single .read() call
- Refactored AudioRecord buffer settings into Duration constants at the
  top of the service files
- Changed bytesPerSecond to be computed from the EventPayload AudioFormat
  passed by the SoundTrigger HAL
- Fixed keyphrase used by VoiceInteractionService to "X Google"
- Added trace events for tracking the time to read audio in VIS after a
  trusted onDetected() callback.

Bug: 168305377
Test: manual
Change-Id: I6c1db7f70ddd62d21c0bb7eba663fe9503d96ae3
This commit is contained in:
Ahaan Ugale
2021-03-14 23:07:00 -07:00
committed by Nicholas Ambur
parent 864e448436
commit 45f7d2aaf5
7 changed files with 134 additions and 128 deletions

View File

@@ -13,3 +13,10 @@ android_app {
"androidx.annotation_annotation",
],
}
// Prebuilt target for the sample app's priv-app permission file.
// The source XML is placed in the "permissions" sub-directory of the etc
// install location, keeping the source file's name.
prebuilt_etc {
name: "com.example.android.voiceinteractor.xml",
src: "com.example.android.voiceinteractor.xml",
sub_dir: "permissions",
filename_from_src: true,
}

View File

@@ -2,6 +2,7 @@
package="com.example.android.voiceinteractor">
<application android:label="@string/app_name">
<profileable android:shell="true"/>
<activity android:name=".MainActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
@@ -31,5 +32,6 @@
<uses-permission android:name="android.permission.RECORD_BACKGROUND_AUDIO" />
<uses-permission android:name="android.permission.CAPTURE_AUDIO_HOTWORD" />
<uses-permission android:name="android.permission.MANAGE_HOTWORD_DETECTION" />
<uses-permission android:name="android.permission.INTERACT_ACROSS_USERS" />
</manifest>

View File

@@ -2,23 +2,18 @@ setup:
1. Set the DSP_MODEL_KEYPHRASE constant in SampleVoiceInteractionService.java to something the device's
default assistant supports.
2. m -j SampleVoiceInteractor
3. adb pull ./system/etc/permissions/privapp-permissions-platform.xml
4. Add:
<privapp-permissions package="com.example.android.voiceinteractor">
<permission name="android.permission.CAPTURE_AUDIO_HOTWORD"/>
</privapp-permissions>
5. adb remount
6. adb push privapp-permissions-platform.xml /system/etc/permissions/privapp-permissions-platform.xml
7. adb shell mkdir /system/priv-app/SampleVoiceInteractor
8. adb push out/target/product/$TARGET_PRODUCT/system/priv-app/SampleVoiceInteractor/SampleVoiceInteractor.apk /system/priv-app/SampleVoiceInteractor/
9. adb reboot
10. Go to the sample app info/settings.
11. Tap on Permissions and grant Mic access.
12. Reboot.
13. Set the sample app as the assistant.
14. Check for this in the logs to make sure it worked:
4. adb root; adb remount
5. adb push development/samples/VoiceInteractionService/com.example.android.voiceinteractor.xml /system/etc/permissions/com.example.android.voiceinteractor.xml
6. adb shell mkdir /system/priv-app/SampleVoiceInteractor
7. adb push out/target/product/$TARGET_PRODUCT/system/priv-app/SampleVoiceInteractor/SampleVoiceInteractor.apk /system/priv-app/SampleVoiceInteractor/
8. adb reboot
9. Go to the sample app info/settings.
10. Tap on Permissions and grant Mic access.
11. Reboot.
12. Set the "Digital assistant app" to "Sample Voice Interactor" in the Android settings
13. Check for this in the logs to make sure it worked:
com.example.android.voiceinteractor I/VIS: onAvailabilityChanged: 2
15. If it didn't, check if the pregrant worked:
14. If it didn't, check if the pregrant worked:
adb shell dumpsys package com.example.android.voiceinteractor | grep CAPTURE_AUDIO_HOTWORD
Iterating:
@@ -42,4 +37,4 @@ Debugging:
* uncomment LOG_NDEBUG lines at the top in AudioFlinger.cpp, Threads.cpp, Tracks.cpp,
AudioPolicyInterfaceImpl.cpp, AudioPolicyService.cpp
* Use this logcat filter:
com.example.android.voiceinteractor|AlwaysOnHotword|SoundTrigger|RecordingActivityMonitor|soundtrigger|AudioPolicyManager|AudioFlinger|AudioPolicyIntefaceImpl|AudioPolicyService
com.example.android.voiceinteractor|AlwaysOnHotword|SoundTrigger|RecordingActivityMonitor|soundtrigger|AudioPolicyManager|AudioFlinger|AudioPolicyIntefaceImpl|AudioPolicyService|VIS|SHotwordDetectionSrvc|Hotword-AudioUtils

View File

@@ -18,5 +18,7 @@
<permissions>
<privapp-permissions package="com.example.android.voiceinteractor">
<permission name="android.permission.CAPTURE_AUDIO_HOTWORD"/>
<permission name="android.permission.INTERACT_ACROSS_USERS"/>
<permission name="android.permission.MANAGE_HOTWORD_DETECTION"/>
</privapp-permissions>
</permissions>

View File

@@ -22,13 +22,16 @@ import android.util.Log;
import java.util.Arrays;
public class AudioUtils {
private static String TAG = "Hotword-AudioUtils";
private static final String TAG = "Hotword-AudioUtils";
static int read(AudioRecord record, int bytesPerSecond, float secondsToRead, byte[] buffer) {
Log.i(TAG, "read(): bytesPerSecond=" + bytesPerSecond
+ ", secondsToRead=" + secondsToRead + ", bufferSize=" + buffer.length);
int numBytes = 0;
int nextSecondToSample = 0;
while (true) {
int bytesRead = record.read(buffer, numBytes, numBytes + 1024);
int bytesRead = record.read(buffer, numBytes, Math.round(bytesPerSecond * secondsToRead));
Log.i(TAG, "AudioRecord.read offset=" + numBytes + ", size=" + Math.round(bytesPerSecond * secondsToRead));
numBytes += bytesRead;
if (bytesRead <= 0) {
@@ -44,8 +47,9 @@ public class AudioUtils {
(bytesPerSecond * curSecond) + 10)));
nextSecondToSample++;
}
if (numBytes * 1.0 / bytesPerSecond >= secondsToRead) {
Log.i(TAG, "recorded enough. stopping.");
if ((numBytes * 1.0 / bytesPerSecond) >= secondsToRead) {
Log.i(TAG, "recorded enough. stopping. bytesRead=" + numBytes
+ ", secondsRead=" + (numBytes * 1.0 / bytesPerSecond));
break;
}
}

View File

@@ -17,7 +17,6 @@
package com.example.android.voiceinteractor;
import android.media.AudioAttributes;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Handler;
@@ -35,13 +34,40 @@ import androidx.annotation.Nullable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.time.Duration;
import java.util.function.IntConsumer;
public class SampleHotwordDetectionService extends HotwordDetectionService {
static final String TAG = "SHotwordDetectionSrvc";
// Number of bytes per sample of audio (which is a short).
private static final int BYTES_PER_SAMPLE = 2;
// AudioRecord config
private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(1);
private static final Duration DSP_AUDIO_READ_DURATION = Duration.ofSeconds(5);
private static final Duration AUDIO_RECORD_RELEASE_TIMEOUT = Duration.ofSeconds(10);
/**
 * Builds the AudioRecord used to read audio after a hardware-triggered detection.
 *
 * <p>The record uses the HOTWORD internal capture preset, the capture format carried by
 * {@code eventPayload}, and an internal buffer sized to hold
 * {@code AUDIO_RECORD_BUFFER_DURATION} of audio at {@code bytesPerSecond}. The maximum shared
 * audio history reported by {@code AudioRecord.getMaxSharedAudioHistoryMillis()} is requested.
 *
 * @param eventPayload supplies the capture {@code AudioFormat}
 * @param bytesPerSecond byte rate used to size the buffer; it is also the divisor in the debug
 *     log below. NOTE(review): a value of 0 would throw ArithmeticException in that log
 *     statement -- confirm callers never pass 0.
 * @param sessionId audio session id handed to the builder
 * @return the built AudioRecord; its initialization state is not checked here
 */
private static AudioRecord createAudioRecord(AlwaysOnHotwordDetector.EventPayload eventPayload,
int bytesPerSecond,
int sessionId) {
int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond,
AUDIO_RECORD_BUFFER_DURATION.getSeconds());
// lengthSeconds uses integer division, so it is rounded down to whole seconds.
Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize
+ ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond));
return new AudioRecord.Builder()
.setAudioAttributes(
new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
// TODO see what happens if this is too small
.build())
.setAudioFormat(eventPayload.getCaptureAudioFormat())
.setBufferSizeInBytes(audioRecordBufferSize)
.setSessionId(sessionId)
.setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis())
.build();
}
/**
 * Converts a buffer length in seconds into a size in bytes at the given byte rate.
 *
 * @param bytesPerSecond number of audio bytes per second
 * @param bufferLengthSeconds desired buffer length, in seconds
 * @return the float product of the two arguments, truncated by the cast to {@code int}
 */
private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) {
return (int) (bytesPerSecond * bufferLengthSeconds);
}
@Override
public void onUpdateState(@Nullable PersistableBundle options,
@@ -58,10 +84,11 @@ public class SampleHotwordDetectionService extends HotwordDetectionService {
@NonNull AlwaysOnHotwordDetector.EventPayload eventPayload,
long timeoutMillis,
@NonNull Callback callback) {
Log.d(TAG, "onDetect (Hardware trigger)");
Log.d(TAG, "onDetect (Hardware trigger): " + eventPayload);
int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate();
int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate;
int bytesPerSecond =
eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate;
Integer captureSession = 0;
try {
@@ -81,9 +108,11 @@ public class SampleHotwordDetectionService extends HotwordDetectionService {
return;
}
byte[] buffer = new byte[bytesPerSecond * 10];
byte[] buffer = new byte[bytesPerSecond * (int) DSP_AUDIO_READ_DURATION.getSeconds()];
Log.d(TAG, "starting read: bytesPerSecond=" + bytesPerSecond
+ ", totalBufferSize=" + buffer.length);
record.startRecording();
AudioUtils.read(record, bytesPerSecond, .75f, buffer);
AudioUtils.read(record, bytesPerSecond, DSP_AUDIO_READ_DURATION.getSeconds(), buffer);
callback.onDetected(
new HotwordDetectedResult.Builder()
@@ -95,7 +124,7 @@ public class SampleHotwordDetectionService extends HotwordDetectionService {
Log.i(TAG, "Releasing audio record");
record.stop();
record.release();
}, 5000);
}, AUDIO_RECORD_RELEASE_TIMEOUT.toMillis());
}
private int getKeyphraseId(AlwaysOnHotwordDetector.EventPayload payload) {
@@ -108,48 +137,7 @@ public class SampleHotwordDetectionService extends HotwordDetectionService {
@Override
public void onDetect(@NonNull Callback callback) {
int sampleRate = 16000;
int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate;
AudioRecord record = new AudioRecord.Builder()
.setAudioAttributes(new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD).build())
.setAudioFormat(
new AudioFormat.Builder()
.setChannelMask(AudioFormat.CHANNEL_IN_MONO)
.setEncoding(AudioFormat.ENCODING_DEFAULT)
.setSampleRate(sampleRate)
.build())
.setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 15))
.setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis())
.build();
if (record.getState() != AudioRecord.STATE_INITIALIZED) {
Log.w(TAG, "Failed to initialize AudioRecord");
record.release();
}
record.startRecording();
byte[] buffer = new byte[bytesPerSecond * 10];
int numBytes = AudioUtils.read(record, bytesPerSecond, .75f, buffer);
}
private static AudioRecord createAudioRecord(AlwaysOnHotwordDetector.EventPayload eventPayload,
int bytesPerSecond,
int sessionId) {
return new AudioRecord.Builder()
.setAudioAttributes(
new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
// TODO see what happens if this is too small
.build())
.setAudioFormat(eventPayload.getCaptureAudioFormat())
.setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 1))
.setSessionId(sessionId)
.setMaxSharedAudioHistoryMillis(AudioRecord.getMaxSharedAudioHistoryMillis())
.build();
}
private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) {
return (int) (bytesPerSecond * bufferLengthSeconds);
Log.w(TAG, "onDetect called for microphone trigger");
}
}

View File

@@ -24,41 +24,62 @@ import android.content.ComponentName;
import android.content.Intent;
import android.media.AudioAttributes;
import android.media.AudioFormat;
import android.media.AudioManager;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Binder;
import android.os.Bundle;
import android.os.IBinder;
import android.os.Trace;
import android.service.voice.AlwaysOnHotwordDetector;
import android.service.voice.AlwaysOnHotwordDetector.EventPayload;
import android.service.voice.HotwordDetector;
import android.service.voice.HotwordDetector.IllegalDetectorStateException;
import android.service.voice.HotwordRejectedResult;
import android.service.voice.VoiceInteractionService;
import android.util.Log;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.time.Duration;
import java.util.Locale;
public class SampleVoiceInteractionService extends VoiceInteractionService {
public static final String DSP_MODEL_KEYPHRASE = "Test Keyphrase";
private static final String TAG = "VIS";
// Number of bytes per sample of audio (which is a short).
private static final int BYTES_PER_SAMPLE = 2;
public static final String KEYPHRASE = "X Android";
// AudioRecord config
private static final Duration AUDIO_RECORD_BUFFER_DURATION = Duration.ofSeconds(5);
private static final Duration AUDIO_READ_DURATION = Duration.ofSeconds(5);
// DSP model config
private static final Locale DSP_MODEL_LOCALE = Locale.US;
private final IBinder binder = new LocalBinder();
public class LocalBinder extends Binder {
SampleVoiceInteractionService getService() {
// Return this instance of LocalService so clients can call public methods
return SampleVoiceInteractionService.this;
}
HotwordDetector mDetector;
Callback mCallback;
Bundle mData = new Bundle();
AudioFormat mAudioFormat;
EventPayload mLastPayload;
/**
 * Builds the AudioRecord used to read audio after a detection callback.
 *
 * <p>The record uses the HOTWORD internal capture preset, the capture format carried by
 * {@code eventPayload}, and an internal buffer sized to hold
 * {@code AUDIO_RECORD_BUFFER_DURATION} of audio at {@code bytesPerSecond}. The shared audio
 * event is set to the media sync event of the payload's hotword detected result.
 *
 * @param eventPayload supplies the capture {@code AudioFormat} and the hotword detected result.
 *     NOTE(review): {@code getHotwordDetectedResult()} is dereferenced without a null check --
 *     confirm it is always present on this path.
 * @param bytesPerSecond byte rate used to size the buffer; it is also the divisor in the debug
 *     log below, so a value of 0 would throw ArithmeticException there
 * @return the built AudioRecord; its initialization state is not checked here
 */
private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) {
int audioRecordBufferSize = getBufferSizeInBytes(bytesPerSecond,
AUDIO_RECORD_BUFFER_DURATION.getSeconds());
// lengthSeconds uses integer division, so it is rounded down to whole seconds.
Log.d(TAG, "creating AudioRecord: bytes=" + audioRecordBufferSize
+ ", lengthSeconds=" + (audioRecordBufferSize / bytesPerSecond));
return new AudioRecord.Builder()
.setAudioAttributes(
new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
.build())
.setAudioFormat(eventPayload.getCaptureAudioFormat())
.setBufferSizeInBytes(audioRecordBufferSize)
.setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent())
.build();
}
/**
 * Converts a buffer length in seconds into a size in bytes at the given byte rate.
 *
 * @param bytesPerSecond number of audio bytes per second
 * @param bufferLengthSeconds desired buffer length, in seconds
 * @return the float product of the two arguments, truncated by the cast to {@code int}
 */
private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) {
return (int) (bytesPerSecond * bufferLengthSeconds);
}
@Override
@@ -69,19 +90,13 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
return super.onBind(intent);
}
HotwordDetector mDetector;
Callback mCallback;
Bundle mData = new Bundle();
AudioFormat mAudioFormat;
EventPayload mLastPayload;
@Override
public void onReady() {
super.onReady();
Log.i(TAG, "onReady");
mCallback = new Callback();
mDetector = createAlwaysOnHotwordDetector(KEYPHRASE, Locale.US, null, null, mCallback);
mDetector = createAlwaysOnHotwordDetector(DSP_MODEL_KEYPHRASE, DSP_MODEL_LOCALE, null, null,
mCallback);
}
@Override
@@ -90,6 +105,13 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
Log.i(TAG, "onShutdown");
}
/** Binder whose {@code getService()} returns the enclosing SampleVoiceInteractionService. */
public class LocalBinder extends Binder {
SampleVoiceInteractionService getService() {
// Return the enclosing SampleVoiceInteractionService so clients can call its public methods
return SampleVoiceInteractionService.this;
}
}
class Callback extends AlwaysOnHotwordDetector.Callback {
private boolean mAvailable = false;
@@ -110,7 +132,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
Intent enrollIntent = null;
try {
enrollIntent = ((AlwaysOnHotwordDetector) mDetector).createEnrollIntent();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
if (enrollIntent == null) {
@@ -131,14 +153,16 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
public void onRejected(@NonNull HotwordRejectedResult result) {
try {
mDetector.startRecognition();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
}
/**
 * Detection callback: delegates to {@code onDetected(eventPayload, false)} and brackets that
 * call with an async trace section named "SampleVoiceInteractionService.onDetected" (cookie 0).
 *
 * @param eventPayload payload delivered with the detection
 */
@Override
public void onDetected(@NonNull EventPayload eventPayload) {
Trace.beginAsyncSection("SampleVoiceInteractionService.onDetected", 0);
onDetected(eventPayload, false);
// NOTE(review): not in a finally block, so the section is left open if the call above throws.
Trace.endAsyncSection("SampleVoiceInteractionService.onDetected", 0);
}
public void onDetected(@NonNull EventPayload eventPayload, boolean generateSessionId) {
@@ -150,7 +174,10 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
eventPayload.getCaptureAudioFormat().getEncoding()));
int sampleRate = eventPayload.getCaptureAudioFormat().getSampleRate();
int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate;
int bytesPerSecond =
eventPayload.getCaptureAudioFormat().getFrameSizeInBytes() * sampleRate;
Trace.beginAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1);
// For Non-trusted:
// Integer captureSession = 0;
@@ -163,20 +190,25 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
// int sessionId = generateSessionId ?
// AudioManager.AUDIO_SESSION_ID_GENERATE : captureSession;
// AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond, sessionId);
AudioRecord record = createAudioRecord(eventPayload, bytesPerSecond);
if (record.getState() != AudioRecord.STATE_INITIALIZED) {
Trace.endAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1);
Trace.setCounter("SampleVoiceInteractionService AudioRecord.STATE_INITIALIZED",
record.getState());
Log.e(TAG, "Failed to init first AudioRecord.");
try {
mDetector.startRecognition();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
return;
}
byte[] buffer = new byte[bytesPerSecond * 6];
byte[] buffer = new byte[bytesPerSecond * (int) AUDIO_READ_DURATION.getSeconds()];
record.startRecording();
int numBytes = AudioUtils.read(record, bytesPerSecond, 5, buffer);
int numBytes = AudioUtils.read(record, bytesPerSecond, AUDIO_READ_DURATION.getSeconds(),
buffer);
// try {
// Thread.sleep(2000);
@@ -185,6 +217,9 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
// throw new RuntimeException(e);
// }
Trace.endAsyncSection("SampleVoiceInteractionService.createAudioRecord", 1);
Trace.setCounter("SampleVoiceInteractionService Read Complete", numBytes);
record.stop();
record.release();
@@ -195,7 +230,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
try {
mDetector.startRecognition();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
}
@@ -205,7 +240,7 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
Log.i(TAG, "onError");
try {
mDetector.startRecognition();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
}
@@ -227,37 +262,10 @@ public class SampleVoiceInteractionService extends VoiceInteractionService {
if (mAvailable) {
try {
mDetector.startRecognition();
} catch (HotwordDetector.IllegalDetectorStateException e) {
} catch (IllegalDetectorStateException e) {
e.printStackTrace();
}
}
}
}
private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond) {
return new AudioRecord.Builder()
.setAudioAttributes(
new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
.build())
.setAudioFormat(eventPayload.getCaptureAudioFormat())
.setBufferSizeInBytes(getBufferSizeInBytes(bytesPerSecond, 2))
.setSharedAudioEvent(eventPayload.getHotwordDetectedResult().getMediaSyncEvent())
.build();
}
private static AudioRecord createAudioRecord(EventPayload eventPayload, int bytesPerSecond,
int sessionId) {
return new AudioRecord(
new AudioAttributes.Builder()
.setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
.build(),
eventPayload.getCaptureAudioFormat(),
getBufferSizeInBytes(bytesPerSecond, 2),
sessionId);
}
private static int getBufferSizeInBytes(int bytesPerSecond, float bufferLengthSeconds) {
return (int) (bytesPerSecond * bufferLengthSeconds);
}
}