[android] finished

This commit is contained in:
pengzhendong 2022-09-04 18:13:36 +08:00
parent 88fc9834b1
commit d91cc95edb
6 changed files with 389 additions and 8 deletions

View File

@ -0,0 +1,19 @@
# Native build script for the `wekws` shared library that is loaded from Java
# through JNI.
cmake_minimum_required(VERSION 3.4.1)
project(wekws CXX)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_VERBOSE_MAKEFILE on)

# ONNX Runtime headers and per-ABI libraries are looked up inside directories
# named `onnxruntime*.aar` located three levels above this source directory.
set(build_DIR ${CMAKE_SOURCE_DIR}/../../../build)
file(GLOB ONNXRUNTIME_INCLUDE_DIRS "${build_DIR}/onnxruntime*.aar/headers")
file(GLOB ONNXRUNTIME_LINK_DIRS "${build_DIR}/onnxruntime*.aar/jni/${ANDROID_ABI}")

# A glob that matches nothing yields an empty list, which would otherwise only
# surface later as a missing header or an unresolved `-lonnxruntime`.
if(NOT ONNXRUNTIME_INCLUDE_DIRS)
  message(WARNING
    "No ONNX Runtime headers found under ${build_DIR}/onnxruntime*.aar/headers")
endif()
if(NOT ONNXRUNTIME_LINK_DIRS)
  message(WARNING
    "No ONNX Runtime libraries found under ${build_DIR}/onnxruntime*.aar/jni/${ANDROID_ABI}")
endif()

# link_directories() only affects targets created after it, so it has to stay
# ahead of add_library().
link_directories(${ONNXRUNTIME_LINK_DIRS})
include_directories(${ONNXRUNTIME_INCLUDE_DIRS})
include_directories(${CMAKE_SOURCE_DIR})

add_library(wekws SHARED
  frontend/feature_pipeline.cc
  frontend/fft.cc
  kws/keyword_spotting.cc
  wekws.cc
)
target_link_libraries(wekws PUBLIC onnxruntime)

View File

@ -0,0 +1,119 @@
// Copyright (c) 2022 Zhendong Peng (pzd17@tsinghua.org.cn)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <jni.h>

#include <algorithm>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

#include "frontend/feature_pipeline.h"
#include "kws/keyword_spotting.h"
#include "utils/log.h"
namespace wekws {
std::shared_ptr<KeywordSpotting> spotter;
std::shared_ptr<wenet::FeaturePipelineConfig> feature_config;
std::shared_ptr<wenet::FeaturePipeline> feature_pipeline;
std::string result; // NOLINT
int offset;
void init(JNIEnv* env, jobject, jstring jModelDir) {
const char* pModelDir = env->GetStringUTFChars(jModelDir, nullptr);
std::string modelPath = std::string(pModelDir) + "/wenwen.ort";
spotter = std::make_shared<KeywordSpotting>(modelPath);
feature_config = std::make_shared<wenet::FeaturePipelineConfig>(40, 16000);
feature_pipeline = std::make_shared<wenet::FeaturePipeline>(*feature_config);
}
void reset(JNIEnv *env, jobject) {
offset = 0;
result = "";
spotter->Reset();
}
void accept_waveform(JNIEnv *env, jobject, jshortArray jWaveform) {
jsize size = env->GetArrayLength(jWaveform);
int16_t* waveform = env->GetShortArrayElements(jWaveform, 0);
std::vector<int16_t> v(waveform, waveform + size);
feature_pipeline->AcceptWaveform(v);
LOG(INFO) << "wekws accept waveform in ms: " << int(size / 16);
}
void set_input_finished() {
LOG(INFO) << "wekws input finished";
feature_pipeline->set_input_finished();
}
void spot_thread_func() {
while (true) {
std::vector<std::vector<float>> feats;
feature_pipeline->Read(80, &feats);
std::vector<std::vector<float>> prob;
spotter->Forward(feats, &prob);
float max_hi_xiaowen = 0;
float max_nihao_wenwen = 0;
for (int t = 0; t < prob.size(); t++) {
max_hi_xiaowen = std::max(prob[t][0], max_hi_xiaowen);
max_nihao_wenwen = std::max(prob[t][1], max_nihao_wenwen);
}
float detect_prob = max_hi_xiaowen + max_nihao_wenwen;
result = std::to_string(offset) + "prob: " + std::to_string(detect_prob);
offset += prob.size();
}
}
void start_spot() {
std::thread decode_thread(spot_thread_func);
decode_thread.detach();
}
jstring get_result(JNIEnv *env, jobject) {
LOG(INFO) << "wekws ui result: " << result;
return env->NewStringUTF(result.c_str());
}
} // namespace wekws
// Entry point called when the shared library is loaded. Binds the native
// methods of the Java class cn.org.wenet.wekws.Spot to the functions in
// namespace wekws.
//
// Returns JNI_VERSION_1_6 on success, JNI_ERR when the environment or the
// class cannot be obtained, or the RegisterNatives status when binding fails.
JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) {
  JNIEnv *jni_env = nullptr;
  const jint env_status =
      vm->GetEnv(reinterpret_cast<void **>(&jni_env), JNI_VERSION_1_6);
  if (env_status != JNI_OK) {
    return JNI_ERR;
  }

  jclass spot_class = jni_env->FindClass("cn/org/wenet/wekws/Spot");
  if (spot_class == nullptr) {
    return JNI_ERR;
  }

  // Each entry is {Java method name, JNI signature, native function}.
  static const JNINativeMethod kNativeMethods[] = {
      {"init", "(Ljava/lang/String;)V",
       reinterpret_cast<void *>(wekws::init)},
      {"reset", "()V", reinterpret_cast<void *>(wekws::reset)},
      {"acceptWaveform", "([S)V",
       reinterpret_cast<void *>(wekws::accept_waveform)},
      {"setInputFinished", "()V",
       reinterpret_cast<void *>(wekws::set_input_finished)},
      {"startSpot", "()V", reinterpret_cast<void *>(wekws::start_spot)},
      {"getResult", "()Ljava/lang/String;",
       reinterpret_cast<void *>(wekws::get_result)},
  };
  const int method_count = sizeof(kNativeMethods) / sizeof(JNINativeMethod);

  const int status =
      jni_env->RegisterNatives(spot_class, kNativeMethods, method_count);
  return status == JNI_OK ? JNI_VERSION_1_6 : status;
}

View File

@ -1,14 +1,212 @@
package cn.org.wenet.wekws;
import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;
import android.Manifest;
import android.content.Context;
import android.content.pm.PackageManager;
import android.content.res.AssetManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.os.Process;
import android.util.Log;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
public class MainActivity extends AppCompatActivity {
private final int MY_PERMISSIONS_RECORD_AUDIO = 1;
private static final String LOG_TAG = "WEKWS";
private static final int SAMPLE_RATE = 16000; // The sampling rate
private static final int MAX_QUEUE_SIZE = 2500; // 100 seconds audio, 1 / 0.04 * 100
private static final List<String> resource = Arrays.asList("wenwen.ort");
private boolean startRecord = false;
private AudioRecord record = null;
private int miniBufferSize = 0; // 1280 bytes 648 byte 40ms, 0.04s
private final BlockingQueue<short[]> bufferQueue = new ArrayBlockingQueue<>(MAX_QUEUE_SIZE);
public static void assetsInit(Context context) throws IOException {
AssetManager assetMgr = context.getAssets();
// Unzip all files in resource from assets to context.
// Note: Uninstall the APP will remove the resource files in the context.
for (String file : assetMgr.list("")) {
if (resource.contains(file)) {
File dst = new File(context.getFilesDir(), file);
if (!dst.exists() || dst.length() == 0) {
Log.i(LOG_TAG, "Unzipping " + file + " to " + dst.getAbsolutePath());
InputStream is = assetMgr.open(file);
OutputStream os = new FileOutputStream(dst);
byte[] buffer = new byte[4 * 1024];
int read;
while ((read = is.read(buffer)) != -1) {
os.write(buffer, 0, read);
}
os.flush();
}
}
}
}
@Override
public void onRequestPermissionsResult(int requestCode,
String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == MY_PERMISSIONS_RECORD_AUDIO) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
Log.i(LOG_TAG, "record permission is granted");
initRecorder();
} else {
Toast.makeText(this, "Permissions denied to record audio", Toast.LENGTH_LONG).show();
Button button = findViewById(R.id.button);
button.setEnabled(false);
}
}
}
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
requestAudioPermissions();
try {
assetsInit(this);
} catch (IOException e) {
Log.e(LOG_TAG, "Error process asset files to file path");
}
TextView textView = findViewById(R.id.textView);
textView.setText("");
Spot.init(getFilesDir().getPath());
Button button = findViewById(R.id.button);
button.setText("Start Record");
button.setOnClickListener(view -> {
if (!startRecord) {
startRecord = true;
startRecordThread();
startSpotThread();
Spot.reset();
Spot.startSpot();
button.setText("Stop Record");
} else {
startRecord = false;
button.setText("Start Record");
}
});
}
private void requestAudioPermissions() {
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
!= PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this,
new String[]{Manifest.permission.RECORD_AUDIO},
MY_PERMISSIONS_RECORD_AUDIO);
} else {
initRecorder();
}
}
private void initRecorder() {
// buffer size in bytes 1280
miniBufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE,
AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT);
if (miniBufferSize == AudioRecord.ERROR || miniBufferSize == AudioRecord.ERROR_BAD_VALUE) {
Log.e(LOG_TAG, "Audio buffer can't initialize!");
return;
}
if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
// TODO: Consider calling
// ActivityCompat#requestPermissions
// here to request the missing permissions, and then overriding
// public void onRequestPermissionsResult(int requestCode, String[] permissions,
// int[] grantResults)
// to handle the case where the user grants the permission. See the documentation
// for ActivityCompat#requestPermissions for more details.
return;
}
record = new AudioRecord(MediaRecorder.AudioSource.DEFAULT,
SAMPLE_RATE,
AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT,
miniBufferSize);
if (record.getState() != AudioRecord.STATE_INITIALIZED) {
Log.e(LOG_TAG, "Audio Record can't initialize!");
return;
}
Log.i(LOG_TAG, "Record init okay");
}
private void startRecordThread() {
new Thread(() -> {
VoiceRectView voiceView = findViewById(R.id.voiceRectView);
record.startRecording();
Process.setThreadPriority(Process.THREAD_PRIORITY_AUDIO);
while (startRecord) {
short[] buffer = new short[miniBufferSize / 2];
int read = record.read(buffer, 0, buffer.length);
voiceView.add(calculateDb(buffer));
try {
if (AudioRecord.ERROR_INVALID_OPERATION != read) {
bufferQueue.put(buffer);
}
} catch (InterruptedException e) {
Log.e(LOG_TAG, e.getMessage());
}
Button button = findViewById(R.id.button);
if (!button.isEnabled() && startRecord) {
runOnUiThread(() -> button.setEnabled(true));
}
}
record.stop();
voiceView.zero();
}).start();
}
private double calculateDb(short[] buffer) {
double energy = 0.0;
for (short value : buffer) {
energy += value * value;
}
energy /= buffer.length;
energy = (10 * Math.log10(1 + energy)) / 100;
energy = Math.min(energy, 1.0);
return energy;
}
private void startSpotThread() {
new Thread(() -> {
// Send all data
while (startRecord || bufferQueue.size() > 0) {
try {
short[] data = bufferQueue.take();
// 1. add data to C++ interface
Spot.acceptWaveform(data);
// 2. get partial result
runOnUiThread(() -> {
TextView textView = findViewById(R.id.textView);
textView.setText(Spot.getResult());
});
} catch (InterruptedException e) {
Log.e(LOG_TAG, e.getMessage());
}
}
}).start();
}
}

View File

@ -0,0 +1,15 @@
package cn.org.wenet.wekws;
/**
 * Static JNI facade over the native "wekws" library. The native side binds
 * these methods by name and signature when the library is loaded, so names and
 * signatures here must stay in sync with that table.
 */
public class Spot {
// Loads libwekws when the class is first used.
static {
System.loadLibrary("wekws");
}
/**
 * Creates the native spotter and feature pipeline.
 *
 * @param modelDir directory that contains the model file "wenwen.ort"
 */
public static native void init(String modelDir);
/** Clears the native result string and frame offset and resets the spotter. */
public static native void reset();
/**
 * Feeds a chunk of 16-bit audio samples to the native feature pipeline.
 *
 * @param waveform audio samples to append
 */
public static native void acceptWaveform(short[] waveform);
/** Marks the native feature pipeline's input as finished. */
public static native void setInputFinished();
/** Starts the native spotting loop on a detached native thread. */
public static native void startSpot();
/**
 * Returns the result string most recently written by the native spotting loop.
 *
 * @return latest result text (empty after {@link #reset()})
 */
public static native String getResult();
}

View File

@ -2,17 +2,49 @@
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    xmlns:VoiceRect="http://schemas.android.com/apk/res-auto"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Result text; MainActivity looks it up by id and overwrites its text. -->
    <TextView
        android:id="@+id/textView"
        android:layout_width="0dp"
        android:layout_height="wrap_content"
        android:background="#F4F4F4"
        android:maxLines="15"
        android:minLines="15"
        android:text="TextView"
        android:textSize="30sp"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintHorizontal_bias="0.0"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        app:layout_constraintVertical_bias="0.08" />

    <!-- Level display placed between the result text and the button. -->
    <cn.org.wenet.wekws.VoiceRectView
        android:id="@+id/voiceRectView"
        android:layout_width="0dp"
        android:layout_height="150dp"
        VoiceRect:RectCount="50"
        VoiceRect:RectDownColor="@color/green"
        VoiceRect:RectOffset="0"
        VoiceRect:RectSpeed="300"
        VoiceRect:RectTopColor="@color/green"
        app:layout_constraintBottom_toTopOf="@+id/button"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toBottomOf="@+id/textView" />

    <!-- Start/stop button; MainActivity sets its label at runtime. -->
    <Button
        android:id="@+id/button"
        android:layout_width="0dp"
        android:layout_height="wrap_content"
        android:text="Button"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintHorizontal_bias="1.0"
        app:layout_constraintStart_toStartOf="parent" />

</androidx.constraintlayout.widget.ConstraintLayout>

View File

@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <string>
@ -50,11 +49,10 @@ int main(int argc, char* argv[]) {
std::vector<std::vector<float>> prob;
spotter.Forward(feats, &prob);
for (int i = 0; i < prob.size(); i++) {
std::cout << "frame " << offset + i << " prob";
for (int j = 0; j < prob[i].size(); j++) {
std::cout << " " << prob[i][j];
if (prob[i][0] > 0.1 || prob[i][1] > 0.1) {
std::cout << "frame " << offset + i << " prob " << prob[i][0] << " "
<< prob[i][1] << std::endl;
}
std::cout << std::endl;
}
// Reach the end of feature pipeline
if (!ok) break;