SmartAudio/package/avs/libsensory/files/test/main.cpp

252 lines
7.9 KiB
C++

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
#include <snsr.h>
// Number of samples requested from the WAV file per fread() in main() and
// handed to the recognizer in one snsrRun() call.
#define CHUNK_SAMPLES 160
// File-scope Sensory session handle: created by snsrNew() in snsr_init(),
// released in snsr_release(), and used by main() to push audio.
SnsrSession m_session;
/**
 * Builds a printable description of a Sensory failure.
 *
 * @param session Session to ask for a detailed error text. When it is null the
 *                generic text for @p result is used instead.
 * @param result  Return code used for the lookup when no session is available.
 * @return The text supplied by the library, or "Unrecognized error" when the
 *         library supplies no text (null or empty string).
 */
static std::string getSensoryDetails(SnsrSession session, SnsrRC result) {
    const char* text = nullptr;
    // It is recommended by Sensory to prefer snsrErrorDetail() over snsrRCMessage() as it provides more details.
    if (session) {
        text = snsrErrorDetail(session);
    } else {
        text = snsrRCMessage(result);
    }
    // Building a std::string from a null pointer is undefined behaviour, so a
    // null result is handled the same way as an empty message.
    if (text == nullptr || *text == '\0') {
        return "Unrecognized error";
    }
    return text;
}
/**
 * Handler registered for SNSR_RESULT_EVENT in snsr_init().
 *
 * Reads the begin sample, end sample and text of the current result from the
 * session, in that order, and prints a running detection counter together
 * with the sample span. The text is fetched but not printed.
 *
 * @param s        Session that raised the event.
 * @param key      Unused.
 * @param userData Unused.
 * @return SNSR_RC_OK on success, otherwise the code of the first query that
 *         failed (after printing a diagnostic for it).
 */
static SnsrRC keyWordDetectedCallback(SnsrSession s, const char* key, void* userData)
{
    static int detections = 0;
    double firstSample;
    double lastSample;
    const char* phrase;

    // `stage` always names the query about to run, so one failure report
    // below covers all three lookups.
    const char* stage = "keyWordDetectedCallbackFailed getbegin ";
    SnsrRC rc = snsrGetDouble(s, SNSR_RES_BEGIN_SAMPLE, &firstSample);
    if (rc == SNSR_RC_OK) {
        stage = "keyWordDetectedCallbackFailed getend ";
        rc = snsrGetDouble(s, SNSR_RES_END_SAMPLE, &lastSample);
    }
    if (rc == SNSR_RC_OK) {
        stage = "keyWordDetectedCallbackFailed keywordRetrievalFailure ";
        rc = snsrGetString(s, SNSR_RES_TEXT, &phrase);
    }
    if (rc != SNSR_RC_OK) {
        std::cout << stage << getSensoryDetails(s, rc) << std::endl;
        return rc;
    }

    std::cout << "keyWordDetected! " << detections++
              << ",begin: " << static_cast<uint64_t>(firstSample)
              << " end:" << static_cast<uint64_t>(lastSample)
              << " (samples)" << std::endl;
    return SNSR_RC_OK;
}
/**
 * Releases the global recognizer session held in m_session.
 *
 * Safe to call more than once: the handle is cleared after the release so a
 * repeated call does nothing.
 *
 * @return Always 0.
 */
int snsr_release()
{
    if (m_session) {
        snsrRelease(m_session);
        // Drop the stale handle so it cannot be released or used again.
        m_session = nullptr;
    }
    return 0;
}
/**
 * Creates the global Sensory session and prepares it for chunked audio input.
 *
 * Steps, in order: allocate the session, print license information, load the
 * model file, optionally override the operating point, install
 * keyWordDetectedCallback for SNSR_RESULT_EVENT, and disable auto-flush.
 * Failure of a required step (allocation, model load, handler, auto-flush)
 * prints a diagnostic and terminates the process with exit(-1). Problems while
 * querying or setting the operating point are only reported.
 *
 * @param model Path of the model file passed to snsrStreamFromFileName().
 * @param point Operating point to select; values <= 0 leave the model default.
 * @return 0 on success (fatal failures do not return).
 */
int snsr_init(const char *model, int point)
{
    // Allocate the Sensory library handle
    SnsrRC result = snsrNew(&m_session);
    if (result != SNSR_RC_OK) {
        std::cout << "initFailed: snsrNew " << getSensoryDetails(m_session, result) << std::endl;
        exit(-1);
    }

    // Get the expiration date of the library
    const char* info = nullptr;
    result = snsrGetString(m_session, SNSR_LICENSE_EXPIRES, &info);
    if (result == SNSR_RC_OK && info) {
        // Will print "License expires on <date>"
        std::cout << info << std::endl;
    } else {
        std::cout << "Sensory library license does not expire." << std::endl;
    }

    // Check if the expiration date is near, then we should display a warning
    result = snsrGetString(m_session, SNSR_LICENSE_WARNING, &info);
    if (result == SNSR_RC_OK && info) {
        // Will print "License will expire in <days-until-expiration> days."
        std::cout << info << std::endl;
    } else {
        std::cout << "Sensory library license does not expire for at least 60 more days." << std::endl;
    }

    result = snsrLoad(m_session, snsrStreamFromFileName(model, "r"));
    if (result != SNSR_RC_OK) {
        std::cout << "initFailed: snsrLoad " << getSensoryDetails(m_session, result) << std::endl;
        exit(-1);
    }

    if (point > 0) {
        // Initialised so a failed query can never lead to printing an
        // indeterminate value.
        int target = 0;

        result = snsrGetInt(m_session, SNSR_OPERATING_POINT, &target);
        if (result == SNSR_RC_OK) {
            std::cout << "Sensory model default operating point " << target << std::endl;
        } else {
            std::cout << "initFailed: getOperatingPoint " << getSensoryDetails(m_session, result) << std::endl;
        }

        result = snsrSet(m_session, ("operating-point=" + std::to_string(point)).c_str());
        if (result != SNSR_RC_OK) {
            std::cout <<"error" << getSensoryDetails(m_session, result) << std::endl;
        }

        result = snsrGetInt(m_session, SNSR_OPERATING_POINT, &target);
        if (result == SNSR_RC_OK) {
            std::cout << "Sensory model new operating point " << target << std::endl;
        } else {
            std::cout << "initFailed: getOperatingPoint " << getSensoryDetails(m_session, result) << std::endl;
        }
    }

    // Setting the callback handler
    result = snsrSetHandler(
        m_session,
        SNSR_RESULT_EVENT,
        snsrCallback(keyWordDetectedCallback, nullptr, nullptr));
    if (result != SNSR_RC_OK) {
        std::cout << "setUpRuntimeSettingsFailed: setKeywordDetectionHandlerFailure " << getSensoryDetails(m_session, result) << std::endl;
        exit(-1);
    }

    /*
     * Turns off automatic pipeline flushing that happens when the end of the input stream is reached. This is an
     * internal setting recommended by Sensory when audio is presented to Sensory in small chunks.
     */
    result = snsrSetInt(m_session, SNSR_AUTO_FLUSH, 0);
    if (result != SNSR_RC_OK) {
        std::cout << "setUpRuntimeSettingsFailed: disableAutoPipelineFlushingFailed " << getSensoryDetails(m_session, result) << std::endl;
        exit(-1);
    }
    return 0;
}
/**
 * Fixed-size header read from the start of the WAV file.
 *
 * main() fills this struct with a single raw fread(), so the in-memory layout
 * must match the file layout byte for byte and the fields are interpreted in
 * host byte order. The offsets in the comments are hexadecimal byte offsets
 * from the start of the file.
 */
struct wav_header {
    uint32_t riff_id;         /*00H ~ 03H*/ //"RIFF"
    uint32_t riff_sz;         /*04H ~ 07H*/
    uint32_t riff_fmt;        /*08H ~ 0BH*/ //"WAVE"
    uint32_t fmt_id;          /*0CH ~ 0FH*/ //"fmt "
    uint32_t fmt_sz;          /*10H ~ 13H*/ //PCM 16
    uint16_t audio_format;    /*14H ~ 15H*/ //PCM 1
    uint16_t num_channels;    /*16H ~ 17H*/ //PCM 1
    uint32_t sample_rate;     /*18H ~ 1BH*/ // main() requires 16000
    uint32_t byte_rate;       /*1CH ~ 1FH*/
    uint16_t block_align;     /*20H ~ 21H*/
    uint16_t bits_per_sample; /*22H ~ 23H*/ // main() derives the bytes per sample from this
    uint32_t data_id;         /*24H ~ 27H*/ // presumably "data" - not checked by main()
    uint32_t data_sz;         /*28H ~ 2BH*/ // audio payload size in bytes, used for progress output
};

// The struct is filled by a raw fread(); any compiler-inserted padding would
// shift every following field, so fail the build instead of misparsing files.
static_assert(sizeof(wav_header) == 44, "wav_header must be exactly 44 bytes with no padding");
int main(int argc, char *argv[])
{
const char *wav = nullptr;
const char *model = nullptr;
int point = -1;
if(argc < 3) exit(-1);
wav = argv[1];
model = argv[2];
if(argc == 4) point = atoi(argv[3]);
snsr_init(model, point);
//File
printf("open: %s\n", wav);
FILE *fp = fopen(wav, "r");
if(fp == NULL){
printf("fopen error %s\n",strerror(errno));
return -1;
}
char *data;
char *final_data = data;
int re_test = 1;
while(re_test-- > 0) {
struct wav_header header;
int bytes = fread((void*)&header, 1, sizeof(struct wav_header), fp);
if(bytes != sizeof(struct wav_header)) exit(-1);
printf("num_channels = %d\n", header.num_channels);
printf("bits_per_sample = %d\n", header.bits_per_sample);
printf("sample_rate = %d\n", header.sample_rate);
printf("total samples = %d\n", header.data_sz/(header.num_channels*header.bits_per_sample/8));
if(header.num_channels > 1) {
printf("target wav channel must be 1!\n");
exit(-1);
}
if(header.sample_rate != 16000) {
printf("target wav sample rate must be 16000!\n");
exit(-1);
}
int sample_size = header.bits_per_sample/8;
final_data = data = (char*)malloc(CHUNK_SAMPLES * sample_size);
if(sample_size == 4){
final_data = (char*)malloc(CHUNK_SAMPLES * 2);
}
//fseek(fp, 44, SEEK_SET); //skip wav header
int should_break = 1;
int32_t static_readbytes = 0;
while(should_break) {
bytes = fread(data, 1, CHUNK_SAMPLES*sample_size, fp);
if(bytes < 0) {
printf("fread error %s\n",strerror(errno));
exit(-1);
}
if(bytes == 0){
printf("Reach the end of the file? fread bytes %d\n",bytes);
should_break = 0;
continue;
}
//process print
static_readbytes += bytes;
float process = (float)static_readbytes*100/(float)header.data_sz;
printf("\r[%0.2f%%] ", process);
fflush(stdout);
if(sample_size == 4) {
uint16_t *target_samples = (uint16_t*)final_data;
uint32_t *orgin_samples = (uint32_t*)data;
for(int i = 0; i < bytes/sample_size; i++){
target_samples[i] = orgin_samples[i] >> 16;
}
bytes = bytes/2;
}
snsrSetStream(
m_session,
SNSR_SOURCE_AUDIO_PCM,
snsrStreamFromMemory(final_data, bytes, SNSR_ST_MODE_READ));
SnsrRC result = snsrRun(m_session);
switch (result) {
case SNSR_RC_STREAM_END:
// Reached end of buffer without any keyword detections
break;
case SNSR_RC_OK:
std::cout << "SNSR_RC_OK" << std::endl;
break;
default:
// A different return from the callback function that indicates some sort of error
std::cout << "detect: unexpectedReturn " << getSensoryDetails(m_session, result) << std::endl;
exit(-1);
}
snsrClearRC(m_session);
}
}
fclose(fp);
snsr_release();
return 0;
}