Add AGI commands for speech recognition. These mostly mirror the dialplan applications, but present the information in a nicer fashion. The SPEECH RECOGNIZE command, for example, returns the results directly, so the script does not have to query the dialplan functions for them.
git-svn-id: http://svn.digium.com/svn/asterisk/trunk@90656 f38db490-d61c-443f-a65b-d21fe96a405b
This commit is contained in:
parent
5ea0aff8a0
commit
7c209702a8
5
CHANGES
5
CHANGES
|
@ -301,6 +301,11 @@ Language Support Changes
|
|||
* Brazilian Portuguese (pt-BR) in VM, and say.c was added
|
||||
* Added support for the Hungarian language for saying numbers, dates, and times.
|
||||
|
||||
AGI Changes
|
||||
-----------
|
||||
* Added SPEECH commands for speech recognition. A complete listing can be found
|
||||
using agi show.
|
||||
|
||||
Miscellaneous
|
||||
-------------
|
||||
* Added the bindaddr option to gtalk.conf.
|
||||
|
|
|
@ -32,6 +32,7 @@ typedef struct agi_state {
|
|||
int audio; /* FD for audio output */
|
||||
int ctrl; /* FD for input control */
|
||||
unsigned int fast:1; /* flag for fast agi or not */
|
||||
struct ast_speech *speech; /* Speech structure for speech recognition */
|
||||
} AGI;
|
||||
|
||||
typedef struct agi_command {
|
||||
|
|
330
res/res_agi.c
330
res/res_agi.c
|
@ -51,6 +51,7 @@ ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
|
|||
#include "asterisk/lock.h"
|
||||
#include "asterisk/strings.h"
|
||||
#include "asterisk/agi.h"
|
||||
#include "asterisk/speech.h"
|
||||
|
||||
#define MAX_ARGS 128
|
||||
#define AGI_NANDFS_RETRY 3
|
||||
|
@ -1337,6 +1338,291 @@ static int handle_setmusic(struct ast_channel *chan, AGI *agi, int argc, char *a
|
|||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH CREATE <engine>".
 *
 * Creates the speech object stored on the AGI session (agi->speech) by
 * passing argv[2] to ast_speech_new() with AST_FORMAT_SLINEAR.
 *
 * Replies "200 result=1" when a new speech object was created, and
 * "200 result=0" when one already exists or creation failed.
 *
 * \return always RESULT_SUCCESS
 */
static int handle_speechcreate(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	int created = 0;

	/* An already existing speech object is treated as an error */
	if (!agi->speech) {
		agi->speech = ast_speech_new(argv[2], AST_FORMAT_SLINEAR);
		created = (agi->speech != NULL);
	}

	ast_agi_fdprintf(chan, agi->fd, created ? "200 result=1\n" : "200 result=0\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH SET <name> <value>".
 *
 * Passes the setting name (argv[2]) and value (argv[3]) to
 * ast_speech_change() on the session's speech object.
 *
 * Replies "200 result=0" when no speech object exists, otherwise
 * "200 result=1".
 *
 * \return RESULT_SHOWUSAGE when the argument count is wrong,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechset(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	/*
	 * The command is four tokens long: "speech", "set", <name>, <value>.
	 * Both argv[2] and argv[3] are read below, so anything other than
	 * exactly four arguments is a usage error.
	 */
	if (argc != 4)
		return RESULT_SHOWUSAGE;

	/* Check to make sure speech structure exists */
	if (!agi->speech) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=0\n");
		return RESULT_SUCCESS;
	}

	ast_speech_change(agi->speech, argv[2], argv[3]);
	ast_agi_fdprintf(chan, agi->fd, "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH DESTROY".
 *
 * Destroys the session's speech object, if any, and clears agi->speech.
 * Replies "200 result=1" when an object was destroyed and "200 result=0"
 * when there was none.
 *
 * \return always RESULT_SUCCESS
 */
static int handle_speechdestroy(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	/* Nothing to tear down: tell the script so and leave */
	if (!agi->speech) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=0\n");
		return RESULT_SUCCESS;
	}

	ast_speech_destroy(agi->speech);
	/* Clear the pointer so later SPEECH commands see that no object exists */
	agi->speech = NULL;
	ast_agi_fdprintf(chan, agi->fd, "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH LOAD GRAMMAR <grammar name> <path to grammar>".
 *
 * Calls ast_speech_grammar_load() with argv[3] and argv[4] on the session's
 * speech object. Replies "200 result=1" when that call returns zero, and
 * "200 result=0" when it returns non-zero or no speech object exists.
 *
 * \return RESULT_SHOWUSAGE unless exactly five arguments were given,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechloadgrammar(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	int failed = 1;	/* stays set when there is no speech object */

	if (argc != 5)
		return RESULT_SHOWUSAGE;

	if (agi->speech)
		failed = ast_speech_grammar_load(agi->speech, argv[3], argv[4]);

	ast_agi_fdprintf(chan, agi->fd, failed ? "200 result=0\n" : "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH UNLOAD GRAMMAR <grammar name>".
 *
 * Calls ast_speech_grammar_unload() with argv[3] on the session's speech
 * object. Replies "200 result=1" when that call returns zero, and
 * "200 result=0" when it returns non-zero or no speech object exists.
 *
 * \return RESULT_SHOWUSAGE unless exactly four arguments were given,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechunloadgrammar(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	int failed = 1;	/* stays set when there is no speech object */

	if (argc != 4)
		return RESULT_SHOWUSAGE;

	if (agi->speech)
		failed = ast_speech_grammar_unload(agi->speech, argv[3]);

	ast_agi_fdprintf(chan, agi->fd, failed ? "200 result=0\n" : "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH ACTIVATE GRAMMAR <grammar name>".
 *
 * Calls ast_speech_grammar_activate() with argv[3] on the session's speech
 * object. Replies "200 result=1" when that call returns zero, and
 * "200 result=0" when it returns non-zero or no speech object exists.
 *
 * \return RESULT_SHOWUSAGE unless exactly four arguments were given,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechactivategrammar(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	int failed = 1;	/* stays set when there is no speech object */

	if (argc != 4)
		return RESULT_SHOWUSAGE;

	if (agi->speech)
		failed = ast_speech_grammar_activate(agi->speech, argv[3]);

	ast_agi_fdprintf(chan, agi->fd, failed ? "200 result=0\n" : "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH DEACTIVATE GRAMMAR <grammar name>".
 *
 * Calls ast_speech_grammar_deactivate() with argv[3] on the session's speech
 * object. Replies "200 result=1" when that call returns zero, and
 * "200 result=0" when it returns non-zero or no speech object exists.
 *
 * \return RESULT_SHOWUSAGE unless exactly four arguments were given,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechdeactivategrammar(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	int failed = 1;	/* stays set when there is no speech object */

	if (argc != 4)
		return RESULT_SHOWUSAGE;

	if (agi->speech)
		failed = ast_speech_grammar_deactivate(agi->speech, argv[3]);

	ast_agi_fdprintf(chan, agi->fd, failed ? "200 result=0\n" : "200 result=1\n");

	return RESULT_SUCCESS;
}
|
||||
|
||||
/*!
 * \brief Open a sound file and start playing it on the channel.
 *
 * Opens \a filename with ast_openstream() using \a preflang, seeks to
 * \a offset (SEEK_SET) when it is non-zero, applies the stream to the
 * channel and starts playback.
 *
 * \return 0 on success, -1 if the stream could not be opened, applied or played
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang, int offset)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	if (!stream)
		return -1;

	if (offset != 0)
		ast_seekstream(stream, offset, SEEK_SET);

	/* Playback is only attempted once the stream has been applied to the channel */
	if (ast_applystream(chan, stream) || ast_playstream(stream))
		return -1;

	return 0;
}
|
||||
|
||||
/*!
 * \brief AGI handler for "SPEECH RECOGNIZE <prompt> <timeout> [<offset>]".
 *
 * Starts playing \a prompt (argv[2], optionally from the offset in argv[4])
 * and then loops reading frames from the channel. Voice frames are written
 * to the speech object while it is in the READY state. The loop ends when:
 *  - the speech object reaches the DONE state       (reason "speech")
 *  - a DTMF frame is read                           (reason "dtmf")
 *  - ast_read() fails or a hangup control frame is read (reason "hangup")
 *  - \a timeout (argv[3], compared against time() differences, i.e.
 *    seconds) has elapsed since the prompt finished (reason "timeout");
 *    a timeout of zero or less disables this check
 *
 * Replies:
 *  - "200 result=0" when no speech object exists or the read format could
 *    not be set to signed linear
 *  - "200 result=1 (speech) endpos=<n> results=<count> score0=.. text0=".."
 *    grammar0=.. ..." with one score/text/grammar triple per result
 *  - "200 result=1 (digit) digit=<c> endpos=<n>"
 *  - "200 result=1 (hangup) endpos=<n>" / "200 result=1 (timeout) endpos=<n>"
 *
 * endpos is the prompt position captured when the engine asked for playback
 * to be quieted (AST_SPEECH_QUIET), and 0 if that never happened.
 *
 * \return RESULT_SHOWUSAGE when fewer than four arguments were given,
 *         RESULT_SUCCESS otherwise
 */
static int handle_speechrecognize(struct ast_channel *chan, AGI *agi, int argc, char **argv)
{
	struct ast_speech *speech = agi->speech;
	char *prompt, dtmf = 0, tmp[4096] = "", *buf = tmp;
	int timeout = 0, offset = 0, old_read_format = 0, res = 0, i = 0;
	long current_offset = 0;
	const char *reason = NULL;
	struct ast_frame *fr = NULL;
	struct ast_speech_result *result = NULL;
	size_t left = sizeof(tmp);
	time_t start = 0, current;

	if (argc < 4)
		return RESULT_SHOWUSAGE;

	if (!speech) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=0\n");
		return RESULT_SUCCESS;
	}

	prompt = argv[2];
	timeout = atoi(argv[3]);

	/* If offset is specified then convert from text to integer */
	if (argc == 5)
		offset = atoi(argv[4]);

	/* We want frames coming in signed linear */
	old_read_format = chan->readformat;
	if (ast_set_read_format(chan, AST_FORMAT_SLINEAR)) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=0\n");
		return RESULT_SUCCESS;
	}

	/* Setup speech structure */
	if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
		ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
		ast_speech_start(speech);
	}

	/* Start playing prompt */
	speech_streamfile(chan, prompt, chan->language, offset);

	/* Go into loop reading in frames, passing to speech thingy, checking for hangup, all that jazz */
	while (ast_strlen_zero(reason)) {
		/*
		 * The frame from the previous pass was freed at the bottom of the
		 * loop. Forget it, so a pass in which ast_waitfor() yields nothing
		 * does not touch (or free again) a dangling pointer.
		 */
		fr = NULL;

		/* Run scheduled items */
		ast_sched_runq(chan->sched);

		/* See maximum time of waiting */
		if ((res = ast_sched_wait(chan->sched)) < 0)
			res = 1000;

		/* Wait for frame */
		if (ast_waitfor(chan, res) > 0) {
			if (!(fr = ast_read(chan))) {
				reason = "hangup";
				break;
			}
		}

		/* Perform timeout check */
		if ((timeout > 0) && (start > 0)) {
			time(&current);
			if ((current - start) >= timeout) {
				reason = "timeout";
				if (fr)
					ast_frfree(fr);
				break;
			}
		}

		/* Check the speech structure for any changes */
		ast_mutex_lock(&speech->lock);

		/* See if we need to quiet the audio stream playback */
		if (ast_test_flag(speech, AST_SPEECH_QUIET) && chan->stream) {
			current_offset = ast_tellstream(chan->stream);
			ast_stopstream(chan);
			ast_clear_flag(speech, AST_SPEECH_QUIET);
		}

		/* Check each state */
		switch (speech->state) {
		case AST_SPEECH_STATE_READY:
			/*
			 * If the stream is done, start timeout calculation. The timer
			 * is armed only once (start == 0); re-arming it on every pass
			 * would keep pushing the deadline forward so it never expires.
			 */
			if ((timeout > 0) && (start == 0) && ((!chan->stream) || (chan->streamid == -1 && chan->timingfunc == NULL))) {
				ast_stopstream(chan);
				time(&start);
			}
			/* Write audio frame data into speech engine if possible */
			if (fr && fr->frametype == AST_FRAME_VOICE)
				ast_speech_write(speech, fr->data, fr->datalen);
			break;
		case AST_SPEECH_STATE_WAIT:
			/* Cue waiting sound if not already playing */
			if ((!chan->stream) || (chan->streamid == -1 && chan->timingfunc == NULL)) {
				ast_stopstream(chan);
				/* If a processing sound exists, or is not none - play it */
				if (!ast_strlen_zero(speech->processing_sound) && strcasecmp(speech->processing_sound, "none"))
					speech_streamfile(chan, speech->processing_sound, chan->language, 0);
			}
			break;
		case AST_SPEECH_STATE_DONE:
			/* Get the results */
			speech->results = ast_speech_results_get(speech);
			/* Change state to not ready */
			ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
			reason = "speech";
			break;
		default:
			break;
		}
		ast_mutex_unlock(&speech->lock);

		/* Check frame for DTMF or hangup */
		if (fr) {
			if (fr->frametype == AST_FRAME_DTMF) {
				reason = "dtmf";
				dtmf = fr->subclass;
			} else if (fr->frametype == AST_FRAME_CONTROL && fr->subclass == AST_CONTROL_HANGUP) {
				reason = "hangup";
			}
			ast_frfree(fr);
		}
	}

	/*
	 * Put the channel back into the read format it had before we forced
	 * signed linear. Best effort: the reply below does not depend on it.
	 */
	if (old_read_format)
		ast_set_read_format(chan, old_read_format);

	if (!strcasecmp(reason, "speech")) {
		/* Build string containing speech results */
		for (result = speech->results; result; result = AST_LIST_NEXT(result, list)) {
			/* Build result string */
			ast_build_string(&buf, &left, "%sscore%d=%d text%d=\"%s\" grammar%d=%s", (i > 0 ? " " : ""), i, result->score, i, result->text, i, result->grammar);
			/* Increment result count */
			i++;
		}
		/* Print out */
		ast_agi_fdprintf(chan, agi->fd, "200 result=1 (speech) endpos=%ld results=%d %s\n", current_offset, i, tmp);
	} else if (!strcasecmp(reason, "dtmf")) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=1 (digit) digit=%c endpos=%ld\n", dtmf, current_offset);
	} else if (!strcasecmp(reason, "hangup") || !strcasecmp(reason, "timeout")) {
		ast_agi_fdprintf(chan, agi->fd, "200 result=1 (%s) endpos=%ld\n", reason, current_offset);
	} else {
		ast_agi_fdprintf(chan, agi->fd, "200 result=0 endpos=%ld\n", current_offset);
	}

	return RESULT_SUCCESS;
}
|
||||
|
||||
static char usage_setmusic[] =
|
||||
" Usage: SET MUSIC ON <on|off> <class>\n"
|
||||
" Enables/Disables the music on hold generator. If <class> is\n"
|
||||
|
@ -1584,6 +1870,38 @@ static char usage_noop[] =
|
|||
" Usage: NoOp\n"
|
||||
" Does nothing.\n";
|
||||
|
||||
/* Usage text for the "speech create" AGI command */
static char usage_speechcreate[] =
	" Usage: SPEECH CREATE <engine>\n"
	" Create a speech object to be used by the other Speech AGI commands.\n";

/* Usage text for the "speech set" AGI command */
static char usage_speechset[] =
	" Usage: SPEECH SET <name> <value>\n"
	" Set an engine-specific setting.\n";

/* Usage text for the "speech destroy" AGI command */
static char usage_speechdestroy[] =
	" Usage: SPEECH DESTROY\n"
	" Destroy the speech object created by SPEECH CREATE.\n";

/* Usage text for the "speech load grammar" AGI command */
static char usage_speechloadgrammar[] =
	" Usage: SPEECH LOAD GRAMMAR <grammar name> <path to grammar>\n"
	" Loads the specified grammar as the specified name.\n";

/* Usage text for the "speech unload grammar" AGI command */
static char usage_speechunloadgrammar[] =
	" Usage: SPEECH UNLOAD GRAMMAR <grammar name>\n"
	" Unloads the specified grammar.\n";

/* Usage text for the "speech activate grammar" AGI command */
static char usage_speechactivategrammar[] =
	" Usage: SPEECH ACTIVATE GRAMMAR <grammar name>\n"
	" Activates the specified grammar on the speech object.\n";

/* Usage text for the "speech deactivate grammar" AGI command */
static char usage_speechdeactivategrammar[] =
	" Usage: SPEECH DEACTIVATE GRAMMAR <grammar name>\n"
	" Deactivates the specified grammar on the speech object.\n";

/* Usage text for the "speech recognize" AGI command */
static char usage_speechrecognize[] =
	" Usage: SPEECH RECOGNIZE <prompt> <timeout> [<offset>]\n"
	" Plays back given prompt while listening for speech and dtmf.\n";
|
||||
|
||||
/*!
|
||||
* \brief AGI commands list
|
||||
*/
|
||||
|
@ -1625,6 +1943,14 @@ static struct agi_command commands[] = {
|
|||
{ { "tdd", "mode", NULL }, handle_tddmode, "Toggles TDD mode (for the deaf)", usage_tddmode , 0 },
|
||||
{ { "verbose", NULL }, handle_verbose, "Logs a message to the asterisk verbose log", usage_verbose , 1 },
|
||||
{ { "wait", "for", "digit", NULL }, handle_waitfordigit, "Waits for a digit to be pressed", usage_waitfordigit , 0 },
|
||||
{ { "speech", "create", NULL }, handle_speechcreate, "Creates a speech object", usage_speechcreate, 0 },
|
||||
{ { "speech", "set", NULL }, handle_speechset, "Sets a speech engine setting", usage_speechset, 0 },
|
||||
{ { "speech", "destroy", NULL }, handle_speechdestroy, "Destroys a speech object", usage_speechdestroy, 1 },
|
||||
{ { "speech", "load", "grammar", NULL }, handle_speechloadgrammar, "Loads a grammar", usage_speechloadgrammar, 0 },
|
||||
{ { "speech", "unload", "grammar", NULL }, handle_speechunloadgrammar, "Unloads a grammar", usage_speechunloadgrammar, 1 },
|
||||
{ { "speech", "activate", "grammar", NULL }, handle_speechactivategrammar, "Activates a grammar", usage_speechactivategrammar, 0 },
|
||||
{ { "speech", "deactivate", "grammar", NULL }, handle_speechdeactivategrammar, "Deactivates a grammar", usage_speechdeactivategrammar, 0 },
|
||||
{ { "speech", "recognize", NULL }, handle_speechrecognize, "Recognizes speech", usage_speechrecognize, 0 },
|
||||
};
|
||||
|
||||
static AST_RWLIST_HEAD_STATIC(agi_commands, agi_command);
|
||||
|
@ -1637,7 +1963,7 @@ static char *help_workhorse(int fd, char *match[])
|
|||
if (match)
|
||||
ast_join(matchstr, sizeof(matchstr), match);
|
||||
|
||||
ast_cli(fd, "%5.5s %20.20s %s\n","Dead","Command","Description");
|
||||
ast_cli(fd, "%5.5s %30.30s %s\n","Dead","Command","Description");
|
||||
AST_RWLIST_RDLOCK(&agi_commands);
|
||||
AST_RWLIST_TRAVERSE(&agi_commands, e, list) {
|
||||
if (!e->cmda[0])
|
||||
|
@ -1648,7 +1974,7 @@ static char *help_workhorse(int fd, char *match[])
|
|||
ast_join(fullcmd, sizeof(fullcmd), e->cmda);
|
||||
if (match && strncasecmp(matchstr, fullcmd, strlen(matchstr)))
|
||||
continue;
|
||||
ast_cli(fd, "%5.5s %20.20s %s\n", e->dead ? "Yes" : "No" , fullcmd, e->summary);
|
||||
ast_cli(fd, "%5.5s %30.30s %s\n", e->dead ? "Yes" : "No" , fullcmd, e->summary);
|
||||
}
|
||||
AST_RWLIST_UNLOCK(&agi_commands);
|
||||
|
||||
|
|
Reference in New Issue