4 files changed, 67 insertions, 0 deletions
diff --git a/util/bin/mic.stt b/util/bin/mic.stt
new file mode 100755
index 00000000..9236f85c
--- /dev/null
+++ b/util/bin/mic.stt
@@ -0,0 +1,12 @@
+#!/bin/sh
+set -efux  # mic.stt <seconds>: record from the microphone, then print the recognized text
+cd "$(dirname "$(readlink -f "$0")")"  # resolve symlinks so the relative library path below works
+. ../lib/stt/google.sh
+duration=${1?please provide duration via \$1}
+lang=${lang:-de-DE}  # caller may override via the environment; defaults to German
+export lang
+echo "language is set to $lang"
+echo "will record for '$duration' seconds"
+f=$(record_audio "${duration}")
+trap 'rm -f -- "$f"' TERM EXIT HUP  # -f: the EXIT trap may fire after a signal trap already removed the file
+stt "$f"
diff --git a/util/lib/stt/README.md b/util/lib/stt/README.md
new file mode 100644
index 00000000..be905770
--- /dev/null
+++ b/util/lib/stt/README.md
@@ -0,0 +1,4 @@
+# Speech-to-Text API wrapper
+
+Because speech-to-text is hard™ with FOSS, these libraries use the magic of
+the internets (online recognition services) to solve this problem.
diff --git a/util/lib/stt/google.sh b/util/lib/stt/google.sh
new file mode 100644
index 00000000..a78579d5
--- /dev/null
+++ b/util/lib/stt/google.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+_get_content_type(){  # print only the MIME type of the file given as $1, as reported by file(1)
+    file --brief --mime-type "$1"
+}
+_get_audio_rate(){  # print the sample rate of file $1 in Hz, taken from the "<n> kHz" field of file(1) output
+    file "$1" | sed -n -e 's/.* \([.0-9]\{1,\}\) kHz.*/\1/p' \
+        | awk '{print int($1 * 1000 + 0.5)}'  # \{1,\} is portable BRE; +0.5 rounds instead of truncating
+}
+
+record_audio(){
+    # usage : record_audio num_seconds -- records into a temporary flac file and echoes its path
+    : "${1?please provide number of seconds to record}"  # checked before mktemp so a missing arg leaks no file
+    tmpfile=$(mktemp) || return 1
+    arecord -d "$1" -r 16000 -t wav -q -f cd  | flac -s -f - -o "$tmpfile" && echo "$tmpfile" && return 0
+    rm -f -- "$tmpfile"; return 1  # recording/encoding failed: do not leave the temp file behind
+}
+stt(){
+    # usage: (lang=de-de stty recorded_file)
+    : ${1? please provide recorded file}
+    infile="$1"
+    lang=${lang:-en-us}
+    _get_content_type "$1" | (! grep -q "x-flac" ) \
+        && echo "infile needs to be in flac format" \
+        && return 1
+    # only flac seems to be working...
+    wget -q -O - \
+        -U 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7' \
+        --post-file "$infile"  \
+        --header "Content-Type: `_get_content_type $infile`; rate=`_get_audio_rate $infile`;" \
+    "http://www.google.com/speech-api/v1/recognize?lang=${lang}&client=chromium&maxresults=1" \
+    | sed -n 's/.*utterance":"\([^"]*\)".*/\1/p'
+
+    # returns {"status":0,"id":"d9269e6f741997161e41a4d441b34ba1-1","hypotheses":[{"utterance":"hallo Welt","confidence":0.7008959}]}
+}
diff --git a/util/t/stt/stt-works-with-espeak b/util/t/stt/stt-works-with-espeak
new file mode 100755
index 00000000..ff39f567
--- /dev/null
+++ b/util/t/stt/stt-works-with-espeak
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Round-trip test: synthesize a word with espeak, run it through stt, expect the same word back.
+cd "$(dirname "$(readlink -f "$0")")" || exit 1  # a failed cd would source the wrong path below
+. ../../lib/stt/google.sh
+tmp=$(mktemp) || exit 1
+test_str="hello"
+trap 'rm -f -- "$tmp"' TERM INT EXIT HUP  # single quotes: expand (and quote) $tmp when the trap runs
+espeak --stdout "$test_str" | flac --totally-silent -f -o "$tmp" -
+
+if stt "$tmp" | grep -E "^$test_str\$" >/dev/null ;then
+    echo "ok"
+    exit 0
+else
+    echo "not ok"
+    exit 1
+fi