Skip to content

Commit

Permalink
Add spectral flux analysis to detect silence
Browse files Browse the repository at this point in the history
  • Loading branch information
evanphx committed Mar 8, 2016
1 parent addd896 commit a59aa37
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 80 deletions.
149 changes: 69 additions & 80 deletions ask.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@ package alexa

import (
"bytes"
"encoding/binary"
"io"
"mime"
"mime/multipart"
"net/http"
"net/textproto"
"os"
"os/exec"
"os/signal"
"sort"
"time"

"github.com/evanphx/alexa/config"
"github.com/evanphx/alexa/portaudio"
"github.com/fatih/color"
)

Expand All @@ -31,6 +29,70 @@ func max(buf []int16) int16 {
return max
}

// avg returns the mean absolute amplitude of the samples in buf.
//
// Every sample is widened to int64 before its sign is flipped, so a sample of
// -32768 (whose negation does not fit in an int16) contributes 32768 instead
// of wrapping back to a negative value. An empty buf yields 0 rather than a
// divide-by-zero panic. The result is clamped to 32767 to cover the one case
// where the mean itself does not fit in an int16 (every sample is -32768).
func avg(buf []int16) int16 {
	if len(buf) == 0 {
		return 0
	}

	var tot int64

	for _, s := range buf {
		v := int64(s)
		if v < 0 {
			v = -v
		}
		tot += v
	}

	mean := tot / int64(len(buf))
	if mean > 32767 { // largest value an int16 can hold
		return 32767
	}

	return int16(mean)
}

// silenceFloor is the amplitude under which a sample is counted as silent.
const silenceFloor = 327 // int16(0.01 * float64(math.MaxInt16))

// silent reports what percentage of the samples in buf have a magnitude
// strictly below silenceFloor. For a non-empty buf the result lies between 0
// and 100; an empty buf produces NaN because the ratio is 0/0.
func silent(buf []int16) float32 {
	quietSamples := 0

	for i := range buf {
		// Anything at or beyond the floor, in either direction, is audible.
		if buf[i] >= silenceFloor || buf[i] <= -silenceFloor {
			continue
		}
		quietSamples++
	}

	return 100 * (float32(quietSamples) / float32(len(buf)))
}

// variance returns the largest absolute distance between any sample in buf
// and the value reported by avg(buf). Despite the name it is a maximum
// deviation, not a statistical variance.
//
// The subtraction is carried out in int32 because the distance between two
// int16 values can be as large as 65535 and would wrap if computed in int16.
// The result is clamped to 32767 so it still fits the int16 return type. An
// empty buf yields 0 without calling avg.
func variance(buf []int16) int16 {
	if len(buf) == 0 {
		return 0
	}

	a := int32(avg(buf))

	var m int32

	for _, s := range buf {
		diff := int32(s) - a
		if diff < 0 {
			diff = -diff
		}

		if diff > m {
			m = diff
		}
	}

	if m > 32767 { // largest value an int16 can hold
		return 32767
	}

	return int16(m)
}

// int16slice adapts a []int16 to sort.Interface so that it can be put into
// ascending order with sort.Sort.
type int16slice []int16

func (s int16slice) Len() int           { return len(s) }
func (s int16slice) Less(i, j int) bool { return s[i] < s[j] }
func (s int16slice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// nine95 returns the sample found 5% from the top of buf once its values are
// sorted in ascending order.
//
// buf is left untouched: its contents are copied into tmp, which serves as
// scratch space and should be at least as long as buf. Only the copied prefix
// of tmp is sorted, so leftover values in a longer tmp cannot leak into the
// result. When buf holds fewer than 20 samples the 5% offset truncates to
// zero, so the index is clamped to the last element (the maximum) instead of
// running off the end of the slice. An empty buf, or an empty tmp, yields 0.
func nine95(buf, tmp []int16) int16 {
	n := copy(tmp, buf)
	if n == 0 {
		return 0
	}

	sorted := tmp[:n]
	sort.Sort(int16slice(sorted))

	pos := n - int(float32(n)*0.05)
	if pos >= n {
		pos = n - 1
	}

	return sorted[pos]
}

// AskCommand is the receiver type of the Execute method. It carries no fields
// of its own.
type AskCommand struct {
}

Expand Down Expand Up @@ -69,85 +131,12 @@ func (r *AskCommand) Execute(args []string) error {
}

// ListenOpts configures a listening session.
//
// The State field is declared exactly once; declaring it twice is a compile
// error. Both quiet thresholds are kept so that code reading either
// QuietFrames or QuietDuration continues to build.
type ListenOpts struct {
	// State, when non-nil, is called as the listener changes state: first
	// with Waiting, then with Listening once sound has been detected.
	State func(State)

	// QuietFrames is the frame-count based silence threshold. Zero selects
	// DefaultQuietFrames.
	QuietFrames int

	// QuietDuration is how long the input has to stay quiet, after
	// something has been heard, before listening stops. Zero selects
	// DefaultQuietTime.
	QuietDuration time.Duration
}

func Listen(opts ListenOpts) error {
portaudio.Initialize()
defer portaudio.Terminate()

sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, os.Kill)

defer signal.Reset(os.Interrupt, os.Kill)

in := make([]int16, 512)
stream, err := portaudio.OpenDefaultStream(1, 0, 16000, len(in), in)
if err != nil {
return err
}
defer stream.Close()

err = stream.Start()
if err != nil {
return err
}

var (
buf bytes.Buffer
heardSomething bool
quiets int
quietFrames = opts.QuietFrames
)

if quietFrames == 0 {
quietFrames = DefaultQuietFrames
}

if opts.State != nil {
opts.State(Waiting)
}

reader:
for {
err = stream.Read()
if err != nil {
return err
}

err = binary.Write(&buf, binary.LittleEndian, in)
if err != nil {
return err
}

if max(in) > 1000 {
if heardSomething {
if quiets > 0 {
quiets /= 2
}
} else {
heardSomething = true
if opts.State != nil {
opts.State(Listening)
}
}
} else if heardSomething {
quiets++

if quiets == 30 {
break reader
}
}

select {
case <-sig:
break reader
default:
}
}

err = stream.Stop()
buf, err := ListenIntoBuffer(opts)
if err != nil {
return err
}
Expand Down
116 changes: 116 additions & 0 deletions ears.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package alexa

import (
"bytes"
"encoding/binary"
"os"
"os/signal"
"time"

"github.com/evanphx/alexa/portaudio"
)

// DefaultQuietTime is the quiet period used when ListenOpts.QuietDuration is
// left at zero.
const DefaultQuietTime = time.Second

// ListenIntoBuffer captures audio from the default portaudio input stream
// until the speaker goes quiet or the process receives an interrupt, and
// returns everything that was captured.
//
// Each frame read from the stream is appended to the result as little-endian
// int16 samples and handed to a VAD. Listening is considered to have started
// once a frame's flux is at least 1.75 times the previous frame's; from then
// on a frame whose flux has dropped by that same factor relative to the last
// loud frame counts as quiet. Capture stops once the input has stayed quiet
// for longer than opts.QuietDuration (DefaultQuietTime when zero).
//
// opts.State, when non-nil, is called with Waiting before capture begins and
// with Listening when sound is first detected.
//
// An interrupt ends capture early; whatever was recorded so far is returned
// with a nil error. A failure to open, start, read from or stop the stream,
// or to append to the buffer, is returned as the error with a nil buffer.
func ListenIntoBuffer(opts ListenOpts) (*bytes.Buffer, error) {
	portaudio.Initialize()
	defer portaudio.Terminate()

	// Only os.Interrupt is requested: os.Kill can never be delivered to a
	// handler, so registering for it has no effect.
	sig := make(chan os.Signal, 1)
	signal.Notify(sig, os.Interrupt)

	// Stop detaches just this channel. signal.Reset would additionally
	// discard interrupt handlers installed elsewhere in the process.
	defer signal.Stop(sig)

	// NOTE(review): 8196 is not a power of two; 8192 may have been
	// intended. Left unchanged because it sets the capture frame size.
	// Arguments are presumably 1 input channel, 0 output channels and a
	// 16 kHz sample rate -- confirm against the portaudio package.
	in := make([]int16, 8196)
	stream, err := portaudio.OpenDefaultStream(1, 0, 16000, len(in), in)
	if err != nil {
		return nil, err
	}

	defer stream.Close()

	if err := stream.Start(); err != nil {
		return nil, err
	}

	var (
		buf            bytes.Buffer
		heardSomething bool
		quiet          bool
		quietTime      = opts.QuietDuration
		quietStart     time.Time
		lastFlux       float64
	)

	vad := NewVAD(len(in))

	if quietTime == 0 {
		quietTime = DefaultQuietTime
	}

	if opts.State != nil {
		opts.State(Waiting)
	}

reader:
	for {
		if err := stream.Read(); err != nil {
			return nil, err
		}

		if err := binary.Write(&buf, binary.LittleEndian, in); err != nil {
			return nil, err
		}

		flux := vad.Flux(in)

		switch {
		case lastFlux == 0:
			// No baseline yet: adopt this frame's flux and judge the
			// next frame against it. This is a switch arm rather than a
			// continue so the interrupt check below still runs while the
			// input stays at zero.
			lastFlux = flux

		case heardSomething:
			if flux*1.75 <= lastFlux {
				if !quiet {
					// First quiet frame: start the clock.
					quiet = true
					quietStart = time.Now()
				} else if time.Since(quietStart) > quietTime {
					break reader
				}
			} else {
				// Sound is back; the quiet period starts over.
				quiet = false
				lastFlux = flux
			}

		default:
			if flux >= lastFlux*1.75 {
				heardSomething = true
				if opts.State != nil {
					opts.State(Listening)
				}
			}

			lastFlux = flux
		}

		// Non-blocking poll so an interrupt ends capture after the
		// current frame.
		select {
		case <-sig:
			break reader
		default:
		}
	}

	if err := stream.Stop(); err != nil {
		return nil, err
	}

	return &buf, nil
}
44 changes: 44 additions & 0 deletions vad.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package alexa

import (
"math"

"github.com/mjibson/go-dsp/fft"
)

// VAD holds the working buffers used by Flux to turn frames of audio samples
// into a single flux value per frame.
type VAD struct {
	// samples is the current frame converted to complex values (zero
	// imaginary part) as input for the FFT.
	samples []complex128

	// fft is the most recent result returned by fft.FFT.
	fft []complex128

	// spectrum holds the magnitude of the first width/2+1 FFT bins of the
	// current frame.
	spectrum []float64

	// lastSpectrum is the baseline that Flux subtracts from spectrum, bin
	// by bin; it has the same length as spectrum.
	lastSpectrum []float64
}

// NewVAD builds a VAD sized for frames of width samples. The sample buffer
// gets width entries and both spectrum buffers get width/2+1 entries; the fft
// field is left nil until Flux fills it in.
func NewVAD(width int) *VAD {
	v := new(VAD)

	v.samples = make([]complex128, width)

	bins := width/2 + 1
	v.spectrum = make([]float64, bins)
	v.lastSpectrum = make([]float64, bins)

	return v
}

// Flux feeds one frame of samples through an FFT and returns the spectral
// flux of that frame: the sum, over the first len(v.spectrum) frequency bins,
// of the absolute change in magnitude since the previous call.
//
// The magnitudes from the previous call are saved into v.lastSpectrum before
// the new ones are computed. (The arguments to copy were previously reversed,
// so the baseline was never updated and stayed at zero.) Differences are
// summed as absolute values so the result is never negative; callers compare
// successive results by ratio, which is meaningless for signed values.
//
// On the first call the baseline is all zeros, so the result is the sum of
// the frame's magnitudes. A frame shorter than the window given to NewVAD is
// zero-padded, and samples beyond the window are ignored.
func (v *VAD) Flux(samples []int16) float64 {
	for i := range v.samples {
		if i < len(samples) {
			v.samples[i] = complex(float64(samples[i]), 0)
		} else {
			// Clear leftovers from an earlier, longer frame.
			v.samples[i] = 0
		}
	}

	// Preserve the previous frame's magnitudes before overwriting them.
	copy(v.lastSpectrum, v.spectrum)

	v.fft = fft.FFT(v.samples)

	var flux float64

	for i := 0; i < len(v.spectrum) && i < len(v.fft); i++ {
		c := v.fft[i]
		mag := math.Hypot(real(c), imag(c))

		v.spectrum[i] = mag
		flux += math.Abs(mag - v.lastSpectrum[i])
	}

	return flux
}

0 comments on commit a59aa37

Please sign in to comment.