Added README.md; modified grammars to work better on Linux

master
Jeremy Hayes 2022-02-10 07:19:17 -06:00
parent 3f13cfa421
commit de404e65be
22 changed files with 755 additions and 82 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
__pycache__/
*.py[cod]
dfly-loader-wsr.py
kaldi_model/
vocabulary_config/

66
README.md Normal file
View File

@ -0,0 +1,66 @@
# Dragonfly Grammars for Coding by Voice
[![N|Solid](https://gitlab.com/uploads/-/system/project/avatar/13627082/dragonfly-161745.png?width=64)](https://gitlab.com/onecybernomad/dragonfly-grammars)
A simple way to set up voice coding on Linux and Windows, using Kaldi on Linux and WSR on Windows.
_Grammars available for Python, JavaScript, React.js, HTML, CSS, Java, and C#_
# Set up an installation
- Clone this repo using Git
- If you haven't installed dragonfly yet, run this command
-- For linux
```sh
pip install 'dragonfly2[kaldi]'
```
-- For windows
```sh
pip install dragonfly2
```
- Install a Kaldi model from (https://github.com/daanzu/kaldi-active-grammar/releases)
-- I recommend kaldi_model_daanzu_20211030-biglm.zip
-- If you are feeling risky you can train your own model (http://jrmeyer.github.io/asr/2016/12/15/DNN-AM-Kaldi.html)
-- Download it and place it in the directory where you cloned the project.
- On Linux you may need to install some extra software
```sh
sudo apt install wmctrl xdotool xsel
```
- You may now try it out
```sh
python kaldi_module_loader_plus.py
```
- On some linux distributions you may need to use this command instead
```sh
python3 kaldi_module_loader_plus.py
```
- On Windows you may need to use the Python launcher `py`
```sh
py -3.6 kaldi_module_loader_plus.py
```
## How to use
- Once it is running, say "wake up". If everything is working, you should hear a voice prompt stating that it's awake.
- Say "enable [language of your choice]". You should hear a voice prompt stating that the language has been activated.
- You can combine some grammars; just be aware that some may have conflicting rules
-- For example I often combine HTML and Javascript
- You can manipulate the cursor by saying up, down, left, right plus the number of times you want it to move
- You can combine commands, for example "shift" + "direction" + "number of moves" or "ctrl" + "up/down" + "number of moves"
## Tech
This project uses:
- [Python] - An awesome and powerful interpreted programming language
- [Dragonfly2] - A framework that allows you to code by voice
- [Festival] - A tts engine
- [Kaldi] - An open-source speech recognition toolkit
- WSR - Microsoft Windows' built-in speech recognition toolkit
- [Tkinter](https://docs.python.org/3/library/tkinter.html) - A GUI toolkit for building user interfaces in Python
## License
MIT
[//]: # (These are reference links used in the body of this note and get stripped out when the markdown processor does its job. There is no need to format nicely because it shouldn't be seen. Thanks SO - http://stackoverflow.com/questions/4823468/store-comments-in-markdown-syntax)
[Python]: <https://www.python.org/>
[Festival]: <https://www.cstr.ed.ac.uk/projects/festival/>
[Dragonfly2]: <https://github.com/dictation-toolbox/dragonfly>
[Kaldi]: <https://www.kaldi-asr.org/>

View File

@ -2,22 +2,24 @@
# This script includes commands that are to be used for Angular programming
from dragonfly import (Grammar, CompoundRule, Dictation, RuleRef, DictList, DictListRef, Text, Key, AppContext, MappingRule, Function, Sequence, Mimic)
import win32com.client
speaker = win32com.client.Dispatch("SAPI.SpVoice")
# import win32com.client
# speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
def doSomethingToCommand(command):
newCommand = Sequence(command)
newCommand.execute()
class AngularEnabler(CompoundRule):
spec = "Activate Angular" # Spoken form of command.
spec = "enable Angular" # Spoken form of command.
def _process_recognition(self, node, extras): # Callback when command is spoken.
AngularBootstrap.disable()
AngularGrammar.enable()
s = "Angular JS grammar activated"
print (s)
speaker.Speak(s)
# speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class AngularDisabler(CompoundRule):
spec = "switch language" # Spoken form of command.
@ -27,7 +29,8 @@ class AngularDisabler(CompoundRule):
AngularBootstrap.enable()
s = "Angular JS grammar deactivated"
print (s)
speaker.Speak(s)
# speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class AngularTestRule(CompoundRule):
spec = "test Angular" # Spoken form of command.

View File

@ -4,7 +4,7 @@
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule, Integer)
class CSEnabler(CompoundRule):
spec = "Enable C sharp" # Spoken form of command.
spec = "Enable charlie sharp" # Spoken form of command.
def _process_recognition(self, node, extras): # Callback when command is spoken.
csBootstrap.disable()

View File

@ -81,4 +81,3 @@ def unload():
global cssGrammar
if cssGrammar: cssGrammar.unload()
cssGrammar = None

View File

@ -1,4 +1,5 @@
# Author:Brandon Lovrien
# Modified Jeremy Hayes
# This script includes some commands for various useful symbols used in programming
#from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule)
@ -19,8 +20,10 @@ class UsefulStuff(MappingRule):
"enter|slap": Key("enter"),
"east": Key("end"),
"west": Key("home"),
"format": Key("ctrl") + Key("alt") + Key("b"),
"save": Key("ctrl") + Key("s")
"format": Key("ctrl") + Key("alt") + Key("f"),
"save file": Key("ctrl") + Key("s"),
"hash": Key("#"),
"punch": Key(","),
}
globalStuff = Grammar("useful custom global commands") # Create a grammar to contain the command rule.

View File

@ -3,18 +3,21 @@
# This script includes commands used for HTML coding
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule, Choice)
import win32com.client
speaker = win32com.client.Dispatch("SAPI.SpVoice")
# import win32com.client
# speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
class HTMLEnabler(CompoundRule):
spec = "Activate html" # Spoken form of command.
spec = "enable hyper" # Spoken form of command.
def _process_recognition(self, node, extras): # Callback when command is spoken.
htmlBootstrap.disable()
htmlGrammar.enable()
s = "HTML grammar activated"
print (s)
speaker.Speak(s)
# speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class HTMLDisabler(CompoundRule):
spec = "switch language" # Spoken form of command.
@ -23,7 +26,9 @@ class HTMLDisabler(CompoundRule):
htmlBootstrap.enable()
s = "HTML grammar deactivated"
print (s)
speaker.Speak(s)
# speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class HTMLTestRule(CompoundRule):
spec = "test HTML" # Spoken form of command.
@ -94,8 +99,7 @@ class HTMLTags(MappingRule):
"body": "body",
"button": "button",
"Canvas": "canvas",
# means the same thing and represents a table caption
"caption": "caption", "table caption": "caption",
"table caption": "caption",
"cite": "cite",
"code": "code",
"table column": "col",
@ -106,7 +110,7 @@ class HTMLTags(MappingRule):
"del": "del",
"details": "details",
"dfn": "dfn",
"div": "div",
"divider": "div",
"dl": "dl",
"dt": "dt",
"em": "em",
@ -116,18 +120,18 @@ class HTMLTags(MappingRule):
"figure": "figure",
"footer": "footer",
"form": "form",
"H1": "h1",
"H2": "h2",
"H3": "h3",
"(header 4 |H4)": "h4",
"H5": "h5",
"H6": "h6",
"header one": "h1",
"header to": "h2",
"header three": "h3",
"header for": "h4",
"header five": "h5",
"header six": "h6",
"head": "head",
"H group": "hgroup",
"HR": "hr",
"header group": "hgroup",
"horizontal rule": "hr",
"HTML": "html",
"I": "i",
"I frame": "iframe",
"italics": "i",
"framed": "iframe",
"input": "input",
"INS": "ins",
"key gen": "keygen",
@ -147,14 +151,13 @@ class HTMLTags(MappingRule):
"option group": "optgroup",
"option": "option",
"output": "output",
"p": "p",
"paragraph": "p",
"parameter": "param",
"pre": "pre",
"progress": "progress",
"g": "g",
"RP": "rp",
"RT": "rt",
"Ruby": "ruby",
"s": "s",
"sample": "samp",
"script": "script",
@ -169,24 +172,20 @@ class HTMLTags(MappingRule):
"summary": "summary",
"super script": "sup",
"table": "table",
"T body": "tbody",
# means the same thing and represents a table cell
"TD": "td", "table cell": "td",
"table body": "tbody",
"table cell": "td",
"text area": "textarea",
"T foot": "tfoot",
# means the same thing and represents a table header
"TH": "th", "table header": "th",
"T head": "thead",
"table foot": "tfoot",
"table header": "th",
"table head": "thead",
"time": "time",
"title": "title",
# means the same thing and represents a table row
"table row": "tr", "TR": "tr",
"table row": "tr",
"track": "track",
"unordered list": "ul",
"variable": "var",
"video": "video",
"label": "label",
}
)
]

View File

@ -4,7 +4,7 @@
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule)
class JavaEnabler(CompoundRule):
spec = "Enable Java" # Spoken form of command.
spec = "Enable Cup Java" # Spoken form of command.
def _process_recognition(self, node, extras): # Callback when command is spoken.
javaBootstrap.disable()

View File

@ -2,22 +2,25 @@
# This script includes commands that are to be used for JavaScript programming
from dragonfly import (Grammar, CompoundRule, Dictation, RuleRef, DictList, DictListRef, Text, Key, AppContext, MappingRule, Function, Sequence, Mimic)
import win32com.client
speaker = win32com.client.Dispatch("SAPI.SpVoice")
#for windows speech
# import win32com.client
# speaker = win32com.client.Dispatch("SAPI.SpVoice")
# for linux
import os
def doSomethingToCommand(command):
newCommand = Sequence(command)
newCommand.execute()
class JavaScriptEnabler(CompoundRule):
spec = "Activate JS" # Spoken form of command.
spec = "enable java" # Spoken form of command.
def _process_recognition(self, node, extras): # Callback when command is spoken.
JavaScriptBootstrap.disable()
JavaScriptGrammar.enable()
s = "JavaScript grammar activated"
print (s)
speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class JavaScriptDisabler(CompoundRule):
@ -28,7 +31,9 @@ class JavaScriptDisabler(CompoundRule):
JavaScriptBootstrap.enable()
s = "JavaScript grammar deactivated"
print (s)
speaker.Speak(s)
#speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class JavaScriptTestRule(CompoundRule):
spec = "test JavaScript" # Spoken form of command.

View File

@ -108,10 +108,11 @@ config.cmd.map = Item(
"tab [<n>]": Key("tab:%(n)d"),
"delete [<n>]": release + Key("del:%(n)d"),
"delete [<n> | this] (line|lines)": release + Key("home, s-down:%(n)d, del"),
"backspace [<n>]": release + Key("backspace:%(n)d"),
"back [<n>]": release + Key("backspace:%(n)d"),
"pop up": release + Key("apps"),
"paste": release + Key("c-v"),
"scratch": release + Key("c-z"),
"no scratch": release + Key("c-y"),
"duplicate <n>": release + Key("c-c, c-v:%(n)d"),
"copy": release + Key("c-c"),
"cut": release + Key("c-x"),
@ -121,7 +122,6 @@ config.cmd.map = Item(
"[hold] control": Key("ctrl:down"),
"release control": Key("ctrl:up"),
"release [all]": release,
"say <text>": release + Text("%(text)s"),
"mimic <text>": release + Mimic(extra="text"),
},

View File

@ -74,12 +74,20 @@ def _BEGINNING_dot_note(command): # Callback when command is spoken.
printer.execute()
# Voice command rule for "middle-slash" naming convention.
def middle_slash_format(command): # Callback when command is spoken.
def middle_lines_format(command): # Callback when command is spoken.
textToPrint = command
someString = str(textToPrint)
printer = Text(someString.replace(' ', '-'))
printer.execute()
# Voice command rule for "middle-slash" naming convention.
def middle_slash_format(command): # Callback when command is spoken.
textToPrint = command
someString = str(textToPrint)
printer = Text(someString.replace(' ', '/'))
printer.execute()
# Voice command rule for "spacefree" naming convention.
def SpaceFreeFormat(command): # Callback when command is spoken.
textToPrint = command
@ -92,13 +100,13 @@ class ProgrammingNamingConventions(MappingRule):
mapping = {
#both of these commands do the same thing in terms of name formatting example: testValue
"var <command>": Function(camel_back),
"var <command> <symbol>": Function(camel_back) + Text("%(symbol)s"),
"<symbol> var <command>": Text("%(symbol)s") + Function(camel_back),
"variable <command>": Function(camel_back),
"variable <command> <symbol>": Function(camel_back) + Text("%(symbol)s"),
"<symbol> variable <command>": Text("%(symbol)s") + Function(camel_back),
"camelback <command>": Function(camel_back),
"camelback <command> <symbol>": Function(camel_back) + Text("%(symbol)s"),
"<symbol> camelback <command>": Text("%(symbol)s") + Function(camel_back),
"camelot <command>": Function(camel_back),
"camelot <command> <symbol>": Function(camel_back) + Text("%(symbol)s"),
"<symbol> camelot <command>": Text("%(symbol)s") + Function(camel_back),
#this command capitalizes the 1st letter of each word example: Test Value
"caps first <command>": Function(caps_first_format),
@ -131,15 +139,29 @@ class ProgrammingNamingConventions(MappingRule):
"<symbol> beginning under <command>": Text("%(symbol)s") + Function(_BEGINNING_dot_note),
#example of this command: test-value
"middle lines <command>": Function(middle_slash_format),
"middle lines <command> <symbol>": Function(middle_slash_format) + Text("%(symbol)s"),
"<symbol> middle lines <command>": Text("%(symbol)s") + Function(middle_slash_format),
"middle lines <command>": Function(middle_lines_format),
"middle lines <command> <symbol>": Function(middle_lines_format) + Text("%(symbol)s"),
"<symbol> middle lines <command>": Text("%(symbol)s") + Function(middle_lines_format),
#example of this command: test-value
"middle slash <command>": Function(middle_slash_format),
"middle slash <command> <symbol>": Function(middle_slash_format) + Text("%(symbol)s"),
"<symbol> middle slice <command>": Text("%(symbol)s") + Function(middle_slash_format),
# example of this command: testvalue
"space free <command>": Function(SpaceFreeFormat),
"space free <command> <symbol>": Function(SpaceFreeFormat) + Text("%(symbol)s"),
"<symbol> space free <command>": Text("%(symbol)s") + Function(SpaceFreeFormat),
# symbols
"dash": Text("-"),
"slash": Text("/"),
"back slash": Text("\\"),
"pipe": Text("|"),
"or": Text("||"),
"question": Text("?"),
# Numbers
"zero": Text("0"),
"one": Text("1"),
@ -177,7 +199,7 @@ class ProgrammingNamingConventions(MappingRule):
"uniform|unix": Text("u"),
"victor": Text("v"),
"whiskey": Text("w"),
"(X|x-ray) ": Text("x"),
"(X|xray) ": Text("x"),
"yankee": Text("y"),
"zulu": Text("z"),
@ -205,7 +227,7 @@ class ProgrammingNamingConventions(MappingRule):
"caps uniform|unix": Text("U"),
"caps victor": Text("V"),
"caps whiskey": Text("W"),
"caps (X|x-ray) ": Text("X"),
"caps (X|xray) ": Text("X"),
"caps yankee": Text("Y"),
"caps zulu": Text("Z"),
}

View File

@ -2,8 +2,9 @@
# This script is to be used for programming in the Python programming language
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule)
import win32com.client
speaker = win32com.client.Dispatch("SAPI.SpVoice")
# import win32com.client
# speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
class PythonEnabler(CompoundRule):
spec = "Enable Python" # Spoken command to enable the Python grammar.
@ -13,7 +14,8 @@ class PythonEnabler(CompoundRule):
print ("Python grammar enabled")
s = "Python grammar enabled"
print (s)
speaker.Speak(s)
# speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class PythonDisabler(CompoundRule):
spec = "switch language" # spoken command to disable the Python grammar.
@ -22,6 +24,8 @@ class PythonDisabler(CompoundRule):
pythonGrammar.disable()
pythonBootstrap.enable()
print ("Python grammar disabled")
s = "Python grammar disabled"
os.system('echo "{0}" | festival --tts'.format(s))
# This is a test rule to see if the Python grammar is enabled
class PythonTestRule(CompoundRule):

200
_react_grammar.py Normal file
View File

@ -0,0 +1,200 @@
# Author: Jeremy Hayes
# This script includes commands used for React coding
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule, Choice)
# import win32com.client
# speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
class ReactEnabler(CompoundRule):
    """Voice command that switches the React grammar on."""

    spec = "enable react"  # Phrase that triggers this rule.

    def _process_recognition(self, node, extras):
        """Disable the bootstrap grammar, enable the React grammar, and say so."""
        announcement = "React grammar activated"
        reactBootstrap.disable()
        reactGrammar.enable()
        print(announcement)
        # Spoken confirmation through the Festival command-line TTS tool.
        os.system('echo "{0}" | festival --tts'.format(announcement))
class ReactDisabler(CompoundRule):
    """Voice command that switches the React grammar off again."""

    spec = "switch language"  # Phrase that triggers this rule.

    def _process_recognition(self, node, extras):
        """Disable the React grammar, re-enable its bootstrap grammar, and say so."""
        announcement = "React grammar deactivated"
        reactGrammar.disable()
        reactBootstrap.enable()
        print(announcement)
        # Spoken confirmation through the Festival command-line TTS tool.
        os.system('echo "{0}" | festival --tts'.format(announcement))
class ReactTestRule(CompoundRule):
    """Diagnostic command: prints a line so you can tell the React grammar is active."""

    spec = "test React"  # Phrase that triggers this rule.

    def _process_recognition(self, node, extras):
        """Print a fixed confirmation message to the console."""
        print("React grammar tested")
class ReactTags(MappingRule):
    """Spoken commands that type markup tags and tag attributes.

    ``<tagname>`` and ``<attribute>`` in the specs are resolved through the
    ``Choice`` extras below: the dictionary key is what is spoken, the value
    is the text substituted into the typed output.
    """

    mapping = {
        # Cut the selection, type the opening tag, paste the selection back on
        # its own line, then type the closing tag.
        "in <tagname> tags": Key("c-x") + Text("<%(tagname)s>") + Key("enter") + Key("c-v") + Key("enter") + Text("</%(tagname)s>"),
        "doc type": Text("<!DOCTYPE React>"),
        # Type an empty comment and step the cursor back three characters so
        # it ends up between the two dash pairs.
        "comment": Text("<!---->") + Key("left") + Key("left") + Key("left"),
        "tags": Text("<>") + Text("</>"),
        "<tagname> tags": Text("<%(tagname)s>") + Text("</%(tagname)s>"),
        "single tag": Text("</>"),
        "line break": Text("<br />"),
        "image": Text("<img />"),
        "equals": Text("="),
        # Types only the closing tag.
        "<tagname> kick": Text("</%(tagname)s>"),

        # used to specify tag attributes
        # (the trailing "left" puts the cursor between the quotes)
        "attribute": Text(' attributeName=""') + Key("left"),
        "<attribute> attribute": Text(' %(attribute)s=""') + Key("left"),
    }

    extras = [
        # Spoken attribute name -> attribute text that gets typed.
        Choice("attribute", {
            "ID": "id",
            "class": "className",
            "style": "style",
            "title": "title",
            "SRC": "src",
            "HREF": "href",
            "type": "type",
            "value": "value",
            "name": "name",
            "for": "htmlFor",
        }),
        # Spoken tag name -> tag text that gets typed.  Each spoken form is
        # listed exactly once (a second, identical "label" entry was dropped).
        Choice("tagname", {
            "row": "a-row",
            "column": "a-col",
            "card": "a-card",
            "anchor": "a",
            "abbreviation": "abbr",
            "address": "address",
            "area": "area",
            "article": "article",
            "aside": "aside",
            "audio": "audio",
            "bold": "b",
            "base": "base",
            "BDI": "bdi",
            "BDO": "bdo",
            "block quote": "blockquote",
            "body": "body",
            "button": "button",
            "Canvas": "canvas",
            "table caption": "caption",
            "cite": "cite",
            "code": "code",
            "table column": "col",
            "table column group": "colgroup",
            "command": "command",
            "data list": "datalist",
            "definition description": "dd",
            "del": "del",
            "details": "details",
            "dfn": "dfn",
            "divider": "div",
            "dl": "dl",
            "dt": "dt",
            "em": "em",
            "embed": "embed",
            "field set": "fieldset",
            "figure caption": "figcaption",
            "figure": "figure",
            "footer": "footer",
            "form": "form",
            "header one": "h1",
            "header to": "h2",
            "header three": "h3",
            "header for": "h4",
            "header five": "h5",
            "header six": "h6",
            "head": "head",
            "header group": "hgroup",
            "horizontal rule": "hr",
            "HTML": "html",
            "italics": "i",
            "framed": "iframe",
            "input": "input",
            "INS": "ins",
            "key gen": "keygen",
            "KBD": "kbd",
            "label": "label",
            "legend": "legend",
            "list item": "li",
            "Link": "link",
            "Mark": "mark",
            "menu": "menu",
            "meta": "meta",
            "meter": "meter",
            "nav": "nav",
            "no script": "noscript",
            "object": "object",
            "ordered list": "ol",
            "option group": "optgroup",
            "option": "option",
            "output": "output",
            "paragraph": "p",
            "parameter": "param",
            "pre": "pre",
            "progress": "progress",
            "g": "g",
            "RP": "rp",
            "RT": "rt",
            "s": "s",
            "sample": "samp",
            "script": "script",
            "section": "section",
            "select": "select",
            "small": "small",
            "source": "source",
            "span": "span",
            "strong": "strong",
            "style": "style",
            "sub": "sub",
            "summary": "summary",
            "super script": "sup",
            "table": "table",
            "table body": "tbody",
            "table cell": "td",
            "text area": "textarea",
            "table foot": "tfoot",
            "table header": "th",
            "table head": "thead",
            "time": "time",
            "title": "title",
            "table row": "tr",
            "track": "track",
            "unordered list": "ul",
            "variable": "var",
            "video": "video",
        }),
    ]
# Code for initial setup of the React grammar
# The bootstrap grammar carries only the ReactEnabler rule and is loaded
# straight away.
reactBootstrap = Grammar("react bootstrap")                # Create a grammar to contain the command rule.
reactBootstrap.add_rule(ReactEnabler())
reactBootstrap.load()
# The main grammar carries the test, disabler and tag rules.  It is loaded
# and then immediately disabled; ReactEnabler enables it later.
reactGrammar = Grammar("react grammar")
reactGrammar.add_rule(ReactTestRule())
reactGrammar.add_rule(ReactDisabler())
reactGrammar.add_rule(ReactTags())
reactGrammar.load()
reactGrammar.disable()
# Unload function which will be called by natlink at unload time.
def unload():
    """Unload both React grammars and drop the module-level references.

    The bootstrap grammar is loaded at import time just like the main
    grammar, so it is unloaded here as well instead of being left behind.
    Calling this again after the references are cleared is a no-op.
    """
    global reactGrammar, reactBootstrap
    if reactGrammar:
        reactGrammar.unload()
    reactGrammar = None
    if reactBootstrap:
        reactBootstrap.unload()
    reactBootstrap = None

View File

@ -3,8 +3,7 @@
# This script includes commands used for Terminal coding
from dragonfly import (Grammar, CompoundRule, Dictation, Text, Key, AppContext, MappingRule, Choice)
import win32com.client
speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
class TerminalEnabler(CompoundRule):
spec = "Activate Terminal" # Spoken form of command.
@ -14,7 +13,8 @@ class TerminalEnabler(CompoundRule):
TerminalGrammar.enable()
s = "Terminal grammar activated"
print (s)
speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class TerminalDisabler(CompoundRule):
spec = "switch language" # Spoken form of command.
@ -23,7 +23,7 @@ class TerminalDisabler(CompoundRule):
TerminalBootstrap.enable()
s = "Terminal grammar deactivated"
print (s)
speaker.Speak(s)
os.system('echo "{0}" | festival --tts'.format(s))
class TerminalTestRule(CompoundRule):
spec = "test Terminal" # Spoken form of command.
@ -35,14 +35,18 @@ class TerminalTestRule(CompoundRule):
class TerminalTags(MappingRule):
mapping = {
"copy": Text("cp"),
"list": Text("ls"),
"move": Text("mv"),
"file copy": Text("cp"),
"file list": Text("ls"),
"file move": Text("mv"),
"force remove <tagname>": Text("rm -rf %(tagname)s"),
"make directory": Text("mkdir"),
"nano": Text("nano" ),
"node package manager": Text("npm" ),
"node": Text("node" ),
"node version manager": Text("nvm" ),
"python": Text("python3" ),
"pip": Text("pip3" ),
"remove": Text( "rm" ),
"dac": Text( "-" ),
# used to specify tag attributes
"attribute": Text( ' attributeName=""' ) + Key( "left" ),

28
app.py Normal file
View File

@ -0,0 +1,28 @@
from tkinter import *
from kaldi_dfly_loader_plus_gui import *
# Colours and fonts for the window.
BG_COLOR = "#17202A"  # background of the window and of the heading label
TEXT_COLOR = "#EAECEE"  # foreground (text) colour of the heading label
FONT = "Helvetica 14"  # NOTE(review): not referenced in the code visible in this file
FONT_BOLD = "Helvetica 13 bold"  # font of the heading label
class GuiApp:
    """Small fixed-size Tk window showing a heading for the speech-coding tool."""

    def __init__(self):
        """Create the root window and populate it."""
        self.window = Tk()
        self._setup_main_window()

    def run(self):
        """Hand control to the Tk main loop."""
        self.window.mainloop()

    def _setup_main_window(self):
        """Set the title, size and colours of the window and place the heading label."""
        root = self.window
        root.title("Speech coding")
        root.resizable(width=False, height=False)
        root.configure(width=470, height=200, bg=BG_COLOR)

        # Heading stretched across the full width of the window.
        banner = Label(
            root,
            bg=BG_COLOR,
            fg=TEXT_COLOR,
            text="Pause, Stop, and Start speech code",
            font=FONT_BOLD,
            pady=10,
        )
        banner.place(relwidth=1)
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app = GuiApp()
    app.run()

169
kaldi-dfly-loader-plus.py Normal file
View File

@ -0,0 +1,169 @@
"""
Command-module loader for Kaldi.
This script is based on 'dfly-loader-wsr.py' written by Christo Butcher and
has been adapted to work with the Kaldi engine instead.
This script can be used to look for Dragonfly command-modules for use with
the Kaldi engine. It scans the directory it's in and loads any ``_*.py`` it
finds.
"""
# TODO Have a simple GUI for pausing, resuming, cancelling and stopping
# recognition, etc
from __future__ import print_function
import logging
import os.path
import os
import sys
import six
from dragonfly import get_engine
from dragonfly import Grammar, MappingRule, Function, Dictation, FuncContext
from dragonfly.loader import CommandModuleDirectory
from dragonfly.log import setup_log
# --------------------------------------------------------------------------
# Set up basic logging.
# Change the condition below to True to switch on the verbose per-logger
# configuration; as written, only setup_log() from dragonfly.log is called.

if False:
    # Debugging logging for reporting trouble
    # Levels are the numeric logging levels: 10 is DEBUG, 20 is INFO.
    logging.basicConfig(level=10)
    logging.getLogger('grammar.decode').setLevel(20)
    logging.getLogger('grammar.begin').setLevel(20)
    logging.getLogger('compound').setLevel(20)
    logging.getLogger('kaldi.compiler').setLevel(10)
else:
    setup_log()
# --------------------------------------------------------------------------
# User notification / rudimentary UI.  MODIFY AS DESIRED

def notify(message):
    """Report a sleep/wake transition on the console and through Festival.

    ``message`` is expected to be ``'sleep'`` or ``'wake'``; any other value
    does nothing.  Speech is produced by piping text into the ``festival``
    command-line tool via the shell.
    """
    speak_template = 'echo "{0}" | festival --tts'

    if message == 'wake':
        print("Awake...")
        os.system(speak_template.format("I have awakened"))
        return

    if message == 'sleep':
        # Spoken first, then printed.
        os.system(speak_template.format("sleeping"))
        print("Sleeping...")
# --------------------------------------------------------------------------
# Sleep/wake grammar.  (This can be unused or removed if you don't want it.)

# Module-level flag shared by the sleep/wake callbacks and the noise-rule context.
sleeping = False


def load_sleep_wake_grammar(initial_awake):
    """Create and load the "sleep" grammar, then force the initial state.

    The grammar holds the wake/sleep voice commands plus a dictation rule
    whose context is active only while ``sleeping`` is true and whose
    action does nothing.

    :param initial_awake: truthy to start awake, falsy to start asleep.
    """
    sleep_grammar = Grammar("sleep")

    def sleep(force=False):
        # Nothing to do when already asleep, unless forced.
        global sleeping
        if sleeping and not force:
            return
        sleeping = True
        sleep_grammar.set_exclusiveness(True)
        notify('sleep')

    def wake(force=False):
        # Nothing to do when already awake, unless forced.
        global sleeping
        if not (sleeping or force):
            return
        sleeping = False
        sleep_grammar.set_exclusiveness(False)
        notify('wake')

    class SleepRule(MappingRule):
        # Waking wakes first, then starts saving adaptation state; both
        # sleep phrases stop saving adaptation state first, then sleep.
        mapping = {
            "wake up": Function(wake) + Function(lambda: get_engine().start_saving_adaptation_state()),
            "snore": Function(lambda: get_engine().stop_saving_adaptation_state()) + Function(sleep),
            "halt listening": Function(lambda: get_engine().stop_saving_adaptation_state()) + Function(sleep),
        }

    sleep_grammar.add_rule(SleepRule())

    # Dictation rule that is only in context while asleep; its action
    # evaluates to False without ever printing.
    sleep_grammar.add_rule(MappingRule(
        name="sleep_noise_rule",
        mapping={"<text>": Function(lambda text: False and print(text))},
        extras=[Dictation("text")],
        context=FuncContext(lambda: sleeping),
    ))

    sleep_grammar.load()

    # Force the requested starting state regardless of the current flag.
    (wake if initial_awake else sleep)(force=True)
# --------------------------------------------------------------------------
# Main event driving loop.

def main():
    """Run the Kaldi command-module loader until interrupted.

    Connects the Kaldi engine, loads the sleep/wake grammar (starting
    asleep) and the command modules in this script's directory, then runs
    the engine's recognition loop.  A KeyboardInterrupt ends the loop
    quietly.
    """
    logging.basicConfig(level=logging.INFO)

    # Locate this script.  A separate local name is used on purpose:
    # assigning to ``__file__`` inside this function would make it a local
    # variable, so reading it would always raise UnboundLocalError (a
    # NameError subclass) and the fallback would run on every start.
    try:
        script_path = os.path.abspath(__file__)
    except NameError:
        # The "__file__" name is not always available, for example
        # when this module is run from PythonWin.  In this case we
        # simply use the current working directory.
        script_path = os.path.join(os.getcwd(), "kaldi_module_loader_plus.py")
    path = os.path.dirname(script_path)

    # Set any configuration options here as keyword arguments.
    # See Kaldi engine documentation for all available options and more info.
    engine = get_engine('kaldi',
        model_dir='kaldi_model',  # default model directory
        # vad_aggressiveness=3,  # default aggressiveness of VAD
        # vad_padding_start_ms=150,  # default ms of required silence before VAD
        # vad_padding_end_ms=150,  # default ms of required silence after VAD
        # vad_complex_padding_end_ms=500,  # default ms of required silence after VAD for complex utterances
        # input_device_index=None,  # set to an int to choose a non-default microphone
        # lazy_compilation=True,  # set to True to parallelize & speed up loading
        # retain_dir=None,  # set to a writable directory path to retain recognition metadata and/or audio data
        # retain_audio=None,  # set to True to retain speech data wave files in the retain_dir (if set)
    )

    # Call connect() now that the engine configuration is set.
    engine.connect()

    # Load grammars.
    load_sleep_wake_grammar(False)
    directory = CommandModuleDirectory(path, excludes=[script_path])
    directory.load()

    # Define recognition callback functions.
    def on_begin():
        print("Speech start detected.")

    def on_recognition(words):
        message = u"Recognized: %s" % u" ".join(words)

        # This only seems to be an issue with Python 2.7 on Windows.
        if six.PY2:
            encoding = sys.stdout.encoding or "ascii"
            message = message.encode(encoding, errors='replace')
        print(message)

    def on_failure():
        print("Sorry, what was that?")

    # Start the engine's main recognition loop
    engine.prepare_for_recognition()
    try:
        print("Listening...")
        engine.do_recognition(on_begin, on_recognition, on_failure)
    except KeyboardInterrupt:
        # Ctrl+C is treated as a normal way to stop listening.
        pass


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,169 @@
"""
Command-module loader for Kaldi.
This script is based on 'dfly-loader-wsr.py' written by Christo Butcher and
has been adapted to work with the Kaldi engine instead.
This script can be used to look for Dragonfly command-modules for use with
the Kaldi engine. It scans the directory it's in and loads any ``_*.py`` it
finds.
"""
# TODO Have a simple GUI for pausing, resuming, cancelling and stopping
# recognition, etc
from __future__ import print_function
import logging
import os.path
import os
import sys
import six
from dragonfly import get_engine
from dragonfly import Grammar, MappingRule, Function, Dictation, FuncContext
from dragonfly.loader import CommandModuleDirectory
from dragonfly.log import setup_log
# --------------------------------------------------------------------------
# Set up basic logging.
# Change the condition below to True to switch on the verbose per-logger
# configuration; as written, only setup_log() from dragonfly.log is called.

if False:
    # Debugging logging for reporting trouble
    # Levels are the numeric logging levels: 10 is DEBUG, 20 is INFO.
    logging.basicConfig(level=10)
    logging.getLogger('grammar.decode').setLevel(20)
    logging.getLogger('grammar.begin').setLevel(20)
    logging.getLogger('compound').setLevel(20)
    logging.getLogger('kaldi.compiler').setLevel(10)
else:
    setup_log()
# --------------------------------------------------------------------------
# User notification / rudimentary UI.  MODIFY AS DESIRED

def notify(message):
    """Announce a sleep/wake state change by console print and Festival speech.

    Only ``'sleep'`` and ``'wake'`` are acted on; every other ``message``
    is silently ignored.  Speech goes through the shell to the
    ``festival`` command-line tool.
    """
    tts_command = 'echo "{0}" | festival --tts'

    if message == 'sleep':
        # Speech comes before the console line for this state.
        os.system(tts_command.format("sleeping"))
        print("Sleeping...")
        return

    if message == 'wake':
        print("Awake...")
        os.system(tts_command.format("I have awakened"))
# --------------------------------------------------------------------------
# Sleep/wake grammar.  (This can be unused or removed if you don't want it.)

# Module-level flag shared by the sleep/wake callbacks and the noise-rule context.
sleeping = False


def load_sleep_wake_grammar(initial_awake):
    """Build the "sleep" grammar, load it, and force the starting state.

    The grammar contains the wake/sleep voice commands and a dictation
    rule that is in context only while ``sleeping`` is true and whose
    action does nothing.

    :param initial_awake: truthy to begin awake, falsy to begin asleep.
    """
    sleep_grammar = Grammar("sleep")

    def sleep(force=False):
        # Skip when already asleep, unless the caller forces it.
        global sleeping
        if sleeping and not force:
            return
        sleeping = True
        sleep_grammar.set_exclusiveness(True)
        notify('sleep')

    def wake(force=False):
        # Skip when already awake, unless the caller forces it.
        global sleeping
        if not (sleeping or force):
            return
        sleeping = False
        sleep_grammar.set_exclusiveness(False)
        notify('wake')

    class SleepRule(MappingRule):
        # "wake up" wakes and then starts saving adaptation state; the two
        # sleep phrases stop saving adaptation state and then sleep.
        mapping = {
            "wake up": Function(wake) + Function(lambda: get_engine().start_saving_adaptation_state()),
            "snore": Function(lambda: get_engine().stop_saving_adaptation_state()) + Function(sleep),
            "halt listening": Function(lambda: get_engine().stop_saving_adaptation_state()) + Function(sleep),
        }

    sleep_grammar.add_rule(SleepRule())

    # Dictation rule whose context is active only while asleep; the action
    # evaluates to False and never prints.
    sleep_grammar.add_rule(MappingRule(
        name="sleep_noise_rule",
        mapping={"<text>": Function(lambda text: False and print(text))},
        extras=[Dictation("text")],
        context=FuncContext(lambda: sleeping),
    ))

    sleep_grammar.load()

    # Force the requested starting state regardless of the current flag.
    (wake if initial_awake else sleep)(force=True)
# --------------------------------------------------------------------------
# Main event driving loop.

def main():
    """Run the Kaldi command-module loader until interrupted.

    Connects the Kaldi engine, loads the sleep/wake grammar (starting
    awake) and the command modules in this script's directory, then runs
    the engine's recognition loop.  A KeyboardInterrupt ends the loop
    quietly.
    """
    logging.basicConfig(level=logging.INFO)

    # Locate this script.  A separate local name is used on purpose:
    # assigning to ``__file__`` inside this function would make it a local
    # variable, so reading it would always raise UnboundLocalError (a
    # NameError subclass) and the fallback would run on every start.
    try:
        script_path = os.path.abspath(__file__)
    except NameError:
        # The "__file__" name is not always available, for example
        # when this module is run from PythonWin.  In this case we
        # simply use the current working directory.
        script_path = os.path.join(os.getcwd(), "kaldi_module_loader_plus.py")
    path = os.path.dirname(script_path)

    # Set any configuration options here as keyword arguments.
    # See Kaldi engine documentation for all available options and more info.
    engine = get_engine('kaldi',
        model_dir='kaldi_model',  # default model directory
        # vad_aggressiveness=3,  # default aggressiveness of VAD
        # vad_padding_start_ms=150,  # default ms of required silence before VAD
        # vad_padding_end_ms=150,  # default ms of required silence after VAD
        # vad_complex_padding_end_ms=500,  # default ms of required silence after VAD for complex utterances
        # input_device_index=None,  # set to an int to choose a non-default microphone
        # lazy_compilation=True,  # set to True to parallelize & speed up loading
        # retain_dir=None,  # set to a writable directory path to retain recognition metadata and/or audio data
        # retain_audio=None,  # set to True to retain speech data wave files in the retain_dir (if set)
    )

    # Call connect() now that the engine configuration is set.
    engine.connect()

    # Load grammars.
    load_sleep_wake_grammar(True)
    directory = CommandModuleDirectory(path, excludes=[script_path])
    directory.load()

    # Define recognition callback functions.
    def on_begin():
        print("Speech start detected.")

    def on_recognition(words):
        message = u"Recognized: %s" % u" ".join(words)

        # This only seems to be an issue with Python 2.7 on Windows.
        if six.PY2:
            encoding = sys.stdout.encoding or "ascii"
            message = message.encode(encoding, errors='replace')
        print(message)

    def on_failure():
        print("Sorry, what was that?")

    # Start the engine's main recognition loop
    engine.prepare_for_recognition()
    try:
        print("Listening...")
        engine.do_recognition(on_begin, on_recognition, on_failure)
    except KeyboardInterrupt:
        # Ctrl+C is treated as a normal way to stop listening.
        pass


if __name__ == "__main__":
    main()