Différences
Ci-dessous, les différences entre deux révisions de la page.
Les deux révisions précédentesRévision précédenteProchaine révision | Révision précédente | ||
pocketsphinx [Le 11/01/2014, 22:11] – [test] mmaura | pocketsphinx [Le 07/08/2019, 07:10] (Version actuelle) – [À partir des dépôts] 82.234.232.9 | ||
---|---|---|---|
Ligne 1: | Ligne 1: | ||
+ | {{tag> | ||
+ | <note important> | ||
+ | ====== Pocketsphinx ====== | ||
+ | |||
+ | **Pocketsphinx** est une bibliothèque permettant d' | ||
+ | |||
+ | ===== Installation ===== | ||
+ | ==== À partir des dépôts ==== | ||
+ | |||
+ | [[: | ||
+ | |||
+ | Ou dans un [[: | ||
+ | < | ||
+ | sudo apt-get install python-pocketsphinx libpocketsphinx1 gstreamer1.0-pocketsphinx python-pyaudio | ||
+ | </ | ||
+ | |||
+ | ==== À partir des sources ==== | ||
+ | D' | ||
+ | < | ||
+ | sudo apt-get build-dep pocketsphinx | ||
+ | sudo apt-get install bison python-pyaudio | ||
+ | </ | ||
+ | |||
+ | === sphinxbase === | ||
+ | |||
+ | On récupère ensuite les sources de **sphinxbase-0.8** | ||
+ | < | ||
+ | wget -O sphinxbase-0.8.tar.gz http:// | ||
+ | </ | ||
+ | On les décompresse | ||
+ | < | ||
+ | tar -xvzf sphinxbase-0.8.tar.gz | ||
+ | </ | ||
+ | On se rend dans le dossier pour préparer l' | ||
+ | < | ||
+ | cd sphinxbase-0.8/ | ||
+ | </ | ||
+ | On compile | ||
+ | |||
+ | < | ||
+ | ./configure --prefix=/ | ||
+ | make | ||
+ | sudo make install | ||
+ | </ | ||
+ | |||
+ | |||
+ | <note important> | ||
+ | Si vous avez l' | ||
+ | < | ||
+ | import pocketsphinx as ps | ||
+ | File " | ||
+ | ValueError: PyCapsule_GetPointer called with invalid PyCapsule object | ||
+ | </ | ||
+ | il faut régénérer le fichier | ||
+ | < | ||
+ | sudo apt-get install cython | ||
+ | mv python/ | ||
+ | </ | ||
+ | puis relancer la compilation à partir de ./configure | ||
+ | </ | ||
+ | |||
+ | <note tip>Par défaut, au moment de la configuration de sphinxbase, si les bibliothèques de compilation de PulseAudio sont installées, elles seront utilisées. | ||
+ | Si, comme moi, vous devez utiliser ALSA, il faut supprimer la bibliothèque PulseAudio : | ||
+ | < | ||
+ | sudo apt-get remove libpulse-dev | ||
+ | |||
+ | </ | ||
+ | </ | ||
+ | |||
+ | === pocketsphinx === | ||
+ | < | ||
+ | wget -O pocketsphinx-0.8.tar.gz http:// | ||
+ | tar -xvzf pocketsphinx-0.8.tar.gz | ||
+ | cd pocketsphinx-0.8 | ||
+ | ./configure --prefix=/ | ||
+ | make | ||
+ | sudo make install | ||
+ | </ | ||
+ | |||
+ | === Modèles français === | ||
+ | |||
+ | Téléchargement: | ||
+ | < | ||
+ | wget -O lium_french_f0.tar.gz http:// | ||
+ | tar -xvzf lium_french_f0.tar.gz | ||
+ | cd lium_french_f0/ | ||
+ | sudo mkdir -p `pkg-config --variable=modeldir pocketsphinx`/ | ||
+ | sudo mv * `pkg-config --variable=modeldir pocketsphinx`/ | ||
+ | |||
+ | wget -O french3g62K.lm.dmp http:// | ||
+ | sudo mkdir -p `pkg-config --variable=modeldir pocketsphinx`/ | ||
+ | sudo mv french3g62K.lm.dmp `pkg-config --variable=modeldir pocketsphinx`/ | ||
+ | |||
+ | wget -O frenchWords62K.dic http:// | ||
+ | sudo mv frenchWords62K.dic `pkg-config --variable=modeldir pocketsphinx`/ | ||
+ | </ | ||
+ | |||
+ | ===== Bon alors, est-ce que ça marche ? ===== | ||
+ | ==== pocketsphinx_continuous ==== | ||
+ | Vous pouvez alors lancer la reconnaissance vocale depuis le micro directement avec la commande pocketsphinx_continuous. | ||
+ | Il faut simplement préciser un dictionnaire à utiliser, un modèle de langage et un modèle de Markov caché (Hidden Markov Model ou HMM). | ||
+ | Si vous avez installé pocketsphinx avec le gestionnaire de paquets, le répertoire contenant les modèles est / | ||
+ | Pour utiliser les modèles en français que vous venez de télécharger en suivant les instructions ci-dessus, il faut exécuter la commande : | ||
+ | < | ||
+ | pocketsphinx_continuous -dict / | ||
+ | </ | ||
+ | |||
+ | Si vous l'avez compilé depuis les sources comme indiqué plus haut, le répertoire contenant les modèles est / | ||
+ | |||
+ | < | ||
+ | pocketsphinx_continuous -dict / | ||
+ | </ | ||
+ | |||
+ | Plutôt que d' | ||
+ | |||
+ | |||
+ | ==== scripts python ==== | ||
+ | === lister tous les périphériques audio === | ||
+ | |||
+ | [[http:// | ||
+ | <code language=' | ||
+ | # | ||
+ | """ | ||
+ | PyAudio Example: | ||
+ | |||
+ | Query and print PortAudio HostAPIs, Devices, and their | ||
+ | support rates. | ||
+ | """ | ||
+ | |||
+ | import pyaudio | ||
+ | |||
+ | standard_sample_rates = [8000.0, 9600.0, 11025.0, 12000.0, | ||
+ | | ||
+ | | ||
+ | | ||
+ | |||
+ | p = pyaudio.PyAudio() | ||
+ | max_apis = p.get_host_api_count() | ||
+ | max_devs = p.get_device_count() | ||
+ | |||
+ | print(" | ||
+ | print(" | ||
+ | print(" | ||
+ | print(" | ||
+ | print(" | ||
+ | |||
+ | print(" | ||
+ | |||
+ | for i in range(max_apis): | ||
+ | apiinfo = p.get_host_api_info_by_index(i) | ||
+ | for k in list(apiinfo.items()): | ||
+ | print(" | ||
+ | print(" | ||
+ | |||
+ | print(" | ||
+ | |||
+ | for i in range(max_devs): | ||
+ | devinfo = p.get_device_info_by_index(i) | ||
+ | |||
+ | # print out device parameters | ||
+ | for k in list(devinfo.items()): | ||
+ | name, value = k | ||
+ | |||
+ | # if host API, then get friendly name | ||
+ | |||
+ | if name == ' | ||
+ | value = str(value) + \ | ||
+ | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
+ | print(" | ||
+ | |||
+ | # print out supported format rates | ||
+ | |||
+ | input_supported_rates = [] | ||
+ | output_supported_rates = [] | ||
+ | full_duplex_rates = [] | ||
+ | |||
+ | for f in standard_sample_rates: | ||
+ | |||
+ | if devinfo[' | ||
+ | try: | ||
+ | if p.is_format_supported( | ||
+ | f, | ||
+ | input_device = devinfo[' | ||
+ | input_channels = devinfo[' | ||
+ | input_format = pyaudio.paInt16): | ||
+ | input_supported_rates.append(f) | ||
+ | except ValueError: | ||
+ | pass | ||
+ | |||
+ | if devinfo[' | ||
+ | try: | ||
+ | if p.is_format_supported( | ||
+ | f, | ||
+ | output_device = devinfo[' | ||
+ | output_channels = devinfo[' | ||
+ | output_format = pyaudio.paInt16): | ||
+ | output_supported_rates.append(f) | ||
+ | except ValueError: | ||
+ | pass | ||
+ | |||
+ | if (devinfo[' | ||
+ | | ||
+ | try: | ||
+ | if p.is_format_supported( | ||
+ | f, | ||
+ | input_device = devinfo[' | ||
+ | input_channels = devinfo[' | ||
+ | input_format = pyaudio.paInt16, | ||
+ | output_device = devinfo[' | ||
+ | output_channels = devinfo[' | ||
+ | output_format = pyaudio.paInt16): | ||
+ | full_duplex_rates.append(f) | ||
+ | except ValueError: | ||
+ | pass | ||
+ | |||
+ | if len(input_supported_rates): | ||
+ | print(" | ||
+ | if len(output_supported_rates): | ||
+ | print(" | ||
+ | if len(full_duplex_rates): | ||
+ | print(" | ||
+ | |||
+ | print(" | ||
+ | |||
+ | print(" | ||
+ | try: | ||
+ | def_index = p.get_default_input_device_info()[' | ||
+ | print(" | ||
+ | devinfo = p.get_device_info_by_index(def_index) | ||
+ | for k in list(devinfo.items()): | ||
+ | name, value = k | ||
+ | if name == ' | ||
+ | value = str(value) + \ | ||
+ | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
+ | print(" | ||
+ | print(" | ||
+ | except IOError as e: | ||
+ | print(" | ||
+ | |||
+ | try: | ||
+ | def_index = p.get_default_output_device_info()[' | ||
+ | print(" | ||
+ | devinfo = p.get_device_info_by_index(def_index) | ||
+ | for k in list(devinfo.items()): | ||
+ | name, value = k | ||
+ | if name == ' | ||
+ | value = str(value) + \ | ||
+ | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
+ | print(" | ||
+ | print(" | ||
+ | except IOError as e: | ||
+ | print(" | ||
+ | |||
+ | p.terminate() | ||
+ | |||
+ | </ | ||
+ | === essayer de faire une reconnaissance vocale === | ||
+ | |||
+ | [[http:// | ||
+ | |||
+ | <code language=' | ||
+ | # | ||
+ | |||
+ | import sys,os | ||
+ | import pyaudio | ||
+ | import wave | ||
+ | |||
+ | hmdir = "/ | ||
+ | lmd = "/ | ||
+ | dictd = "/ | ||
+ | |||
+ | def decodeSpeech(hmmd, | ||
+ | |||
+ | import pocketsphinx as ps | ||
+ | import sphinxbase | ||
+ | |||
+ | speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp) | ||
+ | wavFile = file(wavfile,' | ||
+ | wavFile.seek(44) | ||
+ | speechRec.decode_raw(wavFile) | ||
+ | result = speechRec.get_hyp() | ||
+ | |||
+ | return result[0] | ||
+ | |||
+ | #CHUNK = 1024 | ||
+ | CHUNK = 512 | ||
+ | #FORMAT = pyaudio.paInt16 | ||
+ | FORMAT = pyaudio.paALSA | ||
+ | CHANNELS = 1 | ||
+ | RATE = 16000 | ||
+ | #RATE = 44100 | ||
+ | RECORD_SECONDS = 10 | ||
+ | |||
+ | for x in range(10): | ||
+ | fn = " | ||
+ | p = pyaudio.PyAudio() | ||
+ | stream = p.open(format=FORMAT, | ||
+ | print(" | ||
+ | frames = [] | ||
+ | print str(RATE / CHUNK * RECORD_SECONDS) + " size\n" | ||
+ | for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)): | ||
+ | data = stream.read(CHUNK) | ||
+ | frames.append(data) | ||
+ | print(" | ||
+ | stream.stop_stream() | ||
+ | stream.close() | ||
+ | wf = wave.open(fn, | ||
+ | wf.setnchannels(CHANNELS) | ||
+ | wf.setsampwidth(p.get_sample_size(FORMAT)) | ||
+ | p.terminate() | ||
+ | wf.setframerate(RATE) | ||
+ | wf.writeframes(b'' | ||
+ | wf.close() | ||
+ | wavfile = fn | ||
+ | recognised = decodeSpeech(hmdir, | ||
+ | print recognised | ||
+ | cm = ' | ||
+ | os.system(cm) | ||
+ | </ | ||
+ | ===== Références ===== | ||
+ | |||
+ | * Site officiel : http:// | ||
+ | * Github : https:// | ||
+ | * Python audio : http:// | ||
+ | * http:// | ||
+ | * Installer sur Raspberry Pi : | ||
+ | |||