Différences
Ci-dessous, les différences entre deux révisions de la page.
| Les deux révisions précédentes · Révision précédente · Prochaine révision | Révision précédente | ||
| pocketsphinx [Le 15/03/2017, 11:35] – [pocketsphinx_continuous] 138.231.120.146 | pocketsphinx [Le 07/08/2019, 07:10] (Version actuelle) – [À partir des dépôts] 82.234.232.9 | ||
|---|---|---|---|
| Ligne 1: | Ligne 1: | ||
| + | {{tag> | ||
| + | <note important> | ||
| + | ====== Pocketsphinx ====== | ||
| + | |||
| + | **Pocketsphinx** est une librairie permettant d' | ||
| + | |||
| + | ===== Installation ===== | ||
| + | ==== À partir des dépôts ==== | ||
| + | |||
| + | [[: | ||
| + | |||
| + | Ou dans un [[: | ||
| + | < | ||
| + | sudo apt-get install python-pocketsphinx libpocketsphinx1 gstreamer1.0-pocketsphinx python-pyaudio | ||
| + | </ | ||
| + | |||
| + | ==== À partir des sources ==== | ||
| + | D' | ||
| + | < | ||
| + | sudo apt-get build-dep pocketsphinx | ||
| + | sudo apt-get install bison python-pyaudio | ||
| + | </ | ||
| + | |||
| + | === sphinxbase === | ||
| + | |||
| + | On récupère ensuite les sources de **sphinxbase-0.8** | ||
| + | < | ||
| + | wget -O sphinxbase-0.8.tar.gz http:// | ||
| + | </ | ||
| + | On les décompresse | ||
| + | < | ||
| + | tar -xvzf sphinxbase-0.8.tar.gz | ||
| + | </ | ||
| + | On se rend dans le dossier pour préparer l' | ||
| + | < | ||
| + | cd sphinxbase-0.8/ | ||
| + | </ | ||
| + | On compile | ||
| + | |||
| + | < | ||
| + | ./configure --prefix=/ | ||
| + | make | ||
| + | sudo make install | ||
| + | </ | ||
| + | |||
| + | |||
| + | <note important> | ||
| + | Si vous avez l' | ||
| + | < | ||
| + | import pocketsphinx as ps | ||
| + | File " | ||
| + | ValueError: PyCapsule_GetPointer called with invalid PyCapsule object | ||
| + | </ | ||
| + | il faut régénérer le fichier | ||
| + | < | ||
| + | sudo apt-get install cython | ||
| + | mv python/ | ||
| + | </ | ||
| + | puis relancer la compilation à partir de ./configure | ||
| + | </ | ||
| + | |||
| + | <note tip>Par défaut, au moment de la configuration de sphinxbase, si les librairies de compilation de pulseaudio sont installées, elles seront utilisées. | ||
| + | Si, comme moi, vous devez utiliser ALSA, il faut supprimer la librairie pulseaudio : | ||
| + | < | ||
| + | sudo apt-get remove libpulse-dev | ||
| + | |||
| + | </ | ||
| + | </ | ||
| + | |||
| + | === pocketsphinx === | ||
| + | < | ||
| + | wget -O pocketsphinx-0.8.tar.gz http:// | ||
| + | tar -xvzf pocketsphinx-0.8.tar.gz | ||
| + | cd pocketsphinx-0.8 | ||
| + | ./configure --prefix=/ | ||
| + | make | ||
| + | sudo make install | ||
| + | </ | ||
| + | |||
| + | === Modèles français === | ||
| + | |||
| + | Téléchargement: | ||
| + | < | ||
| + | wget -O lium_french_f0.tar.gz http:// | ||
| + | tar -xvzf lium_french_f0.tar.gz | ||
| + | cd lium_french_f0/ | ||
| + | sudo mkdir -p `pkg-config --variable=modeldir pocketsphinx`/ | ||
| + | sudo mv * `pkg-config --variable=modeldir pocketsphinx`/ | ||
| + | |||
| + | wget -O french3g62K.lm.dmp http:// | ||
| + | sudo mkdir -p `pkg-config --variable=modeldir pocketsphinx`/ | ||
| + | sudo mv french3g62K.lm.dmp `pkg-config --variable=modeldir pocketsphinx`/ | ||
| + | |||
| + | wget -O frenchWords62K.dic http:// | ||
| + | sudo mv frenchWords62K.dic `pkg-config --variable=modeldir pocketsphinx`/ | ||
| + | </ | ||
| + | |||
| + | ===== Bon alors, est-ce que ça marche ? ===== | ||
| + | ==== pocketsphinx_continuous ==== | ||
| + | Vous pouvez alors lancer la reconnaissance vocale depuis le micro directement avec la commande pocketsphinx_continuous. | ||
| + | Il faut simplement préciser un dictionnaire à utiliser, un modèle de langage et un modèle de Markov caché (Hidden Markov Model ou HMM). | ||
| + | Si vous avez installé pocketsphinx avec le gestionnaire de paquets, le répertoire contenant les modèles est / | ||
| + | Pour utiliser les modèles en français que vous venez de télécharger en suivant les instructions ci-dessus, il faut exécuter la commande : | ||
| + | < | ||
| + | pocketsphinx_continuous -dict / | ||
| + | </ | ||
| + | |||
| + | Si vous l'avez compilé depuis les sources comme indiqué plus haut, le répertoire contenant les modèles est / | ||
| + | |||
| + | < | ||
| + | pocketsphinx_continuous -dict / | ||
| + | </ | ||
| + | |||
| + | Plutôt que d' | ||
| + | |||
| + | |||
| + | ==== scripts python ==== | ||
| + | === lister tous les périphériques audio === | ||
| + | |||
| + | [[http:// | ||
| + | <code language=' | ||
| + | # | ||
| + | """ | ||
| + | PyAudio Example: | ||
| + | |||
| + | Query and print PortAudio HostAPIs, Devices, and their | ||
| + | support rates. | ||
| + | """ | ||
| + | |||
| + | import pyaudio | ||
| + | |||
| + | standard_sample_rates = [8000.0, 9600.0, 11025.0, 12000.0, | ||
| + | | ||
| + | | ||
| + | | ||
| + | |||
| + | p = pyaudio.PyAudio() | ||
| + | max_apis = p.get_host_api_count() | ||
| + | max_devs = p.get_device_count() | ||
| + | |||
| + | print(" | ||
| + | print(" | ||
| + | print(" | ||
| + | print(" | ||
| + | print(" | ||
| + | |||
| + | print(" | ||
| + | |||
| + | for i in range(max_apis): | ||
| + | apiinfo = p.get_host_api_info_by_index(i) | ||
| + | for k in list(apiinfo.items()): | ||
| + | print(" | ||
| + | print(" | ||
| + | |||
| + | print(" | ||
| + | |||
| + | for i in range(max_devs): | ||
| + | devinfo = p.get_device_info_by_index(i) | ||
| + | |||
| + | # print out device parameters | ||
| + | for k in list(devinfo.items()): | ||
| + | name, value = k | ||
| + | |||
| + | # if host API, then get friendly name | ||
| + | |||
| + | if name == ' | ||
| + | value = str(value) + \ | ||
| + | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
| + | print(" | ||
| + | |||
| + | # print out supported format rates | ||
| + | |||
| + | input_supported_rates = [] | ||
| + | output_supported_rates = [] | ||
| + | full_duplex_rates = [] | ||
| + | |||
| + | for f in standard_sample_rates: | ||
| + | |||
| + | if devinfo[' | ||
| + | try: | ||
| + | if p.is_format_supported( | ||
| + | f, | ||
| + | input_device = devinfo[' | ||
| + | input_channels = devinfo[' | ||
| + | input_format = pyaudio.paInt16): | ||
| + | input_supported_rates.append(f) | ||
| + | except ValueError: | ||
| + | pass | ||
| + | |||
| + | if devinfo[' | ||
| + | try: | ||
| + | if p.is_format_supported( | ||
| + | f, | ||
| + | output_device = devinfo[' | ||
| + | output_channels = devinfo[' | ||
| + | output_format = pyaudio.paInt16): | ||
| + | output_supported_rates.append(f) | ||
| + | except ValueError: | ||
| + | pass | ||
| + | |||
| + | if (devinfo[' | ||
| + | | ||
| + | try: | ||
| + | if p.is_format_supported( | ||
| + | f, | ||
| + | input_device = devinfo[' | ||
| + | input_channels = devinfo[' | ||
| + | input_format = pyaudio.paInt16, | ||
| + | output_device = devinfo[' | ||
| + | output_channels = devinfo[' | ||
| + | output_format = pyaudio.paInt16): | ||
| + | full_duplex_rates.append(f) | ||
| + | except ValueError: | ||
| + | pass | ||
| + | |||
| + | if len(input_supported_rates): | ||
| + | print(" | ||
| + | if len(output_supported_rates): | ||
| + | print(" | ||
| + | if len(full_duplex_rates): | ||
| + | print(" | ||
| + | |||
| + | print(" | ||
| + | |||
| + | print(" | ||
| + | try: | ||
| + | def_index = p.get_default_input_device_info()[' | ||
| + | print(" | ||
| + | devinfo = p.get_device_info_by_index(def_index) | ||
| + | for k in list(devinfo.items()): | ||
| + | name, value = k | ||
| + | if name == ' | ||
| + | value = str(value) + \ | ||
| + | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
| + | print(" | ||
| + | print(" | ||
| + | except IOError as e: | ||
| + | print(" | ||
| + | |||
| + | try: | ||
| + | def_index = p.get_default_output_device_info()[' | ||
| + | print(" | ||
| + | devinfo = p.get_device_info_by_index(def_index) | ||
| + | for k in list(devinfo.items()): | ||
| + | name, value = k | ||
| + | if name == ' | ||
| + | value = str(value) + \ | ||
| + | " (%s)" % p.get_host_api_info_by_index(k[1])[' | ||
| + | print(" | ||
| + | print(" | ||
| + | except IOError as e: | ||
| + | print(" | ||
| + | |||
| + | p.terminate() | ||
| + | |||
| + | </ | ||
| + | === essayer de faire une reconnaissance vocale === | ||
| + | |||
| + | [[http:// | ||
| + | |||
| + | <code language=' | ||
| + | # | ||
| + | |||
| + | import sys,os | ||
| + | import pyaudio | ||
| + | import wave | ||
| + | |||
| + | hmdir = "/ | ||
| + | lmd = "/ | ||
| + | dictd = "/ | ||
| + | |||
| + | def decodeSpeech(hmmd, | ||
| + | |||
| + | import pocketsphinx as ps | ||
| + | import sphinxbase | ||
| + | |||
| + | speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp) | ||
| + | wavFile = file(wavfile,' | ||
| + | wavFile.seek(44) | ||
| + | speechRec.decode_raw(wavFile) | ||
| + | result = speechRec.get_hyp() | ||
| + | |||
| + | return result[0] | ||
| + | |||
| + | #CHUNK = 1024 | ||
| + | CHUNK = 512 | ||
| + | #FORMAT = pyaudio.paInt16 | ||
| + | FORMAT = pyaudio.paALSA | ||
| + | CHANNELS = 1 | ||
| + | RATE = 16000 | ||
| + | #RATE = 44100 | ||
| + | RECORD_SECONDS = 10 | ||
| + | |||
| + | for x in range(10): | ||
| + | fn = " | ||
| + | p = pyaudio.PyAudio() | ||
| + | stream = p.open(format=FORMAT, | ||
| + | print(" | ||
| + | frames = [] | ||
| + | print str(RATE / CHUNK * RECORD_SECONDS) + " size\n" | ||
| + | for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)): | ||
| + | data = stream.read(CHUNK) | ||
| + | frames.append(data) | ||
| + | print(" | ||
| + | stream.stop_stream() | ||
| + | stream.close() | ||
| + | wf = wave.open(fn, | ||
| + | wf.setnchannels(CHANNELS) | ||
| + | wf.setsampwidth(p.get_sample_size(FORMAT)) | ||
| + | p.terminate() | ||
| + | wf.setframerate(RATE) | ||
| + | wf.writeframes(b'' | ||
| + | wf.close() | ||
| + | wavfile = fn | ||
| + | recognised = decodeSpeech(hmdir, | ||
| + | print recognised | ||
| + | cm = ' | ||
| + | os.system(cm) | ||
| + | </ | ||
| + | ===== Références ===== | ||
| + | |||
| + | * Site officiel : http:// | ||
| + | * Github : https:// | ||
| + | * Python audio : http:// | ||
| + | * http:// | ||
| + | * installer sur raspberry pi : | ||
| + | |||
