编辑推荐: |
文章介绍了如何使用Python第三方库PyAudio进行麦克风录音然后自动播放已经合成的语音实现语音交互回答。
本文来自于cnblogs ,由火龙果软件Luca编辑,推荐。 |
|
Python 很强大其原因就是因为它庞大的三方库 , 资源是非常的丰富
, 当然也不会缺少关于音频的库
关于音频, PyAudio 这个库, 可以实现开启麦克风录音, 可以播放音频文件等等,此刻我们不去了解其他的功能,只了解一下它如何实现录音的
首先要先 pip 一个 PyAudio
pip install pyaudio
一.PyAudio 实现麦克风录音
然后建立一个py文件,复制如下代码
import pyaudio
import wave

# Recording parameters; the same values configure the input stream and the
# header of the WAV file written at the end.
CHUNK = 1024  # frames fetched per stream.read() call
FORMAT = pyaudio.paInt16  # sample-format constant handed to PyAudio
CHANNELS = 2
RATE = 16000  # sampling rate in Hz
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "Oldboy.wav"

p = pyaudio.PyAudio()
try:
    # Query the sample width while the PyAudio instance is still alive
    # instead of after terminate().
    sample_width = p.get_sample_size(FORMAT)
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        # Prompt text restored from the mis-encoded (mojibake) original.
        print("开始录音,请说话......")
        # Number of CHUNK-sized reads that fit into RECORD_SECONDS.
        frames = [stream.read(CHUNK)
                  for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
        print("录音结束,请闭嘴!")
    finally:
        # Always release the stream, even if a read fails.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# The context manager closes the file even if writing raises.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
尝试一下,在目录中出现了一个 Oldboy.wav 文件 , 听一听,还是很清晰的嘛
接下来,我们将这段录音代码,写在一个函数里面,如果要录音的话就调用
建立一个文件 pyrec.py 并将录音代码和函数写在内
# pyrec.py 文件内容
import pyaudio
import wave
# Recording parameters read by rec() below.
CHUNK = 1024  # frames requested per stream.read() call; also passed as frames_per_buffer
FORMAT = pyaudio.paInt16  # sample format given to p.open() and used to look up the WAV sample width
CHANNELS = 2  # channel count for both the input stream and the WAV header
RATE = 16000  # sampling rate for both the input stream and the WAV header
RECORD_SECONDS = 2  # recording length; determines how many chunks rec() reads
def rec(file_name):
    """Record from the default input device and save the audio as a WAV file.

    Reads ``int(RATE / CHUNK * RECORD_SECONDS)`` chunks of ``CHUNK`` frames
    using the module-level FORMAT / CHANNELS / RATE settings, then writes
    them to ``file_name``.

    Args:
        file_name: Path of the WAV file to create (overwritten if present).
    """
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive
        # instead of after terminate().
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            # Prompt text restored from the mis-encoded (mojibake) original.
            print("开始录音,请说话......")
            frames = [stream.read(CHUNK)
                      for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
            print("录音结束,请闭嘴!")
        finally:
            # Always release the stream, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing raises.
    with wave.open(file_name, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
rec 函数就是我们调用的录音函数,并且给他一个文件名,他就会自动将声音写入到文件中了
二.实现音频格式自动转换 并 调用语音识别
录音的问题解决了,赶快和百度语音识别接在一起使用一下:

不管你的录音有多么多么清晰,你发现百度给你返回的永远是:
{'err_msg':
'speech quality error.', 'err_no': 3301, 'sn':
'6397933501529645284'} # 音质不清晰 |
其实不是没听清,而是百度支持的音频格式PCM搞的鬼
所以,我们要将录制的wav音频文件转换为pcm文件
写一个文件 wav2pcm.py 这个文件里面的函数是专门为我们转换wav文件的
使用 os 模块中的 os.system()方法 这个方法是执行系统命令用的,
在windows系统中的命令就是 cmd 里面写的东西,dir , cd 这类的命令
# wav2pcm.py 文件内容
import os
def wav_to_pcm(wav_file):
    """Convert a WAV file to raw 16 kHz mono 16-bit PCM using ffmpeg.

    The output path is the input path with its final extension replaced by
    ``.pcm`` (e.g. ``"audio.wav"`` -> ``"audio.pcm"``).

    Args:
        wav_file: Path of the WAV file to convert.

    Returns:
        The path of the PCM file. As before, the path is returned even when
        the conversion fails; a RuntimeWarning is emitted in that case so
        the failure is no longer silent.
    """
    import subprocess
    import warnings

    # splitext strips only the last extension, so dots elsewhere in the path
    # ("./1.wav", "take.v2.wav") no longer truncate the output name.
    pcm_file = os.path.splitext(wav_file)[0] + ".pcm"

    # Argument list instead of a shell string: file names containing spaces
    # or shell metacharacters are passed to ffmpeg verbatim.
    command = [
        "ffmpeg", "-y", "-i", wav_file,
        "-acodec", "pcm_s16le",
        "-f", "s16le", "-ac", "1", "-ar", "16000",
        pcm_file,
    ]
    try:
        exit_code = subprocess.call(command)
    except OSError as exc:
        # Typically: the ffmpeg executable is not installed / not on PATH.
        warnings.warn("could not run ffmpeg: %s" % exc,
                      RuntimeWarning, stacklevel=2)
    else:
        if exit_code != 0:
            warnings.warn("ffmpeg exited with status %d while converting %r"
                          % (exit_code, wav_file),
                          RuntimeWarning, stacklevel=2)
    return pcm_file
ÕâÑùÎÒÃǾÍÓÐÁ˰ÑwavתΪpcmµÄº¯ÊýÁË , ÔÙÖØÐ¹¹½¨Ò»´ÎÔÛÃǵĴúÂë

这次的返回结果还挺让人满意的嘛
{'corpus_no':
'6569869134617218414', 'err_msg': 'success.',
'err_no': 0, 'result': ['xxx教育'], 'sn': '8116162981529666859'} |
拿到语音识别的字符串了,接下来用这段字符串 语音合成, 学习咱们说出来的话
三.语音合成 与 FFmpeg 播放mp3 文件
拿到字符串了,直接调用synthesis方法去合成吧

这段代码衔接上一段代码,成功获得了 synth.mp3 音频文件,并且确定了实在学习我们说的话
接下来就是让我们的程序自动将 synth.mp3 音频文件播放了 其实PyAudio 有播放的功能,但是操作有点复杂
所以我们还是选择用简单的方式解决复杂的问题,就是这么简单粗暴,是否还记得FFmpeg 呢?
FFmpeg 这个系统工具中,有一个 ffplay 的工具用来打开并播放音频文件的,使用方法大概是:
ffplay 音频文件.mp3
建立一个playmp3.py文件, 写一个 play_mp3 的函数用来播放已经合成的语音
# playmp3.py 文件内容
import os
def play_mp3(file_name):
    """Play an audio file by running ``ffplay <file_name>``.

    Args:
        file_name: Path of the audio file to play.

    Returns:
        None. If ffplay cannot be started or exits with a non-zero status,
        a RuntimeWarning is emitted instead of failing silently.
    """
    import subprocess
    import warnings

    # Argument list instead of a shell string: the file name reaches ffplay
    # verbatim, even with spaces or shell metacharacters in it.
    try:
        exit_code = subprocess.call(["ffplay", file_name])
    except OSError as exc:
        # Typically: the ffplay executable is not installed / not on PATH.
        warnings.warn("could not run ffplay: %s" % exc,
                      RuntimeWarning, stacklevel=2)
    else:
        if exit_code != 0:
            warnings.warn("ffplay exited with status %d while playing %r"
                          % (exit_code, file_name),
                          RuntimeWarning, stacklevel=2)
回到主文件,调用playmp3.py文件中的 play_mp3 函数

执行代码,当你看到 : 开始录音,请说话......
请大声的说出: 学IT 找xxx教育
然后你就会听到,一个娇滴滴声音重复你说的话
四.简单问答
首先我们要把代码重新梳理一下:
把语音合成 语音识别部分的代码独立成函数放到baidu_ai.py文件中
# baidu_ai.py 文件内容
from aip import AipSpeech
# The three values below correspond to the three parameters of the
# application created in the Baidu speech console.
APP_ID = "xxxxx"
API_KEY = "xxxxxxx"
SECRET_KEY = "xxxxxxxx"
# Module-level client shared by audio_to_text() and text_to_audio().
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def audio_to_text(pcm_file):
    """Send a PCM file to the speech-recognition service and return the text.

    Args:
        pcm_file: Path of the PCM audio file to recognise; it is submitted
            with format ``'pcm'`` and rate ``16000``.

    Returns:
        The first entry of the ``"result"`` list in the service response.

    Raises:
        RuntimeError: If the response carries no recognition result (for
            example an error payload such as err_no 3301). Previously this
            surfaced as an opaque ``TypeError`` from indexing ``None``.
    """
    with open(pcm_file, 'rb') as fp:
        file_context = fp.read()

    # dev_pid selects the recognition model; 1536 is presumably Mandarin
    # per the Baidu SDK docs -- TODO confirm.
    res = client.asr(file_context, 'pcm', 16000, {'dev_pid': 1536})

    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(
            "speech recognition failed: err_no=%s, err_msg=%s"
            % (res.get("err_no"), res.get("err_msg"))
        )
    # The first element of the result list is the recognised string.
    return candidates[0]
def text_to_audio(res_str):
    """Synthesise ``res_str`` to speech and save it as ``synth.mp3``.

    Args:
        res_str: Text to synthesise.

    Returns:
        The name of the written audio file (``"synth.mp3"``).

    Raises:
        RuntimeError: If the service returns an error description (a dict)
            instead of audio bytes. The check happens before the output file
            is opened, so an existing synth.mp3 is not truncated on failure.
    """
    synth_file = "synth.mp3"
    # Option keys are passed through to the SDK unchanged; presumably
    # volume / speed / pitch / voice -- TODO confirm against the SDK docs.
    synth_context = client.synthesis(res_str, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 9,
        "per": 4,
    })
    if isinstance(synth_context, dict):
        raise RuntimeError("speech synthesis failed: %r" % (synth_context,))

    with open(synth_file, "wb") as f:
        f.write(synth_context)
    return synth_file
然后把我们的主文件进行一下修改
import pyrec     # recording function module
import wav2pcm   # wav -> pcm conversion function module
import baidu_ai  # speech synthesis and speech recognition functions
import playmp3   # mp3 playback function module

# Step 1: record from the microphone into a wav file of the given name.
wav_file = "1.wav"
pyrec.rec(wav_file)

# Step 2: convert the wav file to pcm; the pcm file name is returned.
pcm_file = wav2pcm.wav_to_pcm(wav_file)

# Step 3: recognise the converted pcm audio as text.
res_str = baidu_ai.audio_to_text(pcm_file)

# Step 4: synthesise that text back into speech; the file name is returned.
synth_file = baidu_ai.text_to_audio(res_str)

# Step 5: play the synthesised file.
playmp3.play_mp3(synth_file)
然后就是大展宏图的时候了,展开你们的想象力:
res_str 是字符串,如果字符串等于"你叫什么名字"的时候,我们就要给他一个回答:我的名字叫xxx教育
新建一个FAQ.py的文件然后建立一个函数faq:
# FAQ.py 文件内容
def faq(Q):
    """Return the canned answer for a recognised question.

    The string literals were mis-encoded (mojibake) in the original and
    could never equal recognised text; they are restored to the intended
    Chinese here.

    Args:
        Q: The question text, e.g. the string produced by speech recognition.

    Returns:
        The answer for a known question, otherwise a fallback reply.
    """
    if Q == "你叫什么名字":  # known question
        return "我的名字是xxx教育"  # its answer
    # Returned when the question has no registered answer.
    return "我不知道你在说什么"
在主文件中导入这个函数,并将语音识别后的字符串传入函数中

现在来尝试一下:"你叫什么名字","你今年几岁了"
成功了,现在你可以对 FAQ.py 这个文件进行更多的问题匹配了
还是那句话,别玩儿坏了
思考题:
1.如何实现一直问答不用问一次停一次?
2.问题那么多,是不是要写这么多问题呢?
3.如果我问你是谁,是不是要重复也一次 我的名字叫XXX 的答案呢? |