610 语音学习笔记
1. 录音的 pcm 文件直接播放,使用:
#!/bin/bash
# Play headerless (raw) PCM with `play`. Raw data carries no format header,
# so the format is spelled out on the command line:
#   -t raw  input type, -r sample rate, -e sample encoding,
#   -b bits per sample, -c channel count.
# 44.1 kHz, signed 16-bit integer samples, 2 channels:
play -t raw -r 44.1k -e signed-integer -b 16 -c 2 loved.pcm
# 48 kHz, 32-bit floating-point samples, 2 channels:
play -t raw -r 48k -e floating-point -b 32 -c 2 ./data_decode/out.pcm
参考: https://blog.csdn.net/lc999102/article/details/80579866
2. 编译时找不到 json.h、curl.h 等头文件,需要安装对应的开发包并建立软链接:
# Install the jsoncpp development headers.
sudo apt-get install libjsoncpp-dev
# Expose /usr/include/jsoncpp/json as /usr/include/json so that an include of
# json/json.h resolves from the default include path.
sudo ln -s /usr/include/jsoncpp/json/ /usr/include/json
# Install the libcurl development headers.
sudo apt install libcurl4-openssl-dev
# Same approach for curl: link the multiarch header directory to /usr/include/curl.
sudo ln -s /usr/include/x86_64-linux-gnu/curl /usr/include/curl
# Install the OpenCV development package.
sudo apt-get install libopencv-dev
参考: https://blog.csdn.net/zhangpeterx/article/details/92175479
3. QCoreApplication 找不到定义的地方。
QCoreApplication 在 5.14.2/Src/qtbase/src/corelib/kernel/qcoreapplication.h 里面,定义为 class Q_CORE_EXPORT QCoreApplication。 参考: https://www.cnblogs.com/lyggqm/p/6281581.html
4. ffmpeg 的 cmake 配置
# Minimal CMake project: build main.cpp into ffmpeg_test and link it against
# the FFmpeg libraries listed at the bottom.
cmake_minimum_required(VERSION 3.10)
project(ffmpeg_test)
set(SRC_LIST main.cpp)
# Header and library search paths (x86_64 multiarch directories).
include_directories("/usr/include/x86_64-linux-gnu")
link_directories("/usr/lib/x86_64-linux-gnu")
add_executable(ffmpeg_test ${SRC_LIST})
# Earlier attempts that linked by explicit .so file name, kept for reference:
#target_link_libraries(${PROJECT_NAME} libavutil.so libavcodec.so libavformat.so libavdevice.so.57 libavfilter.so libswscale.so libpostproc.so)
#target_link_libraries(${PROJECT_NAME} libavutil.so libavcodec.so libavformat.so libswscale.so)
# ${PROJECT_NAME} is ffmpeg_test (set by project() above), i.e. the target
# created by add_executable; libraries are given by plain name.
target_link_libraries(${PROJECT_NAME} avutil avcodec avformat swscale)
参考: https://blog.csdn.net/wangchao1412/article/details/103454371 https://www.jianshu.com/p/72cdcb8d06a7 https://blog.csdn.net/BigDream123/article/details/89741253
5. 使用百度的 tts,需要安装百度 aip sdk
pip3 install baidu-aip --user
参考: https://blog.csdn.net/m0_37886429/article/details/85222593
6. pcm 和 wav 互转
参考: https://blog.csdn.net/sinat_37816910/article/details/105054372 https://blog.csdn.net/huplion/article/details/81260874
7. alsaaudio 打开 PCM 设备(alsaaudio.PCM)时,参数的实际顺序和 api 文档上写的不一致,不要按照文档上的顺序用位置参数去写。全部用关键字参数的方式去写,就没有问题。
# Open the ALSA playback device. Every argument is passed by keyword, because
# positional order was found not to match the documented order.
try:
    pcmOptions = dict(
        type = alsaaudio.PCM_PLAYBACK,
        mode = alsaaudio.PCM_NORMAL,
        rate = 16000,
        channels = 8,
        format = alsaaudio.PCM_FORMAT_S16_LE,
        periodsize = 160,
        device = "plughw:" + self.__devName,
    )
    self.__alsaDev = alsaaudio.PCM(**pcmOptions)
except Exception as e:
    print("alsaaudio open pcm exception: ", e)
8. 其他格式转换为 wav 格式,使用 pydub 中的 AudioSegment
@staticmethod
def extractToWave(srcPath, destDir = None, destPrefix = None):
    """Convert an audio file to WAV and return the path of the WAV file.

    Args:
        srcPath: Path of the source audio file.
        destDir: Directory for the converted file; defaults to the directory
            of ``srcPath``. It is created when it does not exist.
        destPrefix: Optional prefix for the converted file name.

    Returns:
        ``srcPath`` unchanged when it already is a ``.wav`` file, the path of
        the newly written WAV file for ``.mp3`` input, or ``None`` for any
        other extension (nothing is created on disk in that case).
    """
    (srcDir, fileName) = os.path.split(srcPath)
    (fileNoExt, ext) = os.path.splitext(fileName)
    # Compare case-insensitively so "SONG.MP3" / "a.WAV" are recognised too.
    ext = ext.lower()
    if ext == ".wav":
        return srcPath
    if ext != ".mp3":
        # Unsupported format: bail out before touching the filesystem.
        return None
    if destDir is None:
        destDir = srcDir
    if destPrefix is None:
        destPrefix = ""
    # destDir is "" when srcPath is a bare file name; os.makedirs("") would
    # raise, and the current directory needs no creation anyway.
    if destDir:
        os.makedirs(destDir, exist_ok = True)
    # os.path.join keeps an empty destDir relative instead of producing a
    # path under the filesystem root ("/name.wav").
    destPath = os.path.join(destDir, destPrefix + fileNoExt + ".wav")
    data = AudioSegment.from_mp3(srcPath)
    data.export(destPath, format = "wav")
    return destPath
参考: https://www.cnblogs.com/xingshansi/p/6799994.html https://ithelp.ithome.com.tw/articles/10252078 https://blog.csdn.net/baidu_29198395/article/details/86694365
9. alsaaudio 播放 wav 格式
def playThreadWav(self, path, index):
    """Play the WAV file at ``path`` on the ALSA device; worker for slot ``index``.

    The stream parameters (rate, channels, sample width) are read from the
    WAV header and used to (re)open ``plughw:<devName>``. Audio is written
    one period at a time, where a period is ``rate / 100`` frames, until the
    file ends, ``self.__eStop`` is set, or a write fails.

    Args:
        path: Path of the WAV file to play.
        index: Key of this playback in the pool / hook dictionaries.

    Returns:
        The string ``"finished"`` in every case.
    """
    if self.__alsaDev:
        self.__alsaDev.close()
        # Forget the closed handle: if re-opening fails below, the
        # ``is None`` check must detect it instead of writing to a closed
        # device.
        self.__alsaDev = None
    print("alsa playback wav thread run: %d" % index)
    with wave.open(path, 'rb') as f:
        self.__rate = f.getframerate()
        self.__channels = f.getnchannels()
        self.__depthBits = f.getsampwidth() * 8
        self.__format = self.bitsToFormat(self.__depthBits)
        self.__periodSize = int(self.__rate / 100)
        try:
            # Keyword arguments only; see the note on alsaaudio.PCM ordering.
            self.__alsaDev = alsaaudio.PCM(type = alsaaudio.PCM_PLAYBACK,
                                           mode = alsaaudio.PCM_NORMAL,
                                           rate = self.__rate,
                                           channels = self.__channels,
                                           format = self.__format,
                                           periodsize = self.__periodSize,
                                           device = "plughw:" + self.__devName)
        except Exception as e:
            print("alsaaudio open exception: ", e)
        if self.__alsaDev is None:
            print("open alsa audio device failed")
            self.clearThreadParam(index)
            return "finished"
        data = f.readframes(self.__periodSize)
        while data and self.__eStop == False:
            try:
                self.__alsaDev.write(data)
            except alsaaudio.ALSAAudioError as e:
                print("alsa audio play except: ", e)
                break
            data = f.readframes(self.__periodSize)
    # Close the device, run the completion hook and clear per-thread state.
    self.afterThreadComplete(index)
    return "finished"
def clearThreadParam(self, index):
    """Remove playback slot ``index`` from the pool and reset stream parameters.

    Args:
        index: Key of the finished playback in ``self.__poolDict``. A key
            that is not (or no longer) present is ignored, so the parameter
            reset below always runs.
    """
    # pop() instead of del: an unregistered or already-cleared index must not
    # raise KeyError and skip the resets.
    self.__poolDict.pop(index, None)
    self.__rate = 0
    self.__channels = 0
    self.__depthBits = 0
    self.__format = 0
    self.__periodSize = 0
def afterThreadComplete(self, index):
    """Clean up after playback ``index``: close the device, run its hook, clear state.

    The hook registered for ``index`` in ``self.__hookDict`` is always
    removed. It is only called when playback was not stopped via
    ``self.__eStop``. ``clearThreadParam(index)`` runs even if the hook raises.

    Args:
        index: Key of the finished playback.
    """
    # Tolerate a device that was never opened or is already closed.
    if self.__alsaDev is not None:
        self.__alsaDev.close()
        self.__alsaDev = None
    # Take the hook out first so it is removed even when calling it fails.
    hook = self.__hookDict.pop(index, None)
    try:
        if hook is not None and self.__eStop != True:
            hook()
    finally:
        self.clearThreadParam(index)
参考: https://www.programcreek.com/python/example/91453/alsaaudio.PCM
10. 使用 websocket 的时候, pip3 install --user websocket-client 而不是 websocket
11. pcm 转 wav
# Convert the raw PCM file at `path` into a WAV file with the same base name
# and a ".wav" extension, then delete the PCM original.
(file, ext) = os.path.splitext(path)
wavPath = file + ".wav"
# rate / channels / bits are passed straight through to pcmToWave.
EspAudioUtil.pcmToWave(path, wavPath, rate, channels, bits)
# NOTE(review): if `path` already ends in ".wav", wavPath == path and this
# removes the file that was just written; confirm callers never pass one.
os.remove(path)
参考: https://stackoverflow.com/questions/16111038/how-to-convert-pcm-files-to-wav-files-scripting
12. 多通道音频抽取单通道数据
@staticmethod
def pcmExtractOneChannal(multiChannArray, channels, index):
    """Return the samples of one channel from interleaved multi-channel data.

    Args:
        multiChannArray: 1-D NumPy array of interleaved samples
            (frame 0 channel 0, frame 0 channel 1, ...). It is not modified.
        channels: Number of interleaved channels.
        index: Zero-based index of the channel to extract.

    Returns:
        A 1-D array holding every ``channels``-th sample starting at
        ``index``.

    Raises:
        ValueError: If the array length is not a multiple of ``channels``.
    """
    # reshape() leaves the caller's array untouched; assigning to ``.shape``
    # would reshape the caller's array in place.
    frames = multiChannArray.reshape(-1, channels)
    return frames[:, index]
@staticmethod
def pcmExtractOneChannalFile(multiPath, channels, index, dataBits, onePath):
    """Extract one channel from an interleaved raw PCM file into a new file.

    Args:
        multiPath: Path of the interleaved multi-channel PCM file.
        channels: Number of interleaved channels in ``multiPath``.
        index: Zero-based index of the channel to extract.
        dataBits: Sample width in bits; 8, 16 and 32 are supported.
        onePath: Path of the single-channel PCM file to write.

    Raises:
        ValueError: If ``dataBits`` is not a supported width.
    """
    # Extraction only moves whole samples around, so an unsigned integer of
    # the right width is enough; the sample bytes are copied unchanged.
    dataTypes = {8: np.uint8, 16: np.uint16, 32: np.uint32}
    if dataBits not in dataTypes:
        raise ValueError("unsupported sample width: %r bits" % (dataBits,))
    with open(multiPath, 'rb') as f:
        audioData = np.fromfile(f, dtype = dataTypes[dataBits])
    oneData = __class__.pcmExtractOneChannal(audioData, channels, index)
    oneData.tofile(onePath)
@staticmethod
def pcmExtractOneChannalBinary(multiBinary, channels, index, dataBits):
    """Extract one channel from interleaved raw PCM bytes.

    Args:
        multiBinary: Bytes-like object with interleaved multi-channel samples.
        channels: Number of interleaved channels.
        index: Zero-based index of the channel to extract.
        dataBits: Sample width in bits; 8, 16 and 32 are supported.

    Returns:
        The selected channel's samples as ``bytes``.

    Raises:
        ValueError: If ``dataBits`` is not a supported width.
    """
    # Extraction only moves whole samples around, so an unsigned integer of
    # the right width is enough; the sample bytes are copied unchanged.
    dataTypes = {8: np.uint8, 16: np.uint16, 32: np.uint32}
    if dataBits not in dataTypes:
        raise ValueError("unsupported sample width: %r bits" % (dataBits,))
    # frombuffer is the supported way to view binary data; np.fromstring is
    # deprecated for this use.
    audioData = np.frombuffer(multiBinary, dtype = dataTypes[dataBits])
    oneData = __class__.pcmExtractOneChannal(audioData, channels, index)
    return oneData.tobytes()
参考: https://www.pythonf.cn/read/128012
13. pcm 和 wave 互转
@staticmethod
def pcmToWave(pcmPath, wavPath, rate, channels, depthBits):
    """Wrap the raw PCM data of ``pcmPath`` in a WAV container at ``wavPath``.

    Args:
        pcmPath: Path of the headerless PCM input file.
        wavPath: Path of the WAV file to create.
        rate: Sample rate in Hz.
        channels: Number of channels.
        depthBits: Sample width in bits; written to the header as bytes.
    """
    with open(pcmPath, "rb") as rawFile:
        print("pcm open")
        rawFrames = rawFile.read()
        with wave.open(wavPath, "wb") as wavOut:
            sampleWidth = int(depthBits / 8)
            print(channels, sampleWidth, rate)
            print(len(rawFrames))
            # The frame count is given as 0 here; the data is written by
            # writeframes below.
            header = (channels, sampleWidth, rate, 0, 'NONE', 'NONE')
            wavOut.setparams(header)
            wavOut.writeframes(rawFrames)
@staticmethod
def waveToPCM(wavPath, pcmPath, dataBits = 16):
    """Write the audio frames of a WAV file to a headerless PCM file.

    The frames are read through the ``wave`` module rather than by skipping
    a fixed 44 bytes, so files whose header is longer (extra chunks before
    the data chunk) are converted correctly.

    Args:
        wavPath: Path of the WAV input file.
        pcmPath: Path of the raw PCM file to write.
        dataBits: Kept for backward compatibility; the sample width is taken
            from the WAV header, so this value is not used.

    Returns:
        The WAV parameters as returned by ``wave``'s ``getparams()``.
    """
    with wave.open(wavPath, 'rb') as wavFile:
        params = wavFile.getparams()
        frames = wavFile.readframes(params.nframes)
    with open(pcmPath, 'wb') as pcmFile:
        pcmFile.write(frames)
    return params
参考: https://blog.csdn.net/sinat_37816910/article/details/105054372 https://docs.python.org/3/library/wave.html
14. 停止 baidu 的 websocket 时,需要发送 cancel;至于是否还需要设置 self.ws.keep_running = False,不太确定
参考: https://www.coder.work/article/1269314
15. 播放音乐并立即停止
# Experiment: start playback in the background, let it run for 5 seconds,
# kill it, and print timestamps / poll() results around the kill.
cmd = "AUDIODEV=hw:realtekrt5651co play ~/esp_run/speech/test.wav"
# shell = True: the whole command line (AUDIODEV=... prefix, ~ expansion) is
# handed to the shell.
sub = subprocess.Popen(cmd, shell = True)
print(sub.poll())  # poll() returns None while the process has not exited
time.sleep(5)
print("kill")
print(time.time())
# NOTE(review): with shell = True the Popen object refers to the shell
# process; whether `play` itself also stops on kill() should be confirmed.
sub.kill() # alternative: sub.send_signal(signal.SIGKILL)
# sub.wait()
print(time.time())
# Without a wait() the result here may still be None right after the kill.
print(sub.poll())
# Turn terminal echo back on; presumably the killed player can leave the
# terminal with echo disabled. TODO confirm.
sub = subprocess.Popen("stty echo", shell = True)
16. 如果需要使用 alsaaudio 在录音的同时播放其他音频,那么录音可能会发生 overrun,主要是播放音频的 set 函数会导致 overrun,其他一些耗时的处理也会导致 overrun,比如 mp3 解码。
17. subprocess 被 kill 之后,调用 wait() 函数时如果提示 EOFError,可以执行
stty sane
来恢复。
18. 寻找目录,寻找文件
def searchDir(path, dirName):
    """Return the path of the first directory named ``dirName`` under ``path``.

    The tree is walked with ``os.walk``; the first directory whose immediate
    sub-directories include ``dirName`` wins. Returns ``None`` when no such
    directory exists.
    """
    hits = (
        os.path.join(parent, dirName)
        for parent, subDirs, _names in os.walk(path)
        if dirName in subDirs
    )
    return next(hits, None)
def searchFile(path, fileName):
    """Return the path of the first file named ``fileName`` under ``path``.

    The tree is walked with ``os.walk``; the first directory that directly
    contains ``fileName`` wins. Returns ``None`` when the file is not found.
    """
    for parent, _subDirs, names in os.walk(path):
        if fileName not in names:
            continue
        return os.path.join(parent, fileName)
    return None
19. 需要依赖的文件收集
def _importedModules(line):
    """Return the module names imported by one line of Python source text."""
    import re

    # Ignore trailing comments and indentation; a fully commented-out line
    # becomes empty and matches nothing.
    code = line.split("#", 1)[0].strip()
    fromMatch = re.match(r"from\s+(\S+)\s+import\b", code)
    if fromMatch:
        return [fromMatch.group(1)]
    # Requiring whitespace after "import" rejects words such as "important".
    importMatch = re.match(r"import\s+(.+)", code)
    if importMatch:
        # "import a as x, b" -> ["a", "b"]: keep only the module name of each
        # comma-separated part, dropping any "as alias".
        return [part.split()[0]
                for part in importMatch.group(1).split(",")
                if part.strip()]
    return []


def assembleDepends(path):
    """Collect the ``esp_*`` modules imported by the sources under ``path``.

    ``grep -R`` is used to find every line containing "import" below
    ``path``; each such line is then parsed as an ``import ...`` or
    ``from ... import ...`` statement.

    Args:
        path: File or directory to scan recursively.

    Returns:
        A dict whose keys are the imported module names starting with
        ``esp_`` (each mapped to 1). The dict is empty when nothing matches
        or when grep cannot be run.
    """
    import subprocess

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to grep verbatim. -h drops the
    # "file:" prefix so that no prefix stripping is needed afterwards.
    cmd = ["grep", "-R", "-h", "-e", "import", "--", path]
    try:
        result = subprocess.run(cmd, stdout = subprocess.PIPE,
                                universal_newlines = True, errors = "replace")
    except OSError:
        # grep is not available: behave like "no matches", as the original
        # shell pipeline did.
        return {}
    dependDict = {}
    for line in result.stdout.splitlines():
        for module in _importedModules(line):
            if module.startswith("esp_"):
                dependDict[module] = 1
    return dependDict