1. 录音的 pcm 文件直接播放,使用:

#!/bin/bash
# Play headerless (raw) PCM files with `play` (SoX-style options). A raw file
# carries no format information, so every parameter is given on the command line.
# 44.1 kHz, signed-integer samples, 16 bits per sample, 2 channels.
play -t raw -r 44.1k -e signed-integer -b 16 -c 2 loved.pcm
# 48 kHz, floating-point samples, 32 bits per sample, 2 channels.
play -t raw -r 48k -e floating-point -b 32 -c 2 ./data_decode/out.pcm

参考: https://blog.csdn.net/lc999102/article/details/80579866

2. 编译时提示找不到 json.h、curl.h 等头文件,需要安装对应的开发包并建立软链接。

# Install the JsonCpp development package, then expose its header directory
# as /usr/include/json so the "json" include path resolves.
sudo apt-get install libjsoncpp-dev 
sudo ln -s /usr/include/jsoncpp/json/ /usr/include/json

# Install the libcurl (OpenSSL flavour) development package, then expose the
# architecture-specific curl header directory as /usr/include/curl.
sudo apt install libcurl4-openssl-dev
sudo ln -s /usr/include/x86_64-linux-gnu/curl /usr/include/curl

# Install the OpenCV development package.
sudo apt-get install libopencv-dev

参考: https://blog.csdn.net/zhangpeterx/article/details/92175479

3. QCoreApplication 找不到定义的地方。

QCoreApplication 在 5.14.2/Src/qtbase/src/corelib/kernel/qcoreapplication.h 里面,定义为 class Q_CORE_EXPORT QCoreApplication。 参考: https://www.cnblogs.com/lyggqm/p/6281581.html

4. ffmpeg 的 cmake 配置

# Minimal CMake project that builds one executable linked against FFmpeg libraries.
cmake_minimum_required(VERSION 3.10)

project(ffmpeg_test)

# Single source file; header and library search paths point at the
# x86_64 multi-arch directories.
set(SRC_LIST main.cpp)
include_directories("/usr/include/x86_64-linux-gnu")
link_directories("/usr/lib/x86_64-linux-gnu")

add_executable(ffmpeg_test ${SRC_LIST})

# Earlier attempts that linked by explicit shared-object file name (kept for reference).
#target_link_libraries(${PROJECT_NAME} libavutil.so libavcodec.so libavformat.so libavdevice.so.57 libavfilter.so libswscale.so libpostproc.so)

#target_link_libraries(${PROJECT_NAME} libavutil.so libavcodec.so libavformat.so libswscale.so)

# Link by library name; ${PROJECT_NAME} is ffmpeg_test, the target declared above.
target_link_libraries(${PROJECT_NAME} avutil avcodec avformat swscale)

参考: https://blog.csdn.net/wangchao1412/article/details/103454371 https://www.jianshu.com/p/72cdcb8d06a7 https://blog.csdn.net/BigDream123/article/details/89741253

5. 使用百度的 tts,需要安装百度 aip sdk

pip3 install baidu-aip --user

参考: https://blog.csdn.net/m0_37886429/article/details/85222593

6. pcm 和 wav 互转

参考: https://blog.csdn.net/sinat_37816910/article/details/105054372 https://blog.csdn.net/huplion/article/details/81260874

7. alsaaudio 中的 openPCM 这个参数的顺序有问题,不要按照 api 上面的顺序写。全部用关键词的方式去写,就没有问题。

        # Open an ALSA playback device. Every argument is passed by keyword on
        # purpose: relying on the positional order shown in the API docs was
        # found not to work.
        try:
            self.__alsaDev = alsaaudio.PCM(type = alsaaudio.PCM_PLAYBACK, mode = alsaaudio.PCM_NORMAL, rate = 16000, channels = 8, format = alsaaudio.PCM_FORMAT_S16_LE, periodsize = 160, device = "plughw:" + self.__devName)
        except Exception as e:
            # Best effort: the failure is only reported, not re-raised.
            print("alsaaudio open pcm exception: ", e)

8. 其他格式转换为 wav 格式,使用 pydub 中的 AudioSegment

    @staticmethod
    def extractToWave(srcPath, destDir = None, destPrefix = None):
        (srcDir, fileName) = os.path.split(srcPath)
        (fileNoExt, ext) = os.path.splitext(fileName)
        if ext == ".wav":
            return srcPath

        if destDir is None:
            destDir = srcDir
        if destPrefix is None:
            destPrefix = ""
        if not os.path.exists(destDir):
            os.makedirs(destDir)
        destName = destPrefix + fileNoExt + ".wav"
        destPath = destDir + "/" + destName
        #print(destPath)
        if ext == ".mp3":
            data = AudioSegment.from_mp3(srcPath)
        else:
            return None
        data.export(destPath, format = "wav")
        return destPath

参考: https://www.cnblogs.com/xingshansi/p/6799994.html https://ithelp.ithome.com.tw/articles/10252078 https://blog.csdn.net/baidu_29198395/article/details/86694365

9. alsaaudio 播放 wav 格式

    def playThreadWav(self, path, index):
        """Play the WAV file at ``path`` on the ALSA device; thread body.

        The playback parameters (rate, channels, sample format) are read from
        the WAV header and the device is re-opened with them. Frames are then
        written period by period until the file ends, a write fails, or the
        stop flag ``self.__eStop`` is set.

        Args:
            path: Path of the WAV file to play.
            index: Key identifying this playback in the per-thread
                bookkeeping that ``clearThreadParam`` and
                ``afterThreadComplete`` maintain.

        Returns:
            The string ``"finished"`` in every case.
        """
        if self.__alsaDev:
            self.__alsaDev.close()
            # Forget the closed handle: if re-opening below fails, the
            # "is None" check must trigger instead of letting the loop write
            # to a device that has already been closed.
            self.__alsaDev = None
        print("alsa playback wav thread run: %d" % index)

        with wave.open(path, 'rb') as f:
            self.__rate = f.getframerate()
            self.__channels = f.getnchannels()
            self.__depthBits = f.getsampwidth() * 8
            self.__format = self.bitsToFormat(self.__depthBits)
            # One period holds rate / 100 frames, i.e. 10 ms of audio.
            self.__periodSize = int(self.__rate / 100)
            try:
                self.__alsaDev = alsaaudio.PCM(type = alsaaudio.PCM_PLAYBACK,
                                               mode = alsaaudio.PCM_NORMAL,
                                               rate = self.__rate,
                                               channels = self.__channels,
                                               format = self.__format,
                                               periodsize = self.__periodSize,
                                               device = "plughw:" + self.__devName)
            except Exception as e:
                print("alsaaudio open exception: ", e)

            if self.__alsaDev is None:
                print("open alsa audio device failed")
                self.clearThreadParam(index)
                return "finished"

            data = f.readframes(self.__periodSize)
            while data and self.__eStop == False:
                try:
                    self.__alsaDev.write(data)
                # NOTE(review): ALSAAudioError is used unqualified, so it must
                # be imported into module scope (e.g. from alsaaudio) - confirm.
                except ALSAAudioError as e:
                    print("alsa audio play except: ", e)
                    break
                data = f.readframes(self.__periodSize)

        self.afterThreadComplete(index)
        return "finished"

    def clearThreadParam(self, index):
        """Drop the pool entry for ``index`` and zero the cached audio parameters.

        Raises:
            KeyError: if ``index`` has no entry in the pool dictionary.
        """
        del self.__poolDict[index]
        # Reset the parameters of the stream that just ended (assigned left to right).
        self.__rate = self.__channels = self.__depthBits = 0
        self.__format = self.__periodSize = 0

    def afterThreadComplete(self, index):
        """Release the ALSA device and run the completion hook for ``index``.

        The device is closed and forgotten first. If a hook is registered for
        ``index`` it is invoked unless the stop flag is set, and is removed
        from the hook dictionary either way. Finally the per-thread
        parameters are cleared via ``clearThreadParam``.
        """
        self.__alsaDev.close()
        self.__alsaDev = None
        hookRegistered = index in self.__hookDict
        # The hook only fires when playback was not stopped explicitly.
        if hookRegistered and self.__eStop != True:
            self.__hookDict[index]()
        if hookRegistered:
            del self.__hookDict[index]
        self.clearThreadParam(index)

参考: https://www.programcreek.com/python/example/91453/alsaaudio.PCM

10. 使用 websocket 的时候, pip3 install --user websocket-client 而不是 websocket

11. pcm 转 wav

        # Convert the raw PCM file at `path` into a WAV file next to it (same
        # base name, ".wav" extension), then delete the PCM original.
        # `path`, `rate`, `channels` and `bits` come from the enclosing code,
        # which is not part of this fragment.
        (file, ext) = os.path.splitext(path)
        wavPath = file + ".wav"
        EspAudioUtil.pcmToWave(path, wavPath, rate, channels, bits)
        os.remove(path)

参考: https://stackoverflow.com/questions/16111038/how-to-convert-pcm-files-to-wav-files-scripting

12. 多通道音频抽取单通道数据

    @staticmethod
    def pcmExtractOneChannal(multiChannArray, channels, index):
        array = multiChannArray
        array.shape = -1, channels
        array = array.T
        return array[index]

    @staticmethod
    def pcmExtractOneChannalFile(multiPath, channels, index, dataBits, onePath):
        """Extract one channel from a raw multi-channel PCM file into a new file.

        Args:
            multiPath: Path of the interleaved multi-channel PCM file.
            channels: Number of interleaved channels in that file.
            index: Zero-based channel to extract.
            dataBits: Bits per sample; 8, 16 and 32 are supported.
            onePath: Path of the single-channel PCM file to write.

        Raises:
            ValueError: if ``dataBits`` is not a supported sample width.
        """
        # Samples are only moved around, never interpreted, so unsigned
        # containers of the right width are sufficient.
        sampleTypes = {8: np.uint8, 16: np.uint16, 32: np.uint32}
        if dataBits not in sampleTypes:
            raise ValueError("unsupported sample width: %r bits" % (dataBits,))
        with open(multiPath, 'rb') as f:
            audioData = np.fromfile(f, dtype = sampleTypes[dataBits])
        oneData = __class__.pcmExtractOneChannal(audioData, channels, index)
        oneData.tofile(onePath)

    @staticmethod
    def pcmExtractOneChannalBinary(multiBinary, channels, index, dataBits):
        """Extract one channel from interleaved multi-channel PCM bytes.

        Args:
            multiBinary: Bytes-like object with interleaved PCM samples.
            channels: Number of interleaved channels.
            index: Zero-based channel to extract.
            dataBits: Bits per sample; 8, 16 and 32 are supported.

        Returns:
            The extracted channel as ``bytes``.

        Raises:
            ValueError: if ``dataBits`` is not a supported sample width.
        """
        # Samples are only moved around, never interpreted, so unsigned
        # containers of the right width are sufficient.
        sampleTypes = {8: np.uint8, 16: np.uint16, 32: np.uint32}
        if dataBits not in sampleTypes:
            raise ValueError("unsupported sample width: %r bits" % (dataBits,))
        # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
        audioData = np.frombuffer(multiBinary, dtype = sampleTypes[dataBits])
        oneData = __class__.pcmExtractOneChannal(audioData, channels, index)
        return oneData.tobytes()

参考: https://www.pythonf.cn/read/128012

13. pcm 和 wave 互转

    @staticmethod
    def pcmToWave(pcmPath, wavPath, rate, channels, depthBits):
        with open(pcmPath, "rb") as pcmFile:
            print("pcm open")
            pcmData = pcmFile.read()
        with wave.open(wavPath, "wb") as wavFile:
            print(channels, int(depthBits / 8), rate)
            print(len(pcmData))
            wavFile.setparams((channels, int(depthBits / 8), rate, 0, 'NONE', 'NONE'))
            wavFile.writeframes(pcmData)

    @staticmethod
    def waveToPCM(wavPath, pcmPath, dataBits = 16):
        if dataBits == 16:
            dataType = np.uint16
        with open(wavPath, 'rb') as f:
            f.seek(0)
            f.read(44)
            data = np.fromfile(f, dtype = dataType)
            data.tofile(pcmPath)
        with wave.open(wavPath, 'rb') as f:
            return f.getparams()

参考: https://blog.csdn.net/sinat_37816910/article/details/105054372 https://docs.python.org/3/library/wave.html

14. 停止 baidu 的 websocket,需要发送 cancel,至于是否 self.ws.keep_running = False 不太确定

参考: https://www.coder.work/article/1269314

15. 播放音乐并立即停止

# Start playback in a child process, let it run for five seconds, then kill it.
# AUDIODEV in the command selects the output device for `play`.
cmd = "AUDIODEV=hw:realtekrt5651co play ~/esp_run/speech/test.wav"
sub = subprocess.Popen(cmd, shell = True)
# poll() returns None while the child is still running.
print(sub.poll())
time.sleep(5)
print("kill")
# The two timestamps bracket the kill() call to show how long it takes.
print(time.time())
# NOTE(review): with shell=True the child is the shell itself; confirm that
# killing it also stops the player.
sub.kill()  #sub.send_signal(signal.SIGKILL)
# sub.wait()
print(time.time())
print(sub.poll())
# Re-enable terminal echo - presumably because the killed player can leave
# the terminal with echo switched off; verify.
sub = subprocess.Popen("stty echo", shell = True)

16. 如果需要使用 alsaaudio 在录音的时候播放其他音频,那么录音可能会发生 overrun,主要是播放音频的 set 函数会导致 overrun,其他一些耗时的处理也会导致 overrun,比如 mp3 解码。

17. subprocess kill 之后调用 wait() 函数时,如果提示 EOFError,可以使用

stty sane

来恢复。

18. 寻找目录,寻找文件

def searchDir(path, dirName):
    """Return the path of the first directory named ``dirName`` below ``path``.

    The tree is walked with ``os.walk``; ``None`` is returned when no
    directory of that name is found.
    """
    hits = (
        os.path.join(parent, dirName)
        for parent, subDirs, _ in os.walk(path)
        if dirName in subDirs
    )
    return next(hits, None)

def searchFile(path, fileName):
    """Return the path of the first file named ``fileName`` below ``path``.

    The tree is walked with ``os.walk``; ``None`` is returned when no file
    of that name is found.
    """
    for entry in os.walk(path):
        folder, fileNames = entry[0], entry[2]
        if fileName not in fileNames:
            continue
        return os.path.join(folder, fileName)
    return None

19. 需要依赖的文件收集

def assembleDepends(path):
    """Collect the ``esp_*`` modules imported by the sources under ``path``.

    ``grep`` is run recursively over ``path`` for lines containing
    "import"; each hit that is an ``import ...`` or ``from ... import ...``
    statement is parsed, and every module whose name starts with ``esp_`` is
    recorded. Comment lines (indented or not), trailing comments and
    ``as`` aliases are ignored.

    Args:
        path: File or directory to scan. A leading ``~`` is expanded.

    Returns:
        A dict mapping each ``esp_*`` module name to ``1``; empty when
        nothing matches or ``grep`` cannot be started.
    """
    import subprocess  # local import keeps the function self-contained

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to grep untouched. "-h" suppresses the
    # "file:" prefix so lines look the same for file and directory targets.
    cmd = ["grep", "-R", "-h", "import", "--", os.path.expanduser(path)]
    try:
        result = subprocess.run(cmd, stdout = subprocess.PIPE,
                                universal_newlines = True, errors = "replace")
    except OSError as e:
        print("grep failed: ", e)
        return {}

    dependDict = {}
    for line in result.stdout.splitlines():
        # Drop comments first, then tokenize on whitespace.
        tokens = line.split("#", 1)[0].split()
        if len(tokens) < 2:
            continue
        if tokens[0] == "from":
            modules = [tokens[1]]
        elif tokens[0] == "import":
            # "import a, b as c" -> ["a", "b"]
            names = " ".join(tokens[1:]).split(",")
            modules = [name.split()[0] for name in names if name.split()]
        else:
            continue
        for module in modules:
            if module.startswith("esp_"):
                dependDict[module] = 1
    return dependDict

20. 降噪算法效果好,耗时低的是 WebRTC, python 可以使用 https://github.com/xiongyihui/python-webrtc-audio-processing 这边的代码。

标签: C, python

添加新评论