demo 模块 pyaudio 声音处理 为语音识别准备
阅读原文时间:2023年07月10日阅读:1

demo 模块 pyaudio 声音处理

为语音识别准备

直接上例子

import pyaudio
import numpy as np

class QAudio:
    """Thin wrapper around a PyAudio microphone input stream.

    An instance starts unopened. Call ``open()`` before ``read()`` and
    ``close()`` when finished, or use the instance as a context manager.
    """

    CHUNK = 512                 # frames requested per read() call
    FORMAT = pyaudio.paInt16    # sample format handed to PyAudio
    CHANNELS = 1                # number of input channels
    RATE = 48000                # sampling rate passed to PyAudio

    def __init__(self):
        # Both handles stay None until open() is called.
        self.paudio = None
        self.stream = None

    def __enter__(self):
        """Open the input stream and return this instance."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the stream and PyAudio; exceptions are not suppressed."""
        self.close()
        return False

    def open(self):
        """Create the PyAudio instance and open the input stream.

        Anything opened by an earlier call is closed first. If the stream
        cannot be opened, the new PyAudio instance is terminated before the
        exception is re-raised, so nothing is leaked.
        """
        self.close()
        paudio = pyaudio.PyAudio()
        try:
            stream = paudio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)
        except Exception:
            paudio.terminate()
            raise
        self.paudio = paudio
        self.stream = stream

    def read(self):
        """Read ``CHUNK`` frames from the input stream and return them.

        ``open()`` must have been called first.
        """
        return self.stream.read(self.CHUNK)

    def close(self):
        """Stop and close the stream, then terminate PyAudio.

        Safe to call before ``open()`` and safe to call more than once.
        """
        # Detach the handles first so a failure below cannot leave stale
        # references behind.
        stream, self.stream = self.stream, None
        paudio, self.paudio = self.paudio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if paudio is not None:
                paudio.terminate()

def main():
    """Print a live bar for the peak sample value of each microphone chunk.

    Runs until interrupted with Ctrl-C; the audio device is released on the
    way out, whether the loop ends by interrupt or by an error.
    """
    a = QAudio()
    a.open()
    print("开始聆听...")
    try:
        while True:
            data = a.read()
            # np.fromstring's binary mode is deprecated; frombuffer is the
            # supported way to view raw bytes as an integer array.
            audio_data = np.frombuffer(data, dtype=np.short)
            temp = np.max(audio_data)
            # A non-positive peak yields an empty bar.
            print(f'当前声音强度值: {"*"*(temp//100)}')
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop the endless loop.
        pass
    finally:
        a.close()


if __name__ == '__main__':
    main()



import pyaudio
import wave

class QAudio:
    """Record from the microphone, save to a WAV file and play it back.

    An instance starts unopened. Call ``open()`` before reading or playing
    and ``close()`` when finished, or use the instance as a context manager.
    """

    CHUNK = 512                 # frames requested per stream read
    FORMAT = pyaudio.paInt16    # sample format handed to PyAudio
    CHANNELS = 1                # number of input channels
    RATE = 48000                # sampling rate passed to PyAudio

    def __init__(self):
        # Both handles stay None until open() is called.
        self.paudio = None
        self.stream = None

    def __enter__(self):
        """Open the input stream and return this instance."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the stream and PyAudio; exceptions are not suppressed."""
        self.close()
        return False

    def open(self):
        """Create the PyAudio instance and open the input stream.

        Anything opened by an earlier call is closed first. If the stream
        cannot be opened, the new PyAudio instance is terminated before the
        exception is re-raised, so nothing is leaked.
        """
        self.close()
        paudio = pyaudio.PyAudio()
        try:
            stream = paudio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)
        except Exception:
            paudio.terminate()
            raise
        self.paudio = paudio
        self.stream = stream

    def read(self):
        """Read ``CHUNK`` frames from the input stream and return them."""
        return self.stream.read(self.CHUNK)

    def read_s(self, time=1):
        """Record about ``time`` seconds and return the chunks as a list.

        ``time`` may be an int or a float. The chunk count is computed from
        the total number of frames requested, so rounding happens only once
        instead of once per second.
        """
        n_chunks = int(self.RATE * time / self.CHUNK)
        return [self.stream.read(self.CHUNK) for _ in range(n_chunks)]

    def close(self):
        """Stop and close the stream, then terminate PyAudio.

        Safe to call before ``open()`` and safe to call more than once.
        """
        # Detach the handles first so a failure below cannot leave stale
        # references behind.
        stream, self.stream = self.stream, None
        paudio, self.paudio = self.paudio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if paudio is not None:
                paudio.terminate()

    def save(self, filename='tmp.wav', data=None):
        """Write recorded chunks to ``filename`` as a WAV file.

        ``data`` is an iterable of byte chunks as returned by ``read_s()``;
        omitting it writes a WAV file with no frames.
        """
        # Join before opening so bad input does not truncate an existing file.
        frames = b"" if data is None else b"".join(data)
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(self.CHANNELS)
            # Derive the sample width from FORMAT so the two cannot drift.
            wf.setsampwidth(pyaudio.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(frames)

    def play(self, path="tmp.wav"):
        """Play the WAV file at ``path`` through an output stream.

        Requires ``open()`` to have been called. The wave file and the
        output stream are both released even if playback fails.
        """
        chunk = 1024  # frames read from the file per write
        with wave.open(path, "rb") as wav_file:
            sample_format = self.paudio.get_format_from_width(
                wav_file.getsampwidth())
            fstream = self.paudio.open(format=sample_format,
                                       channels=wav_file.getnchannels(),
                                       rate=wav_file.getframerate(),
                                       output=True)
            try:
                data = wav_file.readframes(chunk)
                while data:
                    fstream.write(data)
                    data = wav_file.readframes(chunk)
                fstream.stop_stream()
            finally:
                fstream.close()

def main():
    """Record 10 seconds, save them to the default file and play it back.

    The audio device is released even if recording, saving or playback
    raises.
    """
    a = QAudio()
    a.open()
    try:
        print('开始录音')
        data = a.read_s(10)
        print('保存录音')
        a.save(data=data)
        print('播放声音')
        a.play()
    finally:
        a.close()


if __name__ == '__main__':
    main()

为语音识别准备

import pyaudio
import wave
import numpy as np

class QAudio:
    """Microphone capture with simple level-triggered utterance recording.

    An instance starts unopened. Call ``open()`` before reading or playing
    and ``close()`` when finished, or use the instance as a context manager.
    """

    CHUNK = 512                 # frames requested per stream read
    FORMAT = pyaudio.paInt16    # sample format handed to PyAudio
    CHANNELS = 1                # number of input channels
    RATE = 48000                # sampling rate passed to PyAudio

    def __init__(self):
        # Both handles stay None until open() is called.
        self.paudio = None
        self.stream = None

    def __enter__(self):
        """Open the input stream and return this instance."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the stream and PyAudio; exceptions are not suppressed."""
        self.close()
        return False

    def open(self):
        """Create the PyAudio instance and open the input stream.

        Anything opened by an earlier call is closed first. If the stream
        cannot be opened, the new PyAudio instance is terminated before the
        exception is re-raised, so nothing is leaked.
        """
        self.close()
        paudio = pyaudio.PyAudio()
        try:
            stream = paudio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)
        except Exception:
            paudio.terminate()
            raise
        self.paudio = paudio
        self.stream = stream

    def read(self):
        """Read ``CHUNK`` frames from the input stream and return them."""
        return self.stream.read(self.CHUNK)

    def read_s(self, time=1.0):
        """Record about ``time`` seconds and return the chunks as a list.

        The chunk count is computed from the total number of frames
        requested, so rounding happens only once instead of once per second.
        """
        n_chunks = int(self.RATE * time / self.CHUNK)
        return [self.stream.read(self.CHUNK) for _ in range(n_chunks)]

    def close(self):
        """Stop and close the stream, then terminate PyAudio.

        Safe to call before ``open()`` and safe to call more than once.
        """
        # Detach the handles first so a failure below cannot leave stale
        # references behind.
        stream, self.stream = self.stream, None
        paudio, self.paudio = self.paudio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if paudio is not None:
                paudio.terminate()

    def save(self, filename='tmp.wav', data=None):
        """Write recorded chunks to ``filename`` as a WAV file.

        ``data`` is an iterable of byte chunks as returned by ``read_s()``
        or ``read_auto()``; omitting it writes a WAV file with no frames.
        """
        # Join before opening so bad input does not truncate an existing file.
        frames = b"" if data is None else b"".join(data)
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(self.CHANNELS)
            # Derive the sample width from FORMAT so the two cannot drift.
            wf.setsampwidth(pyaudio.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(frames)

    def play(self, path="tmp.wav"):
        """Play the WAV file at ``path`` through an output stream.

        Requires ``open()`` to have been called. The wave file and the
        output stream are both released even if playback fails.
        """
        chunk = 1024  # frames read from the file per write
        with wave.open(path, "rb") as wav_file:
            sample_format = self.paudio.get_format_from_width(
                wav_file.getsampwidth())
            fstream = self.paudio.open(format=sample_format,
                                       channels=wav_file.getnchannels(),
                                       rate=wav_file.getframerate(),
                                       output=True)
            try:
                data = wav_file.readframes(chunk)
                while data:
                    fstream.write(data)
                    data = wav_file.readframes(chunk)
                fstream.stop_stream()
            finally:
                fstream.close()

    def read_auto(self, threshold=400, max_quiet_chunks=1, min_chunks=None):
        """Block until a sufficiently long loud segment has been captured.

        Each chunk's peak sample value is compared with ``threshold``.
        Capture starts with the first chunk at or above it. After that,
        quiet chunks are skipped (they are not stored), and the attempt ends
        once more than ``max_quiet_chunks`` of them have been seen in total.
        An attempt that stored ``min_chunks`` chunks or fewer is discarded
        and listening starts over.

        Args:
            threshold: Minimum peak sample value for a chunk to count as loud.
            max_quiet_chunks: Quiet chunks tolerated after capture started.
            min_chunks: An attempt must store more chunks than this to be
                returned. Defaults to ``(RATE // CHUNK) // 3``.

        Returns:
            The list of loud chunks, in capture order.
        """
        if min_chunks is None:
            min_chunks = (self.RATE // self.CHUNK) // 3

        self.read_s(0.1)  # read and discard ~0.1 s to clear the buffer first
        while True:
            data = []
            started = False
            quiet_left = max_quiet_chunks
            while True:
                result = self.read()
                # np.fromstring's binary mode is deprecated; frombuffer is
                # the supported way to view raw bytes as an integer array.
                audio_data = np.frombuffer(result, dtype=np.short)
                temp = np.max(audio_data)
                print(f'当前声音强度值: {"*" * (temp // 100)}')
                if temp >= threshold:
                    started = True
                    data.append(result)
                    continue
                if not started:
                    # Still waiting for the first loud chunk.
                    continue
                quiet_left -= 1
                if quiet_left < 0:
                    break
            print(len(data))
            if len(data) > min_chunks:
                return data

def main():
    """Loop forever: capture a loud segment, save it and play it back.

    Runs until interrupted with Ctrl-C; the audio device is released on the
    way out, whether the loop ends by interrupt or by an error.
    """
    a = QAudio()
    a.open()
    try:
        while True:
            print('开始录音')
            data = a.read_auto()
            print('保存录音')
            a.save(data=data)
            print('播放声音')
            a.play()

            # Read and discard input to clear the buffer and avoid echoing
            # the playback. True division keeps the discard time at least as
            # long as the clip just played, plus a 0.5 s margin; floor
            # division could come out shorter than the clip.
            a.read_s(len(data) * a.CHUNK / a.RATE + 0.5)
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop the endless loop.
        pass
    finally:
        a.close()


if __name__ == '__main__':
    main()