查看: 766|回复: 0

[Micropython] 玩转DFRobot ESP32-P4开发板，用Micropython仿制小智

[复制链接]

本帖最后由 PY学习笔记于 2025-8-25 17:07 编辑

近期，DFRobot 推出了全新开发板 FireBeetle 2 ESP32-P4。这块开发板搭载了 ESP32-P4 主控，虽未集成 Wi-Fi 与蓝牙，但凭借强劲性能，依然令人眼前一亮。很荣幸能抢先体验这块开发板！

1.开发板介绍

FireBeetle 2 ESP32-P4有很多种外设:

Type-C USB CDC：Type-C USB烧录、调试接口
IO3/LED：板载LED引脚
Power LED：主板电源指示灯
RST：复位按键
IO35/BOOT：IO引脚/BOOT按键
MIC: MEMS PDM麦克风
HIGH-SPEED USB OTG 2.0: Type-C高速USB OTG 2.0
ESP32-P4：ESP32-P4芯片
MIPI-DSI: 两通道MIPI-DSI屏幕（兼容树莓派4B DSI屏幕线序）
MIPI-CSI: 两通道MIPI-CSI摄像头接口（兼容树莓派4B CSI摄像头线序）
TF Card: TF卡插槽
16MB FLASH: 16MB Flash存储
ESP32-C6：ESP32-C6-MINI-1模组，通过SDIO与ESP32-P4连接，用于扩展WiFi、蓝牙

2.实现原理

由于 MicroPython 不支持离线语音识别与合成，因此语音识别、语音合成和 AI 对话均采用在线方式：语音识别和 AI 大模型使用 siliconflow 的免费模型，语音合成使用百度语音合成（实名认证后免费 5 万次）。再结合 ST7789 屏幕（使用 MIPI 屏幕会出问题）、MAX98357 功放模块以及板载的 PDM 麦克风，即可实现。

3.代码实现

参考代码如下：

库文件.zip

# 录音+语音识别+AI对话+语音合成的完整闭环
import network, urequests, ujson, gc, time, st7789_spi, baidu_tts
from easydisplay import EasyDisplay
from machine import Pin, I2S, SPI
# ---------- Global configuration ----------
# Wi-Fi credentials used by connect_wifi().
WIFI_SSID = "SSID"
WIFI_PASS = "PWD"
# Bearer token sent to the siliconflow endpoints below (ASR and chat).
API_KEY   = "API-KEY"
# Credentials passed to baidu_tts.run() for speech synthesis.
TTS_API_KEY = "API-KEY"
TTS_SEC_KEY = "SEC-KEY"

# Online speech-recognition and chat-completion endpoints.
ASR_URL   = "https://api.siliconflow.cn/v1/audio/transcriptions"
CHAT_URL  = "https://api.siliconflow.cn/v1/chat/completions"

# Model identifiers sent with the ASR and chat requests respectively.
ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
LLM_MODEL = "Qwen/Qwen3-8B"

# I2S pins (clock and data lines of the microphone input)
SCK_PIN = 12
SD_PIN  = 9
boot = Pin(35, Pin.IN, Pin.PULL_UP)   # BOOT button, reads 0 when pressed
# LED on IO3, switched by the "开灯" / "关灯" voice commands.
led = Pin(3,Pin.OUT)
# SPI bus and ST7789 panel (240x280) used as the text display.
spi = SPI(2, baudrate=20000000, polarity=0, phase=0, sck=Pin(28), mosi=Pin(29))
dp = st7789_spi.ST7789(width=240, height=280, spi=spi, cs=20, dc=4, res=30, rotate=1,invert=False, rgb=False)
# Text renderer on top of the panel; the .bmf font file must exist on the device filesystem.
ed = EasyDisplay(dp, "RGB565", font="/text_lite_16px_2312.v3.bmf", show=True, color=0xFFFF, clear=True,auto_wrap=True)
# ---------- Networking ----------
def connect_wifi():
    """Bring up the station interface and block until it is connected.

    If the interface is already connected (for example after a soft reset,
    where the link can survive), the connect request is skipped instead of
    being issued a second time.

    Returns:
        The active ``network.WLAN`` station object.
    """
    sta = network.WLAN(network.STA_IF)
    sta.active(True)
    if not sta.isconnected():
        sta.connect(WIFI_SSID, WIFI_PASS)
        # Poll until the link is up; there is no timeout, matching the
        # original blocking behaviour.
        while not sta.isconnected():
            time.sleep(0.5)
    print("Wi-Fi OK:", sta.ifconfig()[0])
    return sta

def record_audio(sr=8000):
    """Record from the microphone for as long as the BOOT button is held.

    Blocks until BOOT is pressed, then reads 16-bit mono samples from I2S
    until the button is released.

    Args:
        sr: Sample rate returned to the caller alongside the recording.

    Returns:
        A ``(pcm, sr)`` tuple: ``pcm`` is a bytearray holding every byte read
        from the I2S peripheral, ``sr`` is the argument unchanged.
    """
    # Wait for the button to be pressed (the pin reads 1 while released).
    print("长按 BOOT 开始录音...")
    while boot.value() == 1:
        time.sleep_ms(10)

    # Start recording
    pcm = bytearray()
    # NOTE(review): the peripheral is configured at 4x `sr` while `sr` itself
    # is what gets returned -- presumably a workaround for how the PDM driver
    # derives its clock; confirm against the firmware in use.
    audio = I2S(0,
                sck=Pin(SCK_PIN),
                sd=Pin(SD_PIN),
                mode=I2S.PDM_RX,
                bits=16,
                format=I2S.MONO,
                rate=sr * 4,
                ibuf=10240)

    chunk = bytearray(1024)
    # A memoryview lets us slice the filled part of the buffer without
    # allocating a temporary copy on every iteration.
    filled = memoryview(chunk)
    try:
        # Keep reading while the button is still held (0 means pressed).
        print("录音中，松开 BOOT 结束...")
        while boot.value() == 0:
            n = audio.readinto(chunk)
            pcm.extend(filled[:n])
    finally:
        # Always release the I2S peripheral, even if the loop raised
        # (e.g. MemoryError on a very long recording).
        audio.deinit()
    return pcm, sr

# ---------- Build WAV ----------
def wav_header(data_len, sample_rate):
    """Return the 44-byte RIFF/WAVE header for 16-bit mono PCM data.

    Args:
        data_len: Size of the PCM payload in bytes.
        sample_rate: Samples per second written into the header.

    Returns:
        A bytearray containing the header only (no audio data).
    """
    def le(value, width):
        # All multi-byte header fields are little-endian.
        return value.to_bytes(width, 'little')

    fields = (
        b'RIFF',
        le(data_len + 36, 4),       # RIFF chunk size: header remainder + payload
        b'WAVE',
        b'fmt ',
        le(16, 4),                  # size of the fmt chunk
        le(1, 2),                   # PCM
        le(1, 2),                   # mono
        le(sample_rate, 4),
        le(sample_rate * 2, 4),     # byte rate: 2 bytes per sample
        le(2, 2),                   # block align
        le(16, 2),                  # bits per sample
        b'data',
        le(data_len, 4),
    )
    header = bytearray()
    for field in fields:
        header.extend(field)
    return header

# ---------- Speech recognition ----------
def speech_to_text(pcm, sr):
    """Upload a recording to the ASR endpoint and return the recognised text.

    The PCM data is wrapped in a WAV container and sent as a
    multipart/form-data POST together with the model name.

    Args:
        pcm: Raw PCM bytes as returned by record_audio().
        sr: Sample rate written into the WAV header.

    Returns:
        The recognised text with surrounding whitespace removed, or an empty
        string when the server answers with a non-200 status or the response
        carries no text.
    """
    boundary = "----VoiceBoundary"
    delimiter = b"--" + boundary.encode()

    body  = delimiter + b"\r\n"
    body += b'Content-Disposition: form-data; name="file"; filename="mic.wav"\r\n'
    body += b"Content-Type: audio/wav\r\n\r\n"
    # Append header and samples separately so the recording is not copied
    # into an intermediate WAV buffer first.
    body += wav_header(len(pcm), sr)
    body += pcm
    body += b"\r\n" + delimiter + b"\r\n"
    body += b'Content-Disposition: form-data; name="model"\r\n\r\n'
    body += ASR_MODEL.encode()
    body += b"\r\n" + delimiter + b"--\r\n"

    headers = {
        "Authorization": "Bearer " + API_KEY,
        "Content-Type": "multipart/form-data; boundary=" + boundary
    }
    print("识别中…")
    res = urequests.post(ASR_URL, data=body, headers=headers)
    try:
        if res.status_code != 200:
            # Same error reporting as chat_with_ai(); the caller treats an
            # empty result as "nothing recognised".
            print("Error:", res.status_code, res.text)
            return ""
        # Guard against a missing or null "text" field.
        text = res.json().get("text") or ""
    finally:
        # Release the socket even when the body could not be parsed.
        res.close()
        gc.collect()
    return text.strip()

# ---------- Chat ----------
def chat_with_ai(text):
    """Send the user's text to the chat model, then show and speak the reply.

    Args:
        text: The recognised user utterance.

    Returns:
        The model's reply with newlines removed, or an empty string when the
        request did not return status 200. An empty reply is neither
        displayed nor spoken.
    """
    headers = {
        "Authorization": "Bearer " + API_KEY,
        "Content-Type": "application/json"
    }
    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": "你是我的AI助手小智，你必须用中文回答且不超过100字还不允许使用MD进行回答"},
            {"role": "user", "content": text}
        ],
        "enable_thinking":False,
    }
    print("AI思考中…")
    # ticks_ms()/ticks_diff() give millisecond resolution, so the one-decimal
    # latency shown below is meaningful.
    start = time.ticks_ms()
    res = urequests.post(CHAT_URL, data=ujson.dumps(payload).encode(), headers=headers)
    delta = time.ticks_diff(time.ticks_ms(), start) / 1000

    reply = ""
    try:
        if res.status_code == 200:
            reply = res.json()['choices'][0]['message']['content'].replace("\n", "")
        else:
            print("Error:", res.status_code, res.text)
    finally:
        # Close the response before starting the TTS request so its socket
        # and buffers are released first, and even if parsing failed.
        res.close()
        gc.collect()

    if reply:
        print(f"({delta:.1f}s) AI:", reply)
        ed.text(f"({delta:.1f}s) AI:"+reply, 0, 50)
        baidu_tts.run(
            access=TTS_API_KEY,
            secret=TTS_SEC_KEY,
            text=reply,
        )
    return reply

# ---------- Main loop ----------
def _announce(message):
    """Print, display and speak a fixed assistant reply."""
    print("AI:" + message)
    ed.text("AI:" + message, 0, 50)
    baidu_tts.run(
        access=TTS_API_KEY,
        secret=TTS_SEC_KEY,
        text=message,
    )


def main():
    """Run the assistant: record, recognise, then act on the text, forever.

    Utterances containing "开灯" / "关灯" switch the LED on / off locally;
    anything else is forwarded to the chat model. An empty recognition result
    triggers a spoken "please repeat" prompt.
    """
    connect_wifi()
    while True:
        pcm, sr = record_audio()
        text = speech_to_text(pcm, sr)
        # The raw recording is no longer needed; free it before the chat and
        # TTS requests allocate their own buffers.
        del pcm
        gc.collect()

        if not text:
            print("没听清，请再说一遍")
            baidu_tts.run(
                access=TTS_API_KEY,
                secret=TTS_SEC_KEY,
                text="没听清，请再说一遍",
                out_path='welcome.wav'
            )
            continue

        print("你:", text)
        if "开灯" in text:
            led.on()
            _announce("LED灯已开启")
        elif "关灯" in text:
            led.off()
            _announce("LED灯已关闭")
        else:
            chat_with_ai(text)

if __name__ == "__main__":
    main()
复制代码