行空板之“此时此景”吟诗精灵 DF创客社区

【项目背景】

在日常生活中，我们经常会遇到令人心旷神怡的美景，无论是壮丽的山河、绚烂的日落，还是城市的繁华夜景，这些瞬间总能触动我们的心灵，激发我们想要用言语来表达赞美和情感的冲动。然而，并非每个人都有丰富的词汇量和文学素养，能够即兴创作出优美的诗句来充分表达内心的感受。为了解决这一问题，我们开发了这个项目，旨在通过技术手段帮助人们捕捉和表达对美景的感悟。

【项目设计】

该项目通过行空板与USB摄像头、蓝牙音箱的结合，利用Python编程，实现了一个智能的图像识别和诗歌创作系统。当用户在看到美景并按下按钮时，系统会自动拍摄照片，并通过OpenCV库进行图像处理。接着，将图像上传至百度AI平台，利用fuyu_8b模型进行图片理解，生成描述性文本。然后，这些文本被送入百度的大语言模型中，提炼出主题，并据此创作出一首诗歌。最后，通过语音合成技术，将诗歌转化为音频，并通过蓝牙音箱播放，让用户能够以一种新颖而富有创意的方式，表达对美景的赞美和情感。这个项目不仅丰富了人们的表达方式，也使得艺术创作变得更加便捷和普及。

【项目硬件】

行空板之“此时此景”吟诗精灵图2

行空板之“此时此景”吟诗精灵图4

行空板之“此时此景”吟诗精灵图5

行空板之“此时此景”吟诗精灵图1

【百度智能云千帆大模型】

本项目使用百度智能云千帆大模型，大语言模型使用ERNIE 4.0。

行空板之“此时此景”吟诗精灵图6

图像理解模型使用Fuyu-8B。

行空板之“此时此景”吟诗精灵图7

【程序编写】

1.OpenCV获取摄像头图像


#  -*- coding: UTF-8 -*-

# MindPlus
# Python
import sys
sys.path.append("/root/mindplus/.lib/thirdExtension/nick-base64-thirdex")
import cv2
from pinpong.board import Board,Pin
from pinpong.extension.unihiker import *
import base64
from io import BytesIO
from PIL import Image


# Initialise the pinpong board before any Pin object is created.
Board().begin()
# P21 is opened as an input; the preview loop further down polls it with
# read_digital() to decide when to capture a picture.
p_p21_in=Pin(Pin.P21, Pin.IN)

def frame2base64(frame):
    """Encode an OpenCV BGR frame as a JPEG and return it base64-encoded.

    The frame is converted from BGR to RGB, wrapped in a PIL image, saved
    as JPEG into an in-memory buffer, and the buffer contents are
    base64-encoded.

    :param frame: image array in OpenCV's BGR channel order.
    :return: base64 data as ``bytes``.
    """
    rgb_pixels = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    jpeg_buffer = BytesIO()
    # PIL does the JPEG encoding; nothing is written to disk.
    Image.fromarray(rgb_pixels).save(jpeg_buffer, format='JPEG')
    return base64.b64encode(jpeg_buffer.getvalue())

def base642base64(frame):
    """Return *frame* as a base64-encoded JPEG in a plain ``str``.

    ``frame2base64`` returns ``bytes``. Base64 output consists of ASCII
    characters only, so decoding as ASCII yields the text directly; this
    avoids slicing the ``b'...'`` wrapper off ``str(bytes)``, which relies
    on the exact repr format. No ``data:image/...;base64,`` prefix is added.

    :param frame: image array accepted by ``frame2base64``.
    :return: base64 text.
    """
    return frame2base64(frame).decode("ascii")
# Open the first video device (index 0).
vd = cv2.VideoCapture()
vd.open(0)
# isOpened() cannot change unless open() is called again, so looping on it
# would spin forever when the camera is unavailable; fail fast instead.
if not vd.isOpened():
    raise RuntimeError("Cannot open camera 0")
cv2.namedWindow("Mind+'s Windows", cv2.WINDOW_NORMAL)
cv2.setWindowProperty("Mind+'s Windows", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

while True:
    # read() already performs grab() + retrieve(); calling grab() first as
    # well would throw away every other frame.
    ret, grab = vd.read()
    if not ret:
        # No frame was delivered; do not hand an empty frame to imshow.
        continue
    cv2.imshow("Mind+'s Windows", grab)
    # waitKey gives the window time to process events; the key code is unused.
    cv2.waitKey(20)
    # A high level on P21 triggers a capture: encode the frame and print it.
    if p_p21_in.read_digital():
        picbase64 = base642base64(grab)
        print(picbase64)

复制代码

2.图片理解


import requests
import json

# Credentials that get_access_token() sends to the OAuth token endpoint.
# NOTE(review): API_KEY looks like a real key rather than a masked
# placeholder — confirm, and if so rotate it and keep it out of the listing.
API_KEY = "Ef8EeI3loPIqIbxxTWZnh0av"
SECRET_KEY = "****************************"

def main():
    """Ask the fuyu_8b image-to-text endpoint to describe a picture and print the raw reply.

    The ``image`` field is empty in this listing; put the base64-encoded
    picture there before running.
    """
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/image2text/fuyu_8b?access_token=" + get_access_token()

    payload = json.dumps({
        "prompt": "请描述图中美景",
        "image": ""  # base64-encoded image goes here
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)

    print(response.text)
    

def get_access_token():
    """Exchange the API key / secret key (AK, SK) for an OAuth access token.

    Reads the module-level ``API_KEY`` and ``SECRET_KEY``.

    :return: the access token as a ``str``. When the reply has no
        ``access_token`` field the result is the literal string ``"None"``
        (not the ``None`` object), because callers concatenate the value
        straight into a URL.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # A timeout keeps a dead network from blocking the program forever.
    response = requests.post(url, params=params, timeout=10)
    return str(response.json().get("access_token"))

# Send the demo request only when this file is executed as a script.
if __name__ == '__main__':
    main()

复制代码

3.写诗



import requests
import json

# Masked placeholders: substitute the API key and secret key of your own
# application before running; get_access_token() reads both.
API_KEY = "**********************"
SECRET_KEY = "……………………………………"

def main():
    """Send a scene description to the chat/completions_pro endpoint and print the poem.

    The user message is a fixed sample description; the ``system`` prompt
    asks the model to act as a poet. If the reply contains no ``result``
    field the whole decoded reply is printed instead.
    """
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=" + get_access_token()

    payload = json.dumps({
        "messages": [
            {
                "role": "user",
                "content": "In the image , a city street is shown with several buildings. A person is crossing the street  in the middle of the scene , and there is a bike parked on the right side of the street.\n\nThe street is surrounded by trees, some of which can be seen  on the left side . There are four cars parked on the street , two near the center, one on the right side , and one further to the right . Additionally, there are two pedestrians  in the middle of the street , one closer to the right and the other further to the left .\n\n The overall scene conveys a sense of urban life, with the lush green trees, people, cars, and bicycles contributing to the city scape."
            }
        ],
        "temperature": 0.95,
        "top_p": 0.8,
        "penalty_score": 1,
        "enable_system_memory": True,
        "system_memory_id": "sm-upmjb9yaya0gtr45",
        "system": "你是一位诗人，能根据用户提供的描述，提练出主题，并做诗一首。例如：此时此景，我要吟诗一首……",
        "disable_search": False,
        "enable_citation": False
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)

    result = json.loads(response.text)

    # A reply without "result" (presumably an error reply) used to raise
    # KeyError here; print the whole reply so the cause is visible.
    if "result" in result:
        print(result['result'])
    else:
        print(result)
    

def get_access_token():
    """Exchange the API key / secret key (AK, SK) for an OAuth access token.

    Reads the module-level ``API_KEY`` and ``SECRET_KEY``.

    :return: the access token as a ``str``. When the reply has no
        ``access_token`` field the result is the literal string ``"None"``
        (not the ``None`` object), because callers concatenate the value
        straight into a URL.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # A timeout keeps a dead network from blocking the program forever.
    response = requests.post(url, params=params, timeout=10)
    return str(response.json().get("access_token"))

# Send the demo request only when this file is executed as a script.
if __name__ == '__main__':
    main()

复制代码

4.语音合成


#  -*- coding: UTF-8 -*-

# MindPlus
# Python
import sys
sys.path.append("/root/mindplus/.lib/thirdExtension/nick-base64-thirdex")
from df_xfyun_speech import XfTts

# Credentials handed to XfTts (apiSecret is masked in this listing).
appId = "5c7a6af2"
apiKey = "94932090baf7bb1eae2200ace714f424"
apiSecret = "*******************"

# Synthesis settings, passed to the client under the "business_args" key.
business_args = dict(
    aue="raw",
    vcn="aisjinger",
    tte="utf8",
    speed=50,
    volume=50,
    pitch=50,
    bgs=0,
)
options = {"business_args": business_args}

tts = XfTts(appId, apiKey, apiSecret, options)
# Synthesise a short greeting into speech.wav.
tts.synthesis("你好, Mind+", "speech.wav")

复制代码

5.播放音频


import pyaudio
import wave

# Open the WAV file; the context manager closes it even if playback fails.
with wave.open('your_file.wav', 'rb') as wf:
    # Create the PyAudio object
    p = pyaudio.PyAudio()
    try:
        # Open an output stream matching the file's sample width,
        # channel count and frame rate
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        try:
            # Feed the stream 1024 frames at a time until the file is exhausted
            data = wf.readframes(1024)
            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(1024)
        finally:
            # Stop and close the stream
            stream.stop_stream()
            stream.close()
    finally:
        # Shut down PyAudio
        p.terminate()
复制代码

6.完整程序


#  -*- coding: UTF-8 -*-

# MindPlus
# Python
import sys
sys.path.append("/root/mindplus/.lib/thirdExtension/nick-base64-thirdex")
import cv2
from pinpong.board import Board,Pin
from pinpong.extension.unihiker import *
import base64
from io import BytesIO
from PIL import Image
import requests
import json
from df_xfyun_speech import XfTts
import pyaudio
import wave

# Credentials handed to XfTts (apiSecret is masked in this listing).
appId = "5c7a6af2"
apiKey = "94932090baf7bb1eae2200ace714f424"
apiSecret = "********************"

# Synthesis settings, passed to the client under the "business_args" key.
business_args = dict(
    aue="raw",
    vcn="x2_xiaoqian",
    tte="utf8",
    speed=50,
    volume=50,
    pitch=50,
    bgs=0,
)
options = {"business_args": business_args}

# Shared TTS client; playwav() uses it to synthesise speech.wav.
tts = XfTts(appId, apiKey, apiSecret, options)
def get_access_token():
    """Exchange the API key / secret key (AK, SK) for an OAuth access token.

    Reads the module-level ``API_KEY`` and ``SECRET_KEY``, which must be
    assigned before this function is called.

    :return: the access token as a ``str``. When the reply has no
        ``access_token`` field the result is the literal string ``"None"``
        (not the ``None`` object), because callers concatenate the value
        straight into a URL.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # A timeout keeps a dead network from blocking the program forever.
    response = requests.post(url, params=params, timeout=10)
    return str(response.json().get("access_token"))

def image2text(url,base64image):
    """POST a picture to the image-to-text endpoint and return the raw reply body.

    :param url: full endpoint URL, access_token query parameter included.
    :param base64image: base64-encoded image text.
    :return: the response body as text; the caller parses it as JSON.
    """
    payload = json.dumps({
        "prompt": "请描述图中美景",
        "image": base64image
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # Bound the wait so a stalled connection cannot freeze the caller forever.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)

    return response.text
# Full-screen window used for both the splash image and the camera preview.
cv2.namedWindow("Mind+'s Windows", cv2.WINDOW_NORMAL)
cv2.setWindowProperty("Mind+'s Windows", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
# Show a splash image during start-up. imread() returns None (it does not
# raise) when the file is missing or unreadable, and rotate() would then
# fail, so the splash is skipped in that case.
img = cv2.imread("back.png", cv2.IMREAD_UNCHANGED)
if img is not None:
    img = cv2.rotate(img,cv2.ROTATE_90_CLOCKWISE)
    cv2.imshow("Mind+'s Windows", img)
    # waitKey gives the window time to process events; the key code is unused.
    cv2.waitKey(20)
else:
    print("back.png could not be read, skipping splash image")
# Initialise the pinpong board, then open P21 as the input the main loop polls.
Board().begin()
p_p21_in=Pin(Pin.P21, Pin.IN)

def frame2base64(frame):
    """Encode an OpenCV BGR frame as a JPEG and return it base64-encoded.

    The frame is converted from BGR to RGB, wrapped in a PIL image, saved
    as JPEG into an in-memory buffer, and the buffer contents are
    base64-encoded.

    :param frame: image array in OpenCV's BGR channel order.
    :return: base64 data as ``bytes``.
    """
    rgb_pixels = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    jpeg_buffer = BytesIO()
    # PIL does the JPEG encoding; nothing is written to disk.
    Image.fromarray(rgb_pixels).save(jpeg_buffer, format='JPEG')
    return base64.b64encode(jpeg_buffer.getvalue())

def base642base64(frame):
    """Return *frame* as a base64-encoded JPEG in a plain ``str``.

    ``frame2base64`` returns ``bytes``. Base64 output consists of ASCII
    characters only, so decoding as ASCII yields the text directly; this
    avoids slicing the ``b'...'`` wrapper off ``str(bytes)``, which relies
    on the exact repr format. No ``data:image/...;base64,`` prefix is added.

    :param frame: image array accepted by ``frame2base64``.
    :return: base64 text.
    """
    return frame2base64(frame).decode("ascii")
def playwav(result):
    """Synthesise *result* into ``speech.wav`` with the global ``tts`` client and play it.

    Returns once every frame of the file has been written to the audio
    output stream. The wave file, the stream and the PyAudio object are
    released even if playback raises.

    :param result: text to speak.
    """
    tts.synthesis(result, "speech.wav")

    p = pyaudio.PyAudio()
    try:
        with wave.open('speech.wav', 'rb') as wf:
            # Output stream matching the file's sample width, channels and rate.
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                # Feed the stream 1024 frames at a time until the file ends.
                data = wf.readframes(1024)
                while len(data) > 0:
                    stream.write(data)
                    data = wf.readframes(1024)
            finally:
                stream.stop_stream()
                stream.close()
    finally:
        p.terminate()
# Open the first video device (index 0).
vd = cv2.VideoCapture()
vd.open(0)
# isOpened() cannot change unless open() is called again, so looping on it
# would spin forever when the camera is unavailable; fail fast instead.
if not vd.isOpened():
    raise RuntimeError("Cannot open camera 0")

# Masked placeholders: substitute your own API key and secret key.
API_KEY = "********"
SECRET_KEY = "************"
# Both endpoints are authorised with the same credentials, so request the
# token once and reuse it instead of calling the OAuth endpoint twice.
access_token = get_access_token()
url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/image2text/fuyu_8b?access_token=" + access_token
url2 = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=" + access_token
# Main loop: show the live preview; when P21 reads high, describe the current
# frame, turn the description into a poem, and speak the poem.
while True:
    # read() already performs grab() + retrieve(); calling grab() first as
    # well would throw away every other frame.
    ret, grab = vd.read()
    if not ret:
        # No frame was delivered; do not hand an empty frame to rotate/imshow.
        continue
    # Only the preview is rotated; the unrotated frame is what gets uploaded.
    cp_img = cv2.rotate(grab, cv2.ROTATE_90_CLOCKWISE)
    cv2.imshow("Mind+'s Windows", cp_img)
    # waitKey gives the window time to process events; the key code is unused.
    cv2.waitKey(20)
    if not p_p21_in.read_digital():
        continue

    playwav('拍照完成，正在识别处理中')
    picbase64 = base642base64(grab)
    # Query the image-to-text service once and reuse the reply for both the
    # log line and the parsing (it used to be requested twice per capture).
    try:
        image_reply = image2text(url, picbase64)
    except requests.RequestException as err:
        print(err)
        continue
    print(image_reply)
    content = json.loads(image_reply)
    if "result" not in content:
        # No description came back; go back to the preview.
        continue

    payload = json.dumps({
        "messages": [
            {
                "role": "user",
                "content": content['result']
            }
        ],
        "temperature": 0.95,
        "top_p": 0.8,
        "penalty_score": 1,
        "enable_system_memory": True,
        "system_memory_id": "sm-upmjb9yaya0gtr45",
        "system": "你是一位诗人，能根据用户提供的描述，提练出主题，并做诗一首。例如：此时此情，我要吟诗一首……",
        "disable_search": False,
        "enable_citation": False
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # The timeout bounds the wait; a network failure is reported and the
    # preview resumes instead of the program hanging or terminating.
    try:
        response = requests.request("POST", url2, headers=headers, data=payload, timeout=60)
    except requests.RequestException as err:
        print(err)
        continue

    result = json.loads(response.text)
    # A reply without "result" (presumably an error reply) used to raise
    # KeyError here; print it and keep the loop running.
    if "result" in result:
        print(result['result'])
        playwav(result['result'])
    else:
        print(result)



复制代码

【演示视频】

[M10项目]行空板之“此时此景”吟诗精灵 精华

[M10项目] 行空板之“此时此景”吟诗精灵

云天 中级技神 楼主|

云天 中级技神 楼主|

硬件清单

楼主的其它帖子

[M10项目]行空板之“此时此景”吟诗精灵精华

云天中级技神
楼主|

云天中级技神
楼主|