最近刚刚实现nodehub中的火山引擎大模型例程,但是例程中只有对话模型的适配文件。我想用网关大模型中的qwen-VL-8k模型,qwen-VL-8k是多模态大模型,我想用它实现图生文的功能。以下是我部分修改的代码:
客户端代码:
import base64

import rclpy
from rclpy.node import Node

from rdk_ai_gateway_msg.srv import TextToText  # Adjust the import according to your service name
class InferClient(Node):
    """ROS 2 client that sends a text prompt plus a base64-encoded image to
    the ``text_to_text`` service and returns the model's answer."""

    def __init__(self):
        super().__init__('infer_client')
        self.client = self.create_client(TextToText, 'text_to_text')
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('Service not available, waiting again...')
        self.request = TextToText.Request()
        # Declare parameters — all three are strings.
        self.declare_parameter('input_str', '请描述这张图片')
        self.declare_parameter('model', 'qwen-VL-8k')  # model name served by the gateway
        self.declare_parameter('image_path', '/home/sunrise/huoshan/src/img/1.jpg')
        # Retrieve parameters.
        self.input_str = self.get_parameter('input_str').get_parameter_value().string_value
        self.model = self.get_parameter('model').get_parameter_value().string_value
        # BUG FIX: the original used ``.value``, which is not an attribute of
        # ParameterValue; a string parameter is read via ``.string_value``.
        self.image_path = self.get_parameter('image_path').get_parameter_value().string_value

    def send_request(self):
        """Encode the image, fill the request, and call the service.

        Returns:
            The service response, or ``None`` when the image path is empty
            or the file cannot be read.
        """
        if not self.image_path:
            self.get_logger().error("未提供图片路径!")
            return None
        try:
            with open(self.image_path, "rb") as f:
                self.request.image_base64 = base64.b64encode(f.read()).decode("utf-8")
        except OSError as e:  # narrowed from Exception: only file access can fail here
            self.get_logger().error(f"读取图片失败: {e}")
            return None
        self.request.input = self.input_str
        # NOTE(review): requires a ``string model`` field in the .srv
        # definition — confirm the interface file declares it.
        self.request.model = self.model
        self.future = self.client.call_async(self.request)
        rclpy.spin_until_future_complete(self, self.future)
        return self.future.result()
def main(args=None):
    """Entry point: create the client, send one request, and log the result."""
    rclpy.init(args=args)
    infer_client = InferClient()
    # Send the request using the node's declared parameters.
    response = infer_client.send_request()
    # BUG FIX: send_request() returns None on failure; the original
    # dereferenced response.output unconditionally and crashed with
    # AttributeError when the image was missing or unreadable.
    if response is not None:
        infer_client.get_logger().info(f'Result: {response.output}')
    else:
        infer_client.get_logger().error('Request failed, no response received.')
    infer_client.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
服务端代码:
import os
import rclpy
from rclpy.node import Node
from decode import api_infer
from rdk_ai_gateway_msg.srv import TextToText
class InferService(Node):
    """ROS 2 service node exposing ``text_to_text``, backed by the AI-gateway
    ``api_infer`` helper (from the project-local ``decode`` module)."""

    def __init__(self):
        super().__init__('infer_service')
        self.srv = self.create_service(TextToText, "text_to_text", self.api_infer_cb)
        self.declare_parameter('bin_file', '/home/sunrise/.ros/rdk_ai_gateway/auth.bin')
        self.bin_file = self.get_parameter('bin_file').get_parameter_value().string_value
        self.get_logger().info('bin_file is: ' + self.bin_file)
        # BUG FIX: assign ``shift`` unconditionally. The original set it only
        # on the success branch, but the early ``return`` below does not shut
        # the node down — the service stays registered, and the first request
        # then crashed in api_infer_cb with AttributeError on self.shift.
        self.shift = 99  # presumably an auth/offset constant for api_infer — TODO confirm
        # Check if the auth file exists.
        if not os.path.exists(self.bin_file):
            self.get_logger().error(f"File '{self.bin_file}' does not exist. Shutting down.")
            return
        # BUG FIX: typo in the original log message ("the server if ready").
        self.get_logger().info("Found auth.bin, the server is ready. Please pass in request.")

    def api_infer_cb(self, request, response):
        """Service callback: forward the request to the gateway, return its output."""
        self.get_logger().info("Receive request..")
        self.get_logger().info("The requested model is: " + request.model)
        self.get_logger().info("The requested input is: " + request.input)
        # NOTE(review): request.image_base64 is currently ignored, so the
        # image never reaches the model and image-to-text cannot work.
        # api_infer() in decode.py must be extended to accept and forward the
        # base64 image payload to the multimodal endpoint — confirm its API.
        response.output = api_infer(self.bin_file, self.shift, request.input, request.model, self.get_logger())
        return response
def main():
    """Start the inference service node and spin until interrupted."""
    rclpy.init()
    service_node = InferService()
    rclpy.spin(service_node)
    rclpy.shutdown()


if __name__ == '__main__':
    main()
srv文件:
# Request
string input          # text prompt for the model
string image_base64   # base64-encoded image bytes (multimodal input)
string model          # model name, e.g. "qwen-VL-8k" — BUG FIX: both the
                      # client (request.model = ...) and the server
                      # (request.model) use this field, but it was missing
                      # from the interface definition
---
# Response
string output         # model-generated text
服务端中的api_infer()未被提供,我也不懂接下来怎么修改了,求求各位大佬解惑。