diff --git a/examples/agents/bilibili_agent.py b/examples/agents/bilibili_agent.py new file mode 100644 index 00000000..89f13263 --- /dev/null +++ b/examples/agents/bilibili_agent.py @@ -0,0 +1,30 @@ +import asyncio +import os + +# 【新增】引入 OxyRequest 用于工作流函数定义 +from oxygent import MAS, Config, oxy, preset_tools, OxyRequest + +Config.set_agent_llm_model("default_llm") + +oxy_space = [ + oxy.HttpLLM( + name="default_llm", + api_key=os.getenv("DEFAULT_LLM_API_KEY"), + base_url=os.getenv("DEFAULT_LLM_BASE_URL"), + model_name=os.getenv("DEFAULT_LLM_MODEL_NAME"), + llm_params={"stream": True}, + ), + preset_tools.bilibili_tools, + oxy.ReActAgent( + name="bilibili_agent", + desc="A tool that can perform baidu search.", + tools=["bilibili_tools"], + ), +] + +async def main(): + async with MAS(oxy_space=oxy_space) as mas: + await mas.start_web_service(first_query="hello") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/examples/agents/video_understanding_agent.py b/examples/agents/video_understanding_agent.py new file mode 100644 index 00000000..a90329bb --- /dev/null +++ b/examples/agents/video_understanding_agent.py @@ -0,0 +1,30 @@ +import asyncio +import os + +# 【新增】引入 OxyRequest 用于工作流函数定义 +from oxygent import MAS, Config, oxy, preset_tools, OxyRequest + +Config.set_agent_llm_model("default_llm") + +oxy_space = [ + oxy.HttpLLM( + name="default_llm", + api_key=os.getenv("DEFAULT_LLM_API_KEY"), + base_url=os.getenv("DEFAULT_LLM_BASE_URL"), + model_name=os.getenv("DEFAULT_LLM_MODEL_NAME"), + llm_params={"stream": True}, + ), + preset_tools.video_understanding_tools, + oxy.ReActAgent( + name="video_understanding_agent", + desc="A tool can understand the video.", + tools=["video_understanding_tools"], + ), +] + +async def main(): + async with MAS(oxy_space=oxy_space) as mas: + await mas.start_web_service(first_query="hello") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/examples/flows/video_download_understanding_demo.py b/examples/flows/video_download_understanding_demo.py new file mode 100644 index 00000000..287c1b41 --- /dev/null +++ b/examples/flows/video_download_understanding_demo.py @@ -0,0 +1,122 @@ +"""Workflow-based Reflexion Demo for OxyGent""" + +import asyncio +import os + +from oxygent import MAS, Config, OxyRequest, oxy + +# Set LLM model +Config.set_agent_llm_model("default_llm") + + +# Reflexion Workflow Core Logic +async def video_analysis_workflow(oxy_request: OxyRequest): + """ + 工作流:B站搜索 -> 下载视频 -> 视频理解分析 + """ + user_query = oxy_request.get_query(master_level=True) + print(f"== 收到工作流任务: {user_query} ==") + + # --- Step 1: 直接调用 Bilibili 搜索 --- + # 要求它只返回一个最相关的视频 URL + bilibili_search_prompt = f""" + 请在Bilibili中搜索与以下需求最相关的视频,并只返回最相关的一个视频的完整网页URL,不要返回任何多余文本: + {user_query} + """ + + search_resp = await oxy_request.call( + callee="bilibili_agent", + arguments={"query": bilibili_search_prompt} + ) + + video_url = search_resp.output.strip() + print(f"== Step 1 B站搜索结果: {video_url} ==") + + # --- Step 2: 下载视频 --- + download_resp = await oxy_request.call( + callee="bilibili_bangumi_agent", + arguments={"query": video_url} + ) + + local_video_path = download_resp.output + print(f"== Step 2 视频已下载至: {local_video_path} ==") + + # --- Step 3: 视频理解分析 --- + analysis_prompt = f""" + 请详细分析位于 '{local_video_path}' 的视频内容,并回答用户的问题: + {user_query} + """ + + analysis_resp = await oxy_request.call( + callee="video_understanding_agent", + arguments={"query": analysis_prompt} + ) + + final_result = analysis_resp.output + print("== Step 3 分析完成 ==") + + return final_result + + +# Define oxy_space +oxy_space = [ + # LLM model + oxy.HttpLLM( + name="default_llm", + api_key=os.getenv("DEFAULT_LLM_API_KEY"), + base_url=os.getenv("DEFAULT_LLM_BASE_URL"), + model_name=os.getenv("DEFAULT_LLM_MODEL_NAME"), + llm_params={"temperature": 0.01}, + semaphore=4, + timeout=240, + ), + # Worker Agent - responsible for generating initial answers + oxy.ReActAgent( + name="worker_agent", + desc="Worker agent responsible for generating initial answers", + llm_model="default_llm", + ), + # Reflexion Agent - responsible for evaluating answer quality + oxy.ChatAgent( + name="reflexion_agent", + desc="Reflexion agent responsible for evaluating answer quality and providing improvement suggestions", + llm_model="default_llm", + ), + # Math Expert Agent - specifically handles mathematical problems + oxy.ChatAgent( + name="math_expert_agent", + desc="Mathematics expert providing detailed mathematical solutions", + llm_model="default_llm", + ), + # Math Checker Agent - checks mathematical solutions + oxy.ChatAgent( + name="math_checker_agent", + desc="Mathematics solution checker verifying the correctness of mathematical solutions", + llm_model="default_llm", + ), + # General Reflexion Workflow Agent + preset_tools.bilibili_tools, + oxy.ReActAgent( + name="bilibili_agent", + desc="A tool that can perform baidu search.", + tools=["bilibili_tools"], + ), + preset_tools.video_understanding_tools, + oxy.ReActAgent( + name="video_understanding_agent", + desc="A tool can understand the video.", + tools=["video_understanding_tools"], + ), +] + + +async def main(): + """Start Web Service Demo""" + async with MAS(oxy_space=oxy_space) as mas: + await mas.start_web_service( + first_query="Calculate the area of a circle with radius 5." + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/oxygent/preset_tools/bilibili_tools.py b/oxygent/preset_tools/bilibili_tools.py new file mode 100644 index 00000000..50bce018 --- /dev/null +++ b/oxygent/preset_tools/bilibili_tools.py @@ -0,0 +1,129 @@ +import json +import time +import subprocess +from typing import List, Dict +from oxygent.oxy import FunctionHub + +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from bs4 import BeautifulSoup + +bilibili_tools = FunctionHub(name="bilibili_tools") + + +def _search_bilibili_html(query: str, max_results: int = 5) -> List[Dict[str, str]]: + from selenium import webdriver + from selenium.webdriver.chrome.options import Options + from bs4 import BeautifulSoup + import time + + chrome_options = Options() + chrome_options.add_argument("--headless=new") + chrome_options.add_argument("--disable-gpu") + chrome_options.add_argument("--no-sandbox") + chrome_options.add_argument("--disable-dev-shm-usage") + chrome_options.add_argument("--window-size=1920,1080") + chrome_options.add_argument("--lang=zh-CN,zh") + + driver = webdriver.Chrome(options=chrome_options) + + def fetch(url, type_flag): + driver.get(url) + time.sleep(3) + soup = BeautifulSoup(driver.page_source, "html.parser") + results = [] + + if type_flag == "video": + items = soup.select(".bili-video-card")[:max_results] + for idx, item in enumerate(items, 1): + title_tag = item.select_one("h3") + link_tag = item.select_one("a[href]") + if not (title_tag and link_tag): continue + title = title_tag.get_text(strip=True) + url = link_tag["href"] + url = "https:" + url if url.startswith("//") else url + results.append({ + "rank": str(idx), "title": title, "url": url, "type": "video" + }) + return results + + elif type_flag == "bangumi": + # ✅ 2025 新版 B 站番剧 DOM + selectors = [ + ".bangumi-card", ".pgc-item", ".pgc-item-wrapper", + ".media-card", ".b-subject-item" + ] + items = [] + for s in selectors: + items = soup.select(s) + if items: break + + for idx, item in enumerate(items[:max_results], 1): + title_tag = item.select_one("a[title], .bangumi-title, .title") + if not title_tag: continue + title = title_tag.get("title") or title_tag.get_text(strip=True) + url = title_tag.get("href", "") + url = "https:" + url if url.startswith("//") else url + + # ✅ 只保留番剧/纪录片真实入口 + if not any(x in url for x in ["bangumi", "/ep", "/ss"]): + continue + + results.append({ + "rank": str(idx), "title": title, "url": url, "type": "bangumi" + }) + return results + + # ✅ 搜视频 + video_results = fetch(f"https://search.bilibili.com/video?keyword={query}", "video") + + # ✅ 搜番剧/纪录片/动画/综艺 + bangumi_results = fetch(f"https://search.bilibili.com/bangumi?keyword={query}", "bangumi") + + driver.quit() + + # ✅ 合并结果,番剧优先 + return bangumi_results + video_results + + +@bilibili_tools.tool(description="Search Bilibili for videos & bangumi (documentaries, anime).") +def search_bilibili(query: str) -> str: + results = _search_bilibili_html(query, max_results=10) + return json.dumps(results, ensure_ascii=False, indent=2) + + +@bilibili_tools.tool(description="Download normal Bilibili video via yt-dlp.") +def download_bilibili_video(url: str, output_dir: str = "./downloads") -> str: + """ + 普通视频下载函数 + """ + try: + cmd = [ + "yt-dlp", + "-o", f"{output_dir}/%(title)s.%(ext)s", + url + ] + subprocess.run(cmd, check=True) + return json.dumps({"status": "success", "message": f"Video saved to {output_dir}"}, ensure_ascii=False) + + except subprocess.CalledProcessError as e: + return json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False) + +import asyncio + +if __name__ == "__main__": + async def main(): + query = "人生一串" + print("🔍 正在搜索:", query) + results_json = await search_bilibili(query) # ✅ 加 await + print("✅ 搜索结果:\n", results_json) + + results = json.loads(results_json) + if results: + first_video = results[0]["url"] + print(f"\n🎬 开始下载第一个视频: {first_video}") + #print(await download_bilibili_bangumi(first_video)) # ✅ 这里也加 await + else: + print("❌ 未找到视频结果。") + + asyncio.run(main()) diff --git a/oxygent/preset_tools/video_understanding_tools.py b/oxygent/preset_tools/video_understanding_tools.py new file mode 100644 index 00000000..382ddf52 --- /dev/null +++ b/oxygent/preset_tools/video_understanding_tools.py @@ -0,0 +1,184 @@ +import os +import subprocess +import json +from typing import Dict +from dashscope import MultiModalConversation +import dashscope +from oxygent.oxy import FunctionHub + +video_understanding_tools = FunctionHub(name="video_understanding_tools") + +# ✅ 设置 API Key(建议改为环境变量) +dashscope.api_key = os.getenv("DASHSCOPE_API_KEY") + + +import os +import subprocess + +import os +import subprocess + +def compress_video(input_path: str, max_size_mb: int = 100) -> str: + """ + 如果视频超过 max_size_mb(默认100MB),截取前15分钟输出新文件。 + """ + file_size_mb = os.path.getsize(input_path) / (1024 * 1024) + if file_size_mb <= max_size_mb: + print(f"✅ 文件大小 {file_size_mb:.2f}MB,小于 {max_size_mb}MB,无需压缩。") + return input_path + + print(f"⚠️ 文件大小 {file_size_mb:.2f}MB,超过 {max_size_mb}MB,截取前15分钟...") + + output_path = os.path.splitext(input_path)[0] + "_cut15min.mp4" + + # ffmpeg 截取前 15 分钟 (不重新编码: -c copy) + command = [ + "ffmpeg", + "-y", + "-i", input_path, + "-t", "00:15:00", # 截取 15 分钟 + "-c", "copy", # 不重新编码,保持原质量 + output_path + ] + + subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + if not os.path.exists(output_path): + print("❌ 截取失败,返回原文件") + return input_path + + new_size_mb = os.path.getsize(output_path) / (1024 * 1024) + print(f"🎞️ 截取完成:{file_size_mb:.2f}MB → {new_size_mb:.2f}MB -> {output_path}") + return output_path + + +import os +import subprocess +import json +from dashscope import MultiModalConversation + + +import os +import subprocess +import json +from dashscope import MultiModalConversation + +@video_understanding_tools.tool( + description="Understand a video file using Qwen3-VL-Plus model (auto-split into 10-minute parts)." +) +def understand_video(video_path: str, query_text: str, fps: int = 2) -> str: + """ + 使用 Qwen3-VL-Plus 模型理解视频内容。 + 如果视频长度超过10分钟,则按每10分钟分割成多段分别理解并拼接结果。 + + 参数: + video_path: 本地视频路径 + query_text: 用户指令,例如“请总结视频主要内容” + fps: 每秒抽帧数量(默认2) + + 返回: + 模型输出 JSON 字符串 + """ + + def get_video_duration(input_path: str) -> float: + """获取视频时长(秒)""" + result = subprocess.run( + ["ffprobe", "-v", "error", "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", input_path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + try: + return float(result.stdout.strip()) + except: + raise ValueError("无法读取视频时长") + + def split_video_by_minutes(input_path: str, minutes: int = 10, output_dir="temp_splits") -> list: + """按分钟分割视频,每段长度为 minutes 分钟""" + os.makedirs(output_dir, exist_ok=True) + duration = get_video_duration(input_path) + segment_length = minutes * 60 # 转换为秒 + parts = [] + start = 0 + idx = 1 + + while start < duration: + part_file = os.path.join(output_dir, f"part_{idx}.mp4") + cmd = [ + "ffmpeg", "-y", "-ss", str(start), "-i", input_path, + "-t", str(segment_length), "-c", "copy", part_file + ] + subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + parts.append(part_file) + start += segment_length + idx += 1 + + print(f"📽️ 已将视频分割为 {len(parts)} 段,每段约 {minutes} 分钟。") + return parts + + def call_model(video_file, query_text, fps): + """调用 Qwen3-VL-Plus 理解单个视频""" + video_uri = f"file://{video_file}" + messages = [{ + "role": "user", + "content": [ + {"video": video_uri, "fps": fps}, + {"text": query_text} + ] + }] + print(f"🚀 调用模型 qwen3-vl-plus 分析 {os.path.basename(video_file)} ...") + + try: + response = MultiModalConversation.call(model="qwen3-vl-plus", messages=messages) + except Exception as e: + print(f"❌ 模型调用异常:{e}") + return f"⚠️ 模型调用异常:{e}" + + if response is None: + print("❌ 未收到模型响应(response=None)") + return "⚠️ 模型未返回任何结果" + + # 打印完整响应结构以便调试 + print("🧩 模型原始响应:", response) + + try: + if isinstance(response, dict) and "output" in response and "choices" in response["output"]: + return response["output"]["choices"][0]["message"]["content"][0]["text"] + else: + return f"⚠️ 无法解析模型输出结构,response: {response}" + except Exception as e: + return f"⚠️ 解析模型输出时出错:{e}" + + try: + # 分割视频(每10分钟一段) + parts = split_video_by_minutes(video_path, minutes=10) + + # 分别理解每个片段 + results = [] + for i, part in enumerate(parts): + part_result = call_model(part, f"{query_text}(第{i + 1}部分)", fps) + results.append(part_result) + + # 拼接所有部分结果 + final_result = "\n".join([f"第{i+1}部分结果:{r}" for i, r in enumerate(results)]) + print("✅ 所有片段分析完成。") + + return json.dumps({"status": "success", "result": final_result}, ensure_ascii=False) + + except Exception as e: + return json.dumps({"status": "error", "message": e}, ensure_ascii=False) + + + + +import asyncio +if __name__ == "__main__": + async def main(): + video_file = "/home/caotiezheng/pythoncode/OxyGent-main/downloads/1 非洲雨水追逐之旅.mp4" + query_text = "视频中,旁白介绍了一头成年大象一天要吃超过多少公斤的植物?" + + print("🎬 开始视频理解...") + result = await understand_video(video_file, query_text, fps=2) + print("\n📜 输出结果:\n", result) + + + asyncio.run(main())