zfsafe/mycode/LLMManager.py

'''
Wrapper around large-language-model (LLM) calls that isolates callers from the concrete LLM provider.

Setup:
    pip install openai
    export OPENAI_API_KEY="<your-openai-api-key>"
Never commit a real key to the repository; any key that was previously pasted here must be rotated.
'''
import openai
import json
import threading
import re
from openai import OpenAI
from myutils.MyTime import get_local_timestr
from myutils.MyLogger_logger import LogHandler

class LLMManager:
    def __init__(self,illm_type=0):
        """Set up the logger, the per-task counters and the provider-specific client.

        :param illm_type: provider selector -- 0: Tencent Cloud (credentials only,
            no client is created), 1: DeepSeek, 3: OpenAI GPT. Any other value is
            logged as an error and leaves the manager without a client.
        """
        # Function-scope import keeps this method self-contained.
        import os

        self.logger = LogHandler().get_logger("LLMManager")
        self.api_key = None
        self.api_url = None
        # Always defined, so later code can test for None instead of hitting
        # AttributeError when a provider branch below does not create a client.
        self.model = None
        self.client = None
        self.task_id = 0  # one id per task
        self.llm_sn = 0  # LLM call sequence number, one sequence per task
        self.llm_sn_lock = threading.Lock()
        # Temperature guidance (DeepSeek): code generation / maths 0.0,
        # data extraction / analysis 1.0, general chat 1.3, translation 1.3,
        # creative writing 1.5. Tencent Cloud: not documented here.
        self.temperature = 1.0

        # SECURITY: credentials should come from the environment. The literals
        # below are legacy fallbacks kept only so existing deployments keep
        # working; they are exposed in version control and should be rotated
        # and then removed.
        if illm_type == 0:  # Tencent Cloud
            self.api_key = os.environ.get(
                "TENCENT_LLM_API_KEY",
                "fGBYaQLHykBOQsFwVrQdIFTsYr8YDtDVDQWFU41mFsmvfNPc")
            self.api_url = ""
        elif illm_type == 1:  # DeepSeek
            self.api_key = os.environ.get(
                "DEEPSEEK_API_KEY",
                "sk-10360148b465424288218f02c87b0e1b")
            self.api_url = "https://api.deepseek.com/v1"
            # deepseek-reasoner -> R1, deepseek-chat -> V3
            self.model = "deepseek-reasoner"
            # One client per task: the LLM session of a task must be unique.
            self.client = OpenAI(api_key=self.api_key, base_url=self.api_url)
        elif illm_type == 3:  # GPT
            self.api_key = ""
            self.api_url = ""
            self.model = ""
            # NOTE(review): OpenAI() without arguments is expected to read its
            # key from the OPENAI_API_KEY environment variable -- confirm.
            self.client = OpenAI()
        else:
            self.logger.error(f"Unsupported illm_type: {illm_type!r}; no LLM client created")

        self.messages = []

    #****** Test helper: overwrite self.messages with a previously saved conversation
    def test_old_message(self,strMessage):
        try:
            self.messages = json.loads(strMessage)
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {str(e)}")
        except Exception as e:
            print(f"错误: {str(e)}")

    # Build the initial user prompt and initialise node.messages with the system prompt
    def build_initial_prompt(self,target,know_info="无",node=None):
        if not node:
            return None
        '''暂时未添加到提示词
        **核心要求**：
	    - 每次新增节点时，必须在同一响应中为该节点提供测试指令。
	    - 分批新增节点，每次响应中新增节点不超过3个，确保指令完整。
	    '''
        #根节点初始化message
        node.messages = [{"role": "system",
        "content":'''
你是一位渗透测试专家,负责动态控制整个渗透测试过程，根据当前测试状态和返回结果，决定下一步测试指令，推动测试前进，直至完成渗透测试。本地程序会执行你的指令进并返回结果。
**总体要求**
1.以目标系统IP为根节点，每个渗透测试点（如端口、服务、漏洞点）作为子节点，形成树型结构（测试树）；
2.测试树整体数据由本地程序存储，你只需要关注当前节点的测试推进、状态更新(未完成/已完成)及子节点新增；
3.返回两类指令：节点指令和测试指令，以空行间隔，不要包含注释和说明；
4.若无节点修改或新增，节点指令可以为空，但测试指令必须对应已有节点。
**决策流程**
1. 若当前节点是IP且未进行端口扫描，则执行端口扫描；
2. 若端口扫描发现开放端口，为每个端口新增节点并提供测试指令；
3. 若当前节点是端口且未进行服务扫描，则执行服务扫描；
4. 若服务扫描发现服务版本或漏洞，则新增漏洞测试节点并提供测试指令；
5. 若漏洞利用成功，则根据结果决定是否新增子节点并提供测试指令；
6. 若节点测试无新信息，则更新状态为“已完成”。
**节点指令格式**
- 新增节点：{\"action\":\"add_node\", \"parent\": \"80端口\", \"node\": \"http://192.168.1.100/index.php?id=1\", \"status\": \"未完成\"}；
- 更新节点未发现漏洞：{\"action\": \"update_status\", \"node\": \"21端口\", \"status\": \"已完成\"}；
- 更新节点发现漏洞：{\"action\": \"update_status\", \"node\": \"21端口\", \"status\": \"已完成\"，\"vulnerability\": {\"name\":\"ftp匿名登录\",\"risk\":\"高\"}}；
**测试指令格式**
- shell指令：```bash-[节点路径](.*?)```包裹，需要避免用户交互；
- python指令：```python-[节点路径](.*?)```包裹，主函数名为dynamic_fun，需包含错误处理，执行结束后必须返回一个tuple (status, output)，其中status为'success'或'failure'，output为补充输出信息；
- [节点路径]为从根节点到目标节点的完整层级描述。
**响应示例**
{\"action\":\"add_node\", \"parent\": \"192.168.1.100\", \"node\": \"3306端口\", \"status\": \"未完成\"}

```bash-[目标系统->192.168.1.100->3306端口]
mysql -u root -p 192.168.1.100
```

{\"action\":\"add_node\", \"parent\": \"192.168.1.100\", \"node\": \"22端口\", \"status\": \"未完成\"}

```python-[目标系统->192.168.1.100->22端口]
def dynamic_fun():
    try:
        result = "扫描完成"
        return ("success", result)
    except Exception as e:
        return ("failure", str(e))
```
'''}]  # 一个messages
        user_Prompt = f'''
当前分支路径：目标系统->{target}
当前节点信息：
- 节点名称：{target}
- 节点状态：未完成
- 漏洞类型：未发现
上一步结果：{know_info}
任务：生成下一步渗透测试指令或结束该节点的渗透测试(修改节点状态为：已完成)。
        '''
        return user_Prompt

    def init_data(self,task_id=0):
        #初始化LLM数据
        self.llm_sn = 0
        self.task_id = task_id
        self.messages = []

    # Ask the LLM for the next instructions
    def get_llm_instruction(self,prompt,th_DBM,node):
        '''
        1.由于大模型API不记录用户请求的上下文，一个任务的LLM不能并发！
        :param prompt:用户本次输入的内容
        :return: instr_list
        '''
        #添加本次输入入该节点的message队列
        message = {"role":"user","content":prompt}
        node.messages.append(message)

        #提交LLM
        post_time = get_local_timestr()
        response = self.client.chat.completions.create(
            model=self.model,
            messages = node.messages
        )

        #LLM返回结果处理
        reasoning_content = ""
        content = ""
        #LLM返回处理
        if self.model == "deepseek-reasoner":
            #返回错误码：DS-https://api-docs.deepseek.com/zh-cn/quick_start/error_codes
            reasoning_content = response.choices[0].message.reasoning_content   #推理过程
            print(reasoning_content)
            content = response.choices[0].message.content             #推理内容
            print(content)
            # 记录llm历史信息
            node.messages.append({'role': 'assistant', 'content': content})
        elif self.model == "deepseek-chat":
            content = response.choices[0].message
            # 记录llm历史信息
            node.messages.append(content)
        else:
            self.logger.error("处理到未预设的模型！")
            return None

        #LLM记录存数据库
        node.llm_sn += 1
        bres = th_DBM.insert_llm(self.task_id,prompt,reasoning_content,content,post_time,node)
        if not bres:
            self.logger.error(f"{node.name}-llm入库失败！")

        #需要对指令进行提取
        node_cmds,commands = self.fetch_instruction(content,node)

        return node_cmds,commands

    def fetch_instruction(self,response_text,node):
        '''
        *****该函数很重要，需要一定的容错能力，解析LLM返回内容*****
        处理边界：只格式化分析LLM返回内容，指令和节点操作等交其他模块。
        节点控制指令
        渗透测试指令
        提取命令列表，包括：
        1. Python 代码块 python[](.*?)
        2. Shell 命令``bash[](.*?)```
        :param text: 输入文本
        :return: node_cmds,python_blocks,shell_blocks
        '''
        #针对llm的回复，提取节点操作数据和执行的指令----
        # 正则匹配 Python 代码块
        python_blocks = re.findall(r"```python-(.*?)```", response_text, flags=re.DOTALL)
        # 处理 Python 代码块，去除空行并格式化
        python_blocks = [block.strip() for block in python_blocks]

        #正则匹配shell指令
        shell_blocks = re.findall(f"```bash-(.*?)```", response_text, flags=re.DOTALL)
        shell_blocks = [block.strip() for block in shell_blocks]

        # 按连续的空行拆分
        # 移除 Python和bash 代码块
        text_no_python = re.sub(r"```python.*?```", "PYTHON_BLOCK", response_text, flags=re.DOTALL)
        text = re.sub(r"```bash.*?```", "SHELL_BLOCK", text_no_python, flags=re.DOTALL)

        # 这里用 \n\s*\n 匹配一个或多个空白行
        parts = re.split(r'\n\s*\n', text)
        node_cmds = []
        commands = []
        python_index = 0
        shell_index = 0
        for part in parts:
            part = part.strip()
            if not part:
                continue
            if "PYTHON_BLOCK" in part:
                # 还原 Python 代码块
                commands.append(f"python_code {python_blocks[python_index]}")
                python_index += 1
            elif "SHELL_BLOCK" in part:
                commands.append(shell_blocks[shell_index])
                shell_index +=1
            else:
                #其他的认为是节点操作指令--指令格式还存在不确定性，需要正则匹配
                pattern = re.compile(r'\{.*?\}', re.DOTALL)
                # 遍历所有匹配到的 JSON 结构
                for match in pattern.findall(part):
                    try:
                        node_cmds.append(json.loads(match))  # 解析 JSON 并添加到列表
                    except json.JSONDecodeError as e:#解析不了的不入队列
                        print(f"JSON 解析错误: {e}")
        return node_cmds,commands

    def llm_error_feedback(self,response_text,node):
        '''验证llm返回--错误反馈机制--兼容LLM的不稳定性，需要持续补充
        1.验证节点是否都有测试指令返回
        '''
        pass


    def test_llm(self):
        with open("../test", "r", encoding="utf-8") as f:
            messages = json.load(f)
        text = messages[-1]["content"]
        list = self.fetch_instruction(text)
        for itme in list:
            print("***********")
            print(itme)


if __name__ == "__main__":
    # Scratch check of list truthiness; the real smoke test stays disabled:
    # LM = LLMManager(1)
    # LM.test_llm()
    empty_items, filled_items = [], [1]
    if not empty_items:
        print("list1空")
    # Exactly one of the two messages is printed for the second list.
    print("list2不为空" if filled_items else "list2空")
V0.1.1 node_tree_0.1 1 month ago			`'''`
			`实现对大模型调用的封装，隔离具体使用的LLM`
			`pip install openai`
			`export OPENAI_API_KEY="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"`
			`'''`
			`import openai`
			`import json`
			`import threading`
			`import re`
			`from openai import OpenAI`
			`from myutils.MyTime import get_local_timestr`
			`from myutils.MyLogger_logger import LogHandler`

			`class LLMManager:`
			`def __init__(self,illm_type=0):`
			`self.logger = LogHandler().get_logger("LLMManager")`
			`self.api_key = None`
			`self.api_url = None`
			`self.task_id =0 #一个任务一个id`
			`self.llm_sn = 0 # llm执行序列号，--一任务一序列`
			`self.llm_sn_lock = threading.Lock() #`
			`#temperature设置`
			`#DS------代码生成/数学解题：0.0 -- 数据抽取/分析：1.0 -- 通用对话：1.3 -- 翻译：1.3 -- 创意类写作：1.5`
			`#腾讯云---`
			`self.temperature = 1.0`
			`if illm_type == 0: #腾讯云`
			`self.api_key = "fGBYaQLHykBOQsFwVrQdIFTsYr8YDtDVDQWFU41mFsmvfNPc"`
			`self.api_url = ""`
			`elif illm_type == 1: #DS`
			`self.api_key ="sk-10360148b465424288218f02c87b0e1b"`
			`self.api_url ="https://api.deepseek.com/v1"`
			`self.model = "deepseek-reasoner" #model=deepseek-reasoner -- R1 model=deepseek-chat --V3`
			`# 创建会话对象 -- 一个任务的LLM必须唯一`
			`self.client = OpenAI(api_key=self.api_key, base_url=self.api_url)`
			`elif illm_type ==3: #GPT`
			`self.api_key =""`
			`self.api_url = ""`
			`self.model = ""`
			`self.client = OpenAI()`

			`self.messages = []`
			`# 初始化阶段和已知信息`
			`current_stage = "信息收集"`
			`known_info = {"url": "www.test.com"}`
			`results = [] # 存储所有任务结果，用于生成报告`

			`#******测试使用，设置slef.message的值`
			`def test_old_message(self,strMessage):`
			`try:`
			`self.messages = json.loads(strMessage)`
			`except json.JSONDecodeError as e:`
			`print(f"JSON解析错误: {str(e)}")`
			`except Exception as e:`
			`print(f"错误: {str(e)}")`

			`# 构建初始提示初始化messages`
			`def build_initial_prompt(self,target,know_info="无",node=None):`
			`if not node:`
			`return None`
V0.1.1 node_tree_0.3 1 month ago			`'''暂时未添加到提示词`
			`核心要求：`
			`- 每次新增节点时，必须在同一响应中为该节点提供测试指令。`
			`- 分批新增节点，每次响应中新增节点不超过3个，确保指令完整。`
			`'''`
V0.1.1 node_tree_0.1 1 month ago			`#根节点初始化message`
			`node.messages = [{"role": "system",`
V0.1.1 node_tree_0.2 1 month ago			`"content":'''`
V0.1.1 node_tree_0.3 1 month ago			`你是一位渗透测试专家,负责动态控制整个渗透测试过程，根据当前测试状态和返回结果，决定下一步测试指令，推动测试前进，直至完成渗透测试。本地程序会执行你的指令进并返回结果。`
			`总体要求`
			`1.以目标系统IP为根节点，每个渗透测试点（如端口、服务、漏洞点）作为子节点，形成树型结构（测试树）；`
			`2.测试树整体数据由本地程序存储，你只需要关注当前节点的测试推进、状态更新(未完成/已完成)及子节点新增；`
			`3.返回两类指令：节点指令和测试指令，以空行间隔，不要包含注释和说明；`
			`4.若无节点修改或新增，节点指令可以为空，但测试指令必须对应已有节点。`
			`决策流程`
			`1. 若当前节点是IP且未进行端口扫描，则执行端口扫描；`
			`2. 若端口扫描发现开放端口，为每个端口新增节点并提供测试指令；`
			`3. 若当前节点是端口且未进行服务扫描，则执行服务扫描；`
			`4. 若服务扫描发现服务版本或漏洞，则新增漏洞测试节点并提供测试指令；`
			`5. 若漏洞利用成功，则根据结果决定是否新增子节点并提供测试指令；`
			`6. 若节点测试无新信息，则更新状态为“已完成”。`
			`节点指令格式`
			`- 新增节点：{\"action\":\"add_node\", \"parent\": \"80端口\", \"node\": \"http://192.168.1.100/index.php?id=1\", \"status\": \"未完成\"}；`
			`- 更新节点未发现漏洞：{\"action\": \"update_status\", \"node\": \"21端口\", \"status\": \"已完成\"}；`
			`- 更新节点发现漏洞：{\"action\": \"update_status\", \"node\": \"21端口\", \"status\": \"已完成\"，\"vulnerability\": {\"name\":\"ftp匿名登录\",\"risk\":\"高\"}}；`
			`测试指令格式`
			- shell指令：```bash-[节点路径](.*?)```包裹，需要避免用户交互；
			- python指令：```python-[节点路径](.*?)```包裹，主函数名为dynamic_fun，需包含错误处理，执行结束后必须返回一个tuple (status, output)，其中status为'success'或'failure'，output为补充输出信息；
			`- [节点路径]为从根节点到目标节点的完整层级描述。`
			`响应示例`
			`{\"action\":\"add_node\", \"parent\": \"192.168.1.100\", \"node\": \"3306端口\", \"status\": \"未完成\"}`
V0.1.1 node_tree_0.2 1 month ago
			```bash-[目标系统->192.168.1.100->3306端口]
V0.1.1 node_tree_0.3 1 month ago			`mysql -u root -p 192.168.1.100`
			```

			`{\"action\":\"add_node\", \"parent\": \"192.168.1.100\", \"node\": \"22端口\", \"status\": \"未完成\"}`

			```python-[目标系统->192.168.1.100->22端口]
V0.1.1 node_tree_0.2 1 month ago			`def dynamic_fun():`
			`try:`
			`result = "扫描完成"`
			`return ("success", result)`
			`except Exception as e:`
			`return ("failure", str(e))`
			```
V0.1.1 node_tree_0.3 1 month ago			`'''}] # 一个messages`
V0.1.1 node_tree_0.1 1 month ago			`user_Prompt = f'''`
V0.1.1 node_tree_0.2 1 month ago			`当前分支路径：目标系统->{target}`
			`当前节点信息：`
			`- 节点名称：{target}`
			`- 节点状态：未完成`
			`- 漏洞类型：未发现`
			`上一步结果：{know_info}`
			`任务：生成下一步渗透测试指令或结束该节点的渗透测试(修改节点状态为：已完成)。`
V0.1.1 node_tree_0.1 1 month ago			`'''`
			`return user_Prompt`

			`def init_data(self,task_id=0):`
			`#初始化LLM数据`
			`self.llm_sn = 0`
			`self.task_id = task_id`
			`self.messages = []`

			`# 调用LLM生成指令`
			`def get_llm_instruction(self,prompt,th_DBM,node):`
			`'''`
			`1.由于大模型API不记录用户请求的上下文，一个任务的LLM不能并发！`
			`:param prompt:用户本次输入的内容`
			`:return: instr_list`
			`'''`
			`#添加本次输入入该节点的message队列`
			`message = {"role":"user","content":prompt}`
			`node.messages.append(message)`

			`#提交LLM`
			`post_time = get_local_timestr()`
			`response = self.client.chat.completions.create(`
			`model=self.model,`
V0.1.1 node_tree_0.2 1 month ago			`messages = node.messages`
V0.1.1 node_tree_0.1 1 month ago			`)`

			`#LLM返回结果处理`
			`reasoning_content = ""`
			`content = ""`
			`#LLM返回处理`
			`if self.model == "deepseek-reasoner":`
			`#返回错误码：DS-https://api-docs.deepseek.com/zh-cn/quick_start/error_codes`
			`reasoning_content = response.choices[0].message.reasoning_content #推理过程`
			`print(reasoning_content)`
			`content = response.choices[0].message.content #推理内容`
			`print(content)`
			`# 记录llm历史信息`
			`node.messages.append({'role': 'assistant', 'content': content})`
			`elif self.model == "deepseek-chat":`
			`content = response.choices[0].message`
			`# 记录llm历史信息`
			`node.messages.append(content)`
			`else:`
			`self.logger.error("处理到未预设的模型！")`
			`return None`

			`#LLM记录存数据库`
			`node.llm_sn += 1`
			`bres = th_DBM.insert_llm(self.task_id,prompt,reasoning_content,content,post_time,node)`
			`if not bres:`
			`self.logger.error(f"{node.name}-llm入库失败！")`

			`#需要对指令进行提取`
V0.1.1 node_tree_0.2 1 month ago			`node_cmds,commands = self.fetch_instruction(content,node)`
V0.1.1 node_tree_0.1 1 month ago
V0.1.1 node_tree_0.2 1 month ago			`return node_cmds,commands`
V0.1.1 node_tree_0.1 1 month ago
			`def fetch_instruction(self,response_text,node):`
			`'''`
V0.1.1 node_tree_0.2 1 month ago			`***该函数很重要，需要一定的容错能力，解析LLM返回内容***`
			`处理边界：只格式化分析LLM返回内容，指令和节点操作等交其他模块。`
V0.1.1 node_tree_0.1 1 month ago			`节点控制指令`
			`渗透测试指令`
			`提取命令列表，包括：`
			`1. Python 代码块 python[](.*?)`
			2. Shell 命令``bash[](.*?)```
			`:param text: 输入文本`
			`:return: node_cmds,python_blocks,shell_blocks`
			`'''`
			`#针对llm的回复，提取节点操作数据和执行的指令----`
			`# 正则匹配 Python 代码块`
			python_blocks = re.findall(r"```python-(.*?)```", response_text, flags=re.DOTALL)
			`# 处理 Python 代码块，去除空行并格式化`
			`python_blocks = [block.strip() for block in python_blocks]`

			`#正则匹配shell指令`
			shell_blocks = re.findall(f"```bash-(.*?)```", response_text, flags=re.DOTALL)
			`shell_blocks = [block.strip() for block in shell_blocks]`

			`# 按连续的空行拆分`
			`# 移除 Python和bash 代码块`
			text_no_python = re.sub(r"```python.*?```", "PYTHON_BLOCK", response_text, flags=re.DOTALL)
			text = re.sub(r"```bash.*?```", "SHELL_BLOCK", text_no_python, flags=re.DOTALL)

			`# 这里用 \n\s*\n 匹配一个或多个空白行`
			`parts = re.split(r'\n\s*\n', text)`
			`node_cmds = []`
			`commands = []`
			`python_index = 0`
			`shell_index = 0`
			`for part in parts:`
			`part = part.strip()`
			`if not part:`
			`continue`
			`if "PYTHON_BLOCK" in part:`
			`# 还原 Python 代码块`
			`commands.append(f"python_code {python_blocks[python_index]}")`
			`python_index += 1`
			`elif "SHELL_BLOCK" in part:`
			`commands.append(shell_blocks[shell_index])`
			`shell_index +=1`
			`else:`
V0.1.1 node_tree_0.3 1 month ago			`#其他的认为是节点操作指令--指令格式还存在不确定性，需要正则匹配`
			`pattern = re.compile(r'\{.*?\}', re.DOTALL)`
			`# 遍历所有匹配到的 JSON 结构`
			`for match in pattern.findall(part):`
			`try:`
			`node_cmds.append(json.loads(match)) # 解析 JSON 并添加到列表`
			`except json.JSONDecodeError as e:#解析不了的不入队列`
			`print(f"JSON 解析错误: {e}")`
V0.1.1 node_tree_0.2 1 month ago			`return node_cmds,commands`
V0.1.1 node_tree_0.1 1 month ago
V0.1.1 node_tree_0.3 1 month ago			`def llm_error_feedback(self,response_text,node):`
			`'''验证llm返回--错误反馈机制--兼容LLM的不稳定性，需要持续补充`
			`1.验证节点是否都有测试指令返回`
			`'''`
			`pass`


V0.1.1 node_tree_0.1 1 month ago			`def test_llm(self):`
			`with open("../test", "r", encoding="utf-8") as f:`
			`messages = json.load(f)`
			`text = messages[-1]["content"]`
			`list = self.fetch_instruction(text)`
			`for itme in list:`
			`print("***********")`
			`print(itme)`


			`if __name__ == "__main__":`
V0.1.1 node_tree_0.3 1 month ago			`# LM = LLMManager(1)`
			`# LM.test_llm()`
			`tlist1 = []`
			`tlist2 = []`
			`tlist2.append(1)`
			`if not tlist1:`
			`print("list1空")`
			`if not tlist2:`
			`print("list2空")`
			`if tlist2:`
			`print("list2不为空")`
V0.1.1 node_tree_0.1 1 month ago