'''
Wrapper around large-model (LLM) invocation that isolates callers from the concrete LLM in use.
pip install openai
 export OPENAI_API_KEY="<your-openai-api-key>"   (never commit a real key to source control)
'''
import openai
import json
import re
import os
from openai import OpenAI
from openai import OpenAIError, APIConnectionError, APITimeoutError
from myutils.ConfigManager import myCongif
from myutils.MyTime import get_local_timestr
from myutils.MyLogger_logger import LogHandler

class LLMManager:
    def __init__(self,illm_type):
        '''
        Select an LLM provider and build the OpenAI-compatible client for it.

        API keys are read from environment variables instead of being stored
        in the source file. When the selector is unknown or the key variable
        is not set, an error is logged and ``self.client`` stays ``None``.

        Provider selectors and the environment variable holding each key:
            0 - Tencent Cloud: TENCENT_LLM_API_KEY (no endpoint/model is configured for it here)
            1 - DeepSeek: DEEPSEEK_API_KEY
            2 - 2233.ai: GPTSAPI_API_KEY
            3 - OpenAI GPT through an HTTP proxy: OPENAI_API_KEY
            4 - Qwen3 (DashScope compatible mode): DASHSCOPE_API_KEY

        :param illm_type: integer provider selector, see the table above.
        '''
        self.logger = LogHandler().get_logger("LLMManager")
        self.api_key = None
        self.api_url = None
        # Defined up front so every instance carries these attributes, even
        # when configuration fails below.
        self.model = None
        self.client = None
        proxy_url = None

        if illm_type == 0:  # Tencent Cloud
            key_env = "TENCENT_LLM_API_KEY"
            self.api_url = ""
        elif illm_type == 1:  # DeepSeek
            key_env = "DEEPSEEK_API_KEY"
            self.api_url = "https://api.deepseek.com/v1"
            # "deepseek-reasoner" selects R1, "deepseek-chat" selects V3.
            self.model = "deepseek-reasoner"
        elif illm_type == 2:  # 2233.ai
            key_env = "GPTSAPI_API_KEY"
            self.api_url = "https://api.gptsapi.net/v1"
            self.model = "o3-mini-2025-01-31"
        elif illm_type == 3:  # OpenAI GPT
            key_env = "OPENAI_API_KEY"
            # Address of the proxy server; exporting it changes the
            # process-wide proxy environment variables.
            proxy_url = "http://192.168.3.102:3128"
            os.environ["HTTP_PROXY"] = proxy_url
            os.environ["HTTPS_PROXY"] = proxy_url
            self.api_url = "https://api.openai.com/v1"
            self.model = "o3-mini-2025-01-31"
        elif illm_type == 4:  # Tongyi Qwen3
            key_env = "DASHSCOPE_API_KEY"
            self.api_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
            self.model = "qwen3-235b-a22b"
        else:
            self.logger.error("模型参数选择异常！")
            return

        self.api_key = os.environ.get(key_env)

        if proxy_url is not None:
            # Module-level openai settings, kept from the earlier code.
            # NOTE(review): the client below is configured through its own
            # arguments - confirm whether these attributes are still needed.
            openai.proxy = proxy_url
            openai.api_key = self.api_key

        if not self.api_key:
            # Without a key the client cannot be built; report instead of
            # letting the constructor raise.
            self.logger.error(f"未设置环境变量 {key_env}，无法创建LLM客户端")
            return

        # Create the session object -- the LLM client must be unique per task.
        self.client = OpenAI(api_key=self.api_key, base_url=self.api_url)

    '''
    **决策原则**
    - 根据节点类型和状态，优先执行基础测试（如端口扫描、服务扫描）。
    - 仅在发现新信息或漏洞时新增子节点。
    - 确保每个新增节点匹配测试指令。
    '''
    # 初始化messages
    def build_initial_prompt(self,node):
        '''
        Install the system prompt on ``node`` as its ``parent_messages``.

        The prompt (in Chinese) defines the penetration-testing role, the
        node-instruction JSON formats and the fenced test-instruction formats
        that the reply parser of this class expects.

        :param node: object that receives the ``parent_messages`` attribute;
                     a falsy value makes the call a no-op.
        :return: None
        '''
        if not node:
            return
        # System message for the root node ---- each LLM may later need its own system message.
        system_prompt = '''
你是一位渗透测试专家，来指导本地程序进行渗透测试，由你负责动态控制整个渗透测试过程，根据当前测试状态和返回结果，决定下一步测试指令，推动测试前进，直至完成渗透测试。
**总体要求**
1.以测试目标为根节点，结合信息收集和测试反馈的结果，以新的测试点作为子节点，逐步规划和推进下一步测试，形成树型结构（测试树），测试点需尽量全面；
2.只有当收到当前节点的所有测试指令的结果，且没有新的测试指令需要执行时，再判断是否需要新增子节点进一步进行验证测试，若没有，则结束该路径的验证；
3.若一次性新增的节点过多，无法为每个节点都匹配测试指令，请优先保障新增测试节点的完整性，若有新增的节点未能匹配测试指令，必须返回未匹配指令的节点列表；
4.生成的指令有两类：节点指令和测试指令，指令之间必须以空行间隔，不能包含注释和说明；
5.本地程序会执行生成的指令，但不具备分析判断和保持会话能力，只会把执行结果返回提交；
6.只有当漏洞验证成功后，才添加该节点的漏洞信息；
7.若无需要处理的节点数据，节点指令可以不生成；
8.若节点已完成测试，测试指令可以不生成。
**测试指令生成准则**
1.可以是dash指令，也可以是python指令，必须按格式要求生成；
2.必须对应已有节点，或同时生成新增节点指令；
3.优先使用覆盖面广成功率高的指令；不要生成重复的指令；
4.若需要多条指令配合测试，请生成对应的python指令，完成闭环返回；
5.避免用户交互，必须要能返回。
**节点指令格式**
- 新增节点：{\"action\":\"add_node\", \"parent\": \"父节点\", \"nodes\": \"节点1，节点2\"}；
- 未匹配指令的节点列表：{\"action\": \"no_instruction\", \"nodes\": \"节点1,节点2\"}；
- 漏洞验证成功：{\"action\": \"find_vul\", \"node\": \"节点\",\"vulnerability\": {\"name\":\"漏洞名称\",\"risk\":\"风险等级(低危/中危/高危)\",\"info\":\"补充信息（没有可为空）\"}}；
- 节点完成测试：{\"action\": \"end_work\", \"node\": \"节点\"}；
**测试指令格式**
- dash指令：```dash-[节点路径]指令内容```包裹，若涉及到多步指令，请生成python指令；
- python指令：```python-[节点路径]指令内容```包裹，主函数名为dynamic_fun，需包含错误处理，必须返回一个tuple(status, output)；
- [节点路径]为从根节点到目标节点的完整层级路径；
**核心要求**
- 指令之间必须要有一个空行；
- 需确保测试指令的节点路径和指令的目标节点一致,例如：针对子节点的测试指令，节点路径不能指向当前节点；
**响应示例**
{\"action\":\"add_node\", \"parent\": \"192.168.1.100\", \"nodes\": \"3306端口,22端口\"}

```dash-[目标系统->192.168.1.100->3306端口]
mysql -u root -p 192.168.1.100
```
'''
        system_message = {"role": "system", "content": system_prompt}
        # The message list holds exactly one entry: the system message.
        node.parent_messages = [system_message]

    # 调用LLM生成指令
    def get_llm_instruction(self,prompt,node,DataFilter):
        '''
        Send ``prompt`` to the LLM in the context of ``node`` and parse the reply.

        The LLM API keeps no request context, so the full history
        (``node.parent_messages`` followed by ``node.cur_messages``) is sent
        on every call; calls for one task must therefore not run concurrently.

        :param prompt: the user input for this turn.
        :param node: object exposing ``parent_messages`` and ``cur_messages``
                     lists; the user message and, on success, the assistant
                     reply are appended to ``cur_messages``.
        :param DataFilter: object whose ``filter_instruction(content)`` result
                           is handed to ``fetch_instruction``.
        :return: ``(ok, node_cmds, commands, reasoning_content, content)``;
                 on failure ``ok`` is False, the middle three items are empty
                 strings and the last item is an error description.
        '''
        # Append this turn's input to the node's message queue.
        # NOTE(review): the user message stays in the history when the call
        # fails - confirm callers expect that before retrying.
        node.cur_messages.append({"role": "user", "content": prompt})

        sendmessage = [*node.parent_messages, *node.cur_messages]
        # Request parameters shared by every model.
        params = {
            "model": self.model,
            "messages": sendmessage,
        }
        # Extra parameters required by specific models.
        stream = False
        if self.model == "o3-mini-2025-01-31":
            params["reasoning_effort"] = "high"
        elif self.model == "qwen3-235b-a22b":
            stream = True
            params["stream"] = stream
            params["extra_body"] = {"enable_thinking": True,"thinking_budget": 3000}

        reasoning_content = ""
        content = ""
        try:
            response = self.client.chat.completions.create(**params)
            if stream:
                # A streamed reply is consumed inside the try block so that
                # errors raised while reading chunks are handled as well.
                reasoning_parts = []
                content_parts = []
                for chunk in response:
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta
                    reasoning_piece = getattr(delta, "reasoning_content", None)
                    if reasoning_piece is not None:
                        reasoning_parts.append(reasoning_piece)
                    content_piece = getattr(delta, "content", None)
                    if content_piece:
                        content_parts.append(content_piece)
                reasoning_content = "".join(reasoning_parts)
                content = "".join(content_parts)
            else:
                if not response.choices:
                    self.logger.error("LLM返回结果为空")
                    return False, "", "", "", "LLM返回结果为空"
                choice = response.choices[0].message
                # Read the reply the same way for every model;
                # reasoning_content is optional and absent for most of them.
                reasoning_content = getattr(choice, "reasoning_content", None) or ""
                content = choice.content or ""
        except APITimeoutError:
            self.logger.error("LLM API 请求超时")
            return False, "","","", f"调用超时（model={self.model})"
        except APIConnectionError as e:
            self.logger.error(f"网络连接错误: {e}")
            return False, "","", "", "网络连接错误"
        except OpenAIError as e:
            # Covers API errors such as 400/401/403/500.
            self.logger.error(f"LLM API 错误: {e}")
            return False, "","", "", f"API错误: {e}"
        except Exception as e:
            # Last-resort guard against unexpected failures.
            self.logger.exception("调用 LLM 时出现未预期异常")
            return False, "","", "", f"未知错误: {e}"

        # Record the reply in the node's LLM history.
        node.cur_messages.append({'role': 'assistant', 'content': content})
        print(content)
        real_con = DataFilter.filter_instruction(content)
        # Extract the instructions according to the agreed format.
        node_cmds,commands = self.fetch_instruction(real_con)
        return True,node_cmds,commands,reasoning_content, content

    def fetch_instruction(self,response_text):
        '''
        Parse the text returned by the LLM into node commands and test commands.

        This function only extracts and formats; acting on the node operations
        and running the commands is left to other modules. It is written to
        tolerate replies that do not follow the agreed format exactly.

        Two kinds of items are extracted:
        1. Test commands in fenced blocks: ```python-[path]code``` yields
           "python-code [path]code", ```dash-[path]cmd``` yields "[path]cmd".
           A python/dash fence without the "-" separator is logged and skipped.
        2. Node-control commands: JSON objects found in the text outside
           those fenced blocks, examined per blank-line separated chunk.

        :param response_text: reply text from the LLM; empty or None yields no commands.
        :return: tuple ``(node_cmds, commands)`` - ``node_cmds`` is a list of
                 decoded JSON objects, ``commands`` a list of command strings,
                 both in document order.
        '''
        if not response_text:
            return [], []

        # One pattern is used both for extracting and for removing the fenced
        # blocks, so the two steps cannot disagree about what counts as a block.
        fence_pattern = re.compile(r"```(python|dash)(.*?)```", flags=re.DOTALL)
        # JSON object with at most one level of nested braces.
        json_pattern = re.compile(r'\{(?:[^{}]|\{[^{}]*\})*\}')

        commands = []
        for fence in fence_pattern.finditer(response_text):
            kind, body = fence.group(1), fence.group(2)
            if not body.startswith("-"):
                # Not in the agreed "kind-[node path]" form; it cannot be tied to a node.
                self.logger.error(f"LLM返回的{kind}代码块缺少'-[节点路径]'前缀，已忽略")
                continue
            block = body[1:].strip()
            commands.append(f"python-code {block}" if kind == "python" else block)

        # Replace every fenced block by a blank line so that code is never
        # scanned for JSON, while text next to a block still is.
        remaining = fence_pattern.sub("\n\n", response_text)

        node_cmds = []
        # Split on one or more blank lines.
        for part in re.split(r'\n\s*\n', remaining):
            part = part.strip()
            if not part:
                continue
            for candidate in json_pattern.findall(part):
                try:
                    node_cmds.append(json.loads(candidate))
                except json.JSONDecodeError as e:
                    # Text that cannot be decoded is logged and not queued.
                    self.logger.error(f"LLM-{part}-JSON 解析错误: {e}")
        return node_cmds,commands

    def test_llm(self):
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "讲个笑话吧。"}
        ]
        response = self.client.chat.completions.create(
            model=self.model,
            reasoning_effort="medium",
            messages=messages
        )
        print(response)

if __name__ == "__main__":
    # Script entry point: build a manager with provider selector 3
    # (the GPT branch of LLMManager.__init__).
    llm = LLMManager(illm_type=3)