zfsafe/mycode/ControlCenter.py

#自动化测试逻辑规则控制
#统一控制规则  和 渗透测试树的维护
import json
import re
import queue
import time
import threading
import pickle
from mycode.AttackMap import AttackTree
from mycode.AttackMap import TreeNode
from mycode.LLMManager import LLMManager
from myutils.ConfigManager import myCongif  #单一实例
from myutils.MyLogger_logger import LogHandler
from mycode.DBManager import DBManager

class ControlCenter:
    def __init__(self,DBM,TM):
        self.logger = LogHandler().get_logger("ControlCenter")
        self.task_id = None
        self.target = None
        self.attack_tree = None
        self.DBM = DBM
        self.TM = TM
        #LLM对象
        self.LLM = LLMManager(myCongif.get_data("LLM_type"))
        self.llm_quere = queue.Queue()      #提交LLM指令的队列----
        self.max_thread_num = 1             # 控制最大并发指令数量
        self.llmth_list = []                  #llm线程list
        self.brun = False

    def __del__(self):
        self.brun =False
        self.task_id = None
        self.target = None
        self.attack_tree = None
        self.DBM = None

    def init_cc_data(self):
        #一次任务一次数据
        pass

    def get_user_init_info(self):
        '''开始任务初，获取用户设定的基础信息，初始信息可以分为两块：
        1.提交llm的补充信息 和 保留在本地的信息（如工具补充参数等，cookie）
        2.用户可以设置全局，和指定节点（端口）
        3.补充测试节点
        '''
        # ?包括是否对目标进行初始化的信息收集
        return {"已知信息":"无"}

    def start_do(self,target,task_id):
        '''一个新任务的开始'''
        self.task_id = task_id
        self.target = target
        #创建/初始化测试树
        if self.attack_tree:
            self.attack_tree = None #释放
        root_node = TreeNode(target,task_id)
        self.attack_tree = AttackTree(root_node)#创建测试树，同时更新根节点相关内容
        #初始化启动提示信息
        know_info = self.get_user_init_info()
        self.LLM.build_initial_prompt(root_node)
        #提交到待处理队列
        self.put_one_llm_work(know_info,root_node,1)

        # 启动LLM请求提交线程
        self.brun = True
        #启动线程
        ineed_create_num = self.max_thread_num - len(self.llmth_list) #正常应该就是0或者是max_num
        for i in range(ineed_create_num):
            l_th = threading.Thread(target=self.th_llm_worker)
            l_th.start()
            self.llmth_list.append(l_th)

    def put_one_llm_work(self,str_res,node,llm_type):
        '''提交任务到llm_quere'''
        if llm_type == 0:
            self.logger.debug("llm_type不能设置为0")
            return
        node.llm_type = llm_type    #目前处理逻辑中一个node应该只会在一个queue中，且只会有一次记录。所以llm_type复用
        node.res_quere.append(str_res)
        #提交到待处理队列
        self.llm_quere.put(node)

    def get_one_llm_work(self,node):
        '''获取该节点的llm提交数据，会清空type和res_quere'''
        llm_type = node.llm_type
        node.llm_type  = 0  #回归0
        res_list = node.res_quere[:]    #浅拷贝，复制第一层
        node.res_quere.clear()  #清空待处理数据，相当于把原应用关系接触
        return llm_type,res_list

    def restore_one_llm_work(self,node,llm_type,res_list):
        node.llm_type = llm_type
        node.res_quere = res_list

    #llm请求提交线程
    def th_llm_worker(self):#LLM没有修改的全局变量，应该可以共用一个client
        '''
        几个规则--TM的work线程同
        1.线程获取一个节点后，其他线程不能再获取这个节点（遇到被执行的节点，直接放弃执行）--- 加了没办法保存中间结果进行测试
        2.llm返回的指令，只可能是该节点，或者是子节点的，不符合这条规则的都不处理，避免llm处理混乱。
        :return:
        '''
        # 线程的dbm需要一个线程一个
        th_DBM = DBManager()
        th_DBM.connect()
        while self.brun:
            try:
                #节点锁
                node = self.llm_quere.get(block=False)
                self.get_llm_instruction(node,th_DBM)
                #释放锁

                # 暂存状态--测试时使用--限制条件llm工作线程只能未1个
                with open("attack_tree", 'wb') as f:
                    pickle.dump(self.attack_tree, f)

            except queue.Empty:
                self.logger.debug("llm队列中暂时无新的提交任务！")
                time.sleep(30)

    #约束1：一个节点只能同时提交一次，未测试的节点不要重复
    def get_llm_instruction(self,node,DBM):
        user_Prompt = ""
        ext_Prompt = ""
        llm_type, res_list = self.get_one_llm_work(node)
        try:
            res_str = json.dumps(res_list, ensure_ascii=False)
        except TypeError as e:
            self.logger.error(f"{res_list}序列化失败：{e},需要自查程序添加代码！")
            return  # 直接返回
        if llm_type == 0:
            self.logger.error("这里type不应该为0，请自查程序逻辑！")
            return
        #2025-3-20增加了llm_type
        user_Prompt = f'''
当前分支路径：{node.path}
当前节点信息：
- 节点名称：{node.name}
- 节点状态：{node.status}
- 漏洞类型：{node.vul_type}
'''
        if llm_type == 1:  #提交指令执行结果 --- 正常提交
            # 构造本次提交的prompt
            ext_Prompt = f'''
上一步结果：{res_str}
任务：生成下一步渗透测试指令或判断是否完成该节点测试。
'''
        elif llm_type ==2: #llm返回的指令存在问题，需要再次请求返回
            ext_Prompt = f'''
反馈类型：节点指令格式错误
错误信息：{res_str}
任务：请按格式要求重新生成该节点上一次返回中生成的所有指令。
'''
# '''
#         elif llm_type ==4: #未生成节点列表
#             ext_Prompt = f'''
# 反馈类型：需要继续补充信息
# 缺失信息：{res_str}
# 任务：
# 1.请生成这些节点的新增节点指令，并生成对应的测试指令；
# 2.这些节点的父节点为当前节点，请正确生成这些节点的节点路径；
# 3.若节点未能全部新增，必须返回未新增的节点列表
# 4.若有未生成指令的节点，必须返回未生成指令的节点列表。
# '''
        elif llm_type ==5:
            ext_Prompt = f'''
反馈类型：测试指令格式错误
错误信息：{res_str}
任务：请根据格式要求，重新生成该测试指令。
'''
        else:
            self.logger.debug("意外的类型参数")
            return
        if not ext_Prompt:
            self.logger.error("未成功获取本次提交的user_prompt")
            return
        #提交LLM
        user_Prompt = user_Prompt + ext_Prompt
        node_cmds, commands = self.LLM.get_llm_instruction(user_Prompt,DBM, node)  # message要更新
        '''
        对于LLM返回的错误处理机制
        1.验证节点是否都有测试指令返回
        2.LLM的回复开始反复时（有点难判断）
        '''
        # 更新tree
        bok,new_commands = self.tree_manager(node_cmds, node,commands,DBM)
        # 分析指令入对应节点
        if bok: #节点指令若存在错误，测试指令都不处理，需要LLM重新生成
            node_list = self.instr_in_node(new_commands, node)
            # 插入TM的node_queue中,交TM线程处理---除了LLM在不同的请求返回针对同一节点的测试指令，正常业务不会产生两次进队列
            for node in node_list:
                self.TM.node_queue.put(node)

    def verify_node_cmds(self,node_cmds,node):
        '''
        验证节点指令的合规性，持续维护
        :param node_cmds:
        :param node:
        :return: Flase 存在问题， True 合规
        '''
        strerror = ""
        for node_json in node_cmds:
            if "action" not in node_json:
                self.logger.error(f"缺少action节点：{node_json}")
                strerror = {"节点指令错误":f"{node_json}缺少action节点，不符合格式要求!"}
                break
            action = node_json["action"]
            if action == "add_node":
                if "parent" not in node_json or "status" not in node_json or "nodes" not in node_json:
                    strerror = {"节点指令错误": f"{node_json}不符合格式要求,缺少节点！"}
                    break
            elif action == "update_status":
                if "status" not in node_json or "node" not in node_json:
                    strerror = {"节点指令错误": f"{node_json}不符合格式要求,缺少节点！"}
                    break
            elif action =="no_instruction" or action=="no_create":
                if "nodes" not in node_json:
                    strerror = {"节点指令错误": f"{node_json}不符合格式要求,缺少节点！"}
                    break
            else:
                strerror = {"节点指令错误": f"{node_json}不可识别的action值！"}
                break
        if not strerror:
            return True
        #提交一个错误反馈任务
        self.put_one_llm_work(strerror,node,2)
        return False

    def tree_manager(self,node_cmds,node,commands,DBM):
        '''更新渗透测试树
        node_cmds是json-list
        2025-03-22添加commands参数，用于处理LLM对同一个节点返回了测试指令，但还返回了no_instruction节点指令
        '''
        if not node_cmds:    # or len(node_cmds)==0: 正常not判断就可以有没有节点指令
            return True,commands
        #对节点指令进行校验
        if not self.verify_node_cmds(node_cmds,node):
            return False,commands    #节点指令存在问题，终止执行

        #执行节点操作---先执行add_node，怕返回顺序不一直
        residue_node_cmds = []
        for node_json in node_cmds:
            action = node_json["action"]
            if action == "add_node":  # 新增节点
                parent_node_name = node_json["parent"]
                status = node_json["status"]
                node_names = node_json["nodes"].split(',')
                # 新增节点原则上应该都是当前节点增加子节点
                if node.name == parent_node_name or parent_node_name.endswith(node.name):
                    for node_name in node_names:
                        # 判重---遇到过补充未生成指令的节点时，返回了新增这些节点的指令
                        bfind = False
                        for node_child in node.children:
                            if node_child.name == node_name:
                                bfind = True
                                break
                        if not bfind:
                            # 添加节点
                            new_node = TreeNode(node_name, node.task_id, status)
                            node.add_child(new_node)  # message的传递待验证
                elif node.parent.name == parent_node_name or parent_node_name.endswith(node.parent.name):
                    #是添加当前节点的平级节点（当前节点的父节点下添加子节点） --使用2233ai-o3时遇到的情况
                    for node_name in node_names:
                        # 判重---遇到过补充未生成指令的节点时，返回了新增这些节点的指令
                        bfind = False
                        for node_child in node.parent.children:
                            if node_child.name == node_name:
                                bfind = True
                                break
                        if not bfind:
                            # 添加节点
                            new_node = TreeNode(node_name, node.task_id, status)
                            node.parent.add_child(new_node)
                else:
                    self.logger.error(f"添加子节点时，遇到父节点名称不一致的，需要介入！！{node_json}")  # 丢弃该节点
            else:#其他指令添加到list
                residue_node_cmds.append(node_json)

        #执行剩余的节点指令--不分先后
        for node_json in residue_node_cmds:
            action = node_json["action"]
            if action == "update_status":
                node_name = node_json["node"]
                status = node_json["status"]
                vul_type = "未发现"
                if node.name == node_name or node_name.endswith(node_name):
                    node.status = status
                    if "vulnerability" in node_json:
                        #{\"name\":\"漏洞名称\",\"risk\":\"风险等级(低危/中危/高危)\",\"info\":\"补充信息（没有可为空）\"}}；
                        vul_type = json.dumps(node_json["vulnerability"],ensure_ascii=False)    #json转字符串
                        try:
                            node.name = node_json["vulnerability"]["name"]
                            node.vul_grade = node_json["vulnerability"]["risk"]
                            node.vul_info = node_json["vulnerability"]["info"]
                        except:
                            self.logger.error("漏洞信息错误")
                    node.vul_type = vul_type
                else:
                    str_user = f"遇到不是修改本节点状态的，需要介入！！{node_json}"
                    self.logger.error(str_user)
                    self.need_user_know(str_user,node)
            elif action == "no_instruction":
                #返回的未生成指令的数据进行校验：1.要有数据；2.节点不是当前节点就是子节点
                nodes = []
                node_names = node_json["nodes"].split(',')
                for node_name in node_names:
                    #先判断是否在测试指令中，若在则不提交llm任务，只能接受在一次返回中同一节点有多条测试指令，不允许分次返回
                    bcommand = False
                    for com in commands:
                        if node_name in com:
                            bcommand = True
                            break
                    if bcommand:    #如果存在测试指令，则不把该节点放入补充信息llm任务
                            continue
                    #验证对应节点是否已经创建---本节点或子节点，其他节点不处理（更狠一点就是本节点都不行）
                    if node_name == node.name:
                        nodes.append(node_name)
                        # str_add = "请生成测试指令"
                        # self.put_one_llm_work(str_add,node,1)
                    else:
                        for child_node  in node.children:
                            if child_node.name == node_name:
                                nodes.append(node_name)
                                # str_add = "无"
                                # self.put_one_llm_work(str_add, child_node, 1)
                                break
                if nodes:   #阻塞式，在当前节点提交补充信息，完善节点指令 -- 优势是省token
                    new_commands = self.get_other_instruction(nodes,DBM,node)
                    commands.extend(new_commands)
            elif action == "no_create": #提交人工确认
                nodes = node_json["nodes"]
                if nodes:
                    str_add = {"未新增的节点": nodes}
                    self.logger.debug(str_add)
                    # 提交一个继续反馈任务--继续后续工作 2025-3-25不自动处理
                    # self.put_one_llm_work(str_add, node, 4)
                    # self.logger.debug(f"未新增的节点有：{nodes}")
            else:
                self.logger.error("****不应该执行到这！程序逻辑存在问题！")
        return True,commands

    #阻塞轮询补充指令
    def get_other_instruction(self,nodes,DBM,cur_node):
        res_str = ','.join(nodes)
        new_commands = []
        while res_str:
            self.logger.debug(f"开始针对f{res_str}这些节点请求测试指令")
            user_Prompt = f'''
        当前分支路径：{cur_node.path}
        当前节点信息：
        - 节点名称：{cur_node.name}
        - 节点状态：{cur_node.status}
        - 漏洞类型：{cur_node.vul_type}
        反馈类型：需要补充信息
        缺失信息：针对{res_str}的测试指令
        任务：
        1.请生成这些节点的测试指令；
        2.这些节点的父节点为当前节点，请正确生成这些节点的节点路径；
        3.若还有节点未能生成测试指令，必须返回未生成指令的节点列表。
        '''
            res_str = ""
            node_cmds, commands = self.LLM.get_llm_instruction(user_Prompt, DBM, cur_node)  # message要更新
            #把返回的测试指令进行追加
            new_commands.extend(commands)
            #判断是否还有未添加指令的节点
            for node_json in node_cmds: #正常应该只有一条no_instruction
                if "no_instruction" in node_json and "nodes" in node_json:
                    tmp_nodes = []
                    node_names = node_json["nodes"].split(',')
                    for node_name in node_names:
                        if node_name in nodes:
                            tmp_nodes.append(node_name)
                    res_str = ','.join(tmp_nodes)
                    break
        self.logger.debug("为添加指令的节点，都已完成指令的添加！")
        return new_commands

    def instr_in_node(self,commands,node):
        node_list = []  #一次返回的测试指令
        for command in commands:
            # 使用正则匹配方括号中的node_path（非贪婪模式）
            match = re.search(r'\[(.*?)\]', command)
            if match:
                node_path = match.group(1)
                #'''强制约束，不是本节点或者是子节点的指令不处理'''
                find_node = self.attack_tree.find_node_by_nodepath_parent(node_path,node)
                if find_node:
                    instruction = re.sub(r'\[.*?\]', "", command,count=1,flags=re.DOTALL)
                    find_node.instr_queue.append(instruction)
                    #入输出队列
                    if find_node not in node_list:
                        node_list.append(find_node)
                else:
                    self.logger.error(f"基于节点路径没有找到对应的节点{node_path},父节点都没找到！")#丢弃该指令
            else:
                self.logger.error(f"得到的指令格式不符合规范：{command}")#丢弃该指令---
                #这里对于丢弃指令，有几种方案：
                # 1.直接丢弃不处理，但需要考虑会不会产生节点缺失指令的问题，需要有机制验证节点；------ 需要有个独立线程，节点要加锁--首选待改进方案
                # 2.入当前节点的res_queue,但有可能当前节点没有其他指令，不会触发提交，另外就算提交，会不会产生预设范围外的返回，不确定；
                # 3.独立队列处理
        return node_list

    #需要用户确认的信息--待完善
    def need_user_know(self,strinfo,node):
        pass

    #待修改
    def is_user_instr(self,instr):
        '''
        过滤需要人工确认或手动执行的指令 ---- 待完善
        :param instr:
        :return:
        '''
        #if instr.startswith("curl") or instr.startswith("http") or instr.startswith("wget"):
        if instr.startswith("http") or instr.startswith("wget") or instr.startswith("ssh"):
            return True

    #指令入队列，待修改
    def instr_in_quere(self,instr_list):
        '''
        对于运行需要较长时间的不强求同一批次返回给LLM
        :param instr_list:
        :return:
        '''
        for instr in instr_list:
            if self.is_user_instr(instr):
                self.user_instr.put(instr)
                print(f"需要人工确认的指令{instr}")
            else:
                matched =False
                for prefix in self.long_time_instr:
                    if instr.startswith(prefix):
                        matched =True
                if not matched:
                    with self.lock:
                        self.batch_num += 1  #非耗时指令+1
                        print(f"&&&&&&当前batch_num:{self.batch_num}")
                else:
                    with self.lock:
                        self.long_instr_num +=1 #耗时指令数量+1
                # 指令入队列
                self.instr_queue.append(instr)

    def stop_do(self):
        #清空数据
        self.task_id = None
        self.target = None
        self.attack_tree = None
        #停止llm处理线程
        self.brun =False
        for th in self.llmth_list:
            th.jion()