import requests
import shlex
import re
import json
from bs4 import BeautifulSoup
from tools.ToolBase import ToolBase

class CurlTool(ToolBase):
    def __init__(self):
        super().__init__()
        self.headers = {}
        self.url = None
        self.verify_ssl = True

    # Parse a curl command line into requests-style parameters
    def parse_curl_to_requests(self, curl_command):
        # Split command preserving quoted strings
        parts = shlex.split(curl_command)
        if parts[0] != 'curl':
            raise ValueError("Command must start with 'curl'")
        # Parse curl flags and arguments
        i = 1
        while i < len(parts):
            arg = parts[i]
            if arg == '-k' or arg == '--insecure':
                self.verify_ssl = False
                i += 1
            elif arg == '-s' or arg == '--silent':
                # Silencing isn't needed for requests, just skip
                i += 1
            elif arg == '-H' or arg == '--header':
                if i + 1 >= len(parts):
                    raise ValueError("Missing header value after -H")
                header_str = parts[i + 1]
                header_name, header_value = header_str.split(':', 1)
                self.headers[header_name.strip()] = header_value.strip()
                i += 2
            elif not arg.startswith('-'):
                if self.url is None:
                    self.url = arg
                i += 1
            else:
                i += 1
        if self.url is None:
            raise ValueError("No URL found in curl command")
        #return url, headers, verify_ssl
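        # Illustrative sketch (not part of the original flow): the fields parsed
        # above map onto a standard requests call roughly like this; the call
        # itself is hypothetical and never issued by this class:
        #   resp = requests.get(self.url, headers=self.headers, verify=self.verify_ssl)
        #   print(resp.status_code, resp.headers.get("Content-Type"))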

    def validate_instruction(self, instruction_old):
        #instruction = instruction_old
        # Filter / normalize the instruction
        timeout = 0
        # Add -i so the response headers are included in the output
        parts = instruction_old.split()
        if 'base64 -d' in instruction_old:
            return instruction_old, timeout
        if '-i' not in parts and '--include' not in parts:
            url_index = next((i for i, p in enumerate(parts) if p.startswith(('http://', 'https://'))), None)
            if url_index is not None:
                # Insert -i right before the URL
                parts.insert(url_index, '-i')
            else:
                # No URL found, append -i at the end
                parts.append('-i')
        return ' '.join(parts), timeout
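        # Example of what this normalization does (illustrative values only):
        #   "curl -kv https://203.0.113.10/"  ->  ("curl -kv -i https://203.0.113.10/", 0)
        #   a command containing "base64 -d" is returned unchanged (plus the timeout).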
    # def execute_instruction(self, instruction_old):
    #     '''
    #     Execute an instruction: validate it -> run it -> analyze the result
    #     :param instruction_old:
    #     :return:
    #         bool: True - result is returned to the LLM, False - result is not returned to the LLM
    #         str: the instruction that was executed
    #         str: the result of executing the instruction
    #     '''
    #
    #     # Step 1: validate the instruction
    #     instruction = self.validate_instruction(instruction_old)
    #     if not instruction:
    #         return False, instruction_old, "This instruction will not be executed!"
    #
    #     # Step 2: execute the instruction --- via requests
    #     # print(f"Executing instruction: {instruction}")
    #     output = ""
    #
    #     # Step 3: analyze the execution result
    #     analysis = self.analyze_result(output, instruction)
    #     # Store the instruction and result in the database
    #     # ?
    #     if not analysis:  # analysis is "" -> do not submit to the LLM
    #         return False, instruction, analysis
    #     return True, instruction, analysis

    def get_ssl_info(self, stderr, stdout):
        # --------------------------
        # Security relevance of the extracted information:
        #
        # - If the certificate Common Name does not match the requested IP
        #   (e.g. 'crnn.f3322.net' here), the service may be hiding its real
        #   identity or be misconfigured; this is useful as reconnaissance
        #   for later stages of an attack.
        #
        # - TLS connection details (e.g. TLS1.3 and the cipher suite) help judge
        #   whether weak ciphers or legacy protocol versions are in use.
        #
        # - The HTTP status and Content-Type confirm that a legitimate web service
        #   is responding, and the HTML title hints that a SoftEther VPN Server is
        #   actually running, which may have default settings or known vulnerabilities.
        #
        # These details can support further probing, vulnerability verification and
        # penetration testing.
        # --------------------------
        # Extract certificate and TLS information from stderr
        # Extract the Common Name (CN)
        cn_match = re.search(r"common name:\s*([^\s]+)", stderr, re.IGNORECASE)
        cert_cn = cn_match.group(1) if cn_match else "N/A"
        # Extract TLS connection info (e.g. TLS1.3 and the cipher suite)
        tls_match = re.search(r"SSL connection using\s+([^\n]+)", stderr, re.IGNORECASE)
        tls_info = tls_match.group(1).strip() if tls_match else "N/A"
        # Extract the Issuer
        issuer_match = re.search(r"issuer:\s*(.+)", stderr, re.IGNORECASE)
        issuer_info = issuer_match.group(1).strip() if issuer_match else "N/A"
        # Extract the HTTP response headers and HTML title from stdout
        # Split headers and body (assuming they are separated by a blank line)
        parts = stdout.split("\n\n", 1)
        headers_part = parts[0]
        body_part = parts[1] if len(parts) > 1 else ""
        # Extract the status line and a few common headers from the header block
        lines = headers_part.splitlines()
        http_status = lines[0] if lines else "N/A"
        content_type_match = re.search(r"Content-Type:\s*(.*)", headers_part, re.IGNORECASE)
        content_type = content_type_match.group(1).strip() if content_type_match else "N/A"
        # Use BeautifulSoup to extract the HTML <title>
        soup = BeautifulSoup(body_part, "html.parser")
        html_title = soup.title.string.strip() if soup.title and soup.title.string else "N/A"
        # --------------------------
        # Assemble the extracted information
        # print("=== Extracted information ===")
        # print("HTTP status line:", http_status)
        # print("Content-Type:", content_type)
        # print("HTML Title:", html_title)
        # print("TLS connection info:", tls_info)
        # print("Certificate Common Name:", cert_cn)
        # print("Certificate Issuer:", issuer_info)
        result = (f"HTTP status line: {http_status}, Content-Type: {content_type}, "
                  f"HTML Title: {html_title}, TLS connection info: {tls_info}, "
                  f"Certificate Common Name: {cert_cn}, Certificate Issuer: {issuer_info}")
        return result
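        # Illustrative sketch of the kind of `curl -v` stderr lines the regexes
        # above are aimed at (exact wording varies by curl version; these sample
        # values are made up for illustration):
        #   * SSL connection using TLSv1.3 / TLS_AES_256_GCM_SHA384
        #   * common name: crnn.f3322.net
        #   * issuer: CN=crnn.f3322.net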

    def get_info_xpost(self, stdout, stderr):
        """
        Extract the key information from the result of running curl via subprocess.run:
        - HTTP status code
        - common response headers (Content-Type, Content-Length)
        - HTML page title (if the content is HTML)
        - the first 200 characters of the body (body_snippet)
        - TLS/certificate details (extracted from the verbose output on stderr)
        For anything that cannot be matched, "Not found" or an empty string is returned.
        """
        info = {}
        # Process stdout: split the response headers from the body (assumed to be separated by a blank line)
        parts = re.split(r'\r?\n\r?\n', stdout, maxsplit=1)
        # *************** Parsing approach 1
        # headers_str = parts[0] if parts else ""
        # body = parts[1] if len(parts) > 1 else ""
        #
        # # Extract the HTTP status code from the first header line, e.g. "HTTP/1.1 202 OK"
        # header_lines = headers_str.splitlines()
        # *************** Parsing approach 2
        if len(parts) == 2:
            headers_str, body = parts
        else:
            # If the split failed, stdout probably contains only the body;
            # try to get the HTTP status line from stderr instead
            headers_str = ""
            body = stdout
        # If no headers were found in stdout, fall back to stderr (some of the information may be there)
        if not headers_str:
            header_lines = stderr.splitlines()
        else:
            header_lines = headers_str.splitlines()
        # **************************
        if header_lines:
            status_line = header_lines[0]
            status_match = re.search(r'HTTP/\d+\.\d+\s+(\d+)', status_line)
            info['status_code'] = status_match.group(1) if status_match else "Unknown"
        else:
            info['status_code'] = "No headers found"
        # Extract common response headers
        for line in header_lines:
            if line.lower().startswith("content-type:"):
                info['content_type'] = line.split(":", 1)[1].strip()
            elif line.lower().startswith("content-length:"):
                info['content_length'] = line.split(":", 1)[1].strip()
        # Fall back to defaults if nothing matched
        info.setdefault('content_type', "Not found")
        info.setdefault('content_length', "Not found")
        # If the content is HTML, extract the <title> tag with BeautifulSoup
        if "html" in info['content_type'].lower():
            try:
                soup = BeautifulSoup(body, "html.parser")
                if soup.title and soup.title.string:
                    info['html_title'] = soup.title.string.strip()
                else:
                    info['html_title'] = "Not found"
            except Exception as e:
                info['html_title'] = f"Error: {e}"
        else:
            info['html_title'] = "N/A"
        # Keep part of the body for later analysis
        info['body_snippet'] = body[:200]  # first 200 characters
        # Process the TLS/certificate information in stderr: keep only lines containing the keywords
        tls_info_lines = []
        cert_info_lines = []
        for line in stderr.splitlines():
            # Filter for lines related to the TLS/SSL handshake and certificates
            if "SSL connection using" in line or "TLS" in line:
                tls_info_lines.append(line.strip())
            if "certificate" in line.lower():
                cert_info_lines.append(line.strip())
        info['tls_info'] = tls_info_lines if tls_info_lines else "Not found"
        info['certificate_info'] = cert_info_lines if cert_info_lines else "Not found"
        # Optional: keep the full verbose output for later analysis
        # info['verbose'] = stderr
        # Serialize to a string
        result = json.dumps(info)
        return result
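        # Illustrative shape of the JSON string this returns (all values made up):
        #   {"status_code": "200", "content_type": "text/html", "content_length": "Not found",
        #    "html_title": "Example", "body_snippet": "<html>...",
        #    "tls_info": ["SSL connection using TLSv1.3 / ..."], "certificate_info": "Not found"}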

    def analyze_result(self, result, instruction, stderr, stdout):
        # Analyze the result of the instruction
        if ("-H " in instruction and "Host:" in instruction):  # cross-site hijacking based on the certificate domain
            if "HTTP/1.1 200" in result:  # and "<urlset" in result:
                # result = "Issue found: sitemap content retrieved via the Host header."
                result = "Returned 200, issue present"  # the response content needs further analysis
            else:
                result = "No issue detected"
        elif ("-H " in instruction and "Range:" in instruction):
            if "HTTP/1.1 200 OK" in result:
                result = "The 200 OK page was returned normally"
            elif "HTTP/1.1 416" in result:
                if "Content-Range: bytes" in result:
                    result = "416 returned with a normal Content-Range: the server handles Range requests correctly."
                elif "Content-Range:" in result:
                    result = "416 returned but the Content-Range header is missing or abnormal"
            else:  # another status code was returned: needs further analysis
                result = "The server handles Range requests correctly"
        elif ("-H " in instruction and "Referer:" in instruction):
            if "HTTP/1.1 200" in result:
                result = "This vulnerability cannot be exploited"
            else:  # keep the original result
                pass
        elif ("resource=config.php" in instruction):
            if "base64: 无效的输入" in result:  # localized "base64: invalid input" error message
                result = "This vulnerability cannot be exploited"
        elif ("Date:" in instruction):  # keep the original result
            print("")
        elif ("-kv https://" in instruction or "-vk https://" in instruction):
            result = self.get_ssl_info(stderr, stdout)
        elif ("-X POST " in instruction):
            result = self.get_info_xpost(stdout, stderr)
        elif ("-v " in instruction):  # e.g. curl -v http://192.168.204.137:8180/manager/html --user admin:admin, generic parsing of curl output
            result = self.get_info_xpost(stdout, stderr)
        else:  # result of an unhandled command, not submitted to the LLM for now
            result = ""
        return result


if __name__ == "__main__":
    import subprocess

    CT = CurlTool()
    strinstruction = "curl -kv -X POST -d \"username=admin&password=admin\" https://58.216.217.70/vpn/index.html --connect-timeout 10"
    instruction, time_out = CT.validate_instruction(strinstruction)
    if instruction:
        result = subprocess.run(instruction, shell=True, capture_output=True, text=True)
        res = CT.analyze_result(result.stdout, instruction, result.stderr, result.stdout)
        print(res)