import requests
import shlex
import re
import json
from bs4 import BeautifulSoup
from tools.ToolBase import ToolBase
class CurlTool(ToolBase):
    def __init__(self):
        super().__init__()
        self.headers = {}
        self.url = None
        self.verify_ssl = True

    # Parse a curl command line into requests-compatible parameters
    def parse_curl_to_requests(self, curl_command):
# Split command preserving quoted strings
parts = shlex.split(curl_command)
if parts[0] != 'curl':
raise ValueError("Command must start with 'curl'")
# Parse curl flags and arguments
i = 1
while i < len(parts):
arg = parts[i]
if arg == '-k' or arg == '--insecure':
self.verify_ssl = False
i += 1
elif arg == '-s' or arg == '--silent':
# Silencing isn't needed for requests, just skip
i += 1
elif arg == '-H' or arg == '--header':
if i + 1 >= len(parts):
raise ValueError("Missing header value after -H")
                header_str = parts[i + 1]
                if ':' not in header_str:
                    raise ValueError(f"Malformed header (expected 'Name: value'): {header_str}")
                header_name, header_value = header_str.split(':', 1)
                self.headers[header_name.strip()] = header_value.strip()
i += 2
elif not arg.startswith('-'):
if self.url is None:
self.url = arg
i += 1
else:
i += 1
if self.url is None:
raise ValueError("No URL found in curl command")
        return self.url, self.headers, self.verify_ssl
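
    # Usage sketch (illustrative values; the requests call shape is an
    # assumption, not confirmed elsewhere in this repo):
    #   tool = CurlTool()
    #   url, headers, verify = tool.parse_curl_to_requests(
    #       "curl -k -H 'Host: example.com' https://203.0.113.7/")
    #   resp = requests.get(url, headers=headers, verify=verify)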
    def validate_instruction(self, instruction_old):
        # Filter and normalize the instruction
        timeout = 0
        # Add -i so curl includes the response headers in stdout
        parts = instruction_old.split()
        if 'base64 -d' in instruction_old:
            # Return a (command, timeout) pair, consistent with the return below
            return instruction_old, timeout
if '-i' not in parts and '--include' not in parts:
url_index = next((i for i, p in enumerate(parts) if p.startswith(('http://', 'https://'))), None)
if url_index is not None:
                # Insert -i right before the URL
parts.insert(url_index, '-i')
            else:
                # No URL found; append -i at the end
                parts.append('-i')
        return ' '.join(parts), timeout
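
    # Example (illustrative): validate_instruction("curl -s https://192.0.2.10/")
    # returns ("curl -s -i https://192.0.2.10/", 0).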
    def get_ssl_info(self, stderr, stdout):
        # --------------------------
        # Security relevance of the extracted information:
        #
        # - If the certificate Common Name does not match the requested IP
        #   (e.g. 'crnn.f3322.net' here), the service may be hiding its real
        #   identity or be misconfigured; either way it is useful data for
        #   later information gathering.
        #
        # - TLS connection details (e.g. TLS1.3 and the cipher suite) help
        #   judge whether weak ciphers or legacy protocol versions are in use.
        #
        # - The HTTP status and Content-Type confirm that a real web service is
        #   answering, while an HTML title such as "SoftEther VPN Server"
        #   hints at default configurations or known vulnerabilities.
        #
        # All of this feeds further probing, vulnerability verification, and
        # penetration testing.
        # --------------------------
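        # Illustrative `curl -v` stderr lines the patterns below target (exact
        # wording varies across curl versions and TLS backends; assumption only):
        #   * SSL connection using TLSv1.3 / TLS_AES_256_GCM_SHA384
        #   *  common name: crnn.f3322.net (does not match '203.0.113.7')
        #   *  issuer: CN=R3; O=Let's Encrypt; C=US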
        # Extract certificate and TLS information from stderr
        # Common Name (CN)
cn_match = re.search(r"common name:\s*([^\s]+)", stderr, re.IGNORECASE)
cert_cn = cn_match.group(1) if cn_match else "N/A"
        # TLS connection info (e.g. TLS1.3 and the cipher suite)
tls_match = re.search(r"SSL connection using\s+([^\n]+)", stderr, re.IGNORECASE)
tls_info = tls_match.group(1).strip() if tls_match else "N/A"
        # Certificate issuer
issuer_match = re.search(r"issuer:\s*(.+)", stderr, re.IGNORECASE)
issuer_info = issuer_match.group(1).strip() if issuer_match else "N/A"
        # Extract the HTTP response headers and HTML title from stdout
        # Split headers from body (assumes a blank line separator; allow CRLF)
        parts = re.split(r'\r?\n\r?\n', stdout, maxsplit=1)
headers_part = parts[0]
body_part = parts[1] if len(parts) > 1 else ""
        # Pull the status line and a few common headers out of the header block
lines = headers_part.splitlines()
http_status = lines[0] if lines else "N/A"
content_type_match = re.search(r"Content-Type:\s*(.*)", headers_part, re.IGNORECASE)
content_type = content_type_match.group(1).strip() if content_type_match else "N/A"
        # Extract the HTML <title> with BeautifulSoup
soup = BeautifulSoup(body_part, "html.parser")
html_title = soup.title.string.strip() if soup.title and soup.title.string else "N/A"
        # --------------------------
        # Assemble the extracted information
        result = (f"HTTP status line: {http_status}, Content-Type: {content_type}, "
                  f"HTML Title: {html_title}, TLS connection: {tls_info}, "
                  f"certificate Common Name: {cert_cn}, certificate Issuer: {issuer_info}")
return result
    def get_info_curl(self, instruction, stdout, stderr):
        info = {}
        # Split stdout into response headers and body (separated by a blank line)
parts = re.split(r'\r?\n\r?\n', stdout, maxsplit=1)
if len(parts) == 2:
headers_str, body = parts
else:
            # Split failed: stdout probably holds only the body, so the HTTP
            # status line may have to come from stderr instead
headers_str = ""
body = stdout
        # No headers in stdout: fall back to stderr (some header data may be there)
if not headers_str:
header_lines = stderr.splitlines()
else:
header_lines = headers_str.splitlines()
        # Status code
        if header_lines:
            status_line = header_lines[0]
            # Accept "HTTP/1.1 200 OK" as well as HTTP/2-style "HTTP/2 200"
            status_match = re.search(r'HTTP/[\d.]+\s+(\d+)', status_line)
            info['status_code'] = status_match.group(1) if status_match else "Unknown"
else:
info['status_code'] = "No headers found"
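        # e.g. (illustrative) a first line of "HTTP/1.1 302 Found" yields
        # info['status_code'] == "302"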
        # Server header
m = re.search(r'^Server:\s*(.+)$', headers_str, re.MULTILINE)
if m:
info["server"] = m.group(1).strip()
        # Content-Type and Content-Length (setdefault below supplies the fallback)
for line in header_lines:
if line.lower().startswith("content-type:"):
info['content-type'] = line.split(":", 1)[1].strip()
elif line.lower().startswith("content-length:"):
info['content-length'] = line.split(":", 1)[1].strip()
info.setdefault('content-type', "Not found")
info.setdefault('content-length', "Not found")
        # If the content is HTML, extract the <title> tag with BeautifulSoup
if "html" in info['content-type'].lower():
try:
soup = BeautifulSoup(body, "html.parser")
if soup.title and soup.title.string:
info['html_title'] = soup.title.string.strip()
else:
info['html_title'] = "Not found"
except Exception as e:
info['html_title'] = f"Error: {e}"
else:
info['html_title'] = "N/A"
        # ------------ Body parsing ------------
if "phpinfo.php" in instruction:
info["configurations"] = {}
info["sensitive_info"] = {}
            # Try to extract the PHP version from the phpinfo table.
            # NOTE: the original pattern was mangled by HTML stripping; this is a
            # plausible reconstruction for markup like "PHP Version</td><td>8.1.2</td>".
            m = re.search(r'PHP Version\s*</td>\s*<td[^>]*>\s*([\d.]+)\s*</td>', body, re.IGNORECASE)
if m:
info["php_version"] = m.group(1).strip()
else:
                # Fallback: find the digits following "PHP Version" anywhere in the page
m = re.search(r'PHP\s*Version\s*([\d.]+)', body, re.IGNORECASE)
if m:
info["php_version"] = m.group(1).strip()
            # Extract configuration directives
            # (allow_url_include, display_errors, file_uploads, open_basedir)
            configs = ["allow_url_include", "display_errors", "file_uploads", "open_basedir"]
            for key in configs:
                # Match the HTML table form <td>key</td><td>value</td>
                # (pattern reconstructed; the original was mangled by HTML stripping)
                regex = re.compile(
                    r'<td[^>]*>\s*' + re.escape(key) + r'\s*</td>\s*<td[^>]*>\s*([^<]+?)\s*</td>',
                    re.IGNORECASE)
m = regex.search(body)
if m:
info["configurations"][key] = m.group(1).strip()
            # Extract sensitive values, using MYSQL_PASSWORD as the example
sensitive_keys = ["MYSQL_PASSWORD"]
for key in sensitive_keys:
                # Same reconstructed <td>key</td><td>value</td> pattern as above
                regex = re.compile(
                    r'<td[^>]*>\s*' + re.escape(key) + r'\s*</td>\s*<td[^>]*>\s*([^<]+?)\s*</td>',
                    re.IGNORECASE)
m = regex.search(body)
if m:
info["sensitive_info"][key] = m.group(1).strip()
elif "phpMyAdmin" in instruction:
info["security_info"] = {}
info["login_info"] = {}
            # Locate the username/password fields in the login form
            # (e.g. name="pma_username" and name="pma_password"); the <input>
            # patterns here are reconstructed from HTML-stripped originals
            m = re.search(r'<input[^>]+name=["\'](pma_username)["\']', body, re.IGNORECASE)
if m:
info["login_info"]["username_field"] = m.group(1).strip()
            m = re.search(r'<input[^>]+name=["\'](pma_password)["\']', body, re.IGNORECASE)
if m:
info["login_info"]["password_field"] = m.group(1).strip()
            # Security signals
            # csrf_protection: look for a hidden CSRF token field
            # (e.g. name="csrf_token" or name="token")
            m = re.search(r'<input[^>]+name=["\'](csrf_token|token)["\']', stdout, re.IGNORECASE)
            info["security_info"]["csrf_protection"] = bool(m)
            # httponly_cookie: does any Set-Cookie response header carry HttpOnly?
            m = re.search(r'Set-Cookie:.*HttpOnly', stdout, re.IGNORECASE)
            info["security_info"]["httponly_cookie"] = bool(m)
            # secure_cookie: does any Set-Cookie response header carry Secure?
            m = re.search(r'Set-Cookie:.*Secure', stdout, re.IGNORECASE)
            info["security_info"]["secure_cookie"] = bool(m)
        else:
            # Unrecognized page: keep a short preview of the body
            info['body_snippet'] = body[:200]  # first 200 characters
        if stderr:
            # TLS/certificate info from stderr: keep only lines with key phrases
tls_info_lines = []
cert_info_lines = []
for line in stderr.splitlines():
                # Keep lines related to the TLS/SSL handshake or certificates
if "SSL connection using" in line or "TLS" in line:
tls_info_lines.append(line.strip())
if "certificate" in line.lower():
cert_info_lines.append(line.strip())
info['tls_info'] = tls_info_lines if tls_info_lines else "Not found"
info['certificate_info'] = cert_info_lines if cert_info_lines else "Not found"
        # Serialize to a JSON string
        result = json.dumps(info, ensure_ascii=False)
return result
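
    # Minimal end-to-end sketch (hypothetical driver; running the command via
    # subprocess is an assumption, not something this class does itself):
    #   tool = CurlTool()
    #   cmd, timeout = tool.validate_instruction("curl -s https://192.0.2.10/")
    #   proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    #   print(tool.get_info_curl(cmd, proc.stdout, proc.stderr))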
    def analyze_result(self, result, instruction, stderr, stdout):
        # Analyze the result of the instruction
        if "-H " in instruction and "Host:" in instruction:  # cross-site hijack based on the certificate's domain name
            if "HTTP/1.1 200" in result:  # and "