import base64
import re
import os
import sys
# Leading byte signatures mapped to the short label used in output file names.
MAGIC_NUMBERS = {
    b"\x50\x4B\x03\x04": "xlsx_zip",
    b"\xFF\xD8\xFF": "jpeg",
    b"\x89\x50\x4E\x47": "png",
    b"\x25\x50\x44\x46": "pdf",
    b"\x1A\x45\xDF\xA3": "webm_mkv",
}


def find_magic(data):
    """Locate the earliest known file signature inside *data*.

    Every signature in ``MAGIC_NUMBERS`` is searched for, and the one that
    occurs at the lowest offset wins. Picking by offset (rather than by
    dictionary order) keeps a container format from being misreported as
    something embedded inside it, e.g. a PDF that carries a JPEG stream.

    Args:
        data: Bytes to scan.

    Returns:
        ``(offset, label)`` for the first signature found, or
        ``(-1, "unknown")`` when none of the signatures occur.
    """
    best_idx, best_name = -1, "unknown"
    for magic, name in MAGIC_NUMBERS.items():
        idx = data.find(magic)
        if idx != -1 and (best_idx == -1 or idx < best_idx):
            best_idx, best_name = idx, name
    return best_idx, best_name
def extract_body(raw_response):
    """Return the part of a raw HTTP response that follows the header block.

    The header block ends at the first blank line, written either as
    ``\\r\\n\\r\\n`` or as ``\\n\\n``. Whichever separator occurs first is
    used, so an LF-only header block is not skipped over in favour of a
    CRLF blank line that happens to appear later inside the body.

    Args:
        raw_response: Raw response bytes (headers followed by body).

    Returns:
        The bytes after the first header/body separator, or *raw_response*
        unchanged when no separator is present.
    """
    best = None  # (offset, separator length) of the earliest separator
    for sep in (b"\r\n\r\n", b"\n\n"):
        idx = raw_response.find(sep)
        if idx != -1 and (best is None or idx < best[0]):
            best = (idx, len(sep))
    if best is None:
        return raw_response
    return raw_response[best[0] + best[1]:]
def extract_tag(block, tag):
    """Extract the content of ``<tag ...>content</tag>`` or ``<tag>content</tag>``.

    The tag name must match exactly: after the name only whitespace (followed
    by attributes) or the closing ``>`` is accepted. Without that boundary a
    lookup for ``response`` would start matching at ``<responselength>`` and
    return everything up to ``</response>``.

    Args:
        block: Text to search.
        tag: Element name; it is escaped before being placed in the pattern.

    Returns:
        The stripped content of the first matching element, or ``""`` when
        the element is not found.
    """
    pattern = r"<{0}(?:\s[^>]*)?>(.*?)</{0}\s*>".format(re.escape(tag))
    m = re.search(pattern, block, re.DOTALL)
    return m.group(1).strip() if m else ""
def extract_tag_attr(block, tag, attr):
    """Extract the value of *attr* from ``<tag attr="value">``.

    Matching is case-insensitive. Both the tag name and the attribute name
    must match as whole names, so ``response`` does not match
    ``<responselength ...>`` and ``base64`` does not match ``notbase64=``.
    Double- and single-quoted values are accepted.

    Args:
        block: Text to search.
        tag: Element name; escaped before being placed in the pattern.
        attr: Attribute name; escaped before being placed in the pattern.

    Returns:
        The stripped attribute value of the first match, or ``""`` when the
        tag or attribute is absent.
    """
    pattern = (
        r"<{0}(?=[\s/>])[^>]*?\s{1}\s*=\s*(?:\"([^\"]*)\"|'([^']*)')"
    ).format(re.escape(tag), re.escape(attr))
    m = re.search(pattern, block, re.DOTALL | re.IGNORECASE)
    if not m:
        return ""
    # Exactly one of the two groups participates, depending on the quote used.
    value = m.group(1) if m.group(1) is not None else m.group(2)
    return value.strip()
def strip_cdata(text):
    """Return only the content of ``<![CDATA[content]]>``.

    Text that is not wrapped in a CDATA section is returned unchanged.
    When the content was split into adjacent CDATA sections (the usual way
    of escaping a literal ``]]>``), the section boundaries are removed so
    the pieces are joined back together instead of leaking markup into the
    result.

    Args:
        text: Element content, expected to start with ``<![CDATA[`` and end
            with ``]]>`` when it is CDATA-wrapped.

    Returns:
        The unwrapped content, or *text* itself when it is not CDATA-wrapped.
    """
    m = re.match(r"<!\[CDATA\[(.*)\]\]>$", text, re.DOTALL)
    if m is None:
        return text
    # "]]><![CDATA[" is the seam between two back-to-back sections.
    return m.group(1).replace("]]><![CDATA[", "")
def parse_burp_xml(xml_path):
    """Read a Burp XML export and return one dict per ``<item>`` element.

    The file is scanned with regular expressions rather than an XML parser,
    so the XML declaration and DOCTYPE are ignored entirely. If no
    ``<item>`` block is found, an error is printed and the process exits
    with status 1.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        A list of dicts with the keys ``index``, ``url``, ``method``,
        ``status``, ``resp_raw`` (response text with any CDATA wrapper
        removed) and ``resp_b64`` (True when the ``<response>`` tag carries
        ``base64="true"``).
    """
    with open(xml_path, "rb") as handle:
        data = handle.read()

    # Drop a leading UTF-8 byte-order mark.
    utf8_bom = b"\xef\xbb\xbf"
    if data[:len(utf8_bom)] == utf8_bom:
        data = data[len(utf8_bom):]

    # Remove NUL bytes, then decode leniently so bad bytes cannot abort parsing.
    text = data.replace(b"\x00", b"").decode("utf-8", errors="replace")

    # Pull out every <item> block with a regex (DOCTYPE / XML declaration ignored).
    blocks = re.findall(r"<item>(.*?)</item>", text, re.DOTALL)
    if not blocks:
        print("[ERROR] <item> 블록을 찾을 수 없습니다.")
        print(" 파일 구조를 확인하세요: 메모장으로 열어서 <item> 태그 존재 여부 확인")
        sys.exit(1)

    def field(block, tag):
        # Element content with any CDATA wrapper removed.
        return strip_cdata(extract_tag(block, tag))

    return [
        {
            "index": position,
            "url": field(block, "url"),
            "method": field(block, "method"),
            "status": field(block, "status"),
            "resp_raw": field(block, "response"),
            # The base64 attribute on <response> says whether the body is encoded.
            "resp_b64": extract_tag_attr(block, "response", "base64").lower() == "true",
        }
        for position, block in enumerate(blocks)
    ]
def _safe_name_part(value):
    """Return *value* with characters unsuitable for a file name replaced by ``_``."""
    return re.sub(r"[^A-Za-z0-9._-]+", "_", str(value))


def process_item(item, output_dir):
    """Decode one parsed item's response and save its payload as .hex and .bin.

    Steps: decode the response (base64 when ``item["resp_b64"]`` is true,
    otherwise UTF-8 encode the text), cut off the HTTP headers with
    ``extract_body``, look for a known signature with ``find_magic`` and
    keep the body from that offset on (or the whole body when no signature
    is found), then write a hex dump and a binary copy into *output_dir*.
    Progress and errors are reported with ``print``; nothing is returned.

    Args:
        item: Dict with the keys ``index``, ``url``, ``method``, ``status``,
            ``resp_raw`` and ``resp_b64``.
        output_dir: Directory for the output files; created if missing.
    """
    raw = item["resp_raw"]
    if not raw:
        print(" [SKIP] 응답 없음")
        return

    if item["resp_b64"]:
        try:
            decoded = base64.b64decode(raw)
        except ValueError as e:  # binascii.Error is a ValueError subclass
            print(" [ERROR] base64 디코딩 실패: {}".format(e))
            return
    else:
        # The text was obtained by decoding the file as UTF-8, so encode it
        # the same way; latin-1 would turn every other character into "?".
        decoded = raw.encode("utf-8", errors="replace")

    body = extract_body(decoded)
    magic_idx, magic_name = find_magic(body)
    if magic_idx == -1:
        print(" [WARN] 매직넘버 미발견 → 바디 전체 저장")
        payload = body
        magic_name = "raw"
    else:
        payload = body[magic_idx:]
        print(" [OK] 매직넘버: {} (offset={})".format(magic_name, magic_idx))

    os.makedirs(output_dir, exist_ok=True)
    # method/status come straight from the XML; keep path separators and
    # other unsafe characters out of the file name.
    base_name = "{:03d}_{}_{}_{}".format(
        item["index"],
        _safe_name_part(item["method"]),
        _safe_name_part(item["status"]),
        magic_name,
    )

    # Save the hex dump: a small header, then 16 space-separated bytes per line.
    hex_path = os.path.join(output_dir, base_name + ".hex")
    try:
        with open(hex_path, "w", encoding="utf-8") as f:
            f.write("# URL : {}\n".format(item["url"]))
            f.write("# Status : {}\n".format(item["status"]))
            f.write("# Bytes : {}\n#\n".format(len(payload)))
            for offset in range(0, len(payload), 16):
                f.write(payload[offset:offset + 16].hex(" ") + "\n")
        print(" → hex : {}".format(hex_path))
    except OSError as e:
        print(" [ERROR] hex 저장 실패: {}".format(e))

    # Save the raw binary payload.
    bin_path = os.path.join(output_dir, base_name + ".bin")
    try:
        with open(bin_path, "wb") as f:
            f.write(payload)
        print(" → bin : {} ({} bytes)".format(bin_path, len(payload)))
    except OSError as e:
        print(" [ERROR] bin 저장 실패: {}".format(e))
def main():
    """Command-line entry point: parse a Burp XML export and extract each response.

    Reads the XML path from ``sys.argv[1]`` and an optional output directory
    from ``sys.argv[2]`` (default ``./extracted``). Prints the usage text and
    exits with status 0 when no path is given; prints an error and exits
    with status 1 when the path is not an existing regular file.
    """
    if len(sys.argv) < 2:
        print("Usage: python burp_extract.py <burp_save.xml> [output_dir]")
        sys.exit(0)

    xml_path = sys.argv[1]
    output_dir = sys.argv[2] if len(sys.argv) > 2 else "./extracted"

    # isfile (not exists): a directory path would otherwise pass this check
    # and fail later inside open() with an unhandled exception.
    if not os.path.isfile(xml_path):
        print("[ERROR] 파일 없음: {}".format(xml_path))
        sys.exit(1)

    items = parse_burp_xml(xml_path)
    print("[*] {}개 아이템 파싱 완료\n".format(len(items)))

    for item in items:
        print("[{:03d}] {} {} (HTTP {})".format(
            item["index"], item["method"], item["url"], item["status"]
        ))
        process_item(item, output_dir)
        print()
# Run the extractor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()