import base64
import re
import os
import sys

# Known leading byte signatures ("magic numbers") mapped to the label that is
# reported to the user and embedded in output file names.
MAGIC_NUMBERS = {
    b"\x50\x4B\x03\x04": "xlsx_zip",
    b"\xFF\xD8\xFF":      "jpeg",
    b"\x89\x50\x4E\x47": "png",
    b"\x25\x50\x44\x46": "pdf",
    b"\x1A\x45\xDF\xA3": "webm_mkv",
}

def find_magic(data):
    """Locate the earliest known file signature inside *data*.

    Every signature in ``MAGIC_NUMBERS`` is searched for, and the one that
    occurs at the smallest offset wins. Choosing by offset (rather than by
    dictionary order) prevents a signature that merely appears somewhere
    deep inside another file's content from shadowing the real one.

    Args:
        data: Bytes to scan.

    Returns:
        A tuple ``(offset, name)`` for the earliest signature found, or
        ``(-1, "unknown")`` when no signature occurs in *data*.
    """
    best_idx, best_name = -1, "unknown"
    for magic, name in MAGIC_NUMBERS.items():
        idx = data.find(magic)
        # Keep the candidate only if it starts before the current best.
        if idx != -1 and (best_idx == -1 or idx < best_idx):
            best_idx, best_name = idx, name
    return best_idx, best_name

def extract_body(raw_response):
    """Return the part of a raw HTTP response that follows the header block.

    The header block ends at the first blank line, written either as
    ``\\r\\n\\r\\n`` or as ``\\n\\n``. Whichever separator occurs *first* is
    used, so a body that happens to contain the other separator is not cut
    in the wrong place.

    Args:
        raw_response: The raw response as bytes.

    Returns:
        The bytes after the first separator, or *raw_response* unchanged
        when no separator is present.
    """
    best_idx, best_len = -1, 0
    for sep in (b"\r\n\r\n", b"\n\n"):
        idx = raw_response.find(sep)
        if idx != -1 and (best_idx == -1 or idx < best_idx):
            best_idx, best_len = idx, len(sep)
    if best_idx == -1:
        return raw_response
    return raw_response[best_idx + best_len:]

def extract_tag(block, tag):
    """Extract the content of the first ``<tag ...>content</tag>`` element.

    The tag name must be followed by whitespace or ``>``, so asking for
    ``response`` does not start matching at ``<responselength>``. The tag
    name is escaped before being placed in the pattern, so it is always
    treated literally.

    Args:
        block: Text to search.
        tag: Element name to look for.

    Returns:
        The element content with surrounding whitespace stripped, or an
        empty string when no such element is found.
    """
    pattern = r"<{0}(?:\s[^>]*)?>(.*?)</{0}\s*>".format(re.escape(tag))
    m = re.search(pattern, block, re.DOTALL)
    return m.group(1).strip() if m else ""

def extract_tag_attr(block, tag, attr):
    """Extract the value of *attr* from the first matching ``<tag ...>``.

    Tag and attribute names are matched case-insensitively and as whole
    names: whitespace must separate the tag name from its attributes and
    must precede the attribute name, so ``xbase64`` does not satisfy a
    request for ``base64``. Both double- and single-quoted values are
    accepted. Names are escaped so they are always treated literally.

    Args:
        block: Text to search.
        tag: Element name.
        attr: Attribute name.

    Returns:
        The non-empty attribute value with surrounding whitespace stripped,
        or an empty string when the tag or attribute is not found.
    """
    pattern = (
        r"<{0}\s(?:[^>]*?\s)?{1}\s*=\s*"
        r"(?:\"([^\"]+)\"|'([^']+)')[^>]*>"
    ).format(re.escape(tag), re.escape(attr))
    m = re.search(pattern, block, re.DOTALL | re.IGNORECASE)
    if not m:
        return ""
    # Exactly one of the two groups participates, depending on quote style.
    value = m.group(1) if m.group(1) is not None else m.group(2)
    return value.strip()

def strip_cdata(text):
    """Return the content of a ``<![CDATA[content]]>`` wrapper.

    The wrapper must begin at the start of *text* and close at its end;
    any other input is returned unchanged.
    """
    wrapped = re.match(r"<!\[CDATA\[(.*)\]\]>$", text, re.DOTALL)
    if wrapped is None:
        return text
    return wrapped.group(1)

def parse_burp_xml(xml_path):
    """Read a saved XML file and return one dict per ``<item>`` element.

    The file is read as bytes, a leading UTF-8 BOM and all NUL bytes are
    removed, and the result is decoded as UTF-8 with undecodable bytes
    replaced. ``<item>`` blocks are then located with a regular expression
    instead of an XML parser, so the XML declaration and DOCTYPE are
    ignored entirely.

    Each returned dict has the keys ``index``, ``url``, ``method``,
    ``status``, ``resp_raw`` (response text with any CDATA wrapper removed)
    and ``resp_b64`` (True when the response tag carries ``base64="true"``).

    Prints an error and exits the process with status 1 when the file
    contains no ``<item>`` block.
    """
    with open(xml_path, "rb") as handle:
        data = handle.read()

    # Drop a leading UTF-8 byte-order mark.
    bom = b"\xef\xbb\xbf"
    if data[:len(bom)] == bom:
        data = data[len(bom):]

    # Remove NUL bytes, then decode leniently.
    document = data.replace(b"\x00", b"").decode("utf-8", errors="replace")

    # Pull out every <item> block with a regex (declaration/DOCTYPE ignored).
    blocks = re.findall(r"<item>(.*?)</item>", document, re.DOTALL)
    if len(blocks) == 0:
        print("[ERROR] <item> 블록을 찾을 수 없습니다.")
        print("        파일 구조를 확인하세요: 메모장으로 열어서 <item> 태그 존재 여부 확인")
        sys.exit(1)

    def field(chunk, name):
        # Text content of one child element, without its CDATA wrapper.
        return strip_cdata(extract_tag(chunk, name))

    parsed = []
    for position, chunk in enumerate(blocks):
        # The base64 attribute on <response> says whether the body is encoded.
        encoded_flag = extract_tag_attr(chunk, "response", "base64")
        record = {
            "index":    position,
            "url":      field(chunk, "url"),
            "method":   field(chunk, "method"),
            "status":   field(chunk, "status"),
            "resp_raw": field(chunk, "response"),
            "resp_b64": encoded_flag.lower() == "true",
        }
        parsed.append(record)
    return parsed

def _safe_name_part(value):
    """Replace characters that are unsafe in a file name with ``_``."""
    return re.sub(r"[^A-Za-z0-9._-]", "_", value)

def process_item(item, output_dir):
    """Decode one parsed item and write its payload as .hex and .bin files.

    Steps: decode the response (base64 when ``item["resp_b64"]`` is true,
    otherwise a Latin-1 encode of the text), drop the HTTP headers, look
    for a known file signature in the body, and save everything from that
    signature onward. When no signature is found the whole body is saved
    and labelled ``raw``.

    Args:
        item: Dict with the keys ``resp_raw``, ``resp_b64``, ``index``,
            ``method``, ``status`` and ``url``.
        output_dir: Directory for the output files; created if missing.

    Returns:
        None. Progress and errors are reported with ``print``; an item with
        an empty response, an undecodable base64 payload, or an output
        directory that cannot be created is skipped.
    """
    raw = item["resp_raw"]
    if not raw:
        print("  [SKIP] 응답 없음")
        return

    if item["resp_b64"]:
        try:
            decoded = base64.b64decode(raw)
        except ValueError as e:
            # binascii.Error (bad padding/characters) is a ValueError subclass.
            print("  [ERROR] base64 디코딩 실패: {}".format(e))
            return
    else:
        # Characters outside Latin-1 cannot be mapped to a byte and become "?".
        decoded = raw.encode("latin-1", errors="replace")

    body = extract_body(decoded)

    magic_idx, magic_name = find_magic(body)
    if magic_idx == -1:
        print("  [WARN] 매직넘버 미발견 → 바디 전체 저장")
        payload    = body
        magic_name = "raw"
    else:
        payload    = body[magic_idx:]
        print("  [OK] 매직넘버: {} (offset={})".format(magic_name, magic_idx))

    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print("  [ERROR] 출력 디렉터리 생성 실패: {}".format(e))
        return

    # method/status come straight from the input file; sanitise them so they
    # cannot inject path separators or other invalid file-name characters.
    base_name = "{:03d}_{}_{}_{}".format(
        item["index"],
        _safe_name_part(item["method"]),
        _safe_name_part(item["status"]),
        magic_name,
    )

    # Hex dump: a short header, then 16 space-separated bytes per line.
    hex_path = os.path.join(output_dir, base_name + ".hex")
    try:
        with open(hex_path, "w", encoding="utf-8") as f:
            f.write("# URL    : {}\n".format(item["url"]))
            f.write("# Status : {}\n".format(item["status"]))
            f.write("# Bytes  : {}\n#\n".format(len(payload)))
            for start in range(0, len(payload), 16):
                f.write(payload[start:start + 16].hex(" ") + "\n")
        print("  → hex : {}".format(hex_path))
    except OSError as e:
        print("  [ERROR] hex 저장 실패: {}".format(e))

    # Raw binary payload.
    bin_path = os.path.join(output_dir, base_name + ".bin")
    try:
        with open(bin_path, "wb") as f:
            f.write(payload)
        print("  → bin : {} ({} bytes)".format(bin_path, len(payload)))
    except OSError as e:
        print("  [ERROR] bin 저장 실패: {}".format(e))

def main():
    """Command-line entry point.

    Usage: ``python burp_extract.py <burp_save.xml> [output_dir]``.
    Without arguments the usage line is printed and the process exits with
    status 0; a missing input file is reported and exits with status 1.
    Otherwise every parsed item is printed and handed to ``process_item``.
    The output directory defaults to ``./extracted``.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python burp_extract.py <burp_save.xml> [output_dir]")
        sys.exit(0)

    xml_path = args[0]
    if len(args) > 1:
        output_dir = args[1]
    else:
        output_dir = "./extracted"

    if not os.path.exists(xml_path):
        print("[ERROR] 파일 없음: {}".format(xml_path))
        sys.exit(1)

    items = parse_burp_xml(xml_path)
    print("[*] {}개 아이템 파싱 완료\n".format(len(items)))

    for entry in items:
        headline = "[{:03d}] {} {} (HTTP {})".format(
            entry["index"], entry["method"], entry["url"], entry["status"]
        )
        print(headline)
        process_item(entry, output_dir)
        print()

if __name__ == "__main__":
    main()

# (Web page residue, not part of the script: "+ Recent posts")