From f7a477b87b34464337ba6bc16184d1673140caed Mon Sep 17 00:00:00 2001 From: Kevin Heis Date: Thu, 15 May 2025 11:56:40 -0700 Subject: [PATCH] GitHub models REST API docs (#55288) Co-authored-by: Roniece Ricardo <33437850+RonRicardo@users.noreply.github.com> Co-authored-by: Daniel Garman Co-authored-by: SiaraMist Co-authored-by: Evan Bonsignori --- .github/workflows/sync-openapi.yml | 11 +- content/rest/index.md | 1 + content/rest/models/catalog.md | 17 + content/rest/models/index.md | 12 + content/rest/models/inference.md | 24 + package-lock.json | 7 + package.json | 1 + src/rest/README.md | 2 +- src/rest/components/RestCodeSamples.tsx | 9 +- src/rest/components/RestOperation.tsx | 19 +- src/rest/components/get-rest-code-samples.ts | 4 + src/rest/data/fpt-2022-11-28/schema.json | 845 +++++++++++++++++- src/rest/scripts/update-files.ts | 49 +- src/rest/scripts/utils/get-body-params.ts | 2 +- src/rest/scripts/utils/get-operations.js | 6 +- .../scripts/utils/inject-models-schema.ts | 102 +++ src/rest/scripts/utils/operation.js | 4 +- src/rest/scripts/utils/render-content.ts | 11 + src/rest/scripts/utils/sync.ts | 8 +- 19 files changed, 1073 insertions(+), 61 deletions(-) create mode 100644 content/rest/models/catalog.md create mode 100644 content/rest/models/index.md create mode 100644 content/rest/models/inference.md create mode 100644 src/rest/scripts/utils/inject-models-schema.ts create mode 100644 src/rest/scripts/utils/render-content.ts diff --git a/.github/workflows/sync-openapi.yml b/.github/workflows/sync-openapi.yml index bbf7a83119..fda313da70 100644 --- a/.github/workflows/sync-openapi.yml +++ b/.github/workflows/sync-openapi.yml @@ -42,6 +42,13 @@ jobs: path: rest-api-description ref: ${{ inputs.SOURCE_BRANCH }} + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + # By default, only the most recent commit of the `main` branch + # will be checked out + repository: github/models-gateway + path: models-gateway + - uses: 
./.github/actions/node-npm-setup - name: Sync the REST, Webhooks, and GitHub Apps schemas @@ -49,7 +56,9 @@ jobs: # Needed for gh GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_BASE }} run: | - npm run sync-rest -- --source-repo rest-api-description --output rest github-apps webhooks rest-redirects + npm run sync-rest -- \ + --source-repos rest-api-description models-gateway \ + --output rest github-apps webhooks rest-redirects git status echo "Deleting the cloned github/rest-api-description repo..." rm -rf rest-api-description diff --git a/content/rest/index.md b/content/rest/index.md index 9ff5868af1..0b23678327 100644 --- a/content/rest/index.md +++ b/content/rest/index.md @@ -73,6 +73,7 @@ children: - /meta - /metrics - /migrations + - /models - /oauth-authorizations - /orgs - /packages diff --git a/content/rest/models/catalog.md b/content/rest/models/catalog.md new file mode 100644 index 0000000000..55ae0944c7 --- /dev/null +++ b/content/rest/models/catalog.md @@ -0,0 +1,17 @@ +--- +title: REST API endpoints for models catalog +shortTitle: Catalog +intro: Use the REST API to get a list of models available for use, including details like ID, supported input/output modalities, and rate limits. +versions: # DO NOT MANUALLY EDIT. CHANGES WILL BE OVERWRITTEN BY A šŸ¤– + fpt: '*' +topics: + - API +autogenerated: rest +allowTitleToDifferFromFilename: true +--- + +## About {% data variables.product.prodname_github_models %} catalog + +You can use the REST API to explore available models in the {% data variables.product.prodname_github_models %} catalog. 
+ + diff --git a/content/rest/models/index.md b/content/rest/models/index.md new file mode 100644 index 0000000000..4016dde81a --- /dev/null +++ b/content/rest/models/index.md @@ -0,0 +1,12 @@ +--- +title: Models +topics: + - API +autogenerated: rest +allowTitleToDifferFromFilename: true +children: + - /catalog + - /inference +versions: + fpt: '*' +--- diff --git a/content/rest/models/inference.md b/content/rest/models/inference.md new file mode 100644 index 0000000000..fd7d570368 --- /dev/null +++ b/content/rest/models/inference.md @@ -0,0 +1,24 @@ +--- +title: REST API endpoints for models inference +shortTitle: Inference +intro: Use the REST API to submit a chat completion request to a specified model, with or without organizational attribution. +versions: # DO NOT MANUALLY EDIT. CHANGES WILL BE OVERWRITTEN BY A šŸ¤– + fpt: '*' +topics: + - API +autogenerated: rest +allowTitleToDifferFromFilename: true +--- + +## About {% data variables.product.prodname_github_models %} inference + +You can use the REST API to run inference requests using the {% data variables.product.prodname_github_models %} platform. + +The API supports: + +* Accessing top models from OpenAI, DeepSeek, Microsoft, Llama, and more. +* Running chat-based inference requests with full control over sampling and response parameters. +* Streaming or non-streaming completions. +* Organizational attribution and usage tracking. 
+ + diff --git a/package-lock.json b/package-lock.json index 531ca6e3ac..d1cd2faef7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "cookie-parser": "^1.4.7", "cuss": "2.2.0", "dayjs": "^1.11.13", + "dereference-json-schema": "^0.2.1", "dotenv": "^16.4.7", "escape-string-regexp": "5.0.0", "express": "4.21.2", @@ -6713,6 +6714,12 @@ "node": ">=6" } }, + "node_modules/dereference-json-schema": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/dereference-json-schema/-/dereference-json-schema-0.2.1.tgz", + "integrity": "sha512-uzJsrg225owJyRQ8FNTPHIuBOdSzIZlHhss9u6W8mp7jJldHqGuLv9cULagP/E26QVJDnjtG8U7Dw139mM1ydA==", + "license": "MIT" + }, "node_modules/destroy": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", diff --git a/package.json b/package.json index aa728bcc00..7b8b238e5d 100644 --- a/package.json +++ b/package.json @@ -266,6 +266,7 @@ "cookie-parser": "^1.4.7", "cuss": "2.2.0", "dayjs": "^1.11.13", + "dereference-json-schema": "^0.2.1", "dotenv": "^16.4.7", "escape-string-regexp": "5.0.0", "express": "4.21.2", diff --git a/src/rest/README.md b/src/rest/README.md index 9078acdea5..8ec595d73b 100644 --- a/src/rest/README.md +++ b/src/rest/README.md @@ -36,7 +36,7 @@ To run the REST pipeline locally: 1. Clone the [`github/rest-api-description`](https://github.com/github/rest-api-description) repository inside your local `docs-internal` repository. 1. Set a `GITHUB_TOKEN` in your `.env` with (classic) `repo` scopes & enable SSO for the github org. -1. Run `npm run sync-rest -- -s rest-api-description -o rest`. Note, by default `-o rest` is specified, so you can omit it. +1. Run `npm run sync-rest -- -s rest-api-description models-gateway -o rest`. Note, by default `-o rest` is specified, so you can omit it. 
## About this directory diff --git a/src/rest/components/RestCodeSamples.tsx b/src/rest/components/RestCodeSamples.tsx index 46ea76f516..5f3fb0ae89 100644 --- a/src/rest/components/RestCodeSamples.tsx +++ b/src/rest/components/RestCodeSamples.tsx @@ -76,8 +76,13 @@ export function RestCodeSamples({ operation, slug, heading }: Props) { // Menu options for the language selector const languageSelectOptions: CodeSampleKeys[] = [CodeSampleKeys.curl] - // Management Console and GHES Manage API operations are not supported by Octokit - if (operation.subcategory !== 'management-console' && operation.subcategory !== 'manage-ghes') { + // Management Console, GHES Manage API, and GitHub Models + // operations are not supported by Octokit + if ( + operation.category !== 'models' && + operation.subcategory !== 'management-console' && + operation.subcategory !== 'manage-ghes' + ) { languageSelectOptions.push(CodeSampleKeys.javascript) // Not all examples support the GH CLI language option. If any of diff --git a/src/rest/components/RestOperation.tsx b/src/rest/components/RestOperation.tsx index 6d710d03b6..8f580cdf67 100644 --- a/src/rest/components/RestOperation.tsx +++ b/src/rest/components/RestOperation.tsx @@ -26,6 +26,13 @@ const DEFAULT_ACCEPT_HEADER = { isRequired: false, } +const REQUIRED_CONTENT_TYPE_HEADER = { + name: 'content-type', + type: 'string', + description: `

Setting to application/json is required.

`, + isRequired: true, +} + export function RestOperation({ operation }: Props) { const titleSlug = slug(operation.title) const { t } = useTranslation('rest_reference') @@ -34,11 +41,13 @@ export function RestOperation({ operation }: Props) { const headers = operation.subcategory === 'management-console' || operation.subcategory === 'manage-ghes' ? [] - : [DEFAULT_ACCEPT_HEADER] - const numPreviews = operation.previews.length - const hasStatusCodes = operation.statusCodes.length > 0 - const hasCodeSamples = operation.codeExamples.length > 0 - const hasParameters = operation.parameters.length > 0 || operation.bodyParameters.length > 0 + : operation.subcategory === 'inference' + ? [REQUIRED_CONTENT_TYPE_HEADER, DEFAULT_ACCEPT_HEADER] + : [DEFAULT_ACCEPT_HEADER] + const numPreviews = operation.previews?.length || 0 + const hasStatusCodes = operation.statusCodes?.length > 0 + const hasCodeSamples = operation.codeExamples?.length > 0 + const hasParameters = operation.parameters?.length > 0 || operation.bodyParameters?.length > 0 const anchorRef = useRef(null) diff --git a/src/rest/components/get-rest-code-samples.ts b/src/rest/components/get-rest-code-samples.ts index 3b684e920d..9f566effbf 100644 --- a/src/rest/components/get-rest-code-samples.ts +++ b/src/rest/components/get-rest-code-samples.ts @@ -49,6 +49,10 @@ export function getShellExample( } } + if (operation.subcategory === 'inference') { + contentTypeHeader = '-H "Content-Type: application/json"' + } + let requestPath = codeSample?.request?.parameters ? 
parseTemplate(operation.requestPath).expand(codeSample.request.parameters) : operation.requestPath diff --git a/src/rest/data/fpt-2022-11-28/schema.json b/src/rest/data/fpt-2022-11-28/schema.json index f75b26989f..a8d1db4a63 100644 --- a/src/rest/data/fpt-2022-11-28/schema.json +++ b/src/rest/data/fpt-2022-11-28/schema.json @@ -112096,8 +112096,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -115254,8 +115254,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -119454,8 +119454,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "201", @@ -224869,8 +224869,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -224943,8 +224943,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -226462,8 +226462,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -226532,8 +226532,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -237364,8 +237364,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -254267,8 +254267,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -254565,8 +254565,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -257342,8 +257342,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -274694,8 +274694,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", 
"statusCodes": [ { "httpStatusCode": "204", @@ -275034,8 +275034,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -276385,8 +276385,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -276963,8 +276963,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "201", @@ -277030,8 +277030,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -277162,8 +277162,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -280660,8 +280660,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -280782,8 +280782,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "201", @@ -342829,8 +342829,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -373319,8 +373319,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -389396,6 +389396,793 @@ } ] }, + "models": { + "catalog": [ + { + "serverUrl": "https://models.github.ai", + "verb": "get", + "requestPath": "/catalog/models", + "title": "List all models", + "category": "models", + "subcategory": "catalog", + "parameters": [], + "bodyParameters": [], + "codeExamples": [ + { + "key": "default", + "request": { + "description": "Example", + "acceptHeader": "application/vnd.github.v3+json" + }, + "response": { + "statusCode": "200", + "contentType": "application/json", + "description": "", + "example": [ + { + "id": "openai/gpt-4.1", + "name": "OpenAI GPT-4.1", + "publisher": "OpenAI", + "summary": "gpt-4.1 outperforms gpt-4o 
across the board, with major gains in coding, instruction following, and long-context understanding", + "rate_limit_tier": "high", + "supported_input_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text" + ], + "tags": [ + "multipurpose", + "multilingual", + "multimodal" + ] + } + ], + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier for the model" + }, + "name": { + "type": "string", + "description": "The name of the model" + }, + "publisher": { + "type": "string", + "description": "The publisher of the model" + }, + "summary": { + "type": "string", + "description": "A brief summary of the model's capabilities" + }, + "rate_limit_tier": { + "type": "string", + "description": "The rate limit tier for the model" + }, + "tags": { + "type": "array", + "description": "A list of tags associated with the model", + "items": { + "type": "string" + } + }, + "supported_input_modalities": { + "type": "array", + "description": "A list of input modalities supported by the model", + "items": { + "type": "string", + "description": "An input modality supported by the model" + } + }, + "supported_output_modalities": { + "type": "array", + "description": "A list of output modalities supported by the model", + "items": { + "type": "string", + "description": "An output modality supported by the model" + } + } + } + } + } + } + } + ], + "previews": [], + "descriptionHTML": "

Get a list of models available for use, including details like supported input/output modalities,\npublisher, and rate limits.

", + "statusCodes": [ + { + "httpStatusCode": "200", + "description": "

OK

" + } + ] + } + ], + "inference": [ + { + "serverUrl": "https://models.github.ai", + "verb": "post", + "requestPath": "/orgs/{org}/inference/chat/completions", + "title": "Run an inference request attributed to an organization", + "category": "models", + "subcategory": "inference", + "parameters": [ + { + "in": "query", + "required": false, + "name": "api-version", + "description": "

The API version to use. Optional, but required for some features.

", + "schema": { + "type": "string" + }, + "example": "2024-05-01-preview" + }, + { + "in": "path", + "name": "org", + "required": true, + "description": "

The organization login associated with the organization to which the request is to be attributed.

", + "schema": { + "type": "string" + } + } + ], + "bodyParameters": [ + { + "type": "string", + "name": "model", + "in": "body", + "description": "

ID of the specific model to use for the request. The model ID should be in the format of {publisher}/{model_name} where \"openai/gpt-4.1\" is an example of a model ID. You can find supported models in the catalog/models endpoint.

", + "isRequired": true + }, + { + "type": "array of objects", + "name": "messages", + "in": "body", + "description": "

The collection of context messages associated with this chat completion request. Typical usage begins with a chat message for the System role that provides instructions for the behavior of the assistant, followed by alternating messages between the User and Assistant roles.

", + "isRequired": true, + "childParamsGroups": [ + { + "type": "string", + "name": "role", + "description": "

The chat role associated with this message

", + "isRequired": true, + "enum": [ + "assistant", + "developer", + "system", + "user" + ] + }, + { + "type": "string", + "name": "content", + "description": "

The content of the message

", + "isRequired": true + } + ] + }, + { + "type": "number", + "name": "frequency_penalty", + "in": "body", + "description": "

A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. Supported range is [-2, 2].

" + }, + { + "type": "integer", + "name": "max_tokens", + "in": "body", + "description": "

The maximum number of tokens to generate in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length. For example, if your prompt is 100 tokens and you set max_tokens to 50, the API will return a completion with a maximum of 50 tokens.

" + }, + { + "type": "array of strings", + "name": "modalities", + "in": "body", + "description": "

The modalities that the model is allowed to use for the chat completions response. The default modality is text. Indicating an unsupported modality combination results in a 422 error.\nSupported values are: text, audio

" + }, + { + "type": "number", + "name": "presence_penalty", + "in": "body", + "description": "

A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new tokens. Supported range is [-2, 2].

" + }, + { + "type": "object", + "name": "response_format", + "in": "body", + "description": "

The desired format for the response.

", + "childParamsGroups": [ + { + "type": "object", + "name": "Object", + "description": "", + "childParamsGroups": [ + { + "type": "string", + "name": "type", + "description": "", + "enum": [ + "text", + "json_object" + ] + } + ] + }, + { + "type": "object", + "name": "Schema for structured JSON response", + "description": "", + "isRequired": [ + "type", + "json_schema" + ], + "childParamsGroups": [ + { + "type": "string", + "name": "type", + "description": "

The type of the response.

", + "isRequired": true, + "enum": [ + "json_schema" + ] + }, + { + "type": "object", + "name": "json_schema", + "description": "

The JSON schema for the response.

", + "isRequired": true + } + ] + } + ], + "oneOfObject": true + }, + { + "type": "integer", + "name": "seed", + "in": "body", + "description": "

If specified, the system will make a best effort to sample deterministically such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed.

" + }, + { + "type": "boolean", + "name": "stream", + "in": "body", + "description": "

A value indicating whether chat completions should be streamed for this request.

", + "default": false + }, + { + "type": "object", + "name": "stream_options", + "in": "body", + "description": "

Whether to include usage information in the response. Requires stream to be set to true.

", + "childParamsGroups": [ + { + "type": "boolean", + "name": "include_usage", + "description": "

Whether to include usage information in the response.

", + "default": false + } + ] + }, + { + "type": "array of strings", + "name": "stop", + "in": "body", + "description": "

A collection of textual sequences that will end completion generation.

" + }, + { + "type": "number", + "name": "temperature", + "in": "body", + "description": "

The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completion request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Decimal values are supported.

" + }, + { + "type": "string", + "name": "tool_choice", + "in": "body", + "description": "

If specified, the model will configure which of the provided tools it can use for the chat completions response.

", + "enum": [ + "auto", + "required", + "none" + ] + }, + { + "type": "array of objects", + "name": "tools", + "in": "body", + "description": "

A list of tools the model may request to call. Currently, only functions are supported as a tool. The model may respond with a function call request and provide the input arguments in JSON format for that function.

", + "childParamsGroups": [ + { + "type": "object", + "name": "function", + "description": "", + "childParamsGroups": [ + { + "type": "string", + "name": "name", + "description": "

The name of the function to be called.

" + }, + { + "type": "string", + "name": "description", + "description": "

A description of what the function does. The model will use this description when selecting the function and interpreting its parameters.

" + }, + { + "type": "", + "name": "parameters", + "description": "

The parameters the function accepts, described as a JSON Schema object.

" + } + ] + }, + { + "type": "string", + "name": "type", + "description": "", + "enum": [ + "function" + ] + } + ] + }, + { + "type": "number", + "name": "top_p", + "in": "body", + "description": "

An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. As an example, a value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be considered. It is not recommended to modify temperature and top_p for the same request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Decimal values are supported.

" + } + ], + "codeExamples": [ + { + "key": "default", + "request": { + "contentType": "application/json", + "description": "Example", + "acceptHeader": "application/vnd.github.v3+json", + "bodyParameters": { + "model": "openai/gpt-4.1", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ] + }, + "parameters": { + "org": "ORG" + } + }, + "response": { + "statusCode": "200", + "contentType": "application/json", + "description": "", + "example": { + "choices": [ + { + "message": { + "content": "The capital of France is Paris.", + "role": "assistant" + } + } + ] + }, + "schema": { + "type": "object", + "oneOf": [ + { + "title": "Non Streaming Response", + "description": "A non-streaming response for the inference request.", + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "message": { + "description": "The message associated with the completion.", + "type": "object", + "properties": { + "content": { + "description": "The content of the message.", + "type": "string" + }, + "role": { + "description": "The role of the message.", + "type": "string" + } + } + } + } + } + } + } + }, + { + "title": "Streaming Response", + "description": "A streaming response for the inference request", + "type": "object", + "properties": { + "data": { + "description": "Some details about the response.", + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "delta": { + "description": "Container for the content of the streamed response.", + "type": "object", + "properties": { + "content": { + "description": "The content of the streamed response.", + "type": "string" + } + } + } + } + } + } + } + } + } + } + ] + } + } + } + ], + "previews": [], + "descriptionHTML": "

This endpoint allows you to run an inference request attributed to a specific organization. You must be a member of the organization to use this endpoint.\nThe request body should contain the model ID and the messages for the chat completion request.\nThe response will include either a non-streaming or streaming response based on the request parameters.

", + "statusCodes": [ + { + "httpStatusCode": "200", + "description": "

OK

" + } + ] + }, + { + "serverUrl": "https://models.github.ai", + "verb": "post", + "requestPath": "/inference/chat/completions", + "title": "Run an inference request", + "category": "models", + "subcategory": "inference", + "parameters": [ + { + "in": "query", + "required": false, + "name": "api-version", + "description": "

The API version to use. Optional, but required for some features.

", + "schema": { + "type": "string" + }, + "example": "2024-05-01-preview" + } + ], + "bodyParameters": [ + { + "type": "string", + "name": "model", + "in": "body", + "description": "

ID of the specific model to use for the request. The model ID should be in the format of {publisher}/{model_name} where \"openai/gpt-4.1\" is an example of a model ID. You can find supported models in the catalog/models endpoint.

", + "isRequired": true + }, + { + "type": "array of objects", + "name": "messages", + "in": "body", + "description": "

The collection of context messages associated with this chat completion request. Typical usage begins with a chat message for the System role that provides instructions for the behavior of the assistant, followed by alternating messages between the User and Assistant roles.

", + "isRequired": true, + "childParamsGroups": [ + { + "type": "string", + "name": "role", + "description": "

The chat role associated with this message

", + "isRequired": true, + "enum": [ + "assistant", + "developer", + "system", + "user" + ] + }, + { + "type": "string", + "name": "content", + "description": "

The content of the message

", + "isRequired": true + } + ] + }, + { + "type": "number", + "name": "frequency_penalty", + "in": "body", + "description": "

A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. Supported range is [-2, 2].

" + }, + { + "type": "integer", + "name": "max_tokens", + "in": "body", + "description": "

The maximum number of tokens to generate in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length. For example, if your prompt is 100 tokens and you set max_tokens to 50, the API will return a completion with a maximum of 50 tokens.

" + }, + { + "type": "array of strings", + "name": "modalities", + "in": "body", + "description": "

The modalities that the model is allowed to use for the chat completions response. The default modality is text. Indicating an unsupported modality combination results in a 422 error.\nSupported values are: text, audio

" + }, + { + "type": "number", + "name": "presence_penalty", + "in": "body", + "description": "

A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new tokens. Supported range is [-2, 2].

" + }, + { + "type": "object", + "name": "response_format", + "in": "body", + "description": "

The desired format for the response.

", + "childParamsGroups": [ + { + "type": "object", + "name": "Object", + "description": "", + "childParamsGroups": [ + { + "type": "string", + "name": "type", + "description": "", + "enum": [ + "text", + "json_object" + ] + } + ] + }, + { + "type": "object", + "name": "Schema for structured JSON response", + "description": "", + "isRequired": [ + "type", + "json_schema" + ], + "childParamsGroups": [ + { + "type": "string", + "name": "type", + "description": "

The type of the response.

", + "isRequired": true, + "enum": [ + "json_schema" + ] + }, + { + "type": "object", + "name": "json_schema", + "description": "

The JSON schema for the response.

", + "isRequired": true + } + ] + } + ], + "oneOfObject": true + }, + { + "type": "integer", + "name": "seed", + "in": "body", + "description": "

If specified, the system will make a best effort to sample deterministically such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed.

" + }, + { + "type": "boolean", + "name": "stream", + "in": "body", + "description": "

A value indicating whether chat completions should be streamed for this request.

", + "default": false + }, + { + "type": "object", + "name": "stream_options", + "in": "body", + "description": "

Whether to include usage information in the response. Requires stream to be set to true.

", + "childParamsGroups": [ + { + "type": "boolean", + "name": "include_usage", + "description": "

Whether to include usage information in the response.

", + "default": false + } + ] + }, + { + "type": "array of strings", + "name": "stop", + "in": "body", + "description": "

A collection of textual sequences that will end completion generation.

" + }, + { + "type": "number", + "name": "temperature", + "in": "body", + "description": "

The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completion request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Decimal values are supported.

" + }, + { + "type": "string", + "name": "tool_choice", + "in": "body", + "description": "

If specified, the model will configure which of the provided tools it can use for the chat completions response.

", + "enum": [ + "auto", + "required", + "none" + ] + }, + { + "type": "array of objects", + "name": "tools", + "in": "body", + "description": "

A list of tools the model may request to call. Currently, only functions are supported as a tool. The model may respond with a function call request and provide the input arguments in JSON format for that function.

", + "childParamsGroups": [ + { + "type": "object", + "name": "function", + "description": "", + "childParamsGroups": [ + { + "type": "string", + "name": "name", + "description": "

The name of the function to be called.

" + }, + { + "type": "string", + "name": "description", + "description": "

A description of what the function does. The model will use this description when selecting the function and interpreting its parameters.

" + }, + { + "type": "", + "name": "parameters", + "description": "

The parameters the function accepts, described as a JSON Schema object.

" + } + ] + }, + { + "type": "string", + "name": "type", + "description": "", + "enum": [ + "function" + ] + } + ] + }, + { + "type": "number", + "name": "top_p", + "in": "body", + "description": "

An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. As an example, a value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be considered. It is not recommended to modify temperature and top_p for the same request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Decimal values are supported.

" + } + ], + "codeExamples": [ + { + "key": "default", + "request": { + "contentType": "application/json", + "description": "Example", + "acceptHeader": "application/vnd.github.v3+json", + "bodyParameters": { + "model": "openai/gpt-4.1", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ] + } + }, + "response": { + "statusCode": "200", + "contentType": "application/json", + "description": "", + "example": { + "choices": [ + { + "message": { + "content": "The capital of France is Paris.", + "role": "assistant" + } + } + ] + }, + "schema": { + "type": "object", + "oneOf": [ + { + "title": "Non Streaming Response", + "description": "A non-streaming response for the inference request.", + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "message": { + "description": "The message associated with the completion.", + "type": "object", + "properties": { + "content": { + "description": "The content of the message.", + "type": "string" + }, + "role": { + "description": "The role of the message.", + "type": "string" + } + } + } + } + } + } + } + }, + { + "title": "Streaming Response", + "description": "A streaming response for the inference request", + "type": "object", + "properties": { + "data": { + "description": "Some details about the response.", + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "delta": { + "description": "Container for the content of the streamed response.", + "type": "object", + "properties": { + "content": { + "description": "The content of the streamed response.", + "type": "string" + } + } + } + } + } + } + } + } + } + } + ] + } + } + } + ], + "previews": [], + "descriptionHTML": "

This endpoint allows you to run an inference request.

", + "statusCodes": [ + { + "httpStatusCode": "200", + "description": "

OK

" + } + ] + } + ] + }, "orgs": { "orgs": [ { @@ -497689,8 +498476,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -498071,8 +498858,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -516103,8 +516890,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -516528,8 +517315,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -516638,8 +517425,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -536484,8 +537271,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "200", @@ -621744,8 +622531,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", @@ -622494,8 +623281,8 @@ } } ], - "descriptionHTML": "", "previews": [], + "descriptionHTML": "", "statusCodes": [ { "httpStatusCode": "204", diff --git a/src/rest/scripts/update-files.ts b/src/rest/scripts/update-files.ts index 5d0ea3643e..67d20cd6bb 100755 --- a/src/rest/scripts/update-files.ts +++ b/src/rest/scripts/update-files.ts @@ -21,6 +21,7 @@ import { allVersions } from '@/versions/lib/all-versions' import { syncWebhookData } from '../../webhooks/scripts/sync' import { syncGitHubAppsData } from '../../github-apps/scripts/sync' import { syncRestRedirects } from './utils/get-redirects' +import { MODELS_GATEWAY_ROOT, injectModelsSchema } from './utils/inject-models-schema' const __dirname = path.dirname(fileURLToPath(import.meta.url)) const TEMP_OPENAPI_DIR = path.join(__dirname, '../../../rest-api-description/openApiTemp') @@ -45,10 +46,10 @@ program ) .addOption( new Option( - '-s, --source-repo ', - `The source repository to get 
the dereferenced files from. When the source repo is ${REST_API_DESCRIPTION_ROOT}, the bundler is not run to generate the source dereferenced OpenAPI files because the ${REST_API_DESCRIPTION_ROOT} repo already contains them.`, + '-s, --source-repos [repos...]', + `The source repositories to get the dereferenced files from. When the source repo is ${REST_API_DESCRIPTION_ROOT}, the bundler is not run to generate the source dereferenced OpenAPI files because the ${REST_API_DESCRIPTION_ROOT} repo already contains them.`, ) - .choices(['github', REST_API_DESCRIPTION_ROOT]) + .choices(['github', REST_API_DESCRIPTION_ROOT, MODELS_GATEWAY_ROOT]) .default(['github'], 'github'), ) .option( @@ -63,9 +64,12 @@ program .option('-n --next', 'Generate the next OpenAPI calendar-date version.') .parse(process.argv) -const { versions, includeUnpublished, includeDeprecated, next, output, sourceRepo } = program.opts() +const { versions, includeUnpublished, includeDeprecated, next, output, sourceRepos } = + program.opts() -const sourceRepoDirectory = sourceRepo === 'github' ? GITHUB_REP_DIR : REST_API_DESCRIPTION_ROOT +const sourceRepoDirectories = sourceRepos.map((sourceRepo: string) => + sourceRepo === 'github' ? GITHUB_REP_DIR : sourceRepo, ) main() @@ -77,15 +81,20 @@ async function main() { // If the source repo is github, this is the local development workflow // and the files in github must be bundled and dereferenced first. - if (sourceRepo === 'github') { + if (sourceRepos.includes('github')) { await getBundledFiles() } + const sourceRepoDirectory = sourceRepos.includes('github') ? GITHUB_REP_DIR : REST_API_DESCRIPTION_ROOT // When we get the dereferenced OpenAPI files from the open-source // rest description repo (REST_API_DESCRIPTION_ROOT), we need to // remove any versions that are deprecated because that repo contains // all past versions. - const sourceDirectory = sourceRepo === 'github' ? 
TEMP_BUNDLED_OPENAPI_DIR : REST_DESCRIPTION_DIR + const sourceDirectory = sourceRepos.includes('github') + ? TEMP_BUNDLED_OPENAPI_DIR + : REST_DESCRIPTION_DIR const dereferencedFiles = walk(sourceDirectory, { includeBasePath: true, @@ -103,7 +112,7 @@ async function main() { // The REST_API_DESCRIPTION_ROOT repo contains all current and // deprecated versions. We need to remove the deprecated versions // so that we don't spend time generating data files for them. - if (sourceRepo === REST_API_DESCRIPTION_ROOT) { + if (sourceRepos.includes(REST_API_DESCRIPTION_ROOT)) { const derefDir = await readdir(TEMP_OPENAPI_DIR) // TODO: After migrating all-version.js to TypeScript, we can remove the type assertion const currentOpenApiVersions = Object.values(allVersions).map( @@ -118,11 +127,12 @@ async function main() { } } const derefFiles = await readdir(TEMP_OPENAPI_DIR) + const { restSchemas, webhookSchemas } = await getOpenApiSchemaFiles(derefFiles) if (pipelines.includes('rest')) { console.log(`\nā–¶ļø Generating REST data files...\n`) - await syncRestData(TEMP_OPENAPI_DIR, restSchemas, sourceRepoDirectory) + await syncRestData(TEMP_OPENAPI_DIR, restSchemas, sourceRepoDirectory, injectModelsSchema) } if (pipelines.includes('webhooks')) { @@ -142,7 +152,7 @@ async function main() { // If the source repo is REST_API_DESCRIPTION_ROOT, we want to update // the pipeline config files with the SHA of the synced commit. - if (sourceRepo === REST_API_DESCRIPTION_ROOT) { + if (sourceRepos.includes(REST_API_DESCRIPTION_ROOT)) { const syncedSha = execSync('git rev-parse HEAD', { cwd: REST_API_DESCRIPTION_ROOT, encoding: 'utf8', @@ -161,7 +171,7 @@ async function main() { } console.log( - `\nšŸ The static REST API files are now up-to-date with \`github/${sourceRepo}\`. To revert uncommitted data changes, run \`git checkout src/**/data/*\`\n`, + `\nšŸ The static REST API files are now up-to-date with ${sourceRepos.join(' ')}. 
To revert uncommitted data changes, run \`git checkout src/**/data/*\`\n`, ) } @@ -226,19 +236,22 @@ async function validateInputParameters(): Promise { // The `--decorate-only` option cannot be used // with the `--include-deprecated` or `--include-unpublished` options - if ((includeDeprecated || includeUnpublished) && sourceRepo !== 'github') { + if ((includeDeprecated || includeUnpublished) && !sourceRepos.includes('github')) { const errorMsg = `šŸ›‘ You cannot use the decorate-only option with include-unpublished or include-deprecated because the include-unpublished and include-deprecated options are only available when running the bundler. The decorate-only option skips running the bundler.\nPlease reach out to #docs-engineering if a new use case should be supported.` throw new Error(errorMsg) } // Check that the source repo exists. - if (!existsSync(sourceRepoDirectory)) { - const errorMsg = - sourceRepo === 'github' - ? `šŸ›‘ The ${GITHUB_REP_DIR} does not exist. Make sure you have a codespace with a checkout of \`github/github\` at the same level as your \`github/docs-internal \`repo before running this script. See this documentation for details: https://thehub.github.com/epd/engineering/products-and-services/public-apis/rest/openapi/openapi-in-the-docs/#previewing-changes-in-the-docs.` - : `šŸ›‘ You must have a clone of the ${REST_API_DESCRIPTION_ROOT} repo in the root of this repo.` - throw new Error(errorMsg) + for (let sourceRepoDirectory of sourceRepoDirectories) { + if (!existsSync(sourceRepoDirectory)) { + const errorMsg = + sourceRepoDirectory === 'github' || sourceRepoDirectory === GITHUB_REP_DIR + ? `šŸ›‘ The ${GITHUB_REP_DIR} does not exist. Make sure you have a codespace with a checkout of \`github/github\` at the same level as your \`github/docs-internal \`repo before running this script. 
See this documentation for details: https://thehub.github.com/epd/engineering/products-and-services/public-apis/rest/openapi/openapi-in-the-docs/#previewing-changes-in-the-docs.` + : `šŸ›‘ You must have a clone of the ${sourceRepoDirectory} repo in the root of this repo.` + throw new Error(errorMsg) + } } + if (versions && versions.length) { await validateVersionsOptions(versions) } diff --git a/src/rest/scripts/utils/get-body-params.ts b/src/rest/scripts/utils/get-body-params.ts index a7556d6a9d..b4f09efb9d 100644 --- a/src/rest/scripts/utils/get-body-params.ts +++ b/src/rest/scripts/utils/get-body-params.ts @@ -1,4 +1,4 @@ -import { renderContent } from '@/content-render/index' +import { renderContent } from './render-content' interface Schema { oneOf?: any[] diff --git a/src/rest/scripts/utils/get-operations.js b/src/rest/scripts/utils/get-operations.js index ae091528c3..e3d6ae9200 100644 --- a/src/rest/scripts/utils/get-operations.js +++ b/src/rest/scripts/utils/get-operations.js @@ -4,7 +4,11 @@ import Operation from './operation.js' // and returns an array of its operation objects with their // HTTP verb and requestPath attached as properties export async function processOperations(operations, progAccessData) { - await Promise.all(operations.map(async (operation) => await operation.process(progAccessData))) + await Promise.all( + operations.map(async (operation) => { + await operation.process(progAccessData) + }), + ) return operations } diff --git a/src/rest/scripts/utils/inject-models-schema.ts b/src/rest/scripts/utils/inject-models-schema.ts new file mode 100644 index 0000000000..df1ebf74e3 --- /dev/null +++ b/src/rest/scripts/utils/inject-models-schema.ts @@ -0,0 +1,102 @@ +import yaml from 'js-yaml' +import dereferenceJsonSchema from 'dereference-json-schema' +import { existsSync } from 'fs' +import { readFile, readdir } from 'fs/promises' + +export const MODELS_GATEWAY_ROOT = 'models-gateway' +const MODELS_GATEWAY_PATH = 'docs/api' + +// The 
github-models REST API OpenAPI descriptions live in a separate repo, github/models-gateway. +// We "inject" the descriptions from that repo into the core GitHub API descriptions so that +// from the perspective of our app code, +// models descriptions are part of the same REST API schema and don't need additional processing +export async function injectModelsSchema(schema: any, schemaName: string): Promise { + if (!schemaName.includes('fpt')) { + return schema + } + + const modelEndpoints = ( + await readdir(`./${MODELS_GATEWAY_ROOT}/${MODELS_GATEWAY_PATH}`, { + recursive: true, + }) + ).filter((name) => name.endsWith('.yaml') || name.endsWith('.yml')) + + for (let endpointPath of modelEndpoints) { + endpointPath = `./${MODELS_GATEWAY_ROOT}/${MODELS_GATEWAY_PATH}/${endpointPath}` + if (!existsSync(endpointPath)) { + console.warn( + `āš ļø Models gateway YAML file not found at ${endpointPath}. Skipping injection for ${schemaName}.`, + ) + continue + } + + const yamlContent = await readFile(endpointPath, 'utf8') + const loadedYaml = yaml.load(yamlContent) as { + openapi: string + info: any + servers: any[] + paths: { [x: string]: any } + } + const deferencedYaml = dereferenceJsonSchema.dereferenceSync(loadedYaml) + + // Copy over top-level OpenAPI fields + schema.openapi = schema.openapi || deferencedYaml.openapi + schema.info = schema.info || deferencedYaml.info + schema.servers = schema.servers || deferencedYaml.servers + + // Process each path and operation in the YAML + for (const path of Object.keys(deferencedYaml.paths)) { + for (const operation of Object.keys(deferencedYaml.paths[path])) { + const operationObject = deferencedYaml.paths[path][operation] + + // Use values from the YAML where possible + const name = operationObject.summary || '' + const description = operationObject.description || '' + const category = operationObject['x-github']?.category || 'models' + + console.log(`ā³ Processing operation: ${name} (${path} ${operation})`) + + // Create 
enhanced operation preserving all original fields + // TODO this should be cleaned up, most can be removed + const enhancedOperation = { + ...operationObject, // Keep all original fields + operationId: operationObject.operationId, // Preserve original operationId with namespace + tags: operationObject.tags || ['models'], // Only use 'models' if no tags present + verb: operation, + requestPath: path, + category: category, + subcategory: operationObject['x-github']?.subcategory || '', + summary: name, + description: description, + 'x-github': { + ...operationObject['x-github'], // Preserve all x-github metadata + category: category, + enabledForGitHubApps: operationObject['x-github']?.enabledForGitHubApps, + githubCloudOnly: operationObject['x-github']?.githubCloudOnly, + permissions: operationObject['x-github']?.permissions || {}, + externalDocs: operationObject['x-github']?.externalDocs || {}, + }, + parameters: operationObject.parameters || [], + responses: { + ...operationObject.responses, + '200': operationObject.responses?.['200'], + }, + } + + // Preserve operation-level servers if present + // !Needed! 
to use models.github.ai instead of api.github.com + if (deferencedYaml.servers) { + enhancedOperation.servers = deferencedYaml.servers + } + + // Add the enhanced operation to the schema + schema.paths[path] = schema.paths[path] || {} + schema.paths[path][operation] = enhancedOperation + + console.log(`āœ… Processed operation: ${name} (${path} ${operation})`) + } + } + } + + return schema +} diff --git a/src/rest/scripts/utils/operation.js b/src/rest/scripts/utils/operation.js index 807cd4a143..aefb275df2 100644 --- a/src/rest/scripts/utils/operation.js +++ b/src/rest/scripts/utils/operation.js @@ -3,7 +3,7 @@ import { get, isPlainObject } from 'lodash-es' import { parseTemplate } from 'url-template' import mergeAllOf from 'json-schema-merge-allof' -import { renderContent } from '#src/content-render/index.js' +import { renderContent } from './render-content' import getCodeSamples from './create-rest-examples.js' import operationSchema from './operation-schema.js' import { validateJson } from '#src/tests/lib/validate-json-schema.js' @@ -105,7 +105,7 @@ export default class Operation { // until then, we can catch some known generic descriptions and replace // them with the default http status message. const responseDescription = - response.description.toLowerCase() === 'response' + !response.description || response.description?.toLowerCase() === 'response' ? 
await renderContent(httpStatusMessage) : await renderContent(response.description) diff --git a/src/rest/scripts/utils/render-content.ts new file mode 100644 index 0000000000..bad9ab30a4 --- /dev/null +++ b/src/rest/scripts/utils/render-content.ts @@ -0,0 +1,11 @@ +import { renderContent as _renderContent } from '@/content-render/index' +import { getAlertTitles } from '@/languages/lib/get-alert-titles' + +// Wrap the renderContent function and provide the alertTitles +// so they aren't blank +export async function renderContent(template: string) { + const context = { + alertTitles: await getAlertTitles({ languageCode: 'en' }), + } + return await _renderContent(template, context) +} diff --git a/src/rest/scripts/utils/sync.ts index 09ddb008f8..b245f127d0 100644 --- a/src/rest/scripts/utils/sync.ts +++ b/src/rest/scripts/utils/sync.ts @@ -24,11 +24,17 @@ export async function syncRestData( sourceDirectory: string, restSchemas: string[], progAccessSource: string, + injectIntoSchema?: (schema: Schema, schemaName: string) => Schema | Promise<Schema>, ): Promise { await Promise.all( restSchemas.map(async (schemaName) => { const file = path.join(sourceDirectory, schemaName) - const schema = JSON.parse(await readFile(file, 'utf-8')) as Schema + let schema = JSON.parse(await readFile(file, 'utf-8')) as Schema + + if (injectIntoSchema) { + const injectedSchema = await injectIntoSchema(schema, schemaName) + schema = injectedSchema || schema // Fallback to original if injection returns null + } const operations: Operation[] = [] console.log('Instantiating operation instances from schema ', schemaName)