diff --git a/Dockerfile b/Dockerfile index f8a21fc81a..b3bebe63f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ # -------------------------------------------------------------------------------- # To update the sha, run `docker pull node:$VERSION-alpine` # look for something like: `Digest: sha256:0123456789abcdef` -FROM node:20-alpine@sha256:b1789b7be6aa16afd642eaaaccdeeeb33bd8f08e69b3d27d931aa9665b731f01 as base +FROM node:20-alpine@sha256:32427bc0620132b2d9e79e405a1b27944d992501a20417a7f407427cc4c2b672 as base # This directory is owned by the node user ARG APP_HOME=/home/node/app diff --git a/src/github-apps/scripts/sync.js b/src/github-apps/scripts/sync.js old mode 100755 new mode 100644 index d735fc7870..544fb18a42 --- a/src/github-apps/scripts/sync.js +++ b/src/github-apps/scripts/sync.js @@ -62,9 +62,11 @@ export async function syncGitHubAppsData(openApiSource, sourceSchemas, progAcces } // permissions - for (const [permissionName, readOrWrite] of Object.entries( - progAccessData[operation.operationId].permissions, - )) { + const allPermissions = { + ...progAccessData[operation.operationId].permissions.and, + ...progAccessData[operation.operationId].permissions.or, + } + for (const [permissionName, readOrWrite] of Object.entries(allPermissions)) { const tempTitle = permissionName.replace(/_/g, ' ') const permissionNameExists = progActorResources[permissionName] if (!permissionNameExists) { @@ -76,9 +78,8 @@ export async function syncGitHubAppsData(openApiSource, sourceSchemas, progAcces const resourceGroup = progActorResources[permissionName]?.resource_group || '' const displayTitle = getDisplayTitle(title, resourceGroup) const relatedPermissionNames = Object.keys( - progAccessData[operation.operationId].permissions, + progAccessData[operation.operationId].permissions.and, ).filter((permission) => permission !== permissionName) - // github app permissions const serverToServerPermissions = githubAppsData['server-to-server-permissions'] if (!serverToServerPermissions[permissionName]) { @@ -182,11 +183,41 @@ async function getProgAccessData(progAccessSource) { const progAccessData = {} for (const operation of progAccessDataRaw) { - const permissions = {} + const permissions = { or: {}, and: {} } if (operation.permission_sets) { - operation.permission_sets.forEach((permissionSet) => { - Object.assign(permissions, permissionSet) - }) + // Currently there is only a length of up to 2 permission_sets + // OR permission_sets are dashed lists in yaml + // e.g. + // permission_sets: + // - admin: write + // - contents: read + // This becomes: [{admin: write}, {contents: read}] with yaml.load + if (operation.permission_sets.length === 2) { + // There's currently only one scenario where you have an OR permission_set where one of the OR permissions is an AND permission_set + // In this scenario, we want the AND permission_set + if ( + Object.keys(operation.permission_sets[0]).length > 1 || + Object.keys(operation.permission_sets[1]).length > 1 + ) { + const andPermissionSet = + Object.keys(operation.permission_sets[0]).length > 1 + ? operation.permission_sets[0] + : operation.permission_sets[1] + Object.assign(permissions.and, andPermissionSet) + } else { + operation.permission_sets.forEach((permissionSet) => { + Object.assign(permissions.or, permissionSet) + }) + } + // AND permission_sets are under the same dash in yaml + // e.g. + // permission_sets: + // - admin: write + // contents: read + // This becomes: [{admin: write, contents: read}] with yaml.load + } else if (operation.permission_sets.length === 1) { + Object.assign(permissions.and, operation.permission_sets[0]) + } } const userToServerRest = operation.user_to_server.enabled diff --git a/src/shielding/middleware/handle-invalid-query-strings.js b/src/shielding/middleware/handle-invalid-query-strings.js index 64a96680cf..7048dfab63 100644 --- a/src/shielding/middleware/handle-invalid-query-strings.js +++ b/src/shielding/middleware/handle-invalid-query-strings.js @@ -76,7 +76,17 @@ export default function handleInvalidQuerystrings(req, res, next) { const badKeylessQuery = rootHomePage && keys.length === 1 && keys[0].length === 8 && !query[keys[0]] - if (keys.length >= MAX_UNFAMILIAR_KEYS_REDIRECT || badKeylessQuery) { + // It's still a mystery why these requests happen but we've seen large + // number of requests that have a very long URL-encoded query string + // that starts with 'tool' but doesn't have any value. + // For example + // ?tool%25252525253Dvisualstudio%252525253D%2525252526tool%25252525... + // ...3Dvscode%2525253D%25252526tool%2525253Dvscode%25253D%252526tool... + // ...%25253Dvimneovim%253D%2526tool%253Djetbrains%3D%26tool%3Djetbrains=& + // Let's shield against those by removing them. + const badToolsQuery = keys.some((key) => key.startsWith('tool%') && !query[key]) + + if (keys.length >= MAX_UNFAMILIAR_KEYS_REDIRECT || badKeylessQuery || badToolsQuery) { defaultCacheControl(res) const sp = new URLSearchParams(query) keys.forEach((key) => sp.delete(key)) diff --git a/src/shielding/tests/invalid-querystrings.js b/src/shielding/tests/invalid-querystrings.js index e21f2e58ae..cdf9b95667 100644 --- a/src/shielding/tests/invalid-querystrings.js +++ b/src/shielding/tests/invalid-querystrings.js @@ -63,6 +63,14 @@ describe('invalid query strings', () => { expect(res.statusCode).toBe(302) expect(res.headers.location).toBe('/en') }) + + test('bad tool query string with Chinese URL-encoded characters', async () => { + const url = + '/?tool%25252525253Dvisualstudio%252525253D%2525252526tool%252525253Dvscode%2525253D%25252526tool%2525253Dvscode%25253D%252526tool%25253Dvimneovim%253D%2526tool%253Djetbrains%3D%26tool%3Djetbrains=&tool=azure_data_studio' + const res = await get(url) + expect(res.statusCode).toBe(302) + expect(res.headers.location).toBe('/?tool=azure_data_studio') + }) }) function randomCharacters(length) {