import { expect, jest } from '@jest/globals'
import robotsParser from 'robots-parser'

import {
  SURROGATE_ENUMS,
  makeLanguageSurrogateKey,
} from '../../middleware/set-fastly-surrogate-key.js'
import { get } from '../helpers/e2etest.js'

describe('robots.txt', () => {
  jest.setTimeout(5 * 60 * 1000)

  let res, robots
  beforeAll(async () => {
    res = await get('/robots.txt', {
      headers: {
        Host: 'docs.github.com',
      },
    })
    expect(res.statusCode).toBe(200)
    robots = robotsParser('https://docs.github.com/robots.txt', res.text)
  })

  test('allows indexing of the homepage and English content', async () => {
    expect(robots.isAllowed('https://docs.github.com/')).toBe(true)
    expect(robots.isAllowed('https://docs.github.com/en')).toBe(true)
    expect(
      robots.isAllowed('https://docs.github.com/en/articles/verifying-your-email-address')
    ).toBe(true)
  })

  test('disallows indexing of azurecontainer.io domains', async () => {
    const res = await get('/robots.txt', {
      headers: {
        host: 'docs-internal-preview-12345-asdfz.azurecontainer.io',
      },
    })
    expect(res.body).toEqual('User-agent: *\nDisallow: /')
  })

  test('does not have duplicate lines', () => {
    expect(res.body.split('\n').length).toBe(new Set(res.body.split('\n')).size)
  })

  test('is cached by headers', () => {
    expect(res.headers['cache-control']).toMatch(/public, max-age=/)

    const surrogateKeySplit = res.headers['surrogate-key'].split(/\s/g)
    expect(surrogateKeySplit.includes(SURROGATE_ENUMS.DEFAULT)).toBeTruthy()
    expect(surrogateKeySplit.includes(makeLanguageSurrogateKey('en'))).toBeTruthy()
  })
})