#!/usr/bin/env node
import { flatten } from 'lodash-es'
import path from 'path'
import walk from 'walk-sync'
import { execSync } from 'child_process'
import assert from 'assert'
import loadSiteData from '../../lib/site-data.js'
import { loadPages } from '../../lib/page-data.js'
import patterns from '../../lib/patterns.js'
import getDataReferences from '../../lib/get-liquid-data-references.js'

const imagesPath = '/assets/images'

// these paths should remain in the repo even if they are not referenced directly
const ignoreList = ['/assets/images/help/site-policy', 'site.data.reusables.policies']

// search these dirs for images or data references
// content files are handled separately in assetsReferencedInContent
const dirsToGrep = [
  'includes',
  'layouts',
  'javascripts',
  'stylesheets',
  'README.md',
  'data/reusables',
  'data/variables',
]

const validArgs = ['reusables', 'variables', 'images']

export default async function findUnusedAssets(assetType) {
  assert(validArgs.includes(assetType), `arg must be one of: ${validArgs.join(', ')}`)

  const pages = await getEnglishPages()
  const data = await loadSiteData()

  // step 1. find all assets that exist in the repo
  const allReusablesInRepo = data.en.site.data.reusables
  const allVariablesInRepo = data.en.site.data.variables
  const allImagesInRepo = getAllImagesInRepo()

  // step 2. find assets referenced in content by searching page markdown
  const assetsReferencedInContent = flatten(
    pages.map((page) => {
      const fullContent = [page.intro, page.title, page.product, page.markdown].join()

      return assetType === 'images'
        ? getImageReferences(fullContent)
        : getDataReferences(fullContent)
    })
  )

  // step 3. find assets referenced in non-content directories
  const assetsReferencedInNonContentDirs = getAssetsReferencedInNonContentDirs(assetType)

  // step 4. combine all the referenced assets into one array
  const allReferencedAssets = [
    ...new Set(assetsReferencedInContent.concat(assetsReferencedInNonContentDirs)),
  ]

  // step 5. return asssets that exist but are not referenced
  switch (assetType) {
    case 'images':
      return getUnusedImages(allImagesInRepo, allReferencedAssets)
    case 'reusables':
      return getUnusedData(allReusablesInRepo, assetType, allReferencedAssets)
    case 'variables':
      return getUnusedData(allVariablesInRepo, assetType, allReferencedAssets)
  }
}

async function getEnglishPages() {
  const pages = await loadPages()
  return pages.filter((page) => page.languageCode === 'en')
}

function getAllImagesInRepo() {
  return walk(path.join(process.cwd(), imagesPath), { directories: false })
    .filter((relPath) => !relPath.endsWith('.md') && !relPath.match(/^(octicons|site)\//))
    .map((relPath) => path.join(imagesPath, relPath))
}

function getAssetsReferencedInNonContentDirs(assetType) {
  const regex = assetType === 'images' ? patterns.imagePath.source : patterns.dataReference.source

  const grepCmd = `egrep -rh '${regex}' ${dirsToGrep.join(' ')}`

  const grepResults = execSync(grepCmd).toString()

  return assetType === 'images' ? getImageReferences(grepResults) : getDataReferences(grepResults)
}

function getImageReferences(text) {
  return (text.match(patterns.imagePath) || []).map((ref) => {
    return ref.replace(/\.\.\//g, '').trim()
  })
}

function getUnusedData(allDataInRepo, assetType, allReferencedAssets) {
  const unusedData = []

  Object.keys(allDataInRepo).forEach((filename) => {
    Object.keys(allDataInRepo[filename]).forEach((key) => {
      const name = `site.data.${assetType}.${filename}.${key}`
      if (
        !allReferencedAssets.includes(name) &&
        !ignoreList.find((ignored) => name.startsWith(ignored))
      ) {
        unusedData.push(name)
      }
    })
  })

  return unusedData
}

function getUnusedImages(allImagesInRepo, allReferencedAssets) {
  return allImagesInRepo.filter(
    (image) =>
      !allReferencedAssets.includes(image) &&
      !ignoreList.find((ignored) => image.startsWith(ignored))
  )
}