add reusables helper CLI (#50800)
Co-authored-by: Peter Bengtsson <mail@peterbe.com> Co-authored-by: Peter Bengtsson <peterbe@github.com>
This commit is contained in:
71
src/content-render/scripts/reusables-cli.ts
Normal file
71
src/content-render/scripts/reusables-cli.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
// Usage: npm run reusables -- --help
|
||||
// Usage: npm run reusables -- find used accounts/create-account.md
|
||||
// Usage: npm run reusables -- find unused
|
||||
// Usage: npm run reusables -- find potential-uses
|
||||
// Usage: npm run reusables -- find top-used
|
||||
|
||||
import { Command } from 'commander'
|
||||
import { findTopUsed, findUsed } from './reusables-cli/find/used'
|
||||
import { findPotentialUses } from './reusables-cli/find/potential-uses'
|
||||
import { findUnused } from './reusables-cli/find/unused'
|
||||
|
||||
// Fallback values used when the corresponding CLI value is not supplied.
const defaultSimilarityThreshold = 10000
const defaultTopUsedCount = 10
const absolutePathDescription = 'Show absolute paths in output instead of relative path to repo'

// Root command; every feature lives under the `find` subcommand group.
const program = new Command()
program.name('reusables-helper-cli')
program.description('Tools to help with reusable Docs content snippets')

const findCommand = program.command('find')

// `find used <reusable-path>`
const usedCommand = findCommand.command('used')
usedCommand.description('Find all content files that use a specific reusable.')
usedCommand.argument(
  '<reusable-path>',
  'Path to the reusable file relative to content/data/reusables, e.g. "accounts/create-account.md".',
)
usedCommand.option('-a --absolute', absolutePathDescription, false)
usedCommand.action(findUsed)

// `find top-used [number-of-most-used-to-find]`
const topUsedCommand = findCommand.command('top-used')
topUsedCommand.description('Find the top x most used reusables.')
topUsedCommand.argument(
  '[number-of-most-used-to-find]',
  'Number of most used reusables to find.',
  defaultTopUsedCount,
)
topUsedCommand.option('-a --absolute', absolutePathDescription, false)
topUsedCommand.action(findTopUsed)

// `find unused`
const unusedCommand = findCommand.command('unused')
unusedCommand.description(
  'Find all reusables that are not used in any content files. WARNING: This command may take a long time to run.',
)
unusedCommand.option('-a --absolute', absolutePathDescription, false)
unusedCommand.action(findUnused)

// `find potential-uses`
const potentialUsesCommand = findCommand.command('potential-uses')
potentialUsesCommand.description(
  'Find all content files that could use any reusables, but do not. WARNING: This command may take a long time to run.',
)
potentialUsesCommand.option(
  '-s, --similar',
  'Find files where contents loosely matches a reusable instead of an exact match.',
)
potentialUsesCommand.option(
  '-t, --threshold <number>',
  'Similarity threshold for similar reusables. e.g. 10000. This requires the --similar flag and some experimentation to find a useful value.',
  parseFloat,
  defaultSimilarityThreshold,
)
potentialUsesCommand.option('-a --absolute', absolutePathDescription, false)
potentialUsesCommand.action(findPotentialUses)

// Parse process.argv and dispatch to the matching action.
program.parse()
|
||||
132
src/content-render/scripts/reusables-cli/README.md
Normal file
132
src/content-render/scripts/reusables-cli/README.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# Reusables CLI
|
||||
|
||||
Helpful CLI tool for making it easier to work with `data/reusables`.
|
||||
|
||||
Helps find where reusables are already used, and where they could be used.
|
||||
|
||||
## Usage
|
||||
|
||||
`npm run reusables -- --help` to see commands
|
||||
|
||||
## Commands:
|
||||
|
||||
`npm run reusables --`:
|
||||
|
||||
- [find used `<reusable-path>`](#command-npm-run-reusables----find-used-reusable-path)
|
||||
- [find top-used [number-of-most-used-to-find]](#command-npm-run-reusables----find-top-used-number-of-most-used-to-find)
|
||||
- [find unused](#command-npm-run-reusables----find-unused)
|
||||
- [find potential-uses](#command-npm-run-reusables----find-potential-uses)
|
||||
|
||||
|
||||
### Command: `npm run reusables -- find used <reusable-path>`
|
||||
|
||||
Find where a specific reusable is used
|
||||
|
||||
#### Example
|
||||
|
||||
`npm run reusables -- find used copilot/signup-procedure.md`
|
||||
|
||||
```
|
||||
Searching for content files that use data/reusables/copilot/signup-procedure.md...
|
||||
|
||||
Found 2 files that use data/reusables/copilot/signup-procedure.md.
|
||||
|
||||
In content/billing/managing-billing-for-github-copilot/managing-your-github-copilot-individual-subscription.md on:
|
||||
Line 35
|
||||
|
||||
In content/copilot/quickstart.md on:
|
||||
Line 29
|
||||
```
|
||||
|
||||
### Command: `npm run reusables -- find top-used [number-of-most-used-to-find]`
|
||||
|
||||
Find top X (default 10) most used reusables and the number of times they are used.
|
||||
|
||||
#### Example
|
||||
|
||||
`npm run reusables -- find top-used 5`
|
||||
|
||||
```
|
||||
Searching for the top 5 most used reusables...
|
||||
0/3225 reusables checked...
|
||||
100/3225 reusables checked...
|
||||
(etc, etc)
|
||||
3225/3225 reusables checked...
|
||||
|
||||
Top 5 most used reusables:
|
||||
#1. 318 uses of data/reusables/repositories/navigate-to-repo.md
|
||||
#2. 286 uses of data/reusables/profile/access_org.md
|
||||
#3. 212 uses of data/reusables/enterprise-accounts/access-enterprise.md
|
||||
#4. 193 uses of data/reusables/profile/org_settings.md
|
||||
#5. 171 uses of data/reusables/actions/action-checkout.md
|
||||
```
|
||||
|
||||
### Command: `npm run reusables -- find unused`
|
||||
|
||||
Find which reusables aren't used in any content files.
|
||||
|
||||
This will take ~10+ minutes to run locally. You will be updated at each 5% interval.
|
||||
|
||||
#### Example
|
||||
|
||||
`npm run reusables -- find unused`
|
||||
|
||||
```
|
||||
Searching 6468 files and 3225 reusables...
|
||||
Progress: 5% done
|
||||
Progress: 10% done
|
||||
Progress: 15% done
|
||||
|
||||
...
|
||||
|
||||
Found 111 unused reusables:
|
||||
data/reusables/actions/action-labeler.md
|
||||
data/reusables/actions/actions-audit-events-for-enterprise.md
|
||||
data/reusables/actions/actions-audit-events-workflow.md
|
||||
data/reusables/actions/cache-no-org-policy.md
|
||||
data/reusables/actions/configure-runner-group-access.md
|
||||
...
|
||||
```
|
||||
|
||||
### Command: `npm run reusables -- find potential-uses`
|
||||
|
||||
Find files in which an existing reusable could be used but currently is not.
|
||||
|
||||
The command does this by searching every `content/` & `data/` file for strings that match every reusable that isn't ignored in `src/content-render/scripts/reusables-cli/ignore-reusables.ts`.
|
||||
|
||||
#### Example
|
||||
|
||||
`npm run reusables -- find potential-uses`
|
||||
|
||||
```
|
||||
Searching 6468 files for potential reusable use...
|
||||
0/3225 reusables checked...
|
||||
100/3225 reusables checked...
|
||||
(etc, etc)
|
||||
3223/3225 reusables checked...
|
||||
|
||||
Found 13 files that could use reusables.
|
||||
|
||||
Reusable data/reusables/actions/action-labeler.md can be used
|
||||
In content/actions/using-workflows/reusing-workflows.md on:
|
||||
Line 146
|
||||
Line 188
|
||||
|
||||
(cont.)
|
||||
```
|
||||
|
||||
#### Ignoring reusables
|
||||
|
||||
Some reusables might not make sense to "reuse" everywhere they could be reused. For instance, at the time of writing there is a reusable that is just the number "30" which shows up in certain files, but doesn't make sense to be replaced with a reusable.
|
||||
|
||||
In these cases you can skip these reusables from being checked by the `find potential-uses` command by adding their paths to the array in [src/content-render/scripts/reusables-cli/ignore-reusables.ts](./ignore-reusables.ts).
|
||||
|
||||
#### Similarity search
|
||||
|
||||
This may or may not be a useful search. It does a looser search to find places where the reusable _may_ be usable. You can include this type of search with the `-s` flag. Only files whose similarity score is above the "threshold" are reported, so you can use the `-t` flag to show fewer (higher number) or more (lower number) potential results.
|
||||
|
||||
The threshold is a number that finds how similar the words in the reusable are to the words in a given article.
|
||||
|
||||
The default threshold is `10000`. You can experiment with a higher or lower number, such as `15000`, if you aren't getting good results.
|
||||
|
||||
e.g. `npm run reusables -- find potential-uses -s -t 15000`
|
||||
@@ -0,0 +1,99 @@
|
||||
import fs from 'fs'
|
||||
import {
|
||||
FilesWithLineNumbers,
|
||||
FilesWithSimilarity,
|
||||
findIndicesOfSubstringInString,
|
||||
findSimilarSubStringInString,
|
||||
getAllContentFilePaths,
|
||||
getAllReusablesFilePaths,
|
||||
getRelativeReusablesPath,
|
||||
printFindsWithLineNumbers,
|
||||
} from '../shared'
|
||||
import { reusablesToIgnore } from '../ignore-reusables'
|
||||
|
||||
/**
 * Scan every content and data file for text that duplicates an existing
 * reusable, and print where each duplicate was found.
 *
 * @param options.similar - When truthy, also score each file against each
 *   reusable with the looser word-overlap comparison.
 * @param options.threshold - Minimum similarity score (exclusive) a file must
 *   exceed to be reported by the similarity search.
 * @param options.absolute - Print absolute paths instead of repo-relative ones.
 */
export function findPotentialUses({
  similar,
  threshold,
  absolute,
}: {
  similar?: boolean
  threshold: number
  absolute: boolean
}) {
  const reusableFiles = getAllReusablesFilePaths()
  const allFilePaths = getAllContentFilePaths()

  const filesThatCouldUseReusable: FilesWithLineNumbers = []
  const filesThatCouldUseReusableSimilar: FilesWithSimilarity = []

  // Read all content & data files into memory once so they are not re-read for every reusable
  const allFileContents = allFilePaths.map((filePath) => ({
    filePath,
    fileContents: fs.readFileSync(filePath, 'utf-8'),
  }))

  console.log(`Searching ${allFileContents.length} files for potential reusable use...`)
  if (similar) {
    console.log('Using similarity search, this may take a while...')
  }

  let reusableCount = 0
  for (const reusableFilePath of reusableFiles) {
    // Surrounding whitespace is not part of the text we want to match
    const reusableText = fs.readFileSync(reusableFilePath, 'utf-8').trim()
    const reusableRelativeFilePath = getRelativeReusablesPath(reusableFilePath)

    if (!reusableText) {
      if (!absolute) {
        console.log(`Skipping empty reusable file: ${reusableRelativeFilePath}`)
      } else {
        console.log(`Skipping empty reusable file: ${reusableFilePath}`)
      }
      continue
    }

    if (reusablesToIgnore.includes(reusableRelativeFilePath)) {
      continue
    }

    if (reusableCount % 100 === 0) {
      console.log(`${reusableCount}/${reusableFiles.length} reusables checked...`)
    }
    reusableCount += 1

    for (const { filePath, fileContents } of allFileContents) {
      // Skip the reusable file itself
      if (filePath === reusableFilePath) continue

      const indices = findIndicesOfSubstringInString(reusableText, fileContents)
      if (indices.length > 0) {
        // Convert each character offset into the line it falls on
        const lineNumbers = indices.map((index) => fileContents.slice(0, index).split('\n').length)

        filesThatCouldUseReusable.push({
          filePath,
          lineNumbers,
          reusableFile: reusableFilePath,
        })
      }

      if (similar) {
        const similarityScore = findSimilarSubStringInString(reusableText, fileContents)
        if (similarityScore > threshold) {
          filesThatCouldUseReusableSimilar.push({
            filePath,
            similarityScore,
            reusableFile: reusableFilePath,
          })
        }
      }
    }
  }

  console.log(`${reusableCount}/${reusableFiles.length} reusables checked...`)

  console.log(`\nFound ${filesThatCouldUseReusable.length} files that could use reusables.`)
  // Pass the similarity findings too; otherwise the results of --similar are never shown
  printFindsWithLineNumbers(absolute, filesThatCouldUseReusable, filesThatCouldUseReusableSimilar)
}
|
||||
54
src/content-render/scripts/reusables-cli/find/unused.ts
Normal file
54
src/content-render/scripts/reusables-cli/find/unused.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import { getLiquidTokens } from '@/content-linter/lib/helpers/liquid-utils.js'
|
||||
import {
|
||||
getAllContentFilePaths,
|
||||
getAllReusablesFilePaths,
|
||||
getRelativeReusablesPath,
|
||||
resolveReusablePath,
|
||||
} from '../shared'
|
||||
|
||||
/**
 * Print every reusable file that is not referenced by a `data reusables.*`
 * Liquid tag in any content or data file.
 *
 * @param options.absolute - Print absolute paths instead of repo-relative ones.
 */
export function findUnused({ absolute }: { absolute: boolean }) {
  const reusableFilePaths = getAllReusablesFilePaths()
  const allFilePaths = getAllContentFilePaths()

  const usedReusables = new Set<string>()
  // Liquid references already resolved. Resolution depends only on the
  // reference text, so repeating it for a reference seen earlier adds nothing.
  const resolvedReferences = new Set<string>()
  const totalFiles = allFilePaths.length
  let lastLoggedPercent = 0

  console.log(`Searching ${totalFiles} files and ${reusableFilePaths.length} reusables...`)

  for (let i = 0; i < totalFiles; i++) {
    const fileContents = fs.readFileSync(allFilePaths[i], 'utf-8')

    for (const { args, name } of getLiquidTokens(fileContents)) {
      if (name !== 'data' || !args.startsWith('reusables.')) continue
      if (resolvedReferences.has(args)) continue
      resolvedReferences.add(args)

      // `reusables.foo.bar` -> `data/reusables/foo/bar.md`
      const reusableName = path.join('data', ...args.split('.')) + '.md'
      // Special cases where we don't want them to count as reusables. It's an example in a how-to doc
      if (reusableName.includes('foo/bar.md') || reusableName.includes('your-reusable-name.md')) {
        continue
      }
      usedReusables.add(resolveReusablePath(reusableName))
    }

    // Report progress each time at least another 5% of the files has been processed
    const percentDone = Math.floor(((i + 1) / totalFiles) * 100)
    if (percentDone >= lastLoggedPercent + 5) {
      console.log(`Progress: ${percentDone}% done`)
      lastLoggedPercent = percentDone
    }
  }

  const unusedReusables = reusableFilePaths.filter((filePath) => !usedReusables.has(filePath))

  console.log(`\nFound ${unusedReusables.length} unused reusables:`)
  for (const reusableFilePath of unusedReusables) {
    const printReusablePath = absolute
      ? reusableFilePath
      : getRelativeReusablesPath(reusableFilePath)
    console.log(printReusablePath)
  }
}
|
||||
74
src/content-render/scripts/reusables-cli/find/used.ts
Normal file
74
src/content-render/scripts/reusables-cli/find/used.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import { getLiquidTokens } from '@/content-linter/lib/helpers/liquid-utils.js'
|
||||
import {
|
||||
FilesWithLineNumbers,
|
||||
getAllContentFilePaths,
|
||||
getIndicesOfLiquidVariable,
|
||||
getRelativeReusablesPath,
|
||||
getReusableLiquidString,
|
||||
printFindsWithLineNumbers,
|
||||
resolveReusablePath,
|
||||
} from '../shared'
|
||||
|
||||
/**
 * Print every content or data file that references the given reusable,
 * together with the line numbers of each reference.
 *
 * @param reusablePath - Reusable to look for, as typed on the command line.
 * @param options.absolute - Print absolute paths instead of repo-relative ones.
 */
export function findUsed(reusablePath: string, { absolute }: { absolute: boolean }) {
  const targetPath = resolveReusablePath(reusablePath)
  const liquidName = getReusableLiquidString(targetPath)
  const displayPath = absolute ? targetPath : getRelativeReusablesPath(targetPath)

  console.log(`Searching for content files that use ${displayPath}...`)

  const matches: FilesWithLineNumbers = []
  for (const candidatePath of getAllContentFilePaths()) {
    // A reusable is never reported as a user of itself
    if (candidatePath === targetPath) continue

    const text = fs.readFileSync(candidatePath, 'utf-8')
    const offsets = getIndicesOfLiquidVariable(liquidName, text)
    if (offsets.length === 0) continue

    matches.push({
      filePath: candidatePath,
      // Translate each character offset into the line it falls on
      lineNumbers: offsets.map((offset) => text.slice(0, offset).split('\n').length),
    })
  }

  console.log(`\nFound ${matches.length} files that use ${displayPath}.`)
  printFindsWithLineNumbers(absolute, matches)
}
|
||||
|
||||
/**
 * Count how often each reusable is referenced by a `data reusables.*` Liquid
 * tag across all content and data files, and print the most used ones.
 *
 * @param numberOfMostUsedToFind - How many reusables to list. A value typed on
 *   the command line arrives as a string at runtime, so it is converted and
 *   validated here before use.
 * @param options.absolute - Print absolute paths instead of repo-relative ones.
 */
export function findTopUsed(numberOfMostUsedToFind: number, { absolute }: { absolute: boolean }) {
  // Reject values such as "abc", "2.5" or "-3" instead of letting them reach Array.prototype.slice
  const requestedCount = Number(numberOfMostUsedToFind)
  if (!Number.isInteger(requestedCount) || requestedCount < 1) {
    console.error(
      `Number of most used reusables to find must be a positive whole number, got: ${numberOfMostUsedToFind}`,
    )
    process.exit(1)
  }

  const allFilePaths = getAllContentFilePaths()

  const reusableCounts = new Map<string, number>()
  for (const filePath of allFilePaths) {
    const fileContents = fs.readFileSync(filePath, 'utf-8')
    for (const { args, name } of getLiquidTokens(fileContents)) {
      if (name === 'data' && args.startsWith('reusables.')) {
        reusableCounts.set(args, (reusableCounts.get(args) ?? 0) + 1)
      }
    }
  }

  // Most used first
  const sortedCounts = Array.from(reusableCounts.entries()).sort((a, b) => b[1] - a[1])

  console.log(`\nTop ${requestedCount} most used reusables:`)
  let i = 0
  for (const [reusable, count] of sortedCounts.slice(0, requestedCount)) {
    // `reusables.foo.bar` -> `data/reusables/foo/bar.md`
    let printReusablePath = path.join('data', ...reusable.split('.')) + '.md'
    if (absolute) {
      printReusablePath = path.resolve(printReusablePath)
    }
    console.log(`#${`${++i}.`.padEnd(3)} ${count} uses of ${printReusablePath}`)
  }
}
|
||||
@@ -0,0 +1,5 @@
|
||||
/**
 * Reusables that are skipped when checking for potential uses of reusables.
 * Each path must be relative to the root of the repo.
 */
export const reusablesToIgnore: string[] = [
  // Just a number, so it pops up in unrelated files
  'data/reusables/copilot/trial-period.md',
]
|
||||
196
src/content-render/scripts/reusables-cli/shared.ts
Normal file
196
src/content-render/scripts/reusables-cli/shared.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import walk from 'walk-sync'
import path from 'path'
import { fileURLToPath } from 'url'
import { TokenizationError } from 'liquidjs'
import { getLiquidTokens } from '@/content-linter/lib/helpers/liquid-utils'
|
||||
|
||||
// Directory containing this module. fileURLToPath is used instead of reading
// URL.pathname because pathname stays percent-encoded (a space becomes "%20")
// and is not a valid filesystem path on Windows.
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// This module sits four directories below the root of the repository.
const repoRoot = path.resolve(__dirname, '../../../../')
const contentDirectory = path.resolve(repoRoot, 'content/')
const dataDirectory = path.resolve(repoRoot, 'data/')

const reusablesDirectory = path.resolve(dataDirectory, 'reusables/')
|
||||
|
||||
/** A file in which matches were found, with the line number of each match. */
interface LineNumberFinding {
  filePath: string
  lineNumbers: number[]
  // Reusable the match relates to, when the search covered more than one reusable
  reusableFile?: string
}

/** A file that was scored against a reusable by the similarity search. */
interface SimilarityFinding {
  filePath: string
  similarityScore: number
  // Reusable the score relates to, when the search covered more than one reusable
  reusableFile?: string
}

export type FilesWithLineNumbers = LineNumberFinding[]
export type FilesWithSimilarity = SimilarityFinding[]
|
||||
|
||||
/**
 * Keep only the Markdown (`.md`) and YAML (`.yml`) paths, dropping any path
 * that ends in `README.md`.
 *
 * @param files - Candidate file paths.
 * @returns A new array with the paths that passed the filter, in input order.
 */
export function filterFiles(files: string[]) {
  return files.filter((filePath) => {
    // The README exclusion has to apply to `.md` paths; checking it only on
    // the `.yml` branch can never exclude anything.
    if (filePath.endsWith('README.md')) return false
    return filePath.endsWith('.md') || filePath.endsWith('.yml')
  })
}
|
||||
|
||||
/**
 * List every file under the content directory followed by every file under
 * the data directory, after both lists have been passed through filterFiles.
 */
export function getAllContentFilePaths() {
  // Return full paths to files only; directories themselves are not listed
  const walkOptions = { includeBasePath: true, directories: false }

  const contentPaths = filterFiles(walk(contentDirectory, walkOptions))
  const dataPaths = filterFiles(walk(dataDirectory, walkOptions))

  return [...contentPaths, ...dataPaths]
}
|
||||
|
||||
/**
 * Get the string that represents the reusable in the content files, e.g.
 * `<reusables dir>/foo/bar.md` becomes `reusables.foo.bar`.
 *
 * @param reusablePath - Path to the reusable file.
 * @returns The dotted name, prefixed with `reusables.`.
 */
export function getReusableLiquidString(reusablePath: string): string {
  const relativePath = path.relative(reusablesDirectory, reusablePath)
  // Strip whatever extension the file has rather than assuming a 3-character `.md`
  const extension = path.extname(relativePath)
  const withoutExtension = extension ? relativePath.slice(0, -extension.length) : relativePath
  // path.relative returns platform separators, so split on path.sep
  return `reusables.${withoutExtension.split(path.sep).join('.')}`
}
|
||||
|
||||
/**
 * Locate every `data` Liquid tag in `fileContents` whose trimmed arguments
 * equal `liquidVariable`.
 *
 * @param liquidVariable - Reference to look for, e.g. `reusables.foo.bar`.
 * @param fileContents - Text to tokenize.
 * @returns The `begin` value of each matching token, or an empty array when
 *   tokenizing raises a TokenizationError. Any other error is rethrown.
 */
export function getIndicesOfLiquidVariable(liquidVariable: string, fileContents: string): number[] {
  const offsets: number[] = []

  try {
    for (const token of getLiquidTokens(fileContents)) {
      if (token.name !== 'data') continue
      if (token.args.trim() !== liquidVariable) continue
      offsets.push(token.begin)
    }
    return offsets
  } catch (error) {
    // Content that cannot be tokenized is treated as having no matches
    if (!(error instanceof TokenizationError)) throw error
    return []
  }
}
|
||||
|
||||
/**
 * Find the path to a reusable file.
 *
 * The input may be a full path, a path relative to some parent directory, or
 * just a file name; `.md` is appended when no `.md`/`.yml` extension is given.
 * If no reusable matches, or the input stays ambiguous, an error is printed
 * and the process exits with code 1.
 *
 * @param reusablePath - User-supplied reference to a reusable.
 * @returns The path of the single matching reusable file.
 */
export function resolveReusablePath(reusablePath: string): string {
  // Try .md if extension is not provided
  const hasKnownExtension = reusablePath.endsWith('.md') || reusablePath.endsWith('.yml')
  const wantedPath = hasKnownExtension ? reusablePath : `${reusablePath}.md`

  // Allow user to just pass the name of the file. If it's not ambiguous, we'll find it.
  const foundPaths = getAllReusablesFilePaths().filter((possiblePath) =>
    possiblePath.includes(wantedPath),
  )

  if (foundPaths.length === 1) {
    return foundPaths[0]
  }

  if (foundPaths.length === 0) {
    console.error(`Reusables file not found: ${wantedPath}`)
    process.exit(1)
  }

  // A plain substring test also matches longer names (`foo.md` is contained in
  // `bar-foo.md`). Prefer a candidate where the input lines up with a whole
  // path segment before declaring the input ambiguous.
  const boundaryMatches = foundPaths.filter(
    (possiblePath) =>
      possiblePath === wantedPath || possiblePath.endsWith(path.sep + wantedPath),
  )
  if (boundaryMatches.length === 1) {
    return boundaryMatches[0]
  }

  console.error(`Multiple reusables found by name: ${wantedPath}`)
  for (let i = 0; i < foundPaths.length; i++) {
    console.error(` ${i + 1}: ${getRelativeReusablesPath(foundPaths[i])}`)
  }
  console.error('Please specify which reusable by passing the full path')
  process.exit(1)
}
|
||||
|
||||
/**
 * List every file under the reusables directory, passed through filterFiles.
 */
export function getAllReusablesFilePaths(): string[] {
  // Full paths to files only; directories themselves are not listed
  const everyFile = walk(reusablesDirectory, {
    includeBasePath: true,
    directories: false,
  })
  return filterFiles(everyFile)
}
|
||||
|
||||
/**
 * Find every position at which `substr` occurs in `str`, ignoring case.
 * Overlapping occurrences are all reported.
 *
 * @param substr - Text to look for.
 * @param str - Text to search in.
 * @returns The start offset of each occurrence in ascending order; empty when
 *   there is no occurrence or `substr` is empty.
 */
export function findIndicesOfSubstringInString(substr: string, str: string): number[] {
  // Lower-case both sides; lower-casing only the haystack means a needle
  // containing any uppercase character can never match.
  const needle = substr.toLowerCase()
  // An empty needle matches at every position and would never let the search end
  if (needle.length === 0) return []

  const haystack = str.toLowerCase()
  const result: number[] = []

  for (
    let idx = haystack.indexOf(needle);
    idx !== -1;
    idx = haystack.indexOf(needle, idx + 1)
  ) {
    result.push(idx)
  }

  return result
}
|
||||
|
||||
/**
 * Score how much the words of `substr` overlap with the words of `str`.
 *
 * Both texts are split into lower-cased "sentences" on `.`, and every
 * sentence of `substr` is compared against every sentence of `str`. Each pair
 * contributes (words of the first also present in the second) divided by the
 * combined word count of the pair.
 *
 * @returns The summed contributions divided by the number of `substr`
 *   sentences, multiplied by the number of `str` sentences, then rounded.
 */
export function findSimilarSubStringInString(substr: string, str: string) {
  const toLowerCasedSentences = (text: string) =>
    text.split('.').map((sentence) => sentence.toLowerCase())

  const needleSentences = toLowerCasedSentences(substr)
  const corpusSentences = toLowerCasedSentences(str)

  let runningScore = 0

  for (const needleSentence of needleSentences) {
    // The words of the needle sentence are the same for every corpus sentence
    const needleWords = needleSentence.split(' ')

    for (const corpusSentence of corpusSentences) {
      const corpusWords = corpusSentence.split(' ')
      const sharedCount = needleWords.filter((word) => corpusWords.includes(word)).length

      runningScore += sharedCount / (needleWords.length + corpusWords.length)
    }
  }

  const perNeedleSentence = runningScore / needleSentences.length
  return Math.round(perNeedleSentence * corpusSentences.length)
}
|
||||
|
||||
/**
 * Print search results to the console.
 *
 * @param absolute - When false, file paths are shown relative to the repo root.
 * @param reusableFindings - Matches to print together with their line numbers.
 * @param similarityFindings - Optional similarity matches; when non-empty they
 *   are printed in a separate section with their score.
 */
export function printFindsWithLineNumbers(
  absolute: boolean,
  reusableFindings: { filePath: string; lineNumbers: number[]; reusableFile?: string }[],
  similarityFindings?: { filePath: string; similarityScore: number; reusableFile?: string }[],
) {
  // Work out how a finding's paths should be displayed
  const toDisplayPaths = (filePath: string, reusableFile?: string) =>
    absolute
      ? { shownReusable: reusableFile, shownFile: filePath }
      : {
          shownReusable: getRelativeReusablesPath(reusableFile as string),
          shownFile: path.relative(repoRoot, filePath),
        }

  for (const finding of reusableFindings) {
    const { shownReusable, shownFile } = toDisplayPaths(finding.filePath, finding.reusableFile)

    if (finding.reusableFile) {
      console.log(`\nReusable ${shownReusable} can be used`)
      console.log(`In ${shownFile} on:`)
    } else {
      console.log(`\nIn ${shownFile} on:`)
    }

    finding.lineNumbers.forEach((lineNumber) => {
      console.log(` Line ${lineNumber}`)
    })
  }

  // The similarity section is only printed when there is something to show
  if (!similarityFindings?.length) return

  console.log('\nFindings using "similar" algorithm:')
  for (const finding of similarityFindings) {
    const { shownReusable, shownFile } = toDisplayPaths(finding.filePath, finding.reusableFile)

    if (finding.reusableFile) {
      console.log(`\nReusables ${shownReusable} can be used`)
      console.log(`In ${shownFile} with similarity score: ${finding.similarityScore}`)
    } else {
      console.log(`\nIn ${shownFile} with similarity score: ${finding.similarityScore}`)
    }
  }
}
|
||||
|
||||
/**
 * Express a reusable's path relative to the root of the repository.
 *
 * @param reusablePath - Path to convert.
 * @returns The repo-relative path, or an empty string for a falsy input.
 */
export function getRelativeReusablesPath(reusablePath: string) {
  return reusablePath ? path.relative(repoRoot, reusablePath) : ''
}
|
||||
Reference in New Issue
Block a user