Files
opentf/internal/command/format/control_chars.go
Martin Atkins 6dec25c1fb command/format: FilterControlChars is now ReplaceControlChars
Since this is replacing C0 control characters with other control characters
rather than just removing them completely, "replace" is probably the more
intuitive name for this function.

This also removes the preallocation of the output buffer in the case where
control characters were present in the input, letting the strings.Builder
implementation manage the buffer growth automatically itself.

Signed-off-by: Martin Atkins <mart@degeneration.co.uk>
2025-11-10 09:49:35 -08:00

92 lines
3.6 KiB
Go

// Copyright (c) The OpenTofu Authors
// SPDX-License-Identifier: MPL-2.0
// Copyright (c) 2023 HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package format
import (
"strings"
)
// Codepoints used when swapping control characters for their visible
// stand-ins from the Unicode "Control Pictures" block.
const (
	// unicodeControlPicturesStart is the first codepoint of the Unicode
	// "Control Pictures" block.
	//
	// The first 32 codepoints of that block line up one-to-one with the
	// control characters occupying the first 32 codepoints of the
	// "Basic Latin" block, so adding this constant to one of those control
	// character codepoints yields the codepoint of its control picture.
	unicodeControlPicturesStart rune = 0x2400

	// del is the codepoint of the DEL character, which sits at the end of
	// the 7-bit range rather than among the first 32 codepoints.
	del rune = 0x7f

	// delPicture is the control picture codepoint used for DEL. It is listed
	// separately because del + unicodeControlPicturesStart does not land
	// on it.
	delPicture rune = 0x2421
)
// ReplaceControlChars translates 7-bit control characters in the given string
// (the C0 controls, with character codes less than 32, and also DEL, 0x7f)
// into their corresponding symbols from the Unicode "Control Pictures" block,
// so that the result can be printed to a terminal-like device without
// affecting the terminal's state machine.
//
// As an exception this does not change control characters that commonly appear
// as part of human-oriented text: newline (0x0a), carriage return (0x0d),
// and horizontal tab (0x09).
//
// Every other byte of the input, including any bytes that are not valid UTF-8,
// is copied to the result unchanged. If nothing needs replacing then the input
// string itself is returned without allocating a copy.
//
// We use this when including untrusted data as part of "human-friendly"
// output. We use the Unicode control pictures so that a human reader can
// (with a suitably-equipped terminal font) still identify which specific
// control character appeared, in case that is helpful for debugging, and
// because they are relatively unlikely to appear literally in a string we're
// rendering in the UI.
//
// This is only for arbitrary text strings rendered directly in the UI,
// such as the message portions of rendered diagnostics. We need not use this
// when producing machine-readable output such as JSON representations, or when
// showing a string in a quoted notation that mimics either the HCL or Go string
// syntax, because the control characters are already backslash-escaped by the
// quoting process in those cases. We also don't need to use this for strings
// that are known to contain valid HCL identifiers, because the control
// characters are not valid for use in HCL's identifier tokens.
func ReplaceControlChars(input string) string {
	// In the common case there are no relevant control characters at all, so
	// we'll first scan the string to see if we can return the input verbatim
	// and thus avoid allocating a new copy of that string.
	if !strings.ContainsFunc(input, isFilteredControlChar) {
		return input
	}

	// If we get here then we definitely need to build a new string.
	var buf strings.Builder

	// pending is the offset of the first input byte that has not yet been
	// copied into buf.
	pending := 0
	for i, r := range input {
		if !isFilteredControlChar(r) {
			continue
		}

		// Flush the untouched bytes that precede this control character.
		// Copying the raw bytes, rather than re-encoding each decoded rune,
		// means that invalid UTF-8 passes through unchanged exactly as it
		// does on the no-replacement path above, instead of being rewritten
		// as U+FFFD only when a control character happens to be present.
		//
		// Writing to a [strings.Builder] never encounters an error.
		_, _ = buf.WriteString(input[pending:i])

		// r is a control character we must not emit literally, so we write
		// the 3-byte encoding of its Control Picture codepoint instead.
		_, _ = buf.WriteRune(controlPicture(r))

		// Every character we replace is a 7-bit codepoint and therefore
		// occupies exactly one byte of the input.
		pending = i + 1
	}

	// Whatever follows the final replaced character is copied as-is.
	_, _ = buf.WriteString(input[pending:])
	return buf.String()
}
// isFilteredControlChar reports whether r is one of the control characters
// that ReplaceControlChars substitutes: any codepoint below space (0x20) other
// than carriage return, newline, and horizontal tab, or the DEL character.
func isFilteredControlChar(r rune) bool {
	switch r {
	case '\r', '\n', '\t':
		// These control characters are deliberately left alone.
		return false
	case del:
		return true
	default:
		// Space (0x20) is the first non-control character.
		return r < ' '
	}
}
// controlPicture maps a control character to the codepoint of its visible
// counterpart in the Unicode "Control Pictures" block.
//
// Codepoints below space (0x20) are offset by unicodeControlPicturesStart,
// DEL maps to delPicture, and any other rune is handed back unchanged.
func controlPicture(ctrl rune) rune {
	switch {
	case ctrl < ' ':
		return unicodeControlPicturesStart + ctrl
	case ctrl == del:
		return delPicture
	default:
		return ctrl
	}
}