mirror of
https://github.com/opentffoundation/opentf.git
synced 2025-12-19 17:59:05 -05:00
command/format: FilterControlChars function
This utility function replaces C0 control characters in a given string with their corresponding symbols from the "Control Pictures" Unicode block. As of this commit nothing is using this, but in future commits we will use this when preparing terminal UI output that may contain arbitrary strings that are not subject to any other quoting/escaping, to ensure that it is not possible to affect virtual terminal state in situations where that is not intentionally allowed by OpenTofu. Signed-off-by: Martin Atkins <mart@degeneration.co.uk>
This commit is contained in:
97
internal/command/format/filter_control.go
Normal file
97
internal/command/format/filter_control.go
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright (c) The OpenTofu Authors
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright (c) 2023 HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Codepoints used when swapping ASCII control characters for their printable
// stand-ins from the Unicode "Control Pictures" block.
const (
	// unicodeControlPicturesStart is the codepoint of the first character in
	// the Unicode "Control Pictures" block.
	//
	// The first 32 codepoints of that block line up one-to-one with the 32
	// control characters at the start of the "Basic Latin" block, so adding
	// this constant to such a control character's codepoint yields the
	// codepoint of its control picture.
	unicodeControlPicturesStart rune = 0x2400

	// del is the ASCII "delete" character. It lies outside the first 32
	// codepoints, so the offset arithmetic described above does not apply
	// to it and its picture is given separately as delPicture.
	del rune = 0x7f

	// delPicture is the control picture used in place of del.
	delPicture rune = 0x2421
)
|
||||
|
||||
// FilterControlChars translates 7-bit C0 control characters in the given string
|
||||
// (character codes less than 32) into their corresponding symbols from the
|
||||
// Unicode "Control Pictures" block, so that the result can be printed to a
|
||||
// terminal-like device without affecting the terminal's state machine.
|
||||
//
|
||||
// As an exception this does not change control characters that commonly appear
|
||||
// as part of human-oriented text: newline (0x0a), carriage return (0x0d),
|
||||
// and horizontal tab (0x09).
|
||||
//
|
||||
// We use this when including untrusted data as part of "human-friendly"
|
||||
// output. We use the Unicode control pictures so that a human reader can
|
||||
// (with a suitably-equipped terminal font) still identify which specific
|
||||
// control character appeared, in case that is helpful for debugging, and
|
||||
// because they are relatively unlikely to appear literally in a string we're
|
||||
// rendering in the UI.
|
||||
//
|
||||
// This is only for arbitrary text strings rendered directly in the UI,
|
||||
// such as the message portions of rendered diagnostics. We need not use this
|
||||
// when producing machine-readable output such as JSON representations, or when
|
||||
// showing a string in a quoted notation that mimics either the HCL or Go string
|
||||
// syntax, because the control characters are already backslash-escaped by the
|
||||
// quoting process in those cases. We also don't need to use this for strings
|
||||
// that are known to contain valid HCL identifiers, because the control
|
||||
// characters are not valid for use in HCL's identifier tokens.
|
||||
func FilterControlChars(input string) string {
|
||||
// In the common case there are no relevant control characters at all, so
|
||||
// we'll first scan the string to see if we can return the input verbatim
|
||||
// and thus avoid allocating a new copy of that string.
|
||||
if !strings.ContainsFunc(input, isFilteredControlChar) {
|
||||
return input
|
||||
}
|
||||
|
||||
// If we get here then we definitely need to build a new string.
|
||||
var buf strings.Builder
|
||||
// We'll give ourselves capacity for replacing up to two control characters
|
||||
// with their "Control Pictures' equivalents, which (due to UTF-8 encoding)
|
||||
// causes each 1-byte control character to be replaced by a 3-byte sequence.
|
||||
// If we find more than two control characters then the buffer may
|
||||
// reallocate (automatically) to get extra capacity.
|
||||
buf.Grow(len(input) + 4)
|
||||
for _, r := range input {
|
||||
if !isFilteredControlChar(r) {
|
||||
// Writing to a [strings.Builder] never encounters an error.
|
||||
_, _ = buf.WriteRune(r)
|
||||
continue
|
||||
}
|
||||
// If we get here then seq is definitely an ineligible C0 control
|
||||
// character, so we need to transform it into the 3-byte encoding of the
|
||||
// corresponding Control Picture codepoint.
|
||||
// Writing to a [strings.Builder] never encounters an error.
|
||||
_, _ = buf.WriteRune(controlPicture(r))
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// isFilteredControlChar returns true if and only if the given rune is in the
|
||||
// range of 7-bit C0 control characters.
|
||||
func isFilteredControlChar(r rune) bool {
|
||||
// Space (0x20) is the first non-control character
|
||||
return (r < ' ' && r != '\r' && r != '\n' && r != '\t') || r == del
|
||||
}
|
||||
|
||||
// controlPicture returns the control picture equivalent of the given C0 control
|
||||
// character, or returns the given character verbatim if it is not actually
|
||||
// a C0 control character.
|
||||
func controlPicture(ctrl rune) rune {
|
||||
if ctrl < ' ' {
|
||||
return ctrl + unicodeControlPicturesStart
|
||||
}
|
||||
if ctrl == del {
|
||||
return delPicture
|
||||
}
|
||||
return ctrl
|
||||
}
|
||||
43
internal/command/format/filter_control_test.go
Normal file
43
internal/command/format/filter_control_test.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) The OpenTofu Authors
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright (c) 2023 HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFilterControlChars(t *testing.T) {
|
||||
tests := map[string]string{
|
||||
"Hello, world!": "Hello, world!",
|
||||
"Hello\nworld!": "Hello\nworld!",
|
||||
"Hello\rworld!": "Hello\rworld!",
|
||||
"Hello\r\nworld!": "Hello\r\nworld!",
|
||||
"Hello world\x00": "Hello world␀",
|
||||
|
||||
// Filter various ways that someone might try to hide or replace earlier
|
||||
// output from OpenTofu.
|
||||
"Hello\x7f\x7f\x7f\x7f\x7fGoodbye, world!": "Hello␡␡␡␡␡Goodbye, world!",
|
||||
"Hello\x08\x08\x08\x08\x08Goodbye, world!": "Hello␈␈␈␈␈Goodbye, world!",
|
||||
"\x1b[1m": "␛[1m", // "Set Graphic Rendition" (SGR) control sequence
|
||||
"\x1bM": "␛M", // "Reverse Index" (RI) control sequence (moves cursor up, so subsequent text could overwrite earlier text)
|
||||
|
||||
// The cases above ensure that we handle some relatively-likely
|
||||
// combinations in a sensible way, but we'll also just exhaustively
|
||||
// test all of them together to make sure they all get handled in
|
||||
// a reasonable way.
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \x7f": "␀␁␂␃␄␅␆␇␈\t\n␋␌\r␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟ ␡",
|
||||
}
|
||||
|
||||
for input, want := range tests {
|
||||
t.Run(fmt.Sprintf("%q", input), func(t *testing.T) {
|
||||
got := FilterControlChars(input)
|
||||
if got != want {
|
||||
t.Errorf("wrong result\ninput: %q\ngot: %q\nwant: %q", input, got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user