summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorG. Branden Robinson <g.branden.robinson@gmail.com>2024-01-13 09:26:58 -0600
committerG. Branden Robinson <g.branden.robinson@gmail.com>2024-01-13 12:40:12 -0600
commit9dbf227a5b3870a19c1e6d90e5b619c4ae3e7f3e (patch)
tree14ca704d3cca354266d472e434f0b788011b474e
parent974c063f0a9e1ef6c0d2cac4755a3b9d6e925b0d (diff)
[troff]: Fix Savannah #64484.
* src/roff/troff/input.cpp (encode_char_for_troff_output): Annotate the function's purpose. Initially assume the character to be encoded as valid. If the current token is a plain space, write a space (U+0020) to the output. (This is necessary because the `device` request no longer reads its arguments in copy mode; see below.) Move the `sc` local variable to a higher scope. Update the new `is_char_valid` Boolean instead of issuing an error diagnostic at each point of validation failure. When done processing the character, test `is_char_valid` and emit different diagnostics depending on whether the input was a special character escape sequence we can't handle, or something else. Emit a self-quoted escape character _as a backslash_, not as the current *roff escape character. (device_request): Rewrite to operate in interpretation mode, not copy mode. * doc/groff.texi (Postprocessor Access): * man/groff.7.man (Request short reference): * NEWS: Document it. Fixes <https://savannah.gnu.org/bugs/?64484>.
-rw-r--r--ChangeLog28
-rw-r--r--NEWS9
-rw-r--r--doc/groff.texi65
-rw-r--r--man/groff.7.man3
-rw-r--r--src/roff/troff/input.cpp70
5 files changed, 107 insertions, 68 deletions
diff --git a/ChangeLog b/ChangeLog
index 45db977cc..446043386 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,31 @@
2024-01-13 G. Branden Robinson <g.branden.robinson@gmail.com>
+ [troff]: Fix Savannah #64484.
+
+ * src/roff/troff/input.cpp (encode_char_for_troff_output):
+ Annotate the function's purpose. Initially assume the character
+ to be encoded as valid. If the current token is a plain space,
+ write a space (U+0020) to the output. (This is necessary
+ because the `device` request no longer reads its arguments in
+ copy mode; see below.) Move the `sc` local variable to a higher
+ scope. Update the new `is_char_valid` Boolean instead of
+ issuing an error diagnostic at each point of validation failure.
+ When done processing the character, test `is_char_valid` and
+ emit different diagnostics depending on whether the input was a
+ special character escape sequence we can't handle, or something
+ else. Emit a self-quoted escape character _as a backslash_, not
+ as the current *roff escape character.
+ (device_request): Rewrite to operate in interpretation mode, not
+ copy mode.
+
+ * doc/groff.texi (Postprocessor Access):
+ * man/groff.7.man (Request short reference):
+ * NEWS: Document it.
+
+ Fixes <https://savannah.gnu.org/bugs/?64484>.
+
+2024-01-13 G. Branden Robinson <g.branden.robinson@gmail.com>
+
* src/roff/groff/tests/\
device-control-special-character-handling.sh: Add unit test for
this feature. We want to be able to consistently pass (some)
@@ -172,7 +198,7 @@
"default", instead of permitting it to have a null name.
* doc/groff.texi (Colors):
- * man/groff.7.man (Read-only regsiters):
+ * man/groff.7.man (Read-only registers):
* NEWS: Document it.
Fixes <https://savannah.gnu.org/bugs/?64592>. Thanks to Deri
diff --git a/NEWS b/NEWS
index adf27d093..7f5e8894c 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,15 @@ o The read-only registers `.m` and `.M` now interpolate "default" when
the default color is selected as the stroke or fill color,
respectively, rather than interpolating nothing.
+o The `device` request no longer reads its arguments in copy mode; this
+ change makes it more consistent with the `\X` device control command
+ escape sequence. This request also no longer emits a self-quoted
+ *roff escape character as itself, but instead as a backslash.
+ (troff's input and output languages are not the same thing.) These
+ changes are to enable postprocessors to reliably interpret device
+ control commands that wish to express arbitrary byte sequences. For
+ example, PDF bookmarks need to be expressed in UTF-16LE.
+
eqn
---
diff --git a/doc/groff.texi b/doc/groff.texi
index 93a86b202..6e2e1cc3e 100644
--- a/doc/groff.texi
+++ b/doc/groff.texi
@@ -16421,36 +16421,34 @@ returned by the function @cite{getenv@r{(3)}}.
@cindex access to postprocessor
Two escape sequences and two requests enable documents to pass
-information directly to a postprocessor. These are useful for
-exercising device-specific capabilities that the @code{groff} language
-does not abstract or generalize; examples include the embedding of
-hyperlinks and image files. Device-specific functions are documented in
-each output driver's man page, such as @cite{gropdf@r{(1)}},
-@cite{grops@r{(1)}}, or @cite{grotty@r{(1)}}.
+information directly to an output driver or other postprocessor. These
+are useful for exercising device-specific capabilities that the
+@code{groff} language does not abstract or generalize; examples include
+the embedding of hyperlinks and image files. Device-specific functions
+are documented in each output driver's man page, such as
+@cite{gropdf@r{(1)}}, @cite{grops@r{(1)}}, or @cite{grotty@r{(1)}}.
@DefreqList {device, xxx @r{@dots{}}}
@DefescListEndx {\\X, @code{'}, xxx @r{@dots{}}, @code{'}}
Embed all @var{xxx} arguments into GNU @command{troff} output as
-parameters to a device control command @w{@samp{x X}}. The meaning and
-interpretation of such parameters is determined by the output driver or
-other postprocessor.
-
-@cindex @code{device} request, and copy mode
-@cindex copy mode, and @code{device} request
-@cindex mode, copy, and @code{device} request
-The @code{device} request processes its arguments in copy mode
-(@pxref{Copy Mode}). An initial neutral double quote in @var{contents}
-is stripped to allow embedding of leading spaces.
-@cindex @code{\&}, in @code{\X}
-@cindex @code{\)}, in @code{\X}
-@cindex @code{\%}, in @code{\X}
+parameters to an @w{@samp{x X}} device control
+command.@footnote{@xref{gtroff Output}.} The meaning and interpretation
+of such parameters is determined by the output driver or other
+postprocessor.
+
+The @code{device} request strips an initial neutral double quote from
+@var{contents} to allow embedding of leading spaces.
+
+@cindex @code{\&}, in device control commands
+@cindex @code{\)}, in device control commands
+@cindex @code{\%}, in device control commands
@ifnotinfo
-@cindex @code{\:}, in @code{\X}
+@cindex @code{\:}, in device control commands
@end ifnotinfo
@ifinfo
-@cindex @code{\@r{<colon>}}, in @code{\X}
+@cindex @code{\@r{<colon>}}, in device control commands
@end ifinfo
-By contrast, within @code{\X} arguments, the escape sequences @code{\&},
+Within a device control command, the escape sequences @code{\&},
@code{\)}, @code{\%}, and @code{\:} are ignored; @code{\@key{SPC}} and
@code{\~} are converted to single space characters; and @code{\\} has
its escape character stripped. So that the basic Latin subset of the
@@ -16458,13 +16456,15 @@ Unicode character set@footnote{that is, ISO@tie{}646:1991-IRV or,
popularly, ``US-ASCII''} can be reliably encoded in device control
commands, seven special character escape sequences (@samp{\-},
@samp{\[aq]}, @samp{\[dq]}, @samp{\[ga]}, @samp{\[ha]}, @samp{\[rs]},
-and @samp{\[ti]},) are mapped to basic Latin characters; see the
+and @samp{\[ti]}) are mapped to basic Latin characters; see the
@cite{groff_char@r{(7)}} man page. For this transformation, character
translations and special character definitions are
ignored.@footnote{They are bypassed because these parameters are not
rendered as glyphs in the output; instead, they remain abstract
-characters---in a PDF bookmark or a URL, for example.} The use of any
-other escape sequence in @code{\X} parameters is normally an error.
+characters---in a PDF bookmark or a URL, for example.}
+
+Escape sequences other than the foregoing in a device control command
+may be ignored, or produce an error.
A device control command issued with the @code{device} request will not
be reflected in the output unless a partially collected line exists at
@@ -16483,6 +16483,9 @@ discussed above). @code{use_charnames_in_special} is currently employed
only by @code{grohtml}.
@endDefesc
+GNU @command{troff} also permits the interpolation of macro contents
+as a device control command.
+
@DefreqList {devicem, name}
@DefescItemx {\\Y, , n, }
@DefescItem {\\Y, (, nm, }
@@ -16490,12 +16493,12 @@ only by @code{grohtml}.
This is approximately equivalent to @samp{\X'\*[@var{name}]'}
(one-character name@tie{}@var{n}, two-character name @var{nm}).
However, the contents of the string or macro @var{name} are not
-interpreted; also it is permitted for @var{name} to have been defined as
-a macro and thus contain newlines (it is not permitted for the argument
-to @code{\X} to contain newlines). The inclusion of newlines requires
-an extension to the @acronym{AT&T} @command{troff} output format, and
-confuses drivers that do not know about this extension (@pxref{Device
-Control Commands}).
+interpreted; it is also permitted for @var{name} to have been defined as
+a macro and thus contain newlines. (There is no way to embed a newline
+in the arguments to @code{device} or @code{\X}.) The inclusion of
+newlines requires an extension to the @acronym{AT&T} @command{troff}
+output format; their presence confuses drivers that do not know about it
+(@pxref{Device Control Commands}).
@endDefesc
@DefreqList {tag, name}
diff --git a/man/groff.7.man b/man/groff.7.man
index 6e8e6974a..8bb3bfd84 100644
--- a/man/groff.7.man
+++ b/man/groff.7.man
@@ -2983,8 +2983,7 @@ with compatibility mode disabled when the macro is interpreted.
.TPx
.REQ .device anything
Write
-.IR anything ,
-read in copy mode,
+.I anything
to
.I @g@troff
output as a device control command.
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index a2bb11d4e..d3d700c0c 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -5571,14 +5571,19 @@ static node *do_non_interpreted()
return new non_interpreted_node(mac);
}
+// In troff output, we translate the escape character to '\', but it is
+// up to the postprocessor to interpret it as such. (This mostly
+// matters for device control commands.)
static void encode_char_for_troff_output(macro *mac, const char c)
{
+ bool is_char_valid = true;
+ const char *sc = 0 /* nullptr */;
if ('\0' == c) {
- if (tok.is_stretchable_space()
- || tok.is_unstretchable_space())
+ if (tok.is_space()
+ || tok.is_stretchable_space()
+ || tok.is_unstretchable_space())
mac->append(' ');
else if (tok.is_special()) {
- const char *sc;
if (font::use_charnames_in_special) {
charinfo *ci = tok.get_char(true /* required */);
sc = ci->get_symbol()->contents();
@@ -5612,29 +5617,32 @@ static void encode_char_for_troff_output(macro *mac, const char c)
mac->append(']');
}
else
- error("special character '%1' cannot be used within a"
- " device control escape sequence", sc);
+ is_char_valid = false;
}
else
- error("special character '%1' cannot be used within a device"
- " control escape sequence", sc);
+ is_char_valid = false;
}
}
else if (!(tok.is_hyphen_indicator()
|| tok.is_dummy()
|| tok.is_transparent_dummy()
|| tok.is_zero_width_break()))
- error("%1 is invalid within device control escape sequence",
- tok.description());
+ is_char_valid = false;
+ if (!is_char_valid) {
+ if (sc != 0 /* nullptr */)
+ error("special character '%1' is invalid within a device"
+ " control command", sc);
+ else
+ error("%1 is invalid within a device control command",
+ tok.description());
+ }
}
else {
- if ((font::use_charnames_in_special) && ('\\' == c)) {
- /*
- * add escape escape sequence
- */
- mac->append(c);
+ if (c == escape_char) {
+ mac->append('\\');
}
- mac->append(c);
+ else
+ mac->append(c);
}
}
@@ -5678,34 +5686,28 @@ static node *do_special()
static void device_request()
{
- // We can't use `has_arg()` here because we want to read in copy mode.
- int c;
- for (;;) {
- c = input_stack::peek();
- if (' ' == c)
- (void) get_copy(0 /* nullptr */);
- else
- break;
+ if (!has_arg()) {
+ warning(WARN_MISSING, "device request expects arguments");
+ skip_line();
+ return;
}
- if (('\n' == c) || (EOF == c)) {
- warning(WARN_MISSING, "device control request expects arguments");
+ if (tok.is_newline() || tok.is_eof()) {
+ warning(WARN_MISSING, "device request expects arguments");
skip_line();
return;
}
+ if ('"' == tok.ch()) {
+ tok.next();
+ }
macro mac;
for (;;) {
- c = get_copy(0 /* nullptr */);
- if ('"' == c) {
- c = get_copy(0 /* nullptr */);
- break;
- }
- if (c != ' ' && c != '\t')
+ if (tok.is_newline() || tok.is_eof())
break;
+ encode_char_for_troff_output(&mac, tok.ch());
+ tok.next();
}
- for (; c != '\n' && c != EOF; c = get_copy(0 /* nullptr */))
- mac.append(c);
curenv->add_node(new special_node(mac));
- tok.next();
+ skip_line();
}
static void device_macro_request()