about summary refs log tree commit diff
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2021-07-04 15:55:28 +0200
committerSören Tempel <soeren+git@soeren-tempel.net>2021-07-15 13:28:33 +0200
commitd59393e1efc2d21e4a79ce1698d4d6c1cb924c66 (patch)
tree7ded5ffbe031de6074b4e20d82e7166b7207793b
parenta33f9be15276ebe217fe14fa75f1848533dff4e8 (diff)
Implement a reasonable subset of SGR escape sequences
SGR (Select Graphic Rendition) escape sequences are CSI escape
sequences ending in the final byte 'm'. They are described in the
ECMA-48 standard, but we also support a compatible extension which is
commonly used nowadays, namely extended colors (256 colors and true 24
bit colors) which are specified in ITU-T Rec. T.416.

SGR sequences are probably the most commonly used escape sequences and
a lot of CLI tools now feel much more familiar in saneterm. The
implemented SGR sequences for example allow:

* to change the current text's foreground and background color in the
  three commonly used color modes 8/16 colors, 256 colors and 24 bit
  true color.

* to change the current text's appearance: italic, bold, underline,
  strikethrough and more are supported.

The current implementation uses a new TextStyle object which is added
to pty.Parser's state to track the inherently stateful changes in text
appearance described by SGR escape sequences.

When the TextStyle object changes, a TEXT_STYLE event is emitted and
a Gtk.TextTag is created from the TextStyle and registered in the
widget's TextBuffer.

For the most part this is quite straightforward, just two areas
deserve more attention:

* The extended colors (256 colors and 24 bit true color) are a bit
  more complicated to parse which is handled by parse_extended_color().
  This function doesn't fully support everything the recommendation
  mandates. Especially true color will need more real world testing,
  e. g. lolcat(1), a heavy user of true color, doesn't even emit true
  color escape sequences conforming to the standard.

* Color handling in general contributes to most of the complexity:

  * There are three ways to specify colors via SGR escape sequences
    we support which all need to be converted to Gdk.RGBA objects.
    This is handled by saneterm.color.Color. True color is trivial,
    for 256 colors we implement the conversion instead of generating a
    lookup table (like XTerm does). For the 8 basic colors in their
    normal and bright variants, we use a hard coded list of X11 color
    names for now. This probably should become configurable in the
    future.

  * Many implementations use the intensity escape sequences to
    influence color vibrance: SGR 2 is interpreted as dim with respect
    to colors and SGR 1 not only makes the text bold but also
    chooses brighter colors. So far we interpret SGR 1, 2 and 22
    only in terms of font weight. ECMA-48 permits both. Changing the
    color intensity as well increases complexity and has little
    benefit, so this should probably be kept this way.

  * Instead we implement the 90-97 and 100-107 non-standard bright
    color SGR escape sequences.

The current implementation is, however, not without issues:

* Tracking the text style state in the parser is probably a layer
  violation — pty.Parser should instead translate the escape sequences
  into events and the state tracking should be done in saneterm.terminal.

* Performance is poor if a lot of escape sequences are in the input.
  This is due to two reasons: a) insert_data is called with little
  chunks of text which decreases performance and b) a lot of anonymous
  Gtk TextTags are created which hurts performance a lot. We should
  investigate a way to deduplicate the created TextTags (by using
  names?) and possibly decouple the application of tags from the
  insertion of text itself.
-rw-r--r--saneterm/__init__.py1
-rw-r--r--saneterm/color.py158
-rw-r--r--saneterm/pty.py230
-rw-r--r--saneterm/terminal.py6
-rw-r--r--saneterm/termview.py7
-rw-r--r--tests.py28
6 files changed, 426 insertions, 4 deletions
diff --git a/saneterm/__init__.py b/saneterm/__init__.py
index 9c8c73e..6412a26 100644
--- a/saneterm/__init__.py
+++ b/saneterm/__init__.py
@@ -1,2 +1,3 @@
 import gi
 gi.require_version("Gtk", "3.0")
+gi.require_version("Gdk", "3.0")
diff --git a/saneterm/color.py b/saneterm/color.py
new file mode 100644
index 0000000..7e06cb9
--- /dev/null
+++ b/saneterm/color.py
@@ -0,0 +1,158 @@
+from enum import Enum, auto, unique
+
+from gi.repository import Gdk
+
+@unique
+class BasicColor(Enum):
+    BLACK = 0
+    RED = 1
+    GREEN = 2
+    YELLOW = 3
+    BLUE = 4
+    MAGENTA = 5
+    CYAN = 6
+    WHITE = 7
+
+# colors are (almost) the same as XTerm's default ones,
+# see X11's rgb.txt for the RGB values
+BASIC_COLOR_NAMES_REGULAR = {
+    BasicColor.BLACK   : "black",
+    BasicColor.RED     : "red3",
+    BasicColor.GREEN   : "green3",
+    BasicColor.YELLOW  : "yellow3",
+    BasicColor.BLUE    : "blue2",
+    BasicColor.MAGENTA : "magenta3",
+    BasicColor.CYAN    : "cyan3",
+    BasicColor.WHITE   : "gray90",
+}
+
+BASIC_COLOR_NAMES_BRIGHT = {
+    BasicColor.BLACK   : "gray50",
+    BasicColor.RED     : "red",
+    BasicColor.GREEN   : "green",
+    BasicColor.YELLOW  : "yellow",
+    BasicColor.BLUE    : "CornflowerBlue",
+    BasicColor.MAGENTA : "magenta",
+    BasicColor.CYAN    : "cyan",
+    BasicColor.WHITE   : "white",
+}
+
+class ColorType(Enum):
+    NUMBERED_8 = auto()
+    NUMBERED_8_BRIGHT = auto()
+    NUMBERED_256 = auto()
+    TRUECOLOR = auto()
+
+def extended_color_val(x):
+    """
+    Convert a 256 color cube axis index into
+    its corresponding color channel value.
+    """
+    val = x * 40 + 55 if x > 0 else 0
+    return val / 255
+
+def int_triple_to_rgba(c):
+    """
+    Convert a triple of the form (r, g, b) into
+    a valid Gdk.RGBA where r, g and b are integers
+    in the range [0;255].
+    """
+    (r, g, b) = tuple(map(lambda x: x / 255, c))
+    return Gdk.RGBA(r, g, b, 1)
+
+def basic_color_to_rgba(n, bright=False):
+    """
+    Convert a BasicColor into a Gdk.RGBA object using
+    the BASIC_COLOR_NAMES_* lookup tables. Raises an
+    AssertionError if the conversion fails.
+    """
+    color = Gdk.RGBA()
+
+    if bright:
+        assert color.parse(BASIC_COLOR_NAMES_BRIGHT[n])
+    else:
+        assert color.parse(BASIC_COLOR_NAMES_REGULAR[n])
+
+    return color
+
+class Color(object):
+    """
+    Color represents all possible types of colors
+    used in SGR escape sequences:
+
+    * ColorType.NUMBERED_8: regular BasicColor, corresponding to
+      either the 30-37 or 40-47 SGR parameters. data is always
+      a member of the BasicColor enum.
+    * ColorType.NUMBERED_8_BRIGHT: bright BasicColor, corresponding
+      to either the 90-97 or 100-107 SGR parameters. data is always
+      a member of the BasicColor enum.
+    * ColorType.NUMBERED_256: a color of the 256 color palette
+      supported by the SGR sequence parameters 38 and 48. data
+      is always an integer in the range [0;255]
+    * ColorType.TRUECOLOR: a true RGB color as supported by SGR
+      sequence parameters 38 and 48. data should be a triple of
+      integers in the range [0;255].
+    """
+    def __init__(self, t, data):
+        if not isinstance(t, ColorType):
+            raise TypeError("type must be ColorType")
+
+        if t is ColorType.TRUECOLOR:
+            if not type(data) is tuple:
+                raise TypeError("data must be tuple for TRUECOLOR")
+            if not len(data) == 3:
+                raise TypeError("tuple must have 3 elements for TRUECOLOR")
+        elif t is ColorType.NUMBERED_8 or t is ColorType.NUMBERED_8_BRIGHT:
+            if not isinstance(data, BasicColor):
+                raise TypeError(f'data must be BasicColor for {t}')
+        elif t is ColorType.NUMBERED_256:
+            if not type(data) is int:
+                raise TypeError('data must be integer for NUMBERED_256')
+            if not (data >= 0 and data < 256):
+                raise TypeError('data must be in range [0;255] for NUMBERED_256')
+
+        self.type = t
+        self.data = data
+
+    def to_gdk(self):
+        """
+        Convert a Color into a Gdk.RGBA which TextTag accepts.
+        The color scheme for the 16 color part uses default X11
+        colors and is currently not configurable.
+        """
+        if self.type is ColorType.NUMBERED_8:
+            return basic_color_to_rgba(self.data, bright=False)
+        elif self.type is ColorType.NUMBERED_8_BRIGHT:
+            return basic_color_to_rgba(self.data, bright=True)
+        elif self.type is ColorType.TRUECOLOR:
+            return int_triple_to_rgba(self.data)
+        elif self.type is ColorType.NUMBERED_256:
+            if self.data < 8:
+                # normal 8 colors
+                return basic_color_to_rgba(BasicColor(self.data), bright=False)
+            elif self.data >= 8 and self.data < 16:
+                # bright 8 colors
+                return basic_color_to_rgba(BasicColor(self.data - 8), bright=True)
+            elif self.data >= 16 and self.data < 232:
+                # color cube which is constructed in the following manner:
+                #
+                # * The color number is described by the following formula:
+                #   n = 16 + 36r + 6g + b
+                # * r, g, b are all >= 0 and < 6
+                # * The corresponding color channel value for the r, g, b
+                #   values can be obtained using the following expression:
+                #   x * 40 + 55 if x > 0 else 0
+                #
+                # This is not documented anywhere as far as I am aware.
+                # The information presented here has been reverse engineered
+                # from XTerm's 256colres.pl.
+                tmp = self.data - 16
+                (r, tmp) = divmod(tmp, 36)
+                (g, b) = divmod(tmp, 6)
+
+                triple = tuple(map(extended_color_val, (r, g, b)))
+                return Gdk.RGBA(*triple)
+            else:
+                # grayscale in 24 steps
+                c = (self.data - 232) * (1.0/24)
+                return Gdk.RGBA(c, c, c, 1.0)
diff --git a/saneterm/pty.py b/saneterm/pty.py
index d5ed7d3..d25d0bf 100644
--- a/saneterm/pty.py
+++ b/saneterm/pty.py
@@ -1,8 +1,10 @@
 import os
+import re
 
 from pty import fork
+from .color import Color, ColorType, BasicColor
 from enum import Enum, auto
-from gi.repository import GLib
+from gi.repository import GLib, Pango
 
 TERM = "dumb"
 
@@ -42,6 +44,40 @@ class Source(GLib.Source):
 class EventType(Enum):
     TEXT = auto()
     BELL = auto()
+    TEXT_STYLE = auto()
+
+class TextStyle(object):
+    def __init__(self):
+        self.reset_all()
+
+    def reset_all(self):
+        self.fg_color = None
+        self.bg_color = None
+        self.strikethrough = False
+        self.intensity = Pango.Weight.NORMAL
+        self.italic = False
+        self.underline = Pango.Underline.NONE
+        self.concealed = False
+
+    def to_tag(self, textbuffer):
+        keywords = {
+            'strikethrough' : self.strikethrough,
+            'underline'     : self.underline,
+            'weight'        : self.intensity,
+            'style'         : Pango.Style.ITALIC if self.italic else Pango.Style.NORMAL,
+            'invisible'     : self.concealed,
+        }
+
+        if self.fg_color is not None:
+            keywords['foreground_rgba'] = self.fg_color.to_gdk()
+
+        if self.bg_color is not None:
+            keywords['background_rgba'] = self.bg_color.to_gdk()
+
+        tag = textbuffer.create_tag(None, **keywords)
+
+        return tag
+
 
 class PositionedIterator(object):
     """
@@ -198,6 +234,68 @@ def csi_final_byte(c):
     cp = ord(c)
     return cp >= 0x40 and cp <= 0x7e
 
+def parse_extended_color(iterator):
+    """
+    Parse extended color sequences (SGR parameters 38 and 48).
+    Takes an iterator which has already consumed the initial
+    SGR sequence type argument and returns a Color.
+    On failure an AssertionError is raised.
+
+    Relevant standards:
+    * Definition of the SGR extended color escape sequence:
+      ITU-T Rec. T.416 | ISO/IEC 8613-6
+      https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-T.416-199303-I!!PDF-E&type=items
+    * Full definition of the colour specification including the “colour space id”:
+      ITU-T Rec. T.412 | ISO/IEC 8613-2
+      https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-T.412-199303-I!!PDF-E&type=items
+    """
+    args = list(iterator)
+
+    if len(args) == 0:
+        raise AssertionError("too few arguments")
+
+    if args[0] == '5':
+        # 256 color
+        assert len(args) == 2
+
+        try:
+            return Color(
+                ColorType.NUMBERED_256,
+                int(args[1])
+            )
+        except ValueError:
+            raise AssertionError("unexpected non-integer")
+    elif args[0] == '2':
+        # truecolor
+        if len(args) == 4:
+            channels = tuple(args[1:4])
+        elif len(args) >= 5:
+            # TODO: handle color space id and tolerance values
+            channels = tuple(args[2:5])
+        else:
+            raise AssertionError("too few arguments")
+
+        try:
+            return Color(
+                ColorType.TRUECOLOR,
+                tuple(int(c) for c in channels)
+            )
+        except ValueError:
+            raise AssertionError("unexpected non-integer")
+    elif args[0] == '0':
+        # The standard specifies this as “implementation defined”,
+        # so we define this as color reset
+        return None
+    else:
+        # TODO: support
+        #
+        #   1   transparent
+        #   3   CMY
+        #   4   CMYK
+        #
+        # … but who needs these?
+        raise AssertionError("unsupported extended color")
+
 class Parser(object):
     """
     Parses a subset of special control sequences read from
@@ -209,6 +307,7 @@ class Parser(object):
     def __init__(self):
         # unparsed output left from the last call to parse
         self.__leftover = ''
+        self.__text_style = TextStyle()
 
     def parse(self, input):
         """
@@ -287,6 +386,135 @@ class Parser(object):
 
                                 assert csi_final_byte(final)
 
+                                if final == 'm':
+                                    # SGR (Select Graphic Rendition) sequence:
+                                    # any number of numbers separated by ';'
+                                    # which change the current text presentation.
+                                    # If the parameter string is empty, a single '0'
+                                    # is implied.
+                                    #
+                                    # We support a subset of the core SGR sequences
+                                    # as specified by ECMA-48. Most notably we also
+                                    # support the common additional bright color
+                                    # sequences. This also justifies not to implement
+                                    # the strange behavior of choosing brighter colors
+                                    # when the current text is bold.
+                                    #
+                                    # We also support ':' as a separator which is
+                                    # only necessary for extended color sequences
+                                    # as specified in ITU-T Rec. T.416 | ISO/IEC 8613-6
+                                    # (see also parse_extended_color()). Actually
+                                    # those sequences _must_ use colons and semicolons
+                                    # would be invalid. In reality, however, the
+                                    # incorrect usage of semicolons seems to be much
+                                    # more common. Thus we are extremely lenient and
+                                    # allow both ':' and ';' as well as a mix of both
+                                    # as separators.
+                                    args = re.split(r'[:;]', params)
+
+                                    # track if we support the used sequences,
+                                    # only emit an event if that is the case
+                                    supported = False
+
+                                    arg_it = iter(args)
+                                    for arg in arg_it:
+                                        if len(arg) == 0:
+                                            # empty implies 0
+                                            sgr_type = 0
+                                        else:
+                                            try:
+                                                sgr_type = int(arg)
+                                            except ValueError:
+                                                raise AssertionError("Invalid Integer")
+
+                                        this_supported = True
+                                        if sgr_type == 0:
+                                            self.__text_style.reset_all()
+                                        elif sgr_type == 1:
+                                            self.__text_style.intensity = Pango.Weight.BOLD
+                                        elif sgr_type == 2:
+                                            self.__text_style.intensity = Pango.Weight.THIN
+                                        elif sgr_type == 3:
+                                            self.__text_style.italic = True
+                                        elif sgr_type == 4:
+                                            self.__text_style.underline = Pango.Underline.SINGLE
+                                        elif sgr_type == 8:
+                                            self.__text_style.concealed = True
+                                        elif sgr_type == 9:
+                                            self.__text_style.strikethrough = True
+                                        elif sgr_type == 21:
+                                            self.__text_style.underline = Pango.Underline.DOUBLE
+                                        elif sgr_type == 22:
+                                            self.__text_style.intensity = Pango.Weight.NORMAL
+                                        elif sgr_type == 23:
+                                            # also theoretically should disable blackletter
+                                            self.__text_style.italic = False
+                                        elif sgr_type == 24:
+                                            self.__text_style.underline = Pango.Underline.NONE
+                                        elif sgr_type == 28:
+                                            self.__text_style.concealed = False
+                                        elif sgr_type == 29:
+                                            self.__text_style.strikethrough = False
+                                        elif sgr_type >= 30 and sgr_type <= 37:
+                                            self.__text_style.fg_color = Color(
+                                                ColorType.NUMBERED_8,
+                                                BasicColor(sgr_type - 30)
+                                            )
+                                        elif sgr_type == 38:
+                                            try:
+                                                self.__text_style.fg_color = parse_extended_color(arg_it)
+                                            except AssertionError:
+                                                this_supported = False
+                                        elif sgr_type == 39:
+                                            self.__text_style.fg_color = None
+                                        elif sgr_type >= 40 and sgr_type <= 47:
+                                            self.__text_style.bg_color = Color(
+                                                ColorType.NUMBERED_8,
+                                                BasicColor(sgr_type - 40)
+                                            )
+                                        elif sgr_type == 48:
+                                            try:
+                                                self.__text_style.bg_color = parse_extended_color(arg_it)
+                                            except AssertionError:
+                                                this_supported = False
+                                        elif sgr_type == 49:
+                                            self.__text_style.bg_color = None
+                                        elif sgr_type >= 90 and sgr_type <= 97:
+                                            self.__text_style.fg_color = Color(
+                                                ColorType.NUMBERED_8_BRIGHT,
+                                                BasicColor(sgr_type - 90)
+                                            )
+                                        elif sgr_type >= 100 and sgr_type <= 107:
+                                            self.__text_style.bg_color = Color(
+                                                ColorType.NUMBERED_8_BRIGHT,
+                                                BasicColor(sgr_type - 100)
+                                            )
+                                        else:
+                                            # Not supported:
+                                            #   5-6     blink
+                                            #   7       invert
+                                            #   10      default font
+                                            #   11-19   alternative font
+                                            #   20      blackletter font
+                                            #   25      disable blinking
+                                            #   26      proportional spacing
+                                            #   27      disable inversion
+                                            #   50      disable proportional spacing
+                                            #   51      framed
+                                            #   52      encircled
+                                            #   53      overlined (TODO: implement via GTK 4 TextTag)
+                                            #   54      neither framed nor encircled
+                                            #   55      not overlined
+                                            #   60-65   ideograms (TODO: find out what this is supposed to do)
+                                            #   58-59   underline color, non-standard
+                                            #   73-75   sub/superscript, non-standard (TODO: via scale and rise)
+                                            this_supported = False
+
+                                        supported = supported or this_supported
+
+                                    if supported:
+                                        special_ev = (EventType.TEXT_STYLE, self.__text_style)
+
                             except AssertionError:
                                 # invalid CSI sequence, we'll render it as text for now
                                 ignore_esc = True
diff --git a/saneterm/terminal.py b/saneterm/terminal.py
index f0c5e4d..eadd6a1 100644
--- a/saneterm/terminal.py
+++ b/saneterm/terminal.py
@@ -38,6 +38,8 @@ class Terminal(Gtk.Window):
         self.pty.attach(None)
 
         self.pty_parser = pty.Parser()
+        # gtk TextTag to use, generated from TEXT_STYLE events
+        self.text_insert_tag = None
 
         self.termview = TermView(self.complete, limit)
 
@@ -167,10 +169,12 @@ class Terminal(Gtk.Window):
 
         for (ev, data) in self.pty_parser.parse(decoded):
             if ev is pty.EventType.TEXT:
-                self.termview.insert_data(data)
+                self.termview.insert_data(data, self.text_insert_tag)
             elif ev is pty.EventType.BELL:
                 self.termview.error_bell()
                 self.set_urgency_hint(True)
+            elif ev is pty.EventType.TEXT_STYLE:
+                self.text_insert_tag = data.to_tag(self.termview.get_buffer())
             else:
                 raise AssertionError("unknown pty.EventType")
 
diff --git a/saneterm/termview.py b/saneterm/termview.py
index c697fae..1ce35a7 100644
--- a/saneterm/termview.py
+++ b/saneterm/termview.py
@@ -121,8 +121,11 @@ class TermView(Gtk.TextView):
                 GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE,
                 (GObject.TYPE_PYOBJECT,))
 
-    def insert_data(self, str):
-        self._textbuffer.insert(self._textbuffer.get_end_iter(), str)
+    def insert_data(self, str, tag=None):
+        if tag is None:
+            self._textbuffer.insert(self._textbuffer.get_end_iter(), str)
+        else:
+            self._textbuffer.insert_with_tags(self._textbuffer.get_end_iter(), str, tag)
 
         end = self._textbuffer.get_end_iter()
         self._last_mark = self._textbuffer.create_mark(None, end, True)
diff --git a/tests.py b/tests.py
index 34db339..aacdcdc 100644
--- a/tests.py
+++ b/tests.py
@@ -1,8 +1,11 @@
 import copy
 import unittest
 
+from saneterm.color import Color, ColorType
 from saneterm.pty import PositionedIterator
 
+from gi.repository import Gdk
+
 TEST_STRING = 'foo;bar'
 
 class TestPositionedIterator(unittest.TestCase):
@@ -92,5 +95,30 @@ class TestPositionedIterator(unittest.TestCase):
         # using take does not consume the next element!
         self.assertEqual(it1.pos, length - 1)
 
+class TestColor(unittest.TestCase):
+    def test_256_colors(self):
+        """
+        Check divmod based RGB value calculation against
+        256 color table generation as implemented in
+        XTerm's 256colres.pl.
+        """
+        def channel_val(c):
+            return (c * 40 + 55 if c > 0 else 0) / 255
+
+        for r in range(6):
+            for g in range(6):
+                for b in range(6):
+                    n = 16 + (r * 36) + (g * 6) + b
+
+                    expected = Gdk.RGBA(*map(channel_val, (r, g, b)))
+                    col = Color(ColorType.NUMBERED_256, n).to_gdk()
+
+                    self.assertTrue(
+                        expected.equal(col),
+                        'Color {}: expected: {}; got: {}'.format(
+                            n, expected.to_string(), col.to_string()
+                        )
+                    )
+
 if __name__ == '__main__':
     unittest.main()