1 files changed, 83 insertions, 0 deletions
diff --git a/saneterm/pty.py b/saneterm/pty.py
index 5396d82..d5ed7d3 100644
--- a/saneterm/pty.py
+++ b/saneterm/pty.py
@@ -174,6 +174,30 @@ class PositionedIterator(object):
             self.pos -= 1
             raise StopIteration
 
+def csi_parameter_byte(c):
+    """
+    Check if the given unicode character is a CSI sequence
+    parameter byte. See ECMA-48 (5th edition) Section 5.4.
+    """
+    cp = ord(c)
+    return cp >= 0x30 and cp <= 0x3f
+
+def csi_intermediate_byte(c):
+    """
+    Check if the given unicode character is a CSI sequence
+    intermediate byte. See ECMA-48 (5th edition) Section 5.4.
+    """
+    cp = ord(c)
+    return cp >= 0x20 and cp <= 0x2f
+
+def csi_final_byte(c):
+    """
+    Check if the given unicode character is a CSI sequence
+    final byte. See ECMA-48 (5th edition) Section 5.4.
+    """
+    cp = ord(c)
+    return cp >= 0x40 and cp <= 0x7e
+
 class Parser(object):
     """
     Parses a subset of special control sequences read from
@@ -200,6 +224,13 @@ class Parser(object):
           the bell character '\a' was in the terminal input.
           This usually should trigger the machine to beep
           and/or the window to set the urgent flag.
+
+        Parsed control sequences are guaranteed to never
+        appear in a TEXT event. This is also true for
+        escape sequences which don't cause an event to
+        be generated. This is true for all CSI escape
+        sequences at the moment which are filtered out
+        from saneterm's output in this way.
         """
 
         it = PositionedIterator(self.__leftover + input)
@@ -209,6 +240,11 @@ class Parser(object):
         # we want to emit as a TEXT event
         start = 0
 
+        # this is set by the parser before backtracking if
+        # an ANSI escape sequence should be ignored, e. g.
+        # if we don't support it
+        ignore_esc = False
+
         # we expect a decoded string as input,
         # so we don't need to handle incremental
         # decoding here as well
@@ -228,6 +264,53 @@ class Parser(object):
             if code == '\a':
                 flush_until = it.pos
                 special_ev = (EventType.BELL, None)
+            elif code == '\033':
+                # ignore_esc can be set if we encounter a '\033'
+                # which is followed by a sequence we don't understand.
+                # In that case we'll jump back to the '\033', but just
+                # treat it as if it was an ordinary character.
+                if ignore_esc:
+                    ignore_esc = False
+                else:
+                    flush_until = it.pos
+
+                    # if parsing fails we'll return to this point
+                    it.waypoint()
+
+                    try:
+                        if it.next() == '[':
+                            # CSI sequence
+                            try:
+                                params = it.takewhile_greedy(csi_parameter_byte)
+                                inters = it.takewhile_greedy(csi_intermediate_byte)
+                                final = it.next()
+
+                                assert csi_final_byte(final)
+
+                            except AssertionError:
+                                # invalid CSI sequence, we'll render it as text for now
+                                ignore_esc = True
+
+                        else:
+                            # we only parse CSI sequences for now, all other
+                            # sequences will be rendered as text to the terminal.
+                            # This probably should change in the future since
+                            # we also want to filter out, e. g. OSC sequences
+                            ignore_esc = True
+
+                        # with only backtracks if the end of input is
+                        # reached, so we do need to do it explicitly here.
+                        if ignore_esc:
+                            it.backtrack()
+
+                    except StopIteration:
+                        # the full escape sequence wasn't contained in
+                        # this chunk of input, so we'll parse it next time.
+                        # Since we flush up to the escape sequence, we know
+                        # where it started. The parser loop will exit at the
+                        # end of this iteration because the iterator is
+                        # exhausted.
+                        self.__leftover = it.wrapped[flush_until:]
 
             # at the end of input, flush if we aren't already
             if flush_until == None and it.empty():