about summary refs log tree commit diff
path: root/sternenblog/xml.h
diff options
context:
space:
mode:
Diffstat (limited to 'sternenblog/xml.h')
-rw-r--r--sternenblog/xml.h340
1 files changed, 340 insertions, 0 deletions
diff --git a/sternenblog/xml.h b/sternenblog/xml.h
new file mode 100644
index 0000000..869a579
--- /dev/null
+++ b/sternenblog/xml.h
@@ -0,0 +1,340 @@
+/*!
+ * @file xml.h
+ * @brief Simple library for constructing XML documents
+ *
+ * This library provides a C interface for opening and closing
+ * XML tags as well as filling them with content. It is mainly
+ * intended for constructing XML/HTML documents by directly
+ * writing them to `stdout`.
+ *
+ * It's main advantage over plain `printf()` is that it keeps
+ * track of open tags, enabling it to automatically close
+ * open tags (saving a few lines of code) using `xml_close_all()`
+ * and `xml_close_including()` and/or to detect errors in the
+ * programmer's XML nesting. For information on its sanity
+ * checking abilities see the documentation of `xml_close_tag()`.
+ *
+ * Currently it has some limitations (possibly incomplete list):
+ *
+ * * It does not give the calling code feedback if errors occurred
+ * * It doesn't do validity checking of tags and attributes
+ *   (legal characters etc.)
+ * * It can't generate pretty output (i. e. properly indented),
+ *   its output is currently always "minified".
+ *
+ * For handling arbitrary data this library is probably not a good
+ * fit, it is mainly intended and tested for generating HTML and
+ * RSS documents in a CGI-like environment from trusted data.
+ *
+ * An example application generating a HTML5 page looks like this:
+ *
+ * @include xml_example.c
+ */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/*!
+ * @brief Type of an XML "tag"
+ *
+ * This is mostly internally used to be able
+ * to keep track of CDATA using `xml_stack`.
+ */
+enum xml_tag_type {
+    XML_NORMAL_TAG,
+    XML_CDATA
+};
+
+/*!
+ * @brief Internal linked list type
+ *
+ * Linked list used internally to keep track of tags to close.
+ *
+ * @see struct xml_context
+ */
+struct xml_stack {
+    enum xml_tag_type type;  //! type of the tag
+    char *tag;               //!< tag name if `XML_NORMAL_TAG`, otherwise `NULL`
+    struct xml_stack *next;  //!< tag to be closed after the current one
+};
+
+/*!
+ * @brief State and configuration of xml generation.
+ *
+ * Struct containing both state and configuration of this module.
+ * See `new_xml_context()` for usage instructions.
+ *
+ * @see new_xml_context
+ * @see del_xml_context
+ * @see struct xml_stack
+ */
+struct xml_context {
+    struct xml_stack *stack; //!< linked list used internally to keep track of open tags
+    FILE *out;               //!< Where to write output, defaults to stdout
+    FILE *warn;              //!< if not `NULL`, print warnings to handle warn, defaults to `NULL`
+    bool closing_slash;      //!< whether to output a closing slash at the end of an empty tag
+};
+
+/*!
+ * @brief Initialize the `xml_context` structure.
+ *
+ * Initialize a `struct xml_context` with default values:
+ *
+ * * empty stack
+ * * output to `stdout`
+ * * no warnings
+ * * closing slashes enabled
+ *
+ * This function should always be called before any other functions of xml.
+ * If you want to use different settings than the default ones, update the
+ * struct after calling this function. This way your application won't break
+ * if it gets extended.
+ *
+ * @see struct xml_context
+ * @see del_xml_context
+ */
+void new_xml_context(struct xml_context *ctx);
+
+/*!
+ * @brief Clean up the `xml_context` structure.
+ *
+ * Frees any dynamically allocated data in a `struct xml_context`.
+ * Should always be called before a `struct xml_context` goes out
+ * of scope or the program terminates.
+ *
+ * If `ctx->warn` is not `NULL`, `del_xml_context()` will additionally
+ * output a message about any remaining unclosed tags to `ctx->warn`.
+ * For example:
+ *
+ * ```
+ * Unclosed tags remaining: article main body html
+ * ```
+ *
+ * @see struct xml_context
+ * @see new_xml_context
+ */
+void del_xml_context(struct xml_context *ctx);
+
+/*!
+ * @brief Output a xml escaped string
+ *
+ * Outputs the given string escaped for use with XML. It only
+ * does minimal-ish escaping, i. e. it escapes all characters
+ * that have some syntactical meaning in XML. That includes:
+ * Angled brackets (lower than and greater than), ampersand,
+ * and single as well as double quotes. All other characters
+ * are passed through as is and the caller is expected to
+ * make sure they are correctly encoded, i. e. valid UTF-8
+ * characters.
+ *
+ * The escaping is not as minimal as possible. In some cases
+ * you can omit escaping all characters except for `<` and `&`,
+ * but this would be context-sensitive and therefore
+ * unnecessarily tedious to implement. Escaping all
+ * syntactically significant characters has no real downsides
+ * except maybe using a tiny bit more storage than absolutely
+ * necessary.
+ *
+ * @see xml_raw
+ */
+void xml_escaped(struct xml_context *ctx, const char *str);
+
+/*!
+ * @brief Output a raw string.
+ *
+ * Output string to `ctx->out`, equivalent to `fputs(str, ctx.out)`.
+ * If your string is not already XML, use xml_escaped() to output it
+ * correcty escaped.
+ *
+ * @see struct xml_context
+ * @see xml_escaped
+ */
+void xml_raw(struct xml_context *ctx, const char *str);
+
+/*!
+ * @brief Output an empty xml tag.
+ *
+ * Output an empty xml tag (i. e. a single tag that doesn't need to be closed).
+ * This call does not change the provided context `ctx`.
+ *
+ * The call also outputs given attributes: For `attr_count` n, `xml_empty_tag()` expects
+ * 2n additional arguments — for each attribute a name and a value. If value is `NULL`,
+ * an attribute without a value will be output, i. e. just the name without the `="…"`.
+ *
+ * For example, `xml_empty_tag(&ctx, "my-tag", 2, "foo", "bar", "baz", NULL);` gives
+ * `<my-tag foo="bar" baz/>` with default settings.
+ *
+ * The attributes' values are XML-escaped automatically. For details on how escaping
+ * works in xml.h, see xml_escaped().
+ *
+ * If `closing_slash` is 0 in `ctx`, the slash before the closing ">" will be omitted.
+ * This is useful for HTML5 where it is optional.
+ *
+ * @see struct xml_context
+ */
+void xml_empty_tag(struct xml_context *ctx, const char *tag, size_t attr_count, ...);
+
+/*!
+ * @brief Output an opening tag with attributes.
+ *
+ * Output an opening tag with attributes and add it to `ctx->stack` for future reference.
+ *
+ * Attributes work exactly like in `xml_empty_tag()`.
+ * @see xml_empty_tag
+ */
+void xml_open_tag_attrs(struct xml_context *ctx, const char *tag, size_t attr_count, ...);
+
+/*!
+ * @brief Output an opening tag without any attributes.
+ *
+ * Shorthand for `xml_open_tag_attrs(ctx, tag, 0)`.
+ * @see xml_open_tag_attrs
+ */
+void xml_open_tag(struct xml_context *ctx, const char *tag);
+
+/*!
+ * @brief Close a previously opened tag.
+ *
+ * `xml_close_tag()` first checks the head of the current `xml_stack`
+ * if the provided `tag` is in fact the current innermost opened tag.
+ *
+ * If this is true, it outputs the closing tag, removes the reference
+ * to the tag on top of the `xml_stack` and frees this part of the
+ * structure.
+ *
+ * If it isn't true, it does nothing and outputs an appropriate warning
+ * to `ctx->warn` if it is not `NULL`:
+ *
+ * * `Refusing to close tag xyz, unclosed tags remaining`
+ * * `Refusing to close tag zyx, no tags left to be closed`
+ *
+ * This sanity checking of tag closing ensures that a xml document
+ * constructed by this module has no nesting errors, i. e. every tag
+ * is closed at the proper nesting level. Because it is only simple
+ * runtime checking in specific calls it can't prevent / detect the
+ * following errors:
+ *
+ * * It can't prevent unclosed tags remaining at the end. You can
+ *   however prevent this by calling `xml_close_all()` at the end of
+ *   your XML outputting code.
+ * * It can worsen a situation with remaining unclosed tags: If
+ *   an inner tag is left unclosed, it will refuse to close all
+ *   outer tags, leaving a trail of unclosed tags behind.
+ * * It will always attribute the error to closing: Some errors
+ *   will be caused by missing an `xml_open_tag()` somewhere, but
+ *   `xml_close_tag()` will think the closing is erroneous. Of course
+ *   it's also unable to resolve the error.
+ * * It can't compare against the intended XML structure: Sometimes
+ *   a programming error will result in an "wrong" XML structure
+ *   which is still completely valid to `xml_close_tag()`, i. e.
+ *   correctly nested.
+ *
+ * Overall the sanity checking is limited, as you can see, but
+ * it should generate *some* warning if a detectable issue is
+ * present (invalid XML nesting) and never actively output
+ * a tag that makes the XML nesting invalid.
+ *
+ * These properties should however be enough to detect issues
+ * quickly in development. Additionally the sanity checking is
+ * cheap enough to be feasible in production. `xml_close_tag()`
+ * only needs to call `strcmp` once per invocation.
+ *
+ * @see xml_open_tag_attrs
+ * @see xml_open_tag
+ * @see struct xml_stack
+ * @see xml_close_all
+ * @see xml_close_including
+ */
+void xml_close_tag(struct xml_context *ctx, const char *tag);
+
+/*!
+ * @brief Close all remaining unclosed tags
+ *
+ * `xml_close_all()` iterates through the `xml_stack` and calls
+ * `xml_close_tag()` or `xml_close_cdata()` respectively for every
+ * entry in it. A call to it will thus result in an empty `xml_stack`
+ * and all previously opened tags being closed correctly.
+ *
+ * Internally it's an alias for `xml_close_all(ctx, NULL)`
+ *
+ * Note that `xml_close_all()` will limit error checking, since it
+ * (by nature) always succeeds and has no insight into what the
+ * programmer thinks needs to be closed.
+ *
+ * @see xml_close_tag
+ * @see xml_close_including
+ * @see struct xml_stack
+ */
+void xml_close_all(struct xml_context *ctx);
+
+/*!
+ * @brief Close all unclosed tags until a given one.
+ *
+ * `xml_close_including()` works like `xml_close_all()`, but
+ * will stop after it hits a tag of the given name.
+ * If the given tag is not present in the stack, it behaves
+ * like `xml_close_all()`. It is not possible to match
+ * a `CDATA` section using `xml_close_including()`.
+ *
+ * Be aware that it might lead to unexpected results if
+ * multiple tags of the same are nested. Consider the
+ * following snippet.
+ *
+ * ```c
+ * xml_open_tag(&ctx, "a");
+ * xml_open_tag(&ctx, "b");
+ * xml_close_tag(&ctx, "a");
+ * xml_open_tag(&ctx, "c");
+ * xml_raw(&ctx, "value");
+ * xml_close_including(&ctx, "a");
+ * ```
+ *
+ * `xml_close_including()` will stop as soon as it hits the first
+ * tag "a", although it might be intended to keep going until the
+ * outermost one. The result would be:
+ *
+ * ```xml
+ * <a><b><a><c>value</c></a>
+ * ```
+ *
+ * This is the behavior of `xml_close_including()`: It closes all
+ * unclosed tags until it hits the first instance of the specified
+ * tag which is also closed.
+ *
+ * This function will also limit error detection like `xml_close_all()`.
+ * For an explanation of this, see its documentation.
+ *
+ * @see xml_close_all
+ * @see xml_close_tag
+ * @see struct xml_stack
+ */
+void xml_close_including(struct xml_context *ctx, const char *tag);
+
+/*!
+ * @brief Start CDATA section
+ *
+ * Behaves like xml_open_tag(), but for opening `CDATA` sections.
+ * Internally the `XML_CDATA` type of `struct xml_stack` is used.
+ *
+ * Note that this function won't prevent `CDATA` sections or XML
+ * elements inside a `CDATA` section, since this is sometimes
+ * useful.
+ *
+ * @see xml_close_cdata
+ * @see enum xml_tag_type
+ * @see struct xml_stack
+ */
+void xml_open_cdata(struct xml_context *ctx);
+
+/*!
+ * @brief Close CDATA section
+ *
+ * Behaves like xml_close_tag(), but for `CDATA` sections.
+ *
+ * Checks the top of the stack if it is a `CDATA` section.
+ * In that case closes it and updates the stack, otherwise
+ * does nothing and if applicable outputs a warning.
+ *
+ * @see xml_open_cdata
+ */
+void xml_close_cdata(struct xml_context *ctx);