From f4b7cf4448effa168f2e1c247353c00e2741e831 Mon Sep 17 00:00:00 2001 From: sternenseemann <0rpkxez4ksa01gb3typccl0i@systemli.org> Date: Wed, 26 Aug 2020 23:14:56 +0200 Subject: refactor(treewide): prefix public/internal API with sternenblog/ --- Doxyfile | 1 + Makefile | 10 +- cgiutil.c | 130 ------------------ cgiutil.h | 111 ---------------- core.h | 48 ------- entry.c | 216 ------------------------------ entry.h | 109 --------------- index.c | 133 ------------------ index.h | 36 ----- main.c | 17 +-- sternenblog/cgiutil.c | 130 ++++++++++++++++++ sternenblog/cgiutil.h | 111 ++++++++++++++++ sternenblog/core.h | 48 +++++++ sternenblog/entry.c | 216 ++++++++++++++++++++++++++++++ sternenblog/entry.h | 109 +++++++++++++++ sternenblog/index.c | 133 ++++++++++++++++++ sternenblog/index.h | 36 +++++ sternenblog/stringutil.c | 66 +++++++++ sternenblog/stringutil.h | 27 ++++ sternenblog/template.h | 101 ++++++++++++++ sternenblog/timeutil.c | 97 ++++++++++++++ sternenblog/timeutil.h | 43 ++++++ sternenblog/xml.c | 309 ++++++++++++++++++++++++++++++++++++++++++ sternenblog/xml.h | 340 +++++++++++++++++++++++++++++++++++++++++++++++ stringutil.c | 66 --------- stringutil.h | 27 ---- template.h | 101 -------------- templates/simple.c | 13 +- timeutil.c | 97 -------------- timeutil.h | 43 ------ xml.c | 309 ------------------------------------------ xml.h | 340 ----------------------------------------------- 32 files changed, 1789 insertions(+), 1784 deletions(-) delete mode 100644 cgiutil.c delete mode 100644 cgiutil.h delete mode 100644 core.h delete mode 100644 entry.c delete mode 100644 entry.h delete mode 100644 index.c delete mode 100644 index.h create mode 100644 sternenblog/cgiutil.c create mode 100644 sternenblog/cgiutil.h create mode 100644 sternenblog/core.h create mode 100644 sternenblog/entry.c create mode 100644 sternenblog/entry.h create mode 100644 sternenblog/index.c create mode 100644 sternenblog/index.h create mode 100644 sternenblog/stringutil.c create 
mode 100644 sternenblog/stringutil.h create mode 100644 sternenblog/template.h create mode 100644 sternenblog/timeutil.c create mode 100644 sternenblog/timeutil.h create mode 100644 sternenblog/xml.c create mode 100644 sternenblog/xml.h delete mode 100644 stringutil.c delete mode 100644 stringutil.h delete mode 100644 template.h delete mode 100644 timeutil.c delete mode 100644 timeutil.h delete mode 100644 xml.c delete mode 100644 xml.h diff --git a/Doxyfile b/Doxyfile index 73a2d68..8163fc2 100644 --- a/Doxyfile +++ b/Doxyfile @@ -26,3 +26,4 @@ EXAMPLE_PATH = ./doc/examples/ # hack to allow @include simple.c EXAMPLE_PATH += ./templates/ EXCLUDE_SYMBOLS = _POSIX_C_SOURCE +RECURSIVE = YES diff --git a/Makefile b/Makefile index 97529d6..0b4f5f8 100644 --- a/Makefile +++ b/Makefile @@ -2,22 +2,24 @@ include config.mk ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +TEMPLATE_API = sternenblog/core.h config.h sternenblog/xml.h sternenblog/cgiutil.h sternenblog/timeutil.h sternenblog/stringutil.h + sternenblog.cgi: xml.o entry.o index.o stringutil.o cgiutil.o timeutil.o $(TEMPLATE).o main.o $(CC) $(CFLAGS) -o $@ $^ -main.o: main.c core.h timeutil.h config.h +main.o: main.c sternenblog/core.h config.h $(CC) $(CFLAGS) -c -o main.o $< -$(TEMPLATE).o: $(TEMPLATE).c core.h config.h xml.h cgiutil.h timeutil.h stringutil.h +$(TEMPLATE).o: $(TEMPLATE).c $(TEMPLATE_API) $(CC) $(CFLAGS) -I$(ROOT_DIR) -c -o $@ $< -entry.o: config.h entry.c +entry.o: config.h sternenblog/entry.c sternenblog/entry.h # only invoked if config.h does not exist config.h: $(CP) config.example.h config.h -%.o: %.c %.h +%.o: sternenblog/%.c sternenblog/%.h $(CC) $(CFLAGS) -c -o $@ $< clean: diff --git a/cgiutil.c b/cgiutil.c deleted file mode 100644 index 7dc8114..0000000 --- a/cgiutil.c +++ /dev/null @@ -1,130 +0,0 @@ -#include -#include -#include -#include -#include -#include "stringutil.h" - -void send_header(char key[], char val[]) { - fputs(key, stdout); - fputs(": ", stdout); - 
fputs(val, stdout); - puts("\r"); -} - -void terminate_headers(void) { - puts("\r"); -} - -char *http_status_line(int status) { - switch(status) { - case 200: - return "200 OK"; - case 400: - return "400 Bad Request"; - case 401: - return "401 Unauthorized"; - case 403: - return "403 Forbidden"; - case 404: - return "404 Not Found"; - default: - // default to 500 - return "500 Internal Server Error"; - } -} - -int http_errno(int err) { - switch(err) { - case EACCES: - return 403; - case ENOENT: - return 404; - case ENOTDIR: - return 404; - default: - return 500; - } -} - -int urlencode_realloc(char **input, int size) { - if(*input == NULL || size <= 0) { - return -1; - } - - int output_size = size; - char *output = malloc(output_size); - int output_pos = 0; - - if(output == NULL) { - return -1; - } - - for(int i = 0; i < size; i++) { - char c = *(*input + i); - bool needs_escape; - switch(c) { - // generic delimiters - // we assume we never need to escape '/'. This - // should hold since on unix filenames won't - // contain slashes and the basis for all URLs - // in sternenblog are actual files - case ':': case '?': case '#': case '[': case ']': case '@': - // sub delimiters - case '!': case '$': case '&': case '\'': case '(': case ')': - case '*': case '+': case ',': case ';': case '=': - // other characters to encode - case '%': case ' ': - needs_escape = 1; - break; - // in order to simplify the code we just assume - // everything else doesn't have to be encoded - // - // otherwise we'd need to be UTF-8 aware here - // and consider more than one byte at a time. - default: - needs_escape = 0; - } - - int necessary_space = needs_escape ? 
3 : 1; - - if(output_pos + necessary_space >= output_size) { - output_size += necessary_space; - char *tmp = realloc(output, output_size); - if(tmp == NULL) { - free(output); - return -1; - } else { - output = tmp; - } - } - - if(needs_escape) { - short a = (c & 0xf0) >> 4; - short b = c & 0x0f; - output[output_pos++] = '%'; - output[output_pos++] = nibble_hex(a); - output[output_pos++] = nibble_hex(b); - } else { - output[output_pos++] = c; - } - } - - free(*input); - *input = output; - - return output_size; -} - -char *server_url(bool https) { - char *server_name = getenv("SERVER_NAME"); - char *server_port = getenv("SERVER_PORT"); - - if(server_name == NULL || server_port == NULL) { - return NULL; - } - - char *proto = https ? "https://" : "http://"; - - return catn_alloc(4, proto, server_name, ":", server_port); -} diff --git a/cgiutil.h b/cgiutil.h deleted file mode 100644 index f809b25..0000000 --- a/cgiutil.h +++ /dev/null @@ -1,111 +0,0 @@ -/*! - * @file cgiutil.h - * @brief Simple CGI/HTTP helper functions used by sternenblog. - */ - -#include - -/*! - * @brief Print a HTTP header - * - * Prints a HTTP Header to `stdout` like CGI requires. - * - * @param key Name of the HTTP Header - * @param val Contents of the header to send - */ -void send_header(char key[], char val[]); - -/*! - * @brief Print end of HTTP header section - * - * Terminates the header section of a CGI/HTTP Response by printing `\r\n` to `stdout`. - */ -void terminate_headers(void); - -/*! - * @brief Value of a HTTP status header for a given status code. - * - * Helper function that returns the status code plus its - * accompanying reason phrase as a string. - * - * The value is statically allocated so do not attempt - * to free it. - * - * Example usage: - * - * ``` - * send_header("Status", http_status_line(404); - * // Prints: Status: 404 Not Found - * ``` - * - * @param status HTTP status code - * @return status code and reason phrase as a string. 
- */ -char *http_status_line(int status); - -/*! - * @brief Return HTTP error code for given errno - * - * Incomplete mapping of `errno`s to HTTP error codes. - * Defaults to 500. - * - * @param err POSIX errno - * @return HTTP error code - */ -int http_errno(int err); - -/*! - * @brief Urlencode a given dynamically allocated string - * - * urlencode_realloc() receives a pointer to a pointer to - * a dynamically allocated string to encode plus its size - * including the null byte at the end. - * - * It then replaces every reserved character in the string - * except `/` with the appropriate percent encoding. If - * the size of the buffer is not enough, it uses `realloc()` - * to increase it. - * - * Note that the implementation of url encoding is not 100% - * correct, but should be good enough in the context of - * sternenblog. `/` is not encoded since on unix - * a slash should always a path delimiter and never part of - * a filename. Another limitation of the url encoding is - * that it only checks for a list of characters to encode - * instead of checking if the characters are unreserved - * and don't need to be encoded which would be more correct. - * The approach taken has the big advantage that we don't - * need to worry about UTF-8, which makes the implementation - * considerably simpler. As a consequence however it will - * be not aggressive enough in terms of encoding in some - * cases. - * - * On error -1 is returned. In such a case the original - * pointer remains intact, so you can either `free()` it - * or continue with the unencoded string. - * - * Otherwise it returns new size of the buffer. - * - * @param **input pointer to input string - * @param size size of input string including null byte - * @return -1 on error, else size of buffer - */ -int urlencode_realloc(char **input, int size); - -/*! 
- * @brief Returns URL of server addressed by the current CGI request - * - * server_url() uses the CGI 1.1 environment variables `SERVER_NAME` - * and `SERVER_PORT` to construct an URL to the server the current - * request is addressed to. Since CGI only reveals the HTTP version - * used and not wether an encrypted version of HTTP is used, - * server_url() will use the parameter `https` to decide which protocol - * identifier to prefix. - * - * The returned `char *` is dynamically allocated and must be cleaned - * up using `free()` before it goes out of scope. - * - * @param https if true, prefix `https://` else `http://` - * @return Pointer to dynamically allocated char buffer containing the URL. - */ -char *server_url(bool https); diff --git a/core.h b/core.h deleted file mode 100644 index d380d24..0000000 --- a/core.h +++ /dev/null @@ -1,48 +0,0 @@ -/*! - * @file core.h - * @brief Central type definitions of sternenblog - */ -#include - -/*! - * @brief Resolved blog entry - * - * Represents a resolved entry and should only be - * constructed using `make_entry()` and populated using - * `entry_get_text()`. - * - * If constructed correctly, you can expect such an entry to exist. - * - * Use `free_entry()` to free allocated `char *` in an `entry` - * constructed by `make_entry()` (and `entry_get_text()`). - * - * @see make_entry - * @see entry_get_text - * @see free_entry - * @see make_index - */ -struct entry { - // mandatory: part of each well-formed entry - time_t time; //!< last modification time of the entry - char *path; //!< path (on disk) to the entry - char *link; //!< absolute path on the http server to the entry - char *title; //!< title of the post, currently `PATH_INFO` without the initial slash - size_t text_size; //!< size of text, -1 to indicate it's missing - // optional: may be NULL, depending on context - char *text; //!< contents of the entry (mmap-ed file) or `NULL` -}; - -/*! 
- * @brief Type of a HTML response - * - * This enum describes the three types of HTML responses sternenblog can - * generate: a single entry, an index and an error page. - * - * Used internally for routing and passed to the template for context. - */ -enum page_type { - PAGE_TYPE_ENTRY, - PAGE_TYPE_INDEX, - PAGE_TYPE_ERROR -}; - diff --git a/entry.c b/entry.c deleted file mode 100644 index 8bb6476..0000000 --- a/entry.c +++ /dev/null @@ -1,216 +0,0 @@ -#define _POSIX_C_SOURCE 200809L -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "core.h" -#include "config.h" // TODO: make independent? -#include "cgiutil.h" -#include "entry.h" - -int make_entry(const char *blog_dir, char *script_name, char *path_info, struct entry *entry) { - // TODO: allow subdirectories? - // TODO: no status code return? - - // TODO: url encoding of links - - // intialize pointers - entry->time = 0; - entry->link = NULL; - entry->path = NULL; - entry->title = NULL; - - // won't be handled by make_entry - entry->text = NULL; - entry->text_size = 0; - - // validate path_info - if(path_info == NULL) { - fprintf(stderr, "Missing PATH_INFO\n"); - return 500; - } - - size_t path_info_len = strlen(path_info); - - // if path_info is empty make_entry shouldn't be called - // as per RFC3875 expect it to start with a slash - if(path_info_len == 0 || path_info[0] != '/') { - fprintf(stderr, "Malformed PATH_INFO: \"%s\"\n", path_info); - return 400; - } - - // check if the path_info segments are alright - // should be sane coming from a webserver - int last_was_slash = 0; - for(size_t i = 0; i < path_info_len; i++) { - if(last_was_slash) { - switch(path_info[i]) { - case '/': - // TODO: necessary? 
- fprintf(stderr, "Double slash in PATH_INFO: \"%s\"\n", path_info); - return 400; - break; - case '.': - fprintf(stderr, "Dot file or dir in PATH_INFO: \"%s\"\n", path_info); - return 403; - break; - default: - last_was_slash = 0; - } - } else if(path_info[i] == '/') { - last_was_slash = 1; - } - } - - // set title (PATH_INFO without the slash) - if(path_info_len < 2) { - // shouldn't be called with just "/" - return 500; - } - - // title length is exactly path_info_len (-1 for slash, +1 for null byte) - entry->title = malloc(sizeof(char) * path_info_len); - memcpy(entry->title, path_info + 1, sizeof(char) * path_info_len); - - // build path to entry's file - size_t blog_dir_len = strlen(blog_dir); - - entry->path = malloc(sizeof(char) * (path_info_len + blog_dir_len + 1)); - - memcpy(entry->path, blog_dir, blog_dir_len * sizeof(char)); - - // prevent double slash - if(entry->path[blog_dir_len - 1] == '/') { - blog_dir_len--; - } - - memcpy(entry->path + blog_dir_len, path_info, path_info_len); - entry->path[path_info_len + blog_dir_len] = '\0'; - - struct stat file_info; - memset(&file_info, 0, sizeof(struct stat)); - - if(stat(entry->path, &file_info) == -1) { - return http_errno(errno); - } - - int regular_file = (file_info.st_mode & S_IFMT) == S_IFREG; - - // strict access check requires files to be owned by the webserver's - // group or user in order to be processed. 
can be disabled in config.h - bool access = !BLOG_STRICT_ACCESS; - if(BLOG_STRICT_ACCESS) { - gid_t gid = getegid(); - uid_t uid = geteuid(); - access = file_info.st_gid == gid || file_info.st_uid == uid; - } - - if(!access) { - return http_errno(EACCES); - } else if(!regular_file) { - return http_errno(ENOENT); - } - - // use POSIX compatible version, since we don't need nanoseconds - entry->time = file_info.st_mtime; - - // build the link using SCRIPT_NAME - if(script_name == NULL) { - fprintf(stderr, "Missing SCRIPT_NAME\n"); - return 500; - } - - // don't check SCRIPT_NAME validity, since we - // don't depend on it starting with a slash - - size_t script_name_len = strlen(script_name); - size_t link_size = script_name_len + path_info_len + 1; - - entry->link = malloc(sizeof(char) * link_size); - - if(script_name_len != 0) { - memcpy(entry->link, script_name, script_name_len); - } - - memcpy(entry->link + script_name_len, path_info, path_info_len); - - entry->link[link_size - 1] = '\0'; - - if(urlencode_realloc(&entry->link, link_size) <= 0) { - return 500; - } - - return 200; -} - -int entry_get_text(struct entry *entry) { - // TODO set errno correctly in all cases - if(entry->text != NULL) { - // nothing to do - return 0; - } - - int fd = open(entry->path, O_RDONLY); - - if(fd == -1) { - return -1; - } - - struct stat file_info; - - if(fstat(fd, &file_info) == -1) { - return -1; - } - - if(file_info.st_size == 0) { - close(fd); - return 0; - } - - entry->text = mmap(NULL, file_info.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if(entry->text == MAP_FAILED) { - entry->text = NULL; - close(fd); - return -1; - } - - entry->text_size = file_info.st_size; - - if(close(fd) == -1) { - return -1; - } - - return 0; -} - -void entry_unget_text(struct entry *entry) { - if(entry->text_size > 0 && entry->text != NULL && - munmap(entry->text, entry->text_size) != -1) { - entry->text_size = -1; - entry->text = NULL; - } -} - -void free_entry(struct entry *entry) { - 
if(entry->path != NULL) { - free(entry->path); - } - - if(entry->link != NULL) { - free(entry->link); - } - - if(entry->title != NULL) { - free(entry->title); - } - - entry_unget_text(entry); -} diff --git a/entry.h b/entry.h deleted file mode 100644 index de8d868..0000000 --- a/entry.h +++ /dev/null @@ -1,109 +0,0 @@ -/*! - * @file entry.h - * @brief Construction and destruction of entries - * - * Requires prior inclusion of core.h. - */ - -/*! - * @brief Construct an entry for a given `PATH_INFO` - * - * `make_entry()` first does a sanity check of the provided `path_info`: - * - * * It must start with a slash - * * It may not contain any path segments which start with a dot - * (i. e. no dotfiles and no `.` and `..`) - * * It may not contain any double slashes - * - * `make_entry()` currently has no support for any kind of escaped `PATH_INFO`. - * This might need some work in the future, but seems unnecessary at the moment - * RFC3875 doesn't specify any escaping mechanism for `PATH_INFO` and leaves it - * to the webserver to deal with cases like `foo%2dbar` which is indistinguishable - * from `foo/bar` in its decoded form. - * - * Note that accessing subdirectories is possible and allowed, i. e. a `path_info` of - * `"/foo/bar"` will result in an entry being constructed for `/foo/bar`. - * This behavior is sometimes useful (entries in subdirectories are not included in - * any indices), but may also be confusing. In the future an option to disable this - * may be added. - * - * Before constructing the entry, `make_entry()` calls `stat()` to check if the given - * entry a) exists b) is a regular file and c) is owned by the current processes user - * or group. The last check ensures that the file is not only readable for the webserver, - * but also owned by either its group or its user. 
This lessens the likelyhood of - * something accidentially being processed by `make_entry()`, since usually a - * `chown http:http` or similar will be necessary to satisfy the check. - * - * After that the `entry` structure is populated: - * - * * `path` is set to the constructed path to the entry file (dynamically allocated) - * * `title` is set to `path_info` with the leading slash removed (dynamically allocated) - * * `time` is set to the file's modification time - * * `link` is set to `script_name` and `path_info` concatenated which is the absolute web - * server path corresponding to the entry - * * `text_size` is set to `-1` - * * `text` is set to `NULL` - * - * `make_entry()` may fail at any point with parts of the struct already containing - * pointers to dynamically allocated memory. It is always safe to call `free_entry()` - * after calling `make_entry()`, so you should make sure to do just that in case of - * both error and success. - * - * @param blog_dir Directory blog entries are stored in, usually `BLOG_DIR` - * @param path_info `PATH_INFO` CGI environment variable - * @param script_name `SCRIPT_NAME` CGI environment variable - * @param entry Uninitialized entry structure to update - * - * @return 200 on success, an appropriate HTTP status code on error - * - * @see struct entry - * @see entry_get_text - * @see free_entry - * @see make_index - */ -int make_entry(const char *blog_dir, char *script_name, char *path_info, struct entry *entry); - -/*! - * @brief Populate an `entry`'s `text` field - * - * Reads the contents of `entry->path` into memory using `mmap()` and sets - * `entry->text` and `entry->text_size` accordingly. - * - * Must be called on an already completely constructed entry. - * - * @return 0 on success, -1 on error, currently errno is not set correctly - * - * @see entry_unget_text - * @see make_entry - * @see free_entry - */ -int entry_get_text(struct entry *entry); - -/*! 
- * @brief Unmap the file referenced in a `struct entry` - * - * Tries to `munmap()` the file pointed to by `entry->text` - * if present, and updates `entry->text_size` accordingly. - * - * The rest of the struct is left untouched. - * - * @see free_entry - */ -void entry_unget_text(struct entry *entry); - -/*! - * @brief Free dynamically allocated parts on an `entry` - * - * Frees any non `NULL` pointers in the given `entry` structure. - * `make_entry()` initializes all pointers as `NULL` first thing - * after being called, so you can always call `free_entry()` after - * `make_entry()`. - * - * It also unmaps the mapped file in `text` if it is not `NULL` - * using `entry_unget_text()`. - * - * Warning: It won't call `free()` on the entire entry structure. - * - * @see entry_unget_text - */ -void free_entry(struct entry *entry); diff --git a/index.c b/index.c deleted file mode 100644 index bb94bb9..0000000 --- a/index.c +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "core.h" -#include "entry.h" -#include "index.h" - -/*! - * @brief Base size of the allocated index array - * - * Base count of `struct index` to use for allocation - * of the `*entries` array in `make_index()`. 
- * - * @see make_index - */ -#define BASE_INDEX_SIZE 64 - -int entries_timesort_r(struct entry *a, struct entry *b) { - if(a == NULL && b == NULL) { - return 0; - } else if(a == NULL) { - return 1; - } else if(b == NULL) { - return -1; - } else if(a->time > b->time) { - return -1; - } else if(a->time < b->time) { - return 1; - } else { - return 0; - } -} - -int make_index(const char *blog_dir, char *script_name, bool get_text, struct entry *entries[]) { - if(*entries != NULL) { - return -1; - } - - if(script_name == NULL) { - return -1; - } - - size_t index_count = 0; - DIR *dir; - - dir = opendir(blog_dir); - - if(dir == NULL) { - return -1; - } - - struct dirent *ent; - - size_t size = BASE_INDEX_SIZE; - *entries = malloc(sizeof(struct entry) * size); - - if(*entries == NULL) { - return -1; - } - - // losely based on musl's scandir(3) - // directly use struct entry instead of dirents, - // so we can directly use entries for filtering - // and later sorting - // - // TODO errno handling - while((ent = readdir(dir)) != NULL) { - if(ent->d_name[0] != '.') { - // build PATH_INFO for given entry - size_t d_name_len = strlen(ent->d_name); - char path_info[d_name_len + 2]; - path_info[0] = '/'; - memcpy(path_info + 1, ent->d_name, d_name_len + 1); - - struct entry tmp_entry; - - int result = make_entry(blog_dir, script_name, path_info, &tmp_entry); - - if(result == 200) { - // increase array size if necessary - if(index_count >= size) { - size += BASE_INDEX_SIZE; - - if(size > SIZE_MAX/sizeof(struct entry)) { - break; - } - - struct entry *tmp = realloc(*entries, size * sizeof(struct entry)); - - if(tmp == NULL) { - break; - } - - *entries = tmp; - } - - memcpy(*entries + index_count, &tmp_entry, sizeof(struct entry)); - - if(get_text) { - entry_get_text(*entries + index_count); - } - - index_count++; - } else { - free_entry(&tmp_entry); - } - } - } - - closedir(dir); - - // reverse sort by time (use "incorrect" compar function - // to avoid using glibc specific 
qsort_r) - qsort(*entries, index_count, sizeof(struct entry), - (int (*)(const void *, const void *)) entries_timesort_r); - - return index_count; -} - -void free_index(struct entry *entries[], int count) { - if(count > 0) { - for(int i = 0; i < count; i++) { - free_entry(*entries + i); - } - } - - free(*entries); -} diff --git a/index.h b/index.h deleted file mode 100644 index 586d203..0000000 --- a/index.h +++ /dev/null @@ -1,36 +0,0 @@ -/*! - * @file index.h - * @brief Construction and destruction of entry indices - * - * Requires prior inclusion of core.h. - */ -#include - -/*! - * @brief Build index of given `blog_dir` - * - * Allocates an array of entries, reads `blog_dir` and adds a `struct entry` to the - * array for every file for which `make_entry()` reports no error. It doesn't enter - * subdirectories. - * - * Note that it's error handling is very simple and it doesn't distinguish between an - * error occuring and the end of the directory. - * - * @param blog_dir path to the directory entries are stored in - * @param script_name the value of the `SCRIPT_NAME` environment variable - * @param get_text whether to call `entry_get_text()` on successfully constructed entries - * @param entries pointer to an array that should be used - * @return size of the dynamically allocated entries array - * @see free_index - */ -int make_index(const char *blog_dir, char *script_name, bool get_text, struct entry *entries[]); - -/*! - * @brief Free dynamically allocated index - * - * Call `free_entry()` for every entry and free entire array afterwards. 
- * - * @param entries pointer to array of entries - * @param count size of the given array - */ -void free_index(struct entry *entries[], int count); diff --git a/main.c b/main.c index dfbfd5f..e6689d5 100644 --- a/main.c +++ b/main.c @@ -86,15 +86,16 @@ #include #include -#include "core.h" #include "config.h" -#include "cgiutil.h" -#include "entry.h" -#include "index.h" -#include "stringutil.h" -#include "timeutil.h" -#include "template.h" -#include "xml.h" + +#include "sternenblog/core.h" +#include "sternenblog/cgiutil.h" +#include "sternenblog/entry.h" +#include "sternenblog/index.h" +#include "sternenblog/stringutil.h" +#include "sternenblog/timeutil.h" +#include "sternenblog/template.h" +#include "sternenblog/xml.h" /*! * @brief Routing enum to differentiate feeds diff --git a/sternenblog/cgiutil.c b/sternenblog/cgiutil.c new file mode 100644 index 0000000..7dc8114 --- /dev/null +++ b/sternenblog/cgiutil.c @@ -0,0 +1,130 @@ +#include +#include +#include +#include +#include +#include "stringutil.h" + +void send_header(char key[], char val[]) { + fputs(key, stdout); + fputs(": ", stdout); + fputs(val, stdout); + puts("\r"); +} + +void terminate_headers(void) { + puts("\r"); +} + +char *http_status_line(int status) { + switch(status) { + case 200: + return "200 OK"; + case 400: + return "400 Bad Request"; + case 401: + return "401 Unauthorized"; + case 403: + return "403 Forbidden"; + case 404: + return "404 Not Found"; + default: + // default to 500 + return "500 Internal Server Error"; + } +} + +int http_errno(int err) { + switch(err) { + case EACCES: + return 403; + case ENOENT: + return 404; + case ENOTDIR: + return 404; + default: + return 500; + } +} + +int urlencode_realloc(char **input, int size) { + if(*input == NULL || size <= 0) { + return -1; + } + + int output_size = size; + char *output = malloc(output_size); + int output_pos = 0; + + if(output == NULL) { + return -1; + } + + for(int i = 0; i < size; i++) { + char c = *(*input + i); + bool 
needs_escape; + switch(c) { + // generic delimiters + // we assume we never need to escape '/'. This + // should hold since on unix filenames won't + // contain slashes and the basis for all URLs + // in sternenblog are actual files + case ':': case '?': case '#': case '[': case ']': case '@': + // sub delimiters + case '!': case '$': case '&': case '\'': case '(': case ')': + case '*': case '+': case ',': case ';': case '=': + // other characters to encode + case '%': case ' ': + needs_escape = 1; + break; + // in order to simplify the code we just assume + // everything else doesn't have to be encoded + // + // otherwise we'd need to be UTF-8 aware here + // and consider more than one byte at a time. + default: + needs_escape = 0; + } + + int necessary_space = needs_escape ? 3 : 1; + + if(output_pos + necessary_space >= output_size) { + output_size += necessary_space; + char *tmp = realloc(output, output_size); + if(tmp == NULL) { + free(output); + return -1; + } else { + output = tmp; + } + } + + if(needs_escape) { + short a = (c & 0xf0) >> 4; + short b = c & 0x0f; + output[output_pos++] = '%'; + output[output_pos++] = nibble_hex(a); + output[output_pos++] = nibble_hex(b); + } else { + output[output_pos++] = c; + } + } + + free(*input); + *input = output; + + return output_size; +} + +char *server_url(bool https) { + char *server_name = getenv("SERVER_NAME"); + char *server_port = getenv("SERVER_PORT"); + + if(server_name == NULL || server_port == NULL) { + return NULL; + } + + char *proto = https ? "https://" : "http://"; + + return catn_alloc(4, proto, server_name, ":", server_port); +} diff --git a/sternenblog/cgiutil.h b/sternenblog/cgiutil.h new file mode 100644 index 0000000..f809b25 --- /dev/null +++ b/sternenblog/cgiutil.h @@ -0,0 +1,111 @@ +/*! + * @file cgiutil.h + * @brief Simple CGI/HTTP helper functions used by sternenblog. + */ + +#include + +/*! + * @brief Print a HTTP header + * + * Prints a HTTP Header to `stdout` like CGI requires. 
+ * + * @param key Name of the HTTP Header + * @param val Contents of the header to send + */ +void send_header(char key[], char val[]); + +/*! + * @brief Print end of HTTP header section + * + * Terminates the header section of a CGI/HTTP Response by printing `\r\n` to `stdout`. + */ +void terminate_headers(void); + +/*! + * @brief Value of a HTTP status header for a given status code. + * + * Helper function that returns the status code plus its + * accompanying reason phrase as a string. + * + * The value is statically allocated so do not attempt + * to free it. + * + * Example usage: + * + * ``` + * send_header("Status", http_status_line(404); + * // Prints: Status: 404 Not Found + * ``` + * + * @param status HTTP status code + * @return status code and reason phrase as a string. + */ +char *http_status_line(int status); + +/*! + * @brief Return HTTP error code for given errno + * + * Incomplete mapping of `errno`s to HTTP error codes. + * Defaults to 500. + * + * @param err POSIX errno + * @return HTTP error code + */ +int http_errno(int err); + +/*! + * @brief Urlencode a given dynamically allocated string + * + * urlencode_realloc() receives a pointer to a pointer to + * a dynamically allocated string to encode plus its size + * including the null byte at the end. + * + * It then replaces every reserved character in the string + * except `/` with the appropriate percent encoding. If + * the size of the buffer is not enough, it uses `realloc()` + * to increase it. + * + * Note that the implementation of url encoding is not 100% + * correct, but should be good enough in the context of + * sternenblog. `/` is not encoded since on unix + * a slash should always a path delimiter and never part of + * a filename. Another limitation of the url encoding is + * that it only checks for a list of characters to encode + * instead of checking if the characters are unreserved + * and don't need to be encoded which would be more correct. 
+ * The approach taken has the big advantage that we don't + * need to worry about UTF-8, which makes the implementation + * considerably simpler. As a consequence however it will + * be not aggressive enough in terms of encoding in some + * cases. + * + * On error -1 is returned. In such a case the original + * pointer remains intact, so you can either `free()` it + * or continue with the unencoded string. + * + * Otherwise it returns new size of the buffer. + * + * @param **input pointer to input string + * @param size size of input string including null byte + * @return -1 on error, else size of buffer + */ +int urlencode_realloc(char **input, int size); + +/*! + * @brief Returns URL of server addressed by the current CGI request + * + * server_url() uses the CGI 1.1 environment variables `SERVER_NAME` + * and `SERVER_PORT` to construct an URL to the server the current + * request is addressed to. Since CGI only reveals the HTTP version + * used and not wether an encrypted version of HTTP is used, + * server_url() will use the parameter `https` to decide which protocol + * identifier to prefix. + * + * The returned `char *` is dynamically allocated and must be cleaned + * up using `free()` before it goes out of scope. + * + * @param https if true, prefix `https://` else `http://` + * @return Pointer to dynamically allocated char buffer containing the URL. + */ +char *server_url(bool https); diff --git a/sternenblog/core.h b/sternenblog/core.h new file mode 100644 index 0000000..d380d24 --- /dev/null +++ b/sternenblog/core.h @@ -0,0 +1,48 @@ +/*! + * @file core.h + * @brief Central type definitions of sternenblog + */ +#include + +/*! + * @brief Resolved blog entry + * + * Represents a resolved entry and should only be + * constructed using `make_entry()` and populated using + * `entry_get_text()`. + * + * If constructed correctly, you can expect such an entry to exist. 
+ * + * Use `free_entry()` to free allocated `char *` in an `entry` + * constructed by `make_entry()` (and `entry_get_text()`). + * + * @see make_entry + * @see entry_get_text + * @see free_entry + * @see make_index + */ +struct entry { + // mandatory: part of each well-formed entry + time_t time; //!< last modification time of the entry + char *path; //!< path (on disk) to the entry + char *link; //!< absolute path on the http server to the entry + char *title; //!< title of the post, currently `PATH_INFO` without the initial slash + size_t text_size; //!< size of text, -1 to indicate it's missing + // optional: may be NULL, depending on context + char *text; //!< contents of the entry (mmap-ed file) or `NULL` +}; + +/*! + * @brief Type of a HTML response + * + * This enum describes the three types of HTML responses sternenblog can + * generate: a single entry, an index and an error page. + * + * Used internally for routing and passed to the template for context. + */ +enum page_type { + PAGE_TYPE_ENTRY, + PAGE_TYPE_INDEX, + PAGE_TYPE_ERROR +}; + diff --git a/sternenblog/entry.c b/sternenblog/entry.c new file mode 100644 index 0000000..0318311 --- /dev/null +++ b/sternenblog/entry.c @@ -0,0 +1,216 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core.h" +#include "../config.h" // TODO: make independent? +#include "cgiutil.h" +#include "entry.h" + +int make_entry(const char *blog_dir, char *script_name, char *path_info, struct entry *entry) { + // TODO: allow subdirectories? + // TODO: no status code return? 
+ + // TODO: url encoding of links + + // intialize pointers + entry->time = 0; + entry->link = NULL; + entry->path = NULL; + entry->title = NULL; + + // won't be handled by make_entry + entry->text = NULL; + entry->text_size = 0; + + // validate path_info + if(path_info == NULL) { + fprintf(stderr, "Missing PATH_INFO\n"); + return 500; + } + + size_t path_info_len = strlen(path_info); + + // if path_info is empty make_entry shouldn't be called + // as per RFC3875 expect it to start with a slash + if(path_info_len == 0 || path_info[0] != '/') { + fprintf(stderr, "Malformed PATH_INFO: \"%s\"\n", path_info); + return 400; + } + + // check if the path_info segments are alright + // should be sane coming from a webserver + int last_was_slash = 0; + for(size_t i = 0; i < path_info_len; i++) { + if(last_was_slash) { + switch(path_info[i]) { + case '/': + // TODO: necessary? + fprintf(stderr, "Double slash in PATH_INFO: \"%s\"\n", path_info); + return 400; + break; + case '.': + fprintf(stderr, "Dot file or dir in PATH_INFO: \"%s\"\n", path_info); + return 403; + break; + default: + last_was_slash = 0; + } + } else if(path_info[i] == '/') { + last_was_slash = 1; + } + } + + // set title (PATH_INFO without the slash) + if(path_info_len < 2) { + // shouldn't be called with just "/" + return 500; + } + + // title length is exactly path_info_len (-1 for slash, +1 for null byte) + entry->title = malloc(sizeof(char) * path_info_len); + memcpy(entry->title, path_info + 1, sizeof(char) * path_info_len); + + // build path to entry's file + size_t blog_dir_len = strlen(blog_dir); + + entry->path = malloc(sizeof(char) * (path_info_len + blog_dir_len + 1)); + + memcpy(entry->path, blog_dir, blog_dir_len * sizeof(char)); + + // prevent double slash + if(entry->path[blog_dir_len - 1] == '/') { + blog_dir_len--; + } + + memcpy(entry->path + blog_dir_len, path_info, path_info_len); + entry->path[path_info_len + blog_dir_len] = '\0'; + + struct stat file_info; + memset(&file_info, 0, 
sizeof(struct stat)); + + if(stat(entry->path, &file_info) == -1) { + return http_errno(errno); + } + + int regular_file = (file_info.st_mode & S_IFMT) == S_IFREG; + + // strict access check requires files to be owned by the webserver's + // group or user in order to be processed. can be disabled in config.h + bool access = !BLOG_STRICT_ACCESS; + if(BLOG_STRICT_ACCESS) { + gid_t gid = getegid(); + uid_t uid = geteuid(); + access = file_info.st_gid == gid || file_info.st_uid == uid; + } + + if(!access) { + return http_errno(EACCES); + } else if(!regular_file) { + return http_errno(ENOENT); + } + + // use POSIX compatible version, since we don't need nanoseconds + entry->time = file_info.st_mtime; + + // build the link using SCRIPT_NAME + if(script_name == NULL) { + fprintf(stderr, "Missing SCRIPT_NAME\n"); + return 500; + } + + // don't check SCRIPT_NAME validity, since we + // don't depend on it starting with a slash + + size_t script_name_len = strlen(script_name); + size_t link_size = script_name_len + path_info_len + 1; + + entry->link = malloc(sizeof(char) * link_size); + + if(script_name_len != 0) { + memcpy(entry->link, script_name, script_name_len); + } + + memcpy(entry->link + script_name_len, path_info, path_info_len); + + entry->link[link_size - 1] = '\0'; + + if(urlencode_realloc(&entry->link, link_size) <= 0) { + return 500; + } + + return 200; +} + +int entry_get_text(struct entry *entry) { + // TODO set errno correctly in all cases + if(entry->text != NULL) { + // nothing to do + return 0; + } + + int fd = open(entry->path, O_RDONLY); + + if(fd == -1) { + return -1; + } + + struct stat file_info; + + if(fstat(fd, &file_info) == -1) { + return -1; + } + + if(file_info.st_size == 0) { + close(fd); + return 0; + } + + entry->text = mmap(NULL, file_info.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if(entry->text == MAP_FAILED) { + entry->text = NULL; + close(fd); + return -1; + } + + entry->text_size = file_info.st_size; + + if(close(fd) == -1) { + return 
-1; + } + + return 0; +} + +void entry_unget_text(struct entry *entry) { + if(entry->text_size > 0 && entry->text != NULL && + munmap(entry->text, entry->text_size) != -1) { + entry->text_size = -1; + entry->text = NULL; + } +} + +void free_entry(struct entry *entry) { + if(entry->path != NULL) { + free(entry->path); + } + + if(entry->link != NULL) { + free(entry->link); + } + + if(entry->title != NULL) { + free(entry->title); + } + + entry_unget_text(entry); +} diff --git a/sternenblog/entry.h b/sternenblog/entry.h new file mode 100644 index 0000000..de8d868 --- /dev/null +++ b/sternenblog/entry.h @@ -0,0 +1,109 @@ +/*! + * @file entry.h + * @brief Construction and destruction of entries + * + * Requires prior inclusion of core.h. + */ + +/*! + * @brief Construct an entry for a given `PATH_INFO` + * + * `make_entry()` first does a sanity check of the provided `path_info`: + * + * * It must start with a slash + * * It may not contain any path segments which start with a dot + * (i. e. no dotfiles and no `.` and `..`) + * * It may not contain any double slashes + * + * `make_entry()` currently has no support for any kind of escaped `PATH_INFO`. + * This might need some work in the future, but seems unnecessary at the moment + * RFC3875 doesn't specify any escaping mechanism for `PATH_INFO` and leaves it + * to the webserver to deal with cases like `foo%2dbar` which is indistinguishable + * from `foo/bar` in its decoded form. + * + * Note that accessing subdirectories is possible and allowed, i. e. a `path_info` of + * `"/foo/bar"` will result in an entry being constructed for `/foo/bar`. + * This behavior is sometimes useful (entries in subdirectories are not included in + * any indices), but may also be confusing. In the future an option to disable this + * may be added. + * + * Before constructing the entry, `make_entry()` calls `stat()` to check if the given + * entry a) exists b) is a regular file and c) is owned by the current processes user + * or group. 
The last check ensures that the file is not only readable for the webserver, + * but also owned by either its group or its user. This lessens the likelihood of + * something accidentally being processed by `make_entry()`, since usually a + * `chown http:http` or similar will be necessary to satisfy the check. + * + * After that the `entry` structure is populated: + * + * * `path` is set to the constructed path to the entry file (dynamically allocated) + * * `title` is set to `path_info` with the leading slash removed (dynamically allocated) + * * `time` is set to the file's modification time + * * `link` is set to `script_name` and `path_info` concatenated which is the absolute web + * server path corresponding to the entry + * * `text_size` is set to `0` + * * `text` is set to `NULL` + * + * `make_entry()` may fail at any point with parts of the struct already containing + * pointers to dynamically allocated memory. It is always safe to call `free_entry()` + * after calling `make_entry()`, so you should make sure to do just that in case of + * both error and success. + * + * @param blog_dir Directory blog entries are stored in, usually `BLOG_DIR` + * @param path_info `PATH_INFO` CGI environment variable + * @param script_name `SCRIPT_NAME` CGI environment variable + * @param entry Uninitialized entry structure to update + * + * @return 200 on success, an appropriate HTTP status code on error + * + * @see struct entry + * @see entry_get_text + * @see free_entry + * @see make_index + */ +int make_entry(const char *blog_dir, char *script_name, char *path_info, struct entry *entry); + +/*! + * @brief Populate an `entry`'s `text` field + * + * Reads the contents of `entry->path` into memory using `mmap()` and sets + * `entry->text` and `entry->text_size` accordingly. + * + * Must be called on an already completely constructed entry. 
+ * + * @return 0 on success, -1 on error, currently errno is not set correctly + * + * @see entry_unget_text + * @see make_entry + * @see free_entry + */ +int entry_get_text(struct entry *entry); + +/*! + * @brief Unmap the file referenced in a `struct entry` + * + * Tries to `munmap()` the file pointed to by `entry->text` + * if present, and updates `entry->text_size` accordingly. + * + * The rest of the struct is left untouched. + * + * @see free_entry + */ +void entry_unget_text(struct entry *entry); + +/*! + * @brief Free dynamically allocated parts on an `entry` + * + * Frees any non `NULL` pointers in the given `entry` structure. + * `make_entry()` initializes all pointers as `NULL` first thing + * after being called, so you can always call `free_entry()` after + * `make_entry()`. + * + * It also unmaps the mapped file in `text` if it is not `NULL` + * using `entry_unget_text()`. + * + * Warning: It won't call `free()` on the entire entry structure. + * + * @see entry_unget_text + */ +void free_entry(struct entry *entry); diff --git a/sternenblog/index.c b/sternenblog/index.c new file mode 100644 index 0000000..bb94bb9 --- /dev/null +++ b/sternenblog/index.c @@ -0,0 +1,133 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "core.h" +#include "entry.h" +#include "index.h" + +/*! + * @brief Base size of the allocated index array + * + * Base count of `struct index` to use for allocation + * of the `*entries` array in `make_index()`. 
+ * + * @see make_index + */ +#define BASE_INDEX_SIZE 64 + +int entries_timesort_r(struct entry *a, struct entry *b) { + if(a == NULL && b == NULL) { + return 0; + } else if(a == NULL) { + return 1; + } else if(b == NULL) { + return -1; + } else if(a->time > b->time) { + return -1; + } else if(a->time < b->time) { + return 1; + } else { + return 0; + } +} + +int make_index(const char *blog_dir, char *script_name, bool get_text, struct entry *entries[]) { + if(*entries != NULL) { + return -1; + } + + if(script_name == NULL) { + return -1; + } + + size_t index_count = 0; + DIR *dir; + + dir = opendir(blog_dir); + + if(dir == NULL) { + return -1; + } + + struct dirent *ent; + + size_t size = BASE_INDEX_SIZE; + *entries = malloc(sizeof(struct entry) * size); + + if(*entries == NULL) { + return -1; + } + + // losely based on musl's scandir(3) + // directly use struct entry instead of dirents, + // so we can directly use entries for filtering + // and later sorting + // + // TODO errno handling + while((ent = readdir(dir)) != NULL) { + if(ent->d_name[0] != '.') { + // build PATH_INFO for given entry + size_t d_name_len = strlen(ent->d_name); + char path_info[d_name_len + 2]; + path_info[0] = '/'; + memcpy(path_info + 1, ent->d_name, d_name_len + 1); + + struct entry tmp_entry; + + int result = make_entry(blog_dir, script_name, path_info, &tmp_entry); + + if(result == 200) { + // increase array size if necessary + if(index_count >= size) { + size += BASE_INDEX_SIZE; + + if(size > SIZE_MAX/sizeof(struct entry)) { + break; + } + + struct entry *tmp = realloc(*entries, size * sizeof(struct entry)); + + if(tmp == NULL) { + break; + } + + *entries = tmp; + } + + memcpy(*entries + index_count, &tmp_entry, sizeof(struct entry)); + + if(get_text) { + entry_get_text(*entries + index_count); + } + + index_count++; + } else { + free_entry(&tmp_entry); + } + } + } + + closedir(dir); + + // reverse sort by time (use "incorrect" compar function + // to avoid using glibc specific 
qsort_r) + qsort(*entries, index_count, sizeof(struct entry), + (int (*)(const void *, const void *)) entries_timesort_r); + + return index_count; +} + +void free_index(struct entry *entries[], int count) { + if(count > 0) { + for(int i = 0; i < count; i++) { + free_entry(*entries + i); + } + } + + free(*entries); +} diff --git a/sternenblog/index.h b/sternenblog/index.h new file mode 100644 index 0000000..586d203 --- /dev/null +++ b/sternenblog/index.h @@ -0,0 +1,36 @@ +/*! + * @file index.h + * @brief Construction and destruction of entry indices + * + * Requires prior inclusion of core.h. + */ +#include + +/*! + * @brief Build index of given `blog_dir` + * + * Allocates an array of entries, reads `blog_dir` and adds a `struct entry` to the + * array for every file for which `make_entry()` reports no error. It doesn't enter + * subdirectories. + * + * Note that it's error handling is very simple and it doesn't distinguish between an + * error occuring and the end of the directory. + * + * @param blog_dir path to the directory entries are stored in + * @param script_name the value of the `SCRIPT_NAME` environment variable + * @param get_text whether to call `entry_get_text()` on successfully constructed entries + * @param entries pointer to an array that should be used + * @return size of the dynamically allocated entries array + * @see free_index + */ +int make_index(const char *blog_dir, char *script_name, bool get_text, struct entry *entries[]); + +/*! + * @brief Free dynamically allocated index + * + * Call `free_entry()` for every entry and free entire array afterwards. 
/*!
 * @brief Returns hex digit for given integer
 *
 * Maps 0-9 to `'0'`-`'9'` and 10-15 to `'A'`-`'F'`.
 *
 * @param h value of the nibble
 * @return the digit, or 0 (NUL) if `h` is outside of 0-15
 */
char nibble_hex(short h) {
    if(h >= 0 && h <= 9) {
        return (char) ('0' + h);
    }

    if(h >= 10 && h <= 15) {
        return (char) ('A' + (h - 10));
    }

    return 0;
}

/*!
 * @brief Concatenate arbitrary number of strings into
 *        dynamically allocated buffer
 *
 * `NULL` arguments are skipped. The result is always NUL terminated,
 * regardless of which arguments were `NULL`.
 *
 * @param n number of `char *` arguments that follow
 * @return newly allocated string the caller must `free()`. `NULL` if
 *         allocation failed or if there was no non-`NULL` argument.
 */
char *catn_alloc(size_t n, ...) {
    va_list args;
    char *buffer = NULL;
    size_t len = 0; // length of the string built so far, excluding NUL

    va_start(args, n);

    for(size_t i = 0; i < n; i++) {
        char *str = va_arg(args, char *);

        if(str == NULL) {
            continue;
        }

        size_t str_len = strlen(str);

        // always reserve room for the terminator, so the buffer is a
        // valid string no matter which argument turns out to be the last
        char *tmp = realloc(buffer, len + str_len + 1);

        if(tmp == NULL) {
            // report the error instead of returning a truncated string
            free(buffer);
            buffer = NULL;
            break;
        }

        buffer = tmp;
        memcpy(buffer + len, str, str_len);
        len += str_len;
        buffer[len] = '\0';
    }

    va_end(args);

    return buffer;
}
/*!
 * @file template.h
 * @brief Declarations of functions to be implemented by a sternenblog template
 *
 * Requires prior inclusion of core.h.
 *
 * The functions declared in template.h are called by `blog_index()`
 * and `blog_entry()` to generate the HTML document CGI responses
 * involving HTML (contrary to the RSS feed which is independent
 * from templates).
 *
 * These functions can be implemented by a custom C source file
 * in order to customize the HTML output of sternenblog. Every
 * function is expected to output HTML to `stdout`. They themselves
 * can expect to be called in the following order:
 *
 * * template_header()
 * * template_main() (once for a single entry or an error page,
 *   any number of times for an index)
 * * template_footer()
 */

/*!
 * @brief (Meta) data about the page being served
 *
 * `struct template_data` is used to pass information about
 * the current page to the template. It is received as the
 * single argument by all template functions.
 *
 * The following assumptions about its contents can be made:
 *
 * * `page_type == PAGE_TYPE_ENTRY` → `entry != NULL`
 * * `page_type == PAGE_TYPE_ERROR` ⟷ `status != 200`
 * * `page_type != PAGE_TYPE_ERROR` → `script_name != NULL && path_info != NULL`
 * * `page_type == PAGE_TYPE_ERROR` → `entry == NULL`
 */
struct template_data {
    enum page_type page_type; //!< type of page to render
    int status;               //!< HTTP status of the response
    struct entry *entry;      //!< Pointer to entry if applicable, else `NULL`
    char *script_name;        //!< value of `SCRIPT_NAME` environment variable
    char *path_info;          //!< value of `PATH_INFO` environment variable
};

/*!
 * @brief Prints beginning of HTML source
 *
 * template_header() is expected to print out the common beginning of
 * any response and allocate any resources the template uses (it's
 * the best place for such things since it is always called as the
 * first template function).
 *
 * Typically it will print the opening part of the HTML document
 * which is common for all pages. It may adjust some parts of it
 * (like headings, title, navigations, …) depending on the `data`
 * that is passed.
 *
 * If `data.page_type == PAGE_TYPE_INDEX`, `data.entry` will point
 * to the first entry or be `NULL` if there are no entries.
 *
 * @param data information about the page being rendered
 *
 * @see struct template_data
 */
void template_header(struct template_data data);

/*!
 * @brief Prints end of HTML source
 *
 * template_footer() should print the common bottom part of any HTML
 * response and free all allocated resources (as it's called last).
 *
 * Usually this involves printing a footer part of the web page and
 * closing the elements opened by template_header().
 *
 * If `data.page_type == PAGE_TYPE_INDEX`, `data.entry` will point
 * to the last entry or be `NULL` if there are no entries.
 *
 * @param data information about the page being rendered
 *
 * @see struct template_data
 */
void template_footer(struct template_data data);
+ * @brief Prints HTML snippet for the main part of the page + * + * template_main() should print the main part of the HTML source + * which is located between template_header() and template_footer(). + * + * Depending on `data.page_type` the following applies: + * + * * For `PAGE_TYPE_ENTRY` template_main() is called once and + * should print the main part of a single entry page. + * * For `PAGE_TYPE_ERROR` template_main() is called once and + * should print the main part of a page informing the user + * about an occurred HTTP error (reflecting `data.status`). + * * For `PAGE_TYPE_INDEX` template_main() is called 0 to n + * times where n is the number of total entries. Each time + * it's called it should print a HTML snippet which is + * suitable as an index entry. Furthermore it should be + * valid HTML regardless how many times it has been called + * before and will be called afterwards. + * + * @see struct template_data + */ +void template_main(struct template_data data); diff --git a/sternenblog/timeutil.c b/sternenblog/timeutil.c new file mode 100644 index 0000000..187ace0 --- /dev/null +++ b/sternenblog/timeutil.c @@ -0,0 +1,97 @@ +#define _POSIX_C_SOURCE 1 +#define _XOPEN_SOURCE 1 // for timezone +#include +#include +#include +#include "timeutil.h" +#include "stringutil.h" + +#include + +char *format_string(enum time_format t) { + switch(t) { + case RSS_TIME_FORMAT: + return "%a, %d %b %Y %T %z"; + // both remaining cases still need a UTC offset + // part at the end which is not supported by + // strftime(3), so we do this ourselves in + // flocaltime + case HTML_TIME_FORMAT_READABLE: + return "%Y-%m-%d %T"; + case ATOM_TIME_FORMAT: + default: + return "%Y-%m-%dT%T"; + } +} + +size_t flocaltime(char *b, enum time_format type, size_t size, const time_t *time) { + tzset(); + struct tm *local = localtime(time); + char *format = format_string(type); + + size_t res = strftime(b, size, format, local); + + if(res == 0) { + return 0; + } + + size_t offset_len = 0; 
+ + if(type == ATOM_TIME_FORMAT || type == HTML_TIME_FORMAT_READABLE) { + // for these formats we need to append a RFC3339 UTC offset + // unfortunately it is *not* exactly provided by strftime, + // but in hindsight it might be better to do a little string + // manipulation than this madness, since the libc timezone + // API is horrible (at least POSIX / glibc) + size_t offset_size = 7; + char offset[offset_size]; + + if(timezone == 0 && !local->tm_isdst) { + offset[0] = 'Z'; + offset[1] = '\0'; + + offset_len = 1; + } else { + // for some reason timezone is seconds *west* of UTC which + // is inverse to how UTC offsets are denoted + long real_offset = (-1) * timezone; + + if(daylight) { + // TODO is this correct in all cases? + if(local->tm_isdst == 1) { + real_offset += 3600; + } + } + + char sign; + if(real_offset > 0) { + sign = '+'; + } else { + sign = '-'; + } + + long abso = labs(real_offset); + long hour = abso / 3600; + long minute = (abso % 3600) / 60; + + offset[0] = sign; + offset[1] = nibble_hex((short) hour / 10); + offset[2] = nibble_hex((short) hour % 10); + offset[3] = ':'; + offset[4] = nibble_hex((short) minute / 10); + offset[5] = nibble_hex((short) minute % 10); + offset[6] = '\0'; + + offset_len = 6; + } + + if(res > 0 && res + offset_size <= size) { + memcpy(b + res, offset, offset_size); + } + } + + // prevent any buffer overflows + b[size - 1] = '\0'; + + return res + offset_len; +} diff --git a/sternenblog/timeutil.h b/sternenblog/timeutil.h new file mode 100644 index 0000000..7d0270e --- /dev/null +++ b/sternenblog/timeutil.h @@ -0,0 +1,43 @@ +/*! + * @file + * @brief Utilities for rendering timestamps as strings + */ +enum time_format { + RSS_TIME_FORMAT, //!< RFC822 formatted time with 4 instead of 2 year digits + ATOM_TIME_FORMAT, //!< RFC3339 formatted time + HTML_TIME_FORMAT_READABLE //!< like `ATOM_TIME_FORMAT`, but with space between date and time +}; + +/*! 
+ * @brief Maximum size necessary to contain the output of flocaltime() + */ +#define MAX_TIMESTR_SIZE 32 +// max HTML/Atom: 24 + NUL byte +// max RSS: 31 + NUL byte + +/*! + * @brief Format given timestamp as a string in the local timezone + * + * flocaltime() is a wrapper around `strftime()` which supports + * a specific set of output formats. In contrast to `strftime()` + * it can output correct RFC3339 time strings and does localtime + * resolution for you. + * + * Example usage to print a RFC3339 formatted timestamp: + * + * ``` + * time_t some_time; + * char strtime[MAX_TIMESTR_SIZE]; + * + * if(flocaltime(strtime, ATOM_TIME_FORMAT, MAX_TIMESTR_SIZE, &some_time) > 0) { + * puts(strtime); + * } + * ``` + * + * @param b output buffer + * @param type time format to use for output + * @param size number of `char`s the buffer can hold + * @param time pointer to timestamp + * @return `0` on error, otherwise length of the string placed in `b` excluding terminating `NUL` byte + */ +size_t flocaltime(char *b, enum time_format type, size_t size, const time_t *time); diff --git a/sternenblog/xml.c b/sternenblog/xml.c new file mode 100644 index 0000000..5965a09 --- /dev/null +++ b/sternenblog/xml.c @@ -0,0 +1,309 @@ +// TODO indent, html escaping +#include +#include +#include +#include +#include + +#include "xml.h" + +#define DEBUG_WARN(ctx, ...) 
\ + if(ctx->warn != NULL) { \ + fprintf(ctx->warn, __VA_ARGS__); \ + } + +void debug_xml_stack(FILE *out, struct xml_stack *stack) { + if(stack != NULL) { + fprintf(out, "%s ", stack->tag); + debug_xml_stack(out, stack->next); + } else { + fputc('\n', out); + } +} + +void free_xml_stack(struct xml_stack *stack) { + if(stack == NULL) { + return; + } + + if(stack->tag != NULL) { + free(stack->tag); + } + + if(stack->next != NULL) { + free_xml_stack(stack->next); + } + + free(stack); +} + +void new_xml_context(struct xml_context *ctx) { + ctx->stack = NULL; + ctx->warn = NULL; + ctx->out = stdout; + ctx->closing_slash = 1; +} + +void del_xml_context(struct xml_context *ctx) { + if(ctx->stack != NULL) { + if(ctx->warn != NULL) { + fputs("Unclosed tags remaining: ", ctx->warn); + debug_xml_stack(ctx->warn, ctx->stack); + } + + free_xml_stack(ctx->stack); + } +} + +void output_xml_escaped_char(FILE *out, char c) { + switch(c) { + case '&': + fputs("&", out); + break; + case '<': + fputs("<", out); + break; + case '>': + fputs(">", out); + break; + case '\'': + fputs("'", out); + break; + case '\"': + fputs(""", out); + break; + default: + fputc(c, out); + break; + } +} + +void xml_escaped(struct xml_context *ctx, const char *str) { + for(size_t i = 0; str[i] != '\0'; i++) { + output_xml_escaped_char(ctx->out, str[i]); + } +} + +void xml_raw(struct xml_context *ctx, const char *str) { + fputs(str, ctx->out); +} + +void output_attrs(FILE *out, va_list attrs, size_t arg_count) { + if(arg_count > 0) { + for(size_t i = 1; i<=arg_count; i++) { + if(i % 2) { + char *name = va_arg(attrs, char *); + if(name == NULL) { + break; + } + + fputc(' ', out); + fputs(name, out); + } else { + char *maybe_val = va_arg(attrs, char *); + if(maybe_val != NULL) { + fputs("=\"", out); + for(size_t i = 0; maybe_val[i] != '\0'; i++) { + output_xml_escaped_char(out, maybe_val[i]); + } + fputc('\"', out); + } + } + } + } +} + +void xml_empty_tag(struct xml_context *ctx, const char *tag, size_t 
attr_count, ...) { + if(tag == NULL || ctx == NULL) { + DEBUG_WARN(ctx, "Got no tag or ctx\n"); + return; + } + + fputc('<', ctx->out); + fputs(tag, ctx->out); + + if(attr_count > 0) { + size_t arg_count = attr_count * 2; + + va_list attrs; + va_start(attrs, attr_count); + + output_attrs(ctx->out, attrs, arg_count); + + va_end(attrs); + } + + if(ctx->closing_slash) { + fputc('/', ctx->out); + } + + fputc('>', ctx->out); +} + +void xml_open_tag_attrs(struct xml_context *ctx, const char *tag, size_t attr_count, ...) { + if(tag == NULL || ctx == NULL) { + DEBUG_WARN(ctx, "Got no tag or ctx\n"); + return; + } + + struct xml_stack *old_stack = ctx->stack; + + fputc('<', ctx->out); + fputs(tag, ctx->out); + + + if(attr_count > 0) { + size_t arg_count = attr_count * 2; + + va_list attrs; + va_start(attrs, attr_count); + + output_attrs(ctx->out, attrs, arg_count); + + va_end(attrs); + } + + fputc('>', ctx->out); + + ctx->stack = malloc(sizeof(struct xml_context)); + + if(ctx->stack == NULL) { + ctx->stack = old_stack; + DEBUG_WARN(ctx, "Could not allocate memory for tag stack, now everything will break.\n") + return; + } + + ctx->stack->next = old_stack; + + size_t tag_size = strlen(tag) + 1; + ctx->stack->type = XML_NORMAL_TAG; + ctx->stack->tag = malloc(sizeof(char) * tag_size); + memcpy(ctx->stack->tag, tag, tag_size); +} + +void xml_open_tag(struct xml_context *ctx, const char *tag) { + xml_open_tag_attrs(ctx, tag, 0); +} + +void xml_close_tag(struct xml_context *ctx, const char *tag) { + if(tag == NULL || ctx == NULL) { + DEBUG_WARN(ctx, "Got no tag or ctx\n"); + return; + } + + if(ctx->stack == NULL) { + DEBUG_WARN(ctx, "Refusing to close tag %s, no tags left to be closed\n", tag); + return; + } + + if(ctx->stack->type != XML_NORMAL_TAG) { + DEBUG_WARN(ctx, "Refusing to close tag %s, wrong tag type\n", tag); + return; + } + + if(strcmp(tag, ctx->stack->tag) != 0) { + DEBUG_WARN(ctx, "Refusing to close tag %s, unclosed tags remaining\n", tag); + return; + } + + 
fputs("out); + fputs(tag, ctx->out); + fputc('>', ctx->out); + + struct xml_stack *old_head = ctx->stack; + + ctx->stack = old_head->next; + + free(old_head->tag); + free(old_head); +} + +void xml_close_all(struct xml_context *ctx) { + xml_close_including(ctx, NULL); +} + +void xml_close_including(struct xml_context *ctx, const char *tag) { + if(ctx == NULL) { + DEBUG_WARN(ctx, "Got no ctx\n"); + return; + } + + if(ctx->stack == NULL) { + if(tag != NULL) { + DEBUG_WARN(ctx, "Hit end of tag stack while searching for tag %s to close\n", tag); + } + return; + } else { + int last_tag = tag != NULL && strcmp(tag, ctx->stack->tag) == 0; + + switch(ctx->stack->type) { + case XML_NORMAL_TAG: + xml_close_tag(ctx, ctx->stack->tag); + break; + case XML_CDATA: + xml_close_cdata(ctx); + break; + default: + DEBUG_WARN(ctx, "Unexpected tag type on stack, aborting\n"); + return; + } + + if(!last_tag) { + xml_close_including(ctx, tag); + } + } +} + +void xml_open_cdata(struct xml_context *ctx) { + if(ctx == NULL) { + DEBUG_WARN(ctx, "Got no ctx\n"); + return; + } + + struct xml_stack *old_stack = ctx->stack; + + ctx->stack = malloc(sizeof(struct xml_stack)); + + if(ctx->stack == NULL) { + ctx->stack = old_stack; + + DEBUG_WARN(ctx, "Could not allocate memory for tag stack, now everything will break.\n"); + return; + } + + ctx->stack->next = old_stack; + ctx->stack->tag = NULL; + ctx->stack->type = XML_CDATA; + + fputs("out); +} + +void xml_close_cdata(struct xml_context *ctx) { + if(ctx == NULL) { + DEBUG_WARN(ctx, "Got no ctx\n"); + return; + } + + if(ctx->stack == NULL) { + DEBUG_WARN(ctx, "No CDATA to close\n"); + return; + } + + if(ctx->stack->type != XML_CDATA) { + DEBUG_WARN(ctx, "No CDATA on top of stack, refusing to close\n"); + return; + } + + struct xml_stack *old_head = ctx->stack; + + ctx->stack = old_head->next; + + if(old_head->tag != NULL) { + // shouldn't happen though + free(old_head->tag); + } + + free(old_head); + + fputs("]]>", ctx->out); +} diff --git 
/*!
 * @brief Type of an XML "tag"
 *
 * This is mostly internally used to be able
 * to keep track of CDATA using `xml_stack`.
 */
enum xml_tag_type {
    XML_NORMAL_TAG, //!< element opened by name
    XML_CDATA       //!< CDATA section, has no tag name
};

/*!
 * @brief Internal linked list type
 *
 * Linked list used internally to keep track of tags to close.
 * The head of the list is the tag that has to be closed next.
 *
 * @see struct xml_context
 */
struct xml_stack {
    enum xml_tag_type type; //!< type of the tag
    char *tag;              //!< tag name if `XML_NORMAL_TAG`, otherwise `NULL`
    struct xml_stack *next; //!< tag to be closed after the current one
};

/*!
 * @brief State and configuration of xml generation.
 *
 * Struct containing both state and configuration of this module.
 * See `new_xml_context()` for usage instructions.
 *
 * @see new_xml_context
 * @see del_xml_context
 * @see struct xml_stack
 */
struct xml_context {
    struct xml_stack *stack; //!< linked list used internally to keep track of open tags
    FILE *out;               //!< Where to write output, defaults to stdout
    FILE *warn;              //!< if not `NULL`, print warnings to handle warn, defaults to `NULL`
    bool closing_slash;      //!< whether to output a closing slash at the end of an empty tag
};

/*!
 * @brief Initialize the `xml_context` structure.
 *
 * Initialize a `struct xml_context` with default values:
 *
 * * empty stack
 * * output to `stdout`
 * * no warnings
 * * closing slashes enabled
 *
 * This function should always be called before any other functions of xml.
 * If you want to use different settings than the default ones, update the
 * struct after calling this function. This way your application won't break
 * if it gets extended.
 *
 * @param ctx context to initialize
 *
 * @see struct xml_context
 * @see del_xml_context
 */
void new_xml_context(struct xml_context *ctx);

/*!
 * @brief Clean up the `xml_context` structure.
 *
 * Frees any dynamically allocated data in a `struct xml_context`.
 * Should always be called before a `struct xml_context` goes out
 * of scope or the program terminates.
 *
 * If `ctx->warn` is not `NULL`, `del_xml_context()` will additionally
 * output a message about any remaining unclosed tags to `ctx->warn`.
 * For example:
 *
 * ```
 * Unclosed tags remaining: article main body html
 * ```
 *
 * @param ctx context to clean up
 *
 * @see struct xml_context
 * @see new_xml_context
 */
void del_xml_context(struct xml_context *ctx);
+ * @brief Output a xml escaped string + * + * Outputs the given string escaped for use with XML. It only + * does minimal-ish escaping, i. e. it escapes all characters + * that have some syntactical meaning in XML. That includes: + * Angled brackets (lower than and greater than), ampersand, + * and single as well as double quotes. All other characters + * are passed through as is and the caller is expected to + * make sure they are correctly encoded, i. e. valid UTF-8 + * characters. + * + * The escaping is not as minimal as possible. In some cases + * you can omit escaping all characters except for `<` and `&`, + * but this would be context-sensitive and therefore + * unnecessarily tedious to implement. Escaping all + * syntactically significant characters has no real downsides + * except maybe using a tiny bit more storage than absolutely + * necessary. + * + * @see xml_raw + */ +void xml_escaped(struct xml_context *ctx, const char *str); + +/*! + * @brief Output a raw string. + * + * Output string to `ctx->out`, equivalent to `fputs(str, ctx.out)`. + * If your string is not already XML, use xml_escaped() to output it + * correcty escaped. + * + * @see struct xml_context + * @see xml_escaped + */ +void xml_raw(struct xml_context *ctx, const char *str); + +/*! + * @brief Output an empty xml tag. + * + * Output an empty xml tag (i. e. a single tag that doesn't need to be closed). + * This call does not change the provided context `ctx`. + * + * The call also outputs given attributes: For `attr_count` n, `xml_empty_tag()` expects + * 2n additional arguments — for each attribute a name and a value. If value is `NULL`, + * an attribute without a value will be output, i. e. just the name without the `="…"`. + * + * For example, `xml_empty_tag(&ctx, "my-tag", 2, "foo", "bar", "baz", NULL);` gives + * `` with default settings. + * + * The attributes' values are XML-escaped automatically. For details on how escaping + * works in xml.h, see xml_escaped(). 
+ * + * If `closing_slash` is 0 in `ctx`, the slash before the closing ">" will be omitted. + * This is useful for HTML5 where it is optional. + * + * @see struct xml_context + */ +void xml_empty_tag(struct xml_context *ctx, const char *tag, size_t attr_count, ...); + +/*! + * @brief Output an opening tag with attributes. + * + * Output an opening tag with attributes and add it to `ctx->stack` for future reference. + * + * Attributes work exactly like in `xml_empty_tag()`. + * @see xml_empty_tag + */ +void xml_open_tag_attrs(struct xml_context *ctx, const char *tag, size_t attr_count, ...); + +/*! + * @brief Output an opening tag without any attributes. + * + * Shorthand for `xml_open_tag_attrs(ctx, tag, 0)`. + * @see xml_open_tag_attrs + */ +void xml_open_tag(struct xml_context *ctx, const char *tag); + +/*! + * @brief Close a previously opened tag. + * + * `xml_close_tag()` first checks the head of the current `xml_stack` + * if the provided `tag` is in fact the current innermost opened tag. + * + * If this is true, it outputs the closing tag, removes the reference + * to the tag on top of the `xml_stack` and frees this part of the + * structure. + * + * If it isn't true, it does nothing and outputs an appropriate warning + * to `ctx->warn` if it is not `NULL`: + * + * * `Refusing to close tag xyz, unclosed tags remaining` + * * `Refusing to close tag zyx, no tags left to be closed` + * + * This sanity checking of tag closing ensures that a xml document + * constructed by this module has no nesting errors, i. e. every tag + * is closed at the proper nesting level. Because it is only simple + * runtime checking in specific calls it can't prevent / detect the + * following errors: + * + * * It can't prevent unclosed tags remaining at the end. You can + * however prevent this by calling `xml_close_all()` at the end of + * your XML outputting code. 
+ * * It can worsen a situation with remaining unclosed tags: If + * an inner tag is left unclosed, it will refuse to close all + * outer tags, leaving a trail of unclosed tags behind. + * * It will always attribute the error to closing: Some errors + * will be caused by missing an `xml_open_tag()` somewhere, but + * `xml_close_tag()` will think the closing is erroneous. Of course + * it's also unable to resolve the error. + * * It can't compare against the intended XML structure: Sometimes + * a programming error will result in an "wrong" XML structure + * which is still completely valid to `xml_close_tag()`, i. e. + * correctly nested. + * + * Overall the sanity checking is limited, as you can see, but + * it should generate *some* warning if a detectable issue is + * present (invalid XML nesting) and never actively output + * a tag that makes the XML nesting invalid. + * + * These properties should however be enough to detect issues + * quickly in development. Additionally the sanity checking is + * cheap enough to be feasible in production. `xml_close_tag()` + * only needs to call `strcmp` once per invocation. + * + * @see xml_open_tag_attrs + * @see xml_open_tag + * @see struct xml_stack + * @see xml_close_all + * @see xml_close_including + */ +void xml_close_tag(struct xml_context *ctx, const char *tag); + +/*! + * @brief Close all remaining unclosed tags + * + * `xml_close_all()` iterates through the `xml_stack` and calls + * `xml_close_tag()` or `xml_close_cdata()` respectively for every + * entry in it. A call to it will thus result in an empty `xml_stack` + * and all previously opened tags being closed correctly. + * + * Internally it's an alias for `xml_close_all(ctx, NULL)` + * + * Note that `xml_close_all()` will limit error checking, since it + * (by nature) always succeeds and has no insight into what the + * programmer thinks needs to be closed. 
+ * + * @see xml_close_tag + * @see xml_close_including + * @see struct xml_stack + */ +void xml_close_all(struct xml_context *ctx); + +/*! + * @brief Close all unclosed tags until a given one. + * + * `xml_close_including()` works like `xml_close_all()`, but + * will stop after it hits a tag of the given name. + * If the given tag is not present in the stack, it behaves + * like `xml_close_all()`. It is not possible to match + * a `CDATA` section using `xml_close_including()`. + * + * Be aware that it might lead to unexpected results if + * multiple tags of the same are nested. Consider the + * following snippet. + * + * ```c + * xml_open_tag(&ctx, "a"); + * xml_open_tag(&ctx, "b"); + * xml_close_tag(&ctx, "a"); + * xml_open_tag(&ctx, "c"); + * xml_raw(&ctx, "value"); + * xml_close_including(&ctx, "a"); + * ``` + * + * `xml_close_including()` will stop as soon as it hits the first + * tag "a", although it might be intended to keep going until the + * outermost one. The result would be: + * + * ```xml + * value + * ``` + * + * This is the behavior of `xml_close_including()`: It closes all + * unclosed tags until it hits the first instance of the specified + * tag which is also closed. + * + * This function will also limit error detection like `xml_close_all()`. + * For an explanation of this, see its documentation. + * + * @see xml_close_all + * @see xml_close_tag + * @see struct xml_stack + */ +void xml_close_including(struct xml_context *ctx, const char *tag); + +/*! + * @brief Start CDATA section + * + * Behaves like xml_open_tag(), but for opening `CDATA` sections. + * Internally the `XML_CDATA` type of `struct xml_stack` is used. + * + * Note that this function won't prevent `CDATA` sections or XML + * elements inside a `CDATA` section, since this is sometimes + * useful. + * + * @see xml_close_cdata + * @see enum xml_tag_type + * @see struct xml_stack + */ +void xml_open_cdata(struct xml_context *ctx); + +/*! 
+ * @brief Close CDATA section + * + * Behaves like xml_close_tag(), but for `CDATA` sections. + * + * Checks the top of the stack if it is a `CDATA` section. + * In that case closes it and updates the stack, otherwise + * does nothing and if applicable outputs a warning. + * + * @see xml_open_cdata + */ +void xml_close_cdata(struct xml_context *ctx); diff --git a/stringutil.c b/stringutil.c deleted file mode 100644 index 44cec9e..0000000 --- a/stringutil.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include -#include - -char nibble_hex(short h) { - switch(h) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 9: - return (h + 48); - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - return (h + 55); - default: - return 0; - } -} - -char *catn_alloc(size_t n, ...) { - va_list args; - size_t pos = 0; - char *buffer = NULL; - size_t buffer_size = 0; - va_start(args, n); - - for(size_t i = 0; i < n; i++) { - char *str = va_arg(args, char *); - if(str != NULL) { - size_t copy_len = strlen(str) + (i + 1 == n ? 1 : 0); - - char *tmp = realloc(buffer, buffer_size + copy_len); - - if(tmp == NULL) { - break; - } else { - buffer = tmp; - } - - buffer_size += copy_len; - memcpy(buffer + pos, str, copy_len); - pos += copy_len; - } - } - - if(buffer != NULL) { - // ensure it's NUL terminated - buffer[buffer_size - 1] = '\0'; - } - - va_end(args); - - return buffer; -} - diff --git a/stringutil.h b/stringutil.h deleted file mode 100644 index c7cb40d..0000000 --- a/stringutil.h +++ /dev/null @@ -1,27 +0,0 @@ -/*! - * @file - * @brief Utilities for string construction - */ - -/*! - * @brief Returns hex digit for given integer - * - * Will return appropriate `char` in range 0-F - * for input in range 0-15. Can be abused to - * return decimal digits for range 0-9. - */ -char nibble_hex(short h); - -/*! 
- * @brief Concatenate arbitrary number of strings into - * dynamically allocated buffer - * - * catn_alloc() concats the `n` given strings into a - * dynamically allocated and resized buffer and returns - * it. This buffer must be cleaned up by `free()` before - * it goes out of scope. - * - * @param n number of strings given as `va_args` - * @return pointer to concatenated strings or `NULL` on error. - */ -char *catn_alloc(size_t n, ...); diff --git a/template.h b/template.h deleted file mode 100644 index 24b9504..0000000 --- a/template.h +++ /dev/null @@ -1,101 +0,0 @@ -/*! - * @file template.h - * @brief Declarations of functions to be implemented by a sternenblog template - * - * Requires prior inclusion of core.h. - * - * The functions declared in template.h are called by `blog_index()` - * and `blog_entry()` to generate the HTML document CGI responses - * involving HTML (contrary to the RSS feed which is independent - * from templates). - * - * These functions can be implemented by a custom C source file - * in order to customize the HTML output of sternenblog. Every - * function is expected to output HTML to `stdout`. They themselves - * can expect to be called in the following order: - * - * * template_header() - * * One of template_single_entry(), template_index_entry (any number - * of times) or template_error() - * * template_footer() - */ - -/*! - * @brief (Meta) data about the page being served - * - * `struct template_data` is used to pass information about - * the current page to the template. It is received as the - * single argument by all template functions. 
- * - * The following assumptions about its contents can be made: - * - * * `page_type == PAGE_TYPE_ENTRY` → `entry != NULL` - * * `page_type == PAGE_TYPE_ERROR` ⟷ `status != 200` - * * `page_type != PAGE_TYPE_ERROR` → `script_name != NULL && path_info != NULL` - * * `page_type == PAGE_TYPE_ERROR` → `entry == NULL` - */ -struct template_data { - enum page_type page_type; //!< type of page to render - int status; //!< HTTP status of the response - struct entry *entry; //!< Pointer to entry if applicable, else `NULL` - char *script_name; //!< value of `SCRIPT_NAME` environment variable - char *path_info; //!< value of `PATH_INFO` environment variable -}; - -/*! - * @brief Prints beginning of HTML source - * - * template_header() is expected to print out the common beginning of - * any response and allocate any resources the template uses (it's - * the best place for such things since it is always called as the - * first template function). - * - * Typically it will print the HTML `` and the header part - * of the `` element which is common for all pages. It may - * adjust some parts of it (like headings, title, navigations, …) - * depending on the `data` that is passed. - * - * If `data.page_type == PAGE_TYPE_INDEX`, `data.entry` will point - * to the first entry or be `NULL` if there are no entries. - * - * @see struct template_data - */ -void template_header(struct template_data data); - -/*! - * @brief Prints end of HTML source - * - * template_footer() should print the common bottom part of any HTML - * response and free all allocated resources (as it's called last). - * - * Usually this involves printing a footer part of the web page and - * closing the `` and `` elements. - * - * If `data.page_type == PAGE_TYPE_INDEX`, `data.entry` will point - * to the last entry or be `NULL` if there are no entries. - */ -void template_footer(struct template_data data); - -/*! 
- * @brief Prints HTML snippet for the main part of the page - * - * template_main() should print the main part of the HTML source - * which is located between template_header() and template_footer(). - * - * Depending on `data.page_type` the following applies: - * - * * For `PAGE_TYPE_ENTRY` template_main() is called once and - * should print the main part of a single entry page. - * * For `PAGE_TYPE_ERROR` template_main() is called once and - * should print the main part of a page informing the user - * about an occurred HTTP error (reflecting `data.status`). - * * For `PAGE_TYPE_INDEX` template_main() is called 0 to n - * times where n is the number of total entries. Each time - * it's called it should print a HTML snippet which is - * suitable as an index entry. Furthermore it should be - * valid HTML regardless how many times it has been called - * before and will be called afterwards. - * - * @see struct template_data - */ -void template_main(struct template_data data); diff --git a/templates/simple.c b/templates/simple.c index 9c9f20f..1b3c293 100644 --- a/templates/simple.c +++ b/templates/simple.c @@ -3,13 +3,14 @@ #include #include -#include -#include #include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include static struct xml_context ctx; diff --git a/timeutil.c b/timeutil.c deleted file mode 100644 index 187ace0..0000000 --- a/timeutil.c +++ /dev/null @@ -1,97 +0,0 @@ -#define _POSIX_C_SOURCE 1 -#define _XOPEN_SOURCE 1 // for timezone -#include -#include -#include -#include "timeutil.h" -#include "stringutil.h" - -#include - -char *format_string(enum time_format t) { - switch(t) { - case RSS_TIME_FORMAT: - return "%a, %d %b %Y %T %z"; - // both remaining cases still need a UTC offset - // part at the end which is not supported by - // strftime(3), so we do this ourselves in - // flocaltime - case HTML_TIME_FORMAT_READABLE: - return "%Y-%m-%d %T"; - case ATOM_TIME_FORMAT: - default: - return 
"%Y-%m-%dT%T"; - } -} - -size_t flocaltime(char *b, enum time_format type, size_t size, const time_t *time) { - tzset(); - struct tm *local = localtime(time); - char *format = format_string(type); - - size_t res = strftime(b, size, format, local); - - if(res == 0) { - return 0; - } - - size_t offset_len = 0; - - if(type == ATOM_TIME_FORMAT || type == HTML_TIME_FORMAT_READABLE) { - // for these formats we need to append a RFC3339 UTC offset - // unfortunately it is *not* exactly provided by strftime, - // but in hindsight it might be better to do a little string - // manipulation than this madness, since the libc timezone - // API is horrible (at least POSIX / glibc) - size_t offset_size = 7; - char offset[offset_size]; - - if(timezone == 0 && !local->tm_isdst) { - offset[0] = 'Z'; - offset[1] = '\0'; - - offset_len = 1; - } else { - // for some reason timezone is seconds *west* of UTC which - // is inverse to how UTC offsets are denoted - long real_offset = (-1) * timezone; - - if(daylight) { - // TODO is this correct in all cases? - if(local->tm_isdst == 1) { - real_offset += 3600; - } - } - - char sign; - if(real_offset > 0) { - sign = '+'; - } else { - sign = '-'; - } - - long abso = labs(real_offset); - long hour = abso / 3600; - long minute = (abso % 3600) / 60; - - offset[0] = sign; - offset[1] = nibble_hex((short) hour / 10); - offset[2] = nibble_hex((short) hour % 10); - offset[3] = ':'; - offset[4] = nibble_hex((short) minute / 10); - offset[5] = nibble_hex((short) minute % 10); - offset[6] = '\0'; - - offset_len = 6; - } - - if(res > 0 && res + offset_size <= size) { - memcpy(b + res, offset, offset_size); - } - } - - // prevent any buffer overflows - b[size - 1] = '\0'; - - return res + offset_len; -} diff --git a/timeutil.h b/timeutil.h deleted file mode 100644 index 7d0270e..0000000 --- a/timeutil.h +++ /dev/null @@ -1,43 +0,0 @@ -/*! 
- * @file - * @brief Utilities for rendering timestamps as strings - */ -enum time_format { - RSS_TIME_FORMAT, //!< RFC822 formatted time with 4 instead of 2 year digits - ATOM_TIME_FORMAT, //!< RFC3339 formatted time - HTML_TIME_FORMAT_READABLE //!< like `ATOM_TIME_FORMAT`, but with space between date and time -}; - -/*! - * @brief Maximum size necessary to contain the output of flocaltime() - */ -#define MAX_TIMESTR_SIZE 32 -// max HTML/Atom: 24 + NUL byte -// max RSS: 31 + NUL byte - -/*! - * @brief Format given timestamp as a string in the local timezone - * - * flocaltime() is a wrapper around `strftime()` which supports - * a specific set of output formats. In contrast to `strftime()` - * it can output correct RFC3339 time strings and does localtime - * resolution for you. - * - * Example usage to print a RFC3339 formatted timestamp: - * - * ``` - * time_t some_time; - * char strtime[MAX_TIMESTR_SIZE]; - * - * if(flocaltime(strtime, ATOM_TIME_FORMAT, MAX_TIMESTR_SIZE, &some_time) > 0) { - * puts(strtime); - * } - * ``` - * - * @param b output buffer - * @param type time format to use for output - * @param size number of `char`s the buffer can hold - * @param time pointer to timestamp - * @return `0` on error, otherwise length of the string placed in `b` excluding terminating `NUL` byte - */ -size_t flocaltime(char *b, enum time_format type, size_t size, const time_t *time); diff --git a/xml.c b/xml.c deleted file mode 100644 index 5965a09..0000000 --- a/xml.c +++ /dev/null @@ -1,309 +0,0 @@ -// TODO indent, html escaping -#include -#include -#include -#include -#include - -#include "xml.h" - -#define DEBUG_WARN(ctx, ...) 
\ - if(ctx->warn != NULL) { \ - fprintf(ctx->warn, __VA_ARGS__); \ - } - -void debug_xml_stack(FILE *out, struct xml_stack *stack) { - if(stack != NULL) { - fprintf(out, "%s ", stack->tag); - debug_xml_stack(out, stack->next); - } else { - fputc('\n', out); - } -} - -void free_xml_stack(struct xml_stack *stack) { - if(stack == NULL) { - return; - } - - if(stack->tag != NULL) { - free(stack->tag); - } - - if(stack->next != NULL) { - free_xml_stack(stack->next); - } - - free(stack); -} - -void new_xml_context(struct xml_context *ctx) { - ctx->stack = NULL; - ctx->warn = NULL; - ctx->out = stdout; - ctx->closing_slash = 1; -} - -void del_xml_context(struct xml_context *ctx) { - if(ctx->stack != NULL) { - if(ctx->warn != NULL) { - fputs("Unclosed tags remaining: ", ctx->warn); - debug_xml_stack(ctx->warn, ctx->stack); - } - - free_xml_stack(ctx->stack); - } -} - -void output_xml_escaped_char(FILE *out, char c) { - switch(c) { - case '&': - fputs("&", out); - break; - case '<': - fputs("<", out); - break; - case '>': - fputs(">", out); - break; - case '\'': - fputs("'", out); - break; - case '\"': - fputs(""", out); - break; - default: - fputc(c, out); - break; - } -} - -void xml_escaped(struct xml_context *ctx, const char *str) { - for(size_t i = 0; str[i] != '\0'; i++) { - output_xml_escaped_char(ctx->out, str[i]); - } -} - -void xml_raw(struct xml_context *ctx, const char *str) { - fputs(str, ctx->out); -} - -void output_attrs(FILE *out, va_list attrs, size_t arg_count) { - if(arg_count > 0) { - for(size_t i = 1; i<=arg_count; i++) { - if(i % 2) { - char *name = va_arg(attrs, char *); - if(name == NULL) { - break; - } - - fputc(' ', out); - fputs(name, out); - } else { - char *maybe_val = va_arg(attrs, char *); - if(maybe_val != NULL) { - fputs("=\"", out); - for(size_t i = 0; maybe_val[i] != '\0'; i++) { - output_xml_escaped_char(out, maybe_val[i]); - } - fputc('\"', out); - } - } - } - } -} - -void xml_empty_tag(struct xml_context *ctx, const char *tag, size_t 
attr_count, ...) { - if(tag == NULL || ctx == NULL) { - DEBUG_WARN(ctx, "Got no tag or ctx\n"); - return; - } - - fputc('<', ctx->out); - fputs(tag, ctx->out); - - if(attr_count > 0) { - size_t arg_count = attr_count * 2; - - va_list attrs; - va_start(attrs, attr_count); - - output_attrs(ctx->out, attrs, arg_count); - - va_end(attrs); - } - - if(ctx->closing_slash) { - fputc('/', ctx->out); - } - - fputc('>', ctx->out); -} - -void xml_open_tag_attrs(struct xml_context *ctx, const char *tag, size_t attr_count, ...) { - if(tag == NULL || ctx == NULL) { - DEBUG_WARN(ctx, "Got no tag or ctx\n"); - return; - } - - struct xml_stack *old_stack = ctx->stack; - - fputc('<', ctx->out); - fputs(tag, ctx->out); - - - if(attr_count > 0) { - size_t arg_count = attr_count * 2; - - va_list attrs; - va_start(attrs, attr_count); - - output_attrs(ctx->out, attrs, arg_count); - - va_end(attrs); - } - - fputc('>', ctx->out); - - ctx->stack = malloc(sizeof(struct xml_context)); - - if(ctx->stack == NULL) { - ctx->stack = old_stack; - DEBUG_WARN(ctx, "Could not allocate memory for tag stack, now everything will break.\n") - return; - } - - ctx->stack->next = old_stack; - - size_t tag_size = strlen(tag) + 1; - ctx->stack->type = XML_NORMAL_TAG; - ctx->stack->tag = malloc(sizeof(char) * tag_size); - memcpy(ctx->stack->tag, tag, tag_size); -} - -void xml_open_tag(struct xml_context *ctx, const char *tag) { - xml_open_tag_attrs(ctx, tag, 0); -} - -void xml_close_tag(struct xml_context *ctx, const char *tag) { - if(tag == NULL || ctx == NULL) { - DEBUG_WARN(ctx, "Got no tag or ctx\n"); - return; - } - - if(ctx->stack == NULL) { - DEBUG_WARN(ctx, "Refusing to close tag %s, no tags left to be closed\n", tag); - return; - } - - if(ctx->stack->type != XML_NORMAL_TAG) { - DEBUG_WARN(ctx, "Refusing to close tag %s, wrong tag type\n", tag); - return; - } - - if(strcmp(tag, ctx->stack->tag) != 0) { - DEBUG_WARN(ctx, "Refusing to close tag %s, unclosed tags remaining\n", tag); - return; - } - - 
fputs("out); - fputs(tag, ctx->out); - fputc('>', ctx->out); - - struct xml_stack *old_head = ctx->stack; - - ctx->stack = old_head->next; - - free(old_head->tag); - free(old_head); -} - -void xml_close_all(struct xml_context *ctx) { - xml_close_including(ctx, NULL); -} - -void xml_close_including(struct xml_context *ctx, const char *tag) { - if(ctx == NULL) { - DEBUG_WARN(ctx, "Got no ctx\n"); - return; - } - - if(ctx->stack == NULL) { - if(tag != NULL) { - DEBUG_WARN(ctx, "Hit end of tag stack while searching for tag %s to close\n", tag); - } - return; - } else { - int last_tag = tag != NULL && strcmp(tag, ctx->stack->tag) == 0; - - switch(ctx->stack->type) { - case XML_NORMAL_TAG: - xml_close_tag(ctx, ctx->stack->tag); - break; - case XML_CDATA: - xml_close_cdata(ctx); - break; - default: - DEBUG_WARN(ctx, "Unexpected tag type on stack, aborting\n"); - return; - } - - if(!last_tag) { - xml_close_including(ctx, tag); - } - } -} - -void xml_open_cdata(struct xml_context *ctx) { - if(ctx == NULL) { - DEBUG_WARN(ctx, "Got no ctx\n"); - return; - } - - struct xml_stack *old_stack = ctx->stack; - - ctx->stack = malloc(sizeof(struct xml_stack)); - - if(ctx->stack == NULL) { - ctx->stack = old_stack; - - DEBUG_WARN(ctx, "Could not allocate memory for tag stack, now everything will break.\n"); - return; - } - - ctx->stack->next = old_stack; - ctx->stack->tag = NULL; - ctx->stack->type = XML_CDATA; - - fputs("out); -} - -void xml_close_cdata(struct xml_context *ctx) { - if(ctx == NULL) { - DEBUG_WARN(ctx, "Got no ctx\n"); - return; - } - - if(ctx->stack == NULL) { - DEBUG_WARN(ctx, "No CDATA to close\n"); - return; - } - - if(ctx->stack->type != XML_CDATA) { - DEBUG_WARN(ctx, "No CDATA on top of stack, refusing to close\n"); - return; - } - - struct xml_stack *old_head = ctx->stack; - - ctx->stack = old_head->next; - - if(old_head->tag != NULL) { - // shouldn't happen though - free(old_head->tag); - } - - free(old_head); - - fputs("]]>", ctx->out); -} diff --git a/xml.h 
b/xml.h deleted file mode 100644 index 869a579..0000000 --- a/xml.h +++ /dev/null @@ -1,340 +0,0 @@ -/*! - * @file xml.h - * @brief Simple library for constructing XML documents - * - * This library provides a C interface for opening and closing - * XML tags as well as filling them with content. It is mainly - * intended for constructing XML/HTML documents by directly - * writing them to `stdout`. - * - * It's main advantage over plain `printf()` is that it keeps - * track of open tags, enabling it to automatically close - * open tags (saving a few lines of code) using `xml_close_all()` - * and `xml_close_including()` and/or to detect errors in the - * programmer's XML nesting. For information on its sanity - * checking abilities see the documentation of `xml_close_tag()`. - * - * Currently it has some limitations (possibly incomplete list): - * - * * It does not give the calling code feedback if errors occurred - * * It doesn't do validity checking of tags and attributes - * (legal characters etc.) - * * It can't generate pretty output (i. e. properly indented), - * its output is currently always "minified". - * - * For handling arbitrary data this library is probably not a good - * fit, it is mainly intended and tested for generating HTML and - * RSS documents in a CGI-like environment from trusted data. - * - * An example application generating a HTML5 page looks like this: - * - * @include xml_example.c - */ -#include -#include -#include - -/*! - * @brief Type of an XML "tag" - * - * This is mostly internally used to be able - * to keep track of CDATA using `xml_stack`. - */ -enum xml_tag_type { - XML_NORMAL_TAG, - XML_CDATA -}; - -/*! - * @brief Internal linked list type - * - * Linked list used internally to keep track of tags to close. - * - * @see struct xml_context - */ -struct xml_stack { - enum xml_tag_type type; //! 
type of the tag - char *tag; //!< tag name if `XML_NORMAL_TAG`, otherwise `NULL` - struct xml_stack *next; //!< tag to be closed after the current one -}; - -/*! - * @brief State and configuration of xml generation. - * - * Struct containing both state and configuration of this module. - * See `new_xml_context()` for usage instructions. - * - * @see new_xml_context - * @see del_xml_context - * @see struct xml_stack - */ -struct xml_context { - struct xml_stack *stack; //!< linked list used internally to keep track of open tags - FILE *out; //!< Where to write output, defaults to stdout - FILE *warn; //!< if not `NULL`, print warnings to handle warn, defaults to `NULL` - bool closing_slash; //!< whether to output a closing slash at the end of an empty tag -}; - -/*! - * @brief Initialize the `xml_context` structure. - * - * Initialize a `struct xml_context` with default values: - * - * * empty stack - * * output to `stdout` - * * no warnings - * * closing slashes enabled - * - * This function should always be called before any other functions of xml. - * If you want to use different settings than the default ones, update the - * struct after calling this function. This way your application won't break - * if it gets extended. - * - * @see struct xml_context - * @see del_xml_context - */ -void new_xml_context(struct xml_context *ctx); - -/*! - * @brief Clean up the `xml_context` structure. - * - * Frees any dynamically allocated data in a `struct xml_context`. - * Should always be called before a `struct xml_context` goes out - * of scope or the program terminates. - * - * If `ctx->warn` is not `NULL`, `del_xml_context()` will additionally - * output a message about any remaining unclosed tags to `ctx->warn`. - * For example: - * - * ``` - * Unclosed tags remaining: article main body html - * ``` - * - * @see struct xml_context - * @see new_xml_context - */ -void del_xml_context(struct xml_context *ctx); - -/*! 
- * @brief Output an XML-escaped string - * - * Outputs the given string escaped for use with XML. It only - * does minimal-ish escaping, i. e. it escapes all characters - * that have some syntactical meaning in XML. That includes: - * Angled brackets (lower than and greater than), ampersand, - * and single as well as double quotes. All other characters - * are passed through as is and the caller is expected to - * make sure they are correctly encoded, i. e. valid UTF-8 - * characters. - * - * The escaping is not as minimal as possible. In some cases - * you can omit escaping all characters except for `<` and `&`, - * but this would be context-sensitive and therefore - * unnecessarily tedious to implement. Escaping all - * syntactically significant characters has no real downsides - * except maybe using a tiny bit more storage than absolutely - * necessary. - * - * @see xml_raw - */ -void xml_escaped(struct xml_context *ctx, const char *str); - -/*! - * @brief Output a raw string. - * - * Output string to `ctx->out`, equivalent to `fputs(str, ctx->out)`. - * If your string is not already XML, use xml_escaped() to output it - * correctly escaped. - * - * @see struct xml_context - * @see xml_escaped - */ -void xml_raw(struct xml_context *ctx, const char *str); - -/*! - * @brief Output an empty xml tag. - * - * Output an empty xml tag (i. e. a single tag that doesn't need to be closed). - * This call does not change the provided context `ctx`. - * - * The call also outputs given attributes: For `attr_count` n, `xml_empty_tag()` expects - * 2n additional arguments — for each attribute a name and a value. If value is `NULL`, - * an attribute without a value will be output, i. e. just the name without the `="…"`. - * - * For example, `xml_empty_tag(&ctx, "my-tag", 2, "foo", "bar", "baz", NULL);` gives - * `` with default settings. - * - * The attributes' values are XML-escaped automatically. For details on how escaping - * works in xml.h, see xml_escaped(). 
- * - * If `closing_slash` is 0 in `ctx`, the slash before the closing ">" will be omitted. - * This is useful for HTML5 where it is optional. - * - * @see struct xml_context - */ -void xml_empty_tag(struct xml_context *ctx, const char *tag, size_t attr_count, ...); - -/*! - * @brief Output an opening tag with attributes. - * - * Output an opening tag with attributes and add it to `ctx->stack` for future reference. - * - * Attributes work exactly like in `xml_empty_tag()`. - * @see xml_empty_tag - */ -void xml_open_tag_attrs(struct xml_context *ctx, const char *tag, size_t attr_count, ...); - -/*! - * @brief Output an opening tag without any attributes. - * - * Shorthand for `xml_open_tag_attrs(ctx, tag, 0)`. - * @see xml_open_tag_attrs - */ -void xml_open_tag(struct xml_context *ctx, const char *tag); - -/*! - * @brief Close a previously opened tag. - * - * `xml_close_tag()` first checks the head of the current `xml_stack` - * if the provided `tag` is in fact the current innermost opened tag. - * - * If this is true, it outputs the closing tag, removes the reference - * to the tag on top of the `xml_stack` and frees this part of the - * structure. - * - * If it isn't true, it does nothing and outputs an appropriate warning - * to `ctx->warn` if it is not `NULL`: - * - * * `Refusing to close tag xyz, unclosed tags remaining` - * * `Refusing to close tag zyx, no tags left to be closed` - * - * This sanity checking of tag closing ensures that a xml document - * constructed by this module has no nesting errors, i. e. every tag - * is closed at the proper nesting level. Because it is only simple - * runtime checking in specific calls it can't prevent / detect the - * following errors: - * - * * It can't prevent unclosed tags remaining at the end. You can - * however prevent this by calling `xml_close_all()` at the end of - * your XML outputting code. 
- * * It can worsen a situation with remaining unclosed tags: If - * an inner tag is left unclosed, it will refuse to close all - * outer tags, leaving a trail of unclosed tags behind. - * * It will always attribute the error to closing: Some errors - * will be caused by missing an `xml_open_tag()` somewhere, but - * `xml_close_tag()` will think the closing is erroneous. Of course - * it's also unable to resolve the error. - * * It can't compare against the intended XML structure: Sometimes - * a programming error will result in a "wrong" XML structure - * which is still completely valid to `xml_close_tag()`, i. e. - * correctly nested. - * - * Overall the sanity checking is limited, as you can see, but - * it should generate *some* warning if a detectable issue is - * present (invalid XML nesting) and never actively output - * a tag that makes the XML nesting invalid. - * - * These properties should however be enough to detect issues - * quickly in development. Additionally the sanity checking is - * cheap enough to be feasible in production. `xml_close_tag()` - * only needs to call `strcmp` once per invocation. - * - * @see xml_open_tag_attrs - * @see xml_open_tag - * @see struct xml_stack - * @see xml_close_all - * @see xml_close_including - */ -void xml_close_tag(struct xml_context *ctx, const char *tag); - -/*! - * @brief Close all remaining unclosed tags - * - * `xml_close_all()` iterates through the `xml_stack` and calls - * `xml_close_tag()` or `xml_close_cdata()` respectively for every - * entry in it. A call to it will thus result in an empty `xml_stack` - * and all previously opened tags being closed correctly. - * - * Internally it's an alias for `xml_close_including(ctx, NULL)`. - * - * Note that `xml_close_all()` will limit error checking, since it - * (by nature) always succeeds and has no insight into what the - * programmer thinks needs to be closed. 
- * - * @see xml_close_tag - * @see xml_close_including - * @see struct xml_stack - */ -void xml_close_all(struct xml_context *ctx); - -/*! - * @brief Close all unclosed tags until a given one. - * - * `xml_close_including()` works like `xml_close_all()`, but - * will stop after it hits a tag of the given name. - * If the given tag is not present in the stack, it behaves - * like `xml_close_all()`. It is not possible to match - * a `CDATA` section using `xml_close_including()`. - * - * Be aware that it might lead to unexpected results if - * multiple tags of the same name are nested. Consider the - * following snippet. - * - * ```c - * xml_open_tag(&ctx, "a"); - * xml_open_tag(&ctx, "b"); - * xml_open_tag(&ctx, "a"); - * xml_open_tag(&ctx, "c"); - * xml_raw(&ctx, "value"); - * xml_close_including(&ctx, "a"); - * ``` - * - * `xml_close_including()` will stop as soon as it hits the first - * tag "a", although it might be intended to keep going until the - * outermost one. The result would be: - * - * ```xml - * <a><b><a><c>value</c></a> - * ``` - * - * This is the behavior of `xml_close_including()`: It closes all - * unclosed tags until it hits the first instance of the specified - * tag which is also closed. - * - * This function will also limit error detection like `xml_close_all()`. - * For an explanation of this, see its documentation. - * - * @see xml_close_all - * @see xml_close_tag - * @see struct xml_stack - */ -void xml_close_including(struct xml_context *ctx, const char *tag); - -/*! - * @brief Start CDATA section - * - * Behaves like xml_open_tag(), but for opening `CDATA` sections. - * Internally the `XML_CDATA` type of `struct xml_stack` is used. - * - * Note that this function won't prevent `CDATA` sections or XML - * elements inside a `CDATA` section, since this is sometimes - * useful. - * - * @see xml_close_cdata - * @see enum xml_tag_type - * @see struct xml_stack - */ -void xml_open_cdata(struct xml_context *ctx); - -/*! 
- * @brief Close CDATA section - * - * Behaves like xml_close_tag(), but for `CDATA` sections. - * - * Checks the top of the stack if it is a `CDATA` section. - * In that case closes it and updates the stack, otherwise - * does nothing and if applicable outputs a warning. - * - * @see xml_open_cdata - */ -void xml_close_cdata(struct xml_context *ctx); -- cgit 1.4.1