diff options
author | sternenseemann <sternenseemann@systemli.org> | 2020-08-12 14:44:00 +0200 |
---|---|---|
committer | sternenseemann <sternenseemann@systemli.org> | 2020-08-12 14:44:00 +0200 |
commit | a9fd16d86376e4d80a08728a7da44d4c653fa796 (patch) | |
tree | f8041394ac89d32c212007a190ee220d7331095f | |
parent | 56cee5404fae78b979a00609271b9528df1a8987 (diff) |
feat(entry): url encode entry.link
This makes it possible to use spaces and some reserved characters of URLs in post filenames.
-rw-r--r-- | cgiutil.c | 96 | ||||
-rw-r--r-- | cgiutil.h | 38 | ||||
-rw-r--r-- | entry.c | 9 |
3 files changed, 141 insertions, 2 deletions
diff --git a/cgiutil.c b/cgiutil.c index 410ea62..606a10e 100644 --- a/cgiutil.c +++ b/cgiutil.c @@ -1,5 +1,7 @@ #include <errno.h> +#include <stdbool.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> void send_header(char key[], char val[]) { @@ -43,3 +45,97 @@ int http_errno(int err) { return 500; } } + +char nibble_hex(short h) { + switch(h) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + return (h + 48); + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + return (h + 55); + default: + return 0; + } +} + +int urlencode_realloc(char **input, int size) { + if(*input == NULL || size <= 0) { + return -1; + } + + int output_size = size; + char *output = malloc(output_size); + int output_pos = 0; + + if(output == NULL) { + return -1; + } + + for(int i = 0; i < size; i++) { + char c = *(*input + i); + bool needs_escape; + switch(c) { + // generic delimiters + // we assume we never need to escape '/'. This + // should hold since on unix filenames won't + // contain slashes and the basis for all URLs + // in sternenblog are actual files + case ':': case '?': case '#': case '[': case ']': case '@': + // sub delimiters + case '!': case '$': case '&': case '\'': case '(': case ')': + case '*': case '+': case ',': case ';': case '=': + // other characters to encode + case '%': case ' ': + needs_escape = 1; + break; + // in order to simplify the code we just assume + // everything else doesn't have to be encoded + // + // otherwise we'd need to be UTF-8 aware here + // and consider more than one byte at a time. + default: + needs_escape = 0; + } + + int necessary_space = needs_escape ? 
3 : 1; + + if(output_pos + necessary_space >= output_size) { + output_size += necessary_space; + char *tmp = realloc(output, output_size); + if(tmp == NULL) { + free(output); + return -1; + } else { + output = tmp; + } + } + + if(needs_escape) { + short a = (c & 0xf0) >> 4; + short b = c & 0x0f; + output[output_pos++] = '%'; + output[output_pos++] = nibble_hex(a); + output[output_pos++] = nibble_hex(b); + } else { + output[output_pos++] = c; + } + } + + free(*input); + *input = output; + + return output_size; +} diff --git a/cgiutil.h b/cgiutil.h index 1d388af..4701e83 100644 --- a/cgiutil.h +++ b/cgiutil.h @@ -51,3 +51,41 @@ char *http_status_line(int status); * @return HTTP error code */ int http_errno(int err); + +/*! + * @brief Urlencode a given dynamically allocated string + * + * urlencode_realloc() receives a pointer to a pointer to + * a dynamically allocated string to encode plus its size + * including the null byte at the end. + * + * It then replaces every reserved character in the string + * except `/` with the appropriate percent encoding. If + * the size of the buffer is not enough, it uses `realloc()` + * to increase it. + * + * Note that the implementation of url encoding is not 100% + * correct, but should be good enough in the context of + * sternenblog. `/` is not encoded since on unix + * a slash should always be a path delimiter and never part of + * a filename. Another limitation of the url encoding is + * that it only checks for a list of characters to encode + * instead of checking if the characters are unreserved + * and don't need to be encoded which would be more correct. + * The approach taken has the big advantage that we don't + * need to worry about UTF-8, which makes the implementation + * considerably simpler. As a consequence however it will + * not be aggressive enough in terms of encoding in some + * cases. + * + * On error -1 is returned.
In such a case the original + * pointer remains intact, so you can either `free()` it + * or continue with the unencoded string. + * + * Otherwise it returns new size of the buffer. + * + * @param **input pointer to input string + * @param size size of input string including null byte + * @return -1 on error, else size of buffer + */ +int urlencode_realloc(char **input, int size); diff --git a/entry.c b/entry.c index b421042..bf2a808 100644 --- a/entry.c +++ b/entry.c @@ -125,8 +125,9 @@ int make_entry(const char *blog_dir, char *script_name, char *path_info, struct // don't depend on it starting with a slash size_t script_name_len = strlen(script_name); + size_t link_size = script_name_len + path_info_len + 1; - entry->link = malloc(sizeof(char) * (script_name_len + path_info_len + 1)); + entry->link = malloc(sizeof(char) * link_size); if(script_name_len != 0) { memcpy(entry->link, script_name, script_name_len); @@ -134,7 +135,11 @@ int make_entry(const char *blog_dir, char *script_name, char *path_info, struct memcpy(entry->link + script_name_len, path_info, path_info_len); - entry->link[path_info_len + script_name_len] = '\0'; + entry->link[link_size - 1] = '\0'; + + if(urlencode_realloc(&entry->link, link_size) <= 0) { + return 500; + } return 200; } |