vcpe/srcs/httpserver/src/haywire/picohttpparser.c

593 lines
20 KiB
C
Raw Normal View History

/*
* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
* Shigeo Mitsunari
*
* The software is licensed under either the MIT License (below) or the Perl
* license.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stddef.h>
#include <string.h>
#ifdef __SSE4_2__
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
#include "picohttpparser.h"
/* $Id$ */
#if __GNUC__ >= 3
2022-11-08 02:37:52 +00:00
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
2022-11-08 02:37:52 +00:00
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#ifdef _MSC_VER
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
2022-11-08 02:37:52 +00:00
#define CHECK_EOF() \
if (buf == buf_end) { \
*ret = -2; \
return NULL; \
}
2022-11-08 02:37:52 +00:00
#define EXPECT_CHAR(ch) \
CHECK_EOF(); \
if (*buf++ != ch) { \
*ret = -1; \
return NULL; \
}
2022-11-08 02:37:52 +00:00
#define ADVANCE_TOKEN(tok, toklen) \
do { \
const char *tok_start = buf; \
static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
int found2; \
buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
if (!found2) { \
CHECK_EOF(); \
} \
while (1) { \
if (*buf == ' ') { \
break; \
} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
if ((unsigned char)*buf < '\040' || *buf == '\177') { \
*ret = -1; \
return NULL; \
} \
} \
++buf; \
CHECK_EOF(); \
} \
tok = tok_start; \
toklen = buf - tok_start; \
} while (0)
2022-11-08 02:37:52 +00:00
static const char *token_char_map =
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size,
int *found) {
*found = 0;
#if __SSE4_2__
if (likely(buf_end - buf >= 16)) {
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
size_t left = (buf_end - buf) & ~15;
do {
__m128i b16 = _mm_loadu_si128((void *)buf);
2022-11-08 02:37:52 +00:00
int r = _mm_cmpestri(
ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
if (unlikely(r != 16)) {
buf += r;
*found = 1;
break;
}
buf += 16;
left -= 16;
} while (likely(left != 0));
}
#endif
return buf;
}
2022-11-08 02:37:52 +00:00
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len,
int *ret) {
const char *token_start = buf;
#ifdef __SSE4_2__
2022-11-08 02:37:52 +00:00
static const char ranges1
[] = "\0\010"
/* allow HT */
"\012\037"
/* allow SP and up to but not including DEL */
"\177\177"
/* allow chars w. MSB set */
;
int found;
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
if (found)
goto FOUND_CTL;
#else
/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
while (likely(buf_end - buf >= 8)) {
2022-11-08 02:37:52 +00:00
#define DOIT() \
do { \
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
goto NonPrintable; \
++buf; \
} while (0)
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
#undef DOIT
continue;
NonPrintable:
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
goto FOUND_CTL;
}
++buf;
}
#endif
for (;; ++buf) {
CHECK_EOF();
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
goto FOUND_CTL;
}
}
}
FOUND_CTL:
if (likely(*buf == '\015')) {
++buf;
EXPECT_CHAR('\012');
*token_len = buf - 2 - token_start;
} else if (*buf == '\012') {
*token_len = buf - token_start;
++buf;
} else {
*ret = -1;
return NULL;
}
*token = token_start;
return buf;
}
2022-11-08 02:37:52 +00:00
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) {
int ret_cnt = 0;
2022-11-08 02:37:52 +00:00
buf = last_len < 3 ? buf : buf + last_len - 3;
while (1) {
CHECK_EOF();
if (*buf == '\015') {
++buf;
CHECK_EOF();
EXPECT_CHAR('\012');
++ret_cnt;
} else if (*buf == '\012') {
++buf;
++ret_cnt;
} else {
++buf;
ret_cnt = 0;
}
if (ret_cnt == 2) {
return buf;
}
}
*ret = -2;
return NULL;
}
/* *_buf is always within [buf, buf_end) upon success */
2022-11-08 02:37:52 +00:00
static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret) {
int v;
CHECK_EOF();
if (!('0' <= *buf && *buf <= '9')) {
*ret = -1;
return NULL;
}
v = 0;
for (;; ++buf) {
CHECK_EOF();
if ('0' <= *buf && *buf <= '9') {
v = v * 10 + *buf - '0';
} else {
break;
}
}
*value = v;
return buf;
}
/* returned pointer is always within [buf, buf_end), or null */
2022-11-08 02:37:52 +00:00
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) {
EXPECT_CHAR('H');
EXPECT_CHAR('T');
EXPECT_CHAR('T');
EXPECT_CHAR('P');
EXPECT_CHAR('/');
EXPECT_CHAR('1');
EXPECT_CHAR('.');
return parse_int(buf, buf_end, minor_version, ret);
}
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
2022-11-08 02:37:52 +00:00
size_t max_headers, int *ret) {
for (;; ++*num_headers) {
CHECK_EOF();
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
break;
} else if (*buf == '\012') {
++buf;
break;
}
if (*num_headers == max_headers) {
*ret = -1;
return NULL;
}
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
if (!token_char_map[(unsigned char)*buf]) {
*ret = -1;
return NULL;
}
/* parsing name, but do not discard SP before colon, see
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
2022-11-08 02:37:52 +00:00
headers[*num_headers].name = buf;
static const char ALIGNED(16) ranges1[] = "::\x00\037";
2022-11-08 02:37:52 +00:00
int found;
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
if (!found) {
CHECK_EOF();
}
while (1) {
if (*buf == ':') {
break;
} else if (*buf < ' ') {
*ret = -1;
return NULL;
}
++buf;
CHECK_EOF();
}
headers[*num_headers].name_len = buf - headers[*num_headers].name;
++buf;
for (;; ++buf) {
CHECK_EOF();
if (!(*buf == ' ' || *buf == '\t')) {
break;
}
}
} else {
2022-11-08 02:37:52 +00:00
headers[*num_headers].name = NULL;
headers[*num_headers].name_len = 0;
}
2022-11-08 02:37:52 +00:00
if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret))
== NULL) {
return NULL;
}
}
return buf;
}
2022-11-08 02:37:52 +00:00
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len,
const char **path, size_t *path_len, int *minor_version, struct phr_header *headers,
size_t *num_headers, size_t max_headers, int *ret) {
/* skip first empty line (some clients add CRLF after POST content) */
CHECK_EOF();
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
} else if (*buf == '\012') {
++buf;
}
/* parse request line */
ADVANCE_TOKEN(*method, *method_len);
++buf;
ADVANCE_TOKEN(*path, *path_len);
++buf;
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
return NULL;
}
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
} else if (*buf == '\012') {
++buf;
} else {
*ret = -1;
return NULL;
}
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
2022-11-08 02:37:52 +00:00
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
size_t last_len) {
const char *buf = buf_start, *buf_end = buf_start + len;
2022-11-08 02:37:52 +00:00
size_t max_headers = *num_headers;
int r;
2022-11-08 02:37:52 +00:00
*method = NULL;
*method_len = 0;
*path = NULL;
*path_len = 0;
*minor_version = -1;
2022-11-08 02:37:52 +00:00
*num_headers = 0;
/* if last_len != 0, check if the request is complete (a fast countermeasure
againt slowloris */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
2022-11-08 02:37:52 +00:00
if ((buf = parse_request(
buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, &r))
== NULL) {
return r;
}
return (int)(buf - buf_start);
}
2022-11-08 02:37:52 +00:00
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status,
const char **msg, size_t *msg_len, struct phr_header *headers, size_t *num_headers,
size_t max_headers, int *ret) {
/* parse "HTTP/1.x" */
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
return NULL;
}
/* skip space */
if (*buf++ != ' ') {
*ret = -1;
return NULL;
}
/* parse status code */
if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
return NULL;
}
/* skip space */
if (*buf++ != ' ') {
*ret = -1;
return NULL;
}
/* get message */
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
return NULL;
}
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
2022-11-08 02:37:52 +00:00
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg,
size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t last_len) {
const char *buf = buf_start, *buf_end = buf + len;
2022-11-08 02:37:52 +00:00
size_t max_headers = *num_headers;
int r;
*minor_version = -1;
2022-11-08 02:37:52 +00:00
*status = 0;
*msg = NULL;
*msg_len = 0;
*num_headers = 0;
/* if last_len != 0, check if the response is complete (a fast countermeasure
against slowloris */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
2022-11-08 02:37:52 +00:00
if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r))
== NULL) {
return r;
}
return (int)(buf - buf_start);
}
2022-11-08 02:37:52 +00:00
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers,
size_t last_len) {
const char *buf = buf_start, *buf_end = buf + len;
2022-11-08 02:37:52 +00:00
size_t max_headers = *num_headers;
int r;
*num_headers = 0;
/* if last_len != 0, check if the response is complete (a fast countermeasure
against slowloris */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
return r;
}
return (int)(buf - buf_start);
}
enum {
CHUNKED_IN_CHUNK_SIZE,
CHUNKED_IN_CHUNK_EXT,
CHUNKED_IN_CHUNK_DATA,
CHUNKED_IN_CHUNK_CRLF,
CHUNKED_IN_TRAILERS_LINE_HEAD,
CHUNKED_IN_TRAILERS_LINE_MIDDLE
};
2022-11-08 02:37:52 +00:00
static int decode_hex(int ch) {
if ('0' <= ch && ch <= '9') {
return ch - '0';
} else if ('A' <= ch && ch <= 'F') {
return ch - 'A' + 0xa;
} else if ('a' <= ch && ch <= 'f') {
return ch - 'a' + 0xa;
} else {
return -1;
}
}
2022-11-08 02:37:52 +00:00
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) {
size_t dst = 0, src = 0, bufsz = *_bufsz;
ssize_t ret = -2; /* incomplete */
while (1) {
switch (decoder->_state) {
2022-11-08 02:37:52 +00:00
case CHUNKED_IN_CHUNK_SIZE:
for (;; ++src) {
int v;
if (src == bufsz)
goto Exit;
if ((v = decode_hex(buf[src])) == -1) {
if (decoder->_hex_count == 0) {
ret = -1;
goto Exit;
}
break;
}
if (decoder->_hex_count == sizeof(size_t) * 2) {
ret = -1;
goto Exit;
}
2022-11-08 02:37:52 +00:00
decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
++decoder->_hex_count;
}
2022-11-08 02:37:52 +00:00
decoder->_hex_count = 0;
decoder->_state = CHUNKED_IN_CHUNK_EXT;
/* fallthru */
case CHUNKED_IN_CHUNK_EXT:
/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] == '\012')
break;
}
2022-11-08 02:37:52 +00:00
++src;
if (decoder->bytes_left_in_chunk == 0) {
if (decoder->consume_trailer) {
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
break;
} else {
goto Complete;
}
}
decoder->_state = CHUNKED_IN_CHUNK_DATA;
/* fallthru */
case CHUNKED_IN_CHUNK_DATA: {
size_t avail = bufsz - src;
if (avail < decoder->bytes_left_in_chunk) {
if (dst != src)
memmove(buf + dst, buf + src, avail);
src += avail;
dst += avail;
decoder->bytes_left_in_chunk -= avail;
goto Exit;
}
if (dst != src)
2022-11-08 02:37:52 +00:00
memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
src += decoder->bytes_left_in_chunk;
dst += decoder->bytes_left_in_chunk;
decoder->bytes_left_in_chunk = 0;
decoder->_state = CHUNKED_IN_CHUNK_CRLF;
}
2022-11-08 02:37:52 +00:00
/* fallthru */
case CHUNKED_IN_CHUNK_CRLF:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] != '\015')
break;
}
if (buf[src] != '\012') {
ret = -1;
goto Exit;
2022-11-08 02:37:52 +00:00
}
++src;
decoder->_state = CHUNKED_IN_CHUNK_SIZE;
break;
case CHUNKED_IN_TRAILERS_LINE_HEAD:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] != '\015')
break;
}
if (buf[src++] == '\012')
goto Complete;
decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
/* fallthru */
case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] == '\012')
break;
}
++src;
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
break;
default:
assert(!"decoder is corrupt");
}
}
Complete:
ret = bufsz - src;
Exit:
if (dst != src)
memmove(buf + dst, buf + src, bufsz - src);
*_bufsz = dst;
return ret;
}
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN