optimizations to json_tokener_parse_ex(), printbuf_memappend()

-- Brent Miller, bdmiller at yahoo dash inc dot com


git-svn-id: http://svn.metaparadigm.com/svn/json-c/trunk@34 327403b1-1117-474d-bef2-5cb71233fd97
This commit is contained in:
Michael Clark
2009-04-27 08:16:58 +00:00
parent aaec1ef3c5
commit 95f55a761c
4 changed files with 226 additions and 98 deletions

View File

@@ -1,4 +1,6 @@
0.9 0.9
* optimizations to json_tokener_parse_ex(), printbuf_memappend()
Brent Miller, bdmiller at yahoo dash inc dot com
* Don't use this as a variable, so we can compile with a C++ compiler * Don't use this as a variable, so we can compile with a C++ compiler
* Add casts from void* to type of assignment when using malloc * Add casts from void* to type of assignment when using malloc
* Add #ifdef __cplusplus guards to all of the headers * Add #ifdef __cplusplus guards to all of the headers

View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify * This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details. * it under the terms of the MIT license. See COPYING for details.
* *
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/ */
#include "config.h" #include "config.h"
@@ -135,35 +139,68 @@ char* strndup(const char* str, size_t n)
#define current tok->stack[tok->depth].current #define current tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name #define obj_field_name tok->stack[tok->depth].obj_field_name
/* Optimization:
* json_tokener_parse_ex() consumed a lot of CPU in its main loop,
* iterating character by character. A large performance boost is
* achieved by using tighter loops to locally handle units such as
* comments and strings. Loops that handle an entire token within
* their scope also gather entire strings and pass them to
* printbuf_memappend() in a single call, rather than calling
* printbuf_memappend() one char at a time.
*
* POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
* common to both the main loop and the tighter loops.
*/
/* POP_CHAR(dest, tok):
 * Despite the name nothing is consumed; the macro only peeks at *str.
 *  - Input remaining (tok->char_offset != len): stores *str in dest and
 *    evaluates to 1.
 *  - Input exhausted: evaluates to 0 after setting tok->err to
 *    json_tokener_success when depth is 0, state is eatws and saved_state
 *    is finish, or to json_tokener_continue in every other case.
 * Relies on str, len, state and saved_state being visible at the call site.
 */
#define POP_CHAR(dest, tok)                                   \
  (((tok)->char_offset != len)                                \
    ? (((dest) = *str), 1)                                    \
    : (((tok)->err =                                          \
          ((tok)->depth == 0 &&                               \
           state == json_tokener_state_eatws &&               \
           saved_state == json_tokener_state_finish)          \
            ? json_tokener_success                            \
            : json_tokener_continue),                         \
       0))
/* ADVANCE_CHAR(str, tok):
 * Steps to the next input character by incrementing both str and
 * tok->char_offset.
 * The expression evaluates to c, which the macro itself never modifies, so
 * existing conditionals can test it directly (c is expected to hold the
 * character just stepped over, i.e. 0 at end of string).
 * Relies on a variable named c being visible at the call site.
 */
#define ADVANCE_CHAR(str, tok) \
  ((str)++, ++(tok)->char_offset, c)
/* End optimization macro defs */
/* End optimization macro defs */
struct json_object* json_tokener_parse_ex(struct json_tokener *tok, struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
char *str, int len) char *str, int len)
{ {
struct json_object *obj = NULL; struct json_object *obj = NULL;
char c; char c = '\1';
tok->char_offset = 0; tok->char_offset = 0;
tok->err = json_tokener_success; tok->err = json_tokener_success;
do { while (POP_CHAR(c, tok)) {
if(tok->char_offset == len) {
if(tok->depth == 0 && state == json_tokener_state_eatws &&
saved_state == json_tokener_state_finish)
tok->err = json_tokener_success;
else
tok->err = json_tokener_continue;
goto out;
}
c = *str;
redo_char: redo_char:
switch(state) { switch(state) {
case json_tokener_state_eatws: case json_tokener_state_eatws:
if(isspace(c)) { /* Advance until we change state */
/* okay */ while (isspace(c)) {
} else if(c == '/') { if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
goto out;
}
if(c == '/') {
printbuf_reset(tok->pb); printbuf_reset(tok->pb);
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
state = json_tokener_state_comment_start; state = json_tokener_state_comment_start;
} else { } else {
state = saved_state; state = saved_state;
@@ -236,7 +273,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
goto redo_char; goto redo_char;
case json_tokener_state_null: case json_tokener_state_null:
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
if(strncasecmp(json_null_str, tok->pb->buf, if(strncasecmp(json_null_str, tok->pb->buf,
min(tok->st_pos+1, strlen(json_null_str))) == 0) { min(tok->st_pos+1, strlen(json_null_str))) == 0) {
if(tok->st_pos == strlen(json_null_str)) { if(tok->st_pos == strlen(json_null_str)) {
@@ -261,25 +298,42 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
tok->err = json_tokener_error_parse_comment; tok->err = json_tokener_error_parse_comment;
goto out; goto out;
} }
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
break; break;
case json_tokener_state_comment: case json_tokener_state_comment:
if(c == '*') state = json_tokener_state_comment_end; {
printbuf_memappend(tok->pb, &c, 1); /* Advance until we change state */
break; char *case_start = str;
while(c != '*') {
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
state = json_tokener_state_comment_end;
}
break;
case json_tokener_state_comment_eol: case json_tokener_state_comment_eol:
if(c == '\n') { {
/* Advance until we change state */
char *case_start = str;
while(c != '\n') {
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
state = json_tokener_state_eatws; state = json_tokener_state_eatws;
} else {
printbuf_memappend(tok->pb, &c, 1);
} }
break; break;
case json_tokener_state_comment_end: case json_tokener_state_comment_end:
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
if(c == '/') { if(c == '/') {
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
state = json_tokener_state_eatws; state = json_tokener_state_eatws;
@@ -289,15 +343,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break; break;
case json_tokener_state_string: case json_tokener_state_string:
if(c == tok->quote_char) { {
current = json_object_new_string(tok->pb->buf); /* Advance until we change state */
saved_state = json_tokener_state_finish; char *case_start = str;
state = json_tokener_state_eatws; while(1) {
} else if(c == '\\') { if(c == tok->quote_char) {
saved_state = json_tokener_state_string; printbuf_memappend_fast(tok->pb, case_start, str-case_start);
state = json_tokener_state_string_escape; current = json_object_new_string(tok->pb->buf);
} else { saved_state = json_tokener_state_finish;
printbuf_memappend(tok->pb, &c, 1); state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_string;
state = json_tokener_state_string_escape;
break;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
} }
break; break;
@@ -306,17 +372,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
case '"': case '"':
case '\\': case '\\':
case '/': case '/':
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
state = saved_state; state = saved_state;
break; break;
case 'b': case 'b':
case 'n': case 'n':
case 'r': case 'r':
case 't': case 't':
if(c == 'b') printbuf_memappend(tok->pb, "\b", 1); if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1); else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1); else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
else if(c == 't') printbuf_memappend(tok->pb, "\t", 1); else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
state = saved_state; state = saved_state;
break; break;
case 'u': case 'u':
@@ -331,33 +397,46 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break; break;
case json_tokener_state_escape_unicode: case json_tokener_state_escape_unicode:
if(strchr(json_hex_chars, c)) { /* Note that the following code is inefficient for handling large
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4)); * chunks of extended chars, calling printbuf_memappend() once
if(tok->st_pos == 4) { * for each multi-byte character of input.
unsigned char utf_out[3]; * This is a good area for future optimization.
if (tok->ucs_char < 0x80) { */
utf_out[0] = tok->ucs_char; {
printbuf_memappend(tok->pb, (char*)utf_out, 1); /* Advance until we change state */
} else if (tok->ucs_char < 0x800) { while(1) {
utf_out[0] = 0xc0 | (tok->ucs_char >> 6); if(strchr(json_hex_chars, c)) {
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f); tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
printbuf_memappend(tok->pb, (char*)utf_out, 2); if(tok->st_pos == 4) {
} else { unsigned char utf_out[3];
utf_out[0] = 0xe0 | (tok->ucs_char >> 12); if (tok->ucs_char < 0x80) {
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); utf_out[0] = tok->ucs_char;
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f); printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
printbuf_memappend(tok->pb, (char*)utf_out, 3); } else if (tok->ucs_char < 0x800) {
} utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
state = saved_state; utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
} else {
utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
}
state = saved_state;
break;
}
} else {
tok->err = json_tokener_error_parse_string;
goto out;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
goto out;
} }
} else {
tok->err = json_tokener_error_parse_string;
goto out;
} }
break; break;
case json_tokener_state_boolean: case json_tokener_state_boolean:
printbuf_memappend(tok->pb, &c, 1); printbuf_memappend_fast(tok->pb, &c, 1);
if(strncasecmp(json_true_str, tok->pb->buf, if(strncasecmp(json_true_str, tok->pb->buf,
min(tok->st_pos+1, strlen(json_true_str))) == 0) { min(tok->st_pos+1, strlen(json_true_str))) == 0) {
if(tok->st_pos == strlen(json_true_str)) { if(tok->st_pos == strlen(json_true_str)) {
@@ -382,23 +461,35 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break; break;
case json_tokener_state_number: case json_tokener_state_number:
if(c && strchr(json_number_chars, c)) { {
printbuf_memappend(tok->pb, &c, 1); /* Advance until we change state */
if(c == '.' || c == 'e' || c == 'E') tok->is_double = 1; char *case_start = str;
} else { int case_len=0;
int numi; while(c && strchr(json_number_chars, c)) {
double numd; ++case_len;
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) { if(c == '.' || c == 'e') tok->is_double = 1;
current = json_object_new_int(numi); if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) { printbuf_memappend_fast(tok->pb, case_start, case_len);
current = json_object_new_double(numd); goto out;
} else { }
tok->err = json_tokener_error_parse_number;
goto out;
} }
saved_state = json_tokener_state_finish; if (case_len>0)
state = json_tokener_state_eatws; printbuf_memappend_fast(tok->pb, case_start, case_len);
goto redo_char; }
{
int numi;
double numd;
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
current = json_object_new_int(numi);
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
current = json_object_new_double(numd);
} else {
tok->err = json_tokener_error_parse_number;
goto out;
}
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
goto redo_char;
} }
break; break;
@@ -452,15 +543,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break; break;
case json_tokener_state_object_field: case json_tokener_state_object_field:
if(c == tok->quote_char) { {
obj_field_name = strdup(tok->pb->buf); /* Advance until we change state */
saved_state = json_tokener_state_object_field_end; char *case_start = str;
state = json_tokener_state_eatws; while(1) {
} else if(c == '\\') { if(c == tok->quote_char) {
saved_state = json_tokener_state_object_field; printbuf_memappend_fast(tok->pb, case_start, str-case_start);
state = json_tokener_state_string_escape; obj_field_name = strdup(tok->pb->buf);
} else { saved_state = json_tokener_state_object_field_end;
printbuf_memappend(tok->pb, &c, 1); state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_object_field;
state = json_tokener_state_string_escape;
break;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
} }
break; break;
@@ -506,15 +609,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break; break;
} }
str++; if (!ADVANCE_CHAR(str, tok))
tok->char_offset++; goto out;
} while(c); } /* while(POP_CHAR) */
if(state != json_tokener_state_finish &&
saved_state != json_tokener_state_finish)
tok->err = json_tokener_error_parse_eof;
out: out:
if (!c) { /* We hit an eof char (0) */
if(state != json_tokener_state_finish &&
saved_state != json_tokener_state_finish)
tok->err = json_tokener_error_parse_eof;
}
if(tok->err == json_tokener_success) return json_object_get(current); if(tok->err == json_tokener_success) return json_object_get(current);
MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
json_tokener_errors[tok->err], tok->char_offset); json_tokener_errors[tok->err], tok->char_offset);

View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify * This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details. * it under the terms of the MIT license. See COPYING for details.
* *
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/ */
#include "config.h" #include "config.h"
@@ -118,16 +122,15 @@ int sprintbuf(struct printbuf *p, const char *msg, ...)
if output is truncated whereas some return the number of bytes that if output is truncated whereas some return the number of bytes that
would have been written - this code handles both cases. */ would have been written - this code handles both cases. */
if(size == -1 || size > 127) { if(size == -1 || size > 127) {
int ret;
va_start(ap, msg); va_start(ap, msg);
size = vasprintf(&t, msg, ap); if((size = vasprintf(&t, msg, ap)) == -1) return -1;
va_end(ap); va_end(ap);
if(size == -1) return -1; printbuf_memappend(p, t, size);
ret = printbuf_memappend(p, t, size);
free(t); free(t);
return ret; return size;
} else { } else {
return printbuf_memappend(p, buf, size); printbuf_memappend(p, buf, size);
return size;
} }
} }

View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify * This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details. * it under the terms of the MIT license. See COPYING for details.
* *
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/ */
#ifndef _printbuf_h_ #ifndef _printbuf_h_
@@ -27,9 +31,23 @@ struct printbuf {
extern struct printbuf* extern struct printbuf*
printbuf_new(void); printbuf_new(void);
/* As an optimization, printbuf_memappend_fast is defined as a macro that
* handles copying data if the buffer is large enough; otherwise it
* invokes printbuf_memappend() which performs the heavy lifting
* of realloc()ing the buffer and copying data.
*/
extern int extern int
printbuf_memappend(struct printbuf *p, const char *buf, int size); printbuf_memappend(struct printbuf *p, const char *buf, int size);
/* printbuf_memappend_fast(p, bufptr, bufsize):
 * Appends bufsize bytes starting at bufptr to the printbuf p.
 * When the free space (p->size - p->bpos) is strictly greater than bufsize,
 * the bytes are copied in place, bpos is advanced and the buffer is
 * NUL-terminated; otherwise the call is forwarded to printbuf_memappend().
 * Every parameter is parenthesized so that arguments such as `&pb` or
 * `a ? b : c` expand correctly.  Arguments are still evaluated more than
 * once, so they must be free of side effects.
 */
#define printbuf_memappend_fast(p, bufptr, bufsize)          \
do {                                                         \
  if (((p)->size - (p)->bpos) > (bufsize)) {                 \
    memcpy((p)->buf + (p)->bpos, (bufptr), (bufsize));       \
    (p)->bpos += (bufsize);                                  \
    (p)->buf[(p)->bpos] = '\0';                              \
  } else {                                                   \
    printbuf_memappend((p), (bufptr), (bufsize));            \
  }                                                          \
} while (0)
extern int extern int
sprintbuf(struct printbuf *p, const char *msg, ...); sprintbuf(struct printbuf *p, const char *msg, ...);