optimizations to json_tokener_parse_ex(), printbuf_memappend()
-- Brent Miller, bdmiller at yahoo dash inc dot com git-svn-id: http://svn.metaparadigm.com/svn/json-c/trunk@34 327403b1-1117-474d-bef2-5cb71233fd97
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
0.9
|
||||
* optimizations to json_tokener_parse_ex(), printbuf_memappend()
|
||||
Brent Miller, bdmiller at yahoo dash inc dot com
|
||||
* Don't use this as a variable, so we can compile with a C++ compiler
|
||||
* Add casts from void* to type of assignment when using malloc
|
||||
* Add #ifdef __cplusplus guards to all of the headers
|
||||
|
||||
289
json_tokener.c
289
json_tokener.c
@@ -7,6 +7,10 @@
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See COPYING for details.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
|
||||
* The copyrights to the contents of this file are licensed under the MIT License
|
||||
* (http://www.opensource.org/licenses/mit-license.php)
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
@@ -135,35 +139,68 @@ char* strndup(const char* str, size_t n)
|
||||
#define current tok->stack[tok->depth].current
|
||||
#define obj_field_name tok->stack[tok->depth].obj_field_name
|
||||
|
||||
/* Optimization:
|
||||
* json_tokener_parse_ex() consumed a lot of CPU in its main loop,
|
||||
* iterating character-by-character. A large performance boost is
|
||||
* achieved by using tighter loops to locally handle units such as
|
||||
* comments and strings. Loops that handle an entire token within
|
||||
* their scope also gather entire strings and pass them to
|
||||
* printbuf_memappend() in a single call, rather than calling
|
||||
* printbuf_memappend() one char at a time.
|
||||
*
|
||||
* POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
|
||||
* common to both the main loop and the tighter loops.
|
||||
*/
|
||||
|
||||
/* POP_CHAR(dest, tok) macro:
|
||||
* Not really a pop()...peeks at the current char and stores it in dest.
|
||||
* Returns 1 on success, sets tok->err and returns 0 if no more chars.
|
||||
* Implicit inputs: str, len vars
|
||||
*/
|
||||
#define POP_CHAR(dest, tok) \
|
||||
(((tok)->char_offset == len) ? \
|
||||
(((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
|
||||
(((tok)->err = json_tokener_success), 0) \
|
||||
: \
|
||||
(((tok)->err = json_tokener_continue), 0) \
|
||||
) : \
|
||||
(((dest) = *str), 1) \
|
||||
)
|
||||
|
||||
/* ADVANCE_CHAR() macro:
|
||||
* Increments str & tok->char_offset.
|
||||
* For convenience of existing conditionals, returns the old value of c (0 on eof)
|
||||
* Implicit inputs: c var
|
||||
*/
|
||||
#define ADVANCE_CHAR(str, tok) \
|
||||
( ++(str), ((tok)->char_offset)++, c)
|
||||
|
||||
/* End optimization macro defs */
|
||||
|
||||
|
||||
struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
char *str, int len)
|
||||
{
|
||||
struct json_object *obj = NULL;
|
||||
char c;
|
||||
char c = '\1';
|
||||
|
||||
tok->char_offset = 0;
|
||||
tok->err = json_tokener_success;
|
||||
|
||||
do {
|
||||
if(tok->char_offset == len) {
|
||||
if(tok->depth == 0 && state == json_tokener_state_eatws &&
|
||||
saved_state == json_tokener_state_finish)
|
||||
tok->err = json_tokener_success;
|
||||
else
|
||||
tok->err = json_tokener_continue;
|
||||
goto out;
|
||||
}
|
||||
while (POP_CHAR(c, tok)) {
|
||||
|
||||
c = *str;
|
||||
redo_char:
|
||||
switch(state) {
|
||||
|
||||
case json_tokener_state_eatws:
|
||||
if(isspace(c)) {
|
||||
/* okay */
|
||||
} else if(c == '/') {
|
||||
/* Advance until we change state */
|
||||
while (isspace(c)) {
|
||||
if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
|
||||
goto out;
|
||||
}
|
||||
if(c == '/') {
|
||||
printbuf_reset(tok->pb);
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
state = json_tokener_state_comment_start;
|
||||
} else {
|
||||
state = saved_state;
|
||||
@@ -236,7 +273,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
goto redo_char;
|
||||
|
||||
case json_tokener_state_null:
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
if(strncasecmp(json_null_str, tok->pb->buf,
|
||||
min(tok->st_pos+1, strlen(json_null_str))) == 0) {
|
||||
if(tok->st_pos == strlen(json_null_str)) {
|
||||
@@ -261,25 +298,42 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
tok->err = json_tokener_error_parse_comment;
|
||||
goto out;
|
||||
}
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
break;
|
||||
|
||||
case json_tokener_state_comment:
|
||||
if(c == '*') state = json_tokener_state_comment_end;
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
break;
|
||||
{
|
||||
/* Advance until we change state */
|
||||
char *case_start = str;
|
||||
while(c != '*') {
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
|
||||
state = json_tokener_state_comment_end;
|
||||
}
|
||||
break;
|
||||
|
||||
case json_tokener_state_comment_eol:
|
||||
if(c == '\n') {
|
||||
{
|
||||
/* Advance until we change state */
|
||||
char *case_start = str;
|
||||
while(c != '\n') {
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
|
||||
state = json_tokener_state_eatws;
|
||||
} else {
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
}
|
||||
break;
|
||||
|
||||
case json_tokener_state_comment_end:
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
if(c == '/') {
|
||||
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
|
||||
state = json_tokener_state_eatws;
|
||||
@@ -289,15 +343,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
break;
|
||||
|
||||
case json_tokener_state_string:
|
||||
if(c == tok->quote_char) {
|
||||
current = json_object_new_string(tok->pb->buf);
|
||||
saved_state = json_tokener_state_finish;
|
||||
state = json_tokener_state_eatws;
|
||||
} else if(c == '\\') {
|
||||
saved_state = json_tokener_state_string;
|
||||
state = json_tokener_state_string_escape;
|
||||
} else {
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
{
|
||||
/* Advance until we change state */
|
||||
char *case_start = str;
|
||||
while(1) {
|
||||
if(c == tok->quote_char) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
current = json_object_new_string(tok->pb->buf);
|
||||
saved_state = json_tokener_state_finish;
|
||||
state = json_tokener_state_eatws;
|
||||
break;
|
||||
} else if(c == '\\') {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
saved_state = json_tokener_state_string;
|
||||
state = json_tokener_state_string_escape;
|
||||
break;
|
||||
}
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -306,17 +372,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
case '"':
|
||||
case '\\':
|
||||
case '/':
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
state = saved_state;
|
||||
break;
|
||||
case 'b':
|
||||
case 'n':
|
||||
case 'r':
|
||||
case 't':
|
||||
if(c == 'b') printbuf_memappend(tok->pb, "\b", 1);
|
||||
else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1);
|
||||
else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1);
|
||||
else if(c == 't') printbuf_memappend(tok->pb, "\t", 1);
|
||||
if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
|
||||
else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
|
||||
else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
|
||||
else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
|
||||
state = saved_state;
|
||||
break;
|
||||
case 'u':
|
||||
@@ -331,33 +397,46 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
break;
|
||||
|
||||
case json_tokener_state_escape_unicode:
|
||||
if(strchr(json_hex_chars, c)) {
|
||||
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
|
||||
if(tok->st_pos == 4) {
|
||||
unsigned char utf_out[3];
|
||||
if (tok->ucs_char < 0x80) {
|
||||
utf_out[0] = tok->ucs_char;
|
||||
printbuf_memappend(tok->pb, (char*)utf_out, 1);
|
||||
} else if (tok->ucs_char < 0x800) {
|
||||
utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
|
||||
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
|
||||
printbuf_memappend(tok->pb, (char*)utf_out, 2);
|
||||
} else {
|
||||
utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
|
||||
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
|
||||
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
|
||||
printbuf_memappend(tok->pb, (char*)utf_out, 3);
|
||||
}
|
||||
state = saved_state;
|
||||
/* Note that the following code is inefficient for handling large
|
||||
* chunks of extended chars, calling printbuf_memappend() once
|
||||
* for each multi-byte character of input.
|
||||
* This is a good area for future optimization.
|
||||
*/
|
||||
{
|
||||
/* Advance until we change state */
|
||||
while(1) {
|
||||
if(strchr(json_hex_chars, c)) {
|
||||
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
|
||||
if(tok->st_pos == 4) {
|
||||
unsigned char utf_out[3];
|
||||
if (tok->ucs_char < 0x80) {
|
||||
utf_out[0] = tok->ucs_char;
|
||||
printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
|
||||
} else if (tok->ucs_char < 0x800) {
|
||||
utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
|
||||
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
|
||||
printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
|
||||
} else {
|
||||
utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
|
||||
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
|
||||
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
|
||||
printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
|
||||
}
|
||||
state = saved_state;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
tok->err = json_tokener_error_parse_string;
|
||||
goto out;
|
||||
}
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
tok->err = json_tokener_error_parse_string;
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
|
||||
case json_tokener_state_boolean:
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
printbuf_memappend_fast(tok->pb, &c, 1);
|
||||
if(strncasecmp(json_true_str, tok->pb->buf,
|
||||
min(tok->st_pos+1, strlen(json_true_str))) == 0) {
|
||||
if(tok->st_pos == strlen(json_true_str)) {
|
||||
@@ -382,23 +461,35 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
break;
|
||||
|
||||
case json_tokener_state_number:
|
||||
if(c && strchr(json_number_chars, c)) {
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
if(c == '.' || c == 'e' || c == 'E') tok->is_double = 1;
|
||||
} else {
|
||||
int numi;
|
||||
double numd;
|
||||
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
|
||||
current = json_object_new_int(numi);
|
||||
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
|
||||
current = json_object_new_double(numd);
|
||||
} else {
|
||||
tok->err = json_tokener_error_parse_number;
|
||||
goto out;
|
||||
{
|
||||
/* Advance until we change state */
|
||||
char *case_start = str;
|
||||
int case_len=0;
|
||||
while(c && strchr(json_number_chars, c)) {
|
||||
++case_len;
|
||||
if(c == '.' || c == 'e') tok->is_double = 1;
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, case_len);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
saved_state = json_tokener_state_finish;
|
||||
state = json_tokener_state_eatws;
|
||||
goto redo_char;
|
||||
if (case_len>0)
|
||||
printbuf_memappend_fast(tok->pb, case_start, case_len);
|
||||
}
|
||||
{
|
||||
int numi;
|
||||
double numd;
|
||||
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
|
||||
current = json_object_new_int(numi);
|
||||
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
|
||||
current = json_object_new_double(numd);
|
||||
} else {
|
||||
tok->err = json_tokener_error_parse_number;
|
||||
goto out;
|
||||
}
|
||||
saved_state = json_tokener_state_finish;
|
||||
state = json_tokener_state_eatws;
|
||||
goto redo_char;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -452,15 +543,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
break;
|
||||
|
||||
case json_tokener_state_object_field:
|
||||
if(c == tok->quote_char) {
|
||||
obj_field_name = strdup(tok->pb->buf);
|
||||
saved_state = json_tokener_state_object_field_end;
|
||||
state = json_tokener_state_eatws;
|
||||
} else if(c == '\\') {
|
||||
saved_state = json_tokener_state_object_field;
|
||||
state = json_tokener_state_string_escape;
|
||||
} else {
|
||||
printbuf_memappend(tok->pb, &c, 1);
|
||||
{
|
||||
/* Advance until we change state */
|
||||
char *case_start = str;
|
||||
while(1) {
|
||||
if(c == tok->quote_char) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
obj_field_name = strdup(tok->pb->buf);
|
||||
saved_state = json_tokener_state_object_field_end;
|
||||
state = json_tokener_state_eatws;
|
||||
break;
|
||||
} else if(c == '\\') {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
saved_state = json_tokener_state_object_field;
|
||||
state = json_tokener_state_string_escape;
|
||||
break;
|
||||
}
|
||||
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
|
||||
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -506,15 +609,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
||||
break;
|
||||
|
||||
}
|
||||
str++;
|
||||
tok->char_offset++;
|
||||
} while(c);
|
||||
|
||||
if(state != json_tokener_state_finish &&
|
||||
saved_state != json_tokener_state_finish)
|
||||
tok->err = json_tokener_error_parse_eof;
|
||||
if (!ADVANCE_CHAR(str, tok))
|
||||
goto out;
|
||||
} /* while(POP_CHAR) */
|
||||
|
||||
out:
|
||||
if (!c) { /* We hit an eof char (0) */
|
||||
if(state != json_tokener_state_finish &&
|
||||
saved_state != json_tokener_state_finish)
|
||||
tok->err = json_tokener_error_parse_eof;
|
||||
}
|
||||
|
||||
if(tok->err == json_tokener_success) return json_object_get(current);
|
||||
MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
|
||||
json_tokener_errors[tok->err], tok->char_offset);
|
||||
|
||||
15
printbuf.c
15
printbuf.c
@@ -7,6 +7,10 @@
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See COPYING for details.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
|
||||
* The copyrights to the contents of this file are licensed under the MIT License
|
||||
* (http://www.opensource.org/licenses/mit-license.php)
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
@@ -118,16 +122,15 @@ int sprintbuf(struct printbuf *p, const char *msg, ...)
|
||||
if output is truncated whereas some return the number of bytes that
|
||||
would have been written - this code handles both cases. */
|
||||
if(size == -1 || size > 127) {
|
||||
int ret;
|
||||
va_start(ap, msg);
|
||||
size = vasprintf(&t, msg, ap);
|
||||
if((size = vasprintf(&t, msg, ap)) == -1) return -1;
|
||||
va_end(ap);
|
||||
if(size == -1) return -1;
|
||||
ret = printbuf_memappend(p, t, size);
|
||||
printbuf_memappend(p, t, size);
|
||||
free(t);
|
||||
return ret;
|
||||
return size;
|
||||
} else {
|
||||
return printbuf_memappend(p, buf, size);
|
||||
printbuf_memappend(p, buf, size);
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
18
printbuf.h
18
printbuf.h
@@ -7,6 +7,10 @@
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See COPYING for details.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
|
||||
* The copyrights to the contents of this file are licensed under the MIT License
|
||||
* (http://www.opensource.org/licenses/mit-license.php)
|
||||
*/
|
||||
|
||||
#ifndef _printbuf_h_
|
||||
@@ -27,9 +31,23 @@ struct printbuf {
|
||||
extern struct printbuf*
|
||||
printbuf_new(void);
|
||||
|
||||
/* As an optimization, printbuf_memappend_fast is defined as a macro that
|
||||
* handles copying data if the buffer is large enough; otherwise it
|
||||
* invokes printbuf_memappend() which performs the heavy lifting
|
||||
* of realloc()ing the buffer and copying data.
|
||||
*/
|
||||
extern int
|
||||
printbuf_memappend(struct printbuf *p, const char *buf, int size);
|
||||
|
||||
#define printbuf_memappend_fast(p, bufptr, bufsize) \
|
||||
do { \
|
||||
if ((p->size - p->bpos) > bufsize) { \
|
||||
memcpy(p->buf + p->bpos, (bufptr), bufsize); \
|
||||
p->bpos += bufsize; \
|
||||
p->buf[p->bpos]= '\0'; \
|
||||
} else { printbuf_memappend(p, (bufptr), bufsize); } \
|
||||
} while (0)
|
||||
|
||||
extern int
|
||||
sprintbuf(struct printbuf *p, const char *msg, ...);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user