Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ releases.
* prism 1.9.0
* 1.7.0 to [v1.8.0][prism-v1.8.0], [v1.8.1][prism-v1.8.1], [v1.9.0][prism-v1.9.0]
* psych 5.4.0
* 5.3.1 to [v5.4.0][psych-v5.4.0]
* resolv 0.7.1
* 0.7.0 to [v0.7.1][resolv-v0.7.1]
* stringio 3.2.1.dev
Expand Down Expand Up @@ -250,6 +251,7 @@ A lot of work has gone into making Ractors more stable, performant, and usable.
[prism-v1.8.0]: https://github.com/ruby/prism/releases/tag/v1.8.0
[prism-v1.8.1]: https://github.com/ruby/prism/releases/tag/v1.8.1
[prism-v1.9.0]: https://github.com/ruby/prism/releases/tag/v1.9.0
[psych-v5.4.0]: https://github.com/ruby/psych/releases/tag/v5.4.0
[resolv-v0.7.1]: https://github.com/ruby/resolv/releases/tag/v0.7.1
[strscan-v3.1.7]: https://github.com/ruby/strscan/releases/tag/v3.1.7
[strscan-v3.1.8]: https://github.com/ruby/strscan/releases/tag/v3.1.8
Expand Down
26 changes: 26 additions & 0 deletions ext/json/lib/json/ext.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,31 @@ def parse
end
end

if defined?(ResumableParser) # Not yet available on JRuby
class ResumableParser
  # Tells whether the parser has nothing left to do. That requires all three
  # of the following to hold:
  #
  # * every byte fed so far has been consumed (#eos?),
  # * no document is currently being built (#partial_value?),
  # * no completed value is waiting to be fetched (#value?).
  #
  # It is mostly useful once the input source has run dry, to tell a clean
  # end of stream apart from a truncated one:
  #
  #   loop do
  #     begin
  #       parser << socket.readpartial(4096)
  #     rescue EOFError
  #       break
  #     end
  #     while parser.parse
  #       process(parser.value)
  #     end
  #   end
  #   warn "stream was truncated" unless parser.empty?
  def empty?
    # Same evaluation order as checking each condition in turn: the buffer
    # first, then the in-progress document, then the pending value.
    eos? && !(partial_value? || value?)
  end
end
end

JSON_LOADED = true unless defined?(JSON::JSON_LOADED)
end
49 changes: 46 additions & 3 deletions ext/json/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -779,11 +779,18 @@ json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)

switch (peek(state)) {
case '/': {
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
if (!state->cursor) {
const char *newline = memchr(state->cursor, '\n', state->end - state->cursor);
if (!newline) {
// state->parser marks resumable mode, where the buffer end is only a
// chunk boundary: the terminating newline may still arrive, so leave
// the comment unterminated instead of consuming to end as a one-shot
// parse would.
if (state->parser) {
raise_eos_error_at("unterminated comment, expected end of line", state, start);
}
state->cursor = state->end;
} else {
state->cursor++;
state->cursor = newline + 1;
}
break;
}
Expand Down Expand Up @@ -2737,6 +2744,41 @@ static VALUE cResumableParser_eos_p(VALUE self)
return eos(&parser->state) ? Qtrue : Qfalse;
}

/*
 * call-seq: partial_value? -> true or false
 *
 * Tells whether the parser is in the middle of building a document, for
 * example inside a container that hasn't been closed yet, or after a key
 * whose value hasn't arrived.
 *
 * The answer is the same as <tt>!partial_value.nil?</tt>, but it is computed
 * from the parser's internal state only, so the partially parsed Ruby
 * objects are never materialized:
 *
 *   parser << '{"a":1,'
 *   parser.parse          # => false
 *   parser.partial_value? # => true
 *
 * A document that has been parsed to completion but not yet retrieved does
 * not count as being under construction: in that state #value? is true and
 * #partial_value? is false.
 */
static VALUE cResumableParser_partial_value_p(VALUE self)
{
    JSON_ResumableParser *parser = cResumableParser_get(self);

    // Counterpart of #value?: if the value stack is non-empty and the top
    // frame hasn't reached JSON_PHASE_DONE, those values belong to a document
    // that is still being built.
    //
    // A container whose first key or element hasn't been parsed yet has no
    // frame and no value registered, because the tokenizer rewinds to the
    // container start on EOS. That situation is reported through the buffer
    // (#eos?/#rest) rather than here, which keeps this predicate in agreement
    // with #partial_value returning nil.
    //
    // The frame is only peeked when the value stack is non-empty, thanks to
    // the short-circuit evaluation of &&.
    return (parser->value_stack.head > 0 &&
            json_frame_stack_peek(&parser->frames)->phase != JSON_PHASE_DONE)
        ? Qtrue
        : Qfalse;
}

/*
* call-seq: parsed_bytes -> integer
*
Expand Down Expand Up @@ -2793,6 +2835,7 @@ void Init_parser(void)
rb_define_method(cResumableParser, "value", cResumableParser_value, 0);
rb_define_method(cResumableParser, "value?", cResumableParser_value_p, 0);
rb_define_method(cResumableParser, "partial_value", cResumableParser_partial_value, 0);
rb_define_method(cResumableParser, "partial_value?", cResumableParser_partial_value_p, 0);
rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0);
rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0);
rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0);
Expand Down
7 changes: 3 additions & 4 deletions set.c
Original file line number Diff line number Diff line change
Expand Up @@ -2411,11 +2411,10 @@ rb_set_size(VALUE set)
* === Methods for Creating a \Set
*
* - ::[]:
* Returns a new set containing the given objects.
* Returns a new set populated with the given objects.
* - ::new:
* Returns a new set containing either the given objects
* (if no block given) or the return values from the called block
* (if a block given).
* Returns a new set based on the given object (if no block given),
* or on the return values from the called block (if a block given).
*
* === Methods for \Set Operations
*
Expand Down
1 change: 1 addition & 0 deletions test/json/json_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ def test_parse_comments
JSON
assert_equal({ "key1" => "value1" }, parse(json, allow_comments: true))
assert_equal({}, parse('{} /**/', allow_comments: true))
assert_equal({}, parse('{} // eol comment ending at eof', allow_comments: true))
assert_raise(ParserError) { parse('{} /* comment not closed', allow_comments: true) }
assert_raise(ParserError) { parse('{} /*/', allow_comments: true) }
assert_raise(ParserError) { parse('{} /x wrong comment', allow_comments: true) }
Expand Down
125 changes: 125 additions & 0 deletions test/json/resumable_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,48 @@ def test_incomplete_input_at_structural_positions_resumes
assert_incomplete "{\"a\":1,"
end

def test_line_comment_spanning_feed_boundary_is_not_terminated_early
  # Only a newline ends a `//` comment. If the chunk ends before that newline
  # shows up, the comment has to remain open; should it be considered
  # finished at the chunk boundary, the rest of the comment arriving in the
  # next chunk would be parsed as if it were real documents.
  parser = new_parser(allow_comments: true)
  documents = []

  chunks = [
    '[1] //',    # the comment opens right at the end of the first chunk
    "[2]\n[3]",  # [2] is still comment text, [3] is an actual document
  ]

  chunks.each do |chunk|
    parser << chunk
    while parser.parse
      documents << parser.value
    end
  end

  assert_equal [[1], [3]], documents
end

def test_line_comment_terminated_by_newline_across_feeds
  # The comment body is split over two chunks; the newline that ends it only
  # arrives with the second one, right before the next document.
  parser = new_parser(allow_comments: true)
  documents = []

  ['[1] //co', "mment\n[2]"].each do |chunk|
    parser << chunk
    while parser.parse
      documents << parser.value
    end
  end

  assert_equal [[1], [2]], documents
end

def test_block_comment_spanning_feed_boundary_is_not_terminated_early
  # Same expectation as for `//` comments, applied to `/* */`: as long as
  # the closing `*/` hasn't been fed, the comment has to remain open.
  parser = new_parser(allow_comments: true)
  documents = []

  chunks = [
    '[1] /*',    # the comment opens right at the end of the first chunk
    '[2]*/[3]',  # [2] is still comment text, [3] is an actual document
  ]

  chunks.each do |chunk|
    parser << chunk
    while parser.parse
      documents << parser.value
    end
  end

  assert_equal [[1], [3]], documents
end

def test_rest
@parser << '[1, 2, 3, "unterminated string'
refute @parser.parse
Expand Down Expand Up @@ -239,6 +281,89 @@ def test_eos
assert_predicate @parser, :eos?
end

def test_empty_predicate
  # empty? describes the state reached once everything parseable in the fed
  # bytes has been parsed, hence the parse/value drain before each check.
  cases = {
    # Nothing left over.
    '' => true,                # nothing fed: vacuously empty
    '{"a":1}' => true,
    '{"a":1}{"b":2}' => true,
    '{"a":1} ' => true,        # trailing whitespace
    # Input cut short at various positions.
    '{"a":1}{"b":2' => false,  # inside a number token
    '{"a":1}{"b":' => false,   # right after a colon (token boundary)
    '{"a":1}{' => false,       # right after an object open
    '{"a":1,' => false,        # right after a comma (token boundary)
    '"abc' => false,           # inside a string token
    '[1,2' => false,           # unclosed array
  }

  cases.each_pair do |input, want|
    parser = new_parser
    parser << input
    while parser.parse
      parser.value
    end

    assert_equal want, parser.empty?, "expected #{input.inspect} to be empty? == #{want}"
  end
end

def test_empty_predicate_with_undrained_buffer
  first_document = { "a" => 1 }
  second_document = { "b" => 2 }

  @parser << '{"a":1}{"b":2}'

  # After the first parse the second document still sits in the buffer, so
  # the parser can't be empty yet.
  assert @parser.parse
  refute_predicate @parser, :empty?
  assert_equal(first_document, @parser.value)

  # Only once the second document has been parsed and retrieved is the
  # parser empty.
  assert @parser.parse
  assert_equal(second_document, @parser.value)
  assert_predicate @parser, :empty?
end

def test_empty_predicate_with_pending_value
  expected_document = { "a" => 1 }

  @parser << '{"a":1}'
  assert @parser.parse

  # The document is complete, but until #value hands it over the parser
  # does not count as empty.
  refute_predicate @parser, :empty?

  assert_equal(expected_document, @parser.value)
  assert_predicate @parser, :empty?
end

def test_empty_predicate_across_feeds
  # Two feeds that each leave the document unfinished; the first one stops
  # in the middle of a string literal.
  ['{"a', '":1'].each do |fragment|
    @parser << fragment
    refute @parser.parse
    refute_predicate @parser, :empty?
  end

  # The closing brace completes the document...
  @parser << '}'
  assert @parser.parse

  # ...but the value hasn't been retrieved yet, so the parser isn't empty.
  refute_predicate @parser, :empty?
  assert_equal({ "a" => 1 }, @parser.value)
  assert_predicate @parser, :empty?
end

def test_partial_value_predicate
  cases = {
    '' => false,
    '{"a":1}' => false,
    '{"a":1}{"b":2}' => false,
    '{"a":1} ' => false,
    '{"a":1}{"b":2' => true,  # inside a number token
    '{"a":1}{"b":' => true,   # right after a colon (token boundary)
    # On EOS the tokenizer rewinds to the start of the token, so a lone '{'
    # or an unterminated top-level string leaves nothing registered yet:
    # partial_value is nil and partial_value? says the same. The truncation
    # can still be seen through the buffer, since eos? is false and rest is
    # not empty.
    '{"a":1}{' => false,      # right after an object open
    '"abc' => false,          # inside a string token
    '{"a":1,' => true,        # right after a comma (token boundary)
    '[1,2' => true,           # unclosed array
  }

  cases.each_pair do |input, want|
    parser = new_parser
    parser << input
    while parser.parse
      parser.value
    end

    assert_equal want, parser.partial_value?, "expected #{input.inspect} to be partial_value? == #{want}"
    # The predicate has to agree with what partial_value itself reports.
    assert_equal !parser.partial_value.nil?, parser.partial_value?, "partial_value?/partial_value mismatch for #{input.inspect}"
  end
end

def test_partial_value
assert_nil @parser.partial_value
assert_partial_value [1, 2, 3], '[1, 2, 3, "unterminated string'
Expand Down