From 3b215082cd4e689facc8893fb0cc4c38b6de44fc Mon Sep 17 00:00:00 2001 From: Soroush Safari Loaliyan Date: Wed, 24 Jun 2026 11:37:32 -0700 Subject: [PATCH] Add bounds checks to JSON Lines array iterator --- cujson/query/query_iterator.cpp | 123 ++++++++++++++++-- paper_reproduced/src/query/query_iterator.cpp | 104 +++++++++++++-- 2 files changed, 202 insertions(+), 25 deletions(-) diff --git a/cujson/query/query_iterator.cpp b/cujson/query/query_iterator.cpp index 3091921..656ac76 100644 --- a/cujson/query/query_iterator.cpp +++ b/cujson/query/query_iterator.cpp @@ -126,14 +126,37 @@ void cuJSONLinesIterator::freeJson(){ } char cuJSONLinesIterator::getChar(int idx){ - if( inputJSON[structural[idx] - 1] == '\n' ) return ','; + // Fixed: Check if idx is within bounds and structural/inputJSON are not NULL + // Also, make sure pos is within the bounds of the inputJSON array + if(idx < 0 || idx >= totalResultSize || structural == NULL || inputJSON == NULL){ + return '\0'; + } else if (idx == totalResultSize - 1) return ']'; else if (idx == 0) return '['; - else return inputJSON[structural[idx] - 1]; + + int pos = structural[idx] - 1; + if(pos < 0 || pos >= fileSize){ + return '\0'; + } + else if(inputJSON[pos] == '\n'){ + return ','; + } + else return inputJSON[pos]; } int cuJSONLinesIterator::jumpOpeningForward(int idx){ - return pair_pos[idx]; + // Fixed: Check if idx is within bounds and pair_pos is not NULL + if(idx < 0 || idx >= totalResultSize || pair_pos == NULL){ + return -1; + } + + int pairNode = pair_pos[idx]; + // Fixed: Check if pairNode is within bounds + if(pairNode < 0 || pairNode >= totalResultSize){ + return -1; + } + + return pairNode; } int cuJSONLinesIterator::jumpSpacesForward(int pos){ @@ -231,33 +254,94 @@ void cuJSONLinesIterator::reset(){ } int cuJSONLinesIterator::gotoArrayIndex(int index){ + // Fixed: Check if index is valid and structural/inputJSON are not NULL + if(index < 0){ + return 0; + } + + if(totalResultSize <= 0 || structural == NULL || pair_pos == NULL || inputJSON == NULL){ + return 0; + } + + if(node < 0 || node >= totalResultSize){ + return 0; + } + int total = index + 1; // total number of index that we have to go forward to get the requested index [started from 0] // +1 is for handling indexes [1,2,3,...], user will use [0,1,2,...] - - char currentNodeChar = getChar(node); - // cout << "currNodeChar: " << currentNodeChar <= totalResultSize){ + return 0; + } + + startNode++; + currentNodeChar = getChar(startNode); + // Fixed: Check if currentNodeChar is valid + if(currentNodeChar == '\0'){ + return 0; + } + } + // next node - int nextNode = node+1; + nextNode = startNode + 1; + // Fixed: Check if nextNode is within bounds + if(nextNode < 0 || nextNode >= totalResultSize){ + return 0; + } char nextNodeChar = getChar(nextNode); - + // Fixed: Check if nextNodeChar is valid + if(nextNodeChar == '\0'){ + return 0; + } // cout << "nextNodeChar: " << nextNodeChar <= totalResultSize){ + return 0; + } // cout << "nxt->" << nextNodeChar <= totalResultSize){ + return 0; + } + nextNode = pairNode; } if(nextNodeChar == ',' || nextNodeChar == '\n' || nextNodeChar == ' '){ // no need for \n - currArrayNode = nextNode; // save the current array node + newCurrArrayNode = nextNode; // save the current array node total--; // go one node forward } nextNode++; + // Fixed: Check if nextNode is within bounds + if(nextNode < 0 || nextNode >= totalResultSize){ + return 0; + } nextNodeChar = getChar(nextNode); + // Fixed: Check if nextNodeChar is valid + if(nextNodeChar == '\0'){ + return 0; + } } // cout << "total: " << total << endl; @@ -269,7 +353,20 @@ int cuJSONLinesIterator::gotoArrayIndex(int index){ // that means we achieve to requested index if(total == 1){ // cout << "curr node in total == 1 --> " << getChar(nextNode) <= totalResultSize){ + return 0; + } + /* + * Reject empty-array access, for example gotoArrayIndex(0) on []. + */ + if(nextNodeChar == ']' && targetNode == startNode){ + return 0; + } + node = targetNode; // change the node pointer iterator to the nextNode pointer + currArrayNode = newCurrArrayNode; + // cout << "node - 2: " << node << endl; // currentNodeChar = getChar(node); // cout << "currentNodeChar-2: " << currentNodeChar <= totalResultSize || structural == NULL || inputJSON == NULL){ + return '\0'; + } else if (idx == totalResultSize - 1) return ']'; else if (idx == 0) return '['; - else return inputJSON[structural[idx] - 1]; + + int pos = structural[idx] - 1; + if(pos < 0 || pos >= fileSize){ + return '\0'; + } + + else if(inputJSON[pos] == '\n'){ + return ','; + } + else return inputJSON[pos]; } int structural_iterator::jumpOpeningForward(int idx){ - return pair_pos[idx]; -} + if(idx < 0 || idx >= totalResultSize || pair_pos == NULL){ + return -1; + } + int pairNode = pair_pos[idx]; + if(pairNode < 0 || pairNode >= totalResultSize){ + return -1; + } + + return pairNode;} int structural_iterator::jumpSpacesForward(int pos){ int current_pos = pos; @@ -267,37 +285,99 @@ void structural_iterator::reset(){ } int structural_iterator::gotoArrayIndex(int index){ + if(index < 0){ + return 0; + } + + if(totalResultSize <= 0 || structural == NULL || pair_pos == NULL || inputJSON == NULL){ + return 0; + } + + if(node < 0 || node >= totalResultSize){ + return 0; + } + int total = index + 1; // total number of index that we have to go forward to get the requested index [started from 0] // +1 is for handling indexes [1,2,3,...], user will use [0,1,2,...] - - char currentNodeChar = getChar(node); + int startNode = node; + int nextNode; + + char currentNodeChar = getChar(startNode); + if(currentNodeChar == '\0'){ + return 0; + } // cout << "currNodeChar: " << currentNodeChar <= totalResultSize){ + return 0; + } + + startNode++; + currentNodeChar = getChar(startNode); + if(currentNodeChar == '\0'){ + return 0; + } + } // next node - int nextNode = node+1; + nextNode = startNode + 1; + if(nextNode < 0 || nextNode >= totalResultSize){ + return 0; + } + char nextNodeChar = getChar(nextNode); + if(nextNodeChar == '\0'){ + return 0; + } // cout << "nextNodeChar: " << nextNodeChar <" << nextNodeChar <" << nextNodeChar <= totalResultSize){ + return 0; + } + if(nextNodeChar == '[' || nextNodeChar == '{'){ - // cout << "nextNodeChar: " << nextNodeChar << endl; - nextNode = jumpOpeningForward(nextNode); + int pairNode = jumpOpeningForward(nextNode); + if(pairNode <= nextNode || pairNode >= totalResultSize){ + return 0; + } + + nextNode = pairNode; } if(nextNodeChar == ',' || nextNodeChar == '\n'){ // no need for \n total--; // go one node forward } nextNode++; + if(nextNode < 0 || nextNode >= totalResultSize){ + return 0; + } nextNodeChar = getChar(nextNode); + if(nextNodeChar == '\0'){ + return 0; + } } // that means we achieve to requested index if(total == 1){ + int targetNode = nextNode - 1; + if(targetNode < 0 || targetNode >= totalResultSize){ + return 0; + } + /* + * Reject empty-array access, for example gotoArrayIndex(0) on []. + */ + if(nextNodeChar == ']' && targetNode == startNode){ + return 0; + } // cout << "curr node in total == 1 --> " << getChar(nextNode) <