1use crate::error::Error;
4use crate::options::DecodeOptions;
5use crate::simd;
6use serde_json::{Map, Value};
7use std::io::{BufReader, Read};
8
9pub fn decode(input: &str, options: Option<&DecodeOptions>) -> Result<Value, Error> {
20 let default_opts = DecodeOptions::default();
21 let opts = options.unwrap_or(&default_opts);
22 let mut parser = Parser::new(input, opts);
23 parser.parse()
24}
25
/// In-memory parser state used by [`decode`].
struct Parser<'a> {
    /// The complete text being decoded.
    input: &'a str,
    /// Current read position within `input`; it is used to slice `input` and is
    /// compared against `input.len()` to detect the end of the text.
    pos: usize,
    /// Decoding options (indent width and strict mode are read from here).
    options: &'a DecodeOptions,
}
31
32impl<'a> Parser<'a> {
33 fn new(input: &'a str, options: &'a DecodeOptions) -> Self {
34 Self {
35 input,
36 pos: 0,
37 options,
38 }
39 }
40
41 fn parse(&mut self) -> Result<Value, Error> {
42 self.skip_whitespace();
43 if self.pos >= self.input.len() {
44 return Ok(Value::Object(Map::new()));
45 }
46
47 if self.peek_char() == Some('[') {
49 self.parse_array_value()
50 } else {
51 self.parse_object()
52 }
53 }
54
55 fn parse_object(&mut self) -> Result<Value, Error> {
56 let mut map = Map::new();
57 let indent = self.options.get_indent();
58 let initial_indent = self.count_indent(indent);
59
60 loop {
61 let line_indent = self.count_indent(indent);
63 if line_indent < initial_indent {
64 break;
66 }
67
68 for _ in 0..(line_indent * indent) {
70 if self.peek_char() == Some(' ') {
71 self.advance();
72 } else {
73 break;
74 }
75 }
76
77 if self.pos >= self.input.len() {
78 break;
79 }
80 if line_indent == 0 && !map.is_empty() && initial_indent == 0 {
81 let saved_pos = self.pos;
83 let key_result = self.parse_key();
84 self.pos = saved_pos;
85 if key_result.is_err() {
86 break;
87 }
88 }
89
90 let key = self.parse_key()?;
92 self.skip_whitespace();
93
94 let has_array_notation = self.peek_char() == Some('[');
96
97 if !has_array_notation {
98 if self.peek_char() != Some(':') {
100 return Err(Error::parse(
101 self.pos,
102 format!("Expected ':' after key '{key}'"),
103 ));
104 }
105 self.advance(); self.skip_whitespace();
107 } else {
108 }
111
112 let value = if has_array_notation {
114 let value = self.parse_array_value()?;
117 if self.pos < self.input.len() && self.peek_char() == Some('\n') {
119 self.advance();
120 }
121 value
122 } else if self.peek_char() == Some('\n') {
123 self.advance(); let next_indent = self.count_indent(indent);
126 if next_indent > line_indent {
127 if self.peek_char() == Some('[') {
129 self.parse_array_value()?
130 } else {
131 self.parse_object()?
136 }
137 } else {
138 Value::Null
140 }
141 } else {
142 let value = self.parse_value_until_newline()?;
144 if self.pos < self.input.len() && self.peek_char() != Some('\n') {
146 self.skip_to_next_line();
147 } else if self.peek_char() == Some('\n') {
148 self.advance(); }
150 value
151 };
152
153 map.insert(key, value);
154
155 if self.pos >= self.input.len() {
158 break;
159 }
160
161 let next_line_indent = self.count_indent(indent);
163 if next_line_indent < initial_indent {
164 break;
165 }
166 if next_line_indent == 0 && initial_indent == 0 && !map.is_empty() {
167 let saved_pos = self.pos;
169 let key_result = self.parse_key();
170 self.pos = saved_pos;
171 if key_result.is_err() {
172 break;
173 }
174 }
175 }
176
177 Ok(Value::Object(map))
178 }
179
180 fn parse_value(&mut self) -> Result<Value, Error> {
181 self.skip_whitespace();
182 match self.peek_char() {
183 Some('[') => self.parse_array_value(),
184 Some('"') => self.parse_string(),
185 Some('-') => {
186 self.advance();
188 self.skip_whitespace();
189 self.parse_value()
190 }
191 Some(ch) if ch.is_ascii_digit() || ch == '-' => self.parse_number(),
192 Some(ch) if ch.is_ascii_alphabetic() => {
193 let start = self.pos;
195 let value = self.parse_boolean_or_null();
196 if value.is_ok() {
197 return value;
198 }
199 self.pos = start;
201 self.parse_unquoted_string()
202 }
203 _ => self.parse_unquoted_string(),
204 }
205 }
206
207 fn parse_unquoted_string(&mut self) -> Result<Value, Error> {
208 let start = self.pos;
209 while self.pos < self.input.len() {
211 match self.peek_char() {
212 Some(ch) if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' => break,
213 Some(_) => self.advance(),
214 None => break,
215 }
216 }
217 if self.pos == start {
218 return Err(Error::parse(self.pos, "Expected value"));
219 }
220 Ok(Value::String(self.input[start..self.pos].to_string()))
221 }
222
223 fn parse_value_until_newline(&mut self) -> Result<Value, Error> {
224 self.skip_whitespace();
225
226 match self.peek_char() {
228 Some('[') => {
229 self.parse_array_value()
231 }
232 Some('"') => self.parse_string(),
233 Some(ch) if ch.is_ascii_digit() || ch == '-' => self.parse_number(),
234 Some(ch) if ch.is_ascii_alphabetic() => {
235 let start_pos = self.pos;
237 let value = self.parse_boolean_or_null();
238 if value.is_ok() {
239 return value;
240 }
241 self.pos = start_pos;
243 self.parse_unquoted_string()
244 }
245 _ => self.parse_unquoted_string(),
246 }
247 }
248
249 fn parse_array_value(&mut self) -> Result<Value, Error> {
250 if self.peek_char() != Some('[') {
251 return Err(Error::parse(self.pos, "Expected '['"));
252 }
253 self.advance(); let has_length_marker = self.peek_char() == Some('#');
257 if has_length_marker {
258 self.advance(); }
260
261 let length_str = self.parse_while(|ch| ch.is_ascii_digit());
262 let length: usize = length_str
263 .parse()
264 .map_err(|_| Error::parse(self.pos, "Invalid array length"))?;
265
266 if self.peek_char() != Some(']') {
267 return Err(Error::parse(self.pos, "Expected ']'"));
268 }
269 self.advance(); if self.peek_char() == Some('{') {
273 self.parse_tabular_array(length)
274 } else if self.peek_char() == Some(':') {
275 self.advance(); self.skip_whitespace();
277
278 if length == 0 {
280 self.skip_whitespace();
282 if self.peek_char() == Some('\n') {
283 self.advance();
284 }
285 Ok(Value::Array(Vec::new()))
286 } else if self.peek_char() == Some('\n') || self.pos >= self.input.len() {
287 self.parse_list_array(length)
288 } else {
289 self.parse_inline_array(length)
290 }
291 } else {
292 Err(Error::parse(
293 self.pos,
294 "Expected ':' or '{' after array length",
295 ))
296 }
297 }
298
299 fn parse_tabular_array(&mut self, expected_length: usize) -> Result<Value, Error> {
300 if self.peek_char() != Some('{') {
301 return Err(Error::parse(self.pos, "Expected '{'"));
302 }
303 self.advance(); let fields_str = self.parse_while(|ch| ch != '}');
307 let fields: Vec<&str> = fields_str.split(',').map(|s| s.trim()).collect();
308 let delimiter = self.detect_delimiter();
309
310 if self.peek_char() != Some('}') {
311 return Err(Error::parse(self.pos, "Expected '}'"));
312 }
313 self.advance(); if self.peek_char() != Some(':') {
316 return Err(Error::parse(self.pos, "Expected ':'"));
317 }
318 self.advance(); if self.peek_char() == Some('\n') {
321 self.advance();
322 }
323
324 let mut items = Vec::new();
326 let indent = self.options.get_indent();
327 let base_indent = self.count_indent(indent);
329
330 for _ in 0..expected_length {
331 if self.pos >= self.input.len() {
332 break;
333 }
334
335 let line_indent = self.count_indent(indent);
337 if line_indent < base_indent {
338 break; }
340
341 for _ in 0..(line_indent * indent) {
343 if self.peek_char() == Some(' ') {
344 self.advance();
345 } else {
346 break;
347 }
348 }
349
350 let mut obj = Map::new();
351 let start = self.pos;
352 while self.pos < self.input.len() && self.peek_char() != Some('\n') {
354 self.advance();
355 }
356 let row = &self.input[start..self.pos];
357 let values: Vec<&str> = self.split_row(row, delimiter);
358
359 if values.len() != fields.len() && self.options.get_strict() {
360 return Err(Error::LengthMismatch {
361 expected: fields.len(),
362 found: values.len(),
363 });
364 }
365
366 for (i, field) in fields.iter().enumerate() {
367 let value_str = values.get(i).unwrap_or(&"");
368 let value = self.parse_primitive_value(value_str.trim())?;
369 obj.insert(field.to_string(), value);
370 }
371
372 items.push(Value::Object(obj));
373 if self.pos < self.input.len() && self.peek_char() == Some('\n') {
375 self.advance();
376 }
377 }
378
379 if self.options.get_strict() && items.len() != expected_length {
380 return Err(Error::LengthMismatch {
381 expected: expected_length,
382 found: items.len(),
383 });
384 }
385
386 Ok(Value::Array(items))
387 }
388
389 fn parse_inline_array(&mut self, expected_length: usize) -> Result<Value, Error> {
390 let delimiter = self.detect_delimiter();
391 let start = self.pos;
392 while self.pos < self.input.len() && self.peek_char() != Some('\n') {
394 self.advance();
395 }
396 let row = &self.input[start..self.pos];
397 let values: Vec<&str> = self.split_row(row, delimiter);
398
399 let mut items = Vec::new();
400 for value_str in values {
401 let trimmed = value_str.trim();
402 if !trimmed.is_empty() {
403 items.push(self.parse_primitive_value(trimmed)?);
404 }
405 }
406
407 if self.options.get_strict() && items.len() != expected_length {
408 return Err(Error::LengthMismatch {
409 expected: expected_length,
410 found: items.len(),
411 });
412 }
413
414 Ok(Value::Array(items))
415 }
416
417 fn parse_list_array(&mut self, expected_length: usize) -> Result<Value, Error> {
418 if self.peek_char() == Some('\n') {
420 self.advance();
421 }
422 let indent = self.options.get_indent();
423 let base_indent = self.count_indent(indent);
425 let mut items = Vec::new();
426
427 for _ in 0..expected_length {
428 if self.pos >= self.input.len() {
429 break;
430 }
431
432 let line_indent = self.count_indent(indent);
434 if line_indent < base_indent {
435 break; }
437
438 for _ in 0..(line_indent * indent) {
440 if self.peek_char() == Some(' ') {
441 self.advance();
442 } else {
443 break;
444 }
445 }
446
447 let has_dash = self.peek_char() == Some('-');
449 if has_dash {
450 self.advance(); self.skip_whitespace();
452 }
453
454 let line_start = self.pos;
458 let line_end = self.input[line_start..]
459 .find('\n')
460 .map(|i| line_start + i)
461 .unwrap_or(self.input.len());
462 let line = &self.input[line_start..line_end].trim();
463
464 let value = if self.peek_char() == Some('[') {
465 self.parse_array_value()?
466 } else if line.contains(':')
467 && !line.starts_with('"')
468 && line.matches(':').count() == 1
469 && !line.trim_start().starts_with('-')
470 {
471 let key = self.parse_key()?;
474 self.skip_whitespace();
475 if self.peek_char() != Some(':') {
476 return Err(Error::parse(
477 self.pos,
478 format!("Expected ':' after key '{key}'"),
479 ));
480 }
481 self.advance(); self.skip_whitespace();
483 let val = self.parse_value()?;
484 let mut obj = Map::new();
485 obj.insert(key, val);
486 Value::Object(obj)
487 } else {
488 self.parse_value()?
490 };
491 items.push(value);
492 if self.pos < self.input.len() && self.peek_char() == Some('\n') {
494 self.advance();
495 }
496 }
497
498 if self.options.get_strict() && items.len() != expected_length {
499 return Err(Error::LengthMismatch {
500 expected: expected_length,
501 found: items.len(),
502 });
503 }
504
505 Ok(Value::Array(items))
506 }
507
508 fn parse_primitive_value(&self, s: &str) -> Result<Value, Error> {
509 if s.is_empty() {
510 return Ok(Value::Null);
511 }
512
513 if s == "true" {
515 return Ok(Value::Bool(true));
516 }
517 if s == "false" {
518 return Ok(Value::Bool(false));
519 }
520
521 if let Ok(n) = s.parse::<i64>() {
523 return Ok(Value::Number(n.into()));
524 }
525 if let Ok(n) = s.parse::<f64>() {
526 return Ok(Value::Number(
527 serde_json::Number::from_f64(n)
528 .ok_or_else(|| Error::InvalidNumber(s.to_string()))?,
529 ));
530 }
531
532 if s.starts_with('"') && s.ends_with('"') {
534 self.parse_quoted_string(s)
535 } else {
536 Ok(Value::String(s.to_string()))
537 }
538 }
539
540 fn parse_quoted_string(&self, s: &str) -> Result<Value, Error> {
541 let mut result = String::new();
542 let chars: Vec<char> = s.chars().collect();
543 let mut i = 1; while i < chars.len() - 1 {
546 match chars[i] {
548 '\\' => {
549 i += 1;
550 if i >= chars.len() - 1 {
551 return Err(Error::InvalidEscape("Unterminated escape".to_string()));
552 }
553 match chars[i] {
554 '"' => result.push('"'),
555 '\\' => result.push('\\'),
556 'n' => result.push('\n'),
557 'r' => result.push('\r'),
558 't' => result.push('\t'),
559 _ => {
560 return Err(Error::InvalidEscape(format!("\\{}", chars[i])));
561 }
562 }
563 }
564 ch => result.push(ch),
565 }
566 i += 1;
567 }
568
569 Ok(Value::String(result))
570 }
571
572 fn parse_string(&mut self) -> Result<Value, Error> {
573 if self.peek_char() != Some('"') {
574 return Err(Error::parse(self.pos, "Expected '\"'"));
575 }
576 self.advance(); let start = self.pos;
579 let mut escaped = false;
580
581 while self.pos < self.input.len() {
582 let ch = self.input.chars().nth(self.pos).unwrap();
583 if escaped {
584 escaped = false;
585 } else if ch == '\\' {
586 escaped = true;
587 } else if ch == '"' {
588 let s = &self.input[start..self.pos];
589 self.advance(); return self.parse_quoted_string(&format!("\"{s}\""));
591 }
592 self.advance();
593 }
594
595 Err(Error::UnterminatedString)
596 }
597
598 fn parse_number(&mut self) -> Result<Value, Error> {
599 let start = self.pos;
600 let mut has_dot = false;
601
602 if self.peek_char() == Some('-') {
603 self.advance();
604 }
605
606 while self.pos < self.input.len() {
607 match self.peek_char() {
608 Some(ch) if ch.is_ascii_digit() => {
609 self.advance();
610 }
611 Some('.') if !has_dot => {
612 has_dot = true;
613 self.advance();
614 }
615 _ => break,
616 }
617 }
618
619 let s = &self.input[start..self.pos];
620 if has_dot {
621 let n = s
622 .parse::<f64>()
623 .map_err(|_| Error::InvalidNumber(s.to_string()))?;
624 serde_json::Number::from_f64(n)
625 .ok_or_else(|| Error::InvalidNumber(s.to_string()))
626 .map(Value::Number)
627 } else {
628 s.parse::<i64>()
629 .map(|n| Value::Number(n.into()))
630 .map_err(|_| Error::InvalidNumber(s.to_string()))
631 }
632 }
633
634 fn parse_boolean_or_null(&mut self) -> Result<Value, Error> {
635 let start = self.pos;
636 self.parse_while(|ch| ch.is_ascii_alphabetic());
637 let s = &self.input[start..self.pos];
638
639 match s {
640 "true" => Ok(Value::Bool(true)),
641 "false" => Ok(Value::Bool(false)),
642 "null" => Ok(Value::Null),
643 _ => {
644 self.pos = start;
646 Err(Error::parse(
647 self.pos,
648 format!("Not a boolean or null: {s}"),
649 ))
650 }
651 }
652 }
653
654 fn parse_key(&mut self) -> Result<String, Error> {
655 self.skip_whitespace();
656 let start = self.pos;
657 while self.pos < self.input.len() {
659 match self.peek_char() {
660 Some(ch) if ch == ':' || ch == '[' || ch == ' ' || ch == '\n' || ch == '\t' => {
661 break
662 }
663 Some(_) => self.advance(),
664 None => break,
665 }
666 }
667 if self.pos == start {
668 return Err(Error::parse(self.pos, "Expected key"));
669 }
670 Ok(self.input[start..self.pos].to_string())
671 }
672
673 fn detect_delimiter(&self) -> char {
674 let remaining = &self.input[self.pos..];
676
677 if remaining.len() >= 32 {
680 simd::detect_delimiter_simd(remaining)
681 } else {
682 simd::detect_delimiter_fallback(remaining)
683 }
684 }
685
686 fn split_row<'b>(&self, row: &'b str, delimiter: char) -> Vec<&'b str> {
687 if row.len() >= 32 {
690 simd::split_row_simd(row, delimiter)
691 } else {
692 simd::split_row_fallback(row, delimiter)
693 }
694 }
695
696 fn count_indent(&mut self, indent_size: usize) -> usize {
697 let start = self.pos;
698 let mut count = 0;
699 let indent_str = " ".repeat(indent_size);
700 while self.pos < self.input.len() {
701 if self.pos + indent_size <= self.input.len() {
702 let slice = &self.input[self.pos..self.pos + indent_size];
703 if slice == indent_str {
704 count += 1;
705 self.pos += indent_size;
706 } else {
707 break;
708 }
709 } else {
710 break;
711 }
712 }
713 let indent_level = count;
714 self.pos = start;
715 indent_level
716 }
717
718 fn skip_whitespace(&mut self) {
719 while self.pos < self.input.len() {
720 match self.input.chars().nth(self.pos) {
721 Some(' ') | Some('\t') => self.pos += 1,
722 _ => break,
723 }
724 }
725 }
726
727 fn skip_to_next_line(&mut self) {
728 while self.pos < self.input.len() {
729 if self.input.chars().nth(self.pos) == Some('\n') {
730 self.pos += 1;
731 break;
732 }
733 self.pos += 1;
734 }
735 }
736
737 fn parse_while<F>(&mut self, mut pred: F) -> &'a str
738 where
739 F: FnMut(char) -> bool,
740 {
741 let start = self.pos;
742 while self.pos < self.input.len() {
743 if let Some(ch) = self.input.chars().nth(self.pos) {
744 if pred(ch) {
745 self.pos += 1;
746 } else {
747 break;
748 }
749 } else {
750 break;
751 }
752 }
753 &self.input[start..self.pos]
754 }
755
756 fn peek_char(&self) -> Option<char> {
757 self.input.chars().nth(self.pos)
758 }
759
760 fn advance(&mut self) {
761 if self.pos < self.input.len() {
762 self.pos += 1;
763 }
764 }
765}
766
767pub fn decode_stream<R: Read>(reader: R, options: Option<&DecodeOptions>) -> Result<Value, Error> {
791 let default_opts = DecodeOptions::default();
792 let opts = options.unwrap_or(&default_opts);
793 let mut buf_reader = BufReader::with_capacity(8192, reader);
794 let mut parser = StreamingParser::new(&mut buf_reader, opts)?;
795 parser.parse()
796}
797
/// Parser state used by [`decode_stream`]: text is pulled from `reader` into
/// `buffer` and parsed from there.
struct StreamingParser<'a, R: Read> {
    /// Source of the bytes still to be decoded.
    reader: &'a mut BufReader<R>,
    /// Text read from `reader` so far.
    buffer: String,
    /// Current read position within `buffer`.
    pos: usize,
    /// Decoding options (indent width and strict mode are read from here).
    options: &'a DecodeOptions,
    /// Set once a read from `reader` returned zero bytes.
    eof: bool,
}
805
806impl<'a, R: Read> StreamingParser<'a, R> {
807 fn new(reader: &'a mut BufReader<R>, options: &'a DecodeOptions) -> Result<Self, Error> {
808 let mut parser = Self {
809 reader,
810 buffer: String::new(),
811 pos: 0,
812 options,
813 eof: false,
814 };
815 parser.fill_buffer(8192)?;
817 Ok(parser)
818 }
819
820 fn fill_buffer(&mut self, min_size: usize) -> Result<(), Error> {
821 if self.eof {
822 return Ok(());
823 }
824
825 if self.pos > 8192 && self.pos > self.buffer.len() / 2 {
828 self.buffer.drain(..self.pos);
829 self.pos = 0;
830 }
831
832 let needed = if self.pos + min_size > self.buffer.len() {
834 self.pos + min_size - self.buffer.len()
835 } else {
836 0
837 };
838
839 if needed > 0 {
840 let mut temp_buf = vec![0u8; needed.max(8192)];
841 match self.reader.read(&mut temp_buf) {
842 Ok(0) => {
843 self.eof = true;
844 }
845 Ok(n) => {
846 let chunk = String::from_utf8(temp_buf[..n].to_vec())
847 .map_err(|e| Error::Io(format!("Invalid UTF-8: {}", e)))?;
848 self.buffer.push_str(&chunk);
849 }
850 Err(e) => return Err(Error::Io(e.to_string())),
851 }
852 }
853
854 Ok(())
855 }
856
857 fn ensure_buffer(&mut self, needed: usize) -> Result<(), Error> {
858 if self.pos + needed > self.buffer.len() && !self.eof {
859 self.fill_buffer(self.pos + needed)?;
860 }
861 Ok(())
862 }
863
864 fn parse(&mut self) -> Result<Value, Error> {
865 self.skip_whitespace();
866 if self.pos >= self.buffer.len() && self.eof {
867 return Ok(Value::Object(Map::new()));
868 }
869
870 if self.peek_char() == Some('[') {
872 self.parse_array_value()
873 } else {
874 self.parse_object()
875 }
876 }
877
    /// Parses consecutive `key: value` / `key[N]...` lines into an object.
    ///
    /// The indentation level found at the current position is taken as the
    /// object's own level. Parsing stops at the first line indented less than
    /// that, when the buffer is exhausted at end of input, or (for an object at
    /// level 0) at a line on which no key can be parsed.
    ///
    /// # Errors
    ///
    /// Returns a parse error when a key is not followed by `:` or `[`, and
    /// propagates errors from key, value, array and indentation parsing.
    fn parse_object(&mut self) -> Result<Value, Error> {
        let mut map = Map::new();
        let indent = self.options.get_indent();
        let initial_indent = self.count_indent(indent)?;

        loop {
            let line_indent = self.count_indent(indent)?;
            // A shallower line belongs to an enclosing object.
            if line_indent < initial_indent {
                break;
            }

            // Consume this line's indentation (at most `line_indent * indent` spaces).
            for _ in 0..(line_indent * indent) {
                if self.peek_char() == Some(' ') {
                    self.advance();
                } else {
                    break;
                }
            }

            if self.pos >= self.buffer.len() && self.eof {
                break;
            }
            // At level 0, probe for a key without consuming it; stop if none parses.
            if line_indent == 0 && !map.is_empty() && initial_indent == 0 {
                let saved_pos = self.pos;
                let key_result = self.parse_key();
                self.pos = saved_pos;
                if key_result.is_err() {
                    break;
                }
            }

            let key = self.parse_key()?;
            self.skip_whitespace();

            // `key[...]` introduces an array header directly after the key.
            let has_array_notation = self.peek_char() == Some('[');

            if !has_array_notation {
                if self.peek_char() != Some(':') {
                    return Err(Error::parse(
                        self.pos,
                        format!("Expected ':' after key '{key}'"),
                    ));
                }
                self.advance();
                self.skip_whitespace();
            } else {
            }

            let value = if has_array_notation {
                let value = self.parse_array_value()?;
                if self.pos < self.buffer.len() && self.peek_char() == Some('\n') {
                    self.advance();
                }
                value
            } else if self.peek_char() == Some('\n') {
                // `key:` alone on its line: a deeper-indented block is the value,
                // otherwise the value is null.
                self.advance();
                let next_indent = self.count_indent(indent)?;
                if next_indent > line_indent {
                    if self.peek_char() == Some('[') {
                        self.parse_array_value()?
                    } else {
                        self.parse_object()?
                    }
                } else {
                    Value::Null
                }
            } else {
                let value = self.parse_value_until_newline()?;
                // Whatever remains on the line after the value is skipped.
                if self.pos < self.buffer.len() && self.peek_char() != Some('\n') {
                    self.skip_to_next_line();
                } else if self.peek_char() == Some('\n') {
                    self.advance();
                }
                value
            };

            map.insert(key, value);

            if self.pos >= self.buffer.len() && self.eof {
                break;
            }

            let next_line_indent = self.count_indent(indent)?;
            if next_line_indent < initial_indent {
                break;
            }
            // Same non-consuming key probe as at the top of the loop.
            if next_line_indent == 0 && initial_indent == 0 && !map.is_empty() {
                let saved_pos = self.pos;
                let key_result = self.parse_key();
                self.pos = saved_pos;
                if key_result.is_err() {
                    break;
                }
            }
        }

        Ok(Value::Object(map))
    }
998
999 fn parse_value(&mut self) -> Result<Value, Error> {
1000 self.skip_whitespace();
1001 match self.peek_char() {
1002 Some('[') => self.parse_array_value(),
1003 Some('"') => self.parse_string(),
1004 Some('-') => {
1005 self.advance();
1007 self.skip_whitespace();
1008 self.parse_value()
1009 }
1010 Some(ch) if ch.is_ascii_digit() || ch == '-' => self.parse_number(),
1011 Some(ch) if ch.is_ascii_alphabetic() => {
1012 let start = self.pos;
1014 let value = self.parse_boolean_or_null();
1015 if value.is_ok() {
1016 return value;
1017 }
1018 self.pos = start;
1020 self.parse_unquoted_string()
1021 }
1022 _ => self.parse_unquoted_string(),
1023 }
1024 }
1025
1026 fn parse_unquoted_string(&mut self) -> Result<Value, Error> {
1027 let start = self.pos;
1028 while self.pos < self.buffer.len() || !self.eof {
1030 self.ensure_buffer(1)?;
1031 if self.pos >= self.buffer.len() {
1032 break;
1033 }
1034 match self.peek_char() {
1035 Some(ch) if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' => break,
1036 Some(_) => self.advance(),
1037 None => break,
1038 }
1039 }
1040 if self.pos == start {
1041 return Err(Error::parse(self.pos, "Expected value"));
1042 }
1043 Ok(Value::String(self.buffer[start..self.pos].to_string()))
1044 }
1045
1046 fn parse_value_until_newline(&mut self) -> Result<Value, Error> {
1047 self.skip_whitespace();
1048
1049 match self.peek_char() {
1051 Some('[') => {
1052 self.parse_array_value()
1054 }
1055 Some('"') => self.parse_string(),
1056 Some(ch) if ch.is_ascii_digit() || ch == '-' => self.parse_number(),
1057 Some(ch) if ch.is_ascii_alphabetic() => {
1058 let start_pos = self.pos;
1060 let value = self.parse_boolean_or_null();
1061 if value.is_ok() {
1062 return value;
1063 }
1064 self.pos = start_pos;
1066 self.parse_unquoted_string()
1067 }
1068 _ => self.parse_unquoted_string(),
1069 }
1070 }
1071
1072 fn parse_array_value(&mut self) -> Result<Value, Error> {
1073 if self.peek_char() != Some('[') {
1074 return Err(Error::parse(self.pos, "Expected '['"));
1075 }
1076 self.advance(); let has_length_marker = self.peek_char() == Some('#');
1080 if has_length_marker {
1081 self.advance(); }
1083
1084 let length_str = self.parse_while(|ch| ch.is_ascii_digit())?;
1085 let length: usize = length_str
1086 .parse()
1087 .map_err(|_| Error::parse(self.pos, "Invalid array length"))?;
1088
1089 if self.peek_char() != Some(']') {
1090 return Err(Error::parse(self.pos, "Expected ']'"));
1091 }
1092 self.advance(); if self.peek_char() == Some('{') {
1096 self.parse_tabular_array(length)
1097 } else if self.peek_char() == Some(':') {
1098 self.advance(); self.skip_whitespace();
1100
1101 if length == 0 {
1103 self.skip_whitespace();
1105 if self.peek_char() == Some('\n') {
1106 self.advance();
1107 }
1108 Ok(Value::Array(Vec::new()))
1109 } else if self.peek_char() == Some('\n') || (self.pos >= self.buffer.len() && self.eof)
1110 {
1111 self.parse_list_array(length)
1112 } else {
1113 self.parse_inline_array(length)
1114 }
1115 } else {
1116 Err(Error::parse(
1117 self.pos,
1118 "Expected ':' or '{' after array length",
1119 ))
1120 }
1121 }
1122
    /// Parses `{field,...}:` followed by up to `expected_length` delimited rows,
    /// producing one object per row keyed by the header's field names.
    ///
    /// # Errors
    ///
    /// Parse errors for a missing `{`, `}` or `:`. In strict mode, a row whose
    /// cell count differs from the field count, or a final row count different
    /// from `expected_length`, is reported as `LengthMismatch`.
    fn parse_tabular_array(&mut self, expected_length: usize) -> Result<Value, Error> {
        if self.peek_char() != Some('{') {
            return Err(Error::parse(self.pos, "Expected '{'"));
        }
        self.advance();
        // Field names are everything up to `}`, split on commas and trimmed.
        let fields_str = self.parse_while(|ch| ch != '}')?;
        let fields: Vec<&str> = fields_str.split(',').map(|s| s.trim()).collect();
        let delimiter = self.detect_delimiter();

        if self.peek_char() != Some('}') {
            return Err(Error::parse(self.pos, "Expected '}'"));
        }
        self.advance();
        if self.peek_char() != Some(':') {
            return Err(Error::parse(self.pos, "Expected ':'"));
        }
        self.advance();
        if self.peek_char() == Some('\n') {
            self.advance();
        }

        let mut items = Vec::new();
        let indent = self.options.get_indent();
        // Indentation of the first row; a shallower line ends the table.
        let base_indent = self.count_indent(indent)?;

        for _ in 0..expected_length {
            self.ensure_buffer(100)?;

            if self.pos >= self.buffer.len() && self.eof {
                break;
            }

            let line_indent = self.count_indent(indent)?;
            if line_indent < base_indent {
                break;
            }

            // Consume this row's indentation.
            for _ in 0..(line_indent * indent) {
                if self.peek_char() == Some(' ') {
                    self.advance();
                } else {
                    break;
                }
            }

            let mut obj = Map::new();
            let start = self.pos;
            // Advance to the end of the row, refilling the buffer as needed.
            loop {
                self.ensure_buffer(1)?;
                if self.pos >= self.buffer.len() && self.eof {
                    break;
                }
                if self.pos < self.buffer.len() && self.peek_char() == Some('\n') {
                    break;
                }
                if self.pos < self.buffer.len() {
                    self.advance();
                } else {
                    break;
                }
            }
            let row = &self.buffer[start..self.pos];
            let values: Vec<&str> = self.split_row(row, delimiter);

            if values.len() != fields.len() && self.options.get_strict() {
                return Err(Error::LengthMismatch {
                    expected: fields.len(),
                    found: values.len(),
                });
            }

            for (i, field) in fields.iter().enumerate() {
                // A missing trailing cell is treated as empty text.
                let value_str = values.get(i).unwrap_or(&"");
                let value = self.parse_primitive_value(value_str.trim())?;
                obj.insert(field.to_string(), value);
            }

            items.push(Value::Object(obj));
            if self.pos < self.buffer.len() && self.peek_char() == Some('\n') {
                self.advance();
            }
        }

        if self.options.get_strict() && items.len() != expected_length {
            return Err(Error::LengthMismatch {
                expected: expected_length,
                found: items.len(),
            });
        }

        Ok(Value::Array(items))
    }
1226
1227 fn parse_inline_array(&mut self, expected_length: usize) -> Result<Value, Error> {
1228 let delimiter = self.detect_delimiter();
1229 let start = self.pos;
1230 while self.pos < self.buffer.len() && self.peek_char() != Some('\n') {
1232 self.advance();
1233 }
1234 let row = &self.buffer[start..self.pos];
1235 let values: Vec<&str> = self.split_row(row, delimiter);
1236
1237 let mut items = Vec::new();
1238 for value_str in values {
1239 let trimmed = value_str.trim();
1240 if !trimmed.is_empty() {
1241 items.push(self.parse_primitive_value(trimmed)?);
1242 }
1243 }
1244
1245 if self.options.get_strict() && items.len() != expected_length {
1246 return Err(Error::LengthMismatch {
1247 expected: expected_length,
1248 found: items.len(),
1249 });
1250 }
1251
1252 Ok(Value::Array(items))
1253 }
1254
    /// Parses up to `expected_length` list items, one per line, each optionally
    /// introduced by `-`.
    ///
    /// An item is parsed as a nested array when it starts with `[`, as a
    /// single-entry object when the buffered remainder of the line contains
    /// exactly one `:` and does not start with `"` or `-`, and as a plain value
    /// otherwise.
    ///
    /// # Errors
    ///
    /// Propagates item parsing errors; in strict mode an item count different
    /// from `expected_length` is a `LengthMismatch`.
    fn parse_list_array(&mut self, expected_length: usize) -> Result<Value, Error> {
        if self.peek_char() == Some('\n') {
            self.advance();
        }
        let indent = self.options.get_indent();
        // Indentation of the first item; a shallower line ends the list.
        let base_indent = self.count_indent(indent)?;
        let mut items = Vec::new();

        for _ in 0..expected_length {
            if self.pos >= self.buffer.len() && self.eof {
                break;
            }

            let line_indent = self.count_indent(indent)?;
            if line_indent < base_indent {
                break;
            }

            // Consume this line's indentation.
            for _ in 0..(line_indent * indent) {
                if self.peek_char() == Some(' ') {
                    self.advance();
                } else {
                    break;
                }
            }

            let has_dash = self.peek_char() == Some('-');
            if has_dash {
                self.advance();
                self.skip_whitespace();
            }

            // Inspect the rest of the line (as far as it is buffered) to decide
            // how the item should be read.
            let line_start = self.pos;
            let line_end = self.buffer[line_start..]
                .find('\n')
                .map(|i| line_start + i)
                .unwrap_or(self.buffer.len());
            let line = &self.buffer[line_start..line_end].trim();

            let value = if self.peek_char() == Some('[') {
                self.parse_array_value()?
            } else if line.contains(':')
                && !line.starts_with('"')
                && line.matches(':').count() == 1
                && !line.trim_start().starts_with('-')
            {
                // `key: value` item becomes an object with that single entry.
                let key = self.parse_key()?;
                self.skip_whitespace();
                if self.peek_char() != Some(':') {
                    return Err(Error::parse(
                        self.pos,
                        format!("Expected ':' after key '{key}'"),
                    ));
                }
                self.advance();
                self.skip_whitespace();
                let val = self.parse_value()?;
                let mut obj = Map::new();
                obj.insert(key, val);
                Value::Object(obj)
            } else {
                self.parse_value()?
            };
            items.push(value);
            if self.pos < self.buffer.len() && self.peek_char() == Some('\n') {
                self.advance();
            }
        }

        if self.options.get_strict() && items.len() != expected_length {
            return Err(Error::LengthMismatch {
                expected: expected_length,
                found: items.len(),
            });
        }

        Ok(Value::Array(items))
    }
1342
1343 fn parse_primitive_value(&self, s: &str) -> Result<Value, Error> {
1344 if s.is_empty() {
1345 return Ok(Value::Null);
1346 }
1347
1348 if s == "true" {
1350 return Ok(Value::Bool(true));
1351 }
1352 if s == "false" {
1353 return Ok(Value::Bool(false));
1354 }
1355
1356 if let Ok(n) = s.parse::<i64>() {
1358 return Ok(Value::Number(n.into()));
1359 }
1360 if let Ok(n) = s.parse::<f64>() {
1361 return Ok(Value::Number(
1362 serde_json::Number::from_f64(n)
1363 .ok_or_else(|| Error::InvalidNumber(s.to_string()))?,
1364 ));
1365 }
1366
1367 if s.starts_with('"') && s.ends_with('"') {
1369 self.parse_quoted_string(s)
1370 } else {
1371 Ok(Value::String(s.to_string()))
1372 }
1373 }
1374
1375 fn parse_quoted_string(&self, s: &str) -> Result<Value, Error> {
1376 let mut result = String::new();
1377 let chars: Vec<char> = s.chars().collect();
1378 let mut i = 1; while i < chars.len() - 1 {
1381 match chars[i] {
1383 '\\' => {
1384 i += 1;
1385 if i >= chars.len() - 1 {
1386 return Err(Error::InvalidEscape("Unterminated escape".to_string()));
1387 }
1388 match chars[i] {
1389 '"' => result.push('"'),
1390 '\\' => result.push('\\'),
1391 'n' => result.push('\n'),
1392 'r' => result.push('\r'),
1393 't' => result.push('\t'),
1394 _ => {
1395 return Err(Error::InvalidEscape(format!("\\{}", chars[i])));
1396 }
1397 }
1398 }
1399 ch => result.push(ch),
1400 }
1401 i += 1;
1402 }
1403
1404 Ok(Value::String(result))
1405 }
1406
1407 fn parse_string(&mut self) -> Result<Value, Error> {
1408 if self.peek_char() != Some('"') {
1409 return Err(Error::parse(self.pos, "Expected '\"'"));
1410 }
1411 self.advance(); let start = self.pos;
1414 let mut escaped = false;
1415
1416 while self.pos < self.buffer.len() || !self.eof {
1417 self.ensure_buffer(1)?;
1418 if self.pos >= self.buffer.len() {
1419 break;
1420 }
1421 let ch = self.buffer.chars().nth(self.pos).unwrap();
1422 if escaped {
1423 escaped = false;
1424 } else if ch == '\\' {
1425 escaped = true;
1426 } else if ch == '"' {
1427 let s = self.buffer[start..self.pos].to_string();
1428 self.advance(); return self.parse_quoted_string(&format!("\"{s}\""));
1430 }
1431 self.advance();
1432 }
1433
1434 Err(Error::UnterminatedString)
1435 }
1436
1437 fn parse_number(&mut self) -> Result<Value, Error> {
1438 let start = self.pos;
1439 let mut has_dot = false;
1440
1441 if self.peek_char() == Some('-') {
1442 self.advance();
1443 }
1444
1445 while self.pos < self.buffer.len() || !self.eof {
1446 self.ensure_buffer(1)?;
1447 if self.pos >= self.buffer.len() {
1448 break;
1449 }
1450 match self.peek_char() {
1451 Some(ch) if ch.is_ascii_digit() => {
1452 self.advance();
1453 }
1454 Some('.') if !has_dot => {
1455 has_dot = true;
1456 self.advance();
1457 }
1458 _ => break,
1459 }
1460 }
1461
1462 let s = &self.buffer[start..self.pos];
1463 if has_dot {
1464 let n = s
1465 .parse::<f64>()
1466 .map_err(|_| Error::InvalidNumber(s.to_string()))?;
1467 serde_json::Number::from_f64(n)
1468 .ok_or_else(|| Error::InvalidNumber(s.to_string()))
1469 .map(Value::Number)
1470 } else {
1471 s.parse::<i64>()
1472 .map(|n| Value::Number(n.into()))
1473 .map_err(|_| Error::InvalidNumber(s.to_string()))
1474 }
1475 }
1476
1477 fn parse_boolean_or_null(&mut self) -> Result<Value, Error> {
1478 let start = self.pos;
1479 self.parse_while(|ch| ch.is_ascii_alphabetic())?;
1480 let s = &self.buffer[start..self.pos];
1481
1482 match s {
1483 "true" => Ok(Value::Bool(true)),
1484 "false" => Ok(Value::Bool(false)),
1485 "null" => Ok(Value::Null),
1486 _ => {
1487 self.pos = start;
1489 Err(Error::parse(
1490 self.pos,
1491 format!("Not a boolean or null: {s}"),
1492 ))
1493 }
1494 }
1495 }
1496
1497 fn parse_key(&mut self) -> Result<String, Error> {
1498 self.skip_whitespace();
1499 let start = self.pos;
1500 while self.pos < self.buffer.len() || !self.eof {
1502 self.ensure_buffer(1)?;
1503 if self.pos >= self.buffer.len() {
1504 break;
1505 }
1506 match self.peek_char() {
1507 Some(ch) if ch == ':' || ch == '[' || ch == ' ' || ch == '\n' || ch == '\t' => {
1508 break
1509 }
1510 Some(_) => self.advance(),
1511 None => break,
1512 }
1513 }
1514 if self.pos == start {
1515 return Err(Error::parse(self.pos, "Expected key"));
1516 }
1517 Ok(self.buffer[start..self.pos].to_string())
1518 }
1519
1520 fn detect_delimiter(&self) -> char {
1521 let remaining = &self.buffer[self.pos..];
1523
1524 if remaining.len() >= 32 {
1526 simd::detect_delimiter_simd(remaining)
1527 } else {
1528 simd::detect_delimiter_fallback(remaining)
1529 }
1530 }
1531
1532 fn split_row<'b>(&self, row: &'b str, delimiter: char) -> Vec<&'b str> {
1533 if row.len() >= 32 {
1535 simd::split_row_simd(row, delimiter)
1536 } else {
1537 simd::split_row_fallback(row, delimiter)
1538 }
1539 }
1540
1541 fn count_indent(&mut self, indent_size: usize) -> Result<usize, Error> {
1542 let start = self.pos;
1543 let mut count = 0;
1544 let indent_str = " ".repeat(indent_size);
1545 while self.pos < self.buffer.len() || !self.eof {
1546 self.ensure_buffer(indent_size)?;
1547 if self.pos + indent_size > self.buffer.len() {
1548 break;
1549 }
1550 let slice = &self.buffer[self.pos..self.pos + indent_size];
1551 if slice == indent_str {
1552 count += 1;
1553 self.pos += indent_size;
1554 } else {
1555 break;
1556 }
1557 }
1558 let indent_level = count;
1559 self.pos = start;
1560 Ok(indent_level)
1561 }
1562
1563 fn skip_whitespace(&mut self) {
1564 while self.pos < self.buffer.len() || !self.eof {
1565 self.ensure_buffer(1).ok();
1566 if self.pos >= self.buffer.len() {
1567 break;
1568 }
1569 match self.buffer.chars().nth(self.pos) {
1570 Some(' ') | Some('\t') => self.pos += 1,
1571 _ => break,
1572 }
1573 }
1574 }
1575
1576 fn skip_to_next_line(&mut self) {
1577 while self.pos < self.buffer.len() || !self.eof {
1578 self.ensure_buffer(1).ok();
1579 if self.pos >= self.buffer.len() {
1580 break;
1581 }
1582 if self.buffer.chars().nth(self.pos) == Some('\n') {
1583 self.pos += 1;
1584 break;
1585 }
1586 self.pos += 1;
1587 }
1588 }
1589
1590 fn parse_while<F>(&mut self, mut pred: F) -> Result<String, Error>
1591 where
1592 F: FnMut(char) -> bool,
1593 {
1594 let start = self.pos;
1595 while self.pos < self.buffer.len() || !self.eof {
1596 self.ensure_buffer(1)?;
1597 if self.pos >= self.buffer.len() {
1598 break;
1599 }
1600 if let Some(ch) = self.buffer.chars().nth(self.pos) {
1601 if pred(ch) {
1602 self.pos += 1;
1603 } else {
1604 break;
1605 }
1606 } else {
1607 break;
1608 }
1609 }
1610 Ok(self.buffer[start..self.pos].to_string())
1611 }
1612
1613 fn peek_char(&self) -> Option<char> {
1614 if self.pos < self.buffer.len() {
1615 self.buffer.chars().nth(self.pos)
1616 } else {
1617 None
1618 }
1619 }
1620
1621 fn advance(&mut self) {
1622 if self.pos < self.buffer.len() {
1623 self.pos += 1;
1624 }
1625 }
1626}