@@ -67,6 +67,15 @@ impl From<ProtobufFloatParseError> for LexerError {
67
67
}
68
68
}
69
69
70
/// The raw bytes for a single char or escape sequence in a string literal.
///
/// The raw bytes are available via the `IntoIterator` implementation, which
/// yields only the leading `len` bytes of the internal buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecodedBytes {
    // A single char can be up to 4 bytes when encoded in UTF-8.
    buf: [u8; 4],
    // Number of leading bytes of `buf` that are in use; the rest is padding.
    len: u8,
}
78
+
70
79
#[ derive( Copy , Clone ) ]
71
80
pub struct Lexer < ' a > {
72
81
language : ParserLanguage ,
@@ -440,24 +449,24 @@ impl<'a> Lexer<'a> {
440
449
// octEscape = '\' octalDigit octalDigit octalDigit
441
450
// charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
442
451
// quote = "'" | '"'
443
- pub fn next_byte_value ( & mut self ) -> LexerResult < u8 > {
452
+ pub fn next_str_lit_bytes ( & mut self ) -> LexerResult < DecodedBytes > {
444
453
match self . next_char ( ) ? {
445
454
'\\' => {
446
455
match self . next_char ( ) ? {
447
- '\'' => Ok ( b'\'' ) ,
448
- '"' => Ok ( b'"' ) ,
449
- '\\' => Ok ( b'\\' ) ,
450
- 'a' => Ok ( b'\x07' ) ,
451
- 'b' => Ok ( b'\x08' ) ,
452
- 'f' => Ok ( b'\x0c' ) ,
453
- 'n' => Ok ( b'\n' ) ,
454
- 'r' => Ok ( b'\r' ) ,
455
- 't' => Ok ( b'\t' ) ,
456
- 'v' => Ok ( b'\x0b' ) ,
456
+ '\'' => Ok ( b'\'' . into ( ) ) ,
457
+ '"' => Ok ( b'"' . into ( ) ) ,
458
+ '\\' => Ok ( b'\\' . into ( ) ) ,
459
+ 'a' => Ok ( b'\x07' . into ( ) ) ,
460
+ 'b' => Ok ( b'\x08' . into ( ) ) ,
461
+ 'f' => Ok ( b'\x0c' . into ( ) ) ,
462
+ 'n' => Ok ( b'\n' . into ( ) ) ,
463
+ 'r' => Ok ( b'\r' . into ( ) ) ,
464
+ 't' => Ok ( b'\t' . into ( ) ) ,
465
+ 'v' => Ok ( b'\x0b' . into ( ) ) ,
457
466
'x' => {
458
467
let d1 = self . next_hex_digit ( ) ? as u8 ;
459
468
let d2 = self . next_hex_digit ( ) ? as u8 ;
460
- Ok ( ( ( d1 << 4 ) | d2) as u8 )
469
+ Ok ( ( ( ( d1 << 4 ) | d2) as u8 ) . into ( ) )
461
470
}
462
471
d if d >= '0' && d <= '7' => {
463
472
let mut r = d as u8 - b'0' ;
@@ -467,16 +476,14 @@ impl<'a> Lexer<'a> {
467
476
Ok ( d) => r = ( r << 3 ) + d as u8 ,
468
477
}
469
478
}
470
- Ok ( r)
479
+ Ok ( r. into ( ) )
471
480
}
472
481
// https://github.com/google/protobuf/issues/4562
473
- // TODO: overflow
474
- c => Ok ( c as u8 ) ,
482
+ c => Ok ( c. into ( ) ) ,
475
483
}
476
484
}
477
485
'\n' | '\0' => Err ( LexerError :: IncorrectInput ) ,
478
- // TODO: check overflow
479
- c => Ok ( c as u8 ) ,
486
+ c => Ok ( c. into ( ) ) ,
480
487
}
481
488
}
482
489
@@ -530,7 +537,7 @@ impl<'a> Lexer<'a> {
530
537
} ;
531
538
first = false ;
532
539
while self . lookahead_char ( ) != Some ( q) {
533
- self . next_byte_value ( ) ?;
540
+ self . next_str_lit_bytes ( ) ?;
534
541
}
535
542
self . next_char_expect_eq ( q) ?;
536
543
@@ -663,6 +670,37 @@ impl<'a> Lexer<'a> {
663
670
}
664
671
}
665
672
673
+ impl From < u8 > for DecodedBytes {
674
+ fn from ( value : u8 ) -> Self {
675
+ DecodedBytes {
676
+ buf : [ value, 0 , 0 , 0 ] ,
677
+ len : 1 ,
678
+ }
679
+ }
680
+ }
681
+
682
+ impl From < char > for DecodedBytes {
683
+ fn from ( value : char ) -> Self {
684
+ let mut this = DecodedBytes {
685
+ buf : [ 0 ; 4 ] ,
686
+ len : 0 ,
687
+ } ;
688
+ let len = value. encode_utf8 ( & mut this. buf ) . len ( ) ;
689
+ this. len = len as _ ;
690
+ this
691
+ }
692
+ }
693
+
694
+ // means that we work with `Vec::extend`.
695
+ impl IntoIterator for DecodedBytes {
696
+ type Item = u8 ;
697
+ type IntoIter = std:: iter:: Take < std:: array:: IntoIter < u8 , 4 > > ;
698
+
699
+ fn into_iter ( self ) -> Self :: IntoIter {
700
+ self . buf . into_iter ( ) . take ( self . len as _ )
701
+ }
702
+ }
703
+
666
704
#[ cfg( test) ]
667
705
mod test {
668
706
use super :: * ;
0 commit comments