Skip to content

Commit

Permalink
Fix length calculations when using multibyte characters
Browse files Browse the repository at this point in the history
Fix tokenEnd and cursor for string, template, and XML literals and for
multiline comments when the source contains multibyte characters.
  • Loading branch information
meraedit authored Dec 3, 2024
1 parent 8b45873 commit 75e9c9f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
7 changes: 7 additions & 0 deletions rhino/src/main/java/org/mozilla/javascript/TokenStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,8 @@ final int getToken() throws IOException {

String str = getStringFromBuffer();
this.string = internString(str);
cursor = sourceCursor;
tokenEnd = cursor;
return Token.STRING;
}

Expand Down Expand Up @@ -1330,6 +1332,7 @@ && matchChar('.')) {
lookForSlash = true;
} else if (c == '/') {
if (lookForSlash) {
cursor = sourceCursor;
tokenEnd = cursor;
return Token.COMMENT;
}
Expand Down Expand Up @@ -1653,6 +1656,8 @@ int readTemplateLiteral(boolean isTaggedLiteral) throws IOException {
case '`':
rawString.setLength(rawString.length() - 1); // don't include "`"
this.string = hasInvalidEscapeSequences ? null : getStringFromBuffer();
cursor = sourceCursor;
tokenEnd = cursor;
return Token.TEMPLATE_LITERAL;
case '$':
if (matchTemplateLiteralChar('{')) {
Expand Down Expand Up @@ -1907,6 +1912,8 @@ int getNextXMLToken() throws IOException {

if (!xmlIsTagContent && xmlOpenTagsCount == 0) {
this.string = getStringFromBuffer();
cursor = sourceCursor;
tokenEnd = cursor;
return Token.XMLEND;
}
} else {
Expand Down
46 changes: 44 additions & 2 deletions tests/src/test/java/org/mozilla/javascript/tests/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,15 @@
import org.mozilla.javascript.ast.StringLiteral;
import org.mozilla.javascript.ast.SwitchCase;
import org.mozilla.javascript.ast.SwitchStatement;
import org.mozilla.javascript.ast.TemplateCharacters;
import org.mozilla.javascript.ast.TemplateLiteral;
import org.mozilla.javascript.ast.TryStatement;
import org.mozilla.javascript.ast.UpdateExpression;
import org.mozilla.javascript.ast.VariableDeclaration;
import org.mozilla.javascript.ast.VariableInitializer;
import org.mozilla.javascript.ast.WithStatement;
import org.mozilla.javascript.ast.XmlFragment;
import org.mozilla.javascript.ast.XmlLiteral;
import org.mozilla.javascript.testing.TestErrorReporter;

public class ParserTest {
Expand Down Expand Up @@ -1203,14 +1207,52 @@ public void parseUnicodeFormatName() {
}

@Test
public void testParseUnicodeMultibyteCharacter() {
public void parseUnicodeMultibyteCharacter() {
// The whole program is the single code point U+20BB7, written as a UTF-16 surrogate pair.
AstRoot root = parse("\uD842\uDFB7");
AstNode first = ((ExpressionStatement) root.getFirstChild()).getExpression();
// The string reported for the expression must be the intact character, not a split pair.
assertEquals("𠮷", first.getString());
}

@Test
public void testParseUnicodeIdentifierPartWhichIsNotJavaIdentifierPart() {
public void parseMultibyteCharacter_StringLiteral() {
// A quoted string whose only content is U+1F31F, written as a UTF-16 surrogate pair.
AstRoot root = parse("'\uD83C\uDF1F'");
StringLiteral first =
(StringLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
// Expected length 4 = the two quote characters + the two chars of the surrogate pair.
assertEquals(4, first.getLength());
// getValue(true) is expected to return the text together with its quotes.
assertEquals("'🌟'", first.getValue(true));
}

@Test
public void parseMultibyteCharacter_TemplateLiteral() {
// A template literal whose only content is U+1F31F, written as a UTF-16 surrogate pair.
AstRoot root = parse("`\uD83C\uDF1F`");
TemplateLiteral first =
(TemplateLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
TemplateCharacters templateCharacter = (TemplateCharacters) first.getElement(0);
// The characters element is expected to span only the surrogate pair: 2 chars.
assertEquals(2, templateCharacter.getLength());
assertEquals("🌟", templateCharacter.getValue());
// The enclosing literal additionally counts both backticks: 2 + 2 = 4.
assertEquals(4, first.getLength());
}

@Test
public void parseMultibyteCharacter_XMLLiteral() {
// An XML literal whose element text is U+1F31F, written as a UTF-16 surrogate pair.
AstRoot root = parse("<xml>\uD83C\uDF1F</xml>");
XmlLiteral first =
(XmlLiteral) ((ExpressionStatement) root.getFirstChild()).getExpression();
XmlFragment fragment = first.getFragments().get(0);
// Expected length 13 = "<xml>" (5) + the surrogate pair (2) + "</xml>" (6).
assertEquals(13, fragment.getLength());
assertEquals("<xml>🌟</xml>", fragment.toSource());
}

@Test
public void parseMultibyteCharacter_Comment() {
// A block comment whose only content is U+1F31F, written as a UTF-16 surrogate pair.
AstRoot root = parse("/*\uD83C\uDF1F*/");
Comment comment = root.getComments().first();
// Expected length 6 = "/*" (2) + the surrogate pair (2) + "*/" (2).
assertEquals(6, comment.getLength());
assertEquals("/*🌟*/", comment.getValue());
}

@Test
public void parseUnicodeIdentifierPartWhichIsNotJavaIdentifierPart() {
// On the JDK 11 I'm using, Character.isUnicodeIdentifierPart(U+9FEB) returns true
// but Character.isJavaIdentifierPart(U+9FEB) returns false. On a JDK 17 results
// seem to vary, but I think it's enough to verify that TokenStream uses
Expand Down

0 comments on commit 75e9c9f

Please sign in to comment.