Mercurial > js-scanner
changeset 4:30c1f55eff96
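Fixed greedy regexp bug: the string-literal patterns now match non-greedily (`.*?`), so several quoted strings on the same line are tokenized as separate strings.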
author | Atul Varma <avarma@mozilla.com> |
---|---|
date | Thu, 22 Apr 2010 17:31:32 -0700 |
parents | ce894f57b30c |
children | 815520476fbb |
files | jsscan.py test_jsscan.py |
diffstat | 2 files changed, 16 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/jsscan.py Thu Apr 22 13:38:20 2010 -0700
+++ b/jsscan.py Thu Apr 22 17:31:32 2010 -0700
@@ -3,9 +3,9 @@
 class Tokenizer(object):
     TOKENS = dict(
         whitespace=(r'\s+', re.MULTILINE),
-        string=(r'(".*(?<!\\)")'
+        string=(r'(".*?(?<!\\)")'
                 r'|'
-                r"('.*(?<!\\)')"),
+                r"('.*?(?<!\\)')"),
         c_comment=(r'\/\*.*\*\/', re.MULTILINE | re.DOTALL),
         cpp_comment=r'\/\/.*',
         name=r'[A-Za-z$_][\w]*',
--- a/test_jsscan.py Thu Apr 22 13:38:20 2010 -0700
+++ b/test_jsscan.py Thu Apr 22 17:31:32 2010 -0700
@@ -25,6 +25,20 @@
     >>> tokenize(' k', ignore='whitespace')
     ('name', 'k', (1, 2))
 
+Many double-quoted strings on the same line:
+
+    >>> tokenize(r'"hello there "+" dude"')
+    ('string', '"hello there "', (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', '" dude"', (1, 15))
+
+Many single-quoted strings on the same line:
+
+    >>> tokenize(r"'hello there '+' dude'")
+    ('string', "'hello there '", (1, 0))
+    ('literal', '+', (1, 14))
+    ('string', "' dude'", (1, 15))
+
 Escaped double-quoted strings:
 
     >>> tokenize(r'"i say \\"tomato\\""')