# HG changeset patch # User Atul Varma # Date 1271982692 25200 # Node ID 30c1f55eff96842f7660409411c5976eca18533d # Parent ce894f57b30ce9900a2a62aeb3ca65ff65407ae2 fixed greedy regexp bug diff -r ce894f57b30c -r 30c1f55eff96 jsscan.py --- a/jsscan.py Thu Apr 22 13:38:20 2010 -0700 +++ b/jsscan.py Thu Apr 22 17:31:32 2010 -0700 @@ -3,9 +3,9 @@ class Tokenizer(object): TOKENS = dict( whitespace=(r'\s+', re.MULTILINE), - string=(r'(".*(?>> tokenize(' k', ignore='whitespace') ('name', 'k', (1, 2)) +Many double-quoted strings on the same line: + + >>> tokenize(r'"hello there "+" dude"') + ('string', '"hello there "', (1, 0)) + ('literal', '+', (1, 14)) + ('string', '" dude"', (1, 15)) + +Many single-quoted strings on the same line: + + >>> tokenize(r"'hello there '+' dude'") + ('string', "'hello there '", (1, 0)) + ('literal', '+', (1, 14)) + ('string', "' dude'", (1, 15)) + Escaped double-quoted strings: >>> tokenize(r'"i say \\"tomato\\""')