Mercurial > js-scanner
comparison jsscan.py @ 5:815520476fbb default tip
accept '/' as a literal
field | value |
---|---|
author | Atul Varma <avarma@mozilla.com> |
date | Thu, 22 Apr 2010 20:03:31 -0700 |
parents | 30c1f55eff96 |
children | (none) |
comparison (legend: equal, deleted, inserted, replaced)
4:30c1f55eff96 | 5:815520476fbb |
---|---|
11 name=r'[A-Za-z$_][\w]*', | 11 name=r'[A-Za-z$_][\w]*', |
12 digits=r'[0-9]+', | 12 digits=r'[0-9]+', |
13 ) | 13 ) |
14 | 14 |
15 LITERALS = ('(){}[];.,:?' | 15 LITERALS = ('(){}[];.,:?' |
16 '!=-+*&|<>') | 16 '!=-+*&|<>/') |
17 | 17 |
18 def __init__(self, text): | 18 def __init__(self, text): |
19 self.text = text | 19 self.text = text |
20 self.pos = 0 | 20 self.pos = 0 |
21 self.line = 1 | 21 self.line = 1 |
27 elif isinstance(ignore, basestring): | 27 elif isinstance(ignore, basestring): |
28 ignore = [ignore] | 28 ignore = [ignore] |
29 | 29 |
30 while self.pos < len(self.text): | 30 while self.pos < len(self.text): |
31 found = None | 31 found = None |
32 if self.text[self.pos] in self.LITERALS: | 32 for tokname, tokre in self.TOKENS.items(): |
33 match = tokre.match(self.text, self.pos) | |
34 if match: | |
35 tokvalue = match.group(0) | |
36 found = (tokname, tokvalue, | |
37 (self.line, self.char)) | |
38 self.pos += len(tokvalue) | |
39 if tokre.flags & re.MULTILINE: | |
40 newlines = tokvalue.count('\n') | |
41 if newlines: | |
42 self.line += newlines | |
43 self.char = ((len(tokvalue) - 1) - | |
44 tokvalue.rfind('\n')) | |
45 else: | |
46 self.char += len(tokvalue) | |
47 else: | |
48 self.char += len(tokvalue) | |
49 continue | |
50 if found is None and self.text[self.pos] in self.LITERALS: | |
33 found = ('literal', self.text[self.pos], | 51 found = ('literal', self.text[self.pos], |
34 (self.line, self.char)) | 52 (self.line, self.char)) |
35 self.pos += 1 | 53 self.pos += 1 |
36 self.char += 1 | 54 self.char += 1 |
37 else: | |
38 for tokname, tokre in self.TOKENS.items(): | |
39 match = tokre.match(self.text, self.pos) | |
40 if match: | |
41 tokvalue = match.group(0) | |
42 found = (tokname, tokvalue, | |
43 (self.line, self.char)) | |
44 self.pos += len(tokvalue) | |
45 if tokre.flags & re.MULTILINE: | |
46 newlines = tokvalue.count('\n') | |
47 if newlines: | |
48 self.line += newlines | |
49 self.char = ((len(tokvalue) - 1) - | |
50 tokvalue.rfind('\n')) | |
51 else: | |
52 self.char += len(tokvalue) | |
53 else: | |
54 self.char += len(tokvalue) | |
55 continue | |
56 if found is not None: | 55 if found is not None: |
57 if found[0] not in ignore: | 56 if found[0] not in ignore: |
58 yield found | 57 yield found |
59 else: | 58 else: |
60 raise TokenizationError('unrecognized token %s' % | 59 raise TokenizationError('unrecognized token %s' % |