annotate jsscan.py @ 3:ce894f57b30c

added jsrequires
author Atul Varma <avarma@mozilla.com>
date Thu, 22 Apr 2010 13:38:20 -0700
parents f82ff2c61c06
children 30c1f55eff96
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
daa1c6d996f3 Origination.
Atul Varma <avarma@mozilla.com>
parents:
diff changeset
1 import re
daa1c6d996f3 Origination.
Atul Varma <avarma@mozilla.com>
parents:
diff changeset
2
daa1c6d996f3 Origination.
Atul Varma <avarma@mozilla.com>
parents:
diff changeset
class Tokenizer(object):
    """Minimal lexer for JavaScript source text.

    Construct it with the text to scan, then iterate over ``tokenize()``.
    Every token is a 3-tuple ``(kind, value, (line, char))`` where ``kind``
    is ``'literal'`` or one of the keys of ``TOKENS``, ``value`` is the
    matched text, ``line`` is 1-based and ``char`` is the 0-based offset of
    the token within its line.
    """

    # Token kind -> regular expression (optionally ``(pattern, flags)``).
    # The entries are replaced by compiled patterns at class-creation time
    # (see ``__init_tokens`` at the bottom of the class body).  The patterns
    # are mutually exclusive on their first character, so the order in which
    # they are tried does not matter.
    TOKENS = dict(
        whitespace=(r'\s+', re.MULTILINE),
        # A quoted string ends at the first unescaped matching quote; a
        # backslash escapes whatever single character follows it.
        string=(r'"(?:[^"\\\n]|\\.)*"'
                r'|'
                r"'(?:[^'\\\n]|\\.)*'"),
        # Non-greedy, so the comment stops at the first ``*/`` rather than
        # at the last one in the text.
        c_comment=(r'/\*.*?\*/', re.MULTILINE | re.DOTALL),
        cpp_comment=r'//.*',
        # ``$`` is valid anywhere in a JavaScript identifier.
        name=r'[A-Za-z$_][\w$]*',
        digits=r'[0-9]+',
    )

    # Single characters that are emitted as 'literal' tokens.
    LITERALS = ('(){}[];.,:?'
                '!=-+*&|<>')

    # String types accepted for the ``ignore`` shorthand in ``tokenize``;
    # ``basestring`` only exists on Python 2.
    try:
        _STRING_TYPES = (basestring,)
    except NameError:
        _STRING_TYPES = (str,)

    def __init__(self, text):
        """Prepare to tokenize ``text`` starting at line 1, char 0."""
        self.text = text
        self.pos = 0
        self.line = 1
        self.char = 0

    def tokenize(self, ignore=None):
        """Yield the tokens of ``self.text`` from the current position on.

        ``ignore`` is a token kind, or a collection of token kinds, that
        should be consumed but not yielded (e.g. ``'whitespace'``).

        Raises ``TokenizationError`` when the character at the current
        position starts no known token.
        """
        if ignore is None:
            ignore = ()
        elif isinstance(ignore, self._STRING_TYPES):
            ignore = (ignore,)

        while self.pos < len(self.text):
            start = (self.line, self.char)
            current = self.text[self.pos]
            found = None
            if current in self.LITERALS:
                found = ('literal', current, start)
            else:
                for tokname, tokre in self.TOKENS.items():
                    match = tokre.match(self.text, self.pos)
                    if match:
                        found = (tokname, match.group(0), start)
                        # Stop at the first match: trying the remaining
                        # patterns at the advanced position would overwrite
                        # ``found`` and silently drop this token.
                        break
            if found is None:
                raise TokenizationError('unrecognized token %s' %
                                        repr(self.text[self.pos]),
                                        self.line,
                                        self.char)
            self._advance(found[1])
            if found[0] not in ignore:
                yield found

    def _advance(self, tokvalue):
        """Move pos/line/char past the just-consumed text ``tokvalue``."""
        self.pos += len(tokvalue)
        last_newline = tokvalue.rfind('\n')
        if last_newline == -1:
            self.char += len(tokvalue)
        else:
            self.line += tokvalue.count('\n')
            # Column is the number of characters after the last newline.
            self.char = (len(tokvalue) - 1) - last_newline

    def __init_tokens(tokens):
        """Compile every pattern in ``tokens`` in place."""
        # Iterate over a snapshot because the dict is assigned to in the loop.
        for key, value in list(tokens.items()):
            if isinstance(value, tuple):
                args = value
            else:
                args = (value,)
            tokens[key] = re.compile(*args)

    __init_tokens(TOKENS)
daa1c6d996f3 Origination.
Atul Varma <avarma@mozilla.com>
parents:
diff changeset
74
daa1c6d996f3 Origination.
Atul Varma <avarma@mozilla.com>
parents:
diff changeset
class TokenizationError(Exception):
    """Raised when the input contains text that starts no known token.

    Attributes:
        line: line number passed by the raiser.
        char: character offset within that line passed by the raiser.

    ``args`` holds only the message; the position is appended by
    ``__str__``.
    """

    def __init__(self, msg, line, char):
        Exception.__init__(self, msg)
        self.char = char
        self.line = line

    def __reduce__(self):
        # The default reduction rebuilds the exception as ``cls(*self.args)``,
        # i.e. with the message only, which fails because ``line`` and
        # ``char`` are required.  Supply all three so that copy/pickle work.
        return (self.__class__, (self.args[0], self.line, self.char))

    def __str__(self):
        return '%s @ line %d, char %d' % (self.args[0], self.line,
                                          self.char)