comparison jsscan.py @ 0:daa1c6d996f3

Origination: this changeset creates the file (it has no parent revision).
author Atul Varma <avarma@mozilla.com>
date Thu, 22 Apr 2010 13:11:51 -0700
parents
children f82ff2c61c06

import re

class Tokenizer(object):
    # Token classes; each value is either a regex string or a
    # (regex, flags) tuple, compiled in place by __init_tokens() below.
    TOKENS = dict(
        whitespace=(r'\s+', re.MULTILINE),
        # A double- or single-quoted string whose closing quote is
        # not escaped by a backslash.
        string=(r'(".*(?<!\\)")'
                r'|'
                r"('.*(?<!\\)')"),
        c_comment=(r'\/\*.*\*\/', re.MULTILINE | re.DOTALL),
        cpp_comment=r'\/\/.*',
        name=r'[A-Za-z$_][\w]*',
        digits=r'[0-9]+',
        )
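    # Note that the string pattern is greedy: on a line such as
    #     "a" + "b"
    # it matches the whole span from the first quote to the last,
    # yielding a single string token rather than two strings and a '+'.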

    LITERALS = ('(){}[];.,:?'
                '!=-+*&|<>')
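    # For example, scanning the text 'a==1' yields, in order:
    #   ('name', 'a', (1, 0)), ('literal', '=', (1, 1)),
    #   ('literal', '=', (1, 2)), ('digits', '1', (1, 3))
    # since literals are matched one character at a time.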

    def __init__(self, text):
        self.text = text
        self.pos = 0    # absolute offset into text
        self.line = 1   # 1-based line number
        self.char = 0   # 0-based column within the current line

    def tokenize(self):
        while self.pos < len(self.text):
            found = None
            if self.text[self.pos] in self.LITERALS:
                found = ('literal', self.text[self.pos],
                         (self.line, self.char))
                self.pos += 1
                self.char += 1
            else:
                for tokname, tokre in self.TOKENS.items():
                    match = tokre.match(self.text, self.pos)
                    if match:
                        tokvalue = match.group(0)
                        found = (tokname, tokvalue,
                                 (self.line, self.char))
                        self.pos += len(tokvalue)
                        # Patterns compiled with re.MULTILINE may span
                        # lines, so recompute line/char from the last
                        # newline in the matched text.
                        if tokre.flags & re.MULTILINE:
                            newlines = tokvalue.count('\n')
                            if newlines:
                                self.line += newlines
                                self.char = ((len(tokvalue) - 1) -
                                             tokvalue.rfind('\n'))
                            else:
                                self.char += len(tokvalue)
                        else:
                            self.char += len(tokvalue)
                        # Stop at the first matching pattern ('break',
                        # not 'continue'): self.pos has already advanced,
                        # and trying the remaining patterns here would
                        # silently overwrite this token.
                        break
            if found is not None:
                yield found
            else:
                raise TokenizationError('unrecognized token %s' %
                                        repr(self.text[self.pos]),
                                        self.line,
                                        self.char)
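
    # A small worked example of the bookkeeping above: tokenizing
    # 'a\n b' yields
    #   ('name', 'a', (1, 0)),
    #   ('whitespace', '\n ', (1, 1)),
    #   ('name', 'b', (2, 1)),
    # because the whitespace pattern carries re.MULTILINE, so its
    # newline bumps self.line and self.char restarts at the column
    # just past the last '\n'.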

    def __init_tokens(tokens):
        # Compile the TOKENS table in place: a bare pattern string
        # becomes re.compile(pattern); a (pattern, flags) tuple
        # becomes re.compile(pattern, flags).
        for key, value in tokens.items():
            if isinstance(value, tuple):
                args = value
            else:
                args = (value,)
            tokens[key] = re.compile(*args)

    # Runs once, while the class body is being executed, so TOKENS
    # holds compiled pattern objects before any instance is created.
    __init_tokens(TOKENS)

class TokenizationError(Exception):
    def __init__(self, msg, line, char):
        Exception.__init__(self, msg)
        self.char = char
        self.line = line

    def __str__(self):
        return '%s @ line %d, char %d' % (self.args[0], self.line,
                                          self.char)
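
# For instance, feeding the tokenizer the single character '#' (which
# no pattern matches) raises an error that formats as:
#   unrecognized token '#' @ line 1, char 0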

if __name__ == '__main__':
    import os
    import sys

    # Scan the JavaScript file named on the command line and print
    # every occurrence of the identifier 'require', with its position.
    text = open(os.path.expanduser(sys.argv[1]), 'r').read()
    t = Tokenizer(text)
    try:
        tokens = t.tokenize()
        for token in tokens:
            if token[:2] == ('name', 'require'):
                print token
    except TokenizationError, e:
        print e
        sys.exit(1)
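
A quick usage sketch (the file name and its contents below are made up
for illustration): given a main.js containing

    var a = require("panel");

running the scanner prints the position of each 'require' identifier:

    $ python jsscan.py main.js
    ('name', 'require', (1, 8))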