# -*- coding: iso-8859-1 -*-
#
# Bicycle Repair Man - the Python Refactoring Browser
# Copyright (C) 2001-2006 Phil Dawes
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.

import re
import string

# Matches the two-character escapes \\ \" and \' as they appear in source
# text.
escapedQuotesRE = re.compile(r"(\\\\|\\\"|\\\')")

# Matches triple-quoted strings, single-quoted/double-quoted strings and
# comments (a comment match includes its terminating newline).  The single
# capture group makes re.split() return the matched tokens at the odd
# indices of the result.
# NOTE: a comment on the very last line that has no trailing newline is not
# matched by this pattern.
stringsAndCommentsRE = \
      re.compile("(\"\"\".*?\"\"\"|'''.*?'''|\"[^\"]*\"|\'[^\']*\'|#.*?\n)",
                 re.DOTALL)

# Legacy translation tables.  The masking functions below no longer use
# them, but the names are kept so that code importing them keeps working.
# They are built without string.maketrans("", "") / xrange so that the
# module imports on both Python 2 and Python 3.
allchars = "".join([chr(i) for i in range(256)])
_newlineIndex = allchars.index('\n')
allcharsExceptNewline = allchars[:_newlineIndex] + allchars[_newlineIndex + 1:]
# string.maketrans exists only on Python 2, str.maketrans only on Python 3;
# the "or" short-circuits so the missing one is never looked up.
_maketrans = getattr(string, "maketrans", None) or str.maketrans
allcharsExceptNewlineTranstable = _maketrans(allcharsExceptNewline,
                                             '*' * len(allcharsExceptNewline))

_notNewlineRE = re.compile(r"[^\n]")


# replaces every character of text with * except newlines, so the result
# has the same length and the same line structure as the input
def _maskText(text):
    return _notNewlineRE.sub("*", text)


# masks the inside of one string/comment token, leaving its delimiters
# (3 chars each side for triple-quoted strings, 1 char otherwise) intact.
# For a comment the "delimiters" are the leading # and the trailing newline.
def _maskToken(token):
    if token.startswith(("'''", '"""')):
        width = 3
    else:
        width = 1
    return token[:width] + _maskText(token[width:-width]) + token[-width:]


# masks a string token as _maskToken does, but collapses a comment token
# to the newline that terminated it
def _maskStringOrDropComment(token):
    if token.startswith("#"):
        return "\n"
    return _maskToken(token)


def _upperToken(token):
    return token.upper()


# applies transform to every string and comment in src (after neutralising
# escaped quotes) and returns the reassembled source
def _mapStringsAndComments(src, transform):
    pieces = stringsAndCommentsRE.split(escapedQuotesRE.sub("**", src))
    # every odd element is a string or comment
    for i in range(1, len(pieces), 2):
        pieces[i] = transform(pieces[i])
    return "".join(pieces)


# replaces all chars in a string or a comment with * (except newlines).
# this ensures that text searches don't mistake comments for keywords, and
# that all matches are in the same line/comment as the original.
# Escaped quotes and escaped backslashes are replaced with ** first, so the
# returned text always has the same length as src.
def maskStringsAndComments(src):
    return _mapStringsAndComments(src, _maskToken)


# performs a transformation on all of the comments and strings so that
# text searches for python keywords won't accidentally find a keyword in
# a string or comment (their contents are upper-cased; python keywords are
# lower case)
def maskPythonKeywordsInStringsAndComments(src):
    return _mapStringsAndComments(src, _upperToken)


# replaces all chars in a string with * (except newlines) and replaces each
# comment with the single newline that terminated it.  Line numbers are
# therefore preserved, but column offsets after a removed comment are not.
def maskStringsAndRemoveComments(src):
    return _mapStringsAndComments(src, _maskStringOrDropComment)


implicitContinuationChars = (('(', ')'), ('[', ']'), ('{', '}'))
# NOTE: has five slots although only three bracket kinds are tracked; left
# unchanged for any code that imports it.
emptyHangingBraces = [0,0,0,0,0]
# a backslash followed only by whitespace and/or a comment at end of line
linecontinueRE = re.compile(r"\\\s*(#.*)?$")
# not used by this module; kept for code that imports it
multiLineStringsRE = \
      re.compile("(^.*?\"\"\".*?\"\"\".*?$|^.*?'''.*?'''.*?$)", re.DOTALL)


# splits the string into logical lines.  This requires the comments to
# be removed, and strings masked (see other fns in this module).
# Returns a list of strings; each keeps its original line endings.
def splitLogicalLines(src):
    return list(generateLogicalLines(src.splitlines(True)))


# raised by generateLogicalLines when a closing bracket appears without a
# matching opening bracket.  Derives from Exception so that it can be
# raised on Python 3 and caught by "except Exception" handlers.
class UnbalancedBracesException(Exception):
    pass


# Groups physical lines into logical lines and yields each group as a list
# of the physical line objects it is made of.  A physical line is continued
# by the next one when it ends in a backslash, leaves a bracket open, or
# leaves a triple-quoted string open.
# If checkBalance is true, UnbalancedBracesException is raised as soon as
# more brackets of one kind have been closed than opened.
# NOTE: if the input ends while a continuation is still open, the pending
# lines are not yielded.
def _iterLogicalLineGroups(physicallines, checkBalance):
    pending = []
    depth = [0] * len(implicitContinuationChars)
    inTripleQuote = 0
    for line in physicallines:
        for i, (opener, closer) in enumerate(implicitContinuationChars):
            depth[i] += line.count(opener) - line.count(closer)
        # an odd number of triple-quote markers toggles the open/closed state
        inTripleQuote ^= line.count('"""') % 2
        inTripleQuote ^= line.count("'''") % 2
        if checkBalance and min(depth) < 0:
            raise UnbalancedBracesException()
        pending.append(line)
        if linecontinueRE.search(line) or any(depth) or inTripleQuote:
            continue
        yield pending
        pending = []


# splits the string into logical lines.  This requires the strings
# masked (see other fns in this module)
# Physical Lines *Must* start on a non-continued non-in-a-comment line
# (although detects unbalanced braces)
# Yields each logical line as one string; raises UnbalancedBracesException
# on a surplus closing bracket.
def generateLogicalLines(physicallines):
    for group in _iterLogicalLineGroups(physicallines, True):
        yield "".join(group)


# see above but yields (line,linenum), where linenum is taken from the
# first physical line of the logical line.
# needs physicallines to have linenum attribute.
# Unlike generateLogicalLines this does not raise on unbalanced braces.
def generateLogicalLinesAndLineNumbers(physicallines):
    for group in _iterLogicalLineGroups(physicallines, False):
        yield "".join(group), group[0].linenum


# takes a line of code, and decorates it with noops so that it can be
# parsed by the python compiler.
# e.g.  "if foo:" -> "if foo: pass"
# returns the decorated line (leading/trailing whitespace is stripped)
# line must have strings and comments masked
#
# N.B. it only inserts keywords whitespace and 0's
notSpaceRE = re.compile(r"\s*(\S)")
commentRE = re.compile("#.*$")
# the statement keywords that need special handling; \b stops identifiers
# that merely begin with a keyword (e.g. "tryout") from matching
_leadingKeywordRE = re.compile(r"(try|except|finally|else|elif|yield)\b")


def makeLineParseable(line):
    return makeLineParseableWhenCommentsRemoved(commentRE.sub("",line))


# as makeLineParseable, but assumes line contains no comment
def makeLineParseableWhenCommentsRemoved(line):
    line = line.strip()
    match = _leadingKeywordRE.match(line)
    if match:
        keyword = match.group(1)
    else:
        keyword = None
    if ":" in line:
        if line.endswith(":"):
            # give a bare block header a body
            line += " pass"
        if keyword == "try":
            line += "\nexcept: pass"
        elif keyword in ("except", "finally"):
            line = "try: pass\n" + line
        elif keyword in ("else", "elif"):
            line = "if 0: pass\n" + line
    elif keyword == "yield":
        # swap the keyword for "return", keeping whatever followed it
        line = "return" + line[len("yield"):]
    return line


wordRE = re.compile(r"\w+")


# returns 1 if word occurs in line as a whole word (as delimited by \w+),
# otherwise 0
def isWordInLine(word, line):
    # cheap substring test first; only tokenise when it can succeed
    if line.find(word) != -1 and word in wordRE.findall(line):
        return 1
    return 0


# translates an idx in a logical line into physical line coordinates
# returns x and y coords (both zero-based: x is the column, y the line)
def indexToCoordinates(src, index):
    y = src[:index].count("\n")
    startOfLineIdx = src.rfind("\n", 0, index)+1
    x = index-startOfLineIdx
    return x, y


# changes \" \' and \\ into ** so that text searches
# for " and ' won't hit escaped ones
def maskEscapedQuotes(src):
    return escapedQuotesRE.sub("**", src)