Source code for partpy.sourcestring

"""SourceString stores the entire string to be parsed in memory and provides
some simple methods for retrieving and moving current position as well as
methods for matching strings and patterns.
"""
# Module metadata: author name and contact address.
__author__ = 'Taylor "Nekroze" Lawson'
__email__ = 'nekroze@eturnilnetwork.com'


class SourceString(object):
    """Stores the string to be parsed together with a cursor into it.

    Instance state maintained by the methods below:

    * ``string`` -- the working string.
    * ``length`` -- cached length of ``string``.
    * ``pos``    -- zero-based index of the current character.
    * ``col``    -- number of characters consumed since the last newline.
    * ``row``    -- one-based line number of the current position.
    * ``eos``    -- 1 once the cursor has consumed the last character, else 0.

    Provides multiple methods for matching strings and patterns and for
    moving around in the source string.
    """

    def __init__(self, string=None):
        """Accepts a string or None by default.

        If a string is given then self.set_string(string) is run
        automatically. To load a file, create a SourceString with no
        arguments and then call load_file, or override this method when
        inheriting from SourceString.
        """
        self.string = ''
        self.length = 0
        self.pos = 0
        self.col = 0
        self.row = 1
        self.eos = 0
        if string is not None:
            self.set_string(string)

    def load_file(self, filename):
        """Read in file contents and set the current string."""
        with open(filename, 'r') as sourcefile:
            self.set_string(sourcefile.read())

    def set_string(self, string):
        """Set the working string and its length then reset positions."""
        self.string = string
        self.length = len(string)
        self.reset_position()

    def add_string(self, string):
        """Append to the working string and clear eos if there is now room.

        eos is only cleared when the cursor actually has a character to
        read, so appending an empty string at the end leaves eos set.
        """
        self.string += string
        self.length += len(string)
        if self.has_space():
            self.eos = 0

    def reset_position(self):
        """Reset all current positions."""
        self.pos = 0
        self.col = 0
        self.row = 1
        self.eos = 0

    def has_space(self, length=1, offset=0):
        """Return True if length chars exist starting at self.pos + offset.

        A start index before the beginning of the string (possible with a
        negative offset) counts as "no space"; otherwise callers would index
        the string with a negative number and silently wrap to its end.
        """
        start = self.pos + offset
        return start >= 0 and start + length - 1 < self.length

    def eol_distance_next(self, offset=0):
        """Return the amount of characters until the next newline."""
        rest = self.string[self.pos + offset:]
        newline = rest.find('\n')
        return len(rest) if newline < 0 else newline

    def eol_distance_last(self, offset=0):
        """Return the amount of characters back to the last newline."""
        head = self.string[:self.pos + offset]
        # rfind gives -1 when there is no newline, which makes the
        # expression below evaluate to len(head) as required.
        return len(head) - head.rfind('\n') - 1

    def _advance(self, text):
        """Move the cursor forward over text, updating pos/col/row/eos."""
        newlines = text.count('\n')
        if newlines:
            self.row += newlines
            # Column restarts after the last newline that was consumed.
            self.col = len(text) - text.rfind('\n') - 1
        else:
            self.col += len(text)
        self.pos += len(text)
        if not self.has_space():
            # Set eos if there is no more space left.
            self.eos = 1

    def spew_length(self, length):
        """Move current position backwards by length.

        Does nothing (returns None) at position 0 or when length would
        move before the start of the string.
        """
        pos = self.pos
        if not pos or length > pos:
            return None
        chunk = self.string[pos - length:pos]
        self.pos = pos - len(chunk)
        self.row -= chunk.count('\n')
        self.col = self.eol_distance_last()
        if self.has_space():
            # There is something to read again, so eos no longer holds.
            self.eos = 0

    def eat_length(self, length):
        """Move current position forward by length and set eos if needed.

        Does nothing (returns None) when already at eos or when fewer
        than length characters remain.
        """
        pos = self.pos
        if self.eos or pos + length > self.length:
            return None
        self._advance(self.string[pos:pos + length])

    def eat_string(self, string):
        """Move current position by len(string), counting lines by \\n.

        Rows and columns are counted from the given string, which is
        assumed to match the source at the current position.
        """
        if self.eos or self.pos + len(string) > self.length:
            return None
        self._advance(string)

    def eat_line(self):
        """Move current position forward to the start of the next line."""
        if self.eos:
            return None
        newline = self.string.find('\n', self.pos)
        stop = self.length if newline < 0 else newline + 1
        if stop > self.pos:
            self.eat_length(stop - self.pos)

    def get_char(self, offset=0):
        """Return the character at the current position plus offset.

        Returns an empty string when that index is outside the string.
        """
        if not self.has_space(offset=offset):
            return ''
        return self.string[self.pos + offset]

    def get_length(self, length, trim=0, offset=0):
        """Return length characters starting at current position + offset.

        If fewer than length characters remain, return '' unless trim is
        true, in which case return as much as possible before eos.
        """
        start = self.pos + offset
        if start < 0:
            return ''
        if self.has_space(length, offset):
            return self.string[start:start + length]
        if trim:
            return self.string[start:]
        return ''

    def get_string(self, offset=0):
        """Return non space chars from current position until a whitespace."""
        if not self.has_space(offset=offset):
            return ''
        string = self.string
        start = self.pos + offset
        for index, char in enumerate(string[start:]):
            if char.isspace():
                return string[start:start + index]
        return string[start:]

    def rest_of_string(self, offset=0):
        """A copy of the current position till the end of the source string."""
        if self.has_space(offset=offset):
            return self.string[self.pos + offset:]
        return ''

    def _iter_raw_lines(self):
        """Yield (lineno, text) for every line; lineno starts at 1.

        Each text keeps its trailing newline when it has one. An empty
        string yields nothing, and no empty line is produced after a final
        newline.
        """
        string = self.string
        end = len(string)
        start = 0
        lineno = 1
        while start < end:
            newline = string.find('\n', start)
            stop = end if newline < 0 else newline + 1
            yield lineno, string[start:stop]
            start = stop
            lineno += 1

    def get_line(self, lineno):
        """Return any line as a SourceLine and None if lineno doesnt exist.

        NOTE: lineno is zero-based here (0 is the first line), unlike
        get_lines, get_all_lines and self.row which count from 1. This is
        kept as-is because callers may depend on it.
        """
        for number, text in self._iter_raw_lines():
            index = number - 1
            if index == lineno:
                return SourceLine(text, lineno)
            if index > lineno:
                break
        return None

    def get_current_line(self):
        """Return a SourceLine of the current line, or None at the end."""
        if not self.has_space():
            return None
        string = self.string
        start = self.pos - self.col
        newline = string.find('\n', start)
        stop = self.length if newline < 0 else newline + 1
        text = string[start:stop]
        if not text:
            return None
        return SourceLine(text, self.row)

    def get_lines(self, first, last):
        """Return SourceLines for lines between and including first & last.

        Line numbers are one-based. Returns None when no line falls in the
        requested range.
        """
        selected = []
        for lineno, text in self._iter_raw_lines():
            if lineno > last:
                break
            if lineno >= first:
                selected.append(SourceLine(text, lineno))
        return selected or None

    def _surrounding_raw_lines(self, past, future):
        """Return [(text, lineno), ...] around the current line.

        Covers up to past lines before and future lines after the current
        line, clipped to the lines that actually exist.
        """
        string = self.string
        end = self.length
        start = self.pos - self.col  # index of the current line's first char
        lineno = self.row
        for _ in range(past):
            if start <= 0:
                break
            # string[start - 1] is the newline ending the previous line, so
            # search before it for the newline that precedes that line.
            start = string.rfind('\n', 0, start - 1) + 1
            lineno -= 1
        last = self.row + future
        lines = []
        while lineno <= last and start < end:
            newline = string.find('\n', start)
            stop = end if newline < 0 else newline + 1
            lines.append((string[start:stop], lineno))
            start = stop
            lineno += 1
        return lines

    def get_surrounding_lines(self, past=1, future=1):
        """Return the current line plus past previous and future next lines.

        Returns a list of SourceLine's in source order, each carrying its
        one-based line number. Lines that do not exist are simply omitted.
        """
        return [SourceLine(text, lineno)
                for text, lineno in self._surrounding_raw_lines(past, future)]

    def get_all_lines(self):
        """Return all lines of the SourceString as a list of SourceLine's."""
        return [SourceLine(text, lineno)
                for lineno, text in self._iter_raw_lines()]

    def match_string(self, string, word=0, offset=0):
        """Return True if string matches at the current position + offset.

        If word is true then the whole whitespace-delimited word at that
        position must equal string.
        """
        if word:
            return self.get_string(offset) == string
        # offset must be passed by keyword: the second positional
        # parameter of get_length is trim, not offset.
        return self.get_length(len(string), offset=offset) == string

    def match_any_string(self, strings, word=0, offset=0):
        """Attempts to match each string in strings in order.

        Returns the string that matches or an empty string if no match.
        If word is true then only the whitespace-delimited word at the
        position is compared, which is much cheaper. If word is 0 then you
        should sort the strings argument yourself by length.
        """
        if word:
            current = self.get_string(offset)
            return current if current in strings else ''
        current = ''
        currentlength = 0
        for string in strings:
            length = len(string)
            if length != currentlength:
                # Only re-slice the source when the candidate length changes.
                current = self.get_length(length, offset=offset)
                currentlength = length
            if string == current:
                return string
        return ''

    def match_any_char(self, chars, offset=0):
        """Match and return the current SourceString char if its in chars."""
        if not self.has_space(offset=offset):
            return ''
        current = self.string[self.pos + offset]
        return current if current in chars else ''

    def _match_pattern(self, accepts_first, accepts_rest, least, offset):
        """Return the longest run accepted by the two predicates, or ''.

        accepts_first is applied to the first character, accepts_rest to
        every following one. Runs shorter than least yield ''.
        """
        if not self.has_space(offset=offset):
            return ''
        string = self.string
        start = self.pos + offset
        if not accepts_first(string[start]):
            return ''
        stop = start + 1
        end = len(string)
        while stop < end and accepts_rest(string[stop]):
            stop += 1
        if stop - start < least:
            return ''
        return string[start:stop]

    def match_string_pattern(self, first, rest=None, least=1, offset=0):
        """Match chars sequentially from the current position while they
        are contained in the pattern, and return everything matched.

        least is the minimum amount of chars that must be matched. If rest
        is given then first is only used for the first char and all
        following chars are matched against rest.
        """
        pattern = first if rest is None else rest
        return self._match_pattern(lambda char: char in first,
                                   lambda char: char in pattern,
                                   least, offset)

    def match_function_pattern(self, first, rest=None, least=1, offset=0):
        """Match chars sequentially from the current position while the
        predicate accepts them, and return everything matched.

        This version takes functions instead of string patterns. Each
        function must take one argument, a string, and return a value that
        can be evaluated as True or False. least is the minimum amount of
        chars that must be matched. If rest is given then first is only
        used for the first char and all following chars go through rest.
        """
        pattern = first if rest is None else rest
        return self._match_pattern(first, pattern, least, offset)

    def count_indents(self, spacecount, tabs=0, offset=0):
        """Counts the number of indents that can be tabs or spacecount
        number of spaces in a row from the current line.
        """
        return self.count_indents_length(spacecount, tabs, offset)[0]

    def count_indents_length(self, spacecount, tabs=0, offset=0):
        """Counts the number of indents that can be tabs or spacecount
        number of spaces in a row from the current line.

        Returns a tuple (indents, charlen) where charlen is the number of
        indentation characters scanned; (0, 0) when there is nothing to
        read at the current position.
        """
        if not self.has_space(offset=offset):
            return (0, 0)
        spaces = 0
        indents = 0
        charlen = 0
        for char in self.string[self.pos + offset - self.col:]:
            if char == ' ':
                spaces += 1
            elif tabs and char == '\t':
                indents += 1
                spaces = 0
            else:
                break
            charlen += 1
            if spaces == spacecount:
                indents += 1
                spaces = 0
        return (indents, charlen)

    def count_indents_last_line(self, spacecount, tabs=0, back=5):
        """Finds the last meaningful line and returns its indent level.

        back specifies the amount of lines to look back for a line that is
        not purely whitespace. The current line is examined first.
        """
        if not self.has_space():
            return 0
        lines = self.get_surrounding_lines(back, 0)
        for line in reversed(lines):
            if not line.string.isspace():
                return line.count_indents(spacecount, tabs)
        return 0

    def count_indents_length_last_line(self, spacecount, tabs=0, back=5):
        """Finds the last meaningful line and returns its indent level and
        character length as a tuple (indents, charlen).

        back specifies the amount of lines to look back for a line that is
        not purely whitespace. Returns (0, 0) when nothing is found.
        """
        if not self.has_space():
            return (0, 0)
        lines = self.get_surrounding_lines(back, 0)
        for line in reversed(lines):
            if not line.string.isspace():
                return line.count_indents_length(spacecount, tabs)
        return (0, 0)

    def skip_whitespace(self, newlines=0):
        """Moves the position forwards to the next non-whitespace character.

        Newlines stop the skipping unless newlines is true, in which case
        they are treated like any other whitespace.
        """
        while not self.eos:
            char = self.get_char()
            if not char.isspace() or (char == '\n' and not newlines):
                break
            self.eat_length(1)

    def __repr__(self):
        """Returns the entire base string. Called from the repr() builtin."""
        return self.string

    def __getitem__(self, index):
        """Returns the character at the given index.

        Called by SourceString[index] where index is an integer.
        """
        return self.string[index]

    def __delitem__(self, index):
        # NOTE(review): when the working string is a str this raises
        # TypeError because str does not support item deletion.
        del self.string[index]

    def __setitem__(self, index, value):
        # NOTE(review): when the working string is a str this raises
        # TypeError because str does not support item assignment.
        self.string[index] = value

    def __len__(self):
        """Returns the length of base string. Called by len(SourceString)."""
        return len(self.string)

    def __contains__(self, string):
        """Returns a boolean if the given string is within the base string.

        Called by 'word' in SourceString.
        """
        return string in self.string

    def __iter__(self):
        """Yields the current char and moves the position onwards until eos.

        Iterating consumes the source: the cursor is advanced one
        character per item. An empty source yields nothing.
        """
        while not self.eos and self.has_space():
            yield self.string[self.pos]
            self.eat_length(1)
class SourceLine(SourceString):
    """Contains an entire line of a source with handy line specific methods.

    In addition to the SourceString state, ``lineno`` holds the line number
    this line was given by whoever created it.
    """

    def __init__(self, string, lineno):
        """Store the line text (resetting all positions) and its number."""
        SourceString.__init__(self, string)
        self.lineno = lineno

    def strip_trailing_ws(self):
        """Remove trailing whitespace from internal string."""
        self.string = self.string.rstrip()
        # Keep the cached length in step so bounds checks stay correct.
        self.length = len(self.string)

    def get_first_char(self):
        """Return the first non-whitespace character of the line.

        Returns None when the line is empty or all whitespace.
        """
        for char in self.string:
            if not char.isspace():
                return char
        return None

    def get_last_char(self):
        """Return the last non-whitespace character of the line.

        Returns None when the line is empty or all whitespace.
        """
        for char in reversed(self.string):
            if not char.isspace():
                return char
        return None

    def pretty_print(self, carrot=False):
        """Return a string of this line including linenumber.

        The line number is left-aligned in a four character wide column
        followed by '|'. If carrot is True then a second line is added
        with a '^' under the current character position.
        """
        prefix = str(self.lineno).ljust(4) + '|'
        output = prefix + self.string
        if carrot:
            # The line text may already end in a newline; avoid a blank
            # line between the text and the marker.
            if not output.endswith('\n'):
                output += '\n'
            output += ' ' * (len(prefix) + self.col) + '^'
        return output

    def __str__(self):
        """Return the pretty printed line without a position marker."""
        return self.pretty_print()

Project Versions

This Page