See PythonTranslator for the origin of this exercise. ---- '''Original Java code:''' ''See also: CommentingChallengeResponsePartTwo'' import java.io.*; import java.util.*; /** * Like H''''''ttpUtils.parseQueryString, except that it never throws parse * exceptions. */ public class Q''''''ueryStringParser { private Dictionary _result = new Hashtable(); private I''''''nputStream _stream; private int _nextCharacter; public Q''''''ueryStringParser(I''''''nputStream stream) throws IOException { _stream = stream; _nextCharacter = stream.read(); } public Dictionary parseArgs() throws IOException { while (hasAnotherCharacter()) { parseNameValuePair(); } return _result; } private void parseNameValuePair() throws IOException { String name = readUpTo('='); String value = readUpTo('&'); _result.put(name, value); } private String readUpTo(char boundaryCharacter) throws IOException { String word = ""; while(hasAnotherCharacter()) { char character = readCharacter(); if (character == boundaryCharacter) return word; else if (character == '%') word += readHexEncodedCharacter(); else if (character == '+') word += " "; else word += character; } return word; } private String readHexEncodedCharacter() throws IOException { int sixteens = readHexDigit(); int ones = readHexDigit(); if ((sixteens < 0) || (ones < 0)) return ""; char character = (char)((16 * sixteens) + ones); return "" + character; } private int readHexDigit() throws IOException { if (!hasAnotherCharacter()) return -1; return Character.digit(readCharacter(), 16); } private boolean hasAnotherCharacter() { return (_nextCharacter >= 0); } private char readCharacter() throws IOException { if (!hasAnotherCharacter()) throw new I''''''llegalStateException("assertion failed"); char result = (char)_nextCharacter; _nextCharacter = _stream.read(); return result; } } '''Python port of the Java example:''' This is pretty much a line-for-line port of Java code that parses an HTTP query string. 
Differences between the two languages for this code snippet: * Python does not make you declare types or private/public. * Python makes you refer to instance variables explicitly, through "self". ("self" is the Python/Smalltalk equivalent of "this" in C++/Java; in Python it is a naming convention for the first method parameter rather than a keyword.) * Dictionaries are built into Python, so you get a little syntactic sugar. * Python does not make you declare what exceptions might be thrown. This code doesn't demonstrate any compelling advantages for Python over Java; instead, it simply shows that you can port from one language to the other without jumping through major hoops. You could, for example, prototype code like this in Python, then port the code to Java when you wanted stronger typing and more punctuation. ;} class queryStringParser: def __init__(self, stream): self._result = {} self._stream = stream self._nextCharacter = stream.read() def parseArgs(self): while self.hasAnotherCharacter(): self.parseNameValuePair() return self._result def parseNameValuePair(self): name = self.readUpTo('=') value = self.readUpTo('&') self._result[name] = value def readUpTo(self, boundaryCharacter): word = "" while self.hasAnotherCharacter(): character = self.readCharacter() if (character == boundaryCharacter): return word elif (character == '%'): word = word + self.readHexEncodedCharacter() elif (character == '+'): word = word + " " else: word = word + character return word def readHexEncodedCharacter(self): sixteens = self.readHexDigit() ones = self.readHexDigit() if (sixteens < 0 or ones < 0): return "" character = 16*sixteens + ones return "%c" % character def readHexDigit(self): if (not self.hasAnotherCharacter()): return -1 return Character().digit(self.readCharacter(), 16) def hasAnotherCharacter(self): return (self._nextCharacter is not None) def readCharacter(self): if (not self.hasAnotherCharacter()): raise "assertion failed" result = self._nextCharacter self._nextCharacter = self._stream.read() return result The following classes are an artifact of porting the original 
code from Java. If you were writing pure Python code from scratch, you might use file/string methods directly. class inputStream: def __init__(self): self.data = "search=find+stuff+here+%26+do+stuff&foo=bar" def read(self): try: c = self.data[0] self.data = self.data[1:] except: c = None return c class Character: def digit(self, c, base): try: return long(c, base) except: return -1 Extensive unit-testing:) qsp = queryStringParser(inputStream()) print qsp.parseArgs() Enjoy. -- SteveHowell ---- Here's a Python non-translation, 33 lines instead of 70. It depends on Python2 features: at least list comprehensions and string methods; and since Python's string type (like Java's String class) is immutable, I accumulate characters in a list to avoid O(N^2) behavior. It would be a little longer without those features. As it is, it's about half as long as the more literal implementation above. It also improves over the previous Python implementation in the following ways: * it does something reasonable on 'abc=def&ghi&jkl=mno' * the control flow is more straightforward, i.e. it's not spaghetti code * it reports hex encoding errors (by raising an exception) instead of silently discarding two characters * like the Java version, it lets ASCII NULs terminate the query-string reading --- admittedly this is a dubious feature, in the abstract, but presumably the original code had a reason for this dubious interface * it's not quadratic-time in the length of the input words * it works with the built-in Python input stream interface (sys.stdin implements the same interface as StringIO) and the built-in Python string type. 
import sys, StringIO, re # decode a URL string # URL-unescape def decode(astring): return re.sub('%(..)', lambda mo: chr(int(mo.group(1), 16)), astring.replace('+', ' ')) class queryStringParser: def __init__(self, input): self._stream = input def parseArgs(self): # read up to first \0 chars = [] while 1: c = self._stream.read(1) if c == '' or c == '\0': break chars.append(c) query_string = ''.join(chars) # parse rv = {} for name, value in [ pair.split('=', 1) for pair in query_string.split('&')]: rv[decode(name)] = decode(value) return rv qsp = queryStringParser( StringIO.StringIO("a=b&c=d+e&f=g=h&i=%2bjk%21l\0bad=man") ) print qsp.parseArgs() print qsp.parseArgs() ---- Here's an even shorter, even cleaner, idiomatic implementation: import re class queryStringParser(list): def __init__(self, args): # [::-1] reverses the list so that pop() can be used self.extend(args.split("\0")[::-1]) def _decode(self, astring): def _convert(amatch): return chr(int(amatch.group(1), 16)) astring = astring.replace('+', ' ') return re.sub('%(..)', _convert, astring) def parseArgs(self): query_string = self.pop() # Throw Index''''''Error if called too often pairs = [pair.split('=', 1) for pair in query_string.split('&')] rv = [(self._decode(name), self._decode(val)) for name, val in pairs] return dict(rv) qsp = queryStringParser("a=b&c=d+e&f=g=h&i=%2bjk%21l\0bad=man") print qsp.parseArgs() print qsp.parseArgs() ---- ''Anyone care to add another translation of this program? How about Smalltalk?'' * QueryStringParserInRuby