Skip to content

Latest commit

 

History

History
538 lines (434 loc) · 12.8 KB

transforming-code-into-beautiful-idiomatic-python.md

File metadata and controls

538 lines (434 loc) · 12.8 KB

Transforming Code into Beautiful, Idiomatic Python

The following is my transcript of the video linked above.



When you see this, do that instead!

  • Replace traditional index manipulation with Python's core looping idioms
  • Learn advanced techniques with for-else clauses and the two argument form of iter()
  • Improve your craftsmanship and aim for clean, fast, idiomatic Python code

For each example, the last solution is the best.

Looping over a range of numbers

# This is a naive way to loop through a list of numbers.
for i in [0, 1, 2, 3, 4, 5]:
    print i**2

# In Python 2, range() builds the entire list in memory. In Python 3, range() behaves like Python 2's xrange().
for i in range(6):
    print i**2

# In Python 2, xrange() produces its values lazily, one at a time, so it is more memory efficient than the naive way.
for i in xrange(6):
    print i**2

Looping over a collection

colors = ['red', 'green', 'blue', 'yellow']

for i in range(len(colors)):
    print colors[i]

for color in colors:
    print color

Looping backwards

colors = ['red', 'green', 'blue', 'yellow']

for i in range(len(colors)-1, -1, -1):
    print colors[i]

for color in reversed(colors):
    print color

Looping over a collection and indices

colors = ['red', 'green', 'blue', 'yellow']

for i in range(len(colors)):
    print i, '-->', colors[i]

for i, color in enumerate(colors):
    print i, '-->', color

Looping over two collections

names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue', 'yellow']

for i in min(len(names), len(colors)):
    print i, '-->', colors[i]

# zip was in the first Lisp paper.
for name, color in zip(names, colors):
    print name, '-->', color

# izip uses an iterator to stay in L1 cache.
# In Python 3, zip() is izip().
for name, color in izip(names, colors):
    print name, '-->', color

Custom sort order

colors = ['red', 'green', 'blue', 'yellow']

def compare_length(c1, c2):
    """Old-style comparison function: order two items by their length.

    Returns -1 if c1 is shorter than c2, 1 if it is longer, and 0 otherwise.
    """
    if len(c1) < len(c2): return -1
    if len(c1) > len(c2): return 1
    return 0

print sorted(colors, cmp=compare_length)

print sorted(colors, key=len)

Call a function until a sentinel value

# Read fixed-size chunks until read() returns the empty-string sentinel.
blocks = []
while True:
    block = f.read(32)
    if block == '':
        break
    # Collect into the list (blocks), not onto the chunk itself.
    blocks.append(block)

blocks = []
for block in iter(partial(f.read, 32), ''):
    blocks.append(block)

Distinguishing multiple exit points in loops

The for loop has an else clause.

def find(seq, target):
    """Return the index of the first item in seq equal to target, or -1.

    This version tracks the outcome with an explicit flag variable.
    """
    found = False
    for i, value in enumerate(seq):
        if value == target:
            found = True
            break
    # Checked once, after the loop: an empty seq or no match leaves found False.
    if not found:
        return -1
    return i

def find(seq, target):
    """Return the index of the first item in seq equal to target, or -1."""
    for i, value in enumerate(seq):
        if value == target:
            break
    else:
        # Runs only when the loop finished without hitting break (no match).
        return -1
    return i

Dictionary Skills

  • Mastering dictionaries is a fundamental Python skill

  • They are a fundamental tool for expressing relationships, linking, counting, and grouping

Looping over dictionary keys

d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}

for k in d:
    print k

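# Use .keys() when you need to mutate the dictionary while looping: in Python 2 it returns a list copy of the keys.
# In Python 3, .keys() is a live view, so loop over list(d) instead.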
for k in d.keys():
    if k.startswith('r'):
        del d[k]

Looping over a dictionary's keys and values

for k in d:
    print k, '-->', d[k]

for k, v in d.items():
    print k, '-->', v

# In Python 3, .iteritems() is just .items().
for k, v in d.iteritems():
    print k, '-->', v

Construct a dictionary from pairs

names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue', 'yellow']

# Combine 2 lists into a dictionary as keys and values.
d = dict(izip(names, colors))

d = dict(enumerate(names))

Counting with dictionaries

colors = ['red', 'green', 'red', 'blue', 'green', 'red']

d = {}
for color in colors:
    if color not in d:
        d[color] = 0
    d[color] += 1

d = {}
for color in colors:
    d[color] = d.get(color, 0) + 1

# A better way
from collections import defaultdict

d = defaultdict(int)
for color in colors:
    d[color] += 1

Grouping with dictionaries

names = ['raymond', 'rachel', 'matthew', 'roger', 'betty', 'melissa', 'judith', 'charlie']

# Group the names by length, creating each bucket the first time its key appears.
d = {}
for name in names:
    key = len(name)
    if key not in d:
        d[key] = []
    d[key].append(name)

d = {}
for name in names:
    key = len(name)
    d.setdefault(key, []).append(name)

from collections import defaultdict

d = defaultdict(list)
for name in names:
    key = len(name)
    d[key].append(name)

Is a dictionary popitem() atomic?

d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}

while d:
    key, value = d.popitem()
    print key, '-->', value

Linking dictionaries

defaults = {'color': 'red', 'user': 'guest'}
parser = argparse.ArgumentParser()
parser.add_argument('-u', '--user')
parser.add_argument('-c', '--color')
namespace = parser.parse_args([])
# Keep only the options that were actually given a (truthy) value.
command_line_args = {k:v for k, v in vars(namespace).items() if v}

d = defaults.copy()
d.update(os.environ)
d.update(command_line_args)

d = ChainMap(command_line_args, os.environ, defaults)

Improving Clarity

  • Positional arguments and indices are nice
  • Keywords and names are better
  • The first way is convenient for the computer
  • The second corresponds to how humans think

Clarify function calls with keyword arguments

twitter_search('@obama', False, 20, True)

twitter_search('@obama', retweets=False, numtweets=20, popular=True)

Clarify multiple return values with named tuples

doctest.testmod()
# Used to return (0, 4)

doctest.testmod()
# Now returns TestResults(failed=0, attempted=4)

TestResults = namedtuple('TestResults', ['failed', 'attempted'])

Unpacking sequences

p = 'Raymond', 'Hettinger', 0x30, 'python@example.com'

# Field-by-field indexing.
fname = p[0]
lname = p[1]
age = p[2]
email = p[3]

# Tuple unpacking does the same in one statement.
fname, lname, age, email = p

Updating multiple state variables

def fibonacci(n):
    """Print the first n Fibonacci numbers, starting from 0."""
    x = 0
    y = 1
    for i in range(n):
        print x
        # t holds the old y so it survives the update of y on the next line.
        t = y
        y = x + y
        x = t

def fibonacci(n):
    """Print the first n Fibonacci numbers, starting from 0."""
    x, y = 0, 1
    for i in range(n):
        print x
        # Both right-hand values are computed before either name is rebound.
        x, y = y, x+y

Tuple Packing & Unpacking

  • Don't underestimate the advantages of updating state variables at the same time
  • It eliminates an entire class of errors due to out-of-order updates
  • It allows high level thinking: "chunking"

Simultaneous state updates

tmp_x = x + dx * t
tmp_y = y + dy * t
tmp_dx = influence(m, x, y, dx, dy, partial='x')
tmp_dy = influence(m, x, y, dx, dy, partial='y')
x = tmp_x
y = tmp_y
dx = tmp_dx
dy = tmp_dy

x, y, dx, dy = (x + dx * t,
                y + dy * t,
                influence(m, x, y, dx, dy, partial='x'),
                influence(m, x, y, dx, dy, partial='y'))

Efficiency

  • A fundamental optimization rule
  • Don't cause data to move around unnecessarily
  • It takes only a little care to avoid O(n**2) behavior instead of linear behavior

Concatenating strings

names = ['raymond', 'rachel', 'matthew', 'roger', 'betty', 'melissa', 'judith', 'charlie']

s = names[0]
for name in names[1:]:
    s += ', ' + name
print s

print ', '.join(names)

Updating sequences

names = ['raymond', 'rachel', 'matthew', 'roger', 'betty', 'melissa', 'judith', 'charlie']

# With a list, each of these works at the front of the sequence.
del names[0]
names.pop(0)
names.insert(0, 'mark')

names = deque(names)

# The deque equivalents: popleft() takes no argument, and appendleft() adds at the front.
del names[0]
names.popleft()
names.appendleft('mark')

Decorators & Context Managers

  • Helps separate business logic from administrative logic
  • Clean, beautiful tools for factoring code and improving code reuse
  • Good naming is essential.
  • Remember the Spiderman rule: With great power, comes great responsibility!

Using Decorators to Factor Out Administrative Logic

def web_lookup(url, saved={}):
    """Return the page read from url, reusing an earlier result when there is one.

    The default dict for saved is created once and shared across calls, so
    it acts as the cache here.
    """
    if url in saved:
        return saved[url]
    page = urllib.urlopen(url).read()
    saved[url] = page
    return page

@cache
def web_lookup(url):
    """Return the page read from url; caching is left to the decorator."""
    return urllib.urlopen(url).read()

def cache(func):
    """Decorator that remembers func's results, keyed by its positional args.

    The args tuple is used as a dict key, so every argument must be hashable.
    """
    saved = {}
    @wraps(func)
    def newfunc(*args):
        if args in saved:
            # Return the stored result; calling newfunc here would recurse forever.
            return saved[args]
        result = func(*args)
        saved[args] = result
        return result
    return newfunc

Factor out temporary contexts: precision

old_context = getcontext().copy()
getcontext().prec = 50
print Decimal(355) / Decimal(113)
setcontext(old_context)

with localcontext(Context(prec=50)):
    print Decimal(355) / Decimal(113)

How to open and close files

f = open('data.txt')
try:
    data = f.read()
finally:
    f.close()

with open('data.txt') as f:
    data = f.read()

How to use locks

# Make a lock
lock = threading.Lock()

# Old way to use a lock
lock.acquire()
try:
    print 'Critical section 1'
    print 'Critical section 2'
finally:
    lock.release()

# New way to use a lock
with lock:
    print 'Critical section 1'
    print 'Critical section 2'

Factor out temporary contexts: suppress

try:
    os.remove('somefile.tmp')
except OSError:
    pass

# A better way
with ignored(OSError):
    os.remove('somefile.tmp')

@contextmanager
def ignored(*exceptions):
    """Context manager that silently discards the given exception types.

    Any other exception raised in the with-body propagates as usual.
    """
    try:
        yield
    except exceptions:
        pass

# In Python 3, check out suppress().
from contextlib import suppress
with suppress(OSError):
    os.remove('foo.txt')

Factor out temporary contexts: stdout

with open('help.txt', 'w') as f:
    oldstdout = sys.stdout
    sys.stdout = f
    try:
        help(pow)
    finally:
        sys.stdout = oldstdout

# A better way
with open('help.txt', 'w') as f:
    with redirect_stdout(f):
        help(pow)

@contextmanager
def redirect_stdout(fileobj):
    """Temporarily point sys.stdout at fileobj, restoring it on exit.

    Yields fileobj so it can be bound with "as" in the with-statement.
    """
    oldstdout = sys.stdout
    sys.stdout = fileobj
    try:
        yield fileobj
    finally:
        # Restore the previous stream even if the with-body raised.
        sys.stdout = oldstdout

Concise Expressive One Liners

Two conflicting rules:

  1. Don't put too much on one line
  2. Don't break atoms of thought into subatomic particles

Raymond's rule: One logical line of code equals one sentence in English.

List Comprehensions & Generator Expressions

result = []
for i in range(10):
    s = i ** 2
    result.append(s)
print sum(result)

# A better way
print sum([i**2 for i in xrange(10)])

# An even better way
print sum(i**2 for i in xrange(10))