python cookbook 阅读笔记

2020/06/05 技术

python cookbook 阅读笔记

第一章:数据结构和算法

1.1 队列 deque:

from collections import deque

# Keep only the 10 most recent items; once full, appending drops the oldest one
previous_lines = deque(maxlen=10)

# Double-ended queue: O(1) appends and pops at both ends
q = deque([1, 2, 3])

q.appendleft(4)  # deque([4, 1, 2, 3])
q.popleft()  # returns 4 -> deque([1, 2, 3])

q.append(10)  # deque([1, 2, 3, 10])
# Unlike list.pop(i), deque.pop() accepts no index; it always removes the
# rightmost item (passing an argument raises TypeError).
q.pop()  # returns 10 -> deque([1, 2, 3])

1.2 查找最大或最小的 N 个元素

import heapq
# Sample data for the top-N queries below
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]

heapq 可以实现优先级队列.

1.3 Counter 找出 序列中出现次数最多的元素

from collections import Counter 

# Sample data: the word "index-<i>" occurs i + 1 times for i in 0..9
words = [
    "index-{}".format(i)
    for i in range(10)
    for _ in range(i + 1)
]

# The three most frequent words, as (word, count) pairs
word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)

第二章:字符串和文本

2.1 使用多个界定符分割字符串

import re
line = 'asdf fjdk; afed, fjek,asdf, foo'
# Split on ';', ',' or a whitespace character, each followed by optional whitespace
print(re.split(r'[;,\s]\s*', line))  # ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

# Wrapping the delimiter in a capture group keeps the delimiters in the result
fields = re.split(r'(;|,|\s)\s*', line)
print(fields)  # ['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']

2.2 fnmatch 实现 shell 通配符匹配功能


from fnmatch import fnmatch, fnmatchcase

# '*' matches any run of characters
print(fnmatch('foo.txt', '*.txt'))  # True

# '?' matches exactly one character
print(fnmatch('foo.txt', '?oo.txt'))  # True

# '[0-9]' matches one character from the set
print(fnmatch('Dat45.csv', 'Dat[0-9]*'))  # True

# fnmatch() follows the platform's case rules: True on Windows
# (case-insensitive), False on POSIX systems
print(fnmatch('foo.txt', '*.TXT'))  # True on Windows

# fnmatchcase() always compares case-sensitively, whatever the platform
print(fnmatchcase('foo.txt', '*.TXT'))  # False

2.3 字符串搜索和替换


import re
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

# Rewrite month/day/year as year-month-day; \1, \2, \3 refer to the captured groups
print(re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text))  # 'Today is 2012-11-27. PyCon starts 2013-3-13.'

忽略字符串大小写的搜索替换

import re

text = 'UPPER PYTHON, lower python, Mixed Python'
# re.IGNORECASE makes the pattern match regardless of letter case
print(re.findall('python', text, flags=re.IGNORECASE))  # ['PYTHON', 'python', 'Python']
# The replacement text is inserted verbatim; it does not follow the case of the match
print(re.sub('python', 'snake', text, flags=re.IGNORECASE))  # 'UPPER snake, lower snake, Mixed snake'

进阶: 替换字符串自动跟被匹配字符串的大小写保持一致


import re

def matchcase(word):
    """Return a ``re.sub`` callback that substitutes *word* in the case of the match.

    The callback receives a match object and returns *word* upper-cased when
    the matched text is all upper case, lower-cased when it is all lower case,
    capitalized when only its first character is upper case, and unchanged
    otherwise.
    """
    def replace(m):
        matched = m.group()
        if matched.isupper():
            return word.upper()
        if matched.islower():
            return word.lower()
        if matched[0].isupper():
            return word.capitalize()
        # Mixed case that starts with a lower-case character: no adjustment
        return word

    return replace

text = 'UPPER PYTHON, lower python, Mixed Python'
# Pass the callback as the replacement so each match picks its own casing
print(re.sub('python', matchcase('snake'), text, flags=re.IGNORECASE))
# UPPER SNAKE, lower snake, Mixed Snake

多行匹配:


import re

text1 = '/* this is a comment */'
text2 = '''/* this is a
multiline comment */
'''

# By default '.' does not match a newline, so the multi-line comment is missed
re.compile(r'/\*(.*?)\*/').findall(text1)  # [' this is a comment ']
re.compile(r'/\*(.*?)\*/').findall(text2)  # []

# Matching across lines

# Option 1: a non-capturing group (?:.|\n) that explicitly allows newlines
re.compile(r'/\*((?:.|\n)*?)\*/').findall(text2)
# [' this is a\nmultiline comment ']

# Option 2: re.DOTALL makes '.' match newlines as well
re.compile(r'/\*(.*?)\*/', re.DOTALL).findall(text2)
# [' this is a\nmultiline comment ']

第四章:迭代器与生成器

4.1 手动遍历迭代器


def manual_iter():
    """Print every line of /etc/passwd by driving the file iterator by hand.

    Calls ``next()`` on the file object until it raises ``StopIteration``,
    which marks the end of the file.
    """
    with open('/etc/passwd') as f:
        while True:
            try:
                line = next(f)
            except StopIteration:
                # Iterator exhausted: end of file reached
                break
            # Lines keep their own newline, so suppress print's
            print(line, end='')

4.2 迭代器其他

  • 关键魔法方法: __iter__(self) 和 __next__(self)
  • 反向迭代: 实现 __reversed__(self), 然后使用 reversed(obj)
  • 同时迭代多个序列: for x, y in zip(x_iter, y_iter)
  • 依次迭代多个序列: for x in itertools.chain(x1_iter, x2_iter, x3_iter)
  • 创建数据处理管道: 使用迭代器能简化逻辑
  • 展开嵌套的序列: yield from

按顺序迭代多个已排序序列合并后的结果: heapq.merge 是惰性求值的, 不会一次性读入所有序列, 因此处理很长的序列时开销更小


import heapq
# Both inputs are already sorted
a = [1, 4, 7, 10]
b = [2, 5, 6, 11]
# Iterate over the merged result in sorted order
for c in heapq.merge(a, b):
    print(c)

# 1 2 4 5 6 7 10 11

第八章:类与对象

8.1 创建新的类或实例属性


# Descriptor attribute for an integer type-checked attribute
class Integer:
    """Descriptor for an attribute that only accepts ``int`` values.

    The value is kept in the owning instance's ``__dict__`` under ``name``.
    """

    def __init__(self, name):
        # Key used in the instance __dict__
        self.name = name

    def __get__(self, instance, cls):
        # Accessed on the class rather than an instance: expose the descriptor
        if instance is None:
            return self
        return instance.__dict__[self.name]

    def __set__(self, instance, value):
        if isinstance(value, int):
            instance.__dict__[self.name] = value
        else:
            raise TypeError('Expected an int')

    def __delete__(self, instance):
        storage = instance.__dict__
        del storage[self.name]

class Point:
    """Point whose ``x`` and ``y`` attributes are managed by ``Integer`` descriptors."""
    # Descriptors must be defined at class level to take effect
    x = Integer('x')
    y = Integer('y')

    def __init__(self, x, y):
        # These assignments go through Integer.__set__
        self.x = x
        self.y = y

p = Point(2, 3)
print(p.x) # Calls Point.x.__get__(p,Point)

p.y = 5 # Calls Point.y.__set__(p, 5)
p.x = 2.3 # Calls Point.x.__set__(p, 2.3)
# Raises TypeError('Expected an int') because 2.3 is not an int

8.2 使用延迟计算属性

import math


class lazyproperty:
    """Descriptor that computes a value on first access and stores it on the instance.

    The wrapped function is called with the instance, and its result is saved
    as an instance attribute named after the function.
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, cls):
        # Accessed on the class rather than an instance: expose the descriptor
        if instance is None:
            return self
        result = self.func(instance)
        # Store the result under the function's own name on the instance
        setattr(instance, self.func.__name__, result)
        return result

class Circle:
    """Circle whose area and perimeter are computed through ``lazyproperty``."""

    def __init__(self, radius):
        self.radius = radius

    @lazyproperty
    def area(self):
        # The print makes it visible when the computation actually runs
        print('Computing area')
        return math.pi * self.radius ** 2

    @lazyproperty
    def perimeter(self):
        print('Computing perimeter')
        return 2 * math.pi * self.radius

# Usage
c = Circle(4.0)

print(vars(c))  # {'radius': 4.0}

# First access runs the computation and stores the result on the instance
c.area
print(vars(c))  # {'radius': 4.0, 'area': 50.26548245743669}

# Deleting the stored value removes it from the instance again
del c.area
print(vars(c))  # {'radius': 4.0}

8.3 实现自定义容器

你想实现一个自定义的类来模拟内置的容器类功能,比如列表和字典。但是你不确定到底要实现哪些方法。

collections.abc 模块定义了很多抽象基类,当你想自定义容器类的时候它们会非常有用。

比如你想让你的类支持迭代,那就让你的类继承 collections.abc.Iterable 即可 (Python 3.10 起已移除旧的别名 collections.Iterable):

import collections.abc


class A(collections.abc.Iterable):
    """Minimal custom-container stub built on the ``Iterable`` abstract base class.

    The container ABCs live in ``collections.abc``; the old
    ``collections.Iterable`` alias was removed in Python 3.10. Because
    ``__iter__`` is not implemented here, calling ``A()`` raises ``TypeError``
    until the method is defined.
    """
    pass

8.4 利用 Mixins 扩展类功能


class LoggedMappingMixin:
    """
    Mixin that prints a message for every get/set/delete on a mapping.

    Each method logs first and then delegates to the next class in the MRO,
    so it has to be combined with a real mapping class.
    """
    # Mixins define no instance attributes of their own; instantiating a
    # mixin directly is meaningless.
    __slots__ = ()

    def __getitem__(self, key):
        message = 'Getting ' + str(key)
        print(message)
        return super().__getitem__(key)

    def __setitem__(self, key, value):
        message = 'Setting {} = {!r}'.format(key, value)
        print(message)
        return super().__setitem__(key, value)

    def __delitem__(self, key):
        message = 'Deleting ' + str(key)
        print(message)
        return super().__delitem__(key)

8.5 创建缓存实例


import weakref


class CachedSpamManager:
    """Hand out one shared ``Spam`` instance per name.

    Instances are kept in a ``weakref.WeakValueDictionary``, so the cache
    itself does not keep them alive: an entry disappears once nothing else
    references its value.
    """

    def __init__(self):
        self._cache = weakref.WeakValueDictionary()

    def get_spam(self, name):
        """Return the cached instance for *name*, creating it on a miss."""
        # One lookup instead of `in` followed by `[]`: a weakly referenced
        # entry can vanish between the two steps and turn a hit into KeyError.
        s = self._cache.get(name)
        if s is None:
            s = Spam._new(name)
            self._cache[name] = s
        return s

    def clear(self):
        """Drop every cached entry."""
        self._cache.clear()

class Spam:
    """Object that can only be created through the ``_new`` alternate constructor."""

    def __init__(self, *args, **kwargs):
        # Block the ordinary construction path
        raise RuntimeError("Can't instantiate directly")

    @classmethod
    def _new(cls, name):
        """Create an instance called *name* without running ``__init__``."""
        instance = cls.__new__(cls)
        instance.name = name
        return instance

if __name__ == '__main__':
    # Demo: asking for the same name twice yields the same object
    cache = CachedSpamManager()
    a = cache.get_spam("a")
    b = cache.get_spam("b")
    c = cache.get_spam("a")
    print(a)
    print(b)
    print(c)
    # Different names give distinct instances; the same name is served from the cache
    assert a is not b
    assert a is c

TODO

page 301

Search

    Table of Contents