基础数据结构与算法
collections.abc
容器数据类型抽象基类,collections.abc为Python标准库
collections
容器数据类型,collections为Python标准库
命名元组
from collections import namedtuple
Card = namedtuple('card', ['suits', 'number'])
c1 = Card('红桃', 2)
print(c1, c1.number, c1.suits)
from random import choice, shuffle
from collections import namedtuple
_Card = namedtuple('Card', ['rank', 'suit'])
class Poker:
ranks = [str(n) for n in range(2, 11)] + list('JQKA')
suits = ['红心', '方片', '梅花', '黑桃']
def __init__(self):
self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]
def __len__(self):
return self._cards.__len__()
def __getitem__(self, item):
return self._cards[item]
def __setitem__(self, key, value):
self._cards[key] = value
cards = Poker()
print(cards[3])
print(len(cards))
shuffle(cards) # 会改变每个索引对应的值,需要__setitem__
print(cards[3])
print(cards[:3])
print(choice(cards))
print(choice(cards))
双端队列
from collections import deque
dq = deque()
dq.append(1)
dq.append(2)
print(dq.popleft()) # 1
print(dq.pop()) # 2
计数器
可哈希
from collections import Counter
items = ['red', 'blue', 'red', 'green', 'blue', 'blue']
print(Counter(items)) # Counter({'blue': 3, 'red': 2, 'green': 1})
带默认工厂字典
from collections import defaultdict
s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = defaultdict(list)
for k, v in s:
d[k].append(v) # 会有一个None到[]的自动变换,否则会报错,处理空值
# default_factory为可调用对象,用于产生一个默认值
print(sorted(d.items())) # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
dd = defaultdict(lambda: 0)
print(dd['key']) # 0
构建树结构
from collections import defaultdict
Tree = lambda: defaultdict(Tree)
有序字典
有序字典中元素排列顺序与插入顺序保持一致,Python3.6及以前dict无序,Python3.7及以后有序
from collections import OrderedDict
d = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)
od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)
ChainMap
倒序迭代
from collections import ChainMap
baseline = {'music': 'bach', 'art': 'rembrandt'}
adjustments = {'art': 'van gogh', 'opera': 'carmen'}
cm = ChainMap(adjustments, baseline)
for i in cm:
print(i, end=" ") # music art opera
print(list(cm)) # ['music', 'art', 'opera']
print(cm.get('art')) # van gogh
UserString、UserList、UserDict
当需要对内置数据类型str、list、dict进行拓展,并且想要重写其某些标准功能时,应该使用UserDict, UserList, 和UserString,而不是直接继承str、list、dict
from collections import UserString, UserList, UserDict
us = UserString("abc")
print(us)
print(us.data)
ul = UserList([1, 2, 3])
print(ul)
print(ul.data)
ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
print(ud)
print(ud.data)
实现一个自动转大写key的字段,初始化、[]设置属性__setitem__、update分别需要重写,若使用UserDict只需重写__setitem__
class UpperDict(dict):
def __init__(self, data):
data = {k.upper(): v for k, v in data.items()}
super().__init__(data)
def __setitem__(self, key, value):
key = key.upper()
super().__setitem__(key, value)
def update(self, data): # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配
data = {k.upper(): v for k, v in data.items()}
super().update(data)
d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
class UpperDict(UserDict):
def __setitem__(self, key, value):
key = key.upper()
super().__setitem__(key, value)
d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
queue
queue为Python标准库,同步队列,线程安全
import queue
q = queue.Queue()
print(q.put("1")) # None
print(q.qsize()) # 1
print(q.get()) # 1
print(q.put_nowait("2")) # None
print(q.get_nowait()) # 2
# 后进先出队列
lifo_queue = queue.LifoQueue()
print(lifo_queue.put(1)) # None
print(lifo_queue.put(2)) # None
print(lifo_queue.get()) # 2
# 优先队列,最小的先出
pq = queue.PriorityQueue()
pq.put(3)
pq.put(1)
print(pq.get()) # 1
pq.put(4)
pq.put(0)
print(pq.get()) # 0
enum
enum为Python标准库,枚举
from enum import Enum
class Color(Enum):
RED = 1
GREEN = 2
BLUE = 3
print(Color.RED.name, Color.RED.value) # RED 1
枚举的另一种写法,节选自 Python 标准库 http
from enum import IntEnum
class HTTPStatus(IntEnum):
"""
HTTP status codes and reason phrases
"""
def __new__(cls, value, phrase, description=''):
obj = int.__new__(cls, value)
obj._value_ = value
obj.phrase = phrase
obj.description = description
return obj
# informational
CONTINUE = 100, 'Continue', 'Request received, please continue'
SWITCHING_PROTOCOLS = (101, 'Switching Protocols', 'Switching to new protocol; obey Upgrade header')
PROCESSING = 102, 'Processing'
EARLY_HINTS = 103, 'Early Hints'
print(HTTPStatus.CONTINUE.value)
array
array为Python标准库,数字数组,纯数字操作比list效率高
from array import array
a = array('l', [1, 2, 3, 4, 5])
print(a, type(a))
heapq
heapq为Python标准库,堆队列算法(优先队列算法),堆使用小根堆,基于数组实现
import heapq
import operator
l = [5, 3, 4, 2, 1]
# 堆化
heapq.heapify(l)
print(l) # [1, 2, 4, 5, 3]
print(l[0]) # 1 第一个元素为小根堆最小值
# 入堆
heapq.heappush(l, 6)
print(l) # [1, 2, 4, 5, 3, 6]
# 出堆
print(heapq.heappop(l)) # 1
# 先入后出,比heappush+heappop高效
print(heapq.heappushpop(l, 0)) # 0
print(l) # [2, 3, 4, 5, 6]
# 先出后入,比heappop+heappush高效
print(heapq.heapreplace(l, 0)) # 2
print(l) # [0, 3, 4, 5, 6]
# 获取迭代器最大的n个元素
print(heapq.nlargest(10, range(100, 200, 5))) # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]
# 获取迭代器最小的n个元素
print(heapq.nsmallest(10, range(100, 200, 5))) # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]
# 若干有序迭代器元素合并,合并结果为一个生成器,日志合并示例
log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]
for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):
print(line)
# 2023-10-23 10:12:11 [WARN] ...
# 2023-10-23 10:12:12 [INFO] ...
# 2023-10-23 10:12:13 [INFO] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:14 [DEBUG] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:15 [INFO] ...
# 2023-10-23 10:12:18 [INFO] ...
# 2023-10-23 10:12:20 [ERROR] ...
bisect
bisect为Python标准库,基于二分查找算法定位插入点,模块中函数基于插入点设计,并非使用__eq__查找特定值,而是使用__lt__查找插入点。
公共参数说明
a,列表x,列表元素lo,低位下标hi,高位下标key,比较键
函数说明
bisect.bisect_left(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片all(elem < x for elem in a[lo : ip])为真值而对于右侧切片all(elem >= x for elem in a[ip : hi])为真值bisect.bisect_right(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片all(elem <= x for elem in a[lo : ip])为真值而对于右则切片all(elem > x for elem in a[ip : hi])为真值bisect.insort_right(a, x, lo=0, hi=len(a), *, key=None),基于bisect_right函数定位插入点然后插入元素,插入算法时间复杂度为O(n)bisect.insort_left(a, x, lo=0, hi=len(a), *, key=None),基于bisect_left函数定位插入点然后插入元素,插入算法时间复杂度为O(n)bisect.bisect与bisect.bisect_right等价bisect.insort与bisect.insort_right等价
import bisect
import operator
from collections import namedtuple
l = [1, 3, 5, 7, 9]
print(bisect.bisect_right(l, 5)) # 3
print(bisect.bisect_left(l, 5)) # 2
print(bisect.bisect_right(l, 4)) # 2
print(bisect.bisect_left(l, 4)) # 2
print(bisect.bisect_right(l, 9)) # 5
print(bisect.bisect_left(l, 9)) # 4
print(bisect.bisect_right(l, 1)) # 1
print(bisect.bisect_left(l, 1)) # 0
print(bisect.bisect_right(l, 0)) # 0
print(bisect.bisect_left(l, 0)) # 0
print(bisect.bisect_right(l, 10)) # 5
print(bisect.bisect_left(l, 10)) # 5
print(bisect.bisect_left(l, 5)) # 2
bisect.insort_left(l, 5)
print(l) # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置
print(bisect.bisect_right(l, 5)) # 4
bisect.insort_right(l, 5)
print(l) # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置
# 若数组无序则不会正常工作
a = [1, 5, 3, 6]
print(bisect.bisect_left(a, 5)) # 3
bisect.bisect_left(a, 5) # [1, 5, 3, 6]
print(a)
基于定位插入点特性便于分段
import bisect
import operator
from collections import namedtuple
for score in [33, 99, 77, 70, 89, 90, 100]:
print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
# F A C C B A A
基于定位插入点特性不便于查找特定元素,通常需要封装一层函数
import bisect
import operator
from collections import namedtuple
Movie = namedtuple('Movie', ('id', 'name', 'released', 'director'))
movies = [
Movie(1, 'Jaws', 1975, 'Spielberg'),
Movie(3, 'Titanic', 1997, 'Cameron'),
Movie(5, 'The Birds', 1963, 'Hitchcock'),
Movie(7, 'Aliens', 1986, 'Cameron')
]
print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id'))) # 1
print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id'))) # 1
print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id'))) # 2
def index(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i != len(a) and (key(a[i]) if key else a[i]) == x:
return i
raise ValueError
def find_lt(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i:
return a[i - 1]
raise ValueError
def find_le(a, x, key=None):
i = bisect.bisect_right(a, x, key=key)
if i:
return a[i - 1]
raise ValueError
def find_gt(a, x, key=None):
i = bisect.bisect_right(a, x, key=key)
if i != len(a):
return a[i]
raise ValueError
def find_ge(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i != len(a):
return a[i]
raise ValueError
print(index(movies, 5, key=operator.attrgetter('id'))) # 2
print(index(movies, 2, key=operator.attrgetter('id'))) # ValueError
三方有序容器库,https://grantjenks.com/docs/sortedcollections/
浅拷贝与深拷贝,shallow copy and deep copy
import copy
l = [1, 2, [3]]
copy_copy = copy.copy(l)
copy_copy[0] = '1a'
copy_copy[2][0] = '3a'
print(l, copy_copy) # [1, 2, ['3a']] ['1a', 2, ['3a']]
copy_deepcopy = copy.deepcopy(copy_copy)
copy_deepcopy[0] = 1
copy_deepcopy[2][0] = 3
print(copy_copy, copy_deepcopy) # ['1a', 2, ['3a']] [1, 2, [3]]
bidirectional dict
# 双向字典
# https://bidict.readthedocs.io/
# pip install bidict
from bidict import frozenbidict
# 若无法形成双向字典会报错
investment_strategy = frozenbidict({
"1": "主观股票多头",
"2": "量化股票多头"
})
print(investment_strategy.inv)
print(investment_strategy.inverse)
print(investment_strategy["1"])
print(investment_strategy.inv["量化股票多头"])
多值字典
允许key重复
# pip install werkzeug
from werkzeug.datastructures import ImmutableMultiDict, MultiDict
# 多值字典
d = MultiDict([("A", 1), ("A", 2)])
print(d)
print(d.get("A"))
print(d.getlist("A"))
# 不可变多值字典
immutable_multi_dict = ImmutableMultiDict([('a', 1), ('a', 2)])
print(immutable_multi_dict)
嵌套字典合并
def dic_(dic1,dic2):
"""嵌套字典合并,参数1旧字典,参数2新字典,结果是将新字典合并到旧字典中"""
for i in dic2:
print(i)
if i in dic1:
if type(dic1[i]) is dict and type(dic2[i]) is dict:
dicMeg(dic1[i],dic2[i])
else:
dic1[i] = dic2[i]