7.基础数据结构与算法

予早 2024-10-05 11:29:46
Categories: Tags:

基础数据结构与算法

collections.abc

容器数据类型抽象基类,collections.abc为Python标准库

collections

容器数据类型,collections为Python标准库

命名元组

from collections import namedtuple

Card = namedtuple('card', ['suits', 'number'])
c1 = Card('红桃', 2)
print(c1, c1.number, c1.suits)
from random import choice, shuffle
from collections import namedtuple

_Card = namedtuple('Card', ['rank', 'suit'])


class Poker:
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = ['红心', '方片', '梅花', '黑桃']

    def __init__(self):
        self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]

    def __len__(self):
        return self._cards.__len__()

    def __getitem__(self, item):
        return self._cards[item]

    def __setitem__(self, key, value):
        self._cards[key] = value


cards = Poker()
print(cards[3])
print(len(cards))
shuffle(cards)  # 会改变每个索引对应的值,需要__setitem__
print(cards[3])
print(cards[:3])
print(choice(cards))
print(choice(cards))

双端队列

from collections import deque

dq = deque()
dq.append(1)
dq.append(2)
print(dq.popleft())  # 1
print(dq.pop())  # 2

计数器

可哈希

from collections import Counter

items = ['red', 'blue', 'red', 'green', 'blue', 'blue']
print(Counter(items))  # Counter({'blue': 3, 'red': 2, 'green': 1})

带默认工厂字典

from collections import defaultdict

s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = defaultdict(list)
for k, v in s:
    d[k].append(v)  # 会有一个None到[]的自动变换,否则会报错,处理空值
# default_factory为可调用对象,用于产生一个默认值
print(sorted(d.items()))  # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]

dd = defaultdict(lambda: 0)
print(dd['key'])  # 0

构建树结构

from collections import defaultdict

Tree = lambda: defaultdict(Tree)

有序字典

有序字典中元素排列顺序与插入顺序保持一致,Python3.6及以前dict无序,Python3.7及以后有序

from collections import OrderedDict

d = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)

od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)

ChainMap

倒序迭代

from collections import ChainMap

baseline = {'music': 'bach', 'art': 'rembrandt'}
adjustments = {'art': 'van gogh', 'opera': 'carmen'}
cm = ChainMap(adjustments, baseline)
for i in cm:
    print(i, end=" ")  # music art opera 
print(list(cm))  # ['music', 'art', 'opera']
print(cm.get('art'))  # van gogh

UserString、UserList、UserDict

当需要对内置数据类型str、list、dict进行拓展,并且想要重写其某些标准功能时,应该使用UserDict, UserList, 和UserString,而不是直接继承str、list、dict

from collections import UserString, UserList, UserDict

us = UserString("abc")
print(us)
print(us.data)

ul = UserList([1, 2, 3])
print(ul)
print(ul.data)

ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
print(ud)
print(ud.data)

实现一个自动转大写key的字段,初始化、[]设置属性__setitem__、update分别需要重写,若使用UserDict只需重写__setitem__

class UpperDict(dict):

    def __init__(self, data):
        data = {k.upper(): v for k, v in data.items()}
        super().__init__(data)

    def __setitem__(self, key, value):
        key = key.upper()
        super().__setitem__(key, value)

    def update(self, data):  # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配
        data = {k.upper(): v for k, v in data.items()}
        super().update(data)


d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
class UpperDict(UserDict):
    def __setitem__(self, key, value):
        key = key.upper()
        super().__setitem__(key, value)


d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)

queue

queue为Python标准库,同步队列,线程安全

import queue

q = queue.Queue()
print(q.put("1"))  # None
print(q.qsize())  # 1
print(q.get())  # 1
print(q.put_nowait("2"))  # None
print(q.get_nowait())  # 2

# 后进先出队列
lifo_queue = queue.LifoQueue()
print(lifo_queue.put(1))  # None
print(lifo_queue.put(2))  # None
print(lifo_queue.get())  # 2

# 优先队列,最小的先出
pq = queue.PriorityQueue()
pq.put(3)
pq.put(1)
print(pq.get())  # 1
pq.put(4)
pq.put(0)
print(pq.get())  # 0

enum

enum为Python标准库,枚举

from enum import Enum


class Color(Enum):
    RED = 1
    GREEN = 2
    BLUE = 3


print(Color.RED.name, Color.RED.value)  # RED 1

枚举的另一种写法,节选自 Python 标准库 http

from enum import IntEnum


class HTTPStatus(IntEnum):
    """
    HTTP status codes and reason phrases
    """

    def __new__(cls, value, phrase, description=''):
        obj = int.__new__(cls, value)
        obj._value_ = value
        obj.phrase = phrase
        obj.description = description
        return obj

    # informational
    CONTINUE = 100, 'Continue', 'Request received, please continue'
    SWITCHING_PROTOCOLS = (101, 'Switching Protocols', 'Switching to new protocol; obey Upgrade header')
    PROCESSING = 102, 'Processing'
    EARLY_HINTS = 103, 'Early Hints'


print(HTTPStatus.CONTINUE.value)

array

array为Python标准库,数字数组,纯数字操作比list效率高

from array import array

a = array('l', [1, 2, 3, 4, 5])
print(a, type(a))

heapq

heapq为Python标准库,堆队列算法(优先队列算法),堆使用小根堆,基于数组实现

import heapq
import operator

l = [5, 3, 4, 2, 1]

# 堆化
heapq.heapify(l)
print(l)  # [1, 2, 4, 5, 3]
print(l[0])  # 1 第一个元素为小根堆最小值

# 入堆
heapq.heappush(l, 6)
print(l)  # [1, 2, 4, 5, 3, 6]

# 出堆
print(heapq.heappop(l))  # 1

# 先入后出,比heappush+heappop高效
print(heapq.heappushpop(l, 0))  # 0
print(l)  # [2, 3, 4, 5, 6]

# 先出后入,比heappop+heappush高效
print(heapq.heapreplace(l, 0))  # 2
print(l)  # [0, 3, 4, 5, 6]

# 获取迭代器最大的n个元素
print(heapq.nlargest(10, range(100, 200, 5)))  # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]

# 获取迭代器最小的n个元素
print(heapq.nsmallest(10, range(100, 200, 5)))  # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]


# 若干有序迭代器元素合并,合并结果为一个生成器,日志合并示例
log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]

for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):
    print(line)
# 2023-10-23 10:12:11 [WARN] ...
# 2023-10-23 10:12:12 [INFO] ...
# 2023-10-23 10:12:13 [INFO] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:14 [DEBUG] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:15 [INFO] ...
# 2023-10-23 10:12:18 [INFO] ...
# 2023-10-23 10:12:20 [ERROR] ...

bisect

bisect为Python标准库,基于二分查找算法定位插入点,模块中函数基于插入点设计,并非使用__eq__查找特定值,而是使用__lt__查找插入点。

公共参数说明

函数说明

import bisect
import operator
from collections import namedtuple

l = [1, 3, 5, 7, 9]

print(bisect.bisect_right(l, 5))  # 3
print(bisect.bisect_left(l, 5))  # 2
print(bisect.bisect_right(l, 4))  # 2
print(bisect.bisect_left(l, 4))  # 2
print(bisect.bisect_right(l, 9))  # 5
print(bisect.bisect_left(l, 9))  # 4
print(bisect.bisect_right(l, 1))  # 1
print(bisect.bisect_left(l, 1))  # 0
print(bisect.bisect_right(l, 0))  # 0
print(bisect.bisect_left(l, 0))  # 0
print(bisect.bisect_right(l, 10))  # 5
print(bisect.bisect_left(l, 10))  # 5

print(bisect.bisect_left(l, 5))  # 2
bisect.insort_left(l, 5)
print(l)  # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置  

print(bisect.bisect_right(l, 5))  # 4
bisect.insort_right(l, 5)
print(l)  # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置

# 若数组无序则不会正常工作
a = [1, 5, 3, 6]
print(bisect.bisect_left(a, 5))  # 3
bisect.bisect_left(a, 5)  # [1, 5, 3, 6]
print(a)

基于定位插入点特性便于分段

import bisect
import operator
from collections import namedtuple

for score in [33, 99, 77, 70, 89, 90, 100]:
    print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
# F A C C B A A

基于定位插入点特性不便于查找特定元素,通常需要封装一层函数

import bisect
import operator
from collections import namedtuple

Movie = namedtuple('Movie', ('id', 'name', 'released', 'director'))

movies = [
    Movie(1, 'Jaws', 1975, 'Spielberg'),
    Movie(3, 'Titanic', 1997, 'Cameron'),
    Movie(5, 'The Birds', 1963, 'Hitchcock'),
    Movie(7, 'Aliens', 1986, 'Cameron')
]

print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id')))  # 2


def index(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i != len(a) and (key(a[i]) if key else a[i]) == x:
        return i
    raise ValueError


def find_lt(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i:
        return a[i - 1]
    raise ValueError


def find_le(a, x, key=None):
    i = bisect.bisect_right(a, x, key=key)
    if i:
        return a[i - 1]
    raise ValueError


def find_gt(a, x, key=None):
    i = bisect.bisect_right(a, x, key=key)
    if i != len(a):
        return a[i]
    raise ValueError


def find_ge(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i != len(a):
        return a[i]
    raise ValueError


print(index(movies, 5, key=operator.attrgetter('id')))  # 2
print(index(movies, 2, key=operator.attrgetter('id')))  # ValueError

三方有序容器库,https://grantjenks.com/docs/sortedcollections/

浅拷贝与深拷贝,shallow copy and deep copy

import copy

l = [1, 2, [3]]
copy_copy = copy.copy(l)
copy_copy[0] = '1a'
copy_copy[2][0] = '3a'
print(l, copy_copy)  # [1, 2, ['3a']] ['1a', 2, ['3a']]

copy_deepcopy = copy.deepcopy(copy_copy)
copy_deepcopy[0] = 1
copy_deepcopy[2][0] = 3
print(copy_copy, copy_deepcopy)  # ['1a', 2, ['3a']] [1, 2, [3]]

bidirectional dict

# 双向字典
# https://bidict.readthedocs.io/
# pip install bidict


from bidict import frozenbidict

# 若无法形成双向字典会报错
investment_strategy = frozenbidict({
    "1": "主观股票多头",
    "2": "量化股票多头"
})

print(investment_strategy.inv)
print(investment_strategy.inverse)

print(investment_strategy["1"])
print(investment_strategy.inv["量化股票多头"])

多值字典

允许key重复

# pip install werkzeug
from werkzeug.datastructures import ImmutableMultiDict, MultiDict

# 多值字典
d = MultiDict([("A", 1), ("A", 2)])
print(d)
print(d.get("A"))
print(d.getlist("A"))

# 不可变多值字典
immutable_multi_dict = ImmutableMultiDict([('a', 1), ('a', 2)])
print(immutable_multi_dict)

嵌套字典合并

def dic_(dic1,dic2):
    """嵌套字典合并,参数1旧字典,参数2新字典,结果是将新字典合并到旧字典中"""
    for i in dic2:
        print(i)
        if i in dic1:
            if type(dic1[i]) is dict and type(dic2[i]) is dict:

                dicMeg(dic1[i],dic2[i])
        else:
            dic1[i] = dic2[i]