# PartsInquiry/backend/txm/app/ean13_decoder.py

from typing import List, Optional, Tuple

import numpy as np


# EAN-13 symbol tables (L / G / R sets). Every digit maps to a 7-module
# pattern in which "1" is a dark module and "0" is a light module. The
# strings are used purely as lookup values; nothing is simulated from them.
#
# L set: one of the two tables tried for digits in the left half.
L_CODES = dict(zip(
    "0123456789",
    (
        "0001101",  # 0
        "0011001",  # 1
        "0010011",  # 2
        "0111101",  # 3
        "0100011",  # 4
        "0110001",  # 5
        "0101111",  # 6
        "0111011",  # 7
        "0110111",  # 8
        "0001011",  # 9
    ),
))

# G set: the alternative 7-module patterns tried for digits in the left half.
G_CODES = dict(zip(
    "0123456789",
    (
        "0100111",  # 0
        "0110011",  # 1
        "0011011",  # 2
        "0100001",  # 3
        "0011101",  # 4
        "0111001",  # 5
        "0000101",  # 6
        "0010001",  # 7
        "0001001",  # 8
        "0010111",  # 9
    ),
))

# R set: the 7-module patterns used for digits in the right half.
R_CODES = dict(zip(
    "0123456789",
    (
        "1110010",  # 0
        "1100110",  # 1
        "1101100",  # 2
        "1000010",  # 3
        "1011100",  # 4
        "1001110",  # 5
        "1010000",  # 6
        "1000100",  # 7
        "1001000",  # 8
        "1110100",  # 9
    ),
))

# The L/G choice made for each of the six left-half digits encodes the
# leading (13th) digit. The tuple index below is that leading digit.
LEADING_PARITY_TO_FIRST = {
    parity: str(first_digit)
    for first_digit, parity in enumerate((
        "LLLLLL",
        "LLGLGG",
        "LLGGLG",
        "LLGGGL",
        "LGLLGG",
        "LGGLLG",
        "LGGGLL",
        "LGLGLG",
        "LGLGGL",
        "LGGLGL",
    ))
}


def _normalize_run_lengths(line: np.ndarray, total_modules: int) -> Tuple[np.ndarray, List[int]]:
    # 将行强度阈值化为黑白，再统计 run-length，然后按照总模块数归一化为 95 个模块
    # 使用中位数作为阈值以抵抗亮度变化
    threshold = np.median(line)
    binary = (line < threshold).astype(np.uint8)  # 黑为 1
    # run-length 编码
    values = binary.tolist()
    runs: List[int] = []
    last = values[0]
    length = 1
    for v in values[1:]:
        if v == last:
            length += 1
        else:
            runs.append(length)
            last = v
            length = 1
    runs.append(length)
    # 放缩为 total_modules 模块
    total_pixels = float(sum(runs))
    if total_pixels <= 0:
        return binary, runs
    scale = total_modules / total_pixels
    scaled = [max(1, int(round(r * scale))) for r in runs]
    # 对齐长度
    diff = total_modules - sum(scaled)
    if diff != 0:
        # 简单补偿到首个 run
        scaled[0] = max(1, scaled[0] + diff)
    # 展开为模块级二进制
    expanded = []
    color = binary[0]  # 起始颜色
    for r in scaled:
        expanded.extend([color] * r)
        color = 1 - color
    return np.array(expanded[:total_modules], dtype=np.uint8), scaled


def _find_guards(bits: np.ndarray, tol: float) -> Optional[Tuple[int, int, int, int]]:
    # 守卫位模式：左 101，中 01010，右 101
    # 以模块位寻找
    s = ''.join('1' if b else '0' for b in bits.tolist())
    # 直接匹配应对理想情况，否则滑窗匹配
    # 找左 101
    left_pos = s.find('101')
    if left_pos == -1:
        return None
    # 找中间 01010（需位于左与右之间）
    mid_pos = s.find('01010', left_pos + 3)
    if mid_pos == -1:
        return None
    # 找右 101
    right_pos = s.find('101', mid_pos + 5)
    if right_pos == -1:
        return None
    return left_pos, mid_pos, right_pos, right_pos + 3


def _bits_to_digit(bits: np.ndarray, tables: List[Tuple[str, dict]]) -> Optional[Tuple[str, str]]:
    pattern = ''.join('1' if b else '0' for b in bits.tolist())
    for parity, table in tables:
        for d, code in table.items():
            if pattern == code:
                return d, parity
    return None


def decode_ean13_from_row(bits_row: np.ndarray) -> Optional[str]:
    """Decode one row of EAN-13 module bits into its 13-digit string.

    ``bits_row`` must hold exactly 95 module values (truthy = dark).

    Returns the 13-digit code, or ``None`` when any of these checks fails:
    the row is not 95 modules long, the guards are not located, either data
    half is not 42 modules wide, a 7-module group matches no table entry,
    the left-half L/G sequence is not a known leading-digit pattern, or the
    check digit does not match.
    """
    half_width = 6 * 7  # six digits of seven modules per half

    if bits_row.size != 95:
        return None

    located = _find_guards(bits_row, tol=0.35)
    if not located:
        return None
    left_start, mid_start, right_start, _ = located

    # Data sits between the 3-module side guards and the 5-module centre guard.
    left_half = bits_row[left_start + 3:mid_start]
    right_half = bits_row[mid_start + 5:right_start]
    if left_half.size != half_width or right_half.size != half_width:
        return None

    # Left half: each digit may come from the L or the G table; remember which.
    left_digits: List[str] = []
    parities: List[str] = []
    for offset in range(0, half_width, 7):
        hit = _bits_to_digit(left_half[offset:offset + 7], [("L", L_CODES), ("G", G_CODES)])
        if not hit:
            return None
        left_digits.append(hit[0])
        parities.append(hit[1])

    # The L/G sequence itself encodes the leading digit.
    first_digit = LEADING_PARITY_TO_FIRST.get(''.join(parities))
    if first_digit is None:
        return None

    # Right half: R table only.
    right_digits: List[str] = []
    for offset in range(0, half_width, 7):
        hit = _bits_to_digit(right_half[offset:offset + 7], [("R", R_CODES)])
        if not hit:
            return None
        right_digits.append(hit[0])

    payload = first_digit + ''.join(left_digits) + ''.join(right_digits[:-1])
    expected = int(right_digits[-1])

    # Check digit: 1-based even positions weigh 3, odd positions weigh 1.
    weighted = sum(int(ch) * (3 if pos % 2 else 1) for pos, ch in enumerate(payload))
    if (10 - weighted % 10) % 10 != expected:
        return None
    return payload + str(expected)


def sample_and_decode(warped_gray: np.ndarray, sample_rows: List[float], total_modules: int) -> Optional[str]:
    """Decode several scan lines of an image and return the most common result.

    Args:
        warped_gray: Image array whose first two dimensions are height and
            width; one row per entry of ``sample_rows`` is read from it.
        sample_rows: Relative vertical positions; each is multiplied by the
            image height, rounded, and clamped to a valid row index.
        total_modules: Module count every scan line is resampled to before
            decoding.

    Returns:
        The code decoded from the largest number of sampled rows (ties go to
        the lexicographically smallest code) as a plain ``str``, or ``None``
        when no row decodes or the image has zero height or width.
    """
    h, w = warped_gray.shape[:2]
    if h == 0 or w == 0:
        # Nothing to sample; indexing or min()/max() would raise below.
        return None

    results: List[str] = []
    for r in sample_rows:
        row_y = min(h - 1, max(0, int(round(h * r))))
        line = warped_gray[row_y, :].astype(np.float32)
        # Stretch contrast to 0..255; a flat row is passed through unchanged.
        lo, hi = line.min(), line.max()
        if hi > lo:
            line = (line - lo) / (hi - lo) * 255.0
        bits, _ = _normalize_run_lengths(line, total_modules)
        if bits.size != total_modules:
            continue
        code = decode_ean13_from_row(bits)
        if code:
            results.append(code)

    if not results:
        return None
    # Majority vote; np.unique sorts its output, so ties resolve to the
    # smallest code. Convert numpy.str_ back to a built-in str.
    vals, counts = np.unique(np.array(results), return_counts=True)
    return str(vals[int(np.argmax(counts))])