2016-08-25

音视频处理算法与优化技术深度解析

音视频处理是现代多媒体技术的核心，涉及复杂的数学算法和计算机科学原理。本文将深入探讨音视频处理中的关键算法，从基础的信号处理到高级的优化技术，为开发者提供全面的技术指南。

1. 图像处理算法基础

1.1 像素级操作与滤波

图像处理的基础是像素级操作，包括点运算、邻域运算和几何变换：

# 音视频处理算法核心依赖库导入
# 这些库构成了现代图像和音频处理的基础工具链

import numpy as np  # 数值计算核心库，提供高效的多维数组操作和数学函数
from typing import Tuple, List, Optional, Union  # 类型注解，提高代码可读性和IDE支持
import cv2  # OpenCV计算机视觉库，提供丰富的图像处理算法
from scipy import ndimage  # SciPy图像处理模块，提供高级滤波和形态学操作
from scipy.signal import convolve2d  # 二维卷积操作，用于实现各种滤波器
import time  # 时间测量，用于性能分析和算法优化
from dataclasses import dataclass  # 数据类装饰器，简化数据结构定义
from enum import Enum  # 枚举类型，提供类型安全的常量定义

class FilterType(Enum):
    """滤波器类型"""
    GAUSSIAN = "gaussian"
    MEDIAN = "median"
    BILATERAL = "bilateral"
    SOBEL = "sobel"
    LAPLACIAN = "laplacian"
    UNSHARP_MASK = "unsharp_mask"

@dataclass
class ImageMetrics:
    """图像质量指标"""
    psnr: float
    ssim: float
    mse: float
    processing_time: float

class ImageProcessor:
    """图像处理器"""
    
    def __init__(self):
        self.processing_history = []
        self.performance_metrics = {}
    
    def apply_gaussian_filter(self, image: np.ndarray, sigma: float = 1.0, 
                             kernel_size: Optional[int] = None) -> np.ndarray:
        """应用高斯滤波"""
        start_time = time.time()
        
        if kernel_size is None:
            kernel_size = int(6 * sigma + 1)
            if kernel_size % 2 == 0:
                kernel_size += 1
        
        # 创建高斯核
        kernel = self._create_gaussian_kernel(kernel_size, sigma)
        
        # 应用卷积
        if len(image.shape) == 3:
            # 彩色图像，分别处理每个通道
            filtered_image = np.zeros_like(image)
            for channel in range(image.shape[2]):
                filtered_image[:, :, channel] = convolve2d(
                    image[:, :, channel], kernel, mode='same', boundary='symm'
                )
        else:
            # 灰度图像
            filtered_image = convolve2d(image, kernel, mode='same', boundary='symm')
        
        processing_time = time.time() - start_time
        self._record_operation('gaussian_filter', processing_time, 
                              {'sigma': sigma, 'kernel_size': kernel_size})
        
        return filtered_image.astype(image.dtype)
    
    def apply_median_filter(self, image: np.ndarray, kernel_size: int = 5) -> np.ndarray:
        """应用中值滤波"""
        start_time = time.time()
        
        if len(image.shape) == 3:
            # 彩色图像
            filtered_image = np.zeros_like(image)
            for channel in range(image.shape[2]):
                filtered_image[:, :, channel] = ndimage.median_filter(
                    image[:, :, channel], size=kernel_size
                )
        else:
            # 灰度图像
            filtered_image = ndimage.median_filter(image, size=kernel_size)
        
        processing_time = time.time() - start_time
        self._record_operation('median_filter', processing_time, 
                              {'kernel_size': kernel_size})
        
        return filtered_image
    
    def apply_bilateral_filter(self, image: np.ndarray, d: int = 9, 
                              sigma_color: float = 75, sigma_space: float = 75) -> np.ndarray:
        """应用双边滤波"""
        start_time = time.time()
        
        # 使用OpenCV的双边滤波实现
        if len(image.shape) == 3:
            filtered_image = cv2.bilateralFilter(image, d, sigma_color, sigma_space)
        else:
            filtered_image = cv2.bilateralFilter(image, d, sigma_color, sigma_space)
        
        processing_time = time.time() - start_time
        self._record_operation('bilateral_filter', processing_time, 
                              {'d': d, 'sigma_color': sigma_color, 'sigma_space': sigma_space})
        
        return filtered_image
    
    def apply_edge_detection(self, image: np.ndarray, method: str = 'sobel', 
                           threshold: Optional[Tuple[float, float]] = None) -> np.ndarray:
        """边缘检测"""
        start_time = time.time()
        
        # 转换为灰度图像
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()
        
        if method == 'sobel':
            # Sobel边缘检测
            grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
            grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
            edges = np.sqrt(grad_x**2 + grad_y**2)
            
        elif method == 'canny':
            # Canny边缘检测
            if threshold is None:
                threshold = (50, 150)
            edges = cv2.Canny(gray, threshold[0], threshold[1])
            
        elif method == 'laplacian':
            # Laplacian边缘检测
            edges = cv2.Laplacian(gray, cv2.CV_64F)
            edges = np.abs(edges)
            
        else:
            raise ValueError(f"Unknown edge detection method: {method}")
        
        # 归一化到0-255范围
        edges = ((edges - edges.min()) / (edges.max() - edges.min()) * 255).astype(np.uint8)
        
        processing_time = time.time() - start_time
        self._record_operation('edge_detection', processing_time, 
                              {'method': method, 'threshold': threshold})
        
        return edges
    
    def apply_unsharp_mask(self, image: np.ndarray, sigma: float = 1.0, 
                          strength: float = 1.5, threshold: float = 0) -> np.ndarray:
        """应用反锐化掩模"""
        start_time = time.time()
        
        # 创建模糊版本
        blurred = self.apply_gaussian_filter(image, sigma)
        
        # 计算高频成分
        high_freq = image.astype(np.float32) - blurred.astype(np.float32)
        
        # 应用阈值
        if threshold > 0:
            high_freq = np.where(np.abs(high_freq) < threshold, 0, high_freq)
        
        # 增强图像
        sharpened = image.astype(np.float32) + strength * high_freq
        
        # 限制到有效范围
        sharpened = np.clip(sharpened, 0, 255)
        
        processing_time = time.time() - start_time
        self._record_operation('unsharp_mask', processing_time, 
                              {'sigma': sigma, 'strength': strength, 'threshold': threshold})
        
        return sharpened.astype(image.dtype)
    
    def _create_gaussian_kernel(self, size: int, sigma: float) -> np.ndarray:
        """创建高斯核"""
        kernel = np.zeros((size, size))
        center = size // 2
        
        for i in range(size):
            for j in range(size):
                x, y = i - center, j - center
                kernel[i, j] = np.exp(-(x**2 + y**2) / (2 * sigma**2))
        
        return kernel / kernel.sum()
    
    def _record_operation(self, operation: str, processing_time: float, params: dict):
        """记录操作历史"""
        self.processing_history.append({
            'operation': operation,
            'time': processing_time,
            'params': params,
            'timestamp': time.time()
        })
    
    def calculate_image_quality_metrics(self, original: np.ndarray, 
                                       processed: np.ndarray) -> ImageMetrics:
        """计算图像质量指标"""
        start_time = time.time()
        
        # 确保图像为浮点类型
        orig_float = original.astype(np.float64)
        proc_float = processed.astype(np.float64)
        
        # 计算MSE
        mse = np.mean((orig_float - proc_float) ** 2)
        
        # 计算PSNR
        if mse == 0:
            psnr = float('inf')
        else:
            max_pixel = 255.0
            psnr = 20 * np.log10(max_pixel / np.sqrt(mse))
        
        # 计算SSIM（简化版本）
        ssim = self._calculate_ssim(orig_float, proc_float)
        
        processing_time = time.time() - start_time
        
        return ImageMetrics(psnr=psnr, ssim=ssim, mse=mse, processing_time=processing_time)
    
    def _calculate_ssim(self, img1: np.ndarray, img2: np.ndarray, 
                       window_size: int = 11, k1: float = 0.01, k2: float = 0.03) -> float:
        """计算结构相似性指数（SSIM）"""
        # 转换为灰度图像
        if len(img1.shape) == 3:
            img1 = np.mean(img1, axis=2)
            img2 = np.mean(img2, axis=2)
        
        # 常数
        c1 = (k1 * 255) ** 2
        c2 = (k2 * 255) ** 2
        
        # 计算均值
        mu1 = ndimage.uniform_filter(img1, size=window_size)
        mu2 = ndimage.uniform_filter(img2, size=window_size)
        
        # 计算方差和协方差
        mu1_sq = mu1 ** 2
        mu2_sq = mu2 ** 2
        mu1_mu2 = mu1 * mu2
        
        sigma1_sq = ndimage.uniform_filter(img1 ** 2, size=window_size) - mu1_sq
        sigma2_sq = ndimage.uniform_filter(img2 ** 2, size=window_size) - mu2_sq
        sigma12 = ndimage.uniform_filter(img1 * img2, size=window_size) - mu1_mu2
        
        # 计算SSIM
        numerator = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
        denominator = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
        
        ssim_map = numerator / denominator
        return np.mean(ssim_map)
    
    def get_performance_summary(self) -> dict:
        """获取性能摘要"""
        if not self.processing_history:
            return {}
        
        summary = {}
        for record in self.processing_history:
            op = record['operation']
            if op not in summary:
                summary[op] = {
                    'count': 0,
                    'total_time': 0,
                    'avg_time': 0,
                    'min_time': float('inf'),
                    'max_time': 0
                }
            
            summary[op]['count'] += 1
            summary[op]['total_time'] += record['time']
            summary[op]['min_time'] = min(summary[op]['min_time'], record['time'])
            summary[op]['max_time'] = max(summary[op]['max_time'], record['time'])
            summary[op]['avg_time'] = summary[op]['total_time'] / summary[op]['count']
        
        return summary

# 图像处理演示
def demo_image_processing():
    print("Image Processing Algorithms Demo")
    print("================================")
    
    # 创建测试图像
    test_image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    
    # 添加噪声
    noise = np.random.normal(0, 25, test_image.shape)
    noisy_image = np.clip(test_image.astype(np.float32) + noise, 0, 255).astype(np.uint8)
    
    processor = ImageProcessor()
    
    print("\n1. Gaussian Filter")
    gaussian_filtered = processor.apply_gaussian_filter(noisy_image, sigma=1.5)
    gaussian_metrics = processor.calculate_image_quality_metrics(test_image, gaussian_filtered)
    print(f"PSNR: {gaussian_metrics.psnr:.2f} dB")
    print(f"SSIM: {gaussian_metrics.ssim:.4f}")
    print(f"Processing time: {gaussian_metrics.processing_time:.4f} seconds")
    
    print("\n2. Median Filter")
    median_filtered = processor.apply_median_filter(noisy_image, kernel_size=5)
    median_metrics = processor.calculate_image_quality_metrics(test_image, median_filtered)
    print(f"PSNR: {median_metrics.psnr:.2f} dB")
    print(f"SSIM: {median_metrics.ssim:.4f}")
    
    print("\n3. Bilateral Filter")
    bilateral_filtered = processor.apply_bilateral_filter(noisy_image, d=9, sigma_color=75, sigma_space=75)
    bilateral_metrics = processor.calculate_image_quality_metrics(test_image, bilateral_filtered)
    print(f"PSNR: {bilateral_metrics.psnr:.2f} dB")
    print(f"SSIM: {bilateral_metrics.ssim:.4f}")
    
    print("\n4. Edge Detection")
    edges_sobel = processor.apply_edge_detection(test_image, method='sobel')
    edges_canny = processor.apply_edge_detection(test_image, method='canny', threshold=(50, 150))
    print("Edge detection completed (Sobel and Canny methods)")
    
    print("\n5. Unsharp Mask")
    sharpened = processor.apply_unsharp_mask(test_image, sigma=1.0, strength=1.5)
    sharpened_metrics = processor.calculate_image_quality_metrics(test_image, sharpened)
    print(f"PSNR: {sharpened_metrics.psnr:.2f} dB")
    print(f"SSIM: {sharpened_metrics.ssim:.4f}")
    
    # 显示性能摘要
    print("\n6. Performance Summary")
    performance = processor.get_performance_summary()
    for operation, stats in performance.items():
        print(f"  {operation}:")
        print(f"    Count: {stats['count']}")
        print(f"    Average time: {stats['avg_time']:.4f}s")
        print(f"    Min time: {stats['min_time']:.4f}s")
        print(f"    Max time: {stats['max_time']:.4f}s")
    
    print("\nImage processing demo completed")

# 运行图像处理演示
if __name__ == "__main__":
    demo_image_processing()

1.2 频域处理与变换

频域处理是图像处理的重要技术，包括傅里叶变换、小波变换等：

import numpy as np
from scipy.fft import fft2, ifft2, fftshift, ifftshift
from scipy import ndimage
import pywt
from typing import Tuple, List, Optional

class FrequencyDomainProcessor:
    """频域处理器"""
    
    def __init__(self):
        self.supported_wavelets = ['db1', 'db4', 'db8', 'haar', 'bior2.2', 'coif2']
    
    def apply_fft_filter(self, image: np.ndarray, filter_type: str = 'lowpass', 
                        cutoff_freq: float = 0.1) -> np.ndarray:
        """应用FFT滤波"""
        # 转换为灰度图像
        if len(image.shape) == 3:
            gray = np.mean(image, axis=2)
        else:
            gray = image.copy()
        
        # 执行FFT
        f_transform = fft2(gray)
        f_shift = fftshift(f_transform)
        
        # 创建滤波器
        rows, cols = gray.shape
        crow, ccol = rows // 2, cols // 2
        
        # 创建频率掩模
        mask = self._create_frequency_mask(rows, cols, filter_type, cutoff_freq)
        
        # 应用滤波器
        f_shift_filtered = f_shift * mask
        
        # 逆变换
        f_ishift = ifftshift(f_shift_filtered)
        img_back = ifft2(f_ishift)
        img_back = np.abs(img_back)
        
        return img_back.astype(np.uint8)
    
    def _create_frequency_mask(self, rows: int, cols: int, filter_type: str, 
                              cutoff_freq: float) -> np.ndarray:
        """创建频率掩模"""
        crow, ccol = rows // 2, cols // 2
        
        # 创建距离矩阵
        y, x = np.ogrid[:rows, :cols]
        distance = np.sqrt((x - ccol)**2 + (y - crow)**2)
        
        # 归一化距离
        max_distance = np.sqrt(crow**2 + ccol**2)
        normalized_distance = distance / max_distance
        
        if filter_type == 'lowpass':
            # 低通滤波器
            mask = (normalized_distance <= cutoff_freq).astype(float)
        elif filter_type == 'highpass':
            # 高通滤波器
            mask = (normalized_distance > cutoff_freq).astype(float)
        elif filter_type == 'bandpass':
            # 带通滤波器（需要两个截止频率）
            low_cutoff = cutoff_freq * 0.5
            high_cutoff = cutoff_freq * 1.5
            mask = ((normalized_distance >= low_cutoff) & 
                   (normalized_distance <= high_cutoff)).astype(float)
        else:
            raise ValueError(f"Unknown filter type: {filter_type}")
        
        return mask
    
    def apply_wavelet_denoising(self, image: np.ndarray, wavelet: str = 'db4', 
                               sigma: Optional[float] = None, mode: str = 'soft') -> np.ndarray:
        """应用小波去噪"""
        # 转换为灰度图像
        if len(image.shape) == 3:
            gray = np.mean(image, axis=2)
        else:
            gray = image.copy()
        
        # 小波变换
        coeffs = pywt.wavedec2(gray, wavelet, mode='symmetric')
        
        # 估计噪声标准差
        if sigma is None:
            sigma = self._estimate_noise_sigma(coeffs[-1])
        
        # 阈值处理
        threshold = sigma * np.sqrt(2 * np.log(gray.size))
        
        # 对细节系数进行阈值处理
        coeffs_thresh = list(coeffs)
        coeffs_thresh[0] = coeffs[0]  # 保持近似系数不变
        
        for i in range(1, len(coeffs)):
            coeffs_thresh[i] = tuple([
                pywt.threshold(detail, threshold, mode=mode) 
                for detail in coeffs[i]
            ])
        
        # 逆小波变换
        denoised = pywt.waverec2(coeffs_thresh, wavelet, mode='symmetric')
        
        return np.clip(denoised, 0, 255).astype(np.uint8)
    
    def _estimate_noise_sigma(self, detail_coeffs: np.ndarray) -> float:
        """估计噪声标准差"""
        # 使用最高频细节系数估计噪声
        return np.median(np.abs(detail_coeffs)) / 0.6745
    
    def apply_wavelet_compression(self, image: np.ndarray, wavelet: str = 'db4', 
                                 compression_ratio: float = 0.1) -> Tuple[np.ndarray, float]:
        """应用小波压缩"""
        # 转换为灰度图像
        if len(image.shape) == 3:
            gray = np.mean(image, axis=2)
        else:
            gray = image.copy()
        
        # 小波变换
        coeffs = pywt.wavedec2(gray, wavelet, mode='symmetric')
        
        # 计算所有系数
        all_coeffs = []
        all_coeffs.extend(coeffs[0].flatten())
        for detail_level in coeffs[1:]:
            for detail in detail_level:
                all_coeffs.extend(detail.flatten())
        
        all_coeffs = np.array(all_coeffs)
        
        # 确定阈值（保留最大的compression_ratio比例的系数）
        num_keep = int(len(all_coeffs) * compression_ratio)
        threshold = np.sort(np.abs(all_coeffs))[-num_keep]
        
        # 应用阈值
        coeffs_compressed = list(coeffs)
        coeffs_compressed[0] = np.where(np.abs(coeffs[0]) >= threshold, coeffs[0], 0)
        
        for i in range(1, len(coeffs)):
            coeffs_compressed[i] = tuple([
                np.where(np.abs(detail) >= threshold, detail, 0)
                for detail in coeffs[i]
            ])
        
        # 逆小波变换
        compressed = pywt.waverec2(coeffs_compressed, wavelet, mode='symmetric')
        
        # 计算实际压缩比
        original_nonzero = np.count_nonzero(all_coeffs)
        compressed_nonzero = sum([
            np.count_nonzero(coeffs_compressed[0])
        ] + [
            sum(np.count_nonzero(detail) for detail in level)
            for level in coeffs_compressed[1:]
        ])
        
        actual_ratio = compressed_nonzero / original_nonzero if original_nonzero > 0 else 0
        
        return np.clip(compressed, 0, 255).astype(np.uint8), actual_ratio
    
    def analyze_frequency_spectrum(self, image: np.ndarray) -> dict:
        """分析频谱"""
        # 转换为灰度图像
        if len(image.shape) == 3:
            gray = np.mean(image, axis=2)
        else:
            gray = image.copy()
        
        # FFT变换
        f_transform = fft2(gray)
        f_shift = fftshift(f_transform)
        magnitude_spectrum = np.log(np.abs(f_shift) + 1)
        
        # 计算频谱统计信息
        spectrum_stats = {
            'mean_magnitude': np.mean(magnitude_spectrum),
            'std_magnitude': np.std(magnitude_spectrum),
            'max_magnitude': np.max(magnitude_spectrum),
            'min_magnitude': np.min(magnitude_spectrum),
            'energy': np.sum(np.abs(f_transform)**2),
            'dc_component': np.abs(f_transform[0, 0])
        }
        
        return spectrum_stats

# 频域处理演示
def demo_frequency_domain_processing():
    print("Frequency Domain Processing Demo")
    print("================================")
    
    # 创建测试图像
    test_image = np.random.randint(0, 256, (128, 128), dtype=np.uint8)
    
    # 添加噪声
    noise = np.random.normal(0, 20, test_image.shape)
    noisy_image = np.clip(test_image.astype(np.float32) + noise, 0, 255).astype(np.uint8)
    
    processor = FrequencyDomainProcessor()
    
    print("\n1. FFT Filtering")
    
    # 低通滤波
    lowpass_filtered = processor.apply_fft_filter(noisy_image, 'lowpass', 0.3)
    print("Low-pass filtering completed")
    
    # 高通滤波
    highpass_filtered = processor.apply_fft_filter(test_image, 'highpass', 0.1)
    print("High-pass filtering completed")
    
    print("\n2. Wavelet Denoising")
    
    # 测试不同小波
    wavelets = ['db4', 'haar', 'bior2.2']
    for wavelet in wavelets:
        denoised = processor.apply_wavelet_denoising(noisy_image, wavelet=wavelet)
        print(f"Wavelet denoising with {wavelet} completed")
    
    print("\n3. Wavelet Compression")
    
    compression_ratios = [0.1, 0.05, 0.01]
    for ratio in compression_ratios:
        compressed, actual_ratio = processor.apply_wavelet_compression(test_image, compression_ratio=ratio)
        print(f"Compression ratio {ratio:.2f} -> actual ratio {actual_ratio:.4f}")
    
    print("\n4. Frequency Spectrum Analysis")
    
    # 分析原始图像和噪声图像的频谱
    original_spectrum = processor.analyze_frequency_spectrum(test_image)
    noisy_spectrum = processor.analyze_frequency_spectrum(noisy_image)
    
    print("Original image spectrum:")
    for key, value in original_spectrum.items():
        print(f"  {key}: {value:.4f}")
    
    print("Noisy image spectrum:")
    for key, value in noisy_spectrum.items():
        print(f"  {key}: {value:.4f}")
    
    print("\nFrequency domain processing demo completed")

# 运行频域处理演示
if __name__ == "__main__":
    demo_frequency_domain_processing()

2. 音频处理算法

2.1 数字音频滤波与增强

音频处理涉及时域和频域的信号处理技术：

import numpy as np
from scipy import signal
from scipy.fft import fft, ifft, fftfreq
from typing import Tuple, List, Optional, Union
import librosa
from dataclasses import dataclass

@dataclass
class AudioMetrics:
    """音频质量指标"""
    snr: float  # 信噪比
    thd: float  # 总谐波失真
    dynamic_range: float  # 动态范围
    spectral_centroid: float  # 频谱重心

class AudioProcessor:
    """音频处理器"""
    
    def __init__(self, sample_rate: int = 44100):
        self.sample_rate = sample_rate
        self.processing_history = []
    
    def apply_lowpass_filter(self, audio: np.ndarray, cutoff_freq: float, 
                           order: int = 5) -> np.ndarray:
        """应用低通滤波器"""
        nyquist = self.sample_rate / 2
        normalized_cutoff = cutoff_freq / nyquist
        
        # 设计Butterworth滤波器
        b, a = signal.butter(order, normalized_cutoff, btype='low', analog=False)
        
        # 应用滤波器
        filtered_audio = signal.filtfilt(b, a, audio)
        
        self._record_operation('lowpass_filter', {
            'cutoff_freq': cutoff_freq,
            'order': order
        })
        
        return filtered_audio
    
    def apply_highpass_filter(self, audio: np.ndarray, cutoff_freq: float, 
                            order: int = 5) -> np.ndarray:
        """应用高通滤波器"""
        nyquist = self.sample_rate / 2
        normalized_cutoff = cutoff_freq / nyquist
        
        # 设计Butterworth滤波器
        b, a = signal.butter(order, normalized_cutoff, btype='high', analog=False)
        
        # 应用滤波器
        filtered_audio = signal.filtfilt(b, a, audio)
        
        self._record_operation('highpass_filter', {
            'cutoff_freq': cutoff_freq,
            'order': order
        })
        
        return filtered_audio
    
    def apply_bandpass_filter(self, audio: np.ndarray, low_freq: float, 
                            high_freq: float, order: int = 5) -> np.ndarray:
        """应用带通滤波器"""
        nyquist = self.sample_rate / 2
        low_normalized = low_freq / nyquist
        high_normalized = high_freq / nyquist
        
        # 设计Butterworth滤波器
        b, a = signal.butter(order, [low_normalized, high_normalized], 
                           btype='band', analog=False)
        
        # 应用滤波器
        filtered_audio = signal.filtfilt(b, a, audio)
        
        self._record_operation('bandpass_filter', {
            'low_freq': low_freq,
            'high_freq': high_freq,
            'order': order
        })
        
        return filtered_audio
    
    def apply_notch_filter(self, audio: np.ndarray, notch_freq: float, 
                          quality_factor: float = 30) -> np.ndarray:
        """应用陷波滤波器（去除特定频率）"""
        nyquist = self.sample_rate / 2
        normalized_freq = notch_freq / nyquist
        
        # 设计陷波滤波器
        b, a = signal.iirnotch(normalized_freq, quality_factor)
        
        # 应用滤波器
        filtered_audio = signal.filtfilt(b, a, audio)
        
        self._record_operation('notch_filter', {
            'notch_freq': notch_freq,
            'quality_factor': quality_factor
        })
        
        return filtered_audio
    
    def apply_equalizer(self, audio: np.ndarray, eq_bands: List[Tuple[float, float, float]]) -> np.ndarray:
        """应用多频段均衡器
        
        Args:
            audio: 输入音频信号
            eq_bands: 均衡器频段列表，每个元素为(中心频率, 增益dB, Q值)
        """
        filtered_audio = audio.copy()
        
        for center_freq, gain_db, q_factor in eq_bands:
            # 计算带宽
            bandwidth = center_freq / q_factor
            low_freq = center_freq - bandwidth / 2
            high_freq = center_freq + bandwidth / 2
            
            # 提取频段
            band_audio = self.apply_bandpass_filter(audio, low_freq, high_freq)
            
            # 应用增益
            gain_linear = 10 ** (gain_db / 20)
            band_audio *= gain_linear
            
            # 添加到输出
            filtered_audio += band_audio
        
        # 归一化
        filtered_audio = filtered_audio / len(eq_bands)
        
        self._record_operation('equalizer', {
            'eq_bands': eq_bands
        })
        
        return filtered_audio
    
    def apply_dynamic_range_compression(self, audio: np.ndarray, threshold: float = -20, 
                                      ratio: float = 4, attack_time: float = 0.003, 
                                      release_time: float = 0.1) -> np.ndarray:
        """应用动态范围压缩"""
        # 转换为dB
        audio_db = 20 * np.log10(np.abs(audio) + 1e-10)
        
        # 计算增益减少
        gain_reduction = np.zeros_like(audio_db)
        mask = audio_db > threshold
        gain_reduction[mask] = (audio_db[mask] - threshold) * (1 - 1/ratio)
        
        # 应用攻击和释放时间
        attack_samples = int(attack_time * self.sample_rate)
        release_samples = int(release_time * self.sample_rate)
        
        # 平滑增益减少
        smoothed_gain = self._smooth_gain_reduction(gain_reduction, attack_samples, release_samples)
        
        # 应用压缩
        compressed_db = audio_db - smoothed_gain
        compressed_audio = np.sign(audio) * (10 ** (compressed_db / 20))
        
        self._record_operation('compression', {
            'threshold': threshold,
            'ratio': ratio,
            'attack_time': attack_time,
            'release_time': release_time
        })
        
        return compressed_audio
    
    def _smooth_gain_reduction(self, gain_reduction: np.ndarray, 
                              attack_samples: int, release_samples: int) -> np.ndarray:
        """平滑增益减少"""
        smoothed = np.zeros_like(gain_reduction)
        
        for i in range(1, len(gain_reduction)):
            if gain_reduction[i] > smoothed[i-1]:
                # 攻击阶段
                alpha = 1 - np.exp(-1 / attack_samples)
            else:
                # 释放阶段
                alpha = 1 - np.exp(-1 / release_samples)
            
            smoothed[i] = alpha * gain_reduction[i] + (1 - alpha) * smoothed[i-1]
        
        return smoothed
    
    def apply_noise_gate(self, audio: np.ndarray, threshold: float = -40, 
                        ratio: float = 10, attack_time: float = 0.001, 
                        release_time: float = 0.1) -> np.ndarray:
        """应用噪声门"""
        # 转换为dB
        audio_db = 20 * np.log10(np.abs(audio) + 1e-10)
        
        # 计算增益减少
        gain_reduction = np.zeros_like(audio_db)
        mask = audio_db < threshold
        gain_reduction[mask] = (threshold - audio_db[mask]) * (1 - 1/ratio)
        
        # 应用攻击和释放时间
        attack_samples = int(attack_time * self.sample_rate)
        release_samples = int(release_time * self.sample_rate)
        
        # 平滑增益减少
        smoothed_gain = self._smooth_gain_reduction(gain_reduction, attack_samples, release_samples)
        
        # 应用噪声门
        gated_db = audio_db - smoothed_gain
        gated_audio = np.sign(audio) * (10 ** (gated_db / 20))
        
        self._record_operation('noise_gate', {
            'threshold': threshold,
            'ratio': ratio,
            'attack_time': attack_time,
            'release_time': release_time
        })
        
        return gated_audio
    
    def apply_reverb(self, audio: np.ndarray, room_size: float = 0.5, 
                    damping: float = 0.5, wet_level: float = 0.3) -> np.ndarray:
        """应用混响效果（简化版本）"""
        # 创建多个延迟线模拟混响
        delay_times = [0.03, 0.05, 0.07, 0.09, 0.11, 0.13]  # 秒
        decay_factors = [0.7, 0.6, 0.5, 0.4, 0.3, 0.2]
        
        reverb_audio = np.zeros_like(audio)
        
        for delay_time, decay_factor in zip(delay_times, decay_factors):
            # 计算延迟样本数
            delay_samples = int(delay_time * self.sample_rate * room_size)
            
            if delay_samples < len(audio):
                # 创建延迟信号
                delayed_audio = np.zeros_like(audio)
                delayed_audio[delay_samples:] = audio[:-delay_samples]
                
                # 应用衰减和阻尼
                delayed_audio *= decay_factor * (1 - damping)
                
                # 添加到混响信号
                reverb_audio += delayed_audio
        
        # 混合原始信号和混响信号
        output_audio = (1 - wet_level) * audio + wet_level * reverb_audio
        
        self._record_operation('reverb', {
            'room_size': room_size,
            'damping': damping,
            'wet_level': wet_level
        })
        
        return output_audio
    
    def calculate_audio_metrics(self, audio: np.ndarray, 
                               reference: Optional[np.ndarray] = None) -> AudioMetrics:
        """计算音频质量指标"""
        # 计算信噪比
        if reference is not None:
            noise = audio - reference
            signal_power = np.mean(reference ** 2)
            noise_power = np.mean(noise ** 2)
            snr = 10 * np.log10(signal_power / (noise_power + 1e-10))
        else:
            # 估算SNR（假设最小值为噪声）
            signal_power = np.mean(audio ** 2)
            noise_power = np.min(audio ** 2)
            snr = 10 * np.log10(signal_power / (noise_power + 1e-10))
        
        # 计算总谐波失真（简化版本）
        fft_audio = fft(audio)
        freqs = fftfreq(len(audio), 1/self.sample_rate)
        
        # 找到基频
        fundamental_idx = np.argmax(np.abs(fft_audio[1:len(fft_audio)//2])) + 1
        fundamental_freq = freqs[fundamental_idx]
        
        # 计算谐波功率
        fundamental_power = np.abs(fft_audio[fundamental_idx]) ** 2
        harmonic_power = 0
        
        for harmonic in range(2, 6):  # 2-5次谐波
            harmonic_freq = fundamental_freq * harmonic
            harmonic_idx = np.argmin(np.abs(freqs - harmonic_freq))
            if harmonic_idx < len(fft_audio):
                harmonic_power += np.abs(fft_audio[harmonic_idx]) ** 2
        
        thd = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
        
        # 计算动态范围
        dynamic_range = 20 * np.log10(np.max(np.abs(audio)) / (np.min(np.abs(audio[audio != 0])) + 1e-10))
        
        # 计算频谱重心
        magnitude_spectrum = np.abs(fft_audio[:len(fft_audio)//2])
        freqs_positive = freqs[:len(freqs)//2]
        spectral_centroid = np.sum(freqs_positive * magnitude_spectrum) / (np.sum(magnitude_spectrum) + 1e-10)
        
        return AudioMetrics(
            snr=snr,
            thd=thd,
            dynamic_range=dynamic_range,
            spectral_centroid=spectral_centroid
        )
    
    def _record_operation(self, operation: str, params: dict):
        """记录操作历史"""
        self.processing_history.append({
            'operation': operation,
            'params': params,
            'timestamp': time.time()
        })

# 音频处理演示
def demo_audio_processing():
    print("Audio Processing Algorithms Demo")
    print("================================")
    
    # 创建测试音频信号
    sample_rate = 44100
    duration = 2.0  # 秒
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # 创建复合信号：基频 + 谐波 + 噪声
    fundamental_freq = 440  # A4
    audio = (np.sin(2 * np.pi * fundamental_freq * t) + 
            0.3 * np.sin(2 * np.pi * fundamental_freq * 2 * t) +  # 二次谐波
            0.1 * np.sin(2 * np.pi * fundamental_freq * 3 * t))   # 三次谐波
    
    # 添加噪声
    noise = np.random.normal(0, 0.05, audio.shape)
    noisy_audio = audio + noise
    
    processor = AudioProcessor(sample_rate)
    
    print("\n1. Filter Applications")
    
    # 低通滤波
    lowpass_filtered = processor.apply_lowpass_filter(noisy_audio, cutoff_freq=2000)
    print("Low-pass filter applied (cutoff: 2000 Hz)")
    
    # 高通滤波
    highpass_filtered = processor.apply_highpass_filter(noisy_audio, cutoff_freq=100)
    print("High-pass filter applied (cutoff: 100 Hz)")
    
    # 带通滤波
    bandpass_filtered = processor.apply_bandpass_filter(noisy_audio, 200, 2000)
    print("Band-pass filter applied (200-2000 Hz)")
    
    # 陷波滤波（去除50Hz工频干扰）
    notch_filtered = processor.apply_notch_filter(noisy_audio, 50)
    print("Notch filter applied (50 Hz removed)")
    
    print("\n2. Equalizer")
    
    # 多频段均衡器
    eq_bands = [
        (100, 3, 2),    # 低频增强
        (1000, -2, 4),  # 中频衰减
        (5000, 4, 3)    # 高频增强
    ]
    equalized_audio = processor.apply_equalizer(noisy_audio, eq_bands)
    print(f"Equalizer applied with {len(eq_bands)} bands")
    
    print("\n3. Dynamic Processing")
    
    # 动态范围压缩
    compressed_audio = processor.apply_dynamic_range_compression(
        noisy_audio, threshold=-20, ratio=4
    )
    print("Dynamic range compression applied")
    
    # 噪声门
    gated_audio = processor.apply_noise_gate(
        noisy_audio, threshold=-40, ratio=10
    )
    print("Noise gate applied")
    
    print("\n4. Effects")
    
    # 混响
    reverb_audio = processor.apply_reverb(
        audio, room_size=0.7, damping=0.3, wet_level=0.4
    )
    print("Reverb effect applied")
    
    print("\n5. Audio Quality Metrics")
    
    # 计算原始音频指标
    original_metrics = processor.calculate_audio_metrics(audio)
    print("Original audio metrics:")
    print(f"  SNR: {original_metrics.snr:.2f} dB")
    print(f"  THD: {original_metrics.thd:.2f}%")
    print(f"  Dynamic Range: {original_metrics.dynamic_range:.2f} dB")
    print(f"  Spectral Centroid: {original_metrics.spectral_centroid:.2f} Hz")
    
    # 计算处理后音频指标
    processed_metrics = processor.calculate_audio_metrics(lowpass_filtered, audio)
    print("\nLow-pass filtered audio metrics:")
    print(f"  SNR: {processed_metrics.snr:.2f} dB")
    print(f"  THD: {processed_metrics.thd:.2f}%")
    print(f"  Dynamic Range: {processed_metrics.dynamic_range:.2f} dB")
    print(f"  Spectral Centroid: {processed_metrics.spectral_centroid:.2f} Hz")
    
    print("\nAudio processing demo completed")

# 运行音频处理演示
if __name__ == "__main__":
    demo_audio_processing()

2.2 音频特征提取与分析

音频特征提取是音频处理和机器学习的重要环节：

import numpy as np
from scipy import signal
from scipy.fft import fft, fftfreq
import librosa
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass

@dataclass
class AudioFeatures:
    """音频特征数据结构"""
    mfcc: np.ndarray  # 梅尔频率倒谱系数
    spectral_centroid: np.ndarray  # 频谱重心
    spectral_rolloff: np.ndarray  # 频谱滚降
    spectral_bandwidth: np.ndarray  # 频谱带宽
    zero_crossing_rate: np.ndarray  # 过零率
    chroma: np.ndarray  # 色度特征
    tempo: float  # 节拍
    onset_frames: np.ndarray  # 起始帧

class AudioFeatureExtractor:
    """音频特征提取器"""
    
    def __init__(self, sample_rate: int = 22050, hop_length: int = 512, n_fft: int = 2048):
        self.sample_rate = sample_rate
        self.hop_length = hop_length
        self.n_fft = n_fft
        self.frame_length = n_fft
    
    def extract_mfcc(self, audio: np.ndarray, n_mfcc: int = 13) -> np.ndarray:
        """提取MFCC特征"""
        # 计算梅尔频谱图
        mel_spectrogram = librosa.feature.melspectrogram(
            y=audio, sr=self.sample_rate, hop_length=self.hop_length, n_fft=self.n_fft
        )
        
        # 转换为对数刻度
        log_mel = librosa.power_to_db(mel_spectrogram)
        
        # 计算MFCC
        mfcc = librosa.feature.mfcc(
            S=log_mel, sr=self.sample_rate, n_mfcc=n_mfcc
        )
        
        return mfcc
    
    def extract_spectral_features(self, audio: np.ndarray) -> Dict[str, np.ndarray]:
        """提取频谱特征"""
        # 计算短时傅里叶变换
        stft = librosa.stft(audio, hop_length=self.hop_length, n_fft=self.n_fft)
        magnitude = np.abs(stft)
        
        # 频谱重心
        spectral_centroid = librosa.feature.spectral_centroid(
            S=magnitude, sr=self.sample_rate, hop_length=self.hop_length
        )[0]
        
        # 频谱滚降
        spectral_rolloff = librosa.feature.spectral_rolloff(
            S=magnitude, sr=self.sample_rate, hop_length=self.hop_length
        )[0]
        
        # 频谱带宽
        spectral_bandwidth = librosa.feature.spectral_bandwidth(
            S=magnitude, sr=self.sample_rate, hop_length=self.hop_length
        )[0]
        
        # 频谱对比度
        spectral_contrast = librosa.feature.spectral_contrast(
            S=magnitude, sr=self.sample_rate, hop_length=self.hop_length
        )
        
        return {
            'centroid': spectral_centroid,
            'rolloff': spectral_rolloff,
            'bandwidth': spectral_bandwidth,
            'contrast': spectral_contrast
        }
    
    def extract_temporal_features(self, audio: np.ndarray) -> Dict[str, np.ndarray]:
        """提取时域特征"""
        # 过零率
        zero_crossing_rate = librosa.feature.zero_crossing_rate(
            audio, frame_length=self.frame_length, hop_length=self.hop_length
        )[0]
        
        # 能量
        frame_length = self.frame_length
        hop_length = self.hop_length
        energy = []
        
        for i in range(0, len(audio) - frame_length, hop_length):
            frame = audio[i:i + frame_length]
            frame_energy = np.sum(frame ** 2)
            energy.append(frame_energy)
        
        energy = np.array(energy)
        
        # RMS能量
        rms = librosa.feature.rms(
            y=audio, frame_length=self.frame_length, hop_length=self.hop_length
        )[0]
        
        return {
            'zero_crossing_rate': zero_crossing_rate,
            'energy': energy,
            'rms': rms
        }
    
    def extract_chroma_features(self, audio: np.ndarray) -> np.ndarray:
        """提取色度特征"""
        chroma = librosa.feature.chroma_stft(
            y=audio, sr=self.sample_rate, hop_length=self.hop_length, n_fft=self.n_fft
        )
        return chroma
    
    def extract_rhythm_features(self, audio: np.ndarray) -> Dict[str, Union[float, np.ndarray]]:
        """提取节奏特征"""
        # 节拍跟踪
        tempo, beats = librosa.beat.beat_track(
            y=audio, sr=self.sample_rate, hop_length=self.hop_length
        )
        
        # 起始检测
        onset_frames = librosa.onset.onset_detect(
            y=audio, sr=self.sample_rate, hop_length=self.hop_length
        )
        
        # 节拍强度
        onset_strength = librosa.onset.onset_strength(
            y=audio, sr=self.sample_rate, hop_length=self.hop_length
        )
        
        return {
            'tempo': tempo,
            'beats': beats,
            'onset_frames': onset_frames,
            'onset_strength': onset_strength
        }
    
    def extract_all_features(self, audio: np.ndarray) -> AudioFeatures:
        """提取所有特征"""
        # MFCC特征
        mfcc = self.extract_mfcc(audio)
        
        # 频谱特征
        spectral_features = self.extract_spectral_features(audio)
        
        # 时域特征
        temporal_features = self.extract_temporal_features(audio)
        
        # 色度特征
        chroma = self.extract_chroma_features(audio)
        
        # 节奏特征
        rhythm_features = self.extract_rhythm_features(audio)
        
        return AudioFeatures(
            mfcc=mfcc,
            spectral_centroid=spectral_features['centroid'],
            spectral_rolloff=spectral_features['rolloff'],
            spectral_bandwidth=spectral_features['bandwidth'],
            zero_crossing_rate=temporal_features['zero_crossing_rate'],
            chroma=chroma,
            tempo=rhythm_features['tempo'],
            onset_frames=rhythm_features['onset_frames']
        )
    
    def compare_audio_similarity(self, audio1: np.ndarray, audio2: np.ndarray) -> Dict[str, float]:
        """比较两个音频的相似性"""
        # 提取特征
        features1 = self.extract_all_features(audio1)
        features2 = self.extract_all_features(audio2)
        
        # 计算MFCC相似性
        mfcc_similarity = self._calculate_feature_similarity(features1.mfcc, features2.mfcc)
        
        # 计算色度相似性
        chroma_similarity = self._calculate_feature_similarity(features1.chroma, features2.chroma)
        
        # 计算频谱特征相似性
        centroid_similarity = self._calculate_sequence_similarity(
            features1.spectral_centroid, features2.spectral_centroid
        )
        
        # 节拍相似性
        tempo_similarity = 1 - abs(features1.tempo - features2.tempo) / max(features1.tempo, features2.tempo)
        
        return {
            'mfcc_similarity': mfcc_similarity,
            'chroma_similarity': chroma_similarity,
            'centroid_similarity': centroid_similarity,
            'tempo_similarity': tempo_similarity
        }
    
    def _calculate_feature_similarity(self, feature1: np.ndarray, feature2: np.ndarray) -> float:
        """计算特征相似性"""
        # 使用余弦相似性
        min_length = min(feature1.shape[1], feature2.shape[1])
        feature1_truncated = feature1[:, :min_length]
        feature2_truncated = feature2[:, :min_length]
        
        # 展平特征
        flat1 = feature1_truncated.flatten()
        flat2 = feature2_truncated.flatten()
        
        # 计算余弦相似性
        dot_product = np.dot(flat1, flat2)
        norm1 = np.linalg.norm(flat1)
        norm2 = np.linalg.norm(flat2)
        
        if norm1 == 0 or norm2 == 0:
            return 0.0
        
        return dot_product / (norm1 * norm2)
    
    def _calculate_sequence_similarity(self, seq1: np.ndarray, seq2: np.ndarray) -> float:
        """计算序列相似性"""
        min_length = min(len(seq1), len(seq2))
        seq1_truncated = seq1[:min_length]
        seq2_truncated = seq2[:min_length]
        
        # 计算相关系数
        correlation = np.corrcoef(seq1_truncated, seq2_truncated)[0, 1]
        return correlation if not np.isnan(correlation) else 0.0

# 音频特征提取演示
def demo_audio_feature_extraction():
    print("Audio Feature Extraction Demo")
    print("=============================")
    
    # 创建测试音频信号
    sample_rate = 22050
    duration = 3.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # 创建复杂音频信号
    audio1 = (np.sin(2 * np.pi * 440 * t) +  # A4
             0.5 * np.sin(2 * np.pi * 554.37 * t) +  # C#5
             0.3 * np.sin(2 * np.pi * 659.25 * t))   # E5
    
    # 添加包络和噪声
    envelope = np.exp(-t * 0.5)  # 指数衰减
    audio1 *= envelope
    audio1 += 0.05 * np.random.normal(0, 1, audio1.shape)
    
    # 创建第二个音频信号（不同的和弦）
    audio2 = (np.sin(2 * np.pi * 523.25 * t) +  # C5
             0.5 * np.sin(2 * np.pi * 659.25 * t) +  # E5
             0.3 * np.sin(2 * np.pi * 783.99 * t))   # G5
    audio2 *= envelope
    audio2 += 0.05 * np.random.normal(0, 1, audio2.shape)
    
    extractor = AudioFeatureExtractor(sample_rate)
    
    print("\n1. MFCC Features")
    mfcc1 = extractor.extract_mfcc(audio1, n_mfcc=13)
    print(f"MFCC shape: {mfcc1.shape}")
    print(f"MFCC mean: {np.mean(mfcc1, axis=1)[:5]}")
    
    print("\n2. Spectral Features")
    spectral_features1 = extractor.extract_spectral_features(audio1)
    for feature_name, feature_values in spectral_features1.items():
        if feature_values.ndim == 1:
            print(f"{feature_name}: mean={np.mean(feature_values):.2f}, std={np.std(feature_values):.2f}")
        else:
            print(f"{feature_name}: shape={feature_values.shape}")
    
    print("\n3. Temporal Features")
    temporal_features1 = extractor.extract_temporal_features(audio1)
    for feature_name, feature_values in temporal_features1.items():
        print(f"{feature_name}: mean={np.mean(feature_values):.4f}, std={np.std(feature_values):.4f}")
    
    print("\n4. Chroma Features")
    chroma1 = extractor.extract_chroma_features(audio1)
    print(f"Chroma shape: {chroma1.shape}")
    print(f"Dominant chroma bins: {np.argmax(np.mean(chroma1, axis=1))}")
    
    print("\n5. Rhythm Features")
    rhythm_features1 = extractor.extract_rhythm_features(audio1)
    print(f"Estimated tempo: {rhythm_features1['tempo']:.2f} BPM")
    print(f"Number of beats: {len(rhythm_features1['beats'])}")
    print(f"Number of onsets: {len(rhythm_features1['onset_frames'])}")
    
    print("\n6. Complete Feature Extraction")
    all_features1 = extractor.extract_all_features(audio1)
    print(f"Complete feature extraction completed for audio1")
    print(f"MFCC shape: {all_features1.mfcc.shape}")
    print(f"Chroma shape: {all_features1.chroma.shape}")
    print(f"Tempo: {all_features1.tempo:.2f} BPM")
    
    print("\n7. Audio Similarity Comparison")
    similarity = extractor.compare_audio_similarity(audio1, audio2)
    print("Similarity between audio1 and audio2:")
    for metric, value in similarity.items():
        print(f"  {metric}: {value:.4f}")
    
    # 自相似性测试
    self_similarity = extractor.compare_audio_similarity(audio1, audio1)
    print("\nSelf-similarity of audio1:")
    for metric, value in self_similarity.items():
        print(f"  {metric}: {value:.4f}")
    
    print("\nAudio feature extraction demo completed")

# 运行音频特征提取演示
if __name__ == "__main__":
    demo_audio_feature_extraction()

3. 编解码优化技术

3.1 视频编码优化

视频编码优化涉及率失真优化、运动估计算法和编码模式选择：

import numpy as np
from typing import Tuple, List, Dict, Optional
from dataclasses import dataclass
from enum import Enum
import cv2
from scipy.optimize import minimize_scalar

class BlockSize(Enum):
    """块大小枚举"""
    SIZE_4x4 = (4, 4)
    SIZE_8x8 = (8, 8)
    SIZE_16x16 = (16, 16)
    SIZE_32x32 = (32, 32)

class PredictionMode(Enum):
    """预测模式枚举"""
    INTRA = "intra"
    INTER = "inter"
    SKIP = "skip"
    MERGE = "merge"

@dataclass
class MotionVector:
    """运动矢量"""
    x: int
    y: int
    cost: float

@dataclass
class CodingUnit:
    """编码单元"""
    x: int
    y: int
    width: int
    height: int
    mode: PredictionMode
    motion_vector: Optional[MotionVector]
    quantization_parameter: int
    rate: float
    distortion: float
    rd_cost: float

class VideoEncoder:
    """视频编码器"""
    
    def __init__(self, lambda_factor: float = 1.0):
        self.lambda_factor = lambda_factor
        self.search_range = 16
        self.block_sizes = [BlockSize.SIZE_16x16, BlockSize.SIZE_8x8, BlockSize.SIZE_4x4]
    
    def motion_estimation_full_search(self, current_block: np.ndarray, 
                                    reference_frame: np.ndarray, 
                                    block_x: int, block_y: int) -> MotionVector:
        """全搜索运动估计"""
        block_height, block_width = current_block.shape
        ref_height, ref_width = reference_frame.shape
        
        best_mv = MotionVector(0, 0, float('inf'))
        
        # 搜索范围
        for dy in range(-self.search_range, self.search_range + 1):
            for dx in range(-self.search_range, self.search_range + 1):
                ref_x = block_x + dx
                ref_y = block_y + dy
                
                # 检查边界
                if (ref_x >= 0 and ref_y >= 0 and 
                    ref_x + block_width <= ref_width and 
                    ref_y + block_height <= ref_height):
                    
                    ref_block = reference_frame[ref_y:ref_y + block_height, 
                                              ref_x:ref_x + block_width]
                    
                    # 计算SAD（绝对差值和）
                    sad = np.sum(np.abs(current_block.astype(np.float32) - 
                                       ref_block.astype(np.float32)))
                    
                    if sad < best_mv.cost:
                        best_mv = MotionVector(dx, dy, sad)
        
        return best_mv
    
    def motion_estimation_diamond_search(self, current_block: np.ndarray, 
                                       reference_frame: np.ndarray, 
                                       block_x: int, block_y: int) -> MotionVector:
        """菱形搜索运动估计"""
        block_height, block_width = current_block.shape
        ref_height, ref_width = reference_frame.shape
        
        # 大菱形搜索模式
        large_diamond = [(-2, 0), (0, -2), (2, 0), (0, 2)]
        # 小菱形搜索模式
        small_diamond = [(-1, 0), (0, -1), (1, 0), (0, 1)]
        
        # 初始位置
        center_x, center_y = 0, 0
        best_cost = self._calculate_block_cost(current_block, reference_frame, 
                                             block_x + center_x, block_y + center_y)
        
        # 大菱形搜索
        step_size = 2
        while step_size >= 1:
            improved = False
            pattern = large_diamond if step_size == 2 else small_diamond
            
            for dx, dy in pattern:
                test_x = center_x + dx
                test_y = center_y + dy
                
                ref_x = block_x + test_x
                ref_y = block_y + test_y
                
                if (ref_x >= 0 and ref_y >= 0 and 
                    ref_x + block_width <= ref_width and 
                    ref_y + block_height <= ref_height):
                    
                    cost = self._calculate_block_cost(current_block, reference_frame, 
                                                     ref_x, ref_y)
                    
                    if cost < best_cost:
                        best_cost = cost
                        center_x, center_y = test_x, test_y
                        improved = True
            
            if not improved:
                step_size //= 2
        
        return MotionVector(center_x, center_y, best_cost)
    
    def _calculate_block_cost(self, current_block: np.ndarray, 
                            reference_frame: np.ndarray, 
                            ref_x: int, ref_y: int) -> float:
        """计算块匹配代价"""
        block_height, block_width = current_block.shape
        
        ref_block = reference_frame[ref_y:ref_y + block_height, 
                                  ref_x:ref_x + block_width]
        
        # 计算SAD
        sad = np.sum(np.abs(current_block.astype(np.float32) - 
                           ref_block.astype(np.float32)))
        
        return sad
    
    def rate_distortion_optimization(self, current_block: np.ndarray, 
                                   reference_frame: np.ndarray, 
                                   block_x: int, block_y: int, 
                                   block_size: BlockSize) -> CodingUnit:
        """率失真优化"""
        width, height = block_size.value
        
        # 测试不同的编码模式
        best_cu = None
        best_rd_cost = float('inf')
        
        # INTRA模式
        intra_cu = self._test_intra_mode(current_block, block_x, block_y, width, height)
        if intra_cu.rd_cost < best_rd_cost:
            best_cu = intra_cu
            best_rd_cost = intra_cu.rd_cost
        
        # INTER模式
        inter_cu = self._test_inter_mode(current_block, reference_frame, 
                                       block_x, block_y, width, height)
        if inter_cu.rd_cost < best_rd_cost:
            best_cu = inter_cu
            best_rd_cost = inter_cu.rd_cost
        
        # SKIP模式
        skip_cu = self._test_skip_mode(current_block, reference_frame, 
                                     block_x, block_y, width, height)
        if skip_cu.rd_cost < best_rd_cost:
            best_cu = skip_cu
            best_rd_cost = skip_cu.rd_cost
        
        return best_cu
    
    def _test_intra_mode(self, current_block: np.ndarray, 
                        block_x: int, block_y: int, 
                        width: int, height: int) -> CodingUnit:
        """测试帧内预测模式"""
        # 简化的帧内预测（DC预测）
        dc_value = np.mean(current_block)
        predicted_block = np.full_like(current_block, dc_value)
        
        # 计算残差
        residual = current_block.astype(np.float32) - predicted_block
        
        # 计算失真（MSE）
        distortion = np.mean(residual ** 2)
        
        # 估算码率（简化）
        rate = self._estimate_intra_rate(residual)
        
        # 计算RD代价
        rd_cost = distortion + self.lambda_factor * rate
        
        return CodingUnit(
            x=block_x, y=block_y, width=width, height=height,
            mode=PredictionMode.INTRA, motion_vector=None,
            quantization_parameter=22, rate=rate, distortion=distortion,
            rd_cost=rd_cost
        )
    
    def _test_inter_mode(self, current_block: np.ndarray, 
                        reference_frame: np.ndarray, 
                        block_x: int, block_y: int, 
                        width: int, height: int) -> CodingUnit:
        """测试帧间预测模式"""
        # 运动估计
        mv = self.motion_estimation_diamond_search(current_block, reference_frame, 
                                                  block_x, block_y)
        
        # 获取预测块
        ref_x = block_x + mv.x
        ref_y = block_y + mv.y
        predicted_block = reference_frame[ref_y:ref_y + height, ref_x:ref_x + width]
        
        # 计算残差
        residual = current_block.astype(np.float32) - predicted_block.astype(np.float32)
        
        # 计算失真
        distortion = np.mean(residual ** 2)
        
        # 估算码率
        rate = self._estimate_inter_rate(residual, mv)
        
        # 计算RD代价
        rd_cost = distortion + self.lambda_factor * rate
        
        return CodingUnit(
            x=block_x, y=block_y, width=width, height=height,
            mode=PredictionMode.INTER, motion_vector=mv,
            quantization_parameter=22, rate=rate, distortion=distortion,
            rd_cost=rd_cost
        )
    
    def _test_skip_mode(self, current_block: np.ndarray, 
                       reference_frame: np.ndarray, 
                       block_x: int, block_y: int, 
                       width: int, height: int) -> CodingUnit:
        """测试跳过模式"""
        # 跳过模式使用零运动矢量
        predicted_block = reference_frame[block_y:block_y + height, 
                                        block_x:block_x + width]
        
        # 计算失真
        distortion = np.mean((current_block.astype(np.float32) - 
                            predicted_block.astype(np.float32)) ** 2)
        
        # 跳过模式的码率很低
        rate = 1.0  # 只需要编码跳过标志
        
        # 计算RD代价
        rd_cost = distortion + self.lambda_factor * rate
        
        return CodingUnit(
            x=block_x, y=block_y, width=width, height=height,
            mode=PredictionMode.SKIP, motion_vector=MotionVector(0, 0, 0),
            quantization_parameter=22, rate=rate, distortion=distortion,
            rd_cost=rd_cost
        )
    
    def _estimate_intra_rate(self, residual: np.ndarray) -> float:
        """估算帧内预测的码率"""
        # 简化的码率估算：基于残差的方差
        variance = np.var(residual)
        # 使用信息论估算
        rate = 0.5 * np.log2(2 * np.pi * np.e * variance + 1e-10)
        return max(rate, 0.1)  # 最小码率
    
    def _estimate_inter_rate(self, residual: np.ndarray, mv: MotionVector) -> float:
        """估算帧间预测的码率"""
        # 残差码率
        residual_rate = self._estimate_intra_rate(residual)
        
        # 运动矢量码率（简化）
        mv_rate = 2 + abs(mv.x) * 0.1 + abs(mv.y) * 0.1
        
        return residual_rate + mv_rate
    
    def adaptive_quantization(self, frame: np.ndarray, 
                            target_bitrate: float) -> np.ndarray:
        """自适应量化"""
        # 计算帧的复杂度
        complexity = self._calculate_frame_complexity(frame)
        
        # 基于复杂度调整量化参数
        base_qp = 22
        complexity_factor = complexity / np.mean(complexity)
        
        # 自适应QP映射
        adaptive_qp = np.zeros_like(complexity)
        for i in range(complexity.shape[0]):
            for j in range(complexity.shape[1]):
                if complexity_factor[i, j] > 1.5:
                    adaptive_qp[i, j] = base_qp + 3  # 复杂区域使用更粗量化
                elif complexity_factor[i, j] < 0.5:
                    adaptive_qp[i, j] = base_qp - 3  # 简单区域使用更细量化
                else:
                    adaptive_qp[i, j] = base_qp
        
        return adaptive_qp
    
    def _calculate_frame_complexity(self, frame: np.ndarray) -> np.ndarray:
        """计算帧复杂度"""
        # 使用梯度幅值作为复杂度指标
        grad_x = cv2.Sobel(frame, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(frame, cv2.CV_64F, 0, 1, ksize=3)
        complexity = np.sqrt(grad_x**2 + grad_y**2)
        
        # 块级复杂度
        block_size = 16
        height, width = frame.shape
        block_complexity = np.zeros((height // block_size, width // block_size))
        
        for i in range(0, height - block_size, block_size):
            for j in range(0, width - block_size, block_size):
                block = complexity[i:i + block_size, j:j + block_size]
                block_complexity[i // block_size, j // block_size] = np.mean(block)
        
        return block_complexity

# 视频编码优化演示
def demo_video_encoding_optimization():
    print("Video Encoding Optimization Demo")
    print("================================")
    
    # 创建测试帧
    frame_size = (128, 128)
    current_frame = np.random.randint(0, 256, frame_size, dtype=np.uint8)
    reference_frame = np.random.randint(0, 256, frame_size, dtype=np.uint8)
    
    # 添加一些结构化内容
    cv2.rectangle(current_frame, (20, 20), (60, 60), 200, -1)
    cv2.rectangle(reference_frame, (25, 25), (65, 65), 200, -1)  # 轻微移动
    
    encoder = VideoEncoder(lambda_factor=1.0)
    
    print("\n1. Motion Estimation Comparison")
    
    # 测试块
    block_size = 16
    test_block = current_frame[20:36, 20:36]
    
    # 全搜索
    mv_full = encoder.motion_estimation_full_search(test_block, reference_frame, 20, 20)
    print(f"Full search MV: ({mv_full.x}, {mv_full.y}), cost: {mv_full.cost:.2f}")
    
    # 菱形搜索
    mv_diamond = encoder.motion_estimation_diamond_search(test_block, reference_frame, 20, 20)
    print(f"Diamond search MV: ({mv_diamond.x}, {mv_diamond.y}), cost: {mv_diamond.cost:.2f}")
    
    print("\n2. Rate-Distortion Optimization")
    
    # 测试不同块大小的RDO
    for block_size in encoder.block_sizes:
        width, height = block_size.value
        test_block = current_frame[20:20+height, 20:20+width]
        
        cu = encoder.rate_distortion_optimization(test_block, reference_frame, 
                                                 20, 20, block_size)
        
        print(f"Block size {width}x{height}:")
        print(f"  Best mode: {cu.mode.value}")
        print(f"  Rate: {cu.rate:.2f}")
        print(f"  Distortion: {cu.distortion:.2f}")
        print(f"  RD cost: {cu.rd_cost:.2f}")
        if cu.motion_vector:
            print(f"  Motion vector: ({cu.motion_vector.x}, {cu.motion_vector.y})")
    
    print("\n3. Adaptive Quantization")
    
    # 自适应量化
    adaptive_qp = encoder.adaptive_quantization(current_frame, target_bitrate=1000)
    print(f"Adaptive QP map shape: {adaptive_qp.shape}")
    print(f"QP range: {np.min(adaptive_qp):.0f} - {np.max(adaptive_qp):.0f}")
    print(f"Average QP: {np.mean(adaptive_qp):.2f}")
    
    # 复杂度分析
    complexity = encoder._calculate_frame_complexity(current_frame)
    print(f"\nFrame complexity analysis:")
    print(f"  Complexity range: {np.min(complexity):.2f} - {np.max(complexity):.2f}")
    print(f"  Average complexity: {np.mean(complexity):.2f}")
    
    print("\nVideo encoding optimization demo completed")

# 运行视频编码优化演示
if __name__ == "__main__":
    demo_video_encoding_optimization()

3.2 音频编码优化

音频编码优化包括心理声学模型、比特分配和感知编码技术：

import numpy as np
from scipy import signal
from scipy.fft import fft, ifft
from typing import List, Tuple, Dict, Optional
from dataclasses import dataclass
import math

@dataclass
class PsychoacousticModel:
    """心理声学模型参数"""
    bark_bands: np.ndarray
    masking_thresholds: np.ndarray
    tonality: np.ndarray
    noise_to_mask_ratio: np.ndarray

@dataclass
class BitAllocation:
    """比特分配结果"""
    band_bits: np.ndarray
    total_bits: int
    snr_per_band: np.ndarray

class AudioEncoder:
    """音频编码器"""
    
    def __init__(self, sample_rate: int = 44100, frame_size: int = 1024):
        self.sample_rate = sample_rate
        self.frame_size = frame_size
        self.bark_scale = self._init_bark_scale()
        self.masking_curves = self._init_masking_curves()
    
    def _init_bark_scale(self) -> np.ndarray:
        """初始化Bark频率刻度"""
        # Bark频率刻度转换
        freqs = np.linspace(0, self.sample_rate // 2, self.frame_size // 2 + 1)
        bark_freqs = 13 * np.arctan(0.00076 * freqs) + 3.5 * np.arctan((freqs / 7500) ** 2)
        return bark_freqs
    
    def _init_masking_curves(self) -> Dict[str, np.ndarray]:
        """初始化掩蔽曲线"""
        # 简化的掩蔽曲线
        bark_range = np.linspace(-3, 8, 100)
        
        # 音调掩蔽曲线
        tonal_masking = np.zeros_like(bark_range)
        mask = bark_range >= -1
        tonal_masking[mask] = -17 - 0.4 * bark_range[mask] + 0.15 * bark_range[mask] ** 2
        mask = bark_range < -1
        tonal_masking[mask] = -17 + 0.15 * bark_range[mask] ** 2
        
        # 噪声掩蔽曲线
        noise_masking = np.zeros_like(bark_range)
        mask = bark_range >= -1
        noise_masking[mask] = -17 - 0.4 * bark_range[mask] + 0.15 * bark_range[mask] ** 2
        mask = bark_range < -1
        noise_masking[mask] = -17 + 0.15 * bark_range[mask] ** 2
        
        return {
            'tonal': tonal_masking,
            'noise': noise_masking,
            'bark_range': bark_range
        }
    
    def psychoacoustic_analysis(self, audio_frame: np.ndarray) -> PsychoacousticModel:
        """心理声学分析"""
        # FFT分析
        spectrum = fft(audio_frame)
        magnitude = np.abs(spectrum[:len(spectrum)//2 + 1])
        power = magnitude ** 2
        
        # Bark频段分组
        bark_bands = self._group_into_bark_bands(power)
        
        # 音调性检测
        tonality = self._detect_tonality(magnitude)
        
        # 计算掩蔽阈值
        masking_thresholds = self._calculate_masking_thresholds(bark_bands, tonality)
        
        # 噪声掩蔽比
        noise_to_mask_ratio = self._calculate_noise_to_mask_ratio(bark_bands, masking_thresholds)
        
        return PsychoacousticModel(
            bark_bands=bark_bands,
            masking_thresholds=masking_thresholds,
            tonality=tonality,
            noise_to_mask_ratio=noise_to_mask_ratio
        )
    
    def _group_into_bark_bands(self, power_spectrum: np.ndarray) -> np.ndarray:
        """将功率谱分组到Bark频段"""
        num_bands = 25  # 标准Bark频段数
        bark_bands = np.zeros(num_bands)
        
        # 简化的频段分组
        band_edges = np.linspace(0, len(power_spectrum) - 1, num_bands + 1).astype(int)
        
        for i in range(num_bands):
            start_idx = band_edges[i]
            end_idx = band_edges[i + 1]
            bark_bands[i] = np.sum(power_spectrum[start_idx:end_idx])
        
        return bark_bands
    
    def _detect_tonality(self, magnitude_spectrum: np.ndarray) -> np.ndarray:
        """检测音调性"""
        # 简化的音调性检测
        tonality = np.zeros(25)  # 25个Bark频段
        
        # 使用频谱平坦度作为音调性指标
        band_edges = np.linspace(0, len(magnitude_spectrum) - 1, 26).astype(int)
        
        for i in range(25):
            start_idx = band_edges[i]
            end_idx = band_edges[i + 1]
            band_spectrum = magnitude_spectrum[start_idx:end_idx]
            
            if len(band_spectrum) > 0:
                # 几何平均 / 算术平均
                geometric_mean = np.exp(np.mean(np.log(band_spectrum + 1e-10)))
                arithmetic_mean = np.mean(band_spectrum)
                tonality[i] = geometric_mean / (arithmetic_mean + 1e-10)
        
        return tonality
    
    def _calculate_masking_thresholds(self, bark_bands: np.ndarray, 
                                    tonality: np.ndarray) -> np.ndarray:
        """计算掩蔽阈值"""
        num_bands = len(bark_bands)
        masking_thresholds = np.zeros(num_bands)
        
        for i in range(num_bands):
            # 基于能量和音调性计算掩蔽阈值
            energy_db = 10 * np.log10(bark_bands[i] + 1e-10)
            
            # 音调掩蔽
            if tonality[i] > 0.5:  # 音调成分
                masking_threshold = energy_db - 14.5 - 5.5
            else:  # 噪声成分
                masking_threshold = energy_db - 5.5
            
            masking_thresholds[i] = masking_threshold
        
        return masking_thresholds
    
    def _calculate_noise_to_mask_ratio(self, bark_bands: np.ndarray, 
                                     masking_thresholds: np.ndarray) -> np.ndarray:
        """计算噪声掩蔽比"""
        signal_db = 10 * np.log10(bark_bands + 1e-10)
        nmr = signal_db - masking_thresholds
        return nmr
    
    def bit_allocation_algorithm(self, psychoacoustic_model: PsychoacousticModel, 
                               total_bits: int) -> BitAllocation:
        """比特分配算法"""
        num_bands = len(psychoacoustic_model.bark_bands)
        
        # 基于噪声掩蔽比的比特分配
        nmr = psychoacoustic_model.noise_to_mask_ratio
        
        # 初始比特分配
        min_bits_per_band = 2
        available_bits = total_bits - num_bands * min_bits_per_band
        
        band_bits = np.full(num_bands, min_bits_per_band, dtype=float)
        
        # 迭代分配剩余比特
        for _ in range(available_bits):
            # 找到需要最多比特的频段（最高的NMR）
            max_nmr_idx = np.argmax(nmr)
            
            # 分配一个比特
            band_bits[max_nmr_idx] += 1
            
            # 更新NMR（分配比特后噪声减少）
            nmr[max_nmr_idx] -= 6  # 每增加1比特，SNR提高6dB
        
        # 计算每频段的SNR
        snr_per_band = band_bits * 6  # 简化：每比特6dB SNR
        
        return BitAllocation(
            band_bits=band_bits.astype(int),
            total_bits=int(np.sum(band_bits)),
            snr_per_band=snr_per_band
        )
    
    def perceptual_quantization(self, audio_frame: np.ndarray, 
                              bit_allocation: BitAllocation) -> Tuple[np.ndarray, float]:
        """感知量化"""
        # FFT变换
        spectrum = fft(audio_frame)
        magnitude = np.abs(spectrum)
        phase = np.angle(spectrum)
        
        # 分频段量化
        quantized_magnitude = np.zeros_like(magnitude)
        total_distortion = 0
        
        band_edges = np.linspace(0, len(magnitude) // 2, len(bit_allocation.band_bits) + 1).astype(int)
        
        for i, bits in enumerate(bit_allocation.band_bits):
            start_idx = band_edges[i]
            end_idx = band_edges[i + 1]
            
            if bits > 0:
                # 量化该频段
                band_magnitude = magnitude[start_idx:end_idx]
                
                # 计算量化步长
                max_val = np.max(band_magnitude)
                quantization_levels = 2 ** bits
                step_size = max_val / quantization_levels
                
                # 量化
                quantized_band = np.round(band_magnitude / step_size) * step_size
                quantized_magnitude[start_idx:end_idx] = quantized_band
                
                # 计算量化误差
                error = np.sum((band_magnitude - quantized_band) ** 2)
                total_distortion += error
            else:
                # 0比特分配，设为0
                quantized_magnitude[start_idx:end_idx] = 0
        
        # 重构信号
        quantized_spectrum = quantized_magnitude * np.exp(1j * phase)
        reconstructed_audio = np.real(ifft(quantized_spectrum))
        
        return reconstructed_audio[:len(audio_frame)], total_distortion
    
    def adaptive_bit_rate_control(self, audio_frames: List[np.ndarray], 
                                target_bitrate: float) -> List[BitAllocation]:
        """自适应码率控制"""
        frame_duration = self.frame_size / self.sample_rate
        bits_per_frame = int(target_bitrate * frame_duration)
        
        allocations = []
        
        for frame in audio_frames:
            # 心理声学分析
            psycho_model = self.psychoacoustic_analysis(frame)
            
            # 基于帧复杂度调整比特预算
            frame_complexity = np.mean(psycho_model.noise_to_mask_ratio)
            complexity_factor = max(0.5, min(2.0, frame_complexity / 10))
            
            adjusted_bits = int(bits_per_frame * complexity_factor)
            
            # 比特分配
            allocation = self.bit_allocation_algorithm(psycho_model, adjusted_bits)
            allocations.append(allocation)
        
        return allocations
    
    def calculate_perceptual_quality(self, original: np.ndarray, 
                                   encoded: np.ndarray) -> Dict[str, float]:
        """计算感知质量指标"""
        # 基本质量指标
        mse = np.mean((original - encoded) ** 2)
        snr = 10 * np.log10(np.var(original) / (mse + 1e-10))
        
        # 频域分析
        orig_spectrum = np.abs(fft(original))
        enc_spectrum = np.abs(fft(encoded))
        
        # 频谱失真
        spectral_distortion = np.mean((orig_spectrum - enc_spectrum) ** 2)
        
        # 感知加权失真
        psycho_model = self.psychoacoustic_analysis(original)
        
        # 使用掩蔽阈值加权频谱误差
        band_edges = np.linspace(0, len(orig_spectrum) // 2, len(psycho_model.masking_thresholds) + 1).astype(int)
        weighted_distortion = 0
        
        for i, threshold in enumerate(psycho_model.masking_thresholds):
            start_idx = band_edges[i]
            end_idx = band_edges[i + 1]
            
            band_error = np.sum((orig_spectrum[start_idx:end_idx] - 
                               enc_spectrum[start_idx:end_idx]) ** 2)
            
            # 使用掩蔽阈值作为权重
            weight = 1.0 / (10 ** (threshold / 10) + 1e-10)
            weighted_distortion += band_error * weight
        
        return {
            'snr': snr,
            'spectral_distortion': spectral_distortion,
            'perceptual_distortion': weighted_distortion,
            'overall_quality': snr - 0.1 * weighted_distortion
        }

# 音频编码优化演示
def demo_audio_encoding_optimization():
    print("Audio Encoding Optimization Demo")
    print("================================")
    
    # 创建测试音频
    sample_rate = 44100
    duration = 1.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # 复杂音频信号：多个频率成分
    audio = (np.sin(2 * np.pi * 440 * t) +  # A4
            0.5 * np.sin(2 * np.pi * 880 * t) +  # A5
            0.3 * np.sin(2 * np.pi * 1320 * t) +  # E6
            0.1 * np.random.normal(0, 1, len(t)))  # 噪声
    
    # 分帧
    frame_size = 1024
    frames = []
    for i in range(0, len(audio) - frame_size, frame_size // 2):
        frame = audio[i:i + frame_size]
        if len(frame) == frame_size:
            frames.append(frame)
    
    encoder = AudioEncoder(sample_rate, frame_size)
    
    print(f"\nProcessing {len(frames)} audio frames")
    
    print("\n1. Psychoacoustic Analysis")
    
    # 分析第一帧
    test_frame = frames[0]
    psycho_model = encoder.psychoacoustic_analysis(test_frame)
    
    print(f"Bark bands energy: {psycho_model.bark_bands[:5]}")
    print(f"Masking thresholds: {psycho_model.masking_thresholds[:5]}")
    print(f"Tonality: {psycho_model.tonality[:5]}")
    print(f"Noise-to-mask ratio: {psycho_model.noise_to_mask_ratio[:5]}")
    
    print("\n2. Bit Allocation")
    
    # 测试不同的比特预算
    bit_budgets = [128, 256, 512, 1024]
    
    for total_bits in bit_budgets:
        allocation = encoder.bit_allocation_algorithm(psycho_model, total_bits)
        print(f"\nTotal bits: {total_bits}")
        print(f"  Allocated bits: {allocation.total_bits}")
        print(f"  Bits per band (first 10): {allocation.band_bits[:10]}")
        print(f"  Average SNR: {np.mean(allocation.snr_per_band):.2f} dB")
    
    print("\n3. Perceptual Quantization")
    
    # 使用中等比特预算进行量化
    allocation = encoder.bit_allocation_algorithm(psycho_model, 512)
    quantized_frame, distortion = encoder.perceptual_quantization(test_frame, allocation)
    
    print(f"Quantization distortion: {distortion:.2f}")
    print(f"Original frame energy: {np.sum(test_frame ** 2):.2f}")
    print(f"Quantized frame energy: {np.sum(quantized_frame ** 2):.2f}")
    
    print("\n4. Adaptive Bit Rate Control")
    
    # 自适应码率控制
    target_bitrate = 128000  # 128 kbps
    allocations = encoder.adaptive_bit_rate_control(frames[:5], target_bitrate)
    
    print(f"Adaptive bit rate control for {len(allocations)} frames:")
    for i, allocation in enumerate(allocations):
        print(f"  Frame {i}: {allocation.total_bits} bits, avg SNR: {np.mean(allocation.snr_per_band):.2f} dB")
    
    print("\n5. Perceptual Quality Assessment")
    
    # 质量评估
    quality_metrics = encoder.calculate_perceptual_quality(test_frame, quantized_frame)
    
    print("Quality metrics:")
    for metric, value in quality_metrics.items():
        print(f"  {metric}: {value:.2f}")
    
    print("\n6. Encoding Efficiency Analysis")
    
    # 比较不同比特率的编码效率
    print("\nBit rate vs Quality analysis:")
    print("Bits\tSNR\tPerceptual Quality")
    
    for bits in [64, 128, 256, 512, 1024]:
        allocation = encoder.bit_allocation_algorithm(psycho_model, bits)
        quantized, _ = encoder.perceptual_quantization(test_frame, allocation)
        quality = encoder.calculate_perceptual_quality(test_frame, quantized)
        
        print(f"{bits}\t{quality['snr']:.1f}\t{quality['overall_quality']:.1f}")
    
    print("\nAudio encoding optimization demo completed")

# 运行音频编码优化演示
if __name__ == "__main__":
    demo_audio_encoding_optimization()

4. 性能优化策略

4.1 算法并行化与SIMD优化

现代处理器的并行计算能力是音视频处理性能优化的关键：

import numpy as np
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from typing import List, Callable, Any, Tuple
import time
from functools import partial
import threading
from dataclasses import dataclass

@dataclass
class PerformanceMetrics:
    """性能指标"""
    execution_time: float
    throughput: float  # 处理速度（帧/秒或样本/秒）
    cpu_utilization: float
    memory_usage: float
    speedup_ratio: float

class ParallelProcessor:
    """并行处理器"""
    
    def __init__(self, num_workers: int = None):
        self.num_workers = num_workers or mp.cpu_count()
        self.thread_pool = ThreadPoolExecutor(max_workers=self.num_workers)
        self.process_pool = ProcessPoolExecutor(max_workers=self.num_workers)
    
    def parallel_filter_processing(self, images: List[np.ndarray], 
                                 filter_func: Callable, 
                                 use_processes: bool = False) -> List[np.ndarray]:
        """并行图像滤波处理"""
        if use_processes:
            with ProcessPoolExecutor(max_workers=self.num_workers) as executor:
                results = list(executor.map(filter_func, images))
        else:
            with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
                results = list(executor.map(filter_func, images))
        
        return results
    
    def parallel_block_processing(self, image: np.ndarray, 
                                block_size: Tuple[int, int], 
                                process_func: Callable) -> np.ndarray:
        """并行块处理"""
        height, width = image.shape[:2]
        block_h, block_w = block_size
        
        # 分割图像为块
        blocks = []
        positions = []
        
        for y in range(0, height, block_h):
            for x in range(0, width, block_w):
                end_y = min(y + block_h, height)
                end_x = min(x + block_w, width)
                
                block = image[y:end_y, x:end_x]
                blocks.append(block)
                positions.append((y, x, end_y, end_x))
        
        # 并行处理块
        with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
            processed_blocks = list(executor.map(process_func, blocks))
        
        # 重组图像
        result = np.zeros_like(image)
        for (y, x, end_y, end_x), processed_block in zip(positions, processed_blocks):
            result[y:end_y, x:end_x] = processed_block
        
        return result
    
    def vectorized_operations(self, data: np.ndarray) -> np.ndarray:
        """向量化操作示例"""
        # 使用NumPy的向量化操作替代循环
        
        # 错误的方式：使用Python循环
        # result = np.zeros_like(data)
        # for i in range(data.shape[0]):
        #     for j in range(data.shape[1]):
        #         result[i, j] = np.sqrt(data[i, j] ** 2 + 1)
        
        # 正确的方式：向量化操作
        result = np.sqrt(data ** 2 + 1)
        
        return result
    
    def simd_optimized_convolution(self, image: np.ndarray, 
                                 kernel: np.ndarray) -> np.ndarray:
        """SIMD优化的卷积操作"""
        # 使用NumPy的优化实现，它内部使用SIMD指令
        from scipy.signal import convolve2d
        
        if len(image.shape) == 3:
            # 彩色图像，分别处理每个通道
            result = np.zeros_like(image)
            for channel in range(image.shape[2]):
                result[:, :, channel] = convolve2d(
                    image[:, :, channel], kernel, mode='same', boundary='symm'
                )
        else:
            # 灰度图像
            result = convolve2d(image, kernel, mode='same', boundary='symm')
        
        return result
    
    def memory_efficient_processing(self, large_data: np.ndarray, 
                                  chunk_size: int = 1024) -> np.ndarray:
        """内存高效的处理"""
        # 分块处理大数据，避免内存溢出
        result = np.zeros_like(large_data)
        
        for i in range(0, large_data.shape[0], chunk_size):
            end_i = min(i + chunk_size, large_data.shape[0])
            chunk = large_data[i:end_i]
            
            # 处理块
            processed_chunk = self.vectorized_operations(chunk)
            result[i:end_i] = processed_chunk
        
        return result
    
    def benchmark_processing_methods(self, data: np.ndarray, 
                                   process_func: Callable) -> Dict[str, PerformanceMetrics]:
        """基准测试不同处理方法"""
        results = {}
        
        # 串行处理
        start_time = time.time()
        serial_result = process_func(data)
        serial_time = time.time() - start_time
        
        results['serial'] = PerformanceMetrics(
            execution_time=serial_time,
            throughput=data.size / serial_time,
            cpu_utilization=100.0,  # 假设单核100%
            memory_usage=data.nbytes,
            speedup_ratio=1.0
        )
        
        # 多线程处理
        if data.ndim >= 2:
            # 将数据分割为多个部分
            chunks = np.array_split(data, self.num_workers)
            
            start_time = time.time()
            with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
                thread_results = list(executor.map(process_func, chunks))
            thread_time = time.time() - start_time
            
            results['threaded'] = PerformanceMetrics(
                execution_time=thread_time,
                throughput=data.size / thread_time,
                cpu_utilization=100.0 * self.num_workers,
                memory_usage=data.nbytes,
                speedup_ratio=serial_time / thread_time
            )
        
        # 向量化处理
        if hasattr(process_func, '__name__') and 'vectorized' in process_func.__name__:
            start_time = time.time()
            vectorized_result = self.vectorized_operations(data)
            vectorized_time = time.time() - start_time
            
            results['vectorized'] = PerformanceMetrics(
                execution_time=vectorized_time,
                throughput=data.size / vectorized_time,
                cpu_utilization=100.0,
                memory_usage=data.nbytes,
                speedup_ratio=serial_time / vectorized_time
            )
        
        return results
    
    def __del__(self):
        """清理资源"""
        if hasattr(self, 'thread_pool'):
            self.thread_pool.shutdown(wait=True)
        if hasattr(self, 'process_pool'):
            self.process_pool.shutdown(wait=True)

class CacheOptimizer:
    """缓存优化器"""
    
    def __init__(self, cache_size: int = 1000):
        self.cache = {}
        self.cache_size = cache_size
        self.access_count = {}
    
    def cache_friendly_matrix_multiply(self, A: np.ndarray, B: np.ndarray, 
                                     block_size: int = 64) -> np.ndarray:
        """缓存友好的矩阵乘法"""
        m, k = A.shape
        k2, n = B.shape
        assert k == k2, "Matrix dimensions must match"
        
        C = np.zeros((m, n))
        
        # 分块矩阵乘法，提高缓存命中率
        for i in range(0, m, block_size):
            for j in range(0, n, block_size):
                for l in range(0, k, block_size):
                    # 计算块边界
                    i_end = min(i + block_size, m)
                    j_end = min(j + block_size, n)
                    l_end = min(l + block_size, k)
                    
                    # 块乘法
                    C[i:i_end, j:j_end] += np.dot(
                        A[i:i_end, l:l_end], 
                        B[l:l_end, j:j_end]
                    )
        
        return C
    
    def data_locality_optimization(self, data: np.ndarray) -> np.ndarray:
        """数据局部性优化"""
        # 确保数据在内存中连续存储
        if not data.flags['C_CONTIGUOUS']:
            data = np.ascontiguousarray(data)
        
        # 使用内存预取友好的访问模式
        result = np.zeros_like(data)
        
        # 按行优先顺序处理（C风格）
        for i in range(data.shape[0]):
            result[i] = data[i] * 2 + 1  # 简单操作示例
        
        return result

# 性能优化演示
def demo_performance_optimization():
    print("Performance Optimization Demo")
    print("=============================")
    
    # 创建测试数据
    image_size = (512, 512)
    test_image = np.random.randint(0, 256, image_size, dtype=np.uint8)
    large_data = np.random.rand(1000, 1000)
    
    processor = ParallelProcessor()
    cache_optimizer = CacheOptimizer()
    
    print(f"\nUsing {processor.num_workers} worker threads/processes")
    
    print("\n1. Parallel Image Processing")
    
    # 创建多个图像进行并行处理
    images = [test_image + np.random.randint(-10, 10, image_size) for _ in range(8)]
    
    # 定义简单的滤波函数
    def simple_filter(img):
        return np.clip(img.astype(np.float32) * 1.2, 0, 255).astype(np.uint8)
    
    # 串行处理
    start_time = time.time()
    serial_results = [simple_filter(img) for img in images]
    serial_time = time.time() - start_time
    
    # 并行处理（线程）
    start_time = time.time()
    parallel_results_thread = processor.parallel_filter_processing(images, simple_filter, use_processes=False)
    parallel_time_thread = time.time() - start_time
    
    # 并行处理（进程）
    start_time = time.time()
    parallel_results_process = processor.parallel_filter_processing(images, simple_filter, use_processes=True)
    parallel_time_process = time.time() - start_time
    
    print(f"Serial processing: {serial_time:.4f}s")
    print(f"Parallel (threads): {parallel_time_thread:.4f}s, speedup: {serial_time/parallel_time_thread:.2f}x")
    print(f"Parallel (processes): {parallel_time_process:.4f}s, speedup: {serial_time/parallel_time_process:.2f}x")
    
    print("\n2. Block-based Parallel Processing")
    
    # 块处理函数
    def block_process(block):
        # 简单的边缘增强
        kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        if len(block.shape) == 2 and block.shape[0] >= 3 and block.shape[1] >= 3:
            from scipy.signal import convolve2d
            return convolve2d(block, kernel, mode='same', boundary='symm')
        return block
    
    # 串行块处理
    start_time = time.time()
    serial_block_result = block_process(test_image)
    serial_block_time = time.time() - start_time
    
    # 并行块处理
    start_time = time.time()
    parallel_block_result = processor.parallel_block_processing(
        test_image, (64, 64), block_process
    )
    parallel_block_time = time.time() - start_time
    
    print(f"Serial block processing: {serial_block_time:.4f}s")
    print(f"Parallel block processing: {parallel_block_time:.4f}s, speedup: {serial_block_time/parallel_block_time:.2f}x")
    
    print("\n3. Vectorization Optimization")
    
    # 比较循环vs向量化操作
    test_data = np.random.rand(1000, 1000)
    
    # Python循环（慢）
    def loop_operation(data):
        result = np.zeros_like(data)
        for i in range(data.shape[0]):
            for j in range(data.shape[1]):
                result[i, j] = np.sqrt(data[i, j] ** 2 + 1)
        return result
    
    # 向量化操作（快）
    def vectorized_operation(data):
        return processor.vectorized_operations(data)
    
    # 基准测试
    print("Benchmarking vectorization...")
    
    # 只测试小数据集的循环操作（避免太慢）
    small_data = test_data[:100, :100]
    
    start_time = time.time()
    loop_result = loop_operation(small_data)
    loop_time = time.time() - start_time
    
    start_time = time.time()
    vectorized_result = vectorized_operation(small_data)
    vectorized_time = time.time() - start_time
    
    print(f"Loop operation (100x100): {loop_time:.4f}s")
    print(f"Vectorized operation (100x100): {vectorized_time:.4f}s, speedup: {loop_time/vectorized_time:.2f}x")
    
    # 大数据集只测试向量化
    start_time = time.time()
    large_vectorized_result = vectorized_operation(test_data)
    large_vectorized_time = time.time() - start_time
    
    print(f"Vectorized operation (1000x1000): {large_vectorized_time:.4f}s")
    
    print("\n4. Memory Optimization")
    
    # 内存高效处理
    very_large_data = np.random.rand(2000, 2000)
    
    start_time = time.time()
    memory_efficient_result = processor.memory_efficient_processing(very_large_data, chunk_size=500)
    memory_efficient_time = time.time() - start_time
    
    print(f"Memory-efficient processing (2000x2000): {memory_efficient_time:.4f}s")
    print(f"Memory usage: {very_large_data.nbytes / 1024 / 1024:.2f} MB")
    
    print("\n5. Cache Optimization")
    
    # 矩阵乘法比较
    A = np.random.rand(256, 256)
    B = np.random.rand(256, 256)
    
    # 标准矩阵乘法
    start_time = time.time()
    standard_result = np.dot(A, B)
    standard_time = time.time() - start_time
    
    # 缓存友好的矩阵乘法
    start_time = time.time()
    cache_friendly_result = cache_optimizer.cache_friendly_matrix_multiply(A, B, block_size=32)
    cache_friendly_time = time.time() - start_time
    
    print(f"Standard matrix multiply: {standard_time:.4f}s")
    print(f"Cache-friendly matrix multiply: {cache_friendly_time:.4f}s")
    print(f"Results match: {np.allclose(standard_result, cache_friendly_result)}")
    
    print("\n6. Performance Summary")
    
    # 综合性能指标
    optimizations = {
        'Parallel Processing': serial_time / parallel_time_thread,
        'Block Processing': serial_block_time / parallel_block_time,
        'Vectorization': loop_time / vectorized_time,
        'Memory Efficiency': 1.0,  # 基准
    }
    
    print("Optimization technique -> Speedup ratio:")
    for technique, speedup in optimizations.items():
        print(f"  {technique}: {speedup:.2f}x")
    
    print("\nPerformance optimization demo completed")

# 运行性能优化演示
if __name__ == "__main__":
    demo_performance_optimization()

4.2 GPU加速与硬件优化

GPU在音视频处理中提供了强大的并行计算能力：

import numpy as np
from typing import List, Tuple, Dict, Optional
from dataclasses import dataclass
import time

# 模拟GPU加速处理（实际应用中需要使用CUDA、OpenCL等）
class GPUAccelerator:
    """GPU加速器模拟"""
    
    def __init__(self, device_id: int = 0):
        self.device_id = device_id
        self.memory_pool = {}
        self.compute_units = 2048  # 模拟计算单元数
        self.memory_bandwidth = 500  # GB/s
        
    def allocate_memory(self, size: Tuple[int, ...], dtype: str = 'float32') -> str:
        """分配GPU内存"""
        memory_id = f"gpu_mem_{len(self.memory_pool)}"
        
        # 计算内存大小
        element_size = 4 if dtype == 'float32' else 1
        total_size = np.prod(size) * element_size
        
        self.memory_pool[memory_id] = {
            'size': size,
            'dtype': dtype,
            'total_bytes': total_size,
            'data': np.zeros(size, dtype=np.float32 if dtype == 'float32' else np.uint8)
        }
        
        return memory_id
    
    def copy_to_gpu(self, data: np.ndarray, memory_id: str) -> float:
        """复制数据到GPU"""
        if memory_id not in self.memory_pool:
            raise ValueError(f"Memory ID {memory_id} not found")
        
        # 模拟传输时间
        transfer_time = data.nbytes / (self.memory_bandwidth * 1e9)
        
        # 复制数据
        self.memory_pool[memory_id]['data'] = data.copy()
        
        return transfer_time
    
    def copy_from_gpu(self, memory_id: str) -> Tuple[np.ndarray, float]:
        """从GPU复制数据"""
        if memory_id not in self.memory_pool:
            raise ValueError(f"Memory ID {memory_id} not found")
        
        data = self.memory_pool[memory_id]['data']
        transfer_time = data.nbytes / (self.memory_bandwidth * 1e9)
        
        return data.copy(), transfer_time
    
    def parallel_convolution(self, input_mem_id: str, kernel_mem_id: str, 
                           output_mem_id: str) -> float:
        """并行卷积计算"""
        input_data = self.memory_pool[input_mem_id]['data']
        kernel_data = self.memory_pool[kernel_mem_id]['data']
        
        # 模拟GPU并行计算时间
        operations = np.prod(input_data.shape) * np.prod(kernel_data.shape)
        compute_time = operations / (self.compute_units * 1e9)  # 假设每个计算单元1GHz
        
        # 执行卷积（简化版本）
        if len(input_data.shape) == 2 and len(kernel_data.shape) == 2:
            from scipy.signal import convolve2d
            result = convolve2d(input_data, kernel_data, mode='same')
            self.memory_pool[output_mem_id]['data'] = result
        
        return compute_time
    
    def parallel_matrix_multiply(self, a_mem_id: str, b_mem_id: str, 
                               c_mem_id: str) -> float:
        """并行矩阵乘法"""
        a_data = self.memory_pool[a_mem_id]['data']
        b_data = self.memory_pool[b_mem_id]['data']
        
        # 模拟GPU矩阵乘法时间
        m, k = a_data.shape
        k2, n = b_data.shape
        operations = m * n * k * 2  # 乘法和加法
        compute_time = operations / (self.compute_units * 1e9)
        
        # 执行矩阵乘法
        result = np.dot(a_data, b_data)
        self.memory_pool[c_mem_id]['data'] = result
        
        return compute_time
    
    def free_memory(self, memory_id: str):
        """释放GPU内存"""
        if memory_id in self.memory_pool:
            del self.memory_pool[memory_id]
    
    def get_memory_usage(self) -> Dict[str, float]:
        """获取内存使用情况"""
        total_bytes = sum(mem['total_bytes'] for mem in self.memory_pool.values())
        return {
            'used_memory_mb': total_bytes / (1024 * 1024),
            'allocated_blocks': len(self.memory_pool),
            'memory_efficiency': min(1.0, total_bytes / (8 * 1024 * 1024 * 1024))  # 假设8GB显存
        }

class HardwareOptimizer:
    """硬件优化器"""
    
    def __init__(self):
        self.gpu = GPUAccelerator()
        self.cpu_cores = 8
        self.cache_sizes = {'L1': 32, 'L2': 256, 'L3': 8192}  # KB
    
    def optimize_memory_access_pattern(self, data: np.ndarray, 
                                     block_size: int = 64) -> np.ndarray:
        """优化内存访问模式"""
        # 确保数据连续性
        if not data.flags['C_CONTIGUOUS']:
            data = np.ascontiguousarray(data)
        
        # 分块处理以提高缓存命中率
        result = np.zeros_like(data)
        
        for i in range(0, data.shape[0], block_size):
            for j in range(0, data.shape[1], block_size):
                end_i = min(i + block_size, data.shape[0])
                end_j = min(j + block_size, data.shape[1])
                
                # 处理块
                block = data[i:end_i, j:end_j]
                result[i:end_i, j:end_j] = block * 2 + 1  # 简单操作
        
        return result
    
    def cpu_gpu_hybrid_processing(self, large_image: np.ndarray, 
                                kernel: np.ndarray) -> Tuple[np.ndarray, Dict[str, float]]:
        """CPU-GPU混合处理"""
        metrics = {}
        
        # 分析数据大小决定处理策略
        data_size_mb = large_image.nbytes / (1024 * 1024)
        
        if data_size_mb < 10:  # 小数据用CPU
            start_time = time.time()
            from scipy.signal import convolve2d
            result = convolve2d(large_image, kernel, mode='same')
            cpu_time = time.time() - start_time
            
            metrics['processing_method'] = 'CPU'
            metrics['total_time'] = cpu_time
            metrics['transfer_time'] = 0
            
        else:  # 大数据用GPU
            start_time = time.time()
            
            # 分配GPU内存
            input_mem = self.gpu.allocate_memory(large_image.shape, 'float32')
            kernel_mem = self.gpu.allocate_memory(kernel.shape, 'float32')
            output_mem = self.gpu.allocate_memory(large_image.shape, 'float32')
            
            # 传输数据到GPU
            transfer_to_time = self.gpu.copy_to_gpu(large_image.astype(np.float32), input_mem)
            transfer_to_time += self.gpu.copy_to_gpu(kernel.astype(np.float32), kernel_mem)
            
            # GPU计算
            compute_time = self.gpu.parallel_convolution(input_mem, kernel_mem, output_mem)
            
            # 传输结果回CPU
            result, transfer_from_time = self.gpu.copy_from_gpu(output_mem)
            
            # 清理GPU内存
            self.gpu.free_memory(input_mem)
            self.gpu.free_memory(kernel_mem)
            self.gpu.free_memory(output_mem)
            
            total_time = time.time() - start_time
            
            metrics['processing_method'] = 'GPU'
            metrics['total_time'] = total_time
            metrics['compute_time'] = compute_time
            metrics['transfer_to_time'] = transfer_to_time
            metrics['transfer_from_time'] = transfer_from_time
            metrics['transfer_time'] = transfer_to_time + transfer_from_time
        
        return result, metrics
    
    def adaptive_processing_strategy(self, workload: Dict[str, any]) -> str:
        """自适应处理策略选择"""
        data_size = workload.get('data_size_mb', 0)
        complexity = workload.get('complexity', 'low')
        real_time_requirement = workload.get('real_time', False)
        
        # 决策逻辑
        if real_time_requirement and data_size > 50:
            return 'gpu_optimized'
        elif data_size > 100:
            return 'gpu_parallel'
        elif complexity == 'high':
            return 'cpu_optimized'
        else:
            return 'cpu_simple'
    
    def benchmark_hardware_configurations(self, test_data: np.ndarray) -> Dict[str, Dict[str, float]]:
        """基准测试不同硬件配置"""
        results = {}
        kernel = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.float32)
        
        # CPU单线程
        start_time = time.time()
        from scipy.signal import convolve2d
        cpu_result = convolve2d(test_data, kernel, mode='same')
        cpu_time = time.time() - start_time
        
        results['cpu_single'] = {
            'execution_time': cpu_time,
            'throughput': test_data.size / cpu_time,
            'memory_usage': test_data.nbytes
        }
        
        # GPU处理
        gpu_result, gpu_metrics = self.cpu_gpu_hybrid_processing(test_data, kernel)
        
        results['gpu'] = {
            'execution_time': gpu_metrics['total_time'],
            'compute_time': gpu_metrics.get('compute_time', 0),
            'transfer_time': gpu_metrics.get('transfer_time', 0),
            'throughput': test_data.size / gpu_metrics['total_time']
        }
        
        # 内存优化CPU处理
        start_time = time.time()
        optimized_result = self.optimize_memory_access_pattern(test_data)
        optimized_time = time.time() - start_time
        
        results['cpu_optimized'] = {
            'execution_time': optimized_time,
            'throughput': test_data.size / optimized_time,
            'memory_usage': test_data.nbytes
        }
        
        return results

# GPU加速演示
def demo_gpu_acceleration():
    print("GPU Acceleration Demo")
    print("====================")
    
    optimizer = HardwareOptimizer()
    
    # 创建测试数据
    small_image = np.random.rand(256, 256).astype(np.float32)
    large_image = np.random.rand(1024, 1024).astype(np.float32)
    kernel = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.float32)
    
    print(f"\nSmall image: {small_image.shape}, {small_image.nbytes / 1024:.1f} KB")
    print(f"Large image: {large_image.shape}, {large_image.nbytes / 1024:.1f} KB")
    
    print("\n1. CPU vs GPU Processing Comparison")
    
    # 小图像处理
    small_result, small_metrics = optimizer.cpu_gpu_hybrid_processing(small_image, kernel)
    print(f"\nSmall image processing:")
    print(f"  Method: {small_metrics['processing_method']}")
    print(f"  Total time: {small_metrics['total_time']:.4f}s")
    
    # 大图像处理
    large_result, large_metrics = optimizer.cpu_gpu_hybrid_processing(large_image, kernel)
    print(f"\nLarge image processing:")
    print(f"  Method: {large_metrics['processing_method']}")
    print(f"  Total time: {large_metrics['total_time']:.4f}s")
    if 'compute_time' in large_metrics:
        print(f"  Compute time: {large_metrics['compute_time']:.4f}s")
        print(f"  Transfer time: {large_metrics['transfer_time']:.4f}s")
        print(f"  Compute efficiency: {large_metrics['compute_time']/large_metrics['total_time']*100:.1f}%")
    
    print("\n2. Adaptive Processing Strategy")
    
    # 测试不同工作负载
    workloads = [
        {'data_size_mb': 5, 'complexity': 'low', 'real_time': False},
        {'data_size_mb': 50, 'complexity': 'high', 'real_time': True},
        {'data_size_mb': 200, 'complexity': 'medium', 'real_time': False},
        {'data_size_mb': 10, 'complexity': 'high', 'real_time': True}
    ]
    
    for i, workload in enumerate(workloads):
        strategy = optimizer.adaptive_processing_strategy(workload)
        print(f"  Workload {i+1}: {workload} -> Strategy: {strategy}")
    
    print("\n3. Hardware Configuration Benchmark")
    
    # 基准测试
    test_image = np.random.rand(512, 512).astype(np.float32)
    benchmark_results = optimizer.benchmark_hardware_configurations(test_image)
    
    print(f"\nBenchmark results for {test_image.shape} image:")
    for config, metrics in benchmark_results.items():
        print(f"  {config}:")
        print(f"    Execution time: {metrics['execution_time']:.4f}s")
        print(f"    Throughput: {metrics['throughput']/1e6:.2f} Mpixels/s")
        if 'compute_time' in metrics:
            print(f"    Compute time: {metrics['compute_time']:.4f}s")
            print(f"    Transfer time: {metrics['transfer_time']:.4f}s")
    
    print("\n4. GPU Memory Management")
    
    # GPU内存使用情况
    gpu = optimizer.gpu
    
    # 分配一些内存
    mem1 = gpu.allocate_memory((1024, 1024), 'float32')
    mem2 = gpu.allocate_memory((512, 512, 3), 'uint8')
    
    memory_usage = gpu.get_memory_usage()
    print(f"\nGPU Memory Usage:")
    print(f"  Used memory: {memory_usage['used_memory_mb']:.2f} MB")
    print(f"  Allocated blocks: {memory_usage['allocated_blocks']}")
    print(f"  Memory efficiency: {memory_usage['memory_efficiency']*100:.1f}%")
    
    # 清理内存
    gpu.free_memory(mem1)
    gpu.free_memory(mem2)
    
    final_usage = gpu.get_memory_usage()
    print(f"\nAfter cleanup:")
    print(f"  Used memory: {final_usage['used_memory_mb']:.2f} MB")
    print(f"  Allocated blocks: {final_usage['allocated_blocks']}")
    
    print("\nGPU acceleration demo completed")

# 运行GPU加速演示
if __name__ == "__main__":
    demo_gpu_acceleration()

5. 实际应用案例

5.1 实时视频处理系统

结合前面介绍的算法，构建一个完整的实时视频处理系统：

import numpy as np
from typing import List, Dict, Tuple, Optional, Callable
from dataclasses import dataclass
from enum import Enum
import time
import threading
from queue import Queue, Empty

class ProcessingMode(Enum):
    """处理模式"""
    REAL_TIME = "real_time"
    BATCH = "batch"
    ADAPTIVE = "adaptive"

@dataclass
class VideoFrame:
    """视频帧数据"""
    data: np.ndarray
    timestamp: float
    frame_id: int
    metadata: Dict[str, any]

@dataclass
class ProcessingResult:
    """处理结果"""
    processed_frame: VideoFrame
    processing_time: float
    algorithm_used: str
    quality_metrics: Dict[str, float]

class RealTimeVideoProcessor:
    """实时视频处理系统"""
    
    def __init__(self, mode: ProcessingMode = ProcessingMode.ADAPTIVE):
        self.mode = mode
        self.input_queue = Queue(maxsize=10)
        self.output_queue = Queue(maxsize=10)
        self.processing_thread = None
        self.is_running = False
        
        # 性能监控
        self.frame_count = 0
        self.total_processing_time = 0
        self.dropped_frames = 0
        
        # 算法组件
        self.noise_reducer = self._init_noise_reducer()
        self.edge_enhancer = self._init_edge_enhancer()
        self.color_corrector = self._init_color_corrector()
        
        # 自适应参数
        self.target_fps = 30
        self.max_processing_time = 1.0 / self.target_fps
        self.quality_threshold = 0.8
        
    def _init_noise_reducer(self) -> Callable:
        """初始化降噪算法"""
        def gaussian_denoise(frame: np.ndarray, sigma: float = 1.0) -> np.ndarray:
            from scipy.ndimage import gaussian_filter
            if len(frame.shape) == 3:
                result = np.zeros_like(frame)
                for c in range(frame.shape[2]):
                    result[:, :, c] = gaussian_filter(frame[:, :, c], sigma)
                return result
            else:
                return gaussian_filter(frame, sigma)
        
        return gaussian_denoise
    
    def _init_edge_enhancer(self) -> Callable:
        """初始化边缘增强算法"""
        def unsharp_mask(frame: np.ndarray, strength: float = 1.0) -> np.ndarray:
            from scipy.ndimage import gaussian_filter
            
            if len(frame.shape) == 3:
                result = np.zeros_like(frame)
                for c in range(frame.shape[2]):
                    channel = frame[:, :, c].astype(np.float32)
                    blurred = gaussian_filter(channel, sigma=1.0)
                    mask = channel - blurred
                    enhanced = channel + strength * mask
                    result[:, :, c] = np.clip(enhanced, 0, 255).astype(np.uint8)
                return result
            else:
                frame_float = frame.astype(np.float32)
                blurred = gaussian_filter(frame_float, sigma=1.0)
                mask = frame_float - blurred
                enhanced = frame_float + strength * mask
                return np.clip(enhanced, 0, 255).astype(np.uint8)
        
        return unsharp_mask
    
    def _init_color_corrector(self) -> Callable:
        """初始化色彩校正算法"""
        def gamma_correction(frame: np.ndarray, gamma: float = 1.0) -> np.ndarray:
            # 伽马校正
            inv_gamma = 1.0 / gamma
            table = np.array([((i / 255.0) ** inv_gamma) * 255 for i in range(256)]).astype(np.uint8)
            return table[frame]
        
        return gamma_correction
    
    def add_frame(self, frame: VideoFrame) -> bool:
        """添加帧到处理队列"""
        try:
            self.input_queue.put(frame, block=False)
            return True
        except:
            self.dropped_frames += 1
            return False
    
    def get_processed_frame(self, timeout: float = 0.1) -> Optional[ProcessingResult]:
        """获取处理后的帧"""
        try:
            return self.output_queue.get(timeout=timeout)
        except Empty:
            return None
    
    def _select_processing_algorithm(self, frame: VideoFrame) -> Tuple[str, Dict[str, any]]:
        """选择处理算法"""
        # 分析帧特征
        frame_data = frame.data
        
        # 计算噪声水平
        if len(frame_data.shape) == 3:
            gray = np.mean(frame_data, axis=2)
        else:
            gray = frame_data
        
        # 简单的噪声检测
        laplacian_var = np.var(np.gradient(gray.astype(np.float32)))
        noise_level = min(1.0, laplacian_var / 1000)
        
        # 计算边缘密度
        from scipy.ndimage import sobel
        edges = sobel(gray.astype(np.float32))
        edge_density = np.mean(np.abs(edges)) / 255.0
        
        # 计算亮度分布
        brightness = np.mean(gray) / 255.0
        
        # 基于分析结果选择算法
        algorithms = []
        params = {}
        
        if noise_level > 0.3:
            algorithms.append('denoise')
            params['denoise_strength'] = min(2.0, noise_level * 3)
        
        if edge_density < 0.1:
            algorithms.append('enhance')
            params['enhance_strength'] = 1.5
        
        if brightness < 0.3 or brightness > 0.8:
            algorithms.append('color_correct')
            if brightness < 0.3:
                params['gamma'] = 0.7  # 提亮
            else:
                params['gamma'] = 1.3  # 压暗
        
        algorithm_name = '+'.join(algorithms) if algorithms else 'none'
        
        return algorithm_name, params
    
    def _process_frame(self, frame: VideoFrame) -> ProcessingResult:
        """处理单帧"""
        start_time = time.time()
        
        # 选择算法
        algorithm_name, params = self._select_processing_algorithm(frame)
        
        processed_data = frame.data.copy()
        
        # 应用算法
        if 'denoise' in algorithm_name:
            processed_data = self.noise_reducer(processed_data, 
                                              params.get('denoise_strength', 1.0))
        
        if 'enhance' in algorithm_name:
            processed_data = self.edge_enhancer(processed_data, 
                                              params.get('enhance_strength', 1.0))
        
        if 'color_correct' in algorithm_name:
            processed_data = self.color_corrector(processed_data, 
                                                 params.get('gamma', 1.0))
        
        processing_time = time.time() - start_time
        
        # 计算质量指标
        quality_metrics = self._calculate_quality_metrics(frame.data, processed_data)
        
        # 创建处理后的帧
        processed_frame = VideoFrame(
            data=processed_data,
            timestamp=frame.timestamp,
            frame_id=frame.frame_id,
            metadata={**frame.metadata, 'processed': True, 'algorithm': algorithm_name}
        )
        
        return ProcessingResult(
            processed_frame=processed_frame,
            processing_time=processing_time,
            algorithm_used=algorithm_name,
            quality_metrics=quality_metrics
        )
    
    def _calculate_quality_metrics(self, original: np.ndarray, 
                                 processed: np.ndarray) -> Dict[str, float]:
        """计算质量指标"""
        # PSNR
        mse = np.mean((original.astype(np.float32) - processed.astype(np.float32)) ** 2)
        if mse == 0:
            psnr = float('inf')
        else:
            psnr = 20 * np.log10(255.0 / np.sqrt(mse))
        
        # 结构相似性（简化版）
        orig_mean = np.mean(original)
        proc_mean = np.mean(processed)
        orig_var = np.var(original)
        proc_var = np.var(processed)
        covariance = np.mean((original - orig_mean) * (processed - proc_mean))
        
        c1 = (0.01 * 255) ** 2
        c2 = (0.03 * 255) ** 2
        
        ssim = ((2 * orig_mean * proc_mean + c1) * (2 * covariance + c2)) / \
               ((orig_mean ** 2 + proc_mean ** 2 + c1) * (orig_var + proc_var + c2))
        
        return {
            'psnr': psnr,
            'ssim': ssim,
            'mse': mse
        }
    
    def _processing_loop(self):
        """处理循环"""
        while self.is_running:
            try:
                frame = self.input_queue.get(timeout=0.1)
                
                # 检查是否需要跳帧（实时模式）
                if self.mode == ProcessingMode.REAL_TIME:
                    current_time = time.time()
                    if current_time - frame.timestamp > self.max_processing_time:
                        self.dropped_frames += 1
                        continue
                
                # 处理帧
                result = self._process_frame(frame)
                
                # 更新统计
                self.frame_count += 1
                self.total_processing_time += result.processing_time
                
                # 自适应调整
                if self.mode == ProcessingMode.ADAPTIVE:
                    avg_processing_time = self.total_processing_time / self.frame_count
                    if avg_processing_time > self.max_processing_time * 0.8:
                        # 降低处理质量以保持实时性
                        self.quality_threshold *= 0.95
                
                # 输出结果
                try:
                    self.output_queue.put(result, block=False)
                except:
                    # 输出队列满，丢弃最旧的结果
                    try:
                        self.output_queue.get(block=False)
                        self.output_queue.put(result, block=False)
                    except:
                        pass
                
            except Empty:
                continue
            except Exception as e:
                print(f"Processing error: {e}")
    
    def start(self):
        """启动处理"""
        if not self.is_running:
            self.is_running = True
            self.processing_thread = threading.Thread(target=self._processing_loop)
            self.processing_thread.start()
    
    def stop(self):
        """停止处理"""
        self.is_running = False
        if self.processing_thread:
            self.processing_thread.join()
    
    def get_statistics(self) -> Dict[str, float]:
        """获取处理统计"""
        if self.frame_count == 0:
            return {'fps': 0, 'avg_processing_time': 0, 'drop_rate': 0}
        
        avg_processing_time = self.total_processing_time / self.frame_count
        fps = 1.0 / avg_processing_time if avg_processing_time > 0 else 0
        drop_rate = self.dropped_frames / (self.frame_count + self.dropped_frames)
        
        return {
            'fps': fps,
            'avg_processing_time': avg_processing_time,
            'drop_rate': drop_rate,
            'total_frames': self.frame_count,
            'dropped_frames': self.dropped_frames
        }

# 实时视频处理演示
def demo_realtime_video_processing():
    print("Real-time Video Processing Demo")
    print("==============================")
    
    # 创建处理器
    processor = RealTimeVideoProcessor(ProcessingMode.ADAPTIVE)
    processor.start()
    
    print("\nStarting video processing simulation...")
    
    # 模拟视频流
    frame_width, frame_height = 640, 480
    total_frames = 100
    
    start_time = time.time()
    
    for i in range(total_frames):
        # 生成模拟帧
        frame_data = np.random.randint(0, 256, (frame_height, frame_width, 3), dtype=np.uint8)
        
        # 添加一些特征
        if i % 10 == 0:  # 每10帧添加噪声
            noise = np.random.normal(0, 20, frame_data.shape)
            frame_data = np.clip(frame_data.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        
        frame = VideoFrame(
            data=frame_data,
            timestamp=time.time(),
            frame_id=i,
            metadata={'source': 'simulation'}
        )
        
        # 添加到处理队列
        success = processor.add_frame(frame)
        if not success:
            print(f"Frame {i} dropped (input queue full)")
        
        # 获取处理结果
        result = processor.get_processed_frame(timeout=0.01)
        if result:
            print(f"Processed frame {result.processed_frame.frame_id}: "
                  f"algorithm={result.algorithm_used}, "
                  f"time={result.processing_time:.3f}s, "
                  f"PSNR={result.quality_metrics['psnr']:.1f}dB")
        
        # 模拟帧率
        time.sleep(1.0 / 30)  # 30 FPS
    
    # 等待处理完成
    time.sleep(1.0)
    
    # 获取剩余结果
    remaining_results = []
    while True:
        result = processor.get_processed_frame(timeout=0.1)
        if result is None:
            break
        remaining_results.append(result)
    
    processor.stop()
    
    # 显示统计信息
    stats = processor.get_statistics()
    total_time = time.time() - start_time
    
    print(f"\nProcessing Statistics:")
    print(f"  Total time: {total_time:.2f}s")
    print(f"  Processed frames: {stats['total_frames']}")
    print(f"  Dropped frames: {stats['dropped_frames']}")
    print(f"  Drop rate: {stats['drop_rate']*100:.1f}%")
    print(f"  Average processing time: {stats['avg_processing_time']:.3f}s")
    print(f"  Effective FPS: {stats['fps']:.1f}")
    print(f"  Target FPS: 30")
    
    # 分析算法使用情况
    algorithm_usage = {}
    for result in remaining_results:
        algo = result.algorithm_used
        algorithm_usage[algo] = algorithm_usage.get(algo, 0) + 1
    
    print(f"\nAlgorithm Usage:")
    for algo, count in algorithm_usage.items():
        print(f"  {algo}: {count} frames ({count/len(remaining_results)*100:.1f}%)")
    
    print("\nReal-time video processing demo completed")

# 运行实时视频处理演示
if __name__ == "__main__":
    demo_realtime_video_processing()

5.2 音频处理应用案例

音频处理在实际应用中同样重要，以下是一个综合的音频处理系统：

import numpy as np
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
from enum import Enum
import time

class AudioApplicationType(Enum):
    """音频应用类型"""
    MUSIC_ENHANCEMENT = "music_enhancement"
    SPEECH_PROCESSING = "speech_processing"
    NOISE_REDUCTION = "noise_reduction"
    AUDIO_ANALYSIS = "audio_analysis"

@dataclass
class AudioSegment:
    """音频片段"""
    data: np.ndarray
    sample_rate: int
    channels: int
    timestamp: float
    metadata: Dict[str, any]

class AudioProcessingPipeline:
    """音频处理管道"""
    
    def __init__(self, application_type: AudioApplicationType):
        self.application_type = application_type
        self.sample_rate = 44100
        self.frame_size = 1024
        self.hop_length = 512
        
        # 根据应用类型配置处理链
        self.processing_chain = self._configure_processing_chain()
    
    def _configure_processing_chain(self) -> List[str]:
        """配置处理链"""
        if self.application_type == AudioApplicationType.MUSIC_ENHANCEMENT:
            return ['normalize', 'eq', 'stereo_enhance', 'limiter']
        elif self.application_type == AudioApplicationType.SPEECH_PROCESSING:
            return ['normalize', 'noise_gate', 'compressor', 'de_esser']
        elif self.application_type == AudioApplicationType.NOISE_REDUCTION:
            return ['spectral_subtraction', 'wiener_filter', 'normalize']
        else:  # AUDIO_ANALYSIS
            return ['normalize', 'feature_extraction']
    
    def spectral_subtraction(self, audio: np.ndarray, noise_factor: float = 2.0) -> np.ndarray:
        """频谱减法降噪"""
        # 短时傅里叶变换
        stft = self._stft(audio)
        magnitude = np.abs(stft)
        phase = np.angle(stft)
        
        # 估计噪声谱（使用前几帧）
        noise_frames = 10
        noise_spectrum = np.mean(magnitude[:noise_frames], axis=0)
        
        # 频谱减法
        enhanced_magnitude = magnitude - noise_factor * noise_spectrum
        enhanced_magnitude = np.maximum(enhanced_magnitude, 0.1 * magnitude)
        
        # 重构信号
        enhanced_stft = enhanced_magnitude * np.exp(1j * phase)
        enhanced_audio = self._istft(enhanced_stft)
        
        return enhanced_audio
    
    def wiener_filter(self, audio: np.ndarray, noise_variance: float = 0.01) -> np.ndarray:
        """维纳滤波"""
        stft = self._stft(audio)
        magnitude = np.abs(stft)
        phase = np.angle(stft)
        
        # 估计信号功率
        signal_power = magnitude ** 2
        
        # 维纳滤波
        wiener_gain = signal_power / (signal_power + noise_variance)
        filtered_magnitude = magnitude * wiener_gain
        
        # 重构信号
        filtered_stft = filtered_magnitude * np.exp(1j * phase)
        filtered_audio = self._istft(filtered_stft)
        
        return filtered_audio
    
    def dynamic_range_compressor(self, audio: np.ndarray, threshold: float = -20, 
                               ratio: float = 4.0, attack: float = 0.003, 
                               release: float = 0.1) -> np.ndarray:
        """动态范围压缩器"""
        # 转换为dB
        audio_db = 20 * np.log10(np.abs(audio) + 1e-10)
        
        # 计算增益减少
        gain_reduction = np.zeros_like(audio_db)
        mask = audio_db > threshold
        gain_reduction[mask] = (audio_db[mask] - threshold) * (1 - 1/ratio)
        
        # 应用攻击和释放时间
        smoothed_gain = self._smooth_gain(gain_reduction, attack, release)
        
        # 应用压缩
        compressed_db = audio_db - smoothed_gain
        compressed_audio = np.sign(audio) * (10 ** (compressed_db / 20))
        
        return compressed_audio
    
    def _smooth_gain(self, gain_reduction: np.ndarray, attack: float, release: float) -> np.ndarray:
        """平滑增益变化"""
        smoothed = np.zeros_like(gain_reduction)
        attack_coeff = np.exp(-1 / (attack * self.sample_rate))
        release_coeff = np.exp(-1 / (release * self.sample_rate))
        
        for i in range(1, len(gain_reduction)):
            if gain_reduction[i] > smoothed[i-1]:
                # 攻击
                smoothed[i] = attack_coeff * smoothed[i-1] + (1 - attack_coeff) * gain_reduction[i]
            else:
                # 释放
                smoothed[i] = release_coeff * smoothed[i-1] + (1 - release_coeff) * gain_reduction[i]
        
        return smoothed
    
    def parametric_equalizer(self, audio: np.ndarray, eq_bands: List[Dict]) -> np.ndarray:
        """参数均衡器"""
        # 每个频段: {'freq': 中心频率, 'gain': 增益(dB), 'q': 品质因数}
        result = audio.copy()
        
        for band in eq_bands:
            freq = band['freq']
            gain = band['gain']
            q = band['q']
            
            # 设计二阶IIR滤波器
            w = 2 * np.pi * freq / self.sample_rate
            alpha = np.sin(w) / (2 * q)
            A = 10 ** (gain / 40)
            
            # 峰值/凹陷滤波器系数
            b0 = 1 + alpha * A
            b1 = -2 * np.cos(w)
            b2 = 1 - alpha * A
            a0 = 1 + alpha / A
            a1 = -2 * np.cos(w)
            a2 = 1 - alpha / A
            
            # 归一化
            b = np.array([b0, b1, b2]) / a0
            a = np.array([1, a1/a0, a2/a0])
            
            # 应用滤波器
            from scipy.signal import lfilter
            result = lfilter(b, a, result)
        
        return result
    
    def stereo_enhancer(self, stereo_audio: np.ndarray, width: float = 1.5) -> np.ndarray:
        """立体声增强"""
        if stereo_audio.shape[1] != 2:
            return stereo_audio
        
        left = stereo_audio[:, 0]
        right = stereo_audio[:, 1]
        
        # 计算中间和侧面信号
        mid = (left + right) / 2
        side = (left - right) / 2
        
        # 增强侧面信号
        enhanced_side = side * width
        
        # 重构立体声
        enhanced_left = mid + enhanced_side
        enhanced_right = mid - enhanced_side
        
        return np.column_stack([enhanced_left, enhanced_right])
    
    def _stft(self, audio: np.ndarray) -> np.ndarray:
        """短时傅里叶变换"""
        # 简化的STFT实现
        n_frames = (len(audio) - self.frame_size) // self.hop_length + 1
        stft_matrix = np.zeros((n_frames, self.frame_size // 2 + 1), dtype=complex)
        
        # 汉宁窗
        window = np.hanning(self.frame_size)
        
        for i in range(n_frames):
            start = i * self.hop_length
            frame = audio[start:start + self.frame_size] * window
            stft_matrix[i] = np.fft.rfft(frame)
        
        return stft_matrix
    
    def _istft(self, stft_matrix: np.ndarray) -> np.ndarray:
        """逆短时傅里叶变换"""
        n_frames, n_freq = stft_matrix.shape
        audio_length = (n_frames - 1) * self.hop_length + self.frame_size
        audio = np.zeros(audio_length)
        window = np.hanning(self.frame_size)
        
        for i in range(n_frames):
            start = i * self.hop_length
            frame = np.fft.irfft(stft_matrix[i], n=self.frame_size)
            audio[start:start + self.frame_size] += frame * window
        
        return audio
    
    def process_audio(self, audio_segment: AudioSegment) -> AudioSegment:
        """处理音频"""
        processed_data = audio_segment.data.copy()
        processing_log = []
        
        for process in self.processing_chain:
            start_time = time.time()
            
            if process == 'normalize':
                max_val = np.max(np.abs(processed_data))
                if max_val > 0:
                    processed_data = processed_data / max_val * 0.95
            
            elif process == 'spectral_subtraction':
                if len(processed_data.shape) == 1:
                    processed_data = self.spectral_subtraction(processed_data)
                else:
                    for ch in range(processed_data.shape[1]):
                        processed_data[:, ch] = self.spectral_subtraction(processed_data[:, ch])
            
            elif process == 'wiener_filter':
                if len(processed_data.shape) == 1:
                    processed_data = self.wiener_filter(processed_data)
                else:
                    for ch in range(processed_data.shape[1]):
                        processed_data[:, ch] = self.wiener_filter(processed_data[:, ch])
            
            elif process == 'compressor':
                if len(processed_data.shape) == 1:
                    processed_data = self.dynamic_range_compressor(processed_data)
                else:
                    for ch in range(processed_data.shape[1]):
                        processed_data[:, ch] = self.dynamic_range_compressor(processed_data[:, ch])
            
            elif process == 'eq':
                # 默认EQ设置
                eq_bands = [
                    {'freq': 100, 'gain': 2, 'q': 1.0},   # 低频增强
                    {'freq': 1000, 'gain': 0, 'q': 1.0},  # 中频保持
                    {'freq': 8000, 'gain': 1, 'q': 1.0}   # 高频轻微增强
                ]
                
                if len(processed_data.shape) == 1:
                    processed_data = self.parametric_equalizer(processed_data, eq_bands)
                else:
                    for ch in range(processed_data.shape[1]):
                        processed_data[:, ch] = self.parametric_equalizer(processed_data[:, ch], eq_bands)
            
            elif process == 'stereo_enhance':
                if len(processed_data.shape) == 2 and processed_data.shape[1] == 2:
                    processed_data = self.stereo_enhancer(processed_data)
            
            processing_time = time.time() - start_time
            processing_log.append(f"{process}: {processing_time:.3f}s")
        
        # 创建处理后的音频段
        processed_segment = AudioSegment(
            data=processed_data,
            sample_rate=audio_segment.sample_rate,
            channels=audio_segment.channels,
            timestamp=audio_segment.timestamp,
            metadata={
                **audio_segment.metadata,
                'processed': True,
                'processing_chain': self.processing_chain,
                'processing_log': processing_log
            }
        )
        
        return processed_segment

# 音频处理演示
def demo_audio_processing():
    print("Audio Processing Pipeline Demo")
    print("=============================")
    
    # 创建测试音频
    duration = 2.0  # 秒
    sample_rate = 44100
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # 生成测试信号：440Hz正弦波 + 噪声
    clean_signal = np.sin(2 * np.pi * 440 * t)
    noise = np.random.normal(0, 0.1, len(clean_signal))
    noisy_signal = clean_signal + noise
    
    # 创建立体声版本
    stereo_signal = np.column_stack([noisy_signal, noisy_signal * 0.8])
    
    print(f"\nTest audio: {duration}s, {sample_rate}Hz, stereo")
    print(f"Signal shape: {stereo_signal.shape}")
    
    # 测试不同应用类型
    applications = [
        AudioApplicationType.NOISE_REDUCTION,
        AudioApplicationType.MUSIC_ENHANCEMENT,
        AudioApplicationType.SPEECH_PROCESSING
    ]
    
    for app_type in applications:
        print(f"\n{app_type.value.upper()} Processing:")
        print("-" * 40)
        
        # 创建处理管道
        pipeline = AudioProcessingPipeline(app_type)
        print(f"Processing chain: {' -> '.join(pipeline.processing_chain)}")
        
        # 创建音频段
        audio_segment = AudioSegment(
            data=stereo_signal.copy(),
            sample_rate=sample_rate,
            channels=2,
            timestamp=time.time(),
            metadata={'source': 'test_signal'}
        )
        
        # 处理音频
        start_time = time.time()
        processed_segment = pipeline.process_audio(audio_segment)
        total_time = time.time() - start_time
        
        print(f"Total processing time: {total_time:.3f}s")
        print(f"Processing log: {processed_segment.metadata['processing_log']}")
        
        # 计算处理前后的统计信息
        original_rms = np.sqrt(np.mean(audio_segment.data ** 2))
        processed_rms = np.sqrt(np.mean(processed_segment.data ** 2))
        
        print(f"Original RMS: {original_rms:.4f}")
        print(f"Processed RMS: {processed_rms:.4f}")
        print(f"RMS change: {processed_rms/original_rms:.2f}x")
        
        # 计算频谱特征
        original_spectrum = np.abs(np.fft.rfft(audio_segment.data[:, 0]))
        processed_spectrum = np.abs(np.fft.rfft(processed_segment.data[:, 0]))
        
        # 找到主要频率成分
        freqs = np.fft.rfftfreq(len(audio_segment.data[:, 0]), 1/sample_rate)
        peak_freq_idx = np.argmax(original_spectrum)
        peak_freq = freqs[peak_freq_idx]
        
        original_peak_power = original_spectrum[peak_freq_idx]
        processed_peak_power = processed_spectrum[peak_freq_idx]
        
        print(f"Peak frequency: {peak_freq:.1f}Hz")
        print(f"Peak power change: {processed_peak_power/original_peak_power:.2f}x")
    
    print("\nAudio processing demo completed")

# 运行音频处理演示
if __name__ == "__main__":
    demo_audio_processing()

6. 总结与未来展望

6.1 关键技术要点

通过本文的深入探讨，我们可以总结出音视频处理算法与优化的几个关键要点：

图像处理核心算法

空域处理：像素级操作、卷积滤波、形态学处理
频域处理：傅里叶变换、小波变换、频域滤波
特征提取：边缘检测、角点检测、纹理分析
图像增强：直方图均衡化、对比度增强、锐化

音频处理核心技术

时域处理：滤波、增益控制、动态范围处理
频域分析：STFT、MFCC、频谱分析
降噪技术：频谱减法、维纳滤波、自适应滤波
音频特征：基频检测、音色分析、节奏提取

编解码优化策略

运动估计：块匹配、亚像素精度、快速搜索算法
变换编码：DCT、小波变换、整数变换
量化优化：率失真优化、感知量化、自适应量化
熵编码：霍夫曼编码、算术编码、CABAC

性能优化方法

并行处理：多线程、SIMD、GPU加速
内存优化：缓存友好、数据局部性、内存池
算法优化：快速算法、近似算法、自适应算法
硬件加速：专用芯片、FPGA、神经网络处理器

6.2 技术发展趋势

AI与机器学习集成

class AIEnhancedProcessor:
    """AI增强的音视频处理器"""
    
    def __init__(self):
        # 模拟AI模型
        self.denoising_model = self._load_denoising_model()
        self.super_resolution_model = self._load_sr_model()
        self.audio_enhancement_model = self._load_audio_model()
    
    def _load_denoising_model(self):
        """加载降噪模型"""
        # 实际应用中会加载训练好的神经网络
        return lambda x: x  # 占位符
    
    def _load_sr_model(self):
        """加载超分辨率模型"""
        return lambda x: x  # 占位符
    
    def _load_audio_model(self):
        """加载音频增强模型"""
        return lambda x: x  # 占位符
    
    def ai_denoise(self, image: np.ndarray) -> np.ndarray:
        """AI降噪"""
        # 实际应用中会使用深度学习模型
        # 这里使用传统方法模拟
        from scipy.ndimage import gaussian_filter
        return gaussian_filter(image, sigma=0.8)
    
    def ai_super_resolution(self, image: np.ndarray, scale: int = 2) -> np.ndarray:
        """AI超分辨率"""
        # 模拟超分辨率效果
        from scipy.ndimage import zoom
        upscaled = zoom(image, (scale, scale, 1) if len(image.shape) == 3 else (scale, scale))
        return upscaled.astype(image.dtype)
    
    def adaptive_processing(self, data: np.ndarray, content_type: str) -> np.ndarray:
        """自适应处理"""
        if content_type == 'face':
            # 人脸特定处理
            return self.ai_denoise(data)
        elif content_type == 'text':
            # 文本图像处理
            return self.ai_super_resolution(data)
        else:
            # 通用处理
            return data

# AI增强处理演示
def demo_ai_enhanced_processing():
    print("AI-Enhanced Processing Demo")
    print("==========================")
    
    processor = AIEnhancedProcessor()
    
    # 创建测试图像
    test_image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
    
    print(f"\nOriginal image shape: {test_image.shape}")
    
    # AI降噪
    denoised = processor.ai_denoise(test_image)
    print(f"Denoised image shape: {denoised.shape}")
    
    # AI超分辨率
    sr_image = processor.ai_super_resolution(test_image, scale=2)
    print(f"Super-resolution image shape: {sr_image.shape}")
    
    # 自适应处理
    face_processed = processor.adaptive_processing(test_image, 'face')
    text_processed = processor.adaptive_processing(test_image, 'text')
    
    print(f"Face processing completed: {face_processed.shape}")
    print(f"Text processing completed: {text_processed.shape}")
    
    print("\nAI-enhanced processing demo completed")

if __name__ == "__main__":
    demo_ai_enhanced_processing()

边缘计算与实时处理

边缘AI芯片：专用神经网络处理器
实时优化：低延迟算法、流水线处理
资源约束：功耗优化、内存限制适应
分布式处理：云边协同、负载均衡

WebAssembly与跨平台

浏览器原生性能：接近原生代码速度
跨平台部署：一次编写，多平台运行
安全沙箱：内存安全、权限控制
渐进式增强：向后兼容、功能检测

6.3 实际应用建议

算法选择策略

class AlgorithmSelector:
    """算法选择器"""
    
    def __init__(self):
        self.performance_profiles = {
            'gaussian_blur': {'speed': 9, 'quality': 7, 'memory': 8},
            'bilateral_filter': {'speed': 5, 'quality': 9, 'memory': 6},
            'median_filter': {'speed': 6, 'quality': 8, 'memory': 7},
            'wiener_filter': {'speed': 4, 'quality': 9, 'memory': 5}
        }
    
    def select_algorithm(self, requirements: Dict[str, int]) -> str:
        """根据需求选择算法"""
        best_score = 0
        best_algorithm = None
        
        for algo, profile in self.performance_profiles.items():
            score = 0
            for req, weight in requirements.items():
                if req in profile:
                    score += profile[req] * weight
            
            if score > best_score:
                best_score = score
                best_algorithm = algo
        
        return best_algorithm
    
    def benchmark_algorithms(self, test_data: np.ndarray) -> Dict[str, Dict[str, float]]:
        """基准测试算法"""
        results = {}
        
        for algo in self.performance_profiles.keys():
            start_time = time.time()
            
            # 模拟算法执行
            if algo == 'gaussian_blur':
                from scipy.ndimage import gaussian_filter
                result = gaussian_filter(test_data, sigma=1.0)
            elif algo == 'bilateral_filter':
                # 简化的双边滤波
                result = gaussian_filter(test_data, sigma=1.0)
            elif algo == 'median_filter':
                from scipy.ndimage import median_filter
                result = median_filter(test_data, size=3)
            else:
                result = test_data.copy()
            
            execution_time = time.time() - start_time
            
            # 计算质量指标（简化）
            mse = np.mean((test_data.astype(np.float32) - result.astype(np.float32)) ** 2)
            psnr = 20 * np.log10(255.0 / np.sqrt(mse)) if mse > 0 else float('inf')
            
            results[algo] = {
                'execution_time': execution_time,
                'psnr': psnr,
                'memory_usage': result.nbytes
            }
        
        return results

# 算法选择演示
def demo_algorithm_selection():
    print("Algorithm Selection Demo")
    print("=======================")
    
    selector = AlgorithmSelector()
    
    # 不同应用场景的需求
    scenarios = {
        'real_time_video': {'speed': 10, 'quality': 5, 'memory': 8},
        'high_quality_photo': {'speed': 3, 'quality': 10, 'memory': 5},
        'mobile_app': {'speed': 7, 'quality': 6, 'memory': 9},
        'batch_processing': {'speed': 5, 'quality': 9, 'memory': 4}
    }
    
    print("\nAlgorithm Selection for Different Scenarios:")
    for scenario, requirements in scenarios.items():
        selected = selector.select_algorithm(requirements)
        print(f"  {scenario}: {selected}")
        print(f"    Requirements: {requirements}")
    
    # 基准测试
    test_image = np.random.randint(0, 256, (256, 256), dtype=np.uint8)
    benchmark_results = selector.benchmark_algorithms(test_image)
    
    print(f"\nBenchmark Results (image size: {test_image.shape}):")
    for algo, metrics in benchmark_results.items():
        print(f"  {algo}:")
        print(f"    Execution time: {metrics['execution_time']:.4f}s")
        print(f"    PSNR: {metrics['psnr']:.2f}dB")
        print(f"    Memory usage: {metrics['memory_usage']/1024:.1f}KB")
    
    print("\nAlgorithm selection demo completed")

if __name__ == "__main__":
    demo_algorithm_selection()

性能调优指南

分析瓶颈：使用性能分析工具识别热点
选择合适算法：平衡质量、速度和资源消耗
并行化处理：充分利用多核和GPU资源
内存优化：减少内存分配和拷贝
缓存策略：合理使用缓存提高数据访问效率

跨平台开发考虑

抽象层设计：统一的API接口
平台特定优化：利用平台特有功能
资源管理：适应不同平台的资源限制
测试策略：多平台兼容性测试

6.4 持续学习建议

理论基础：深入学习数字信号处理、图像处理理论
实践项目：参与开源项目，积累实战经验
新技术跟踪：关注AI、WebAssembly等新兴技术
性能优化：学习底层优化技术和硬件特性
行业应用：了解不同领域的具体需求和挑战

音视频处理技术正在快速发展，AI技术的融入、硬件性能的提升以及新的应用场景不断涌现。掌握核心算法原理、优化技术和实际应用经验，将有助于在这个充满机遇的领域中取得成功。

通过本文的学习，读者应该能够：

理解音视频处理的核心算法原理
掌握性能优化的关键技术
具备实际项目开发的能力
了解技术发展趋势和未来方向

希望这些知识能够为您的音视频开发之路提供有价值的指导和参考。

编外计划 - 日志

To be or not to be,--that is question.