This commit is contained in:
lzc
2025-07-02 13:54:05 +08:00
commit 4b3870440c
6351 changed files with 282880 additions and 0 deletions

278
models/lstm_model.py Normal file
View File

@@ -0,0 +1,278 @@
"""
LSTM模型定义模块用于构建语音情感识别模型
"""
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, LSTM, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import os
class EmotionModel:
"""
语音情感识别模型类
"""
def __init__(self, input_shape, num_emotions, num_languages=None, include_language=False):
"""
初始化模型
Args:
input_shape: 输入特征的形状,例如 (1, 60) 表示每个样本有1个时间步和60个特征
num_emotions: 情感类别数量
num_languages: 语言类别数量如果include_language为True
include_language: 是否将语言作为额外特征
"""
self.input_shape = input_shape
self.num_emotions = num_emotions
self.num_languages = num_languages
self.include_language = include_language
self.model = None
self.save_weights_only = True # 默认只保存权重
def build_model(self, lstm_units=128, dropout_rate=0.3, regularization_rate=0.001):
"""
构建LSTM模型
Args:
lstm_units: LSTM层的单元数
dropout_rate: Dropout层的丢弃率
regularization_rate: L2正则化系数
Returns:
构建好的模型
"""
# 定义正则化器
regularizer = l2(regularization_rate)
# 情感识别模型
if self.include_language and self.num_languages is not None:
# 如果包含语言信息使用函数式API构建模型
# 音频特征输入
audio_input = Input(shape=self.input_shape, name='audio_input')
# LSTM处理音频特征
x = Bidirectional(LSTM(lstm_units, return_sequences=False,
kernel_regularizer=regularizer,
recurrent_regularizer=regularizer))(audio_input)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# 语言类别输入
language_input = Input(shape=(self.num_languages,), name='language_input')
# 合并特征
merged = tf.keras.layers.concatenate([x, language_input])
# 全连接层
x = Dense(128, activation='relu', kernel_regularizer=regularizer)(merged)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# 输出层
output = Dense(self.num_emotions, activation='softmax', name='emotion_output')(x)
# 创建模型
self.model = Model(inputs=[audio_input, language_input], outputs=output)
else:
# 如果不包含语言信息使用顺序API构建模型
self.model = Sequential([
Bidirectional(LSTM(lstm_units, return_sequences=True,
kernel_regularizer=regularizer,
recurrent_regularizer=regularizer),
input_shape=self.input_shape),
BatchNormalization(),
Dropout(dropout_rate),
Bidirectional(LSTM(lstm_units, return_sequences=False,
kernel_regularizer=regularizer,
recurrent_regularizer=regularizer)),
BatchNormalization(),
Dropout(dropout_rate),
Dense(128, activation='relu', kernel_regularizer=regularizer),
BatchNormalization(),
Dropout(dropout_rate),
Dense(64, activation='relu', kernel_regularizer=regularizer),
BatchNormalization(),
Dropout(dropout_rate),
Dense(self.num_emotions, activation='softmax')
])
return self.model
def compile_model(self, learning_rate=0.001):
"""
编译模型
Args:
learning_rate: 学习率
"""
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(
optimizer=optimizer,
loss='categorical_crossentropy',
metrics=['accuracy']
)
def get_callbacks(self, checkpoint_path=None):
"""
获取回调函数列表
Args:
checkpoint_path: 模型检查点保存路径
Returns:
callbacks: 回调函数列表
"""
callbacks = []
# 提前停止
early_stopping = EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
)
callbacks.append(early_stopping)
# 学习率衰减
reduce_lr = ReduceLROnPlateau(
monitor='val_loss',
factor=0.2,
patience=5,
min_lr=1e-6
)
callbacks.append(reduce_lr)
# 模型检查点
if checkpoint_path:
# 确保文件扩展名正确
if self.save_weights_only:
# 如果只保存权重,确保路径以.weights.h5结尾
if not checkpoint_path.endswith('.weights.h5'):
# 修改路径,确保扩展名正确
checkpoint_path = os.path.splitext(checkpoint_path)[0] + '.weights.h5'
checkpoint = ModelCheckpoint(
filepath=checkpoint_path,
monitor='val_loss',
save_best_only=True,
save_weights_only=self.save_weights_only,
mode='min',
verbose=1
)
callbacks.append(checkpoint)
return callbacks
def train(self, X_train, y_train, X_val, y_val, epochs=100, batch_size=32,
checkpoint_path='best_model.h5', language_train=None, language_val=None):
"""
训练模型
Args:
X_train: 训练集特征
y_train: 训练集标签
X_val: 验证集特征
y_val: 验证集标签
epochs: 训练轮数
batch_size: 批量大小
checkpoint_path: 模型检查点保存路径
language_train: 训练集语言标签如果include_language为True
language_val: 验证集语言标签如果include_language为True
Returns:
训练历史
"""
callbacks = self.get_callbacks(checkpoint_path)
if self.include_language and language_train is not None and language_val is not None:
# 如果包含语言信息
history = self.model.fit(
[X_train, language_train], y_train,
validation_data=([X_val, language_val], y_val),
epochs=epochs,
batch_size=batch_size,
callbacks=callbacks,
verbose=1
)
else:
# 如果不包含语言信息
history = self.model.fit(
X_train, y_train,
validation_data=(X_val, y_val),
epochs=epochs,
batch_size=batch_size,
callbacks=callbacks,
verbose=1
)
return history
def evaluate(self, X_test, y_test, language_test=None):
"""
评估模型
Args:
X_test: 测试集特征
y_test: 测试集标签
language_test: 测试集语言标签如果include_language为True
Returns:
评估结果(损失和准确率)
"""
if self.include_language and language_test is not None:
# 如果包含语言信息
return self.model.evaluate([X_test, language_test], y_test)
else:
# 如果不包含语言信息
return self.model.evaluate(X_test, y_test)
def predict(self, X, language=None):
"""
预测情感
Args:
X: 特征
language: 语言标签如果include_language为True
Returns:
预测结果
"""
if self.include_language and language is not None:
# 如果包含语言信息
return self.model.predict([X, language])
else:
# 如果不包含语言信息
return self.model.predict(X)
def save(self, filepath):
"""
保存模型
Args:
filepath: 保存路径
"""
self.model.save(filepath)
@classmethod
def load(cls, filepath, custom_objects=None):
"""
加载模型
Args:
filepath: 模型文件路径
custom_objects: 自定义对象
Returns:
加载的模型
"""
model = tf.keras.models.load_model(filepath, custom_objects=custom_objects)
return model