9763 lines
287 KiB
Python
9763 lines
287 KiB
Python
#!/usr/bin/env python
|
||
# coding: utf-8
|
||
|
||
# In[ ]:
|
||
|
||
|
||
# 语音情感分析实验
|
||
|
||
# 本实验将实现从多语言语音数据中提取特征,并建立LSTM模型进行情感分析。
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
## 1. 对多语言语音数据集进行预处理
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
### 1.1 在下面空白处写出数据读取,使用 librosa 库对数据进行初步读取与探索的 python 代码
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CASIA' # the correct CASIA dataset path
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file at its native sampling rate and print basic facts.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to display.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes the emotion from the file name and passes the
    audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the sample is reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Takes the first speaker directory under ``SAVEE_PATH/AudioData`` and its
    first ``*.wav`` file (both in sorted order), decodes the emotion from the
    file name and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker directory or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: a stray file next to the speaker folders would
    # otherwise be picked as the "speaker" and yield no audio.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Names look like a01.wav / sa01.wav: emotion letters plus a take number.
    # Drop the extension first -- otherwise the letters of "wav" end up in the
    # code ("awav") and no file ever matches -- then strip the trailing digits.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the dataset prefix the speaker ("DC_a01.wav");
    # taking the text after the last underscore tolerates that -- TODO confirm.
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe, and when possible plot, one sample clip from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and the
    first emotion directory that ``os.listdir`` reports, then takes the first
    ``*.wav`` file (or, failing that, the first ``*.peak`` file) found there.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample that
        loads successfully; ``None`` in every other case.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        """List the sub-directory names of ``parent``, skipping '_desktop.ini'."""
        return [name for name in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini']

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    audio_files = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                   or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    All three datasets are scanned, using these directory layouts:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``. A file whose name carries no
    recognisable emotion code is counted under '未知', so the percentages of a
    dataset add up to 100%.

    Returns:
        None. The statistics are only printed.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def record(dataset, emotion):
        """Add one file with the given emotion label to a dataset's tally."""
        entry = stats[dataset]
        entry['total'] += 1
        entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # glob yields nothing for a missing root, so absent datasets stay at zero.
    # Sorting keeps the printed emotion order stable between runs.

    # RAVDESS names are dash-separated; the third field is the emotion code.
    for path in sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))):
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        record('RAVDESS', ravdess_emotions.get(code, '未知'))

    # SAVEE names are emotion letters plus a take number, e.g. sa01.wav; an
    # optional "<speaker>_" prefix is tolerated.
    for path in sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))):
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
        record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA keeps each emotion in its own directory; use that name as label.
    for path in sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))):
        record('CASIA', os.path.basename(os.path.dirname(path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not stop the remaining ones.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # Note: this is the folder's current name; the original comment says the path is handled in code. NOTE(review): no such handling is visible nearby, and the spelling is 'CAISA', not 'CASIA' -- confirm against the folder on disk.
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file at its native sampling rate and print basic facts.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to display.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes the emotion from the file name and passes the
    audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the sample is reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Takes the first speaker directory under ``SAVEE_PATH/AudioData`` and its
    first ``*.wav`` file (both in sorted order), decodes the emotion from the
    file name and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker directory or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: a stray file next to the speaker folders would
    # otherwise be picked as the "speaker" and yield no audio.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Names look like a01.wav / sa01.wav: emotion letters plus a take number.
    # Drop the extension first -- otherwise the letters of "wav" end up in the
    # code ("awav") and no file ever matches -- then strip the trailing digits.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the dataset prefix the speaker ("DC_a01.wav");
    # taking the text after the last underscore tolerates that -- TODO confirm.
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe, and when possible plot, one sample clip from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and the
    first emotion directory that ``os.listdir`` reports, then takes the first
    ``*.wav`` file (or, failing that, the first ``*.peak`` file) found there.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample that
        loads successfully; ``None`` in every other case.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        """List the sub-directory names of ``parent``, skipping '_desktop.ini'."""
        return [name for name in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini']

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    audio_files = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                   or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    All three datasets are scanned, using these directory layouts:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``. A file whose name carries no
    recognisable emotion code is counted under '未知', so the percentages of a
    dataset add up to 100%.

    Returns:
        None. The statistics are only printed.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def record(dataset, emotion):
        """Add one file with the given emotion label to a dataset's tally."""
        entry = stats[dataset]
        entry['total'] += 1
        entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # glob yields nothing for a missing root, so absent datasets stay at zero.
    # Sorting keeps the printed emotion order stable between runs.

    # RAVDESS names are dash-separated; the third field is the emotion code.
    for path in sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))):
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        record('RAVDESS', ravdess_emotions.get(code, '未知'))

    # SAVEE names are emotion letters plus a take number, e.g. sa01.wav; an
    # optional "<speaker>_" prefix is tolerated.
    for path in sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))):
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
        record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA keeps each emotion in its own directory; use that name as label.
    for path in sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))):
        record('CASIA', os.path.basename(os.path.dirname(path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not stop the remaining ones.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # Note: this is the folder's current name; the original comment says the path is handled in code. NOTE(review): no such handling is visible nearby, and the spelling is 'CAISA', not 'CASIA' -- confirm against the folder on disk.
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file at its native sampling rate and print basic facts.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to display.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes the emotion from the file name and passes the
    audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the sample is reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Takes the first speaker directory under ``SAVEE_PATH/AudioData`` and its
    first ``*.wav`` file (both in sorted order), decodes the emotion from the
    file name and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker directory or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: a stray file next to the speaker folders would
    # otherwise be picked as the "speaker" and yield no audio.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Names look like a01.wav / sa01.wav: emotion letters plus a take number.
    # Drop the extension first -- otherwise the letters of "wav" end up in the
    # code ("awav") and no file ever matches -- then strip the trailing digits.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the dataset prefix the speaker ("DC_a01.wav");
    # taking the text after the last underscore tolerates that -- TODO confirm.
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe, and when possible plot, one sample clip from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and the
    first emotion directory that ``os.listdir`` reports, then takes the first
    ``*.wav`` file (or, failing that, the first ``*.peak`` file) found there.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample that
        loads successfully; ``None`` in every other case.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        """List the sub-directory names of ``parent``, skipping '_desktop.ini'."""
        return [name for name in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini']

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    audio_files = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                   or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    All three datasets are scanned, using these directory layouts:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``. A file whose name carries no
    recognisable emotion code is counted under '未知', so the percentages of a
    dataset add up to 100%.

    Returns:
        None. The statistics are only printed.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def record(dataset, emotion):
        """Add one file with the given emotion label to a dataset's tally."""
        entry = stats[dataset]
        entry['total'] += 1
        entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # glob yields nothing for a missing root, so absent datasets stay at zero.
    # Sorting keeps the printed emotion order stable between runs.

    # RAVDESS names are dash-separated; the third field is the emotion code.
    for path in sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))):
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        record('RAVDESS', ravdess_emotions.get(code, '未知'))

    # SAVEE names are emotion letters plus a take number, e.g. sa01.wav; an
    # optional "<speaker>_" prefix is tolerated.
    for path in sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))):
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
        record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA keeps each emotion in its own directory; use that name as label.
    for path in sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))):
        record('CASIA', os.path.basename(os.path.dirname(path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not stop the remaining ones.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # Note: this is the folder's current name; the original comment says the path is handled in code. NOTE(review): no such handling is visible nearby, and the spelling is 'CAISA', not 'CASIA' -- confirm against the folder on disk.
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file at its native sampling rate and print basic facts.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to display.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes the emotion from the file name and passes the
    audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the sample is reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Takes the first speaker directory under ``SAVEE_PATH/AudioData`` and its
    first ``*.wav`` file (both in sorted order), decodes the emotion from the
    file name and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker directory or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: a stray file next to the speaker folders would
    # otherwise be picked as the "speaker" and yield no audio.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Names look like a01.wav / sa01.wav: emotion letters plus a take number.
    # Drop the extension first -- otherwise the letters of "wav" end up in the
    # code ("awav") and no file ever matches -- then strip the trailing digits.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the dataset prefix the speaker ("DC_a01.wav");
    # taking the text after the last underscore tolerates that -- TODO confirm.
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe, and when possible plot, one sample clip from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and the
    first emotion directory that ``os.listdir`` reports, then takes the first
    ``*.wav`` file (or, failing that, the first ``*.peak`` file) found there.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample that
        loads successfully; ``None`` in every other case.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        """List the sub-directory names of ``parent``, skipping '_desktop.ini'."""
        return [name for name in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini']

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    audio_files = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                   or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    All three datasets are scanned, using these directory layouts:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``. A file whose name carries no
    recognisable emotion code is counted under '未知', so the percentages of a
    dataset add up to 100%.

    Returns:
        None. The statistics are only printed.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def record(dataset, emotion):
        """Add one file with the given emotion label to a dataset's tally."""
        entry = stats[dataset]
        entry['total'] += 1
        entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # glob yields nothing for a missing root, so absent datasets stay at zero.
    # Sorting keeps the printed emotion order stable between runs.

    # RAVDESS names are dash-separated; the third field is the emotion code.
    for path in sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))):
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        record('RAVDESS', ravdess_emotions.get(code, '未知'))

    # SAVEE names are emotion letters plus a take number, e.g. sa01.wav; an
    # optional "<speaker>_" prefix is tolerated.
    for path in sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))):
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
        record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA keeps each emotion in its own directory; use that name as label.
    for path in sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))):
        record('CASIA', os.path.basename(os.path.dirname(path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not stop the remaining ones.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # Note: this is the folder's current name; the original comment says the path is handled in code. NOTE(review): no such handling is visible nearby, and the spelling is 'CAISA', not 'CASIA' -- confirm against the folder on disk.
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file at its native sampling rate and print basic facts.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to display.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes the emotion from the file name and passes the
    audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the sample is reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    first ``.wav`` file (sorted) of the first speaker (sorted), decodes its
    emotion from the file name, prints both, and plots the audio via
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory, speaker directories, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: the wildcard can also match plain files, which
    # would then be mistaken for a speaker folder. Sorted for determinism.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE file names look like a01.wav, where the leading letters encode
    # the emotion (a = anger, sa = sadness, su = surprise, ...).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Drop the extension first: filtering the full name left the letters of
    # "wav" in the id ("a01.wav" -> "awav"), so no file ever matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate an optional "<speaker>_" prefix (e.g. "DC_a01") by keeping
    # only the part after the last underscore.
    label_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in label_part if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, prints the chosen file and its emotion
    (the emotion directory name), and plots it when it is a ``.wav`` file.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise
        ``None`` (missing paths, non-wav sample, or a read error).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    # Speaker level: sub-directories of the dataset root.
    speakers = []
    for entry in os.listdir(CASIA_PATH):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(CASIA_PATH, entry)):
            speakers.append(entry)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    # Emotion level: sub-directories of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = []
    for entry in os.listdir(speaker_dir):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(speaker_dir, entry)):
            emotion_dirs.append(entry)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    # File level: prefer .wav, fall back to .peak files when no wav exists.
    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)

    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    Scans RAVDESS, SAVEE and CASIA under their configured root paths.
    SAVEE and CASIA were previously declared in the statistics table but
    never scanned, so they were always reported as missing; they are now
    counted using the directory layouts that ``explore_savee`` and
    ``explore_casia`` in this file assume.

    Returns:
        None. Results are printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<modality>-<channel>-<emotion>-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        # Sorted so the printed emotion order is stable between runs.
        ravdess_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: <root>/AudioData/<speaker>/<emotion letters><number>.wav
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Letters after an optional "<speaker>_" prefix form the label.
            label = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            tally('SAVEE', savee_mapping.get(label, '未知'))

    # CASIA: <root>/<speaker>/<emotion>/*.wav; the directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Try one sample per dataset; a failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to match the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # NOTE(review): 'CAISA' (sic) is stated to be the on-disk folder name; no path correction is visible in this cell - confirm before renaming
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file and print its sampling rate and length.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` tuple: the loaded waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's native sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {len(waveform)} 采样点, {len(waveform)/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB, referenced to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for notebook use.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Takes the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``, decodes its emotion from the file name,
    prints both, and plots the audio via ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns files in arbitrary order; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index so an unexpectedly named file is labelled as unknown
    # instead of raising IndexError (same check as analyze_datasets).
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    first ``.wav`` file (sorted) of the first speaker (sorted), decodes its
    emotion from the file name, prints both, and plots the audio via
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory, speaker directories, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: the wildcard can also match plain files, which
    # would then be mistaken for a speaker folder. Sorted for determinism.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE file names look like a01.wav, where the leading letters encode
    # the emotion (a = anger, sa = sadness, su = surprise, ...).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Drop the extension first: filtering the full name left the letters of
    # "wav" in the id ("a01.wav" -> "awav"), so no file ever matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate an optional "<speaker>_" prefix (e.g. "DC_a01") by keeping
    # only the part after the last underscore.
    label_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in label_part if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, prints the chosen file and its emotion
    (the emotion directory name), and plots it when it is a ``.wav`` file.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise
        ``None`` (missing paths, non-wav sample, or a read error).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    # Speaker level: sub-directories of the dataset root.
    speakers = []
    for entry in os.listdir(CASIA_PATH):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(CASIA_PATH, entry)):
            speakers.append(entry)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    # Emotion level: sub-directories of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = []
    for entry in os.listdir(speaker_dir):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(speaker_dir, entry)):
            emotion_dirs.append(entry)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    # File level: prefer .wav, fall back to .peak files when no wav exists.
    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)

    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    Scans RAVDESS, SAVEE and CASIA under their configured root paths.
    SAVEE and CASIA were previously declared in the statistics table but
    never scanned, so they were always reported as missing; they are now
    counted using the directory layouts that ``explore_savee`` and
    ``explore_casia`` in this file assume.

    Returns:
        None. Results are printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<modality>-<channel>-<emotion>-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        # Sorted so the printed emotion order is stable between runs.
        ravdess_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: <root>/AudioData/<speaker>/<emotion letters><number>.wav
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Letters after an optional "<speaker>_" prefix form the label.
            label = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            tally('SAVEE', savee_mapping.get(label, '未知'))

    # CASIA: <root>/<speaker>/<emotion>/*.wav; the directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Try one sample per dataset; a failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to match the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # NOTE(review): 'CAISA' (sic) is stated to be the on-disk folder name; no path correction is visible in this cell - confirm before renaming
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file and print its sampling rate and length.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` tuple: the loaded waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's native sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {len(waveform)} 采样点, {len(waveform)/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB, referenced to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for notebook use.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Takes the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``, decodes its emotion from the file name,
    prints both, and plots the audio via ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns files in arbitrary order; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index so an unexpectedly named file is labelled as unknown
    # instead of raising IndexError (same check as analyze_datasets).
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    first ``.wav`` file (sorted) of the first speaker (sorted), decodes its
    emotion from the file name, prints both, and plots the audio via
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory, speaker directories, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: the wildcard can also match plain files, which
    # would then be mistaken for a speaker folder. Sorted for determinism.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE file names look like a01.wav, where the leading letters encode
    # the emotion (a = anger, sa = sadness, su = surprise, ...).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Drop the extension first: filtering the full name left the letters of
    # "wav" in the id ("a01.wav" -> "awav"), so no file ever matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate an optional "<speaker>_" prefix (e.g. "DC_a01") by keeping
    # only the part after the last underscore.
    label_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in label_part if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, prints the chosen file and its emotion
    (the emotion directory name), and plots it when it is a ``.wav`` file.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise
        ``None`` (missing paths, non-wav sample, or a read error).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    # Speaker level: sub-directories of the dataset root.
    speakers = []
    for entry in os.listdir(CASIA_PATH):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(CASIA_PATH, entry)):
            speakers.append(entry)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    # Emotion level: sub-directories of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = []
    for entry in os.listdir(speaker_dir):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(speaker_dir, entry)):
            emotion_dirs.append(entry)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    # File level: prefer .wav, fall back to .peak files when no wav exists.
    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)

    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    Scans RAVDESS, SAVEE and CASIA under their configured root paths.
    SAVEE and CASIA were previously declared in the statistics table but
    never scanned, so they were always reported as missing; they are now
    counted using the directory layouts that ``explore_savee`` and
    ``explore_casia`` in this file assume.

    Returns:
        None. Results are printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<modality>-<channel>-<emotion>-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        # Sorted so the printed emotion order is stable between runs.
        ravdess_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: <root>/AudioData/<speaker>/<emotion letters><number>.wav
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Letters after an optional "<speaker>_" prefix form the label.
            label = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            tally('SAVEE', savee_mapping.get(label, '未知'))

    # CASIA: <root>/<speaker>/<emotion>/*.wav; the directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Try one sample per dataset; a failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to match the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # NOTE(review): 'CAISA' (sic) is stated to be the on-disk folder name; no path correction is visible in this cell - confirm before renaming
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file and print its sampling rate and length.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` tuple: the loaded waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's native sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {len(waveform)} 采样点, {len(waveform)/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB, referenced to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for notebook use.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Takes the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``, decodes its emotion from the file name,
    prints both, and plots the audio via ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns files in arbitrary order; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index so an unexpectedly named file is labelled as unknown
    # instead of raising IndexError (same check as analyze_datasets).
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    first ``.wav`` file (sorted) of the first speaker (sorted), decodes its
    emotion from the file name, prints both, and plots the audio via
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory, speaker directories, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: the wildcard can also match plain files, which
    # would then be mistaken for a speaker folder. Sorted for determinism.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE file names look like a01.wav, where the leading letters encode
    # the emotion (a = anger, sa = sadness, su = surprise, ...).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Drop the extension first: filtering the full name left the letters of
    # "wav" in the id ("a01.wav" -> "awav"), so no file ever matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate an optional "<speaker>_" prefix (e.g. "DC_a01") by keeping
    # only the part after the last underscore.
    label_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in label_part if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, prints the chosen file and its emotion
    (the emotion directory name), and plots it when it is a ``.wav`` file.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise
        ``None`` (missing paths, non-wav sample, or a read error).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    # Speaker level: sub-directories of the dataset root.
    speakers = []
    for entry in os.listdir(CASIA_PATH):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(CASIA_PATH, entry)):
            speakers.append(entry)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    # Emotion level: sub-directories of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = []
    for entry in os.listdir(speaker_dir):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(speaker_dir, entry)):
            emotion_dirs.append(entry)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    # File level: prefer .wav, fall back to .peak files when no wav exists.
    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)

    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a summary.

    Scans RAVDESS, SAVEE and CASIA under their configured root paths.
    SAVEE and CASIA were previously declared in the statistics table but
    never scanned, so they were always reported as missing; they are now
    counted using the directory layouts that ``explore_savee`` and
    ``explore_casia`` in this file assume.

    Returns:
        None. Results are printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<modality>-<channel>-<emotion>-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        # Sorted so the printed emotion order is stable between runs.
        ravdess_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: <root>/AudioData/<speaker>/<emotion letters><number>.wav
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Letters after an optional "<speaker>_" prefix form the label.
            label = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            tally('SAVEE', savee_mapping.get(label, '未知'))

    # CASIA: <root>/<speaker>/<emotion>/*.wav; the directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = sorted(glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Try one sample per dataset; a failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explore()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to match the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # NOTE(review): 'CAISA' (sic) is stated to be the on-disk folder name; no path correction is visible in this cell - confirm before renaming
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file and print its sampling rate and length.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` tuple: the loaded waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's native sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {len(waveform)} 采样点, {len(waveform)/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a waveform and a log-frequency spectrogram, then return a player.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB, referenced to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for notebook use.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Takes the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``, decodes its emotion from the file name,
    prints both, and plots the audio via ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns files in arbitrary order; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index so an unexpectedly named file is labelled as unknown
    # instead of raising IndexError (same check as analyze_datasets).
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    first ``.wav`` file (sorted) of the first speaker (sorted), decodes its
    emotion from the file name, prints both, and plots the audio via
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the
        dataset directory, speaker directories, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only: the wildcard can also match plain files, which
    # would then be mistaken for a speaker folder. Sorted for determinism.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE file names look like a01.wav, where the leading letters encode
    # the emotion (a = anger, sa = sadness, su = surprise, ...).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Drop the extension first: filtering the full name left the letters of
    # "wav" in the id ("a01.wav" -> "awav"), so no file ever matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate an optional "<speaker>_" prefix (e.g. "DC_a01") by keeping
    # only the part after the last underscore.
    label_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in label_part if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, prints the chosen file and its emotion
    (the emotion directory name), and plots it when it is a ``.wav`` file.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise
        ``None`` (missing paths, non-wav sample, or a read error).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    # Speaker level: sub-directories of the dataset root.
    speakers = []
    for entry in os.listdir(CASIA_PATH):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(CASIA_PATH, entry)):
            speakers.append(entry)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    # Emotion level: sub-directories of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = []
    for entry in os.listdir(speaker_dir):
        if entry != '_desktop.ini' and os.path.isdir(os.path.join(speaker_dir, entry)):
            emotion_dirs.append(entry)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    # File level: prefer .wav, fall back to .peak files when no wav exists.
    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)

    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Only .wav samples are loaded and plotted.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    RAVDESS emotions come from the third ``-``-separated field of the file
    name, SAVEE emotions from the letter code in front of the take number,
    and CASIA emotions from the name of the folder holding the file. All
    three datasets are counted; SAVEE and CASIA used to be left at zero and
    were therefore always reported as missing.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion):
        # Add one file to the per-emotion tally of ``dataset``.
        tally = stats[dataset]['emotions']
        tally[emotion] = tally.get(emotion, 0) + 1

    # RAVDESS layout: Actor_*/NN-NN-NN-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names outside the scheme count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE layout: AudioData/SPEAKER/a01.wav (letter code, then take number).
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Ignore an optional "XX_" prefix and the trailing take number.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_names.get(code, '未知'))

    # CASIA layout: SPEAKER/EMOTION/*.wav; the folder name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Show one sample per dataset. A failure in one dataset is printed and does
# not stop the others. The lambdas defer the name lookup into the try block,
# so a missing function is reported the same way as any other error.
for _dataset, _explore in (
    ("RAVDESS", lambda: explore_ravdess()),
    ("SAVEE", lambda: explore_savee()),
    ("CASIA", lambda: explore_casia()),
):
    try:
        print(f"\n{_dataset}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own rate rather than resample.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    n_samples = len(waveform)
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a spectrogram of a signal, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: waveform over time.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Returned so a notebook cell can render the playback widget.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Picks the first ``.wav`` file (by sorted name) in ``Actor_01`` under
    ``RAVDESS_PATH``, loads it, decodes the emotion from the file name,
    prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # Sorted so the chosen sample does not depend on directory listing order.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name outside the scheme has no third field; label it unknown instead
    # of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Picks the first speaker folder under ``SAVEE_PATH/AudioData`` and its
    first ``.wav`` file (both by sorted name), loads it, decodes the emotion
    from the file name, prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker folder, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Only directories can be speaker folders; plain files sitting next to
    # them are skipped. Sorting makes the chosen speaker reproducible.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }

    # SAVEE names look like a01.wav, where "a" stands for anger. The
    # extension must be removed first: filtering digits and dots from the
    # full name yields "awav", which never matches a code. An optional "XX_"
    # prefix and the trailing take number are dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH`` as speaker folder -> emotion folder -> audio file,
    taking the first entry at each level, prints the chosen file and the
    emotion folder name, and plots the audio when the file is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` for a readable ``.wav``
        sample; ``None`` in every other case.
    """
    def _subdirectories(parent):
        # Names of the directories directly under ``parent``, in listing order.
        return [
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry)) and entry != '_desktop.ini'
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    speaker_names = _subdirectories(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirectories(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Prefer .wav files; look for .peak files only when there are none.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    emotion_label = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    RAVDESS emotions come from the third ``-``-separated field of the file
    name, SAVEE emotions from the letter code in front of the take number,
    and CASIA emotions from the name of the folder holding the file. All
    three datasets are counted; SAVEE and CASIA used to be left at zero and
    were therefore always reported as missing.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion):
        # Add one file to the per-emotion tally of ``dataset``.
        tally = stats[dataset]['emotions']
        tally[emotion] = tally.get(emotion, 0) + 1

    # RAVDESS layout: Actor_*/NN-NN-NN-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names outside the scheme count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE layout: AudioData/SPEAKER/a01.wav (letter code, then take number).
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Ignore an optional "XX_" prefix and the trailing take number.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_names.get(code, '未知'))

    # CASIA layout: SPEAKER/EMOTION/*.wav; the folder name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Show one sample per dataset. A failure in one dataset is printed and does
# not stop the others. The lambdas defer the name lookup into the try block,
# so a missing function is reported the same way as any other error.
for _dataset, _explore in (
    ("RAVDESS", lambda: explore_ravdess()),
    ("SAVEE", lambda: explore_savee()),
    ("CASIA", lambda: explore_casia()),
):
    try:
        print(f"\n{_dataset}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own rate rather than resample.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    n_samples = len(waveform)
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a spectrogram of a signal, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: waveform over time.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Returned so a notebook cell can render the playback widget.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Picks the first ``.wav`` file (by sorted name) in ``Actor_01`` under
    ``RAVDESS_PATH``, loads it, decodes the emotion from the file name,
    prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # Sorted so the chosen sample does not depend on directory listing order.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name outside the scheme has no third field; label it unknown instead
    # of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Picks the first speaker folder under ``SAVEE_PATH/AudioData`` and its
    first ``.wav`` file (both by sorted name), loads it, decodes the emotion
    from the file name, prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker folder, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Only directories can be speaker folders; plain files sitting next to
    # them are skipped. Sorting makes the chosen speaker reproducible.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }

    # SAVEE names look like a01.wav, where "a" stands for anger. The
    # extension must be removed first: filtering digits and dots from the
    # full name yields "awav", which never matches a code. An optional "XX_"
    # prefix and the trailing take number are dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH`` as speaker folder -> emotion folder -> audio file,
    taking the first entry at each level, prints the chosen file and the
    emotion folder name, and plots the audio when the file is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` for a readable ``.wav``
        sample; ``None`` in every other case.
    """
    def _subdirectories(parent):
        # Names of the directories directly under ``parent``, in listing order.
        return [
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry)) and entry != '_desktop.ini'
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    speaker_names = _subdirectories(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirectories(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Prefer .wav files; look for .peak files only when there are none.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    emotion_label = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    RAVDESS emotions come from the third ``-``-separated field of the file
    name, SAVEE emotions from the letter code in front of the take number,
    and CASIA emotions from the name of the folder holding the file. All
    three datasets are counted; SAVEE and CASIA used to be left at zero and
    were therefore always reported as missing.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion):
        # Add one file to the per-emotion tally of ``dataset``.
        tally = stats[dataset]['emotions']
        tally[emotion] = tally.get(emotion, 0) + 1

    # RAVDESS layout: Actor_*/NN-NN-NN-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names outside the scheme count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE layout: AudioData/SPEAKER/a01.wav (letter code, then take number).
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Ignore an optional "XX_" prefix and the trailing take number.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_names.get(code, '未知'))

    # CASIA layout: SPEAKER/EMOTION/*.wav; the folder name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Show one sample per dataset. A failure in one dataset is printed and does
# not stop the others. The lambdas defer the name lookup into the try block,
# so a missing function is reported the same way as any other error.
for _dataset, _explore in (
    ("RAVDESS", lambda: explore_ravdess()),
    ("SAVEE", lambda: explore_savee()),
    ("CASIA", lambda: explore_casia()),
):
    try:
        print(f"\n{_dataset}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own rate rather than resample.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    n_samples = len(waveform)
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a spectrogram of a signal, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: waveform over time.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Returned so a notebook cell can render the playback widget.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Picks the first ``.wav`` file (by sorted name) in ``Actor_01`` under
    ``RAVDESS_PATH``, loads it, decodes the emotion from the file name,
    prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # Sorted so the chosen sample does not depend on directory listing order.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name outside the scheme has no third field; label it unknown instead
    # of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Picks the first speaker folder under ``SAVEE_PATH/AudioData`` and its
    first ``.wav`` file (both by sorted name), loads it, decodes the emotion
    from the file name, prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker folder, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Only directories can be speaker folders; plain files sitting next to
    # them are skipped. Sorting makes the chosen speaker reproducible.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }

    # SAVEE names look like a01.wav, where "a" stands for anger. The
    # extension must be removed first: filtering digits and dots from the
    # full name yields "awav", which never matches a code. An optional "XX_"
    # prefix and the trailing take number are dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH`` as speaker folder -> emotion folder -> audio file,
    taking the first entry at each level, prints the chosen file and the
    emotion folder name, and plots the audio when the file is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` for a readable ``.wav``
        sample; ``None`` in every other case.
    """
    def _subdirectories(parent):
        # Names of the directories directly under ``parent``, in listing order.
        return [
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry)) and entry != '_desktop.ini'
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    speaker_names = _subdirectories(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirectories(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Prefer .wav files; look for .peak files only when there are none.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    emotion_label = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    RAVDESS emotions come from the third ``-``-separated field of the file
    name, SAVEE emotions from the letter code in front of the take number,
    and CASIA emotions from the name of the folder holding the file. All
    three datasets are counted; SAVEE and CASIA used to be left at zero and
    were therefore always reported as missing.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion):
        # Add one file to the per-emotion tally of ``dataset``.
        tally = stats[dataset]['emotions']
        tally[emotion] = tally.get(emotion, 0) + 1

    # RAVDESS layout: Actor_*/NN-NN-NN-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names outside the scheme count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE layout: AudioData/SPEAKER/a01.wav (letter code, then take number).
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Ignore an optional "XX_" prefix and the trailing take number.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_names.get(code, '未知'))

    # CASIA layout: SPEAKER/EMOTION/*.wav; the folder name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Show one sample per dataset. A failure in one dataset is printed and does
# not stop the others. The lambdas defer the name lookup into the try block,
# so a missing function is reported the same way as any other error.
for _dataset, _explore in (
    ("RAVDESS", lambda: explore_ravdess()),
    ("SAVEE", lambda: explore_savee()),
    ("CASIA", lambda: explore_casia()),
):
    try:
        print(f"\n{_dataset}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own rate rather than resample.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    n_samples = len(waveform)
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a spectrogram of a signal, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: waveform over time.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Returned so a notebook cell can render the playback widget.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Picks the first ``.wav`` file (by sorted name) in ``Actor_01`` under
    ``RAVDESS_PATH``, loads it, decodes the emotion from the file name,
    prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # Sorted so the chosen sample does not depend on directory listing order.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS file names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name outside the scheme has no third field; label it unknown instead
    # of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Picks the first speaker folder under ``SAVEE_PATH/AudioData`` and its
    first ``.wav`` file (both by sorted name), loads it, decodes the emotion
    from the file name, prints both, and plots the audio.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory, a speaker folder, or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Only directories can be speaker folders; plain files sitting next to
    # them are skipped. Sorting makes the chosen speaker reproducible.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶'
    }

    # SAVEE names look like a01.wav, where "a" stands for anger. The
    # extension must be removed first: filtering digits and dots from the
    # full name yields "awav", which never matches a code. An optional "XX_"
    # prefix and the trailing take number are dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH`` as speaker folder -> emotion folder -> audio file,
    taking the first entry at each level, prints the chosen file and the
    emotion folder name, and plots the audio when the file is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` for a readable ``.wav``
        sample; ``None`` in every other case.
    """
    def _subdirectories(parent):
        # Names of the directories directly under ``parent``, in listing order.
        return [
            entry for entry in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, entry)) and entry != '_desktop.ini'
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    speaker_names = _subdirectories(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirectories(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Prefer .wav files; look for .peak files only when there are none.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    emotion_label = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"读取文件时出错: {e}")
    return None
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    RAVDESS emotions come from the third ``-``-separated field of the file
    name, SAVEE emotions from the letter code in front of the take number,
    and CASIA emotions from the name of the folder holding the file. All
    three datasets are counted; SAVEE and CASIA used to be left at zero and
    were therefore always reported as missing.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion):
        # Add one file to the per-emotion tally of ``dataset``.
        tally = stats[dataset]['emotions']
        tally[emotion] = tally.get(emotion, 0) + 1

    # RAVDESS layout: Actor_*/NN-NN-NN-....wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names outside the scheme count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE layout: AudioData/SPEAKER/a01.wav (letter code, then take number).
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Ignore an optional "XX_" prefix and the trailing take number.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_names.get(code, '未知'))

    # CASIA layout: SPEAKER/EMOTION/*.wav; the folder name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Show one sample per dataset. A failure in one dataset is printed and does
# not stop the others. The lambdas defer the name lookup into the try block,
# so a missing function is reported the same way as any other error.
for _dataset, _explore in (
    ("RAVDESS", lambda: explore_ravdess()),
    ("SAVEE", lambda: explore_savee()),
    ("CASIA", lambda: explore_casia()),
):
    try:
        print(f"\n{_dataset}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None asks librosa to keep the file's own rate rather than resample.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    print(f"采样率: {sample_rate}Hz")
    n_samples = len(waveform)
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a spectrogram of a signal, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: waveform over time.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Returned so a notebook cell can render the playback widget.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Prints the file name and the emotion decoded from it, then passes the
    waveform to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its ``Actor_01/*.wav`` files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns matches in arbitrary order; sort so the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError (same guard as analyze_datasets).
    emotion_code = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_code, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    Looks under ``SAVEE_PATH/AudioData`` for speaker directories, takes the
    first ``.wav`` of the first speaker (both in sorted order), prints its
    file name and decoded emotion, and passes the waveform to
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        root, a speaker directory, or any ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # '*' also matches plain files, which can never contain recordings, so
    # keep directories only; sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE names are <emotion letters><index>.wav, e.g. a01.wav for anger.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Strip the extension before filtering out digits; otherwise the letters
    # of "wav" leak into the code ("a01.wav" -> "awav") and nothing matches.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample from CASIA, laid out as ``<speaker>/<emotion>/<file>``.

    Picks the first listed speaker directory, its first listed emotion
    directory and the first matching file there, prints the file path and
    emotion directory name, and plots the file when it is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise ``None``.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(root):
        # Names of sub-directories of root, skipping '_desktop.ini'.
        return [
            entry
            for entry in os.listdir(root)
            if entry != '_desktop.ini' and os.path.isdir(os.path.join(root, entry))
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print the file count and emotion distribution of each dataset.

    Files are discovered with the same layouts the ``explore_*`` helpers use:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``.  A dataset with no matching
    files (including a missing root directory) is reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field of the file name is the emotion code;
        # shorter names are counted as unknown so percentages add up.
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        return ravdess_emotions.get(code, '未知')

    def savee_label(path):
        # Letters of the extension-less name, e.g. "sa03.wav" -> "sa".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(c for c in stem if not c.isdigit())
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the directory that holds the file.
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    # glob yields nothing for a missing root, which leaves total at 0.
    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        files = glob.glob(pattern)
        emotions = {}
        for path in files:
            label = label_of(path)
            emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics.
analyze_datasets()

# Show one sample per dataset.  Each explorer returns an audio player (or
# None).  The player is displayed explicitly: a value produced inside a
# try block is not the cell's last expression, so it is never rendered
# automatically.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        if _player is not None:
            display(_player)
    except Exception as e:  # report and continue with the remaining datasets
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA'  # NOTE(review): original note says 'CAISA' is the folder's current name on disk; no code in this cell remaps the path - confirm
|
||
|
||
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: ``(audio, sr)`` exactly as returned by
        ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)

    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f} 秒")

    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a signal's waveform and dB-scaled STFT spectrogram, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in both subplot titles.

    Returns:
        An IPython ``Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude in dB relative to its maximum,
    # drawn on a logarithmic frequency axis.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Prints the file name and the emotion decoded from it, then passes the
    waveform to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its ``Actor_01/*.wav`` files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns matches in arbitrary order; sort so the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError (same guard as analyze_datasets).
    emotion_code = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_code, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    Looks under ``SAVEE_PATH/AudioData`` for speaker directories, takes the
    first ``.wav`` of the first speaker (both in sorted order), prints its
    file name and decoded emotion, and passes the waveform to
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        root, a speaker directory, or any ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # '*' also matches plain files, which can never contain recordings, so
    # keep directories only; sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE names are <emotion letters><index>.wav, e.g. a01.wav for anger.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Strip the extension before filtering out digits; otherwise the letters
    # of "wav" leak into the code ("a01.wav" -> "awav") and nothing matches.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample from CASIA, laid out as ``<speaker>/<emotion>/<file>``.

    Picks the first listed speaker directory, its first listed emotion
    directory and the first matching file there, prints the file path and
    emotion directory name, and plots the file when it is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise ``None``.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(root):
        # Names of sub-directories of root, skipping '_desktop.ini'.
        return [
            entry
            for entry in os.listdir(root)
            if entry != '_desktop.ini' and os.path.isdir(os.path.join(root, entry))
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print the file count and emotion distribution of each dataset.

    Files are discovered with the same layouts the ``explore_*`` helpers use:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``.  A dataset with no matching
    files (including a missing root directory) is reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field of the file name is the emotion code;
        # shorter names are counted as unknown so percentages add up.
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        return ravdess_emotions.get(code, '未知')

    def savee_label(path):
        # Letters of the extension-less name, e.g. "sa03.wav" -> "sa".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(c for c in stem if not c.isdigit())
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the directory that holds the file.
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    # glob yields nothing for a missing root, which leaves total at 0.
    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        files = glob.glob(pattern)
        emotions = {}
        for path in files:
            label = label_of(path)
            emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics.
analyze_datasets()

# Show one sample per dataset.  Each explorer returns an audio player (or
# None).  The player is displayed explicitly: a value produced inside a
# try block is not the cell's last expression, so it is never rendered
# automatically.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        if _player is not None:
            display(_player)
    except Exception as e:  # report and continue with the remaining datasets
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA'  # NOTE(review): original note says 'CAISA' is the folder's current name on disk; no code in this cell remaps the path - confirm
|
||
|
||
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: ``(audio, sr)`` exactly as returned by
        ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)

    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f} 秒")

    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a signal's waveform and dB-scaled STFT spectrogram, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in both subplot titles.

    Returns:
        An IPython ``Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude in dB relative to its maximum,
    # drawn on a logarithmic frequency axis.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Prints the file name and the emotion decoded from it, then passes the
    waveform to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its ``Actor_01/*.wav`` files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns matches in arbitrary order; sort so the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError (same guard as analyze_datasets).
    emotion_code = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_code, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    Looks under ``SAVEE_PATH/AudioData`` for speaker directories, takes the
    first ``.wav`` of the first speaker (both in sorted order), prints its
    file name and decoded emotion, and passes the waveform to
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        root, a speaker directory, or any ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # '*' also matches plain files, which can never contain recordings, so
    # keep directories only; sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE names are <emotion letters><index>.wav, e.g. a01.wav for anger.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Strip the extension before filtering out digits; otherwise the letters
    # of "wav" leak into the code ("a01.wav" -> "awav") and nothing matches.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample from CASIA, laid out as ``<speaker>/<emotion>/<file>``.

    Picks the first listed speaker directory, its first listed emotion
    directory and the first matching file there, prints the file path and
    emotion directory name, and plots the file when it is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise ``None``.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(root):
        # Names of sub-directories of root, skipping '_desktop.ini'.
        return [
            entry
            for entry in os.listdir(root)
            if entry != '_desktop.ini' and os.path.isdir(os.path.join(root, entry))
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print the file count and emotion distribution of each dataset.

    Files are discovered with the same layouts the ``explore_*`` helpers use:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``.  A dataset with no matching
    files (including a missing root directory) is reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field of the file name is the emotion code;
        # shorter names are counted as unknown so percentages add up.
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        return ravdess_emotions.get(code, '未知')

    def savee_label(path):
        # Letters of the extension-less name, e.g. "sa03.wav" -> "sa".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(c for c in stem if not c.isdigit())
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the directory that holds the file.
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    # glob yields nothing for a missing root, which leaves total at 0.
    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        files = glob.glob(pattern)
        emotions = {}
        for path in files:
            label = label_of(path)
            emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics.
analyze_datasets()

# Show one sample per dataset.  Each explorer returns an audio player (or
# None).  The player is displayed explicitly: a value produced inside a
# try block is not the cell's last expression, so it is never rendered
# automatically.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        if _player is not None:
            display(_player)
    except Exception as e:  # report and continue with the remaining datasets
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA'  # NOTE(review): original note says 'CAISA' is the folder's current name on disk; no code in this cell remaps the path - confirm
|
||
|
||
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: ``(audio, sr)`` exactly as returned by
        ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)

    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f} 秒")

    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a signal's waveform and dB-scaled STFT spectrogram, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in both subplot titles.

    Returns:
        An IPython ``Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude in dB relative to its maximum,
    # drawn on a logarithmic frequency axis.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Prints the file name and the emotion decoded from it, then passes the
    waveform to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its ``Actor_01/*.wav`` files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns matches in arbitrary order; sort so the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError (same guard as analyze_datasets).
    emotion_code = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_code, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    Looks under ``SAVEE_PATH/AudioData`` for speaker directories, takes the
    first ``.wav`` of the first speaker (both in sorted order), prints its
    file name and decoded emotion, and passes the waveform to
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        root, a speaker directory, or any ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # '*' also matches plain files, which can never contain recordings, so
    # keep directories only; sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE names are <emotion letters><index>.wav, e.g. a01.wav for anger.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Strip the extension before filtering out digits; otherwise the letters
    # of "wav" leak into the code ("a01.wav" -> "awav") and nothing matches.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample from CASIA, laid out as ``<speaker>/<emotion>/<file>``.

    Picks the first listed speaker directory, its first listed emotion
    directory and the first matching file there, prints the file path and
    emotion directory name, and plots the file when it is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise ``None``.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(root):
        # Names of sub-directories of root, skipping '_desktop.ini'.
        return [
            entry
            for entry in os.listdir(root)
            if entry != '_desktop.ini' and os.path.isdir(os.path.join(root, entry))
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print the file count and emotion distribution of each dataset.

    Files are discovered with the same layouts the ``explore_*`` helpers use:
    ``RAVDESS_PATH/Actor_*/*.wav``, ``SAVEE_PATH/AudioData/<speaker>/*.wav``
    and ``CASIA_PATH/<speaker>/<emotion>/*.wav``.  A dataset with no matching
    files (including a missing root directory) is reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field of the file name is the emotion code;
        # shorter names are counted as unknown so percentages add up.
        fields = os.path.basename(path).split('-')
        code = fields[2] if len(fields) >= 3 else None
        return ravdess_emotions.get(code, '未知')

    def savee_label(path):
        # Letters of the extension-less name, e.g. "sa03.wav" -> "sa".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(c for c in stem if not c.isdigit())
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the directory that holds the file.
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    # glob yields nothing for a missing root, which leaves total at 0.
    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        files = glob.glob(pattern)
        emotions = {}
        for path in files:
            label = label_of(path)
            emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics.
analyze_datasets()

# Show one sample per dataset.  Each explorer returns an audio player (or
# None).  The player is displayed explicitly: a value produced inside a
# try block is not the cell's last expression, so it is never rendered
# automatically.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        if _player is not None:
            display(_player)
    except Exception as e:  # report and continue with the remaining datasets
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# Dataset root directories; adjust them to the local layout.
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA'  # NOTE(review): original note says 'CAISA' is the folder's current name on disk; no code in this cell remaps the path - confirm
|
||
|
||
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: ``(audio, sr)`` exactly as returned by
        ``librosa.load(file_path, sr=None)``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(file_path, sr=None)

    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f} 秒")

    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot a signal's waveform and dB-scaled STFT spectrogram, then return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in both subplot titles.

    Returns:
        An IPython ``Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude in dB relative to its maximum,
    # drawn on a logarithmic frequency axis.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Prints the file name and the emotion decoded from it, then passes the
    waveform to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        directory or its ``Actor_01/*.wav`` files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob returns matches in arbitrary order; sort so the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS names look like 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor.
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A name with fewer than three fields carries no emotion code; report it
    # as unknown instead of raising IndexError (same guard as analyze_datasets).
    emotion_code = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_code, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    Looks under ``SAVEE_PATH/AudioData`` for speaker directories, takes the
    first ``.wav`` of the first speaker (both in sorted order), prints its
    file name and decoded emotion, and passes the waveform to
    ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        root, a speaker directory, or any ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # '*' also matches plain files, which can never contain recordings, so
    # keep directories only; sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE names are <emotion letters><index>.wav, e.g. a01.wav for anger.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Strip the extension before filtering out digits; otherwise the letters
    # of "wav" leak into the code ("a01.wav" -> "awav") and nothing matches.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample from CASIA, laid out as ``<speaker>/<emotion>/<file>``.

    Picks the first listed speaker directory, its first listed emotion
    directory and the first matching file there, prints the file path and
    emotion directory name, and plots the file when it is a ``.wav``.

    Returns:
        The value returned by ``explore_audio`` on success, otherwise ``None``.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(root):
        # Names of sub-directories of root, skipping '_desktop.ini'.
        return [
            entry
            for entry in os.listdir(root)
            if entry != '_desktop.ini' and os.path.isdir(os.path.join(root, entry))
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count the audio files of each dataset per emotion and print a report.

    For every dataset whose root directory exists, ``*.wav`` files are counted
    and grouped by emotion:

    * RAVDESS -- ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
      ``-``-separated field of the file name.
    * SAVEE -- ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
      letter code in front of the file number (layout assumed to match what
      ``explore_savee`` reads).
    * CASIA -- ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
      name of the enclosing directory (layout assumed to match what
      ``explore_casia`` reads).

    Datasets without any matching file are reported as missing. Nothing is
    returned; the report is printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion=None):
        # Register one file; a file without a decodable emotion only adds to the total.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: emotion code is the third field of e.g. 03-01-05-01-01-01-01.wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_emotions = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        for actor_dir in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*')):
            for file_path in glob.glob(os.path.join(actor_dir, '*.wav')):
                parts = os.path.basename(file_path).split('-')
                if len(parts) >= 3:
                    _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
                else:
                    _count('RAVDESS')

    # SAVEE: previously never counted, so it was always reported as missing.
    if os.path.exists(SAVEE_PATH):
        savee_emotions = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Drop the trailing file number and any "<speaker>_" prefix.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted; the emotion is the parent folder name.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the report.
    print("数据集统计:")
    for dataset, data in stats.items():
        total = data['total']
        if total == 0:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
            continue
        print(f"\n{dataset} 数据集:")
        print(f" 总文件数: {total}")
        if data['emotions']:
            print(" 情感分布:")
            for emotion, count in data['emotions'].items():
                print(f" {emotion}: {count} 文件 ({count/total*100:.1f}%)")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset. A failure in one explorer is reported and
# must not prevent the remaining datasets from being explored.
from IPython.display import display

for dataset_label, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explorer()
        # The explorers return an audio player (or None). A value produced
        # inside a try block is not rendered automatically, so show it
        # explicitly instead of discarding it.
        if player is not None:
            display(player)
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, rate = librosa.load(file_path, sr=None)
    sample_count = len(waveform)

    print(f"采样率: {rate}Hz")
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f} 秒")
    return waveform, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and spectrogram of a signal and return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample recording from the RAVDESS dataset.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes its emotion from the file name, prints the
    file name and emotion, and hands the waveform to ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # Only the Actor_01 folder is sampled. glob returns matches in arbitrary
    # order, so sort to make the chosen sample reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))

    if not audio_files:
        print("未找到音频文件")
        return

    # Explore the first file.
    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme: 03-01-01-01-01-01-01.wav
    # modality-vocal channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A wav file that does not follow the naming scheme has no third field;
    # label it as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample recording from the SAVEE dataset.

    Picks the first speaker directory (sorted) under ``SAVEE_PATH/AudioData``,
    loads its first ``*.wav`` file (sorted), decodes the emotion from the
    file name, prints the file name and emotion, and hands the waveform to
    ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep only real directories: a stray file next to the speaker folders
    # would otherwise be picked as the "speaker" and yield no audio. Sorting
    # makes the selection reproducible (glob order is arbitrary).
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )

    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    # First audio file of the first speaker directory.
    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))

    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: a01.wav, where the leading letters encode the
    # emotion (a = anger).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Derive the emotion code from the stem only. The extension must be
    # removed first, otherwise "a01.wav" turns into "awav" and never matches.
    # A "<speaker>_" prefix, if present, is dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker
    directory and the first emotion directory returned by ``os.listdir``,
    picks the first ``*.wav`` file (falling back to ``*.peak`` files when no
    wav exists), prints its path and emotion directory name, and, for wav
    files only, loads and plots it.

    Returns:
        The value returned by ``explore_audio`` for a wav sample; ``None``
        when nothing usable is found, the sample is not a wav file, or
        loading/plotting fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirectories(parent):
        # Sub-directory names of `parent`, ignoring anything called '_desktop.ini'.
        names = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                names.append(entry)
        return names

    speakers = _subdirectories(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    # Emotion folders of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirectories(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer wav files; only look for .peak files when no wav is present.
    audio_files = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not audio_files:
        audio_files = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Anything other than wav is only reported, not decoded.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count the audio files of each dataset per emotion and print a report.

    For every dataset whose root directory exists, ``*.wav`` files are counted
    and grouped by emotion:

    * RAVDESS -- ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
      ``-``-separated field of the file name.
    * SAVEE -- ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
      letter code in front of the file number (layout assumed to match what
      ``explore_savee`` reads).
    * CASIA -- ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
      name of the enclosing directory (layout assumed to match what
      ``explore_casia`` reads).

    Datasets without any matching file are reported as missing. Nothing is
    returned; the report is printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion=None):
        # Register one file; a file without a decodable emotion only adds to the total.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: emotion code is the third field of e.g. 03-01-05-01-01-01-01.wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_emotions = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        for actor_dir in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*')):
            for file_path in glob.glob(os.path.join(actor_dir, '*.wav')):
                parts = os.path.basename(file_path).split('-')
                if len(parts) >= 3:
                    _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
                else:
                    _count('RAVDESS')

    # SAVEE: previously never counted, so it was always reported as missing.
    if os.path.exists(SAVEE_PATH):
        savee_emotions = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Drop the trailing file number and any "<speaker>_" prefix.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted; the emotion is the parent folder name.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the report.
    print("数据集统计:")
    for dataset, data in stats.items():
        total = data['total']
        if total == 0:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
            continue
        print(f"\n{dataset} 数据集:")
        print(f" 总文件数: {total}")
        if data['emotions']:
            print(" 情感分布:")
            for emotion, count in data['emotions'].items():
                print(f" {emotion}: {count} 文件 ({count/total*100:.1f}%)")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset. A failure in one explorer is reported and
# must not prevent the remaining datasets from being explored.
from IPython.display import display

for dataset_label, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explorer()
        # The explorers return an audio player (or None). A value produced
        # inside a try block is not rendered automatically, so show it
        # explicitly instead of discarding it.
        if player is not None:
            display(player)
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, rate = librosa.load(file_path, sr=None)
    sample_count = len(waveform)

    print(f"采样率: {rate}Hz")
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f} 秒")
    return waveform, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and spectrogram of a signal and return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample recording from the RAVDESS dataset.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes its emotion from the file name, prints the
    file name and emotion, and hands the waveform to ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # Only the Actor_01 folder is sampled. glob returns matches in arbitrary
    # order, so sort to make the chosen sample reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))

    if not audio_files:
        print("未找到音频文件")
        return

    # Explore the first file.
    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme: 03-01-01-01-01-01-01.wav
    # modality-vocal channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A wav file that does not follow the naming scheme has no third field;
    # label it as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample recording from the SAVEE dataset.

    Picks the first speaker directory (sorted) under ``SAVEE_PATH/AudioData``,
    loads its first ``*.wav`` file (sorted), decodes the emotion from the
    file name, prints the file name and emotion, and hands the waveform to
    ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep only real directories: a stray file next to the speaker folders
    # would otherwise be picked as the "speaker" and yield no audio. Sorting
    # makes the selection reproducible (glob order is arbitrary).
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )

    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    # First audio file of the first speaker directory.
    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))

    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: a01.wav, where the leading letters encode the
    # emotion (a = anger).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Derive the emotion code from the stem only. The extension must be
    # removed first, otherwise "a01.wav" turns into "awav" and never matches.
    # A "<speaker>_" prefix, if present, is dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker
    directory and the first emotion directory returned by ``os.listdir``,
    picks the first ``*.wav`` file (falling back to ``*.peak`` files when no
    wav exists), prints its path and emotion directory name, and, for wav
    files only, loads and plots it.

    Returns:
        The value returned by ``explore_audio`` for a wav sample; ``None``
        when nothing usable is found, the sample is not a wav file, or
        loading/plotting fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirectories(parent):
        # Sub-directory names of `parent`, ignoring anything called '_desktop.ini'.
        names = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                names.append(entry)
        return names

    speakers = _subdirectories(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    # Emotion folders of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirectories(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer wav files; only look for .peak files when no wav is present.
    audio_files = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not audio_files:
        audio_files = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Anything other than wav is only reported, not decoded.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count the audio files of each dataset per emotion and print a report.

    For every dataset whose root directory exists, ``*.wav`` files are counted
    and grouped by emotion:

    * RAVDESS -- ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
      ``-``-separated field of the file name.
    * SAVEE -- ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
      letter code in front of the file number (layout assumed to match what
      ``explore_savee`` reads).
    * CASIA -- ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
      name of the enclosing directory (layout assumed to match what
      ``explore_casia`` reads).

    Datasets without any matching file are reported as missing. Nothing is
    returned; the report is printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion=None):
        # Register one file; a file without a decodable emotion only adds to the total.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: emotion code is the third field of e.g. 03-01-05-01-01-01-01.wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_emotions = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        for actor_dir in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*')):
            for file_path in glob.glob(os.path.join(actor_dir, '*.wav')):
                parts = os.path.basename(file_path).split('-')
                if len(parts) >= 3:
                    _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
                else:
                    _count('RAVDESS')

    # SAVEE: previously never counted, so it was always reported as missing.
    if os.path.exists(SAVEE_PATH):
        savee_emotions = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Drop the trailing file number and any "<speaker>_" prefix.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted; the emotion is the parent folder name.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the report.
    print("数据集统计:")
    for dataset, data in stats.items():
        total = data['total']
        if total == 0:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
            continue
        print(f"\n{dataset} 数据集:")
        print(f" 总文件数: {total}")
        if data['emotions']:
            print(" 情感分布:")
            for emotion, count in data['emotions'].items():
                print(f" {emotion}: {count} 文件 ({count/total*100:.1f}%)")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset. A failure in one explorer is reported and
# must not prevent the remaining datasets from being explored.
from IPython.display import display

for dataset_label, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explorer()
        # The explorers return an audio player (or None). A value produced
        # inside a try block is not rendered automatically, so show it
        # explicitly instead of discarding it.
        if player is not None:
            display(player)
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, rate = librosa.load(file_path, sr=None)
    sample_count = len(waveform)

    print(f"采样率: {rate}Hz")
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f} 秒")
    return waveform, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and spectrogram of a signal and return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample recording from the RAVDESS dataset.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes its emotion from the file name, prints the
    file name and emotion, and hands the waveform to ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # Only the Actor_01 folder is sampled. glob returns matches in arbitrary
    # order, so sort to make the chosen sample reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))

    if not audio_files:
        print("未找到音频文件")
        return

    # Explore the first file.
    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme: 03-01-01-01-01-01-01.wav
    # modality-vocal channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A wav file that does not follow the naming scheme has no third field;
    # label it as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample recording from the SAVEE dataset.

    Picks the first speaker directory (sorted) under ``SAVEE_PATH/AudioData``,
    loads its first ``*.wav`` file (sorted), decodes the emotion from the
    file name, prints the file name and emotion, and hands the waveform to
    ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep only real directories: a stray file next to the speaker folders
    # would otherwise be picked as the "speaker" and yield no audio. Sorting
    # makes the selection reproducible (glob order is arbitrary).
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )

    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    # First audio file of the first speaker directory.
    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))

    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: a01.wav, where the leading letters encode the
    # emotion (a = anger).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Derive the emotion code from the stem only. The extension must be
    # removed first, otherwise "a01.wav" turns into "awav" and never matches.
    # A "<speaker>_" prefix, if present, is dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker
    directory and the first emotion directory returned by ``os.listdir``,
    picks the first ``*.wav`` file (falling back to ``*.peak`` files when no
    wav exists), prints its path and emotion directory name, and, for wav
    files only, loads and plots it.

    Returns:
        The value returned by ``explore_audio`` for a wav sample; ``None``
        when nothing usable is found, the sample is not a wav file, or
        loading/plotting fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirectories(parent):
        # Sub-directory names of `parent`, ignoring anything called '_desktop.ini'.
        names = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                names.append(entry)
        return names

    speakers = _subdirectories(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    # Emotion folders of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirectories(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer wav files; only look for .peak files when no wav is present.
    audio_files = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not audio_files:
        audio_files = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Anything other than wav is only reported, not decoded.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count the audio files of each dataset per emotion and print a report.

    For every dataset whose root directory exists, ``*.wav`` files are counted
    and grouped by emotion:

    * RAVDESS -- ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
      ``-``-separated field of the file name.
    * SAVEE -- ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
      letter code in front of the file number (layout assumed to match what
      ``explore_savee`` reads).
    * CASIA -- ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
      name of the enclosing directory (layout assumed to match what
      ``explore_casia`` reads).

    Datasets without any matching file are reported as missing. Nothing is
    returned; the report is printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion=None):
        # Register one file; a file without a decodable emotion only adds to the total.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: emotion code is the third field of e.g. 03-01-05-01-01-01-01.wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_emotions = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        for actor_dir in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*')):
            for file_path in glob.glob(os.path.join(actor_dir, '*.wav')):
                parts = os.path.basename(file_path).split('-')
                if len(parts) >= 3:
                    _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
                else:
                    _count('RAVDESS')

    # SAVEE: previously never counted, so it was always reported as missing.
    if os.path.exists(SAVEE_PATH):
        savee_emotions = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Drop the trailing file number and any "<speaker>_" prefix.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted; the emotion is the parent folder name.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the report.
    print("数据集统计:")
    for dataset, data in stats.items():
        total = data['total']
        if total == 0:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
            continue
        print(f"\n{dataset} 数据集:")
        print(f" 总文件数: {total}")
        if data['emotions']:
            print(" 情感分布:")
            for emotion, count in data['emotions'].items():
                print(f" {emotion}: {count} 文件 ({count/total*100:.1f}%)")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset. A failure in one explorer is reported and
# must not prevent the remaining datasets from being explored.
from IPython.display import display

for dataset_label, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explorer()
        # The explorers return an audio player (or None). A value produced
        # inside a try block is not rendered automatically, so show it
        # explicitly instead of discarding it.
        if player is not None:
            display(player)
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, rate = librosa.load(file_path, sr=None)
    sample_count = len(waveform)

    print(f"采样率: {rate}Hz")
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f} 秒")
    return waveform, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and spectrogram of a signal and return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one sample recording from the RAVDESS dataset.

    Looks for ``*.wav`` files under ``RAVDESS_PATH/Actor_01``, loads the first
    one in sorted order, decodes its emotion from the file name, prints the
    file name and emotion, and hands the waveform to ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # Only the Actor_01 folder is sampled. glob returns matches in arbitrary
    # order, so sort to make the chosen sample reproducible.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))

    if not audio_files:
        print("未找到音频文件")
        return

    # Explore the first file.
    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme: 03-01-01-01-01-01-01.wav
    # modality-vocal channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # A wav file that does not follow the naming scheme has no third field;
    # label it as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one sample recording from the SAVEE dataset.

    Picks the first speaker directory (sorted) under ``SAVEE_PATH/AudioData``,
    loads its first ``*.wav`` file (sorted), decodes the emotion from the
    file name, prints the file name and emotion, and hands the waveform to
    ``explore_audio``.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep only real directories: a stray file next to the speaker folders
    # would otherwise be picked as the "speaker" and yield no audio. Sorting
    # makes the selection reproducible (glob order is arbitrary).
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(entry)
    )

    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    # First audio file of the first speaker directory.
    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))

    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: a01.wav, where the leading letters encode the
    # emotion (a = anger).
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }

    # Derive the emotion code from the stem only. The extension must be
    # removed first, otherwise "a01.wav" turns into "awav" and never matches.
    # A "<speaker>_" prefix, if present, is dropped as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = stem.rsplit('_', 1)[-1].rstrip('0123456789')
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    # Plot the sample and return the player.
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Show one sample recording from the CASIA dataset.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker
    directory and the first emotion directory returned by ``os.listdir``,
    picks the first ``*.wav`` file (falling back to ``*.peak`` files when no
    wav exists), prints its path and emotion directory name, and, for wav
    files only, loads and plots it.

    Returns:
        The value returned by ``explore_audio`` for a wav sample; ``None``
        when nothing usable is found, the sample is not a wav file, or
        loading/plotting fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirectories(parent):
        # Sub-directory names of `parent`, ignoring anything called '_desktop.ini'.
        names = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                names.append(entry)
        return names

    speakers = _subdirectories(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    # Emotion folders of the first speaker.
    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirectories(speaker_dir)
    if not emotion_dirs:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    emotion_name = os.path.basename(emotion_dir)

    # Prefer wav files; only look for .peak files when no wav is present.
    audio_files = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not audio_files:
        audio_files = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not audio_files:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Anything other than wav is only reported, not decoded.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Count the audio files of each dataset per emotion and print a report.

    For every dataset whose root directory exists, ``*.wav`` files are counted
    and grouped by emotion:

    * RAVDESS -- ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
      ``-``-separated field of the file name.
    * SAVEE -- ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
      letter code in front of the file number (layout assumed to match what
      ``explore_savee`` reads).
    * CASIA -- ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
      name of the enclosing directory (layout assumed to match what
      ``explore_casia`` reads).

    Datasets without any matching file are reported as missing. Nothing is
    returned; the report is printed.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def _count(dataset, emotion=None):
        # Register one file; a file without a decodable emotion only adds to the total.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: emotion code is the third field of e.g. 03-01-05-01-01-01-01.wav
    if os.path.exists(RAVDESS_PATH):
        ravdess_emotions = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        for actor_dir in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*')):
            for file_path in glob.glob(os.path.join(actor_dir, '*.wav')):
                parts = os.path.basename(file_path).split('-')
                if len(parts) >= 3:
                    _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
                else:
                    _count('RAVDESS')

    # SAVEE: previously never counted, so it was always reported as missing.
    if os.path.exists(SAVEE_PATH):
        savee_emotions = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Drop the trailing file number and any "<speaker>_" prefix.
            code = stem.rsplit('_', 1)[-1].rstrip('0123456789')
            _count('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted; the emotion is the parent folder name.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the report.
    print("数据集统计:")
    for dataset, data in stats.items():
        total = data['total']
        if total == 0:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
            continue
        print(f"\n{dataset} 数据集:")
        print(f" 总文件数: {total}")
        if data['emotions']:
            print(" 情感分布:")
            for emotion, count in data['emotions'].items():
                print(f" {emotion}: {count} 文件 ({count/total*100:.1f}%)")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset. A failure in one explorer is reported and
# must not prevent the remaining datasets from being explored.
from IPython.display import display

for dataset_label, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explorer()
        # The explorers return an audio player (or None). A value produced
        # inside a try block is not rendered automatically, so show it
        # explicitly instead of discarding it.
        if player is not None:
            display(player)
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load``.
    """
    print(f"加载音频文件: {file_path}")

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, rate = librosa.load(file_path, sr=None)
    sample_count = len(waveform)

    print(f"采样率: {rate}Hz")
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f} 秒")
    return waveform, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and spectrogram of a signal and return a player.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the same signal.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when ``RAVDESS_PATH`` does not exist or holds no ``Actor_01/*.wav``.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields paths in arbitrary order; sorting makes the sample stable.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    # A name with fewer than three fields has no emotion code; report it as
    # unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    The first speaker directory under ``SAVEE_PATH/AudioData`` (in sorted
    order) is used, and the first ``.wav`` file inside it is the sample.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when the path, a speaker directory or a ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (the glob also matches plain files) and sort,
    # because glob order is arbitrary.
    audio_dirs = sorted(
        path for path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(path)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme: a01.wav, where the leading letters encode the emotion.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Drop the extension before collecting letters; otherwise "a01.wav"
    # becomes "awav" and never matches the mapping.
    stem = os.path.splitext(filename)[0]
    # NOTE(review): some copies of the dataset reportedly prefix the speaker
    # id ("DC_a01"); taking the part after the last "_" is a no-op otherwise.
    stem = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in stem if c.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe one CASIA sample and plot it when it is a ``.wav`` file.

    The first sub-directory of ``CASIA_PATH`` is taken as the speaker, and
    its first sub-directory as the emotion.  ``.wav`` files are preferred;
    ``.peak`` files are only listed, not decoded.

    Returns:
        The result of ``explore_audio`` for a readable ``.wav`` sample,
        otherwise ``None``.
    """

    def _subdirs(parent):
        # Sub-directory names of ``parent``, skipping the '_desktop.ini' entry.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Fall back to .peak files only when the directory has no .wav at all.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {os.path.basename(emotion_dir)}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {os.path.basename(emotion_dir)}情感")
    except Exception as e:
        # Best effort: a decoding or plotting failure is reported, not raised.
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file totals and per-emotion counts for RAVDESS, SAVEE and CASIA.

    A dataset is scanned only when its root path exists.  A dataset with no
    matching ``.wav`` file is reported as missing.  Nothing is returned.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _record(dataset, emotion):
        # Add one file to the per-emotion counter of ``dataset``.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<name>.wav; the third dash-separated field of
    # the file name is the emotion code.
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files without an emotion field count toward the total only.
            if len(parts) >= 3:
                _record('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE: assumed layout <root>/AudioData/<speaker>/<name>.wav, the same
    # one explore_savee walks; the letters of the stem encode the emotion.
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶',
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha())
            _record('SAVEE', savee_names.get(code, '未知'))

    # CASIA: assumed layout <root>/<speaker>/<emotion>/<name>.wav, the same
    # one explore_casia walks; the emotion directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explorer()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` pair: the decoded samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is passed so librosa reports the rate stored in the file.
    samples, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(samples)
    print(f"音频长度: {n_samples} 采样点, {n_samples/rate:.2f} 秒")
    return samples, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a dB-scaled spectrogram, then return a player.

    Args:
        audio: Audio samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when ``RAVDESS_PATH`` does not exist or holds no ``Actor_01/*.wav``.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields paths in arbitrary order; sorting makes the sample stable.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    # A name with fewer than three fields has no emotion code; report it as
    # unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    The first speaker directory under ``SAVEE_PATH/AudioData`` (in sorted
    order) is used, and the first ``.wav`` file inside it is the sample.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when the path, a speaker directory or a ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (the glob also matches plain files) and sort,
    # because glob order is arbitrary.
    audio_dirs = sorted(
        path for path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(path)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme: a01.wav, where the leading letters encode the emotion.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Drop the extension before collecting letters; otherwise "a01.wav"
    # becomes "awav" and never matches the mapping.
    stem = os.path.splitext(filename)[0]
    # NOTE(review): some copies of the dataset reportedly prefix the speaker
    # id ("DC_a01"); taking the part after the last "_" is a no-op otherwise.
    stem = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in stem if c.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe one CASIA sample and plot it when it is a ``.wav`` file.

    The first sub-directory of ``CASIA_PATH`` is taken as the speaker, and
    its first sub-directory as the emotion.  ``.wav`` files are preferred;
    ``.peak`` files are only listed, not decoded.

    Returns:
        The result of ``explore_audio`` for a readable ``.wav`` sample,
        otherwise ``None``.
    """

    def _subdirs(parent):
        # Sub-directory names of ``parent``, skipping the '_desktop.ini' entry.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Fall back to .peak files only when the directory has no .wav at all.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {os.path.basename(emotion_dir)}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {os.path.basename(emotion_dir)}情感")
    except Exception as e:
        # Best effort: a decoding or plotting failure is reported, not raised.
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file totals and per-emotion counts for RAVDESS, SAVEE and CASIA.

    A dataset is scanned only when its root path exists.  A dataset with no
    matching ``.wav`` file is reported as missing.  Nothing is returned.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _record(dataset, emotion):
        # Add one file to the per-emotion counter of ``dataset``.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<name>.wav; the third dash-separated field of
    # the file name is the emotion code.
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files without an emotion field count toward the total only.
            if len(parts) >= 3:
                _record('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE: assumed layout <root>/AudioData/<speaker>/<name>.wav, the same
    # one explore_savee walks; the letters of the stem encode the emotion.
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶',
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha())
            _record('SAVEE', savee_names.get(code, '未知'))

    # CASIA: assumed layout <root>/<speaker>/<emotion>/<name>.wav, the same
    # one explore_casia walks; the emotion directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explorer()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` pair: the decoded samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is passed so librosa reports the rate stored in the file.
    samples, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(samples)
    print(f"音频长度: {n_samples} 采样点, {n_samples/rate:.2f} 秒")
    return samples, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a dB-scaled spectrogram, then return a player.

    Args:
        audio: Audio samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when ``RAVDESS_PATH`` does not exist or holds no ``Actor_01/*.wav``.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields paths in arbitrary order; sorting makes the sample stable.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    # A name with fewer than three fields has no emotion code; report it as
    # unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    The first speaker directory under ``SAVEE_PATH/AudioData`` (in sorted
    order) is used, and the first ``.wav`` file inside it is the sample.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when the path, a speaker directory or a ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (the glob also matches plain files) and sort,
    # because glob order is arbitrary.
    audio_dirs = sorted(
        path for path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(path)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme: a01.wav, where the leading letters encode the emotion.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Drop the extension before collecting letters; otherwise "a01.wav"
    # becomes "awav" and never matches the mapping.
    stem = os.path.splitext(filename)[0]
    # NOTE(review): some copies of the dataset reportedly prefix the speaker
    # id ("DC_a01"); taking the part after the last "_" is a no-op otherwise.
    stem = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in stem if c.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe one CASIA sample and plot it when it is a ``.wav`` file.

    The first sub-directory of ``CASIA_PATH`` is taken as the speaker, and
    its first sub-directory as the emotion.  ``.wav`` files are preferred;
    ``.peak`` files are only listed, not decoded.

    Returns:
        The result of ``explore_audio`` for a readable ``.wav`` sample,
        otherwise ``None``.
    """

    def _subdirs(parent):
        # Sub-directory names of ``parent``, skipping the '_desktop.ini' entry.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Fall back to .peak files only when the directory has no .wav at all.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {os.path.basename(emotion_dir)}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {os.path.basename(emotion_dir)}情感")
    except Exception as e:
        # Best effort: a decoding or plotting failure is reported, not raised.
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file totals and per-emotion counts for RAVDESS, SAVEE and CASIA.

    A dataset is scanned only when its root path exists.  A dataset with no
    matching ``.wav`` file is reported as missing.  Nothing is returned.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _record(dataset, emotion):
        # Add one file to the per-emotion counter of ``dataset``.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<name>.wav; the third dash-separated field of
    # the file name is the emotion code.
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files without an emotion field count toward the total only.
            if len(parts) >= 3:
                _record('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE: assumed layout <root>/AudioData/<speaker>/<name>.wav, the same
    # one explore_savee walks; the letters of the stem encode the emotion.
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶',
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha())
            _record('SAVEE', savee_names.get(code, '未知'))

    # CASIA: assumed layout <root>/<speaker>/<emotion>/<name>.wav, the same
    # one explore_casia walks; the emotion directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explorer()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` pair: the decoded samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is passed so librosa reports the rate stored in the file.
    samples, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(samples)
    print(f"音频长度: {n_samples} 采样点, {n_samples/rate:.2f} 秒")
    return samples, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a dB-scaled spectrogram, then return a player.

    Args:
        audio: Audio samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when ``RAVDESS_PATH`` does not exist or holds no ``Actor_01/*.wav``.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields paths in arbitrary order; sorting makes the sample stable.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    # A name with fewer than three fields has no emotion code; report it as
    # unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    The first speaker directory under ``SAVEE_PATH/AudioData`` (in sorted
    order) is used, and the first ``.wav`` file inside it is the sample.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when the path, a speaker directory or a ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (the glob also matches plain files) and sort,
    # because glob order is arbitrary.
    audio_dirs = sorted(
        path for path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(path)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme: a01.wav, where the leading letters encode the emotion.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Drop the extension before collecting letters; otherwise "a01.wav"
    # becomes "awav" and never matches the mapping.
    stem = os.path.splitext(filename)[0]
    # NOTE(review): some copies of the dataset reportedly prefix the speaker
    # id ("DC_a01"); taking the part after the last "_" is a no-op otherwise.
    stem = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in stem if c.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe one CASIA sample and plot it when it is a ``.wav`` file.

    The first sub-directory of ``CASIA_PATH`` is taken as the speaker, and
    its first sub-directory as the emotion.  ``.wav`` files are preferred;
    ``.peak`` files are only listed, not decoded.

    Returns:
        The result of ``explore_audio`` for a readable ``.wav`` sample,
        otherwise ``None``.
    """

    def _subdirs(parent):
        # Sub-directory names of ``parent``, skipping the '_desktop.ini' entry.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Fall back to .peak files only when the directory has no .wav at all.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {os.path.basename(emotion_dir)}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {os.path.basename(emotion_dir)}情感")
    except Exception as e:
        # Best effort: a decoding or plotting failure is reported, not raised.
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file totals and per-emotion counts for RAVDESS, SAVEE and CASIA.

    A dataset is scanned only when its root path exists.  A dataset with no
    matching ``.wav`` file is reported as missing.  Nothing is returned.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _record(dataset, emotion):
        # Add one file to the per-emotion counter of ``dataset``.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: <root>/Actor_*/<name>.wav; the third dash-separated field of
    # the file name is the emotion code.
    if os.path.exists(RAVDESS_PATH):
        ravdess_names = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files without an emotion field count toward the total only.
            if len(parts) >= 3:
                _record('RAVDESS', ravdess_names.get(parts[2], '未知'))

    # SAVEE: assumed layout <root>/AudioData/<speaker>/<name>.wav, the same
    # one explore_savee walks; the letters of the stem encode the emotion.
    if os.path.exists(SAVEE_PATH):
        savee_names = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶',
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha())
            _record('SAVEE', savee_names.get(code, '未知'))

    # CASIA: assumed layout <root>/<speaker>/<emotion>/<name>.wav, the same
    # one explore_casia walks; the emotion directory name is the label.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# Print the per-dataset statistics first.
analyze_datasets()

# Then show one sample per dataset. A failure in one dataset is reported and
# does not prevent the remaining datasets from being explored.
for dataset_name, explorer in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_name}数据集样例:")
        explorer()
    except Exception as e:
        print(f"{dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load one audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` pair: the decoded samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is passed so librosa reports the rate stored in the file.
    samples, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(samples)
    print(f"音频长度: {n_samples} 采样点, {n_samples/rate:.2f} 秒")
    return samples, rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a dB-scaled spectrogram, then return a player.

    Args:
        audio: Audio samples to visualise.
        sr: Sampling rate passed to the librosa display helpers.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the same samples.
    player = Audio(data=audio, rate=sr)
    return player
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Load, describe and plot one RAVDESS recording from ``Actor_01``.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when ``RAVDESS_PATH`` does not exist or holds no ``Actor_01/*.wav``.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields paths in arbitrary order; sorting makes the sample stable.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    # A name with fewer than three fields has no emotion code; report it as
    # unknown instead of raising IndexError.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Load, describe and plot one SAVEE recording.

    The first speaker directory under ``SAVEE_PATH/AudioData`` (in sorted
    order) is used, and the first ``.wav`` file inside it is the sample.

    Returns:
        Whatever ``explore_audio`` returns for the chosen file, or ``None``
        when the path, a speaker directory or a ``.wav`` file is missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (the glob also matches plain files) and sort,
    # because glob order is arbitrary.
    audio_dirs = sorted(
        path for path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(path)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # Naming scheme: a01.wav, where the leading letters encode the emotion.
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Drop the extension before collecting letters; otherwise "a01.wav"
    # becomes "awav" and never matches the mapping.
    stem = os.path.splitext(filename)[0]
    # NOTE(review): some copies of the dataset reportedly prefix the speaker
    # id ("DC_a01"); taking the part after the last "_" is a no-op otherwise.
    stem = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in stem if c.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Describe one CASIA sample and plot it when it is a ``.wav`` file.

    The first sub-directory of ``CASIA_PATH`` is taken as the speaker, and
    its first sub-directory as the emotion.  ``.wav`` files are preferred;
    ``.peak`` files are only listed, not decoded.

    Returns:
        The result of ``explore_audio`` for a readable ``.wav`` sample,
        otherwise ``None``.
    """

    def _subdirs(parent):
        # Sub-directory names of ``parent``, skipping the '_desktop.ini' entry.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    # Fall back to .peak files only when the directory has no .wav at all.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {os.path.basename(emotion_dir)}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {os.path.basename(emotion_dir)}情感")
    except Exception as e:
        # Best effort: a decoding or plotting failure is reported, not raised.
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file counts and emotion distributions for all three datasets.

    Each dataset is scanned for ``.wav`` files under its root directory
    (``RAVDESS_PATH``, ``SAVEE_PATH``, ``CASIA_PATH``):

    * RAVDESS: ``Actor_*/<file>``; the emotion is the third dash-separated
      field of the file name. Files with fewer than three fields count
      towards the total but not towards any emotion.
    * SAVEE: ``AudioData/<speaker>/<file>``; the emotion is the leading
      letter code of the file stem (``a01`` -> ``a``, ``sa03`` -> ``sa``).
    * CASIA: ``<speaker>/<emotion>/<file>``; the emotion is the name of the
      directory that contains the file.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when their directories existed.

    Returns:
        None. The statistics are written to stdout.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def _ravdess_label(path):
        # Name layout: modality-channel-emotion-intensity-statement-repetition-actor
        parts = os.path.basename(path).split('-')
        return ravdess_emotions.get(parts[2], '未知') if len(parts) >= 3 else None

    def _savee_label(path):
        # Work on the stem so the '.wav' extension cannot leak into the code;
        # an optional '<speaker>_' prefix is tolerated.
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(ch for ch in stem.split('_')[-1] if ch.isalpha())
        return savee_emotions.get(code, '未知')

    def _casia_label(path):
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (RAVDESS_PATH, os.path.join('Actor_*', '*.wav'), _ravdess_label),
        'SAVEE': (SAVEE_PATH, os.path.join('AudioData', '*', '*.wav'), _savee_label),
        'CASIA': (CASIA_PATH, os.path.join('*', '*', '*.wav'), _casia_label),
    }

    stats = {}
    for dataset, (root, pattern, label_of) in sources.items():
        files = glob.glob(os.path.join(root, pattern)) if os.path.exists(root) else []
        emotions = {}
        for path in files:
            label = label_of(path)
            if label is not None:
                emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    # Report the collected statistics.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset; a failure in one must not stop the others.
for _dataset_name, _explore in (("RAVDESS", explore_ravdess),
                                ("SAVEE", explore_savee),
                                ("CASIA", explore_casia)):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
多语言语音数据集读取与探索
|
||
"""
|
||
|
||
import os
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
import librosa
|
||
import librosa.display
|
||
from IPython.display import Audio
|
||
import glob
|
||
|
||
# 设置路径,根据实际情况修改
|
||
RAVDESS_PATH = './RAVDESS'
|
||
SAVEE_PATH = './SAVEE'
|
||
CASIA_PATH = './CAISA' # 注意:这是当前文件夹名,我们会在代码中处理路径
|
||
|
||
def load_audio_sample(file_path):
    """Load an audio file at its native sampling rate and report its size.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform samples and the sampling rate
        reported by ``librosa.load`` (``sr=None`` disables resampling).
    """
    print(f"加载音频文件: {file_path}")
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples/sample_rate:.2f} 秒")
    return waveform, sample_rate
|
||
|
||
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a recording.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object wrapping the same samples, so a
        notebook can render a player for them.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its own maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget for the audio.
    return Audio(data=audio, rate=sr)
|
||
|
||
# 示例:读取和探索RAVDESS数据集中的一个文件
|
||
def explore_ravdess():
    """Explore one sample recording from the RAVDESS dataset.

    Picks the alphabetically first ``.wav`` file in
    ``RAVDESS_PATH/Actor_01``, decodes its emotion from the file name and
    plots it.

    RAVDESS file names have seven dash-separated fields, e.g.
    ``03-01-01-01-01-01-01.wav``:
    modality-channel-emotion-intensity-statement-repetition-actor.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob() gives no ordering guarantee; sort so the same file is chosen on
    # every run and platform.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming convention is labelled
    # "unknown" instead of raising IndexError on the missing field.
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
|
||
|
||
# 示例:读取和探索SAVEE数据集中的一个文件
|
||
def explore_savee():
    """Explore one sample recording from the SAVEE dataset.

    Uses the layout ``SAVEE_PATH/AudioData/<speaker>/<file>.wav``. The first
    speaker directory (alphabetically) and its first ``.wav`` file are used.

    SAVEE file names start with an emotion letter code followed by a
    number, e.g. ``a01.wav`` (anger) or ``sa03.wav`` (sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset path, speaker directories or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Sorted so the chosen sample does not depend on directory listing order.
    audio_dirs = sorted(glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*')))
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"在 {speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }

    # Derive the code from the stem only. Filtering the full file name kept
    # the letters of the extension ('a01.wav' -> 'awav'), so no code ever
    # matched. An optional '<speaker>_' prefix is tolerated.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(ch for ch in stem.split('_')[-1] if ch.isalpha())
    emotion = emotion_mapping.get(emotion_id, '未知')

    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
|
||
|
||
# 示例:读取和探索CASIA数据集中的一个文件
|
||
def explore_casia():
    """Explore one sample recording from the CASIA dataset.

    Uses the layout ``CASIA_PATH/<speaker>/<emotion>/<files>``: the first
    speaker directory and the first emotion directory returned by
    ``os.listdir`` are used, and the emotion label is the directory name.

    Returns:
        Whatever ``explore_audio`` returns when a ``.wav`` sample was loaded
        and plotted; ``None`` in every other case (missing path, nothing
        found, non-wav sample, or a read error, all of which are reported
        with ``print``).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirectories(parent):
        # Directory entries only; '_desktop.ini' is excluded by name as well.
        return [entry for entry in os.listdir(parent)
                if os.path.isdir(os.path.join(parent, entry)) and entry != '_desktop.ini']

    speaker_names = _subdirectories(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirectories(speaker_dir)
    if not emotion_names:
        print(f"在 {speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])

    # Prefer .wav recordings; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"在 {emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    emotion_label = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav samples are handed to librosa.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav,无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
|
||
|
||
# 数据集统计分析
|
||
def analyze_datasets():
    """Print file counts and emotion distributions for all three datasets.

    Each dataset is scanned for ``.wav`` files under its root directory
    (``RAVDESS_PATH``, ``SAVEE_PATH``, ``CASIA_PATH``):

    * RAVDESS: ``Actor_*/<file>``; the emotion is the third dash-separated
      field of the file name. Files with fewer than three fields count
      towards the total but not towards any emotion.
    * SAVEE: ``AudioData/<speaker>/<file>``; the emotion is the leading
      letter code of the file stem (``a01`` -> ``a``, ``sa03`` -> ``sa``).
    * CASIA: ``<speaker>/<emotion>/<file>``; the emotion is the name of the
      directory that contains the file.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when their directories existed.

    Returns:
        None. The statistics are written to stdout.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def _ravdess_label(path):
        # Name layout: modality-channel-emotion-intensity-statement-repetition-actor
        parts = os.path.basename(path).split('-')
        return ravdess_emotions.get(parts[2], '未知') if len(parts) >= 3 else None

    def _savee_label(path):
        # Work on the stem so the '.wav' extension cannot leak into the code;
        # an optional '<speaker>_' prefix is tolerated.
        stem = os.path.splitext(os.path.basename(path))[0]
        code = ''.join(ch for ch in stem.split('_')[-1] if ch.isalpha())
        return savee_emotions.get(code, '未知')

    def _casia_label(path):
        return os.path.basename(os.path.dirname(path))

    sources = {
        'RAVDESS': (RAVDESS_PATH, os.path.join('Actor_*', '*.wav'), _ravdess_label),
        'SAVEE': (SAVEE_PATH, os.path.join('AudioData', '*', '*.wav'), _savee_label),
        'CASIA': (CASIA_PATH, os.path.join('*', '*', '*.wav'), _casia_label),
    }

    stats = {}
    for dataset, (root, pattern, label_of) in sources.items():
        files = glob.glob(os.path.join(root, pattern)) if os.path.exists(root) else []
        emotions = {}
        for path in files:
            label = label_of(path)
            if label is not None:
                emotions[label] = emotions.get(label, 0) + 1
        stats[dataset] = {'total': len(files), 'emotions': emotions}

    # Report the collected statistics.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f"  总文件数: {data['total']}")
            if data['emotions']:
                print("  情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f"    {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
|
||
|
||
# 执行数据集探索
|
||
analyze_datasets()

# Explore one sample per dataset; a failure in one must not stop the others.
for _dataset_name, _explore in (("RAVDESS", explore_ravdess),
                                ("SAVEE", explore_savee),
                                ("CASIA", explore_casia)):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _explore()
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
|
||
|
||
## 2. 特征工程
|
||
### 2.1 在下面空白处写出音高、估计调谐偏差指标的构建代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
音高和调谐偏差指标提取
|
||
"""
|
||
import numpy as np
|
||
import librosa
|
||
import matplotlib.pyplot as plt
|
||
|
||
def extract_pitch_features(audio, sr):
    """Summarise the dominant pitch track and the tuning offset of a clip.

    For every frame of ``librosa.piptrack`` the pitch of the bin with the
    largest magnitude is taken; frames whose dominant pitch is not positive
    are discarded before the statistics are computed.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.

    Returns:
        dict with ``pitch_mean``, ``pitch_std``, ``pitch_max``,
        ``pitch_min`` (all 0 when no frame has a positive pitch) and
        ``tuning_offset`` from ``librosa.estimate_tuning``.
    """
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)

    # Per frame: pitch at the strongest bin, keeping positive values only.
    strongest_bin = np.argmax(magnitudes, axis=0)
    dominant = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced = dominant[dominant > 0]

    features = {}
    if voiced.size:
        features['pitch_mean'] = np.mean(voiced)
        # A single value has no spread; report a plain 0 in that case.
        features['pitch_std'] = np.std(voiced) if voiced.size > 1 else 0
        features['pitch_max'] = np.max(voiced)
        features['pitch_min'] = np.min(voiced)
    else:
        for key in ('pitch_mean', 'pitch_std', 'pitch_max', 'pitch_min'):
            features[key] = 0

    features['tuning_offset'] = librosa.estimate_tuning(y=audio, sr=sr)
    return features
|
||
|
||
def visualize_pitch(audio, sr, title="音高分析"):
    """Plot the waveform and the dominant-pitch track of a recording.

    The pitch track holds, for each ``librosa.piptrack`` frame, the pitch of
    the bin with the largest magnitude; frames without a positive pitch are
    omitted. The estimated tuning offset is shown as a figure caption.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the subplot titles.
    """
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)

    # times_like() assumes sr=22050 unless told otherwise; pass the real
    # rate so the pitch track lines up with the waveform above it.
    times = librosa.times_like(pitches, sr=sr)

    strongest_bin = np.argmax(magnitudes, axis=0)
    dominant = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced = dominant > 0
    pitch_values = dominant[voiced]
    pitch_times = times[voiced]

    plt.figure(figsize=(12, 8))

    # Top panel: waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: pitch over time, with its mean as a dashed line.
    plt.subplot(2, 1, 2)
    if pitch_values.size:
        plt.plot(pitch_times, pitch_values, 'o-', markersize=1)
        plt.title(f"{title} - 音高变化")
        plt.xlabel("时间 (秒)")
        plt.ylabel("音高 (Hz)")

        mean_pitch = np.mean(pitch_values)
        plt.axhline(y=mean_pitch, color='r', linestyle='--', alpha=0.8,
                    label=f"平均音高: {mean_pitch:.1f} Hz")
        plt.legend()
    else:
        plt.text(0.5, 0.5, "未检测到音高", horizontalalignment='center',
                 verticalalignment='center', transform=plt.gca().transAxes)

    # Caption with the estimated tuning offset.
    tuning_offset = librosa.estimate_tuning(y=audio, sr=sr)
    plt.figtext(0.5, 0.01, f"调谐偏差: {tuning_offset:.4f} (±半音)",
                ha="center", fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})

    plt.tight_layout()
    plt.show()
|
||
|
||
# 测试函数 - 使用之前加载的样例
|
||
try:
    # Demo on one fixed RAVDESS recording, if it is available.
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        audio, sr = librosa.load(audio_file, sr=None)

        # Extract and list the pitch features.
        features = extract_pitch_features(audio, sr)
        print("音高特征:")
        for name, value in features.items():
            print(f"  {name}: {value:.4f}")

        # Plot the pitch track.
        filename = os.path.basename(audio_file)
        visualize_pitch(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"音高分析出错: {e}")
|
||
|
||
### 2.2 在下面空白处写出频谱质心、光谱平坦度指标的构建代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
频谱质心和光谱平坦度指标提取
|
||
"""
|
||
import numpy as np
|
||
import librosa
|
||
import matplotlib.pyplot as plt
|
||
|
||
def extract_spectral_features(audio, sr):
    """Summarise spectral centroid and spectral flatness over all frames.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio`` (used for the centroid only).

    Returns:
        dict with ``<name>_mean``, ``<name>_std``, ``<name>_max`` and
        ``<name>_min`` for ``spectral_centroid`` and ``spectral_flatness``,
        in that order.
    """
    frame_series = (
        ('spectral_centroid', librosa.feature.spectral_centroid(y=audio, sr=sr)[0]),
        ('spectral_flatness', librosa.feature.spectral_flatness(y=audio)[0]),
    )
    reducers = (('mean', np.mean), ('std', np.std), ('max', np.max), ('min', np.min))

    features = {}
    for prefix, values in frame_series:
        for suffix, reduce_fn in reducers:
            features[f'{prefix}_{suffix}'] = reduce_fn(values)
    return features
|
||
|
||
def visualize_spectral_features(audio, sr, title="频谱特征分析"):
    """Plot waveform, spectral centroid and spectral flatness over time.

    Each feature is computed once and reused for the plot, the mean line
    and the printed summary.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the subplot titles.
    """
    centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    flatness = librosa.feature.spectral_flatness(y=audio)[0]

    # times_like() assumes sr=22050 unless told otherwise; pass the real
    # rate so the feature curves line up with the waveform.
    times = librosa.times_like(centroids, sr=sr)
    times_flat = librosa.times_like(flatness, sr=sr)

    plt.figure(figsize=(12, 12))

    # Panel 1: waveform.
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Panel 2: spectral centroid on a log frequency axis, with its mean.
    plt.subplot(3, 1, 2)
    plt.semilogy(times, centroids, label='频谱质心')
    plt.title(f"{title} - 频谱质心随时间变化")
    plt.xlabel("时间 (秒)")
    plt.ylabel("频率 (Hz)")
    plt.grid(True, alpha=0.3)

    mean_centroid = np.mean(centroids)
    plt.axhline(y=mean_centroid, color='r', linestyle='--', alpha=0.8,
                label=f"平均频谱质心: {mean_centroid:.1f} Hz")
    plt.legend()

    # Panel 3: spectral flatness, with its mean.
    plt.subplot(3, 1, 3)
    plt.plot(times_flat, flatness, label='光谱平坦度')
    plt.title(f"{title} - 光谱平坦度随时间变化")
    plt.xlabel("时间 (秒)")
    plt.ylabel("平坦度")
    plt.ylim([0, np.max(flatness)*1.2])  # leave headroom above the peak
    plt.grid(True, alpha=0.3)

    mean_flatness = np.mean(flatness)
    plt.axhline(y=mean_flatness, color='r', linestyle='--', alpha=0.8,
                label=f"平均光谱平坦度: {mean_flatness:.4f}")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Printed summary of the same series.
    print("\n频谱特征统计:")
    print(f"  频谱质心均值: {mean_centroid:.2f} Hz")
    print(f"  频谱质心标准差: {np.std(centroids):.2f}")
    print(f"  光谱平坦度均值: {mean_flatness:.6f}")
    print(f"  光谱平坦度标准差: {np.std(flatness):.6f}")
|
||
|
||
# 测试函数 - 使用之前加载的样例
|
||
try:
    # Demo on one fixed RAVDESS recording, if it is available.
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        audio, sr = librosa.load(audio_file, sr=None)

        # Extract and list the spectral features.
        features = extract_spectral_features(audio, sr)
        print("频谱特征:")
        for name, value in features.items():
            print(f"  {name}: {value:.6f}")

        # Plot centroid and flatness.
        filename = os.path.basename(audio_file)
        visualize_spectral_features(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"频谱特征分析出错: {e}")
|
||
|
||
### 2.3 在下面空白处写出梅尔频率、光谱对比度指标的构建代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
梅尔频率和光谱对比度指标提取
|
||
"""
|
||
import numpy as np
|
||
import librosa
|
||
import librosa.display
|
||
import matplotlib.pyplot as plt
|
||
|
||
def extract_mfcc_contrast_features(audio, sr):
    """Summarise MFCCs, their deltas and the spectral contrast of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.

    Returns:
        dict, filled in this order:
        ``mfcc_<i>_{mean,std,max,min}`` for the 13 coefficients,
        ``mfcc_<i>_delta_{mean,std}`` (first-order differences),
        ``mfcc_<i>_delta2_{mean,std}`` (second-order differences),
        ``spectral_contrast_<b>_{mean,std}`` for each contrast band, and the
        overall ``spectral_contrast_{mean,std}``.
    """
    features = {}

    # Static MFCC statistics, one set per coefficient (1-based names).
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    for idx, coefficient in enumerate(mfccs, start=1):
        features[f'mfcc_{idx}_mean'] = np.mean(coefficient)
        features[f'mfcc_{idx}_std'] = np.std(coefficient)
        features[f'mfcc_{idx}_max'] = np.max(coefficient)
        features[f'mfcc_{idx}_min'] = np.min(coefficient)

    # First- and second-order differences of the MFCCs.
    derivatives = (
        ('delta', librosa.feature.delta(mfccs)),
        ('delta2', librosa.feature.delta(mfccs, order=2)),
    )
    for tag, derivative in derivatives:
        for idx, row in enumerate(derivative, start=1):
            features[f'mfcc_{idx}_{tag}_mean'] = np.mean(row)
            features[f'mfcc_{idx}_{tag}_std'] = np.std(row)

    # Spectral contrast: per-band statistics, then statistics over all bands.
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
    for band, row in enumerate(spectral_contrast, start=1):
        features[f'spectral_contrast_{band}_mean'] = np.mean(row)
        features[f'spectral_contrast_{band}_std'] = np.std(row)

    features['spectral_contrast_mean'] = np.mean(spectral_contrast)
    features['spectral_contrast_std'] = np.std(spectral_contrast)

    return features
|
||
|
||
def visualize_mfcc(audio, sr, title="MFCC特征分析"):
    """Plot the waveform, mel spectrogram and MFCCs of a recording.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the subplot titles.
    """
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr)

    plt.figure(figsize=(12, 10))

    # Panel 1: waveform.
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Panel 2: mel spectrogram in dB. specshow() assumes sr=22050 unless
    # told otherwise, so sr is passed to get correct time and mel axes.
    plt.subplot(3, 1, 2)
    librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max),
                             sr=sr, y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 梅尔频谱")

    # Panel 3: MFCC matrix; sr is needed here for the time axis as well.
    plt.subplot(3, 1, 3)
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - MFCC")

    plt.tight_layout()
    plt.show()
|
||
|
||
def visualize_spectral_contrast(audio, sr, title="光谱对比度分析"):
    """Plot spectral contrast over time and per band, then print statistics.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the first subplot title.
    """
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
    band_means = [np.mean(band) for band in contrast]

    plt.figure(figsize=(12, 8))

    # Top panel: contrast per band over time. specshow() assumes sr=22050
    # unless told otherwise, so sr is passed to get a correct time axis.
    plt.subplot(2, 1, 1)
    librosa.display.specshow(contrast, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - 光谱对比度")

    # Bottom panel: time-averaged contrast of each band.
    plt.subplot(2, 1, 2)
    plt.bar(range(contrast.shape[0]), band_means, color='skyblue')
    plt.xlabel("频带")
    plt.ylabel("平均光谱对比度")
    plt.title("各频带平均光谱对比度")

    plt.tight_layout()
    plt.show()

    # Printed summary: overall statistics, then one line per band.
    print("\n光谱对比度统计:")
    print(f"  总平均值: {np.mean(contrast):.4f}")
    print(f"  总标准差: {np.std(contrast):.4f}")
    print("  各频带统计:")
    for i in range(contrast.shape[0]):
        print(f"    频带{i+1}: 均值={np.mean(contrast[i]):.4f}, 标准差={np.std(contrast[i]):.4f}")
|
||
|
||
# 测试函数 - 使用之前加载的样例
|
||
try:
    # Demo on one fixed RAVDESS recording, if it is available.
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        audio, sr = librosa.load(audio_file, sr=None)

        features = extract_mfcc_contrast_features(audio, sr)

        # List only the first ten features, followed by an ellipsis line
        # once ten entries have been printed.
        print("梅尔频率和光谱对比度特征 (前10项):")
        for name, value in list(features.items())[:10]:
            print(f"  {name}: {value:.6f}")
        if len(features) >= 10:
            print("  ...")

        # Plot MFCCs and spectral contrast.
        filename = os.path.basename(audio_file)
        visualize_mfcc(audio, sr, f"RAVDESS音频 ({filename})")
        visualize_spectral_contrast(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"梅尔频率和光谱对比度分析出错: {e}")
|
||
|
||
### 2.4 在下面空白处写出均方根能量、谱熵、色谱图指标的构建代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
均方根能量、谱熵、色谱图指标的构建代码
|
||
"""
|
||
import numpy as np
|
||
import librosa
|
||
import librosa.display
|
||
import matplotlib.pyplot as plt
|
||
from scipy.stats import entropy
|
||
|
||
def extract_energy_entropy_chroma_features(audio, sr):
    """Summarise RMS energy, spectral entropy and chroma of a clip.

    Spectral entropy is computed per STFT frame: the magnitude spectrum
    (plus a small constant) is normalised to sum to one over frequency and
    passed to ``scipy.stats.entropy``.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio`` (used for the chroma only).

    Returns:
        dict with ``rms_{mean,std,max,min}``,
        ``spectral_entropy_{mean,std,max,min}``,
        ``chroma_<k>_{mean,std}`` for the 12 pitch classes, and the overall
        ``chroma_{mean,std}``, in that order.
    """
    reducers = (('mean', np.mean), ('std', np.std), ('max', np.max), ('min', np.min))
    features = {}

    # Frame-wise RMS energy.
    rms = librosa.feature.rms(y=audio)[0]
    for suffix, reduce_fn in reducers:
        features[f'rms_{suffix}'] = reduce_fn(rms)

    # Frame-wise spectral entropy; the offset avoids log(0) on empty bins.
    magnitude = np.abs(librosa.stft(audio)) + 1e-10
    distribution = magnitude / np.sum(magnitude, axis=0, keepdims=True)
    entropies = np.array([entropy(frame) for frame in distribution.T])
    for suffix, reduce_fn in reducers:
        features[f'spectral_entropy_{suffix}'] = reduce_fn(entropies)

    # Chroma: per pitch-class statistics, then statistics over all classes.
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
    for k in range(12):
        features[f'chroma_{k+1}_mean'] = np.mean(chroma[k])
        features[f'chroma_{k+1}_std'] = np.std(chroma[k])

    features['chroma_mean'] = np.mean(chroma)
    features['chroma_std'] = np.std(chroma)

    return features
|
||
|
||
def visualize_energy_entropy(audio, sr, title="能量与熵分析"):
    """Plot waveform, RMS energy and spectral entropy, then print statistics.

    Spectral entropy is computed per STFT frame from the magnitude spectrum
    (plus a small constant) normalised to sum to one over frequency.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the subplot titles.
    """
    rms = librosa.feature.rms(y=audio)[0]

    spec = np.abs(librosa.stft(audio)) + 1e-10  # offset avoids log(0)
    spec_norm = spec / np.sum(spec, axis=0, keepdims=True)
    entropies = np.array([entropy(frame) for frame in spec_norm.T])

    # times_like() assumes sr=22050 unless told otherwise; pass the real
    # rate so both curves line up with the waveform.
    times = librosa.times_like(rms, sr=sr)
    times_entropy = librosa.times_like(entropies, sr=sr)

    plt.figure(figsize=(12, 10))

    # Panel 1: waveform.
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Panel 2: RMS energy with its mean.
    plt.subplot(3, 1, 2)
    plt.plot(times, rms)
    plt.title(f"{title} - 均方根能量")
    plt.xlabel("时间 (秒)")
    plt.ylabel("RMS能量")
    plt.grid(True, alpha=0.3)

    mean_rms = np.mean(rms)
    plt.axhline(y=mean_rms, color='r', linestyle='--', alpha=0.8,
                label=f"平均RMS: {mean_rms:.4f}")
    plt.legend()

    # Panel 3: spectral entropy with its mean.
    plt.subplot(3, 1, 3)
    plt.plot(times_entropy, entropies)
    plt.title(f"{title} - 谱熵")
    plt.xlabel("时间 (秒)")
    plt.ylabel("熵")
    plt.grid(True, alpha=0.3)

    mean_entropy = np.mean(entropies)
    plt.axhline(y=mean_entropy, color='r', linestyle='--', alpha=0.8,
                label=f"平均熵: {mean_entropy:.4f}")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Printed summary.
    print("\n能量和熵统计:")
    print(f"  RMS能量均值: {mean_rms:.6f}")
    print(f"  RMS能量标准差: {np.std(rms):.6f}")
    print(f"  谱熵均值: {mean_entropy:.6f}")
    print(f"  谱熵标准差: {np.std(entropies):.6f}")
|
||
|
||
def visualize_chroma(audio, sr, title="色谱图分析"):
    """Plot the chromagram and per-note averages, then print statistics.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the first subplot title.
    """
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
    chroma_notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    plt.figure(figsize=(12, 8))

    # Top panel: chromagram. specshow() assumes sr=22050 unless told
    # otherwise, so sr is passed to get a correct time axis.
    plt.subplot(2, 1, 1)
    librosa.display.specshow(chroma, sr=sr, y_axis='chroma', x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - 色谱图")

    # Bottom panel: time-averaged value of each of the 12 chroma rows.
    plt.subplot(2, 1, 2)
    plt.bar(chroma_notes, [np.mean(chroma[i]) for i in range(12)], color='skyblue')
    plt.xlabel("音符")
    plt.ylabel("平均能量")
    plt.title("各色度平均能量")

    plt.tight_layout()
    plt.show()

    # Printed summary: overall statistics, then one line per note.
    print("\n色度统计:")
    print(f"  总平均值: {np.mean(chroma):.4f}")
    print(f"  总标准差: {np.std(chroma):.4f}")
    print("  各音符统计:")
    for i, note in enumerate(chroma_notes):
        print(f"    {note}: 均值={np.mean(chroma[i]):.4f}, 标准差={np.std(chroma[i]):.4f}")
|
||
|
||
# 测试函数 - 使用之前加载的样例
|
||
try:
    # Demo on one fixed RAVDESS recording, if it is available.
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        audio, sr = librosa.load(audio_file, sr=None)

        # Extract and list the features.
        features = extract_energy_entropy_chroma_features(audio, sr)
        print("均方根能量、谱熵、色谱图特征:")
        for name, value in features.items():
            print(f"  {name}: {value:.6f}")

        # Plot energy/entropy and chroma.
        filename = os.path.basename(audio_file)
        visualize_energy_entropy(audio, sr, f"RAVDESS音频 ({filename})")
        visualize_chroma(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"均方根能量、谱熵、色谱图分析出错: {e}")
|
||
|
||
### 2.5 在下面空白处写出特征数据处理代码,包括标准化、二维数据转三维数据、标签数值化、划分数据集代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
特征数据处理代码
|
||
"""
|
||
import numpy as np
|
||
import pandas as pd
|
||
import pickle
|
||
import os
|
||
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
||
from sklearn.model_selection import train_test_split
|
||
import matplotlib.pyplot as plt
|
||
|
||
def extract_all_features(audio, sr):
    """Run every feature extractor on a clip and merge the results.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.

    Returns:
        One dict holding the pitch, spectral, MFCC/contrast and
        energy/entropy/chroma features. Extractors run in that order; on a
        key clash the later extractor's value is kept.
    """
    extractors = (
        extract_pitch_features,
        extract_spectral_features,
        extract_mfcc_contrast_features,
        extract_energy_entropy_chroma_features,
    )
    combined = {}
    for extractor in extractors:
        combined.update(extractor(audio, sr))
    return combined
|
||
|
||
def features_to_matrix(features_list):
    """Convert a list of feature dicts into a dense feature matrix.

    Args:
        features_list: List of ``{feature_name: value}`` dicts, one per
            sample. The dicts need not share the same keys.

    Returns:
        ``(X, feature_names)``: ``feature_names`` is the sorted union of all
        keys, and ``X`` is a float array of shape
        ``(len(features_list), len(feature_names))`` in which features
        missing from a sample are left at 0.
    """
    feature_names = sorted({name for sample in features_list for name in sample})
    column_of = {name: col for col, name in enumerate(feature_names)}

    X = np.zeros((len(features_list), len(feature_names)))
    for row, sample in enumerate(features_list):
        for name, value in sample.items():
            X[row, column_of[name]] = value

    return X, feature_names
|
||
|
||
def features_to_dataframe(features_list, labels=None):
    """Convert a list of feature dicts into a pandas DataFrame.

    Args:
        features_list: List of ``{feature_name: value}`` dicts, one per row.
        labels: Optional labels, stored in a ``label`` column when given.

    Returns:
        The DataFrame, with the extra ``label`` column if ``labels`` is not
        ``None``.
    """
    frame = pd.DataFrame(features_list)
    if labels is None:
        return frame

    frame['label'] = labels
    return frame
|
||
|
||
def standardize_features(X_train, X_val=None, X_test=None):
    """Standardise feature matrices with statistics from the training set.

    The ``StandardScaler`` is fitted on ``X_train`` only and then applied
    to every split that was supplied.

    Args:
        X_train: Training feature matrix.
        X_val: Optional validation feature matrix.
        X_test: Optional test feature matrix.

    Returns:
        ``(X_train_norm, X_val_norm, X_test_norm, scaler)``; the entry for a
        split that was not supplied is ``None``.
    """
    scaler = StandardScaler()
    scaler.fit(X_train)

    def _scale(split):
        # Splits that were not supplied stay None.
        return None if split is None else scaler.transform(split)

    X_train_norm = scaler.transform(X_train)
    return X_train_norm, _scale(X_val), _scale(X_test), scaler
|
||
|
||
def reshape_for_lstm(X):
    """Insert a single time-step axis so the matrix fits an LSTM input.

    Args:
        X: Feature array whose first two axes are (samples, features).

    Returns:
        The data reshaped to ``(samples, 1, features)``.
    """
    n_samples = X.shape[0]
    n_features = X.shape[1]
    target_shape = (n_samples, 1, n_features)
    return X.reshape(target_shape)
|
||
|
||
def encode_labels(y):
    """Map labels to integer codes with a freshly fitted ``LabelEncoder``.

    Args:
        y: Sequence of labels.

    Returns:
        ``(y_encoded, encoder)``: the integer-coded labels and the fitted
        encoder, which can map codes back to the original labels.
    """
    label_encoder = LabelEncoder()
    codes = label_encoder.fit_transform(y)
    return codes, label_encoder
|
||
|
||
def split_dataset(X, y, test_size=0.2, val_size=0.1, random_state=42):
    """Split data into stratified training, validation and test sets.

    The test set is split off first; the validation set is then taken from
    the remainder, with its fraction rescaled so that ``val_size`` still
    refers to the full dataset.

    Args:
        X: Feature matrix.
        y: Labels, also used for stratification.
        test_size: Fraction of all samples used for the test set.
        val_size: Fraction of all samples used for the validation set.
        random_state: Seed passed to both splits.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # Stage 1: hold out the test set.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Stage 2: carve the validation set out of what is left; the fraction is
    # expressed relative to the remaining samples.
    remaining_fraction = 1 - test_size
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_size / remaining_fraction,
        random_state=random_state, stratify=y_rest)

    return X_train, X_val, X_test, y_train, y_val, y_test
|
||
|
||
def save_preprocessing_objects(scaler, encoder, feature_names, output_dir='output/emotion_model'):
    """Pickle the preprocessing artefacts into ``output_dir``.

    Writes ``feature_scaler.pkl``, ``emotion_encoder.pkl`` and
    ``feature_names.pkl``; the directory is created if it does not exist.

    Args:
        scaler: Fitted feature scaler.
        encoder: Fitted label encoder.
        feature_names: List of feature names.
        output_dir: Target directory.
    """
    os.makedirs(output_dir, exist_ok=True)

    artefacts = (
        ('feature_scaler.pkl', scaler),
        ('emotion_encoder.pkl', encoder),
        ('feature_names.pkl', feature_names),
    )
    for file_name, payload in artefacts:
        with open(os.path.join(output_dir, file_name), 'wb') as handle:
            pickle.dump(payload, handle)

    print(f"预处理对象已保存到: {output_dir}")
|
||
|
||
def visualize_features_distribution(X, y, y_encoder, title="特征分布"):
    """Plot per-class feature means and the distribution of feature 0.

    The left panel is a heat map of the class-wise mean of the first ten
    features; the right panel is a box plot of the first feature per class.

    Args:
        X: Feature matrix, one row per sample.
        y: Integer-coded labels; class ``i`` is selected with ``y == i``.
        y_encoder: Label encoder whose ``classes_`` supplies the class names.
        title: Prefix used in both subplot titles.
    """
    class_names = y_encoder.classes_
    class_ids = range(len(class_names))
    n_features = 10  # number of leading features shown in the heat map

    # One row of feature means per class.
    class_means = np.array([np.mean(X[y == class_id], axis=0) for class_id in class_ids])
    # Values of the first feature, grouped by class.
    box_data = [X[y == class_id, 0] for class_id in class_ids]

    plt.figure(figsize=(12, 6))

    # Left panel: heat map of class-wise means.
    plt.subplot(1, 2, 1)
    plt.imshow(class_means[:, :n_features], aspect='auto', cmap='viridis')
    plt.colorbar()
    plt.xlabel('特征索引')
    plt.ylabel('情感类别')
    plt.title(f"{title} - 前{n_features}个特征均值")
    plt.yticks(class_ids, class_names)

    # Right panel: box plot of the first feature.
    plt.subplot(1, 2, 2)
    plt.boxplot(box_data, labels=class_names)
    plt.xlabel('情感类别')
    plt.ylabel('特征值')
    plt.title(f"{title} - 第1个特征的分布")

    plt.tight_layout()
    plt.show()
|
||
|
||
# 测试数据处理流程
|
||
try:
    # End-to-end run of the preprocessing pipeline on synthetic data; in
    # real use the features would come from the audio files.
    print("模拟加载和处理多语言语音数据...")

    n_samples = 100
    n_features = 193  # assumed feature count for the synthetic matrix

    # Random features first, then random labels (same draw order as before).
    X_random = np.random.rand(n_samples, n_features)
    emotions = ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise']
    y_random = np.random.choice(emotions, size=n_samples)

    # Labels -> integer codes, then the stratified three-way split.
    y_encoded, label_encoder = encode_labels(y_random)
    X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(X_random, y_encoded)

    print("数据集划分完成:")
    print(f"  训练集: {X_train.shape[0]} 样本")
    print(f"  验证集: {X_val.shape[0]} 样本")
    print(f"  测试集: {X_test.shape[0]} 样本")

    # Standardise with training-set statistics.
    X_train_norm, X_val_norm, X_test_norm, scaler = standardize_features(X_train, X_val, X_test)
    print("特征标准化完成")

    # Add the time-step axis expected by the LSTM.
    X_train_reshaped, X_val_reshaped, X_test_reshaped = (
        reshape_for_lstm(split) for split in (X_train_norm, X_val_norm, X_test_norm)
    )

    print("数据重塑为LSTM格式:")
    print(f"  训练集形状: {X_train_reshaped.shape}")
    print(f"  验证集形状: {X_val_reshaped.shape}")
    print(f"  测试集形状: {X_test_reshaped.shape}")

    # Persist scaler, encoder and feature names.
    feature_names = [f'feature_{i}' for i in range(n_features)]
    save_preprocessing_objects(scaler, label_encoder, feature_names)

    # Plot the (unstandardised) synthetic features per class.
    visualize_features_distribution(X_random, y_encoded, label_encoder, "随机生成的特征数据")

except Exception as e:
    print(f"特征处理出错: {e}")
|
||
|
||
## 3. 分类预测模型构建与分析
|
||
### 3.1 在下面空白处写出搭建的LSTM神经网络模型的代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
LSTM神经网络模型构建
|
||
"""
|
||
import tensorflow as tf
|
||
from tensorflow.keras.models import Sequential, Model
|
||
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
|
||
from tensorflow.keras.optimizers import Adam
|
||
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
|
||
from tensorflow.keras.utils import to_categorical
|
||
import matplotlib.pyplot as plt
|
||
import numpy as np
|
||
import pickle
|
||
import os
|
||
|
||
class EmotionModel:
    """Stacked-LSTM emotion classifier with an optional language head.

    In single-task mode the network is a ``Sequential`` model ending in a
    softmax over ``num_emotions`` classes.  With ``include_language=True`` a
    two-output functional model is built instead: both heads share the LSTM
    trunk and are named ``emotion_output`` and ``language_output``.
    """

    def __init__(self, input_shape, num_emotions, num_languages=None, include_language=False):
        """Store the configuration; no Keras objects are created here.

        Args:
            input_shape: Shape of one sample, ``(timesteps, n_features)``.
            num_emotions: Number of emotion classes.
            num_languages: Number of language classes (only needed when
                ``include_language`` is true).
            include_language: Whether to add the language-classification head.
        """
        self.input_shape = input_shape
        self.num_emotions = num_emotions
        self.num_languages = num_languages
        self.include_language = include_language
        self.model = None
        self.history = None

    def _require_model(self):
        """Fail with a clear message when the model has not been built yet."""
        if self.model is None:
            raise RuntimeError("模型尚未构建,请先调用 build_model()")

    def _language_targets(self, lang_labels, n_samples):
        """Return one-hot language targets for the auxiliary head.

        When ``lang_labels`` is ``None`` an all-zero placeholder is returned
        (the historical behaviour) and a warning is issued, because such
        targets make the language loss meaningless.
        """
        if lang_labels is None:
            import warnings

            warnings.warn(
                "未提供语言标签,语言分支将使用全零占位标签",
                RuntimeWarning,
                stacklevel=3,
            )
            return np.zeros((n_samples, self.num_languages))
        return to_categorical(lang_labels, num_classes=self.num_languages)

    def build_model(self):
        """Build the Keras model and return it.

        Raises:
            ValueError: If the language head is requested without a positive
                ``num_languages``.
        """
        if self.include_language and not self.num_languages:
            raise ValueError("include_language=True 时必须提供正整数 num_languages")

        if not self.include_language:
            # Single-task model: emotion classification only.
            self.model = Sequential([
                # First recurrent layer keeps the full sequence for stacking.
                LSTM(128, input_shape=self.input_shape, return_sequences=True),
                BatchNormalization(),
                Dropout(0.4),

                # Second recurrent layer collapses the sequence to a vector.
                LSTM(64, return_sequences=False),
                BatchNormalization(),
                Dropout(0.4),

                # Fully connected block.
                Dense(64, activation='relu'),
                BatchNormalization(),
                Dropout(0.4),

                # Output layer.
                Dense(self.num_emotions, activation='softmax', name='emotion_output')
            ])
        else:
            # Multi-task model: emotion and language classification.
            inputs = Input(shape=self.input_shape)

            # Shared trunk.
            shared = LSTM(128, return_sequences=True)(inputs)
            shared = BatchNormalization()(shared)
            shared = Dropout(0.4)(shared)

            shared = LSTM(64, return_sequences=False)(shared)
            shared = BatchNormalization()(shared)
            shared = Dropout(0.4)(shared)

            # Emotion branch.
            emotion = Dense(64, activation='relu')(shared)
            emotion = BatchNormalization()(emotion)
            emotion = Dropout(0.4)(emotion)
            emotion_output = Dense(
                self.num_emotions, activation='softmax', name='emotion_output'
            )(emotion)

            # Language branch.
            language = Dense(32, activation='relu')(shared)
            language = BatchNormalization()(language)
            language = Dropout(0.4)(language)
            language_output = Dense(
                self.num_languages, activation='softmax', name='language_output'
            )(language)

            self.model = Model(inputs=inputs, outputs=[emotion_output, language_output])

        return self.model

    def compile_model(self, learning_rate=0.001):
        """Compile the model with Adam and categorical cross-entropy.

        Args:
            learning_rate: Learning rate passed to the Adam optimizer.

        Returns:
            The compiled Keras model.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        self._require_model()
        optimizer = Adam(learning_rate=learning_rate)

        if not self.include_language:
            self.model.compile(
                optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy']
            )
        else:
            self.model.compile(
                optimizer=optimizer,
                loss={
                    'emotion_output': 'categorical_crossentropy',
                    'language_output': 'categorical_crossentropy'
                },
                metrics={
                    'emotion_output': 'accuracy',
                    'language_output': 'accuracy'
                },
                # The emotion task dominates the combined loss.
                loss_weights={
                    'emotion_output': 0.7,
                    'language_output': 0.3
                }
            )

        return self.model

    def train(self, X_train, y_train, X_val, y_val, epochs=100, batch_size=32,
              save_dir='output/emotion_model', y_train_lang=None, y_val_lang=None):
        """Fit the model, then save weights, the full model and its config.

        Args:
            X_train: Training features.
            y_train: Integer emotion labels for the training set.
            X_val: Validation features.
            y_val: Integer emotion labels for the validation set.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.
            save_dir: Directory receiving ``best_model_weights.h5``,
                ``emotion_model.h5`` and ``config.pkl``.
            y_train_lang: Optional integer language labels for the training
                set (multi-task mode only).
            y_val_lang: Optional integer language labels for the validation
                set (multi-task mode only).

        Returns:
            The Keras ``History`` object returned by ``fit``.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        self._require_model()
        os.makedirs(save_dir, exist_ok=True)

        # Keep the weights of the epoch with the best validation accuracy.
        # NOTE(review): newer Keras releases appear to require a
        # ``.weights.h5`` suffix with save_weights_only=True; the file name is
        # kept because other code may load it by this name -- confirm.
        checkpoint_path = os.path.join(save_dir, 'best_model_weights.h5')
        monitored = 'val_emotion_output_accuracy' if self.include_language else 'val_accuracy'
        checkpoint = ModelCheckpoint(
            checkpoint_path,
            monitor=monitored,
            save_best_only=True,
            save_weights_only=True,
            mode='max',
            verbose=1
        )

        # Stop once the validation loss has not improved for 15 epochs.
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True,
            verbose=1
        )
        callbacks = [checkpoint, early_stopping]

        # Integer labels -> one-hot targets.
        y_train_cat = to_categorical(y_train, num_classes=self.num_emotions)
        y_val_cat = to_categorical(y_val, num_classes=self.num_emotions)

        if not self.include_language:
            train_targets = y_train_cat
            val_targets = y_val_cat
        else:
            train_targets = {
                'emotion_output': y_train_cat,
                'language_output': self._language_targets(y_train_lang, np.shape(y_train)[0]),
            }
            val_targets = {
                'emotion_output': y_val_cat,
                'language_output': self._language_targets(y_val_lang, np.shape(y_val)[0]),
            }

        self.history = self.model.fit(
            X_train, train_targets,
            validation_data=(X_val, val_targets),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        # Save the complete model.
        self.model.save(os.path.join(save_dir, 'emotion_model.h5'))

        # Save the configuration needed to rebuild the model.
        config = {
            'input_shape': self.input_shape,
            'num_emotions': self.num_emotions,
            'num_languages': self.num_languages,
            'include_language': self.include_language,
            'num_features': self.input_shape[1]
        }
        with open(os.path.join(save_dir, 'config.pkl'), 'wb') as f:
            pickle.dump(config, f)

        print(f"模型和配置已保存到: {save_dir}")

        return self.history

    def evaluate(self, X_test, y_test, y_test_lang=None):
        """Evaluate the model on a test set.

        Args:
            X_test: Test features.
            y_test: Integer emotion labels.
            y_test_lang: Optional integer language labels (multi-task only).

        Returns:
            ``{'loss', 'accuracy'}`` in single-task mode, or
            ``{'loss', 'emotion_accuracy', 'language_accuracy'}`` in
            multi-task mode.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        self._require_model()
        y_test_cat = to_categorical(y_test, num_classes=self.num_emotions)

        if not self.include_language:
            results = self.model.evaluate(X_test, y_test_cat)
            return {
                'loss': results[0],
                'accuracy': results[1]
            }

        y_test_lang_cat = self._language_targets(y_test_lang, np.shape(y_test)[0])

        # Look the metrics up by name: in the positional result list the two
        # per-output losses come before the accuracies, so fixed indices are
        # easy to get wrong.
        results = self.model.evaluate(
            X_test,
            {'emotion_output': y_test_cat, 'language_output': y_test_lang_cat},
            return_dict=True
        )
        return {
            'loss': results['loss'],
            'emotion_accuracy': results['emotion_output_accuracy'],
            'language_accuracy': results['language_output_accuracy']
        }

    def predict(self, X):
        """Predict emotion classes.

        Args:
            X: Input features.

        Returns:
            ``(y_pred, y_pred_probs)``: arg-max class ids and the emotion
            probabilities they were derived from.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        self._require_model()
        if self.include_language:
            # The language head's prediction is not needed here.
            emotion_probs, _ = self.model.predict(X)
        else:
            emotion_probs = self.model.predict(X)
        return np.argmax(emotion_probs, axis=1), emotion_probs

    def plot_training_history(self, save_path=None):
        """Plot accuracy and loss curves of the last training run.

        Args:
            save_path: Optional path; when given the figure is also saved.
        """
        if self.history is None:
            print("请先训练模型")
            return

        # In multi-task mode only the emotion head's curves are shown.
        prefix = 'emotion_output_' if self.include_language else ''
        curves = self.history.history

        plt.figure(figsize=(12, 4))

        # Accuracy curves.
        plt.subplot(1, 2, 1)
        plt.plot(curves[f'{prefix}accuracy'], label='训练准确率')
        plt.plot(curves[f'val_{prefix}accuracy'], label='验证准确率')
        plt.title('模型准确率')
        plt.xlabel('Epoch')
        plt.ylabel('准确率')
        plt.legend()

        # Loss curves.
        plt.subplot(1, 2, 2)
        plt.plot(curves[f'{prefix}loss'], label='训练损失')
        plt.plot(curves[f'val_{prefix}loss'], label='验证损失')
        plt.title('模型损失')
        plt.xlabel('Epoch')
        plt.ylabel('损失')
        plt.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path)
            print(f"训练历史图保存到: {save_path}")

        plt.show()
|
||
|
||
# Smoke-test the LSTM model definition on random data.
try:
    print("创建并测试LSTM模型...")

    # Problem size.
    n_samples, n_features, n_emotions = 100, 193, 6

    # Random data, already in LSTM layout (samples, timesteps, features).
    X_random = np.random.rand(n_samples, 1, n_features)
    y_random = np.random.randint(0, n_emotions, size=n_samples)

    # 70 / 15 / 15 split by position.
    X_train, X_val, X_test = np.split(X_random, [70, 85])
    y_train, y_val, y_test = np.split(y_random, [70, 85])

    # Create the model; the input shape is (timesteps, features).
    model = EmotionModel(input_shape=X_train.shape[1:], num_emotions=n_emotions)

    # Build and compile it.
    model.build_model()
    model.compile_model()

    # Print the architecture.
    model.model.summary()

    print("\n模型构建成功!在实际应用中,接下来应该进行模型训练和评估。")

except Exception as exc:
    print(f"LSTM模型测试出错: {exc}")
|
||
|
||
### 3.2 在下面空白处写出模型训练及模型评估代码
|
||
|
||
# In[ ]:
|
||
|
||
|
||
"""
|
||
模型训练及评估代码 - 使用真实数据集
|
||
"""
|
||
import numpy as np
|
||
import matplotlib.pyplot as plt
|
||
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
|
||
import seaborn as sns
|
||
import tensorflow as tf
|
||
from tensorflow.keras.utils import to_categorical
|
||
import pickle
|
||
import os
|
||
import glob
|
||
import librosa
|
||
import pandas as pd
|
||
from tqdm.notebook import tqdm
|
||
from sklearn.preprocessing import LabelEncoder
|
||
from sklearn.model_selection import train_test_split
|
||
from time import time
|
||
|
||
def load_ravdess_data(ravdess_path):
    """Collect RAVDESS audio files together with their emotion labels.

    Files are expected at ``<ravdess_path>/Actor_*/*.wav`` and named like
    ``03-01-01-01-01-01-01.wav`` (modality-channel-emotion-intensity-
    statement-repetition-actor); the third field is the emotion code.
    Only the six emotions in the mapping below are kept.

    The result is sorted by path so that it does not depend on the
    filesystem's directory order; seeded sub-sampling downstream is only
    reproducible if the input order is stable.

    Args:
        ravdess_path: Root directory of the RAVDESS dataset.

    Returns:
        ``(audio_files, emotion_labels)``: two parallel lists.
    """
    # Emotion code (third filename field) -> label.  Codes missing from the
    # mapping are skipped on purpose.
    emotion_map = {
        '01': 'neutral',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fear',
        '08': 'surprise'
    }

    audio_files = []
    emotion_labels = []

    pattern = os.path.join(ravdess_path, 'Actor_*', '*.wav')
    for wav_file in sorted(glob.glob(pattern)):
        parts = os.path.basename(wav_file).split('-')

        # Ignore files that do not follow the seven-field naming scheme.
        if len(parts) != 7:
            continue

        emotion = emotion_map.get(parts[2])
        if emotion is None:
            continue

        audio_files.append(wav_file)
        emotion_labels.append(emotion)

    return audio_files, emotion_labels
|
||
|
||
def load_savee_data(savee_path):
    """Collect SAVEE audio files together with their emotion labels.

    Files are expected at ``<savee_path>/AudioData/<speaker>/<code><nn>.wav``,
    e.g. ``a01.wav`` or ``sa12.wav``, where the letters before the digits
    encode the emotion.  ``disgust`` recordings are skipped because the
    other datasets have no such class.

    The emotion code is taken from the file name *without* its extension;
    including the extension would turn ``a01.wav`` into ``awav`` and no file
    would ever match the mapping.

    Args:
        savee_path: Root directory of the SAVEE dataset.

    Returns:
        ``(audio_files, emotion_labels)``: two parallel lists, sorted by
        speaker and file name.

    Raises:
        OSError: If ``<savee_path>/AudioData`` cannot be listed.
    """
    # Filename prefix -> label.
    emotion_map = {
        'a': 'angry',
        'd': 'disgust',  # recognised, but filtered out below
        'f': 'fear',
        'h': 'happy',
        'n': 'neutral',
        'sa': 'sad',
        'su': 'surprise'
    }

    audio_files = []
    emotion_labels = []

    audiodata_path = os.path.join(savee_path, 'AudioData')
    # Only directories are speakers; plain files such as Info.txt are ignored.
    speaker_dirs = sorted(
        d for d in os.listdir(audiodata_path)
        if os.path.isdir(os.path.join(audiodata_path, d))
    )

    for speaker in speaker_dirs:
        pattern = os.path.join(audiodata_path, speaker, '*.wav')
        for wav_file in sorted(glob.glob(pattern)):
            stem = os.path.splitext(os.path.basename(wav_file))[0]
            # Drop the trailing utterance number to obtain the emotion code.
            emotion_code = stem.rstrip('0123456789')

            emotion = emotion_map.get(emotion_code)
            if emotion is None or emotion == 'disgust':
                continue

            audio_files.append(wav_file)
            emotion_labels.append(emotion)

    return audio_files, emotion_labels
|
||
|
||
def load_casia_data(casia_path):
    """Collect CASIA audio files together with their emotion labels.

    Files are expected at ``<casia_path>/<speaker>/<emotion>/*.wav``; the
    emotion is the name of the directory holding the file.  Directories
    whose name is not a known emotion are ignored, and only ``.wav`` files
    are collected.

    Speakers, emotions and files are visited in sorted order so the result
    does not depend on the filesystem's directory order.

    Args:
        casia_path: Root directory of the CASIA dataset.

    Returns:
        ``(audio_files, emotion_labels)``: two parallel lists.

    Raises:
        OSError: If ``casia_path`` cannot be listed.
    """
    # Directory name -> label (currently an identity mapping).
    emotion_map = {
        'angry': 'angry',
        'fear': 'fear',
        'happy': 'happy',
        'neutral': 'neutral',
        'sad': 'sad',
        'surprise': 'surprise'
    }

    def _subdirs(parent):
        """Return the sorted names of the directories directly under parent."""
        return sorted(
            d for d in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, d))
        )

    audio_files = []
    emotion_labels = []

    for speaker in _subdirs(casia_path):
        speaker_path = os.path.join(casia_path, speaker)

        for emotion_dir in _subdirs(speaker_path):
            emotion = emotion_map.get(emotion_dir)
            if emotion is None:
                continue

            pattern = os.path.join(speaker_path, emotion_dir, '*.wav')
            for wav_file in sorted(glob.glob(pattern)):
                audio_files.append(wav_file)
                emotion_labels.append(emotion)

    return audio_files, emotion_labels
|
||
|
||
def load_all_datasets(ravdess_path='./RAVDESS', savee_path='./SAVEE', casia_path='./CAISA'):
    """Load RAVDESS, SAVEE and CASIA and merge them into parallel lists.

    A dataset whose path does not exist, or whose loader raises, is reported
    on stdout and skipped; the remaining datasets are still loaded.

    Args:
        ravdess_path: Root directory of the RAVDESS dataset.
        savee_path: Root directory of the SAVEE dataset.
        casia_path: Root directory of the CASIA dataset.

    Returns:
        ``(audio_files, emotion_labels, dataset_labels)``: three parallel
        lists; ``dataset_labels`` holds ``'ravdess'``, ``'savee'`` or
        ``'casia'`` for each file.

    Raises:
        ValueError: If no audio file could be loaded from any dataset.
    """
    # (display name, dataset tag, root path, loader).  The loaders are wrapped
    # in lambdas so that they are only looked up inside the try block below.
    sources = (
        ("RAVDESS", "ravdess", ravdess_path, lambda root: load_ravdess_data(root)),
        ("SAVEE", "savee", savee_path, lambda root: load_savee_data(root)),
        ("CASIA", "casia", casia_path, lambda root: load_casia_data(root)),
    )

    all_audio_files = []
    all_emotion_labels = []
    all_dataset_labels = []

    for display, tag, root, loader in sources:
        if not os.path.exists(root):
            print(f"{display}路径不存在: {root}")
            continue

        try:
            files, emotions = loader(root)
            all_audio_files.extend(files)
            all_emotion_labels.extend(emotions)
            all_dataset_labels.extend([tag] * len(files))
            print(f"加载了 {len(files)} 个{display}音频文件")
        except Exception as exc:
            print(f"加载{display}数据集时出错: {exc}")

    if not all_audio_files:
        raise ValueError("未能加载任何数据集,请检查数据集路径")

    return all_audio_files, all_emotion_labels, all_dataset_labels
|
||
|
||
def extract_features_from_files(audio_files, max_files=None):
    """Extract a feature dictionary from every audio file.

    The returned list always has one entry per input file: files that are
    missing, are not ``.wav`` files, or fail during loading or extraction
    contribute an empty dict, so indices stay aligned with the input.

    Args:
        audio_files: List of audio file paths.
        max_files: Process only the first ``max_files`` paths; ``None``
            processes all of them.

    Returns:
        List of feature dictionaries (empty dict for failed files).
    """
    if max_files is not None:
        audio_files = audio_files[:max_files]

    features_list = []
    n_ok = 0
    n_failed = 0

    for file_path in tqdm(audio_files, desc="提取特征"):
        try:
            # Decide up front whether the file must be skipped.
            if not os.path.exists(file_path):
                skip_message = f"文件不存在: {file_path}"
            elif not file_path.lower().endswith('.wav'):
                skip_message = f"跳过非wav文件: {file_path}"
            else:
                skip_message = None

            if skip_message is not None:
                print(skip_message)
                features_list.append({})
                n_failed += 1
                continue

            # Load at a fixed 22.05 kHz using the fast resampler.
            audio, sr = librosa.load(file_path, sr=22050, res_type='kaiser_fast')

            features_list.append(extract_all_features(audio, sr))
            n_ok += 1

            # Report progress after every 20 successfully processed files.
            if n_ok % 20 == 0:
                print(f"已成功处理 {n_ok} 个文件,失败 {n_failed} 个文件")

        except Exception as exc:
            print(f"处理文件 {file_path} 时出错: {exc}")
            # Placeholder keeps the output aligned with the input list.
            features_list.append({})
            n_failed += 1

    print(f"特征提取完成:成功 {n_ok} 个文件,失败 {n_failed} 个文件")
    return features_list
|
||
|
||
def train_emotion_model(X_train, y_train, X_val, y_val,
                        epochs=50, batch_size=32, learning_rate=0.001,
                        output_dir='output/emotion_model', num_emotions=None):
    """Build, compile and train a single-task ``EmotionModel``.

    Args:
        X_train: Training features, indexed as
            ``(samples, timesteps, features)``.
        y_train: Integer emotion labels for the training set.
        X_val: Validation features.
        y_val: Integer emotion labels for the validation set.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        learning_rate: Learning rate for the optimizer.
        output_dir: Directory for the saved model and the history plot.
        num_emotions: Total number of emotion classes.  When ``None`` it is
            inferred from the labels in the training and validation sets.

    Returns:
        The trained ``EmotionModel`` wrapper.
    """
    os.makedirs(output_dir, exist_ok=True)

    if num_emotions is None:
        # Counting the distinct training labels alone undercounts when a
        # class is missing from the training split, which later breaks the
        # one-hot encoding of validation labels.  The largest label seen in
        # either split is used as a lower bound as well.
        seen_labels = np.concatenate([np.ravel(y_train), np.ravel(y_val)])
        num_emotions = max(len(np.unique(y_train)), int(seen_labels.max()) + 1)

    input_shape = (X_train.shape[1], X_train.shape[2])  # (timesteps, features)

    # Create the model.
    model = EmotionModel(input_shape=input_shape, num_emotions=num_emotions)
    model.build_model()
    model.compile_model(learning_rate=learning_rate)

    # Print the model summary.
    model.model.summary()

    # Train and time the run.
    start_time = time()
    print(f"\n开始训练模型,epochs={epochs}, batch_size={batch_size}...")

    model.train(
        X_train, y_train,
        X_val, y_val,
        epochs=epochs,
        batch_size=batch_size,
        save_dir=output_dir
    )

    training_time = time() - start_time
    print(f"训练完成,用时: {training_time:.2f} 秒")

    # Plot and save the training curves.
    history_path = os.path.join(output_dir, 'training_history.png')
    model.plot_training_history(save_path=history_path)

    return model
|
||
|
||
def evaluate_emotion_model(model, X_test, y_test, class_names, output_dir='output/emotion_model'):
    """Evaluate a trained emotion model and plot its results.

    Prints the metrics and a classification report, saves the predictions
    (``y_pred.npy`` / ``y_true.npy``) and the confusion-matrix figure into
    ``output_dir``, and shows a per-class accuracy bar chart.

    Args:
        model: Trained model wrapper providing ``evaluate`` and ``predict``.
        X_test: Test features.
        y_test: Integer test labels; label ``i`` corresponds to
            ``class_names[i]``.
        class_names: Class names ordered by label id.
        output_dir: Directory for the saved artefacts (created if missing).

    Returns:
        ``(metrics, report, class_accuracies)``: the metric dict from
        ``model.evaluate``, the classification report text, and a dict
        mapping each class present in ``y_test`` to its accuracy.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Boolean masking below needs an array, not a list.
    y_test = np.asarray(y_test)
    class_names = [str(name) for name in class_names]
    # Pin the label set so that a class missing from the test split neither
    # shifts the confusion-matrix rows nor makes the report raise.
    label_ids = np.arange(len(class_names))

    # Aggregate metrics.
    metrics = model.evaluate(X_test, y_test)

    print("\n模型评估结果:")
    for name, value in metrics.items():
        print(f" {name}: {value:.4f}")

    # Predictions.
    y_pred, y_pred_probs = model.predict(X_test)

    # Save the predictions.
    np.save(os.path.join(output_dir, 'y_pred.npy'), y_pred)
    np.save(os.path.join(output_dir, 'y_true.npy'), y_test)

    # Confusion matrix.
    cm = confusion_matrix(y_test, y_pred, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('预测类别')
    plt.ylabel('真实类别')
    plt.title('混淆矩阵')

    # Save the confusion-matrix figure.
    confusion_matrix_path = os.path.join(output_dir, 'confusion_matrix.png')
    plt.tight_layout()
    plt.savefig(confusion_matrix_path)
    plt.show()

    # Classification report; classes without samples score 0 silently.
    report = classification_report(
        y_test, y_pred,
        labels=label_ids,
        target_names=class_names,
        zero_division=0
    )
    print("\n分类报告:")
    print(report)

    # Per-class accuracy, for the classes that occur in the test set.
    accuracy = accuracy_score(y_test, y_pred)
    class_accuracies = {}
    for i, class_name in enumerate(class_names):
        class_mask = (y_test == i)
        if np.any(class_mask):
            class_accuracies[class_name] = accuracy_score(
                y_test[class_mask], y_pred[class_mask]
            )

    # Bar chart of the per-class accuracies.
    plt.figure(figsize=(12, 6))
    classes = list(class_accuracies.keys())
    accs = list(class_accuracies.values())

    plt.bar(classes, accs, color='skyblue')
    plt.axhline(y=accuracy, color='r', linestyle='--', label=f'总体准确率: {accuracy:.4f}')

    plt.xlabel('情感类别')
    plt.ylabel('准确率')
    plt.title('各情感类别准确率')
    plt.ylim([0, 1.0])

    # Value label above each bar.
    for i, v in enumerate(accs):
        plt.text(i, v + 0.02, f'{v:.2f}', ha='center')

    plt.legend()
    plt.tight_layout()
    plt.show()

    return metrics, report, class_accuracies
|
||
|
||
def _cap_indices_per_dataset(dataset_labels, max_files):
    """Pick at most ``max_files`` sample indices from each dataset.

    Datasets are handled in the order ravdess, savee, casia.  A dataset with
    more than ``max_files`` samples is sub-sampled without replacement after
    re-seeding NumPy's global RNG with 42.

    Returns:
        The concatenated index array, or ``None`` when nothing was selected.
    """
    tags = np.array(dataset_labels)
    per_dataset = [np.where(tags == tag)[0] for tag in ('ravdess', 'savee', 'casia')]

    capped = []
    for indices in per_dataset:
        if len(indices) > max_files:
            # Fixed seed so the random subset is repeatable.
            np.random.seed(42)
            indices = np.random.choice(indices, max_files, replace=False)
        capped.append(indices)

    non_empty = [indices for indices in capped if len(indices) > 0]
    if not non_empty:
        return None
    return np.concatenate(non_empty)


def run_real_data_pipeline(max_files_per_dataset=None, test_size=0.2, val_size=0.1,
                           epochs=30, batch_size=32):
    """Run the full training pipeline on the real datasets.

    Steps: load the datasets, optionally cap the number of files per
    dataset, extract features, encode labels, split, standardise, reshape
    for the LSTM, save the preprocessing objects, train and evaluate.
    Any exception is caught, reported with its traceback, and swallowed.

    Args:
        max_files_per_dataset: Maximum number of files taken from each
            dataset; ``None`` uses every file.
        test_size: Test-set fraction.
        val_size: Validation-set fraction.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
    """

    def _print_distribution(header, values, label_kind=None):
        """Print how often each distinct value occurs, most frequent first."""
        counts = pd.Series(values).value_counts()
        print(header)
        for key, count in counts.items():
            print(f" {key}: {count} 个样本")

    try:
        print("开始使用真实数据训练模型...")

        # Load every dataset.
        print("加载数据集...")
        audio_files, emotion_labels, dataset_labels = load_all_datasets(
            ravdess_path='./RAVDESS',
            savee_path='./SAVEE',
            casia_path='./CAISA'  # spelling matches the directory name
        )

        # Optionally cap the amount of data per dataset.
        if max_files_per_dataset is not None:
            selected_indices = _cap_indices_per_dataset(dataset_labels, max_files_per_dataset)

            if selected_indices is not None:
                audio_files = [audio_files[i] for i in selected_indices]
                emotion_labels = [emotion_labels[i] for i in selected_indices]
                dataset_labels = [dataset_labels[i] for i in selected_indices]

            print(f"限制后的数据集大小: {len(audio_files)} 个文件")

        # Show the label and dataset distributions.
        _print_distribution("\n情感分布:", emotion_labels)
        _print_distribution("\n数据集分布:", dataset_labels)

        # Feature extraction.
        print("\n开始提取特征...")
        features_list = extract_features_from_files(audio_files)

        # Drop samples whose extraction failed (empty feature dicts).
        kept = [
            (features, emotion, dataset)
            for features, emotion, dataset in zip(features_list, emotion_labels, dataset_labels)
            if features
        ]
        valid_features = [entry[0] for entry in kept]
        valid_emotions = [entry[1] for entry in kept]
        valid_datasets = [entry[2] for entry in kept]

        print(f"\n有效样本数: {len(valid_features)}/{len(features_list)}")

        # Show the distributions of the remaining samples.
        _print_distribution("\n有效样本情感分布:", valid_emotions)
        _print_distribution("\n有效样本数据集分布:", valid_datasets)

        # Give up when too few samples survived.
        if len(valid_features) < 10:
            print("有效样本太少,无法继续处理")
            return

        # Feature dicts -> feature matrix.
        print("\n转换为特征矩阵...")
        X, feature_names = features_to_matrix(valid_features)
        print(f"特征矩阵形状: {X.shape}")

        # Label encoding.
        y, label_encoder = encode_labels(valid_emotions)
        class_names = label_encoder.classes_
        print(f"类别: {class_names}")

        # Train / validation / test split.
        print("\n划分数据集...")
        X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(
            X, y, test_size=test_size, val_size=val_size)

        print("数据集划分:")
        print(f" 训练集: {X_train.shape[0]} 样本")
        print(f" 验证集: {X_val.shape[0]} 样本")
        print(f" 测试集: {X_test.shape[0]} 样本")

        # Standardise the features.
        print("\n标准化特征...")
        X_train_norm, X_val_norm, X_test_norm, scaler = standardize_features(
            X_train, X_val, X_test)

        # Reshape into the LSTM input layout.
        print("\n重塑为LSTM输入格式...")
        X_train_reshaped = reshape_for_lstm(X_train_norm)
        X_val_reshaped = reshape_for_lstm(X_val_norm)
        X_test_reshaped = reshape_for_lstm(X_test_norm)

        print("LSTM输入形状:")
        print(f" 训练集: {X_train_reshaped.shape}")
        print(f" 验证集: {X_val_reshaped.shape}")
        print(f" 测试集: {X_test_reshaped.shape}")

        # Persist the preprocessing objects.
        print("\n保存预处理对象...")
        save_preprocessing_objects(scaler, label_encoder, feature_names)

        # Train.
        print("\n开始训练模型...")
        model = train_emotion_model(
            X_train_reshaped, y_train,
            X_val_reshaped, y_val,
            epochs=epochs,
            batch_size=batch_size
        )

        # Evaluate.
        print("\n评估模型...")
        evaluate_emotion_model(
            model, X_test_reshaped, y_test, class_names
        )

        print("\n使用真实数据的训练流程完成")

    except Exception as exc:
        print(f"训练流程出错: {exc}")
        import traceback
        traceback.print_exc()
|
||
|
||
# Run the real-data training pipeline.  The per-dataset cap keeps the run
# fast; raise it, or pass None to use every sample, for a real experiment.
run_real_data_pipeline(
    max_files_per_dataset=50,  # at most 50 samples from each dataset
)
|
||
|