Files
yuyinfenxi/语音情感分析.py
2025-07-02 13:54:05 +08:00

9763 lines
287 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# 语音情感分析实验
# 本实验将实现从多语言语音数据中提取特征并建立LSTM模型进行情感分析
# In[ ]:
## 1. 对多语言语音数据集进行预处理
# In[ ]:
### 1.1 在下面空白处写出数据读取,使用 librosa 库对数据进行初步读取与探索的 python 代码
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CASIA' # Correct path of the CASIA dataset
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Show one sample clip from the CASIA dataset.

    The first speaker folder under ``CASIA_PATH`` and the first emotion folder
    inside it are used; the emotion label is the name of that folder. ``.wav``
    files are preferred, with ``.peak`` files as a fallback that is only
    reported, not loaded.

    Returns:
        Whatever ``explore_audio`` returns for a ``.wav`` sample; ``None`` when
        something is missing, the sample is not a ``.wav`` file, or loading
        fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def _subdirs(parent):
        # Names of the sub-directories of `parent`, skipping '_desktop.ini'.
        return [
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav; only look for .peak files when no .wav file exists.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        waveform, rate = load_audio_sample(sample_file)
        return explore_audio(waveform, rate, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
# 数据集统计分析
def analyze_datasets():
    """Print file counts and emotion distributions for the three datasets.

    RAVDESS, SAVEE and CASIA are each scanned for ``.wav`` files, using the
    directory layouts that the ``explore_*`` functions of this file rely on:
    ``Actor_*/`` for RAVDESS, ``AudioData/<speaker>/`` for SAVEE and
    ``<speaker>/<emotion>/`` for CASIA.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field is the emotion code. Files that do
        # not follow the scheme count towards the total but get no label.
        parts = os.path.basename(path).split('-')
        if len(parts) < 3:
            return None
        return ravdess_emotions.get(parts[2], '未知')

    def savee_label(path):
        # Letters before the clip number, e.g. "sa" in "sa01.wav".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the folder that holds the clip.
        return os.path.basename(os.path.dirname(path))

    # Every dataset is scanned; with RAVDESS alone, SAVEE and CASIA would
    # always be reported as missing even when their files exist.
    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        # A missing root directory simply yields no matches (total == 0).
        files = sorted(glob.glob(pattern))
        labels = (label_of(path) for path in files)
        stats[dataset] = {
            'total': len(files),
            'emotions': dict(Counter(label for label in labels if label is not None)),
        }

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
analyze_datasets()

# Try to show one sample per dataset.
# The player object returned by each explore_* call is discarded when the
# call sits inside a try block, so it is displayed explicitly here.
from IPython.display import display

for dataset_label, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explore()
        if player is not None:
            display(player)
    except Exception as e:
        # Best effort: a failure in one dataset must not stop the others.
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE: this is the folder's current name; the original note says the path is handled in the code, but no such handling is visible in this cell - TODO confirm
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Show one sample clip from the CASIA dataset.

    The first speaker folder under ``CASIA_PATH`` and the first emotion folder
    inside it are used; the emotion label is the name of that folder. ``.wav``
    files are preferred, with ``.peak`` files as a fallback that is only
    reported, not loaded.

    Returns:
        Whatever ``explore_audio`` returns for a ``.wav`` sample; ``None`` when
        something is missing, the sample is not a ``.wav`` file, or loading
        fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def _subdirs(parent):
        # Names of the sub-directories of `parent`, skipping '_desktop.ini'.
        return [
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav; only look for .peak files when no .wav file exists.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        waveform, rate = load_audio_sample(sample_file)
        return explore_audio(waveform, rate, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
# 数据集统计分析
def analyze_datasets():
    """Print file counts and emotion distributions for the three datasets.

    RAVDESS, SAVEE and CASIA are each scanned for ``.wav`` files, using the
    directory layouts that the ``explore_*`` functions of this file rely on:
    ``Actor_*/`` for RAVDESS, ``AudioData/<speaker>/`` for SAVEE and
    ``<speaker>/<emotion>/`` for CASIA.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field is the emotion code. Files that do
        # not follow the scheme count towards the total but get no label.
        parts = os.path.basename(path).split('-')
        if len(parts) < 3:
            return None
        return ravdess_emotions.get(parts[2], '未知')

    def savee_label(path):
        # Letters before the clip number, e.g. "sa" in "sa01.wav".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the folder that holds the clip.
        return os.path.basename(os.path.dirname(path))

    # Every dataset is scanned; with RAVDESS alone, SAVEE and CASIA would
    # always be reported as missing even when their files exist.
    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        # A missing root directory simply yields no matches (total == 0).
        files = sorted(glob.glob(pattern))
        labels = (label_of(path) for path in files)
        stats[dataset] = {
            'total': len(files),
            'emotions': dict(Counter(label for label in labels if label is not None)),
        }

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
analyze_datasets()

# Try to show one sample per dataset.
# The player object returned by each explore_* call is discarded when the
# call sits inside a try block, so it is displayed explicitly here.
from IPython.display import display

for dataset_label, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explore()
        if player is not None:
            display(player)
    except Exception as e:
        # Best effort: a failure in one dataset must not stop the others.
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE: this is the folder's current name; the original note says the path is handled in the code, but no such handling is visible in this cell - TODO confirm
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Show one sample clip from the CASIA dataset.

    The first speaker folder under ``CASIA_PATH`` and the first emotion folder
    inside it are used; the emotion label is the name of that folder. ``.wav``
    files are preferred, with ``.peak`` files as a fallback that is only
    reported, not loaded.

    Returns:
        Whatever ``explore_audio`` returns for a ``.wav`` sample; ``None`` when
        something is missing, the sample is not a ``.wav`` file, or loading
        fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def _subdirs(parent):
        # Names of the sub-directories of `parent`, skipping '_desktop.ini'.
        return [
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav; only look for .peak files when no .wav file exists.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        waveform, rate = load_audio_sample(sample_file)
        return explore_audio(waveform, rate, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
# 数据集统计分析
def analyze_datasets():
    """Print file counts and emotion distributions for the three datasets.

    RAVDESS, SAVEE and CASIA are each scanned for ``.wav`` files, using the
    directory layouts that the ``explore_*`` functions of this file rely on:
    ``Actor_*/`` for RAVDESS, ``AudioData/<speaker>/`` for SAVEE and
    ``<speaker>/<emotion>/`` for CASIA.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field is the emotion code. Files that do
        # not follow the scheme count towards the total but get no label.
        parts = os.path.basename(path).split('-')
        if len(parts) < 3:
            return None
        return ravdess_emotions.get(parts[2], '未知')

    def savee_label(path):
        # Letters before the clip number, e.g. "sa" in "sa01.wav".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the folder that holds the clip.
        return os.path.basename(os.path.dirname(path))

    # Every dataset is scanned; with RAVDESS alone, SAVEE and CASIA would
    # always be reported as missing even when their files exist.
    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        # A missing root directory simply yields no matches (total == 0).
        files = sorted(glob.glob(pattern))
        labels = (label_of(path) for path in files)
        stats[dataset] = {
            'total': len(files),
            'emotions': dict(Counter(label for label in labels if label is not None)),
        }

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
analyze_datasets()

# Try to show one sample per dataset.
# The player object returned by each explore_* call is discarded when the
# call sits inside a try block, so it is displayed explicitly here.
from IPython.display import display

for dataset_label, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explore()
        if player is not None:
            display(player)
    except Exception as e:
        # Best effort: a failure in one dataset must not stop the others.
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE: this is the folder's current name; the original note says the path is handled in the code, but no such handling is visible in this cell - TODO confirm
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Show one sample clip from the CASIA dataset.

    The first speaker folder under ``CASIA_PATH`` and the first emotion folder
    inside it are used; the emotion label is the name of that folder. ``.wav``
    files are preferred, with ``.peak`` files as a fallback that is only
    reported, not loaded.

    Returns:
        Whatever ``explore_audio`` returns for a ``.wav`` sample; ``None`` when
        something is missing, the sample is not a ``.wav`` file, or loading
        fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def _subdirs(parent):
        # Names of the sub-directories of `parent`, skipping '_desktop.ini'.
        return [
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav; only look for .peak files when no .wav file exists.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        waveform, rate = load_audio_sample(sample_file)
        return explore_audio(waveform, rate, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
# 数据集统计分析
def analyze_datasets():
    """Print file counts and emotion distributions for the three datasets.

    RAVDESS, SAVEE and CASIA are each scanned for ``.wav`` files, using the
    directory layouts that the ``explore_*`` functions of this file rely on:
    ``Actor_*/`` for RAVDESS, ``AudioData/<speaker>/`` for SAVEE and
    ``<speaker>/<emotion>/`` for CASIA.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field is the emotion code. Files that do
        # not follow the scheme count towards the total but get no label.
        parts = os.path.basename(path).split('-')
        if len(parts) < 3:
            return None
        return ravdess_emotions.get(parts[2], '未知')

    def savee_label(path):
        # Letters before the clip number, e.g. "sa" in "sa01.wav".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the folder that holds the clip.
        return os.path.basename(os.path.dirname(path))

    # Every dataset is scanned; with RAVDESS alone, SAVEE and CASIA would
    # always be reported as missing even when their files exist.
    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        # A missing root directory simply yields no matches (total == 0).
        files = sorted(glob.glob(pattern))
        labels = (label_of(path) for path in files)
        stats[dataset] = {
            'total': len(files),
            'emotions': dict(Counter(label for label in labels if label is not None)),
        }

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
analyze_datasets()

# Try to show one sample per dataset.
# The player object returned by each explore_* call is discarded when the
# call sits inside a try block, so it is displayed explicitly here.
from IPython.display import display

for dataset_label, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explore()
        if player is not None:
            display(player)
    except Exception as e:
        # Best effort: a failure in one dataset must not stop the others.
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE: this is the folder's current name; the original note says the path is handled in the code, but no such handling is visible in this cell - TODO confirm
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Show one sample clip from the CASIA dataset.

    The first speaker folder under ``CASIA_PATH`` and the first emotion folder
    inside it are used; the emotion label is the name of that folder. ``.wav``
    files are preferred, with ``.peak`` files as a fallback that is only
    reported, not loaded.

    Returns:
        Whatever ``explore_audio`` returns for a ``.wav`` sample; ``None`` when
        something is missing, the sample is not a ``.wav`` file, or loading
        fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def _subdirs(parent):
        # Names of the sub-directories of `parent`, skipping '_desktop.ini'.
        return [
            name
            for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        ]

    speaker_names = _subdirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav; only look for .peak files when no .wav file exists.
    candidates = (
        glob.glob(os.path.join(emotion_dir, '*.wav'))
        or glob.glob(os.path.join(emotion_dir, '*.peak'))
    )
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        waveform, rate = load_audio_sample(sample_file)
        return explore_audio(waveform, rate, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
    return None
# 数据集统计分析
def analyze_datasets():
    """Print file counts and emotion distributions for the three datasets.

    RAVDESS, SAVEE and CASIA are each scanned for ``.wav`` files, using the
    directory layouts that the ``explore_*`` functions of this file rely on:
    ``Actor_*/`` for RAVDESS, ``AudioData/<speaker>/`` for SAVEE and
    ``<speaker>/<emotion>/`` for CASIA.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }

    def ravdess_label(path):
        # The third dash-separated field is the emotion code. Files that do
        # not follow the scheme count towards the total but get no label.
        parts = os.path.basename(path).split('-')
        if len(parts) < 3:
            return None
        return ravdess_emotions.get(parts[2], '未知')

    def savee_label(path):
        # Letters before the clip number, e.g. "sa" in "sa01.wav".
        stem = os.path.splitext(os.path.basename(path))[0]
        code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
        return savee_emotions.get(code, '未知')

    def casia_label(path):
        # The emotion is the name of the folder that holds the clip.
        return os.path.basename(os.path.dirname(path))

    # Every dataset is scanned; with RAVDESS alone, SAVEE and CASIA would
    # always be reported as missing even when their files exist.
    sources = {
        'RAVDESS': (os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'), ravdess_label),
        'SAVEE': (os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'), savee_label),
        'CASIA': (os.path.join(CASIA_PATH, '*', '*', '*.wav'), casia_label),
    }

    stats = {}
    for dataset, (pattern, label_of) in sources.items():
        # A missing root directory simply yields no matches (total == 0).
        files = sorted(glob.glob(pattern))
        labels = (label_of(path) for path in files)
        stats[dataset] = {
            'total': len(files),
            'emotions': dict(Counter(label for label in labels if label is not None)),
        }

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
analyze_datasets()

# Try to show one sample per dataset.
# The player object returned by each explore_* call is discarded when the
# call sits inside a try block, so it is displayed explicitly here.
from IPython.display import display

for dataset_label, explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        player = explore()
        if player is not None:
            display(player)
    except Exception as e:
        # Best effort: a failure in one dataset must not stop the others.
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust them to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE: this is the folder's current name; the original note says the path is handled in the code, but no such handling is visible in this cell - TODO confirm
def load_audio_sample(file_path):
    """Load an audio file with librosa and print a short summary of it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(audio, sr)`` pair produced by ``librosa.load``: the waveform
        samples and the sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None is forwarded to librosa.load instead of a fixed target rate.
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of one clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used in the titles of both subplots.

    Returns:
        An ``IPython.display.Audio`` object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Hand a player object back so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load, describe and plot one sample clip from RAVDESS ``Actor_01``.

    RAVDESS file names consist of seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor, e.g.
    ``03-01-01-01-01-01-01.wav``); the third field selects the emotion label.

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so that the same sample is
    # chosen on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name that does not follow the scheme has no emotion field; report it
    # as unknown instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load, describe and plot one sample clip from the SAVEE dataset.

    Speaker folders are looked up under ``SAVEE_PATH/AudioData`` and the first
    ``.wav`` file of the first speaker is used. The emotion is derived from
    the letters that precede the clip number in the file name (``a01.wav`` is
    anger, ``sa01.wav`` is sadness).

    Returns:
        Whatever ``explore_audio`` returns for the sample, or ``None`` when
        the dataset directory, a speaker folder or its audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is not mistaken
    # for a speaker folder; sort for a reproducible choice.
    audio_dirs = sorted(
        entry
        for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension before parsing: filtering characters of the full
    # name kept the letters of "wav" ("a01.wav" -> "awav"), so no key matched.
    stem = os.path.splitext(filename)[0]
    # Tolerate a prefixed name such as "DC_a01" (assumed variant of the naming
    # scheme - TODO confirm against the local copy of the dataset).
    clip_code = stem.rsplit('_', 1)[-1]
    emotion_id = clip_code.rstrip('0123456789').lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of an audio clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the clip.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the clip.
    player = Audio(data=audio, rate=sr)
    return player
# Example: read and explore one file from the RAVDESS dataset
def explore_ravdess():
    """Load, describe and plot one sample recording from RAVDESS.

    Picks the alphabetically first ``.wav`` under ``RAVDESS_PATH/Actor_01``,
    decodes the emotion from the third dash-separated filename field, prints
    the filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path or the audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob order is filesystem-dependent; sort so the same sample is picked
    # on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming scheme is labelled "unknown"
    # instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# Example: read and explore one file from the SAVEE dataset
def explore_savee():
    """Load, describe and plot one sample recording from SAVEE.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    alphabetically first one and its alphabetically first ``.wav`` file,
    decodes the emotion from the letter prefix of the filename, prints the
    filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path, a speaker directory, or audio files cannot be found.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep directories only (the glob may also match plain files) and sort,
    # because glob order is filesystem-dependent.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: <emotion letters><index>.wav, e.g. a01.wav (anger)
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    # Parse the stem only: the letters of the ".wav" extension must not end
    # up in the emotion code. An optional "<prefix>_" before the code is
    # tolerated as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of an audio clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the clip.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the clip.
    player = Audio(data=audio, rate=sr)
    return player
# Example: read and explore one file from the RAVDESS dataset
def explore_ravdess():
    """Load, describe and plot one sample recording from RAVDESS.

    Picks the alphabetically first ``.wav`` under ``RAVDESS_PATH/Actor_01``,
    decodes the emotion from the third dash-separated filename field, prints
    the filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path or the audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob order is filesystem-dependent; sort so the same sample is picked
    # on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming scheme is labelled "unknown"
    # instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# Example: read and explore one file from the SAVEE dataset
def explore_savee():
    """Load, describe and plot one sample recording from SAVEE.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    alphabetically first one and its alphabetically first ``.wav`` file,
    decodes the emotion from the letter prefix of the filename, prints the
    filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path, a speaker directory, or audio files cannot be found.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep directories only (the glob may also match plain files) and sort,
    # because glob order is filesystem-dependent.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: <emotion letters><index>.wav, e.g. a01.wav (anger)
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    # Parse the stem only: the letters of the ".wav" extension must not end
    # up in the emotion code. An optional "<prefix>_" before the code is
    # tolerated as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of an audio clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the clip.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the clip.
    player = Audio(data=audio, rate=sr)
    return player
# Example: read and explore one file from the RAVDESS dataset
def explore_ravdess():
    """Load, describe and plot one sample recording from RAVDESS.

    Picks the alphabetically first ``.wav`` under ``RAVDESS_PATH/Actor_01``,
    decodes the emotion from the third dash-separated filename field, prints
    the filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path or the audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob order is filesystem-dependent; sort so the same sample is picked
    # on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming scheme is labelled "unknown"
    # instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# Example: read and explore one file from the SAVEE dataset
def explore_savee():
    """Load, describe and plot one sample recording from SAVEE.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    alphabetically first one and its alphabetically first ``.wav`` file,
    decodes the emotion from the letter prefix of the filename, prints the
    filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path, a speaker directory, or audio files cannot be found.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep directories only (the glob may also match plain files) and sort,
    # because glob order is filesystem-dependent.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: <emotion letters><index>.wav, e.g. a01.wav (anger)
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    # Parse the stem only: the letters of the ".wav" extension must not end
    # up in the emotion code. An optional "<prefix>_" before the code is
    # tolerated as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of an audio clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the clip.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the clip.
    player = Audio(data=audio, rate=sr)
    return player
# Example: read and explore one file from the RAVDESS dataset
def explore_ravdess():
    """Load, describe and plot one sample recording from RAVDESS.

    Picks the alphabetically first ``.wav`` under ``RAVDESS_PATH/Actor_01``,
    decodes the emotion from the third dash-separated filename field, prints
    the filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path or the audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob order is filesystem-dependent; sort so the same sample is picked
    # on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming scheme is labelled "unknown"
    # instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# Example: read and explore one file from the SAVEE dataset
def explore_savee():
    """Load, describe and plot one sample recording from SAVEE.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    alphabetically first one and its alphabetically first ``.wav`` file,
    decodes the emotion from the letter prefix of the filename, prints the
    filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path, a speaker directory, or audio files cannot be found.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep directories only (the glob may also match plain files) and sort,
    # because glob order is filesystem-dependent.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: <emotion letters><index>.wav, e.g. a01.wav (anger)
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    # Parse the stem only: the letters of the ".wav" extension must not end
    # up in the emotion code. An optional "<prefix>_" before the code is
    # tolerated as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of an audio clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``IPython.display.Audio`` object built from the clip.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Playback widget for the clip.
    player = Audio(data=audio, rate=sr)
    return player
# Example: read and explore one file from the RAVDESS dataset
def explore_ravdess():
    """Load, describe and plot one sample recording from RAVDESS.

    Picks the alphabetically first ``.wav`` under ``RAVDESS_PATH/Actor_01``,
    decodes the emotion from the third dash-separated filename field, prints
    the filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path or the audio files cannot be found.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return

    # glob order is filesystem-dependent; sort so the same sample is picked
    # on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # RAVDESS naming scheme, e.g. 03-01-01-01-01-01-01.wav:
    # modality-channel-emotion-intensity-statement-repetition-actor
    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    # A file that does not follow the naming scheme is labelled "unknown"
    # instead of raising IndexError.
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# Example: read and explore one file from the SAVEE dataset
def explore_savee():
    """Load, describe and plot one sample recording from SAVEE.

    Looks for speaker directories under ``SAVEE_PATH/AudioData``, takes the
    alphabetically first one and its alphabetically first ``.wav`` file,
    decodes the emotion from the letter prefix of the filename, prints the
    filename and emotion, and passes the audio to ``explore_audio``.

    Returns:
        The value returned by ``explore_audio``, or ``None`` when the dataset
        path, a speaker directory, or audio files cannot be found.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return

    # Keep directories only (the glob may also match plain files) and sort,
    # because glob order is filesystem-dependent.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    # SAVEE naming scheme: <emotion letters><index>.wav, e.g. a01.wav (anger)
    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    # Parse the stem only: the letters of the ".wav" extension must not end
    # up in the emotion code. An optional "<prefix>_" before the code is
    # tolerated as well.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# Example: read and explore one file from the CASIA dataset
def explore_casia():
    """Describe and, when possible, plot one sample recording from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the alphabetically first
    speaker and emotion directories and picks the first ``.wav`` file,
    falling back to ``.peak`` files when no ``.wav`` exists. The emotion
    label is the name of the emotion directory.

    Returns:
        The value returned by ``explore_audio`` for a ``.wav`` sample;
        ``None`` when nothing is found, the sample is not a ``.wav`` file,
        or reading it fails (the error is printed).
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    def _subdirs(parent):
        # Sorted so the chosen sample does not depend on os.listdir order.
        return sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name)) and name != '_desktop.ini'
        )

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = _subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # The directory may hold another format such as .peak instead.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = audio_files[0]
    emotion = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion}")

    # Compare case-insensitively: on a case-insensitive filesystem the
    # '*.wav' pattern can return a name ending in '.WAV'.
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# Dataset statistics
def analyze_datasets():
    """Count audio files per dataset and per emotion, then print a report.

    Layouts scanned:
      * RAVDESS: ``RAVDESS_PATH/Actor_*/*.wav``; the emotion is the third
        dash-separated field of the filename.
      * SAVEE: ``SAVEE_PATH/AudioData/<speaker>/*.wav``; the emotion is the
        letter prefix of the filename.
      * CASIA: ``CASIA_PATH/<speaker>/<emotion>/*.wav``; the emotion is the
        name of the emotion directory.

    A dataset whose path is missing, or that has no matching files, is
    reported as not found.

    Returns:
        None. The report is written to stdout.
    """
    from collections import Counter

    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': Counter()},
        'SAVEE': {'total': 0, 'emotions': Counter()},
        'CASIA': {'total': 0, 'emotions': Counter()},
    }

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count toward the
            # total but not toward any emotion.
            if len(parts) >= 3:
                stats['RAVDESS']['emotions'][ravdess_emotions.get(parts[2], '未知')] += 1

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters are not part of the
            # code; an optional "<prefix>_" before the code is tolerated.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if c.isalpha()).lower()
            stats['SAVEE']['emotions'][savee_emotions.get(code, '未知')] += 1

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            stats['CASIA']['emotions'][os.path.basename(os.path.dirname(file_path))] += 1

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Run the dataset statistics report
analyze_datasets()
# Explore one sample per dataset; a failure in one dataset is printed and
# does not stop the remaining ones.
for dataset_label, explore_sample in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{dataset_label}数据集样例:")
        explore_sample()
    except Exception as e:
        print(f"{dataset_label}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust these to match the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA", not "CASIA"; the original note says this is the actual folder name on disk and that the path is handled in code — confirm
def load_audio_sample(file_path):
    """Read an audio file at its native sampling rate and print a summary.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple: the waveform returned by ``librosa.load``
        and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    duration = n_samples / sample_rate
    print(f"采样率: {sample_rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, sample_rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """Count audio files per emotion in each dataset and print a summary.

    RAVDESS files are read from ``Actor_*`` folders and labelled by the third
    dash-separated field of the file name. SAVEE files are read from
    ``AudioData/<speaker>/`` and labelled by the letters before the file
    number. CASIA files are read from ``<speaker>/<emotion>/`` and labelled
    by the emotion folder name. Only ``.wav`` files are counted.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when present.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _count(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        emotions = stats[dataset]['emotions']
        emotions[emotion] = emotions.get(emotion, 0) + 1

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names without an emotion field count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters do not leak into the code.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem if not c.isdigit())
            _count('SAVEE', savee_emotions.get(code, '未知'))

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-dataset file statistics.
analyze_datasets()
# Show one sample per dataset; each call has its own try/except so an error in one dataset does not stop the others.
try:
print("\nRAVDESS数据集样例:")
explore_ravdess()
except Exception as e:
print(f"RAVDESS探索时出错: {e}")
try:
print("\nSAVEE数据集样例:")
explore_savee()
except Exception as e:
print(f"SAVEE探索时出错: {e}")
try:
print("\nCASIA数据集样例:")
explore_casia()
except Exception as e:
print(f"CASIA探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): original note says this is the folder's current on-disk name; 'CAISA' looks like a transposition of 'CASIA' - confirm
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Tuple ``(audio, sr)``: the waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's native sampling rate (no resampling).
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples / rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """Count audio files per emotion in each dataset and print a summary.

    RAVDESS files are read from ``Actor_*`` folders and labelled by the third
    dash-separated field of the file name. SAVEE files are read from
    ``AudioData/<speaker>/`` and labelled by the letters before the file
    number. CASIA files are read from ``<speaker>/<emotion>/`` and labelled
    by the emotion folder name. Only ``.wav`` files are counted.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when present.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _count(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        emotions = stats[dataset]['emotions']
        emotions[emotion] = emotions.get(emotion, 0) + 1

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names without an emotion field count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters do not leak into the code.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem if not c.isdigit())
            _count('SAVEE', savee_emotions.get(code, '未知'))

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-dataset file statistics.
analyze_datasets()
# Show one sample per dataset; each call has its own try/except so an error in one dataset does not stop the others.
try:
print("\nRAVDESS数据集样例:")
explore_ravdess()
except Exception as e:
print(f"RAVDESS探索时出错: {e}")
try:
print("\nSAVEE数据集样例:")
explore_savee()
except Exception as e:
print(f"SAVEE探索时出错: {e}")
try:
print("\nCASIA数据集样例:")
explore_casia()
except Exception as e:
print(f"CASIA探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): original note says this is the folder's current on-disk name; 'CAISA' looks like a transposition of 'CASIA' - confirm
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Tuple ``(audio, sr)``: the waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's native sampling rate (no resampling).
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples / rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """Count audio files per emotion in each dataset and print a summary.

    RAVDESS files are read from ``Actor_*`` folders and labelled by the third
    dash-separated field of the file name. SAVEE files are read from
    ``AudioData/<speaker>/`` and labelled by the letters before the file
    number. CASIA files are read from ``<speaker>/<emotion>/`` and labelled
    by the emotion folder name. Only ``.wav`` files are counted.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when present.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _count(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        emotions = stats[dataset]['emotions']
        emotions[emotion] = emotions.get(emotion, 0) + 1

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names without an emotion field count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters do not leak into the code.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem if not c.isdigit())
            _count('SAVEE', savee_emotions.get(code, '未知'))

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-dataset file statistics.
analyze_datasets()
# Show one sample per dataset; each call has its own try/except so an error in one dataset does not stop the others.
try:
print("\nRAVDESS数据集样例:")
explore_ravdess()
except Exception as e:
print(f"RAVDESS探索时出错: {e}")
try:
print("\nSAVEE数据集样例:")
explore_savee()
except Exception as e:
print(f"SAVEE探索时出错: {e}")
try:
print("\nCASIA数据集样例:")
explore_casia()
except Exception as e:
print(f"CASIA探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): original note says this is the folder's current on-disk name; 'CAISA' looks like a transposition of 'CASIA' - confirm
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Tuple ``(audio, sr)``: the waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's native sampling rate (no resampling).
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples / rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """Count audio files per emotion in each dataset and print a summary.

    RAVDESS files are read from ``Actor_*`` folders and labelled by the third
    dash-separated field of the file name. SAVEE files are read from
    ``AudioData/<speaker>/`` and labelled by the letters before the file
    number. CASIA files are read from ``<speaker>/<emotion>/`` and labelled
    by the emotion folder name. Only ``.wav`` files are counted.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when present.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _count(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        emotions = stats[dataset]['emotions']
        emotions[emotion] = emotions.get(emotion, 0) + 1

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names without an emotion field count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters do not leak into the code.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem if not c.isdigit())
            _count('SAVEE', savee_emotions.get(code, '未知'))

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-dataset file statistics.
analyze_datasets()
# Show one sample per dataset; each call has its own try/except so an error in one dataset does not stop the others.
try:
print("\nRAVDESS数据集样例:")
explore_ravdess()
except Exception as e:
print(f"RAVDESS探索时出错: {e}")
try:
print("\nSAVEE数据集样例:")
explore_savee()
except Exception as e:
print(f"SAVEE探索时出错: {e}")
try:
print("\nCASIA数据集样例:")
explore_casia()
except Exception as e:
print(f"CASIA探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): original note says this is the folder's current on-disk name; 'CAISA' looks like a transposition of 'CASIA' - confirm
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Tuple ``(audio, sr)``: the waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's native sampling rate (no resampling).
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples / rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """Count audio files per emotion in each dataset and print a summary.

    RAVDESS files are read from ``Actor_*`` folders and labelled by the third
    dash-separated field of the file name. SAVEE files are read from
    ``AudioData/<speaker>/`` and labelled by the letters before the file
    number. CASIA files are read from ``<speaker>/<emotion>/`` and labelled
    by the emotion folder name. Only ``.wav`` files are counted.

    Previously only RAVDESS was scanned, so SAVEE and CASIA were always
    reported as missing even when present.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def _count(dataset, emotion):
        # Increment the per-emotion counter of one dataset.
        emotions = stats[dataset]['emotions']
        emotions[emotion] = emotions.get(emotion, 0) + 1

    if os.path.exists(RAVDESS_PATH):
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Names without an emotion field count toward the total only.
            if len(parts) >= 3:
                _count('RAVDESS', ravdess_emotions.get(parts[2], '未知'))

    if os.path.exists(SAVEE_PATH):
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            # Use the stem so the extension letters do not leak into the code.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            code = ''.join(c for c in stem if not c.isdigit())
            _count('SAVEE', savee_emotions.get(code, '未知'))

    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            _count('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-dataset file statistics.
analyze_datasets()
# Show one sample per dataset; each call has its own try/except so an error in one dataset does not stop the others.
try:
print("\nRAVDESS数据集样例:")
explore_ravdess()
except Exception as e:
print(f"RAVDESS探索时出错: {e}")
try:
print("\nSAVEE数据集样例:")
explore_savee()
except Exception as e:
print(f"SAVEE探索时出错: {e}")
try:
print("\nCASIA数据集样例:")
explore_casia()
except Exception as e:
print(f"CASIA探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Dataset root directories; adjust to the local layout.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): original note says this is the folder's current on-disk name; 'CAISA' looks like a transposition of 'CASIA' - confirm
def load_audio_sample(file_path):
    """Read an audio file with librosa and print its basic properties.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Tuple ``(audio, sr)``: the waveform and its sampling rate.
    """
    print(f"加载音频文件: {file_path}")
    # sr=None keeps the file's native sampling rate (no resampling).
    waveform, rate = librosa.load(file_path, sr=None)
    n_samples = len(waveform)
    print(f"采样率: {rate}Hz")
    print(f"音频长度: {n_samples} 采样点, {n_samples / rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """Plot the waveform and a log-frequency spectrogram of a signal.

    Args:
        audio: Waveform samples to visualise.
        sr: Sampling rate of ``audio``.
        title: Prefix used for both subplot titles.

    Returns:
        An ``Audio`` player object built from ``audio`` and ``sr``.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.subplot(2, 1, 2)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """Load and visualise one sample recording from RAVDESS ``Actor_01``.

    File names follow ``03-01-01-01-01-01-01.wav``
    (modality-channel-emotion-intensity-statement-repetition-actor), so the
    third dash-separated field is the emotion code.

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when the
        dataset path or its audio files are missing.
    """
    emotion_mapping = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order depends on the filesystem; sort so the same sample is
    # picked on every run.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01/*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    parts = filename.split('-')
    # A name with fewer than three fields has no emotion code; label it
    # unknown instead of raising IndexError on parts[2].
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """Load and visualise one sample recording from the SAVEE dataset.

    Looks under ``SAVEE_PATH/AudioData/<speaker>/`` and decodes the emotion
    from the letters that precede the file number (``a01.wav`` -> anger,
    ``sa01.wav`` -> sadness).

    Returns:
        The result of ``explore_audio`` for the sample, or ``None`` when no
        suitable file is found.
    """
    emotion_mapping = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only (a stray file would otherwise be taken for a
    # speaker folder) and sort for a reproducible pick.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData/*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    # Decode from the stem only: filtering the whole file name left the
    # extension letters in the code ("a01.wav" -> "awav"), so every file
    # was reported as unknown.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")
    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """Describe, and if possible plot, one sample file from CASIA.

    Walks ``CASIA_PATH/<speaker>/<emotion>/`` using the first speaker and
    first emotion directory listed, and uses the emotion directory name as
    the label. Only ``.wav`` samples are loaded and plotted.

    Returns:
        The result of ``explore_audio`` for a ``.wav`` sample, else ``None``.
    """
    def _subdirs(parent):
        # Sub-directory names of ``parent`` in listing order, minus '_desktop.ini'.
        found = []
        for entry in os.listdir(parent):
            if entry == '_desktop.ini':
                continue
            if os.path.isdir(os.path.join(parent, entry)):
                found.append(entry)
        return found

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speakers = _subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_names = _subdirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = glob.glob(os.path.join(emotion_dir, '*.wav'))
    if not candidates:
        candidates = glob.glob(os.path.join(emotion_dir, '*.peak'))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return
    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE dataset.

    Files are looked up under SAVEE_PATH/AudioData/<speaker>/*.wav. SAVEE file
    names such as a01.wav carry the emotion code in their letters (a = anger).

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path, the speaker directories or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is never taken
    # for a speaker folder; sorting makes the choice deterministic.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }
    # Drop the extension before stripping digits: filtering the whole file
    # name turns 'a01.wav' into 'awav', which never matches the mapping.
    # rsplit also tolerates a speaker prefix such as 'DC_a01'.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Show one sample from the CASIA dataset.

    Walks CASIA_PATH/<speaker>/<emotion>/ using the first speaker and the first
    emotion directory found, prints the chosen file and its emotion (the
    directory name) and, for .wav files, loads and plots the audio.

    Returns:
        The value returned by explore_audio, or None when nothing could be
        loaded (missing path, no directories, no files, non-wav file, or a
        read error, each of which is reported with a printed message).
    """
    def child_dirs(parent):
        # Names of the sub-directories of parent, ignoring '_desktop.ini'.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = child_dirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = child_dirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to the loader.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE dataset.

    Files are looked up under SAVEE_PATH/AudioData/<speaker>/*.wav. SAVEE file
    names such as a01.wav carry the emotion code in their letters (a = anger).

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path, the speaker directories or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is never taken
    # for a speaker folder; sorting makes the choice deterministic.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }
    # Drop the extension before stripping digits: filtering the whole file
    # name turns 'a01.wav' into 'awav', which never matches the mapping.
    # rsplit also tolerates a speaker prefix such as 'DC_a01'.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Show one sample from the CASIA dataset.

    Walks CASIA_PATH/<speaker>/<emotion>/ using the first speaker and the first
    emotion directory found, prints the chosen file and its emotion (the
    directory name) and, for .wav files, loads and plots the audio.

    Returns:
        The value returned by explore_audio, or None when nothing could be
        loaded (missing path, no directories, no files, non-wav file, or a
        read error, each of which is reported with a printed message).
    """
    def child_dirs(parent):
        # Names of the sub-directories of parent, ignoring '_desktop.ini'.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = child_dirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = child_dirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to the loader.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE dataset.

    Files are looked up under SAVEE_PATH/AudioData/<speaker>/*.wav. SAVEE file
    names such as a01.wav carry the emotion code in their letters (a = anger).

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path, the speaker directories or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is never taken
    # for a speaker folder; sorting makes the choice deterministic.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }
    # Drop the extension before stripping digits: filtering the whole file
    # name turns 'a01.wav' into 'awav', which never matches the mapping.
    # rsplit also tolerates a speaker prefix such as 'DC_a01'.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Show one sample from the CASIA dataset.

    Walks CASIA_PATH/<speaker>/<emotion>/ using the first speaker and the first
    emotion directory found, prints the chosen file and its emotion (the
    directory name) and, for .wav files, loads and plots the audio.

    Returns:
        The value returned by explore_audio, or None when nothing could be
        loaded (missing path, no directories, no files, non-wav file, or a
        read error, each of which is reported with a printed message).
    """
    def child_dirs(parent):
        # Names of the sub-directories of parent, ignoring '_desktop.ini'.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = child_dirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = child_dirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to the loader.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE dataset.

    Files are looked up under SAVEE_PATH/AudioData/<speaker>/*.wav. SAVEE file
    names such as a01.wav carry the emotion code in their letters (a = anger).

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path, the speaker directories or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is never taken
    # for a speaker folder; sorting makes the choice deterministic.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }
    # Drop the extension before stripping digits: filtering the whole file
    # name turns 'a01.wav' into 'awav', which never matches the mapping.
    # rsplit also tolerates a speaker prefix such as 'DC_a01'.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Show one sample from the CASIA dataset.

    Walks CASIA_PATH/<speaker>/<emotion>/ using the first speaker and the first
    emotion directory found, prints the chosen file and its emotion (the
    directory name) and, for .wav files, loads and plots the audio.

    Returns:
        The value returned by explore_audio, or None when nothing could be
        loaded (missing path, no directories, no files, non-wav file, or a
        read error, each of which is reported with a printed message).
    """
    def child_dirs(parent):
        # Names of the sub-directories of parent, ignoring '_desktop.ini'.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = child_dirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = child_dirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to the loader.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE dataset.

    Files are looked up under SAVEE_PATH/AudioData/<speaker>/*.wav. SAVEE file
    names such as a01.wav carry the emotion code in their letters (a = anger).

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path, the speaker directories or the audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # Keep directories only, so a stray file under AudioData is never taken
    # for a speaker folder; sorting makes the choice deterministic.
    audio_dirs = sorted(
        d for d in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(d)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶'
    }
    # Drop the extension before stripping digits: filtering the whole file
    # name turns 'a01.wav' into 'awav', which never matches the mapping.
    # rsplit also tolerates a speaker prefix such as 'DC_a01'.
    stem = os.path.splitext(filename)[0]
    emotion_id = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Show one sample from the CASIA dataset.

    Walks CASIA_PATH/<speaker>/<emotion>/ using the first speaker and the first
    emotion directory found, prints the chosen file and its emotion (the
    directory name) and, for .wav files, loads and plots the audio.

    Returns:
        The value returned by explore_audio, or None when nothing could be
        loaded (missing path, no directories, no files, non-wav file, or a
        read error, each of which is reported with a printed message).
    """
    def child_dirs(parent):
        # Names of the sub-directories of parent, ignoring '_desktop.ini'.
        return [
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        ]

    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return

    speaker_names = child_dirs(CASIA_PATH)
    if not speaker_names:
        print("未找到CASIA说话者目录")
        return

    speaker_dir = os.path.join(CASIA_PATH, speaker_names[0])
    emotion_names = child_dirs(speaker_dir)
    if not emotion_names:
        print(f"{speaker_dir} 中未找到情感目录")
        return

    emotion_dir = os.path.join(speaker_dir, emotion_names[0])
    emotion_label = os.path.basename(emotion_dir)

    # Prefer .wav files; fall back to .peak files when there are none.
    candidates = (glob.glob(os.path.join(emotion_dir, '*.wav'))
                  or glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not candidates:
        print(f"{emotion_dir} 中未找到音频文件")
        return

    sample_file = candidates[0]
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_label}")

    # Only .wav files are handed to the loader.
    if not sample_file.endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_label}情感")
    except Exception as e:
        print(f"读取文件时出错: {e}")
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and per-emotion totals for each dataset and print them.

    Layouts follow the explore_* helpers of this file:
    RAVDESS_PATH/Actor_*/*.wav, SAVEE_PATH/AudioData/<speaker>/*.wav and
    CASIA_PATH/<speaker>/<emotion>/*.wav. A dataset whose path is missing or
    that holds no .wav file is reported as not found.
    """
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}}
    }

    def tally(dataset, emotion):
        # Add one file to the per-emotion count of a dataset.
        counts = stats[dataset]['emotions']
        counts[emotion] = counts.get(emotion, 0) + 1

    # RAVDESS: the third dash-separated field of the file name is the emotion.
    if os.path.exists(RAVDESS_PATH):
        ravdess_mapping = {
            '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
            '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶'
        }
        ravdess_files = glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav'))
        stats['RAVDESS']['total'] = len(ravdess_files)
        for file_path in ravdess_files:
            parts = os.path.basename(file_path).split('-')
            # Files that do not follow the naming scheme count towards the
            # total but not towards any emotion.
            if len(parts) >= 3:
                tally('RAVDESS', ravdess_mapping.get(parts[2], '未知'))

    # SAVEE: the letters of the file stem are the emotion code (a01 -> a).
    # Without this section SAVEE is always reported as not found.
    if os.path.exists(SAVEE_PATH):
        savee_mapping = {
            'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
            'n': '中性', 'sa': '悲伤', 'su': '惊讶'
        }
        savee_files = glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav'))
        stats['SAVEE']['total'] = len(savee_files)
        for file_path in savee_files:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # rsplit tolerates a speaker prefix such as 'DC_a01'.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit())
            tally('SAVEE', savee_mapping.get(code, '未知'))

    # CASIA: the emotion is the name of the directory holding the file.
    # Without this section CASIA is always reported as not found.
    if os.path.exists(CASIA_PATH):
        casia_files = glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav'))
        stats['CASIA']['total'] = len(casia_files)
        for file_path in casia_files:
            tally('CASIA', os.path.basename(os.path.dirname(file_path)))

    # Print the summary.
    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# 执行数据集探索
from IPython.display import display  # IPython is already used by this file (Audio)

analyze_datasets()

# Try one sample per dataset. Each attempt is isolated so that a failure in
# one dataset does not stop the others.
for _name, _explore in (
    ('RAVDESS', explore_ravdess),
    ('SAVEE', explore_savee),
    ('CASIA', explore_casia),
):
    try:
        print(f"\n{_name}数据集样例:")
        _player = _explore()
        # A value returned inside try/except is not rendered automatically,
        # so the audio player has to be displayed explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# 设置路径,根据实际情况修改
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
# './CAISA' (sic) is the folder name currently on disk. Switch to the correctly
# spelled './CASIA' only when the misspelled folder is absent and that one exists.
CASIA_PATH = './CAISA'
if not os.path.isdir(CASIA_PATH) and os.path.isdir('./CASIA'):
    CASIA_PATH = './CASIA'
def load_audio_sample(file_path):
    """
    Read an audio file at its native sampling rate and print basic facts.

    Args:
        file_path: path of the audio file to read
    Returns:
        audio: waveform samples as returned by librosa.load
        sr: sampling rate (sr=None keeps the rate stored in the file)
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    n_samples = len(waveform)
    duration = n_samples / rate
    print(f"音频长度: {n_samples} 采样点, {duration:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Plot the waveform and a log-frequency spectrogram, then build a player.

    Args:
        audio: waveform samples
        sr: sampling rate
        title: prefix used in both subplot titles
    Returns:
        An IPython Audio object for the given samples and rate.
    """
    plt.figure(figsize=(12, 8))

    # Top panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Bottom panel: STFT magnitude in dB relative to its maximum.
    plt.subplot(2, 1, 2)
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 频谱图")

    plt.tight_layout()
    plt.show()

    # Audio player for the same samples.
    return Audio(data=audio, rate=sr)
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from Actor_01 of the RAVDESS dataset.

    RAVDESS file names look like 03-01-01-01-01-01-01.wav, i.e.
    modality-channel-emotion-intensity-statement-repetition-actor, so the
    third field selects the emotion label.

    Returns:
        The value returned by explore_audio for the sample, or None when the
        dataset path or the audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob order is arbitrary; sorting makes the chosen sample deterministic.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    filename = os.path.basename(sample_file)
    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶'
    }
    # Guard the index as analyze_datasets does: a file that does not follow
    # the naming scheme is labelled unknown instead of raising IndexError.
    parts = filename.split('-')
    emotion = emotion_mapping.get(parts[2], '未知') if len(parts) >= 3 else '未知'
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Data set root directories; adjust them to the local environment.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA" (not "CASIA"), reportedly the current on-disk folder name; no path fix-up is visible in this cell - confirm
def load_audio_sample(file_path):
    """
    Read an audio file without resampling and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        audio: Waveform returned by ``librosa.load``.
        sr: Sampling rate of the file (``sr=None`` keeps the native rate).
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    sample_count = len(waveform)
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Draw the waveform and a log-frequency spectrogram of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix shared by both subplot titles.

    Returns:
        An ``IPython.display.Audio`` player built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    waveform_title = f"{title} - 波形图"
    plt.title(waveform_title)
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its peak.
    plt.subplot(2, 1, 2)
    stft_magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    spectrogram_title = f"{title} - 频谱图"
    plt.title(spectrogram_title)

    plt.tight_layout()
    plt.show()

    # Hand back a player so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from the RAVDESS data set.

    The sample is the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``. Its emotion is decoded from the third
    dash-separated field of the file name
    (modality-channel-emotion-intensity-statement-repetition-actor).

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so every run picks the same sample.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension first so a short name such as "03-01-05.wav" still
    # yields the code "05", and guard the index so a file that does not follow
    # the naming scheme is reported as unknown instead of raising IndexError.
    parts = os.path.splitext(filename)[0].split('-')
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Data set root directories; adjust them to the local environment.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA" (not "CASIA"), reportedly the current on-disk folder name; no path fix-up is visible in this cell - confirm
def load_audio_sample(file_path):
    """
    Read an audio file without resampling and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        audio: Waveform returned by ``librosa.load``.
        sr: Sampling rate of the file (``sr=None`` keeps the native rate).
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    sample_count = len(waveform)
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Draw the waveform and a log-frequency spectrogram of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix shared by both subplot titles.

    Returns:
        An ``IPython.display.Audio`` player built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    waveform_title = f"{title} - 波形图"
    plt.title(waveform_title)
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its peak.
    plt.subplot(2, 1, 2)
    stft_magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    spectrogram_title = f"{title} - 频谱图"
    plt.title(spectrogram_title)

    plt.tight_layout()
    plt.show()

    # Hand back a player so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from the RAVDESS data set.

    The sample is the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``. Its emotion is decoded from the third
    dash-separated field of the file name
    (modality-channel-emotion-intensity-statement-repetition-actor).

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so every run picks the same sample.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension first so a short name such as "03-01-05.wav" still
    # yields the code "05", and guard the index so a file that does not follow
    # the naming scheme is reported as unknown instead of raising IndexError.
    parts = os.path.splitext(filename)[0].split('-')
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Data set root directories; adjust them to the local environment.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA" (not "CASIA"), reportedly the current on-disk folder name; no path fix-up is visible in this cell - confirm
def load_audio_sample(file_path):
    """
    Read an audio file without resampling and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        audio: Waveform returned by ``librosa.load``.
        sr: Sampling rate of the file (``sr=None`` keeps the native rate).
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    sample_count = len(waveform)
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Draw the waveform and a log-frequency spectrogram of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix shared by both subplot titles.

    Returns:
        An ``IPython.display.Audio`` player built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    waveform_title = f"{title} - 波形图"
    plt.title(waveform_title)
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its peak.
    plt.subplot(2, 1, 2)
    stft_magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    spectrogram_title = f"{title} - 频谱图"
    plt.title(spectrogram_title)

    plt.tight_layout()
    plt.show()

    # Hand back a player so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from the RAVDESS data set.

    The sample is the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``. Its emotion is decoded from the third
    dash-separated field of the file name
    (modality-channel-emotion-intensity-statement-repetition-actor).

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so every run picks the same sample.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension first so a short name such as "03-01-05.wav" still
    # yields the code "05", and guard the index so a file that does not follow
    # the naming scheme is reported as unknown instead of raising IndexError.
    parts = os.path.splitext(filename)[0].split('-')
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Data set root directories; adjust them to the local environment.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA" (not "CASIA"), reportedly the current on-disk folder name; no path fix-up is visible in this cell - confirm
def load_audio_sample(file_path):
    """
    Read an audio file without resampling and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        audio: Waveform returned by ``librosa.load``.
        sr: Sampling rate of the file (``sr=None`` keeps the native rate).
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    sample_count = len(waveform)
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Draw the waveform and a log-frequency spectrogram of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix shared by both subplot titles.

    Returns:
        An ``IPython.display.Audio`` player built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    waveform_title = f"{title} - 波形图"
    plt.title(waveform_title)
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its peak.
    plt.subplot(2, 1, 2)
    stft_magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    spectrogram_title = f"{title} - 频谱图"
    plt.title(spectrogram_title)

    plt.tight_layout()
    plt.show()

    # Hand back a player so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from the RAVDESS data set.

    The sample is the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``. Its emotion is decoded from the third
    dash-separated field of the file name
    (modality-channel-emotion-intensity-statement-repetition-actor).

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so every run picks the same sample.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension first so a short name such as "03-01-05.wav" still
    # yields the code "05", and guard the index so a file that does not follow
    # the naming scheme is reported as unknown instead of raising IndexError.
    parts = os.path.splitext(filename)[0].split('-')
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
# In[ ]:
"""
多语言语音数据集读取与探索
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import glob
# Data set root directories; adjust them to the local environment.
RAVDESS_PATH = './RAVDESS'
SAVEE_PATH = './SAVEE'
CASIA_PATH = './CAISA' # NOTE(review): spelled "CAISA" (not "CASIA"), reportedly the current on-disk folder name; no path fix-up is visible in this cell - confirm
def load_audio_sample(file_path):
    """
    Read an audio file without resampling and print its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        audio: Waveform returned by ``librosa.load``.
        sr: Sampling rate of the file (``sr=None`` keeps the native rate).
    """
    print(f"加载音频文件: {file_path}")
    waveform, rate = librosa.load(file_path, sr=None)
    print(f"采样率: {rate}Hz")
    sample_count = len(waveform)
    print(f"音频长度: {sample_count} 采样点, {sample_count/rate:.2f}")
    return waveform, rate
def explore_audio(audio, sr, title="音频波形"):
    """
    Draw the waveform and a log-frequency spectrogram of a clip.

    Args:
        audio: Waveform samples.
        sr: Sampling rate of ``audio``.
        title: Prefix shared by both subplot titles.

    Returns:
        An ``IPython.display.Audio`` player built from the same samples.
    """
    plt.figure(figsize=(12, 8))

    # Upper panel: time-domain waveform.
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    waveform_title = f"{title} - 波形图"
    plt.title(waveform_title)
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Lower panel: STFT magnitude converted to dB relative to its peak.
    plt.subplot(2, 1, 2)
    stft_magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    spectrogram_title = f"{title} - 频谱图"
    plt.title(spectrogram_title)

    plt.tight_layout()
    plt.show()

    # Hand back a player so the caller can listen to the clip.
    player = Audio(data=audio, rate=sr)
    return player
# 示例读取和探索RAVDESS数据集中的一个文件
def explore_ravdess():
    """
    Load, describe and plot one sample from the RAVDESS data set.

    The sample is the first ``.wav`` file (in sorted order) under
    ``RAVDESS_PATH/Actor_01``. Its emotion is decoded from the third
    dash-separated field of the file name
    (modality-channel-emotion-intensity-statement-repetition-actor).

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory or its audio files are missing.
    """
    if not os.path.exists(RAVDESS_PATH):
        print(f"错误: RAVDESS路径不存在: {RAVDESS_PATH}")
        return None

    # glob yields files in arbitrary order; sort so every run picks the same sample.
    audio_files = sorted(glob.glob(os.path.join(RAVDESS_PATH, 'Actor_01', '*.wav')))
    if not audio_files:
        print("未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        '01': '中性',
        '02': '平静',
        '03': '开心',
        '04': '悲伤',
        '05': '愤怒',
        '06': '恐惧',
        '07': '厌恶',
        '08': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Strip the extension first so a short name such as "03-01-05.wav" still
    # yields the code "05", and guard the index so a file that does not follow
    # the naming scheme is reported as unknown instead of raising IndexError.
    parts = os.path.splitext(filename)[0].split('-')
    emotion_id = parts[2] if len(parts) >= 3 else None
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"RAVDESS - {emotion}情感")
# 示例读取和探索SAVEE数据集中的一个文件
def explore_savee():
    """
    Load, describe and plot one sample from the SAVEE data set.

    The sample is the first ``.wav`` file (in sorted order) of the first
    speaker directory under ``SAVEE_PATH/AudioData``. The emotion is decoded
    from the letter code in the file name, e.g. ``a01.wav`` -> ``a``.

    Returns:
        The ``Audio`` object produced by ``explore_audio``, or ``None`` when
        the data set directory, a speaker directory or audio files are missing.
    """
    if not os.path.exists(SAVEE_PATH):
        print(f"错误: SAVEE路径不存在: {SAVEE_PATH}")
        return None

    # The wildcard can also match plain files, so keep directories only;
    # sort because glob order is arbitrary.
    audio_dirs = sorted(
        entry for entry in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*'))
        if os.path.isdir(entry)
    )
    if not audio_dirs:
        print("未找到SAVEE音频目录")
        return None

    speaker_dir = audio_dirs[0]
    audio_files = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))
    if not audio_files:
        print(f"{speaker_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    audio, sr = load_audio_sample(sample_file)

    emotion_mapping = {
        'a': '愤怒',
        'd': '厌恶',
        'f': '恐惧',
        'h': '开心',
        'n': '中性',
        'sa': '悲伤',
        'su': '惊讶',
    }
    filename = os.path.basename(sample_file)
    # Drop the extension before removing digits: filtering the full name kept
    # the letters of "wav", turning "a01.wav" into "awav", which never matched.
    stem = os.path.splitext(filename)[0]
    # Presumably some copies of the data set prefix the speaker ("DC_a01");
    # keep only the part after the last underscore so both forms decode.
    code_part = stem.rsplit('_', 1)[-1]
    emotion_id = ''.join(c for c in code_part if not c.isdigit()).lower()
    emotion = emotion_mapping.get(emotion_id, '未知')
    print(f"文件: {filename}")
    print(f"情感: {emotion}")

    return explore_audio(audio, sr, f"SAVEE - {emotion}情感")
# 示例读取和探索CASIA数据集中的一个文件
def explore_casia():
    """
    Locate one CASIA sample, print its emotion label and plot it if possible.

    The layout walked here is ``CASIA_PATH/<speaker>/<emotion>/<file>``; the
    emotion label is the name of the emotion directory. Speakers, emotions
    and files are taken in sorted order so the same sample is chosen on
    every run.

    Returns:
        The ``Audio`` object from ``explore_audio`` for a ``.wav`` sample, or
        ``None`` when nothing usable is found or the file cannot be read.
    """
    if not os.path.exists(CASIA_PATH):
        print(f"错误: CASIA路径不存在: {CASIA_PATH}")
        return None

    def list_subdirs(parent):
        # os.listdir order is arbitrary, so sort; "_desktop.ini" entries are
        # skipped exactly as in the original filter.
        return sorted(
            name for name in os.listdir(parent)
            if name != '_desktop.ini' and os.path.isdir(os.path.join(parent, name))
        )

    speakers = list_subdirs(CASIA_PATH)
    if not speakers:
        print("未找到CASIA说话者目录")
        return None

    speaker_dir = os.path.join(CASIA_PATH, speakers[0])
    emotion_dirs = list_subdirs(speaker_dir)
    if not emotion_dirs:
        print(f"{speaker_dir} 中未找到情感目录")
        return None

    emotion_dir = os.path.join(speaker_dir, emotion_dirs[0])
    audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.wav')))
    if not audio_files:
        # Fall back to ".peak" files; they are only listed, not decoded.
        audio_files = sorted(glob.glob(os.path.join(emotion_dir, '*.peak')))
    if not audio_files:
        print(f"{emotion_dir} 中未找到音频文件")
        return None

    sample_file = audio_files[0]
    emotion_name = os.path.basename(emotion_dir)
    print(f"文件: {sample_file}")
    print(f"情感: {emotion_name}")

    # Compare case-insensitively: on case-insensitive file systems the
    # "*.wav" pattern can also match names ending in ".WAV".
    if not sample_file.lower().endswith('.wav'):
        print(f"文件格式不是wav无法直接使用librosa读取: {sample_file}")
        return None

    try:
        audio, sr = load_audio_sample(sample_file)
        return explore_audio(audio, sr, f"CASIA - {emotion_name}情感")
    except Exception as e:
        # Best-effort exploration: report the problem and carry on.
        print(f"读取文件时出错: {e}")
        return None
# 数据集统计分析
def analyze_datasets():
    """
    Count audio files and emotion labels per data set and print a summary.

    RAVDESS files are read from ``RAVDESS_PATH/Actor_*/*.wav`` and labelled by
    the third dash-separated field of the file name. SAVEE files are read
    from ``SAVEE_PATH/AudioData/<speaker>/*.wav`` and labelled by the letter
    code in the file name. CASIA files are read from
    ``CASIA_PATH/<speaker>/<emotion>/*.wav`` and labelled by the emotion
    directory name. A data set whose path is missing or holds no files is
    reported as not found.
    """
    ravdess_emotions = {
        '01': '中性', '02': '平静', '03': '开心', '04': '悲伤',
        '05': '愤怒', '06': '恐惧', '07': '厌恶', '08': '惊讶',
    }
    savee_emotions = {
        'a': '愤怒', 'd': '厌恶', 'f': '恐惧', 'h': '开心',
        'n': '中性', 'sa': '悲伤', 'su': '惊讶',
    }
    stats = {
        'RAVDESS': {'total': 0, 'emotions': {}},
        'SAVEE': {'total': 0, 'emotions': {}},
        'CASIA': {'total': 0, 'emotions': {}},
    }

    def record(dataset, emotion=None):
        # Every file counts towards the total; only files with a decodable
        # label enter the emotion distribution.
        entry = stats[dataset]
        entry['total'] += 1
        if emotion is not None:
            entry['emotions'][emotion] = entry['emotions'].get(emotion, 0) + 1

    # RAVDESS: the emotion code is the third field of the file name.
    if os.path.exists(RAVDESS_PATH):
        for file_path in glob.glob(os.path.join(RAVDESS_PATH, 'Actor_*', '*.wav')):
            parts = os.path.basename(file_path).split('-')
            if len(parts) >= 3:
                record('RAVDESS', ravdess_emotions.get(parts[2], '未知'))
            else:
                record('RAVDESS')

    # SAVEE: previously declared in ``stats`` but never counted, so it was
    # always reported as missing. The label is the non-digit part of the stem.
    if os.path.exists(SAVEE_PATH):
        for file_path in glob.glob(os.path.join(SAVEE_PATH, 'AudioData', '*', '*.wav')):
            stem = os.path.splitext(os.path.basename(file_path))[0]
            # Presumably some copies prefix the speaker ("DC_a01"); use the
            # part after the last underscore so both forms decode.
            code = ''.join(c for c in stem.rsplit('_', 1)[-1] if not c.isdigit()).lower()
            record('SAVEE', savee_emotions.get(code, '未知'))

    # CASIA: previously never counted either. The label is the name of the
    # directory that holds the file.
    if os.path.exists(CASIA_PATH):
        for file_path in glob.glob(os.path.join(CASIA_PATH, '*', '*', '*.wav')):
            record('CASIA', os.path.basename(os.path.dirname(file_path)))

    print("数据集统计:")
    for dataset, data in stats.items():
        if data['total'] > 0:
            print(f"\n{dataset} 数据集:")
            print(f" 总文件数: {data['total']}")
            if data['emotions']:
                print(" 情感分布:")
                for emotion, count in data['emotions'].items():
                    print(f" {emotion}: {count} 文件 ({count/data['total']*100:.1f}%)")
        else:
            print(f"\n{dataset} 数据集: 未找到文件或路径不存在")
# Print the per-data-set statistics first.
analyze_datasets()

# Explore one sample per data set. Each explorer runs in its own try/except
# so that a failure in one data set does not stop the others.
from IPython.display import display

for _dataset_name, _explore in (
    ("RAVDESS", explore_ravdess),
    ("SAVEE", explore_savee),
    ("CASIA", explore_casia),
):
    try:
        print(f"\n{_dataset_name}数据集样例:")
        _player = _explore()
        # A value returned inside a try block is not rendered automatically
        # by the notebook, so show the audio player explicitly.
        if _player is not None:
            display(_player)
    except Exception as e:
        print(f"{_dataset_name}探索时出错: {e}")
## 2. 特征工程
### 2.1 在下面空白处写出音高、估计调谐偏差指标的构建代码
# In[ ]:
"""
音高和调谐偏差指标提取
"""
import numpy as np
import librosa
import matplotlib.pyplot as plt
def extract_pitch_features(audio, sr):
    """
    Summarise the dominant pitch track and the tuning offset of a signal.

    For every analysis frame the pitch candidate with the largest magnitude
    (as returned by ``librosa.piptrack``) is taken; frames whose selected
    pitch is not positive are ignored.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        dict with keys ``pitch_mean``, ``pitch_std``, ``pitch_max``,
        ``pitch_min`` (all 0 when no frame has a positive pitch) and
        ``tuning_offset`` (result of ``librosa.estimate_tuning``).
    """
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)

    # Pick, per frame (column), the pitch of the strongest bin in one
    # vectorised step instead of looping over frames in Python.
    strongest_bin = np.argmax(magnitudes, axis=0)
    frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced = frame_pitch[frame_pitch > 0]

    if voiced.size:
        features = {
            'pitch_mean': np.mean(voiced),
            'pitch_std': np.std(voiced),  # 0 for a single voiced frame
            'pitch_max': np.max(voiced),
            'pitch_min': np.min(voiced),
        }
    else:
        features = {
            'pitch_mean': 0,
            'pitch_std': 0,
            'pitch_max': 0,
            'pitch_min': 0,
        }

    features['tuning_offset'] = librosa.estimate_tuning(y=audio, sr=sr)
    return features
def visualize_pitch(audio, sr, title="音高分析"):
    """
    Plot the waveform and the per-frame dominant pitch of a signal.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the subplot titles.
    """
    plt.figure(figsize=(12, 8))

    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)
    # times_like assumes sr=22050 unless told otherwise; pass the real rate
    # so the pitch curve shares the time axis of the waveform above it.
    times = librosa.times_like(pitches, sr=sr)

    # Dominant pitch per frame, keeping only frames with a positive pitch.
    strongest_bin = np.argmax(magnitudes, axis=0)
    frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced = frame_pitch > 0
    pitch_times = times[voiced]
    pitch_values = frame_pitch[voiced]

    # Waveform
    plt.subplot(2, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Pitch contour
    plt.subplot(2, 1, 2)
    if pitch_values.size:
        plt.plot(pitch_times, pitch_values, 'o-', markersize=1)
        plt.title(f"{title} - 音高变化")
        plt.xlabel("时间 (秒)")
        plt.ylabel("音高 (Hz)")
        # Horizontal reference line at the mean pitch
        mean_pitch = np.mean(pitch_values)
        plt.axhline(y=mean_pitch, color='r', linestyle='--', alpha=0.8,
                    label=f"平均音高: {mean_pitch:.1f} Hz")
        plt.legend()
    else:
        plt.text(0.5, 0.5, "未检测到音高", horizontalalignment='center',
                 verticalalignment='center', transform=plt.gca().transAxes)

    # Tuning offset shown as a footer note on the figure
    tuning_offset = librosa.estimate_tuning(y=audio, sr=sr)
    plt.figtext(0.5, 0.01, f"调谐偏差: {tuning_offset:.4f} (±半音)",
                ha="center", fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
    plt.tight_layout()
    plt.show()
# Smoke test: run the pitch feature extraction and plot on one RAVDESS sample.
try:
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(audio_file, sr=None)
        features = extract_pitch_features(audio, sr)
        print("音高特征:")
        for name, value in features.items():
            print(f" {name}: {value:.4f}")
        filename = os.path.basename(audio_file)
        visualize_pitch(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"音高分析出错: {e}")
# ### 2.2 在下面空白处写出频谱质心、光谱平坦度指标的构建代码
# In[ ]:
"""
频谱质心和光谱平坦度指标提取
"""
import numpy as np
import librosa
import matplotlib.pyplot as plt
def extract_spectral_features(audio, sr):
    """
    Compute mean/std/max/min of the spectral centroid and spectral flatness.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        dict with keys ``spectral_centroid_{mean,std,max,min}`` followed by
        ``spectral_flatness_{mean,std,max,min}``.
    """
    # Both librosa features come back as (1, n_frames); keep the single row.
    centroid_track = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    flatness_track = librosa.feature.spectral_flatness(y=audio)[0]

    reducers = (('mean', np.mean), ('std', np.std), ('max', np.max), ('min', np.min))
    tracks = (
        ('spectral_centroid', centroid_track),
        ('spectral_flatness', flatness_track),
    )

    features = {}
    for prefix, track in tracks:
        for suffix, reduce_fn in reducers:
            features[f'{prefix}_{suffix}'] = reduce_fn(track)
    return features
def visualize_spectral_features(audio, sr, title="频谱特征分析"):
    """
    Plot waveform, spectral centroid and spectral flatness, then print stats.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the subplot titles.
    """
    plt.figure(figsize=(12, 12))

    # Waveform
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Spectral centroid: computed once. The frame times must use the real
    # sampling rate, otherwise times_like falls back to 22050 Hz and the
    # axis no longer matches the waveform.
    centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    times = librosa.times_like(centroids, sr=sr)
    plt.subplot(3, 1, 2)
    plt.semilogy(times, centroids, label='频谱质心')
    plt.title(f"{title} - 频谱质心随时间变化")
    plt.xlabel("时间 (秒)")
    plt.ylabel("频率 (Hz)")
    plt.grid(True, alpha=0.3)
    mean_centroid = np.mean(centroids)
    plt.axhline(y=mean_centroid, color='r', linestyle='--', alpha=0.8,
                label=f"平均频谱质心: {mean_centroid:.1f} Hz")
    plt.legend()

    # Spectral flatness: same treatment.
    flatness = librosa.feature.spectral_flatness(y=audio)[0]
    times_flat = librosa.times_like(flatness, sr=sr)
    plt.subplot(3, 1, 3)
    plt.plot(times_flat, flatness, label='光谱平坦度')
    plt.title(f"{title} - 光谱平坦度随时间变化")
    plt.xlabel("时间 (秒)")
    plt.ylabel("平坦度")
    plt.ylim([0, np.max(flatness)*1.2])  # leave headroom above the peak
    plt.grid(True, alpha=0.3)
    mean_flatness = np.mean(flatness)
    plt.axhline(y=mean_flatness, color='r', linestyle='--', alpha=0.8,
                label=f"平均光谱平坦度: {mean_flatness:.4f}")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Summary statistics, taken from the arrays already computed above
    # rather than re-running the feature extraction.
    print("\n频谱特征统计:")
    print(f" 频谱质心均值: {mean_centroid:.2f} Hz")
    print(f" 频谱质心标准差: {np.std(centroids):.2f}")
    print(f" 光谱平坦度均值: {mean_flatness:.6f}")
    print(f" 光谱平坦度标准差: {np.std(flatness):.6f}")
# Smoke test: run the spectral feature extraction and plots on one RAVDESS sample.
try:
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(audio_file, sr=None)
        features = extract_spectral_features(audio, sr)
        print("频谱特征:")
        for name, value in features.items():
            print(f" {name}: {value:.6f}")
        filename = os.path.basename(audio_file)
        visualize_spectral_features(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"频谱特征分析出错: {e}")
# ### 2.3 在下面空白处写出梅尔频率、光谱对比度指标的构建代码
# In[ ]:
"""
梅尔频率和光谱对比度指标提取
"""
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
def extract_mfcc_contrast_features(audio, sr):
    """
    Compute statistics of 13 MFCCs, their deltas and the spectral contrast.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        dict, in insertion order: ``mfcc_<i>_{mean,std,max,min}`` for
        i = 1..13, then ``mfcc_<i>_delta_{mean,std}``, then
        ``mfcc_<i>_delta2_{mean,std}``, then
        ``spectral_contrast_<band>_{mean,std}`` per band and finally the
        overall ``spectral_contrast_mean`` / ``spectral_contrast_std``.
    """
    features = {}

    # Static MFCCs: one row per coefficient.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    for coeff, row in enumerate(mfccs, start=1):
        features[f'mfcc_{coeff}_mean'] = np.mean(row)
        features[f'mfcc_{coeff}_std'] = np.std(row)
        features[f'mfcc_{coeff}_max'] = np.max(row)
        features[f'mfcc_{coeff}_min'] = np.min(row)

    # First- and second-order differences of the MFCC trajectories.
    delta_sets = (
        ('delta', librosa.feature.delta(mfccs)),
        ('delta2', librosa.feature.delta(mfccs, order=2)),
    )
    for tag, deltas in delta_sets:
        for coeff, row in enumerate(deltas, start=1):
            features[f'mfcc_{coeff}_{tag}_mean'] = np.mean(row)
            features[f'mfcc_{coeff}_{tag}_std'] = np.std(row)

    # Spectral contrast: per-band statistics, then statistics over all bands.
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
    for band, row in enumerate(contrast, start=1):
        features[f'spectral_contrast_{band}_mean'] = np.mean(row)
        features[f'spectral_contrast_{band}_std'] = np.std(row)
    features['spectral_contrast_mean'] = np.mean(contrast)
    features['spectral_contrast_std'] = np.std(contrast)

    return features
def visualize_mfcc(audio, sr, title="MFCC特征分析"):
    """
    Plot the waveform, mel spectrogram and MFCCs of a signal.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the subplot titles.
    """
    plt.figure(figsize=(12, 10))

    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)

    # Waveform
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # Mel spectrogram. specshow needs the real sampling rate to label the
    # time and mel axes; without it the default of 22050 Hz is assumed.
    plt.subplot(3, 1, 2)
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr)
    librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max),
                             sr=sr, y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{title} - 梅尔频谱")

    # MFCC matrix
    plt.subplot(3, 1, 3)
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - MFCC")

    plt.tight_layout()
    plt.show()
def visualize_spectral_contrast(audio, sr, title="光谱对比度分析"):
    """
    Plot the spectral contrast over time and per band, then print stats.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the first subplot title.
    """
    plt.figure(figsize=(12, 8))

    contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
    # Per-band statistics are needed for both the bar chart and the report.
    band_means = np.mean(contrast, axis=1)
    band_stds = np.std(contrast, axis=1)

    # Contrast over time; sr is passed so the time axis is in real seconds
    # (specshow otherwise assumes 22050 Hz).
    plt.subplot(2, 1, 1)
    librosa.display.specshow(contrast, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - 光谱对比度")

    # Mean contrast of each band
    plt.subplot(2, 1, 2)
    plt.bar(range(contrast.shape[0]), band_means, color='skyblue')
    plt.xlabel("频带")
    plt.ylabel("平均光谱对比度")
    plt.title("各频带平均光谱对比度")

    plt.tight_layout()
    plt.show()

    # Text report
    print("\n光谱对比度统计:")
    print(f" 总平均值: {np.mean(contrast):.4f}")
    print(f" 总标准差: {np.std(contrast):.4f}")
    print(" 各频带统计:")
    for band, (band_mean, band_std) in enumerate(zip(band_means, band_stds), start=1):
        print(f" 频带{band}: 均值={band_mean:.4f}, 标准差={band_std:.4f}")
# Smoke test: run the MFCC / spectral-contrast extraction and plots on one
# RAVDESS sample.
try:
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(audio_file, sr=None)
        features = extract_mfcc_contrast_features(audio, sr)
        print("梅尔频率和光谱对比度特征 (前10项):")
        # Show only the first ten entries; an ellipsis marks the cut-off.
        preview = list(features.items())[:10]
        for name, value in preview:
            print(f" {name}: {value:.6f}")
        if len(preview) == 10:
            print(" ...")
        filename = os.path.basename(audio_file)
        visualize_mfcc(audio, sr, f"RAVDESS音频 ({filename})")
        visualize_spectral_contrast(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"梅尔频率和光谱对比度分析出错: {e}")
# ### 2.4 在下面空白处写出均方根能量、谱熵、色谱图指标的构建代码
# In[ ]:
"""
均方根能量、谱熵、色谱图指标的构建代码
"""
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from scipy.stats import entropy
def extract_energy_entropy_chroma_features(audio, sr):
    """
    Compute statistics of RMS energy, spectral entropy and chroma.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        dict, in insertion order: ``rms_{mean,std,max,min}``,
        ``spectral_entropy_{mean,std,max,min}``,
        ``chroma_<k>_{mean,std}`` for every chroma row (k starts at 1) and
        the overall ``chroma_mean`` / ``chroma_std``.
    """
    features = {}

    # RMS energy per frame
    rms = librosa.feature.rms(y=audio)[0]
    features['rms_mean'] = np.mean(rms)
    features['rms_std'] = np.std(rms)
    features['rms_max'] = np.max(rms)
    features['rms_min'] = np.min(rms)

    # Spectral entropy: treat each frame's magnitude spectrum as a
    # probability distribution. The small offset avoids log(0).
    spec = np.abs(librosa.stft(audio)) + 1e-10
    spec_norm = spec / np.sum(spec, axis=0, keepdims=True)
    # One vectorised call along the frequency axis replaces a per-frame loop.
    entropies = entropy(spec_norm, axis=0)
    features['spectral_entropy_mean'] = np.mean(entropies)
    features['spectral_entropy_std'] = np.std(entropies)
    features['spectral_entropy_max'] = np.max(entropies)
    features['spectral_entropy_min'] = np.min(entropies)

    # Chroma: iterate over the rows actually returned instead of assuming 12.
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
    for index, pitch_class in enumerate(chroma, start=1):
        features[f'chroma_{index}_mean'] = np.mean(pitch_class)
        features[f'chroma_{index}_std'] = np.std(pitch_class)
    features['chroma_mean'] = np.mean(chroma)
    features['chroma_std'] = np.std(chroma)

    return features
def visualize_energy_entropy(audio, sr, title="能量与熵分析"):
    """
    Plot waveform, RMS energy and spectral entropy, then print their stats.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the subplot titles.
    """
    plt.figure(figsize=(12, 10))

    # Waveform
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(audio, sr=sr)
    plt.title(f"{title} - 波形图")
    plt.xlabel("时间 (秒)")
    plt.ylabel("振幅")

    # RMS energy. Frame times use the real sampling rate; times_like would
    # otherwise assume 22050 Hz and misalign with the waveform.
    rms = librosa.feature.rms(y=audio)[0]
    times = librosa.times_like(rms, sr=sr)
    plt.subplot(3, 1, 2)
    plt.plot(times, rms)
    plt.title(f"{title} - 均方根能量")
    plt.xlabel("时间 (秒)")
    plt.ylabel("RMS能量")
    plt.grid(True, alpha=0.3)
    mean_rms = np.mean(rms)
    plt.axhline(y=mean_rms, color='r', linestyle='--', alpha=0.8,
                label=f"平均RMS: {mean_rms:.4f}")
    plt.legend()

    # Spectral entropy per frame (offset avoids log(0)); computed in one
    # vectorised call along the frequency axis.
    spec = np.abs(librosa.stft(audio)) + 1e-10
    spec_norm = spec / np.sum(spec, axis=0, keepdims=True)
    entropies = entropy(spec_norm, axis=0)
    times_entropy = librosa.times_like(entropies, sr=sr)
    plt.subplot(3, 1, 3)
    plt.plot(times_entropy, entropies)
    plt.title(f"{title} - 谱熵")
    plt.xlabel("时间 (秒)")
    plt.ylabel("")
    plt.grid(True, alpha=0.3)
    mean_entropy = np.mean(entropies)
    plt.axhline(y=mean_entropy, color='r', linestyle='--', alpha=0.8,
                label=f"平均熵: {mean_entropy:.4f}")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Text report
    print("\n能量和熵统计:")
    print(f" RMS能量均值: {mean_rms:.6f}")
    print(f" RMS能量标准差: {np.std(rms):.6f}")
    print(f" 谱熵均值: {mean_entropy:.6f}")
    print(f" 谱熵标准差: {np.std(entropies):.6f}")
def visualize_chroma(audio, sr, title="色谱图分析"):
    """
    Plot the chromagram and the mean energy per pitch class, then print stats.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.
        title: Prefix used in the first subplot title.
    """
    plt.figure(figsize=(12, 8))

    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
    chroma_notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # Per-pitch-class statistics, used by the bar chart and the report.
    note_means = np.mean(chroma[:12], axis=1)
    note_stds = np.std(chroma[:12], axis=1)

    # Chromagram; sr is passed so the time axis is in real seconds
    # (specshow otherwise assumes 22050 Hz).
    plt.subplot(2, 1, 1)
    librosa.display.specshow(chroma, sr=sr, y_axis='chroma', x_axis='time')
    plt.colorbar()
    plt.title(f"{title} - 色谱图")

    # Mean energy of each pitch class
    plt.subplot(2, 1, 2)
    plt.bar(chroma_notes, note_means, color='skyblue')
    plt.xlabel("音符")
    plt.ylabel("平均能量")
    plt.title("各色度平均能量")

    plt.tight_layout()
    plt.show()

    # Text report
    print("\n色度统计:")
    print(f" 总平均值: {np.mean(chroma):.4f}")
    print(f" 总标准差: {np.std(chroma):.4f}")
    print(" 各音符统计:")
    for note, note_mean, note_std in zip(chroma_notes, note_means, note_stds):
        print(f" {note}: 均值={note_mean:.4f}, 标准差={note_std:.4f}")
# Smoke test: run the energy / entropy / chroma extraction and plots on one
# RAVDESS sample.
try:
    audio_file = os.path.join(RAVDESS_PATH, 'Actor_01', '03-01-01-01-01-01-01.wav')
    if not os.path.exists(audio_file):
        print(f"文件不存在: {audio_file}")
    else:
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(audio_file, sr=None)
        features = extract_energy_entropy_chroma_features(audio, sr)
        print("均方根能量、谱熵、色谱图特征:")
        for name, value in features.items():
            print(f" {name}: {value:.6f}")
        filename = os.path.basename(audio_file)
        visualize_energy_entropy(audio, sr, f"RAVDESS音频 ({filename})")
        visualize_chroma(audio, sr, f"RAVDESS音频 ({filename})")
except Exception as e:
    print(f"均方根能量、谱熵、色谱图分析出错: {e}")
# ### 2.5 在下面空白处写出特征数据处理代码,包括标准化、二维数据转三维数据、标签数值化、划分数据集代码
# In[ ]:
"""
特征数据处理代码
"""
import numpy as np
import pandas as pd
import pickle
import os
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
def extract_all_features(audio, sr):
    """
    Run every feature extractor defined above and merge their results.

    Args:
        audio: Audio waveform samples.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        A single dict holding the pitch, spectral, MFCC/contrast and
        energy/entropy/chroma features, merged in that order.
    """
    extractors = (
        extract_pitch_features,
        extract_spectral_features,
        extract_mfcc_contrast_features,
        extract_energy_entropy_chroma_features,
    )
    combined = {}
    for extractor in extractors:
        combined.update(extractor(audio, sr))
    return combined
def features_to_matrix(features_list):
    """
    Turn a list of feature dicts into a dense matrix with sorted columns.

    Args:
        features_list: List of ``{feature_name: value}`` dicts, one per sample.

    Returns:
        X: Array of shape (n_samples, n_features); a feature that a sample
            does not have is left at 0.
        feature_names: Sorted list of every feature name seen in the input.
    """
    # Union of all keys, in sorted order, defines the columns.
    feature_names = sorted(set().union(*(sample.keys() for sample in features_list)))
    column_of = {name: col for col, name in enumerate(feature_names)}

    X = np.zeros((len(features_list), len(feature_names)))
    for row, sample in enumerate(features_list):
        for name, value in sample.items():
            X[row, column_of[name]] = value
    return X, feature_names
def features_to_dataframe(features_list, labels=None):
    """
    Build a DataFrame from a list of feature dicts.

    Args:
        features_list: List of ``{feature_name: value}`` dicts, one per sample.
        labels: Optional labels; when given they are stored in a ``label``
            column.

    Returns:
        df: DataFrame with one row per sample.
    """
    frame = pd.DataFrame(features_list)
    if labels is None:
        return frame
    frame['label'] = labels
    return frame
def standardize_features(X_train, X_val=None, X_test=None):
    """
    Standardise features with a scaler fitted on the training set only.

    Args:
        X_train: Training feature matrix; the scaler is fitted on it.
        X_val: Optional validation feature matrix.
        X_test: Optional test feature matrix.

    Returns:
        X_train_norm: Transformed training set.
        X_val_norm: Transformed validation set, or None if not provided.
        X_test_norm: Transformed test set, or None if not provided.
        scaler: The fitted ``StandardScaler``.
    """
    scaler = StandardScaler()
    scaler.fit(X_train)

    def _apply(matrix):
        # Transform with the training statistics; pass None through.
        return None if matrix is None else scaler.transform(matrix)

    X_train_norm = scaler.transform(X_train)
    X_val_norm = _apply(X_val)
    X_test_norm = _apply(X_test)
    return X_train_norm, X_val_norm, X_test_norm, scaler
def reshape_for_lstm(X):
    """
    Insert a length-1 time-step axis so a 2-D matrix fits an LSTM input.

    Args:
        X: Feature matrix of shape (n_samples, n_features).

    Returns:
        X_reshaped: Array of shape (n_samples, 1, n_features).
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    return X.reshape((n_samples, 1, n_features))
def encode_labels(y):
    """
    Encode labels as integers with a ``LabelEncoder``.

    Args:
        y: Sequence of labels.

    Returns:
        y_encoded: Integer-encoded labels.
        encoder: The fitted ``LabelEncoder``.
    """
    encoder = LabelEncoder().fit(y)
    return encoder.transform(y), encoder
def split_dataset(X, y, test_size=0.2, val_size=0.1, random_state=42):
    """
    Split data into stratified training, validation and test sets.

    Args:
        X: Feature matrix.
        y: Labels, also used for stratification.
        test_size: Fraction of the whole data set used for testing.
        val_size: Fraction of the whole data set used for validation.
        random_state: Seed passed to both splits.

    Returns:
        X_train, X_val, X_test, y_train, y_val, y_test
    """
    def _stratified_split(features, labels, holdout):
        # One stratified split with the shared random seed.
        return train_test_split(
            features, labels, test_size=holdout,
            random_state=random_state, stratify=labels)

    # First carve off the test set ...
    X_rest, X_test, y_rest, y_test = _stratified_split(X, y, test_size)
    # ... then take the validation set out of the remainder. val_size is
    # relative to the full data set, so rescale it to the remainder.
    val_ratio = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = _stratified_split(X_rest, y_rest, val_ratio)
    return X_train, X_val, X_test, y_train, y_val, y_test
def save_preprocessing_objects(scaler, encoder, feature_names, output_dir='output/emotion_model'):
    """
    Pickle the scaler, label encoder and feature names into ``output_dir``.

    Args:
        scaler: Feature scaler, written to ``feature_scaler.pkl``.
        encoder: Label encoder, written to ``emotion_encoder.pkl``.
        feature_names: Feature name list, written to ``feature_names.pkl``.
        output_dir: Target directory; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    artifacts = (
        ('feature_scaler.pkl', scaler),
        ('emotion_encoder.pkl', encoder),
        ('feature_names.pkl', feature_names),
    )
    for file_name, payload in artifacts:
        with open(os.path.join(output_dir, file_name), 'wb') as handle:
            pickle.dump(payload, handle)

    print(f"预处理对象已保存到: {output_dir}")
def visualize_features_distribution(X, y, y_encoder, title="特征分布"):
    """
    Plot per-class feature means and the per-class spread of the first feature.

    Args:
        X: Feature matrix, one row per sample.
        y: Integer-encoded labels aligned with the rows of ``X``.
        y_encoder: Fitted label encoder; its ``classes_`` supply the tick labels.
        title: Prefix used in the subplot titles.
    """
    plt.figure(figsize=(12, 6))

    class_names = y_encoder.classes_
    class_ids = range(len(class_names))

    # Mean feature vector of every class, stacked into (n_classes, n_features).
    class_means = np.array([np.mean(X[y == class_id], axis=0) for class_id in class_ids])

    # Only the first few features are shown in the heat map.
    n_features = 10

    # Left: heat map of the class means
    plt.subplot(1, 2, 1)
    plt.imshow(class_means[:, :n_features], aspect='auto', cmap='viridis')
    plt.colorbar()
    plt.xlabel('特征索引')
    plt.ylabel('情感类别')
    plt.title(f"{title} - 前{n_features}个特征均值")
    plt.yticks(range(len(class_names)), class_names)

    # Right: box plot of the first feature, one box per class
    plt.subplot(1, 2, 2)
    box_data = [X[y == class_id, 0] for class_id in class_ids]
    plt.boxplot(box_data, labels=class_names)
    plt.xlabel('情感类别')
    plt.ylabel('特征值')
    plt.title(f"{title} - 第1个特征的分布")

    plt.tight_layout()
    plt.show()
# Smoke test of the preprocessing pipeline on randomly generated data
# (stand-in for features that would normally come from audio files).
try:
    print("模拟加载和处理多语言语音数据...")

    n_samples = 100
    n_features = 193  # assumed feature count for the synthetic data

    # Random features and random string labels
    X_random = np.random.rand(n_samples, n_features)
    emotions = ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise']
    y_random = np.random.choice(emotions, size=n_samples)

    # String labels -> integer labels
    y_encoded, label_encoder = encode_labels(y_random)

    # Train / validation / test split
    X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(X_random, y_encoded)
    print(f"数据集划分完成:")
    for split_name, split in (("训练集", X_train), ("验证集", X_val), ("测试集", X_test)):
        print(f" {split_name}: {split.shape[0]} 样本")

    # Standardisation (fitted on the training split)
    X_train_norm, X_val_norm, X_test_norm, scaler = standardize_features(X_train, X_val, X_test)
    print("特征标准化完成")

    # 2-D -> 3-D for the LSTM
    X_train_reshaped, X_val_reshaped, X_test_reshaped = (
        reshape_for_lstm(part) for part in (X_train_norm, X_val_norm, X_test_norm)
    )
    print(f"数据重塑为LSTM格式:")
    for split_name, split in (("训练集", X_train_reshaped),
                              ("验证集", X_val_reshaped),
                              ("测试集", X_test_reshaped)):
        print(f" {split_name}形状: {split.shape}")

    # Persist the preprocessing objects
    feature_names = [f'feature_{i}' for i in range(n_features)]
    save_preprocessing_objects(scaler, label_encoder, feature_names)

    # Plot the feature distribution
    visualize_features_distribution(X_random, y_encoded, label_encoder, "随机生成的特征数据")
except Exception as e:
    print(f"特征处理出错: {e}")
# ## 3. 分类预测模型构建与分析
# ### 3.1 在下面空白处写出搭建的LSTM神经网络模型的代码
# In[ ]:
"""
LSTM神经网络模型构建
"""
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import pickle
import os
class EmotionModel:
    """
    LSTM emotion classifier with an optional auxiliary language head.

    With ``include_language=False`` the network is a ``Sequential`` stack
    (two LSTM layers, one dense layer, softmax over emotions). With
    ``include_language=True`` the two LSTM layers are shared and feed two
    softmax heads named ``emotion_output`` and ``language_output``.

    Typical use: ``build_model()`` -> ``compile_model()`` -> ``train()`` ->
    ``evaluate()`` / ``predict()``.
    """

    def __init__(self, input_shape, num_emotions, num_languages=None, include_language=False):
        """
        Store the model configuration; no network is built yet.

        Args:
            input_shape: Input shape as (time steps, feature count).
            num_emotions: Number of emotion classes.
            num_languages: Number of language classes (multi-task mode only).
            include_language: Whether to add the language classification head.
        """
        self.input_shape = input_shape
        self.num_emotions = num_emotions
        self.num_languages = num_languages
        self.include_language = include_language
        self.model = None    # set by build_model()
        self.history = None  # set by train()

    def _require_model(self):
        """Raise RuntimeError when build_model() has not been called yet."""
        if self.model is None:
            raise RuntimeError("模型尚未构建, 请先调用 build_model()")

    def _language_targets(self, y_lang, n_samples):
        """
        Return the targets for the language head.

        When ``y_lang`` is None an all-zero placeholder of shape
        (n_samples, num_languages) is returned, which keeps the multi-task
        graph runnable but gives the language head nothing to learn.
        Otherwise ``y_lang`` is taken as integer-encoded labels and one-hot
        encoded.
        """
        if y_lang is None:
            return np.zeros((n_samples, self.num_languages))
        return to_categorical(y_lang, num_classes=self.num_languages)

    def build_model(self):
        """
        Build the Keras model and store it in ``self.model``.

        Returns:
            The uncompiled Keras model.

        Raises:
            ValueError: If ``include_language`` is set but ``num_languages``
                is missing or zero.
        """
        if self.include_language and not self.num_languages:
            raise ValueError("include_language=True 时必须提供 num_languages")

        if not self.include_language:
            # Single-task model: emotion classification only.
            self.model = Sequential([
                LSTM(128, input_shape=self.input_shape, return_sequences=True),
                BatchNormalization(),
                Dropout(0.4),
                LSTM(64, return_sequences=False),
                BatchNormalization(),
                Dropout(0.4),
                Dense(64, activation='relu'),
                BatchNormalization(),
                Dropout(0.4),
                Dense(self.num_emotions, activation='softmax', name='emotion_output')
            ])
        else:
            # Multi-task model: shared LSTM trunk, two classification heads.
            inputs = Input(shape=self.input_shape)
            lstm1 = LSTM(128, return_sequences=True)(inputs)
            bn1 = BatchNormalization()(lstm1)
            drop1 = Dropout(0.4)(bn1)
            lstm2 = LSTM(64, return_sequences=False)(drop1)
            bn2 = BatchNormalization()(lstm2)
            drop2 = Dropout(0.4)(bn2)

            # Emotion head
            emotion_dense = Dense(64, activation='relu')(drop2)
            emotion_bn = BatchNormalization()(emotion_dense)
            emotion_drop = Dropout(0.4)(emotion_bn)
            emotion_output = Dense(self.num_emotions, activation='softmax',
                                   name='emotion_output')(emotion_drop)

            # Language head
            language_dense = Dense(32, activation='relu')(drop2)
            language_bn = BatchNormalization()(language_dense)
            language_drop = Dropout(0.4)(language_bn)
            language_output = Dense(self.num_languages, activation='softmax',
                                    name='language_output')(language_drop)

            self.model = Model(inputs=inputs, outputs=[emotion_output, language_output])
        return self.model

    def compile_model(self, learning_rate=0.001):
        """
        Compile the model with Adam and categorical cross-entropy.

        In multi-task mode the emotion and language losses are weighted
        0.7 / 0.3.

        Args:
            learning_rate: Learning rate for Adam.

        Returns:
            The compiled Keras model.

        Raises:
            RuntimeError: If the model has not been built.
        """
        self._require_model()
        optimizer = Adam(learning_rate=learning_rate)
        if not self.include_language:
            self.model.compile(
                optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy']
            )
        else:
            self.model.compile(
                optimizer=optimizer,
                loss={
                    'emotion_output': 'categorical_crossentropy',
                    'language_output': 'categorical_crossentropy'
                },
                metrics={
                    'emotion_output': 'accuracy',
                    'language_output': 'accuracy'
                },
                loss_weights={
                    'emotion_output': 0.7,
                    'language_output': 0.3
                }
            )
        return self.model

    def train(self, X_train, y_train, X_val, y_val, epochs=100, batch_size=32,
              save_dir='output/emotion_model', y_train_lang=None, y_val_lang=None):
        """
        Train the model, checkpoint the best weights and save model + config.

        Files written to ``save_dir``: ``best_model_weights.h5`` (best
        validation emotion accuracy), ``emotion_model.h5`` (final model) and
        ``config.pkl`` (constructor arguments plus ``num_features``).

        Args:
            X_train: Training features, shaped for the LSTM input.
            y_train: Integer-encoded training emotion labels.
            X_val: Validation features.
            y_val: Integer-encoded validation emotion labels.
            epochs: Maximum number of epochs.
            batch_size: Batch size.
            save_dir: Output directory; created if missing.
            y_train_lang: Optional integer-encoded training language labels
                (multi-task mode). An all-zero placeholder is used if omitted.
            y_val_lang: Optional integer-encoded validation language labels
                (multi-task mode). An all-zero placeholder is used if omitted.

        Returns:
            history: The Keras ``History`` object (also kept in
            ``self.history``).

        Raises:
            RuntimeError: If the model has not been built.
        """
        self._require_model()
        os.makedirs(save_dir, exist_ok=True)

        # Keep the weights of the epoch with the best validation emotion accuracy.
        checkpoint_path = os.path.join(save_dir, 'best_model_weights.h5')
        checkpoint = ModelCheckpoint(
            checkpoint_path,
            monitor='val_accuracy' if not self.include_language else 'val_emotion_output_accuracy',
            save_best_only=True,
            save_weights_only=True,
            mode='max',
            verbose=1
        )
        # Stop when the validation loss has not improved for 15 epochs.
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True,
            verbose=1
        )
        callbacks = [checkpoint, early_stopping]

        # Integer labels -> one-hot targets
        y_train_cat = to_categorical(y_train, num_classes=self.num_emotions)
        y_val_cat = to_categorical(y_val, num_classes=self.num_emotions)

        if not self.include_language:
            train_targets = y_train_cat
            val_targets = y_val_cat
        else:
            train_targets = {
                'emotion_output': y_train_cat,
                'language_output': self._language_targets(y_train_lang, y_train_cat.shape[0])
            }
            val_targets = {
                'emotion_output': y_val_cat,
                'language_output': self._language_targets(y_val_lang, y_val_cat.shape[0])
            }

        self.history = self.model.fit(
            X_train, train_targets,
            validation_data=(X_val, val_targets),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        # Save the full model
        model_path = os.path.join(save_dir, 'emotion_model.h5')
        self.model.save(model_path)

        # Save the configuration needed to rebuild the model
        config = {
            'input_shape': self.input_shape,
            'num_emotions': self.num_emotions,
            'num_languages': self.num_languages,
            'include_language': self.include_language,
            'num_features': self.input_shape[1]
        }
        with open(os.path.join(save_dir, 'config.pkl'), 'wb') as f:
            pickle.dump(config, f)
        print(f"模型和配置已保存到: {save_dir}")
        return self.history

    def evaluate(self, X_test, y_test, y_test_lang=None):
        """
        Evaluate the model on a test set.

        Args:
            X_test: Test features.
            y_test: Integer-encoded test emotion labels.
            y_test_lang: Optional integer-encoded test language labels
                (multi-task mode). An all-zero placeholder is used if omitted.

        Returns:
            metrics: ``{'loss', 'accuracy'}`` in single-task mode, or
            ``{'loss', 'emotion_accuracy', 'language_accuracy'}`` in
            multi-task mode.

        Raises:
            RuntimeError: If the model has not been built.
        """
        self._require_model()
        y_test_cat = to_categorical(y_test, num_classes=self.num_emotions)

        if not self.include_language:
            results = self.model.evaluate(X_test, y_test_cat)
            return {
                'loss': results[0],
                'accuracy': results[1]
            }

        # Multi-task: look metrics up by name. The positional result list is
        # [loss, emotion loss, language loss, emotion acc, language acc], so
        # indexing by position is easy to get wrong.
        results = self.model.evaluate(
            X_test,
            {
                'emotion_output': y_test_cat,
                'language_output': self._language_targets(y_test_lang, y_test_cat.shape[0])
            },
            return_dict=True
        )
        return {
            'loss': results['loss'],
            'emotion_accuracy': results['emotion_output_accuracy'],
            'language_accuracy': results['language_output_accuracy']
        }

    def predict(self, X):
        """
        Predict emotions.

        Args:
            X: Input features.

        Returns:
            y_pred: Index of the most probable emotion for every sample.
            y_pred_probs: Emotion class probabilities.

        Raises:
            RuntimeError: If the model has not been built.
        """
        self._require_model()
        if not self.include_language:
            emotion_probs = self.model.predict(X)
        else:
            # The language head's output is not needed here.
            emotion_probs, _ = self.model.predict(X)
        return np.argmax(emotion_probs, axis=1), emotion_probs

    def plot_training_history(self, save_path=None):
        """
        Plot training/validation accuracy and loss of the emotion task.

        Args:
            save_path: If given, the figure is also written to this path.
        """
        if self.history is None:
            print("请先训练模型")
            return

        # In multi-task mode the emotion metrics carry the head name as prefix.
        prefix = '' if not self.include_language else 'emotion_output_'
        curves = self.history.history

        plt.figure(figsize=(12, 4))

        # Accuracy
        plt.subplot(1, 2, 1)
        plt.plot(curves[f'{prefix}accuracy'], label='训练准确率')
        plt.plot(curves[f'val_{prefix}accuracy'], label='验证准确率')
        plt.title('模型准确率')
        plt.xlabel('Epoch')
        plt.ylabel('准确率')
        plt.legend()

        # Loss
        plt.subplot(1, 2, 2)
        plt.plot(curves[f'{prefix}loss'], label='训练损失')
        plt.plot(curves[f'val_{prefix}loss'], label='验证损失')
        plt.title('模型损失')
        plt.xlabel('Epoch')
        plt.ylabel('损失')
        plt.legend()

        plt.tight_layout()
        if save_path:
            plt.savefig(save_path)
            print(f"训练历史图保存到: {save_path}")
        plt.show()
# Smoke test: build and compile the LSTM model on randomly generated data.
try:
    print("创建并测试LSTM模型...")

    n_samples = 100
    n_features = 193
    n_emotions = 6

    # Random data already in LSTM layout: (samples, time steps, features)
    X_random = np.random.rand(n_samples, 1, n_features)
    y_random = np.random.randint(0, n_emotions, size=n_samples)

    # Fixed 70 / 15 / 15 split by position
    train_end, val_end = 70, 85
    X_train, X_val, X_test = X_random[:train_end], X_random[train_end:val_end], X_random[val_end:]
    y_train, y_val, y_test = y_random[:train_end], y_random[train_end:val_end], y_random[val_end:]

    # (time steps, features) is everything after the sample axis
    model = EmotionModel(
        input_shape=tuple(X_train.shape[1:]),
        num_emotions=n_emotions
    )
    model.build_model()
    model.compile_model()

    model.model.summary()
    print("\n模型构建成功!在实际应用中,接下来应该进行模型训练和评估。")
except Exception as e:
    print(f"LSTM模型测试出错: {e}")
# ### 3.2 在下面空白处写出模型训练及模型评估代码
# In[ ]:
"""
模型训练及评估代码 - 使用真实数据集
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import pickle
import os
import glob
import librosa
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from time import time
def load_ravdess_data(ravdess_path):
    """
    Collect RAVDESS wav files and their emotion labels.

    File names have seven dash-separated fields
    (modality-channel-emotion-intensity-statement-repetition-actor); the
    third field is the emotion code. Only the six emotions in the mapping
    below are kept; other codes and malformed names are skipped.

    Args:
        ravdess_path: Root directory containing the ``Actor_*`` folders.

    Returns:
        audio_files: List of wav file paths.
        emotion_labels: Emotion label for each path, in the same order.
    """
    emotion_map = {
        '01': 'neutral',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fear',
        '08': 'surprise'
    }
    audio_files, emotion_labels = [], []

    for actor_dir in glob.glob(os.path.join(ravdess_path, 'Actor_*')):
        for wav_file in glob.glob(os.path.join(actor_dir, '*.wav')):
            fields = os.path.basename(wav_file).split('-')
            if len(fields) != 7:
                continue  # not a RAVDESS-style file name
            emotion = emotion_map.get(fields[2])
            if emotion is None:
                continue  # emotion not among the six we use
            audio_files.append(wav_file)
            emotion_labels.append(emotion)

    return audio_files, emotion_labels
def load_savee_data(savee_path):
    """
    Collect SAVEE wav files and their emotion labels.

    Files are expected under ``<savee_path>/AudioData/<speaker>/`` and are
    named ``<code><number>.wav`` (for example ``a01.wav``, ``sa03.wav``); an
    optional ``<speaker>_`` prefix such as ``DC_a01.wav`` is also accepted.
    ``disgust`` recordings are skipped because the other datasets used here
    have no such class.

    Args:
        savee_path: Root directory of the SAVEE dataset.

    Returns:
        audio_files: List of wav file paths (speakers and files in sorted
            order).
        emotion_labels: Emotion label for each path, in the same order.

    Raises:
        OSError: If ``<savee_path>/AudioData`` cannot be listed.
    """
    emotion_map = {
        'a': 'angry',
        'd': 'disgust',  # recognised, but filtered out below
        'f': 'fear',
        'h': 'happy',
        'n': 'neutral',
        'sa': 'sad',
        'su': 'surprise'
    }
    audio_files, emotion_labels = [], []

    audiodata_path = os.path.join(savee_path, 'AudioData')
    for speaker in sorted(os.listdir(audiodata_path)):
        speaker_dir = os.path.join(audiodata_path, speaker)
        if not os.path.isdir(speaker_dir):
            continue  # e.g. Info.txt
        for wav_file in sorted(glob.glob(os.path.join(speaker_dir, '*.wav'))):
            # Work on the stem only: the extension's letters ("wav") must not
            # end up in the emotion code. Then drop an optional speaker
            # prefix and the trailing take number.
            stem = os.path.splitext(os.path.basename(wav_file))[0]
            emotion_code = stem.rsplit('_', 1)[-1].rstrip('0123456789').lower()
            emotion = emotion_map.get(emotion_code)
            if emotion is None or emotion == 'disgust':
                continue
            audio_files.append(wav_file)
            emotion_labels.append(emotion)

    return audio_files, emotion_labels
def load_casia_data(casia_path):
    """
    Collect CASIA wav files and their emotion labels.

    The expected layout is ``<casia_path>/<speaker>/<emotion>/*.wav``; the
    emotion is the name of the directory that holds the file. Only emotion
    directories listed in the mapping below and only ``.wav`` files are used.

    Args:
        casia_path: Root directory of the CASIA dataset.

    Returns:
        audio_files: List of wav file paths.
        emotion_labels: Emotion label for each path, in the same order.
    """
    emotion_map = {
        'angry': 'angry',
        'fear': 'fear',
        'happy': 'happy',
        'neutral': 'neutral',
        'sad': 'sad',
        'surprise': 'surprise'
    }
    audio_files, emotion_labels = [], []

    def _subdirectories(parent, skip_txt=False):
        # Names of the sub-directories of *parent*, leaving out
        # '_desktop.ini' and, when asked, names ending in '.txt'.
        names = []
        for entry in os.listdir(parent):
            if not os.path.isdir(os.path.join(parent, entry)):
                continue
            if entry == '_desktop.ini':
                continue
            if skip_txt and entry.endswith('.txt'):
                continue
            names.append(entry)
        return names

    for speaker in _subdirectories(casia_path, skip_txt=True):
        speaker_path = os.path.join(casia_path, speaker)
        for emotion_dir in _subdirectories(speaker_path):
            emotion = emotion_map.get(emotion_dir)
            if emotion is None:
                continue  # directory is not one of the emotions we use
            for wav_file in glob.glob(os.path.join(speaker_path, emotion_dir, '*.wav')):
                audio_files.append(wav_file)
                emotion_labels.append(emotion)

    return audio_files, emotion_labels
def load_all_datasets(ravdess_path='./RAVDESS', savee_path='./SAVEE', casia_path='./CASIA'):
    """
    Load RAVDESS, SAVEE and CASIA and concatenate the results.

    A dataset whose path is missing, or whose loader raises, is reported and
    skipped; the remaining datasets are still loaded.

    Args:
        ravdess_path: Root directory of RAVDESS.
        savee_path: Root directory of SAVEE.
        casia_path: Root directory of CASIA. The default matches the
            module-level ``CASIA_PATH``; the earlier misspelled default
            ``./CAISA`` is still tried when ``./CASIA`` does not exist.

    Returns:
        audio_files: All audio file paths.
        emotion_labels: Emotion label for each path.
        dataset_labels: Source dataset of each path ('ravdess', 'savee' or
            'casia').

    Raises:
        ValueError: If no audio file could be loaded from any dataset.
    """
    # Backward compatibility: this function used to default to './CAISA'.
    legacy_casia_path = './CAISA'
    if (casia_path == './CASIA' and not os.path.exists(casia_path)
            and os.path.exists(legacy_casia_path)):
        casia_path = legacy_casia_path

    all_audio_files = []
    all_emotion_labels = []
    all_dataset_labels = []

    sources = (
        ('RAVDESS', 'ravdess', ravdess_path),
        ('SAVEE', 'savee', savee_path),
        ('CASIA', 'casia', casia_path),
    )
    for display_name, tag, path in sources:
        if not os.path.exists(path):
            print(f"{display_name}路径不存在: {path}")
            continue
        try:
            if tag == 'ravdess':
                files, emotions = load_ravdess_data(path)
            elif tag == 'savee':
                files, emotions = load_savee_data(path)
            else:
                files, emotions = load_casia_data(path)
            all_audio_files.extend(files)
            all_emotion_labels.extend(emotions)
            all_dataset_labels.extend([tag] * len(files))
            print(f"加载了 {len(files)} 个{display_name}音频文件")
        except Exception as e:
            # Best effort: one broken dataset must not block the others.
            print(f"加载{display_name}数据集时出错: {e}")

    if not all_audio_files:
        raise ValueError("未能加载任何数据集,请检查数据集路径")
    return all_audio_files, all_emotion_labels, all_dataset_labels
def extract_features_from_files(audio_files, max_files=None):
    """
    Extract a feature dictionary for each audio file.

    The returned list always has one entry per processed input file, in input
    order. Files that are missing, are not ``.wav`` files, or fail during
    loading / feature extraction contribute an empty dict so that indices stay
    aligned with the caller's label lists.

    Args:
        audio_files: List of audio file paths.
        max_files: Maximum number of files to process; None processes all of them.

    Returns:
        features_list: List of feature dicts (empty dict for every failed file).
    """
    if max_files is not None:
        audio_files = audio_files[:max_files]

    collected = []
    n_ok = 0
    n_failed = 0

    for path in tqdm(audio_files, desc="提取特征"):
        try:
            # Decide up front whether this file has to be skipped, and why.
            skip_reason = None
            if not os.path.exists(path):
                skip_reason = f"文件不存在: {path}"
            elif not path.lower().endswith('.wav'):
                skip_reason = f"跳过非wav文件: {path}"

            if skip_reason is not None:
                print(skip_reason)
                collected.append({})
                n_failed += 1
                continue

            # Load the waveform at 22050 Hz and extract the full feature set.
            waveform, sample_rate = librosa.load(path, sr=22050, res_type='kaiser_fast')
            collected.append(extract_all_features(waveform, sample_rate))
            n_ok += 1

            # Report progress after every 20th successfully processed file.
            if n_ok % 20 == 0:
                print(f"已成功处理 {n_ok} 个文件,失败 {n_failed} 个文件")
        except Exception as e:
            print(f"处理文件 {path} 时出错: {e}")
            # Placeholder keeps the output index-aligned with the input list.
            collected.append({})
            n_failed += 1

    print(f"特征提取完成:成功 {n_ok} 个文件,失败 {n_failed} 个文件")
    return collected
def train_emotion_model(X_train, y_train, X_val, y_val,
                        epochs=50, batch_size=32, learning_rate=0.001,
                        output_dir='output/emotion_model'):
    """
    Build, compile and train an ``EmotionModel``.

    The number of output classes is the number of distinct values in
    ``y_train``; the input shape is taken from axes 1 and 2 of ``X_train``.
    After training, the training-history plot is written to
    ``training_history.png`` inside ``output_dir``.

    Args:
        X_train: Training features, indexed here as (samples, time steps, features).
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.
        epochs: Number of training epochs.
        batch_size: Batch size.
        learning_rate: Learning rate passed to ``compile_model``.
        output_dir: Directory that receives the training artefacts.

    Returns:
        model: The ``EmotionModel`` instance after training.
    """
    # The output directory must exist before anything is saved into it.
    os.makedirs(output_dir, exist_ok=True)

    # Derive the class count and the per-sample input shape from the data.
    n_classes = len(np.unique(y_train))
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    # Construct and compile the network, then show its layer summary.
    model = EmotionModel(input_shape=(n_steps, n_features), num_emotions=n_classes)
    model.build_model()
    model.compile_model(learning_rate=learning_rate)
    model.model.summary()

    # Train while measuring the wall-clock duration.
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'save_dir': output_dir,
    }
    started_at = time()
    print(f"\n开始训练模型epochs={epochs}, batch_size={batch_size}...")
    model.train(X_train, y_train, X_val, y_val, **fit_options)
    elapsed = time() - started_at
    print(f"训练完成,用时: {elapsed:.2f}")

    # Save the training-history figure next to the other outputs.
    model.plot_training_history(
        save_path=os.path.join(output_dir, 'training_history.png')
    )
    return model
def evaluate_emotion_model(model, X_test, y_test, class_names, output_dir='output/emotion_model'):
    """
    Evaluate a trained emotion model on the test split and report the results.

    Prints the metrics returned by ``model.evaluate``, saves the predictions
    and the ground truth as ``y_pred.npy`` / ``y_true.npy``, draws the
    confusion matrix (also saved as ``confusion_matrix.png``), prints the
    classification report and plots the per-class accuracy against the
    overall accuracy.

    Args:
        model: Trained model exposing ``evaluate(X, y)`` (iterated here as a
            name -> numeric value mapping) and ``predict(X)`` (unpacked here
            as a pair whose first element is the predicted labels).
        X_test: Test features.
        y_test: Test labels. They are compared against the positions of
            ``class_names``, so they are expected to be integer class indices.
        class_names: Class names, ordered by class index.
        output_dir: Directory that receives the saved arrays and the figure;
            created if it does not exist.

    Returns:
        metrics: The value returned by ``model.evaluate``.
        report: Text classification report.
        class_accuracies: Dict mapping class name to the accuracy measured on
            the test samples of that class; classes without test samples are
            omitted.
    """
    # Evaluation may run without a preceding training call, so do not rely on
    # the directory already being there.
    os.makedirs(output_dir, exist_ok=True)

    metrics = model.evaluate(X_test, y_test)
    print("\n模型评估结果:")
    for name, value in metrics.items():
        print(f" {name}: {value:.4f}")

    # Only the hard predictions are needed below; the probabilities are dropped.
    y_pred, _ = model.predict(X_test)

    # Boolean-mask indexing below needs arrays; a plain list would make
    # `y_test == i` a single False and silently yield no per-class results.
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)

    np.save(os.path.join(output_dir, 'y_pred.npy'), y_pred)
    np.save(os.path.join(output_dir, 'y_true.npy'), y_test)

    # Pin the label set to every known class. Without it, a class missing from
    # a small test split shrinks the matrix (tick labels no longer line up) and
    # makes classification_report reject `target_names`.
    label_ids = np.arange(len(class_names))

    cm = confusion_matrix(y_test, y_pred, labels=label_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('预测类别')
    plt.ylabel('真实类别')
    plt.title('混淆矩阵')
    confusion_matrix_path = os.path.join(output_dir, 'confusion_matrix.png')
    plt.tight_layout()
    plt.savefig(confusion_matrix_path)
    plt.show()

    report = classification_report(y_test, y_pred, labels=label_ids,
                                   target_names=class_names)
    print("\n分类报告:")
    print(report)

    # Accuracy restricted to the samples of one true class, for each class
    # that actually occurs in the test split.
    accuracy = accuracy_score(y_test, y_pred)
    class_accuracies = {}
    for class_index, class_name in enumerate(class_names):
        class_mask = (y_test == class_index)
        if np.any(class_mask):
            class_accuracies[class_name] = accuracy_score(
                y_test[class_mask], y_pred[class_mask])

    # Bar chart of the per-class values with the overall accuracy as reference.
    plt.figure(figsize=(12, 6))
    classes = list(class_accuracies.keys())
    accs = list(class_accuracies.values())
    plt.bar(classes, accs, color='skyblue')
    plt.axhline(y=accuracy, color='r', linestyle='--', label=f'总体准确率: {accuracy:.4f}')
    plt.xlabel('情感类别')
    plt.ylabel('准确率')
    plt.title('各情感类别准确率')
    plt.ylim([0, 1.0])
    # Numeric label slightly above each bar.
    for position, value in enumerate(accs):
        plt.text(position, value + 0.02, f'{value:.2f}', ha='center')
    plt.legend()
    plt.tight_layout()
    plt.show()

    return metrics, report, class_accuracies
def _select_capped_indices(dataset_labels, max_files_per_dataset, seed=42):
    """Pick at most `max_files_per_dataset` sample indices per dataset; None if nothing was picked."""
    tags = np.array(dataset_labels)
    kept = []
    for tag in ('ravdess', 'savee', 'casia'):
        indices = np.where(tags == tag)[0]
        if len(indices) > max_files_per_dataset:
            # Re-seed the global generator before every draw so the selection
            # is repeatable, and random rather than "first N" so the sample is
            # more balanced.
            np.random.seed(seed)
            indices = np.random.choice(indices, max_files_per_dataset, replace=False)
        if len(indices) > 0:
            kept.append(indices)
    return np.concatenate(kept) if kept else None


def _print_label_counts(heading, labels):
    """Print `heading` followed by one line per distinct label with its sample count."""
    print(f"\n{heading}:")
    for label, count in pd.Series(labels).value_counts().items():
        print(f" {label}: {count} 个样本")


def run_real_data_pipeline(max_files_per_dataset=None, test_size=0.2, val_size=0.1,
                           epochs=30, batch_size=32,
                           ravdess_path='./RAVDESS', savee_path='./SAVEE',
                           casia_path='./CAISA'):
    """
    Run the complete training workflow on the real datasets.

    Steps: load the datasets, optionally cap the number of files per dataset,
    extract features, drop files without features, build the feature matrix,
    encode labels, split, standardize, reshape for the LSTM, save the
    preprocessing objects, train the model and evaluate it.

    Any exception raised along the way is caught, printed together with its
    traceback, and not propagated.

    Args:
        max_files_per_dataset: Maximum number of files used per dataset;
            None uses every file.
        test_size: Fraction of the data used as test set.
        val_size: Fraction of the data used as validation set.
        epochs: Number of training epochs.
        batch_size: Batch size.
        ravdess_path: Location of the RAVDESS dataset.
        savee_path: Location of the SAVEE dataset.
        casia_path: Location of the CASIA dataset.
            NOTE(review): the default is spelled './CAISA' - confirm that this
            matches the directory name on disk.

    Returns:
        None. Results are printed and written to disk by the called helpers.
    """
    try:
        print("开始使用真实数据训练模型...")

        print("加载数据集...")
        audio_files, emotion_labels, dataset_labels = load_all_datasets(
            ravdess_path=ravdess_path,
            savee_path=savee_path,
            casia_path=casia_path
        )

        # Optionally cap how many files each dataset contributes.
        if max_files_per_dataset is not None:
            selected_indices = _select_capped_indices(dataset_labels, max_files_per_dataset)
            if selected_indices is not None:
                audio_files = [audio_files[i] for i in selected_indices]
                emotion_labels = [emotion_labels[i] for i in selected_indices]
                dataset_labels = [dataset_labels[i] for i in selected_indices]
            print(f"限制后的数据集大小: {len(audio_files)} 个文件")

        _print_label_counts("情感分布", emotion_labels)
        _print_label_counts("数据集分布", dataset_labels)

        print("\n开始提取特征...")
        features_list = extract_features_from_files(audio_files)

        # Failed files are represented by empty dicts; drop them together with
        # their labels so the three lists stay aligned.
        valid_indices = [i for i, features in enumerate(features_list) if features]
        valid_features = [features_list[i] for i in valid_indices]
        valid_emotions = [emotion_labels[i] for i in valid_indices]
        valid_datasets = [dataset_labels[i] for i in valid_indices]
        print(f"\n有效样本数: {len(valid_features)}/{len(features_list)}")

        _print_label_counts("有效样本情感分布", valid_emotions)
        _print_label_counts("有效样本数据集分布", valid_datasets)

        # Fewer than 10 usable samples: stop here.
        if len(valid_features) < 10:
            print("有效样本太少,无法继续处理")
            return

        print("\n转换为特征矩阵...")
        X, feature_names = features_to_matrix(valid_features)
        print(f"特征矩阵形状: {X.shape}")

        y, label_encoder = encode_labels(valid_emotions)
        class_names = label_encoder.classes_
        print(f"类别: {class_names}")

        print("\n划分数据集...")
        X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(
            X, y, test_size=test_size, val_size=val_size)
        print("数据集划分:")
        print(f" 训练集: {X_train.shape[0]} 样本")
        print(f" 验证集: {X_val.shape[0]} 样本")
        print(f" 测试集: {X_test.shape[0]} 样本")

        print("\n标准化特征...")
        X_train_norm, X_val_norm, X_test_norm, scaler = standardize_features(
            X_train, X_val, X_test)

        print("\n重塑为LSTM输入格式...")
        X_train_reshaped = reshape_for_lstm(X_train_norm)
        X_val_reshaped = reshape_for_lstm(X_val_norm)
        X_test_reshaped = reshape_for_lstm(X_test_norm)
        print("LSTM输入形状:")
        print(f" 训练集: {X_train_reshaped.shape}")
        print(f" 验证集: {X_val_reshaped.shape}")
        print(f" 测试集: {X_test_reshaped.shape}")

        print("\n保存预处理对象...")
        save_preprocessing_objects(scaler, label_encoder, feature_names)

        print("\n开始训练模型...")
        model = train_emotion_model(
            X_train_reshaped, y_train,
            X_val_reshaped, y_val,
            epochs=epochs,
            batch_size=batch_size
        )

        print("\n评估模型...")
        evaluate_emotion_model(
            model, X_test_reshaped, y_test, class_names
        )
        print("\n使用真实数据的训练流程完成")
    except Exception as e:
        # Top-level boundary of the workflow: report the failure with its
        # traceback instead of letting it propagate to the caller.
        print(f"训练流程出错: {e}")
        import traceback
        traceback.print_exc()
# Launch the real-data training workflow. Each dataset is capped at 50 samples
# to keep processing fast; for real use, raise the cap or pass None to use
# every sample.
run_real_data_pipeline(
    max_files_per_dataset=50,
)