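Based on research and Android ML frameworks (TensorFlow Lite), here is a Kotlin implementation of a pipeline that estimates a stress level (1-10) from a WAV file. The helper functions parseWavHeader and normalizeAudio, the exception classes, the result/repository types and the .tflite model files referenced below are not part of this listing and must be supplied separately: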
Complete Android Kotlin Implementation
Stage 1: Audio File Processing
/**
 * Loads a WAV file from disk and turns it into a cleaned, peak-normalized float signal.
 *
 * NOTE(review): `parseWavHeader`, `normalizeAudio` and `AudioProcessingException` are used
 * here but not defined in this class; they are assumed to be provided elsewhere.
 */
class AudioProcessor @Inject constructor() {

    /**
     * Load and preprocess a WAV file for stress analysis.
     *
     * @param filePath path of the WAV file to read.
     * @return the samples after [normalizeAudio] and the noise gate / peak normalization.
     * @throws AudioProcessingException if reading or processing fails for any reason
     *   other than coroutine cancellation.
     */
    suspend fun loadWavFile(filePath: String): FloatArray = withContext(Dispatchers.IO) {
        try {
            val audioData = parseWavFile(filePath)
            // Presumably converts to 16 kHz mono - confirm against normalizeAudio.
            val normalized = normalizeAudio(audioData, targetSampleRate = 16000)
            removeNoiseAndSilence(normalized)
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation is not a load failure; let it propagate untouched.
            throw e
        } catch (e: Exception) {
            throw AudioProcessingException("Failed to load WAV file: $filePath", e)
        }
    }

    /**
     * Parse a WAV file manually (Android has no librosa equivalent).
     *
     * The first [WAV_HEADER_SIZE] bytes are handed to `parseWavHeader`; the sample bytes
     * are taken from the RIFF `data` chunk so that extra chunks before or after the audio
     * are not decoded as samples. Samples are assumed to be 16-bit little-endian PCM.
     */
    private fun parseWavFile(filePath: String): FloatArray {
        val fileBytes = FileInputStream(filePath).use { stream -> stream.readBytes() }
        check(fileBytes.size >= WAV_HEADER_SIZE) {
            "File holds ${fileBytes.size} bytes, fewer than the $WAV_HEADER_SIZE-byte WAV header"
        }

        // NOTE(review): the sample rate returned here is not forwarded to normalizeAudio,
        // so the source rate is lost - confirm how normalizeAudio is meant to learn it.
        parseWavHeader(fileBytes.copyOfRange(0, WAV_HEADER_SIZE))

        val (dataStart, dataEnd) = locateSampleData(fileBytes)
        return convertBytesToFloat(fileBytes.copyOfRange(dataStart, dataEnd))
    }

    /**
     * Find the byte range `[start, end)` of the audio samples.
     *
     * Walks the RIFF chunk list looking for the `data` chunk. When the file is not a
     * RIFF/WAVE container, or no `data` chunk is found, falls back to "everything after
     * the first [WAV_HEADER_SIZE] bytes".
     */
    private fun locateSampleData(file: ByteArray): Pair<Int, Int> {
        val fallback = WAV_HEADER_SIZE to file.size
        if (!hasTag(file, 0, "RIFF") || !hasTag(file, 8, "WAVE")) return fallback

        val view = ByteBuffer.wrap(file).order(ByteOrder.LITTLE_ENDIAN)
        var offset = 12 // first chunk follows "RIFF" <size> "WAVE"
        while (offset + 8 <= file.size) {
            val declared = view.getInt(offset + 4).toLong() and 0xFFFFFFFFL // unsigned 32-bit
            val bodyStart = offset + 8
            val available = (file.size - bodyStart).toLong()

            if (hasTag(file, offset, "data")) {
                // A size of 0 or one larger than the file is treated as "unknown":
                // take everything that is left.
                val length = if (declared in 1L..available) declared else available
                return bodyStart to (bodyStart + length.toInt())
            }

            // Chunk bodies are padded to an even number of bytes.
            val next = bodyStart + declared + (declared and 1L)
            if (next > file.size) break
            offset = next.toInt()
        }
        return fallback
    }

    /** True when the four ASCII characters of [tag] are found at [offset] in [bytes]. */
    private fun hasTag(bytes: ByteArray, offset: Int, tag: String): Boolean =
        offset + tag.length <= bytes.size &&
            String(bytes, offset, tag.length, Charsets.US_ASCII) == tag

    /**
     * Convert 16-bit little-endian PCM bytes to floats scaled by [Short.MAX_VALUE].
     *
     * A trailing odd byte is ignored. The most negative sample (-32768) maps to a value
     * slightly below -1.
     */
    private fun convertBytesToFloat(bytes: ByteArray): FloatArray {
        val buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)
        val scale = Short.MAX_VALUE.toFloat()
        return FloatArray(bytes.size / 2) { buffer.short.toFloat() / scale }
    }

    /**
     * Apply a simple noise gate, then scale so the loudest sample has magnitude 1.
     *
     * Samples whose magnitude is below the gate threshold are set to zero; the array
     * length is unchanged (gated samples are zeroed, not removed). An all-zero or empty
     * signal is returned without scaling.
     */
    private fun removeNoiseAndSilence(audio: FloatArray): FloatArray {
        val gated = FloatArray(audio.size) { i ->
            val sample = audio[i]
            if (abs(sample) < NOISE_GATE_THRESHOLD) 0f else sample
        }
        val peak = gated.maxOfOrNull { abs(it) } ?: 1f
        return if (peak > 0) FloatArray(gated.size) { gated[it] / peak } else gated
    }

    private companion object {
        /** Size of the canonical PCM WAV header passed to `parseWavHeader`. */
        const val WAV_HEADER_SIZE = 44

        /** Magnitude below which a sample is treated as noise and zeroed. */
        const val NOISE_GATE_THRESHOLD = 0.01f
    }
}
/**
 * Stage 2 of the pipeline: MFCC feature extraction in pure Kotlin.
 *
 * Produces, for every analysis frame, [numMFCC] cepstral coefficients followed by their
 * delta and delta-delta values, all flattened into one array.
 */
class MFCCExtractor @Inject constructor() {
    private val sampleRate = 16000
    private val frameSize = 512
    private val hopLength = 256
    private val numMFCC = 13
    private val numMelBands = 26

    /**
     * Extract MFCC features from audio. No external libraries required.
     *
     * @param audio mono samples; the mel scale is built for a [sampleRate] of 16000 Hz.
     * @return frame-major features: `frames * 39` values (13 MFCC + 13 delta +
     *   13 delta-delta per frame). Empty when [audio] is empty. Audio shorter than one
     *   frame is zero-padded to a single frame.
     */
    suspend fun extractMFCCFeatures(audio: FloatArray): FloatArray =
        withContext(Dispatchers.Default) {
            // No samples means no frames; the stages below index element 0.
            if (audio.isEmpty()) return@withContext FloatArray(0)

            // 1. Pre-emphasis filter
            val preEmphasized = applyPreEmphasis(audio)
            // 2. Windowing and magnitude spectrum
            val frames = createFrames(preEmphasized, frameSize, hopLength)
            val spectrogram = computeSpectrogram(frames)
            // 3. Mel filter bank (log energies)
            val melSpectrogram = applyMelFilterBank(spectrogram)
            // 4. DCT to get MFCC
            val mfccCoefficients = computeDCT(melSpectrogram)
            // 5. Delta and delta-delta features
            val deltaFeatures = computeDeltaFeatures(mfccCoefficients)
            val deltaDeltaFeatures = computeDeltaFeatures(deltaFeatures)

            combineFeatures(mfccCoefficients, deltaFeatures, deltaDeltaFeatures)
        }

    /**
     * Apply a pre-emphasis (first-order high-pass) filter: `y[n] = x[n] - alpha * x[n-1]`.
     * The first sample is copied unchanged. Returns an empty array for empty input.
     */
    private fun applyPreEmphasis(audio: FloatArray, alpha: Float = 0.97f): FloatArray =
        FloatArray(audio.size) { i ->
            if (i == 0) audio[0] else audio[i] - alpha * audio[i - 1]
        }

    /**
     * Cut [audio] into overlapping frames of [frameSize] samples, advancing [hopLength]
     * samples each time. Trailing samples that do not fill a whole frame are dropped.
     *
     * Audio shorter than one frame yields a single zero-padded frame; empty audio yields
     * no frames.
     */
    private fun createFrames(audio: FloatArray, frameSize: Int, hopLength: Int): Array<FloatArray> {
        if (audio.isEmpty()) return emptyArray()
        if (audio.size < frameSize) return arrayOf(audio.copyOf(frameSize)) // copyOf pads with 0f

        val numFrames = (audio.size - frameSize) / hopLength + 1
        return Array(numFrames) { i ->
            val start = i * hopLength
            audio.copyOfRange(start, start + frameSize)
        }
    }

    /**
     * Compute the magnitude spectrum of every frame (Hamming window, then DFT).
     *
     * @return one array of `frameSize / 2 + 1` magnitudes per frame.
     */
    private fun computeSpectrogram(frames: Array<FloatArray>): Array<FloatArray> =
        Array(frames.size) { index ->
            val spectrum = computeFFT(applyHammingWindow(frames[index]))
            // Complex.abs() is the magnitude; it returns Double, features are Float.
            FloatArray(spectrum.size) { bin -> spectrum[bin].abs().toFloat() }
        }

    /**
     * Apply a Hamming window: `w[i] = 0.54 - 0.46 * cos(2*pi*i / (N - 1))`.
     * Frames with fewer than two samples are returned unweighted, since the formula
     * divides by `N - 1`.
     */
    private fun applyHammingWindow(frame: FloatArray): FloatArray {
        if (frame.size < 2) return frame.copyOf()
        val denominator = frame.size.toDouble() - 1
        return FloatArray(frame.size) { i ->
            val window = 0.54 - 0.46 * cos(2.0 * PI * i / denominator)
            (frame[i] * window).toFloat()
        }
    }

    /**
     * Discrete Fourier transform of a real signal, returning bins `0..N/2`.
     *
     * This is a direct O(N^2) DFT, not a fast transform. The complex exponentials are
     * tabulated once per call and indexed by `(k * n) mod N`.
     */
    private fun computeFFT(signal: FloatArray): Array<Complex> {
        val size = signal.size
        val cosTable = DoubleArray(size) { cos(-2.0 * PI * it / size) }
        val sinTable = DoubleArray(size) { sin(-2.0 * PI * it / size) }

        return Array(size / 2 + 1) { k ->
            var real = 0.0
            var imag = 0.0
            var phase = 0 // equals (k * n) mod size
            for (n in 0 until size) {
                real += signal[n] * cosTable[phase]
                imag += signal[n] * sinTable[phase]
                phase += k
                if (phase >= size) phase -= size // k <= size / 2, one subtraction suffices
            }
            Complex(real, imag)
        }
    }

    /**
     * Project each magnitude spectrum onto [numMelBands] triangular mel filters and take
     * the natural log of each band energy (offset by 1e-10 to avoid `ln(0)`).
     */
    private fun applyMelFilterBank(spectrogram: Array<FloatArray>): Array<FloatArray> {
        if (spectrogram.isEmpty()) return emptyArray()

        val melFilters = createMelFilterBank(numMelBands, spectrogram[0].size, sampleRate)
        return Array(spectrogram.size) { frame ->
            val spectrum = spectrogram[frame]
            FloatArray(numMelBands) { band ->
                var sum = 0f
                for (j in spectrum.indices) {
                    sum += spectrum[j] * melFilters[band][j]
                }
                ln(sum + 1e-10f) // Log mel spectrum
            }
        }
    }

    /**
     * Create a bank of triangular filters spaced evenly on the mel scale between 0 Hz
     * and the Nyquist frequency.
     *
     * @param numFilters number of triangular filters.
     * @param fftSize number of bins in one spectrum (`N/2 + 1`), where the last bin is
     *   the Nyquist frequency.
     * @param sampleRate sample rate in Hz.
     * @return `numFilters` rows of `fftSize` weights.
     */
    private fun createMelFilterBank(numFilters: Int, fftSize: Int, sampleRate: Int): Array<FloatArray> {
        val filters = Array(numFilters) { FloatArray(fftSize) }
        if (fftSize == 0) return filters

        fun hzToMel(hz: Float) = 2595f * log10(1f + hz / 700f)
        fun melToHz(mel: Float) = 700f * (10f.pow(mel / 2595f) - 1f)

        val nyquist = sampleRate / 2f
        val lowFreqMel = hzToMel(0f)
        val highFreqMel = hzToMel(nyquist)
        val melPoints = FloatArray(numFilters + 2) { i ->
            lowFreqMel + i * (highFreqMel - lowFreqMel) / (numFilters + 1)
        }

        // The spectrum covers 0..Nyquist across bins 0..fftSize-1, so the bin of a
        // frequency is its fraction of Nyquist times the last bin index.
        val lastBin = fftSize - 1
        val binIndices = melPoints.map { mel ->
            (melToHz(mel) / nyquist * lastBin).toInt().coerceIn(0, lastBin)
        }

        for (i in 1..numFilters) {
            val left = binIndices[i - 1]
            val center = binIndices[i]
            val right = binIndices[i + 1]
            val filter = filters[i - 1]
            // Rising edge; empty when left == center, so no division by zero.
            for (j in left until center) {
                filter[j] = (j - left).toFloat() / (center - left)
            }
            // Falling edge, starting at the peak (weight 1 at j == center).
            for (j in center until right) {
                filter[j] = (right - j).toFloat() / (right - center)
            }
        }
        return filters
    }

    /**
     * Compute an unnormalized DCT-II of each log-mel frame and keep the first
     * [numMFCC] coefficients.
     */
    private fun computeDCT(melSpectrogram: Array<FloatArray>): Array<FloatArray> =
        Array(melSpectrogram.size) { frame ->
            val melFrame = melSpectrogram[frame]
            FloatArray(numMFCC) { i ->
                var sum = 0f
                for (j in melFrame.indices) {
                    sum += melFrame[j] * cos(i * (j + 0.5) * PI / melFrame.size).toFloat()
                }
                sum
            }
        }

    /**
     * Compute delta features as the central difference `(next - prev) / 2` along time.
     * At the first and last frame the missing neighbour is replaced by the frame itself.
     */
    private fun computeDeltaFeatures(features: Array<FloatArray>): Array<FloatArray> {
        if (features.isEmpty()) return emptyArray()

        val lastFrame = features.size - 1
        return Array(features.size) { t ->
            val previous = features[if (t > 0) t - 1 else t]
            val following = features[if (t < lastFrame) t + 1 else t]
            FloatArray(features[t].size) { f -> (following[f] - previous[f]) / 2f }
        }
    }

    /**
     * Flatten MFCC, delta and delta-delta features frame by frame:
     * `[mfcc(t), delta(t), deltaDelta(t)]` for each frame `t` in order.
     */
    private fun combineFeatures(
        mfcc: Array<FloatArray>,
        delta: Array<FloatArray>,
        deltaDelta: Array<FloatArray>
    ): FloatArray {
        if (mfcc.isEmpty()) return FloatArray(0)

        val coefficients = mfcc[0].size
        val combined = FloatArray(mfcc.size * coefficients * 3) // 39 features per frame
        var offset = 0
        for (t in mfcc.indices) {
            for (part in arrayOf(mfcc[t], delta[t], deltaDelta[t])) {
                part.copyInto(combined, offset)
                offset += part.size
            }
        }
        return combined
    }
}
/**
 * Minimal complex number used to hold one DFT bin.
 *
 * @property real real part.
 * @property imag imaginary part.
 */
data class Complex(val real: Double, val imag: Double) {
    /** Magnitude of the number: the square root of `real^2 + imag^2`. */
    fun abs(): Double {
        val squaredMagnitude = real * real + imag * imag
        return sqrt(squaredMagnitude)
    }
}
Stage 3: ECAPA-TDNN Model Implementation (TensorFlow Lite)
/**
 * Runs the two TensorFlow Lite models of the stress pipeline: an ECAPA-TDNN model that
 * maps MFCC features to an embedding, and a classifier that maps the embedding to
 * per-level probabilities.
 *
 * [initialize] must complete before either inference method is called; [close] releases
 * the interpreters. The class does no locking - do not call [initialize] or [close]
 * while an inference call is running.
 */
class ECAPAStressEngine @Inject constructor(
    private val modelLoader: ModelLoader,
    private val performanceMonitor: PerformanceMonitor
) {
    private var ecapaInterpreter: Interpreter? = null
    private var stressClassifier: Interpreter? = null

    /**
     * Load both models. Interpreters from an earlier call are closed first, and a
     * partially loaded pair is released if loading fails.
     *
     * @throws ModelLoadingException if either model cannot be loaded.
     */
    suspend fun initialize() = withContext(Dispatchers.IO) {
        try {
            close()
            // Load pre-trained ECAPA-TDNN models (converted to TFLite)
            ecapaInterpreter = modelLoader.createInterpreter("models/ecapa_tdnn_stress.tflite")
            stressClassifier = modelLoader.createInterpreter("models/stress_classifier.tflite")
            Log.i("ECAPAEngine", "Models loaded successfully")
        } catch (e: Exception) {
            close()
            throw ModelLoadingException("Failed to initialize ECAPA models", e)
        }
    }

    /**
     * Release both interpreters. Safe to call repeatedly; [initialize] must be called
     * again before further inference.
     */
    fun close() {
        ecapaInterpreter?.close()
        ecapaInterpreter = null
        stressClassifier?.close()
        stressClassifier = null
    }

    /**
     * Extract speaker embeddings adapted for stress patterns.
     *
     * @param mfccFeatures flattened MFCC features, passed to the model as one flat float
     *   buffer. NOTE(review): the buffer size must match the model's input tensor;
     *   nothing here resizes the input for variable-length audio - confirm.
     * @return an embedding of [EMBEDDING_DIM] floats.
     * @throws StressAnalysisException if the model is not loaded or inference fails.
     */
    suspend fun extractStressEmbeddings(mfccFeatures: FloatArray): FloatArray =
        withContext(Dispatchers.Default) {
            val session = performanceMonitor.startTiming("ecapa_embedding_extraction")
            try {
                // Fail loudly instead of skipping inference and returning zeros.
                val interpreter = checkNotNull(ecapaInterpreter) {
                    "ECAPA model is not loaded; call initialize() first"
                }

                val inputBuffer = ByteBuffer.allocateDirect(mfccFeatures.size * BYTES_PER_FLOAT)
                    .order(ByteOrder.nativeOrder())
                    .asFloatBuffer()
                    .apply {
                        put(mfccFeatures)
                        rewind() // leave the position at the first element after the bulk put
                    }
                val outputBuffer = ByteBuffer.allocateDirect(EMBEDDING_DIM * BYTES_PER_FLOAT)
                    .order(ByteOrder.nativeOrder())

                // Run ECAPA-TDNN inference
                interpreter.run(inputBuffer, outputBuffer)

                val embeddings = FloatArray(EMBEDDING_DIM)
                outputBuffer.rewind()
                outputBuffer.asFloatBuffer().get(embeddings)

                session.endWithResult("Embeddings extracted: ${embeddings.size} dimensions")
                return@withContext embeddings
            } catch (e: Exception) {
                session.end()
                throw StressAnalysisException("ECAPA embedding extraction failed", e)
            }
        }

    /**
     * Classify the stress level from embeddings.
     *
     * The predicted level is the 1-based index of the largest probability; the
     * confidence is that probability.
     *
     * @param embeddings embedding produced by [extractStressEmbeddings].
     * @throws StressAnalysisException if the model is not loaded or inference fails.
     */
    suspend fun classifyStress(embeddings: FloatArray): StressClassification =
        withContext(Dispatchers.Default) {
            val session = performanceMonitor.startTiming("stress_classification")
            try {
                // Fail loudly instead of skipping inference and classifying zeros.
                val interpreter = checkNotNull(stressClassifier) {
                    "Stress classifier is not loaded; call initialize() first"
                }

                val inputBuffer = ByteBuffer.allocateDirect(embeddings.size * BYTES_PER_FLOAT)
                    .order(ByteOrder.nativeOrder())
                    .asFloatBuffer()
                    .apply {
                        put(embeddings)
                        rewind() // leave the position at the first element after the bulk put
                    }
                val outputBuffer = ByteBuffer.allocateDirect(STRESS_LEVELS * BYTES_PER_FLOAT)
                    .order(ByteOrder.nativeOrder())

                // Run stress classification
                interpreter.run(inputBuffer, outputBuffer)

                val probabilities = FloatArray(STRESS_LEVELS)
                outputBuffer.rewind()
                outputBuffer.asFloatBuffer().get(probabilities)

                // Get predicted stress level (1-10)
                val maxIndex = probabilities.indices.maxByOrNull { probabilities[it] } ?: 0
                val predictedLevel = maxIndex + 1 // Convert 0-based to 1-based
                val confidence = probabilities[maxIndex]

                val result = StressClassification(
                    level = predictedLevel,
                    confidence = confidence,
                    probabilities = probabilities.copyOf(),
                    processingTimeMs = session.end()
                )
                Log.d("ECAPAEngine", "Stress level: $predictedLevel/10, Confidence: ${(confidence * 100).toInt()}%")
                return@withContext result
            } catch (e: Exception) {
                session.end()
                throw StressAnalysisException("Stress classification failed", e)
            }
        }

    private companion object {
        /** Size in bytes of one 32-bit float in the tensor buffers. */
        const val BYTES_PER_FLOAT = 4

        /** Number of floats read from the ECAPA model's output. */
        const val EMBEDDING_DIM = 192

        /** Number of probabilities read from the classifier's output. */
        const val STRESS_LEVELS = 10
    }
}
/**
 * Result of classifying one embedding.
 *
 * `equals` and `hashCode` are overridden because the generated data-class versions
 * compare the [probabilities] array by reference, which would make two results with
 * identical contents unequal.
 *
 * @property level predicted stress level, 1-10.
 * @property confidence probability of the predicted level, 0.0-1.0.
 * @property probabilities probability for each level, index 0 being level 1.
 * @property processingTimeMs time spent on classification, in milliseconds.
 */
data class StressClassification(
    val level: Int, // Stress level 1-10
    val confidence: Float, // Confidence 0.0-1.0
    val probabilities: FloatArray, // Probability distribution
    val processingTimeMs: Long
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is StressClassification) return false
        return level == other.level &&
            // compareTo mirrors the generated data-class equality for floats (NaN equals NaN)
            confidence.compareTo(other.confidence) == 0 &&
            processingTimeMs == other.processingTimeMs &&
            probabilities.contentEquals(other.probabilities)
    }

    override fun hashCode(): Int {
        var result = level
        result = 31 * result + confidence.hashCode()
        result = 31 * result + probabilities.contentHashCode()
        result = 31 * result + processingTimeMs.hashCode()
        return result
    }
}
Stage 4: Complete Stress Analysis Pipeline
/**
 * Orchestrates the full analysis: load audio, extract MFCC features, compute embeddings,
 * classify, attach recommendations and persist the result.
 */
class StressAnalysisManager @Inject constructor(
    private val audioProcessor: AudioProcessor,
    private val mfccExtractor: MFCCExtractor,
    private val ecapaEngine: ECAPAStressEngine,
    private val repository: LocalStressRepository
) {
    /**
     * Complete pipeline: WAV file → Stress level (1-10).
     *
     * @param wavFilePath path of the WAV file to analyze.
     * @return the analysis result, which has also been saved to the repository.
     * @throws StressAnalysisException if any stage fails. Coroutine cancellation is
     *   rethrown as-is and is not reported as a failure.
     */
    suspend fun analyzeWavFile(wavFilePath: String): StressAnalysisResult =
        withContext(Dispatchers.Default) {
            val totalStartTime = System.currentTimeMillis()
            try {
                Log.i("StressAnalysis", "Starting analysis of: $wavFilePath")

                // Stage 1: Load and preprocess WAV file
                val audioData = audioProcessor.loadWavFile(wavFilePath)
                Log.d("StressAnalysis", "Audio loaded: ${audioData.size} samples")

                // Stage 2: Extract MFCC features
                val mfccFeatures = mfccExtractor.extractMFCCFeatures(audioData)
                Log.d("StressAnalysis", "MFCC extracted: ${mfccFeatures.size} features")

                // Stage 3: Extract stress-related embeddings using ECAPA-TDNN
                val embeddings = ecapaEngine.extractStressEmbeddings(mfccFeatures)
                Log.d("StressAnalysis", "Embeddings extracted: ${embeddings.size} dimensions")

                // Stage 4: Classify stress level
                val classification = ecapaEngine.classifyStress(embeddings)

                // Stage 5: Generate recommendations
                val recommendations = generateStressRecommendations(
                    classification.level,
                    classification.confidence
                )

                // Create final result
                val totalProcessingTime = System.currentTimeMillis() - totalStartTime
                val result = StressAnalysisResult(
                    id = UUID.randomUUID().toString(),
                    stressLevel = classification.level,
                    confidence = classification.confidence,
                    recommendations = recommendations,
                    audioFilePath = wavFilePath,
                    timestamp = System.currentTimeMillis(),
                    processingTimeMs = totalProcessingTime
                )

                // Save result
                repository.saveStressAssessment(result)
                Log.i(
                    "StressAnalysis",
                    "Analysis complete: Level ${classification.level}/10, " +
                        "Confidence ${(classification.confidence * 100).toInt()}%, " +
                        "Time: ${totalProcessingTime}ms"
                )
                return@withContext result
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // The stages above are suspend calls; wrapping cancellation would hide it
                // from the caller's scope and log it as an analysis failure.
                throw e
            } catch (e: Exception) {
                Log.e("StressAnalysis", "Analysis failed for: $wavFilePath", e)
                throw StressAnalysisException("Failed to analyze audio file", e)
            }
        }

    /**
     * Generate contextual recommendations based on stress level.
     *
     * A low-confidence notice comes first when [confidence] is below 0.7, followed by the
     * advice for the band that [stressLevel] falls in (up to 3, up to 5, up to 7, above 7).
     */
    private fun generateStressRecommendations(stressLevel: Int, confidence: Float): List<String> {
        val recommendations = mutableListOf<String>()

        // Add confidence-based feedback
        if (confidence < 0.7f) {
            recommendations.add("Analysis confidence is lower than usual. Consider recording in a quieter environment.")
        }

        // Add stress-level specific recommendations
        val levelAdvice = when {
            stressLevel <= 3 -> listOf(
                "You appear calm and relaxed. Great job managing stress!",
                "Continue with your current stress management strategies."
            )
            stressLevel <= 5 -> listOf(
                "Mild stress detected. This is normal and manageable.",
                "Try the 4-7-8 breathing technique: inhale 4, hold 7, exhale 8.",
                "Consider taking a short break if possible."
            )
            stressLevel <= 7 -> listOf(
                "Moderate stress detected. Time for some stress relief.",
                "Try progressive muscle relaxation or brief meditation.",
                "Consider stepping away from stressful tasks for a few minutes."
            )
            else -> listOf(
                "Higher stress level detected. Please prioritize stress management.",
                "Practice deep breathing exercises immediately.",
                "Consider speaking with a healthcare professional if stress persists."
            )
        }
        recommendations.addAll(levelAdvice)
        return recommendations
    }
}
/**
 * Usage example: runs the analysis for one file from the activity's lifecycle scope and
 * shows either the result or an error message.
 *
 * NOTE(review): `showStressResult` and `showError` are not defined in this class and are
 * assumed to be provided elsewhere. The file path is hard-coded for demonstration.
 */
class MainActivity : ComponentActivity() {
    @Inject lateinit var stressAnalysisManager: StressAnalysisManager

    /** Analyze the sample file and display the outcome. */
    private fun analyzeAudioFile() {
        lifecycleScope.launch {
            try {
                val wavFilePath = "/storage/emulated/0/Download/voice_sample.wav"
                val result = stressAnalysisManager.analyzeWavFile(wavFilePath)
                // Display results
                showStressResult(result)
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // The scope was cancelled (e.g. the activity is going away): not an
                // error, and there is no UI left to report it to.
                throw e
            } catch (e: Exception) {
                Log.e("MainActivity", "Analysis failed", e)
                showError("Analysis failed: ${e.message}")
            }
        }
    }
}