This repository has been archived on 2025-04-28. You can view files and clone it, but cannot push or open issues or pull requests.
computers-sound-music-portf.../code/adaptive-tone-control/main.py
David Westgate fba6ccacdb touch ups
2024-12-11 09:54:49 -08:00

97 lines
3.3 KiB
Python

import os
import sys

import numpy as np
from scipy.io import wavfile as wav
print("Portfolio Object 2: Adaptive Tone Control")

# Input selection: the first command-line argument wins, otherwise fall back to
# the default sample name.
file = sys.argv[1] if len(sys.argv) > 1 else "alien.wav"

# Load the wave signal; anything with more than one dimension is collapsed to a
# single channel by averaging along axis 1.
sample_rate, wav_signal = wav.read(file)
if wav_signal.ndim > 1:
    wav_signal = wav_signal.mean(axis=1)

# Constants
bands = {"low": (0, 300), "mid": (300, 2000), "high": (2000, 20000)}
# This seems like a fine choice, but hard to know one way or the other
window_size = 1024
# Each window overlaps the previous one by half
hop_size = window_size // 2
# Number of whole windows that fit in the signal at this hop size
num_frames = (len(wav_signal) - window_size) // hop_size + 1
# Accumulator for the overlap-added output, same length as the input
reconstructed_signal = np.zeros(len(wav_signal))

# FFT of the first window, used below to report the band energies of one
# arbitrary window before any processing happens.
fft_values = np.fft.fft(wav_signal[:window_size])
fft_freqs = np.fft.fftfreq(window_size, d=1 / sample_rate)
def band_energy(band, fft_values, fft_freqs):
    """Return the spectral energy of ``fft_values`` inside one frequency band.

    Args:
        band: ``(low_hz, high_hz)`` pair. The band is half-open,
            ``low_hz <= f < high_hz``, so two bands that share an edge
            (e.g. 300 Hz) never both count the bin sitting on that edge. This
            matches the bin selection used by ``adjust``.
        fft_values: Complex FFT coefficients of one frame (NumPy array).
        fft_freqs: Frequency in Hz of each coefficient, same length as
            ``fft_values``. Frequencies are compared as given (signed), so for
            bands with non-negative edges only the non-negative half of a
            two-sided spectrum is counted.

    Returns:
        Sum of squared magnitudes of the selected coefficients; ``0.0`` when
        no coefficient falls inside the band.
    """
    in_band = (fft_freqs >= band[0]) & (fft_freqs < band[1])
    return np.sum(np.abs(fft_values[in_band]) ** 2)
# Measure the energy of each band in the window transformed above, then print
# the three band energies together with their average.
energy_low, energy_mid, energy_high = (
    band_energy(bands[name], fft_values, fft_freqs)
    for name in ("low", "mid", "high")
)
avg_energy = (energy_low + energy_mid + energy_high) / 3

print(f"low {energy_low:.2e}")
print(f"mid {energy_mid:.2e}")
print(f"high {energy_high:.2e}")
print(f"avg {avg_energy:.2e}")
def adjust(target_energy, current_energy, fft_values, band, freqs=None):
    """Scale the FFT bins of one band, in place, toward a target energy.

    Every bin whose absolute frequency lies in ``band[0] <= |f| < band[1]`` is
    multiplied by ``gain = sqrt(target_energy / current_energy)``. The gain is
    never negative: a value above 1 boosts the band, a value below 1
    attenuates it.

    The mask is built on ``|f|`` so that each positive-frequency bin and its
    negative-frequency mirror are scaled together. Scaling only the positive
    half leaves the spectrum non-Hermitian; taking ``.real`` of the inverse
    FFT then yields an effective gain of ``(1 + gain) / 2`` rather than
    ``gain``.

    Args:
        target_energy: Energy the band should end up with.
        current_energy: Energy currently measured in the band.
        fft_values: Complex FFT coefficients of the frame; modified in place.
        band: ``(low_hz, high_hz)`` pair, half-open on the upper edge.
        freqs: Frequency in Hz of each coefficient. When omitted, the
            module-level ``fft_freqs`` is used, as in the original code.

    Returns:
        None. ``fft_values`` is updated in place.
    """
    if freqs is None:
        freqs = fft_freqs  # bin frequencies shared by every frame of the script
    abs_hz = np.abs(freqs)
    in_band = (abs_hz >= band[0]) & (abs_hz < band[1])
    # The small constant keeps the ratio finite when the band is silent.
    gain = np.sqrt(target_energy / (current_energy + 1e-6))
    fft_values[in_band] *= gain
# For each window in the signal, measure the low, mid and high band energies,
# pull every band toward their average, and overlap-add the result.

# Loop invariants: the bin frequencies and the Hann window depend only on
# window_size and sample_rate, so they are built once instead of per frame.
fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)
hann_window = np.hanning(window_size)
# NOTE(review): the Hann window is applied at analysis and again at synthesis
# with a half-window hop; a squared Hann does not overlap-add to a constant,
# so some amplitude ripple at the hop rate is expected -- confirm intended.

for i in range(num_frames):
    # Window bounds; num_frames only counts whole windows, so every frame is
    # exactly window_size samples long.
    start_idx = i * hop_size
    end_idx = start_idx + window_size
    frame = wav_signal[start_idx:end_idx] * hann_window

    # Calculate FFT and the per-band energies of this frame
    fft_values = np.fft.fft(frame)
    energy_low = band_energy(bands["low"], fft_values, fft_freqs)
    energy_mid = band_energy(bands["mid"], fft_values, fft_freqs)
    energy_high = band_energy(bands["high"], fft_values, fft_freqs)
    avg_energy = (energy_low + energy_mid + energy_high) / 3

    # adjust() modifies fft_values in place, band by band
    adjust(avg_energy, energy_low, fft_values, bands["low"])
    adjust(avg_energy, energy_mid, fft_values, bands["mid"])
    adjust(avg_energy, energy_high, fft_values, bands["high"])

    # Back to the time domain; only the real part is kept
    adjusted_frame = np.fft.ifft(fft_values).real

    # Put the signal back together, frame by frame
    reconstructed_signal[start_idx:end_idx] += adjusted_frame * hann_window

# Normalise to the full int16 range. A silent result (silent input, or input
# shorter than one window so no frame was processed) has a peak of 0; skip the
# division there instead of producing NaNs that turn into garbage samples.
peak = np.max(np.abs(reconstructed_signal)) if reconstructed_signal.size else 0.0
if peak > 0:
    reconstructed_signal = reconstructed_signal / peak * np.iinfo(np.int16).max
reconstructed_signal = np.int16(reconstructed_signal)

assert len(wav_signal) == len(reconstructed_signal)  # Sanity check

# Prefix only the file name, not the whole path, so an input such as
# "samples/foo.wav" is written to "samples/adj-foo.wav" rather than to the
# non-existent directory "adj-samples/".
output_file = os.path.join(os.path.dirname(file), "adj-" + os.path.basename(file))
wav.write(output_file, sample_rate, reconstructed_signal)
print(f"Adjusted audio written to {output_file}")