97 lines
3.3 KiB
Python
97 lines
3.3 KiB
Python
import os
import sys

import numpy as np
from scipy.io import wavfile as wav
|
|
|
|
print("Portfolio Object 2: Adaptive Tone Control")

# Input selection: the first command-line argument when one is supplied,
# otherwise the default "alien.wav".
file = sys.argv[1] if len(sys.argv) > 1 else "alien.wav"

# Load the audio. Input with more than one dimension is collapsed to a single
# channel by averaging along axis 1.
sample_rate, wav_signal = wav.read(file)
if wav_signal.ndim >= 2:
    wav_signal = wav_signal.mean(axis=1)
|
|
|
|
# Constants
# Band edges as (low, high) pairs, compared directly against the FFT bin
# frequencies produced by np.fft.fftfreq below.
bands = {"low": (0, 300), "mid": (300, 2000), "high": (2000, 20000)}
# Samples per analysis frame. This seems like a fine choice, but it is hard to
# know one way or the other.
window_size = 1024
hop_size = window_size // 2  # Each window overlaps the previous one by half
# Number of whole windows that fit in the signal; clamped so that a signal
# shorter than one window yields zero frames instead of a negative count.
num_frames = max(0, (len(wav_signal) - window_size) // hop_size + 1)

# Output accumulator for the overlap-added frames, same length as the input.
reconstructed_signal = np.zeros(len(wav_signal))

# FFT to get the energy of one arbitrary window (the first one).
# n=window_size zero-pads a signal shorter than the window, so the spectrum
# always has exactly one value per entry of fft_freqs.
fft_values = np.fft.fft(wav_signal[:window_size], n=window_size)
fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)
|
|
|
|
|
|
def band_energy(band, fft_values, fft_freqs):
    """Return the summed squared magnitude of the FFT bins inside one band.

    Args:
        band: ``(low, high)`` frequency pair. The band is half-open,
            ``low <= f < high``, so adjacent bands that share an edge never
            count the same bin twice.
        fft_values: Complex spectrum, one value per entry of ``fft_freqs``.
        fft_freqs: Frequency of each bin, in the same units as ``band``.

    Returns:
        The sum of ``|fft_values|**2`` over the selected bins; ``0.0`` when no
        bin falls inside the band.
    """
    low, high = band[0], band[1]
    idx_band = np.where((fft_freqs >= low) & (fft_freqs < high))[0]
    return np.sum(np.abs(fft_values[idx_band]) ** 2)
|
|
|
|
|
|
# Measure the energy of each band in the preview spectrum computed above,
# take the mean of the three bands, and print all four figures.
energy_low, energy_mid, energy_high = (
    band_energy(bands[name], fft_values, fft_freqs)
    for name in ("low", "mid", "high")
)
avg_energy = (energy_low + energy_mid + energy_high) / 3

print(
    f"low {energy_low:.2e}",
    f"mid {energy_mid:.2e}",
    f"high {energy_high:.2e}",
    f"avg {avg_energy:.2e}",
    sep="\n",
)
|
|
|
|
|
|
# Scale the FFT value of every bin in the given band by a gain factor. The gain
# is never negative; it is above 1 to boost the band and below 1 to cut it.
def adjust(target_energy, current_energy, fft_values, band, freqs=None):
    """Scale, in place, the bins of one band so its energy moves to a target.

    Bins are matched on the absolute value of their frequency, so the
    negative-frequency mirror of each bin receives the same gain. Scaling only
    the non-negative half would break the conjugate symmetry of a real
    signal's spectrum, and taking the real part of the inverse FFT would then
    apply roughly ``(gain + 1) / 2`` instead of ``gain``.

    Args:
        target_energy: Energy the band should have after scaling.
        current_energy: Energy the band has now.
        fft_values: Complex spectrum; modified in place.
        band: ``(low, high)`` frequency pair, half-open (``low <= |f| < high``).
        freqs: Frequency of each bin. When omitted, the module-level
            ``fft_freqs`` array is used.

    Returns:
        None. The result is written into ``fft_values``.
    """
    if freqs is None:
        # Backward-compatible default: the bin frequencies held at module level.
        freqs = fft_freqs
    magnitude = np.abs(freqs)
    idx_band = np.where((magnitude >= band[0]) & (magnitude < band[1]))[0]
    # Energy is amplitude squared, hence the square root. The small constant
    # keeps the division finite when the band is silent.
    gain = np.sqrt(target_energy / (current_energy + 1e-6))
    fft_values[idx_band] *= gain
|
|
|
|
|
|
# For each window in the sample, measure the low, mid and high band energies,
# then scale every band toward the mean of the three.

# Loop invariants: the Hann window and the bin frequencies are the same for
# every frame, so they are built once. adjust() reads fft_freqs at module level.
hann_window = np.hanning(window_size)
fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)

for i in range(num_frames):
    # Window bounds and windowed frame contents. num_frames only counts whole
    # windows, so every slice is exactly window_size samples long.
    start_idx = i * hop_size
    end_idx = start_idx + window_size
    frame = wav_signal[start_idx:end_idx] * hann_window

    # Spectrum of this frame
    fft_values = np.fft.fft(frame)

    energy_low = band_energy(bands["low"], fft_values, fft_freqs)
    energy_mid = band_energy(bands["mid"], fft_values, fft_freqs)
    energy_high = band_energy(bands["high"], fft_values, fft_freqs)
    avg_energy = (energy_low + energy_mid + energy_high) / 3

    # Each call rescales its band inside fft_values, in place.
    adjust(avg_energy, energy_low, fft_values, bands["low"])
    adjust(avg_energy, energy_mid, fft_values, bands["mid"])
    adjust(avg_energy, energy_high, fft_values, bands["high"])

    # Back to the time domain; only the real part is kept.
    adjusted_frame = np.fft.ifft(fft_values).real

    # Overlap-add: put the signal back together, frame by frame.
    # NOTE(review): the Hann window is applied both before the FFT and here.
    # With a half-window hop the squared windows do not sum to a constant, so
    # some amplitude ripple is expected; a sqrt-Hann pair or dividing by the
    # accumulated window sum may be worth considering.
    reconstructed_signal[start_idx:end_idx] += adjusted_frame * hann_window

# Normalise the peak to the int16 range. A silent, empty, or shorter-than-one-
# window result has a zero peak and is left as zeros instead of dividing by it.
peak = np.max(np.abs(reconstructed_signal)) if reconstructed_signal.size else 0.0
if peak > 0:
    reconstructed_signal = reconstructed_signal / peak * np.iinfo(np.int16).max
reconstructed_signal = reconstructed_signal.astype(np.int16)

assert len(wav_signal) == len(reconstructed_signal)  # Sanity check

# Prefix the file name only, not the whole path, so an input such as
# "sounds/alien.wav" is written to "sounds/adj-alien.wav".
input_dir, input_name = os.path.split(file)
output_file = os.path.join(input_dir, "adj-" + input_name)
wav.write(output_file, sample_rate, reconstructed_signal)
print(f"Adjusted audio written to {output_file}")
|