# computers-sound-music-portf.../code/adaptive-tone-control/main.py

import os
import sys

import numpy as np
from scipy.io import wavfile as wav

print("Portfolio Object 2: Adaptive Tone Control")

# Input selection: the first command-line argument, when given, names the WAV
# file to process; otherwise the default "alien.wav" is used.
file = sys.argv[1] if len(sys.argv) > 1 else "alien.wav"

# Load the audio. Multi-dimensional data (more than one channel) is collapsed
# to a single mono track by averaging across axis 1.
sample_rate, wav_signal = wav.read(file)
if wav_signal.ndim >= 2:
    wav_signal = wav_signal.mean(axis=1)

# Constants
# Frequency bands as (lower, upper) edges, compared against the FFT bin
# frequencies computed from sample_rate below.
bands = {"low": (0, 300), "mid": (300, 2000), "high": (2000, 20000)}
# This seems like a fine choice, but hard to know one way or the other
window_size = 1024
# Consecutive windows overlap by half a window
hop_size = window_size // 2
# Number of complete windows that fit in the signal when stepping by hop_size
num_frames = (len(wav_signal) - window_size) // hop_size + 1

# Output buffer, same length as the input, filled by overlap-add later on
reconstructed_signal = np.zeros(wav_signal.shape[0])

# FFT of the first window of the signal (no taper applied), together with the
# frequency of each FFT bin, used to report the band energies of one window
fft_freqs = np.fft.fftfreq(window_size, d=1 / sample_rate)
fft_values = np.fft.fft(wav_signal[:window_size])


def band_energy(band, fft_values, fft_freqs):
    """Return the spectral energy contained in one frequency band.

    Args:
        band: ``(lower, upper)`` band edges, in the same units as
            ``fft_freqs``.
        fft_values: NumPy array of FFT coefficients, one per entry of
            ``fft_freqs``.
        fft_freqs: NumPy array holding the frequency of each FFT bin.

    Returns:
        The sum of squared magnitudes of every bin whose frequency satisfies
        ``lower <= freq < upper``. Bins with a negative frequency are
        therefore not counted when ``lower`` is non-negative.
    """
    # Half-open interval: a bin sitting exactly on an edge shared by two
    # bands is counted only in the upper band. This matches the bin selection
    # used by adjust(), so energy is measured over the same bins that get
    # scaled, and no bin is counted twice.
    in_band = (fft_freqs >= band[0]) & (fft_freqs < band[1])
    return np.sum(np.abs(fft_values[in_band]) ** 2)


# Measure the energy of each band for the window analysed above, and the mean
# of the three, then print the results
energy_low, energy_mid, energy_high = (
    band_energy(bands[name], fft_values, fft_freqs) for name in ("low", "mid", "high")
)
avg_energy = (energy_low + energy_mid + energy_high) / 3


print(
    f"low {energy_low:.2e}",
    f"mid {energy_mid:.2e}",
    f"high {energy_high:.2e}",
    f"avg {avg_energy:.2e}",
    sep="\n",
)


# Scale the FFT values of all frequencies in the given band by a gain factor.
# The gain is never negative: it is above 1 (boost) when the band holds less
# energy than the target and below 1 (cut) when it holds more.
def adjust(target_energy, current_energy, fft_values, band, freqs=None):
    """Scale one frequency band of a spectrum, in place, toward a target energy.

    Args:
        target_energy: Energy the band should end up with.
        current_energy: Energy the band currently holds.
        fft_values: Complex NumPy array of FFT coefficients; modified in place.
        band: ``(lower, upper)`` band edges; bins with
            ``lower <= |freq| < upper`` are scaled.
        freqs: Frequency of each bin in ``fft_values``. Defaults to the
            module-level ``fft_freqs`` when omitted.

    Returns:
        None. ``fft_values`` is updated in place.
    """
    if freqs is None:
        freqs = fft_freqs
    # Select on |frequency| so the negative-frequency mirror of every bin gets
    # the same gain. Scaling only the positive half would break the conjugate
    # symmetry of a real signal's spectrum, and taking the real part of the
    # inverse FFT would then apply only the average of the gain and 1.
    magnitudes = np.abs(freqs)
    idx_band = np.where((magnitudes >= band[0]) & (magnitudes < band[1]))[0]
    # Energy is amplitude squared, hence the square root. The small constant
    # avoids a division by zero for a band with no energy.
    gain = np.sqrt(target_energy / (current_energy + 1e-6))
    fft_values[idx_band] *= gain


# For each window in the sample, we need to calculate, then adjust the low,
# medium, and high band energies, and overlap-add the result into the output.
# Only complete windows are processed (see num_frames), so any samples after
# the last complete window stay at their initial value of zero.

# Loop invariants: the bin frequencies and the taper are the same for every
# frame, so they are computed once.
fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)

# Square-root periodic Hann, applied once before the FFT and once after the
# inverse FFT. The product of the two is a periodic Hann window, and periodic
# Hann windows shifted by half their length sum to exactly 1, so an unmodified
# frame sequence overlap-adds back to the original signal. Applying a plain
# Hann twice instead yields Hann squared, which does not sum to a constant at
# half-window hops and modulates the output amplitude at the frame rate.
frame_window = np.sqrt(np.hanning(window_size + 1)[:-1])

for i in range(num_frames):

    # Window bounds and tapered frame contents
    start_idx = i * hop_size
    end_idx = start_idx + window_size
    frame = wav_signal[start_idx:end_idx] * frame_window

    # Calculate FFT
    fft_values = np.fft.fft(frame)

    energy_low = band_energy(bands["low"], fft_values, fft_freqs)
    energy_mid = band_energy(bands["mid"], fft_values, fft_freqs)
    energy_high = band_energy(bands["high"], fft_values, fft_freqs)
    avg_energy = (energy_low + energy_mid + energy_high) / 3

    # Pull every band toward the mean of the three band energies
    adjust(avg_energy, energy_low, fft_values, bands["low"])
    adjust(avg_energy, energy_mid, fft_values, bands["mid"])
    adjust(avg_energy, energy_high, fft_values, bands["high"])

    # Now, FFT values have been modified in place, up or down. We can inverse
    # FFT and keep the real part as the time-domain frame
    adjusted_frame = np.fft.ifft(fft_values).real

    # Put the signal back together, frame by frame
    reconstructed_signal[start_idx:end_idx] += adjusted_frame * frame_window

# Normalise to the full int16 range. A silent (all-zero) or empty result has
# no peak to scale by, so it is written out unscaled rather than dividing by
# zero and casting NaNs to integers.
peak = np.max(np.abs(reconstructed_signal)) if reconstructed_signal.size else 0.0
if peak > 0:
    reconstructed_signal = reconstructed_signal / peak * np.iinfo(np.int16).max
reconstructed_signal = reconstructed_signal.astype(np.int16)

assert len(wav_signal) == len(reconstructed_signal)  # Sanity check

# Prefix the file name, not the whole path, so that an input such as
# "clips/alien.wav" is written to "clips/adj-alien.wav" rather than to the
# non-existent directory "adj-clips".
input_dir, input_name = os.path.split(file)
output_file = os.path.join(input_dir, "adj-" + input_name)
wav.write(output_file, sample_rate, reconstructed_signal)
print(f"Adjusted audio written to {output_file}")