"""Portfolio Object 2: Adaptive Tone Control.

Reads a WAV file, splits it into overlapping Hann-windowed frames and, for
every frame, rescales the low / mid / high frequency bands so that each band
carries the average of the three band energies. The processed frames are
overlap-added back together and written to ``adj-<input name>``.

Usage: ``python <script> [input.wav]`` (defaults to ``alien.wav``).
"""

import sys
from pathlib import Path

import numpy as np
from scipy.io import wavfile as wav

# Band edges in Hz, interpreted as half-open intervals [low, high) so that a
# bin sitting exactly on a boundary belongs to exactly one band.
bands = {"low": (0, 300), "mid": (300, 2000), "high": (2000, 20000)}
window_size = 1024  # Samples per analysis frame
hop_size = window_size // 2  # Consecutive frames overlap by half a window


def _band_indices(band, fft_freqs, two_sided=False):
    """Return the FFT bin indices whose frequency lies in ``[band[0], band[1])``.

    With ``two_sided=True`` the mirrored negative-frequency bins are selected
    as well (the comparison is done on ``|frequency|``).
    """
    freqs = np.asarray(fft_freqs)
    if two_sided:
        freqs = np.abs(freqs)
    return np.where((freqs >= band[0]) & (freqs < band[1]))[0]


def band_energy(band, fft_values, fft_freqs):
    """Return the summed squared magnitude of the positive-frequency bins in *band*.

    Args:
        band: ``(low_hz, high_hz)`` pair; the upper edge is exclusive.
        fft_values: Complex spectrum as returned by ``np.fft.fft``.
        fft_freqs: Bin frequencies as returned by ``np.fft.fftfreq``.
    """
    idx_band = _band_indices(band, fft_freqs)
    return np.sum(np.abs(fft_values[idx_band]) ** 2)


def adjust(target_energy, current_energy, fft_values, band, fft_freqs):
    """Scale, in place, every bin of *band* so its energy moves to *target_energy*.

    The gain is ``sqrt(target / current)``; it is never negative and is above
    1 for a boost or below 1 for a cut. Both the positive-frequency bins and
    their negative-frequency mirrors are scaled: touching only one side would
    break the conjugate symmetry of a real signal's spectrum and the real part
    of the inverse FFT would then only receive a gain of ``(gain + 1) / 2``.

    Args:
        target_energy: Desired band energy.
        current_energy: Present band energy (as measured by ``band_energy``).
        fft_values: Complex spectrum, modified in place.
        band: ``(low_hz, high_hz)`` pair; the upper edge is exclusive.
        fft_freqs: Bin frequencies matching *fft_values*.
    """
    idx_band = _band_indices(band, fft_freqs, two_sided=True)
    # The small constant keeps the division finite for an empty/silent band.
    gain = np.sqrt(target_energy / (current_energy + 1e-6))
    fft_values[idx_band] *= gain


def adaptive_tone_control(signal, sample_rate, band_table=None):
    """Equalise the band energies of *signal* frame by frame.

    Args:
        signal: One-dimensional (mono) sample array.
        sample_rate: Sampling rate in Hz.
        band_table: Mapping of band name to ``(low_hz, high_hz)``; defaults to
            the module-level ``bands``.

    Returns:
        A float array with the same length as *signal*.
    """
    band_table = bands if band_table is None else band_table
    signal = np.asarray(signal, dtype=float)
    total = len(signal)
    if total == 0:
        return np.zeros(0)

    # Pad half a window of silence in front and enough behind so that every
    # real sample is covered by two overlapping frames. This also handles
    # inputs shorter than one window and the otherwise-unprocessed tail.
    num_frames = (total - 1) // hop_size + 2
    padded_len = (num_frames - 1) * hop_size + window_size
    padded = np.zeros(padded_len)
    padded[hop_size:hop_size + total] = signal

    window = np.hanning(window_size)
    window_sq = window ** 2
    fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)

    accum = np.zeros(padded_len)
    weight = np.zeros(padded_len)
    for frame_no in range(num_frames):
        start = frame_no * hop_size
        stop = start + window_size

        spectrum = np.fft.fft(padded[start:stop] * window)
        energies = {
            name: band_energy(edges, spectrum, fft_freqs)
            for name, edges in band_table.items()
        }
        avg_energy = sum(energies.values()) / len(energies)
        for name, edges in band_table.items():
            adjust(avg_energy, energies[name], spectrum, edges, fft_freqs)

        # Overlap-add with a synthesis window, tracking the total weight.
        accum[start:stop] += np.fft.ifft(spectrum).real * window
        weight[start:stop] += window_sq

    # The window is applied twice (analysis and synthesis) and squared Hann
    # windows at 50% overlap do not sum to a constant, so divide the summed
    # weight back out to avoid amplitude modulation at the hop rate.
    accum /= np.maximum(weight, 1e-8)
    return accum[hop_size:hop_size + total]


def to_int16(signal):
    """Peak-normalise *signal* to the full int16 range (silence stays silent)."""
    signal = np.asarray(signal, dtype=float)
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak == 0:
        return np.zeros(signal.shape, dtype=np.int16)
    return np.int16(signal / peak * np.iinfo(np.int16).max)


def main(argv=None):
    """Run the tone control on the WAV file named in *argv* (default ``alien.wav``)."""
    argv = sys.argv if argv is None else argv
    print("Portfolio Object 2: Adaptive Tone Control")

    # Ingest the wave signal, and convert to mono if needed
    input_name = argv[1] if len(argv) > 1 else "alien.wav"
    sample_rate, wav_signal = wav.read(input_name)
    if wav_signal.ndim > 1:
        wav_signal = np.mean(wav_signal, axis=1)

    # Report the band energies of the first window as a quick diagnostic.
    # n=window_size zero-pads inputs shorter than one window.
    fft_values = np.fft.fft(wav_signal[:window_size], n=window_size)
    fft_freqs = np.fft.fftfreq(window_size, 1 / sample_rate)
    energy_low = band_energy(bands["low"], fft_values, fft_freqs)
    energy_mid = band_energy(bands["mid"], fft_values, fft_freqs)
    energy_high = band_energy(bands["high"], fft_values, fft_freqs)
    avg_energy = (energy_low + energy_mid + energy_high) / 3
    print(f"low {energy_low:.2e}")
    print(f"mid {energy_mid:.2e}")
    print(f"high {energy_high:.2e}")
    print(f"avg {avg_energy:.2e}")

    reconstructed_signal = to_int16(adaptive_tone_control(wav_signal, sample_rate))

    # Prefix the file *name*, not the whole path, so inputs that live in
    # another directory get a valid output path next to them.
    source = Path(input_name)
    output_file = str(source.with_name("adj-" + source.name))
    wav.write(output_file, sample_rate, reconstructed_signal)
    print(f"Adjusted audio written to {output_file}")


if __name__ == "__main__":
    main()