|
|
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch

from NovaSR import FastSR
|
|
|
|
|
|
|
|
# Sample rate (Hz) reported alongside every upsampled clip.
TARGET_SR = 48000

# Single model instance, built once at import time and reused by every request.
upsampler = FastSR()
# Cast the wrapped model with .float(); inputs are cast the same way before
# inference (presumably a torch module, i.e. float32 weights -- confirm).
upsampler.model.float()
|
|
|
|
|
|
|
|
def super_resolve(audio):
    """Upsample a Gradio audio clip with the shared NovaSR model.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when no clip
            was provided.

    Returns:
        ``(TARGET_SR, samples)`` where ``samples`` is an ``int16`` NumPy
        array, or ``None`` when ``audio`` is ``None``.
    """
    if audio is None:
        return None

    sr, wav = audio

    # The upsampler reads its input from a path, so the clip has to be
    # written to disk first. A TemporaryDirectory removes the file once the
    # audio is loaded (even if loading raises), so requests no longer leave
    # stray .wav files behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_path = os.path.join(tmp_dir, "input.wav")
        sf.write(temp_path, wav, sr)
        lowres_audio = upsampler.load_audio(temp_path).float()

    # Inference only: no gradients are needed.
    with torch.no_grad():
        highres_audio = upsampler.infer(lowres_audio).cpu().squeeze(0).numpy()

    # Clamp to [-1, 1] so that scaling by 32767 stays inside the int16 range.
    highres_audio = (np.clip(highres_audio, -1.0, 1.0) * 32767).astype(np.int16)

    return (TARGET_SR, highres_audio)
|
|
|
|
|
|
|
|
# Text shown under the title on the demo page.
_DESCRIPTION = "NovaSR is just a 52kb incredibly fast audio upsampler reaching speeds of 3600x realtime. This demo is on a 2 core CPU so speeds will not be as fast as it can be on consumer devices. Please upload a low quality audio file to try it out."

# Input widget: accepts an uploaded file or a microphone recording and hands
# the clip to the callback as a (sample_rate, numpy_array) tuple.
_lowres_input = gr.Audio(
    label="Low-resolution audio (16 kHz recommended)",
    type="numpy",
    sources=["upload", "microphone"],
)

# Output widget: plays back the (sample_rate, numpy_array) tuple returned by
# the callback.
_highres_output = gr.Audio(
    label="Upsampled audio (48 kHz)",
    type="numpy",
)

# Wire the callback and both widgets into a single-function interface.
demo = gr.Interface(
    title="NovaSR Audio Super-Resolution",
    description=_DESCRIPTION,
    fn=super_resolve,
    inputs=_lowres_input,
    outputs=_highres_output,
)


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()