## 1. Choose Device

In [3]:
import torch
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## 2. Choose the target directory

In [1]:
%cd ..

D:\GitHub\WSRGlow


In [2]:
# List the contents of the `pretrained` folder to see which model directories are available.
%ls pretrained

 Volume in drive D is data
 Volume Serial Number is 20F4-F94C

 Directory of D:\GitHub\WSRGlow\pretrained

03/11/2022  01:53 PM    <DIR>          .
03/11/2022  09:34 PM    <DIR>          ..
03/11/2022  01:57 PM    <DIR>          4k
03/09/2022  10:52 PM     7,744,631,188 4k.zip
03/11/2022  01:44 PM    <DIR>          8k
03/11/2022  01:10 PM    <DIR>          debug
               1 File(s)  7,744,631,188 bytes
               5 Dir(s)  567,092,137,984 bytes free


## 3. Load model

In [4]:
from src.models import load_wrapper_model

# Directory of the pretrained model to evaluate.
eval_dir = "pretrained/4k"

# Build the model wrapper on the selected device and get the list of
# checkpoints associated with `eval_dir`.
skeleton_model, checkpoints = load_wrapper_model(eval_dir, device)

skeleton_model is loaded.
	sr: 4000 to 48000
checkpoints list:
	pretrained\4k\checkpoints\epoch_032.ckpt
	pretrained\4k\checkpoints\epoch_033.ckpt
	pretrained\4k\checkpoints\epoch_035.ckpt
model is on cuda


 **IMPORTANT** WSRGlowTestee: 
Input audio must have been resampled to 24kusing librosa library to match the dimension.


In [5]:
# Load the weights from the first checkpoint in the list.
first_checkpoint = checkpoints[0]
skeleton_model.load_checkpoints(first_checkpoint)

checkpoint loaded: pretrained\4k\checkpoints\epoch_032.ckpt


## 4. Load a low-resolution sample

In [10]:
# Show the sampling rates the loaded model wrapper exposes; the cells below read them by key.
skeleton_model.sr_rates

{'audio_sample_rate': 48000, 'target_rate': 4000, 'resampled_rate': 24000}

In [20]:
from IPython.display import Audio, display
import librosa

# Decode the sample file at the model's `audio_sample_rate` and play it back.
audio_sample_rate = skeleton_model.sr_rates['audio_sample_rate']
wav, sr = librosa.load('sample.flac', sr=audio_sample_rate)

player = Audio(wav, rate=sr)
display(player)

### Resample to the target sampling rate, then resample again to 24 kHz

In [21]:
# NOTE: `wav` is overwritten in place below, so re-run the loading cell
# before re-running this one; otherwise the input is no longer at
# `audio_sample_rate` and the resampling is applied to the wrong signal.
rates = skeleton_model.sr_rates
full_rate = rates['audio_sample_rate']
low_rate = rates['target_rate']
model_input_rate = rates['resampled_rate']

# Step 1: bring the signal down to the low-resolution target rate.
wav = librosa.resample(wav, orig_sr=full_rate, target_sr=low_rate)
print('target sampling rate:', low_rate)
display(Audio(wav, rate=low_rate))

# Step 2: resample the low-resolution signal to the rate the model expects as input.
wav = librosa.resample(wav, orig_sr=low_rate, target_sr=model_input_rate)
print('resampled rate:', model_input_rate)
display(Audio(wav, rate=model_input_rate))


target sampling rate: 4000


resampled rate: 24000


## 5. Inference


Customise ```class WSRGlowTestee: ``` in ```src/models/TesteeWrapper.py```

```python
    def infer(self, x, target):
        """Run the wrapped model on a waveform and return its output as a flat array.

        Args:
            x (np.array): [sample,], input waveform.
            ***IMPORTANT*** x must already be re-resampled to 24k

            target (np.array): [sample,], with target_sr sample rate.
            Not used by this implementation.
        Returns:
            np.array: [sample,], the model output flattened to one dimension
        """

        # Add a leading dimension of size 1, then move the input to self.device.
        wav = torch.Tensor(x).reshape(1, -1)
        wav = wav.to(self.device)
        # Keep the first element of the model's result and convert it to a NumPy array on the CPU.
        output = np.array(self.model.infer(wav, sigma=self.sigma)[0].cpu().detach())
        x = output.reshape(-1)

        return x

```

#### Important
- Currently, ```target``` is not used.
- ```x``` must be resampled to 24 kHz before it is passed to ```infer```.

In [24]:
# Run the model on the resampled waveform. The second argument is the
# `target`, which is passed as None here.
estimated = skeleton_model.infer(wav, None)

# Play the estimate back at the model's `audio_sample_rate`.
output_rate = skeleton_model.sr_rates['audio_sample_rate']
display(Audio(estimated, rate=output_rate))