Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | @@ -7,6 +7,60 @@ base_model: pyannote/segmentation-3.0 | |
| 7 | 
             
            https://huggingface.co/pyannote/segmentation-3.0 with ONNX weights to be compatible with Transformers.js.
         | 
| 8 |  | 
| 9 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 10 | 
             
            ## Torch → ONNX conversion code:
         | 
| 11 | 
             
            ```py
         | 
| 12 | 
             
            # pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
         | 
|  | |
| 7 | 
             
            https://huggingface.co/pyannote/segmentation-3.0 with ONNX weights to be compatible with Transformers.js.
         | 
| 8 |  | 
| 9 |  | 
| 10 | 
            +
            ## Transformers.js (v3) usage
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            ```js
         | 
| 13 | 
            +
            import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            // Load model and processor
         | 
| 16 | 
            +
            const model_id = 'onnx-community/pyannote-segmentation-3.0';
         | 
| 17 | 
            +
            const model = await AutoModelForAudioFrameClassification.from_pretrained(model_id);
         | 
| 18 | 
            +
            const processor = await AutoProcessor.from_pretrained(model_id);
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            // Read and preprocess audio
         | 
| 21 | 
            +
            const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav';
         | 
| 22 | 
            +
            const audio = await read_audio(url, processor.feature_extractor.config.sampling_rate);
         | 
| 23 | 
            +
            const inputs = await processor(audio);
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            // Run model with inputs
         | 
| 26 | 
            +
            const { logits } = await model(inputs);
         | 
| 27 | 
            +
            // {
         | 
| 28 | 
            +
            //   logits: Tensor {
         | 
| 29 | 
            +
            //     dims: [ 1, 767, 7 ],  // [batch_size, num_frames, num_classes]
         | 
| 30 | 
            +
            //     type: 'float32',
         | 
| 31 | 
            +
            //     data: Float32Array(5369) [ ... ],
         | 
| 32 | 
            +
            //     size: 5369
         | 
| 33 | 
            +
            //   }
         | 
| 34 | 
            +
            // }
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            const result = processor.post_process_speaker_diarization(logits, audio.length);
         | 
| 37 | 
            +
            // [
         | 
| 38 | 
            +
            //   [
         | 
| 39 | 
            +
            //     { id: 0, start: 0, end: 1.0512535626298245, confidence: 0.8220156481664611 },
         | 
| 40 | 
            +
            //     { id: 2, start: 1.0512535626298245, end: 2.3398869619825127, confidence: 0.9008811707860472 },
         | 
| 41 | 
            +
            //     ...
         | 
| 42 | 
            +
            //   ]
         | 
| 43 | 
            +
            // ]
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            // Display result
         | 
| 46 | 
            +
            console.table(result[0], ['start', 'end', 'id', 'confidence']);
         | 
| 47 | 
            +
            // ┌─────────┬────────────────────┬────────────────────┬────┬─────────────────────┐
         | 
| 48 | 
            +
            // │ (index) │ start              │ end                │ id │ confidence          │
         | 
| 49 | 
            +
            // ├─────────┼────────────────────┼────────────────────┼────┼─────────────────────┤
         | 
| 50 | 
            +
            // │ 0       │ 0                  │ 1.0512535626298245 │ 0  │ 0.8220156481664611  │
         | 
| 51 | 
            +
            // │ 1       │ 1.0512535626298245 │ 2.3398869619825127 │ 2  │ 0.9008811707860472  │
         | 
| 52 | 
            +
            // │ 2       │ 2.3398869619825127 │ 3.5946089560890773 │ 0  │ 0.7521651315796233  │
         | 
| 53 | 
            +
            // │ 3       │ 3.5946089560890773 │ 4.578039708226655  │ 2  │ 0.8491978128022479  │
         | 
| 54 | 
            +
            // │ 4       │ 4.578039708226655  │ 4.594995410849717  │ 0  │ 0.2935352600416393  │
         | 
| 55 | 
            +
            // │ 5       │ 4.594995410849717  │ 6.121008646925269  │ 3  │ 0.6788051309866024  │
         | 
| 56 | 
            +
            // │ 6       │ 6.121008646925269  │ 6.256654267909762  │ 0  │ 0.37125512393851134 │
         | 
| 57 | 
            +
            // │ 7       │ 6.256654267909762  │ 8.630452635138397  │ 2  │ 0.7467035186353542  │
         | 
| 58 | 
            +
            // │ 8       │ 8.630452635138397  │ 10.088643060721703 │ 0  │ 0.7689364814666032  │
         | 
| 59 | 
            +
            // │ 9       │ 10.088643060721703 │ 12.58113134631177  │ 2  │ 0.9123324509131324  │
         | 
| 60 | 
            +
            // │ 10      │ 12.58113134631177  │ 13.005023911888312 │ 0  │ 0.4828358177572041  │
         | 
| 61 | 
            +
            // └─────────┴────────────────────┴────────────────────┴────┴─────────────────────┘
         | 
| 62 | 
            +
            ```
         | 
| 63 | 
            +
             | 
| 64 | 
             
            ## Torch → ONNX conversion code:
         | 
| 65 | 
             
            ```py
         | 
| 66 | 
             
            # pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
         | 

