Getting ONNX Models Working In A Quarto Blog

A step-by-step guide to integrating ONNX models into a Quarto blog post.

Categories: ai, quarto, meta

Author: Shon Czinner

Published: May 8, 2026

To get ONNX models working in your Quarto blog, first you'll need a model. In my case I'm exporting one from PyTorch. This requires installing PyTorch for the model itself, onnxscript and onnx for exporting it, and onnxruntime for loading the exported model back into Python to test it out.
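As a quick sanity check that the environment is set up, you can verify that all four packages are importable (the module names below match the PyPI package names; this check is my own addition, not part of the original setup):

```python
import importlib.util

# Quick sanity check: all four packages import under these module names
for pkg in ("torch", "onnxscript", "onnx", "onnxruntime"):
    found = importlib.util.find_spec(pkg) is not None
    print(f"{pkg}: {'installed' if found else 'missing'}")
```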

To get the model into your Quarto blog, you'll need the model file itself plus the files that load it and display/interact with it.

Make sure you list everything as a resource in the YAML block at the top of the post.

 resources:
    - model.onnx
    - model.onnx.data
    - loadmodel.html
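In context, the complete YAML block at the top of the .qmd post might look like this (the title, description, and categories values here are illustrative; only the resources list is required for the model files to be copied to the output site):

```yaml
---
title: "Getting ONNX Models Working In A Quarto Blog"
description: "A step-by-step guide to integrating ONNX models into a Quarto blog post."
categories: [ai, quarto, meta]
resources:
  - model.onnx
  - model.onnx.data
  - loadmodel.html
---
```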

loadmodel.html contains the JavaScript used to load and interact with the model, plus the HTML to display a simple UI.

We’re using a CDN (https://cdn.jsdelivr.net/npm/onnxruntime-web@1.20.0/dist/ort.min.js) to avoid a build step.

For reference, see:

https://onnxruntime.ai/docs/tutorials/web/large-models.html#external-data

https://onnxruntime.ai/docs/tutorials/web/deploy.html#override-webassembly-file-path

Python Code

Imports and Model

Code
import torch
import torch.nn as nn
import onnxruntime as ort
import numpy as np

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(10, 20)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(20, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

Export the model

Code
model = MyModel()  
model.eval()  # Set the model to evaluation mode
torch.onnx.export(model, torch.randn(1, 10), "model.onnx", opset_version=20);
[torch.onnx] Obtain model graph for `MyModel([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MyModel([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
[torch.onnx] Optimize the ONNX graph... ✅

Test Model in Python

Code
def to_numpy(tensor):
    return tensor.detach().cpu().numpy()

ort_session = ort.InferenceSession("model.onnx")

x = torch.randn(1, 10)
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)
print("ONNX Runtime output:", ort_outs)
ONNX Runtime output: [array([[-0.2114761]], dtype=float32)]

HTML and Javascript Code

Example of HTML and Javascript

Code
from IPython.display import Markdown, HTML


with open("incrementbutton.html", "r") as f:
    raw_code = f.read()

# Using Markdown to wrap the code in a syntax-highlighted block
display(Markdown(f"```html\n{raw_code}\n```"))
<!DOCTYPE html>
<html>
<body>
    <div style="padding: 20px; border: 1px solid #ccc; border-radius: 5px;">
        <h3>Counter: <span id="count">0</span></h3>
        <button onclick="increment()">Click Me!</button>
    </div>

    <script>
        let count = 0;
        function increment() {
            count++;
            document.getElementById('count').innerText = count;
        }
    </script>
</body>
</html>
Code
display(HTML(raw_code))

Counter: 0
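Displaying the file through IPython works, but if you'd rather skip the Python cell entirely, Quarto (via Pandoc) also accepts a raw HTML block directly in the .qmd source; anything inside it is passed through to the page unchanged. A minimal example:

````markdown
```{=html}
<p>Any raw HTML here is passed through to the rendered page unchanged.</p>
```
````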

Loading The Model

Code
with open("loadmodel.html", "r") as f:
    model_code = f.read()

# Using Markdown to wrap the code in a syntax-highlighted block
display(Markdown(f"```html\n{model_code}\n```"))
<div id="ai-widget" style="padding: 20px; border: 1px solid #ddd; border-radius: 10px; background: #fff; max-width: 400px; margin: 10px auto; font-family: system-ui, -apple-system, sans-serif;">
    <h4 style="margin-top: 0;">MyModel Inference</h4>
    
    <div id="status-light" style="font-size: 0.85em; margin-bottom: 15px;">
        <span style="height: 10px; width: 10px; background-color: #bbb; border-radius: 50%; display: inline-block; margin-right: 5px;"></span>
        Status: <span id="status-text">Initializing...</span>
    </div>

    <div style="display: flex; gap: 10px; flex-direction: column;">
        <button id="run-inference" style="padding: 10px; cursor: pointer; background: #007bff; color: white; border: none; border-radius: 5px;" disabled>
            Predict from Random Input
        </button>
        <div id="prediction-output" style="padding: 10px; background: #f8f9fa; border-radius: 5px; font-weight: bold; text-align: center; min-height: 20px;">
            --
        </div>
    </div>
</div>

<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.20.0/dist/ort.min.js"></script>

<script type="module">
    // Configure WASM
    ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.20.0/dist/";

    let session;
    const statusText = document.getElementById('status-text');
    const statusLight = document.getElementById('status-light').children[0];
    const predictBtn = document.getElementById('run-inference');
    const outputDiv = document.getElementById('prediction-output');

    async function init() {
        try {
            // Load the model, pointing the runtime at the external weights file
            session = await ort.InferenceSession.create('./model.onnx', {
                executionProviders: ['wasm'],
                externalData: [
                    {
                        path: 'model.onnx.data',
                        data: './model.onnx.data'
                    }
                ]
            });

            statusText.innerText = "Ready";
            statusLight.style.backgroundColor = "#28a745";
            predictBtn.disabled = false;
        } catch (e) {
            statusText.innerText = "Error loading model";
            statusLight.style.backgroundColor = "#dc3545";
            console.error(e);
        }
    }

    async function run() {
        try {
            // MyModel expects FLOAT, [1, 10]
            const inputData = Float32Array.from({length: 10}, () => Math.random() * 2 - 1);
            const inputTensor = new ort.Tensor('float32', inputData, [1, 10]);

            // The feed key 'x' must match the graph's input name
            const results = await session.run({ x: inputTensor });
            
            // The output name 'linear_1' must match the graph's output name
            const resultValue = results.linear_1.data[0];
            
            outputDiv.innerText = `Result: ${resultValue.toFixed(6)}`;
        } catch (e) {
            outputDiv.innerText = "Inference failed";
            console.error(e);
        }
    }

    predictBtn.onclick = run;
    init();
</script>
Code
display(HTML(model_code))

MyModel Inference

Status: Initializing...
--