Investigating the effect of noise on the area uncertainty quantification

Investigating the effect of noise on the area uncertainty quantification#

%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import arviz as az
import pymc as pm
from pathlib import Path
from peak_performance import pipeline as pl, models, plots
from matplotlib import pyplot as plt

Load and inspect raw intensity data#

timeseries = np.load(Path(r"..\example\A2t2R1Part1_132_85.9_86.1.npy"))
time = timeseries[0]
ints = timeseries[1]

def guess_noise(intensity):
    n = len(intensity)
    ifrom = int(np.ceil(0.15 * n))
    ito = int(np.floor(0.85 * n))
    start_ints = intensity[:ifrom]
    end_ints = intensity[ito:]
    return np.std([*(start_ints - np.mean(start_ints)), *(end_ints - np.mean(end_ints))])

fig, ax = plt.subplots()
ax.scatter(timeseries[0], timeseries[1], marker="x", color="black")
slope, intercept, noise = models.initial_guesses(time, ints)
ax.fill_between(
    time,
    slope * time + intercept - noise / 2,
    slope * time + intercept + noise / 2,
    alpha=0.5
)
ax.set(
    xlabel="time / h",
    ylabel="intensity / a.u.",
)
plt.show()
../_images/5005c01bca3a5094d80a4ee5f33490b0d88bf2565285733b796f34c5f88eb402.png

Define a peak model#

pmodel = models.define_model_double_normal(
    time=timeseries[0],
    intensity=timeseries[1]
)
pmodel.to_graphviz()
c:\Users\osthege\AppData\Local\mambaforge\envs\pepe\Lib\site-packages\pymc\data.py:273: FutureWarning: ConstantData is deprecated. All Data variables are now mutable. Use Data instead.
  warnings.warn(
../_images/17609e809b0f779c36dd77bda970707e32dc93cbfcfd6f292df34719df1a6fe9.svg

Fix the noise settings using pm.do#

pm.do(pmodel, {"noise_width_guess": 100}).to_graphviz()
../_images/8f789be62176ad058f5dbe45629f5c45dfbe7d96b5dca10b4bd33876ae70142c.svg

Sample the models with fixed noise#

sigma_idatas = {}
for s in [50, 100, 150, 200, 300, 400, 500]:
    spmodel = pm.do(pmodel, {"noise": s})
    sigma_idatas[s] = pl.sampling(spmodel, tune=6_000, draws=2000)
    sigma_idatas[s] = pl.posterior_predictive_sampling(spmodel, sigma_idatas[s])
Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:02<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:03<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:03<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:02<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:03<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:03<00:00 Sampling chains: 0, Divergences: 0]
Sampling: [L]


Sampling: [L, baseline_intercept, baseline_slope, height, meanmean, separation, std]
100.00% [32000/32000 00:02<00:00 Sampling chains: 0, Divergences: 1]
Sampling: [L]


for s, idata in sigma_idatas.items():
    plots.plot_posterior_predictive(
        identifier="peak_fit",
        time=idata.constant_data.time.values,
        intensity=idata.constant_data.intensity.values,
        path=None,
        idata=idata,
        discarded=False,
    )
    fig = plt.gcf()
    fig.suptitle(f"noise={s} a.u.")
    fig.tight_layout()
    plt.show()
../_images/5bbf34d02ccf0dfc31689f254877b98ea39993da0a3096863642c25467094e18.png ../_images/837bc8351371a7b1f21d91e2661430bbf05765c179a38537b5e4014a18808f43.png ../_images/e5321ff7bf663cff6a4152b83cf24a5c9fae2fe901cd679b9fdce477b5841d80.png ../_images/93ab32fe2a9ee134826bb2dc1d65525f37788d3d7a87358524affb340330c54c.png ../_images/11dbc14e2a92ed0b3a50e13036f2f222a2ebd020cdfe0fb10a75e11eb1f68f83.png ../_images/1bdde6bdfbfe9bdff1dcb326dce1e45a8714cbd564fc819cc03712826fa5aa9e.png ../_images/b784d51a23271155d19a6dbbcac7e68387c00611b35c54e3b37eae3afcbc524b.png
summary = pd.concat([
    az.summary(idata, var_names=["area"])
    for s, idata in sigma_idatas.items()
], keys=sigma_idatas, names=["noise", "var_name"])
summary
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
noise var_name
50 area[0] 313.351 11.665 291.073 334.905 0.109 0.077 11348.0 6043.0 1.0
area[1] 678.536 11.291 656.315 698.359 0.105 0.075 11806.0 5822.0 1.0
100 area[0] 316.106 23.420 272.294 359.828 0.215 0.153 11865.0 6763.0 1.0
area[1] 675.603 22.394 636.958 722.187 0.200 0.142 12404.0 5899.0 1.0
150 area[0] 319.884 37.831 249.006 391.667 0.369 0.263 10519.0 6268.0 1.0
area[1] 673.569 32.766 611.630 734.258 0.301 0.213 11899.0 6263.0 1.0
200 area[0] 324.587 52.400 229.037 423.322 0.585 0.433 8537.0 5144.0 1.0
area[1] 670.918 44.970 592.101 761.562 0.463 0.327 9483.0 5651.0 1.0
300 area[0] 358.377 125.401 160.648 551.351 2.581 2.006 3855.0 2160.0 1.0
area[1] 650.773 81.505 493.518 801.641 1.647 1.165 2905.0 1754.0 1.0
400 area[0] 435.775 233.185 123.857 853.499 4.999 3.535 2735.0 3092.0 1.0
area[1] 614.937 117.477 386.018 833.273 2.432 1.720 2418.0 2722.0 1.0
500 area[0] 540.067 369.859 14.128 1160.742 8.399 6.922 2429.0 2371.0 1.0
area[1] 589.281 142.407 329.538 856.490 2.730 1.931 2686.0 3486.0 1.0
fig, ax = plt.subplots()

for vname, df in summary.reset_index().groupby("var_name"):
    ax.errorbar(
        df["noise"],
        y=df["mean"],
        yerr=[
            df["mean"] - df["hdi_3%"],
            df["hdi_97%"] - df["mean"],
        ],
        label=vname,
        ls="none",
        capsize=2,
    )
ax.legend()
ax.set(
    xlabel="noise / a.u.",
    ylabel="$\mathrm{area\ /\ (a.u. \cdot min)}$"
)
plt.show()
../_images/8fc0b034a552db701eafe77b506ed7f108866f476f81c111f6f9a3c1b0eacf2f.png

Conclusion#

  • At high noise (>200 a.u.), the uncertainty about peak areas increases.

  • In this case, the model fits a noise of ~115 a.u. so that’s well below the critical level.

%load_ext watermark
%watermark -idu
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Last updated: 2024-05-11T14:43:08.623240+02:00