Bayesian Inference: OPV light-intensity dependant JV fits with SIMsalabim (fake data)

This notebook is a demonstration of how to fit light-intensity dependent JV curves with drift-diffusion models using the SIMsalabim package.

[1]:

# Import necessary libraries
import warnings, os, sys, shutil
# remove warnings from the output
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import default_rng
import torch, copy, uuid
import pySIMsalabim as sim
from pySIMsalabim.experiments.JV_steady_state import *
import ax, logging
from ax.utils.notebook.plotting import init_notebook_plotting, render
init_notebook_plotting() # for Jupyter notebooks

try:
    from optimpv import *
    from optimpv.axBOtorch.axUtils import *
except Exception as e:
    sys.path.append('../') # add the path to the optimpv module
    from optimpv import *
    from optimpv.axBOtorch.axUtils import *

[INFO 08-13 17:27:30] ax.utils.notebook.plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.
[INFO 08-13 17:27:30] ax.utils.notebook.plotting: Please see
    (https://ax.dev/tutorials/visualizations.html#Fix-for-plots-that-are-not-rendering)
    if visualizations are not rendering.

Define the parameters for the simulation

[2]:

params = [] # list of parameters to be optimized

mun = FitParam(name = 'l2.mu_n', value = 7e-8, bounds = [1e-9,1e-6], log_scale = True, value_type = 'float', fscale = None, rescale = False, display_name=r'$\mu_n$', unit='m$^2$ V$^{-1}$s$^{-1}$', axis_type = 'log', force_log = True)
params.append(mun)

mup = FitParam(name = 'l2.mu_p', value = 5e-8, bounds = [1e-9,1e-6], log_scale = True, value_type = 'float', fscale = None, rescale = False, display_name=r'$\mu_p$', unit=r'm$^2$ V$^{-1}$s$^{-1}$', axis_type = 'log', force_log = True)
params.append(mup)

bulk_tr = FitParam(name = 'l2.N_t_bulk', value = 1e20, bounds = [1e19,1e22], log_scale = True, value_type = 'float', fscale = None, rescale = False,  display_name=r'$N_{T}$', unit=r'm$^{-3}$', axis_type = 'log', force_log = True)
params.append(bulk_tr)

preLangevin = FitParam(name = 'l2.preLangevin', value = 1e-2, bounds = [0.005,1], log_scale = True, value_type = 'float', fscale = None, rescale = False, display_name=r'$\gamma_{pre}$', unit=r'', axis_type = 'log', force_log = True)
params.append(preLangevin)

R_series = FitParam(name = 'R_series', value = 1e-4, bounds = [1e-5,1e-3], log_scale = True, value_type = 'float', fscale = None, rescale = False,  display_name=r'$R_{series}$', unit=r'$\Omega$ m$^2$', axis_type = 'log', force_log = True)
params.append(R_series)

# save the original parameters for later
params_orig = copy.deepcopy(params)

Generate some fake data

Here we generate some fake data to fit. The data is generated using the same model as the one used for the fitting, so it is a good test of the fitting procedure. For more information on how to run SIMsalabim from python see the pySIMsalabim package.

[3]:

# Set the session path for the simulation and the input files
session_path = os.path.join(os.path.join(os.path.abspath('../'),'SIMsalabim','SimSS'))
input_path = os.path.join(os.path.join(os.path.join(os.path.abspath('../'),'Data','simsalabim_test_inputs','fakeOPV')))
simulation_setup_filename = 'simulation_setup_fakeOPV.txt'
simulation_setup = os.path.join(session_path, simulation_setup_filename)

# path to the layer files defined in the simulation_setup file
l1 = 'ZnO.txt'
l2 = 'ActiveLayer.txt'
l3 = 'BM_HTL.txt'
l1 = os.path.join(input_path, l1)
l2 = os.path.join(input_path, l2)
l3 = os.path.join(input_path, l3)

# copy this files to session_path
force_copy = True
if not os.path.exists(session_path):
    os.makedirs(session_path)
for file in [l1,l2,l3,simulation_setup_filename]:
    file = os.path.join(input_path, os.path.basename(file))
    if force_copy or not os.path.exists(os.path.join(session_path, os.path.basename(file))):
        shutil.copyfile(file, os.path.join(session_path, os.path.basename(file)))
    else:
        print('File already exists: ',file)

# Show the device structure
fig = sim.plot_band_diagram(simulation_setup, session_path)

# reset simss
# Set the JV parameters
Gfracs = [0.1,0.5,1] # Fractions of the generation rate to simulate (None if you want only one light intensity as define in the simulation_setup file)
UUID = str(uuid.uuid4()) # random UUID to avoid overwriting files

cmd_pars = [] # see pySIMsalabim documentation for the command line parameters
# Add the parameters to the command line arguments
for param in params:
    cmd_pars.append({'par':param.name, 'val':str(param.value)})

# Run the JV simulation
ret, mess = run_SS_JV(simulation_setup, session_path, JV_file_name = 'JV.dat', G_fracs = Gfracs, parallel = True, max_jobs = 3, UUID=UUID, cmd_pars=cmd_pars)

# save data for fitting
X,y = [],[]
X_orig,y_orig = [],[]
if Gfracs is None:
    data = pd.read_csv(os.path.join(session_path, 'JV_'+UUID+'.dat'), sep=r'\s+') # Load the data
    Vext = np.asarray(data['Vext'].values)
    Jext = np.asarray(data['Jext'].values)
    G = np.ones_like(Vext)
    rng = default_rng()#
    noise = rng.standard_normal(Jext.shape) * 0.01 * Jext
    Jext = Jext + noise
    X = Vext
    y = Jext

    plt.figure()
    plt.plot(X,y)
    plt.show()
else:
    for Gfrac in Gfracs:
        data = pd.read_csv(os.path.join(session_path, 'JV_Gfrac_'+str(Gfrac)+'_'+UUID+'.dat'), sep=r'\s+') # Load the data
        Vext = np.asarray(data['Vext'].values)
        Jext = np.asarray(data['Jext'].values)
        G = np.ones_like(Vext)*Gfrac
        rng = default_rng()#
        noise = rng.standard_normal(Jext.shape) * 0.005 * Jext

        if len(X) == 0:
            X = np.vstack((Vext,G)).T
            y = Jext + noise
            y_orig = Jext
        else:
            X = np.vstack((X,np.vstack((Vext,G)).T))
            y = np.hstack((y,Jext+ noise))
            y_orig = np.hstack((y_orig,Jext))

    # remove all the current where Jext is higher than a given value
    X = X[y<200]
    X_orig = copy.deepcopy(X)
    y_orig = y_orig[y<200]
    y = y[y<200]



    plt.figure()
    for Gfrac in Gfracs:
        plt.plot(X[X[:,1]==Gfrac,0],y[X[:,1]==Gfrac],label='Gfrac = '+str(Gfrac))
    plt.xlabel('Voltage [V]')
    plt.ylabel('Current density [A/m$^2$]')
    plt.legend()
    plt.show()

../_images/examples_JV_fakeOPV_BayesInf_5_0.png

../_images/examples_JV_fakeOPV_BayesInf_5_1.png

Run the optimization

[4]:

# Define the Agent and the target metric/loss function
from optimpv.DDfits.JVAgent import JVAgent
metric = 'mse' # can be 'nrmse', 'mse', 'mae'
loss = 'linear' # can be 'linear', 'huber', 'soft_l1'

# create a different params list for the agent
params_agent = copy.deepcopy(params)
#select a random value between the bounds, we do this because the walkers will be randomly initialized from the param.value
for param in params_agent:
    if param.force_log:
        param.value =10**np.random.uniform(np.log10(param.bounds[0]),np.log10(param.bounds[1]))
    else:
        param.value = np.random.uniform(param.bounds[0],param.bounds[1])

jv = JVAgent(params, X, y, session_path, simulation_setup, parallel = True, max_jobs = 3, metric = metric, loss = loss)

# Calulate the target metric for the original parameters
best_fit_possible = loss_function(calc_metric(y,y_orig, metric_name = metric),loss)
print('Best fit: ',best_fit_possible)

Best fit:  0.28005831028145817

MCMC Bayesian Inference for Parameter Fitting

We’ll use the emcee package to perform Markov Chain Monte Carlo sampling to find the posterior distribution of our model parameters.

[5]:

from optimpv.BayesInfEmcee.EmceeOptimizer import EmceeOptimizer

# Define the Bayesian Inference object
optimizer = EmceeOptimizer(params = params, agents = jv, nwalkers=20, nsteps=2000, burn_in=100, progress=True, name='emcee_opti')

[6]:

optimizer.optimize()

[INFO 2025-08-13 17:27:31,715] optimpv.BayesInfEmcee.EmceeOptimizer: Running MCMC with 20 walkers for 2000 steps...

----------------------------------------------------

100%|██████████| 100/100 [01:08<00:00,  1.45it/s]
100%|██████████| 2000/2000 [20:08<00:00,  1.66it/s]
[INFO 2025-08-13 17:48:51,437] optimpv.BayesInfEmcee.EmceeOptimizer: MCMC run complete.
[INFO 2025-08-13 17:48:51,448] optimpv.BayesInfEmcee.EmceeOptimizer: MCMC Results (Median & 16th/84th Percentiles)
[INFO 2025-08-13 17:48:51,449] optimpv.BayesInfEmcee.EmceeOptimizer: $\mu_n$ (l2.mu_n): 6.015e-08 (+6.34e-08 / -2.2e-08)
[INFO 2025-08-13 17:48:51,449] optimpv.BayesInfEmcee.EmceeOptimizer: $\mu_p$ (l2.mu_p): 5.533e-08 (+6.86e-08 / -2.42e-08)
[INFO 2025-08-13 17:48:51,449] optimpv.BayesInfEmcee.EmceeOptimizer: $N_{T}$ (l2.N_t_bulk): 4.212e+19 (+6.47e+19 / -2.46e+19)
[INFO 2025-08-13 17:48:51,450] optimpv.BayesInfEmcee.EmceeOptimizer: $\gamma_{pre}$ (l2.preLangevin): 0.009357 (+0.00381 / -0.00308)
[INFO 2025-08-13 17:48:51,450] optimpv.BayesInfEmcee.EmceeOptimizer: $R_{series}$ (R_series): 9.837e-05 (+4.77e-05 / -4.79e-05)

----------------------------------------------------

[6]:

{'l2.mu_n': {'median': np.float64(6.014606890715596e-08),
  '16th': np.float64(3.816208452598251e-08),
  '84th': np.float64(1.2352463823526105e-07),
  'lower_err': np.float64(2.1983984381173457e-08),
  'upper_err': np.float64(6.337856932810508e-08)},
 'l2.mu_p': {'median': np.float64(5.532677125269291e-08),
  '16th': np.float64(3.1122211576392954e-08),
  '84th': np.float64(1.239355762742183e-07),
  'lower_err': np.float64(2.4204559676299956e-08),
  'upper_err': np.float64(6.860880502152538e-08)},
 'l2.N_t_bulk': {'median': np.float64(4.212239283604533e+19),
  '16th': np.float64(1.7476208366474541e+19),
  '84th': np.float64(1.0682424659536201e+20),
  'lower_err': np.float64(2.464618446957079e+19),
  'upper_err': np.float64(6.470185375931668e+19)},
 'l2.preLangevin': {'median': np.float64(0.009357205939363799),
  '16th': np.float64(0.006275504818845882),
  '84th': np.float64(0.013170601111692404),
  'lower_err': np.float64(0.003081701120517917),
  'upper_err': np.float64(0.003813395172328605)},
 'R_series': {'median': np.float64(9.836998305321582e-05),
  '16th': np.float64(5.045978659638413e-05),
  '84th': np.float64(0.00014606127717969037),
  'lower_err': np.float64(4.791019645683169e-05),
  'upper_err': np.float64(4.769129412647455e-05)}}

[7]:

# Run simulation with best-fit parameters
best_fit_params = copy.deepcopy(optimizer.params)

# Create agent with best-fit parameters
best_agent = JVAgent(best_fit_params, X, y, session_path, simulation_setup,
                     parallel=True, max_jobs=3, metric=metric, loss=loss)

best_fit_y = best_agent.run(parameters={})

# Plot the best-fit curves against data
plt.figure(figsize=(10, 6))
for Gfrac in Gfracs:
    mask = X[:,1] == Gfrac
    plt.plot(X[mask,0], y[mask], 'ko', alpha=0.6, label=f'Data (G={Gfrac})')
    plt.plot(X[mask,0], best_fit_y[mask], 'k-', label=f'MCMC fit (G={Gfrac})')

# add the 10 best fit curves
for i in range(10):
    # prep dum dic
    dum_dic = {}
    # add the parameters
    idx = 0
    for param in params:
        if param.type != 'fixed':
            dum_dic[param.name] = optimizer.flat_samples[i][idx]
            idx += 1

    y_ = best_agent.run(parameters=dum_dic)
    for Gfrac in Gfracs:
        mask = X[:,1] == Gfrac
        plt.plot(X[mask,0], y_[mask], 'b-', alpha=0.3, zorder = -1)

plt.xlabel('Voltage [V]')
plt.ylabel('Current density [A/m$^2$]')
plt.legend()
plt.title('Best-fit JV Curves from MCMC')
plt.grid(True, alpha=0.3)
plt.show()

../_images/examples_JV_fakeOPV_BayesInf_11_0.png

[8]:

# corner plot
True_params = {p.name: p.value for p in params_orig} # Dictionary to hold true values for parameters
fig = optimizer.plot_corner(True_params=True_params) # Dictionary to hold true values for parameters

fig.set_size_inches(15, 15)

[WARNING 2025-08-13 17:48:54,815] root: Too few points to create valid contours

../_images/examples_JV_fakeOPV_BayesInf_12_1.png

[9]:

# optimizer.plot_trace()
fig = optimizer.plot_traces()
fig.set_size_inches(15, 15)

../_images/examples_JV_fakeOPV_BayesInf_13_0.png

[10]:

# Clean up the output files (comment out if you want to keep the output files)
sim.clean_all_output(session_path)
sim.delete_folders('tmp',session_path)
# uncomment the following lines to delete specific files
sim.clean_up_output('ZnO',session_path)
sim.clean_up_output('ActiveLayer',session_path)
sim.clean_up_output('BM_HTL',session_path)
sim.clean_up_output('simulation_setup_fakeOPV',session_path)