Design of Experiment: perovskite thin film optimization PLQY and FWHM

This notebook is made to use optimPV for experimental design. Here, we show how to load some data from a presampling, and how to use optimPV to suggest the next set of experiment using Bayesian optimization.
The goal here is to do multi-objective optimization (MO) to maximize the photoluminescence quantum yield (PLQY) and minimize the full width at half maximum (FWHM) of a perovskite thin film.

Note: The data used here is real data generated in the i-MEET and HI-ERN labs at the university of Erlangen-Nuremberg (FAU) by Frederik Schmitt. The data is not published yet, and is only used here for demonstration purposes. For more information, please contact us.

[1]:
# Import necessary libraries
import warnings, os, sys, shutil
# remove warnings from the output
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import default_rng
import torch, copy, uuid
import pySIMsalabim as sim
from pySIMsalabim.experiments.JV_steady_state import *
import ax, logging
from ax.utils.notebook.plotting import init_notebook_plotting, render
init_notebook_plotting() # for Jupyter notebooks

try:
    from optimpv import *
    from optimpv.pymooOpti.pymooOptimizer import PymooOptimizer
except Exception as e:
    sys.path.append('../') # add the path to the optimpv module
    from optimpv import *
    from optimpv.pymooOpti.pymooOptimizer import PymooOptimizer
[INFO 08-13 17:14:14] ax.utils.notebook.plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.
[INFO 08-13 17:14:14] ax.utils.notebook.plotting: Please see
    (https://ax.dev/tutorials/visualizations.html#Fix-for-plots-that-are-not-rendering)
    if visualizations are not rendering.

Get the data

[2]:
# Define the path to the data
data_dir =os.path.join(os.path.abspath('../'),'Data','pero_MOO_opti') # path to the data directory

# Load the data
df = pd.read_csv(os.path.join(data_dir,'Pero_PLQY_FWHM.csv'),sep=r',') # load the data

stepsize_fraction = 0.05
stepsize_spin_speed = 100

# Display some information about the data
print(df.describe())

        iteration   batch_number  Cs_fraction  Fa_fraction  Ma_fraction  \
count  128.000000     128.000000   128.000000   128.000000   128.000000
mean     3.500000       7.500000     0.209375     0.533984     0.256641
std      2.300291       4.627885     0.278052     0.361914     0.277742
min      0.000000       0.000000     0.000000     0.000000     0.000000
25%      1.750000       3.750000     0.000000     0.200000     0.050000
50%      3.500000       7.500000     0.050000     0.500000     0.100000
75%      5.250000      11.250000     0.400000     0.950000     0.462500
max      7.000000      15.000000     1.000000     1.000000     1.000000

       Spin_duration_Antisolvent  Spin_duration_High_Speed   Spin_speed  \
count                 128.000000                128.000000   128.000000
mean                   14.617188                 40.531250  2914.843750
std                     7.183885                 14.651295  1015.400301
min                     5.000000                 15.000000  1000.000000
25%                    10.000000                 26.500000  2400.000000
50%                    12.000000                 43.000000  2700.000000
75%                    20.000000                 55.000000  3400.000000
max                    30.000000                 60.000000  5000.000000

             FWHM        PLQY       merit
count  128.000000  128.000000  128.000000
mean    98.012656    7.975000    0.549372
std     82.719007   10.030822    0.148835
min     45.740000    0.000000    0.219360
25%     49.255000    0.000000    0.471784
50%     53.950000    5.480000    0.528186
75%    101.587500   11.952500    0.575939
max    335.900000   40.920000    0.915136

Define the parameters for the simulation

[3]:
params = [] # list of parameters to be optimized

Cs_fraction = FitParam(name = 'Cs_fraction', value = 0, bounds = [0,1], value_type = 'float', display_name='Cs fraction', unit='', axis_type = 'linear')
params.append(Cs_fraction)

Fa_fraction = FitParam(name = 'Fa_fraction', value = 0, bounds = [0,1], value_type = 'float', display_name='Fa fraction', unit='', axis_type = 'linear')
params.append(Fa_fraction)

Spin_duration_Antisolvent = FitParam(name = 'Spin_duration_Antisolvent', value = 10, bounds = [5,30], value_type = 'float', display_name='Spin duration Antisolvent', unit='s', axis_type = 'linear')
params.append(Spin_duration_Antisolvent)

Spin_duration_High_Speed = FitParam(name = 'Spin_duration_High_Speed', value = 20, bounds = [15,60], value_type = 'float', display_name='Spin duration High Speed', unit='s', axis_type = 'linear')
params.append(Spin_duration_High_Speed)

Spin_speed = FitParam(name = 'Spin_speed', value = 1000, bounds = [1000,5000], value_type = 'float', display_name='Spin speed', unit='rpm', axis_type = 'linear')
params.append(Spin_speed)

Run the optimization

[4]:
# Define the Agent and the target metric/loss function
from optimpv.general.SuggestOnlyAgent import SuggestOnlyAgent
threshold = [0.5, 50] # thresholds for the metrics
suggest = SuggestOnlyAgent(params,exp_format=['PLQY', 'FWHM'],minimize=[False,True],name=None,threshold=threshold)

[5]:
# Define the optimizer
parameter_constraints =[f'Cs_fraction + Fa_fraction <= 1']

optimizer = PymooOptimizer(params=params, agents=suggest, algorithm='NSGA2', pop_size=6, n_gen=1, name='pymoo_single_obj', verbose_logging=True,max_parallelism=20,existing_data=df, suggest_only=True, parameter_constraints=parameter_constraints)
[6]:
to_run_next = optimizer.optimize() # run the optimization with pymoo

[INFO 08-13 17:14:15] optimpv.pymooOptimizer: Starting optimization using NSGA2 algorithm
[INFO 08-13 17:14:15] optimpv.pymooOptimizer: Population size: 6, Generations: 1
[INFO 08-13 17:14:15] optimpv.pymooOptimizer: Using existing population of size 128
[INFO 08-13 17:14:15] optimpv.pymooOptimizer: Suggesting new points without running agents
[INFO 08-13 17:14:15] optimpv.pymooOptimizer: Attempt 2: Found 6 valid candidates out of 6 requested
[7]:
# get the best parameters and update the params list in the optimizer and the agent
optimizer.update_params_with_best_balance() # update the params list in the optimizer with the best parameters
suggest.params = optimizer.params # update the params list in the agent with the best parameters

print("Best parameters found:")
for p in optimizer.params:
    print(f"{p.name}: {p.value} {p.unit} ")
Best parameters found:
Cs_fraction: 0.0
Fa_fraction: 0.2
Spin_duration_Antisolvent: 6.0 s
Spin_duration_High_Speed: 41.0 s
Spin_speed: 4500.0 rpm
[8]:
# Plot optimization results
data = optimizer.get_df_from_pymoo()
data = optimizer.rescale_dataframe(data, params)
all_metrics = optimizer.all_metrics
plt.figure()
for i, metric in enumerate(all_metrics):
    if suggest.minimize[i]:
       plt.plot(np.minimum.accumulate(data[metric]), label=metric)
    else:
       plt.plot(np.maximum.accumulate(data[metric]), label=metric)

plt.yscale("log")
plt.xlabel("Iteration")
plt.ylabel("Target")
plt.legend()
plt.title("Best value seen so far")

plt.show()
../_images/examples_Exp_design_MOO_pymoo_11_0.png
[9]:
# print the parameters to that are running in data
print("Parameters to run next:")
print(to_run_next)
Parameters to run next:
   Cs_fraction  Fa_fraction  Spin_duration_Antisolvent  \
0     0.462828     0.120416                   5.915060
1     0.386477     0.520864                  15.533100
2     0.077489     0.009327                  23.767857
3     0.517932     0.216770                   8.153620
4     0.500223     0.472368                  21.286282
5     0.607216     0.092475                  25.286421

   Spin_duration_High_Speed   Spin_speed
0                 28.413915  1316.999281
1                 45.368768  3840.053262
2                 50.286409  4119.618779
3                 26.813496  1807.968858
4                 49.173313  4226.851066
5                 32.843114  4178.097473
[10]:
# Plot the density of the exploration of the parameters
# this gives a nice visualization of where the optimizer focused its exploration and may show some correlation between the parameters
plot_dens = True
if plot_dens:
    from optimpv.posterior.posterior import *
    params_orig_dict, best_parameters = {}, {}
    for p in optimizer.params:
        best_parameters[p.name] = p.value

    fig_dens, ax_dens = plot_density_exploration(params, optimizer = optimizer, best_parameters = best_parameters,  optimizer_type = 'pymoo')

../_images/examples_Exp_design_MOO_pymoo_13_0.png