Design of Experiment: Optimize perovskite solar cells efficiency
This notebook is made to use optimPV for experimental design. Here, we show how to load some data from a presampling, and how to use optimPV to suggest the next set of experiment using Bayesian optimization. The goal here is to optimize the processing conditions for a perovskite solar cell to maximize the power conversion efficiency (PCE).
Note: The data used here is real data generated in the i-MEET and HI-ERN labs at the university of Erlangen-Nuremberg (FAU) by Jiyun Zhang for the paper: Autonomous Optimization of Air-Processed Perovskite Solar Cell in a 6D Parameter Space
[1]:
# Import necessary libraries
import warnings, os, sys, shutil
# remove warnings from the output
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import default_rng
import torch, copy, uuid
import pySIMsalabim as sim
from pySIMsalabim.experiments.JV_steady_state import *
import ax, logging
from ax.utils.notebook.plotting import init_notebook_plotting, render
init_notebook_plotting() # for Jupyter notebooks
try:
from optimpv import *
from optimpv.pymooOpti.pymooOptimizer import PymooOptimizer
except Exception as e:
sys.path.append('../') # add the path to the optimpv module
from optimpv import *
from optimpv.pymooOpti.pymooOptimizer import PymooOptimizer
[INFO 08-13 17:17:27] ax.utils.notebook.plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.
[INFO 08-13 17:17:27] ax.utils.notebook.plotting: Please see
(https://ax.dev/tutorials/visualizations.html#Fix-for-plots-that-are-not-rendering)
if visualizations are not rendering.
Get the data
[2]:
# Define the path to the data
data_dir =os.path.join(os.path.abspath('../'),'Data','6D_pero_opti') # path to the data directory
# Load the data
df = pd.read_csv(os.path.join(data_dir,'6D_pero_opti.csv'),sep=r'\s+') # load the data
# Display some information about the data
print(df.describe())
Spin_Speed_1 Duration_t1 Spin_Speed_2 Dispense_Speed Duration_t3 \
count 76.000000 76.000000 76.000000 76.000000 76.000000
mean 1520.486842 18.276316 2273.289474 234.697368 17.315789
std 458.510240 7.000188 501.488845 83.133470 7.919950
min 540.000000 5.000000 1021.000000 16.000000 5.000000
25% 1203.500000 13.750000 2019.500000 201.750000 10.750000
50% 1511.000000 18.000000 2385.500000 247.500000 17.000000
75% 1801.250000 22.000000 2671.000000 277.500000 23.250000
max 2579.000000 34.000000 3000.000000 396.000000 35.000000
Spin_Speed_3 Jsc Voc FF Pmax Vmpp \
count 76.000000 76.000000 76.000000 76.000000 76.000000 76.000000
mean 3717.789474 24.391395 1.021157 0.724692 18.320711 0.825778
std 917.207346 1.872929 0.108197 0.055290 3.486546 0.105413
min 2000.000000 12.231000 0.616300 0.448800 4.690000 0.463000
25% 3002.000000 24.173500 0.987650 0.704400 17.477000 0.785750
50% 3825.000000 24.615000 1.038750 0.734700 18.851500 0.835000
75% 4471.500000 25.279500 1.100725 0.759925 20.347250 0.895000
max 5000.000000 25.938000 1.162200 0.810900 23.729000 0.990100
Rseries Rshunt
count 76.000000 76.000000
mean 3720.112882 2763.793289
std 1487.516713 1956.263177
min 92.129000 2.990000
25% 3146.452500 228.750000
50% 3739.157000 3290.000000
75% 4567.406250 4285.000000
max 6513.151000 6860.000000
Define the parameters for the simulation
[3]:
params = [] # list of parameters to be optimized
Spin_Speed_1 = FitParam(name = 'Spin_Speed_1', value = 1000, bounds = [500,3000], value_type = 'float', display_name='Spin Speed 1', unit='rpm', axis_type = 'linear')
params.append(Spin_Speed_1)
Duration_t1 = FitParam(name = 'Duration_t1', value = 10, bounds = [5,35], value_type = 'float', display_name='Duration t1', unit='s', axis_type = 'linear')
params.append(Duration_t1)
Spin_Speed_2 = FitParam(name = 'Spin_Speed_2', value = 1000, bounds = [1000,3000], value_type = 'float', display_name='Spin Speed 2', unit='rpm', axis_type = 'linear')
params.append(Spin_Speed_2)
Dispense_Speed = FitParam(name = 'Dispense_Speed', value = 100, bounds = [10,400], value_type = 'float', display_name='Dispense Speed', unit='rpm', axis_type = 'linear')
params.append(Dispense_Speed)
Duration_t3 = FitParam(name = 'Duration_t3', value = 10, bounds = [5,35], value_type = 'float', display_name='Duration t3', unit='s', axis_type = 'linear')
params.append(Duration_t3)
Spin_Speed_3 = FitParam(name = 'Spin_Speed_3', value = 3000, bounds = [2000,5000], value_type = 'float', display_name='Spin Speed 3', unit='rpm', axis_type = 'linear')
params.append(Spin_Speed_3)
Run the optimization
[4]:
# Define the Agent and the target metric/loss function
from optimpv.general.SuggestOnlyAgent import SuggestOnlyAgent
suggest = SuggestOnlyAgent(params,exp_format='Pmax',minimize=False,tracking_exp_format=['Jsc','Voc','FF'],name=None)
[5]:
# Define the optimizer
optimizer = PymooOptimizer(params=params, agents=suggest, algorithm='GA', pop_size=6, n_gen=1, name='pymoo_single_obj', verbose_logging=True,max_parallelism=20,existing_data=df, suggest_only=True)
[6]:
to_run_next = optimizer.optimize() # run the optimization with pymoo
print(to_run_next)
[INFO 08-13 17:17:28] optimpv.pymooOptimizer: Starting optimization using GA algorithm
[INFO 08-13 17:17:28] optimpv.pymooOptimizer: Population size: 6, Generations: 1
[INFO 08-13 17:17:28] optimpv.pymooOptimizer: Using existing population of size 76
[INFO 08-13 17:17:28] optimpv.pymooOptimizer: Suggesting new points without running agents
Spin_Speed_1 Duration_t1 Spin_Speed_2 Dispense_Speed Duration_t3 \
0 1927.425837 33.842799 2078.829767 102.564890 27.168043
1 1936.434155 10.021594 2719.939593 76.719177 26.926563
2 2098.990526 26.983773 1960.701115 278.404493 9.275085
3 2483.290223 7.124224 2020.796262 97.833661 32.554338
4 2299.473176 18.599020 1841.925563 141.728898 11.595574
5 1645.500855 16.122172 1998.086794 316.325267 33.340630
Spin_Speed_3
0 3555.774222
1 4478.346843
2 3675.656867
3 4326.728438
4 4608.387972
5 2073.586546
[7]:
# get the best parameters and update the params list in the optimizer and the agent
optimizer.update_params_with_best_balance() # update the params list in the optimizer with the best parameters
suggest.params = optimizer.params # update the params list in the agent with the best parameters
print("Best parameters found:")
for p in optimizer.params:
print(f"{p.name}: {p.value} {p.unit} ")
Best parameters found:
Spin_Speed_1: 1165 rpm
Duration_t1: 23 s
Spin_Speed_2: 2063 rpm
Dispense_Speed: 241 rpm
Duration_t3: 32 s
Spin_Speed_3: 2863 rpm
[8]:
# Plot optimization results
data = optimizer.get_df_from_pymoo()
all_metrics = optimizer.all_metrics
all_minimize = optimizer.all_minimize
plt.figure()
for i, metric in enumerate(all_metrics):
if all_minimize[i]:
plt.plot(np.minimum.accumulate(data[metric]), label=metric, )
else:
plt.plot(np.maximum.accumulate(data[metric]), label=metric, )
plt.yscale("log")
plt.xlabel("Iteration")
plt.ylabel("Target")
plt.legend()
plt.title("Best value seen so far")
print("Best value seen so far is ", max(data[all_metrics[0]]), "at iteration ", int(data[all_metrics[0]].idxmin()))
plt.show()
Best value seen so far is 23.729 at iteration 27

[9]:
# Plot the density of the exploration of the parameters
# this gives a nice visualization of where the optimizer focused its exploration and may show some correlation between the parameters
plot_dens = True
if plot_dens:
from optimpv.posterior.posterior import *
params_orig_dict, best_parameters = {}, {}
for p in optimizer.params:
best_parameters[p.name] = p.value
fig_dens, ax_dens = plot_density_exploration(params, optimizer = optimizer, best_parameters = best_parameters, optimizer_type = 'pymoo')
