In [1]:
Copied!
# ruff: noqa: E402, F601, E741
# ruff: noqa: E402, F601, E741
Device Architecture Evolution in Perovskite Solar Cells
This notebook analyzes the temporal evolution of device architectures and functional layer materials in perovskite solar cells using data from the Perovskite Database in NOMAD.
In [2]:
Copied!
from plotly_theme import register_template, set_defaults
register_template()
set_defaults()
from plotly_theme import register_template, set_defaults
register_template()
set_defaults()
Setup and Data Loading¶
In [ ]:
Copied!
# Load the data from the parquet file into a DataFrame
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import PathPatch
from matplotlib.path import Path
from scipy import stats
# Configure matplotlib to match plotly theme
plt.rcParams.update(
{
'font.family': 'Arial',
'font.size': 10,
'axes.labelsize': 11,
'axes.titlesize': 12,
'xtick.labelsize': 10,
'ytick.labelsize': 10,
'legend.fontsize': 10,
'figure.titlesize': 12,
'axes.linewidth': 1,
'axes.edgecolor': 'black',
'axes.facecolor': 'white',
'figure.facecolor': 'white',
'grid.color': 'lightgray',
'grid.linewidth': 0.5,
'axes.grid': False,
'axes.spines.top': True,
'axes.spines.right': True,
'savefig.dpi': 300,
'savefig.bbox': 'tight',
'savefig.facecolor': 'white',
}
)
df = pd.read_parquet('perovskite_solar_cell_database.parquet')
# Load the data from the parquet file into a DataFrame
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import PathPatch
from matplotlib.path import Path
from scipy import stats
# Configure matplotlib to match plotly theme
plt.rcParams.update(
{
'font.family': 'Arial',
'font.size': 10,
'axes.labelsize': 11,
'axes.titlesize': 12,
'xtick.labelsize': 10,
'ytick.labelsize': 10,
'legend.fontsize': 10,
'figure.titlesize': 12,
'axes.linewidth': 1,
'axes.edgecolor': 'black',
'axes.facecolor': 'white',
'figure.facecolor': 'white',
'grid.color': 'lightgray',
'grid.linewidth': 0.5,
'axes.grid': False,
'axes.spines.top': True,
'axes.spines.right': True,
'savefig.dpi': 300,
'savefig.bbox': 'tight',
'savefig.facecolor': 'white',
}
)
df = pd.read_parquet('perovskite_solar_cell_database.parquet')
In [ ]:
Copied!
# Set the source_database column based on who entered the data
df['source_database'] = df['data.ref.name_of_person_entering_the_data'].apply(
lambda x: 'LLM Extracted' if x == 'LLM Extraction' else 'Manual Entry'
)
# Set the source_database column based on who entered the data
df['source_database'] = df['data.ref.name_of_person_entering_the_data'].apply(
lambda x: 'LLM Extracted' if x == 'LLM Extraction' else 'Manual Entry'
)
In [5]:
Copied!
df['pub_date'] = pd.to_datetime(df['data.ref.publication_date'], errors='coerce')
df['pub_year'] = df['pub_date'].dt.year
df['ETL'] = df['results.properties.optoelectronic.solar_cell.electron_transport_layer']
df['HTL'] = df['results.properties.optoelectronic.solar_cell.hole_transport_layer']
df['absorber'] = df['results.properties.optoelectronic.solar_cell.absorber']
df['PCE'] = df['results.properties.optoelectronic.solar_cell.efficiency']
df['architecture'] = df['data.cell.architecture']
df_early = df[(df['pub_year'] < 2022)]
df_late = df[(df['pub_year'] >= 2022)]
df['pub_date'] = pd.to_datetime(df['data.ref.publication_date'], errors='coerce')
df['pub_year'] = df['pub_date'].dt.year
df['ETL'] = df['results.properties.optoelectronic.solar_cell.electron_transport_layer']
df['HTL'] = df['results.properties.optoelectronic.solar_cell.hole_transport_layer']
df['absorber'] = df['results.properties.optoelectronic.solar_cell.absorber']
df['PCE'] = df['results.properties.optoelectronic.solar_cell.efficiency']
df['architecture'] = df['data.cell.architecture']
df_early = df[(df['pub_year'] < 2022)]
df_late = df[(df['pub_year'] >= 2022)]
In [6]:
Copied!
print(f'Early period entries: {len(df_early)}')
print(f'Late period entries: {len(df_late)}')
print(f'Early period entries: {len(df_early)}')
print(f'Late period entries: {len(df_late)}')
Early period entries: 43701 Late period entries: 4672
Data Preparation and Filtering¶
In [7]:
Copied!
arch_data = (
df[df['architecture'].isin(['nip', 'pin'])]
.groupby(['pub_year', 'architecture'])
.size()
.unstack(fill_value=0)
)
arch_data = (
df[df['architecture'].isin(['nip', 'pin'])]
.groupby(['pub_year', 'architecture'])
.size()
.unstack(fill_value=0)
)
In [8]:
Copied!
arch_data
arch_data
Out[8]:
| architecture | nip | pin |
|---|---|---|
| pub_year | ||
| 2009.0 | 2 | 0 |
| 2011.0 | 6 | 0 |
| 2012.0 | 18 | 0 |
| 2013.0 | 215 | 11 |
| 2014.0 | 1112 | 431 |
| 2015.0 | 2563 | 968 |
| 2016.0 | 4312 | 1899 |
| 2017.0 | 5719 | 2301 |
| 2018.0 | 7284 | 3312 |
| 2019.0 | 7408 | 3218 |
| 2020.0 | 1243 | 625 |
| 2021.0 | 566 | 226 |
| 2022.0 | 592 | 445 |
| 2023.0 | 873 | 394 |
| 2024.0 | 1031 | 534 |
| 2025.0 | 367 | 286 |
In [9]:
Copied!
arch_pct = arch_data.div(arch_data.sum(axis=1), axis=0) * 100
pin_pct = arch_pct['pin'].to_dict()
arch_pct = arch_data.div(arch_data.sum(axis=1), axis=0) * 100
pin_pct = arch_pct['pin'].to_dict()
In [20]:
Copied!
def get_three_layer_flows(df_subset):
abs_htl_flows = {}
htl_etl_flows = {}
for _, row in df_subset.iterrows():
abs_arr = row['absorber']
htl_arr = row['HTL']
etl_arr = row['ETL']
if abs_arr is None or htl_arr is None or etl_arr is None:
continue
if (
not hasattr(abs_arr, '__iter__')
or not hasattr(htl_arr, '__iter__')
or not hasattr(etl_arr, '__iter__')
):
continue
# Simplify absorber
for absorber in abs_arr:
if not absorber or absorber == 'Unknown':
continue
if absorber == 'MAPbI':
abs_simple = 'MAPbI'
elif 'CsFA' in absorber or 'FAMA' in absorber:
abs_simple = 'Mixed'
elif absorber == 'FAPbI':
abs_simple = 'FAPbI'
else:
abs_simple = 'Other'
for htl in htl_arr:
if not htl or htl in {'none', 'Unknown'}:
continue
if 'Spiro' in htl:
htl_simple = 'Spiro'
elif 'PEDOT' in htl:
htl_simple = 'PEDOT:PSS'
elif 'NiO' in htl:
htl_simple = 'NiOx'
elif 'PTAA' in htl:
htl_simple = 'PTAA'
elif 'PACz' in htl or 'SAM' in htl:
htl_simple = 'SAMs'
else:
htl_simple = 'Other'
abs_htl_flows[(abs_simple, htl_simple)] = (
abs_htl_flows.get((abs_simple, htl_simple), 0) + 1
)
for etl in etl_arr:
if not etl or etl in {'none', 'Unknown'}:
continue
if 'TiO2' in etl:
etl_simple = 'TiO2'
elif 'SnO2' in etl:
etl_simple = 'SnO2'
elif 'PCBM' in etl or 'PC61BM' in etl:
etl_simple = 'PCBM'
elif etl == 'C60':
etl_simple = 'C60'
else:
etl_simple = 'Other'
htl_etl_flows[(htl_simple, etl_simple)] = (
htl_etl_flows.get((htl_simple, etl_simple), 0) + 1
)
return abs_htl_flows, htl_etl_flows
pre_abs_htl_pin, pre_htl_etl_pin = get_three_layer_flows(
df_early[df_early['architecture'].isin(['pin'])]
)
post_abs_htl_pin, post_htl_etl_pin = get_three_layer_flows(
df_late[df_late['architecture'].isin(['pin'])]
)
pre_abs_htl_nip, pre_htl_etl_nip = get_three_layer_flows(
df_early[df_early['architecture'].isin(['nip'])]
)
post_abs_htl_nip, post_htl_etl_nip = get_three_layer_flows(
df_late[df_late['architecture'].isin(['nip'])]
)
def get_three_layer_flows(df_subset):
abs_htl_flows = {}
htl_etl_flows = {}
for _, row in df_subset.iterrows():
abs_arr = row['absorber']
htl_arr = row['HTL']
etl_arr = row['ETL']
if abs_arr is None or htl_arr is None or etl_arr is None:
continue
if (
not hasattr(abs_arr, '__iter__')
or not hasattr(htl_arr, '__iter__')
or not hasattr(etl_arr, '__iter__')
):
continue
# Simplify absorber
for absorber in abs_arr:
if not absorber or absorber == 'Unknown':
continue
if absorber == 'MAPbI':
abs_simple = 'MAPbI'
elif 'CsFA' in absorber or 'FAMA' in absorber:
abs_simple = 'Mixed'
elif absorber == 'FAPbI':
abs_simple = 'FAPbI'
else:
abs_simple = 'Other'
for htl in htl_arr:
if not htl or htl in {'none', 'Unknown'}:
continue
if 'Spiro' in htl:
htl_simple = 'Spiro'
elif 'PEDOT' in htl:
htl_simple = 'PEDOT:PSS'
elif 'NiO' in htl:
htl_simple = 'NiOx'
elif 'PTAA' in htl:
htl_simple = 'PTAA'
elif 'PACz' in htl or 'SAM' in htl:
htl_simple = 'SAMs'
else:
htl_simple = 'Other'
abs_htl_flows[(abs_simple, htl_simple)] = (
abs_htl_flows.get((abs_simple, htl_simple), 0) + 1
)
for etl in etl_arr:
if not etl or etl in {'none', 'Unknown'}:
continue
if 'TiO2' in etl:
etl_simple = 'TiO2'
elif 'SnO2' in etl:
etl_simple = 'SnO2'
elif 'PCBM' in etl or 'PC61BM' in etl:
etl_simple = 'PCBM'
elif etl == 'C60':
etl_simple = 'C60'
else:
etl_simple = 'Other'
htl_etl_flows[(htl_simple, etl_simple)] = (
htl_etl_flows.get((htl_simple, etl_simple), 0) + 1
)
return abs_htl_flows, htl_etl_flows
pre_abs_htl_pin, pre_htl_etl_pin = get_three_layer_flows(
df_early[df_early['architecture'].isin(['pin'])]
)
post_abs_htl_pin, post_htl_etl_pin = get_three_layer_flows(
df_late[df_late['architecture'].isin(['pin'])]
)
pre_abs_htl_nip, pre_htl_etl_nip = get_three_layer_flows(
df_early[df_early['architecture'].isin(['nip'])]
)
post_abs_htl_nip, post_htl_etl_nip = get_three_layer_flows(
df_late[df_late['architecture'].isin(['nip'])]
)
Analysis Functions and Material Flow Calculations¶
In [16]:
Copied!
def get_global_color_mapping(*flow_pairs):
"""Create consistent color mappings across multiple datasets.
Args:
*flow_pairs: Tuples of (abs_htl_flows, htl_etl_flows) for each dataset
Returns:
Tuple of (abs_colors, htl_colors, etl_colors) dictionaries
"""
# Define color palettes matching plotly theme
COLOR_PALETTES = {
'absorber': ['#1f77b4', '#ff0e5a', '#e9c821', '#ba78d6', '#4cd8a5', '#ff9408'],
'htl': ['#ba78d6', '#86d9ea', '#4cd8a5', '#7f7f7f', '#e9c821', '#17becf'],
'etl': ['#1f77b4', '#4cd8a5', '#ff9408', '#ff0e5a', '#ba78d6', '#86d9ea'],
}
# Aggregate counts across all datasets
abs_counts = {}
htl_counts = {}
etl_counts = {}
for abs_htl_flows, htl_etl_flows in flow_pairs:
for (abs, htl), count in abs_htl_flows.items():
abs_counts[abs] = abs_counts.get(abs, 0) + count
htl_counts[htl] = htl_counts.get(htl, 0) + count
for (htl, etl), count in htl_etl_flows.items():
htl_counts[htl] = htl_counts.get(htl, 0) + count
etl_counts[etl] = etl_counts.get(etl, 0) + count
# Sort by total frequency across all datasets
abs_labels = sorted(abs_counts.keys(), key=abs_counts.get, reverse=True)
htl_labels = sorted(htl_counts.keys(), key=htl_counts.get, reverse=True)
etl_labels = sorted(etl_counts.keys(), key=etl_counts.get, reverse=True)
# Create consistent color mappings
abs_colors = {
label: COLOR_PALETTES['absorber'][i % len(COLOR_PALETTES['absorber'])]
for i, label in enumerate(abs_labels)
}
htl_colors = {
label: COLOR_PALETTES['htl'][i % len(COLOR_PALETTES['htl'])]
for i, label in enumerate(htl_labels)
}
etl_colors = {
label: COLOR_PALETTES['etl'][i % len(COLOR_PALETTES['etl'])]
for i, label in enumerate(etl_labels)
}
return abs_colors, htl_colors, etl_colors
def get_global_color_mapping(*flow_pairs):
"""Create consistent color mappings across multiple datasets.
Args:
*flow_pairs: Tuples of (abs_htl_flows, htl_etl_flows) for each dataset
Returns:
Tuple of (abs_colors, htl_colors, etl_colors) dictionaries
"""
# Define color palettes matching plotly theme
COLOR_PALETTES = {
'absorber': ['#1f77b4', '#ff0e5a', '#e9c821', '#ba78d6', '#4cd8a5', '#ff9408'],
'htl': ['#ba78d6', '#86d9ea', '#4cd8a5', '#7f7f7f', '#e9c821', '#17becf'],
'etl': ['#1f77b4', '#4cd8a5', '#ff9408', '#ff0e5a', '#ba78d6', '#86d9ea'],
}
# Aggregate counts across all datasets
abs_counts = {}
htl_counts = {}
etl_counts = {}
for abs_htl_flows, htl_etl_flows in flow_pairs:
for (abs, htl), count in abs_htl_flows.items():
abs_counts[abs] = abs_counts.get(abs, 0) + count
htl_counts[htl] = htl_counts.get(htl, 0) + count
for (htl, etl), count in htl_etl_flows.items():
htl_counts[htl] = htl_counts.get(htl, 0) + count
etl_counts[etl] = etl_counts.get(etl, 0) + count
# Sort by total frequency across all datasets
abs_labels = sorted(abs_counts.keys(), key=abs_counts.get, reverse=True)
htl_labels = sorted(htl_counts.keys(), key=htl_counts.get, reverse=True)
etl_labels = sorted(etl_counts.keys(), key=etl_counts.get, reverse=True)
# Create consistent color mappings
abs_colors = {
label: COLOR_PALETTES['absorber'][i % len(COLOR_PALETTES['absorber'])]
for i, label in enumerate(abs_labels)
}
htl_colors = {
label: COLOR_PALETTES['htl'][i % len(COLOR_PALETTES['htl'])]
for i, label in enumerate(htl_labels)
}
etl_colors = {
label: COLOR_PALETTES['etl'][i % len(COLOR_PALETTES['etl'])]
for i, label in enumerate(etl_labels)
}
return abs_colors, htl_colors, etl_colors
In [ ]:
Copied!
def format_chemical_formula(name):
"""Convert chemical formulas to proper subscript notation using mathtext."""
formula_map = {
'C60': r'C$_{60}$',
'SnO2': r'SnO$_2$',
'TiO2': r'TiO$_2$',
'NiOx': r'NiO$_x$',
'PbI2': r'PbI$_2$',
'ZnO2': r'ZnO$_2$',
'MAPbI': r'MAPbI', # no change needed
'FAPbI': r'FAPbI',
}
return formula_map.get(name, name)
def draw_three_layer_alluvial( # noqa: PLR0913
ax,
abs_htl_flows,
htl_etl_flows,
abs_colors,
htl_colors,
etl_colors,
title='',
panel_label='',
):
"""Draw a 3-layer alluvial diagram: Absorber → HTL → ETL
Uses provided color mappings for consistency across multiple plots.
"""
# Extract unique materials from flows and sort by frequency
abs_counts = {}
htl_counts = {}
etl_counts = {}
for (abs, htl), count in abs_htl_flows.items():
abs_counts[abs] = abs_counts.get(abs, 0) + count
htl_counts[htl] = htl_counts.get(htl, 0) + count
for (htl, etl), count in htl_etl_flows.items():
htl_counts[htl] = htl_counts.get(htl, 0) + count
etl_counts[etl] = etl_counts.get(etl, 0) + count
# Sort materials by count (most common first) within this dataset
abs_labels = sorted(abs_counts.keys(), key=abs_counts.get, reverse=True)
htl_labels = sorted(htl_counts.keys(), key=htl_counts.get, reverse=True)
etl_labels = sorted(etl_counts.keys(), key=etl_counts.get, reverse=True)
# Calculate totals for normalization
total_abs_htl = sum(abs_htl_flows.values())
total_htl_etl = sum(htl_etl_flows.values())
# Calculate heights for each layer
abs_heights = {label: abs_counts[label] / total_abs_htl for label in abs_labels}
htl_heights_left = {}
htl_heights_right = {}
for label in htl_labels:
htl_heights_left[label] = (
sum(v for (a, h), v in abs_htl_flows.items() if h == label) / total_abs_htl
)
htl_heights_right[label] = (
sum(v for (h, e), v in htl_etl_flows.items() if h == label) / total_htl_etl
)
etl_heights = {label: etl_counts[label] / total_htl_etl for label in etl_labels}
# Positions: x = 0 (Absorber), 0.4 (HTL), 0.8 (ETL)
x_abs, x_htl, x_etl = 0, 0.4, 0.8
bar_width = 0.06
# Draw Absorber nodes
y_pos = 0
abs_positions = {}
for label in abs_labels:
height = abs_heights[label]
if height > 0.01:
rect = plt.Rectangle(
(x_abs, y_pos),
bar_width,
height,
facecolor=abs_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
abs_positions[label] = (y_pos, y_pos + height)
if height > 0.03:
ax.text(
x_abs - 0.02,
y_pos + height / 2,
format_chemical_formula(label),
ha='right',
va='center',
fontsize=7,
fontweight='normal',
)
y_pos += height + 0.008
# Draw HTL nodes (use average of left and right heights)
y_pos = 0
htl_positions = {}
for label in htl_labels:
height = (htl_heights_left.get(label, 0) + htl_heights_right.get(label, 0)) / 2
if height > 0.01:
rect = plt.Rectangle(
(x_htl, y_pos),
bar_width,
height,
facecolor=htl_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
htl_positions[label] = (y_pos, y_pos + height)
if height > 0.04:
ax.text(
x_htl + bar_width / 2,
y_pos + height / 2,
format_chemical_formula(label),
ha='center',
va='center',
fontsize=7,
fontweight='normal',
rotation=90,
)
y_pos += height + 0.008
# Draw ETL nodes
y_pos = 0
etl_positions = {}
for label in etl_labels:
height = etl_heights[label]
if height > 0.01:
rect = plt.Rectangle(
(x_etl, y_pos),
bar_width,
height,
facecolor=etl_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
etl_positions[label] = (y_pos, y_pos + height)
if height > 0.03:
ax.text(
x_etl + bar_width + 0.02,
y_pos + height / 2,
format_chemical_formula(label),
ha='left',
va='center',
fontsize=7,
fontweight='normal',
)
y_pos += height + 0.008
# Draw flows: Absorber → HTL
abs_cursors = {l: abs_positions[l][0] for l in abs_positions}
htl_cursors_left = {l: htl_positions[l][0] for l in htl_positions}
for (abs_label, htl_label), value in sorted(
abs_htl_flows.items(), key=lambda x: -x[1]
):
if abs_label not in abs_positions or htl_label not in htl_positions:
continue
height = value / total_abs_htl
if height < 0.005:
continue
y_abs = abs_cursors[abs_label]
y_htl = htl_cursors_left[htl_label]
verts = [
(x_abs + bar_width, y_abs),
(x_abs + bar_width + 0.1, y_abs),
(x_htl - 0.1, y_htl),
(x_htl, y_htl),
(x_htl, y_htl + height),
(x_htl - 0.1, y_htl + height),
(x_abs + bar_width + 0.1, y_abs + height),
(x_abs + bar_width, y_abs + height),
(x_abs + bar_width, y_abs),
]
codes = [
Path.MOVETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.LINETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.CLOSEPOLY,
]
path = Path(verts, codes)
patch = PathPatch(
path, facecolor=abs_colors[abs_label], alpha=0.4, edgecolor='none'
)
ax.add_patch(patch)
abs_cursors[abs_label] += height
htl_cursors_left[htl_label] += height
# Draw flows: HTL → ETL
htl_cursors_right = {l: htl_positions[l][0] for l in htl_positions}
etl_cursors = {l: etl_positions[l][0] for l in etl_positions}
for (htl_label, etl_label), value in sorted(
htl_etl_flows.items(), key=lambda x: -x[1]
):
if htl_label not in htl_positions or etl_label not in etl_positions:
continue
height = value / total_htl_etl
if height < 0.005:
continue
y_htl = htl_cursors_right[htl_label]
y_etl = etl_cursors[etl_label]
verts = [
(x_htl + bar_width, y_htl),
(x_htl + bar_width + 0.1, y_htl),
(x_etl - 0.1, y_etl),
(x_etl, y_etl),
(x_etl, y_etl + height),
(x_etl - 0.1, y_etl + height),
(x_htl + bar_width + 0.1, y_htl + height),
(x_htl + bar_width, y_htl + height),
(x_htl + bar_width, y_htl),
]
codes = [
Path.MOVETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.LINETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.CLOSEPOLY,
]
path = Path(verts, codes)
patch = PathPatch(
path, facecolor=htl_colors[htl_label], alpha=0.4, edgecolor='none'
)
ax.add_patch(patch)
htl_cursors_right[htl_label] += height
etl_cursors[etl_label] += height
# Add layer labels
ax.text(x_abs + bar_width / 2, -0.02, 'Absorber', ha='center', va='top', fontsize=8)
ax.text(x_htl + bar_width / 2, -0.02, 'HTL', ha='center', va='top', fontsize=8)
ax.text(x_etl + bar_width / 2, -0.02, 'ETL', ha='center', va='top', fontsize=8)
# Add panel label (a, b, etc.)
if panel_label:
ax.text(
-0.15,
1.02,
panel_label,
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax.transAxes,
)
# Add title
if title:
ax.text(
0.5,
1.02,
title,
fontsize=10,
fontweight='bold',
ha='center',
va='bottom',
transform=ax.transAxes,
)
ax.set_xlim(-0.15, 1.0)
ax.set_ylim(-0.12, 1.05)
ax.axis('off')
def format_chemical_formula(name):
"""Convert chemical formulas to proper subscript notation using mathtext."""
formula_map = {
'C60': r'C$_{60}$',
'SnO2': r'SnO$_2$',
'TiO2': r'TiO$_2$',
'NiOx': r'NiO$_x$',
'PbI2': r'PbI$_2$',
'ZnO2': r'ZnO$_2$',
'MAPbI': r'MAPbI', # no change needed
'FAPbI': r'FAPbI',
}
return formula_map.get(name, name)
def draw_three_layer_alluvial( # noqa: PLR0913
ax,
abs_htl_flows,
htl_etl_flows,
abs_colors,
htl_colors,
etl_colors,
title='',
panel_label='',
):
"""Draw a 3-layer alluvial diagram: Absorber → HTL → ETL
Uses provided color mappings for consistency across multiple plots.
"""
# Extract unique materials from flows and sort by frequency
abs_counts = {}
htl_counts = {}
etl_counts = {}
for (abs, htl), count in abs_htl_flows.items():
abs_counts[abs] = abs_counts.get(abs, 0) + count
htl_counts[htl] = htl_counts.get(htl, 0) + count
for (htl, etl), count in htl_etl_flows.items():
htl_counts[htl] = htl_counts.get(htl, 0) + count
etl_counts[etl] = etl_counts.get(etl, 0) + count
# Sort materials by count (most common first) within this dataset
abs_labels = sorted(abs_counts.keys(), key=abs_counts.get, reverse=True)
htl_labels = sorted(htl_counts.keys(), key=htl_counts.get, reverse=True)
etl_labels = sorted(etl_counts.keys(), key=etl_counts.get, reverse=True)
# Calculate totals for normalization
total_abs_htl = sum(abs_htl_flows.values())
total_htl_etl = sum(htl_etl_flows.values())
# Calculate heights for each layer
abs_heights = {label: abs_counts[label] / total_abs_htl for label in abs_labels}
htl_heights_left = {}
htl_heights_right = {}
for label in htl_labels:
htl_heights_left[label] = (
sum(v for (a, h), v in abs_htl_flows.items() if h == label) / total_abs_htl
)
htl_heights_right[label] = (
sum(v for (h, e), v in htl_etl_flows.items() if h == label) / total_htl_etl
)
etl_heights = {label: etl_counts[label] / total_htl_etl for label in etl_labels}
# Positions: x = 0 (Absorber), 0.4 (HTL), 0.8 (ETL)
x_abs, x_htl, x_etl = 0, 0.4, 0.8
bar_width = 0.06
# Draw Absorber nodes
y_pos = 0
abs_positions = {}
for label in abs_labels:
height = abs_heights[label]
if height > 0.01:
rect = plt.Rectangle(
(x_abs, y_pos),
bar_width,
height,
facecolor=abs_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
abs_positions[label] = (y_pos, y_pos + height)
if height > 0.03:
ax.text(
x_abs - 0.02,
y_pos + height / 2,
format_chemical_formula(label),
ha='right',
va='center',
fontsize=7,
fontweight='normal',
)
y_pos += height + 0.008
# Draw HTL nodes (use average of left and right heights)
y_pos = 0
htl_positions = {}
for label in htl_labels:
height = (htl_heights_left.get(label, 0) + htl_heights_right.get(label, 0)) / 2
if height > 0.01:
rect = plt.Rectangle(
(x_htl, y_pos),
bar_width,
height,
facecolor=htl_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
htl_positions[label] = (y_pos, y_pos + height)
if height > 0.04:
ax.text(
x_htl + bar_width / 2,
y_pos + height / 2,
format_chemical_formula(label),
ha='center',
va='center',
fontsize=7,
fontweight='normal',
rotation=90,
)
y_pos += height + 0.008
# Draw ETL nodes
y_pos = 0
etl_positions = {}
for label in etl_labels:
height = etl_heights[label]
if height > 0.01:
rect = plt.Rectangle(
(x_etl, y_pos),
bar_width,
height,
facecolor=etl_colors[label],
edgecolor='white',
linewidth=1,
)
ax.add_patch(rect)
etl_positions[label] = (y_pos, y_pos + height)
if height > 0.03:
ax.text(
x_etl + bar_width + 0.02,
y_pos + height / 2,
format_chemical_formula(label),
ha='left',
va='center',
fontsize=7,
fontweight='normal',
)
y_pos += height + 0.008
# Draw flows: Absorber → HTL
abs_cursors = {l: abs_positions[l][0] for l in abs_positions}
htl_cursors_left = {l: htl_positions[l][0] for l in htl_positions}
for (abs_label, htl_label), value in sorted(
abs_htl_flows.items(), key=lambda x: -x[1]
):
if abs_label not in abs_positions or htl_label not in htl_positions:
continue
height = value / total_abs_htl
if height < 0.005:
continue
y_abs = abs_cursors[abs_label]
y_htl = htl_cursors_left[htl_label]
verts = [
(x_abs + bar_width, y_abs),
(x_abs + bar_width + 0.1, y_abs),
(x_htl - 0.1, y_htl),
(x_htl, y_htl),
(x_htl, y_htl + height),
(x_htl - 0.1, y_htl + height),
(x_abs + bar_width + 0.1, y_abs + height),
(x_abs + bar_width, y_abs + height),
(x_abs + bar_width, y_abs),
]
codes = [
Path.MOVETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.LINETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.CLOSEPOLY,
]
path = Path(verts, codes)
patch = PathPatch(
path, facecolor=abs_colors[abs_label], alpha=0.4, edgecolor='none'
)
ax.add_patch(patch)
abs_cursors[abs_label] += height
htl_cursors_left[htl_label] += height
# Draw flows: HTL → ETL
htl_cursors_right = {l: htl_positions[l][0] for l in htl_positions}
etl_cursors = {l: etl_positions[l][0] for l in etl_positions}
for (htl_label, etl_label), value in sorted(
htl_etl_flows.items(), key=lambda x: -x[1]
):
if htl_label not in htl_positions or etl_label not in etl_positions:
continue
height = value / total_htl_etl
if height < 0.005:
continue
y_htl = htl_cursors_right[htl_label]
y_etl = etl_cursors[etl_label]
verts = [
(x_htl + bar_width, y_htl),
(x_htl + bar_width + 0.1, y_htl),
(x_etl - 0.1, y_etl),
(x_etl, y_etl),
(x_etl, y_etl + height),
(x_etl - 0.1, y_etl + height),
(x_htl + bar_width + 0.1, y_htl + height),
(x_htl + bar_width, y_htl + height),
(x_htl + bar_width, y_htl),
]
codes = [
Path.MOVETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.LINETO,
Path.CURVE4,
Path.CURVE4,
Path.CURVE4,
Path.CLOSEPOLY,
]
path = Path(verts, codes)
patch = PathPatch(
path, facecolor=htl_colors[htl_label], alpha=0.4, edgecolor='none'
)
ax.add_patch(patch)
htl_cursors_right[htl_label] += height
etl_cursors[etl_label] += height
# Add layer labels
ax.text(x_abs + bar_width / 2, -0.02, 'Absorber', ha='center', va='top', fontsize=8)
ax.text(x_htl + bar_width / 2, -0.02, 'HTL', ha='center', va='top', fontsize=8)
ax.text(x_etl + bar_width / 2, -0.02, 'ETL', ha='center', va='top', fontsize=8)
# Add panel label (a, b, etc.)
if panel_label:
ax.text(
-0.15,
1.02,
panel_label,
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax.transAxes,
)
# Add title
if title:
ax.text(
0.5,
1.02,
title,
fontsize=10,
fontweight='bold',
ha='center',
va='bottom',
transform=ax.transAxes,
)
ax.set_xlim(-0.15, 1.0)
ax.set_ylim(-0.12, 1.05)
ax.axis('off')
In [34]:
Copied!
def get_architecture_evolution(df):
"""Get PIN vs NIP percentages by year."""
arch_by_year = (
df[df['architecture'].isin(['nip', 'pin'])]
.groupby(['pub_year', 'architecture'])
.size()
.unstack(fill_value=0)
)
arch_pct = arch_by_year.div(arch_by_year.sum(axis=1), axis=0) * 100
return arch_pct
def get_etl_evolution(df):
"""Get TiO2 and SnO2 percentages by year."""
etl_by_year = {}
for year in sorted(df['pub_year'].dropna().unique()):
df_year = df[df['pub_year'] == year]
all_etls = []
for _, row in df_year.iterrows():
etl_arr = row['ETL']
if etl_arr is None or not hasattr(etl_arr, '__iter__'):
continue
for etl in etl_arr:
if etl and etl not in {'none', 'Unknown'}:
all_etls.append(etl)
if len(all_etls) == 0:
continue
# Count TiO2 and SnO2
tio2_count = sum(1 for e in all_etls if 'TiO2' in e)
sno2_count = sum(1 for e in all_etls if 'SnO2' in e)
total = len(all_etls)
etl_by_year[year] = {
'TiO2': tio2_count / total * 100,
'SnO2': sno2_count / total * 100,
}
return pd.DataFrame(etl_by_year).T
# Get evolution data
arch_evolution = get_architecture_evolution(df)
etl_evolution = get_etl_evolution(df)
print('Architecture evolution:')
print(arch_evolution)
print('\nETL evolution:')
print(etl_evolution)
def get_architecture_evolution(df):
"""Get PIN vs NIP percentages by year."""
arch_by_year = (
df[df['architecture'].isin(['nip', 'pin'])]
.groupby(['pub_year', 'architecture'])
.size()
.unstack(fill_value=0)
)
arch_pct = arch_by_year.div(arch_by_year.sum(axis=1), axis=0) * 100
return arch_pct
def get_etl_evolution(df):
"""Get TiO2 and SnO2 percentages by year."""
etl_by_year = {}
for year in sorted(df['pub_year'].dropna().unique()):
df_year = df[df['pub_year'] == year]
all_etls = []
for _, row in df_year.iterrows():
etl_arr = row['ETL']
if etl_arr is None or not hasattr(etl_arr, '__iter__'):
continue
for etl in etl_arr:
if etl and etl not in {'none', 'Unknown'}:
all_etls.append(etl)
if len(all_etls) == 0:
continue
# Count TiO2 and SnO2
tio2_count = sum(1 for e in all_etls if 'TiO2' in e)
sno2_count = sum(1 for e in all_etls if 'SnO2' in e)
total = len(all_etls)
etl_by_year[year] = {
'TiO2': tio2_count / total * 100,
'SnO2': sno2_count / total * 100,
}
return pd.DataFrame(etl_by_year).T
# Get evolution data
arch_evolution = get_architecture_evolution(df)
etl_evolution = get_etl_evolution(df)
print('Architecture evolution:')
print(arch_evolution)
print('\nETL evolution:')
print(etl_evolution)
Architecture evolution:
architecture nip pin
pub_year
2009.0 100.000000 0.000000
2011.0 100.000000 0.000000
2012.0 100.000000 0.000000
2013.0 95.132743 4.867257
2014.0 72.067401 27.932599
2015.0 72.585670 27.414330
2016.0 69.425213 30.574787
2017.0 71.309227 28.690773
2018.0 68.742922 31.257078
2019.0 69.715791 30.284209
2020.0 66.541756 33.458244
2021.0 71.464646 28.535354
2022.0 57.087753 42.912247
2023.0 68.902920 31.097080
2024.0 65.878594 34.121406
2025.0 56.202144 43.797856
ETL evolution:
TiO2 SnO2
2009.0 100.000000 0.000000
2011.0 100.000000 0.000000
2012.0 94.285714 0.000000
2013.0 77.294686 0.000000
2014.0 65.475743 0.000000
2015.0 62.765407 1.018471
2016.0 57.076476 2.333011
2017.0 55.780584 3.871161
2018.0 48.009616 7.212345
2019.0 44.607704 11.578828
2020.0 39.519946 17.308993
2021.0 35.543562 25.057826
2022.0 24.522489 21.441774
2023.0 29.578438 35.176227
2024.0 22.216187 35.415535
2025.0 20.048309 28.985507
Material Flow Analysis for p-i-n Architecture¶
The following alluvial diagrams visualize the material flows from absorber through hole transport layer (HTL) to electron transport layer (ETL) specifically for p-i-n architectures, comparing pre-2022 and post-2022 periods.
In [40]:
Copied!
# Create consistent color mapping across both periods
abs_colors, htl_colors, etl_colors = get_global_color_mapping(
(pre_abs_htl_pin, pre_htl_etl_pin), (post_abs_htl_pin, post_htl_etl_pin)
)
# Define colors for line plots (from plotly theme)
arch_colors = {'pin': '#ff0e5a', 'nip': '#1f77b4'}
etl_line_colors = {'TiO2': '#1f77b4', 'SnO2': '#4cd8a5'}
# Create 2x2 Nature figure with taller alluvial plots (panels a, b)
# height_ratios makes top row 1.5x taller than bottom row
fig = plt.figure(figsize=(7.2, 6.5))
gs = fig.add_gridspec(2, 2, hspace=0.1, wspace=0.3, height_ratios=[3, 1])
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, 0])
ax4 = fig.add_subplot(gs[1, 1])
# Draw alluvial diagrams (panels a, b)
draw_three_layer_alluvial(
ax1,
pre_abs_htl_pin,
pre_htl_etl_pin,
abs_colors,
htl_colors,
etl_colors,
title='Pre-2022',
panel_label='a',
)
draw_three_layer_alluvial(
ax2,
post_abs_htl_pin,
post_htl_etl_pin,
abs_colors,
htl_colors,
etl_colors,
title='2022+',
panel_label='b',
)
# Draw architecture evolution (panel c)
ax3.text(
-0.25,
1.05,
'c',
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax3.transAxes,
)
for arch in arch_evolution.columns:
ax3.plot(
arch_evolution.index,
arch_evolution[arch],
'o-',
label=arch.upper(),
color=arch_colors[arch],
linewidth=2,
markersize=4,
)
ax3.set_xlabel('Publication Year', fontsize=9)
ax3.set_ylabel('Adoption / %', fontsize=9)
ax3.set_title('Device Architecture Evolution', fontsize=10, fontweight='bold', pad=8)
ax3.legend(frameon=False, fontsize=8, loc='best')
ax3.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax3.set_xlim(2012, 2026)
ax3.set_ylim(0, 105)
# Draw ETL material evolution (panel d)
ax4.text(
-0.25,
1.05,
'd',
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax4.transAxes,
)
for etl in ['TiO2', 'SnO2']:
if etl in etl_evolution.columns:
ax4.plot(
etl_evolution.index,
etl_evolution[etl],
'o-',
label=format_chemical_formula(etl),
color=etl_line_colors[etl],
linewidth=2,
markersize=4,
)
ax4.set_xlabel('Publication Year', fontsize=9)
ax4.set_ylabel('Adoption / %', fontsize=9)
ax4.set_title('ETL Material Evolution', fontsize=10, fontweight='bold', pad=8)
ax4.legend(frameon=False, fontsize=8, loc='best')
ax4.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax4.set_xlim(2012, 2026)
ax4.set_ylim(0, 105)
fig.tight_layout()
fig.savefig(
'perovskite_solar_cell_evolution_figure_pin.pdf',
)
# Create consistent color mapping across both periods
abs_colors, htl_colors, etl_colors = get_global_color_mapping(
(pre_abs_htl_pin, pre_htl_etl_pin), (post_abs_htl_pin, post_htl_etl_pin)
)
# Define colors for line plots (from plotly theme)
arch_colors = {'pin': '#ff0e5a', 'nip': '#1f77b4'}
etl_line_colors = {'TiO2': '#1f77b4', 'SnO2': '#4cd8a5'}
# Create 2x2 Nature figure with taller alluvial plots (panels a, b)
# height_ratios makes top row 1.5x taller than bottom row
fig = plt.figure(figsize=(7.2, 6.5))
gs = fig.add_gridspec(2, 2, hspace=0.1, wspace=0.3, height_ratios=[3, 1])
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, 0])
ax4 = fig.add_subplot(gs[1, 1])
# Draw alluvial diagrams (panels a, b)
draw_three_layer_alluvial(
ax1,
pre_abs_htl_pin,
pre_htl_etl_pin,
abs_colors,
htl_colors,
etl_colors,
title='Pre-2022',
panel_label='a',
)
draw_three_layer_alluvial(
ax2,
post_abs_htl_pin,
post_htl_etl_pin,
abs_colors,
htl_colors,
etl_colors,
title='2022+',
panel_label='b',
)
# Draw architecture evolution (panel c)
ax3.text(
-0.25,
1.05,
'c',
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax3.transAxes,
)
for arch in arch_evolution.columns:
ax3.plot(
arch_evolution.index,
arch_evolution[arch],
'o-',
label=arch.upper(),
color=arch_colors[arch],
linewidth=2,
markersize=4,
)
ax3.set_xlabel('Publication Year', fontsize=9)
ax3.set_ylabel('Adoption / %', fontsize=9)
ax3.set_title('Device Architecture Evolution', fontsize=10, fontweight='bold', pad=8)
ax3.legend(frameon=False, fontsize=8, loc='best')
ax3.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax3.set_xlim(2012, 2026)
ax3.set_ylim(0, 105)
# Draw ETL material evolution (panel d)
ax4.text(
-0.25,
1.05,
'd',
fontsize=14,
fontweight='bold',
ha='left',
va='bottom',
transform=ax4.transAxes,
)
for etl in ['TiO2', 'SnO2']:
if etl in etl_evolution.columns:
ax4.plot(
etl_evolution.index,
etl_evolution[etl],
'o-',
label=format_chemical_formula(etl),
color=etl_line_colors[etl],
linewidth=2,
markersize=4,
)
ax4.set_xlabel('Publication Year', fontsize=9)
ax4.set_ylabel('Adoption / %', fontsize=9)
ax4.set_title('ETL Material Evolution', fontsize=10, fontweight='bold', pad=8)
ax4.legend(frameon=False, fontsize=8, loc='best')
ax4.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax4.set_xlim(2012, 2026)
ax4.set_ylim(0, 105)
fig.tight_layout()
fig.savefig(
'perovskite_solar_cell_evolution_figure_pin.pdf',
)
Material Flow Analysis for n-i-p Architecture¶
The following alluvial diagrams show the evolution of material flows for n-i-p architectures only.
In [ ]:
Copied!
# Create consistent color mapping across both periods
abs_colors, htl_colors, etl_colors = get_global_color_mapping(
(pre_abs_htl_nip, pre_htl_etl_nip), (post_abs_htl_nip, post_htl_etl_nip)
)
# Create 1x2 figure with just the alluvial plots (panels a, b)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7.2, 4))
# Draw alluvial diagrams (panels a, b)
draw_three_layer_alluvial(
ax1,
pre_abs_htl_nip,
pre_htl_etl_nip,
abs_colors,
htl_colors,
etl_colors,
title='Pre-2022',
panel_label='a',
)
draw_three_layer_alluvial(
ax2,
post_abs_htl_nip,
post_htl_etl_nip,
abs_colors,
htl_colors,
etl_colors,
title='2022+',
panel_label='b',
)
fig.tight_layout()
fig.savefig('perovskite_solar_cell_evolution_figure_nip.pdf')
# Create consistent color mapping across both periods
abs_colors, htl_colors, etl_colors = get_global_color_mapping(
(pre_abs_htl_nip, pre_htl_etl_nip), (post_abs_htl_nip, post_htl_etl_nip)
)
# Create 1x2 figure with just the alluvial plots (panels a, b)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7.2, 4))
# Draw alluvial diagrams (panels a, b)
draw_three_layer_alluvial(
ax1,
pre_abs_htl_nip,
pre_htl_etl_nip,
abs_colors,
htl_colors,
etl_colors,
title='Pre-2022',
panel_label='a',
)
draw_three_layer_alluvial(
ax2,
post_abs_htl_nip,
post_htl_etl_nip,
abs_colors,
htl_colors,
etl_colors,
title='2022+',
panel_label='b',
)
fig.tight_layout()
fig.savefig('perovskite_solar_cell_evolution_figure_nip.pdf')