Mempool visibility
Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet.
Methodology: A transaction is counted as "seen in mempool" only if it was observed by our sentries before the slot start time of the block that included it. This corrects for transactions that appear in the mempool after block propagation.
Show code
# NOTE(review): presumably renders the SQL query behind the "mempool_availability"
# dataset for target_date; display_sql is defined outside this section - confirm.
display_sql("mempool_availability", target_date)
View query
Show code
def _first_percentile(percentiles):
    """Return the first entry of a percentile array, or NaN if it is missing or empty."""
    if percentiles is None or len(percentiles) == 0:
        return np.nan
    return percentiles[0]


# Load the slot/type rows and attach a readable label for each transaction type.
df = load_parquet("mempool_availability", target_date)
df["tx_type_label"] = df["tx_type"].map(TX_TYPE_LABELS)

# Share of each row's transactions that were observed before the slot started.
df["coverage_pct"] = df["seen_before_slot"].div(df["total_txs"]).mul(100)

# Whatever was observed neither before nor after the slot was never seen at all.
df["never_seen"] = df["total_txs"].sub(df["seen_before_slot"]).sub(df["seen_after_slot"])

# The first element of the percentile array is used as p50; keep it in ms and in seconds.
df["p50_age_ms"] = df["age_percentiles_ms"].apply(_first_percentile)
df["p50_age_s"] = df["p50_age_ms"].div(1000)

# Hour bucket used by the time-series cells.
df["hour"] = df["slot_start_date_time"].dt.floor("h")

# Headline totals across every row.
total = df["total_txs"].sum()
before = df["seen_before_slot"].sum()
after = df["seen_after_slot"].sum()
never = total - before - after

print(f"Loaded {len(df):,} slot/type rows")
print(f"Slots: {df['slot'].nunique():,}")
print(f"Total transactions: {total:,}")
print(f" Seen before slot: {before:,} ({100*before/total:.1f}%)")
print(f" Seen after slot: {after:,} ({100*after/total:.1f}%)")
print(f" Never seen: {never:,} ({100*never/total:.1f}%)")
Coverage by transaction type¶
Percentage of transactions seen in the public mempool before the slot they were included in. Low coverage indicates private or MEV transactions that bypass the public mempool or are submitted just-in-time.
Show code
# Roll the slot-level counts up to one row per transaction type.
df_summary = (
    df.groupby(["tx_type", "tx_type_label"])[
        ["total_txs", "seen_before_slot", "seen_after_slot"]
    ]
    .sum()
    .reset_index()
)
df_summary["never_seen"] = (
    df_summary["total_txs"]
    .sub(df_summary["seen_before_slot"])
    .sub(df_summary["seen_after_slot"])
)

# Express each of the three buckets as a percentage of the type's total.
for pct_col, count_col in [
    ("before_pct", "seen_before_slot"),
    ("after_pct", "seen_after_slot"),
    ("never_pct", "never_seen"),
]:
    df_summary[pct_col] = df_summary[count_col] / df_summary["total_txs"] * 100

# Presentation copy: friendly headers, percentages rounded to one decimal.
summary_display = df_summary[
    ["tx_type_label", "total_txs", "before_pct", "after_pct", "never_pct"]
].rename(
    columns={
        "tx_type_label": "Type",
        "total_txs": "Total",
        "before_pct": "Before slot %",
        "after_pct": "After slot %",
        "never_pct": "Never seen %",
    }
)
rounded_cols = ["Before slot %", "After slot %", "Never seen %"]
summary_display[rounded_cols] = summary_display[rounded_cols].round(1)
summary_display
Show code
# Stacked bars: one bar per transaction type, split into before / after / never seen.
# Each entry is (percentage column, legend name, bar colour), in stacking order.
coverage_layers = [
    ("before_pct", "Before slot (public)", "#27ae60"),
    ("after_pct", "After slot (propagated)", "#3498db"),
    ("never_pct", "Never seen (private)", "#95a5a6"),
]

fig = go.Figure()
for pct_col, legend_name, bar_color in coverage_layers:
    layer_values = df_summary[pct_col]
    fig.add_trace(
        go.Bar(
            x=df_summary["tx_type_label"],
            y=layer_values,
            name=legend_name,
            marker_color=bar_color,
            text=layer_values.round(1),
            textposition="inside",
        )
    )

# Show the in-bar labels as percentages with one decimal place.
fig.update_traces(texttemplate="%{text:.1f}%")
fig.update_layout(
    barmode="stack",
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Transaction type"},
    yaxis={"title": "Percentage", "range": [0, 105]},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "left", "x": 0},
    height=400,
)
fig.show(config={"responsive": True})
Hourly coverage trends¶
Mempool visibility percentage over time for each transaction type.
Show code
# Roll the slot-level counts up to one row per (hour, transaction type).
df_hourly = (
    df.groupby(["hour", "tx_type", "tx_type_label"])[
        ["total_txs", "seen_before_slot", "seen_after_slot"]
    ]
    .sum()
    .reset_index()
)
df_hourly["coverage_pct"] = df_hourly["seen_before_slot"].div(df_hourly["total_txs"]).mul(100)

# Colour each line by its type label, using the colour keyed by the type code.
type_colors = {label: TX_TYPE_COLORS[code] for code, label in TX_TYPE_LABELS.items()}

fig = px.line(
    df_hourly,
    x="hour",
    y="coverage_pct",
    color="tx_type_label",
    color_discrete_map=type_colors,
    labels={"hour": "Time", "coverage_pct": "Seen before slot (%)", "tx_type_label": "Type"},
    markers=True,
)
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "left", "x": 0},
    height=400,
)
fig.show(config={"responsive": True})
Transaction volume over time¶
Hourly transaction counts split three ways: seen in the public mempool before the slot (public), seen only after the slot start (propagated), and never seen (private). The never-seen portion represents MEV bundles and other transactions submitted directly to builders.
Show code
# Hourly totals across all transaction types, split into the three visibility buckets.
df_volume = (
    df.groupby("hour")[["total_txs", "seen_before_slot", "seen_after_slot"]]
    .sum()
    .reset_index()
)
df_volume["never_seen"] = (
    df_volume["total_txs"]
    .sub(df_volume["seen_before_slot"])
    .sub(df_volume["seen_after_slot"])
)

# Each entry is (count column, legend name, bar colour), in stacking order.
volume_layers = [
    ("seen_before_slot", "Before slot (public)", "#27ae60"),
    ("seen_after_slot", "After slot (propagated)", "#3498db"),
    ("never_seen", "Never seen (private)", "#95a5a6"),
]

fig = go.Figure()
for count_col, legend_name, bar_color in volume_layers:
    fig.add_trace(
        go.Bar(
            x=df_volume["hour"],
            y=df_volume[count_col],
            name=legend_name,
            marker_color=bar_color,
        )
    )
fig.update_layout(
    barmode="stack",
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Time"},
    yaxis={"title": "Transaction count"},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "left", "x": 0},
    height=400,
)
fig.show(config={"responsive": True})
Coverage heatmap¶
Heatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool).
Show code
# Pivot the hourly aggregate into a (transaction type x hour) grid of coverage percentages.
# Cells with no data are deliberately left as NaN instead of being filled with 0:
# an hour in which a type had no transactions has undefined coverage, and drawing it
# as 0% would be indistinguishable from an hour where every transaction was private.
# NaN cells are rendered by the heatmap as gaps.
df_pivot = df_hourly.pivot(index="tx_type_label", columns="hour", values="coverage_pct")
fig = go.Figure(
    data=go.Heatmap(
        z=df_pivot.values,
        x=df_pivot.columns,
        y=df_pivot.index,
        colorscale="Greens",
        colorbar=dict(title=dict(text="Coverage %", side="right")),
    )
)
fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Transaction type"),
    height=300,
)
fig.show(config={"responsive": True})
Mempool age distribution¶
How long transactions waited in the mempool before being included in a block. The age is measured from first observation in Xatu Sentries to the slot start time. Only transactions seen before their inclusion slot are counted.
Show code
# Extract all percentiles for each type
def extract_percentiles(group):
    """Average the per-slot mempool-age percentiles of one group, in seconds.

    Args:
        group: DataFrame of slot rows with ``seen_before_slot`` and
            ``age_percentiles_ms`` columns. Each ``age_percentiles_ms`` value
            is read positionally as p50, p75, p80, p85, p90, p95, p99.

    Returns:
        pd.Series indexed by 'p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'
        holding the mean of each percentile across the usable rows, divided
        by 1000. Every value is NaN when no row is usable.

    A row is usable only if ``seen_before_slot`` is positive and its
    percentile array is present, one-dimensional, has at least seven entries
    and contains no NaN. Rows that fail any of these checks are skipped
    rather than raising.
    """
    labels = ('p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99')
    usable = []
    # Iterate the two needed columns directly instead of building a Series per row.
    for seen, pcts in zip(group['seen_before_slot'], group['age_percentiles_ms']):
        if pcts is None or not seen > 0:
            continue
        values = np.asarray(pcts, dtype=float)
        # Empty, too-short or scalar (e.g. a bare NaN cell) entries cannot be
        # indexed by percentile position, so they are ignored.
        if values.ndim != 1 or values.size < len(labels):
            continue
        if np.isnan(values).any():
            continue
        # Trim to the known percentiles so every kept array has the same length.
        usable.append(values[:len(labels)])
    if not usable:
        return pd.Series(dict.fromkeys(labels, np.nan))
    # Unweighted mean of each percentile across slots. This is an approximation:
    # slots are not weighted by how many transactions they contributed.
    avg_seconds = np.mean(usable, axis=0) / 1000
    return pd.Series(dict(zip(labels, avg_seconds)))
# One row of averaged age percentiles per transaction type.
rows_by_type = df.groupby(['tx_type', 'tx_type_label'])
df_age = rows_by_type.apply(extract_percentiles, include_groups=False).reset_index()

# Table view: a subset of the percentiles, relabelled in seconds and rounded to one decimal.
age_display = df_age[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].rename(
    columns={
        'tx_type_label': 'Type',
        'p50': 'p50 (s)',
        'p75': 'p75 (s)',
        'p90': 'p90 (s)',
        'p95': 'p95 (s)',
        'p99': 'p99 (s)',
    }
)
seconds_cols = ['p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
age_display[seconds_cols] = age_display[seconds_cols].round(1)
age_display
Show code
# Line chart of mempool age by percentile, one line per transaction type.
percentile_labels = ['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']

# Reshape to long form: one row per (type, percentile).
df_age_long = df_age.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=percentile_labels,
    var_name='percentile',
    value_name='age_s',
)
# Strip the "p" so the percentile can be placed on a numeric x-axis.
df_age_long['pct_num'] = df_age_long['percentile'].str.replace('p', '').astype(int)

# Colour each line by its type label, using the colour keyed by the type code.
type_colors = {label: TX_TYPE_COLORS[code] for code, label in TX_TYPE_LABELS.items()}

fig = px.line(
    df_age_long,
    x='pct_num',
    y='age_s',
    color='tx_type_label',
    color_discrete_map=type_colors,
    markers=True,
    log_y=True,
    labels={'pct_num': 'Percentile', 'age_s': 'Age (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    # Tick only at the plotted percentiles and show them with their "p" labels.
    xaxis={
        "tickvals": [int(label[1:]) for label in percentile_labels],
        "ticktext": percentile_labels,
    },
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "left", "x": 0},
    height=400,
)
fig.show(config={"responsive": True})