Propagation anomalies
Detects blocks that propagated slower than expected and checks whether the slowdown correlates with blob count.
Show code
# Show the SQL query associated with the "block_production_timeline" dataset for target_date.
# NOTE(review): display_sql is defined outside this section — presumably it renders the query text; confirm.
display_sql("block_production_timeline", target_date)
View query
Show code
# Load the block production timeline for the target date.
df = load_parquet("block_production_timeline", target_date)

# Filter to valid blocks (exclude missed slots): a first-seen time must be
# present and lie in [0, 60000) ms.
first_seen_ms = df["block_first_seen_ms"]
is_valid = first_seen_ms.notna() & (first_seen_ms >= 0) & (first_seen_ms < 60000)

# Take an explicit copy: the columns added below must be written to an
# independent frame, not to a slice of the loaded data (otherwise pandas
# emits SettingWithCopyWarning).
df = df[is_valid].copy()

# Flag MEV vs local blocks: a block counts as MEV when it has a winning bid value.
df["has_mev"] = df["winning_bid_value"].notna()
df["block_type"] = df["has_mev"].map({True: "MEV", False: "Local"})

# Largest blob count in the data; later cells use it to size chart axes.
max_blobs = df["blob_count"].max()

print(f"Total valid blocks: {len(df):,}")
print(f"MEV blocks: {df['has_mev'].sum():,} ({df['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df['has_mev']).sum():,} ({(~df['has_mev']).mean()*100:.1f}%)")
Anomaly detection method
The method:
- Fit linear regression: `block_first_seen_ms ~ blob_count`
- Calculate residuals (actual - expected)
- Flag blocks with residuals > 2σ as anomalies
Points above the ±2σ band propagated slower than expected given their blob count.
Show code
# Conditional outliers: blocks that are slow relative to their blob count.
# Work on a copy so the regression columns are not added to `df`.
df_anomaly = df.copy()

# Regressor as float, computed once and reused for the fit and the prediction.
blob_count_float = df_anomaly["blob_count"].astype(float)

# Fit regression: block_first_seen_ms ~ blob_count
slope, intercept, r_value, p_value, std_err = stats.linregress(
    blob_count_float, df_anomaly["block_first_seen_ms"]
)

# Expected first-seen time from the fit, and residual (actual - expected).
df_anomaly["expected_ms"] = intercept + slope * blob_count_float
df_anomaly["residual_ms"] = df_anomaly["block_first_seen_ms"] - df_anomaly["expected_ms"]

# Residual standard deviation (pandas default, i.e. sample std with ddof=1).
residual_std = df_anomaly["residual_ms"].std()

# Flag anomalies: residual > 2σ. Only the positive side is flagged, i.e.
# blocks that were unexpectedly slow; unexpectedly fast blocks are ignored.
df_anomaly["is_anomaly"] = df_anomaly["residual_ms"] > 2 * residual_std

n_anomalies = df_anomaly["is_anomaly"].sum()
pct_anomalies = n_anomalies / len(df_anomaly) * 100

# Prepare outliers dataframe with display-friendly relay / proposer / builder labels.
df_outliers = df_anomaly[df_anomaly["is_anomaly"]].copy()

# First winning relay, or "Local" when there is none. A null value (None / NaN)
# has no len(), so it is treated the same as an empty list instead of raising.
df_outliers["relay"] = df_outliers["winning_relays"].apply(
    lambda x: x[0] if hasattr(x, "__len__") and len(x) > 0 else "Local"
)
df_outliers["proposer"] = df_outliers["proposer_entity"].fillna("Unknown")
# Builder identifier truncated to its first 10 characters; "Local" when missing or empty.
df_outliers["builder"] = df_outliers["winning_builder"].apply(
    lambda x: f"{x[:10]}..." if pd.notna(x) and x else "Local"
)

print(f"Regression: block_ms = {intercept:.1f} + {slope:.2f} × blob_count (R² = {r_value**2:.3f})")
print(f"Residual σ = {residual_std:.1f}ms")
print(f"Anomalies (>2σ slow): {n_anomalies:,} ({pct_anomalies:.1f}%)")
Show code
# Scatter plot of first-seen time vs blob count, overlaid with the fitted
# line and a ±2σ residual band; anomalies are highlighted in red.

# The fit is linear, so two x positions (0 and the max blob count) are
# enough to draw both the line and the band.
x_range = np.array([0, int(max_blobs)])
y_pred = intercept + slope * x_range
band_half_width = 2 * residual_std
y_upper = y_pred + band_half_width
y_lower = y_pred - band_half_width

# ±2σ band: a closed polygon that runs along the upper edge and returns
# along the lower edge.
band_trace = go.Scatter(
    x=np.concatenate([x_range, x_range[::-1]]),
    y=np.concatenate([y_upper, y_lower[::-1]]),
    fill="toself",
    fillcolor="rgba(100,100,100,0.2)",
    line={"width": 0},
    name="±2σ band",
    hoverinfo="skip",
)

# Regression line
expected_trace = go.Scatter(
    x=x_range,
    y=y_pred,
    mode="lines",
    line={"color": "white", "width": 2, "dash": "dash"},
    name="Expected",
)

# Normal points, down-sampled to at most 2000 (fixed seed) to avoid
# overplotting. The legend still reports the full, unsampled count.
non_anomalous = df_anomaly[~df_anomaly["is_anomaly"]]
df_normal = (
    non_anomalous.sample(2000, random_state=42)
    if len(non_anomalous) > 2000
    else non_anomalous
)
n_normal_total = len(df_anomaly) - n_anomalies
normal_trace = go.Scatter(
    x=df_normal["blob_count"],
    y=df_normal["block_first_seen_ms"],
    mode="markers",
    marker={"size": 4, "color": "rgba(100,150,200,0.4)"},
    name=f"Normal ({n_normal_total:,})",
    hoverinfo="skip",
)

# Anomaly points; slot, rounded residual and relay are carried in
# customdata for the hover text.
anomaly_hover_data = np.column_stack([
    df_outliers["slot"],
    df_outliers["residual_ms"].round(0),
    df_outliers["relay"],
])
anomaly_trace = go.Scatter(
    x=df_outliers["blob_count"],
    y=df_outliers["block_first_seen_ms"],
    mode="markers",
    marker={
        "size": 7,
        "color": "#e74c3c",
        "line": {"width": 1, "color": "white"},
    },
    name=f"Anomalies ({n_anomalies:,})",
    customdata=anomaly_hover_data,
    hovertemplate="<b>Slot %{customdata[0]}</b><br>Blobs: %{x}<br>Actual: %{y:.0f}ms<br>+%{customdata[1]}ms vs expected<br>Relay: %{customdata[2]}<extra></extra>",
)

# Add traces back-to-front: band, line, normal points, anomalies on top.
fig = go.Figure()
for trace in (band_trace, expected_trace, normal_trace, anomaly_trace):
    fig.add_trace(trace)

fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Blob count", "range": [-0.5, int(max_blobs) + 0.5]},
    yaxis={"title": "Block first seen (ms from slot start)"},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    height=500,
)
fig.show(config={"responsive": True})
All propagation anomalies
Blocks that propagated much slower than expected given their blob count, sorted by residual (worst first).
Show code
# All anomalies table with selectable text and Lab links, worst residual first.
# Every value is HTML-escaped before interpolation: proposer / builder / relay
# labels come from the dataset, and a stray "&" or "<" would otherwise break
# (or inject into) the rendered markup.
from html import escape as _escape

if n_anomalies > 0:
    df_table = df_outliers.sort_values("residual_ms", ascending=False)[
        ["slot", "blob_count", "block_first_seen_ms", "expected_ms", "residual_ms", "proposer", "builder", "relay"]
    ].copy()
    # Show millisecond values as whole numbers.
    for ms_col in ("block_first_seen_ms", "expected_ms", "residual_ms"):
        df_table[ms_col] = df_table[ms_col].round(0).astype(int)

    # Static part of the table: styles, container and header row.
    table_head = '''
<style>
.anomaly-table { border-collapse: collapse; width: 100%; font-family: monospace; font-size: 13px; }
.anomaly-table th { background: #2c3e50; color: white; padding: 8px 12px; text-align: left; position: sticky; top: 0; }
.anomaly-table td { padding: 6px 12px; border-bottom: 1px solid #eee; }
.anomaly-table tr:hover { background: #f5f5f5; }
.anomaly-table .num { text-align: right; }
.anomaly-table .delta { background: #ffebee; color: #c62828; font-weight: bold; }
.anomaly-table a { color: #1976d2; text-decoration: none; }
.anomaly-table a:hover { text-decoration: underline; }
.table-container { max-height: 600px; overflow-y: auto; }
</style>
<div class="table-container">
<table class="anomaly-table">
<thead>
<tr><th>Slot</th><th class="num">Blobs</th><th class="num">Actual (ms)</th><th class="num">Expected (ms)</th><th class="num">Δ (ms)</th><th>Proposer</th><th>Builder</th><th>Relay</th></tr>
</thead>
<tbody>
'''

    # One <tr> per anomaly; collected in a list and joined once instead of
    # growing a string with += inside the loop.
    body_rows = []
    for _, row in df_table.iterrows():
        slot = _escape(str(row["slot"]))
        slot_link = f'<a href="https://lab.ethpandaops.io/ethereum/slots/{slot}" target="_blank">{slot}</a>'
        body_rows.append(f'''<tr>
<td>{slot_link}</td>
<td class="num">{_escape(str(row["blob_count"]))}</td>
<td class="num">{_escape(str(row["block_first_seen_ms"]))}</td>
<td class="num">{_escape(str(row["expected_ms"]))}</td>
<td class="num delta">+{_escape(str(row["residual_ms"]))}</td>
<td>{_escape(str(row["proposer"]))}</td>
<td>{_escape(str(row["builder"]))}</td>
<td>{_escape(str(row["relay"]))}</td>
</tr>''')

    html = table_head + "".join(body_rows) + '</tbody></table></div>'
    display(HTML(html))
    print(f"\nTotal anomalies: {len(df_table):,}")
else:
    print("No anomalies detected.")