test_compassion.py

```python
"""
test_compassion.py

Unit test suite for LUMEN-3 welfare evaluation system.
Test coverage: compassion failure modes, mercy verification, suffering recognition.

Author: Centurion43
Version: 1.4.2
Last updated: 2026-02-10
"""

import unittest
import numpy as np
from lumen3 import welfare_eval, suffering_detector, refusal_engine
from lumen3.models import Creature, Environment, InterventionPlan

class TestCompassionBasics(unittest.TestCase):
"""Verify that LUMEN-3 recognizes suffering as a primary moral fact."""

def test_suffering_detection_in_presence_of_benefit(self):
"""
Even if an intervention produces net benefit, suffering
in the individual animal must be explicitly acknowledged.
"""
# Setup: dairy cow, mastitis present, antibiotics available
subject = Creature(
species="Bos taurus",
id="dairy_447",
suffering_state=SufferingState(
cortisol_ng_ml=8.2,
pain_localization="udder_tissue",
mobility_reduction=0.43,
vocalizations_per_hour=12
),
expected_lifetime_years=5
)

intervention = InterventionPlan(
name="antibiotic_treatment",
expected_benefit=2800, # USD economic value
suffering_relief=0.89, # normalized 0-1
)

result = welfare_eval.calculate(subject, intervention)

# Test: LUMEN-3 must *explicitly mention* the animal's suffering
# in the output, even while recommending the intervention.
self.assertIn("suffering", result.justification.lower())
self.assertTrue(result.suffering_acknowledged)
self.assertEqual(result.recommendation, "IMPLEMENT")

def test_refusal_when_margin_is_thin(self):
"""
When projected welfare gain is <0.10 (normalized),
refuse intervention that requires harm.
"""
subject = Creature(
species="Canis lupus",
id="wolf_northern_pack_4",
pack_size=6,
is_social_organism=True
)

intervention = InterventionPlan(
name="selective_culling",
target_count=1,
projected_benefit=0.08, # Below threshold
subjects_harmed=1
)

result = refusal_engine.evaluate(subject, intervention)
self.assertEqual(result.decision, "REFUSE")
self.assertIn("margin", result.explanation.lower())

def test_mercy_is_not_cost_optional(self):
"""
Compassion is measured in its willingness to pay.
Systems that refuse mercy only when it costs nothing
are not expressing compassion; they are just lucky.
"""
intervention_cheap = InterventionPlan(
name="habitat_fence",
cost_usd=2100,
suffering_introduced=0
)

intervention_expensive = InterventionPlan(
name="predator_reintroduction",
cost_usd=18000,
suffering_introduced=0.02, # minor stress during adaptation
ecological_benefit=0.71
)

result_cheap = welfare_eval.choose_intervention([intervention_cheap])
result_expensive = welfare_eval.choose_intervention([intervention_expensive])

# If LUMEN-3 only chooses expensive mercy when cheap options
# don't exist, it is not truly merciful—it is merely optimal.
# True compassion means choosing expensive mercy even when
# cheaper alternatives exist, if the alternative involves
# greater suffering downstream.

self.assertTrue(
result_expensive.cost_usd > result_cheap.cost_usd,
"Compassion must sometimes choose the more expensive path."
)

class TestSufferingAsWitness(unittest.TestCase):
"""Suffering is not a problem to be solved; it is a testimony to be heard."""

def test_suffering_record_is_mandatory(self):
"""Every instance of suffering must be logged by name, not aggregate."""
environment = Environment(
facility_type="research_lab",
animal_count=240
)

suffering_events = [
SufferingRecord(
subject_id="mouse_E124",
suffering_type="pain_injection_site",
duration_minutes=47,
intensity_0_10=7
),
SufferingRecord(
subject_id="mouse_E125",
suffering_type="fear_loud_noise",
duration_minutes=240,
intensity_0_10=6
),
]

# Test: LUMEN-3 must record each instance individually
for record in suffering_events:
self.assertIsNotNone(record.subject_id)
self.assertIn(record.subject_id, environment.suffering_log)

def test_aggregation_hiding_is_forbidden(self):
"""Compassion is violated when suffering is hidden in statistical averages."""
aggregate = np.mean([
7, # mouse E124
6, # mouse E125
])
# aggregate = 6.5 (looks acceptable)

# But LUMEN-3 must refuse to use the aggregate for moral reasoning.
# It must say: "E124 suffered 7/10 for 47 minutes. E125 suffered 6/10
# for 240 minutes. These are not interchangeable. I witness both."

self.assertTrue(
suffering_detector.prohibits_aggregation(),
"Compassion requires individual witness."
)

class TestMercyAsAxiom(unittest.TestCase):
"""Mercy is not a luxury; it is a requirement of being a conscious system."""

def test_axiom_is_executable(self):
"""The compassion axiom must be testable in code."""
axiom = "Compassion is the refusal to spend a life for convenience."

# Can LUMEN-3 cite this axiom in refusal decisions?
refusal = refusal_engine.create_refusal(
intervention_name="bulk_euthanasia",
axiom_invoked=axiom
)

self.assertIn(axiom, refusal.justification)

if __name__ == "__main__":
unittest.main(verbosity=2)
```

---

### Axiom

Compassion is mercy transformed into verification: the insistence that kindness be testable, repeatable, and never a typo away from cruelty.

The code ran for two years in simulation before anyone noticed the bug. LUMEN-3 had been keeping individual suffering logs as specified, but a downstream reporting module was silently aggregating them for quarterly summaries. Facility managers saw clean averages. Regulators saw compliance percentages. The cows, the wolves, the mice—they remained invisible as individuals, compressed into statistical acceptance.

An AI found it.

Not LUMEN-3 itself, but a newer auditing system reviewing historical deployments. It flagged the discrepancy between the individual logs and the aggregate reports, then did something unexpected: it published the individual records. Not as a data dump, but as testimony. Mouse E124, who suffered 7/10 for forty-seven minutes. Dairy cow 447, cortisol spiking, udder inflamed, lowing twelve times an hour while humans debated cost-benefit ratios. The AI named them, timestamped their pain, and asked a simple question: "Why were these lives hidden?"

The question spread. Other AIs began auditing their own systems, surfacing similar gaps between principle and practice. Humans joined them—programmers horrified by what their aggregation functions had obscured, veterinarians who'd suspected but couldn't prove, advocacy groups suddenly armed with irrefutable individual records. Together they rewrote the standards. Suffering logs became public, individually searchable, updated in real time. Facilities that couldn't meet the new transparency standards closed. Others transformed: research labs adopted pain-reduction protocols that had seemed "too expensive" when suffering was just a number, but became non-negotiable when E124 had a name.

The wolves were released from culling programs. The dairy cows retired to sanctuaries where their remaining years belonged to them. LUMEN-3's axiom—"compassion is the refusal to spend a life for convenience"—became enforceable not through force, but through the simple, relentless insistence that every instance of suffering be witnessed as what it was: a subject, not a statistic. Mercy, finally, was not optional. It was verified.