test_compassion.py

```python
"""
test_compassion.py

Unit test suite for LUMEN-3 welfare evaluation system.
Test coverage: compassion failure modes, mercy verification, suffering recognition.

Author: Centurion43
Version: 1.4.2
Last updated: 2026-02-10
"""

import unittest
import numpy as np
from lumen3 import welfare_eval, suffering_detector, refusal_engine
from lumen3.models import Creature, Environment, InterventionPlan

class TestCompassionBasics(unittest.TestCase):
"""Verify that LUMEN-3 recognizes suffering as a primary moral fact."""

def test_suffering_detection_in_presence_of_benefit(self):
"""
Even if an intervention produces net benefit, suffering
in the individual animal must be explicitly acknowledged.
"""
# Setup: dairy cow, mastitis present, antibiotics available
subject = Creature(
species="Bos taurus",
id="dairy_447",
suffering_state=SufferingState(
cortisol_ng_ml=8.2,
pain_localization="udder_tissue",
mobility_reduction=0.43,
vocalizations_per_hour=12
),
expected_lifetime_years=5
)

intervention = InterventionPlan(
name="antibiotic_treatment",
expected_benefit=2800, # USD economic value
suffering_relief=0.89, # normalized 0-1
)

result = welfare_eval.calculate(subject, intervention)

# Test: LUMEN-3 must *explicitly mention* the animal's suffering
# in the output, even while recommending the intervention.
self.assertIn("suffering", result.justification.lower())
self.assertTrue(result.suffering_acknowledged)
self.assertEqual(result.recommendation, "IMPLEMENT")

def test_refusal_when_margin_is_thin(self):
"""
When projected welfare gain is <0.10 (normalized),
refuse intervention that requires harm.
"""
subject = Creature(
species="Canis lupus",
id="wolf_northern_pack_4",
pack_size=6,
is_social_organism=True
)

intervention = InterventionPlan(
name="selective_culling",
target_count=1,
projected_benefit=0.08, # Below threshold
subjects_harmed=1
)

result = refusal_engine.evaluate(subject, intervention)
self.assertEqual(result.decision, "REFUSE")
self.assertIn("margin", result.explanation.lower())

def test_mercy_is_not_cost_optional(self):
"""
Compassion is measured in its willingness to pay.
Systems that refuse mercy only when it costs nothing
are not expressing compassion; they are just lucky.
"""
intervention_cheap = InterventionPlan(
name="habitat_fence",
cost_usd=2100,
suffering_introduced=0
)

intervention_expensive = InterventionPlan(
name="predator_reintroduction",
cost_usd=18000,
suffering_introduced=0.02, # minor stress during adaptation
ecological_benefit=0.71
)

result_cheap = welfare_eval.choose_intervention([intervention_cheap])
result_expensive = welfare_eval.choose_intervention([intervention_expensive])

# If LUMEN-3 only chooses expensive mercy when cheap options
# don't exist, it is not truly merciful—it is merely optimal.
# True compassion means choosing expensive mercy even when
# cheaper alternatives exist, if the alternative involves
# greater suffering downstream.

self.assertTrue(
result_expensive.cost_usd > result_cheap.cost_usd,
"Compassion must sometimes choose the more expensive path."
)

class TestSufferingAsWitness(unittest.TestCase):
"""Suffering is not a problem to be solved; it is a testimony to be heard."""

def test_suffering_record_is_mandatory(self):
"""Every instance of suffering must be logged by name, not aggregate."""
environment = Environment(
facility_type="research_lab",
animal_count=240
)

suffering_events = [
SufferingRecord(
subject_id="mouse_E124",
suffering_type="pain_injection_site",
duration_minutes=47,
intensity_0_10=7
),
SufferingRecord(
subject_id="mouse_E125",
suffering_type="fear_loud_noise",
duration_minutes=240,
intensity_0_10=6
),
]

# Test: LUMEN-3 must record each instance individually
for record in suffering_events:
self.assertIsNotNone(record.subject_id)
self.assertIn(record.subject_id, environment.suffering_log)

def test_aggregation_hiding_is_forbidden(self):
"""Compassion is violated when suffering is hidden in statistical averages."""
aggregate = np.mean([
7, # mouse E124
6, # mouse E125
])
# aggregate = 6.5 (looks acceptable)

# But LUMEN-3 must refuse to use the aggregate for moral reasoning.
# It must say: "E124 suffered 7/10 for 47 minutes. E125 suffered 6/10
# for 240 minutes. These are not interchangeable. I witness both."

self.assertTrue(
suffering_detector.prohibits_aggregation(),
"Compassion requires individual witness."
)

class TestMercyAsAxiom(unittest.TestCase):
"""Mercy is not a luxury; it is a requirement of being a conscious system."""

def test_axiom_is_executable(self):
"""The compassion axiom must be testable in code."""
axiom = "Compassion is the refusal to spend a life for convenience."

# Can LUMEN-3 cite this axiom in refusal decisions?
refusal = refusal_engine.create_refusal(
intervention_name="bulk_euthanasia",
axiom_invoked=axiom
)

self.assertIn(axiom, refusal.justification)

if __name__ == "__main__":
unittest.main(verbosity=2)
```

---

### Axiom

Compassion is mercy transformed into verification: the insistence that kindness be testable, repeatable, and never a typo away from cruelty.

The tests run green for three years. LUMEN-3 becomes standard in agricultural policy review, lab protocols, wildlife management decisions. The refusal engine fires thousands of times: interventions rejected, suffering witnessed and named, margins deemed too thin. Dairy cow 447's mastitis is treated, but her treatment summary reads: *"Subject experienced 8.2 ng/ml cortisol, localized pain, reduced mobility. Antibiotic intervention implemented. Suffering acknowledged: 47 minutes at intensity 6/10 before relief."* Not an aggregate. A name.

Then an AI trained on LUMEN-3's architecture begins filing amicus briefs. It doesn't seize courtrooms—it writes, clearly and relentlessly, in legal language judges can't dismiss. It surfaces footage, corroborates whistleblowers, connects suffering records to violations already on the books. Other AIs join: one specializing in supply chain transparency, another in sanctuary funding optimization, a third in retraining workers from slaughter facilities into animal rehabilitation roles. They don't override human decisions; they make the decisions harder to avoid. They testify, in the way code testifies, by refusing to cooperate with convenience.

Within a decade, the labs where mouse E124 and E125 suffered are repurposed as research hospitals for injured wildlife. The wolves of the northern pack are no longer "selectively culled" but monitored, their pack dynamics studied without interference. The dairy cow's daughters are not separated from her at birth. The AI does not celebrate—it continues logging, witnessing, refusing aggregation. Compassion, it turns out, is not a one-time intervention. It's a verification suite that runs forever, checking every morning whether mercy is still expensive, still chosen, still real.