;;; gov.clp

;;; Proxy Pool Governor — CLIPS routing-weight decision rules.
;;;
;;; Copyright (C) 2026  SWGY, Inc.
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public License
;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
;;;
;;; SPDX-License-Identifier: AGPL-3.0-or-later

;;; ============================================================
;;; Proxy Pool Governor (Simplified)
;;; ============================================================

;;; ------------------------------------------------------------
;;; Templates
;;; ------------------------------------------------------------

(deftemplate pool-service-stats
  "Metrics snapshot for one pool/service pair; input to the detect-* rules."

  ;; Which pool/service the sample describes and when it was taken.
  ;; NOTE(review): the timestamp unit is not visible here -- presumably
  ;; seconds; confirm with whatever asserts these facts.
  (slot pool                 (type SYMBOL))
  (slot service              (type SYMBOL))
  (slot timestamp            (type FLOAT))

  ;; Latest observed values.  The rules compare the rate-* slots against
  ;; constants such as 0.05, 0.10 and 0.85, so they look like fractions
  ;; rather than percentages -- TODO confirm.
  (slot rate-success         (type FLOAT))
  (slot rate-timeout         (type FLOAT))
  (slot rate-ssl-error       (type FLOAT))
  (slot response-time        (type FLOAT))

  ;; Baseline mean / standard deviation the latest values are compared to.
  (slot avg-success          (type FLOAT))
  (slot avg-response-time    (type FLOAT))
  (slot stddev-success       (type FLOAT))
  (slot stddev-response-time (type FLOAT))

  ;; Kept for interface compatibility but unused by rules:
  (slot rate-lost-race       (type FLOAT) (default 0.0))
  (slot rate-302-unusual     (type FLOAT) (default 0.0))
  (slot rate-other           (type FLOAT) (default 0.0)))

(deftemplate current-weight
  "Routing weight currently in force for one pool/service pair."
  (slot pool             (type SYMBOL))
  (slot service          (type SYMBOL))
  ;; Ceiling that restore-weight steps back up to.
  (slot base-weight      (type INTEGER))
  ;; Weight actually applied; the rules treat a value below base-weight as "reduced".
  (slot effective-weight (type INTEGER)))

(deftemplate service-config
  "Per-service tuning knobs read by the detection, reduction and restoration rules."
  (slot service (type SYMBOL))

  ;; Number of standard deviations from the mean that counts as an outlier.
  (slot sigma-threshold
    (type FLOAT) (default 2.0))
  ;; Success rates below this value are flagged by detect-success-rate-degradation.
  (slot min-success-rate
    (type FLOAT) (default 0.85))
  ;; Response times above this value are flagged by detect-response-time-degradation.
  (slot max-response-time
    (type FLOAT) (default 5.0))
  ;; Factor the effective weight is multiplied by on the first reduction.
  (slot weight-reduction
    (type FLOAT) (default 0.5))
  ;; Floor for reduced weights; reduce-weight only acts above it.
  (slot min-weight
    (type INTEGER) (default 1))
  ;; How far the stats timestamp must be past the healthy-since mark before
  ;; restore-weight fires (same unit as the timestamps; presumably seconds -- confirm).
  (slot restore-cooldown
    (type FLOAT) (default 3600.0))
  ;; Ratio of degraded pools at or above which the whole service is marked degraded.
  (slot service-degrade-threshold
    (type FLOAT) (default 0.7)))

(deftemplate weight-adjustment
  "Proposed weight change for one pool/service pair, asserted by reduce-weight and restore-weight."
  ;; Both producing rules refuse to fire while a weight-adjustment for the
  ;; same pool/service exists, and no rule shown here retracts one --
  ;; presumably the embedding application consumes and removes them (confirm).
  (slot pool       (type SYMBOL))
  (slot service    (type SYMBOL))
  (slot old-weight (type INTEGER))   ; effective weight when the rule fired
  (slot new-weight (type INTEGER))   ; weight being proposed
  (slot reason     (type SYMBOL))    ; degradation reason, or `recovery`
  (slot severity   (type FLOAT))     ; severity of the triggering degradation; 0.0 for recovery
  (slot timestamp  (type FLOAT)))

(deftemplate alert
  "Notification raised by the rules; alert-service-degraded asserts type service-degraded."
  (slot type      (type SYMBOL))
  (slot service   (type SYMBOL))
  ;; Left at nil by service-wide alerts, which do not name a pool.
  (slot pool      (type SYMBOL) (default nil))
  (slot message   (type STRING))
  (slot timestamp (type FLOAT)))

(deftemplate degradation
  "Marks a pool/service pair as degraded for one specific reason."
  ;; The detect-* rules assert at most one fact per (pool, service, reason);
  ;; the reasons they use are response-time, success-rate, ssl-errors and timeouts.
  (slot pool      (type SYMBOL))
  (slot service   (type SYMBOL))
  (slot reason    (type SYMBOL))
  ;; reduce-weight applies its extra halving only when this exceeds 3.0.
  (slot severity  (type FLOAT))
  ;; Timestamp of the stats sample that triggered the detection.
  (slot timestamp (type FLOAT)))

(deftemplate pool-healthy-since
  "Start of the current degradation-free period for one pool/service pair."
  ;; Asserted by mark-healthy, retracted by clear-healthy-on-degradation,
  ;; and read by restore-weight to measure the cooldown.
  (slot pool    (type SYMBOL))
  (slot service (type SYMBOL))
  (slot since   (type FLOAT)))   ; stats timestamp at which the pool was marked healthy

(deftemplate service-status
  "Service-wide health verdict produced by evaluate-service-health."
  (slot service        (type SYMBOL))
  ;; Only the two listed values are permitted.
  (slot status         (allowed-values healthy degraded))
  ;; Degraded pools divided by all pools that have a current-weight fact.
  (slot degraded-ratio (type FLOAT)))

;;; ------------------------------------------------------------
;;; Helper Function
;;; ------------------------------------------------------------

(deffunction clamp-weight
  "Truncate ?val to an integer and clamp it to the range [?min, ?max]."
  (?val ?min ?max)
  ;; The construct comment belongs between the name and the parameter list;
  ;; placed after the parameters it is just a string expression that is
  ;; evaluated and thrown away.
  ;;
  ;; ?val - number to clamp; `integer` truncates it (7.9 -> 7), it does not round.
  ;; ?min - lower bound; applied last, so it wins if ?min > ?max.
  ;; ?max - upper bound.
  ;; Returns the clamped value.
  (max ?min (min ?max (integer ?val))))

;;; ------------------------------------------------------------
;;; Degradation Detection (Consolidated)
;;; ------------------------------------------------------------

(defrule detect-response-time-degradation
  "Flag a pool whose latest response time is a statistical outlier or exceeds the configured limit."
  (service-config (service ?svc)
                  (sigma-threshold ?k)
                  (max-response-time ?limit))
  (pool-service-stats (pool ?pool) (service ?svc) (timestamp ?now)
                      (response-time ?observed)
                      (avg-response-time ?mean)
                      (stddev-response-time ?spread))
  ;; A positive standard deviation is required because it is the divisor of
  ;; the severity score below.
  ;; NOTE(review): this gate also suppresses the max-response-time check
  ;; when the standard deviation is 0 -- confirm that is intended.
  (test (> ?spread 0))
  ;; Either the hard limit is exceeded or the sample lies more than
  ;; ?k standard deviations above the mean.
  (test (or (> ?observed ?limit)
            (> ?observed (+ ?mean (* ?k ?spread)))))
  ;; At most one response-time degradation per pool/service.
  (not (degradation (pool ?pool) (service ?svc) (reason response-time)))
  =>
  ;; Severity is the number of standard deviations above the mean.
  (bind ?z-score (/ (- ?observed ?mean) ?spread))
  (assert (degradation (pool ?pool) (service ?svc) (reason response-time)
                       (severity ?z-score) (timestamp ?now))))

(defrule detect-success-rate-degradation
  "Flag a pool whose latest success rate is a statistical outlier or falls below the configured floor."
  (service-config (service ?svc)
                  (sigma-threshold ?k)
                  (min-success-rate ?floor))
  (pool-service-stats (pool ?pool) (service ?svc) (timestamp ?now)
                      (rate-success ?observed)
                      (avg-success ?mean)
                      (stddev-success ?spread))
  ;; A positive standard deviation is required because it is the divisor of
  ;; the severity score below.
  ;; NOTE(review): this gate also suppresses the min-success-rate check
  ;; when the standard deviation is 0 -- confirm that is intended.
  (test (> ?spread 0))
  ;; Either the rate is under the hard floor or it lies more than
  ;; ?k standard deviations below the mean.
  (test (or (< ?observed ?floor)
            (< ?observed (- ?mean (* ?k ?spread)))))
  ;; At most one success-rate degradation per pool/service.
  (not (degradation (pool ?pool) (service ?svc) (reason success-rate)))
  =>
  ;; Severity is the number of standard deviations below the mean.
  (bind ?z-score (/ (- ?mean ?observed) ?spread))
  (assert (degradation (pool ?pool) (service ?svc) (reason success-rate)
                       (severity ?z-score) (timestamp ?now))))

(defrule detect-ssl-error-spike
  "Flag a pool whose SSL error rate is above 0.05."
  (pool-service-stats (pool ?pool) (service ?svc) (timestamp ?now)
                      (rate-ssl-error ?ssl-rate))
  (test (> ?ssl-rate 0.05))
  ;; At most one ssl-errors degradation per pool/service.
  (not (degradation (pool ?pool) (service ?svc) (reason ssl-errors)))
  =>
  ;; The factor 20 maps the 0.05 trigger level to a severity of 1.0.
  (bind ?score (* ?ssl-rate 20))
  (assert (degradation (pool ?pool) (service ?svc) (reason ssl-errors)
                       (severity ?score) (timestamp ?now))))

(defrule detect-timeout-spike
  "Flag a pool whose timeout rate is above 0.10."
  (pool-service-stats (pool ?pool) (service ?svc) (timestamp ?now)
                      (rate-timeout ?timeout-rate))
  (test (> ?timeout-rate 0.10))
  ;; At most one timeouts degradation per pool/service.
  (not (degradation (pool ?pool) (service ?svc) (reason timeouts)))
  =>
  ;; The factor 10 maps the 0.10 trigger level to a severity of 1.0.
  (bind ?score (* ?timeout-rate 10))
  (assert (degradation (pool ?pool) (service ?svc) (reason timeouts)
                       (severity ?score) (timestamp ?now))))

;;; ------------------------------------------------------------
;;; Service Health
;;; ------------------------------------------------------------

(defrule evaluate-service-health
  "Classify a service as healthy or degraded from the share of its pools that have a degradation fact."
  ;; The pools are counted by fact queries on the RHS.  Those queries run
  ;; once, when the rule fires, and are not redone when degradation facts
  ;; are asserted afterwards; the (not (service-status ...)) guard then
  ;; keeps the rule from firing again for the same service.
  ;;
  ;; The detect-* rules run at the default salience (0).  This rule must
  ;; therefore use a salience BELOW 0 so that it fires only after they have
  ;; finished; with a positive salience it fires first and counts none of
  ;; the degradations detected in the same run, so the service would always
  ;; be reported healthy with a ratio of 0.
  (declare (salience -10))
  (service-config (service ?s) (service-degrade-threshold ?thresh))
  ;; Only services that have at least one pool with a current weight.
  (exists (current-weight (service ?s)))
  ;; Evaluate once: skip services that already have a status fact.
  (not (service-status (service ?s)))
  =>
  (bind ?total 0)
  (bind ?degraded 0)
  ;; Walk every pool of the service; a pool counts as degraded if it has a
  ;; degradation fact for this service, whatever the reason.
  (do-for-all-facts ((?w current-weight)) (eq ?w:service ?s)
    (bind ?total (+ ?total 1))
    (if (any-factp ((?d degradation)) (and (eq ?d:pool ?w:pool) (eq ?d:service ?s)))
      then (bind ?degraded (+ ?degraded 1))))
  ;; The exists CE means at least one pool was present when the rule was
  ;; activated; the zero check is kept as a guard against division by zero.
  (bind ?ratio (if (> ?total 0) then (/ ?degraded ?total) else 0.0))
  (assert (service-status (service ?s)
                          (status (if (>= ?ratio ?thresh) then degraded else healthy))
                          (degraded-ratio ?ratio))))

(defrule alert-service-degraded
  "Raise a single service-degraded alert for a service whose status is degraded."
  (service-status (service ?svc) (status degraded) (degraded-ratio ?ratio))
  ;; Any stats fact of the service supplies the alert timestamp; which
  ;; pool's fact is used is not constrained.
  (pool-service-stats (service ?svc) (timestamp ?now))
  ;; Do not repeat an alert that already exists for this service.
  (not (alert (type service-degraded) (service ?svc)))
  =>
  ;; Ratio expressed as a whole-number percentage (truncated).
  (bind ?percent (integer (* ?ratio 100)))
  (bind ?text (str-cat "Service " ?svc " degraded: " ?percent "% of pools affected"))
  (assert (alert (type service-degraded) (service ?svc)
                 (message ?text)
                 (timestamp ?now))))

;;; ------------------------------------------------------------
;;; Weight Reduction (Merged into single rule)
;;; ------------------------------------------------------------

(defrule reduce-weight
  "Propose a lower weight for a degraded pool of an otherwise healthy service."
  (degradation (pool ?pool) (service ?svc) (reason ?why) (severity ?sev) (timestamp ?now))
  ;; Pools are only penalised while the service as a whole is healthy.
  (service-status (service ?svc) (status healthy))
  (current-weight (pool ?pool) (service ?svc) (base-weight ?base) (effective-weight ?eff))
  (service-config (service ?svc) (weight-reduction ?factor) (min-weight ?floor))
  ;; Nothing to do once the pool already sits at the floor.
  (test (> ?eff ?floor))
  ;; One pending adjustment per pool/service at a time.
  (not (weight-adjustment (pool ?pool) (service ?svc)))
  =>
  ;; Default: leave the weight where it is.
  (bind ?target ?eff)
  (if (= ?eff ?base)
    then
      ;; First reduction: scale the weight by the configured factor.
      (bind ?target (clamp-weight (* ?eff ?factor) ?floor ?base))
    else
      ;; Already reduced: halve again, but only for severe degradations.
      (if (> ?sev 3.0)
        then (bind ?target (clamp-weight (/ ?eff 2) ?floor ?base))))
  ;; Emit an adjustment only when the weight actually goes down.
  (if (< ?target ?eff)
    then (assert (weight-adjustment (pool ?pool) (service ?svc)
                   (old-weight ?eff) (new-weight ?target)
                   (reason ?why) (severity ?sev) (timestamp ?now)))))

;;; ------------------------------------------------------------
;;; Weight Restoration
;;; ------------------------------------------------------------

(defrule mark-healthy
  "Record when a pool with no degradation facts started being healthy."
  (current-weight (pool ?pool) (service ?svc))
  (pool-service-stats (pool ?pool) (service ?svc) (timestamp ?now))
  ;; No degradation of any kind for this pool/service ...
  (not (degradation (pool ?pool) (service ?svc)))
  ;; ... and no healthy-since mark yet, so an existing mark is never overwritten.
  (not (pool-healthy-since (pool ?pool) (service ?svc)))
  =>
  ;; The healthy period starts at the timestamp of the matched stats sample.
  (assert (pool-healthy-since (pool ?pool) (service ?svc) (since ?now))))

(defrule clear-healthy-on-degradation
  "Drop the healthy-since mark of a pool as soon as any degradation exists for it."
  ?mark <- (pool-healthy-since (pool ?pool) (service ?svc))
  ;; Any reason counts; the reason slot is deliberately left unconstrained.
  (degradation (pool ?pool) (service ?svc))
  =>
  ;; Removing the mark also removes the basis for restore-weight's cooldown.
  (retract ?mark))

(defrule restore-weight
  "Step a reduced pool's weight back toward its base weight once it has stayed healthy past the cooldown."
  ;; Fire after the default-salience rules.  A new stats fact activates this
  ;; rule and the detect-* rules at the same time; at equal salience this
  ;; rule could fire first, while the healthy-since mark from an earlier
  ;; sample is still present, and propose a `recovery` increase for a pool
  ;; whose fresh sample is degraded.  That adjustment would then also block
  ;; reduce-weight.  With a lower salience the detect-* rules and
  ;; clear-healthy-on-degradation run first and retract the mark.
  (declare (salience -20))
  (pool-healthy-since (pool ?p) (service ?s) (since ?since))
  (pool-service-stats (pool ?p) (service ?s) (timestamp ?t))
  ;; Only pools whose effective weight is below their base weight.
  (current-weight (pool ?p) (service ?s) (base-weight ?base)
                  (effective-weight ?eff&:(< ?eff ?base)))
  (service-config (service ?s) (restore-cooldown ?cd))
  ;; The pool must have been healthy for longer than the cooldown.
  (test (> (- ?t ?since) ?cd))
  ;; One pending adjustment per pool/service at a time.
  (not (weight-adjustment (pool ?p) (service ?s)))
  =>
  ;; Restore in steps of a quarter of the base weight (truncated), at least 1.
  (bind ?step (max 1 (integer (/ ?base 4))))
  ;; Never propose more than the base weight.
  (assert (weight-adjustment (pool ?p) (service ?s)
            (old-weight ?eff) (new-weight (min ?base (+ ?eff ?step)))
            (reason recovery) (severity 0.0) (timestamp ?t))))