mirror of
				https://github.com/bunny-lab-io/Borealis.git
				synced 2025-10-26 17:21:58 -06:00 
			
		
		
		
	
		
			
				
	
	
		
			908 lines
		
	
	
		
			39 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			908 lines
		
	
	
		
			39 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os
 | |
| import sys
 | |
| import asyncio
 | |
| import tempfile
 | |
| import uuid
 | |
| import time
 | |
| import json
 | |
| import socket
 | |
| import subprocess
 | |
| from typing import Optional
 | |
| 
 | |
| try:
 | |
|     import winrm  # type: ignore
 | |
| except Exception:
 | |
|     winrm = None
 | |
| 
 | |
# Local Windows account used for WinRM-based playbook execution.
DEFAULT_SERVICE_ACCOUNT = r".\svcBorealis"

# Older account names. When the server hands one of these back, the role
# requests a credential rotation instead of using it.
LEGACY_SERVICE_ACCOUNTS = {
    r".\svcBorealisAnsibleRunner",
    "svcBorealisAnsibleRunner",
}

# Role metadata (presumably consumed by the agent's role loader -- confirm).
ROLE_NAME = "playbook_exec_system"
ROLE_CONTEXTS = ["system"]
 | |
| 
 | |
| 
 | |
def _project_root():
    """Return the Borealis project root directory.

    Starting at this file's directory, climb at most eight levels and return
    the first directory that contains ``Borealis.ps1`` or a ``.git`` folder.
    When no marker is found the directory two levels above this file is
    returned; if anything raises, the directory one level above is returned.
    """
    try:
        candidate = os.path.abspath(os.path.dirname(__file__))
        remaining = 8
        while remaining > 0:
            remaining -= 1
            has_launcher = os.path.exists(os.path.join(candidate, 'Borealis.ps1'))
            if has_launcher or os.path.isdir(os.path.join(candidate, '.git')):
                return candidate
            above = os.path.dirname(candidate)
            if above == candidate:
                # Reached the filesystem root; nothing further to inspect.
                break
            candidate = above
        two_up = os.path.join(os.path.dirname(__file__), '..', '..')
        return os.path.abspath(two_up)
    except Exception:
        one_up = os.path.join(os.path.dirname(__file__), '..')
        return os.path.abspath(one_up)
 | |
| 
 | |
| 
 | |
def _agent_root():
    """Return the Agent root directory, resolved at runtime.

    The expected layout is ``<ProjectRoot>/Agent/Borealis/Roles/<this_file>``,
    so the Agent root is three directories above the one holding this file.
    If that computation raises, ``<project root>/Agent`` is returned instead.
    """
    try:
        roles_dir = os.path.abspath(os.path.dirname(__file__))
        # Agent/Borealis/Roles -> Agent
        three_up = os.path.join(roles_dir, '..', '..', '..')
        return os.path.abspath(three_up)
    except Exception:
        fallback = os.path.join(_project_root(), 'Agent')
        return os.path.abspath(fallback)
 | |
| 
 | |
| 
 | |
def _scripts_bin():
    """Return the virtualenv executable directory under the Agent root.

    ``Scripts`` (Windows venv layout) is tried first, then ``bin`` (POSIX venv
    layout). Returns ``None`` when neither directory exists.
    """
    root = _agent_root()
    windows_dir = os.path.join(root, 'Scripts')
    posix_dir = os.path.join(root, 'bin')
    if os.path.isdir(windows_dir):
        return windows_dir
    if os.path.isdir(posix_dir):
        return posix_dir
    return None
 | |
| 
 | |
| 
 | |
def _ansible_playbook_cmd():
    """Return the command used to launch ``ansible-playbook``.

    The full path inside the venv executable directory is returned when that
    file exists; otherwise the bare executable name is returned so the caller
    can rely on ``PATH`` lookup.
    """
    if os.name == 'nt':
        name = 'ansible-playbook.exe'
    else:
        name = 'ansible-playbook'
    bin_dir = _scripts_bin()
    if not bin_dir:
        return name
    local_copy = os.path.join(bin_dir, name)
    return local_copy if os.path.isfile(local_copy) else name
 | |
| 
 | |
def _ansible_galaxy_cmd():
    """Return the command used to launch ``ansible-galaxy``.

    The full path inside the venv executable directory is returned when that
    file exists; otherwise the bare executable name is returned so the caller
    can rely on ``PATH`` lookup.
    """
    if os.name == 'nt':
        name = 'ansible-galaxy.exe'
    else:
        name = 'ansible-galaxy'
    bin_dir = _scripts_bin()
    if not bin_dir:
        return name
    local_copy = os.path.join(bin_dir, name)
    return local_copy if os.path.isfile(local_copy) else name
 | |
| 
 | |
def _collections_dir():
    """Return ``<project root>/Agent/Borealis/AnsibleCollections``.

    The directory is created on a best-effort basis; the path is returned even
    when creation fails.
    """
    parts = ('Agent', 'Borealis', 'AnsibleCollections')
    target = os.path.join(_project_root(), *parts)
    try:
        os.makedirs(target, exist_ok=True)
    except Exception:
        # Best effort only: callers still receive the intended location.
        pass
    return target
 | |
| 
 | |
def _venv_python():
    """Return the venv Python interpreter path, or ``None`` when unavailable.

    Looks for ``python.exe`` (Windows) or ``python3`` (elsewhere) inside the
    venv executable directory. Any error is treated as "not found".
    """
    try:
        bin_dir = _scripts_bin()
        if bin_dir:
            interpreter = 'python3' if os.name != 'nt' else 'python.exe'
            full_path = os.path.join(bin_dir, interpreter)
            if os.path.isfile(full_path):
                return full_path
        return None
    except Exception:
        return None
 | |
| 
 | |
| 
 | |
| class Role:
 | |
|     def __init__(self, ctx):
 | |
|         self.ctx = ctx
 | |
|         self._runs = {}  # run_id -> { proc, task, cancel }
 | |
|         self._svc_creds = None  # cache per-process: {username, password}
 | |
|         try:
 | |
|             base = os.path.join(_project_root(), 'Logs', 'Agent')
 | |
|             os.makedirs(base, exist_ok=True)
 | |
|             self._ansible_log(f"[init] PlaybookExec role init agent_id={ctx.agent_id}")
 | |
|         except Exception:
 | |
|             pass
 | |
| 
 | |
|     def _log_local(self, msg: str, error: bool = False):
 | |
|         try:
 | |
|             base = os.path.join(_project_root(), 'Logs', 'Agent')
 | |
|             os.makedirs(base, exist_ok=True)
 | |
|             fn = 'agent.error.log' if error else 'agent.log'
 | |
|             ts = time.strftime('%Y-%m-%d %H:%M:%S')
 | |
|             with open(os.path.join(base, fn), 'a', encoding='utf-8') as fh:
 | |
|                 fh.write(f'[{ts}] [PlaybookExec] {msg}\n')
 | |
|         except Exception:
 | |
|             pass
 | |
| 
 | |
|     def _server_base(self) -> str:
 | |
|         try:
 | |
|             fn = (self.ctx.hooks or {}).get('get_server_url')
 | |
|             if callable(fn):
 | |
|                 return (fn() or 'http://localhost:5000').rstrip('/')
 | |
|         except Exception:
 | |
|             pass
 | |
|         return 'http://localhost:5000'
 | |
| 
 | |
|     def _ansible_log(self, msg: str, error: bool = False, run_id: str = None):
 | |
|         try:
 | |
|             d = os.path.join(_project_root(), 'Logs', 'Agent')
 | |
|             ts = time.strftime('%Y-%m-%d %H:%M:%S')
 | |
|             path = os.path.join(d, 'ansible.log')
 | |
|             try:
 | |
|                 os.makedirs(d, exist_ok=True)
 | |
|             except Exception:
 | |
|                 pass
 | |
|             # rotate daily
 | |
|             try:
 | |
|                 if os.path.isfile(path):
 | |
|                     import datetime as _dt
 | |
|                     dt = _dt.datetime.fromtimestamp(os.path.getmtime(path))
 | |
|                     if dt.date() != _dt.datetime.now().date():
 | |
|                         base, ext = os.path.splitext(path)
 | |
|                         os.replace(path, f"{base}.{dt.strftime('%Y-%m-%d')}{ext}")
 | |
|             except Exception:
 | |
|                 pass
 | |
|             with open(path, 'a', encoding='utf-8') as fh:
 | |
|                 fh.write(f'[{ts}] {msg}\n')
 | |
|             if run_id:
 | |
|                 rp = os.path.join(d, f'run_{run_id}.log')
 | |
|                 with open(rp, 'a', encoding='utf-8') as rf:
 | |
|                     rf.write(f'[{ts}] {msg}\n')
 | |
|         except Exception:
 | |
|             pass
 | |
| 
 | |
|     async def _fetch_service_creds(self) -> dict:
 | |
|         if self._svc_creds and isinstance(self._svc_creds, dict):
 | |
|             return self._svc_creds
 | |
|         try:
 | |
|             import aiohttp
 | |
|             url = self._server_base().rstrip('/') + '/api/agent/checkin'
 | |
|             payload = {
 | |
|                 'agent_id': self.ctx.agent_id,
 | |
|                 'hostname': socket.gethostname(),
 | |
|                 'username': DEFAULT_SERVICE_ACCOUNT,
 | |
|             }
 | |
|             self._ansible_log(f"[checkin] POST {url} agent_id={self.ctx.agent_id}")
 | |
|             timeout = aiohttp.ClientTimeout(total=15)
 | |
|             async with aiohttp.ClientSession(timeout=timeout) as sess:
 | |
|                 async with sess.post(url, json=payload) as resp:
 | |
|                     js = await resp.json()
 | |
|             u = (js or {}).get('username') or DEFAULT_SERVICE_ACCOUNT
 | |
|             p = (js or {}).get('password') or ''
 | |
|             if u in LEGACY_SERVICE_ACCOUNTS:
 | |
|                 self._ansible_log(f"[checkin] legacy service username {u!r}; requesting rotate", error=True)
 | |
|                 return await self._rotate_service_creds(reason='legacy_username', force_username=DEFAULT_SERVICE_ACCOUNT)
 | |
|             self._svc_creds = {'username': u, 'password': p}
 | |
|             self._ansible_log(f"[checkin] received user={u} pw_len={len(p)}")
 | |
|             return self._svc_creds
 | |
|         except Exception:
 | |
|             self._ansible_log(f"[checkin] failed agent_id={self.ctx.agent_id}", error=True)
 | |
|             return {'username': DEFAULT_SERVICE_ACCOUNT, 'password': ''}
 | |
| 
 | |
|     def _normalize_playbook_content(self, content: str) -> str:
 | |
|         try:
 | |
|             # Heuristic fixes to honor our WinRM localhost inventory:
 | |
|             # - Replace hosts: localhost -> hosts: local (group name used by inventory)
 | |
|             # - Remove explicit "connection: local" if present
 | |
|             lines = (content or '').splitlines()
 | |
|             out = []
 | |
|             for ln in lines:
 | |
|                 s = ln.strip().lower()
 | |
|                 if s.startswith('connection:') and 'local' in s:
 | |
|                     continue
 | |
|                 if s.startswith('hosts:') and ('localhost' in s or '127.0.0.1' in s):
 | |
|                     indent = ln.split('h')[0]
 | |
|                     out.append(f"{indent}hosts: local")
 | |
|                     continue
 | |
|                 out.append(ln)
 | |
|             return '\n'.join(out) + ('\n' if not content.endswith('\n') else '')
 | |
|         except Exception:
 | |
|             return content
 | |
| 
 | |
|     async def _rotate_service_creds(self, reason: str = 'bad_credentials', force_username: Optional[str] = None) -> dict:
 | |
|         try:
 | |
|             import aiohttp
 | |
|             url = self._server_base().rstrip('/') + '/api/agent/service-account/rotate'
 | |
|             payload = {
 | |
|                 'agent_id': self.ctx.agent_id,
 | |
|                 'reason': reason,
 | |
|             }
 | |
|             if force_username:
 | |
|                 payload['username'] = force_username
 | |
|             self._ansible_log(f"[rotate] POST {url} agent_id={self.ctx.agent_id}")
 | |
|             timeout = aiohttp.ClientTimeout(total=15)
 | |
|             async with aiohttp.ClientSession(timeout=timeout) as sess:
 | |
|                 async with sess.post(url, json=payload) as resp:
 | |
|                     js = await resp.json()
 | |
|             u = (js or {}).get('username') or force_username or DEFAULT_SERVICE_ACCOUNT
 | |
|             p = (js or {}).get('password') or ''
 | |
|             if u in LEGACY_SERVICE_ACCOUNTS and force_username != DEFAULT_SERVICE_ACCOUNT:
 | |
|                 self._ansible_log(f"[rotate] legacy username {u!r} returned; retrying with default", error=True)
 | |
|                 return await self._rotate_service_creds(reason='legacy_username', force_username=DEFAULT_SERVICE_ACCOUNT)
 | |
|             if u in LEGACY_SERVICE_ACCOUNTS:
 | |
|                 u = DEFAULT_SERVICE_ACCOUNT
 | |
|             self._svc_creds = {'username': u, 'password': p}
 | |
|             self._ansible_log(f"[rotate] received user={u} pw_len={len(p)}")
 | |
|             return self._svc_creds
 | |
|         except Exception:
 | |
|             self._ansible_log(f"[rotate] failed agent_id={self.ctx.agent_id}", error=True)
 | |
|             return await self._fetch_service_creds()
 | |
| 
 | |
|     def _ps_module_path(self) -> str:
 | |
|         # Place PS module under Roles so it's deployed with the agent
 | |
|         try:
 | |
|             here = os.path.abspath(os.path.dirname(__file__))
 | |
|             p = os.path.join(here, 'Borealis.WinRM.Localhost.psm1')
 | |
|             return p
 | |
|         except Exception:
 | |
|             return ''
 | |
| 
 | |
|     def _ensure_winrm_and_user(self, username: str, password: str):
 | |
|         if os.name != 'nt':
 | |
|             return
 | |
|         mod = self._ps_module_path()
 | |
|         log_dir = os.path.join(_project_root(), 'Logs', 'Agent')
 | |
|         try:
 | |
|             os.makedirs(log_dir, exist_ok=True)
 | |
|         except Exception:
 | |
|             pass
 | |
|         if not os.path.isfile(mod):
 | |
|             # best effort with inline commands
 | |
|             try:
 | |
|                 r = subprocess.run(['powershell', '-NoProfile', '-Command', 'Set-Service WinRM -StartupType Automatic; Start-Service WinRM; (Get-Service WinRM).Status'], capture_output=True, text=True, timeout=60)
 | |
|                 self._ansible_log(f"[ensure] basic winrm start rc={r.returncode} out={r.stdout} err={r.stderr}", error=r.returncode!=0)
 | |
|             except Exception as e:
 | |
|                 self._ansible_log(f"[ensure] winrm start exception: {e}", error=True)
 | |
|             return
 | |
|         # Robust execution via temp PS file
 | |
|         tmp_dir = os.path.join(_project_root(), 'Temp')
 | |
|         os.makedirs(tmp_dir, exist_ok=True)
 | |
|         ps_path = os.path.join(tmp_dir, f"ansible_bootstrap_{int(time.time())}.ps1")
 | |
|         ensure_log = os.path.join(log_dir, f"ensure_winrm_{int(time.time())}.log")
 | |
|         ps_template = r"""$ErrorActionPreference='Continue'
 | |
| try {{
 | |
|   Import-Module -Name '{mod}' -Force
 | |
|   'Imported module: {mod}' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   $user = '{username}'
 | |
|   $pw = '{password}'
 | |
|   Ensure-LocalhostWinRMHttps | Out-Null
 | |
|   'Ensured WinRM HTTPS listener on 127.0.0.1:5986' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   Ensure-BorealisServiceUser -UserName $user -PlaintextPassword $pw | Out-Null
 | |
|   'Ensured service user: ' + $user | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   try {{
 | |
|     $ln = $user
 | |
|     if ($ln.StartsWith('.\')) {{ $ln = $ln.Substring(2) }}
 | |
|     $exists = Get-LocalUser -Name $ln -ErrorAction SilentlyContinue
 | |
|     if (-not $exists) {{
 | |
|       'Fallback: Using NET USER to create account' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|       cmd /c "net user $ln `"{password}`" /ADD /Y" | Out-Null
 | |
|       cmd /c "net localgroup Administrators $ln /ADD" | Out-Null
 | |
|     }}
 | |
|   }} catch {{
 | |
|     'Fallback path failed: ' + $_ | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   }}
 | |
|   try {{ (Get-WSManInstance -ResourceURI winrm/config/listener -Enumerate) | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8 }} catch {{}}
 | |
|   try {{
 | |
|     $ln2 = $user
 | |
|     if ($ln2.StartsWith('.\')) {{ $ln2 = $ln2.Substring(2) }}
 | |
|     Get-LocalUser | Where-Object {{ $_.Name -eq $ln2 }} | Format-List * | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   }} catch {{}}
 | |
|   try {{ whoami | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8 }} catch {{}}
 | |
|   exit 0
 | |
| }} catch {{
 | |
|   $_ | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
 | |
|   exit 1
 | |
| }}
 | |
| """
 | |
|         safe_mod = mod.replace("'", "''")
 | |
|         safe_log = ensure_log.replace("'", "''")
 | |
|         ps_content = ps_template.format(mod=safe_mod, ensure_log=safe_log, username=username.replace("'", "''"), password=password.replace("'", "''"))
 | |
|         try:
 | |
|             with open(ps_path, 'w', encoding='utf-8') as fh:
 | |
|                 fh.write(ps_content)
 | |
|         except Exception as e:
 | |
|             self._ansible_log(f"[ensure] write PS failed: {e}", error=True)
 | |
|         try:
 | |
|             r = subprocess.run(['powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', ps_path], capture_output=True, text=True, timeout=180)
 | |
|             self._ansible_log(f"[ensure] bootstrap rc={r.returncode} out_len={len(r.stdout or '')} err_len={len(r.stderr or '')}", error=r.returncode!=0)
 | |
|         except Exception as e:
 | |
|             self._ansible_log(f"[ensure] bootstrap exception: {e}", error=True)
 | |
| 
 | |
|     def _write_winrm_inventory(self, base_dir: str, username: str, password: str) -> str:
 | |
|         inv_dir = os.path.join(base_dir, 'inventory')
 | |
|         os.makedirs(inv_dir, exist_ok=True)
 | |
|         hosts = os.path.join(inv_dir, 'hosts')
 | |
|         try:
 | |
|             content = (
 | |
|                 "[local]\n" \
 | |
|                 "localhost\n\n" \
 | |
|                 "[local:vars]\n" \
 | |
|                 "ansible_connection=winrm\n" \
 | |
|                 "ansible_host=127.0.0.1\n" \
 | |
|                 "ansible_port=5986\n" \
 | |
|                 "ansible_winrm_scheme=https\n" \
 | |
|                 "ansible_winrm_transport=ntlm\n" \
 | |
|                 f"ansible_user={username}\n" \
 | |
|                 f"ansible_password={password}\n" \
 | |
|                 "ansible_winrm_server_cert_validation=ignore\n"
 | |
|             )
 | |
|             with open(hosts, 'w', encoding='utf-8', newline='\n') as fh:
 | |
|                 fh.write(content)
 | |
|         except Exception:
 | |
|             pass
 | |
|         return hosts
 | |
| 
 | |
|     def _winrm_preflight(self, username: str, password: str) -> bool:
 | |
|         if os.name != 'nt' or winrm is None:
 | |
|             return True
 | |
|         try:
 | |
|             s = winrm.Session('https://127.0.0.1:5986', auth=(username, password), transport='ntlm', server_cert_validation='ignore')
 | |
|             r = s.run_cmd('whoami')
 | |
|             ok = (r.status_code == 0)
 | |
|             try:
 | |
|                 so = getattr(r, 'std_out', b'')
 | |
|                 se = getattr(r, 'std_err', b'')
 | |
|                 self._ansible_log(f"[preflight] rc={r.status_code} out={so[:120]!r} err={se[:120]!r}")
 | |
|             except Exception:
 | |
|                 pass
 | |
|             return ok
 | |
|         except Exception as exc:
 | |
|             self._ansible_log(f"[preflight] exception during winrm session: {exc}", error=True)
 | |
|             return False
 | |
| 
 | |
|     async def _post_recap(self, payload: dict):
 | |
|         try:
 | |
|             import aiohttp
 | |
|             url = self._server_base().rstrip('/') + '/api/ansible/recap/report'
 | |
|             timeout = aiohttp.ClientTimeout(total=30)
 | |
|             async with aiohttp.ClientSession(timeout=timeout) as sess:
 | |
|                 async with sess.post(url, json=payload) as resp:
 | |
|                     # best-effort; ignore body
 | |
|                     await resp.read()
 | |
|             self._log_local(f"Posted recap: run_id={payload.get('run_id')} status={payload.get('status')} bytes={len((payload.get('recap_text') or '').encode('utf-8'))}")
 | |
|         except Exception:
 | |
|             self._log_local(f"Failed to post recap for run_id={payload.get('run_id')}", error=True)
 | |
| 
 | |
|     async def _run_playbook_runner(self, run_id: str, playbook_content: str, playbook_name: str = '', activity_job_id=None, connection: str = 'local'):
 | |
|         try:
 | |
|             import ansible_runner  # type: ignore
 | |
|         except Exception as e:
 | |
|             self._ansible_log(f"[runner] ansible_runner import failed: {e}")
 | |
|             return False
 | |
| 
 | |
|         tmp_dir = os.path.join(_project_root(), 'Temp')
 | |
|         os.makedirs(tmp_dir, exist_ok=True)
 | |
|         pd = tempfile.mkdtemp(prefix='ar_', dir=tmp_dir)
 | |
|         project = os.path.join(pd, 'project')
 | |
|         inventory_dir = os.path.join(pd, 'inventory')
 | |
|         env_dir = os.path.join(pd, 'env')
 | |
|         os.makedirs(project, exist_ok=True)
 | |
|         os.makedirs(inventory_dir, exist_ok=True)
 | |
|         os.makedirs(env_dir, exist_ok=True)
 | |
| 
 | |
|         play_rel = 'playbook.yml'
 | |
|         play_abs = os.path.join(project, play_rel)
 | |
|         _norm = self._normalize_playbook_content(playbook_content or '')
 | |
|         with open(play_abs, 'w', encoding='utf-8', newline='\n') as fh:
 | |
|             fh.write(_norm)
 | |
|         self._ansible_log(f"[runner] prepared playbook={play_abs} bytes={len(_norm.encode('utf-8'))}")
 | |
|         # WinRM service account credentials
 | |
|         creds = await self._fetch_service_creds()
 | |
|         user = creds.get('username') or DEFAULT_SERVICE_ACCOUNT
 | |
|         pwd = creds.get('password') or ''
 | |
|         # Converge endpoint state (listener + user)
 | |
|         self._ensure_winrm_and_user(user, pwd)
 | |
|         # Preflight auth and auto-rotate if needed
 | |
|         pre_ok = self._winrm_preflight(user, pwd)
 | |
|         if not pre_ok:
 | |
|             # rotate and retry once
 | |
|             creds = await self._rotate_service_creds(reason='winrm_preflight_failure')
 | |
|             user = creds.get('username') or user
 | |
|             pwd = creds.get('password') or ''
 | |
|             self._ensure_winrm_and_user(user, pwd)
 | |
|         # Write inventory for winrm localhost
 | |
|         inv_file = self._write_winrm_inventory(pd, user, pwd)
 | |
|         self._ansible_log(f"[runner] inventory={inv_file} user={user}")
 | |
| 
 | |
|         # Set connection via envvars
 | |
|         with open(os.path.join(env_dir, 'envvars'), 'w', encoding='utf-8', newline='\n') as fh:
 | |
|             json.dump({ 'ANSIBLE_FORCE_COLOR': '0', 'ANSIBLE_STDOUT_CALLBACK': 'default' }, fh)
 | |
| 
 | |
|         hostname = socket.gethostname()
 | |
|         agent_id = self.ctx.agent_id
 | |
|         started = int(time.time())
 | |
|         await self._post_recap({
 | |
|             'run_id': run_id,
 | |
|             'hostname': hostname,
 | |
|             'agent_id': agent_id,
 | |
|             'playbook_path': play_abs,
 | |
|             'playbook_name': playbook_name or os.path.basename(play_abs),
 | |
|             'activity_job_id': activity_job_id,
 | |
|             'status': 'Running',
 | |
|             'started_ts': started,
 | |
|         })
 | |
| 
 | |
|         lines = []
 | |
|         recap_json = None
 | |
| 
 | |
|         def _on_event(ev):
 | |
|             nonlocal lines, recap_json
 | |
|             try:
 | |
|                 if not isinstance(ev, dict):
 | |
|                     return
 | |
|                 # Capture minimal textual progress
 | |
|                 tx = ev.get('stdout') or ''
 | |
|                 if tx:
 | |
|                     lines.append(str(tx))
 | |
|                     if len(lines) > 5000:
 | |
|                         lines = lines[-2500:]
 | |
|                 # Capture final stats
 | |
|                 if (ev.get('event') or '') == 'playbook_on_stats':
 | |
|                     d = ev.get('event_data') or {}
 | |
|                     # ansible-runner provides per-host stats under 'res'
 | |
|                     recap_json = d.get('res') or d.get('stats') or d
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|         cancel_token = self._runs.get(run_id)
 | |
|         def _cancel_cb():
 | |
|             try:
 | |
|                 return bool(cancel_token and cancel_token.get('cancel'))
 | |
|             except Exception:
 | |
|                 return False
 | |
| 
 | |
|         auth_failed = False
 | |
|         try:
 | |
|             r = ansible_runner.interface.run(
 | |
|                 private_data_dir=pd,
 | |
|                 playbook=play_rel,
 | |
|                 inventory=inv_file,
 | |
|                 quiet=True,
 | |
|                 event_handler=_on_event,
 | |
|                 cancel_callback=_cancel_cb,
 | |
|                 extravars={}
 | |
|             )
 | |
|             try:
 | |
|                 self._ansible_log(f"[runner] finished status={getattr(r,'status',None)} rc={getattr(r,'rc',None)}")
 | |
|             except Exception:
 | |
|                 pass
 | |
|             status = 'Cancelled' if _cancel_cb() else 'Success'
 | |
|             try:
 | |
|                 # Some auth failures bubble up in events only; inspect last few lines
 | |
|                 tail = '\n'.join(lines[-50:]).lower()
 | |
|                 if ('access is denied' in tail) or ('unauthorized' in tail) or ('cannot process the request' in tail):
 | |
|                     auth_failed = True
 | |
|                     self._ansible_log("[runner] detected auth failure in output", error=True)
 | |
|             except Exception:
 | |
|                 pass
 | |
|         except Exception:
 | |
|             status = 'Failed'
 | |
|             self._ansible_log("[runner] exception in ansible-runner", error=True)
 | |
| 
 | |
|         # Synthesize recap text from recap_json if available
 | |
|         recap_text = ''
 | |
|         try:
 | |
|             if isinstance(recap_json, dict):
 | |
|                 # Expect a single host 'localhost'
 | |
|                 stats = recap_json.get('localhost') or recap_json
 | |
|                 ok = int(stats.get('ok') or 0)
 | |
|                 changed = int(stats.get('changed') or 0)
 | |
|                 unreachable = int(stats.get('unreachable') or 0)
 | |
|                 failed = int(stats.get('failures') or stats.get('failed') or 0)
 | |
|                 skipped = int(stats.get('skipped') or 0)
 | |
|                 rescued = int(stats.get('rescued') or 0)
 | |
|                 ignored = int(stats.get('ignored') or 0)
 | |
|                 recap_text = (
 | |
|                     'PLAY RECAP *********************************************************************\n'
 | |
|                     f"localhost : ok={ok} changed={changed} unreachable={unreachable} failed={failed} skipped={skipped} rescued={rescued} ignored={ignored}"
 | |
|                 )
 | |
|         except Exception:
 | |
|             recap_text = ''
 | |
| 
 | |
|         await self._post_recap({
 | |
|             'run_id': run_id,
 | |
|             'hostname': hostname,
 | |
|             'agent_id': agent_id,
 | |
|             'status': status,
 | |
|             'recap_text': recap_text,
 | |
|             'recap_json': recap_json,
 | |
|             'finished_ts': int(time.time()),
 | |
|         })
 | |
|         self._ansible_log(f"[runner] recap posted status={status}")
 | |
|         # If authentication failed on first pass, rotate password and try once more
 | |
|         if auth_failed:
 | |
|             try:
 | |
|                 newc = await self._rotate_service_creds(reason='auth_failed_retry')
 | |
|                 user2 = newc.get('username') or user
 | |
|                 pwd2 = newc.get('password') or ''
 | |
|                 self._ensure_winrm_and_user(user2, pwd2)
 | |
|                 # Recurse once with updated creds
 | |
|                 await self._run_playbook_runner(run_id, playbook_content, playbook_name=playbook_name, activity_job_id=activity_job_id, connection=connection)
 | |
|                 return True
 | |
|             except Exception:
 | |
|                 self._ansible_log("[runner] rotate+retry failed", error=True)
 | |
|                 pass
 | |
|         return True
 | |
| 
 | |
|     async def _run_playbook(self, run_id: str, playbook_content: str, playbook_name: str = '', activity_job_id=None, connection: str = 'local'):
 | |
|         # Write playbook temp
 | |
|         tmp_dir = os.path.join(_project_root(), 'Temp')
 | |
|         os.makedirs(tmp_dir, exist_ok=True)
 | |
|         fd, path = tempfile.mkstemp(prefix='pb_', suffix='.yml', dir=tmp_dir, text=True)
 | |
|         _norm2 = self._normalize_playbook_content(playbook_content or '')
 | |
|         with os.fdopen(fd, 'w', encoding='utf-8', newline='\n') as fh:
 | |
|             fh.write(_norm2)
 | |
|         self._ansible_log(f"[cli] prepared playbook={path} bytes={len(_norm2.encode('utf-8'))}")
 | |
| 
 | |
|         hostname = socket.gethostname()
 | |
|         agent_id = self.ctx.agent_id
 | |
| 
 | |
|         started = int(time.time())
 | |
|         await self._post_recap({
 | |
|             'run_id': run_id,
 | |
|             'hostname': hostname,
 | |
|             'agent_id': agent_id,
 | |
|             'playbook_path': path,
 | |
|             'playbook_name': playbook_name or os.path.basename(path),
 | |
|             'activity_job_id': activity_job_id,
 | |
|             'status': 'Running',
 | |
|             'started_ts': started,
 | |
|         })
 | |
| 
 | |
|         # Prefer WinRM localhost via inventory when on Windows; otherwise fallback to provided connection
 | |
|         inv_file_cli = None
 | |
|         conn = (connection or 'local').strip().lower()
 | |
|         if os.name == 'nt':
 | |
|             try:
 | |
|                 creds = await self._fetch_service_creds()
 | |
|                 user = creds.get('username') or DEFAULT_SERVICE_ACCOUNT
 | |
|                 pwd = creds.get('password') or ''
 | |
|                 self._ensure_winrm_and_user(user, pwd)
 | |
|                 if not self._winrm_preflight(user, pwd):
 | |
|                     creds = await self._rotate_service_creds(reason='winrm_preflight_failure')
 | |
|                     user = creds.get('username') or user
 | |
|                     pwd = creds.get('password') or ''
 | |
|                     self._ensure_winrm_and_user(user, pwd)
 | |
|                 # Create temp inventory adjacent to playbook
 | |
|                 inv_file_cli = self._write_winrm_inventory(os.path.dirname(path), user, pwd)
 | |
|             except Exception:
 | |
|                 inv_file_cli = None
 | |
|         # Build CLI; resolve ansible-playbook or fallback to python -m ansible.cli.playbook
 | |
|         ap = _ansible_playbook_cmd()
 | |
|         use_module = False
 | |
|         if os.path.dirname(ap) and not os.path.isfile(ap):
 | |
|             # If we got a path but it doesn't exist, switch to module mode
 | |
|             use_module = True
 | |
|         elif not os.path.dirname(ap):
 | |
|             # bare command; verify existence in PATH
 | |
|             from shutil import which
 | |
|             if which(ap) is None:
 | |
|                 use_module = True
 | |
|         if use_module:
 | |
|             py = _venv_python() or sys.executable
 | |
|             base_cmd = [py, '-X', 'utf8', '-m', 'ansible.cli.playbook']
 | |
|             self._ansible_log(f"[cli] ansible-playbook not found; using python -m ansible.cli.playbook via {py}")
 | |
|         else:
 | |
|             base_cmd = [ap]
 | |
| 
 | |
|         if inv_file_cli and os.path.isfile(inv_file_cli):
 | |
|             cmd = base_cmd + [path, '-i', inv_file_cli]
 | |
|             self._log_local(f"Launching ansible-playbook with WinRM inventory: {' '.join(cmd)}")
 | |
|             self._ansible_log(f"[cli] cmd={' '.join(cmd)} inv={inv_file_cli}")
 | |
|         else:
 | |
|             if conn not in ('local', 'winrm', 'psrp'):
 | |
|                 conn = 'local'
 | |
|             cmd = base_cmd + [path, '-i', 'localhost,', '-c', conn]
 | |
|             self._log_local(f"Launching ansible-playbook: conn={conn} cmd={' '.join(cmd)}")
 | |
|             self._ansible_log(f"[cli] cmd={' '.join(cmd)}")
 | |
|         # Ensure clean, plain output and correct interpreter for localhost
 | |
|         env = os.environ.copy()
 | |
|         env['ANSIBLE_FORCE_COLOR'] = '0'
 | |
|         env['ANSIBLE_NOCOLOR'] = '1'
 | |
|         env['PYTHONIOENCODING'] = 'utf-8'
 | |
|         env['PYTHONUTF8'] = '1'
 | |
|         env['ANSIBLE_STDOUT_CALLBACK'] = 'default'
 | |
|         env['ANSIBLE_LOCALHOST_WARNING'] = '0'
 | |
|         env['ANSIBLE_COLLECTIONS_PATHS'] = _collections_dir()
 | |
|         if os.name == 'nt':
 | |
|             env['LANG'] = 'en_US.UTF-8'
 | |
|             env['LC_ALL'] = 'en_US.UTF-8'
 | |
|             env['LC_CTYPE'] = 'en_US.UTF-8'
 | |
|         vp = _venv_python()
 | |
|         if vp:
 | |
|             env.setdefault('ANSIBLE_PYTHON_INTERPRETER', vp)
 | |
| 
 | |
|         self._ansible_log(f"[cli] locale env overrides: PYTHONUTF8={env.get('PYTHONUTF8')} LANG={env.get('LANG')} LC_ALL={env.get('LC_ALL')} LC_CTYPE={env.get('LC_CTYPE')}")
 | |
| 
 | |
|         creationflags = 0
 | |
|         if os.name == 'nt':
 | |
|             # CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW
 | |
|             creationflags = 0x00000200 | 0x08000000
 | |
| 
 | |
|         proc = None
 | |
|         try:
 | |
|             # Best-effort collection install for windows modules
 | |
|             try:
 | |
|                 if 'ansible.windows' in (playbook_content or ''):
 | |
|                     galaxy = _ansible_galaxy_cmd()
 | |
|                     coll_dir = _collections_dir()
 | |
|                     creation = 0x08000000 if os.name == 'nt' else 0
 | |
|                     self._log_local("Ensuring ansible.windows collection is installed for this agent")
 | |
|                     subprocess.run([galaxy, 'collection', 'install', 'ansible.windows', '-p', coll_dir], timeout=120, creationflags=creation)
 | |
|             except Exception:
 | |
|                 self._log_local("Collection install failed (continuing)")
 | |
| 
 | |
|             # Prefer ansible-runner when available (default on). Set BOREALIS_USE_ANSIBLE_RUNNER=0 to disable.
 | |
|             try:
 | |
|                 runner_pref = (os.environ.get('BOREALIS_USE_ANSIBLE_RUNNER', '1') or '1').strip().lower()
 | |
|                 if runner_pref not in ('0', 'false', 'no'):
 | |
|                     if os.name == 'nt' and runner_pref not in ('force',):
 | |
|                         self._ansible_log('[runner] skipping ansible-runner on Windows platform')
 | |
|                     else:
 | |
|                         used = await self._run_playbook_runner(run_id, playbook_content, playbook_name=playbook_name, activity_job_id=activity_job_id, connection=connection)
 | |
|                         if used:
 | |
|                             return
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|             proc = await asyncio.create_subprocess_exec(
 | |
|                 *cmd,
 | |
|                 stdout=asyncio.subprocess.PIPE,
 | |
|                 stderr=asyncio.subprocess.STDOUT,
 | |
|                 cwd=os.path.dirname(path),
 | |
|                 env=env,
 | |
|                 creationflags=creationflags,
 | |
|             )
 | |
|         except Exception as e:
 | |
|             self._log_local(f"Failed to launch ansible-playbook: {e}", error=True)
 | |
|             self._ansible_log(f"[cli] failed to launch: {e}", error=True)
 | |
|             await self._post_recap({
 | |
|                 'run_id': run_id,
 | |
|                 'hostname': hostname,
 | |
|                 'agent_id': agent_id,
 | |
|                 'status': 'Failed',
 | |
|                 'recap_text': f'Failed to launch ansible-playbook: {e}',
 | |
|                 'finished_ts': int(time.time()),
 | |
|             })
 | |
|             try:
 | |
|                 os.remove(path)
 | |
|             except Exception:
 | |
|                 pass
 | |
|             return
 | |
| 
 | |
|         # Track run for cancellation
 | |
|         self._runs[run_id]['proc'] = proc
 | |
| 
 | |
|         lines = []
 | |
|         recap_buffer = []
 | |
|         seen_recap = False
 | |
|         last_emit = 0
 | |
| 
 | |
|         async def emit_update(force=False):
 | |
|             nonlocal last_emit
 | |
|             now = time.time()
 | |
|             if not force and (now - last_emit) < 1.0:
 | |
|                 return
 | |
|             last_emit = now
 | |
|             txt = '\n'.join(recap_buffer[-80:]) if recap_buffer else ''
 | |
|             if not txt and lines:
 | |
|                 # show tail while running
 | |
|                 txt = '\n'.join(lines[-25:])
 | |
|             if txt:
 | |
|                 await self._post_recap({
 | |
|                     'run_id': run_id,
 | |
|                     'hostname': hostname,
 | |
|                     'agent_id': agent_id,
 | |
|                     'recap_text': txt,
 | |
|                     'status': 'Running',
 | |
|                 })
 | |
| 
 | |
|         try:
 | |
|             # Read combined stdout
 | |
|             while True:
 | |
|                 if proc.stdout is None:
 | |
|                     break
 | |
|                 bs = await proc.stdout.readline()
 | |
|                 if not bs:
 | |
|                     break
 | |
|                 try:
 | |
|                     line = bs.decode('utf-8', errors='replace').rstrip('\r\n')
 | |
|                 except Exception:
 | |
|                     line = str(bs)
 | |
|                 lines.append(line)
 | |
|                 self._ansible_log(f"[cli] {line}")
 | |
|                 if len(lines) > 5000:
 | |
|                     lines = lines[-2500:]
 | |
|                 # Detect recap section
 | |
|                 if not seen_recap and line.strip().upper().startswith('PLAY RECAP'):
 | |
|                     seen_recap = True
 | |
|                     recap_buffer.append(line)
 | |
|                 elif seen_recap:
 | |
|                     recap_buffer.append(line)
 | |
|                 await emit_update(False)
 | |
|         finally:
 | |
|             try:
 | |
|                 await proc.wait()
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|         rc = proc.returncode if proc else -1
 | |
|         self._log_local(f"ansible-playbook finished rc={rc}")
 | |
|         status = 'Success' if rc == 0 else ('Cancelled' if self._runs.get(run_id, {}).get('cancel') else 'Failed')
 | |
| 
 | |
|         # Final recap text
 | |
|         final_txt = '\n'.join(recap_buffer[-120:]) if recap_buffer else ('\n'.join(lines[-60:]) if lines else '')
 | |
|         await self._post_recap({
 | |
|             'run_id': run_id,
 | |
|             'hostname': hostname,
 | |
|             'agent_id': agent_id,
 | |
|             'status': status,
 | |
|             'recap_text': final_txt,
 | |
|             'finished_ts': int(time.time()),
 | |
|         })
 | |
| 
 | |
|         # Cleanup
 | |
|         try:
 | |
|             os.remove(path)
 | |
|         except Exception:
 | |
|             pass
 | |
|         self._runs.pop(run_id, None)
 | |
| 
 | |
|     def register_events(self):
 | |
|         sio = self.ctx.sio
 | |
| 
 | |
|         # Proactive bootstrap: converge WinRM + service user at role load (SYSTEM only)
 | |
|         async def _bootstrap_once():
 | |
|             try:
 | |
|                 if os.name != 'nt':
 | |
|                     return
 | |
|                 creds = await self._fetch_service_creds()
 | |
|                 user = creds.get('username') or DEFAULT_SERVICE_ACCOUNT
 | |
|                 pwd = creds.get('password') or ''
 | |
|                 self._ansible_log(f"[bootstrap] ensure winrm+user user={user} pw_len={len(pwd)}")
 | |
|                 self._ensure_winrm_and_user(user, pwd)
 | |
|                 ok = self._winrm_preflight(user, pwd)
 | |
|                 self._ansible_log(f"[bootstrap] preflight_ok={ok}")
 | |
|                 if not ok:
 | |
|                     self._ansible_log("[bootstrap] preflight failed; rotating creds", error=True)
 | |
|                     creds = await self._rotate_service_creds(reason='bootstrap_preflight_failed')
 | |
|                     user = creds.get('username') or user
 | |
|                     pwd = creds.get('password') or ''
 | |
|                     self._ensure_winrm_and_user(user, pwd)
 | |
|                     ok2 = self._winrm_preflight(user, pwd)
 | |
|                     self._ansible_log(f"[bootstrap] preflight_ok_after_rotate={ok2}")
 | |
|             except Exception:
 | |
|                 self._ansible_log("[bootstrap] exception", error=True)
 | |
| 
 | |
|         try:
 | |
|             loop = getattr(self.ctx, 'loop', None)
 | |
|             if loop and not loop.is_closed():
 | |
|                 loop.create_task(_bootstrap_once())
 | |
|             else:
 | |
|                 self._ansible_log('[bootstrap] unable to schedule proactive task; no event loop available')
 | |
|         except Exception as exc:
 | |
|             self._ansible_log(f"[bootstrap] failed to schedule coroutine: {exc}", error=True)
 | |
| 
 | |
|         @sio.on('ansible_playbook_run')
 | |
|         async def _on_ansible_playbook_run(payload):
 | |
|             try:
 | |
|                 hostname = socket.gethostname()
 | |
|                 target = (payload.get('target_hostname') or '').strip().lower()
 | |
|                 if target and target != hostname.lower():
 | |
|                     return
 | |
|                 # Accept provided run_id or generate one
 | |
|                 run_id = (payload.get('run_id') or '').strip() or uuid.uuid4().hex
 | |
|                 content = payload.get('playbook_content') or ''
 | |
|                 p_name = payload.get('playbook_name') or ''
 | |
|                 act_id = payload.get('activity_job_id')
 | |
|                 sched_job_id = payload.get('scheduled_job_id')
 | |
|                 sched_run_id = payload.get('scheduled_run_id')
 | |
|                 conn = (payload.get('connection') or 'local')
 | |
|                 # Track run
 | |
|                 self._runs[run_id] = {'cancel': False, 'proc': None}
 | |
|                 # Include scheduled ids on first recap post
 | |
|                 async def run_and_tag():
 | |
|                     # First recap (Running) will include activity_job_id and scheduled ids
 | |
|                     # by temporarily monkey patching _post_recap for initial call only
 | |
|                     first = {'done': False}
 | |
|                     orig = self._post_recap
 | |
|                     async def _wrapped(payload2: dict):
 | |
|                         if not first['done']:
 | |
|                             if sched_job_id is not None:
 | |
|                                 payload2['scheduled_job_id'] = sched_job_id
 | |
|                             if sched_run_id is not None:
 | |
|                                 payload2['scheduled_run_id'] = sched_run_id
 | |
|                             first['done'] = True
 | |
|                         await orig(payload2)
 | |
|                     self._post_recap = _wrapped
 | |
|                     try:
 | |
|                         await self._run_playbook(run_id, content, playbook_name=p_name, activity_job_id=act_id, connection=conn)
 | |
|                     finally:
 | |
|                         self._post_recap = orig
 | |
|                 asyncio.create_task(run_and_tag())
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|         @sio.on('ansible_playbook_cancel')
 | |
|         async def _on_ansible_playbook_cancel(payload):
 | |
|             try:
 | |
|                 run_id = (payload.get('run_id') or '').strip()
 | |
|                 if not run_id:
 | |
|                     return
 | |
|                 obj = self._runs.get(run_id)
 | |
|                 if not obj:
 | |
|                     return
 | |
|                 obj['cancel'] = True
 | |
|                 proc = obj.get('proc')
 | |
|                 if proc and proc.returncode is None:
 | |
|                     try:
 | |
|                         if os.name == 'nt':
 | |
|                             proc.terminate()
 | |
|                             await asyncio.sleep(0.5)
 | |
|                             if proc.returncode is None:
 | |
|                                 proc.kill()
 | |
|                         else:
 | |
|                             proc.terminate()
 | |
|                             await asyncio.sleep(0.5)
 | |
|                             if proc.returncode is None:
 | |
|                                 proc.kill()
 | |
|                     except Exception:
 | |
|                         pass
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|         @sio.on('quick_job_run')
 | |
|         async def _compat_quick_job_run(payload):
 | |
|             """Compatibility: allow scheduled jobs to dispatch .yml as ansible playbooks.
 | |
|             Expects payload fields similar to script quick runs but with script_type='ansible'.
 | |
|             """
 | |
|             try:
 | |
|                 stype = (payload.get('script_type') or '').lower()
 | |
|                 if stype != 'ansible':
 | |
|                     return
 | |
|                 hostname = socket.gethostname()
 | |
|                 target = (payload.get('target_hostname') or '').strip().lower()
 | |
|                 if target and target != hostname.lower():
 | |
|                     return
 | |
|                 run_id = uuid.uuid4().hex
 | |
|                 content = payload.get('script_content') or ''
 | |
|                 p_name = payload.get('script_name') or ''
 | |
|                 self._runs[run_id] = {'cancel': False, 'proc': None}
 | |
|                 asyncio.create_task(self._run_playbook(run_id, content, playbook_name=p_name, activity_job_id=payload.get('job_id'), connection='local'))
 | |
|             except Exception:
 | |
|                 pass
 | |
| 
 | |
|     def on_config(self, roles):
 | |
|         # No scheduled tasks to manage for now
 | |
|         return
 | |
| 
 | |
|     def stop_all(self):
 | |
|         # Attempt to cancel any running playbooks
 | |
|         for rid, obj in list(self._runs.items()):
 | |
|             try:
 | |
|                 obj['cancel'] = True
 | |
|                 p = obj.get('proc')
 | |
|                 if p and p.returncode is None:
 | |
|                     try:
 | |
|                         if os.name == 'nt':
 | |
|                             p.terminate()
 | |
|                             time.sleep(0.2)
 | |
|                             if p.returncode is None:
 | |
|                                 p.kill()
 | |
|                         else:
 | |
|                             p.terminate()
 | |
|                     except Exception:
 | |
|                         pass
 | |
|             except Exception:
 | |
|                 pass
 | |
|         self._runs.clear()
 |