Centralized Overhaul of Service Logging

This commit is contained in:
2025-10-02 03:36:47 -06:00
parent 93ed77f1a7
commit 0320b5fd1e
7 changed files with 319 additions and 38 deletions

View File

@@ -1,4 +1,35 @@
# Borealis Agents # Borealis Agents
## Logging Policy (Centralized, Rotated)
- **Log Locations**
- Agent: `<ProjectRoot>/Logs/Agent/<service>.log`
- Server: `<ProjectRoot>/Logs/Server/<service>.log`
- **General-Purpose Logs**
- Agent: `agent.log`
- Server: `server.log`
- **Dedicated Logs**
- Subsystems with significant surface area must use their own `<service>.log`
- Examples: `ansible.log`, `webrtc.log`, `scheduler.log`
- **Installation / Bootstrap Logs**
- Agent install: `Logs/Agent/install.log`
- Server install: `Logs/Server/install.log`
- **Rotation Policy**
- All log writers must rotate daily.
- On day rollover, rename:
- `<service>.log` → `<service>.log.YYYY-MM-DD`
- Append only to the current day's log.
- **Do not** auto-delete rotated logs.
- **Restrictions**
- Logs must **only** be written under the project root.
- Never write logs to:
- `ProgramData`
- `AppData`
- User profiles
- System temp directories
- No alternative log fan-out (e.g., per-component folders) unless explicitly coordinated.
Prefer single log files per service.
- **Convergence**
- This policy applies to all new contributions.
- When modifying existing code, migrate ad-hoc logging into this pattern.
## Overview ## Overview
Borealis pairs a no-code workflow canvas with a rapidly evolving remote management stack. The long-term goal is to orchestrate scripts, schedules, and workflows against distributed agents while keeping everything self-contained and portable. Borealis pairs a no-code workflow canvas with a rapidly evolving remote management stack. The long-term goal is to orchestrate scripts, schedules, and workflows against distributed agents while keeping everything self-contained and portable.
@@ -164,3 +195,4 @@ This section summarizes what is considered usable vs. experimental today.

View File

@@ -22,19 +22,20 @@ function Ensure-LocalhostWinRMHttps {
} }
$thumb = if ($cert) { $cert.Thumbprint } else { '' } $thumb = if ($cert) { $cert.Thumbprint } else { '' }
# Create listener only if not present # Ensure HTTPS listener exists; use Address='*' then restrict via IPv4Filter
try { try {
$listener = Get-WSManInstance -ResourceURI winrm/config/listener -Enumerate -ErrorAction SilentlyContinue | $https = Get-WSManInstance -ResourceURI winrm/config/listener -Enumerate -ErrorAction SilentlyContinue |
Where-Object { $_.Transport -eq 'HTTPS' -and $_.Address -eq '127.0.0.1' -and $_.Port -eq '5986' } Where-Object { $_.Transport -eq 'HTTPS' }
} catch { $listener = $null } } catch { $https = $null }
if (-not $listener -and $thumb) { if ((-not $https) -and $thumb) {
$cmd = "winrm create winrm/config/Listener?Address=127.0.0.1+Transport=HTTPS @{Hostname=`"$DnsName`"; CertificateThumbprint=`"$thumb`"; Port=`"5986`"}" $cmd = "winrm create winrm/config/Listener?Address=*+Transport=HTTPS @{Hostname=`"$DnsName`"; CertificateThumbprint=`"$thumb`"}"
cmd /c $cmd | Out-Null cmd /c $cmd | Out-Null
} }
# Harden auth and encryption # Harden auth and encryption
try { winrm set winrm/config/service/auth @{Basic="false"; Kerberos="true"; Negotiate="true"; CredSSP="false"} | Out-Null } catch {} try { winrm set winrm/config/service/auth @{Basic="false"; Kerberos="true"; Negotiate="true"; CredSSP="false"} | Out-Null } catch {}
try { winrm set winrm/config/service @{AllowUnencrypted="false"} | Out-Null } catch {} try { winrm set winrm/config/service @{AllowUnencrypted="false"} | Out-Null } catch {}
try { winrm set winrm/config/service @{IPv4Filter="127.0.0.1"} | Out-Null } catch {}
} }
function Ensure-BorealisServiceUser { function Ensure-BorealisServiceUser {
@@ -43,7 +44,8 @@ function Ensure-BorealisServiceUser {
[Parameter(Mandatory)][string]$UserName, [Parameter(Mandatory)][string]$UserName,
[Parameter(Mandatory)][string]$PlaintextPassword [Parameter(Mandatory)][string]$PlaintextPassword
) )
$localName = $UserName -replace '^\.\\','' $localName = $UserName
if ($localName.StartsWith('.\')) { $localName = $localName.Substring(2) }
$secure = ConvertTo-SecureString $PlaintextPassword -AsPlainText -Force $secure = ConvertTo-SecureString $PlaintextPassword -AsPlainText -Force
$u = Get-LocalUser -Name $localName -ErrorAction SilentlyContinue $u = Get-LocalUser -Name $localName -ErrorAction SilentlyContinue
if (-not $u) { if (-not $u) {
@@ -96,4 +98,3 @@ ansible_winrm_server_cert_validation=ignore
} }
Export-ModuleMember -Function Ensure-LocalhostWinRMHttps,Ensure-BorealisServiceUser,Write-LocalInventory Export-ModuleMember -Function Ensure-LocalhostWinRMHttps,Ensure-BorealisServiceUser,Write-LocalInventory

View File

@@ -91,6 +91,11 @@ class Role:
self.ctx = ctx self.ctx = ctx
self._runs = {} # run_id -> { proc, task, cancel } self._runs = {} # run_id -> { proc, task, cancel }
self._svc_creds = None # cache per-process: {username, password} self._svc_creds = None # cache per-process: {username, password}
try:
os.makedirs(self._ansible_log_dir(), exist_ok=True)
self._ansible_log(f"[init] PlaybookExec role init agent_id={ctx.agent_id}")
except Exception:
pass
def _log_local(self, msg: str, error: bool = False): def _log_local(self, msg: str, error: bool = False):
try: try:
@@ -112,6 +117,34 @@ class Role:
pass pass
return 'http://localhost:5000' return 'http://localhost:5000'
def _ansible_log(self, msg: str, error: bool = False, run_id: str = None):
    """Append a timestamped line to ``<ProjectRoot>/Logs/Agent/ansible.log``.

    The log is rotated before writing: when the existing file was last
    modified on a different calendar day, it is renamed to
    ``ansible.log.YYYY-MM-DD`` (the ``<service>.log.YYYY-MM-DD`` form required
    by the logging policy) and a fresh ``ansible.log`` is started.

    Args:
        msg: Text to record; written as ``[YYYY-MM-DD HH:MM:SS] msg``.
        error: Accepted so call sites can flag failures; it does not
            currently change what is written.
        run_id: When truthy, the same line is also appended to
            ``run_<run_id>.log`` in the same directory.

    Logging is best-effort: every failure is swallowed so that a logging
    problem can never break the caller.
    """
    try:
        log_dir = os.path.join(_project_root(), 'Logs', 'Agent')
        ts = time.strftime('%Y-%m-%d %H:%M:%S')
        path = os.path.join(log_dir, 'ansible.log')
        try:
            os.makedirs(log_dir, exist_ok=True)
        except Exception:
            pass
        # Rotate daily, keyed on the file's last-modified date.
        try:
            if os.path.isfile(path):
                import datetime as _dt
                modified = _dt.datetime.fromtimestamp(os.path.getmtime(path))
                if modified.date() != _dt.datetime.now().date():
                    # Suffix the date after the full name (ansible.log.YYYY-MM-DD)
                    # rather than splicing it in before the extension.
                    os.replace(path, f"{path}.{modified.strftime('%Y-%m-%d')}")
        except Exception:
            # A failed rotation must not prevent the write below.
            pass
        with open(path, 'a', encoding='utf-8') as fh:
            fh.write(f'[{ts}] {msg}\n')
        if run_id:
            run_path = os.path.join(log_dir, f'run_{run_id}.log')
            with open(run_path, 'a', encoding='utf-8') as rf:
                rf.write(f'[{ts}] {msg}\n')
    except Exception:
        pass
async def _fetch_service_creds(self) -> dict: async def _fetch_service_creds(self) -> dict:
if self._svc_creds and isinstance(self._svc_creds, dict): if self._svc_creds and isinstance(self._svc_creds, dict):
return self._svc_creds return self._svc_creds
@@ -123,6 +156,7 @@ class Role:
'hostname': socket.gethostname(), 'hostname': socket.gethostname(),
'username': '.\\svcBorealisAnsibleRunner', 'username': '.\\svcBorealisAnsibleRunner',
} }
self._ansible_log(f"[checkin] POST {url} agent_id={self.ctx.agent_id}")
timeout = aiohttp.ClientTimeout(total=15) timeout = aiohttp.ClientTimeout(total=15)
async with aiohttp.ClientSession(timeout=timeout) as sess: async with aiohttp.ClientSession(timeout=timeout) as sess:
async with sess.post(url, json=payload) as resp: async with sess.post(url, json=payload) as resp:
@@ -130,8 +164,10 @@ class Role:
u = (js or {}).get('username') or '.\\svcBorealisAnsibleRunner' u = (js or {}).get('username') or '.\\svcBorealisAnsibleRunner'
p = (js or {}).get('password') or '' p = (js or {}).get('password') or ''
self._svc_creds = {'username': u, 'password': p} self._svc_creds = {'username': u, 'password': p}
self._ansible_log(f"[checkin] received user={u} pw_len={len(p)}")
return self._svc_creds return self._svc_creds
except Exception: except Exception:
self._ansible_log(f"[checkin] failed agent_id={self.ctx.agent_id}", error=True)
return {'username': '.\\svcBorealisAnsibleRunner', 'password': ''} return {'username': '.\\svcBorealisAnsibleRunner', 'password': ''}
def _normalize_playbook_content(self, content: str) -> str: def _normalize_playbook_content(self, content: str) -> str:
@@ -162,6 +198,7 @@ class Role:
'agent_id': self.ctx.agent_id, 'agent_id': self.ctx.agent_id,
'reason': 'bad_credentials', 'reason': 'bad_credentials',
} }
self._ansible_log(f"[rotate] POST {url} agent_id={self.ctx.agent_id}")
timeout = aiohttp.ClientTimeout(total=15) timeout = aiohttp.ClientTimeout(total=15)
async with aiohttp.ClientSession(timeout=timeout) as sess: async with aiohttp.ClientSession(timeout=timeout) as sess:
async with sess.post(url, json=payload) as resp: async with sess.post(url, json=payload) as resp:
@@ -169,8 +206,10 @@ class Role:
u = (js or {}).get('username') or '.\\svcBorealisAnsibleRunner' u = (js or {}).get('username') or '.\\svcBorealisAnsibleRunner'
p = (js or {}).get('password') or '' p = (js or {}).get('password') or ''
self._svc_creds = {'username': u, 'password': p} self._svc_creds = {'username': u, 'password': p}
self._ansible_log(f"[rotate] received user={u} pw_len={len(p)}")
return self._svc_creds return self._svc_creds
except Exception: except Exception:
self._ansible_log(f"[rotate] failed agent_id={self.ctx.agent_id}", error=True)
return await self._fetch_service_creds() return await self._fetch_service_creds()
def _ps_module_path(self) -> str: def _ps_module_path(self) -> str:
@@ -186,22 +225,66 @@ class Role:
if os.name != 'nt': if os.name != 'nt':
return return
mod = self._ps_module_path() mod = self._ps_module_path()
log_dir = os.path.join(_project_root(), 'Logs', 'Agent')
try:
os.makedirs(log_dir, exist_ok=True)
except Exception:
pass
if not os.path.isfile(mod): if not os.path.isfile(mod):
# best effort with inline commands # best effort with inline commands
try: try:
subprocess.run(['powershell', '-NoProfile', '-Command', 'Set-Service WinRM -StartupType Automatic; Start-Service WinRM'], timeout=30) r = subprocess.run(['powershell', '-NoProfile', '-Command', 'Set-Service WinRM -StartupType Automatic; Start-Service WinRM; (Get-Service WinRM).Status'], capture_output=True, text=True, timeout=60)
except Exception: self._ansible_log(f"[ensure] basic winrm start rc={r.returncode} out={r.stdout} err={r.stderr}", error=r.returncode!=0)
pass except Exception as e:
self._ansible_log(f"[ensure] winrm start exception: {e}", error=True)
return return
ps = f""" # Robust execution via temp PS file
Import-Module -Name '{mod}' -Force tmp_dir = os.path.join(_project_root(), 'Temp')
Ensure-LocalhostWinRMHttps os.makedirs(tmp_dir, exist_ok=True)
Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}' ps_path = os.path.join(tmp_dir, f"ansible_bootstrap_{int(time.time())}.ps1")
ensure_log = os.path.join(log_dir, f"ensure_winrm_{int(time.time())}.log")
ps_content = f"""
$ErrorActionPreference='Continue'
try {{
Import-Module -Name '{mod}' -Force
'Imported module: {mod}' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
$user = '{username}'
$pw = '{password}'
Ensure-LocalhostWinRMHttps | Out-Null
'Ensured WinRM HTTPS listener on 127.0.0.1:5986' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
Ensure-BorealisServiceUser -UserName $user -PlaintextPassword $pw | Out-Null
'Ensured service user: ' + $user | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
# Fallback path if LocalAccounts cmdlets unavailable
try {{
$ln = $user; if ($ln.StartsWith('.\\')) { $ln = $ln.Substring(2) }
$exists = Get-LocalUser -Name $ln -ErrorAction SilentlyContinue
if (-not $exists) {{
'Fallback: Using NET USER to create account' | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
cmd /c "net user $ln `"$pw`" /ADD /Y" | Out-Null
cmd /c "net localgroup Administrators $ln /ADD" | Out-Null
}}
}} catch {{
'Fallback path failed: ' + $_ | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
}}
try {{ (Get-WSManInstance -ResourceURI winrm/config/listener -Enumerate) | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8 }} catch {{}}
try {{ $ln2=$user; if ($ln2.StartsWith('.\\')) { $ln2=$ln2.Substring(2) }; Get-LocalUser | Where-Object {{$_.Name -eq $ln2}} | Format-List * | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8 }} catch {{}}
try {{ whoami | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8 }} catch {{}}
exit 0
}} catch {{
$_ | Out-File -FilePath '{ensure_log}' -Append -Encoding UTF8
exit 1
}}
""" """
try: try:
subprocess.run(['powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', ps], timeout=90) with open(ps_path, 'w', encoding='utf-8') as fh:
except Exception: fh.write(ps_content)
pass except Exception as e:
self._ansible_log(f"[ensure] write PS failed: {e}", error=True)
try:
r = subprocess.run(['powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', ps_path], capture_output=True, text=True, timeout=180)
self._ansible_log(f"[ensure] bootstrap rc={r.returncode} out_len={len(r.stdout or '')} err_len={len(r.stderr or '')}", error=r.returncode!=0)
except Exception as e:
self._ansible_log(f"[ensure] bootstrap exception: {e}", error=True)
def _write_winrm_inventory(self, base_dir: str, username: str, password: str) -> str: def _write_winrm_inventory(self, base_dir: str, username: str, password: str) -> str:
inv_dir = os.path.join(base_dir, 'inventory') inv_dir = os.path.join(base_dir, 'inventory')
@@ -233,8 +316,16 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
try: try:
s = winrm.Session('https://127.0.0.1:5986', auth=(username, password), transport='ntlm', server_cert_validation='ignore') s = winrm.Session('https://127.0.0.1:5986', auth=(username, password), transport='ntlm', server_cert_validation='ignore')
r = s.run_cmd('whoami') r = s.run_cmd('whoami')
return r.status_code == 0 ok = (r.status_code == 0)
try:
so = getattr(r, 'std_out', b'')
se = getattr(r, 'std_err', b'')
self._ansible_log(f"[preflight] rc={r.status_code} out={so[:120]!r} err={se[:120]!r}")
except Exception:
pass
return ok
except Exception: except Exception:
self._ansible_log(f"[preflight] exception during winrm session", error=True)
return False return False
async def _post_recap(self, payload: dict): async def _post_recap(self, payload: dict):
@@ -253,7 +344,8 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
async def _run_playbook_runner(self, run_id: str, playbook_content: str, playbook_name: str = '', activity_job_id=None, connection: str = 'local'): async def _run_playbook_runner(self, run_id: str, playbook_content: str, playbook_name: str = '', activity_job_id=None, connection: str = 'local'):
try: try:
import ansible_runner # type: ignore import ansible_runner # type: ignore
except Exception: except Exception as e:
self._ansible_log(f"[runner] ansible_runner import failed: {e}")
return False return False
tmp_dir = os.path.join(_project_root(), 'Temp') tmp_dir = os.path.join(_project_root(), 'Temp')
@@ -268,8 +360,10 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
play_rel = 'playbook.yml' play_rel = 'playbook.yml'
play_abs = os.path.join(project, play_rel) play_abs = os.path.join(project, play_rel)
_norm = self._normalize_playbook_content(playbook_content or '')
with open(play_abs, 'w', encoding='utf-8', newline='\n') as fh: with open(play_abs, 'w', encoding='utf-8', newline='\n') as fh:
fh.write(self._normalize_playbook_content(playbook_content or '')) fh.write(_norm)
self._ansible_log(f"[runner] prepared playbook={play_abs} bytes={len(_norm.encode('utf-8'))}")
# WinRM service account credentials # WinRM service account credentials
creds = await self._fetch_service_creds() creds = await self._fetch_service_creds()
user = creds.get('username') or '.\\svcBorealisAnsibleRunner' user = creds.get('username') or '.\\svcBorealisAnsibleRunner'
@@ -286,6 +380,7 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
self._ensure_winrm_and_user(user, pwd) self._ensure_winrm_and_user(user, pwd)
# Write inventory for winrm localhost # Write inventory for winrm localhost
inv_file = self._write_winrm_inventory(pd, user, pwd) inv_file = self._write_winrm_inventory(pd, user, pwd)
self._ansible_log(f"[runner] inventory={inv_file} user={user}")
# Set connection via envvars # Set connection via envvars
with open(os.path.join(env_dir, 'envvars'), 'w', encoding='utf-8', newline='\n') as fh: with open(os.path.join(env_dir, 'envvars'), 'w', encoding='utf-8', newline='\n') as fh:
@@ -345,16 +440,22 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
cancel_callback=_cancel_cb, cancel_callback=_cancel_cb,
extravars={} extravars={}
) )
try:
self._ansible_log(f"[runner] finished status={getattr(r,'status',None)} rc={getattr(r,'rc',None)}")
except Exception:
pass
status = 'Cancelled' if _cancel_cb() else 'Success' status = 'Cancelled' if _cancel_cb() else 'Success'
try: try:
# Some auth failures bubble up in events only; inspect last few lines # Some auth failures bubble up in events only; inspect last few lines
tail = '\n'.join(lines[-50:]).lower() tail = '\n'.join(lines[-50:]).lower()
if ('access is denied' in tail) or ('unauthorized' in tail) or ('cannot process the request' in tail): if ('access is denied' in tail) or ('unauthorized' in tail) or ('cannot process the request' in tail):
auth_failed = True auth_failed = True
self._ansible_log("[runner] detected auth failure in output", error=True)
except Exception: except Exception:
pass pass
except Exception: except Exception:
status = 'Failed' status = 'Failed'
self._ansible_log("[runner] exception in ansible-runner", error=True)
# Synthesize recap text from recap_json if available # Synthesize recap text from recap_json if available
recap_text = '' recap_text = ''
@@ -385,6 +486,7 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
'recap_json': recap_json, 'recap_json': recap_json,
'finished_ts': int(time.time()), 'finished_ts': int(time.time()),
}) })
self._ansible_log(f"[runner] recap posted status={status}")
# If authentication failed on first pass, rotate password and try once more # If authentication failed on first pass, rotate password and try once more
if auth_failed: if auth_failed:
try: try:
@@ -396,6 +498,7 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
await self._run_playbook_runner(run_id, playbook_content, playbook_name=playbook_name, activity_job_id=activity_job_id, connection=connection) await self._run_playbook_runner(run_id, playbook_content, playbook_name=playbook_name, activity_job_id=activity_job_id, connection=connection)
return True return True
except Exception: except Exception:
self._ansible_log("[runner] rotate+retry failed", error=True)
pass pass
return True return True
@@ -404,8 +507,10 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
tmp_dir = os.path.join(_project_root(), 'Temp') tmp_dir = os.path.join(_project_root(), 'Temp')
os.makedirs(tmp_dir, exist_ok=True) os.makedirs(tmp_dir, exist_ok=True)
fd, path = tempfile.mkstemp(prefix='pb_', suffix='.yml', dir=tmp_dir, text=True) fd, path = tempfile.mkstemp(prefix='pb_', suffix='.yml', dir=tmp_dir, text=True)
_norm2 = self._normalize_playbook_content(playbook_content or '')
with os.fdopen(fd, 'w', encoding='utf-8', newline='\n') as fh: with os.fdopen(fd, 'w', encoding='utf-8', newline='\n') as fh:
fh.write(self._normalize_playbook_content(playbook_content or '')) fh.write(_norm2)
self._ansible_log(f"[cli] prepared playbook={path} bytes={len(_norm2.encode('utf-8'))}")
hostname = socket.gethostname() hostname = socket.gethostname()
agent_id = self.ctx.agent_id agent_id = self.ctx.agent_id
@@ -440,15 +545,34 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
inv_file_cli = self._write_winrm_inventory(os.path.dirname(path), user, pwd) inv_file_cli = self._write_winrm_inventory(os.path.dirname(path), user, pwd)
except Exception: except Exception:
inv_file_cli = None inv_file_cli = None
# Build CLI; if inv_file_cli present, omit -c and use '-i invfile' # Build CLI; resolve ansible-playbook or fallback to python -m ansible.cli.playbook
ap = _ansible_playbook_cmd()
use_module = False
if os.path.dirname(ap) and not os.path.isfile(ap):
# If we got a path but it doesn't exist, switch to module mode
use_module = True
elif not os.path.dirname(ap):
# bare command; verify existence in PATH
from shutil import which
if which(ap) is None:
use_module = True
if use_module:
py = _venv_python() or sys.executable
base_cmd = [py, '-m', 'ansible.cli.playbook']
self._ansible_log(f"[cli] ansible-playbook not found; using python -m ansible.cli.playbook via {py}")
else:
base_cmd = [ap]
if inv_file_cli and os.path.isfile(inv_file_cli): if inv_file_cli and os.path.isfile(inv_file_cli):
cmd = [_ansible_playbook_cmd(), path, '-i', inv_file_cli] cmd = base_cmd + [path, '-i', inv_file_cli]
self._log_local(f"Launching ansible-playbook with WinRM inventory: {' '.join(cmd)}") self._log_local(f"Launching ansible-playbook with WinRM inventory: {' '.join(cmd)}")
self._ansible_log(f"[cli] cmd={' '.join(cmd)} inv={inv_file_cli}")
else: else:
if conn not in ('local', 'winrm', 'psrp'): if conn not in ('local', 'winrm', 'psrp'):
conn = 'local' conn = 'local'
cmd = [_ansible_playbook_cmd(), path, '-i', 'localhost,', '-c', conn] cmd = base_cmd + [path, '-i', 'localhost,', '-c', conn]
self._log_local(f"Launching ansible-playbook: conn={conn} cmd={' '.join(cmd)}") self._log_local(f"Launching ansible-playbook: conn={conn} cmd={' '.join(cmd)}")
self._ansible_log(f"[cli] cmd={' '.join(cmd)}")
# Ensure clean, plain output and correct interpreter for localhost # Ensure clean, plain output and correct interpreter for localhost
env = os.environ.copy() env = os.environ.copy()
env.setdefault('ANSIBLE_FORCE_COLOR', '0') env.setdefault('ANSIBLE_FORCE_COLOR', '0')
@@ -500,6 +624,7 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
) )
except Exception as e: except Exception as e:
self._log_local(f"Failed to launch ansible-playbook: {e}", error=True) self._log_local(f"Failed to launch ansible-playbook: {e}", error=True)
self._ansible_log(f"[cli] failed to launch: {e}", error=True)
await self._post_recap({ await self._post_recap({
'run_id': run_id, 'run_id': run_id,
'hostname': hostname, 'hostname': hostname,
@@ -554,6 +679,7 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
except Exception: except Exception:
line = str(bs) line = str(bs)
lines.append(line) lines.append(line)
self._ansible_log(f"[cli] {line}")
if len(lines) > 5000: if len(lines) > 5000:
lines = lines[-2500:] lines = lines[-2500:]
# Detect recap section # Detect recap section
@@ -594,6 +720,34 @@ Ensure-BorealisServiceUser -UserName '{username}' -PlaintextPassword '{password}
def register_events(self): def register_events(self):
sio = self.ctx.sio sio = self.ctx.sio
# Proactive bootstrap: converge WinRM + service user at role load (SYSTEM only)
async def _bootstrap_once():
    """Converge WinRM and the service account once, then verify with a preflight.

    Does nothing off Windows. Fetches the service credentials, ensures WinRM
    and the user, and runs a preflight. If the preflight fails, the
    credentials are rotated and the ensure + preflight sequence is repeated
    once. All failures are logged to the ansible log and never propagate.
    """
    try:
        if os.name != 'nt':
            return
        creds = await self._fetch_service_creds()
        user = creds.get('username') or '.\\svcBorealisAnsibleRunner'
        pwd = creds.get('password') or ''
        # Only the password length is logged, never the password itself.
        self._ansible_log(f"[bootstrap] ensure winrm+user user={user} pw_len={len(pwd)}")
        # NOTE(review): these two helpers are synchronous and are called
        # directly from this coroutine, so they run on the event loop thread.
        self._ensure_winrm_and_user(user, pwd)
        ok = self._winrm_preflight(user, pwd)
        self._ansible_log(f"[bootstrap] preflight_ok={ok}")
        if not ok:
            self._ansible_log("[bootstrap] preflight failed; rotating creds", error=True)
            creds = await self._rotate_service_creds()
            user = creds.get('username') or user
            pwd = creds.get('password') or ''
            self._ensure_winrm_and_user(user, pwd)
            ok2 = self._winrm_preflight(user, pwd)
            self._ansible_log(f"[bootstrap] preflight_ok_after_rotate={ok2}")
    except Exception as exc:
        # Record what went wrong instead of an anonymous marker.
        self._ansible_log(f"[bootstrap] exception: {exc}", error=True)

_bootstrap_coro = _bootstrap_once()
try:
    # Keep a strong reference: the event loop only holds weak references to
    # tasks, so an unreferenced task may be garbage-collected before it ends.
    self._bootstrap_task = asyncio.create_task(_bootstrap_coro)
except Exception:
    # Scheduling failed (e.g. no running loop); close the coroutine so it is
    # not reported as "never awaited".
    _bootstrap_coro.close()
@sio.on('ansible_playbook_run') @sio.on('ansible_playbook_run')
async def _on_ansible_playbook_run(payload): async def _on_ansible_playbook_run(payload):
try: try:

View File

@@ -29,13 +29,42 @@ import aiohttp
import socketio import socketio
# Early bootstrap logging (path relative to this file) # Centralized logging helpers (Agent)
def _agent_logs_root() -> str:
    """Return the absolute path of the Agent log directory.

    The directory is ``Logs/Agent`` two levels above this file's directory.
    If resolving that path raises, ``Logs/Agent`` next to this file is
    returned instead.
    """
    try:
        preferred = os.path.join(os.path.dirname(__file__), '..', '..', 'Logs', 'Agent')
        return os.path.abspath(preferred)
    except Exception:
        fallback = os.path.join(os.path.dirname(__file__), 'Logs', 'Agent')
        return os.path.abspath(fallback)
def _rotate_daily(path: str):
    """Rotate the log at *path* if it was last written on a different day.

    When *path* is an existing file whose modification date differs from
    today's date, it is renamed to ``<path>.YYYY-MM-DD`` using that
    modification date, i.e. ``<service>.log`` becomes
    ``<service>.log.YYYY-MM-DD`` as the logging policy requires. Rotated
    files are never deleted here.

    Best-effort: a missing file is a no-op and every error is swallowed so
    that rotation can never prevent the caller from logging.
    """
    try:
        import datetime as _dt
        if not os.path.isfile(path):
            return
        modified = _dt.datetime.fromtimestamp(os.path.getmtime(path))
        if modified.date() == _dt.datetime.now().date():
            # Still today's log; keep appending to it.
            return
        # Suffix the date after the full file name instead of splicing it in
        # before the extension.
        os.replace(path, f"{path}.{modified.strftime('%Y-%m-%d')}")
    except Exception:
        pass
# Early bootstrap logging (goes to agent.log)
def _bootstrap_log(msg: str): def _bootstrap_log(msg: str):
try: try:
base = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'Logs', 'Agent')) base = _agent_logs_root()
os.makedirs(base, exist_ok=True) os.makedirs(base, exist_ok=True)
path = os.path.join(base, 'agent.log')
_rotate_daily(path)
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
with open(os.path.join(base, 'bootstrap.log'), 'a', encoding='utf-8') as fh: with open(path, 'a', encoding='utf-8') as fh:
fh.write(f'[{ts}] {msg}\n') fh.write(f'[{ts}] {msg}\n')
except Exception: except Exception:
pass pass
@@ -116,11 +145,12 @@ def _find_project_root():
# Simple file logger under Logs/Agent # Simple file logger under Logs/Agent
def _log_agent(message: str, fname: str = 'agent.log'): def _log_agent(message: str, fname: str = 'agent.log'):
try: try:
root = _find_project_root() log_dir = _agent_logs_root()
log_dir = os.path.join(root, 'Logs', 'Agent')
os.makedirs(log_dir, exist_ok=True) os.makedirs(log_dir, exist_ok=True)
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
with open(os.path.join(log_dir, fname), 'a', encoding='utf-8') as fh: path = os.path.join(log_dir, fname)
_rotate_daily(path)
with open(path, 'a', encoding='utf-8') as fh:
fh.write(f'[{ts}] {message}\n') fh.write(f'[{ts}] {message}\n')
except Exception: except Exception:
pass pass
@@ -1420,6 +1450,7 @@ if __name__=='__main__':
_bootstrap_log('enter __main__') _bootstrap_log('enter __main__')
except Exception: except Exception:
pass pass
# Ansible logs are rotated daily on write; no explicit clearing on startup
if SYSTEM_SERVICE_MODE: if SYSTEM_SERVICE_MODE:
loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop) loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop)
else: else:
@@ -1553,3 +1584,15 @@ if __name__=='__main__':
print("[FATAL] Agent exited unexpectedly.") print("[FATAL] Agent exited unexpectedly.")
# (moved earlier so async tasks can log immediately) # (moved earlier so async tasks can log immediately)
# ---- Ansible log helpers (Agent) ----
def _ansible_log_agent(msg: str):
    """Append a timestamped line to ``ansible.log`` in the Agent log directory.

    The directory is created if needed and the file goes through
    ``_rotate_daily`` before the write. Best-effort: any failure is
    silently ignored.
    """
    try:
        log_dir = _agent_logs_root()
        os.makedirs(log_dir, exist_ok=True)
        target = os.path.join(log_dir, 'ansible.log')
        _rotate_daily(target)
        now = datetime.datetime.now()
        stamp = now.strftime('%Y-%m-%d %H:%M:%S')
        with open(target, 'a', encoding='utf-8') as out:
            out.write(f'[{stamp}] {msg}\n')
    except Exception:
        pass

View File

@@ -14,7 +14,7 @@ def project_paths():
venv_root = os.path.abspath(os.path.join(venv_scripts, os.pardir)) venv_root = os.path.abspath(os.path.join(venv_scripts, os.pardir))
project_root = os.path.abspath(os.path.join(venv_root, os.pardir)) project_root = os.path.abspath(os.path.join(venv_root, os.pardir))
borealis_dir = os.path.join(venv_root, "Borealis") borealis_dir = os.path.join(venv_root, "Borealis")
logs_dir = os.path.join(project_root, "Logs") logs_dir = os.path.join(project_root, "Logs", "Agent")
temp_dir = os.path.join(project_root, "Temp") temp_dir = os.path.join(project_root, "Temp")
return { return {
"project_root": project_root, "project_root": project_root,
@@ -35,7 +35,9 @@ def ensure_dirs(paths):
def log_write(paths, name, text): def log_write(paths, name, text):
try: try:
p = os.path.join(paths["logs_dir"], name) # Centralize into Agent logs; default to install.log when unspecified
fn = name or "install.log"
p = os.path.join(paths["logs_dir"], fn)
with open(p, "a", encoding="utf-8") as f: with open(p, "a", encoding="utf-8") as f:
f.write(f"{_now()} {text}\n") f.write(f"{_now()} {text}\n")
except Exception: except Exception:

View File

@@ -9,10 +9,11 @@ try {
$scriptDir = Split-Path -Path $PSCommandPath -Parent $scriptDir = Split-Path -Path $PSCommandPath -Parent
Set-Location -Path $scriptDir Set-Location -Path $scriptDir
# Ensure a place for wrapper/stdout logs # Centralized logs under <ProjectRoot>\Logs\Agent
$pd = Join-Path $env:ProgramData 'Borealis' $projRoot = Resolve-Path (Join-Path $scriptDir '..\..')
if (-not (Test-Path $pd)) { New-Item -ItemType Directory -Path $pd -Force | Out-Null } $logsAgent = Join-Path $projRoot 'Logs\Agent'
$wrapperLog = Join-Path $pd 'svc.wrapper.log' if (-not (Test-Path $logsAgent)) { New-Item -ItemType Directory -Path $logsAgent -Force | Out-Null }
$wrapperLog = Join-Path $logsAgent 'service_wrapper.log'
$venvBin = Join-Path $scriptDir '..\Scripts' $venvBin = Join-Path $scriptDir '..\Scripts'
$pyw = Join-Path $venvBin 'pythonw.exe' $pyw = Join-Path $venvBin 'pythonw.exe'
@@ -27,7 +28,7 @@ try {
# Launch and keep the task in Running state by waiting on the child # Launch and keep the task in Running state by waiting on the child
$p = Start-Process -FilePath $exe -ArgumentList $args -WindowStyle Hidden -PassThru -WorkingDirectory $scriptDir ` $p = Start-Process -FilePath $exe -ArgumentList $args -WindowStyle Hidden -PassThru -WorkingDirectory $scriptDir `
-RedirectStandardOutput (Join-Path $pd 'svc.out.log') -RedirectStandardError (Join-Path $pd 'svc.err.log') -RedirectStandardOutput (Join-Path $logsAgent 'service.out.log') -RedirectStandardError (Join-Path $logsAgent 'service.err.log')
try { Wait-Process -Id $p.Id } catch {} try { Wait-Process -Id $p.Id } catch {}
} catch { } catch {
try { try {

View File

@@ -26,6 +26,49 @@ try:
except Exception: except Exception:
Fernet = None # optional; we will fall back to reversible base64 if missing Fernet = None # optional; we will fall back to reversible base64 if missing
# Centralized logging (Server)
def _server_logs_root() -> str:
    """Return the absolute path of the Server log directory.

    The directory is ``Logs/Server`` two levels above this file's directory.
    If resolving that path raises, ``Logs/Server`` next to this file is
    returned instead.
    """
    try:
        preferred = os.path.join(os.path.dirname(__file__), '..', '..', 'Logs', 'Server')
        return os.path.abspath(preferred)
    except Exception:
        fallback = os.path.join(os.path.dirname(__file__), 'Logs', 'Server')
        return os.path.abspath(fallback)
def _rotate_daily(path: str):
    """Rotate the log at *path* if it was last written on a different day.

    When *path* is an existing file whose modification date differs from
    today's date, it is renamed to ``<path>.YYYY-MM-DD`` using that
    modification date, i.e. ``<service>.log`` becomes
    ``<service>.log.YYYY-MM-DD`` as the logging policy requires. Rotated
    files are never deleted here.

    Best-effort: a missing file is a no-op and every error is swallowed so
    that rotation can never prevent the caller from logging.
    """
    try:
        import datetime as _dt
        if not os.path.isfile(path):
            return
        last_write = _dt.datetime.fromtimestamp(os.path.getmtime(path))
        if last_write.date() == _dt.datetime.now().date():
            # Still today's log; keep appending to it.
            return
        # Suffix the date after the full file name instead of splicing it in
        # before the extension.
        os.replace(path, f"{path}.{last_write.strftime('%Y-%m-%d')}")
    except Exception:
        pass
def _write_service_log(service: str, msg: str):
    """Append a timestamped line to ``<service>.log`` in the Server log directory.

    Args:
        service: Log name without extension; selects ``<service>.log``.
        msg: Text to record; written as ``[YYYY-MM-DD HH:MM:SS] msg``.

    The directory is created if needed and the file goes through
    ``_rotate_daily`` before the write. Best-effort: any failure is
    silently ignored.
    """
    try:
        log_dir = _server_logs_root()
        os.makedirs(log_dir, exist_ok=True)
        target = os.path.join(log_dir, f"{service}.log")
        _rotate_daily(target)
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        with open(target, 'a', encoding='utf-8') as out:
            out.write(f'[{stamp}] {msg}\n')
    except Exception:
        pass
def _ansible_log_server(msg: str):
    """Record *msg* in the server-side ``ansible`` service log."""
    service_name = 'ansible'
    _write_service_log(service_name, msg)
# Borealis Python API Endpoints # Borealis Python API Endpoints
from Python_API_Endpoints.ocr_engines import run_ocr_on_base64 from Python_API_Endpoints.ocr_engines import run_ocr_on_base64
from Python_API_Endpoints.script_engines import run_powershell_script from Python_API_Endpoints.script_engines import run_powershell_script
@@ -2897,6 +2940,7 @@ def api_agent_checkin():
if not row: if not row:
pw = _gen_strong_password() pw = _gen_strong_password()
out = _service_acct_set(conn, agent_id, username, pw) out = _service_acct_set(conn, agent_id, username, pw)
_ansible_log_server(f"[checkin] created creds agent_id={agent_id} user={out['username']} rotated={out['last_rotated_utc']}")
else: else:
# row: agent_id, username, password_encrypted, last_rotated_utc, version # row: agent_id, username, password_encrypted, last_rotated_utc, version
try: try:
@@ -2913,12 +2957,14 @@ def api_agent_checkin():
'last_rotated_utc': row[3] or _now_iso_utc(), 'last_rotated_utc': row[3] or _now_iso_utc(),
} }
conn.close() conn.close()
_ansible_log_server(f"[checkin] return creds agent_id={agent_id} user={out['username']}")
return jsonify({ return jsonify({
'username': out['username'], 'username': out['username'],
'password': out['password'], 'password': out['password'],
'policy': { 'force_rotation_minutes': 43200 } 'policy': { 'force_rotation_minutes': 43200 }
}) })
except Exception as e: except Exception as e:
_ansible_log_server(f"[checkin] error agent_id={agent_id} err={e}")
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500
@@ -2936,12 +2982,14 @@ def api_agent_service_account_rotate():
pw_new = _gen_strong_password() pw_new = _gen_strong_password()
out = _service_acct_set(conn, agent_id, user_eff, pw_new) out = _service_acct_set(conn, agent_id, user_eff, pw_new)
conn.close() conn.close()
_ansible_log_server(f"[rotate] rotated agent_id={agent_id} user={out['username']} at={out['last_rotated_utc']}")
return jsonify({ return jsonify({
'username': out['username'], 'username': out['username'],
'password': out['password'], 'password': out['password'],
'policy': { 'force_rotation_minutes': 43200 } 'policy': { 'force_rotation_minutes': 43200 }
}) })
except Exception as e: except Exception as e:
_ansible_log_server(f"[rotate] error agent_id={agent_id} err={e}")
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500