Skip to main content

tdm_server_rust/dev/
health.rs

//! Dev health checks.
2
3use crate::{
4    app::AppState,
5    dev::paths::{file_size_label, resolve_app_log_path, resolve_error_log_path},
6    service::rss_service,
7};
8use chrono::{DateTime, Utc};
9use serde::Serialize;
10use std::time::Instant;
11
12/// 单项检查结果
13#[derive(Debug, Clone, Serialize)]
14pub struct HealthCheck {
15    /// 检查项名称
16    pub name: String,
17    /// 是否通过
18    pub ok: bool,
19    /// 详情
20    pub detail: String,
21}
22
23/// 健康检查总响应
24#[derive(Debug, Clone, Serialize)]
25pub struct HealthReport {
26    /// 全部通过
27    pub ok: bool,
28    /// 进程启动 UTC 时间
29    pub started_at: String,
30    /// 运行 profile
31    pub profile: String,
32    /// 构建信息(可选)
33    pub build: Option<String>,
34    /// SkyWalking UI 地址
35    pub skywalking_ui: Option<String>,
36    /// 各检查项
37    pub checks: Vec<HealthCheck>,
38}
39
40/// 执行全部健康检查
41#[tracing::instrument(skip_all, level = "info")]
42pub async fn run_health_checks(state: &AppState) -> HealthReport {
43    let mut checks = Vec::new();
44
45    checks.push(check_process(state));
46    checks.push(check_database(state).await);
47    checks.push(check_rss_dir(state));
48    checks.push(check_error_log(&state.config.dev_console));
49    checks.push(check_app_log(&state.config.dev_console));
50    checks.push(check_skywalking_oap(state).await);
51
52    let ok = checks.iter().all(|c| c.ok);
53    HealthReport {
54        ok,
55        started_at: format_instant_utc(state.started_at),
56        profile: state.config.profile.clone(),
57        build: option_env!("GIT_HASH").map(|h| h.to_string()),
58        skywalking_ui: {
59            let url = state.config.telemetry.ui.skywalking.trim();
60            if url.is_empty() {
61                None
62            } else {
63                Some(url.to_string())
64            }
65        },
66        checks,
67    }
68}
69
70fn check_process(state: &AppState) -> HealthCheck {
71    let uptime = state.started_at.elapsed();
72    HealthCheck {
73        name: "process".into(),
74        ok: true,
75        detail: format!(
76            "uptime={:.0}s addr={}:{} telemetry={}",
77            uptime.as_secs_f64(),
78            state.config.server.host,
79            state.config.server.port,
80            state.config.telemetry.enabled
81        ),
82    }
83}
84
85async fn check_database(state: &AppState) -> HealthCheck {
86    match sqlx::query_scalar::<_, i32>("SELECT 1")
87        .fetch_one(&state.db)
88        .await
89    {
90        Ok(_) => HealthCheck {
91            name: "database".into(),
92            ok: true,
93            detail: "SELECT 1 ok".into(),
94        },
95        Err(e) => HealthCheck {
96            name: "database".into(),
97            ok: false,
98            detail: e.to_string(),
99        },
100    }
101}
102
103fn check_rss_dir(state: &AppState) -> HealthCheck {
104    let dir = rss_service::resolve_rss_src_dir(&state.config);
105    let ok = dir.is_dir();
106    HealthCheck {
107        name: "rss_dir".into(),
108        ok,
109        detail: format!("path={} exists={ok}", dir.display()),
110    }
111}
112
113fn check_error_log(cfg: &crate::config::DevConsoleConfig) -> HealthCheck {
114    let path = resolve_error_log_path(cfg);
115    let ok = if path.is_file() {
116        true
117    } else if let Some(dir) = path.parent() {
118        std::fs::create_dir_all(dir).is_ok()
119    } else {
120        false
121    };
122    HealthCheck {
123        name: "error_log".into(),
124        ok,
125        detail: format!("path={} {}", path.display(), file_size_label(&path)),
126    }
127}
128
129fn check_app_log(cfg: &crate::config::DevConsoleConfig) -> HealthCheck {
130    let path = resolve_app_log_path(cfg);
131    let ok = path.is_file();
132    HealthCheck {
133        name: "app_log".into(),
134        ok,
135        detail: format!("path={} {}", path.display(), file_size_label(&path)),
136    }
137}
138
139async fn check_skywalking_oap(state: &AppState) -> HealthCheck {
140    if !state.config.telemetry.enabled {
141        return HealthCheck {
142            name: "skywalking_oap".into(),
143            ok: true,
144            detail: "telemetry disabled".into(),
145        };
146    }
147    let endpoint = state.config.telemetry.otlp_endpoint.trim();
148    let _ = endpoint;
149    let client = &state.http_client;
150    match client
151        .get("http://127.0.0.1:12800/healthcheck")
152        .timeout(std::time::Duration::from_secs(3))
153        .send()
154        .await
155    {
156        Ok(r) if r.status().is_success() => HealthCheck {
157            name: "skywalking_oap".into(),
158            ok: true,
159            detail: "healthcheck ok".into(),
160        },
161        Ok(r) => HealthCheck {
162            name: "skywalking_oap".into(),
163            ok: false,
164            detail: format!("healthcheck status={}", r.status()),
165        },
166        Err(e) => HealthCheck {
167            name: "skywalking_oap".into(),
168            ok: false,
169            detail: format!("healthcheck unreachable: {e}"),
170        },
171    }
172}
173
174fn format_instant_utc(started: Instant) -> String {
175    let elapsed = started.elapsed();
176    let utc: DateTime<Utc> = Utc::now() - chrono::Duration::from_std(elapsed).unwrap_or_default();
177    utc.format("%Y-%m-%dT%H:%M:%SZ").to_string()
178}