diff --git a/build.gradle.kts b/build.gradle.kts index 0d2c0f3..4a24ade 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -71,7 +71,7 @@ dependencies { implementation("com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.12.0") } - implementation("io.minio:minio:8.5.17") + implementation("io.minio:minio:8.6.0") // Kubernetes API (Pod 목록 조회, phase == Running 확인) implementation("io.kubernetes:client-java:19.0.3") diff --git a/src/main/java/kr/re/etri/autoflow/batch/KubeflowRunBatchConfig.java b/src/main/java/kr/re/etri/autoflow/batch/KubeflowRunBatchConfig.java index 67e2407..4d22218 100644 --- a/src/main/java/kr/re/etri/autoflow/batch/KubeflowRunBatchConfig.java +++ b/src/main/java/kr/re/etri/autoflow/batch/KubeflowRunBatchConfig.java @@ -102,13 +102,13 @@ public class KubeflowRunBatchConfig { .block(); if (response == null || response.getRuns() == null || response.getRuns().isEmpty()) { - log.info("KubeflowRunBatch: 데이터 없음, 종료"); + log.debug("KubeflowRunBatch: 데이터 없음, 종료"); runs = Collections.emptyList(); return null; } runs = response.getRuns(); - log.info("KubeflowRunBatch: {}건 조회 완료", runs.size()); + log.debug("KubeflowRunBatch: {}건 조회 완료", runs.size()); } if (index >= runs.size()) { @@ -151,7 +151,7 @@ public class KubeflowRunBatchConfig { public ItemWriter runWriter() { return items -> { kubeflowRunRepository.saveAll(items); - log.info("KubeflowRunBatch: {}건 DB 저장 완료", items.size()); + log.debug("KubeflowRunBatch: {}건 DB 저장 완료", items.size()); }; } } diff --git a/src/main/java/kr/re/etri/autoflow/common/WebConfiguration.java b/src/main/java/kr/re/etri/autoflow/common/WebConfiguration.java index 051bf60..f02d819 100644 --- a/src/main/java/kr/re/etri/autoflow/common/WebConfiguration.java +++ b/src/main/java/kr/re/etri/autoflow/common/WebConfiguration.java @@ -12,7 +12,7 @@ public class WebConfiguration implements WebMvcConfigurer { @Override public void addCorsMappings(CorsRegistry registry) { registry.addMapping("/**") - .allowedOriginPatterns("http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:3000", "http://127.0.0.1:5173", "http://10.10.11.144", "http://cuuva.com:2481", "http://210.217.121.58:2481") // 허용할 Origin 지정 + .allowedOriginPatterns("http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:3000", "http://127.0.0.1:5173", "http://10.10.11.144", "http://cuuva.com:2481", "http://210.217.121.58:2481", "http://aisw.openlivinglab.kr:20001", "http://10.55.15.92") // 허용할 Origin 지정 .allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS") .allowedHeaders("*") // 필요하면 "cuuva-jwt", "Content-Type", "Authorization" 명시 가능 .exposedHeaders("cuuva-jwt") diff --git a/src/main/java/kr/re/etri/autoflow/controllers/ExperimentsController.java b/src/main/java/kr/re/etri/autoflow/controllers/ExperimentsController.java index 75bdbf8..ccaa928 100644 --- a/src/main/java/kr/re/etri/autoflow/controllers/ExperimentsController.java +++ b/src/main/java/kr/re/etri/autoflow/controllers/ExperimentsController.java @@ -156,6 +156,11 @@ public class ExperimentsController { .bodyValue(kubeflowPayload) .retrieve() .bodyToMono(Map.class) + .doOnError(WebClientResponseException.class, e -> { + if (e.getStatusCode().value() == 403) { + log.error("[권한 오류] Kubeflow API (POST /apis/v2beta1/experiments) 403 Forbidden. 네임스페이스('default') 권한이나 ServiceAccount 설정을 확인하세요. 응답바디: {}", e.getResponseBodyAsString()); + } + }) .onErrorResume(WebClientResponseException.Conflict.class, e -> { log.info("Kubeflow experiment 가 이미 존재합니다 (409 Conflict). 기존 experiment를 조회합니다."); return webClientBuilder.build() @@ -163,6 +168,11 @@ public class ExperimentsController { .uri(kubeflowBaseUrl + "/apis/v2beta1/experiments") .retrieve() .bodyToMono(Map.class) + .doOnError(WebClientResponseException.class, e2 -> { + if (e2.getStatusCode().value() == 403) { + log.error("[권한 오류] Kubeflow API (GET /apis/v2beta1/experiments) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString()); + } + }) .flatMap(listResp -> { if (listResp != null && listResp.containsKey("experiments")) { List> experiments = (List>) listResp.get("experiments"); @@ -227,6 +237,11 @@ public class ExperimentsController { .bodyValue(mlflowPayload) .retrieve() .bodyToMono(Map.class) + .doOnError(WebClientResponseException.class, ex -> { + if (ex.getStatusCode().value() == 403) { + log.error("[권한 오류] MLflow API (POST /experiments/create) 403 Forbidden. mlflow.user / mlflow.password 설정 및 권한을 확인하세요. 응답바디: {}", ex.getResponseBodyAsString()); + } + }) .onErrorResume(WebClientResponseException.BadRequest.class, ex -> { log.info("MLflow experiment 가 이미 존재할 가능성이 있습니다 (400 Bad Request). 이름으로 조회합니다."); try { @@ -237,6 +252,11 @@ public class ExperimentsController { .headers(headers -> headers.setBasicAuth(mlflowUser, mlflowPassword)) .retrieve() .bodyToMono(Map.class) + .doOnError(WebClientResponseException.class, e2 -> { + if (e2.getStatusCode().value() == 403) { + log.error("[권한 오류] MLflow API (GET /experiments/get-by-name) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString()); + } + }) .flatMap(getByNameResp -> { if (getByNameResp != null && getByNameResp.containsKey("experiment")) { Map exp = (Map) getByNameResp.get("experiment"); @@ -260,6 +280,11 @@ public class ExperimentsController { .headers(headers -> headers.setBasicAuth(mlflowUser, mlflowPassword)) .retrieve() .bodyToMono(Map.class) + .doOnError(WebClientResponseException.class, e2 -> { + if (e2.getStatusCode().value() == 403) { + log.error("[권한 오류] MLflow API (GET /experiments/get) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString()); + } + }) .map(getResp -> { if (getResp.containsKey("experiment")) { Map exp = (Map) getResp.get("experiment"); diff --git a/src/main/java/kr/re/etri/autoflow/service/AdminService.java b/src/main/java/kr/re/etri/autoflow/service/AdminService.java index 66b7d5f..ce8d5c6 100644 --- a/src/main/java/kr/re/etri/autoflow/service/AdminService.java +++ b/src/main/java/kr/re/etri/autoflow/service/AdminService.java @@ -311,7 +311,9 @@ public class AdminService { if (runId != null && !runId.isBlank() && podNameParam != null && !podNameParam.isBlank() && kubeflowUrl != null && !kubeflowUrl.isBlank()) { - String kfpOnly = tryKfpMlPipelineNodeLog(runId, podNameParam.trim()); + String ns = (workflowNamespaceParam != null && !workflowNamespaceParam.isBlank()) + ? workflowNamespaceParam.trim() : null; + String kfpOnly = tryKfpMlPipelineNodeLog(runId, ns, podNameParam.trim()); if (kfpOnly != null) { return "-- KFP ml-pipeline API (kubectl 없이) v1beta1 노드 로그 --\n\n" + kfpOnly; } @@ -347,7 +349,7 @@ public class AdminService { } } if (preferKfpApiForLogs) { - String kfpFirst = tryKfpMlPipelineNodeLog(runId, res0.podName, podNameParam.trim()); + String kfpFirst = tryKfpMlPipelineNodeLog(runId, res0.namespace, res0.podName, podNameParam.trim()); if (kfpFirst != null) { return "-- KFP ml-pipeline API (UI와 동일) | node: " + res0.podName + (podNameParam.trim().equals(res0.podName) ? "" : " (요청: " + podNameParam.trim() + ")") @@ -363,6 +365,32 @@ public class AdminService { podHealthService.readPipelinePodLog(res.podName, null, normalizeTail(tailLines)); logText = fb.logText; } + if (logText != null && logText.contains("code=403") && logText.contains("ApiException")) { + log.warn("[Admin] K8s Pod 로그 조회 권한이 없습니다 (403 Forbidden). Kubeflow API로 대체를 시도합니다. runId={}, podName={}", runId, podNameParam); + String resolvedTaskId = null; + try { + Map run = pipelineUploadService.getKfpRunById(runId); + List> tasks = kfpTasksWithPods(run); + for (Map t : tasks) { + String pod = firstString(t.get("podName"), t.get("pod_name")); + if (podNameParam.trim().equals(pod) || res.podName.equals(pod)) { + resolvedTaskId = firstString(t.get("taskId"), t.get("task_id")); + if (resolvedTaskId != null && !resolvedTaskId.isBlank()) { + break; + } + } + } + } catch (Exception ex) { + log.error("[Admin] KFP Run에서 Task ID 추출 실패: {}", ex.getMessage()); + } + String kfpLog = tryKfpMlPipelineNodeLog(runId, res.namespace, resolvedTaskId, res.podName, podNameParam.trim()); + if (kfpLog != null) { + log.info("[Admin] Kubeflow API로 로그 대체 조회 성공. nodeId/podName={}", resolvedTaskId != null ? resolvedTaskId : res.podName); + return "-- KFP ml-pipeline API (kubectl 403 Forbidden 대체) --\n\n" + kfpLog; + } else { + log.warn("[Admin] Kubeflow API로 로그 대체 조회 실패. runId={}, podName={}, resolvedTaskId={}", runId, podNameParam, resolvedTaskId); + } + } String out = "-- kubectl logs " + res.podName + " -n " + res.namespace + (podNameParam.trim().equals(res.podName) ? "" : " (요청: " + podNameParam.trim() + ")") + " --\n\n" + (logText != null ? logText : ""); @@ -468,7 +496,7 @@ public class AdminService { } } if (preferKfpApiForLogs) { - String kfpLog = tryKfpMlPipelineNodeLog(runId, res.podName, pod.trim()); + String kfpLog = tryKfpMlPipelineNodeLog(runId, wfNsEff, res.podName, pod.trim()); if (kfpLog != null) { StringBuilder k = new StringBuilder(); k.append("-- KFP ml-pipeline API v1beta1/runs/.../nodes/{node_id}/log (UI와 동일 백엔드) --\n"); @@ -488,6 +516,25 @@ public class AdminService { podHealthService.readPipelinePodLog(res.podName, null, normalizeTail(tailLines)); logText = fb.logText; } + if (logText != null && logText.contains("code=403") && logText.contains("ApiException")) { + log.warn("[Admin] K8s Pod 로그 조회 권한이 없습니다 (403 Forbidden). Kubeflow API로 대체를 시도합니다. runId={}, podName={}, step={}", runId, res.podName, step); + String taskId = firstString(task.get("taskId"), task.get("task_id")); + String kfpLog = tryKfpMlPipelineNodeLog(runId, res.namespace, taskId, res.podName, pod.trim()); + if (kfpLog != null) { + log.info("[Admin] Kubeflow API로 로그 대체 조회 성공. nodeId/podName={}", taskId != null ? taskId : res.podName); + StringBuilder k = new StringBuilder(); + k.append("-- KFP ml-pipeline API (kubectl 403 Forbidden 대체) --\n"); + k.append("node_id: ").append(taskId != null ? taskId : res.podName); + if (!pod.trim().equals(res.podName)) { + k.append(" | KFP task pod_name: ").append(pod.trim()); + } + k.append(" | Step: ").append(step != null ? step : "(이름 없음)").append(" --\n\n"); + k.append(kfpLog); + return k.toString(); + } else { + log.warn("[Admin] Kubeflow API로 로그 대체 조회 실패. runId={}, podName={}, taskId={}", runId, res.podName, taskId); + } + } StringBuilder sb = new StringBuilder(); sb.append("-- kubectl logs ").append(res.podName).append(" -n ").append(res.namespace); sb.append(" (KFP 로그와 동일) | Step: ").append(step != null ? step : "(이름 없음)"); @@ -506,7 +553,7 @@ public class AdminService { /** * KFP UI가 쓰는 것과 같은 ml-pipeline 노드 로그. node_id 후보 순서대로 시도. */ - private String tryKfpMlPipelineNodeLog(String runId, String... nodeIdsOrdered) { + private String tryKfpMlPipelineNodeLog(String runId, String namespace, String... nodeIdsOrdered) { if (runId == null || runId.isBlank() || kubeflowUrl == null || kubeflowUrl.isBlank()) { return null; } @@ -519,7 +566,7 @@ public class AdminService { if (!seen.add(t)) { continue; } - String body = pipelineUploadService.getV1beta1RunNodeLog(runId, t); + String body = pipelineUploadService.getV1beta1RunNodeLog(runId, t, namespace); if (isSubstantialKfpV1LogBody(body)) { return body; } diff --git a/src/main/java/kr/re/etri/autoflow/service/PipelineUploadService.java b/src/main/java/kr/re/etri/autoflow/service/PipelineUploadService.java index 32ba489..71bf367 100644 --- a/src/main/java/kr/re/etri/autoflow/service/PipelineUploadService.java +++ b/src/main/java/kr/re/etri/autoflow/service/PipelineUploadService.java @@ -305,13 +305,25 @@ public class PipelineUploadService { *

v2 Run ID도 일부 배포에서 동작합니다. 404/비어 있으면 null.

*/ public String getV1beta1RunNodeLog(String runId, String nodeId) { + return getV1beta1RunNodeLog(runId, nodeId, null); + } + + /** + * KFP ml-pipeline이 클러스터에서 읽는 Pod 로그 (KFP UI와 동일한 백엔드 경로, 네임스페이스 지정 추가). + */ + public String getV1beta1RunNodeLog(String runId, String nodeId, String namespace) { if (runId == null || runId.isBlank() || nodeId == null || nodeId.isBlank()) { return null; } String base = kubeflowBaseUrl.replaceAll("/+$", ""); String encRun = UriUtils.encodePathSegment(runId.trim(), StandardCharsets.UTF_8); String encNode = UriUtils.encodePathSegment(nodeId.trim(), StandardCharsets.UTF_8); - String url = base + "/apis/v1beta1/runs/" + encRun + "/nodes/" + encNode + "/log"; + UriComponentsBuilder builder = UriComponentsBuilder.fromHttpUrl( + base + "/apis/v1beta1/runs/" + encRun + "/nodes/" + encNode + "/log"); + if (namespace != null && !namespace.isBlank()) { + builder.queryParam("namespace", namespace.trim()); + } + String url = builder.toUriString(); try { return webClient.get() .uri(url) @@ -323,13 +335,13 @@ public class PipelineUploadService { } catch (WebClientResponseException e) { int code = e.getStatusCode().value(); if (code == 404 || code == 400) { - log.debug("[KFP] v1beta1 node log {} node={}: {}", runId, nodeId, code); + log.debug("[KFP] v1beta1 node log {} node={} ns={}: {}", runId, nodeId, namespace, code); } else { - log.debug("[KFP] v1beta1 node log {} node={}: {}", runId, nodeId, e.getMessage()); + log.debug("[KFP] v1beta1 node log {} node={} ns={}: {}", runId, nodeId, namespace, e.getMessage()); } return null; } catch (Exception e) { - log.debug("[KFP] v1beta1 node log failed runId={} node={}: {}", runId, nodeId, e.getMessage()); + log.debug("[KFP] v1beta1 node log failed runId={} node={} ns={}: {}", runId, nodeId, namespace, e.getMessage()); return null; } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 0498d5c..6185c8e 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -24,13 +24,15 @@ cuuva.app.jwtSecret= 275511b31c520562d69802ce4a913773102563891563a24062f44b3f312 cuuva.app.jwtExpirationMs= 900000 cuuva.app.jwtRefreshExpirationMs= 8640000 -spring.jpa.show-sql=true +spring.jpa.show-sql=false spring.jpa.properties.hibernate.format_sql=true -logging.level.org.hibernate.type.descriptor.sql=TRACE +# logging.level.org.hibernate.type.descriptor.sql=TRACE +# logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE -logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE +# Spring Batch 내부 로그 가리기 +logging.level.org.springframework.batch=WARN spring.datasource.hikari.connection-timeout=30000 spring.datasource.hikari.idle-timeout=60000