[UPDATE] Kubeflow 및 MLflow 권한 오류 처리 로직 추가, KubeflowRunBatch 및 로그 조회 로직 개선, 의존성 업데이트 및 CORS 설정 변경

feature/apply-patched-updates
bjkim 6 hours ago
parent 5ac6190235
commit ec78bd1210

@ -71,7 +71,7 @@ dependencies {
implementation("com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.12.0")
}
implementation("io.minio:minio:8.5.17")
implementation("io.minio:minio:8.6.0")
// Kubernetes API (Pod 목록 조회, phase == Running 확인)
implementation("io.kubernetes:client-java:19.0.3")

@ -102,13 +102,13 @@ public class KubeflowRunBatchConfig {
.block();
if (response == null || response.getRuns() == null || response.getRuns().isEmpty()) {
log.info("KubeflowRunBatch: 데이터 없음, 종료");
log.debug("KubeflowRunBatch: 데이터 없음, 종료");
runs = Collections.emptyList();
return null;
}
runs = response.getRuns();
log.info("KubeflowRunBatch: {}건 조회 완료", runs.size());
log.debug("KubeflowRunBatch: {}건 조회 완료", runs.size());
}
if (index >= runs.size()) {
@ -151,7 +151,7 @@ public class KubeflowRunBatchConfig {
// Builds the ItemWriter for the KubeflowRunBatch job: every chunk of entities it
// receives is persisted with a single saveAll call, then the chunk size is logged.
public ItemWriter<KubeflowRunEntity> runWriter() {
return items -> {
kubeflowRunRepository.saveAll(items);
// NOTE(review): this view is a rendered diff, so the pre-change (info) and
// post-change (debug) versions of the same log line appear together below;
// presumably only the debug line remains in the committed file - confirm.
log.info("KubeflowRunBatch: {}건 DB 저장 완료", items.size());
log.debug("KubeflowRunBatch: {}건 DB 저장 완료", items.size());
};
}
}

@ -12,7 +12,7 @@ public class WebConfiguration implements WebMvcConfigurer {
@Override
public void addCorsMappings(CorsRegistry registry) {
registry.addMapping("/**")
.allowedOriginPatterns("http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:3000", "http://127.0.0.1:5173", "http://10.10.11.144", "http://cuuva.com:2481", "http://210.217.121.58:2481") // 허용할 Origin 지정
.allowedOriginPatterns("http://localhost:3000", "http://localhost:5173", "http://127.0.0.1:3000", "http://127.0.0.1:5173", "http://10.10.11.144", "http://cuuva.com:2481", "http://210.217.121.58:2481", "http://aisw.openlivinglab.kr:20001", "http://10.55.15.92") // 허용할 Origin 지정
.allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS")
.allowedHeaders("*") // 필요하면 "cuuva-jwt", "Content-Type", "Authorization" 명시 가능
.exposedHeaders("cuuva-jwt")

@ -156,6 +156,11 @@ public class ExperimentsController {
.bodyValue(kubeflowPayload)
.retrieve()
.bodyToMono(Map.class)
.doOnError(WebClientResponseException.class, e -> {
if (e.getStatusCode().value() == 403) {
log.error("[권한 오류] Kubeflow API (POST /apis/v2beta1/experiments) 403 Forbidden. 네임스페이스('default') 권한이나 ServiceAccount 설정을 확인하세요. 응답바디: {}", e.getResponseBodyAsString());
}
})
.onErrorResume(WebClientResponseException.Conflict.class, e -> {
log.info("Kubeflow experiment 가 이미 존재합니다 (409 Conflict). 기존 experiment를 조회합니다.");
return webClientBuilder.build()
@ -163,6 +168,11 @@ public class ExperimentsController {
.uri(kubeflowBaseUrl + "/apis/v2beta1/experiments")
.retrieve()
.bodyToMono(Map.class)
.doOnError(WebClientResponseException.class, e2 -> {
if (e2.getStatusCode().value() == 403) {
log.error("[권한 오류] Kubeflow API (GET /apis/v2beta1/experiments) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString());
}
})
.flatMap(listResp -> {
if (listResp != null && listResp.containsKey("experiments")) {
List<Map<String, Object>> experiments = (List<Map<String, Object>>) listResp.get("experiments");
@ -227,6 +237,11 @@ public class ExperimentsController {
.bodyValue(mlflowPayload)
.retrieve()
.bodyToMono(Map.class)
.doOnError(WebClientResponseException.class, ex -> {
if (ex.getStatusCode().value() == 403) {
log.error("[권한 오류] MLflow API (POST /experiments/create) 403 Forbidden. mlflow.user / mlflow.password 설정 및 권한을 확인하세요. 응답바디: {}", ex.getResponseBodyAsString());
}
})
.onErrorResume(WebClientResponseException.BadRequest.class, ex -> {
log.info("MLflow experiment 가 이미 존재할 가능성이 있습니다 (400 Bad Request). 이름으로 조회합니다.");
try {
@ -237,6 +252,11 @@ public class ExperimentsController {
.headers(headers -> headers.setBasicAuth(mlflowUser, mlflowPassword))
.retrieve()
.bodyToMono(Map.class)
.doOnError(WebClientResponseException.class, e2 -> {
if (e2.getStatusCode().value() == 403) {
log.error("[권한 오류] MLflow API (GET /experiments/get-by-name) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString());
}
})
.flatMap(getByNameResp -> {
if (getByNameResp != null && getByNameResp.containsKey("experiment")) {
Map<String, Object> exp = (Map<String, Object>) getByNameResp.get("experiment");
@ -260,6 +280,11 @@ public class ExperimentsController {
.headers(headers -> headers.setBasicAuth(mlflowUser, mlflowPassword))
.retrieve()
.bodyToMono(Map.class)
.doOnError(WebClientResponseException.class, e2 -> {
if (e2.getStatusCode().value() == 403) {
log.error("[권한 오류] MLflow API (GET /experiments/get) 403 Forbidden. 응답바디: {}", e2.getResponseBodyAsString());
}
})
.map(getResp -> {
if (getResp.containsKey("experiment")) {
Map<String, Object> exp = (Map<String, Object>) getResp.get("experiment");

@ -311,7 +311,9 @@ public class AdminService {
if (runId != null && !runId.isBlank()
&& podNameParam != null && !podNameParam.isBlank()
&& kubeflowUrl != null && !kubeflowUrl.isBlank()) {
String kfpOnly = tryKfpMlPipelineNodeLog(runId, podNameParam.trim());
String ns = (workflowNamespaceParam != null && !workflowNamespaceParam.isBlank())
? workflowNamespaceParam.trim() : null;
String kfpOnly = tryKfpMlPipelineNodeLog(runId, ns, podNameParam.trim());
if (kfpOnly != null) {
return "-- KFP ml-pipeline API (kubectl 없이) v1beta1 노드 로그 --\n\n" + kfpOnly;
}
@ -347,7 +349,7 @@ public class AdminService {
}
}
if (preferKfpApiForLogs) {
String kfpFirst = tryKfpMlPipelineNodeLog(runId, res0.podName, podNameParam.trim());
String kfpFirst = tryKfpMlPipelineNodeLog(runId, res0.namespace, res0.podName, podNameParam.trim());
if (kfpFirst != null) {
return "-- KFP ml-pipeline API (UI와 동일) | node: " + res0.podName
+ (podNameParam.trim().equals(res0.podName) ? "" : " (요청: " + podNameParam.trim() + ")")
@ -363,6 +365,32 @@ public class AdminService {
podHealthService.readPipelinePodLog(res.podName, null, normalizeTail(tailLines));
logText = fb.logText;
}
if (logText != null && logText.contains("code=403") && logText.contains("ApiException")) {
log.warn("[Admin] K8s Pod 로그 조회 권한이 없습니다 (403 Forbidden). Kubeflow API로 대체를 시도합니다. runId={}, podName={}", runId, podNameParam);
String resolvedTaskId = null;
try {
Map<String, Object> run = pipelineUploadService.getKfpRunById(runId);
List<Map<String, Object>> tasks = kfpTasksWithPods(run);
for (Map<String, Object> t : tasks) {
String pod = firstString(t.get("podName"), t.get("pod_name"));
if (podNameParam.trim().equals(pod) || res.podName.equals(pod)) {
resolvedTaskId = firstString(t.get("taskId"), t.get("task_id"));
if (resolvedTaskId != null && !resolvedTaskId.isBlank()) {
break;
}
}
}
} catch (Exception ex) {
log.error("[Admin] KFP Run에서 Task ID 추출 실패: {}", ex.getMessage());
}
String kfpLog = tryKfpMlPipelineNodeLog(runId, res.namespace, resolvedTaskId, res.podName, podNameParam.trim());
if (kfpLog != null) {
log.info("[Admin] Kubeflow API로 로그 대체 조회 성공. nodeId/podName={}", resolvedTaskId != null ? resolvedTaskId : res.podName);
return "-- KFP ml-pipeline API (kubectl 403 Forbidden 대체) --\n\n" + kfpLog;
} else {
log.warn("[Admin] Kubeflow API로 로그 대체 조회 실패. runId={}, podName={}, resolvedTaskId={}", runId, podNameParam, resolvedTaskId);
}
}
String out = "-- kubectl logs " + res.podName + " -n " + res.namespace
+ (podNameParam.trim().equals(res.podName) ? "" : " (요청: " + podNameParam.trim() + ")")
+ " --\n\n" + (logText != null ? logText : "");
@ -468,7 +496,7 @@ public class AdminService {
}
}
if (preferKfpApiForLogs) {
String kfpLog = tryKfpMlPipelineNodeLog(runId, res.podName, pod.trim());
String kfpLog = tryKfpMlPipelineNodeLog(runId, wfNsEff, res.podName, pod.trim());
if (kfpLog != null) {
StringBuilder k = new StringBuilder();
k.append("-- KFP ml-pipeline API v1beta1/runs/.../nodes/{node_id}/log (UI와 동일 백엔드) --\n");
@ -488,6 +516,25 @@ public class AdminService {
podHealthService.readPipelinePodLog(res.podName, null, normalizeTail(tailLines));
logText = fb.logText;
}
if (logText != null && logText.contains("code=403") && logText.contains("ApiException")) {
log.warn("[Admin] K8s Pod 로그 조회 권한이 없습니다 (403 Forbidden). Kubeflow API로 대체를 시도합니다. runId={}, podName={}, step={}", runId, res.podName, step);
String taskId = firstString(task.get("taskId"), task.get("task_id"));
String kfpLog = tryKfpMlPipelineNodeLog(runId, res.namespace, taskId, res.podName, pod.trim());
if (kfpLog != null) {
log.info("[Admin] Kubeflow API로 로그 대체 조회 성공. nodeId/podName={}", taskId != null ? taskId : res.podName);
StringBuilder k = new StringBuilder();
k.append("-- KFP ml-pipeline API (kubectl 403 Forbidden 대체) --\n");
k.append("node_id: ").append(taskId != null ? taskId : res.podName);
if (!pod.trim().equals(res.podName)) {
k.append(" | KFP task pod_name: ").append(pod.trim());
}
k.append(" | Step: ").append(step != null ? step : "(이름 없음)").append(" --\n\n");
k.append(kfpLog);
return k.toString();
} else {
log.warn("[Admin] Kubeflow API로 로그 대체 조회 실패. runId={}, podName={}, taskId={}", runId, res.podName, taskId);
}
}
StringBuilder sb = new StringBuilder();
sb.append("-- kubectl logs ").append(res.podName).append(" -n ").append(res.namespace);
sb.append(" (KFP 로그와 동일) | Step: ").append(step != null ? step : "(이름 없음)");
@ -506,7 +553,7 @@ public class AdminService {
/**
* Fetches a node log through the same ml-pipeline API the KFP UI uses, trying each candidate node_id in the order given.
*/
private String tryKfpMlPipelineNodeLog(String runId, String... nodeIdsOrdered) {
private String tryKfpMlPipelineNodeLog(String runId, String namespace, String... nodeIdsOrdered) {
if (runId == null || runId.isBlank() || kubeflowUrl == null || kubeflowUrl.isBlank()) {
return null;
}
@ -519,7 +566,7 @@ public class AdminService {
if (!seen.add(t)) {
continue;
}
String body = pipelineUploadService.getV1beta1RunNodeLog(runId, t);
String body = pipelineUploadService.getV1beta1RunNodeLog(runId, t, namespace);
if (isSubstantialKfpV1LogBody(body)) {
return body;
}

@ -305,13 +305,25 @@ public class PipelineUploadService {
* <p>A v2 Run ID is presumably accepted as well (TODO confirm). Returns null on 404 or any other error.</p>
*/
public String getV1beta1RunNodeLog(String runId, String nodeId) {
// Backward-compatible overload: delegates to the three-argument variant with a
// null namespace, so no "namespace" query parameter is appended to the request.
return getV1beta1RunNodeLog(runId, nodeId, null);
}
/**
* Fetches a Pod log through the KFP ml-pipeline API (same backend as the KFP UI, with an optional namespace query parameter).
*/
public String getV1beta1RunNodeLog(String runId, String nodeId, String namespace) {
if (runId == null || runId.isBlank() || nodeId == null || nodeId.isBlank()) {
return null;
}
String base = kubeflowBaseUrl.replaceAll("/+$", "");
String encRun = UriUtils.encodePathSegment(runId.trim(), StandardCharsets.UTF_8);
String encNode = UriUtils.encodePathSegment(nodeId.trim(), StandardCharsets.UTF_8);
String url = base + "/apis/v1beta1/runs/" + encRun + "/nodes/" + encNode + "/log";
UriComponentsBuilder builder = UriComponentsBuilder.fromHttpUrl(
base + "/apis/v1beta1/runs/" + encRun + "/nodes/" + encNode + "/log");
if (namespace != null && !namespace.isBlank()) {
builder.queryParam("namespace", namespace.trim());
}
String url = builder.toUriString();
try {
return webClient.get()
.uri(url)
@ -323,13 +335,13 @@ public class PipelineUploadService {
} catch (WebClientResponseException e) {
int code = e.getStatusCode().value();
if (code == 404 || code == 400) {
log.debug("[KFP] v1beta1 node log {} node={}: {}", runId, nodeId, code);
log.debug("[KFP] v1beta1 node log {} node={} ns={}: {}", runId, nodeId, namespace, code);
} else {
log.debug("[KFP] v1beta1 node log {} node={}: {}", runId, nodeId, e.getMessage());
log.debug("[KFP] v1beta1 node log {} node={} ns={}: {}", runId, nodeId, namespace, e.getMessage());
}
return null;
} catch (Exception e) {
log.debug("[KFP] v1beta1 node log failed runId={} node={}: {}", runId, nodeId, e.getMessage());
log.debug("[KFP] v1beta1 node log failed runId={} node={} ns={}: {}", runId, nodeId, namespace, e.getMessage());
return null;
}
}

@ -24,13 +24,15 @@ cuuva.app.jwtSecret= 275511b31c520562d69802ce4a913773102563891563a24062f44b3f312
cuuva.app.jwtExpirationMs= 900000
cuuva.app.jwtRefreshExpirationMs= 8640000
spring.jpa.show-sql=true
spring.jpa.show-sql=false
spring.jpa.properties.hibernate.format_sql=true
logging.level.org.hibernate.type.descriptor.sql=TRACE
# logging.level.org.hibernate.type.descriptor.sql=TRACE
# logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE
logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE
# Spring Batch 내부 로그 가리기
logging.level.org.springframework.batch=WARN
spring.datasource.hikari.connection-timeout=30000
spring.datasource.hikari.idle-timeout=60000

Loading…
Cancel
Save