diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy
index d00dd66d534..9c5fb6d86e6 100644
--- a/jenkins/L0_MergeRequest.groovy
+++ b/jenkins/L0_MergeRequest.groovy
@@ -1039,7 +1039,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
                 'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
             ]
 
-            launchJob("L0_Test-x86_64-Single-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)
+            launchJob("L0_Test-x86_64-Single-GPU", reuseBuild, enableFailFast, globalVars, "x86_64", additionalParameters)
         } catch (InterruptedException e) {
             throw e
         } catch (Exception e) {
@@ -1095,7 +1095,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
                 'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
             ]
 
-            launchJob("L0_Test-x86_64-Multi-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)
+            launchJob("L0_Test-x86_64-Multi-GPU", reuseBuild, enableFailFast, globalVars, "x86_64", additionalParameters)
 
         } catch (InterruptedException e) {
             throw e
@@ -1143,7 +1143,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
                 "dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
             ]
 
-            launchJob("L0_Test-SBSA-Single-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)
+            launchJob("L0_Test-SBSA-Single-GPU", reuseBuild, enableFailFast, globalVars, "SBSA", additionalParameters)
         } catch (InterruptedException e) {
             throw e
         } catch (Exception e) {
@@ -1197,7 +1197,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
                 "dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
             ]
 
-            launchJob("L0_Test-SBSA-Multi-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)
+            launchJob("L0_Test-SBSA-Multi-GPU", reuseBuild, enableFailFast, globalVars, "SBSA", additionalParameters)
 
         } catch (InterruptedException e) {
             throw e
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index b40c7a11a7e..48d72045bb2 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -25,6 +25,8 @@ LLM_ROOT = "llm"
 ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
 UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
 
+REUSE_ARTIFACT_PATH = env.reuseArtifactPath
+
 X86_64_TRIPLE = "x86_64-linux-gnu"
 AARCH64_TRIPLE = "aarch64-linux-gnu"
 
@@ -349,6 +351,10 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
         // if the line cannot be split by "=", just ignore that line.
         def makoOptsJson = transformMakoArgsToJson(["Mako options:"] + makoArgs)
         def testListPath = renderTestDB(testList, llmSrcLocal, stageName, makoOptsJson)
+        // Reuse passed tests
+        if (REUSE_ARTIFACT_PATH) {
+            reusePassedTests(pipeline, llmSrcLocal, REUSE_ARTIFACT_PATH, stageName, testListPath)
+        }
         Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -r -p -oStrictHostKeyChecking=no ${testListPath} ${remote.user}@${remote.host}:${testListPathNode}",)
 
         // Generate Multi Node Job Launch Script
@@ -1063,6 +1069,41 @@ def renderTestDB(testContext, llmSrc, stageName, preDefinedMakoOpts=null)
     return testList
 }
 
+def reusePassedTests(pipeline, llmSrc, reusedArtifactPath, stageName, testListFile) {
+    def reusedPath = "${WORKSPACE}/reused"
+    sh "mkdir -p ${reusedPath}"
+    def resultsFileName = "results-${stageName}"
+    def passedTestsFile = "${reusedPath}/${stageName}/passed_tests.txt"
+    try {
+        def resultsUrl = "https://urm.nvidia.com/artifactory/${reusedArtifactPath}/test-results/${resultsFileName}.tar.gz"
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${reusedPath} && wget -nv ${resultsUrl}")
+        sh "cd ${reusedPath} && tar -zxf ${resultsFileName}.tar.gz"
+        // Get passed tests
+        sh """
+            python3 ${llmSrc}/jenkins/scripts/reuse_passed_tests.py \
+                get_passed_tests \
+                --input-file=${reusedPath}/${stageName}/results.xml \
+                --output-file=${passedTestsFile}
+        """
+        sh "echo The passed tests are: \$(cat ${passedTestsFile})"
+
+        // Copy the original test file to a new file
+        sh "cp ${testListFile} ${testListFile}.orig"
+        // Remove passed tests from original test file
+        sh """
+            python3 ${llmSrc}/jenkins/scripts/reuse_passed_tests.py \
+                remove_passed_tests \
+                --input-file=${testListFile} \
+                --passed-tests-file=${passedTestsFile}
+        """
+        sh "echo The test list after removing passed tests is: \$(cat ${testListFile})"
+    } catch (InterruptedException e) {
+        throw e
+    } catch (Exception e) {
+        echo "Failed to get passed tests: ${e.message}"
+    }
+}
+
 def getSSHConnectionPorts(portConfigFile, stageName)
 {
     def type = stageName.split('-')[0]
@@ -1409,6 +1450,11 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
     extraInternalEnv += " CPP_TEST_TIMEOUT_OVERRIDDEN=${pytestTestTimeout}"
 
     def testDBList = renderTestDB(testList, llmSrc, stageName)
+    // Reuse passed tests
+    if (REUSE_ARTIFACT_PATH) {
+        reusePassedTests(pipeline, llmSrc, REUSE_ARTIFACT_PATH, stageName, testDBList)
+    }
+
     testList = "${testList}_${splitId}"
     def testCmdLine = [
         "LLM_ROOT=${llmSrc}",
diff --git a/jenkins/scripts/reuse_passed_tests.py b/jenkins/scripts/reuse_passed_tests.py
new file mode 100644
index 00000000000..0f10bba01f6
--- /dev/null
+++ b/jenkins/scripts/reuse_passed_tests.py
@@ -0,0 +1,94 @@
+import argparse
+import os
+import sys
+import xml.etree.ElementTree as ET
+
+import test_rerun
+
+
+def get_passed_tests(input_file, output_file):
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist")
+        return
+
+    # Parse the JUnit XML file and extract passed test names
+    passed_tests = []
+    try:
+        tree = ET.parse(input_file)
+        root = tree.getroot()
+        suite = root.find('testsuite')
+        for testcase in suite.iter('testcase'):
+            # Check test status
+            has_failure = testcase.find('failure') is not None
+            has_error = testcase.find('error') is not None
+            has_skipped = testcase.find('skipped') is not None
+            if not has_failure and not has_error and not has_skipped:
+                # Parse the test name
+                classname = testcase.attrib.get('classname', '')
+                name = testcase.attrib.get('name', '')
+                filename = testcase.attrib.get('file', '')
+                test_name = test_rerun.parse_name(classname, name, filename)
+                passed_tests.append(test_name)
+    except Exception as e:
+        print(f"Failed to parse {input_file}: {e}")
+        return
+
+    # Write passed test names to output file, one per line
+    with open(output_file, 'w') as f:
+        for test in passed_tests:
+            f.write(test + '\n')
+
+
+def remove_passed_tests(input_file, passed_tests_file):
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist")
+        return
+    if not os.path.exists(passed_tests_file):
+        print(f"Passed tests file {passed_tests_file} does not exist")
+        return
+
+    passed_tests = []
+    # Read passed tests from file
+    with open(passed_tests_file, 'r') as f:
+        for line in f:
+            passed_tests.append(line.strip())
+
+    tests_to_keep = []
+    # Remove passed tests from input file
+    with open(input_file, 'r') as f:
+        for line in f:
+            if line.strip() not in passed_tests:
+                tests_to_keep.append(line.strip())
+
+    # Delete input file
+    try:
+        os.remove(input_file)
+    except Exception as e:
+        print(f"Failed to delete {input_file}: {e}")
+    # Write tests to keep to input file
+    with open(input_file, 'w') as f:
+        for test in tests_to_keep:
+            f.write(test + '\n')
+
+
+if __name__ == '__main__':
+    if (sys.argv[1] == "get_passed_tests"):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--input-file',
+                            required=True,
+                            help='Input XML file containing test results')
+        parser.add_argument('--output-file',
+                            required=True,
+                            help='Output file to write passed tests')
+        args = parser.parse_args(sys.argv[2:])
+        get_passed_tests(args.input_file, args.output_file)
+    elif (sys.argv[1] == "remove_passed_tests"):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--input-file',
+                            required=True,
+                            help='Test list file to filter in place')
+        parser.add_argument('--passed-tests-file',
+                            required=True,
+                            help='File containing passed tests')
+        args = parser.parse_args(sys.argv[2:])
+        remove_passed_tests(args.input_file, args.passed_tests_file)