Skip to content

Commit

Permalink
Merge pull request TencentBlueKing#10364 from tangruotian/issue-10362
Browse files Browse the repository at this point in the history
Worker杀掉当前进程父进程导致Agent误报 TencentBlueKing#10362
  • Loading branch information
bkci-bot committed Jun 3, 2024
2 parents e56facf + 6b34884 commit ea5da5e
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,15 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.Map.Entry;

Expand Down Expand Up @@ -165,7 +168,7 @@ public void killRecursively(boolean forceFlag) {
proc.destroy();
}

public void kill(boolean forceFlag) throws InterruptedException {
/**
 * Implementation-specific kill step for this process.
 *
 * <p>Calls {@code proc.destroy()} and then {@code killByKiller()}, in that order.
 *
 * @param forceFlag not read by this implementation
 * @throws InterruptedException as declared by {@code killByKiller()}
 */
public void kill0(boolean forceFlag) throws InterruptedException {
    proc.destroy();
    killByKiller();
}
Expand Down Expand Up @@ -681,7 +684,7 @@ protected final File getFile(String relativePath) {
return new File(new File("/proc/" + this.getPid()), relativePath);
}

public void kill(boolean forceFlag) throws InterruptedException {
public void kill0(boolean forceFlag) throws InterruptedException {
try {
int pid = this.getPid();
BkProcessTree.log("Killing pid=" + pid);
Expand Down Expand Up @@ -790,7 +793,7 @@ public void killRecursively(boolean forceFlag) throws InterruptedException {
this.killByKiller();
}

public void kill(boolean forceFlag) throws InterruptedException {
public void kill0(boolean forceFlag) throws InterruptedException {
BkProcessTree.log("Killing " + this.getPid());
p.kill();
this.killByKiller();
Expand Down Expand Up @@ -877,6 +880,8 @@ Object readResolve() {
}

public abstract class OSProcess implements IOSProcess, Serializable {
private Set<Integer> keepAlivePids = new HashSet<Integer>(64);

final int pid;

private OSProcess(int pid) {
Expand All @@ -887,6 +892,10 @@ public final int getPid() {
return this.pid;
}

/**
 * Adds the given pids to this process's keep-alive set.
 *
 * <p>{@code kill(boolean)} consults this set and skips the actual kill when
 * this process's own pid is contained in it.
 *
 * @param pids pids to add; a {@code null} or empty collection is ignored
 */
public final void addKeepAlivePids(Collection<Integer> pids) {
    // Guard first: Set.addAll(null) throws NullPointerException, and an
    // empty collection leaves the set unchanged anyway.
    if (pids == null || pids.isEmpty()) {
        return;
    }
    keepAlivePids.addAll(pids);
}

public abstract BkProcessTree.OSProcess getParent();

final BkProcessTree getTree() {
Expand All @@ -898,14 +907,26 @@ public final List<BkProcessTree.OSProcess> getChildren() {

for (OSProcess p : BkProcessTree.this) {
if (p.getParent() == this) {
r.add(p);
if (keepAlivePids.contains(p.pid)) {
this.keepAlivePids.add(this.pid);
} else {
p.addKeepAlivePids(keepAlivePids);
r.add(p);
}
}
}

return r;
}

public abstract void kill(boolean forceFlag) throws InterruptedException;
/**
 * Kills this process unless its pid is in the keep-alive set.
 *
 * <p>Always logs the pid and whether it is marked keep-alive. When the pid
 * is not marked, the work is delegated to {@code kill0(forceFlag)}.
 *
 * @param forceFlag passed through unchanged to {@code kill0}
 * @throws InterruptedException if {@code kill0} throws it
 */
public void kill(boolean forceFlag) throws InterruptedException {
    final boolean keepAlive = keepAlivePids.contains(pid);
    BkProcessTree.log("pid=" + pid + ", iskeepAlive=" + keepAlive);
    if (keepAlive) {
        // Protected pid: skip the kill entirely.
        return;
    }
    kill0(forceFlag);
}

public abstract void kill0(boolean forceFlag) throws InterruptedException;

void killByKiller() throws InterruptedException {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ object KillBuildProcessTree {
}
val processTreeIterator = processTree.iterator()
val killedProcessIds = mutableListOf<Int>()
val keepAlivePids = mutableSetOf(currentProcessId)
while (processTreeIterator.hasNext()) {
val osProcess = processTreeIterator.next()
var envVars: EnvVars?
Expand All @@ -123,10 +124,22 @@ object KillBuildProcessTree {
val dontKillProcessTree = envVars["DEVOPS_DONT_KILL_PROCESS_TREE"]
if ("true".equals(dontKillProcessTree, ignoreCase = true)) {
logger.info("DEVOPS_DONT_KILL_PROCESS_TREE is true, skip")
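/*
Q: Why is only this process excluded here, and not its parent as well?
A: 1. Processes of this kind almost always have pid 1 or the current worker as their parent. Neither needs protection, and the worker itself is already excluded.
2. If the parent is a process spawned by the business job itself, the job is responsible for controlling its exit. We must not proactively keep business processes alive, because that would let processes that should be killed as a fallback survive.
(Note: if that situation does occur, the job failed to manage its processes and left behind one that should have exited. It must therefore be killed by the fallback and must not be kept alive in cascade just because a child process set DEVOPS_DONT_KILL_PROCESS_TREE.)
*/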
keepAlivePids.add(osProcess.pid)
continue
}

if (osProcess.pid == currentProcessId) {
if (keepAlivePids.contains(osProcess.pid)) {
osProcess.parent?.let { parent ->
keepAlivePids.add(parent.pid) // 防止父进程被干掉,级联到自身
}
continue
}
try {
Expand All @@ -141,6 +154,7 @@ object KillBuildProcessTree {
flag = flag && taskIds.contains(envTaskId)
}
if (flag) {
osProcess.addKeepAlivePids(keepAlivePids)
osProcess.killRecursively(forceFlag)
osProcess.kill(forceFlag)
killedProcessIds.add(osProcess.pid)
Expand Down

0 comments on commit ea5da5e

Please sign in to comment.