(点击上方公众号,可快速关注)
来源:伯乐在线专栏作者 - gityuan
链接:http://android.jobbole.com/84881/
点击 → 了解如何加入专栏作者
接上文
//获取等待状态【见小节3.3】
final int waitState = evaluateCheckerCompletionLocked();
if (waitState == COMPLETED) {
waitedHalf = false;
continue;
} else if (waitState == WAITING) {
continue;
} else if (waitState == WAITED_HALF) {
if (!waitedHalf) {
//第一次进入等待时间过半的状态
ArrayList pids = new ArrayList();
pids.add(Process.myPid());
//则输出栈信息【见小节3.4】
ActivityManagerService.dumpStackTraces(true, pids, null, null,
NATIVE_STACKS_OF_INTEREST);
waitedHalf = true;
}
continue;
}
//获取被阻塞的checkers
blockedCheckers = getBlockedCheckersLocked();
subject = describeCheckersLocked(blockedCheckers);
allowRestart = mAllowRestart;
}
EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
ArrayList pids = new ArrayList();
pids.add(Process.myPid());
if (mPhonePid > 0) pids.add(mPhonePid);
//waitedHalf=true,则追加输出栈信息【见小节3.4】
final File stack = ActivityManagerService.dumpStackTraces(
!waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
//系统已被阻塞1分钟,也不在乎多等待2s来确保stack trace信息输出
SystemClock.sleep(2000);
if (RECORD_KERNEL_THREADS) {
//输出kernel栈信息【见小节3.5】
dumpKernelStackTraces();
}
//触发kernel来dump所有阻塞线程【见小节3.6】
doSysRq('l');
//输出dropbox信息【见小节3.7】
Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
public void run() {
mActivity.addErrorToDropBox(
"watchdog", null, "system_server", null, null,
subject, null, stack, null);
}
};
dropboxThread.start();
try {
//等待dropbox线程工作2s
dropboxThread.join(2000);
} catch (InterruptedException ignored) {}
IActivityController controller;
synchronized (this) {
controller = mController;
}
if (controller != null) {
//将阻塞状态报告给activity controller,
try {
Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
//返回值为1表示继续等待,-1表示杀死系统
int res = controller.systemNotResponding(subject);
if (res >= 0) {
waitedHalf = false; //继续等待
continue;
}
} catch (RemoteException e) {
}
}
//当debugger没有attach时,才杀死进程
if (Debug.isDebuggerConnected()) {
debuggerWasConnected = 2;
}
if (debuggerWasConnected >= 2) {
Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
} else if (debuggerWasConnected > 0) {
Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
} else if (!allowRestart) {
Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
} else {
Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
//遍历输出阻塞线程的栈信息
for (int i=0; iblockedCheckers.size(); i++) {
Slog.w(TAG, blockedCheckers.get(i).getName() + " stack trace:");
StackTraceElement[] stackTrace
= blockedCheckers.get(i).getThread().getStackTrace();
for (StackTraceElement element: stackTrace) {
Slog.w(TAG, " at " + element);
}
}
Slog.w(TAG, "*** GOODBYE!");
//杀死进程system_server【见小节3.8】
Process.killProcess(Process.myPid());
System.exit(10);
}
waitedHalf = false;
}
}
3.2 scheduleCheckLocked
public final class HandlerChecker implements Runnable {
...
/**
 * Schedules one check pass for this checker, unless none is needed or one is already pending.
 */
public void scheduleCheckLocked() {
    final boolean nothingToMonitor = mMonitors.size() == 0;
    if (nothingToMonitor && mHandler.getLooper().getQueue().isPolling()) {
        // No monitors are registered and the looper's queue reports that it is polling,
        // so the check is recorded as completed without posting anything.
        mCompleted = true;
        return;
    }
    if (mCompleted) {
        // No check is outstanding: reset the bookkeeping and record the start time.
        mCompleted = false;
        mCurrentMonitor = null;
        mStartTime = SystemClock.uptimeMillis();
        // Post this Runnable at the very front of the handler's message queue.
        mHandler.postAtFrontOfQueue(this);
    }
    // Otherwise a check is still being processed; there is no need to post another one.
}
}
postAtFrontOfQueue(this),该方法输入参数为Runnable对象,根据消息机制,回调HandlerChecker中的run方法。
3.2.1 HandlerChecker.run
public final class HandlerChecker implements Runnable {
public void run() {
final int size = mMonitors.size();
for (int i = 0 ; i size ; i++) {
synchronized (Watchdog.this) {
mCurrentMonitor = mMonitors.get(i);
}
//回调具体服务的monitor方法
mCurrentMonitor.monitor();
}
synchronized (Watchdog.this) {
mCompleted = true;
mCurrentMonitor = null;
}
}
}
回调的方法,例如BinderThreadMonitor.monitor
3.3 evaluateCheckerCompletionLocked
/**
 * Computes the overall wait state across all handler checkers.
 *
 * @return the largest value returned by {@code getCompletionStateLocked()} over
 *         {@code mHandlerCheckers}, or {@code COMPLETED} when the list is empty
 */
private int evaluateCheckerCompletionLocked() {
    int state = COMPLETED;
    // The loop condition had lost its '<' operator in the published snippet.
    for (int i = 0; i < mHandlerCheckers.size(); i++) {
        HandlerChecker hc = mHandlerCheckers.get(i);
        // Keep the maximum state seen so far.
        state = Math.max(state, hc.getCompletionStateLocked());
    }
    return state;
}
获取mHandlerCheckers列表中等待状态值最大的state.
3.3.1 getCompletionStateLocked
public int getCompletionStateLocked() {
if (mCompleted) {
return COMPLETED;
} else {
long latency = SystemClock.uptimeMillis() - mStartTime;
if (latency mWaitMax/2) {
return WAITING;
} else if (latency mWaitMax) {
return WAITED_HALF;
}
}
return OVERDUE;
}
COMPLETED = 0:等待完成;
WAITING = 1:等待时间小于DEFAULT_TIMEOUT的一半,即30s;
WAITED_HALF = 2:等待时间处于30s~60s之间;
OVERDUE = 3:等待时间大于或等于60s。
3.4 AMS.dumpStackTraces
/**
 * Prepares the traces file named by the {@code dalvik.vm.stack-trace-file} system property
 * and delegates the actual dump to the path-based {@code dumpStackTraces} overload.
 *
 * @param clearTraces when true, an existing traces file is deleted before being recreated
 * @param firstPids forwarded unchanged to the path-based overload
 * @param processCpuTracker forwarded unchanged to the path-based overload
 * @param lastPids forwarded unchanged to the path-based overload
 * @param nativeProcs forwarded unchanged to the path-based overload
 * @return the traces file, or {@code null} if the property is unset/empty or preparing the
 *         file raised an {@code IOException}
 */
public static File dumpStackTraces(boolean clearTraces, ArrayList firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray lastPids, String[] nativeProcs) {
    // Per the original author's note this defaults to data/anr/traces.txt.
    final String path = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    final boolean pathConfigured = path != null && path.length() != 0;
    if (!pathConfigured) {
        return null;
    }

    final File outFile = new File(path);
    try {
        // Start from a clean file only when asked to and when one already exists.
        if (clearTraces && outFile.exists()) {
            outFile.delete();
        }
        // Create the traces file if it is not there.
        outFile.createNewFile();
        // Mode 0666, i.e. -rw-rw-rw-
        FileUtils.setPermissions(outFile.getPath(), 0666, -1, -1);
    } catch (IOException e) {
        return null;
    }

    // Write the trace contents.
    dumpStackTraces(path, firstPids, processCpuTracker, lastPids, nativeProcs);
    return outFile;
}
关于trace内容,这里就不细说,直接说说结论:
调用Process.sendSignal()向目标进程发送信号SIGNAL_QUIT;
分别调用backtrace.dump_backtrace(),输出/system/bin/mediaserver,/system/bin/sdcard,/system/bin/surfaceflinger这3个进程的backtrace;
统计CPU使用率;
调用Process.sendSignal()向其他进程发送信号SIGNAL_QUIT。
3.5 dumpKernelStackTraces
/**
 * Passes the path from the {@code dalvik.vm.stack-trace-file} system property to
 * {@code native_dumpKernelStacks}.
 *
 * @return a {@code File} for that path, or {@code null} if the property is unset or empty
 */
private File dumpKernelStackTraces() {
    // Per the original author's note the path is data/anr/traces.txt.
    final String path = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    final boolean pathConfigured = path != null && path.length() != 0;
    if (!pathConfigured) {
        return null;
    }

    native_dumpKernelStacks(path);
    return new File(path);
}
native_dumpKernelStacks调用到android_server_Watchdog.dumpKernelStacks
3.6 doSysRq
/**
 * Writes a single command character to {@code /proc/sysrq-trigger}.
 *
 * <p>Failures are logged as warnings and otherwise ignored; this is a best-effort call.
 *
 * @param c the character to write to the trigger node
 */
private void doSysRq(char c) {
    // try-with-resources closes the writer even when write() throws, which the
    // original code did not (the writer leaked on a failed write).
    try (FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger")) {
        sysrq_trigger.write(c);
    } catch (IOException e) {
        Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
    }
}
通过向节点/proc/sysrq-trigger写入字符,触发kernel来dump所有阻塞线程,输出所有CPU的backtrace到kernel log。
3.7 dropBox
关于dropbox已在dropBox源码篇详细讲解过,输出文件到/data/system/dropbox,比如system_app_crash。
3.8 killProcess
Process.killProcess已在文章《理解杀进程的实现原理》中详细讲解,它通过向目标进程发送信号9来完成杀进程的过程。
杀死system_server进程后,会导致zygote进程自杀,进而触发init重启Zygote进程,这便出现了手机framework重启的现象。
四、小结
watchdog在check过程中出现阻塞1分钟的情况,则会输出:
专栏作者简介( 点击 → 加入专栏作者 )
gityuan:Android全栈工程师:上至能写App,中间能改framework和Native代码,下至能调驱动,全栈能解决性能与稳定性。(新浪微博:@Gityuan)
打赏支持作者写出更多好文章,谢谢!
关注「安卓开发精选」
看更多精选安卓技术文章
↓↓↓