这是一个介绍 Android 平台日志系统的系列文章:
- Android 平台日志系统整体框架
- logd 守护进程初始化过程
- 客户端写日志过程分析
- logd 写日志过程分析一
- logd 写日志过程分析二(本文)
- logd 读日志过程分析
本文基于 AOSP android-10.0.0_r41 版本讲解
# 1. SocketListener 读取 App 发送的日志信息
上一篇,我们讲到 SocketListener 收到 App 发送过出来的日志信息,接着会调用 onDataAvailable() 来处理收到的数据:
// system/core/logd/LogListener.cpp
bool LogListener::onDataAvailable(SocketClient* cli) {
//prctl 系统调用用于设置进程相关的一些东西。这里使用 PR_SET_NAME 设置了线程的名字为 logd.writer
static bool name_set;
if (!name_set) {
prctl(PR_SET_NAME, "logd.writer");
name_set = true;
}
// + 1 to ensure null terminator if MAX_PAYLOAD buffer is received
char buffer[sizeof_log_id_t + sizeof(uint16_t) + sizeof(log_time) +
LOGGER_ENTRY_MAX_PAYLOAD + 1];
struct iovec iov = { buffer, sizeof(buffer) - 1 };
alignas(4) char control[CMSG_SPACE(sizeof(struct ucred))];
struct msghdr hdr = {
nullptr, 0, &iov, 1, control, sizeof(control), 0,
};
int socket = cli->getSocket();
// 接下来使用 recvmsg 从 socket 里读取数据
// To clear the entire buffer is secure/safe, but this contributes to 1.68%
// overhead under logging load. We are safe because we check counts, but
// still need to clear null terminator
// memset(buffer, 0, sizeof(buffer));
ssize_t n = recvmsg(socket, &hdr, 0);
if (n <= (ssize_t)(sizeof(android_log_header_t))) {
return false;
}
buffer[n] = 0;
// 由于我们创建 socket 时打开了 SO_PASSCRED 选项,这里我们可以读取一个用于表示客户端身份的 struct ucred
struct ucred* cred = nullptr;
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&hdr);
while (cmsg != nullptr) {
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_CREDENTIALS) {
cred = (struct ucred*)CMSG_DATA(cmsg);
break;
}
cmsg = CMSG_NXTHDR(&hdr, cmsg);
}
struct ucred fake_cred;
if (cred == nullptr) {
cred = &fake_cred;
cred->pid = 0;
cred->uid = DEFAULT_OVERFLOWUID;
}
if (cred->uid == AID_LOGD) {
// ignore log messages we send to ourself.
// Such log messages are often generated by libraries we depend on
// which use standard Android logging.
return false;
}
// 处理一些特殊值
android_log_header_t* header =
reinterpret_cast<android_log_header_t*>(buffer);
log_id_t logId = static_cast<log_id_t>(header->id);
if (/* logId < LOG_ID_MIN || */ logId >= LOG_ID_MAX ||
logId == LOG_ID_KERNEL) {
return false;
}
if ((logId == LOG_ID_SECURITY) &&
(!__android_log_security() ||
!clientHasLogCredentials(cred->uid, cred->gid, cred->pid))) {
return false;
}
// Check credential validity, acquire corrected details if not supplied.
if (cred->pid == 0) {
cred->pid = logbuf ? logbuf->tidToPid(header->tid)
: android::tidToPid(header->tid);
if (cred->pid == getpid()) {
// We expect that /proc/<tid>/ is accessible to self even without
// readproc group, so that we will always drop messages that come
// from any of our logd threads and their library calls.
return false; // ignore self
}
}
if (cred->uid == DEFAULT_OVERFLOWUID) {
uid_t uid =
logbuf ? logbuf->pidToUid(cred->pid) : android::pidToUid(cred->pid);
if (uid == AID_LOGD) {
uid = logbuf ? logbuf->pidToUid(header->tid)
: android::pidToUid(cred->pid);
}
if (uid != AID_LOGD) cred->uid = uid;
}
char* msg = ((char*)buffer) + sizeof(android_log_header_t);
n -= sizeof(android_log_header_t);
// NB: hdr.msg_flags & MSG_TRUNC is not tested, silently passing a
// truncated message to the logs.
// 调用 logbuf->log 将数据写入 LogBuffer
if (logbuf != nullptr) {
int res = logbuf->log(
logId, header->realtime, cred->uid, cred->pid, header->tid, msg,
((size_t)n <= UINT16_MAX) ? (uint16_t)n : UINT16_MAX);
// 此时可能有客户端在等待读取数据,于是也调用 reader->notifyNewLog()
if (res > 0 && reader != nullptr) {
reader->notifyNewLog(static_cast<log_mask_t>(1 << logId));
}
}
return true;
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
函数主要流程如下:
- prctl 系统调用用于设置进程相关的一些东西。这里使用 PR_SET_NAME 设置了线程的名字为 logd.writer
- 由于我们创建 socket 时打开了 SO_PASSCRED 选项,这里我们可以读取一个用于表示客户端身份的 struct ucred
- 接下来,如果 struct ucred 内部有一些特殊值,做单独处理
- 调用 logbuf->log 将数据写入 LogBuffer
- 此时可能有客户端在等待读取数据,于是也调用 reader->notifyNewLog()
# 2. 写入 LogBuffer
log 的写入分为 4 个步骤:
- 根据日志的内容对数据做调整,数据不对,就直接 return
- 使用一个状态机去除重复的 log
- 写入 log
- 如果需要,删除一些 log 以避免 log 数据过多
接下来分步查看源码:
根据 tag 和优先级判断该 log 是否可以写入
// system/core/logd/LogBuffer.cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
if (log_id >= LOG_ID_MAX) {
return -EINVAL;
}
// Slip the time by 1 nsec if the incoming lands on xxxxxx000 ns.
// This prevents any chance that an outside source can request an
// exact entry with time specified in ms or us precision.
if ((realtime.tv_nsec % 1000) == 0) ++realtime.tv_nsec;
// LogBufferElement 用于保存一条日志信息
LogBufferElement* elem =
new LogBufferElement(log_id, realtime, uid, pid, tid, msg, len);
// 根据数据内容,调整数据
if (log_id != LOG_ID_SECURITY) {
int prio = ANDROID_LOG_INFO;
const char* tag = nullptr;
size_t tag_len = 0;
if (log_id == LOG_ID_EVENTS || log_id == LOG_ID_STATS) {
tag = tagToName(elem->getTag());
if (tag) {
tag_len = strlen(tag);
}
} else {
prio = *msg;
tag = msg + 1;
tag_len = strnlen(tag, len - 1);
}
if (!__android_log_is_loggable_len(prio, tag, tag_len,
ANDROID_LOG_VERBOSE)) {
// Log traffic received to total
wrlock();
stats.addTotal(elem);
unlock();
delete elem;
return -EACCES;
}
}
// ......
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
使用一个状态机去除重复的 log
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
//......
wrlock();
LogBufferElement* currentLast = lastLoggedElements[log_id];
if (currentLast) {
LogBufferElement* dropped = droppedElements[log_id];
uint16_t count = dropped ? dropped->getDropped() : 0;
//
// State Init
// incoming:
// dropped = nullptr
// currentLast = nullptr;
// elem = incoming message
// outgoing:
// dropped = nullptr -> State 0
// currentLast = copy of elem
// log elem
// State 0
// incoming:
// count = 0
// dropped = nullptr
// currentLast = copy of last message
// elem = incoming message
// outgoing: if match != DIFFERENT
// dropped = copy of first identical message -> State 1
// currentLast = reference to elem
// break: if match == DIFFERENT
// dropped = nullptr -> State 0
// delete copy of last message (incoming currentLast)
// currentLast = copy of elem
// log elem
// State 1
// incoming:
// count = 0
// dropped = copy of first identical message
// currentLast = reference to last held-back incoming
// message
// elem = incoming message
// outgoing: if match == SAME
// delete copy of first identical message (dropped)
// dropped = reference to last held-back incoming
// message set to chatty count of 1 -> State 2
// currentLast = reference to elem
// outgoing: if match == SAME_LIBLOG
// dropped = copy of first identical message -> State 1
// take sum of currentLast and elem
// if sum overflows:
// log currentLast
// currentLast = reference to elem
// else
// delete currentLast
// currentLast = reference to elem, sum liblog.
// break: if match == DIFFERENT
// delete dropped
// dropped = nullptr -> State 0
// log reference to last held-back (currentLast)
// currentLast = copy of elem
// log elem
// State 2
// incoming:
// count = chatty count
// dropped = chatty message holding count
// currentLast = reference to last held-back incoming
// message.
// dropped = chatty message holding count
// elem = incoming message
// outgoing: if match != DIFFERENT
// delete chatty message holding count
// dropped = reference to last held-back incoming
// message, set to chatty count + 1
// currentLast = reference to elem
// break: if match == DIFFERENT
// log dropped (chatty message)
// dropped = nullptr -> State 0
// log reference to last held-back (currentLast)
// currentLast = copy of elem
// log elem
//
enum match_type match = identical(elem, currentLast);
if (match != DIFFERENT) {
if (dropped) {
// Sum up liblog tag messages?
if ((count == 0) /* at Pass 1 */ && (match == SAME_LIBLOG)) {
android_log_event_int_t* event =
reinterpret_cast<android_log_event_int_t*>(
const_cast<char*>(currentLast->getMsg()));
//
// To unit test, differentiate with something like:
// event->header.tag = htole32(CHATTY_LOG_TAG);
// here, then instead of delete currentLast below,
// log(currentLast) to see the incremental sums form.
//
uint32_t swab = event->payload.data;
unsigned long long total = htole32(swab);
event = reinterpret_cast<android_log_event_int_t*>(
const_cast<char*>(elem->getMsg()));
swab = event->payload.data;
lastLoggedElements[LOG_ID_EVENTS] = elem;
total += htole32(swab);
// check for overflow
if (total >= UINT32_MAX) {
log(currentLast);
unlock();
return len;
}
stats.addTotal(currentLast);
delete currentLast;
swab = total;
event->payload.data = htole32(swab);
unlock();
return len;
}
if (count == USHRT_MAX) {
log(dropped);
count = 1;
} else {
delete dropped;
++count;
}
}
if (count) {
stats.addTotal(currentLast);
currentLast->setDropped(count);
}
droppedElements[log_id] = currentLast;
lastLoggedElements[log_id] = elem;
unlock();
return len;
}
if (dropped) { // State 1 or 2
if (count) { // State 2
log(dropped); // report chatty
} else { // State 1
delete dropped;
}
droppedElements[log_id] = nullptr;
log(currentLast); // report last message in the series
} else { // State 0
delete currentLast;
}
}
lastLoggedElements[log_id] = new LogBufferElement(*elem);
// .....
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
这里使用一个状态机来去除重复的 log,知道流程即可,有兴趣的同学可以参考源码中的注释自行分析。
写入 log
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
//......
log(elem);
unlock();
return len;
}
2
3
4
5
6
7
8
这里调用另一个重载 log:
// assumes LogBuffer::wrlock() held, owns elem, look after garbage collection
void LogBuffer::log(LogBufferElement* elem) {
// cap on how far back we will sort in-place, otherwise append
static uint32_t too_far_back = 5; // five seconds
// Insert elements in time sorted order if possible
// NB: if end is region locked, place element at end of list
LogBufferElementCollection::iterator it = mLogElements.end();
LogBufferElementCollection::iterator last = it;
if (__predict_true(it != mLogElements.begin())) --it;
if (__predict_false(it == mLogElements.begin()) ||
__predict_true((*it)->getRealTime() <= elem->getRealTime()) ||
__predict_false((((*it)->getRealTime().tv_sec - too_far_back) >
elem->getRealTime().tv_sec) &&
(elem->getLogId() != LOG_ID_KERNEL) &&
((*it)->getLogId() != LOG_ID_KERNEL))) {
mLogElements.push_back(elem);
} else {
log_time end(log_time::EPOCH);
bool end_set = false;
bool end_always = false;
LogTimeEntry::rdlock();
LastLogTimes::iterator times = mTimes.begin();
while (times != mTimes.end()) {
LogTimeEntry* entry = times->get();
if (!entry->mNonBlock) {
end_always = true;
break;
}
// it passing mEnd is blocked by the following checks.
if (!end_set || (end <= entry->mEnd)) {
end = entry->mEnd;
end_set = true;
}
times++;
}
if (end_always || (end_set && (end > (*it)->getRealTime()))) {
mLogElements.push_back(elem);
} else {
// should be short as timestamps are localized near end()
do {
last = it;
if (__predict_false(it == mLogElements.begin())) {
break;
}
--it;
} while (((*it)->getRealTime() > elem->getRealTime()) &&
(!end_set || (end <= (*it)->getRealTime())));
mLogElements.insert(last, elem);
}
LogTimeEntry::unlock();
}
stats.add(elem);
maybePrune(elem->getLogId());
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
__predict_true 和 __predict_false 用来提示编译器对应的判断很可能是 true/false,类似于 Linux 内核的 likely/unlikely。如果判断正确,可以得到很大的性能提升。
客户端使用 Unix-domain socket 来写入 log,读出来的时候,很可能就已经按时间顺序排好,这个时候,只需要把 LogBufferElement 插入列表末尾就可以了。
万一很不幸的,上面的判断失败了,就只能从后往前遍历列表,找到一个合适的位置来插入 LogBufferElement。这个过程类似于插入排序。
如果需要,删除一些 log 以避免 log 数据过多
void LogBuffer::log(LogBufferElement* elem)
在最后会调用 maybePrune,根据 log 总量判断是否需要删除一些 log。
// system/core/logd/LogBuffer.cpp
// Prune at most 10% of the log entries or maxPrune, whichever is less.
//
// LogBuffer::wrlock() must be held when this function is called.
void LogBuffer::maybePrune(log_id_t id) {
size_t sizes = stats.sizes(id);
unsigned long maxSize = log_buffer_size(id);
if (sizes > maxSize) {
size_t sizeOver = sizes - ((maxSize * 9) / 10);
size_t elements = stats.realElements(id);
size_t minElements = elements / 100;
if (minElements < minPrune) {
minElements = minPrune;
}
unsigned long pruneRows = elements * sizeOver / sizes;
if (pruneRows < minElements) {
pruneRows = minElements;
}
if (pruneRows > maxPrune) {
pruneRows = maxPrune;
}
prune(id, pruneRows);
}
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
如果 id 类型的 log 超过了总量限制,就删除 10% 的 log。在这个前提下,所删除的 log 调试大于 minElements 和 minPrune,小于 maxPrune。其中,minElements 是所有 id 类型的 log 的总条数的百分之一。
当每条 log 都很大、log 总量又很小,限制最小值可以避免总是需要剔除旧 log。如果 log 每条很小,限制最大数目可以避免删除过多的 log。
实际的删除工作由 prune 方法完成,这个我们会在冗余日志处理过程分析日志部分来讲解。