Binder Multithreading: A Scenario-Based Analysis

8/24/2023

Android Framework tutorial series: https://yuandaimaahao.github.io/AndroidFrameworkTutorialPages/

For more detailed video tutorials, source code, and Q&A support, contact the author on WeChat: zzh0838

# Creation of binder_thread

When the application layer calls ioctl, execution traps into the kernel and reaches binder_ioctl in the Binder driver:

```c
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	//......
	thread = binder_get_thread(proc);
	if (thread == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	//......
}
```
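For context, here is a minimal user-space sketch of how a process obtains the file descriptor on which these ioctl calls are made. The function name and the mapping size are illustrative, not from the AOSP sources; ProcessState::self() performs essentially these steps when it opens the driver:

```c
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

// Open the Binder driver and map the receive buffer, roughly what
// ProcessState does at startup. Every subsequent Binder operation is
// an ioctl on this fd and ends up in binder_ioctl() in the driver.
int binder_open_sketch(void)
{
    int fd = open("/dev/binder", O_RDWR | O_CLOEXEC);
    if (fd < 0)
        return -1;

    // Map the buffer area the driver copies incoming transactions into;
    // the 1 MB size here is illustrative.
    if (mmap(NULL, 1024 * 1024, PROT_READ, MAP_PRIVATE, fd, 0) == MAP_FAILED) {
        close(fd);
        return -1;
    }
    return fd;
}
```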
Next, let's look at the implementation of binder_get_thread:

```c
static struct binder_thread *binder_get_thread(struct binder_proc *proc)
{
	struct binder_thread *thread;
	struct binder_thread *new_thread;

	binder_inner_proc_lock(proc); // take the lock
	// note that the last argument is NULL
	thread = binder_get_thread_ilocked(proc, NULL);
	binder_inner_proc_unlock(proc);
	if (!thread) {
		// allocate a new binder_thread (outside the lock, since it may sleep)
		new_thread = kzalloc(sizeof(*thread), GFP_KERNEL);
		if (new_thread == NULL)
			return NULL;
		// call binder_get_thread_ilocked again, this time with the new object
		binder_inner_proc_lock(proc);
		thread = binder_get_thread_ilocked(proc, new_thread);
		binder_inner_proc_unlock(proc);
		if (thread != new_thread)
			kfree(new_thread);
	}
	return thread;
}

static struct binder_thread *binder_get_thread_ilocked(
		struct binder_proc *proc, struct binder_thread *new_thread)
{
	struct binder_thread *thread = NULL;
	struct rb_node *parent = NULL;
	// proc->threads is a red-black tree
	struct rb_node **p = &proc->threads.rb_node;

	// look the thread up by its pid
	while (*p) {
		parent = *p;
		thread = rb_entry(parent, struct binder_thread, rb_node);

		if (current->pid < thread->pid)
			p = &(*p)->rb_left;
		else if (current->pid > thread->pid)
			p = &(*p)->rb_right;
		else
			return thread;
	}
	// on the first pass nothing is found and new_thread is NULL, so return NULL
	if (!new_thread)
		return NULL;
	// on the second pass, initialize the binder_thread and insert it into
	// the proc->threads red-black tree
	thread = new_thread;
	binder_stats_created(BINDER_STAT_THREAD);
	thread->proc = proc;
	thread->pid = current->pid;
	get_task_struct(current);
	thread->task = current;
	atomic_set(&thread->tmp_ref, 0);
	init_waitqueue_head(&thread->wait);
	INIT_LIST_HEAD(&thread->todo);
	rb_link_node(&thread->rb_node, parent, p);
	rb_insert_color(&thread->rb_node, &proc->threads);
	thread->looper_need_return = true;
	thread->return_error.work.type = BINDER_WORK_RETURN_ERROR;
	thread->return_error.cmd = BR_OK;
	thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR;
	thread->reply_error.cmd = BR_OK;
	INIT_LIST_HEAD(&new_thread->waiting_thread_node);
	return thread;
}
```

To summarize: for each application-layer thread that enters the driver, Binder keeps a binder_thread structure describing it, stored in the threads red-black tree of the owning binder_proc. The tree is keyed by current->pid, which for a thread is its tid, so every thread gets its own node.
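The shape of binder_get_thread is a classic lock-friendly allocation pattern worth isolating. Below is a minimal user-space sketch of the same idea, with all names illustrative and a sorted linked list standing in for the kernel's red-black tree: look up under the lock, allocate outside the lock (allocation may sleep), then retake the lock and retry, discarding our copy if another thread won the race:

```c
#include <pthread.h>
#include <stdlib.h>

// One record per thread, keyed by pid, mirroring binder_thread.
struct thread_rec {
	int pid;
	struct thread_rec *next;
};

static struct thread_rec *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

// Must be called with `lock` held. With new_rec == NULL this is a pure
// lookup; otherwise it inserts new_rec when the pid is not found,
// mirroring the two calls to binder_get_thread_ilocked().
static struct thread_rec *lookup_ilocked(int pid, struct thread_rec *new_rec)
{
	struct thread_rec **p = &head;

	while (*p && (*p)->pid < pid)
		p = &(*p)->next;
	if (*p && (*p)->pid == pid)
		return *p;          // already present
	if (!new_rec)
		return NULL;        // first pass: report "not found"
	new_rec->pid = pid;         // second pass: link the new record in
	new_rec->next = *p;
	*p = new_rec;
	return new_rec;
}

struct thread_rec *get_thread_rec(int pid)
{
	struct thread_rec *rec, *new_rec;

	pthread_mutex_lock(&lock);
	rec = lookup_ilocked(pid, NULL);
	pthread_mutex_unlock(&lock);
	if (rec)
		return rec;

	new_rec = calloc(1, sizeof(*new_rec)); // may sleep, so done unlocked
	if (!new_rec)
		return NULL;

	pthread_mutex_lock(&lock);
	rec = lookup_ilocked(pid, new_rec);
	pthread_mutex_unlock(&lock);
	if (rec != new_rec)
		free(new_rec);      // lost the race; use the winner's record
	return rec;
}
```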

# waiting_threads: Managing Sleeping Threads

When the server side reads data, the kernel does the following:

```c
static int binder_thread_read(struct binder_proc *proc,
			      struct binder_thread *thread,
			      binder_uintptr_t binder_buffer, size_t size,
			      binder_size_t *consumed, int non_block)
{
	//......
	if (non_block) {
		if (!binder_has_work(thread, wait_for_proc_work))
			ret = -EAGAIN;
	} else { // reads are normally blocking, so this branch is taken
		ret = binder_wait_for_work(thread, wait_for_proc_work);
	}
	//......
}

static int binder_wait_for_work(struct binder_thread *thread,
				bool do_proc_work)
{
	DEFINE_WAIT(wait);
	struct binder_proc *proc = thread->proc;
	int ret = 0;

	freezer_do_not_count();
	binder_inner_proc_lock(proc);
	for (;;) {
		prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE);
		if (binder_has_work_ilocked(thread, do_proc_work))
			break;
		// proc->waiting_threads is a list; park the current thread's
		// binder_thread on it
		if (do_proc_work)
			list_add(&thread->waiting_thread_node,
				 &proc->waiting_threads);
		binder_inner_proc_unlock(proc);
		// ask the scheduler to switch away; the current thread gives up
		// the CPU and sleeps here until it is woken
		schedule();
		binder_inner_proc_lock(proc);
		list_del_init(&thread->waiting_thread_node);
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
	}
	finish_wait(&thread->wait, &wait);
	binder_inner_proc_unlock(proc);
	freezer_count();

	return ret;
}
```
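From user space, a server thread reaches this sleep simply by issuing a read-only BINDER_WRITE_READ ioctl. The following is a minimal sketch of that call (the function name and buffer handling are illustrative; IPCThreadState::talkWithDriver() performs the real version):

```c
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

// Block in the driver until work arrives for this thread. With
// read_size > 0 and nothing pending, the calling thread ends up
// sleeping in binder_wait_for_work() as shown above.
int binder_read_block(int binder_fd, void *buf, size_t size)
{
	struct binder_write_read bwr = {
		.write_size  = 0,                                  // nothing to send
		.read_size   = size,                               // ask the driver for work
		.read_buffer = (binder_uintptr_t)(uintptr_t)buf,
	};

	return ioctl(binder_fd, BINDER_WRITE_READ, &bwr);
}
```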

When the client sends data, it traps into the kernel and eventually reaches binder_transaction:

```c
static void binder_transaction(struct binder_proc *proc,
			       struct binder_thread *thread,
			       struct binder_transaction_data *tr, int reply,
			       binder_size_t extra_buffers_size)
{
	//......
	if (reply) {
		//......
	} else if (!(t->flags & TF_ONE_WAY)) {
		//......
		binder_inner_proc_unlock(proc);
		// the core operation
		if (!binder_proc_transaction(t, target_proc, target_thread)) {
			binder_inner_proc_lock(proc);
			binder_pop_transaction_ilocked(thread, t);
			binder_inner_proc_unlock(proc);
			goto err_dead_proc_or_thread;
		}
	} else {
		//......
	}
	//......
}
```

The core of the code above is binder_proc_transaction, implemented as follows:

```c
static bool binder_proc_transaction(struct binder_transaction *t,
				    struct binder_proc *proc,
				    struct binder_thread *thread)
{
	//......

	// for a fresh synchronous transaction, thread is NULL and
	// pending_async is false, so this branch is taken
	if (!thread && !pending_async)
		// point of interest 1:
		// pick the first entry of target_proc's waiting_threads list
		// as the target thread
		thread = binder_select_thread_ilocked(proc);

	// enqueue the binder_transaction *t on the target thread's todo list
	if (thread) { // this branch is taken
		binder_transaction_priority(thread->task, t, node_prio,
					    node->inherit_rt);
		// insert the binder_transaction into the target thread's todo list
		binder_enqueue_thread_work_ilocked(thread, &t->work);
	} else if (!pending_async) {
		binder_enqueue_work_ilocked(&t->work, &proc->todo);
	} else {
		binder_enqueue_work_ilocked(&t->work, &node->async_todo);
	}

	if (!pending_async) // this branch is taken
		// point of interest 2: wake up the target thread
		binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */);

	binder_inner_proc_unlock(proc);
	binder_node_unlock(node);

	return true;
}
```

At point of interest 1, binder_select_thread_ilocked is called; it takes the first entry of target_proc's waiting_threads list and uses it as the target thread:

```c
// pick the first entry of target_proc's waiting_threads list as the target thread
static struct binder_thread *
binder_select_thread_ilocked(struct binder_proc *proc)
{
	struct binder_thread *thread;

	assert_spin_locked(&proc->inner_lock);
	// take the first node of target_proc's waiting_threads list
	thread = list_first_entry_or_null(&proc->waiting_threads,
					  struct binder_thread,
					  waiting_thread_node);

	// remove it from the list
	if (thread)
		list_del_init(&thread->waiting_thread_node);

	// return the binder_thread we found, if any
	return thread;
}
```

Next, the transaction (binder_transaction *t) is inserted into the target thread's todo list, and execution reaches point of interest 2, where binder_wakeup_thread_ilocked wakes the target thread we just selected:

```c
// wake up the receiving side
static void binder_wakeup_thread_ilocked(struct binder_proc *proc,
					 struct binder_thread *thread,
					 bool sync)
{
	assert_spin_locked(&proc->inner_lock);

	if (thread) {
		if (sync) // a synchronous transaction takes this branch
			// wake the target thread
			wake_up_interruptible_sync(&thread->wait);
		else
			wake_up_interruptible(&thread->wait);
		return;
	}
	binder_wakeup_poll_threads_ilocked(proc, sync);
}
```
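Taken together, waiting_threads, binder_select_thread_ilocked, and binder_wakeup_thread_ilocked implement a "park the idle workers, wake exactly one" scheme. The following user-space sketch reproduces the same mechanics with pthreads; all names are illustrative, and a condition variable per worker stands in for the kernel wait queue:

```c
#include <pthread.h>
#include <stddef.h>

struct work { int payload; };

struct worker {
	pthread_cond_t wait;      // analog of binder_thread->wait;
	                          // initialize with PTHREAD_COND_INITIALIZER
	struct work *todo;        // analog of binder_thread->todo
	struct worker *next;      // analog of waiting_thread_node
};

static struct worker *waiting_head;  // analog of proc->waiting_threads
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;

// Must be called with inner_lock held; unlinks w if it is on the list.
static void list_remove_ilocked(struct worker *w)
{
	struct worker **p = &waiting_head;

	while (*p && *p != w)
		p = &(*p)->next;
	if (*p)
		*p = w->next;
}

// Idle worker: park on the waiting list until a sender hands us work,
// mirroring binder_wait_for_work().
struct work *worker_wait_for_work(struct worker *w)
{
	struct work *t;

	pthread_mutex_lock(&inner_lock);
	while (!w->todo) {
		w->next = waiting_head;   // list_add(..., &proc->waiting_threads)
		waiting_head = w;
		pthread_cond_wait(&w->wait, &inner_lock);  // schedule()
		list_remove_ilocked(w);   // list_del_init() after waking
	}
	t = w->todo;
	w->todo = NULL;
	pthread_mutex_unlock(&inner_lock);
	return t;
}

// Sender: pick the FIRST parked worker and wake only that one, mirroring
// binder_select_thread_ilocked() + wake_up_interruptible_sync().
int dispatch_work(struct work *t)
{
	pthread_mutex_lock(&inner_lock);
	struct worker *w = waiting_head;
	if (w) {
		waiting_head = w->next;       // unlink the selected worker
		w->todo = t;                  // enqueue on its todo slot
		pthread_cond_signal(&w->wait);
	}
	pthread_mutex_unlock(&inner_lock);
	return w != NULL;
}
```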
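On the sending side, whether the driver takes the synchronous path analyzed above is decided purely by the TF_ONE_WAY bit in the transaction flags. Here is a minimal sketch of issuing a BC_TRANSACTION from user space; it is illustrative only (error handling, offsets, and reply handling are omitted; libbinder's IPCThreadState::transact() builds the real thing):

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

// Send a transaction to the node referred to by `handle`. With oneway == 0
// the driver runs the !(t->flags & TF_ONE_WAY) branch of binder_transaction()
// shown above, and the caller then blocks waiting for the reply.
int send_transaction(int binder_fd, uint32_t handle, uint32_t code,
		     void *data, size_t len, int oneway)
{
	struct {
		uint32_t cmd;
		struct binder_transaction_data tr;
	} __attribute__((packed)) writebuf;

	memset(&writebuf, 0, sizeof(writebuf));
	writebuf.cmd = BC_TRANSACTION;
	writebuf.tr.target.handle = handle;
	writebuf.tr.code = code;
	writebuf.tr.flags = oneway ? TF_ONE_WAY : 0;  // selects the driver path
	writebuf.tr.data_size = len;
	writebuf.tr.data.ptr.buffer = (binder_uintptr_t)(uintptr_t)data;

	struct binder_write_read bwr = {
		.write_size   = sizeof(writebuf),
		.write_buffer = (binder_uintptr_t)(uintptr_t)&writebuf,
	};
	return ioctl(binder_fd, BINDER_WRITE_READ, &bwr);
}
```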

# When the Server Runs Short of Worker Threads

After the server side finishes reading and processing data, it checks the thread-accounting state; if it determines that no spare thread is available, it asks user space to spawn one:

```c
static int binder_thread_read(struct binder_proc *proc,
			      struct binder_thread *thread,
			      binder_uintptr_t binder_buffer, size_t size,
			      binder_size_t *consumed, int non_block)
{
	//......
	if (non_block) {
		if (!binder_has_work(thread, wait_for_proc_work))
			ret = -EAGAIN;
	} else { // blocking path: sleep here
		ret = binder_wait_for_work(thread, wait_for_proc_work);
	}

	//......

	// after being woken, process the pending work
	while (1) {
		// if both thread->todo and proc->todo are empty, goto the retry
		// label; otherwise keep going:
		struct binder_transaction_data tr;
		struct binder_transaction *t = NULL;
		switch (w->type) {
		case BINDER_WORK_TRANSACTION: ...
		case BINDER_WORK_TRANSACTION_COMPLETE: ...
		case BINDER_WORK_NODE: ...
		case BINDER_WORK_DEAD_BINDER: ...
		case BINDER_WORK_DEAD_BINDER_AND_CLEAR: ...
		case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: ...
		}
		...
	}

done:
	*consumed = ptr - buffer;
	binder_inner_proc_lock(proc);

	/* BR_SPAWN_LOOPER is sent up when all of the following hold:
	 * 1. no spawn request is already in flight (requested_threads == 0)
	 * 2. no spare thread is waiting (the waiting_threads list is empty)
	 * 3. fewer threads have been started than the configured maximum
	 *    (max_threads, 15 by default)
	 * 4. this thread's looper state is REGISTERED or ENTERED
	 * When they do, put_user() passes the BR_SPAWN_LOOPER command up to
	 * the application layer.
	 */
	if (proc->requested_threads == 0 &&
	    list_empty(&thread->proc->waiting_threads) &&
	    proc->requested_threads_started < proc->max_threads &&
	    (thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
	     BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */
	     /* spawn a new thread if we leave this out */) {
		proc->requested_threads++;
		binder_inner_proc_unlock(proc);
		binder_debug(BINDER_DEBUG_THREADS,
			     "%d:%d BR_SPAWN_LOOPER\n",
			     proc->pid, thread->pid);
		// pass the BR_SPAWN_LOOPER command up to the application layer
		if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
			return -EFAULT;
		binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
	} else
		binder_inner_proc_unlock(proc);
	return 0;
}
```
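The max_threads ceiling compared against above is configured from user space with the BINDER_SET_MAX_THREADS ioctl. A minimal sketch follows; libbinder's ProcessState issues essentially this call when it opens the driver, and its default pool size constant is 15, which is where the "15 by default" above comes from:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

// Tell the driver how many extra looper threads it may ask this
// process to spawn; stored into proc->max_threads in binder_ioctl().
int set_max_threads(int binder_fd, uint32_t max_threads)
{
	return ioctl(binder_fd, BINDER_SET_MAX_THREADS, &max_threads);
}
```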

Next, let's look at how the application layer responds:

```cpp
status_t IPCThreadState::executeCommand(int32_t cmd)
{
    BBinder* obj;
    RefBase::weakref_type* refs;
    status_t result = NO_ERROR;

    switch ((uint32_t)cmd) {

    //......

    case BR_SPAWN_LOOPER: // the application layer responds by spawning a pooled thread
        mProcess->spawnPooledThread(false);
        break;

    //......
    }

    if (result != NO_ERROR) {
        mLastError = result;
    }

    return result;
}

void ProcessState::spawnPooledThread(bool isMain)
{
    // start a new application-layer thread (a PoolThread)
    if (mThreadPoolStarted) {
        String8 name = makeBinderThreadName();
        ALOGV("Spawning new pooled thread, name=%s\n", name.string());
        sp<Thread> t = new PoolThread(isMain);
        t->run(name.string());
    }
}
```

# Starting the Java-Layer Binder Threads

In Android, starting a Java process also starts its Binder threads; let's trace how. Java processes are created via Process.start(), which sends a socket message to the Zygote process asking it to fork. Zygote then calls Zygote.forkAndSpecialize() to fork the new process, and the new process in turn calls RuntimeInit.nativeZygoteInit. Through its JNI mapping, that call finally lands in onZygoteInit in app_main.cpp:

```cpp
virtual void onZygoteInit()
{
    sp<ProcessState> proc = ProcessState::self();
    ALOGV("App process: starting thread pool.\n");
    proc->startThreadPool();
}
```

As you can see, the "Java-layer" Binder thread is really a native-layer thread started by startThreadPool; that startup path was analyzed above, so we won't repeat it here.
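To close the loop, here is what a pooled Binder thread boils down to at the driver protocol level, as a hedged C sketch (illustrative only; IPCThreadState::joinThreadPool() implements the real loop): announce itself as a looper, then block reading work forever. The main thread sends BC_ENTER_LOOPER, while threads spawned in response to BR_SPAWN_LOOPER send BC_REGISTER_LOOPER:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

// Write a single protocol command word to the driver.
static int write_cmd(int binder_fd, uint32_t cmd)
{
	struct binder_write_read bwr = {
		.write_size   = sizeof(cmd),
		.write_buffer = (binder_uintptr_t)(uintptr_t)&cmd,
	};
	return ioctl(binder_fd, BINDER_WRITE_READ, &bwr);
}

// Skeleton of a binder looper thread: register, then read work forever.
void binder_loop_sketch(int binder_fd, int is_main)
{
	uint32_t readbuf[128];

	write_cmd(binder_fd, is_main ? BC_ENTER_LOOPER : BC_REGISTER_LOOPER);
	for (;;) {
		struct binder_write_read bwr = {
			.read_size   = sizeof(readbuf),
			.read_buffer = (binder_uintptr_t)(uintptr_t)readbuf,
		};
		if (ioctl(binder_fd, BINDER_WRITE_READ, &bwr) < 0)
			break;
		// parse the BR_* commands in readbuf here; BR_SPAWN_LOOPER is
		// one of them, and handling it is what spawns a new PoolThread
	}
}
```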
