vmbus device 发现
1. 在创建vmbus时有一个hv_setup_vmbus_irq函数,内容如下,注册进去的函数是vmbus_isr函数:
/*
 * Callback invoked from hyperv_vector_handler(); installed by
 * hv_setup_vmbus_irq() and reset to NULL by hv_remove_vmbus_irq().
 */
static void (*vmbus_handler)(void);
/*
 * hyperv_vector_handler - entry point for the Hyper-V callback interrupt.
 * @regs: register state captured at interrupt entry.
 *
 * Publishes @regs as the current irq regs for the duration of the handler,
 * bumps the irq_hv_callback_count statistic and calls the registered
 * vmbus_handler, if one is installed. The previous irq regs are restored
 * before returning.
 */
__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	entering_irq();
	inc_irq_stat(irq_hv_callback_count);

	/* No handler is installed until hv_setup_vmbus_irq() has been called. */
	if (vmbus_handler)
		vmbus_handler();

	/*
	 * The APIC is acked explicitly only when the hypervisor sets this hint.
	 * NOTE(review): presumably auto-EOI covers the other case - confirm.
	 */
	if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
		ack_APIC_irq();

	exiting_irq();
	set_irq_regs(old_regs);
}
void hv_setup_vmbus_irq(void (*handler)(void))
{
>-------vmbus_handler = handler;
}
void hv_remove_vmbus_irq(void)
{
>-------/* We have no way to deallocate the interrupt gate */
>-------vmbus_handler = NULL;
}
hyperv_vector_handler函数是一个中断处理函数,这个中断处理函数处理的是0xf3中断
当有这个中断的时候 会调用vmbus_isr进行中断处理。
vmbus_isr函数中,主要做了两件事情:第一是调用vmbus_chan_sched函数,第二是调度(tasklet_schedule)hv_cpu->msg_dpc这个tasklet(仅当消息页中有待处理的消息、且该消息不是定时器到期消息HVMSG_TIMER_EXPIRED时;定时器到期消息由hv_process_timer_expiration直接处理)。而vmbus_chan_sched函数中会遍历当前cpu的chan_list链表,并调度channel->callback_event这个tasklet。这个tasklet最后会发现是触发网络的napi操作。先讲hv_cpu->msg_dpc这个tasklet。
static void vmbus_isr(void)
{
>-------struct hv_per_cpu_context *hv_cpu
>------->-------= this_cpu_ptr(hv_context.cpu_context);
>-------void *page_addr = hv_cpu->synic_event_page;
>-------struct hv_message *msg;
>-------union hv_synic_event_flags *event;
>-------bool handled = false;
>-------if (unlikely(page_addr == NULL))
>------->-------return;
>-------event = (union hv_synic_event_flags *)page_addr +
>------->------->------->------->------- VMBUS_MESSAGE_SINT;
>-------/*
>------- * Check for events before checking for messages. This is the order
>------- * in which events and messages are checked in Windows guests on
>------- * Hyper-V, and the Windows team suggested we do the same.
>------- */
>-------if ((vmbus_proto_version == VERSION_WS2008) ||
>------->-------(vmbus_proto_version == VERSION_WIN7)) {
>------->-------/* Since we are a child, we only need to check bit 0 */
>------->-------if (sync_test_and_clear_bit(0, event->flags))
>------->------->-------handled = true;
>-------} else {
>------->-------/*
>------->------- * Our host is win8 or above. The signaling mechanism
>------->------- * has changed and we can directly look at the event page.
>------->------- * If bit n is set then we have an interrup on the channel
>------->------- * whose id is n.
>------->------- */
>------->-------handled = true;
>-------}
>-------if (handled)
>------->-------vmbus_chan_sched(hv_cpu);
>-------page_addr = hv_cpu->synic_message_page;
>-------msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
>-------/* Check if there are actual msgs to be processed */
>-------if (msg->header.message_type != HVMSG_NONE) {
>------->-------if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
>------->------->-------hv_process_timer_expiration(msg, hv_cpu);
>------->-------else
>------->------->-------tasklet_schedule(&hv_cpu->msg_dpc);
>-------}
>-------add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
}
hv_cpu->msg_dpc这个tasklet是在hv_synic_alloc函数中初始化的,这个函数对hv_context进行了初始化。我们可以看到msg_dpc这个tasklet最后将要执行的函数是vmbus_on_msg_dpc函数。需要注意这里有一个percpu的tasklet,表示这个类型的tasklet可以在不同cpu上并发执行,相当于搞了一个软中断号。从下面的代码可以看到:对于handler_type为VMHT_BLOCKING的消息,vmbus_on_msg_dpc会创建一个work(INIT_WORK)并投递到workqueue,work里面执行的是vmbus_onmessage_work函数,vmbus_onmessage_work函数里面直接调用vmbus_onmessage函数;其他类型的消息则直接在tasklet里调用消息表中的message_handler。
void vmbus_on_msg_dpc(unsigned long data)
{
>-------struct hv_per_cpu_context *hv_cpu = (void *)data;
>-------void *page_addr = hv_cpu->synic_message_page;
>-------struct hv_message *msg = (struct hv_message *)page_addr +
>------->------->------->------- VMBUS_MESSAGE_SINT;
>-------struct vmbus_channel_message_header *hdr;
>-------const struct vmbus_channel_message_table_entry *entry;
>-------struct onmessage_work_context *ctx;
>-------u32 message_type = msg->header.message_type;
>-------if (message_type == HVMSG_NONE)
>------->-------/* no msg */
>------->-------return;
>-------hdr = (struct vmbus_channel_message_header *)msg->u.payload;
>-------trace_vmbus_on_msg_dpc(hdr);
>-------if (hdr->msgtype >= CHANNELMSG_COUNT) {
>------->-------WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
>------->-------goto msg_handled;
>-------}
>-------entry = &channel_message_table[hdr->msgtype];
>-------if (entry->handler_type>== VMHT_BLOCKING) {
>------->-------ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
>------->-------if (ctx == NULL)
>------->------->-------return;
>------->-------INIT_WORK(&ctx->work, vmbus_onmessage_work);
>------->-------memcpy(&ctx->msg, msg, sizeof(*msg));
>------->-------/*
>------->------- * The host can generate a rescind message while we
>------->------- * may still be handling the original offer. We deal with
>------->------- * this condition by ensuring the processing is done on the
>------->------- * same CPU.
>------->------- */
>------->-------switch (hdr->msgtype) {
>------->-------case CHANNELMSG_RESCIND_CHANNELOFFER:
>------->------->-------/*
>------->------->------- * If we are handling the rescind message;
>------->------->------- * schedule the work on the global work queue.
>------->------->------- */
>------->------->-------schedule_work_on(vmbus_connection.connect_cpu,
>------->------->------->------->------- &ctx->work);
>------->------->-------break;
>------->-------case CHANNELMSG_OFFERCHANNEL:
>------->------->-------atomic_inc(&vmbus_connection.offer_in_progress);
>------->------->-------queue_work_on(vmbus_connection.connect_cpu,
>------->------->------->------- vmbus_connection.work_queue,
>------->------->------->------- &ctx->work);
>------->------->-------break;
>------->-------default:
>------->------->-------queue_work(vmbus_connection.work_queue, &ctx->work);
>------->-------}
>-------} else
>------->-------entry->message_handler(hdr);
msg_handled:
>-------vmbus_signal_eom(msg, message_type);
}
vmbus_onmessage函数内容如下。这个函数前面有一个message table(channel_message_table),用来把消息类型路由到对应的处理函数。
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
>-------{ CHANNELMSG_INVALID,>-->------->-------0, NULL },
>-------{ CHANNELMSG_OFFERCHANNEL,>----->-------0, vmbus_onoffer },
>-------{ CHANNELMSG_RESCIND_CHANNELOFFER,>-----0, vmbus_onoffer_rescind },
>-------{ CHANNELMSG_REQUESTOFFERS,>---->-------0, NULL },
>-------{ CHANNELMSG_ALLOFFERS_DELIVERED,>------1, vmbus_onoffers_delivered },
>-------{ CHANNELMSG_OPENCHANNEL,>------>-------0, NULL },
>-------{ CHANNELMSG_OPENCHANNEL_RESULT,>-------1, vmbus_onopen_result },
>-------{ CHANNELMSG_CLOSECHANNEL,>----->-------0, NULL },
>-------{ CHANNELMSG_GPADL_HEADER,>----->-------0, NULL },
>-------{ CHANNELMSG_GPADL_BODY,>------->-------0, NULL },
>-------{ CHANNELMSG_GPADL_CREATED,>---->-------1, vmbus_ongpadl_created },
>-------{ CHANNELMSG_GPADL_TEARDOWN,>--->-------0, NULL },
>-------{ CHANNELMSG_GPADL_TORNDOWN,>--->-------1, vmbus_ongpadl_torndown },
>-------{ CHANNELMSG_RELID_RELEASED,>--->-------0, NULL },
>-------{ CHANNELMSG_INITIATE_CONTACT,>->-------0, NULL },
>-------{ CHANNELMSG_VERSION_RESPONSE,>->-------1, vmbus_onversion_response },
>-------{ CHANNELMSG_UNLOAD,>--->------->-------0, NULL },
>-------{ CHANNELMSG_UNLOAD_RESPONSE,>-->-------1, vmbus_unload_response },
>-------{ CHANNELMSG_18,>------->------->-------0, NULL },
>-------{ CHANNELMSG_19,>------->------->-------0, NULL },
>-------{ CHANNELMSG_20,>------->------->-------0, NULL },
>-------{ CHANNELMSG_TL_CONNECT_REQUEST,>-------0, NULL },
};
/*
* vmbus_onmessage - Handler for channel protocol messages.
*
* This is invoked in the vmbus worker thread context.
*/
void vmbus_onmessage(void *context)
{
>-------struct hv_message *msg = context;
>-------struct vmbus_channel_message_header *hdr;
>-------int size;
>-------hdr = (struct vmbus_channel_message_header *)msg->u.payload;
>-------size = msg->header.payload_size;
>-------trace_vmbus_on_message(hdr);
>-------if (hdr->msgtype >= CHANNELMSG_COUNT) {
>------->-------pr_err("Received invalid channel message type %d size %d\n",
>------->------->------- hdr->msgtype, size);
>------->-------print_hex_dump_bytes("", DUMP_PREFIX_NONE,
>------->------->------->------- (unsigned char *)msg->u.payload, size);
>------->-------return;
>-------}
>-------if (channel_message_table[hdr->msgtype].message_handler)
>------->-------channel_message_table[hdr->msgtype].message_handler(hdr);
>-------else
>------->-------pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}
在这些消息处理函数里面有一个vmbus_onoffer处理函数,这个函数会创建新的channel,并且调用vmbus_process_offer函数。vmbus_process_offer函数会判断当前channel是primary channel还是sub channel,并设置相应的关系,最后会把一个work放入workqueue,由它调用vmbus_add_channel_work函数。vmbus_add_channel_work函数如下:
static void vmbus_add_channel_work(struct work_struct *work)
{
>-------struct vmbus_channel *newchannel =
>------->-------container_of(work, struct vmbus_channel, add_channel_work);
>-------struct vmbus_channel *primary_channel = newchannel->primary_channel;
>-------unsigned long flags;
>-------u16 dev_type;
>-------int ret;
>-------dev_type = hv_get_dev_type(newchannel);
>-------init_vp_index(newchannel, dev_type);
>-------if (newchannel->target_cpu != get_cpu()) {
>------->-------put_cpu();
>------->-------smp_call_function_single(newchannel->target_cpu,
>------->------->------->------->------- percpu_channel_enq,
>------->------->------->------->------- newchannel, true);
>-------} else {
>------->-------percpu_channel_enq(newchannel);
>------->-------put_cpu();
>-------}
>-------/*
>------- * This state is used to indicate a successful open
>------- * so that when we do close the channel normally, we
>------- * can cleanup properly.
>------- */
>-------newchannel->state = CHANNEL_OPEN_STATE;
>-------if (primary_channel != NULL) {
>------->-------/* newchannel is a sub-channel. */
>------->-------struct hv_device *dev = primary_channel->device_obj;
>------->-------if (vmbus_add_channel_kobj(dev, newchannel))
>------->------->-------goto err_deq_chan;
>------->-------if (primary_channel->sc_creation_callback != NULL)
>------->------->-------primary_channel->sc_creation_callback(newchannel);
>------->-------newchannel->probe_done = true;
>------->-------return;
>-------}
>-------/*
>------- * Start the process of binding the primary channel to the driver
>------- */
>-------newchannel->device_obj = vmbus_device_create(
>------->-------&newchannel->offermsg.offer.if_type,
>------->-------&newchannel->offermsg.offer.if_instance,
>------->-------newchannel);
>-------if (!newchannel->device_obj)
>------->-------goto err_deq_chan;
>-------newchannel->device_obj->device_id = dev_type;
>-------/*
>------- * Add the new device to the bus. This will kick off device-driver
>------- * binding which eventually invokes the device driver's AddDevice()
>------- * method.
>------- */
>-------ret = vmbus_device_register(newchannel->device_obj);
>-------if (ret != 0) {
>------->-------pr_err("unable to add child device object (relid %d)\n",
>------->------->-------newchannel->offermsg.child_relid);
>------->-------kfree(newchannel->device_obj);
>------->-------goto err_deq_chan;
>-------}
>-------newchannel->probe_done = true;
>-------return;
err_deq_chan:
>-------mutex_lock(&vmbus_connection.channel_mutex);
>-------/*
>------- * We need to set the flag, otherwise
>------- * vmbus_onoffer_rescind() can be blocked.
>------- */
>-------newchannel->probe_done = true;
>-------if (primary_channel == NULL) {
>------->-------list_del(&newchannel->listentry);
>-------} else {
>------->-------spin_lock_irqsave(&primary_channel->lock, flags);
>------->-------list_del(&newchannel->sc_list);
>------->-------spin_unlock_irqrestore(&primary_channel->lock, flags);
>-------}
>-------mutex_unlock(&vmbus_connection.channel_mutex);
>-------if (newchannel->target_cpu != get_cpu()) {
>------->-------put_cpu();
>------->-------smp_call_function_single(newchannel->target_cpu,
>------->------->------->------->------- percpu_channel_deq,
>------->------->------->------->------- newchannel, true);
>-------} else {
>------->-------percpu_channel_deq(newchannel);
>------->-------put_cpu();
>-------}
>-------vmbus_release_relid(newchannel->offermsg.child_relid);
>-------free_channel(newchannel);
}
vmbus_add_channel_work函数对channel进行操作,不同类型的channel操作不一样:对于primary channel,会调用vmbus_device_register注册一个新的vmbus设备;对于sub channel,则会调用sc_creation_callback函数设置任务。这块会在网卡注册的时候讲。注意一个小细节:如果当前的cpu不等于channel的target_cpu,则会通过IPI(smp_call_function_single)在target cpu上执行percpu_channel_enq,把当前channel加到target cpu的hv_per_cpu_context的链表中。target_cpu是在init_vp_index中计算出来的。
vmbus_device_register注册后,会触发match,并调用vmbus上对应驱动的操作来驱动这个虚拟设备。
由上面我们发现,vmbus上的设备创建过程是:Hyper-V先给虚拟机发送中断,然后发消息,最后由虚拟机中的vmbus创建这个设备。这个方式非常新颖。相对于pci枚举或者其他方式,这种方式把外设当作一个主体,相当于可编程的外设。个人感觉学到了。