分类
未分类

vmbus 分析 一

vmbus device 发现
1. 在创建vmbus时有一个hv_setup_vmbus_irq函数,内容如下,注册进去的函数是vmbus_isr函数。

/*
 * Callback invoked by hyperv_vector_handler(); set by hv_setup_vmbus_irq()
 * and reset to NULL by hv_remove_vmbus_irq().
 */
static void (*vmbus_handler)(void);
 /*
  * hyperv_vector_handler - interrupt entry point that forwards to vmbus_handler.
  * @regs: register state passed in at interrupt entry.
  *
  * Installs @regs as the current IRQ register set, bumps the
  * irq_hv_callback_count statistic and calls the registered vmbus_handler
  * if one is set.  The previous register set is restored before returning.
  */
 __visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);

 	entering_irq();
 	inc_irq_stat(irq_hv_callback_count);
 	if (vmbus_handler)
 		vmbus_handler();

 	/* The APIC is acknowledged explicitly only when this hint bit is set. */
 	if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
 		ack_APIC_irq();

 	exiting_irq();
 	set_irq_regs(old_regs);
 }

 void hv_setup_vmbus_irq(void (*handler)(void))
 {
 >-------vmbus_handler = handler;
 }
 void hv_remove_vmbus_irq(void)
 {
 >-------/* We have no way to deallocate the interrupt gate */
 >-------vmbus_handler = NULL;
 }

hyperv_vector_handler函数是一个中断处理函数,这个中断处理函数处理的是0xf3中断。
当有这个中断的时候,会通过vmbus_handler调用vmbus_isr进行中断处理。

vmbus_isr函数主要做了两件事情:第一是调用vmbus_chan_sched函数,第二是调度hv_cpu->msg_dpc这个tasklet。vmbus_chan_sched函数会遍历当前cpu的chan_list链表,并调度channel->callback_event tasklet;后面会看到,这个tasklet最终触发的是网络的napi操作。下面先讲hv_cpu->msg_dpc这个tasklet。

 static void vmbus_isr(void)
 {
 >-------struct hv_per_cpu_context *hv_cpu
 >------->-------= this_cpu_ptr(hv_context.cpu_context);
 >-------void *page_addr = hv_cpu->synic_event_page;
 >-------struct hv_message *msg;
 >-------union hv_synic_event_flags *event;
 >-------bool handled = false;

 >-------if (unlikely(page_addr == NULL))
 >------->-------return;

 >-------event = (union hv_synic_event_flags *)page_addr +
 >------->------->------->------->------- VMBUS_MESSAGE_SINT;
 >-------/*
 >------- * Check for events before checking for messages. This is the order
 >------- * in which events and messages are checked in Windows guests on
 >------- * Hyper-V, and the Windows team suggested we do the same.
 >------- */

 >-------if ((vmbus_proto_version == VERSION_WS2008) ||
 >------->-------(vmbus_proto_version == VERSION_WIN7)) {

 >------->-------/* Since we are a child, we only need to check bit 0 */
 >------->-------if (sync_test_and_clear_bit(0, event->flags))
 >------->------->-------handled = true;
 >-------} else {
 >------->-------/*
 >------->------- * Our host is win8 or above. The signaling mechanism
 >------->------- * has changed and we can directly look at the event page.
 >------->------- * If bit n is set then we have an interrup on the channel
 >------->------- * whose id is n.
 >------->------- */
 >------->-------handled = true;
 >-------}

 >-------if (handled)
 >------->-------vmbus_chan_sched(hv_cpu);

 >-------page_addr = hv_cpu->synic_message_page;
 >-------msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

 >-------/* Check if there are actual msgs to be processed */
 >-------if (msg->header.message_type != HVMSG_NONE) {
 >------->-------if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
 >------->------->-------hv_process_timer_expiration(msg, hv_cpu);
 >------->-------else
 >------->------->-------tasklet_schedule(&hv_cpu->msg_dpc);
 >-------}

 >-------add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
 }

hv_cpu->msg_dpc这个tasklet是在hv_synic_alloc函数中初始化的,这个函数对hv_context进行了初始化。我们可以看到msg_dpc这个tasklet最后将要执行的函数是vmbus_on_msg_dpc函数。需要注意这里有一个percpu的tasklet,表示不同cpu上的这个tasklet可以并发执行,相当于搞了一个软中断号。可以看到这里创建了一个work(INIT_WORK)并交给workqueue执行,work里面执行的是vmbus_onmessage_work函数,vmbus_onmessage_work函数里面直接调用vmbus_onmessage函数。

 void vmbus_on_msg_dpc(unsigned long data)
 {
 >-------struct hv_per_cpu_context *hv_cpu = (void *)data;
 >-------void *page_addr = hv_cpu->synic_message_page;
 >-------struct hv_message *msg = (struct hv_message *)page_addr +
 >------->------->------->-------  VMBUS_MESSAGE_SINT;
 >-------struct vmbus_channel_message_header *hdr;
 >-------const struct vmbus_channel_message_table_entry *entry;
 >-------struct onmessage_work_context *ctx;
 >-------u32 message_type = msg->header.message_type;

 >-------if (message_type == HVMSG_NONE)
 >------->-------/* no msg */
 >------->-------return;

 >-------hdr = (struct vmbus_channel_message_header *)msg->u.payload;

 >-------trace_vmbus_on_msg_dpc(hdr);

 >-------if (hdr->msgtype >= CHANNELMSG_COUNT) {
 >------->-------WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
 >------->-------goto msg_handled;
 >-------}

 >-------entry = &channel_message_table[hdr->msgtype];
 >-------if (entry->handler_type>== VMHT_BLOCKING) {
 >------->-------ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
 >------->-------if (ctx == NULL)
 >------->------->-------return;

 >------->-------INIT_WORK(&ctx->work, vmbus_onmessage_work);
 >------->-------memcpy(&ctx->msg, msg, sizeof(*msg));

 >------->-------/*
 >------->------- * The host can generate a rescind message while we
 >------->------- * may still be handling the original offer. We deal with
 >------->------- * this condition by ensuring the processing is done on the
 >------->------- * same CPU.
 >------->------- */
 >------->-------switch (hdr->msgtype) {
 >------->-------case CHANNELMSG_RESCIND_CHANNELOFFER:
 >------->------->-------/*
 >------->------->------- * If we are handling the rescind message;
 >------->------->------- * schedule the work on the global work queue.
 >------->------->------- */
 >------->------->-------schedule_work_on(vmbus_connection.connect_cpu,
 >------->------->------->------->------- &ctx->work);
 >------->------->-------break;

 >------->-------case CHANNELMSG_OFFERCHANNEL:
 >------->------->-------atomic_inc(&vmbus_connection.offer_in_progress);
 >------->------->-------queue_work_on(vmbus_connection.connect_cpu,
 >------->------->------->-------      vmbus_connection.work_queue,
 >------->------->------->-------      &ctx->work);
 >------->------->-------break;

 >------->-------default:
 >------->------->-------queue_work(vmbus_connection.work_queue, &ctx->work);
 >------->-------}
 >-------} else
 >------->-------entry->message_handler(hdr);

 msg_handled:
 >-------vmbus_signal_eom(msg, message_type);
 }

vmbus_onmessage函数内容如下,这个函数前面有一个message table(channel_message_table),用来把消息路由到对应的处理函数。

const struct vmbus_channel_message_table_entry
 channel_message_table[CHANNELMSG_COUNT] = {
 >-------{ CHANNELMSG_INVALID,>-->------->-------0, NULL },
 >-------{ CHANNELMSG_OFFERCHANNEL,>----->-------0, vmbus_onoffer },
 >-------{ CHANNELMSG_RESCIND_CHANNELOFFER,>-----0, vmbus_onoffer_rescind },
 >-------{ CHANNELMSG_REQUESTOFFERS,>---->-------0, NULL },
 >-------{ CHANNELMSG_ALLOFFERS_DELIVERED,>------1, vmbus_onoffers_delivered },
 >-------{ CHANNELMSG_OPENCHANNEL,>------>-------0, NULL },
 >-------{ CHANNELMSG_OPENCHANNEL_RESULT,>-------1, vmbus_onopen_result },
 >-------{ CHANNELMSG_CLOSECHANNEL,>----->-------0, NULL },
 >-------{ CHANNELMSG_GPADL_HEADER,>----->-------0, NULL },
 >-------{ CHANNELMSG_GPADL_BODY,>------->-------0, NULL },
 >-------{ CHANNELMSG_GPADL_CREATED,>---->-------1, vmbus_ongpadl_created },
 >-------{ CHANNELMSG_GPADL_TEARDOWN,>--->-------0, NULL },
 >-------{ CHANNELMSG_GPADL_TORNDOWN,>--->-------1, vmbus_ongpadl_torndown },
 >-------{ CHANNELMSG_RELID_RELEASED,>--->-------0, NULL },
 >-------{ CHANNELMSG_INITIATE_CONTACT,>->-------0, NULL },
 >-------{ CHANNELMSG_VERSION_RESPONSE,>->-------1, vmbus_onversion_response },
 >-------{ CHANNELMSG_UNLOAD,>--->------->-------0, NULL },
 >-------{ CHANNELMSG_UNLOAD_RESPONSE,>-->-------1, vmbus_unload_response },
 >-------{ CHANNELMSG_18,>------->------->-------0, NULL },
 >-------{ CHANNELMSG_19,>------->------->-------0, NULL },
 >-------{ CHANNELMSG_20,>------->------->-------0, NULL },
 >-------{ CHANNELMSG_TL_CONNECT_REQUEST,>-------0, NULL },
 };

 /*
  * vmbus_onmessage - Handler for channel protocol messages.
  *
  * This is invoked in the vmbus worker thread context.
  */
 void vmbus_onmessage(void *context)
 {
 >-------struct hv_message *msg = context;
 >-------struct vmbus_channel_message_header *hdr;
 >-------int size;

 >-------hdr = (struct vmbus_channel_message_header *)msg->u.payload;
 >-------size = msg->header.payload_size;

 >-------trace_vmbus_on_message(hdr);

 >-------if (hdr->msgtype >= CHANNELMSG_COUNT) {
 >------->-------pr_err("Received invalid channel message type %d size %d\n",
 >------->------->-------   hdr->msgtype, size);
 >------->-------print_hex_dump_bytes("", DUMP_PREFIX_NONE,
 >------->------->------->-------     (unsigned char *)msg->u.payload, size);
 >------->-------return;
 >-------}

 >-------if (channel_message_table[hdr->msgtype].message_handler)
 >------->-------channel_message_table[hdr->msgtype].message_handler(hdr);
 >-------else
 >------->-------pr_err("Unhandled channel message type %d\n", hdr->msgtype);
 }

在这些消息处理函数里面有一个vmbus_onoffer处理函数,这个函数会创建新的channel,并且调用vmbus_process_offer函数,这个函数会判断当前channel是primary还是sub channel,并设置相应的关系。最后会通过一个work(add_channel_work)在workqueue中调用vmbus_add_channel_work函数。vmbus_add_channel_work函数如下:

 static void vmbus_add_channel_work(struct work_struct *work)
 {
 >-------struct vmbus_channel *newchannel =
 >------->-------container_of(work, struct vmbus_channel, add_channel_work);
 >-------struct vmbus_channel *primary_channel = newchannel->primary_channel;
 >-------unsigned long flags;
 >-------u16 dev_type;
 >-------int ret;

 >-------dev_type = hv_get_dev_type(newchannel);

 >-------init_vp_index(newchannel, dev_type);

 >-------if (newchannel->target_cpu != get_cpu()) {
 >------->-------put_cpu();
 >------->-------smp_call_function_single(newchannel->target_cpu,
 >------->------->------->------->------- percpu_channel_enq,
 >------->------->------->------->------- newchannel, true);
 >-------} else {
 >------->-------percpu_channel_enq(newchannel);
 >------->-------put_cpu();
 >-------}

 >-------/*
 >------- * This state is used to indicate a successful open
 >------- * so that when we do close the channel normally, we
 >------- * can cleanup properly.
 >------- */
 >-------newchannel->state = CHANNEL_OPEN_STATE;

 >-------if (primary_channel != NULL) {
 >------->-------/* newchannel is a sub-channel. */
 >------->-------struct hv_device *dev = primary_channel->device_obj;

 >------->-------if (vmbus_add_channel_kobj(dev, newchannel))
 >------->------->-------goto err_deq_chan;

 >------->-------if (primary_channel->sc_creation_callback != NULL)
 >------->------->-------primary_channel->sc_creation_callback(newchannel);

 >------->-------newchannel->probe_done = true;
 >------->-------return;
 >-------}

 >-------/*
 >------- * Start the process of binding the primary channel to the driver
 >------- */
 >-------newchannel->device_obj = vmbus_device_create(
 >------->-------&newchannel->offermsg.offer.if_type,
 >------->-------&newchannel->offermsg.offer.if_instance,
 >------->-------newchannel);
 >-------if (!newchannel->device_obj)
 >------->-------goto err_deq_chan;

 >-------newchannel->device_obj->device_id = dev_type;
 >-------/*
 >------- * Add the new device to the bus. This will kick off device-driver
 >------- * binding which eventually invokes the device driver's AddDevice()
 >------- * method.
 >------- */
 >-------ret = vmbus_device_register(newchannel->device_obj);

 >-------if (ret != 0) {
 >------->-------pr_err("unable to add child device object (relid %d)\n",
 >------->------->-------newchannel->offermsg.child_relid);
 >------->-------kfree(newchannel->device_obj);
 >------->-------goto err_deq_chan;
 >-------}

 >-------newchannel->probe_done = true;
 >-------return;

 err_deq_chan:
 >-------mutex_lock(&vmbus_connection.channel_mutex);

 >-------/*
 >------- * We need to set the flag, otherwise
 >------- * vmbus_onoffer_rescind() can be blocked.
 >------- */
 >-------newchannel->probe_done = true;

 >-------if (primary_channel == NULL) {
 >------->-------list_del(&newchannel->listentry);
 >-------} else {
 >------->-------spin_lock_irqsave(&primary_channel->lock, flags);
 >------->-------list_del(&newchannel->sc_list);
 >------->-------spin_unlock_irqrestore(&primary_channel->lock, flags);
 >-------}

 >-------mutex_unlock(&vmbus_connection.channel_mutex);

 >-------if (newchannel->target_cpu != get_cpu()) {
 >------->-------put_cpu();
 >------->-------smp_call_function_single(newchannel->target_cpu,
 >------->------->------->------->------- percpu_channel_deq,
 >------->------->------->------->------- newchannel, true);
 >-------} else {
 >------->-------percpu_channel_deq(newchannel);
 >------->-------put_cpu();
 >-------}

 >-------vmbus_release_relid(newchannel->offermsg.child_relid);

 >-------free_channel(newchannel);
 }

vmbus_add_channel_work函数对channel进行操作,不同类型的channel操作不一样:对于primary channel,会调用vmbus_device_register注册一个新的vmbus设备;对于sub channel,则会调用sc_creation_callback函数设置任务,这块会在网卡注册的时候讲。注意一个小细节:如果当前的cpu不等于channel的target_cpu,则会通过IPI(smp_call_function_single)在target cpu上执行percpu_channel_enq,把当前channel加到target cpu的hv_cpu_context链表中。target_cpu是在init_vp_index中计算出来的。

vmbus_device_register注册后,会触发match以及调用vmbus上的驱动操作,驱动虚拟设备。

由上面我们发现,vmbus上的设备创建是由hyper-V给虚拟机发送中断,然后发消息,然后由虚拟机中的vmbus创建这个设备。这个方式非常新颖。相对于pci枚举或者其他方式,这种方式把外设当作一个主体,相当于可编程的外设。个人感觉学到了。

发表评论

电子邮件地址不会被公开。 必填项已用*标注