2 * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
28 * Hyper-V virtual machine bus
40 #include <ipxe/malloc.h>
41 #include <ipxe/iobuf.h>
42 #include <ipxe/hyperv.h>
43 #include <ipxe/vmbus.h>
45 /** VMBus initial GPADL ID
47 * This is an opaque value with no meaning. The Linux kernel uses
50 #define VMBUS_GPADL_MAGIC 0x18ae0000
55 * @v hv Hyper-V hypervisor
56 * @v header Message header
57 * @v len Length of message (including header)
58 * @ret rc Return status code
/* Post a VMBus control-plane message to the host.
 *
 * Thin wrapper around hv_post_message() using the fixed VMBus
 * connection ID and message type.  On failure, logs the error;
 * the elided tail presumably returns rc to the caller — TODO confirm.
 */
60 static int vmbus_post_message ( struct hv_hypervisor *hv,
61 const struct vmbus_message_header *header,
63 struct vmbus *vmbus = hv->vmbus;
67 if ( ( rc = hv_post_message ( hv, VMBUS_MESSAGE_ID, VMBUS_MESSAGE_TYPE,
68 header, len ) ) != 0 ) {
69 DBGC ( vmbus, "VMBUS %p could not post message: %s\n",
70 vmbus, strerror ( rc ) );
80 * @v hv Hyper-V hypervisor
81 * @v type Message type
82 * @ret rc Return status code
/* Post a header-only VMBus message of the given type.
 *
 * Builds a zero-bodied message header with only the type field set
 * and delegates to vmbus_post_message().
 */
84 static int vmbus_post_empty_message ( struct hv_hypervisor *hv,
86 struct vmbus_message_header header = { .type = cpu_to_le32 ( type ) };
88 return vmbus_post_message ( hv, &header, sizeof ( header ) );
92 * Wait for received message
94 * @v hv Hyper-V hypervisor
95 * @ret rc Return status code
/* Block until a VMBus message arrives on the VMBus SINT, then
 * sanity-check that it carries the expected VMBUS_MESSAGE_TYPE.
 */
97 static int vmbus_wait_for_message ( struct hv_hypervisor *hv ) {
98 struct vmbus *vmbus = hv->vmbus;
101 /* Wait for message */
102 if ( ( rc = hv_wait_for_message ( hv, VMBUS_MESSAGE_SINT ) ) != 0 ) {
103 DBGC ( vmbus, "VMBUS %p failed waiting for message: %s\n",
104 vmbus, strerror ( rc ) );
/* Reject messages of any type other than VMBUS_MESSAGE_TYPE; the
 * received type is stored in protocol (little-endian) byte order.
 */
109 if ( hv->message->received.type != cpu_to_le32 ( VMBUS_MESSAGE_TYPE ) ){
110 DBGC ( vmbus, "VMBUS %p invalid message type %d\n",
111 vmbus, le32_to_cpu ( hv->message->received.type ) );
121 * @v hv Hyper-V hypervisor
122 * @v raw VMBus protocol (raw) version
123 * @ret rc Return status code
/* Initiate contact with the host using a specific raw protocol version.
 *
 * Sends VMBUS_INITIATE_CONTACT (carrying the physical addresses of the
 * interrupt and monitor pages), waits for the version response, and
 * validates that the response type, "supported" flag, and echoed
 * version all match the request.
 */
125 static int vmbus_initiate_contact ( struct hv_hypervisor *hv,
127 struct vmbus *vmbus = hv->vmbus;
/* "version" aliases the shared received-message buffer; it is only
 * meaningful after vmbus_wait_for_message() succeeds below.
 */
128 const struct vmbus_version_response *version = &vmbus->message->version;
129 struct vmbus_initiate_contact initiate;
132 /* Construct message */
133 memset ( &initiate, 0, sizeof ( initiate ) );
134 initiate.header.type = cpu_to_le32 ( VMBUS_INITIATE_CONTACT );
135 initiate.version.raw = cpu_to_le32 ( raw );
/* Tell the host where our shared interrupt and monitor pages live */
136 initiate.intr = virt_to_phys ( vmbus->intr );
137 initiate.monitor_in = virt_to_phys ( vmbus->monitor_in );
138 initiate.monitor_out = virt_to_phys ( vmbus->monitor_out );
141 if ( ( rc = vmbus_post_message ( hv, &initiate.header,
142 sizeof ( initiate ) ) ) != 0 )
145 /* Wait for response */
146 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
/* Validate response: correct message type, version accepted by the
 * host, and version echoed back unchanged.
 */
150 if ( version->header.type != cpu_to_le32 ( VMBUS_VERSION_RESPONSE ) ) {
151 DBGC ( vmbus, "VMBUS %p unexpected version response type %d\n",
152 vmbus, le32_to_cpu ( version->header.type ) );
155 if ( ! version->supported ) {
156 DBGC ( vmbus, "VMBUS %p requested version not supported\n",
160 if ( version->version.raw != cpu_to_le32 ( raw ) ) {
161 DBGC ( vmbus, "VMBUS %p unexpected version %d.%d\n",
162 vmbus, le16_to_cpu ( version->version.major ),
163 le16_to_cpu ( version->version.minor ) );
167 DBGC ( vmbus, "VMBUS %p initiated contact using version %d.%d\n",
168 vmbus, le16_to_cpu ( version->version.major ),
169 le16_to_cpu ( version->version.minor ) );
176 * @v hv Hyper-V hypervisor
177 * @ret rc Return status code
/* Terminate contact with the host.
 *
 * Posts an empty VMBUS_UNLOAD message and waits for the matching
 * VMBUS_UNLOAD_RESPONSE, rejecting any other response type.
 */
179 static int vmbus_unload ( struct hv_hypervisor *hv ) {
180 struct vmbus *vmbus = hv->vmbus;
181 const struct vmbus_message_header *header = &vmbus->message->header;
185 if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_UNLOAD ) ) != 0 )
188 /* Wait for response */
189 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
/* Check that the host acknowledged the unload */
193 if ( header->type != cpu_to_le32 ( VMBUS_UNLOAD_RESPONSE ) ) {
194 DBGC ( vmbus, "VMBUS %p unexpected unload response type %d\n",
195 vmbus, le32_to_cpu ( header->type ) );
203 * Negotiate protocol version
205 * @v hv Hyper-V hypervisor
206 * @ret rc Return status code
/* Negotiate the VMBus protocol version with the host.
 *
 * Probes for WIN8_1 (3.0) support (required for a working unload/
 * reconnect), then disconnects and reconnects with the oldest known
 * protocol (WS2008) to avoid a crash in Windows Server 2012 R2's
 * vmbus.sys — see the rationale in the comment below.
 */
208 static int vmbus_negotiate_version ( struct hv_hypervisor *hv ) {
211 /* We require the ability to disconnect from and reconnect to
212 * VMBus; if we don't have this then there is no (viable) way
213 * for a loaded operating system to continue to use any VMBus
214 * devices. (There is also a small but non-zero risk that the
215 * host will continue to write to our interrupt and monitor
216 * pages, since the VMBUS_UNLOAD message in earlier versions
217 * is essentially a no-op.)
219 * This requires us to ensure that the host supports protocol
220 * version 3.0 (VMBUS_VERSION_WIN8_1). However, we can't
221 * actually _use_ protocol version 3.0, since doing so causes
222 * an iSCSI-booted Windows Server 2012 R2 VM to crash due to a
223 * NULL pointer dereference in vmbus.sys.
225 * To work around this problem, we first ensure that we can
226 * connect using protocol v3.0, then disconnect and reconnect
227 * using the oldest known protocol.
230 /* Initiate contact to check for required protocol support */
231 if ( ( rc = vmbus_initiate_contact ( hv, VMBUS_VERSION_WIN8_1 ) ) != 0 )
234 /* Terminate contact */
235 if ( ( rc = vmbus_unload ( hv ) ) != 0 )
238 /* Reinitiate contact using the oldest known protocol version */
239 if ( ( rc = vmbus_initiate_contact ( hv, VMBUS_VERSION_WS2008 ) ) != 0 )
246 * Establish GPA descriptor list
248 * @v vmdev VMBus device
249 * @v data Data buffer
250 * @v len Length of data buffer
251 * @ret gpadl GPADL ID, or negative error
/* Establish a GPA descriptor list (GPADL) describing a data buffer.
 *
 * Builds a VMBUS_GPADL_HEADER message containing a single GPA range
 * plus one PFN entry per page, posts it, then validates the
 * VMBUS_GPADL_CREATED response (type, channel, GPADL ID, status).
 * Returns the allocated GPADL ID on success (per the doc comment
 * above; the return statement itself is elided from this view).
 */
253 int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
255 struct hv_hypervisor *hv = vmdev->hv;
256 struct vmbus *vmbus = hv->vmbus;
257 physaddr_t addr = user_to_phys ( data, 0 );
258 unsigned int pfn_count = hv_pfn_count ( addr, len );
/* Anonymous on-stack message: fixed header + one range + a
 * variable-length PFN array sized for this buffer.
 */
260 struct vmbus_gpadl_header gpadlhdr;
261 struct vmbus_gpa_range range;
262 uint64_t pfn[pfn_count];
263 } __attribute__ (( packed )) gpadlhdr;
264 const struct vmbus_gpadl_created *created = &vmbus->message->created;
/* Monotonic GPADL ID allocator, seeded with an opaque magic value */
265 static unsigned int gpadl = VMBUS_GPADL_MAGIC;
269 /* Allocate GPADL ID */
272 /* Construct message */
273 memset ( &gpadlhdr, 0, sizeof ( gpadlhdr ) );
274 gpadlhdr.gpadlhdr.header.type = cpu_to_le32 ( VMBUS_GPADL_HEADER );
275 gpadlhdr.gpadlhdr.channel = cpu_to_le32 ( vmdev->channel );
276 gpadlhdr.gpadlhdr.gpadl = cpu_to_le32 ( gpadl );
277 gpadlhdr.gpadlhdr.range_len =
278 cpu_to_le16 ( ( sizeof ( gpadlhdr.range ) +
279 sizeof ( gpadlhdr.pfn ) ) );
280 gpadlhdr.gpadlhdr.range_count = cpu_to_le16 ( 1 );
281 gpadlhdr.range.len = cpu_to_le32 ( len );
282 gpadlhdr.range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) );
/* One PFN per page, starting at the buffer's first page */
283 for ( i = 0 ; i < pfn_count ; i++ )
284 gpadlhdr.pfn[i] = ( ( addr / PAGE_SIZE ) + i );
287 if ( ( rc = vmbus_post_message ( hv, &gpadlhdr.gpadlhdr.header,
288 sizeof ( gpadlhdr ) ) ) != 0 )
291 /* Wait for response */
292 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
/* Validate the VMBUS_GPADL_CREATED response field by field */
296 if ( created->header.type != cpu_to_le32 ( VMBUS_GPADL_CREATED ) ) {
297 DBGC ( vmdev, "VMBUS %s unexpected GPADL response type %d\n",
298 vmdev->dev.name, le32_to_cpu ( created->header.type ) );
301 if ( created->channel != cpu_to_le32 ( vmdev->channel ) ) {
302 DBGC ( vmdev, "VMBUS %s unexpected GPADL channel %d\n",
303 vmdev->dev.name, le32_to_cpu ( created->channel ) );
306 if ( created->gpadl != cpu_to_le32 ( gpadl ) ) {
307 DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n",
308 vmdev->dev.name, le32_to_cpu ( created->gpadl ) );
311 if ( created->status != 0 ) {
312 DBGC ( vmdev, "VMBUS %s GPADL creation failed: %#08x\n",
313 vmdev->dev.name, le32_to_cpu ( created->status ) );
317 DBGC ( vmdev, "VMBUS %s GPADL %#08x is [%08lx,%08lx)\n",
318 vmdev->dev.name, gpadl, addr, ( addr + len ) );
323 * Tear down GPA descriptor list
325 * @v vmdev VMBus device
327 * @ret rc Return status code
/* Tear down a previously established GPADL.
 *
 * Posts a VMBUS_GPADL_TEARDOWN message for the given channel/GPADL
 * pair and validates the VMBUS_GPADL_TORNDOWN response (type and
 * echoed GPADL ID).
 */
329 int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, unsigned int gpadl ) {
330 struct hv_hypervisor *hv = vmdev->hv;
331 struct vmbus *vmbus = hv->vmbus;
332 struct vmbus_gpadl_teardown teardown;
333 const struct vmbus_gpadl_torndown *torndown = &vmbus->message->torndown;
336 /* Construct message */
337 memset ( &teardown, 0, sizeof ( teardown ) );
338 teardown.header.type = cpu_to_le32 ( VMBUS_GPADL_TEARDOWN );
339 teardown.channel = cpu_to_le32 ( vmdev->channel );
340 teardown.gpadl = cpu_to_le32 ( gpadl );
343 if ( ( rc = vmbus_post_message ( hv, &teardown.header,
344 sizeof ( teardown ) ) ) != 0 )
347 /* Wait for response */
348 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
/* Validate the teardown acknowledgement */
352 if ( torndown->header.type != cpu_to_le32 ( VMBUS_GPADL_TORNDOWN ) ) {
353 DBGC ( vmdev, "VMBUS %s unexpected GPADL response type %d\n",
354 vmdev->dev.name, le32_to_cpu ( torndown->header.type ) );
357 if ( torndown->gpadl != cpu_to_le32 ( gpadl ) ) {
358 DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n",
359 vmdev->dev.name, le32_to_cpu ( torndown->gpadl ) );
369 * @v vmdev VMBus device
370 * @v op Channel operations
371 * @v out_len Outbound ring buffer length
372 * @v in_len Inbound ring buffer length
373 * @v mtu Maximum expected data packet length (including headers)
374 * @ret rc Return status code
376 * Both outbound and inbound ring buffer lengths must be a power of
377 * two and a multiple of PAGE_SIZE. The requirement to be a power of
378 * two is a policy decision taken to simplify the ring buffer indexing
/* Open a VMBus channel.
 *
 * Allocates a per-channel packet buffer and a DMA-able ring buffer
 * (outbound header+data followed by inbound header+data), establishes
 * a GPADL covering the ring, sends VMBUS_OPEN_CHANNEL, validates the
 * open-channel result, and records the channel parameters in vmdev.
 * Errors unwind through the goto labels visible near the end.
 */
381 int vmbus_open ( struct vmbus_device *vmdev,
382 struct vmbus_channel_operations *op,
383 size_t out_len, size_t in_len, size_t mtu ) {
384 struct hv_hypervisor *hv = vmdev->hv;
385 struct vmbus *vmbus = hv->vmbus;
386 struct vmbus_open_channel open;
387 const struct vmbus_open_channel_result *opened =
388 &vmbus->message->opened;
/* Enforce the documented contract: ring lengths are page-multiple
 * powers of two, and the MTU covers at least header + footer.
 */
397 assert ( ( out_len % PAGE_SIZE ) == 0 );
398 assert ( ( out_len & ( out_len - 1 ) ) == 0 );
399 assert ( ( in_len % PAGE_SIZE ) == 0 );
400 assert ( ( in_len & ( in_len - 1 ) ) == 0 );
401 assert ( mtu >= ( sizeof ( struct vmbus_packet_header ) +
402 sizeof ( struct vmbus_packet_footer ) ) );
404 /* Allocate packet buffer */
405 packet = malloc ( mtu );
408 goto err_alloc_packet;
411 /* Allocate ring buffer */
412 len = ( sizeof ( *vmdev->out ) + out_len +
413 sizeof ( *vmdev->in ) + in_len );
414 assert ( ( len % PAGE_SIZE ) == 0 );
415 ring = malloc_dma ( len, PAGE_SIZE );
420 memset ( ring, 0, len );
422 /* Establish GPADL for ring buffer */
423 gpadl = vmbus_establish_gpadl ( vmdev, virt_to_user ( ring ), len );
429 /* Construct message */
430 memset ( &open, 0, sizeof ( open ) );
431 open.header.type = cpu_to_le32 ( VMBUS_OPEN_CHANNEL );
432 open.channel = cpu_to_le32 ( vmdev->channel );
434 open.id = open_id; /* Opaque random value: endianness irrelevant */
435 open.gpadl = cpu_to_le32 ( gpadl );
/* Number of pages occupied by the outbound ring (header + data);
 * the host infers the inbound ring from the GPADL remainder.
 */
436 open.out_pages = ( ( sizeof ( *vmdev->out ) / PAGE_SIZE ) +
437 ( out_len / PAGE_SIZE ) );
440 if ( ( rc = vmbus_post_message ( hv, &open.header,
441 sizeof ( open ) ) ) != 0 )
444 /* Wait for response */
445 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
/* Validate the open-channel result field by field */
449 if ( opened->header.type != cpu_to_le32 ( VMBUS_OPEN_CHANNEL_RESULT ) ){
450 DBGC ( vmdev, "VMBUS %s unexpected open response type %d\n",
451 vmdev->dev.name, le32_to_cpu ( opened->header.type ) );
454 if ( opened->channel != cpu_to_le32 ( vmdev->channel ) ) {
455 DBGC ( vmdev, "VMBUS %s unexpected opened channel %#08x\n",
456 vmdev->dev.name, le32_to_cpu ( opened->channel ) );
459 if ( opened->id != open_id /* Non-endian */ ) {
460 DBGC ( vmdev, "VMBUS %s unexpected open ID %#08x\n",
461 vmdev->dev.name, le32_to_cpu ( opened->id ) );
464 if ( opened->status != 0 ) {
465 DBGC ( vmdev, "VMBUS %s open failed: %#08x\n",
466 vmdev->dev.name, le32_to_cpu ( opened->status ) );
470 /* Store channel parameters */
471 vmdev->out_len = out_len;
472 vmdev->in_len = in_len;
474 vmdev->in = ( ring + sizeof ( *vmdev->out ) + out_len );
475 vmdev->gpadl = gpadl;
478 vmdev->packet = packet;
480 DBGC ( vmdev, "VMBUS %s channel GPADL %#08x ring "
481 "[%#08lx,%#08lx,%#08lx)\n", vmdev->dev.name, vmdev->gpadl,
482 virt_to_phys ( vmdev->out ), virt_to_phys ( vmdev->in ),
483 ( virt_to_phys ( vmdev->out ) + len ) );
/* Error unwind path: tear down GPADL, then free the ring buffer */
486 vmbus_gpadl_teardown ( vmdev, vmdev->gpadl );
488 free_dma ( ring, len );
496 * Close VMBus channel
498 * @v vmdev VMBus device
/* Close a VMBus channel.
 *
 * Sends VMBUS_CLOSE_CHANNEL, tears down the ring-buffer GPADL, and
 * frees the ring and packet buffers.  Failures are logged but do not
 * abort the shutdown sequence (except GPADL teardown failure, where
 * the ring memory is deliberately leaked — see inline comment).
 */
500 void vmbus_close ( struct vmbus_device *vmdev ) {
501 struct hv_hypervisor *hv = vmdev->hv;
502 struct vmbus_close_channel close;
506 /* Construct message */
507 memset ( &close, 0, sizeof ( close ) );
508 close.header.type = cpu_to_le32 ( VMBUS_CLOSE_CHANNEL );
509 close.channel = cpu_to_le32 ( vmdev->channel );
512 if ( ( rc = vmbus_post_message ( hv, &close.header,
513 sizeof ( close ) ) ) != 0 ) {
514 DBGC ( vmdev, "VMBUS %s failed to close: %s\n",
515 vmdev->dev.name, strerror ( rc ) );
516 /* Continue to attempt to tear down GPADL, so that our
517 * memory is no longer accessible by the remote VM.
521 /* Tear down GPADL */
522 if ( ( rc = vmbus_gpadl_teardown ( vmdev,
523 vmdev->gpadl ) ) != 0 ) {
524 DBGC ( vmdev, "VMBUS %s failed to tear down channel GPADL: "
525 "%s\n", vmdev->dev.name, strerror ( rc ) );
526 /* We can't prevent the remote VM from continuing to
527 * access this memory, so leak it.
532 /* Free ring buffer */
533 len = ( sizeof ( *vmdev->out ) + vmdev->out_len +
534 sizeof ( *vmdev->in ) + vmdev->in_len );
535 free_dma ( vmdev->out, len );
539 /* Free packet buffer */
540 free ( vmdev->packet );
541 vmdev->packet = NULL;
543 DBGC ( vmdev, "VMBUS %s closed\n", vmdev->dev.name );
547 * Signal channel via monitor page
549 * @v vmdev VMBus device
/* Signal the host by setting this channel's bit in its monitor
 * trigger group within the shared outbound monitor page.
 */
551 static void vmbus_signal_monitor ( struct vmbus_device *vmdev ) {
552 struct hv_hypervisor *hv = vmdev->hv;
553 struct vmbus *vmbus = hv->vmbus;
554 struct hv_monitor_trigger *trigger;
558 /* Set bit in monitor trigger group */
/* monitor ID maps to (group, bit-within-group); each group holds
 * 8 * sizeof ( trigger->pending ) bits.
 */
559 group = ( vmdev->monitor / ( 8 * sizeof ( trigger->pending ) ));
560 bit = ( vmdev->monitor % ( 8 * sizeof ( trigger->pending ) ) );
561 trigger = &vmbus->monitor_out->trigger[group];
562 hv_set_bit ( trigger, bit );
566 * Signal channel via hypervisor event
568 * @v vmdev VMBus device
/* Signal the host via an explicit hypervisor event (used for
 * channels without a monitor); failures are logged only.
 */
570 static void vmbus_signal_event ( struct vmbus_device *vmdev ) {
571 struct hv_hypervisor *hv = vmdev->hv;
574 /* Signal hypervisor event */
575 if ( ( rc = hv_signal_event ( hv, VMBUS_EVENT_ID, 0 ) ) != 0 ) {
576 DBGC ( vmdev, "VMBUS %s could not signal event: %s\n",
577 vmdev->dev.name, strerror ( rc ) );
583 * Fill outbound ring buffer
585 * @v vmdev VMBus device
586 * @v prod Producer index
589 * @ret prod New producer index
591 * The caller must ensure that there is sufficient space in the ring
/* Copy data into the outbound ring buffer at the producer index,
 * splitting into two memcpy fragments when the copy wraps past the
 * end of the ring.  Returns the new producer index, masked with
 * ( out_len - 1 ) — valid because out_len is a power of two.
 * Caller guarantees sufficient free space (see doc comment above).
 */
594 static size_t vmbus_produce ( struct vmbus_device *vmdev, size_t prod,
595 const void *data, size_t len ) {
599 /* Determine fragment lengths */
600 first = ( vmdev->out_len - prod );
603 second = ( len - first );
605 /* Copy fragment(s) */
606 memcpy ( &vmdev->out->data[prod], data, first );
608 memcpy ( &vmdev->out->data[0], ( data + first ), second );
610 return ( ( prod + len ) & ( vmdev->out_len - 1 ) );
614 * Consume inbound ring buffer
616 * @v vmdev VMBus device
617 * @v cons Consumer index
618 * @v data Data buffer, or NULL
619 * @v len Length to consume
620 * @ret cons New consumer index
/* Copy data out of the inbound ring buffer at the consumer index,
 * mirroring vmbus_produce(): up to two fragments when the read wraps.
 * data may be NULL per the doc comment above (discard mode) —
 * presumably guarded by an elided check before the memcpy calls.
 * Returns the new consumer index masked with ( in_len - 1 ).
 */
622 static size_t vmbus_consume ( struct vmbus_device *vmdev, size_t cons,
623 void *data, size_t len ) {
627 /* Determine fragment lengths */
628 first = ( vmdev->in_len - cons );
631 second = ( len - first );
633 /* Copy fragment(s) */
634 memcpy ( data, &vmdev->in->data[cons], first );
636 memcpy ( ( data + first ), &vmdev->in->data[0], second );
638 return ( ( cons + len ) & ( vmdev->in_len - 1 ) );
642 * Send packet via ring buffer
644 * @v vmdev VMBus device
645 * @v header Packet header
647 * @v len Length of data
648 * @ret rc Return status code
650 * Send a packet via the outbound ring buffer. All fields in the
651 * packet header must be filled in, with the exception of the total
/* Send one packet through the outbound ring buffer.
 *
 * Computes the total on-ring length (header + data + 8-byte padding +
 * footer), checks for free space, fills in the total quadword length
 * in the header, copies the pieces into the ring, publishes the new
 * producer index, and signals the host only when required (interrupt
 * mask clear and the ring was previously empty — the Linux
 * hv_need_to_signal() logic, per the inline comment).
 */
654 static int vmbus_send ( struct vmbus_device *vmdev,
655 struct vmbus_packet_header *header,
656 const void *data, size_t len ) {
657 struct hv_hypervisor *hv = vmdev->hv;
658 struct vmbus *vmbus = hv->vmbus;
/* Shared scratch padding: always zero, at most 7 bytes used */
659 static uint8_t padding[ 8 - 1 ];
660 struct vmbus_packet_footer footer;
671 assert ( vmdev->out != NULL );
673 /* Calculate lengths */
/* hdr_qlen / qlen are in 8-byte quadword units on the wire */
674 header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 );
675 pad_len = ( ( -len ) & ( 8 - 1 ) );
676 footer_len = sizeof ( footer );
677 ring_len = ( header_len + len + pad_len + footer_len );
679 /* Check that we have enough room in the outbound ring buffer */
680 cons = le32_to_cpu ( vmdev->out->cons );
681 prod = le32_to_cpu ( vmdev->out->prod );
683 fill = ( ( prod - cons ) & ( vmdev->out_len - 1 ) );
/* ">=" (not ">") keeps the ring from ever becoming completely
 * full, so prod == cons unambiguously means "empty".
 */
684 if ( ( fill + ring_len ) >= vmdev->out_len ) {
685 DBGC ( vmdev, "VMBUS %s ring buffer full\n", vmdev->dev.name );
689 /* Complete header */
690 header->qlen = cpu_to_le16 ( ( ring_len - footer_len ) / 8 );
692 /* Construct footer */
694 footer.prod = vmdev->out->prod;
696 /* Copy packet to buffer */
697 DBGC2 ( vmdev, "VMBUS %s sending:\n", vmdev->dev.name );
698 DBGC2_HDA ( vmdev, prod, header, header_len );
699 prod = vmbus_produce ( vmdev, prod, header, header_len );
700 DBGC2_HDA ( vmdev, prod, data, len );
701 prod = vmbus_produce ( vmdev, prod, data, len );
702 prod = vmbus_produce ( vmdev, prod, padding, pad_len );
703 DBGC2_HDA ( vmdev, prod, &footer, sizeof ( footer ) );
704 prod = vmbus_produce ( vmdev, prod, &footer, sizeof ( footer ) );
705 assert ( ( ( prod - old_prod ) & ( vmdev->out_len - 1 ) ) == ring_len );
707 /* Update producer index */
709 vmdev->out->prod = cpu_to_le32 ( prod );
711 /* Return if we do not need to signal the host. This follows
712 * the logic of hv_need_to_signal() in the Linux driver.
715 if ( vmdev->out->intr_mask )
718 cons = le32_to_cpu ( vmdev->out->cons );
719 if ( cons != old_prod )
722 /* Set channel bit in interrupt page */
723 hv_set_bit ( vmbus->intr->out, vmdev->channel );
725 /* Signal the host */
726 vmdev->signal ( vmdev );
732 * Send control packet via ring buffer
734 * @v vmdev VMBus device
735 * @v xid Transaction ID (or zero to not request completion)
737 * @v len Length of data
738 * @ret rc Return status code
740 * Send data using a VMBUS_DATA_INBAND packet.
/* Send a VMBUS_DATA_INBAND control packet.
 *
 * Constructs the packet header in the channel's shared packet buffer;
 * a non-zero xid requests a completion from the host.  xid is passed
 * through in host byte order ("non-endian", opaque to the host).
 */
742 int vmbus_send_control ( struct vmbus_device *vmdev, uint64_t xid,
743 const void *data, size_t len ) {
744 struct vmbus_packet_header *header = vmdev->packet;
746 /* Construct header in packet buffer */
747 assert ( header != NULL );
748 header->type = cpu_to_le16 ( VMBUS_DATA_INBAND );
749 header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
750 header->flags = ( xid ?
751 cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED ) : 0 );
752 header->xid = xid; /* Non-endian */
754 return vmbus_send ( vmdev, header, data, len );
758 * Send data packet via ring buffer
760 * @v vmdev VMBus device
761 * @v xid Transaction ID
763 * @v len Length of data
764 * @v iobuf I/O buffer
765 * @ret rc Return status code
767 * Send data using a VMBUS_DATA_GPA_DIRECT packet. The caller is
768 * responsible for ensuring that the I/O buffer remains untouched
769 * until the corresponding completion has been received.
/* Send a VMBUS_DATA_GPA_DIRECT packet referencing an I/O buffer.
 *
 * Builds (in the shared packet buffer) a GPA-direct header plus one
 * GPA range and per-page PFN list describing iobuf, then sends the
 * inline data alongside it.  A completion is always requested; the
 * caller must keep iobuf untouched until that completion arrives.
 */
771 int vmbus_send_data ( struct vmbus_device *vmdev, uint64_t xid,
772 const void *data, size_t len, struct io_buffer *iobuf ) {
773 physaddr_t addr = virt_to_phys ( iobuf->data );
774 unsigned int pfn_count = hv_pfn_count ( addr, iob_len ( iobuf ) );
/* Anonymous header layout: GPA-direct header + one range + PFNs */
776 struct vmbus_gpa_direct_header gpa;
777 struct vmbus_gpa_range range;
778 uint64_t pfn[pfn_count];
779 } __attribute__ (( packed )) *header = vmdev->packet;
783 assert ( header != NULL );
/* The variable-length header must fit in the packet buffer (mtu) */
784 assert ( sizeof ( *header ) <= vmdev->mtu );
786 /* Construct header in packet buffer */
787 header->gpa.header.type = cpu_to_le16 ( VMBUS_DATA_GPA_DIRECT );
788 header->gpa.header.hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
789 header->gpa.header.flags = cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED );
790 header->gpa.header.xid = xid; /* Non-endian */
791 header->gpa.range_count = 1;
792 header->range.len = cpu_to_le32 ( iob_len ( iobuf ) );
793 header->range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) );
794 for ( i = 0 ; i < pfn_count ; i++ )
795 header->pfn[i] = ( ( addr / PAGE_SIZE ) + i );
797 return vmbus_send ( vmdev, &header->gpa.header, data, len );
801 * Send completion packet via ring buffer
803 * @v vmdev VMBus device
804 * @v xid Transaction ID
806 * @v len Length of data
807 * @ret rc Return status code
809 * Send data using a VMBUS_COMPLETION packet.
/* Send a VMBUS_COMPLETION packet acknowledging transaction xid,
 * carrying optional completion data.  Mirrors vmbus_send_control()
 * but never requests a further completion.
 */
811 int vmbus_send_completion ( struct vmbus_device *vmdev, uint64_t xid,
812 const void *data, size_t len ) {
813 struct vmbus_packet_header *header = vmdev->packet;
815 /* Construct header in packet buffer */
816 assert ( header != NULL );
817 header->type = cpu_to_le16 ( VMBUS_COMPLETION );
818 header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
820 header->xid = xid; /* Non-endian */
822 return vmbus_send ( vmdev, header, data, len );
826 * Send cancellation packet via ring buffer
828 * @v vmdev VMBus device
829 * @v xid Transaction ID
830 * @ret rc Return status code
832 * Send data using a VMBUS_CANCELLATION packet.
/* Send a VMBUS_CANCELLATION packet for transaction xid.
 * Header-only: no payload data is sent.
 */
834 int vmbus_send_cancellation ( struct vmbus_device *vmdev, uint64_t xid ) {
835 struct vmbus_packet_header *header = vmdev->packet;
837 /* Construct header in packet buffer */
838 assert ( header != NULL );
839 header->type = cpu_to_le16 ( VMBUS_CANCELLATION );
840 header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
842 header->xid = xid; /* Non-endian */
844 return vmbus_send ( vmdev, header, NULL, 0 );
848 * Get transfer page set from pageset ID
850 * @v vmdev VMBus device
851 * @v pageset Page set ID (in protocol byte order)
852 * @ret pages Page set, or NULL if not found
/* Look up a registered transfer-page set by its page set ID
 * (compared in protocol byte order, per the doc comment above).
 * Logs and (in the elided tail) presumably returns NULL when the
 * ID is not found — TODO confirm against full source.
 */
854 static struct vmbus_xfer_pages * vmbus_xfer_pages ( struct vmbus_device *vmdev,
856 struct vmbus_xfer_pages *pages;
858 /* Locate page set */
859 list_for_each_entry ( pages, &vmdev->pages, list ) {
860 if ( pages->pageset == pageset )
864 DBGC ( vmdev, "VMBUS %s unrecognised page set ID %#04x\n",
865 vmdev->dev.name, le16_to_cpu ( pageset ) );
870 * Construct I/O buffer list from transfer pages
872 * @v vmdev VMBus device
873 * @v header Transfer page header
874 * @v list I/O buffer list to populate
875 * @ret rc Return status code
/* Build a list of I/O buffers from a VMBUS_DATA_XFER_PAGES packet.
 *
 * Looks up the referenced page set, then for each range in the
 * packet allocates an I/O buffer and fills it via the page set's
 * copy operation.  On any failure, the error path at the bottom
 * frees every buffer already added to the list.
 */
877 static int vmbus_xfer_page_iobufs ( struct vmbus_device *vmdev,
878 struct vmbus_packet_header *header,
879 struct list_head *list ) {
/* Recover the transfer-page header wrapping the generic header */
880 struct vmbus_xfer_page_header *page_header =
881 container_of ( header, struct vmbus_xfer_page_header, header );
882 struct vmbus_xfer_pages *pages;
883 struct io_buffer *iobuf;
884 struct io_buffer *tmp;
887 unsigned int range_count;
892 assert ( header->type == cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) );
894 /* Locate page set */
895 pages = vmbus_xfer_pages ( vmdev, page_header->pageset );
901 /* Allocate and populate I/O buffers */
902 range_count = le32_to_cpu ( page_header->range_count );
903 for ( i = 0 ; i < range_count ; i++ ) {
906 len = le32_to_cpu ( page_header->range[i].len );
907 offset = le32_to_cpu ( page_header->range[i].offset );
909 /* Allocate I/O buffer */
910 iobuf = alloc_iob ( len );
912 DBGC ( vmdev, "VMBUS %s could not allocate %zd-byte "
913 "I/O buffer\n", vmdev->dev.name, len );
918 /* Add I/O buffer to list */
/* Added to the list before population so the error path below
 * can free it along with any earlier buffers.
 */
919 list_add ( &iobuf->list, list );
921 /* Populate I/O buffer */
922 if ( ( rc = pages->op->copy ( pages, iob_put ( iobuf, len ),
923 offset, len ) ) != 0 ) {
924 DBGC ( vmdev, "VMBUS %s could not populate I/O buffer "
925 "range [%zd,%zd): %s\n",
926 vmdev->dev.name, offset, len, strerror ( rc ) );
/* Error path: release all buffers accumulated on the list */
935 list_for_each_entry_safe ( iobuf, tmp, list, list ) {
936 list_del ( &iobuf->list );
946 * @v vmdev VMBus device
947 * @ret rc Return status code
/* Poll the inbound ring buffer for one received packet.
 *
 * Consumes the packet into the channel's packet buffer, sanity-checks
 * the header and total length against the channel MTU, builds transfer
 * -page I/O buffers if applicable, advances the consumer index, and
 * dispatches on packet type to the channel's recv_* operations.
 */
949 int vmbus_poll ( struct vmbus_device *vmdev ) {
950 struct vmbus_packet_header *header = vmdev->packet;
951 struct list_head list;
963 assert ( vmdev->packet != NULL );
964 assert ( vmdev->in != NULL );
966 /* Return immediately if buffer is empty */
967 if ( ! vmbus_has_data ( vmdev ) )
969 cons = le32_to_cpu ( vmdev->in->cons );
972 /* Consume (start of) header */
973 cons = vmbus_consume ( vmdev, cons, header, sizeof ( *header ) );
975 /* Parse and sanity check header */
/* Quadword-unit lengths converted to bytes; reject a header
 * shorter than the fixed packet header.
 */
976 header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 );
977 if ( header_len < sizeof ( *header ) ) {
978 DBGC ( vmdev, "VMBUS %s received underlength header (%zd "
979 "bytes)\n", vmdev->dev.name, header_len );
982 len = ( ( le16_to_cpu ( header->qlen ) * 8 ) - header_len );
983 footer_len = sizeof ( struct vmbus_packet_footer );
984 ring_len = ( header_len + len + footer_len );
/* The whole packet must fit in the mtu-sized packet buffer */
985 if ( ring_len > vmdev->mtu ) {
986 DBGC ( vmdev, "VMBUS %s received overlength packet (%zd "
987 "bytes)\n", vmdev->dev.name, ring_len );
990 xid = le64_to_cpu ( header->xid );
992 /* Consume remainder of packet */
993 cons = vmbus_consume ( vmdev, cons,
994 ( ( ( void * ) header ) + sizeof ( *header ) ),
995 ( ring_len - sizeof ( *header ) ) );
996 DBGC2 ( vmdev, "VMBUS %s received:\n", vmdev->dev.name );
997 DBGC2_HDA ( vmdev, old_cons, header, ring_len );
998 assert ( ( ( cons - old_cons ) & ( vmdev->in_len - 1 ) ) == ring_len );
1000 /* Allocate I/O buffers, if applicable */
1001 INIT_LIST_HEAD ( &list );
1002 if ( header->type == cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) ) {
1003 if ( ( rc = vmbus_xfer_page_iobufs ( vmdev, header,
1008 /* Update producer index */
1010 vmdev->in->cons = cpu_to_le32 ( cons );
/* Dispatch packet to the channel's registered handlers; the case
 * labels compare against little-endian constants so header->type
 * needs no conversion.
 */
1013 data = ( ( ( void * ) header ) + header_len );
1014 switch ( header->type ) {
1016 case cpu_to_le16 ( VMBUS_DATA_INBAND ) :
1017 if ( ( rc = vmdev->op->recv_control ( vmdev, xid, data,
1019 DBGC ( vmdev, "VMBUS %s could not handle control "
1021 vmdev->dev.name, strerror ( rc ) );
1026 case cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) :
1027 if ( ( rc = vmdev->op->recv_data ( vmdev, xid, data, len,
1029 DBGC ( vmdev, "VMBUS %s could not handle data packet: "
1030 "%s\n", vmdev->dev.name, strerror ( rc ) );
1035 case cpu_to_le16 ( VMBUS_COMPLETION ) :
1036 if ( ( rc = vmdev->op->recv_completion ( vmdev, xid, data,
1038 DBGC ( vmdev, "VMBUS %s could not handle completion: "
1039 "%s\n", vmdev->dev.name, strerror ( rc ) );
1044 case cpu_to_le16 ( VMBUS_CANCELLATION ) :
1045 if ( ( rc = vmdev->op->recv_cancellation ( vmdev, xid ) ) != 0){
1046 DBGC ( vmdev, "VMBUS %s could not handle cancellation: "
1047 "%s\n", vmdev->dev.name, strerror ( rc ) );
/* Default case: unknown packet type */
1053 DBGC ( vmdev, "VMBUS %s unknown packet type %d\n",
1054 vmdev->dev.name, le16_to_cpu ( header->type ) );
1062 * Dump channel status (for debugging)
1064 * @v vmdev VMBus device
/* Dump channel ring status and any unconsumed inbound data
 * (debug aid only; handles the inbound-data wrap in two hexdumps).
 */
1066 void vmbus_dump_channel ( struct vmbus_device *vmdev ) {
1067 size_t out_prod = le32_to_cpu ( vmdev->out->prod );
1068 size_t out_cons = le32_to_cpu ( vmdev->out->cons );
1069 size_t in_prod = le32_to_cpu ( vmdev->in->prod );
1070 size_t in_cons = le32_to_cpu ( vmdev->in->cons );
1075 /* Dump ring status */
1076 DBGC ( vmdev, "VMBUS %s out %03zx:%03zx%s in %03zx:%03zx%s\n",
1077 vmdev->dev.name, out_prod, out_cons,
1078 ( vmdev->out->intr_mask ? "(m)" : "" ), in_prod, in_cons,
1079 ( vmdev->in->intr_mask ? "(m)" : "" ) );
1081 /* Dump inbound ring contents, if any */
1082 if ( in_prod != in_cons ) {
1083 in_len = ( ( in_prod - in_cons ) &
1084 ( vmdev->in_len - 1 ) );
/* Split at the ring wrap point, as in vmbus_consume() */
1085 first = ( vmdev->in_len - in_cons );
1086 if ( first > in_len )
1088 second = ( in_len - first );
1089 DBGC_HDA ( vmdev, in_cons, &vmdev->in->data[in_cons], first );
1090 DBGC_HDA ( vmdev, 0, &vmdev->in->data[0], second );
1095 * Find driver for VMBus device
1097 * @v vmdev VMBus device
1098 * @ret driver Driver, or NULL
/* Find a registered VMBus driver matching a channel type UUID by
 * scanning the VMBUS_DRIVERS linker table; returns NULL if no
 * driver matches (per the doc comment above).
 */
1100 static struct vmbus_driver * vmbus_find_driver ( const union uuid *type ) {
1101 struct vmbus_driver *vmdrv;
1103 for_each_table_entry ( vmdrv, VMBUS_DRIVERS ) {
1104 if ( memcmp ( &vmdrv->type, type, sizeof ( *type ) ) == 0 )
1113 * @v hv Hyper-V hypervisor
1114 * @v parent Parent device
1115 * @ret rc Return status code
/* Enumerate VMBus channels and probe a device for each offer.
 *
 * Posts VMBUS_REQUEST_OFFERS, then loops collecting responses:
 * each VMBUS_OFFER_CHANNEL creates and registers a vmbus_device
 * (if a driver matches its type UUID), and VMBUS_ALL_OFFERS_DELIVERED
 * ends enumeration.  Devices are probed only after enumeration
 * completes (per the inline comment), and the error paths unwind
 * probed drivers and free allocated devices.
 */
1117 static int vmbus_probe_channels ( struct hv_hypervisor *hv,
1118 struct device *parent ) {
1119 struct vmbus *vmbus = hv->vmbus;
/* Both pointers alias the shared received-message buffer */
1120 const struct vmbus_message_header *header = &vmbus->message->header;
1121 const struct vmbus_offer_channel *offer = &vmbus->message->offer;
1122 const union uuid *type;
1123 struct vmbus_driver *driver;
1124 struct vmbus_device *vmdev;
1125 struct vmbus_device *tmp;
1126 unsigned int channel;
1130 if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0)
1131 goto err_post_message;
1133 /* Collect responses */
1136 /* Wait for response */
1137 if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 )
1138 goto err_wait_for_message;
1140 /* Handle response */
1141 if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) {
1144 type = &offer->type;
1145 channel = le32_to_cpu ( offer->channel );
1146 DBGC2 ( vmbus, "VMBUS %p offer %d type %s",
1147 vmbus, channel, uuid_ntoa ( type ) );
1148 if ( offer->monitored )
1149 DBGC2 ( vmbus, " monitor %d", offer->monitor );
1150 DBGC2 ( vmbus, "\n" );
1152 /* Look for a driver */
1153 driver = vmbus_find_driver ( type );
1155 DBGC2 ( vmbus, "VMBUS %p has no driver for "
1156 "type %s\n", vmbus, uuid_ntoa ( type ));
1157 /* Not a fatal error */
1161 /* Allocate and initialise device */
1162 vmdev = zalloc ( sizeof ( *vmdev ) );
1165 goto err_alloc_vmdev;
1167 snprintf ( vmdev->dev.name, sizeof ( vmdev->dev.name ),
1168 "vmbus:%02x", channel );
1169 vmdev->dev.desc.bus_type = BUS_TYPE_HV;
1170 INIT_LIST_HEAD ( &vmdev->dev.children );
1171 list_add_tail ( &vmdev->dev.siblings,
1172 &parent->children );
1173 vmdev->dev.parent = parent;
1175 vmdev->channel = channel;
1176 vmdev->monitor = offer->monitor;
/* Choose signalling method based on whether the host offered
 * a monitor for this channel.
 */
1177 vmdev->signal = ( offer->monitored ?
1178 vmbus_signal_monitor :
1179 vmbus_signal_event );
1180 INIT_LIST_HEAD ( &vmdev->pages );
1181 vmdev->driver = driver;
1182 vmdev->dev.driver_name = driver->name;
1183 DBGC ( vmdev, "VMBUS %s has driver \"%s\"\n",
1184 vmdev->dev.name, vmdev->driver->name );
1186 } else if ( header->type ==
1187 cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) {
/* Any other response type is a protocol error */
1192 DBGC ( vmbus, "VMBUS %p unexpected offer response type "
1193 "%d\n", vmbus, le32_to_cpu ( header->type ) );
1195 goto err_unexpected_offer;
1199 /* Probe all devices. We do this only after completing
1200 * enumeration since devices will need to send and receive
1203 list_for_each_entry ( vmdev, &parent->children, dev.siblings ) {
1204 if ( ( rc = vmdev->driver->probe ( vmdev ) ) != 0 ) {
1205 DBGC ( vmdev, "VMBUS %s could not probe: %s\n",
1206 vmdev->dev.name, strerror ( rc ) );
1214 /* Remove driver from each device that was already probed */
1215 list_for_each_entry_continue_reverse ( vmdev, &parent->children,
1217 vmdev->driver->remove ( vmdev );
1219 err_unexpected_offer:
1221 err_wait_for_message:
1222 /* Free any devices allocated (but potentially not yet probed) */
1223 list_for_each_entry_safe ( vmdev, tmp, &parent->children,
1225 list_del ( &vmdev->dev.siblings );
1235 * @v hv Hyper-V hypervisor
1236 * @v parent Parent device
/* Remove all VMBus channel devices under the given parent.
 *
 * Calls each device's driver remove method, asserts that the device
 * released all of its resources (ring buffers, packet buffer, page
 * sets, children), then unlinks it from the parent's child list.
 */
1238 static void vmbus_remove_channels ( struct hv_hypervisor *hv __unused,
1239 struct device *parent ) {
1240 struct vmbus_device *vmdev;
1241 struct vmbus_device *tmp;
1243 /* Remove devices */
1244 list_for_each_entry_safe ( vmdev, tmp, &parent->children,
1246 vmdev->driver->remove ( vmdev );
1247 assert ( list_empty ( &vmdev->dev.children ) );
1248 assert ( vmdev->out == NULL );
1249 assert ( vmdev->in == NULL );
1250 assert ( vmdev->packet == NULL );
1251 assert ( list_empty ( &vmdev->pages ) );
1252 list_del ( &vmdev->dev.siblings );
1258 * Probe Hyper-V virtual machine bus
1260 * @v hv Hyper-V hypervisor
1261 * @v parent Parent device
1262 * @ret rc Return status code
/* Probe the Hyper-V virtual machine bus.
 *
 * Allocates the vmbus state, points the message pointer at the
 * hypervisor's fixed-size received-message buffer, allocates the
 * shared interrupt/monitor pages, enables the message SINT,
 * negotiates the protocol version, and enumerates channels.
 * Errors unwind in reverse order through the labels at the bottom.
 */
1264 int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ) {
1265 struct vmbus *vmbus;
1268 /* Allocate and initialise structure */
1269 vmbus = zalloc ( sizeof ( *vmbus ) );
1276 /* Initialise message buffer pointer
1278 * We use a pointer to the fixed-size Hyper-V received message
1279 * buffer. This allows us to access fields within received
1280 * messages without first checking the message size: any
1281 * fields beyond the end of the message will read as zero.
1283 vmbus->message = ( ( void * ) hv->message->received.data );
1284 assert ( sizeof ( *vmbus->message ) <=
1285 sizeof ( hv->message->received.data ) );
1287 /* Allocate interrupt and monitor pages */
1288 if ( ( rc = hv_alloc_pages ( hv, &vmbus->intr, &vmbus->monitor_in,
1289 &vmbus->monitor_out, NULL ) ) != 0 )
1290 goto err_alloc_pages;
1292 /* Enable message interrupt */
1293 hv_enable_sint ( hv, VMBUS_MESSAGE_SINT );
1295 /* Negotiate protocol version */
1296 if ( ( rc = vmbus_negotiate_version ( hv ) ) != 0 )
1297 goto err_negotiate_version;
1299 /* Enumerate channels */
1300 if ( ( rc = vmbus_probe_channels ( hv, parent ) ) != 0 )
1301 goto err_probe_channels;
/* Error unwind: undo each successful step in reverse order */
1305 vmbus_remove_channels ( hv, parent );
1307 vmbus_unload ( hv );
1308 err_negotiate_version:
1309 hv_disable_sint ( hv, VMBUS_MESSAGE_SINT );
1310 hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out,
1319 * Remove Hyper-V virtual machine bus
1321 * @v hv Hyper-V hypervisor
1322 * @v parent Parent device
1324 void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent ) {
1325 struct vmbus *vmbus = hv->vmbus;
1327 vmbus_remove_channels ( hv, parent );
1328 vmbus_unload ( hv );
1329 hv_disable_sint ( hv, VMBUS_MESSAGE_SINT );
1330 hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out,