Add qemu 2.4.0
[kvmfornfv.git] / qemu / tests / image-fuzzer / qcow2 / layout.py
1 # Generator of fuzzed qcow2 images
2 #
3 # Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #
18
19 import random
20 import struct
21 import fuzz
22 from math import ceil
23 from os import urandom
24 from itertools import chain
25
26 MAX_IMAGE_SIZE = 10 * (1 << 20)
27 # Standard sizes
28 UINT32_S = 4
29 UINT64_S = 8
30
31
32 class Field(object):
33
34     """Atomic image element (field).
35
36     The class represents an image field as quadruple of a data format
37     of value necessary for its packing to binary form, an offset from
38     the beginning of the image, a value and a name.
39
40     The field can be iterated as a list [format, offset, value, name].
41     """
42
43     __slots__ = ('fmt', 'offset', 'value', 'name')
44
45     def __init__(self, fmt, offset, val, name):
46         self.fmt = fmt
47         self.offset = offset
48         self.value = val
49         self.name = name
50
51     def __iter__(self):
52         return iter([self.fmt, self.offset, self.value, self.name])
53
54     def __repr__(self):
55         return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
56             (self.fmt, self.offset, str(self.value), self.name)
57
58
59 class FieldsList(object):
60
61     """List of fields.
62
63     The class allows access to a field in the list by its name.
64     """
65
66     def __init__(self, meta_data=None):
67         if meta_data is None:
68             self.data = []
69         else:
70             self.data = [Field(*f)
71                          for f in meta_data]
72
73     def __getitem__(self, name):
74         return [x for x in self.data if x.name == name]
75
76     def __iter__(self):
77         return iter(self.data)
78
79     def __len__(self):
80         return len(self.data)
81
82
83 class Image(object):
84
85     """ Qcow2 image object.
86
87     This class allows to create qcow2 images with random valid structures and
88     values, fuzz them via external qcow2.fuzz module and write the result to
89     a file.
90     """
91
92     def __init__(self, backing_file_name=None):
93         """Create a random valid qcow2 image with the correct header and stored
94         backing file name.
95         """
96         cluster_bits, self.image_size = self._size_params()
97         self.cluster_size = 1 << cluster_bits
98         self.header = FieldsList()
99         self.backing_file_name = FieldsList()
100         self.backing_file_format = FieldsList()
101         self.feature_name_table = FieldsList()
102         self.end_of_extension_area = FieldsList()
103         self.l2_tables = FieldsList()
104         self.l1_table = FieldsList()
105         self.refcount_table = FieldsList()
106         self.refcount_blocks = FieldsList()
107         self.ext_offset = 0
108         self.create_header(cluster_bits, backing_file_name)
109         self.set_backing_file_name(backing_file_name)
110         self.data_clusters = self._alloc_data(self.image_size,
111                                               self.cluster_size)
112         # Percentage of fields will be fuzzed
113         self.bias = random.uniform(0.2, 0.5)
114
115     def __iter__(self):
116         return chain(self.header, self.backing_file_format,
117                      self.feature_name_table, self.end_of_extension_area,
118                      self.backing_file_name, self.l1_table, self.l2_tables,
119                      self.refcount_table, self.refcount_blocks)
120
121     def create_header(self, cluster_bits, backing_file_name=None):
122         """Generate a random valid header."""
123         meta_header = [
124             ['>4s', 0, "QFI\xfb", 'magic'],
125             ['>I', 4, random.randint(2, 3), 'version'],
126             ['>Q', 8, 0, 'backing_file_offset'],
127             ['>I', 16, 0, 'backing_file_size'],
128             ['>I', 20, cluster_bits, 'cluster_bits'],
129             ['>Q', 24, self.image_size, 'size'],
130             ['>I', 32, 0, 'crypt_method'],
131             ['>I', 36, 0, 'l1_size'],
132             ['>Q', 40, 0, 'l1_table_offset'],
133             ['>Q', 48, 0, 'refcount_table_offset'],
134             ['>I', 56, 0, 'refcount_table_clusters'],
135             ['>I', 60, 0, 'nb_snapshots'],
136             ['>Q', 64, 0, 'snapshots_offset'],
137             ['>Q', 72, 0, 'incompatible_features'],
138             ['>Q', 80, 0, 'compatible_features'],
139             ['>Q', 88, 0, 'autoclear_features'],
140             # Only refcount_order = 4 is supported by current (07.2014)
141             # implementation of QEMU
142             ['>I', 96, 4, 'refcount_order'],
143             ['>I', 100, 0, 'header_length']
144         ]
145         self.header = FieldsList(meta_header)
146
147         if self.header['version'][0].value == 2:
148             self.header['header_length'][0].value = 72
149         else:
150             self.header['incompatible_features'][0].value = \
151                                                         random.getrandbits(2)
152             self.header['compatible_features'][0].value = random.getrandbits(1)
153             self.header['header_length'][0].value = 104
154         # Extensions start at the header last field offset and the field size
155         self.ext_offset = struct.calcsize(
156             self.header['header_length'][0].fmt) + \
157             self.header['header_length'][0].offset
158         end_of_extension_area_len = 2 * UINT32_S
159         free_space = self.cluster_size - self.ext_offset - \
160                      end_of_extension_area_len
161         # If the backing file name specified and there is enough space for it
162         # in the first cluster, then it's placed in the very end of the first
163         # cluster.
164         if (backing_file_name is not None) and \
165            (free_space >= len(backing_file_name)):
166             self.header['backing_file_size'][0].value = len(backing_file_name)
167             self.header['backing_file_offset'][0].value = \
168                                     self.cluster_size - len(backing_file_name)
169
170     def set_backing_file_name(self, backing_file_name=None):
171         """Add the name of the backing file at the offset specified
172         in the header.
173         """
174         if (backing_file_name is not None) and \
175            (not self.header['backing_file_offset'][0].value == 0):
176             data_len = len(backing_file_name)
177             data_fmt = '>' + str(data_len) + 's'
178             self.backing_file_name = FieldsList([
179                 [data_fmt, self.header['backing_file_offset'][0].value,
180                  backing_file_name, 'bf_name']
181             ])
182
183     def set_backing_file_format(self, backing_file_fmt=None):
184         """Generate the header extension for the backing file format."""
185         if backing_file_fmt is not None:
186             # Calculation of the free space available in the first cluster
187             end_of_extension_area_len = 2 * UINT32_S
188             high_border = (self.header['backing_file_offset'][0].value or
189                            (self.cluster_size - 1)) - \
190                 end_of_extension_area_len
191             free_space = high_border - self.ext_offset
192             ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)
193
194             if free_space >= ext_size:
195                 ext_data_len = len(backing_file_fmt)
196                 ext_data_fmt = '>' + str(ext_data_len) + 's'
197                 ext_padding_len = 7 - (ext_data_len - 1) % 8
198                 self.backing_file_format = FieldsList([
199                     ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
200                     ['>I', self.ext_offset + UINT32_S, ext_data_len,
201                      'ext_length'],
202                     [ext_data_fmt, self.ext_offset + UINT32_S * 2,
203                      backing_file_fmt, 'bf_format']
204                 ])
205                 self.ext_offset = \
206                         struct.calcsize(
207                             self.backing_file_format['bf_format'][0].fmt) + \
208                         ext_padding_len + \
209                         self.backing_file_format['bf_format'][0].offset
210
211     def create_feature_name_table(self):
212         """Generate a random header extension for names of features used in
213         the image.
214         """
215         def gen_feat_ids():
216             """Return random feature type and feature bit."""
217             return (random.randint(0, 2), random.randint(0, 63))
218
219         end_of_extension_area_len = 2 * UINT32_S
220         high_border = (self.header['backing_file_offset'][0].value or
221                        (self.cluster_size - 1)) - \
222             end_of_extension_area_len
223         free_space = high_border - self.ext_offset
224         # Sum of sizes of 'magic' and 'length' header extension fields
225         ext_header_len = 2 * UINT32_S
226         fnt_entry_size = 6 * UINT64_S
227         num_fnt_entries = min(10, (free_space - ext_header_len) /
228                               fnt_entry_size)
229         if not num_fnt_entries == 0:
230             feature_tables = []
231             feature_ids = []
232             inner_offset = self.ext_offset + ext_header_len
233             feat_name = 'some cool feature'
234             while len(feature_tables) < num_fnt_entries * 3:
235                 feat_type, feat_bit = gen_feat_ids()
236                 # Remove duplicates
237                 while (feat_type, feat_bit) in feature_ids:
238                     feat_type, feat_bit = gen_feat_ids()
239                 feature_ids.append((feat_type, feat_bit))
240                 feat_fmt = '>' + str(len(feat_name)) + 's'
241                 feature_tables += [['B', inner_offset,
242                                     feat_type, 'feature_type'],
243                                    ['B', inner_offset + 1, feat_bit,
244                                     'feature_bit_number'],
245                                    [feat_fmt, inner_offset + 2,
246                                     feat_name, 'feature_name']
247                 ]
248                 inner_offset += fnt_entry_size
249             # No padding for the extension is necessary, because
250             # the extension length is multiple of 8
251             self.feature_name_table = FieldsList([
252                 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
253                 # One feature table contains 3 fields and takes 48 bytes
254                 ['>I', self.ext_offset + UINT32_S,
255                  len(feature_tables) / 3 * 48, 'ext_length']
256             ] + feature_tables)
257             self.ext_offset = inner_offset
258
259     def set_end_of_extension_area(self):
260         """Generate a mandatory header extension marking end of header
261         extensions.
262         """
263         self.end_of_extension_area = FieldsList([
264             ['>I', self.ext_offset, 0, 'ext_magic'],
265             ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
266         ])
267
268     def create_l_structures(self):
269         """Generate random valid L1 and L2 tables."""
270         def create_l2_entry(host, guest, l2_cluster):
271             """Generate one L2 entry."""
272             offset = l2_cluster * self.cluster_size
273             l2_size = self.cluster_size / UINT64_S
274             entry_offset = offset + UINT64_S * (guest % l2_size)
275             cluster_descriptor = host * self.cluster_size
276             if not self.header['version'][0].value == 2:
277                 cluster_descriptor += random.randint(0, 1)
278             # While snapshots are not supported, bit #63 = 1
279             # Compressed clusters are not supported => bit #62 = 0
280             entry_val = (1 << 63) + cluster_descriptor
281             return ['>Q', entry_offset, entry_val, 'l2_entry']
282
283         def create_l1_entry(l2_cluster, l1_offset, guest):
284             """Generate one L1 entry."""
285             l2_size = self.cluster_size / UINT64_S
286             entry_offset = l1_offset + UINT64_S * (guest / l2_size)
287             # While snapshots are not supported bit #63 = 1
288             entry_val = (1 << 63) + l2_cluster * self.cluster_size
289             return ['>Q', entry_offset, entry_val, 'l1_entry']
290
291         if len(self.data_clusters) == 0:
292             # All metadata for an empty guest image needs 4 clusters:
293             # header, rfc table, rfc block, L1 table.
294             # Header takes cluster #0, other clusters ##1-3 can be used
295             l1_offset = random.randint(1, 3) * self.cluster_size
296             l1 = [['>Q', l1_offset, 0, 'l1_entry']]
297             l2 = []
298         else:
299             meta_data = self._get_metadata()
300             guest_clusters = random.sample(range(self.image_size /
301                                                  self.cluster_size),
302                                            len(self.data_clusters))
303             # Number of entries in a L1/L2 table
304             l_size = self.cluster_size / UINT64_S
305             # Number of clusters necessary for L1 table
306             l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
307             l1_start = self._get_adjacent_clusters(self.data_clusters |
308                                                    meta_data, l1_size)
309             meta_data |= set(range(l1_start, l1_start + l1_size))
310             l1_offset = l1_start * self.cluster_size
311             # Indices of L2 tables
312             l2_ids = []
313             # Host clusters allocated for L2 tables
314             l2_clusters = []
315             # L1 entries
316             l1 = []
317             # L2 entries
318             l2 = []
319             for host, guest in zip(self.data_clusters, guest_clusters):
320                 l2_id = guest / l_size
321                 if l2_id not in l2_ids:
322                     l2_ids.append(l2_id)
323                     l2_clusters.append(self._get_adjacent_clusters(
324                         self.data_clusters | meta_data | set(l2_clusters),
325                         1))
326                     l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
327                                               guest))
328                 l2.append(create_l2_entry(host, guest,
329                                           l2_clusters[l2_ids.index(l2_id)]))
330         self.l2_tables = FieldsList(l2)
331         self.l1_table = FieldsList(l1)
332         self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
333                                                 float(self.cluster_size**2)))
334         self.header['l1_table_offset'][0].value = l1_offset
335
336     def create_refcount_structures(self):
337         """Generate random refcount blocks and refcount table."""
338         def allocate_rfc_blocks(data, size):
339             """Return indices of clusters allocated for refcount blocks."""
340             cluster_ids = set()
341             diff = block_ids = set([x / size for x in data])
342             while len(diff) != 0:
343                 # Allocate all yet not allocated clusters
344                 new = self._get_available_clusters(data | cluster_ids,
345                                                    len(diff))
346                 # Indices of new refcount blocks necessary to cover clusters
347                 # in 'new'
348                 diff = set([x / size for x in new]) - block_ids
349                 cluster_ids |= new
350                 block_ids |= diff
351             return cluster_ids, block_ids
352
353         def allocate_rfc_table(data, init_blocks, block_size):
354             """Return indices of clusters allocated for the refcount table
355             and updated indices of clusters allocated for blocks and indices
356             of blocks.
357             """
358             blocks = set(init_blocks)
359             clusters = set()
360             # Number of entries in one cluster of the refcount table
361             size = self.cluster_size / UINT64_S
362             # Number of clusters necessary for the refcount table based on
363             # the current number of refcount blocks
364             table_size = int(ceil((max(blocks) + 1) / float(size)))
365             # Index of the first cluster of the refcount table
366             table_start = self._get_adjacent_clusters(data, table_size + 1)
367             # Clusters allocated for the current length of the refcount table
368             table_clusters = set(range(table_start, table_start + table_size))
369             # Clusters allocated for the refcount table including
370             # last optional one for potential l1 growth
371             table_clusters_allocated = set(range(table_start, table_start +
372                                                  table_size + 1))
373             # New refcount blocks necessary for clusters occupied by the
374             # refcount table
375             diff = set([c / block_size for c in table_clusters]) - blocks
376             blocks |= diff
377             while len(diff) != 0:
378                 # Allocate clusters for new refcount blocks
379                 new = self._get_available_clusters((data | clusters) |
380                                                    table_clusters_allocated,
381                                                    len(diff))
382                 # Indices of new refcount blocks necessary to cover
383                 # clusters in 'new'
384                 diff = set([x / block_size for x in new]) - blocks
385                 clusters |= new
386                 blocks |= diff
387                 # Check if the refcount table needs one more cluster
388                 if int(ceil((max(blocks) + 1) / float(size))) > table_size:
389                     new_block_id = (table_start + table_size) / block_size
390                     # Check if the additional table cluster needs
391                     # one more refcount block
392                     if new_block_id not in blocks:
393                         diff.add(new_block_id)
394                     table_clusters.add(table_start + table_size)
395                     table_size += 1
396             return table_clusters, blocks, clusters
397
398         def create_table_entry(table_offset, block_cluster, block_size,
399                                cluster):
400             """Generate a refcount table entry."""
401             offset = table_offset + UINT64_S * (cluster / block_size)
402             return ['>Q', offset, block_cluster * self.cluster_size,
403                     'refcount_table_entry']
404
405         def create_block_entry(block_cluster, block_size, cluster):
406             """Generate a list of entries for the current block."""
407             entry_size = self.cluster_size / block_size
408             offset = block_cluster * self.cluster_size
409             entry_offset = offset + entry_size * (cluster % block_size)
410             # While snapshots are not supported all refcounts are set to 1
411             return ['>H', entry_offset, 1, 'refcount_block_entry']
412         # Size of a block entry in bits
413         refcount_bits = 1 << self.header['refcount_order'][0].value
414         # Number of refcount entries per refcount block
415         # Convert self.cluster_size from bytes to bits to have the same
416         # base for the numerator and denominator
417         block_size = self.cluster_size * 8 / refcount_bits
418         meta_data = self._get_metadata()
419         if len(self.data_clusters) == 0:
420             # All metadata for an empty guest image needs 4 clusters:
421             # header, rfc table, rfc block, L1 table.
422             # Header takes cluster #0, other clusters ##1-3 can be used
423             block_clusters = set([random.choice(list(set(range(1, 4)) -
424                                                      meta_data))])
425             block_ids = set([0])
426             table_clusters = set([random.choice(list(set(range(1, 4)) -
427                                                      meta_data -
428                                                      block_clusters))])
429         else:
430             block_clusters, block_ids = \
431                                 allocate_rfc_blocks(self.data_clusters |
432                                                     meta_data, block_size)
433             table_clusters, block_ids, new_clusters = \
434                                     allocate_rfc_table(self.data_clusters |
435                                                        meta_data |
436                                                        block_clusters,
437                                                        block_ids,
438                                                        block_size)
439             block_clusters |= new_clusters
440
441         meta_data |= block_clusters | table_clusters
442         table_offset = min(table_clusters) * self.cluster_size
443         block_id = None
444         # Clusters allocated for refcount blocks
445         block_clusters = list(block_clusters)
446         # Indices of refcount blocks
447         block_ids = list(block_ids)
448         # Refcount table entries
449         rfc_table = []
450         # Refcount entries
451         rfc_blocks = []
452
453         for cluster in sorted(self.data_clusters | meta_data):
454             if cluster / block_size != block_id:
455                 block_id = cluster / block_size
456                 block_cluster = block_clusters[block_ids.index(block_id)]
457                 rfc_table.append(create_table_entry(table_offset,
458                                                     block_cluster,
459                                                     block_size, cluster))
460             rfc_blocks.append(create_block_entry(block_cluster, block_size,
461                                                  cluster))
462         self.refcount_table = FieldsList(rfc_table)
463         self.refcount_blocks = FieldsList(rfc_blocks)
464
465         self.header['refcount_table_offset'][0].value = table_offset
466         self.header['refcount_table_clusters'][0].value = len(table_clusters)
467
468     def fuzz(self, fields_to_fuzz=None):
469         """Fuzz an image by corrupting values of a random subset of its fields.
470
471         Without parameters the method fuzzes an entire image.
472
473         If 'fields_to_fuzz' is specified then only fields in this list will be
474         fuzzed. 'fields_to_fuzz' can contain both individual fields and more
475         general image elements as a header or tables.
476
477         In the first case the field will be fuzzed always.
478         In the second a random subset of fields will be selected and fuzzed.
479         """
480         def coin():
481             """Return boolean value proportional to a portion of fields to be
482             fuzzed.
483             """
484             return random.random() < self.bias
485
486         if fields_to_fuzz is None:
487             for field in self:
488                 if coin():
489                     field.value = getattr(fuzz, field.name)(field.value)
490         else:
491             for item in fields_to_fuzz:
492                 if len(item) == 1:
493                     for field in getattr(self, item[0]):
494                         if coin():
495                             field.value = getattr(fuzz,
496                                                   field.name)(field.value)
497                 else:
498                     # If fields with the requested name were not generated
499                     # getattr(self, item[0])[item[1]] returns an empty list
500                     for field in getattr(self, item[0])[item[1]]:
501                         field.value = getattr(fuzz, field.name)(field.value)
502
503     def write(self, filename):
504         """Write an entire image to the file."""
505         image_file = open(filename, 'w')
506         for field in self:
507             image_file.seek(field.offset)
508             image_file.write(struct.pack(field.fmt, field.value))
509
510         for cluster in sorted(self.data_clusters):
511             image_file.seek(cluster * self.cluster_size)
512             image_file.write(urandom(self.cluster_size))
513
514         # Align the real image size to the cluster size
515         image_file.seek(0, 2)
516         size = image_file.tell()
517         rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
518         if rounded > size:
519             image_file.seek(rounded - 1)
520             image_file.write("\0")
521         image_file.close()
522
523     @staticmethod
524     def _size_params():
525         """Generate a random image size aligned to a random correct
526         cluster size.
527         """
528         cluster_bits = random.randrange(9, 21)
529         cluster_size = 1 << cluster_bits
530         img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
531         return (cluster_bits, img_size)
532
533     @staticmethod
534     def _get_available_clusters(used, number):
535         """Return a set of indices of not allocated clusters.
536
537         'used' contains indices of currently allocated clusters.
538         All clusters that cannot be allocated between 'used' clusters will have
539         indices appended to the end of 'used'.
540         """
541         append_id = max(used) + 1
542         free = set(range(1, append_id)) - used
543         if len(free) >= number:
544             return set(random.sample(free, number))
545         else:
546             return free | set(range(append_id, append_id + number - len(free)))
547
548     @staticmethod
549     def _get_adjacent_clusters(used, size):
550         """Return an index of the first cluster in the sequence of free ones.
551
552         'used' contains indices of currently allocated clusters. 'size' is the
553         length of the sequence of free clusters.
554         If the sequence of 'size' is not available between 'used' clusters, its
555         first index will be append to the end of 'used'.
556         """
557         def get_cluster_id(lst, length):
558             """Return the first index of the sequence of the specified length
559             or None if the sequence cannot be inserted in the list.
560             """
561             if len(lst) != 0:
562                 pairs = []
563                 pair = (lst[0], 1)
564                 for i in range(1, len(lst)):
565                     if lst[i] == lst[i-1] + 1:
566                         pair = (lst[i], pair[1] + 1)
567                     else:
568                         pairs.append(pair)
569                         pair = (lst[i], 1)
570                 pairs.append(pair)
571                 random.shuffle(pairs)
572                 for x, s in pairs:
573                     if s >= length:
574                         return x - length + 1
575             return None
576
577         append_id = max(used) + 1
578         free = list(set(range(1, append_id)) - used)
579         idx = get_cluster_id(free, size)
580         if idx is None:
581             return append_id
582         else:
583             return idx
584
585     @staticmethod
586     def _alloc_data(img_size, cluster_size):
587         """Return a set of random indices of clusters allocated for guest data.
588         """
589         num_of_cls = img_size/cluster_size
590         return set(random.sample(range(1, num_of_cls + 1),
591                                  random.randint(0, num_of_cls)))
592
593     def _get_metadata(self):
594         """Return indices of clusters allocated for image metadata."""
595         ids = set()
596         for x in self:
597             ids.add(x.offset/self.cluster_size)
598         return ids
599
600
601 def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
602                  fields_to_fuzz=None):
603     """Create a fuzzed image and write it to the specified file."""
604     image = Image(backing_file_name)
605     image.set_backing_file_format(backing_file_fmt)
606     image.create_feature_name_table()
607     image.set_end_of_extension_area()
608     image.create_l_structures()
609     image.create_refcount_structures()
610     image.fuzz(fields_to_fuzz)
611     image.write(test_img_path)
612     return image.image_size