2 * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
26 /****************************************************************************
28 * This file provides the decompress() and decompress16() functions
29 * which can be called in order to decompress an LZMA-compressed
30 * image. The code is modelled on the public-domain "XZ Embedded"
31 * implementation as used by the Linux kernel. Symbol names are
32 * chosen to match the XZ Embedded implementation where possible, for
35 * This code is optimised for size rather than speed, since the amount
36 * of data to be decompressed is trivially small by modern standards.
38 * The same basic assembly code is used to compile both decompress()
41 * Note that these functions require large amounts of stack space.
43 ****************************************************************************
48 .section ".prefix.lib", "ax", @progbits
53 #define decompress decompress16
61 /****************************************************************************
63 ****************************************************************************
65 * This code will usually run in 16-bit protected mode, in which case
66 * only the 0xe9 debug port (present on some virtual machines) can be
69 * To debug on real hardware, build with DEBUG=libprefix. This will
70 * cause this code to be called in flat real mode, and so DEBUG_INT10
74 /* Enable debugging via 0xe9 debug port */
77 /* Enable debugging via BIOS INT 10 (works only when in flat real mode) */
80 #if ( DEBUG_E9 || DEBUG_INT10 )
81 .macro print_character, reg
105 .macro print_hex_nibble
116 .macro print_hex_byte, reg
130 .macro print_hex_word, reg
138 .macro print_hex_dword, reg
148 .macro print_character, char
150 .macro print_hex_byte, reg
152 .macro print_hex_word, reg
154 .macro print_hex_dword, reg
158 /****************************************************************************
159 * LZMA parameters and data structures
160 ****************************************************************************
163 /* LZMA decompressor states (as used in XZ Embedded) */
164 #define STATE_LIT_LIT 0x00
165 #define STATE_MATCH_LIT_LIT 0x01
166 #define STATE_REP_LIT_LIT 0x02
167 #define STATE_SHORTREP_LIT_LIT 0x03
168 #define STATE_MATCH_LIT 0x04
169 #define STATE_REP_LIT 0x05
170 #define STATE_SHORTREP_LIT 0x06
171 #define STATE_LIT_MATCH 0x07
172 #define STATE_LIT_LONGREP 0x08
173 #define STATE_LIT_SHORTREP 0x09
174 #define STATE_NONLIT_MATCH 0x0a
175 #define STATE_NONLIT_REP 0x0b
177 /* LZMA maximum decompressor state in which most recent symbol was a literal */
178 #define STATE_LIT_MAX 0x06
180 /* LZMA number of literal context bits ("lc=" parameter) */
187 low: .rept ( 1 << 3 )
190 mid: .rept ( 1 << 3 )
193 high: .rept ( 1 << 8 )
196 .equ sizeof__lzma_len_dec, . - lzma_len_dec
211 is_match: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
212 is_rep: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
213 is_rep0: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
214 is_rep1: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
215 is_rep2: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
216 is_rep0_long: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
217 dist_slot: .rept ( 4 * ( 1 << 6 ) )
220 dist_special: .rept ( ( 1 << ( 14 / 2 ) ) - 14 )
223 dist_align: .rept ( 1 << 4 )
226 match_len_dec: .space sizeof__lzma_len_dec
227 rep_len_dec: .space sizeof__lzma_len_dec
228 literal: .rept ( ( 1 << LZMA_LC ) * 0x300 )
232 .equ sizeof__lzma_dec, . - lzma_dec
235 /* Some binutils versions seem not to handle .struct/.previous */
236 .section ".prefix.lib", "ax", @progbits
238 /*****************************************************************************
239 * Normalise range encoder
242 * %ss:%ebp : LZMA parameter block
243 * %ds:%esi : compressed input data pointer
245 * %ds:%esi : compressed input data pointer (possibly updated)
246 * %eax : current range
247 *****************************************************************************
250 /* Check if rc_range is less than 1<<24 */
251 testb $0xff, (rc_range+3)(%ebp)
253 /* If it is, shift in a new byte from the compressed input data */
254 shll $8, rc_range(%ebp)
255 shll $8, rc_code(%ebp)
257 movb %al, (rc_code+0)(%ebp)
258 1: /* Return current range */
259 movl rc_range(%ebp), %eax
261 .size rc_normalise, . - rc_normalise
263 /*****************************************************************************
264 * Decode single range-encoded bit using a probability estimate
267 * %ss:%ebp : LZMA parameter block
268 * %ds:%esi : compressed input data pointer
269 * %ebx : probability estimate pointer (offset from %ebp)
271 * %ds:%esi : compressed input data pointer (possibly updated)
273 * ZF : inverse of decoded bit
276 *****************************************************************************
279 /* Preserve registers */
282 /* Perform normalisation */
284 /* Calculate bound in %eax and probability estimate in %dx */
286 movzwl (%ebp,%ebx), %edx
287 mul %edx /* will zero %edx */
288 movw (%ebp,%ebx), %dx
289 /* Compare code against bound */
290 cmpl %eax, rc_code(%ebp)
292 1: /* Code is less than bound */
293 movl %eax, rc_range(%ebp)
297 addw %dx, (%ebp,%ebx)
298 xorw %ax, %ax /* Clear CF, set ZF */
300 2: /* Code is greater than or equal to bound */
301 subl %eax, rc_range(%ebp)
302 subl %eax, rc_code(%ebp)
304 subw %dx, (%ebp,%ebx)
305 incw %dx /* Clear ZF (%dx is 11-bit; can never wrap) */
307 99: /* Restore registers and return */
311 .size rc_bit, . - rc_bit
313 /*****************************************************************************
314 * Decode MSB-first bittree
317 * %ss:%ebp : LZMA parameter block
318 * %ds:%esi : compressed input data pointer
319 * %ebx : probability estimate set pointer (offset from %ebp)
320 * %cx : number of bits to decode
322 * %ds:%esi : compressed input data pointer (possibly updated)
323 * %eax : decoded bittree
326 *****************************************************************************
329 /* Preserve registers */
333 /* Initialise registers */
336 leaw (%edi,%eax,2), %bx /* high word always zero anyway */
340 /* Restore registers, clear unwanted high bit of result, and return */
346 .size rc_bittree, . - rc_bittree
348 /*****************************************************************************
349 * Decode LSB-first bittree
352 * %ss:%ebp : LZMA parameter block
353 * %ds:%esi : compressed input data pointer
354 * %ebx : probability estimate set pointer (offset from %ebp)
355 * %cx : number of bits to decode
357 * %ds:%esi : compressed input data pointer (possibly updated)
358 * %eax : decoded bittree
361 *****************************************************************************
364 /* Preserve registers */
368 1: /* Reverse result */
373 /* Restore registers and return */
376 .size rc_bittree_reverse, . - rc_bittree_reverse
378 /*****************************************************************************
379 * Decode MSB-first bittree with optional match byte
382 * %ss:%ebp : LZMA parameter block
383 * %ds:%esi : compressed input data pointer
384 * %ebx : probability estimate set pointer (offset from %ebp)
386 * %ch : 1 to use match byte, 0 to ignore match byte
388 * %ds:%esi : compressed input data pointer (possibly updated)
389 * %eax : decoded bittree
392 *****************************************************************************
395 /* Preserve registers */
400 /* Initialise registers */
405 andb %dh, %dl /* match_bit in %dl */
409 addw %ax, %bx /* offset + match_bit + symbol */
410 leaw (%edi,%ebx,2), %bx /* high word always zero anyway */
416 andb %dl, %ch /* offset &= ( match_bit ^ bit ) */
419 /* Restore registers, clear unwanted high bit of result, and return */
426 .size rc_bittree_match, . - rc_bittree_match
428 /*****************************************************************************
429 * Decode direct bits (no probability estimates)
432 * %ss:%ebp : LZMA parameter block
433 * %ds:%esi : compressed input data pointer
434 * %cx : number of bits to decode
436 * %ds:%esi : compressed input data pointer (possibly updated)
437 * %eax : decoded bits
440 *****************************************************************************
443 /* Preserve registers */
447 /* Initialise registers */
449 1: /* Perform normalisation */
453 movl %eax, rc_range(%ebp)
454 movl rc_code(%ebp), %ebx
457 movl %ebx, rc_code(%ebp)
462 /* Restore registers and return */
468 .size rc_direct, . - rc_direct
470 /*****************************************************************************
471 * Decode an LZMA literal
474 * %ss:%ebp : LZMA parameter block
475 * %ds:%esi : compressed input data pointer
476 * %es:%edi : uncompressed output data pointer
479 * %ds:%esi : compressed input data pointer (possibly updated)
480 * %es:%edi : uncompressed output data pointer (updated)
482 * CF : end of payload marker found (always zero)
487 *****************************************************************************
489 * Literals are coded as an eight-bit tree, using a match byte if the
490 * previous symbol was not a literal.
494 /* Get most recent output byte, if available */
496 cmpl %edi, out_start(%ebp)
498 movb %es:-1(%edi), %bh
499 1: /* Locate probability estimate set */
500 shrb $( 8 - LZMA_LC ), %bh
502 leaw literal(%ebx,%ebx,2), %bx
503 /* Get match byte, if applicable */
505 cmpb $STATE_LIT_MAX, %dl
507 movl rep0(%ebp), %eax
509 movb %es:(%edi,%eax), %cl
511 1: /* Decode bittree */
512 call rc_bittree_match
513 /* Store output byte */
516 print_character $(' ')
517 /* Update LZMA state */
524 1: /* Clear CF and return */
527 .size lzma_literal, . - lzma_literal
529 /*****************************************************************************
530 * Decode an LZMA length
533 * %ss:%ebp : LZMA parameter block
534 * %ds:%esi : compressed input data pointer
535 * %ebx : length parameter pointer (offset from %ebp)
537 * %ds:%esi : compressed input data pointer (possibly updated)
540 *****************************************************************************
542 * Lengths are encoded as:
544 * "0" + 3 bits : lengths 2-9 ("low")
545 * "10" + 3 bits : lengths 10-17 ("mid")
546 * "11" + 8 bits : lengths 18-273 ("high")
549 /* Preserve registers */
554 /* Start by assuming three bits and a base length of 2 */
557 /* Check low-length choice bit */
558 leal choice(%edi), %ebx
562 /* Check high-length choice bit */
563 leal choice2(%edi), %ebx
568 leal high(%edi), %ebx
571 1: /* Get encoded length */
574 /* Restore registers and return */
580 .size lzma_len, . - lzma_len
582 /*****************************************************************************
583 * Copy (possibly repeated) matched data
586 * %ss:%ebp : LZMA parameter block
587 * %ds:%esi : compressed input data pointer
588 * %es:%edi : uncompressed output data pointer
589 * %cl : repeated match distance index (for repeated matches)
590 * %eax : match distance (for non-repeated matches)
592 * %ds:%esi : compressed input data pointer (possibly updated)
593 * %es:%edi : uncompressed output data pointer
594 * CF : match distance is out of range
599 *****************************************************************************
601 match: /* Update repeated match list */
602 print_character $('[')
606 print_character $('[')
607 print_character $('R')
609 print_character $('=')
611 movl reps(%ebp,%ecx,4), %eax
613 1: movl (reps-4)(%ebp,%ecx,4), %ebx
614 movl %ebx, reps(%ebp,%ecx,4)
616 movl %eax, rep0(%ebp)
617 2: /* Preserve registers */
619 /* Get stored match length */
620 movzwl len(%ebp), %ecx
622 print_character $('+')
624 print_character $(']')
625 print_character $(' ')
626 /* Abort with CF set if match distance is out of range */
627 movl out_start(%ebp), %esi
629 leal -1(%edi,%esi), %esi
634 leal (%edi,%eax), %esi
636 99: /* Restore registers and return */
639 .size match, . - match
641 /*****************************************************************************
642 * Decode an LZMA match
645 * %ss:%ebp : LZMA parameter block
646 * %ds:%esi : compressed input data pointer
647 * %es:%edi : uncompressed output data pointer
650 * %ds:%esi : compressed input data pointer (possibly updated)
651 * %es:%edi : uncompressed output data pointer
653 * CF : end of payload marker found
658 *****************************************************************************
660 * Matches are encoded as an LZMA length followed by a 6-bit "distance
661 * slot" code, 0-26 fixed-probability bits, and 0-5 context encoded
665 /* Preserve registers */
667 /* Update LZMA state */
668 cmpb $STATE_LIT_MAX, %dl
669 movb $STATE_LIT_MATCH, %dl
671 movb $STATE_NONLIT_MATCH, %dl
672 1: /* Decode length */
673 movl $match_len_dec, %ebx
675 /* Decode distance slot */
685 /* Distance slots 0-3 are literal distances */
688 /* Determine initial bits: 10/11 for even/odd distance codes */
692 /* Determine number of context-encoded bits */
696 /* Select context to be used in absence of fixed-probability bits */
700 leaw (dist_special-2)(%ebx,%ebx), %bx
701 /* Decode fixed-probability bits, if any */
708 /* Select context to be used in presence of fixed-probability bits */
710 movl $dist_align, %ebx
711 1: /* Decode context-encoded bits */
713 call rc_bittree_reverse
715 99: /* Restore registers and tail-call */
718 .size lzma_match, . - lzma_match
720 /*****************************************************************************
721 * Decode an LZMA repeated match
724 * %ss:%ebp : LZMA parameter block
725 * %ds:%esi : compressed input data pointer
726 * %es:%edi : uncompressed output data pointer
729 * %ds:%esi : compressed input data pointer (possibly updated)
730 * %es:%edi : uncompressed output data pointer
732 * CF : end of payload marker found
737 *****************************************************************************
739 * Repeated matches are encoded as:
741 * "00" : shortrep0 (implicit length 1)
742 * "01" + len : longrep0
743 * "10" + len : longrep1
744 * "110" + len : longrep2
745 * "111" + len : longrep3
748 /* Initially assume longrep0 */
749 movw $(STATE_LIT_LONGREP << 8), %cx
750 /* Get is_rep0 bit */
751 leal is_rep0(,%edx,2), %ebx
754 /* Get is_rep0_long bit */
755 leal is_rep0_long(,%edx,2), %ebx
759 movb $STATE_LIT_SHORTREP, %ch
761 1: /* Get is_rep1 bit */
763 leal is_rep1(,%edx,2), %ebx
766 /* Get is_rep2 bit */
768 leal is_rep2(,%edx,2), %ebx
771 98: /* Decode length */
772 movl $rep_len_dec, %ebx
774 99: /* Update LZMA state */
775 cmpb $STATE_LIT_MAX, %dl
778 movb $STATE_NONLIT_REP, %dl
781 .size lzma_match, . - lzma_match
783 /*****************************************************************************
784 * Decode one LZMA symbol
787 * %ss:%ebp : LZMA parameter block
788 * %ds:%esi : compressed input data pointer
789 * %es:%edi : uncompressed output data pointer
792 * %ds:%esi : compressed input data pointer (possibly updated)
793 * %es:%edi : uncompressed output data pointer (updated)
795 * CF : end of payload marker found
800 *****************************************************************************
803 /* Get is_match bit */
804 leal is_match(,%edx,2), %ebx
808 leal is_rep(,%edx,2), %ebx
812 .size lzma_decode, . - lzma_decode
814 /****************************************************************************
815 * Undo effect of branch-call-jump (BCJ) filter
818 * %es:%esi : start of uncompressed output data (note %es)
819 * %es:%edi : end of uncompressed output data
827 *****************************************************************************
830 /* Store (negative) start of data in %edx */
833 /* Calculate limit in %ecx */
834 leal -5(%edi,%edx), %ecx
835 1: /* Calculate offset in %ebx */
836 leal (%esi,%edx), %ebx
837 /* Check for end of data */
840 /* Check for an opcode which would be followed by a rel32 address */
845 /* Get current jump target value in %eax */
847 /* Convert absolute addresses in the range [0,limit) back to
848 * relative addresses in the range [-offset,limit-offset).
852 subl %ebx,%es:-4(%esi)
853 2: /* Convert negative numbers in the range [-offset,0) back to
854 * positive numbers in the range [limit-offset,limit).
856 notl %eax /* Range is now [0,offset) */
859 addl %ecx,%es:-4(%esi)
863 .size bcj_filter, . - bcj_filter
865 /****************************************************************************
866 * decompress (real-mode or 16/32-bit protected-mode near call)
870 * Parameters (passed via registers):
871 * %ds:%esi : Start of compressed input data
872 * %es:%edi : Start of output buffer
874 * %ds:%esi - End of compressed input data
875 * %es:%edi - End of decompressed output data
876 * All other registers are preserved
878 * NOTE: It would be possible to build a smaller version of the
879 * decompression code for -DKEEP_IT_REAL by using 16-bit registers
881 ****************************************************************************
885 /* Preserve registers */
891 /* Allocate parameter block */
892 subl $sizeof__lzma_dec, %esp
894 /* Zero parameter block and set all probabilities to 0.5 */
901 movl $( sizeof__lzma_dec / 4 ), %ecx
903 leal probs(%ebp), %edi
904 movw $( ( 1 << 11 ) / 2 ), %ax
905 movl $( ( sizeof__lzma_dec - probs ) / 2 ), %ecx
909 /* Initialise remaining parameters */
910 movl %edi, out_start(%ebp)
911 print_character $('\n')
912 ADDR32 lodsb /* discard initial byte */
917 print_character $('\n')
918 movl %eax, rc_code(%ebp)
920 movl $STATE_LIT_LIT, %edx
921 1: /* Decompress until we reach end of buffer */
925 print_character $('\n')
926 /* Undo BCJ filter */
928 movl out_start(%ebp), %esi
931 /* Restore registers and return */
932 addl $sizeof__lzma_dec, %esp
940 /* Specify minimum amount of stack space required */
941 .globl _min_decompress_stack
942 .equ _min_decompress_stack, ( sizeof__lzma_dec + 512 /* margin */ )