Mercurial > repo
changeset 3120:84768634eb6d
<L8D> fetch http://www.muppetlabs.com/~breadbox/software/tiny/bf.asm.txt
author | HackBot |
---|---|
date | Sun, 16 Jun 2013 06:13:29 +0000 |
parents | 0f7abdc831d1 |
children | 38bec9e9e910 |
files | bf.asm.txt |
diffstat | 1 files changed, 269 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bf.asm.txt Sun Jun 16 06:13:29 2013 +0000 @@ -0,0 +1,269 @@ +;; bf.asm: Copyright (C) 1999 Brian Raiter <breadbox@muppetlabs.com> +;; Licensed under the terms of the GNU General Public License, either +;; version 2 or (at your option) any later version. +;; +;; To build: +;; nasm -f bin -o bf bf.asm && chmod +x bf +;; To use: +;; bf < foo.b > foo && chmod +x foo + +BITS 32 + +;; This is the size, in bytes, of the data area supplied to compiled programs. + +%define arraysize 30000 + +;; For the compiler, the text segment is also the data segment. The +;; memory image of the compiler is inside the code buffer, and is +;; modified in place to become the memory image of the compiled +;; program. The area of memory that is the data segment for compiled +;; programs is not used by the compiler. The text and data segments of +;; compiled programs are really only different areas in a single +;; segment, from the system's point of view. Both the compiler and +;; compiled programs load the entire file contents into a single +;; memory segment which is both writeable and executable. + +%define TEXTORG 0x45E9B000 +%define DATAOFFSET 0x2000 +%define DATAORG (TEXTORG + DATAOFFSET) + +;; Here begins the file image. + + org TEXTORG + +;; At the beginning of the text segment is the ELF header and the +;; program header table, the latter consisting of a single entry. The +;; two structures overlap for a space of eight bytes. Nearly all +;; unused fields in the structures are used to hold bits of code. + +;; The beginning of the ELF header. + + db 0x7F, "ELF" ; ehdr.e_ident + +;; The top(s) of the main compiling loop. The loop jumps back to +;; different positions, depending on how many bytes to copy into the +;; code buffer. 
After doing that, esi is initialized to point to the +;; epilog code chunk, a copy of edi (the pointer to the end of the +;; code buffer) is saved in ebp, the high bytes of eax are reset to +;; zero (via the exchange with ebx), and then the next character of +;; input is retrieved. (The cmp's result is unused; it holds header data.) + +emitputchar: add esi, byte (putchar - decchar) - 4 +emitgetchar: lodsd +emit6bytes: movsd +emit2bytes: movsb +emit1byte: movsb +compile: lea esi, [byte ecx + epilog - filesize] + xchg eax, ebx + cmp eax, 0x00030002 ; ehdr.e_type (0x0002) + ; ehdr.e_machine (0x0003) + mov ebp, edi ; ehdr.e_version + jmp short getchar + +;; The entry point for the compiler (and compiled programs), and the +;; location of the program header table. + + dd _start ; ehdr.e_entry + dd proghdr - $$ ; ehdr.e_phoff + +;; The last routine of the compiler, called when there is no more +;; input. The epilog code chunk is copied into the code buffer. The +;; text origin is popped off the stack into ecx, and subtracted from +;; edi to determine the size of the compiled program. This value is +;; stored in the program header table, and then is moved into edx. +;; The program then jumps to the putchar routine, which sends the +;; compiled program to stdout before falling through to the epilog +;; routine and exiting. + +eof: movsd ; ehdr.e_shoff + xchg eax, ecx + pop ecx + sub edi, ecx ; ehdr.e_flags + xchg eax, edi + stosd + xchg eax, edx + jmp short putchar ; ehdr.e_ehsize + +;; 0x20 == the size of one program header table entry. + + dw 0x20 ; ehdr.e_phentsize + +;; The beginning of the program header table. 1 == PT_LOAD, indicating +;; that the segment is to be loaded into memory. + +proghdr: dd 1 ; ehdr.e_phnum & phdr.p_type + ; ehdr.e_shentsize + dd 0 ; ehdr.e_shnum & phdr.p_offset + ; ehdr.e_shstrndx + +;; (Note that the next four bytes, in addition to containing the first +;; two instructions of the bracket routine, also comprise the memory +;; address of the text origin.) 
+ + db 0 ; phdr.p_vaddr + +;; The bracket routine emits code for the "[" instruction. This +;; instruction translates to a simple "jmp near", but the target of +;; the jump is not known until the matching "]" is seen. The routine +;; thus emits a placeholder target, and pushes the address of the target +;; field (the byte after the 0xE9 opcode) in the code buffer onto the stack. + +bracket: mov al, 0xE9 + inc ebp + push ebp ; phdr.p_paddr + stosd + jmp short emit1byte + +;; This is where the size of the executable file is stored in the +;; program header table. The compiler updates this value just before +;; it outputs the compiled program. This is the only field in the two +;; headers that differs between the compiler and its compiled +;; programs. (While the compiler is reading input, the first byte of +;; this field is also used as an input buffer.) + +filesize: dd compilersize ; phdr.p_filesz + +;; The size of the program in memory. This entry creates an area of +;; bytes, arraysize in size, all initialized to zero, starting at +;; DATAORG. + + dd DATAOFFSET + arraysize ; phdr.p_memsz + +;; The code chunk for the "." instruction. eax is set to 4 to invoke +;; the write system call. ebx, the file handle to write to, is set to +;; 1 for stdout. ecx points to the buffer containing the bytes to +;; output, and edx equals the number of bytes to output. (Note that +;; the first byte of the first instruction, which is also the least +;; significant byte of the p_flags field, encodes to 0xB3. Having the +;; 2-bit set marks the memory containing the compiler, and its +;; compiled programs, as writeable.) + +putchar: mov bl, 1 ; phdr.p_flags + mov al, 4 + int 0x80 ; phdr.p_align + +;; The epilog code chunk. After restoring the initialized registers, +;; eax and ebx are both zero. eax is incremented to 1, so as to invoke +;; the exit system call. ebx specifies the process's return value. + +epilog: popa + inc eax + int 0x80 + +;; The code chunks for the ">", "<", "+", and "-" instructions. 
+ +incptr: inc ecx +decptr: dec ecx +incchar: inc byte [ecx] +decchar: dec byte [ecx] + +;; The main loop of the compiler continues here, by obtaining the next +;; character of input. This is also the code chunk for the "," +;; instruction. eax is set to 3 to invoke the read system call. ebx, +;; the file handle to read from, is set to 0 for stdin. ecx points to +;; a buffer to receive the bytes that are read, and edx equals the +;; number of bytes to read. + +getchar: mov al, 3 + xor ebx, ebx + int 0x80 + +;; If eax is zero (end of input) or negative (a read error), the +;; compiler proceeds to the eof routine. + + or eax, eax + jle eof + +;; Otherwise, esi is advanced four bytes (from the epilog code chunk +;; to the incptr code chunk), and the character read from the input is +;; stored in al, with the high bytes of eax reset to zero. + + lodsd + mov eax, [ecx] + +;; The compiler compares the input character with ">" and "<". esi is +;; advanced to the next code chunk with each failed test. + + cmp al, '>' + jz emit1byte + inc esi + cmp al, '<' + jz emit1byte + inc esi + +;; The next four tests check for the characters "+", ",", "-", and +;; ".", respectively. These four characters are contiguous in ASCII, +;; and so are tested for by doing successive decrements of eax. + + sub al, '+' + jz emit2bytes + dec eax + jz emitgetchar + inc esi + inc esi + dec eax + jz emit2bytes + dec eax + jz emitputchar + +;; The remaining instructions, "[" and "]", have special routines for +;; emitting the proper code. (Note that the jump back to the main loop +;; is at the edge of the short-jump range. Routines below here +;; therefore use this jump as a relay to return to the main loop; +;; however, in order to use it correctly, the routines must be sure +;; that the zero flag is cleared at the time.) + + cmp al, '[' - '.' + jz bracket + cmp al, ']' - '.' 
+relay: jnz compile + +;; The endbracket routine emits code for the "]" instruction, as well +;; as completing the code for the matching "[". The compiler first +;; emits "cmp dh, [ecx]" and the first two bytes of a "jnz near". The +;; location of the missing target in the code for the "[" instruction +;; is then retrieved from the stack, the correct target value is +;; computed and stored, and then the current instruction's jmp target +;; is computed and emitted (it is nonzero, so relay's jnz is taken). + +endbracket: mov eax, 0x850F313A + stosd + lea esi, [byte edi - 8] + pop eax + sub esi, eax + mov [eax], esi + sub eax, edi + stosd + jmp short relay + +;; This is the entry point, for both the compiler and its compiled +;; programs. The shared initialization code sets ecx to the start of the +;; compiled program's data array, and increments edx to one (edx is +;; assumed zero on entry; this also clears the zero flag for the relay +;; jump below). The registers are then saved, for the epilog to restore. + +_start: + mov ecx, DATAORG + inc edx + pusha + +;; At this point, the compiler and its compiled programs diverge. +;; Although every compiled program includes all the code in this file +;; above this point, only the three instructions directly above are +;; actually used by both. This point is where the compiler begins +;; storing the generated code, so only the compiler sees the +;; instructions below. This routine first modifies ecx to contain +;; TEXTORG, which is stored on the stack, and then offsets it to point +;; to filesize. edi is set equal to codebuf, and then the compiler +;; enters the main loop. + +codebuf: + mov ch, (TEXTORG >> 8) & 0xFF + push ecx + mov cl, filesize - $$ + lea edi, [byte ecx + codebuf - filesize] + jmp short relay + +;; Here ends the file image. + +compilersize equ $ - $$