Home > Back-end >  Replace one character with string in assembly language 8086
Replace one character with string in assembly language 8086

Time:09-11

Hi, I'm writing a program in 8086 assembly language that detects 'E' and replaces it with 'Egg'. Example: "In english: English!" becomes "In eggnglish: Eggnglish!". Here's my attempt:

; DOS program (MASM/TASM-style, small model) that is meant to print a
; message and then print it again with 'E'/'e' replaced by "Egg"/"egg".
; NOTE(review): the listing is kept exactly as posted; the comments below
; mark the places where it does not do what that description asks for.
.MODEL SMALL
.STACK 100H
.DATA
english DB "In English$"        ; source text, '$'-terminated for int 21h/09h
result DB 100 dup(?)            ; uninitialised buffer that 'again' copies into
;english DB "In english: English$"
;In english: You like english and espresso, excellent!
newline DB 10,13,'$'            ; LF, CR (in that order), then the '$' terminator
bigegg DB "Egg$"                ; text printed in place of 'E'
smallegg DB "egg$"              ; text printed in place of 'e'

.CODE
MAIN PROC
MOV AX, @DATA
MOV DS, AX                      ; DS = data segment, so the labels above resolve

mov ah, 09h                     ; int 21h/09h: print '$'-terminated string at DS:DX
lea dx, english
int 21h

mov ah, 09h
lea dx, newline
int 21h

lea si, english ;SI = read pointer, starts at the first char of the message
lea di, result                  ; DI = write pointer into result

mov cx, 19                      ; iteration count for loop1
                                ; NOTE(review): 19 is the length of the commented-out
                                ; "In english: English"; the active string has 10
                                ; characters before its '$'

loop1:
mov al, [si]                    ; AL = current source character
cmp al, 45h ;E
je bige
jne checke                      ; always taken when the je above is not

bige:
mov bx, si                      ; BX is not read again anywhere in this listing
mov cx, si                      ; CX = the offset held in SI (an address, not a
                                ; character count); loop1's counter is lost here
loop2:
mov ah, 02h                     ; int 21h/02h: print the character in DL
mov dl, [di]                    ; DI is not advanced in this loop, so the same byte
                                ; is printed every time; DI points at the slot after
                                ; the last byte stored by 'again', which has not
                                ; been written yet
int 21h
loop loop2

inc si                          ; step past the 'E'
mov ah, 09h
lea dx, bigegg
int 21h                         ; print "Egg"
jmp change

checke:
cmp al, 65h ;e
je smalle
jne again                       ; always taken when the je above is not

smalle:                         ; same sequence as bige, with "egg" as the text
mov bx, si
mov cx, si                      ; CX = offset in SI, as in bige
loop3:
mov ah, 02h
mov dl, [di]                    ; same byte on every iteration, as in loop2
int 21h
loop loop3

inc si                          ; step past the 'e'
mov ah, 09h
lea dx, smallegg
int 21h                         ; print "egg"
jmp change

again:
mov al, [si] ;reload the current character (AL already holds it from loop1)
mov [di], al                    ; copy it into result
inc si
inc di
loop loop1                      ; falls through into 'change' once CX reaches 0

change:
lea si, english   3 ; NOTE(review): the trailing "3" looks like stray text and this line is unlikely to assemble as written - confirm intent
sub cx, si                      ; CX is 0 on every path that gets here (each earlier
                                ; 'loop' ran until CX was 0), so this leaves CX = -SI
mov cx, cx                      ; has no effect

loop4:
mov ah, 02h
mov dl, [di]                    ; DI is not advanced here either: one byte of
                                ; result is printed CX times
int 21h
loop loop4

MOV AX, 4C00H                   ; int 21h/4Ch: terminate, return code 0
INT 21H

MAIN ENDP
END MAIN

What I actually want is this: when an 'e' is detected, the program should first output the text before it ('In '), skip the 'e', print 'egg', and then print the rest ('nglish'). However, when I run my code, it prints blank space instead. I would appreciate it if anyone could guide me on this. Thank you for reading!

CodePudding user response:

Here's my solution. I intentionally didn't optimise it much, to show the workings of the loop in a simple way. This assembles to a flat .COM format file using a command like nasm test.asm -o test.com (which uses the implicit default -f bin format of NASM). Should mostly work with another assembler too.

I made some line comments and two protocol comments for the subroutines.


        ; Flat .COM program, NASM syntax, 8086 instructions only.
        ; Writes the message as it is, then writes it a second time with
        ; every 'E' replaced by "Egg" and every 'e' replaced by "egg".
        ;
        ; Register roles while scanning:
        ;   si = offset of the byte being examined
        ;   dx = offset at which the current, not yet written chunk begins
        cpu     8086
        org     100h                    ; same value as 256

DOS_EXIT_OK     equ 4C00h               ; ah = 4Ch (terminate), al = return code 0

start:
        mov     cx, message_length
        mov     dx, message
        call    display                 ; show the unmodified message

        mov     cx, linebreak_length
        mov     dx, linebreak
        call    display

        mov     si, message             ; scanning begins at the first byte

next_chunk:
        mov     dx, si                  ; a new chunk begins at the current byte

scan:
        mov     al, [si]                ; fetch the byte under si
        test    al, al                  ; zero byte marks the end of the message
        jz      finish
        cmp     al, 'E'
        je      upper_hit
        cmp     al, 'e'
        je      lower_hit
        inc     si                      ; ordinary byte: leave it in the chunk
        jmp     scan

upper_hit:
        call    displaychunk            ; write what was collected before the letter
        mov     cx, replace_big_length
        mov     dx, replace_big
        call    display                 ; write "Egg" in its place
        inc     si                      ; step over the replaced letter
        jmp     next_chunk              ; dx is reloaded there for the next chunk

lower_hit:                              ; mirrors upper_hit, other replacement text
        call    displaychunk
        mov     cx, replace_small_length
        mov     dx, replace_small
        call    display
        inc     si
        jmp     next_chunk

finish:
        call    displaychunk            ; write the bytes after the last replacement

        mov     cx, linebreak_length
        mov     dx, linebreak
        call    display

        mov     ax, DOS_EXIT_OK
        int     21h


                ; displaychunk: write the bytes from ds:dx up to, but not
                ;               including, ds:si.
                ; INP:  ds:dx -> first byte of the chunk
                ;       ds:si -> first byte behind the chunk (the letter that
                ;                is being replaced, or the terminator)
                ; REM:  The chunk length is si minus dx. When dx equals si
                ;        the chunk is empty and no write call is made.
STDOUT_HANDLE   equ 1
DOS_WRITE       equ 40h                 ; int 21h function: write to file handle

displaychunk:
        mov     cx, si
        sub     cx, dx                  ; cx = si - dx = chunk length
        je      skip_write              ; zero length -->

                ; display: write cx bytes starting at ds:dx.
                ; INP:  ds:dx -> data to write
                ;       cx = number of bytes
display:
        mov     ah, DOS_WRITE
        mov     bx, STDOUT_HANDLE
        int     21h
skip_write:
        retn



; Data for the program. Each *_length constant is the distance from its
; label to the point where the equ appears.

; Text to convert. The length is taken before the zero byte, so the
; terminator is found by the scan loop but is not part of the text.
message:                db "In english: English"
message_length          equ $ - message
                        db 0

; Replacement written for 'E'.
replace_big:            db "Egg"
replace_big_length      equ $ - replace_big

; Replacement written for 'e'.
replace_small:          db "egg"
replace_small_length    equ $ - replace_small

; Carriage return followed by line feed.
linebreak:              db 13, 10
linebreak_length        equ $ - linebreak

A few possible optimisations:

  • Share code for the big and small cases

  • Share incrementing si for all control flow paths

  • Use lodsb to both load from byte [ds:si] and increment si

  • Dynamically write the to-be-replaced letter into the replacement string so it gets a small or big letter from the source message

  • The _length equates could use local labels for NASM to reduce the amount of typing for the labels (avoided here for better compatibility with other assemblers)

CodePudding user response:

The errors in your code are numerous and ecm's first comment summarizes most of them. I will not repeat this but present a solution that shows a loop where the main loop condition is tested at the bottom of the loop (minimizing branching), and where there are, in the loop body, additional conditions accompanied by a nested loop.
Rather than outputting the result character by character or even chunk after chunk, I display the result all at once, readily accompanied by an included carriage return / linefeed pair and the necessary $-terminator.
The program is a .COM executable file. I use FASM but very few changes are needed to use NASM instead.
In contrast to ecm's solution, I stay closer to the OP's attempt that uses $-terminated strings that are so common under DOS.

    ; .COM program (FASM syntax). Prints the message, then prints a copy in
    ; which every "E" has become "Egg" and every "e" has become "egg".
    ; The copy is assembled in `result` and shown with one DOS call; the
    ; CR/LF and the "$" terminator of the source are copied along with it.
    ;
    ; Register roles in the loop:
    ;   si -> next source byte        di -> next free byte in `result`
    ;   bx -> "$"-terminated text to insert for the current source byte
    ORG  256

    mov  dx, english
    mov  ah, 09h       ; DOS.PrintString
    int  21h

    cld                ; lodsb/stosb below must move upwards through memory
    mov  si, english
    mov  di, result
    jmp  BottomOfLoop  ; the loop condition is tested at the bottom
TopOfLoop:
    mov  bx, bigegg
    cmp  al, "E"
    je   .b            ; skip the store: the "E" itself is not copied
    mov  bx, smallegg
    cmp  al, "e"
    je   .b            ; skip the store: the "e" itself is not copied
    mov  bx, empty     ; any other byte: copy it, insert nothing

.a: stosb              ; store AL (the source byte, or a replacement byte)
.b: mov  al, [bx]      ; next byte of the text to insert
    inc  bx
    cmp  al, "$"       ; "$" ends the inserted text
    jne  .a

BottomOfLoop:
    lodsb
    cmp  al, 0         ; the 0 after the source's "$" ends the conversion
    jne  TopOfLoop

    mov  dx, result    ; show the converted copy (was `english`, which
    mov  ah, 09h       ;  printed the unmodified text a second time)
    int  21h           ; DOS.PrintString

    mov  ax, 4C00h     ; DOS.Terminate
    int  21h

; Source text. The CR, LF and "$" are ordinary bytes to the main loop, so
; they are copied to the output; the final 0 is what stops the loop.
english   db "In english: English", 13, 10, "$", 0
bigegg    db "Egg$"     ; inserted in place of "E"
smallegg  db "egg"      ; inserted in place of "e"; has no "$" of its own and
                        ;  is ended by the "$" of `empty` right behind it
empty     db "$"        ; zero-length insert, used for every other byte

result    rb 100        ; reserved, uninitialised space for the converted text

Using rb (use resb for NASM), we can avoid including the result buffer in the file, thereby keeping the final executable small. In general, small files load somewhat faster.

  • Related