Home > Net >  Is the "mmap tutorial" incorrect, or does GCC miscompile it?
Is the "mmap tutorial" incorrect, or does GCC miscompile it?

Time:11-05

This mmap tutorial from 15 years ago ranks high in Google searches, but it actually runs subtly incorrectly on my Linux system.

mmap_write.c:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS  (1000)
#define FILESIZE (NUMINTS * sizeof(int))

int main(int argc, char *argv[])
{
    int i;
    int fd;
    int result;
    int *map;  /* mmapped array of int's */

    /* Open a file for writing.
     *  - Creating the file if it doesn't exist.
     *  - Truncating it to 0 size if it already exists. (not really needed)
     *
     * Note: "O_WRONLY" mode is not sufficient when mmaping.
     */
    fd = open(FILEPATH, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
    if (fd == -1) {
    perror("Error opening file for writing");
    exit(EXIT_FAILURE);
    }

    /* Stretch the file size to the size of the (mmapped) array of ints
     */
    result = lseek(fd, FILESIZE-1, SEEK_SET);
    if (result == -1) {
    close(fd);
    perror("Error calling lseek() to 'stretch' the file");
    exit(EXIT_FAILURE);
    }
    
    /* Something needs to be written at the end of the file to
     * have the file actually have the new size.
     * Just writing an empty string at the current file position will do.
     *
     * Note:
     *  - The current position in the file is at the end of the stretched 
     *    file due to the call to lseek().
     *  - An empty string is actually a single '\0' character, so a zero-byte
     *    will be written at the last byte of the file.
     */
    result = write(fd, "", 1);
    if (result != 1) {
    close(fd);
    perror("Error writing last byte of the file");
    exit(EXIT_FAILURE);
    }

    /* Now the file is ready to be mmapped.
     */
    map = mmap(0, FILESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
    close(fd);
    perror("Error mmapping the file");
    exit(EXIT_FAILURE);
    }
    
    /* Now write int's to the file as if it were memory (an array of ints).
     */
    for (i = 1; i <=NUMINTS;   i) {
    map[i] = 2 * i; 
    }

    /* Don't forget to free the mmapped memory
     */
    if (munmap(map, FILESIZE) == -1) {
    perror("Error un-mmapping the file");
    /* Decide here whether to close(fd) and exit() or not. Depends... */
    }

    /* Un-mmaping doesn't close the file, so we still need to do that.
     */
    close(fd);
    return 0;
}

mmap_read.c:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS  (1000)
#define FILESIZE (NUMINTS * sizeof(int))

int main(int argc, char *argv[])
{
    int i;
    int fd;
    int *map;  /* mmapped array of int's */

    fd = open(FILEPATH, O_RDONLY);
    if (fd == -1) {
    perror("Error opening file for reading");
    exit(EXIT_FAILURE);
    }

    map = mmap(0, FILESIZE, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
    close(fd);
    perror("Error mmapping the file");
    exit(EXIT_FAILURE);
    }
    
    /* Read the file int-by-int from the mmap
     */
    for (i = 1; i <=NUMINTS;   i) {
    printf("%d: %d\n", i, map[i]);
    }

    if (munmap(map, FILESIZE) == -1) {
    perror("Error un-mmapping the file");
    }
    close(fd);
    return 0;
}

If the file does not already exist, the output of mmap_read is

...
998: 1996
999: 1998
1000: 2000

But if it does, the output is

...
998: 1996
999: 1998
1000: 0

Should the author have flushed the write? Or is GCC miscompiling the code?


Edit: I noticed that it's the prior existence or non-existence of the file that makes a difference, not the compilation flag.

CodePudding user response:

You are starting at the second element, and writing 2000 after the end of the map.

for (i = 1; i <=NUMINTS;   i) {
    map[i] = 2 * i; 
}

should be

for (i = 0; i < NUMINTS;   i) {
    map[i] = 2 * ( i   1 ); 
}

Demo

It's not a buffering issue. write is a system call, so the data passed to the OS directly. It doesn't mean the data has been written to disk when write returns, but it is in the OS's hands, so it's as if it was on disk as far as OS functions are concerned, including its memory-mapping functionality.

CodePudding user response:

In C indexes are from zero. Writing and reading index 1000 you invoke undefined behaviour

Change to in the write.:

   for (i = 1; i <=NUMINTS;   i) {
       map[i - 1] = 2 * i; 
   }

and reading to:

    for (i = 1; i <=NUMINTS;   i) {
    printf("%d: %d\n", i, map[i-1]);
    }
  • Related