Home > database >  Multiprocessing Error - File Access Issue
Multiprocessing Error - File Access Issue

Time:09-19

I have built a combined threading/multiprocessing solution, but I am having trouble resolving the multiprocessing errors it raises.

Steps:

  1. Delete existing directory
  2. Create new directory (to write files to)
  3. write files via threading/multiprocessing functions

It took a while to figure out that I needed to run this in an external console (main can't be imported when running in interactive mode, i.e. in an IDE). The problem can be replicated with the code below as long as a folder called 'new' exists somewhere under your home directory.

  • Persistent permission errors and non-existent-directory errors keep occurring. As I'm new to implementing multiprocessing, I'm sure there's something I'm overlooking here.
import glob
import os
from pathlib import Path
import time
import shutil
import pickle

import numpy as np

from concurrent.futures import ThreadPoolExecutor
from functools import partial
import multiprocessing

# Module-level setup: there is no `if __name__ == '__main__':` guard around
# these statements, so they run every time this file's body is executed.
# NOTE(review): the tracebacks reported for this script show these lines being
# re-run from multiprocessing's spawn bootstrap (runpy.run_path on the main file).

# Change into the first directory named 'new' found anywhere under the home
# directory. `[0]` raises IndexError when the recursive glob finds nothing.
os.chdir(str(Path.home()))
real_path = glob.glob('./**/new', recursive=True)[0]
file_path = os.path.abspath(real_path)
os.chdir(r"{}".format(file_path))

# Create the 'temp' folder unless an entry with exactly that name is listed.
if 'temp' not in os.listdir(file_path):
    os.makedirs('temp')

# Reset the output directory: delete 'files/data' if present, then (re)create
# it so it starts out empty.
if os.path.isdir('files/data'):
    shutil.rmtree('files/data')
    os.makedirs('files/data')
else:
    os.makedirs('files/data')

def test(x):
    """Write a placeholder pickle for ``x`` and announce it on stdout.

    The file ``./files/data/<x>.pickle`` (relative to the current working
    directory) is created or overwritten with a pickled ``None``.
    """
    destination = f'./files/data/{x}.pickle'
    with open(destination, mode='wb') as out_file:
        pickle.dump(None, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    print(f'{x} Complete')
               
def multithread(f, lst):
    """Apply ``f`` to every item of ``lst`` on a thread pool.

    Args:
        f: Callable taking a single item; its return value is discarded.
        lst: Iterable of items to process.

    Raises:
        Exception: Whatever ``f`` raises for an item is re-raised here (the
            first failure in input order) after the pool has shut down.
    """
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        # Executor.map() only re-raises an exception from ``f`` when the
        # corresponding result is pulled from the returned iterator. Drain it
        # so failures (e.g. a file that cannot be written) are not dropped.
        for _ in thread_executor.map(f, lst):
            pass
        
def multiprocessing_executor(lst, f):
    """Split ``lst`` into chunks and process each chunk in a pool worker.

    Every chunk is handed to ``multithread`` inside a ``multiprocessing.Pool``
    worker, which applies ``f`` to the chunk's items.

    Args:
        lst: Sequence of items; it is partitioned with ``np.array_split``.
        f: Callable applied to each item. It is shipped to the workers inside
            a ``functools.partial``, so it must be picklable.
    """
    # ``os.cpu_count() // 1.5`` is a float and evaluates to 0.0 on a
    # single-core machine (array_split rejects 0 sections); cpu_count() may
    # also return None. Use an explicit integer count of at least one chunk.
    n_chunks = max(1, int((os.cpu_count() or 1) // 1.5))
    chunks = np.array_split(lst, n_chunks)
    with multiprocessing.Pool() as multiprocessing_pool:
        multiprocessing_pool.map(partial(multithread, f), chunks)


# Entry point: the pool is only started when this file runs as the main script.
if __name__ == '__main__':
    # Ids 0..999; test() writes one pickle file per id.
    num_list = range(0,1000)
    multiprocessing_executor(num_list, test)
    
# This sleep is outside the guard above, so the 200-second pause happens
# whenever the module body is executed, not only in the script process.
time.sleep(200)

Errors:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 33, in <module>
    os.makedirs('files/data')
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
    mkdir(name, mode)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
Traceback (most recent call last):
Traceback (most recent call last):
  File "<string>", line 1, in <module>
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "<string>", line 1, in <module>
  File "<string>", line 1, in <module>
  File "<string>", line 1, in <module>
  File "<string>", line 1, in <module>
  File "<string>", line 1, in <module>
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
    prepare(preparation_data)
    prepare(preparation_data)
    prepare(preparation_data)
    prepare(preparation_data)
    prepare(preparation_data)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
    prepare(preparation_data)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
    _fixup_main_from_path(data['init_main_from_path'])
    _fixup_main_from_path(data['init_main_from_path'])
    _fixup_main_from_path(data['init_main_from_path'])
    _fixup_main_from_path(data['init_main_from_path'])
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
    main_content = runpy.run_path(main_path,
    main_content = runpy.run_path(main_path,
    main_content = runpy.run_path(main_path,
    main_content = runpy.run_path(main_path,
    main_content = runpy.run_path(main_path,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
    main_content = runpy.run_path(main_path,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 269, in run_path
    return _run_module_code(code, init_globals, run_name,
    return _run_module_code(code, init_globals, run_name,
    return _run_module_code(code, init_globals, run_name,
    return _run_module_code(code, init_globals, run_name,
    return _run_module_code(code, init_globals, run_name,
    return _run_module_code(code, init_globals, run_name,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
    return _run_module_code(code, init_globals, run_name,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 96, in _run_module_code
    _run_code(code, mod_globals, init_globals,
    _run_code(code, mod_globals, init_globals,
    _run_code(code, mod_globals, init_globals,
    _run_code(code, mod_globals, init_globals,
    _run_code(code, mod_globals, init_globals,
    _run_code(code, mod_globals, init_globals,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
    _run_code(code, mod_globals, init_globals,
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
  File "C:\Users\user\Anaconda3\envs\test_env\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
    exec(code, run_globals)
    exec(code, run_globals)
    exec(code, run_globals)
    exec(code, run_globals)
    exec(code, run_globals)
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 32, in <module>
    exec(code, run_globals)
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 32, in <module>
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 33, in <module>
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 35, in <module>
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 33, in <module>
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 33, in <module>
  File "C:\Users\user\ShareDrive\Desktop\Test\ess_caller\untitled0.py", line 35, in <module>
    shutil.rmtree('files/data')
    shutil.rmtree('files/data')
    os.makedirs('files/data')
    os.makedirs('files/data')
    os.makedirs('files/data')
    os.makedirs('files/data')
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 747, in rmtree
    os.makedirs('files/data')
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 747, in rmtree
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
  File "C:\Users\user\Anaconda3\envs\test_env\lib\os.py", line 225, in makedirs
    return _rmtree_unsafe(path, onerror)
    return _rmtree_unsafe(path, onerror)
    mkdir(name, mode)
    mkdir(name, mode)
    mkdir(name, mode)
    mkdir(name, mode)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 598, in _rmtree_unsafe
    mkdir(name, mode)
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 621, in _rmtree_unsafe
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'files/data'
    onerror(os.scandir, path, sys.exc_info())
    onerror(os.rmdir, path, sys.exc_info())
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 595, in _rmtree_unsafe
  File "C:\Users\user\Anaconda3\envs\test_env\lib\shutil.py", line 619, in _rmtree_unsafe
    with os.scandir(path) as scandir_it:
FileNotFoundError: [WinError 3] The system cannot find the path specified: 'files/data'
    os.rmdir(path)
PermissionError: [WinError 5] Access is denied: 'files/data'

CodePudding user response:

It appears from your directory names that you are running this on Windows, a platform that uses the spawn start method to create new processes. What this means for you is that all code at global scope that is not within an if __name__ == '__main__': block will be executed as part of the initialization of each multiprocessing pool process. I am specifically referring to all the code that creates and re-creates directories. You need to ensure that this code is executed only by the main process, as part of its initialization, before any pool is created.

You also have at global scope a call to time.sleep(200) causing a delay of 200 seconds before any pool process will start executing submitted tasks. I see no reason for this.

# These are the only import statements required by each
# multiprocessing pool process. The others are only
# required by the main process and have been moved so that
# they are not unnecessarily executed by child processes (although leaving
# them at global scope and importing modules that are not
# used is certainly not fatal):
from concurrent.futures import ThreadPoolExecutor
import pickle

if __name__ == '__main__':
    import glob
    import os
    from pathlib import Path
    import shutil

    import numpy as np

    from functools import partial
    import multiprocessing

    # One-time filesystem setup. It lives under the __main__ guard so that it
    # is executed by the main process only, before any pool is created.

    # Change into the first directory named 'new' found under the home
    # directory, failing with a clear message when there is none.
    os.chdir(Path.home())
    matches = glob.glob('./**/new', recursive=True)
    if not matches:
        raise FileNotFoundError(
            "no directory named 'new' found under the home directory")
    real_path = matches[0]
    file_path = os.path.abspath(real_path)
    os.chdir(file_path)

    # Create 'temp' if it is missing. exist_ok avoids both the
    # check-then-create race and the case-sensitive comparison against
    # os.listdir(), which misses e.g. 'Temp' on a case-insensitive filesystem.
    os.makedirs('temp', exist_ok=True)

    # Start from an empty 'files/data' output directory.
    if os.path.isdir('files/data'):
        shutil.rmtree('files/data')
    os.makedirs('files/data')

def test(x):
    """Create ``./files/data/<x>.pickle`` holding a pickled ``None``.

    The path is relative to the current working directory. A completion
    message is printed once the file has been written and closed.
    """
    pickle_path = f'./files/data/{x}.pickle'
    with open(pickle_path, mode='wb') as sink:
        pickle.dump(None, sink, protocol=pickle.HIGHEST_PROTOCOL)
    print(f'{x} Complete')

def multithread(f, lst):
    """Apply ``f`` to every item of ``lst`` on a thread pool.

    Args:
        f: Callable taking a single item; its return value is discarded.
        lst: Iterable of items to process.

    Raises:
        Exception: Whatever ``f`` raises for an item is re-raised here (the
            first failure in input order) after the pool has shut down.
    """
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        # Executor.map() only re-raises an exception from ``f`` when the
        # corresponding result is pulled from the returned iterator. Drain it
        # so failures (e.g. a file that cannot be written) are not dropped.
        for _ in thread_executor.map(f, lst):
            pass

def multiprocessing_executor(lst, f):
    """Split ``lst`` into chunks and process each chunk in a pool worker.

    Every chunk is handed to ``multithread`` inside a ``multiprocessing.Pool``
    worker, which applies ``f`` to the chunk's items.

    Args:
        lst: Sequence of items; it is partitioned with ``np.array_split``.
        f: Callable applied to each item. It is shipped to the workers inside
            a ``functools.partial``, so it must be picklable.
    """
    # ``os.cpu_count() // 1.5`` is a float and evaluates to 0.0 on a
    # single-core machine (array_split rejects 0 sections); cpu_count() may
    # also return None. Use an explicit integer count of at least one chunk.
    n_chunks = max(1, int((os.cpu_count() or 1) // 1.5))
    chunks = np.array_split(lst, n_chunks)
    with multiprocessing.Pool() as multiprocessing_pool:
        multiprocessing_pool.map(partial(multithread, f), chunks)


if __name__ == '__main__':
    # Ids 0..999; each one ends up as its own pickle file via test().
    num_list = range(1000)
    multiprocessing_executor(lst=num_list, f=test)

In reality, the following sequential code, without using multiprocessing and/or multithreading code, will run just as fast if not faster:

import pickle

import glob
import os
from pathlib import Path
import shutil

def test(x):
    """Dump a pickled ``None`` to ``./files/data/<x>.pickle`` and log it.

    The target path is resolved against the current working directory; an
    existing file of the same name is overwritten.
    """
    out_path = f'./files/data/{x}.pickle'
    with open(out_path, mode='wb') as stream:
        pickle.dump(None, stream, protocol=pickle.HIGHEST_PROTOCOL)
    print(f'{x} Complete')


# Change into the first directory named 'new' found under the home directory,
# failing with a clear message when there is none.
os.chdir(Path.home())
matches = glob.glob('./**/new', recursive=True)
if not matches:
    raise FileNotFoundError(
        "no directory named 'new' found under the home directory")
real_path = matches[0]
file_path = os.path.abspath(real_path)
os.chdir(file_path)

# Create 'temp' if it is missing. exist_ok avoids both the check-then-create
# race and the case-sensitive comparison against os.listdir(), which misses
# e.g. 'Temp' on a case-insensitive filesystem.
os.makedirs('temp', exist_ok=True)

# Start from an empty 'files/data' output directory.
if os.path.isdir('files/data'):
    shutil.rmtree('files/data')
os.makedirs('files/data')

# Write the 1000 pickle files one after another.
for x in range(1000):
    test(x)
  • Related