I have a list of links that I collected from Google search results, and I'm downloading these (PDF) files using Selenium.
I want to rename each file so that its filename contains the URL it was downloaded from.
What can I do?
I have not tried any code for this yet, so please help me. Below is the Selenium code that I use to download the files.
# Download every URL in `link` into a per-run folder using Chrome.
# Assumes `os`, selenium's `webdriver` and the list `link` are already
# available earlier in the script (they are not defined in this fragment).

# Ask for the folder name; it is upper-cased before being used.
folderName = input(("Enter The FolderName:\t")).upper()
newDir = "C:\\Users\\sulta\\Data Science CV\\" + folderName
print(newDir)
os.makedirs(newDir, exist_ok=True)  # create the folder only if it is missing

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
    "download.default_directory": newDir,  # download the files to this path
    "download.prompt_for_download": False,  # download without asking
    "download.directory_upgrade": True,
    "plugins.always_open_pdf_externally": True,  # do not show PDFs inside Chrome
})
driver = webdriver.Chrome(options=options)

# Set the timeout once, before the first get(); setting it after get()
# leaves the first page load without the 10-second limit.
driver.set_page_load_timeout(10)

for url in link:  # all links are stored in the list named `link`
    try:
        driver.get(url)
    except Exception:
        # Best effort: skip links that fail or time out, but let
        # KeyboardInterrupt/SystemExit through (a bare `except:` would not).
        continue
CodePudding user response:
I don't think there's any Python core library that can do this for a Selenium download. What you can do is have a folder watchdog that keeps track of any changes or events that occur in the folder, so that you can then rename the new file from there.
Check out pyWatch
it could be of help.
CodePudding user response:
One solution would be to list all the files in the directory, download the new file, then list the directory again to find the new file's name and rename it to what you want, something like this:
# Download every URL in `link` with Chrome and rename each downloaded file
# so that its name is derived from the URL it came from.
# Assumes selenium's `webdriver` and the list `link` are already available
# earlier in the script (they are not defined in this fragment).
import os
import re
import time

# Ask for the folder name; it is upper-cased before being used.
folderName = input(("Enter The FolderName:\t")).upper()
newDir = "C:\\Users\\sulta\\Data Science CV\\" + folderName
print(newDir)
os.makedirs(newDir, exist_ok=True)  # create the folder only if it is missing

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
    "download.default_directory": newDir,  # download the files to this path
    "download.prompt_for_download": False,  # download without asking
    "download.directory_upgrade": True,
    "plugins.always_open_pdf_externally": True,  # do not show PDFs inside Chrome
})
driver = webdriver.Chrome(options=options)

# Set the timeout once, before the first get(), so it applies to every load.
driver.set_page_load_timeout(10)

for url in link:  # all links are stored in the list named `link`
    files_before = set(os.listdir(newDir))  # snapshot before the download
    try:
        driver.get(url)
    except Exception:
        continue  # best effort: skip links that fail or time out

    # get() does not wait for the download itself, so poll the folder until
    # a finished file shows up. Chrome normally keeps partial downloads
    # under a temporary ".crdownload"/".tmp" name, so those are ignored.
    deadline = time.monotonic() + 30  # give each download up to 30 seconds
    finished = set()
    while time.monotonic() < deadline:
        finished = {
            name
            for name in set(os.listdir(newDir)) - files_before
            if not name.lower().endswith((".crdownload", ".tmp"))
        }
        if finished:
            break
        time.sleep(0.5)
    if not finished:
        continue  # nothing was downloaded for this link

    new_file = min(finished)  # the new file in the folder

    # Build the new name from the URL: characters that Windows does not
    # allow in file names are collapsed into "-", and the downloaded file's
    # extension is kept.
    extension = os.path.splitext(new_file)[1]
    new_name = re.sub(r'[\\/:*?"<>|]+', "-", url)
    if not new_name.lower().endswith(extension.lower()):
        new_name += extension

    try:
        # os.replace overwrites an existing target, so downloading the same
        # URL twice does not fail on Windows the way os.rename would.
        os.replace(os.path.join(newDir, new_file), os.path.join(newDir, new_name))
    except OSError as err:
        # e.g. the resulting name is too long for the file system
        print(f"Could not rename {new_file!r}: {err}")
CodePudding user response:
# Template: visit each link, derive a file name from it, and rename the
# downloaded file. Assumes `driver` and the list `link` already exist.
import os
import re

# Set the timeout once, before the first get(), so it applies to every load.
driver.set_page_load_timeout(10)

for url in link:
    try:
        driver.get(url)
    except Exception:
        continue  # best effort: skip links that fail or time out

    # create the file name from the link: every run of characters that is
    # not allowed in a Windows file name (not just "/") becomes "-"
    file_name = re.sub(r'[\\/:*?"<>|]+', "-", url)

    # add the code to download the pdf file

    # rename the downloaded file, keeping it in the same folder
    downloaded_path = '<downloaded file name with path>'  # placeholder: fill in the real path
    try:
        os.rename(
            downloaded_path,
            os.path.join(os.path.dirname(downloaded_path), file_name),
        )
    except OSError:
        continue  # the file was not found or could not be renamed