this is the part of my code to retrive data from canadian stat data according to the chanpter and save them in the excel form then open these excel form and change them and save it in the text form.
the problem for me is that in the first column I want to eleminate - from it. I tried alot but it does'nt work at all.
# Commodity: Specific Chapter
ch_numbers = ["02","04","10","11","12","15","20","23","38"]
startrow = 0
for ch_number in ch_numbers:
dropdown_rC = driver.find_element_by_id('report_chapters')
dd_rC = Select(dropdown_rC)
dd_rC.select_by_value(ch_number)
# for retriving the data and store it in the excel form
driver.find_element(By.XPATH,'//*[@id="report"]/div[1]/div[3]/div[5]/p[2]/button').click()
time.sleep(2)
Canada_Result=[]
pages = len(driver.find_elements_by_xpath('//a[@ or @ and @title]'))
pages = driver.find_element_by_xpath('//a[@onclick and @ or @ and @title][%d]' % (pages)).text.strip("Page\n")
if pages == '':
pages = 1
for J in range (int(pages)):
year_month = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[1]') # the year details in each row //*[@id="report_table"]/tbody/tr[1]/td[1]
commodities = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[2]/a')
Countries = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[4]')
quantities = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[7]')
weights = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[8]/abbr')
time.sleep(1)
period_entries = len(commodities)
for i in range(period_entries):
temporary_data= {'year':year_month[i].text,'Commodity':commodities[i].text,'Country':Countries[i].text,'quantity':quantities[i].text,'weight': weights[i].text }
Canada_Result.append(temporary_data)
df_data = pd.DataFrame(Canada_Result)
df_data.to_excel('Canada_scrapping_result_' str(ch_number) '.xlsx', index=False, header=0)
if J == int(pages) - 1:
print("Done")
break
# click on the Next button
driver.find_element(By.XPATH,'//*[@id="report_results_next"]').click()
time.sleep(1)
# ----------for convert the data in excel and save it again---------------
df = pd.read_excel('Canada_scrapping_result_' str(ch_number) '.xlsx')
df.to_csv('Remove_characters_' str(ch_number) '.txt', index=False, header=0)
time.sleep(2)
# Click Submit
driver.find_element(By.XPATH,'//*[@id="report"]/div[1]/div[3]/div[5]/p[2]/button').click() # For going to the next page
time.sleep(2)
CodePudding user response:
If they are string values, you can use replace
in list comprehension for each row:
#### Mock dataset
data = [["2022-01-01" , "0207.14.93.00" , "United States" , 2000 , "KGM"],
["2022-01-01" , "0407.14.03.00" , "United States" , 3400, "KGM"]]
df = pd.DataFrame(data, columns = ["Date", "Num1", "Country", "Num1", "Col"])
#############
df.Date = [x.replace("-", "") for x in df.Date]
df
Output:
Date Num1 Country Num1 Col
0 20220101 0207.14.93.00 United States 2000 KGM
1 20220101 0407.14.03.00 United States 3400 KGM
If they are loaded in as Timestamps, then try this:
df.Date = df.Date.dt.strftime('%Y%m%d')
CodePudding user response:
Use the pandas .str.replace()
method.
df['year'] = df['year'].str.replace('-', '')