Home > front end >  openpyxl module (Python): how can I get the coordinates of a cell when I know the string value of th
openpyxl module (Python): how can I get the coordinates of a cell when I know the string value of th

Time:03-25

I have been working on a project that automates some Excel calculations. Somewhere in my code (I cannot share it because of confidentiality) I need to retrieve the coordinates of cells from which I know the values.

I know how to find the values, when I have the coordinates, but how does the other way round work?

CodePudding user response:

For example... A1 = 5 , B1 = 6

Your code should be,

from openpyxl import Workbook path = "C:\Users\Admin\Desktop\demo.xlsx"

workbook object is created

wb_obj = openpyxl.load_workbook(path)

sheet_obj = wb_obj.active

sheet_obj.cell(row = 2, column = 2) = sheet_obj.cell(row = 1, column = 1) sheet_obj.cell(row = 1, column = 2)

x = sheet_obj.cell(row = 2, column = 2)

print(x.value) #that can be access wb_obj.save('logo.xlsx')

Now you can accesses the value it's in the x

CodePudding user response:

Following is my solution (works for string search values)

Contents of my file ('Book1.xlsx'): Image of my data in the file

###############################################################
# Import openpyxl
# Note: openpyxl package provides both read and write capabilities to excel
import openpyxl
from openpyxl.utils import get_column_letter



# Class definitions should use CamelCase convention based on pep-8 guidelines
class CustomOpenpyxl:

    # Initialize the class with filename as only argument
    def __init__(self, _my_file_name):
        assert _my_file_name.split('.')[-1] == 'xlsx', 'Input file is not xlsx'
        self.my_filename = _my_file_name
        self.my_base_wb = openpyxl.load_workbook(self.my_filename, read_only=False)
        # following line will get the names of worksheets in the workbook
        self.ws_names_in_my_base_wb = self.my_base_wb.sheetnames
        # following line will get the number of worksheets in the workbook
        self.num_ws_in_my_base_wb = len(self.my_base_wb.sheetnames)
        # following line will set the last worksheet in the workbook as active
        self.my_base_active_ws = self.my_base_wb.active
        # following line will get the maximum row number in active worksheet
        self.my_base_active_ws_max_row = self.my_base_wb.active.max_row
        # following line will get the maximum column number in active worksheet
        self.my_base_active_ws_max_col = self.my_base_active_ws.max_column
        # following line will get the column names (as list) in active worksheet
        self.my_base_active_ws_col_titles = [_col_name[0].value for _col_name in self.my_base_active_ws.iter_cols()]

    # Method to get values for specific row in a given worksheet
    # Argument to this method is: - Row number of values to be fetched
    def get_specific_row_val_as_list_in_active_ws(self, _val_row_num):
        for _col in self.my_base_active_ws.iter_cols(min_col=1, max_col=1):
            # Iterate once for the specific row number in active worksheet
            for _row in self.my_base_active_ws.iter_rows(min_row=_val_row_num, max_row=_val_row_num):
                # Return a list of values
                return [_cell.value for _cell in _row]
    
    # Method to get cell coordinate by a search value
    # Argument to this method is:- search string
    # Assumption cell value is unique
    def get_cell_coordinate_by_value(self, _search_val):
        # List comprehension to get the row index based on the sarch value
        _row_processor = [_row_idx for _row_idx, _main_rec in enumerate(self.my_base_active_ws.values, start=1) if _search_val in _main_rec]
        # retrun type is a list, hence following line to assign it to variable and manage the data type later
        _row_idx = _row_processor[-1]
        # Get the value of the entire row and fetch the column index
        _col_processor = [_col_idx for _col_idx, _val in enumerate(self.get_specific_row_val_as_list_in_active_ws(int(_row_idx)), start=1) if _val == _search_val]
        # retrun type is a list, hence following line to assign it to variable and manage the data type later
        _col_idx = _col_processor[-1]
        # get the column letter
        _col_letter = get_column_letter(int(_col_idx))
        # string concatenation to join column letter and row index
        _cell_address = _col_letter str(_row_idx)
        return (_cell_address)
        
########################################################################################################################################################################

from Automation._Ashish_Samarth_Custom_Classes._under_progress_custom_xl_openpyxl_pkg import CustomOpenpyxl

# Create a new sub class and inherit methods from custom_pandas and CustomOpenpyxl
class SO( CustomOpenpyxl):
    # Initialize this class with the filename in question
    def __init__(self, _my_file_path, _my_file_name):
        # This is an important and needed step
        # Initialize the inherited class with the arguments needed for that class
        CustomOpenpyxl.__init__(self, _my_file_name)
        

_my_file_name = 'Book1.xlsx'

# Instantiate an object using the newly created class in this code block
# So that this object gets access to all methods in the class
_my_src_obj = SO(_my_file_path, _my_file_name)
print(_my_src_obj.get_cell_coordinate_by_value('BAGS'))
print(_my_src_obj.get_cell_coordinate_by_value(1000))

####################################################################PRINT Result################################################################################

B2
C4

Process finished with exit code 0
  • Related