Getting none from fields while parsing a pdf file-CodePudding

I am trying to parse a pdf file. I want to get all the values in a list or dictionary of the checkbox values. But I am getting this error.

"return OrderedDict((k, v.get('/V', '')) for k, v in fields.items()) AttributeError: 'NoneType' object has no attribute 'items'"

The code I am trying is this

from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader

def _getFields(obj, tree=None, retval=None, fileobj=None):
    
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval

def get_form_fields(infile):
    infile = PdfFileReader(open(infile, 'rb'))
    fields = _getFields(infile)
    return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())

if __name__ == '__main__':
    from pprint import pprint

    pdf_file_name = 'Guild.pdf'

    pprint(get_form_fields(pdf_file_name))

CodePudding user response：

After tracing through your code, on the 10th line it seems that catalog stores the value {'/Metadata': IndirectObject(16, 0), '/Pages': IndirectObject(1, 0), '/Type': '/Catalog'}, meaning /AcroForm is not a key in the dictionary and your function returns None.

CodePudding user response：

Your _getFields explicitly returns None from first if block. So basically that's where you could get this error from.