I am trying to parse a PDF file and collect the values of all its checkboxes into a list or dictionary, but I am getting this error:
"return OrderedDict((k, v.get('/V', '')) for k, v in fields.items()) AttributeError: 'NoneType' object has no attribute 'items'"
This is the code I am using:
from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader
def _getFields(obj, tree=None, retval=None, fileobj=None):
fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
'/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
if retval is None:
retval = OrderedDict()
catalog = obj.trailer["/Root"]
# get the AcroForm tree
if "/AcroForm" in catalog:
tree = catalog["/AcroForm"]
else:
return None
if tree is None:
return retval
obj._checkKids(tree, retval, fileobj)
for attr in fieldAttributes:
if attr in tree:
# Tree is a field
obj._buildField(tree, retval, fileobj, fieldAttributes)
break
if "/Fields" in tree:
fields = tree["/Fields"]
for f in fields:
field = f.getObject()
obj._buildField(field, retval, fileobj, fieldAttributes)
return retval
def get_form_fields(infile):
    """Return the form-field values of a PDF file.

    Args:
        infile: Path of the PDF file to read.

    Returns:
        An ``OrderedDict`` mapping each field key to the field's ``'/V'``
        entry (``''`` when the field has none). The mapping is empty when
        ``_getFields`` returns ``None``, which it does for a PDF whose
        catalog has no ``/AcroForm`` entry.
    """
    # Keep the file open only while the reader is in use; the values are
    # extracted inside the ``with`` block in case the reader resolves
    # objects from the stream on demand.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No interactive form: previously this crashed with
            # "'NoneType' object has no attribute 'items'".
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())
if __name__ == '__main__':
    # Script entry point: pretty-print the form-field values of the sample PDF.
    from pprint import pprint

    pdf_file_name = 'Guild.pdf'
    form_values = get_form_fields(pdf_file_name)
    pprint(form_values)
CodePudding user response:
After tracing through your code, it seems that on the 10th line catalog stores the value {'/Metadata': IndirectObject(16, 0), '/Pages': IndirectObject(1, 0), '/Type': '/Catalog'}, meaning /AcroForm is not a key in the dictionary, so your function returns None.
CodePudding user response:
Your _getFields explicitly returns None from the first if block.
So basically, that is where you could be getting this error from.