I'm trying to parse the json response from an api and I want these field: 'id', 'label', 'degree', 'pep', 'sanctioned', 'countries', 'addresses', 'relationship_count'. I figured that, while inelegant, it would be easy to call pd.json_normalize(entity.json(), 'data')
on the data and then select the columns I want. But I'm not getting all the fields under data. Here's the columns I'm getting:
Index(['id', 'label', 'degree', 'pep', 'sanctioned', 'psa_count', 'type',
'entity_url', 'identifiers', 'countries', 'addresses',
'source_count.7a92887d4f18fc21abe0d658b25364e7.count',
'source_count.7a92887d4f18fc21abe0d658b25364e7.label', 'matches.name',
'matches.address',
'source_count.54243e61aaa4ce9289f34558f67d2e40.count',
'source_count.54243e61aaa4ce9289f34558f67d2e40.label',
'matches.business_purpose',
'source_count.82ca2242478ec8330c861c6a3acd7ed1.count',
'source_count.82ca2242478ec8330c861c6a3acd7ed1.label',
'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.count',
'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.label'],
dtype='object')
And here's what the json/dictionary looks like:
{'offset': 0,
'limit': 100,
'next': False,
'size': {'count': 5, 'qualifier': 'eq'},
'data':
[{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
'label': 'CPV MANUFACTURING, INC.',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
'identifiers': [{'value': '002302313',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
{'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
'identifiers': [],
'countries': ['USA'],
'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
'label': 'USA Paycheck Protection Program (PPP) $150k Loan Recipients Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
{'id': '7f--6WFMJkzZTdjiaX4hTQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
'identifiers': [{'value': '079228019',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
{'id': 'bCYPMeeymyiveMS0EW700g',
'label': 'Admiral Valve, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 0,
'type': 'company',
'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
'identifiers': [{'value': '4238068',
'type': 'usa_pa_corporate_registry_id',
'label': 'Usa Pa Corporate Registry Id'}],
'countries': ['USA'],
'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
'label': 'USA Pennsylvania Secretary of State'}},
'relationship_count': {},
'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
'Admiral <em>Valve</em>, <em>LLC</em>'],
'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
{'id': '_9oASm59LBr-iEs4gHj_lQ',
'label': 'ADMIRAL VALVE LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
'identifiers': [{'value': '901034776',
'type': 'usa_fei_number',
'label': 'Usa Fei Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
'label': 'USA Department of Labor Form 5500 Filings Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}
I feel like some combination of tweaking the record_path
or the meta
argument should get me there but I haven't figured it out.
Thanks in advance!
CodePudding user response:
Possible solution is the following:
import pandas as pd
data = {'offset': 0,
'limit': 100,
'next': False,
'size': {'count': 5, 'qualifier': 'eq'},
'data':
[{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
'label': 'CPV MANUFACTURING, INC.',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
'identifiers': [{'value': '002302313',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
{'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
'identifiers': [],
'countries': ['USA'],
'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
'label': 'USA Paycheck Protection Program (PPP) $150k Loan Recipients Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
{'id': '7f--6WFMJkzZTdjiaX4hTQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
'identifiers': [{'value': '079228019',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
{'id': 'bCYPMeeymyiveMS0EW700g',
'label': 'Admiral Valve, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 0,
'type': 'company',
'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
'identifiers': [{'value': '4238068',
'type': 'usa_pa_corporate_registry_id',
'label': 'Usa Pa Corporate Registry Id'}],
'countries': ['USA'],
'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
'label': 'USA Pennsylvania Secretary of State'}},
'relationship_count': {},
'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
'Admiral <em>Valve</em>, <em>LLC</em>'],
'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
{'id': '_9oASm59LBr-iEs4gHj_lQ',
'label': 'ADMIRAL VALVE LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
'identifiers': [{'value': '901034776',
'type': 'usa_fei_number',
'label': 'Usa Fei Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
'label': 'USA Department of Labor Form 5500 Filings Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}
df = pd.json_normalize(data['data'])
df
Returns
CodePudding user response:
Try:
data = pd.json_normalize(json_data, 'data')
data = data.drop(columns=['identifiers']).join(pd.json_normalize(data['identifiers'].explode()), rsuffix='_identifiers')