Home > Mobile >  json_normalize not returning expected columns
json_normalize not returning expected columns

Time:03-17

I'm trying to parse the json response from an api and I want these field: 'id', 'label', 'degree', 'pep', 'sanctioned', 'countries', 'addresses', 'relationship_count'. I figured that, while inelegant, it would be easy to call pd.json_normalize(entity.json(), 'data') on the data and then select the columns I want. But I'm not getting all the fields under data. Here's the columns I'm getting:

Index(['id', 'label', 'degree', 'pep', 'sanctioned', 'psa_count', 'type',
       'entity_url', 'identifiers', 'countries', 'addresses',
       'source_count.7a92887d4f18fc21abe0d658b25364e7.count',
       'source_count.7a92887d4f18fc21abe0d658b25364e7.label', 'matches.name',
       'matches.address',
       'source_count.54243e61aaa4ce9289f34558f67d2e40.count',
       'source_count.54243e61aaa4ce9289f34558f67d2e40.label',
       'matches.business_purpose',
       'source_count.82ca2242478ec8330c861c6a3acd7ed1.count',
       'source_count.82ca2242478ec8330c861c6a3acd7ed1.label',
       'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.count',
       'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.label'],
      dtype='object')

And here's what the json/dictionary looks like:

{'offset': 0,
 'limit': 100,
 'next': False,
 'size': {'count': 5, 'qualifier': 'eq'},
 'data': 
 [{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
   'label': 'CPV MANUFACTURING, INC.',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
   'identifiers': [{'value': '002302313',
     'type': 'duns_number',
     'label': 'Duns Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
    '851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
    '851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
   'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
     'label': 'USA USASpending.gov Profiles Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
     '851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
  {'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
   'label': 'ADMIRAL VALVE, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
   'identifiers': [],
   'countries': ['USA'],
   'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
   'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
     'label': 'USA Paycheck Protection Program (PPP) $150k  Loan Recipients Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
    'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
  {'id': '7f--6WFMJkzZTdjiaX4hTQ',
   'label': 'ADMIRAL VALVE, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
   'identifiers': [{'value': '079228019',
     'type': 'duns_number',
     'label': 'Duns Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
    '503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
   'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
     'label': 'USA USASpending.gov Profiles Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
  {'id': 'bCYPMeeymyiveMS0EW700g',
   'label': 'Admiral Valve, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 0,
   'type': 'company',
   'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
   'identifiers': [{'value': '4238068',
     'type': 'usa_pa_corporate_registry_id',
     'label': 'Usa Pa Corporate Registry Id'}],
   'countries': ['USA'],
   'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
   'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
     'label': 'USA Pennsylvania Secretary of State'}},
   'relationship_count': {},
   'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
     'Admiral <em>Valve</em>, <em>LLC</em>'],
    'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
  {'id': '_9oASm59LBr-iEs4gHj_lQ',
   'label': 'ADMIRAL VALVE LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
   'identifiers': [{'value': '901034776',
     'type': 'usa_fei_number',
     'label': 'Usa Fei Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
   'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
     'label': 'USA Department of Labor Form 5500 Filings Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
     'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
    'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}

I feel like some combination of tweaking the record_path or the meta argument should get me there but I haven't figured it out.

Thanks in advance!

CodePudding user response:

Possible solution is the following:

import pandas as pd

data = {'offset': 0,
 'limit': 100,
 'next': False,
 'size': {'count': 5, 'qualifier': 'eq'},
 'data':
 [{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
   'label': 'CPV MANUFACTURING, INC.',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
   'identifiers': [{'value': '002302313',
     'type': 'duns_number',
     'label': 'Duns Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
    '851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
    '851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
   'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
     'label': 'USA USASpending.gov Profiles Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
     '851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
  {'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
   'label': 'ADMIRAL VALVE, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
   'identifiers': [],
   'countries': ['USA'],
   'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
   'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
     'label': 'USA Paycheck Protection Program (PPP) $150k  Loan Recipients Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
    'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
  {'id': '7f--6WFMJkzZTdjiaX4hTQ',
   'label': 'ADMIRAL VALVE, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
   'identifiers': [{'value': '079228019',
     'type': 'duns_number',
     'label': 'Duns Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
    '503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
   'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
     'label': 'USA USASpending.gov Profiles Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
     'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
    'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
     '503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
  {'id': 'bCYPMeeymyiveMS0EW700g',
   'label': 'Admiral Valve, LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 0,
   'type': 'company',
   'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
   'identifiers': [{'value': '4238068',
     'type': 'usa_pa_corporate_registry_id',
     'label': 'Usa Pa Corporate Registry Id'}],
   'countries': ['USA'],
   'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
   'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
     'label': 'USA Pennsylvania Secretary of State'}},
   'relationship_count': {},
   'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
     'Admiral <em>Valve</em>, <em>LLC</em>'],
    'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
  {'id': '_9oASm59LBr-iEs4gHj_lQ',
   'label': 'ADMIRAL VALVE LLC',
   'degree': 0,
   'pep': False,
   'sanctioned': False,
   'psa_count': 4,
   'type': 'company',
   'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
   'identifiers': [{'value': '901034776',
     'type': 'usa_fei_number',
     'label': 'Usa Fei Number'}],
   'countries': ['USA'],
   'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
   'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
     'label': 'USA Department of Labor Form 5500 Filings Database'}},
   'relationship_count': {},
   'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
     'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
    'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}


df = pd.json_normalize(data['data'])

df

Returns

enter image description here

CodePudding user response:

Try:

data = pd.json_normalize(json_data, 'data')
data = data.drop(columns=['identifiers']).join(pd.json_normalize(data['identifiers'].explode()), rsuffix='_identifiers')
  • Related