Home > front end >  Java SaxParser XML
Java SaxParser XML

Time:09-15

This is my first question here, so please don't judge too strictly :) I'm parsing a file with the .dim extension, which is in fact XML, so I chose a SAX parser. The problem is probably architectural, or I simply don't know the right name for it. In general terms: there is a list of tags that I need to extract, and for each of them I create an object and assign to its fields the values that the parser pulls out, as strings, from certain tags. For the tags I need, I have implemented classes whose objects I create in the startElement method; afterwards, in endElement, I assign the appropriate strings to these objects. The crux of the problem is that I can't figure out how to avoid a large amount of code in the SaxParserHandler class, including the many if/else branches in the startElement and endElement methods. I tried to use an enum and the factory pattern, but in vain. Here is an example of my current code in SaxParserHandler:

/**
 * SAX handler that collects selected elements of a DIMAP XML document into a {@link Root}.
 *
 * <p>Container elements (for example {@code Vertex} or {@code Production}) create a fresh model
 * object when they open; value elements (for example {@code FRAME_LON}) copy their text into the
 * most recently created container when they close. Elements are matched by their exact,
 * case-sensitive qualified name in both {@code startElement} and {@code endElement}; unknown
 * elements are ignored.
 *
 * <p>Nesting is not verified: a value element that closes before its container has ever been
 * opened dereferences a {@code null} container and fails with a {@link NullPointerException}.
 *
 * <p>The handler keeps parsing state in instance fields, so use one instance per parsed document.
 */
public class SaxParserHandler extends DefaultHandler {

    // --- Dataset_Id / Dataset_Frame -------------------------------------------------------
    private static final String TAG_DATASET_NAME = "DATASET_NAME";
    private static final String TAG_DATASET_FRAME = "Dataset_Frame";
    private static final String TAG_VERTEX = "Vertex";
    private static final String TAG_FRAME_LON = "FRAME_LON";
    private static final String TAG_FRAME_LAT = "FRAME_LAT";
    private static final String TAG_FRAME_X = "FRAME_X";
    private static final String TAG_FRAME_Y = "FRAME_Y";
    private static final String TAG_FRAME_ROW = "FRAME_ROW";
    private static final String TAG_FRAME_COL = "FRAME_COL";

    // --- Source_Information / Scene_Source ------------------------------------------------
    private static final String TAG_SOURCE_INFORMATION = "Source_Information";
    private static final String TAG_SOURCE_ID = "SOURCE_ID";
    private static final String TAG_SCENE_SOURCE = "Scene_Source";
    private static final String TAG_IMAGING_DATE = "IMAGING_DATE";
    private static final String TAG_IMAGING_TIME = "IMAGING_TIME";
    private static final String TAG_MISSION = "MISSION";
    private static final String TAG_MISSION_INDEX = "MISSION_INDEX";
    private static final String TAG_INSTRUMENT = "INSTRUMENT";
    private static final String TAG_SATELLITE_INCIDENCE_ANGLE = "SATELLITE_INCIDENCE_ANGLE";
    private static final String TAG_VIEWING_ANGLE = "VIEWING_ANGLE";
    private static final String TAG_SUN_AZIMUTH = "SUN_AZIMUTH";
    private static final String TAG_SUN_ELEVATION = "SUN_ELEVATION";
    private static final String TAG_THEORETICAL_RESOLUTION = "THEORETICAL_RESOLUTION";

    // --- Coordinate_Reference_System / Horizontal_CS --------------------------------------
    private static final String TAG_COORDINATE_REFERENCE_SYSTEM = "Coordinate_Reference_System";
    private static final String TAG_HORIZONTAL_CS = "Horizontal_CS";
    private static final String TAG_HORIZONTAL_CS_CODE = "HORIZONTAL_CS_CODE";
    private static final String TAG_HORIZONTAL_CS_TYPE = "HORIZONTAL_CS_TYPE";
    private static final String TAG_HORIZONTAL_CS_NAME = "HORIZONTAL_CS_NAME";

    // --- Production / Production_Facility -------------------------------------------------
    private static final String TAG_PRODUCTION = "Production";
    private static final String TAG_DATASET_PRODUCTION_DATE = "DATASET_PRODUCTION_DATE";
    private static final String TAG_PRODUCT_TYPE = "PRODUCT_TYPE";
    private static final String TAG_PRODUCT_INFO = "PRODUCT_INFO";
    private static final String TAG_JOB_ID = "JOB_ID";
    private static final String TAG_PRODUCTION_FACILITY = "Production_Facility";
    private static final String TAG_SOFTWARE_NAME = "SOFTWARE_NAME";
    private static final String TAG_SOFTWARE_VERSION = "SOFTWARE_VERSION";
    private static final String TAG_PROCESSING_CENTER = "PROCESSING_CENTER";

    // --- Raster_Encoding ------------------------------------------------------------------
    private static final String TAG_RASTER_ENCODING = "Raster_Encoding";
    private static final String TAG_DATA_TYPE = "DATA_TYPE";
    private static final String TAG_NBITS = "NBITS";
    private static final String TAG_BYTEORDER = "BYTEORDER";
    private static final String TAG_BANDS_LAYOUT = "BANDS_LAYOUT";

    // --- Data_Processing ------------------------------------------------------------------
    private static final String TAG_DATA_PROCESSING = "Data_Processing";
    private static final String TAG_PROCESSING_LEVEL = "PROCESSING_LEVEL";
    private static final String TAG_GEOMETRIC_PROCESSING = "GEOMETRIC_PROCESSING";
    private static final String TAG_RADIOMETRIC_PROCESSING = "RADIOMETRIC_PROCESSING";

    // --- Data_Access / Data_File_List -----------------------------------------------------
    private static final String TAG_DATA_ACCESS = "Data_Access";
    private static final String TAG_DATA_FILE_FORMAT = "DATA_FILE_FORMAT";
    private static final String TAG_DATA_FILE_ORGANISATION = "DATA_FILE_ORGANISATION";
    private static final String TAG_DATA_FILE_LIST = "Data_File_List";
    private static final String TAG_DATA_FILE_PATH = "DATA_FILE_PATH";

    // --- Image_Display / Band_Display_Order -----------------------------------------------
    private static final String TAG_IMAGE_DISPLAY = "Image_Display";
    private static final String TAG_BAND_DISPLAY_ORDER = "Band_Display_Order";
    private static final String TAG_RED_CHANNEL = "RED_CHANNEL";
    private static final String TAG_GREEN_CHANNEL = "GREEN_CHANNEL";
    private static final String TAG_BLUE_CHANNEL = "BLUE_CHANNEL";

    // --- Data_Strip and its children ------------------------------------------------------
    private static final String TAG_DATA_STRIP = "Data_Strip";
    private static final String TAG_DATA_STRIP_IDENTIFICATION = "Data_Strip_Identification";
    private static final String TAG_DATA_STRIP_ID = "DATA_STRIP_ID";
    private static final String TAG_SEGMENT_ID = "SEGMENT_ID";
    private static final String TAG_TIME_STAMP = "Time_Stamp";
    private static final String TAG_REFERENCE_BAND = "REFERENCE_BAND";
    private static final String TAG_REFERENCE_TIME = "REFERENCE_TIME";
    private static final String TAG_REFERENCE_LINE = "REFERENCE_LINE";
    private static final String TAG_LINE_PERIOD = "LINE_PERIOD";
    private static final String TAG_EPHEMERIS = "Ephemeris";
    private static final String TAG_SATELLITE_ALTITUDE = "SATELLITE_ALTITUDE";

    /** Text collected since the most recent {@code startElement}; reused for every element. */
    private final StringBuilder data = new StringBuilder();

    // Model objects under construction; each stays null until its container element is seen.
    private Dataset_Id dataset_id = null;
    private Dataset_Frame dataset_frame = null;
    private Vertex vertex = null;
    // NOTE(review): created once on the first Vertex and never reset, so every Vertex in the
    // document ends up in this one list - confirm that only Dataset_Frame contains Vertex elements.
    private List<Vertex> vertices = null;
    private Source_Information source_information = null;
    private Scene_Source scene_source = null;
    private Coordinate_Reference_System coordinate_reference_system = null;
    private Horizontal_CS horizontal_cs = null;
    private Production production = null;
    private Production_Facility production_facility = null;
    private Raster_Encoding raster_encoding = null;
    private Data_Processing data_processing = null;
    private Data_Access data_access = null;
    private Data_File_List data_file_list = null;
    private Image_Display image_display = null;
    private Band_Display_Order band_display_order = null;
    private Data_Strip data_strip = null;
    private Data_Strip_Identification data_strip_identification = null;
    private Time_Stamp time_stamp = null;
    private Ephemeris ephemeris = null;

    /** Result object; (re)populated from the current parsing state on every {@link #getRoot()}. */
    Root root = new Root();

    /**
     * Creates the model object for a known container element and clears the text buffer.
     *
     * @param uri        namespace URI (unused)
     * @param localName  local name (unused)
     * @param qName      qualified element name; compared case-sensitively with the known tags
     * @param attributes element attributes (unused)
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        switch (qName) {
            case TAG_DATASET_NAME:
                // The Dataset_Id object is created when its DATASET_NAME child opens.
                dataset_id = new Dataset_Id();
                break;
            case TAG_DATASET_FRAME:
                dataset_frame = new Dataset_Frame();
                break;
            case TAG_VERTEX:
                vertex = new Vertex();
                if (vertices == null) {
                    vertices = new ArrayList<>();
                }
                break;
            case TAG_SOURCE_INFORMATION:
                source_information = new Source_Information();
                break;
            case TAG_SCENE_SOURCE:
                scene_source = new Scene_Source();
                break;
            case TAG_COORDINATE_REFERENCE_SYSTEM:
                coordinate_reference_system = new Coordinate_Reference_System();
                break;
            case TAG_HORIZONTAL_CS:
                horizontal_cs = new Horizontal_CS();
                break;
            case TAG_PRODUCTION:
                production = new Production();
                break;
            case TAG_PRODUCTION_FACILITY:
                production_facility = new Production_Facility();
                break;
            case TAG_RASTER_ENCODING:
                raster_encoding = new Raster_Encoding();
                break;
            case TAG_DATA_PROCESSING:
                data_processing = new Data_Processing();
                break;
            case TAG_DATA_ACCESS:
                data_access = new Data_Access();
                break;
            case TAG_DATA_FILE_LIST:
                data_file_list = new Data_File_List();
                break;
            case TAG_IMAGE_DISPLAY:
                image_display = new Image_Display();
                break;
            case TAG_BAND_DISPLAY_ORDER:
                band_display_order = new Band_Display_Order();
                break;
            case TAG_DATA_STRIP:
                data_strip = new Data_Strip();
                break;
            case TAG_DATA_STRIP_IDENTIFICATION:
                data_strip_identification = new Data_Strip_Identification();
                break;
            case TAG_TIME_STAMP:
                time_stamp = new Time_Stamp();
                break;
            case TAG_EPHEMERIS:
                ephemeris = new Ephemeris();
                break;
            default:
                // Not a container this handler cares about.
                break;
        }

        // Every element starts with an empty buffer, so a value element sees only its own text.
        data.setLength(0);
    }

    /**
     * Stores the collected text of a known value element in its container, or attaches a finished
     * child container to its parent.
     *
     * @param uri       namespace URI (unused)
     * @param localName local name (unused)
     * @param qName     qualified element name; compared case-sensitively with the known tags
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        final String text = data.toString();

        switch (qName) {
            // Dataset_Id / Dataset_Frame
            case TAG_DATASET_NAME:
                dataset_id.setDataset_name(text);
                break;
            case TAG_FRAME_LON:
                vertex.setFRAME_LON(text);
                break;
            case TAG_FRAME_LAT:
                vertex.setFRAME_LAT(text);
                break;
            case TAG_FRAME_X:
                vertex.setFRAME_X(text);
                break;
            case TAG_FRAME_Y:
                vertex.setFRAME_Y(text);
                break;
            case TAG_FRAME_ROW:
                vertex.setFRAME_ROW(text);
                break;
            case TAG_FRAME_COL:
                vertex.setFRAME_COL(text);
                break;
            case TAG_VERTEX:
                vertices.add(vertex);
                break;
            case TAG_DATASET_FRAME:
                dataset_frame.setDataset_Frame(vertices);
                break;

            // Source_Information / Scene_Source
            case TAG_SOURCE_ID:
                source_information.setSource_id(text);
                break;
            case TAG_IMAGING_DATE:
                scene_source.setIMAGING_DATE(text);
                break;
            case TAG_IMAGING_TIME:
                scene_source.setIMAGING_TIME(text);
                break;
            case TAG_MISSION:
                scene_source.setMISSION(text);
                break;
            case TAG_MISSION_INDEX:
                scene_source.setMISSION_INDEX(text);
                break;
            case TAG_INSTRUMENT:
                scene_source.setINSTRUMENT(text);
                break;
            case TAG_SATELLITE_INCIDENCE_ANGLE:
                scene_source.setSATELLITE_INCIDENCE_ANGLE(text);
                break;
            case TAG_VIEWING_ANGLE:
                scene_source.setVIEWING_ANGLE(text);
                break;
            case TAG_SUN_AZIMUTH:
                scene_source.setSUN_AZIMUTH(text);
                break;
            case TAG_SUN_ELEVATION:
                scene_source.setSUN_ELEVATION(text);
                break;
            case TAG_THEORETICAL_RESOLUTION:
                scene_source.setTHEORETICAL_RESOLUTION(text);
                break;
            case TAG_SOURCE_INFORMATION:
                source_information.setScene_source(scene_source);
                break;

            // Coordinate_Reference_System / Horizontal_CS
            case TAG_HORIZONTAL_CS_TYPE:
                horizontal_cs.setHORIZONTAL_CS_TYPE(text);
                break;
            case TAG_HORIZONTAL_CS_NAME:
                horizontal_cs.setHORIZONTAL_CS_NAME(text);
                break;
            case TAG_HORIZONTAL_CS_CODE:
                horizontal_cs.setHORIZONTAL_CS_CODE(text);
                break;
            case TAG_HORIZONTAL_CS:
                coordinate_reference_system.setHorizontal_cs(horizontal_cs);
                break;

            // Production / Production_Facility
            case TAG_DATASET_PRODUCTION_DATE:
                production.setDATASET_PRODUCTION_DATE(text);
                break;
            case TAG_PRODUCT_TYPE:
                production.setPRODUCT_TYPE(text);
                break;
            case TAG_PRODUCT_INFO:
                production.setPRODUCT_INFO(text);
                break;
            case TAG_JOB_ID:
                production.setJOB_ID(text);
                break;
            case TAG_SOFTWARE_NAME:
                production_facility.setSOFTWARE_NAME(text);
                break;
            case TAG_SOFTWARE_VERSION:
                production_facility.setSOFTWARE_VERSION(text);
                break;
            case TAG_PROCESSING_CENTER:
                production_facility.setPROCESSING_CENTER(text);
                break;
            case TAG_PRODUCTION:
                // production_facility is still null here when no Production_Facility was seen.
                production.setProduction_facility(production_facility);
                break;

            // Raster_Encoding
            case TAG_DATA_TYPE:
                raster_encoding.setDATA_TYPE(text);
                break;
            case TAG_NBITS:
                raster_encoding.setNBITS(text);
                break;
            case TAG_BYTEORDER:
                raster_encoding.setBYTEORDER(text);
                break;
            case TAG_BANDS_LAYOUT:
                raster_encoding.setBANDS_LAYOUT(text);
                break;

            // Data_Processing
            case TAG_PROCESSING_LEVEL:
                data_processing.setPROCESSING_LEVEL(text);
                break;
            case TAG_GEOMETRIC_PROCESSING:
                data_processing.setGEOMETRIC_PROCESSING(text);
                break;
            case TAG_RADIOMETRIC_PROCESSING:
                data_processing.setRADIOMETRIC_PROCESSING(text);
                break;

            // Data_Access / Data_File_List
            case TAG_DATA_FILE_FORMAT:
                data_access.setDATA_FILE_FORMAT(text);
                break;
            case TAG_DATA_FILE_ORGANISATION:
                data_access.setDATA_FILE_ORGANISATION(text);
                break;
            case TAG_DATA_FILE_PATH:
                data_file_list.setDATA_FILE_PATH(text);
                break;
            case TAG_DATA_FILE_LIST:
                data_access.setData_file_lists(data_file_list);
                break;

            // Image_Display / Band_Display_Order
            case TAG_RED_CHANNEL:
                band_display_order.setRED_CHANNEL(text);
                break;
            case TAG_GREEN_CHANNEL:
                band_display_order.setGREEN_CHANNEL(text);
                break;
            case TAG_BLUE_CHANNEL:
                band_display_order.setBLUE_CHANNEL(text);
                break;
            case TAG_BAND_DISPLAY_ORDER:
                image_display.setBand_display_orders(band_display_order);
                break;

            // Data_Strip and its children
            case TAG_DATA_STRIP_ID:
                data_strip_identification.setDATA_STRIP_ID(text);
                break;
            case TAG_SEGMENT_ID:
                data_strip_identification.setSEGMENT_ID(text);
                break;
            case TAG_DATA_STRIP_IDENTIFICATION:
                data_strip.setData_strip_identification(data_strip_identification);
                break;
            case TAG_REFERENCE_BAND:
                time_stamp.setREFERENCE_BAND(text);
                break;
            case TAG_REFERENCE_TIME:
                time_stamp.setREFERENCE_TIME(text);
                break;
            case TAG_REFERENCE_LINE:
                time_stamp.setREFERENCE_LINE(text);
                break;
            case TAG_LINE_PERIOD:
                time_stamp.setLINE_PERIOD(text);
                break;
            case TAG_TIME_STAMP:
                data_strip.setTime_stamp(time_stamp);
                break;
            case TAG_SATELLITE_ALTITUDE:
                ephemeris.setSATELLITE_ALTITUDE(text);
                break;
            case TAG_EPHEMERIS:
                data_strip.setEphemerises(ephemeris);
                break;

            default:
                // Element is not mapped to any model field.
                break;
        }
    }

    /**
     * Appends a chunk of character data to the buffer; the parser may deliver the text of one
     * element in several chunks.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character to read
     * @param length number of characters to read
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // Append the range directly instead of allocating a temporary String per chunk.
        data.append(ch, start, length);
    }

    /**
     * Copies the top-level objects collected so far into the shared {@link Root} and returns it.
     * Sections that never appeared in the document are passed on as {@code null}.
     *
     * @return the same {@code Root} instance on every call, refreshed with the current state
     */
    public Root getRoot() {
        root.setDataset_id(dataset_id);
        root.setDataset_frame(dataset_frame);
        root.setSource_information(source_information);
        root.setCoordinate_reference_systems(coordinate_reference_system);
        root.setProductions(production);
        root.setRaster_encodings(raster_encoding);
        root.setData_processings(data_processing);
        root.setData_accesses(data_access);
        root.setImage_displays(image_display);
        root.setData_strips(data_strip);
        return root;
    }

}

part of dim file sample

<?xml version="1.0" encoding="ISO-8859-1"?>
<Dimap_Document xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <Metadata_Id>
        <METADATA_FORMAT version="1.1">DIMAP</METADATA_FORMAT>
    </Metadata_Id>
    <Dataset_Id>
        <DATASET_NAME>KM000604MI_017_MUL_L1G</DATASET_NAME>
        <DATASET_TN_PATH href="KM000604MI_017_MUL_L1G_tn.jpg"/>
        <DATASET_QL_PATH href="KM000604MI_017_MUL_L1G_ql.jpg"/>
    </Dataset_Id>
    <Production>
        <DATASET_PRODUCER_NAME/>
        <DATASET_PRODUCTION_DATE>2015-09-28</DATASET_PRODUCTION_DATE>
        <PRODUCT_TYPE/>
    </Production>
    <Dataset_Use>
        <DATASET_CONTENT/>
    </Dataset_Use>
    <Data_Processing>
        <GEOMETRIC_PROCESSING/>
        <Processing_Parameter>
            <PROC_PARAMETER_DESC>SOFTWARE</PROC_PARAMETER_DESC>
            <PROC_PARAMETER_VALUE>Keystone 3.8.9.FINAL.</PROC_PARAMETER_VALUE>
        </Processing_Parameter>
    </Data_Processing>
    <Coordinate_Reference_System>
        <GEO_TABLES>EPSG</GEO_TABLES>
        <Horizontal_CS>
            <HORIZONTAL_CS_CODE>EPSG:32642</HORIZONTAL_CS_CODE>
            <HORIZONTAL_CS_TYPE>PROJECTED</HORIZONTAL_CS_TYPE>
            <HORIZONTAL_CS_NAME>WGS 84 / UTM zone 42N</HORIZONTAL_CS_NAME>
            <Coordinate_Axis>
                <AXIS1_NAME>Easting</AXIS1_NAME>
                <AXIS2_NAME>Northing</AXIS2_NAME>
                <AXIS1_ORIENTATION>EAST</AXIS1_ORIENTATION>
                <AXIS2_ORIENTATION>NORTH</AXIS2_ORIENTATION>
            </Coordinate_Axis>
            <Projection>
                <PROJECTION_NAME>UTM zone 42N</PROJECTION_NAME>
                <PROJECTION_CODE>EPSG:16042</PROJECTION_CODE>
                <Projection_CT_Method>
                    <PROJECTION_CT_NAME>Transverse Mercator</PROJECTION_CT_NAME>
                    <PROJECTION_CT_CODE>EPSG:9807</PROJECTION_CT_CODE>
                    <Projection_Parameters>
                        <Projection_Parameter>
                            <PROJECTION_PARAMETER_NAME>Latitude_of_natural_origin</PROJECTION_PARAMETER_NAME>
                            <PROJECTION_PARAMETER_VALUE unit="DEG">0.0</PROJECTION_PARAMETER_VALUE>
                        </Projection_Parameter>
                        <Projection_Parameter>
                            <PROJECTION_PARAMETER_NAME>Longitude_of_natural_origin</PROJECTION_PARAMETER_NAME>
                            <PROJECTION_PARAMETER_VALUE unit="DEG">69.0</PROJECTION_PARAMETER_VALUE>
                        </Projection_Parameter>
                        <Projection_Parameter>
                            <PROJECTION_PARAMETER_NAME>Scale_factor_at_natural_origin</PROJECTION_PARAMETER_NAME>
                            <PROJECTION_PARAMETER_VALUE>0.9996</PROJECTION_PARAMETER_VALUE>
                        </Projection_Parameter>
                        <Projection_Parameter>
                            <PROJECTION_PARAMETER_NAME>False_easting</PROJECTION_PARAMETER_NAME>
                            <PROJECTION_PARAMETER_VALUE unit="M">500000.0</PROJECTION_PARAMETER_VALUE>
                        </Projection_Parameter>
                        <Projection_Parameter>
                            <PROJECTION_PARAMETER_NAME>False_northing</PROJECTION_PARAMETER_NAME>
                            <PROJECTION_PARAMETER_VALUE unit="M">0.0</PROJECTION_PARAMETER_VALUE>
                        </Projection_Parameter>
                    </Projection_Parameters>
                </Projection_CT_Method>
            </Projection>
            <Geographic_CS>
                <GEOGRAPHIC_CS_NAME>WGS 84</GEOGRAPHIC_CS_NAME>
                <GEOGRAPHIC_CS_CODE>EPSG:4326</GEOGRAPHIC_CS_CODE>
                <Horizontal_Datum>
                    <HORIZONTAL_DATUM_NAME>World Geodetic System 1984</HORIZONTAL_DATUM_NAME>
                    <HORIZONTAL_DATUM_CODE>EPSG:6326</HORIZONTAL_DATUM_CODE>
                    <Ellipsoid>
                        <ELLIPSOID_NAME>WGS 84</ELLIPSOID_NAME>
                        <ELLIPSOID_CODE>EPSG:7030</ELLIPSOID_CODE>
                        <Ellipsoid_Parameters>
                            <ELLIPSOID_MAJOR_AXIS unit="M">6378137.0</ELLIPSOID_MAJOR_AXIS>
                            <ELLIPSOID_MINOR_AXIS unit="M">6356752.314245</ELLIPSOID_MINOR_AXIS>
                        </Ellipsoid_Parameters>
                    </Ellipsoid>
                    <Prime_Meridian>
                        <PRIME_MERIDIAN_NAME>Greenwich</PRIME_MERIDIAN_NAME>
                        <PRIME_MERIDIAN_CODE>EPSG:8901</PRIME_MERIDIAN_CODE>
                        <PRIME_MERIDIAN_OFFSET unit="DEG">0.0</PRIME_MERIDIAN_OFFSET>
                    </Prime_Meridian>
                </Horizontal_Datum>
            </Geographic_CS>
        </Horizontal_CS>
    </Coordinate_Reference_System>

CodePudding user response:

Maybe you could use a list of strings naming the fields that you are interested in, and then store the values of those fields in a Map<String, String>.

Finally, you could use reflection to push the strings into objects in a loop. This last step either requires the objects' property names to match the XML tag names, or needs an additional mapping between the two.

In total, this should make your code a lot shorter and more maintainable, although it might need a bit more runtime than your current code.

CodePudding user response:

If you are open to using third-party libraries, you might give Jsoup a try. Jsoup is actually an HTML parser, but it is also able to parse XML. It provides, IMHO, an intuitive and simple selector syntax and API which you can use to get the elements you are interested in.

I am not really sure what you are trying to achieve, and I didn't find all the tags from your code in the sample XML you provided. But to give you a simple starting point on how to use Jsoup, please see the snippet below, where I fetch the Production DATASET_PRODUCTION_DATE and the Projection_Parameters from the sample XML:

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;

public class Example2 {
    public static void main(String[] args) throws IOException {
        InputStream in = new FileInputStream(new File("path to your xml file"));
        Document doc = Jsoup.parse(in, "UTF-8", "", Parser.xmlParser());

        String productionDate = doc.selectFirst("Production DATASET_PRODUCTION_DATE").text();
        System.out.println("PRODUCTION_DATE: "   productionDate);

        Elements projection_Parameters = doc.select("Projection_Parameter");

        projection_Parameters.forEach(param -> {
            String name  = param.selectFirst("PROJECTION_PARAMETER_NAME").text();
            String value = param.selectFirst("PROJECTION_PARAMETER_VALUE").text();
            System.out.printf("NAME: %s, VALUE: %s %n", name, value);
        });
    }
}

output:

PRODUCTION_DATE: 2015-09-28
NAME: Latitude_of_natural_origin, VALUE: 0.0 
NAME: Longitude_of_natural_origin, VALUE: 69.0 
NAME: Scale_factor_at_natural_origin, VALUE: 0.9996 
NAME: False_easting, VALUE: 500000.0 
NAME: False_northing, VALUE: 0.0 

Instead of printing to the console, you could of course create your POJOs directly. If you are interested, get Jsoup from Maven Central:

<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.15.3</version>
</dependency>
  • Related