Home > Software engineering >  C#, read and write XML-like schema to a similar data format
C#, read and write XML-like schema to a similar data format

Time:07-21

I have a data set that is structured just like XML, except that it doesn't use <> or </> to delimit data; instead it uses parentheses, tabs, and newlines. Otherwise the data works the same way: there are schema definitions and child/parent nodes.

Is there a way in C# to read/write to this data using something similar to XPathSelectElements() ?

Here is a sample of the Data I need to parse through:

WARNING: Do Not Modify This File!
Check 24.1.6 Data File
CONTROL 1 (
  code = CEZ_90_1_2016
  compliance mode = UA
  version = 24.1.6 )
LOCATION 1 (
  state = Texas
  city =  USA )
BUILDING 1 (
  project type = NEW_CONSTRUCTION
  bldg use type = WHOLE_BLDG
  feet bldg height = 0.000
  number of stories = 1
  is nonresidential conditioning = TRUE
  is residential conditioning = FALSE
  is semiheated conditioning = FALSE
  conditioning = HEATING_AND_COOLING)
ENVELOPE 1 (
  use orient details = TRUE
  use vlt details = TRUE
  use cool roof performance details = FALSE )
AG WALL 1 (
  wall type = MASONRY_AG_WALL
  next to uncond space = FALSE
  concrete thickness = 12.00
  concrete density = 115.00
  furring type = NO_FURRING
  cmu type = CMU_PARTIAL_GROUT_CELLS_EMPTY
  list position = 1
  description = <|Concrete Block:12", Partially Grouted, Cells Empty|>
  assembly type = <|Exterior Wall 1|>
  parent number = 0
  bldg use key = 1884629903
  continuous rvalue = 10.00
  daylight credit = 0.000
  orientation = WEST
  allowance type = ENV_ALLOWANCE_NONE
  exemption type = ENV_EXEMPTION_NONE
  valid allowance type = FALSE
  valid exemption type = FALSE
  construction type = NON_RESIDENTIAL 
  adjacent space type = ADJACENT_SPACE_EXTERIOR 
  gross area = 3377.000)
DOOR 4 (
  door type = INSUL_METAL_DOOR
  door open type = SWINGING_DOOR
  list position = 3
  description = <|Insulated Metal|>
  assembly type = <|Door 1|>
  parent number = 1
  bldg use key = 1884629903
  prop uvalue = 0.100000
  daylight credit = 0.000
  orientation = WEST
  allowance type = ENV_ALLOWANCE_NONE
  exemption type = ENV_EXEMPTION_NONE
  valid allowance type = FALSE
  valid exemption type = FALSE
  construction type = NON_RESIDENTIAL 
  adjacent space type = ADJACENT_SPACE_EXTERIOR 
  gross area = 21.000)
AG WALL 2 (
  wall type = METAL_FRAME_16_AG_WALL
  next to uncond space = FALSE
  list position = 4
  description = <|Steel-Framed, 16" o.c.|>
  assembly type = <|Exterior Wall 2|>
  parent number = 0
  bldg use key = 1884629903
  cavity rvalue = 21.00
  continuous rvalue = 0.00
  daylight credit = 0.000
  orientation = NORTH
  allowance type = ENV_ALLOWANCE_NONE
  exemption type = ENV_EXEMPTION_NONE
  valid allowance type = FALSE
  valid exemption type = FALSE
  construction type = NON_RESIDENTIAL 
  adjacent space type = ADJACENT_SPACE_EXTERIOR 
  gross area = 42.000)

And here is the schema layout. As you see it even says rootXmlClass, so this is an xml format, I am just very confused how it works with this tab, newline, and parenthesis format instead of your typical XML files. I also got the .xsd schema definition from the creator of this software but it did not provide me any help...

# Documentation available at https://cvs.pnl.gov/BecpApps/wiki/schemaGenerator
namespace http://energycode.pnl.gov/ns/ComCheckBuildingSchema
rootXmlClass BUILDING

#############################
# Building and Project Data #
#############################

class BUILDING
child CONTROL 1
child EFFICIENCY_PACKAGE efficiencyPackages
child LOCATION 1
child PROJECT 1
child ENVELOPE 1
child LIGHTING 1
child HVAC 1
child SWH_SYSTEM serviceWaterHeating
child REQUIREMENT_ANSWER requirements


class CONTROL

class PROJECT

class LOCATION

class WHOLE_BLDG_USE
child INTERIOR_SPACE 1 activity_category_number

class ACTIVITY_USE
child INTERIOR_SPACE 1 activity_category_number

class EXTERIOR_USE
child EXTERIOR_SPACE 1 linked_use_area_index


#################
# Lighting Data #
#################

container LIGHTING
child WHOLE_BLDG_USE
child ACTIVITY_USE
child EXTERIOR_USE

class INTERIOR_SPACE interiorLightingSpace
child FIXTURE * parent_number

# Removed "child FIXTURE * parent_number" from the EXTERIOR_SPACE class, which will break the schema generator
class EXTERIOR_SPACE exteriorLightingSpace
child EXTERIOR_FIXTURE fixtures parent_number

class FIXTURE
class EXTERIOR_FIXTURE fixture


#################
# Envelope Date #
#################

container ENVELOPE
child AG_WALL aboveGroundWalls
child BG_WALL belowGroundWalls
child ROOF
child FLOOR
child DOOR
child WINDOW
child SKYLIGHT

class AG_WALL
child WINDOW * parent_number
child DOOR * parent_number

class BG_WALL
child WINDOW * parent_number
child DOOR * parent_number

class FLOOR

class ROOF
child SKYLIGHT * parent_number

class SKYLIGHT

class WINDOW

class DOOR


###################
# Mechanical Data #
###################

container HVAC
child HVAC_SYSTEM
child HVAC_PLANT
child FAN_SYSTEM

class HVAC_SYSTEM
child REQUIREMENT_ANSWER requirements parent_number

class HVAC_PLANT
child REQUIREMENT_ANSWER requirements parent_number

class SWH_SYSTEM
child REQUIREMENT_ANSWER requirements parent_number

class FAN_SYSTEM
child FAN * parent_number
child PRESSURE_DROP_CREDIT * parent_number

class FAN

class PRESSURE_DROP_CREDIT

################
# Requirements #
################

class REQUIREMENT_ANSWER

################
# efficiency Packages #
################
class EFFICIENCY_PACKAGE

Thank you

CodePudding user response:

A good use case for InvisibleXML: https://invisiblexml.org. If you can write a BNF grammar for your data format (using the InvisibleXML dialect of BNF) then you will be able to process the data using XML tools such as XSLT, XPath, and XSD.

CodePudding user response:

Sure. Here's a quick grammar that parses the file. I don't actually know the rules, or what the desired structure would be, so this is just a quick reverse engineering job.

{ Grammar for the sample data file: a fixed two-line header followed by one
  or more named, numbered blocks of "name = value" properties. }
Document = Warning, check, data+, #a? .
-Warning = -"WARNING: Do Not Modify This File!", -#a.
-check = -"Check 24.1.6 Data File", -#a .
{ A block is "NAME NUMBER (" on its own line, then newline-separated
  properties, then an optional space and the closing parenthesis. }
data = name, -' ', number, -' (', -#a, property++-#a, -' '?, -')', -#a .

{ A name is letters with embedded spaces; it starts and ends with a letter,
  so it is at least two characters long. }
name = namechar, (namechar | ' ')*, namechar .
-namechar = ['A'-'Z'] | ['a'-'z'] .
number = ['0'-'9']+ .
property = -' '*, name, -' = ', value .
{ A value is a run of one or more non-newline characters. }
value = ~[#a]+ .

From the sample above, it produces:

<Document xmlns:ixml="http://invisiblexml.org/NS" ixml:state="ambiguous">
   <data>
      <name>CONTROL</name>
      <number>1</number>
      <property>
         <name>code</name>
         <value>CEZ_90_1_2016</value>
      </property>
      <property>
         <name>compliance mode</name>
         <value>UA</value>
      </property>
      <property>
         <name>version</name>
         <value>24.1.6</value>
      </property>
   </data>
   <data>
      <name>LOCATION</name>
      <number>1</number>
      <property>
         <name>state</name>
         <value>Texas</value>
      </property>
      <property>
         <name>city</name>
         <value> USA</value>
      </property>
   </data>
   <data>
      <name>BUILDING</name>
      <number>1</number>
      <property>
         <name>project type</name>
         <value>NEW_CONSTRUCTION</value>
      </property>
      <property>
         <name>bldg use type</name>
         <value>WHOLE_BLDG</value>
      </property>
      <property>
         <name>feet bldg height</name>
         <value>0.000</value>
      </property>
      <property>
         <name>number of stories</name>
         <value>1</value>
      </property>
      <property>
         <name>is nonresidential conditioning</name>
         <value>TRUE</value>
      </property>
      <property>
         <name>is residential conditioning</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>is semiheated conditioning</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>conditioning</name>
         <value>HEATING_AND_COOLING</value>
      </property>
   </data>
   <data>
      <name>ENVELOPE</name>
      <number>1</number>
      <property>
         <name>use orient details</name>
         <value>TRUE</value>
      </property>
      <property>
         <name>use vlt details</name>
         <value>TRUE</value>
      </property>
      <property>
         <name>use cool roof performance details</name>
         <value>FALSE</value>
      </property>
   </data>
   <data>
      <name>AG WALL</name>
      <number>1</number>
      <property>
         <name>wall type</name>
         <value>MASONRY_AG_WALL</value>
      </property>
      <property>
         <name>next to uncond space</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>concrete thickness</name>
         <value>12.00</value>
      </property>
      <property>
         <name>concrete density</name>
         <value>115.00</value>
      </property>
      <property>
         <name>furring type</name>
         <value>NO_FURRING</value>
      </property>
      <property>
         <name>cmu type</name>
         <value>CMU_PARTIAL_GROUT_CELLS_EMPTY</value>
      </property>
      <property>
         <name>list position</name>
         <value>1</value>
      </property>
      <property>
         <name>description</name>
         <value>&lt;|Concrete Block:12", Partially Grouted, Cells Empty|></value>
      </property>
      <property>
         <name>assembly type</name>
         <value>&lt;|Exterior Wall 1|></value>
      </property>
      <property>
         <name>parent number</name>
         <value>0</value>
      </property>
      <property>
         <name>bldg use key</name>
         <value>1884629903</value>
      </property>
      <property>
         <name>continuous rvalue</name>
         <value>10.00</value>
      </property>
      <property>
         <name>daylight credit</name>
         <value>0.000</value>
      </property>
      <property>
         <name>orientation</name>
         <value>WEST</value>
      </property>
      <property>
         <name>allowance type</name>
         <value>ENV_ALLOWANCE_NONE</value>
      </property>
      <property>
         <name>exemption type</name>
         <value>ENV_EXEMPTION_NONE</value>
      </property>
      <property>
         <name>valid allowance type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>valid exemption type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>construction type</name>
         <value>NON_RESIDENTIAL </value>
      </property>
      <property>
         <name>adjacent space type</name>
         <value>ADJACENT_SPACE_EXTERIOR </value>
      </property>
      <property>
         <name>gross area</name>
         <value>3377.000</value>
      </property>
   </data>
   <data>
      <name>DOOR</name>
      <number>4</number>
      <property>
         <name>door type</name>
         <value>INSUL_METAL_DOOR</value>
      </property>
      <property>
         <name>door open type</name>
         <value>SWINGING_DOOR</value>
      </property>
      <property>
         <name>list position</name>
         <value>3</value>
      </property>
      <property>
         <name>description</name>
         <value>&lt;|Insulated Metal|></value>
      </property>
      <property>
         <name>assembly type</name>
         <value>&lt;|Door 1|></value>
      </property>
      <property>
         <name>parent number</name>
         <value>1</value>
      </property>
      <property>
         <name>bldg use key</name>
         <value>1884629903</value>
      </property>
      <property>
         <name>prop uvalue</name>
         <value>0.100000</value>
      </property>
      <property>
         <name>daylight credit</name>
         <value>0.000</value>
      </property>
      <property>
         <name>orientation</name>
         <value>WEST</value>
      </property>
      <property>
         <name>allowance type</name>
         <value>ENV_ALLOWANCE_NONE</value>
      </property>
      <property>
         <name>exemption type</name>
         <value>ENV_EXEMPTION_NONE</value>
      </property>
      <property>
         <name>valid allowance type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>valid exemption type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>construction type</name>
         <value>NON_RESIDENTIAL </value>
      </property>
      <property>
         <name>adjacent space type</name>
         <value>ADJACENT_SPACE_EXTERIOR </value>
      </property>
      <property>
         <name>gross area</name>
         <value>21.000</value>
      </property>
   </data>
   <data>
      <name>AG WALL</name>
      <number>2</number>
      <property>
         <name>wall type</name>
         <value>METAL_FRAME_16_AG_WALL</value>
      </property>
      <property>
         <name>next to uncond space</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>list position</name>
         <value>4</value>
      </property>
      <property>
         <name>description</name>
         <value>&lt;|Steel-Framed, 16" o.c.|></value>
      </property>
      <property>
         <name>assembly type</name>
         <value>&lt;|Exterior Wall 2|></value>
      </property>
      <property>
         <name>parent number</name>
         <value>0</value>
      </property>
      <property>
         <name>bldg use key</name>
         <value>1884629903</value>
      </property>
      <property>
         <name>cavity rvalue</name>
         <value>21.00</value>
      </property>
      <property>
         <name>continuous rvalue</name>
         <value>0.00</value>
      </property>
      <property>
         <name>daylight credit</name>
         <value>0.000</value>
      </property>
      <property>
         <name>orientation</name>
         <value>NORTH</value>
      </property>
      <property>
         <name>allowance type</name>
         <value>ENV_ALLOWANCE_NONE</value>
      </property>
      <property>
         <name>exemption type</name>
         <value>ENV_EXEMPTION_NONE</value>
      </property>
      <property>
         <name>valid allowance type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>valid exemption type</name>
         <value>FALSE</value>
      </property>
      <property>
         <name>construction type</name>
         <value>NON_RESIDENTIAL </value>
      </property>
      <property>
         <name>adjacent space type</name>
         <value>ADJACENT_SPACE_EXTERIOR </value>
      </property>
      <property>
         <name>gross area</name>
         <value>42.000</value>
      </property>
   </data>
</Document>

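It's ambiguous because sometimes the thing I've called data ends with " )" (a space before the closing parenthesis) and sometimes it ends with ")" alone, so that optional space can be read either as the end of the last value or as a separator before the parenthesis. It is probably possible to remove that ambiguity, but I think it's harmless.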

If there's more actual variability in the real data, with respect to whitespace for example, you might have to work a little harder.

Note: in order to avoid ambiguity about the formatting of the name followed by the number, this grammar requires names to be at least two characters long.

CodePudding user response:

Here is simple code that parses everything into a dictionary

using System;
using System.Linq;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;

namespace ConsoleApp2
{
    /// <summary>
    /// Reads a parenthesis-delimited data file made of blocks such as
    /// <c>CONTROL 1 ( key = value ... )</c> into nested dictionaries:
    /// container name -> (property name -> property value).
    /// </summary>
    class Program
    {

        /// <summary>Input path used when no path is given on the command line.</summary>
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>States of the line-by-line parser.</summary>
        enum STATE {
            GET_DATA_FILE_NAME,   // skipping the header until the "Data File" line
            GET_CONTAINER,        // waiting for a "NAME NUMBER (" line
            READ_CONTAINER        // reading "key = value" lines of the current container
        };

        // Splits on the FIRST '=' only, so a value may itself contain '='.
        // The closing ')' is stripped before matching, so it never ends up in the value.
        static readonly Regex PropertyPattern = new Regex(@"^(?'key'[^=]+)=(?'value'.*)$");

        /// <summary>
        /// Parses the file named by <c>args[0]</c>, or <see cref="FILENAME"/> when no
        /// argument is supplied. The reader is disposed even if parsing throws.
        /// </summary>
        /// <param name="args">Optional command-line arguments; the first one is the input path.</param>
        static void Main(string[] args)
        {
            string path = (args != null && args.Length > 0) ? args[0] : FILENAME;
            using (StreamReader reader = new StreamReader(path))
            {
                // The result is not used any further here; this program only builds it.
                Dictionary<string, Dictionary<string, string>> dict = ParseDataFile(reader);
            }
        }

        /// <summary>
        /// Parses the data-file text supplied by <paramref name="reader"/>.
        /// Lines before the one containing "Data File" are ignored, as are blank lines
        /// and lines between containers that do not contain '('.
        /// </summary>
        /// <param name="reader">Source of the text; it is read to the end but not disposed.</param>
        /// <returns>
        /// A dictionary keyed by container header (the text before '(', e.g. "AG WALL 1")
        /// whose values map trimmed property names to trimmed property values.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// A container header, or a property name within one container, occurs twice.
        /// </exception>
        public static Dictionary<string, Dictionary<string, string>> ParseDataFile(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            Dictionary<string, Dictionary<string, string>> dict = new Dictionary<string, Dictionary<string, string>>();
            Dictionary<string, string> containerDict = null;
            STATE state = STATE.GET_DATA_FILE_NAME;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                switch (state)
                {
                    case STATE.GET_DATA_FILE_NAME:
                        if (line.Contains("Data File"))
                        {
                            state = STATE.GET_CONTAINER;
                        }
                        break;

                    case STATE.GET_CONTAINER:
                    {
                        int open = line.IndexOf('(');
                        if (open >= 0)
                        {
                            containerDict = new Dictionary<string, string>();
                            dict.Add(line.Substring(0, open).Trim(), containerDict);

                            // Anything after '(' on the header line is treated as the
                            // first property line, so "NAME 1 ( a = b )" and "NAME 1 ()"
                            // are handled as well as the usual multi-line form.
                            string rest = line.Substring(open + 1).Trim();
                            bool closed = rest.Length > 0 && ReadProperty(rest, containerDict);
                            state = closed ? STATE.GET_CONTAINER : STATE.READ_CONTAINER;
                        }
                        break;
                    }

                    case STATE.READ_CONTAINER:
                        if (ReadProperty(line, containerDict))
                        {
                            state = STATE.GET_CONTAINER;
                        }
                        break;
                }
            }
            return dict;
        }

        /// <summary>
        /// Adds the "key = value" pair found on one trimmed line to <paramref name="container"/>.
        /// Lines that are not of that form are skipped.
        /// </summary>
        /// <returns>True when the line ends with ')', i.e. it closes the container.</returns>
        static bool ReadProperty(string line, Dictionary<string, string> container)
        {
            // Only a ')' at the very end of the line closes the container, so
            // parentheses inside a value (e.g. "<|Door (metal)|>") do not.
            // NOTE(review): a value that itself ends with ')' would still be read as
            // the end of the container - confirm against the format definition.
            bool closes = line.EndsWith(")", StringComparison.Ordinal);
            string body = closes ? line.Substring(0, line.Length - 1) : line;

            Match match = PropertyPattern.Match(body);
            if (match.Success)
            {
                string key = match.Groups["key"].Value.Trim();
                if (key.Length > 0)
                {
                    container.Add(key, match.Groups["value"].Value.Trim());
                }
            }
            return closes;
        }

    }

}
  • Related