I have a data set that is structured much like XML, except that instead of delimiting elements with <tag> and </tag> it uses parentheses, tabs, and newlines. Otherwise the data works the same way: there are schema definitions and parent/child nodes.
Is there a way in C# to read and write this data using something similar to XPathSelectElements()?
Here is a sample of the data I need to parse:
WARNING: Do Not Modify This File!
Check 24.1.6 Data File
CONTROL 1 (
code = CEZ_90_1_2016
compliance mode = UA
version = 24.1.6 )
LOCATION 1 (
state = Texas
city = USA )
BUILDING 1 (
project type = NEW_CONSTRUCTION
bldg use type = WHOLE_BLDG
feet bldg height = 0.000
number of stories = 1
is nonresidential conditioning = TRUE
is residential conditioning = FALSE
is semiheated conditioning = FALSE
conditioning = HEATING_AND_COOLING)
ENVELOPE 1 (
use orient details = TRUE
use vlt details = TRUE
use cool roof performance details = FALSE )
AG WALL 1 (
wall type = MASONRY_AG_WALL
next to uncond space = FALSE
concrete thickness = 12.00
concrete density = 115.00
furring type = NO_FURRING
cmu type = CMU_PARTIAL_GROUT_CELLS_EMPTY
list position = 1
description = <|Concrete Block:12", Partially Grouted, Cells Empty|>
assembly type = <|Exterior Wall 1|>
parent number = 0
bldg use key = 1884629903
continuous rvalue = 10.00
daylight credit = 0.000
orientation = WEST
allowance type = ENV_ALLOWANCE_NONE
exemption type = ENV_EXEMPTION_NONE
valid allowance type = FALSE
valid exemption type = FALSE
construction type = NON_RESIDENTIAL
adjacent space type = ADJACENT_SPACE_EXTERIOR
gross area = 3377.000)
DOOR 4 (
door type = INSUL_METAL_DOOR
door open type = SWINGING_DOOR
list position = 3
description = <|Insulated Metal|>
assembly type = <|Door 1|>
parent number = 1
bldg use key = 1884629903
prop uvalue = 0.100000
daylight credit = 0.000
orientation = WEST
allowance type = ENV_ALLOWANCE_NONE
exemption type = ENV_EXEMPTION_NONE
valid allowance type = FALSE
valid exemption type = FALSE
construction type = NON_RESIDENTIAL
adjacent space type = ADJACENT_SPACE_EXTERIOR
gross area = 21.000)
AG WALL 2 (
wall type = METAL_FRAME_16_AG_WALL
next to uncond space = FALSE
list position = 4
description = <|Steel-Framed, 16" o.c.|>
assembly type = <|Exterior Wall 2|>
parent number = 0
bldg use key = 1884629903
cavity rvalue = 21.00
continuous rvalue = 0.00
daylight credit = 0.000
orientation = NORTH
allowance type = ENV_ALLOWANCE_NONE
exemption type = ENV_EXEMPTION_NONE
valid allowance type = FALSE
valid exemption type = FALSE
construction type = NON_RESIDENTIAL
adjacent space type = ADJACENT_SPACE_EXTERIOR
gross area = 42.000)
And here is the schema layout. As you can see, it even says rootXmlClass, so this appears to be an XML-based format; I am just very confused about how it works with this tab, newline, and parenthesis syntax instead of a typical XML file. I also got the .xsd schema definition from the creator of this software, but it did not help me.
# Documentation available at https://cvs.pnl.gov/BecpApps/wiki/schemaGenerator
namespace http://energycode.pnl.gov/ns/ComCheckBuildingSchema
rootXmlClass BUILDING
#############################
# Building and Project Data #
#############################
class BUILDING
child CONTROL 1
child EFFICIENCY_PACKAGE efficiencyPackages
child LOCATION 1
child PROJECT 1
child ENVELOPE 1
child LIGHTING 1
child HVAC 1
child SWH_SYSTEM serviceWaterHeating
child REQUIREMENT_ANSWER requirements
class CONTROL
class PROJECT
class LOCATION
class WHOLE_BLDG_USE
child INTERIOR_SPACE 1 activity_category_number
class ACTIVITY_USE
child INTERIOR_SPACE 1 activity_category_number
class EXTERIOR_USE
child EXTERIOR_SPACE 1 linked_use_area_index
#################
# Lighting Data #
#################
container LIGHTING
child WHOLE_BLDG_USE
child ACTIVITY_USE
child EXTERIOR_USE
class INTERIOR_SPACE interiorLightingSpace
child FIXTURE * parent_number
# Removed "child FIXTURE * parent_number" from the EXTERIOR_SPACE class, which will break the schema generator
class EXTERIOR_SPACE exteriorLightingSpace
child EXTERIOR_FIXTURE fixtures parent_number
class FIXTURE
class EXTERIOR_FIXTURE fixture
#################
# Envelope Date #
#################
container ENVELOPE
child AG_WALL aboveGroundWalls
child BG_WALL belowGroundWalls
child ROOF
child FLOOR
child DOOR
child WINDOW
child SKYLIGHT
class AG_WALL
child WINDOW * parent_number
child DOOR * parent_number
class BG_WALL
child WINDOW * parent_number
child DOOR * parent_number
class FLOOR
class ROOF
child SKYLIGHT * parent_number
class SKYLIGHT
class WINDOW
class DOOR
###################
# Mechanical Data #
###################
container HVAC
child HVAC_SYSTEM
child HVAC_PLANT
child FAN_SYSTEM
class HVAC_SYSTEM
child REQUIREMENT_ANSWER requirements parent_number
class HVAC_PLANT
child REQUIREMENT_ANSWER requirements parent_number
class SWH_SYSTEM
child REQUIREMENT_ANSWER requirements parent_number
class FAN_SYSTEM
child FAN * parent_number
child PRESSURE_DROP_CREDIT * parent_number
class FAN
class PRESSURE_DROP_CREDIT
################
# Requirements #
################
class REQUIREMENT_ANSWER
################
# efficiency Packages #
################
class EFFICIENCY_PACKAGE
Thank you
CodePudding user response:
A good use case for InvisibleXML: https://invisiblexml.org. If you can write a BNF grammar for your data format (using the InvisibleXML dialect of BNF) then you will be able to process the data using XML tools such as XSLT, XPath, and XSD.
CodePudding user response:
Sure. Here's a quick grammar that parses the file. I don't actually know the rules, or what the desired structure would be, so this is just a quick reverse engineering job.
{ A document is the two fixed header lines followed by one or more data blocks. }
Document = Warning, check, data+, #a? .
-Warning = -"WARNING: Do Not Modify This File!", -#a.
-check = -"Check 24.1.6 Data File", -#a .
{ A data block is "NAME number (" followed by one or more newline-separated
  properties and a closing ")", optionally preceded by a single space. }
data = name, -' ', number, -' (', -#a, property++-#a, -' '?, -')', -#a .
{ A name is letters with optional interior spaces; it must be at least two characters long. }
name = namechar, (namechar | ' ')*, namechar .
-namechar = ['A'-'Z'] | ['a'-'z'] .
number = ['0'-'9']+ .
property = -' '*, name, -' = ', value .
{ A value is everything up to the end of the line. }
value = ~[#a]+ .
From the sample above, it produces:
<Document xmlns:ixml="http://invisiblexml.org/NS" ixml:state="ambiguous">
<data>
<name>CONTROL</name>
<number>1</number>
<property>
<name>code</name>
<value>CEZ_90_1_2016</value>
</property>
<property>
<name>compliance mode</name>
<value>UA</value>
</property>
<property>
<name>version</name>
<value>24.1.6</value>
</property>
</data>
<data>
<name>LOCATION</name>
<number>1</number>
<property>
<name>state</name>
<value>Texas</value>
</property>
<property>
<name>city</name>
<value> USA</value>
</property>
</data>
<data>
<name>BUILDING</name>
<number>1</number>
<property>
<name>project type</name>
<value>NEW_CONSTRUCTION</value>
</property>
<property>
<name>bldg use type</name>
<value>WHOLE_BLDG</value>
</property>
<property>
<name>feet bldg height</name>
<value>0.000</value>
</property>
<property>
<name>number of stories</name>
<value>1</value>
</property>
<property>
<name>is nonresidential conditioning</name>
<value>TRUE</value>
</property>
<property>
<name>is residential conditioning</name>
<value>FALSE</value>
</property>
<property>
<name>is semiheated conditioning</name>
<value>FALSE</value>
</property>
<property>
<name>conditioning</name>
<value>HEATING_AND_COOLING</value>
</property>
</data>
<data>
<name>ENVELOPE</name>
<number>1</number>
<property>
<name>use orient details</name>
<value>TRUE</value>
</property>
<property>
<name>use vlt details</name>
<value>TRUE</value>
</property>
<property>
<name>use cool roof performance details</name>
<value>FALSE</value>
</property>
</data>
<data>
<name>AG WALL</name>
<number>1</number>
<property>
<name>wall type</name>
<value>MASONRY_AG_WALL</value>
</property>
<property>
<name>next to uncond space</name>
<value>FALSE</value>
</property>
<property>
<name>concrete thickness</name>
<value>12.00</value>
</property>
<property>
<name>concrete density</name>
<value>115.00</value>
</property>
<property>
<name>furring type</name>
<value>NO_FURRING</value>
</property>
<property>
<name>cmu type</name>
<value>CMU_PARTIAL_GROUT_CELLS_EMPTY</value>
</property>
<property>
<name>list position</name>
<value>1</value>
</property>
<property>
<name>description</name>
<value>&lt;|Concrete Block:12", Partially Grouted, Cells Empty|&gt;</value>
</property>
<property>
<name>assembly type</name>
<value>&lt;|Exterior Wall 1|&gt;</value>
</property>
<property>
<name>parent number</name>
<value>0</value>
</property>
<property>
<name>bldg use key</name>
<value>1884629903</value>
</property>
<property>
<name>continuous rvalue</name>
<value>10.00</value>
</property>
<property>
<name>daylight credit</name>
<value>0.000</value>
</property>
<property>
<name>orientation</name>
<value>WEST</value>
</property>
<property>
<name>allowance type</name>
<value>ENV_ALLOWANCE_NONE</value>
</property>
<property>
<name>exemption type</name>
<value>ENV_EXEMPTION_NONE</value>
</property>
<property>
<name>valid allowance type</name>
<value>FALSE</value>
</property>
<property>
<name>valid exemption type</name>
<value>FALSE</value>
</property>
<property>
<name>construction type</name>
<value>NON_RESIDENTIAL </value>
</property>
<property>
<name>adjacent space type</name>
<value>ADJACENT_SPACE_EXTERIOR </value>
</property>
<property>
<name>gross area</name>
<value>3377.000</value>
</property>
</data>
<data>
<name>DOOR</name>
<number>4</number>
<property>
<name>door type</name>
<value>INSUL_METAL_DOOR</value>
</property>
<property>
<name>door open type</name>
<value>SWINGING_DOOR</value>
</property>
<property>
<name>list position</name>
<value>3</value>
</property>
<property>
<name>description</name>
<value>&lt;|Insulated Metal|&gt;</value>
</property>
<property>
<name>assembly type</name>
<value>&lt;|Door 1|&gt;</value>
</property>
<property>
<name>parent number</name>
<value>1</value>
</property>
<property>
<name>bldg use key</name>
<value>1884629903</value>
</property>
<property>
<name>prop uvalue</name>
<value>0.100000</value>
</property>
<property>
<name>daylight credit</name>
<value>0.000</value>
</property>
<property>
<name>orientation</name>
<value>WEST</value>
</property>
<property>
<name>allowance type</name>
<value>ENV_ALLOWANCE_NONE</value>
</property>
<property>
<name>exemption type</name>
<value>ENV_EXEMPTION_NONE</value>
</property>
<property>
<name>valid allowance type</name>
<value>FALSE</value>
</property>
<property>
<name>valid exemption type</name>
<value>FALSE</value>
</property>
<property>
<name>construction type</name>
<value>NON_RESIDENTIAL </value>
</property>
<property>
<name>adjacent space type</name>
<value>ADJACENT_SPACE_EXTERIOR </value>
</property>
<property>
<name>gross area</name>
<value>21.000</value>
</property>
</data>
<data>
<name>AG WALL</name>
<number>2</number>
<property>
<name>wall type</name>
<value>METAL_FRAME_16_AG_WALL</value>
</property>
<property>
<name>next to uncond space</name>
<value>FALSE</value>
</property>
<property>
<name>list position</name>
<value>4</value>
</property>
<property>
<name>description</name>
<value>&lt;|Steel-Framed, 16" o.c.|&gt;</value>
</property>
<property>
<name>assembly type</name>
<value>&lt;|Exterior Wall 2|&gt;</value>
</property>
<property>
<name>parent number</name>
<value>0</value>
</property>
<property>
<name>bldg use key</name>
<value>1884629903</value>
</property>
<property>
<name>cavity rvalue</name>
<value>21.00</value>
</property>
<property>
<name>continuous rvalue</name>
<value>0.00</value>
</property>
<property>
<name>daylight credit</name>
<value>0.000</value>
</property>
<property>
<name>orientation</name>
<value>NORTH</value>
</property>
<property>
<name>allowance type</name>
<value>ENV_ALLOWANCE_NONE</value>
</property>
<property>
<name>exemption type</name>
<value>ENV_EXEMPTION_NONE</value>
</property>
<property>
<name>valid allowance type</name>
<value>FALSE</value>
</property>
<property>
<name>valid exemption type</name>
<value>FALSE</value>
</property>
<property>
<name>construction type</name>
<value>NON_RESIDENTIAL </value>
</property>
<property>
<name>adjacent space type</name>
<value>ADJACENT_SPACE_EXTERIOR </value>
</property>
<property>
<name>gross area</name>
<value>42.000</value>
</property>
</data>
</Document>
It's ambiguous because sometimes the thing I've called data ends with " )" (a space before the closing parenthesis) and sometimes it ends with ")" (no space). It is probably possible to remove that ambiguity, but I think it's harmless.
If there's more actual variability in the real data, with respect to whitespace for example, you might have to work a little harder.
Note: in order to avoid ambiguity about the formatting of the name followed by the number, this grammar requires names to be at least two characters long.
CodePudding user response:
Here is some simple code that parses everything into a dictionary:
using System;
using System.Linq;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
namespace ConsoleApp2
{
    /// <summary>
    /// Parses a parenthesis-delimited data file into a dictionary of containers,
    /// each holding its own dictionary of "key = value" properties.
    /// </summary>
    class Program
    {
        const string FileName = @"c:\temp\test.txt";

        /// <summary>Stages the line-oriented parser moves through.</summary>
        enum ParseState
        {
            /// <summary>Skipping lines until the one containing "Data File".</summary>
            FindHeader,
            /// <summary>Looking for a "NAME number (" line that opens a container.</summary>
            FindContainer,
            /// <summary>Reading "key = value" lines until a line ends with ")".</summary>
            ReadProperties
        }

        // Splits on the FIRST '=' so that values may themselves contain '='.
        // The closing ')' is stripped from the line before this pattern is applied.
        static readonly Regex PropertyPattern =
            new Regex(@"^(?'key'[^=]+)=(?'value'.*)$", RegexOptions.Compiled);

        /// <summary>
        /// Opens the file at <c>FileName</c> and parses it into a dictionary.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string dataFileName;
            Dictionary<string, Dictionary<string, string>> dict;

            // Dispose the reader so the file handle is released even if parsing throws.
            using (StreamReader reader = new StreamReader(FileName))
            {
                dict = Parse(reader, out dataFileName);
            }

            // dict is keyed by container header (e.g. "AG WALL 1"); each entry maps
            // property names (e.g. "wall type") to their trimmed string values.
        }

        /// <summary>
        /// Reads the data format line by line and collects every container.
        /// </summary>
        /// <param name="reader">Source of the text to parse; not disposed by this method.</param>
        /// <param name="dataFileName">
        /// Receives the trimmed header line containing "Data File", or an empty string
        /// when no such line is present.
        /// </param>
        /// <returns>
        /// A dictionary keyed by the text preceding "(" on each container line, whose
        /// values map trimmed property names to trimmed property values.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="FormatException">A line inside a container is not "key = value".</exception>
        /// <exception cref="ArgumentException">A container name or a property name is repeated.</exception>
        public static Dictionary<string, Dictionary<string, string>> Parse(TextReader reader, out string dataFileName)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            var containers = new Dictionary<string, Dictionary<string, string>>();
            Dictionary<string, string> current = null;
            ParseState state = ParseState.FindHeader;
            int lineNumber = 0;
            string rawLine;

            dataFileName = "";

            while ((rawLine = reader.ReadLine()) != null)
            {
                lineNumber++;
                string line = rawLine.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                switch (state)
                {
                    case ParseState.FindHeader:
                    {
                        if (line.Contains("Data File"))
                        {
                            dataFileName = line;
                            state = ParseState.FindContainer;
                        }
                        break;
                    }

                    case ParseState.FindContainer:
                    {
                        int open = line.IndexOf('(');
                        if (open >= 0)
                        {
                            string containerName = line.Substring(0, open).Trim();
                            current = new Dictionary<string, string>();
                            containers.Add(containerName, current);
                            state = ParseState.ReadProperties;
                        }
                        break;
                    }

                    case ParseState.ReadProperties:
                    {
                        // Only a ')' at the very end of the line closes the container;
                        // a ')' elsewhere is treated as part of the value.
                        bool closes = line[line.Length - 1] == ')';
                        string body = closes
                            ? line.Substring(0, line.Length - 1).TrimEnd()
                            : line;

                        // body is empty when ')' sits on a line of its own.
                        if (body.Length > 0)
                        {
                            Match match = PropertyPattern.Match(body);
                            if (!match.Success)
                            {
                                throw new FormatException(
                                    "Line " + lineNumber + ": expected 'key = value' but found \"" + line + "\".");
                            }

                            current.Add(
                                match.Groups["key"].Value.Trim(),
                                match.Groups["value"].Value.Trim());
                        }

                        if (closes)
                        {
                            current = null;
                            state = ParseState.FindContainer;
                        }
                        break;
                    }
                }
            }

            return containers;
        }
    }
}