I have a client application consuming data from an external API based on a configuration during startup.
Since calculating the necessary data is not easy for me, I tried to fake the implementation and calculate it locally for now.
There are many WorkItems linked to each other and the configuration only knows about the relations to consider.
Some explanations:
Workitem
A workitem has a unique id and a type
e.g.
{
id: 1,
type: "person"
}
so you know that Workitem 1 is of type person
Linkrole
A linkrole is a composite key of three keys
- The parrent workitem type
- The child workitem type
- The name of the role
e.g.
{
parentWorkItemType: "person",
childWorkItemType: "house",
name: "owns",
}
which represents Person owns House
Links
The backend knows about the relations, a link between workitems might look like
{
/* each link is unique */
linkRole: {
parentWorkItemType: "person",
childWorkItemType: "house",
name: "owns",
},
parentId: 1,
childId: 2
}
so you know that Person 1 owns House 2
Fake prerequisites:
Fake backend
I created some fake data for my calculations. The file backend.js represents the backend / database
// backend.js
export const workItems = [{
id: 1,
type: "car"
}, {
id: 2,
type: "car"
}, {
id: 3,
type: "bird"
}, {
id: 4,
type: "bird"
}, {
id: 5,
type: "bird"
}, {
id: 6,
type: "invalid"
}, {
id: 7,
type: "house"
}, {
id: 8,
type: "house"
}, {
id: 9,
type: "house",
}, {
id: 10,
type: "person",
}];
export const workItemLinks = [{
linkRole: {
parentWorkItemType: "car",
childWorkItemType: "bird",
name: "isChildOf",
},
parentId: 1,
childId: 3
}, {
linkRole: {
parentWorkItemType: "car",
childWorkItemType: "bird",
name: "isChildOf",
},
parentId: 1,
childId: 4
}, {
linkRole: {
parentWorkItemType: "bird",
childWorkItemType: "house",
name: "with",
},
parentId: 3,
childId: 7
}, {
linkRole: {
parentWorkItemType: "bird",
childWorkItemType: "house",
name: "with",
},
parentId: 3,
childId: 8
}, {
linkRole: {
parentWorkItemType: "person",
childWorkItemType: "car",
name: "owns",
},
parentId: 10,
childId: 1
}, {
linkRole: {
parentWorkItemType: "car",
childWorkItemType: "car",
name: "references",
},
parentId: 1,
childId: 2
}];
Fake API
I created some methods inside the file api.js to fake an API asking the backend for data.
Important sidenote: These methods just help me to find the data I need. I can add more "helpers" if needed (In the real world I can ask to add more API endpoints) so everything can be queried. Please feel free to modify the "Fake API".
// api.js
import { workItems, workItemLinks } from "./backend.js";
export function getWorkItemsByType(workItemType) {
return workItems.filter(workItem => workItem.type === workItemType);
}
export function getWorkItemsByIds(workItemIds) {
return workItems.filter(workItem => workItemIds.some(workItemId => workItemId === workItem.id));
}
export function getWorkItemLinksByLinkRoleAndLeftSideIds(linkRole, leftSideIds, leftSideIsParentSide) {
return workItemLinks.filter(workItemLink => {
// Pseudo equality check
if (workItemLink.linkRole.parentWorkItemType === linkRole.parentWorkItemType &&
workItemLink.linkRole.childWorkItemType === linkRole.childWorkItemType &&
workItemLink.linkRole.name === linkRole.name) {
const leftSideIdInLink = leftSideIsParentSide ? workItemLink.parentId : workItemLink.childId;
// Return this link if it matches with the left side id ( you're looking for the right side id )
return leftSideIds.some(leftSideId => leftSideId === leftSideIdInLink);
}
return false;
});
}
Fake configuration
I created definitions inside configuration.js to start the calculations based on those
// configuration.js
export const definitions = [
{ // fetch linked birds from every root car
linkRole: {
parentWorkItemType: "car",
childWorkItemType: "bird",
name: "isChildOf",
}
},
{ // !! Root Element !! Fetch work items based on type
workItemType: "car",
isRootWorkItem: true
},
{ // fetch linked houses from every bird ( car - bird - house )
linkRole: {
parentWorkItemType: "bird",
childWorkItemType: "house",
name: "with",
}
},
{ // fetch every person from every car ( person - car )
linkRole: {
parentWorkItemType: "person",
childWorkItemType: "car",
name: "owns",
}
},
{ // Self reference, fetch linked cars from every root car
linkRole: {
parentWorkItemType: "car",
childWorkItemType: "car",
name: "references",
}
},
];
Problem to solve:
I am looking for a performant way to fetch every related link and workitem from the backend based on the chain of relations starting with each root item.
Expected output based on the configuration:
export const workItems = [{
id: 1,
type: "car"
}, {
id: 2,
type: "car"
}, {
id: 3,
type: "bird"
}, {
id: 4,
type: "bird"
}, /* no car is linked to bird 5, no links for invalid 6 */ {
id: 7,
type: "house"
}, {
id: 8,
type: "house"
}, /* no bird is linked to house 9 */ {
id: 10,
type: "person",
}];
/* based on the configuration every workItemLink from the backend got fetched */
My first implementation approach:
import { inspect } from "util";
import { getWorkItemsByType, getWorkItemsByIds, getWorkItemLinksByLinkRoleAndLeftSideIds } from "./api.js";
import { definitions } from "./configuration.js";
// Find the root definition
const { workItemType: rootWorkItemType } = definitions.find(definition => definition.isRootWorkItem);
// Fetch all the root workitems and initialize the store holding unique workitems
const workItems = getWorkItemsByType(rootWorkItemType); // will hold car 1 and car 2
const initialWorkItemIds = workItems.map(workItem => workItem.id);
const workItemIdsToLoad = []; // try to read all missing ids from fetched links and fetch all the workitems at once later on
const workItemLinks = []; // global store for links
const definitionsToTraverse = definitions.filter(definition => definition.linkRole !== undefined); // do not consider the root definition
run(initialWorkItemIds, rootWorkItemType, definitionsToTraverse, workItemIdsToLoad, workItemLinks);
const missingWorkItemIdsToLoad = workItemIdsToLoad.filter(workItemIdToLoad => !rootWorkItemIds.some(rootWorkItemId => rootWorkItemId === workItemIdToLoad)); // filter out all the existing root item ids
const workItemsToAdd = getWorkItemsByIds(missingWorkItemIdsToLoad); // fetch all the missing workitems at once
workItems.push(...workItemsToAdd); // add them to the global store
console.log(inspect({
workItems,
workItemLinks
}, false, null, true))
function run(leftSideWorkItemIds, leftSideWorkItemType, definitionsToTraverse, workItemIdsToLoad, workItemLinks) {
/*
consider all the defitions from "definitionsToTraverse" directly related to "leftSideWorkItemType"
remove them from "definitionsToTraverse" to prevent endless loops
loop backwards so splice won't struggle with the indices
*/
for (let definitionIndex = definitionsToTraverse.length - 1; definitionIndex >= 0; definitionIndex--) {
const currentDefinition = definitionsToTraverse[definitionIndex];
const { linkRole } = currentDefinition;
const { parentWorkItemType, childWorkItemType } = linkRole;
const leftSideWorkItemTypeIsParent = parentWorkItemType === leftSideWorkItemType;
const leftSideWorkItemTypeIsChild = childWorkItemType === leftSideWorkItemType;
// Check the direct relation
if (leftSideWorkItemTypeIsParent || leftSideWorkItemTypeIsChild) {
// Remove the inspected definition
definitionsToTraverse.splice(definitionIndex, 1);
const relatedWorkItemLinks = getWorkItemLinksByLinkRoleAndLeftSideIds(linkRole, leftSideWorkItemIds, leftSideWorkItemTypeIsParent);
// store all the right side workitem IDs for the next run
const rightSideWorkItemIds = [];
for (let relatedWorkItemLinkIndex = 0; relatedWorkItemLinkIndex < relatedWorkItemLinks.length; relatedWorkItemLinkIndex ) {
const relatedWorkItemLink = relatedWorkItemLinks[relatedWorkItemLinkIndex];
// Process the link if not processed yet
if (!workItemLinks.some(workItemLink => /* !!! pseudo equality check !!! */
workItemLink.linkRole.parentWorkItemType === relatedWorkItemLink.linkRole.parentWorkItemType &&
workItemLink.linkRole.childWorkItemType === relatedWorkItemLink.linkRole.childWorkItemType &&
workItemLink.linkRole.name === relatedWorkItemLink.linkRole.name &&
workItemLink.parentId === relatedWorkItemLink.parentId &&
workItemLink.childId === relatedWorkItemLink.childId)) {
// Push the link to the store
workItemLinks.push(relatedWorkItemLink);
// Get the right side id from the link
const rightSideWorkItemId = leftSideWorkItemTypeIsParent ? relatedWorkItemLink.childId : relatedWorkItemLink.parentId;
rightSideWorkItemIds.push(rightSideWorkItemId);
// Push the id to the store if it doesn't exist
if (!workItemIdsToLoad.some(workItemIdToLoad => workItemIdToLoad === rightSideWorkItemId)) {
workItemIdsToLoad.push(rightSideWorkItemId);
}
}
}
// Find the opposite workitem type of the current definition
const rightSideWorkItemType = leftSideWorkItemTypeIsParent ? childWorkItemType : parentWorkItemType;
// Run again but use this definition as the previous one
run(rightSideWorkItemIds, rightSideWorkItemType, definitionsToTraverse, workItemIdsToLoad, workItemLinks);
}
}
}
The problem is that my approach crashes because the backwards loop even runs if definitionsToTraverse
is empty. And I'm mutating the arrays from the parameters directly, I think I shouldn't do that.
So any help would be appreciated a lot!
CodePudding user response:
I may be oversimplifying what you need to do here. I'm treating this as described in my comment on the question:
Do I have this right? You have (here) a single root type of car. and so you want to include both cars, and then you want to include all nodes which have either of those cars as parent or child values in a link, and then include all nodes which have any of those results a a parent or child in a link, and so on recursively, collecting all the results not in a tree but in a simple list.
If that's correct, then this seems like a reasonable approach:
const getLinkedIds = (links, ids) => {
const toProcess = [...ids], processed = []
while (toProcess .length > 0) {
const id = toProcess .pop ()
processed .push (id)
links .filter (({parentId}) => id == parentId)
.forEach (({childId}) => {
if (! processed .includes (childId) && ! toProcess .includes (childId)) {
toProcess .push (childId)
}
})
links .filter (({childId}) => id == childId)
.forEach (({parentId}) => {
if (! processed .includes (parentId) && ! toProcess .includes (parentId)) {
toProcess .push (parentId)
}
})
}
return [...processed]
}
const linkedItems = (defs, items, links) => {
const rootTypes = defs .filter (({isRootWorkItem = false}) => isRootWorkItem)
.map (({workItemType}) => workItemType)
const roots = items .filter (({type}) => rootTypes .includes (type))
const ids = getLinkedIds (links, roots.flatMap (({id}) => id))
return items .filter (({id}) => ids .includes (id))
}
const definitions = [ /* linkTypes deleted */ {workItemType: "car", isRootWorkItem: true}]
const workItems = [{id: 1, type: "car"}, {id: 2, type: "car"}, {id: 3, type: "bird"}, {id: 4, type: "bird"}, {id: 5, type: "bird"}, {id: 6, type: "invalid"}, {id: 7, type: "house"}, {id: 8, type: "house"}, {id: 9, type: "house"}, {id: 10, type: "person"}]
const workItemLinks = [ /* linkTypes deleted */ {parentId: 1, childId: 3}, {parentId: 1, childId: 4}, {parentId: 3, childId: 7}, {parentId: 3, childId: 8}, {parentId: 10, childId: 1}, {parentId: 1, childId: 2}]
console .log (
linkedItems (definitions, workItems, workItemLinks)
)
.as-console-wrapper {max-height: 100% !important; top: 0}
In linkedItems
, we start with the definitions to collect the root types, then use those to collect all the root elements. We then call our main function, getLinkedIds
using links, and the ids of these elements. Then we filter the items to find those whose ids we chose.
getLinkedIds
keeps two lists of ids: those still to process and those we've already processed, repeatedly removing an id from the first list, adding it to the second and then queueing up all the unprocessed parentId
s for which we have a childId
link to the current value and then all the unprocessed childId
s for which we have a parentId
link to the current value. When the toProcess
queue is empty, we return the processed
one.
I haven't tried to read your code and understand how it differs from this, but if my guess is correct about your requirements, this is relatively simple.
This is not my usual coding style, and it bothers me. I tend to work with direct recursion, and I usually use immutable data structures. I would love to see an approach that handles this using that style, but I didn't come up with one right away.
CodePudding user response:
General Considerations
Suggest pushing most of the work onto the server, as presumably the workItems
and workItemLinks
will be comprised of a large number of records. If the work of traversing the tree of workItems
is left to the client making individual API calls, then network latency comes into play...
Data Structure
Suggest normalizing the data as follows:
- WorkItems - No change.
- WorkItemLinks - Reduce attributes to
parentId
andchildId
, which is essentially a many-to-many entity with the WorkItems entity. - Definitions - Flatten to
parentWorkItemType
,childWorkItemType
, andname
. Root entries are handled by settingparentWorkItemType
to null.
Note also that realistically, the need for Definitions
is likely not necessary when traversing WorkItemLinks
, as presumably the entries within the WorkItemLinks
, when created, were only permitted to be created if the relationship between the parentId
type and childId
type conformed to the Defintions
. If indeed this is the case, then the proposed code can likely be simplified further...
Proposed APIs
The solution below proposes two (2) APIs:
- getDefinitionChain( rootWorkItemType ) - Given a rootWorkItemType (eg, "car"), this API traverses the children and parents of the
Definition
entity, and returns all the relatedworkItemTypes
. - getWorkItemChain( rootWorkItemType ) - Initially calls getDefinitionChain to identify the related
workItemTypes
, and then traversesworkItems
starting with the rootWorkItemType.
Again, the concept being that these APIs are server side, reducing the chattiness between the client and server...
const workItems = [{"id":1,"type":"car"},{"id":2,"type":"car"},{"id":3,"type":"bird"},{"id":4,"type":"bird"},{"id":5,"type":"bird"},{"id":6,"type":"invalid"},{"id":7,"type":"house"},{"id":8,"type":"house"},{"id":9,"type":"house"},{"id":10,"type":"person"}];
const workItemLinks = [
{
"parentId": 1,
"childId": 3
},
{
"parentId": 1,
"childId": 4
},
{
"parentId": 3,
"childId": 7
},
{
"parentId": 3,
"childId": 8
},
{
"parentId": 10,
"childId": 1
},
{
"parentId": 1,
"childId": 2
}
];
const definitions = [
{
"parentWorkItemType": "car",
"childWorkItemType": "bird",
"name": "isChildOf"
},
{
"parentWorkItemType": null,
"childWorkItemType": "car",
"name": "root"
},
{
"parentWorkItemType": "bird",
"childWorkItemType": "house",
"name": "with"
},
{
"parentWorkItemType": "person",
"childWorkItemType": "car",
"name": "owns"
},
{
"parentWorkItemType": "car",
"childWorkItemType": "car",
"name": "references"
}
];
function getDefinitionChain( rootWorkItemType ) {
let alreadyVisitedType = new Set();
let definitionChain = [];
function recurse( workItemType ) {
// Visit the children types...
let nodeTypeChildList = definitions.filter( def => def.parentWorkItemType === workItemType );
for ( let childType of nodeTypeChildList ) {
if ( !alreadyVisitedType.has( childType.childWorkItemType ) ) {
alreadyVisitedType.add( childType.childWorkItemType );
definitionChain.push( childType.childWorkItemType );
recurse( childType.childWorkItemType );
}
}
// Visit the parent types...
let nodeTypeParentList = definitions.filter( def => def.childWorkItemType === workItemType );
for ( let parentType of nodeTypeParentList ) {
// Don't go up the definition chain if parent is null.
if ( parentType.parentWorkItemType && !alreadyVisitedType.has( parentType.parentWorkItemType ) ) {
alreadyVisitedType.add( parentType.parentWorkItemType );
definitionChain.push( parentType.parentWorkItemType );
recurse( parentType.parentWorkItemType );
}
}
}
recurse( rootWorkItemType )
if ( !alreadyVisitedType.has( rootWorkItemType ) ) {
definitionChain.push( rootWorkItemType );
}
return definitionChain;
}
function getWorkItemChain( rootWorkItemType ) {
// Get the list of allowable definitions...
let allowableDefinitions = new Set( getDefinitionChain( rootWorkItemType ) );
// Now, traverse the WorkItems tree, ensuring that they are in the WorkItemType list...
let wim = new Map();
function recurse( workItemCandidates ) {
for ( let workItem of workItemCandidates ) {
// Is this a workItemType that conforms to our allowable definitions?
if ( allowableDefinitions.has( workItem.type ) ) {
// If never having traversed this WorkItem, then let's traverse the children and parents.
if ( !wim.has( workItem.id ) ) {
wim.set( workItem.id, workItem );
// Get the list of WorkItems that are children to the workItem.id.
recurse( workItemLinks.filter( wix => wix.parentId === workItem.id ).map( wil => workItems.find( wif => wif.id === wil.childId )) );
// Get the list of WorkItems that are parents to the workItem.id.
recurse( workItemLinks.filter( wix => wix.childId === workItem.id ).map( wil => workItems.find( wif => wif.id === wil.parentId )) );
}
}
}
}
// Start with all the workItems that are of type rootWorkItemType.
recurse( workItems.filter( wi => wi.type === rootWorkItemType ) );
return [...wim.values()];
}
console.log( 'WorkItem Types in Definitions with a root of "car".' );
let definitionChain = getDefinitionChain( 'car' );
console.log( definitionChain );
console.log( 'WorkItems with a root of "car"' );
let workItemChain = getWorkItemChain( 'car' );
console.log( workItemChain );