Home > Software engineering >  Get data from HTML text using JavaScript in Angular
Get data from HTML text using JavaScript in Angular

Time:11-30

I am trying to extract data from HTML text.

First, I fetch the HTML text:

// Request the page as plain text rather than JSON. The `as "json"` cast only
// affects type checking; the option value sent at runtime is still "text".
const url = "https://********/dash/video/mp4/RP4-6";
this.http2.get(url, { responseType: "text" as "json" }).subscribe(
  (result) => {
    console.log(result); // result is the HTML text of the page
  },
  (error) => {
    // Report request failures instead of silently discarding them.
    console.error("Failed to fetch HTML text:", error);
  }
);

result html :

<html>
<head><title>Index of /dash/video/mp4/RP4-6/</title></head>
<body>
<h1>Index of /dash/video/mp4/RP4-6/</h1><hr><pre><a href="../">../</a>
<a href="21-10-27-13-14.mp4">21-10-27-13-14.mp4</a>                                 27-Oct-2021 10:15              689323
<a href="21-10-27-13-17.mp4">21-10-27-13-17.mp4</a>                                 27-Oct-2021 10:18              749977
<a href="21-10-27-13-19.mp4">21-10-27-13-19.mp4</a>                                 27-Oct-2021 10:20              704187
<a href="21-10-27-13-21.mp4">21-10-27-13-21.mp4</a>                                 27-Oct-2021 10:22              665388
<a href="21-10-27-13-22.mp4">21-10-27-13-22.mp4</a>                                 27-Oct-2021 10:23              626806
<a href="21-10-27-13-25.mp4">21-10-27-13-25.mp4</a>                                 27-Oct-2021 10:26              609054
<a href="21-10-27-13-31.mp4">21-10-27-13-31.mp4</a>                                 27-Oct-2021 10:32              684083
<a href="21-10-27-13-38.mp4">21-10-27-13-38.mp4</a>                                 27-Oct-2021 10:39              697931
<a href="21-10-27-13-39.mp4">21-10-27-13-39.mp4</a>                                 27-Oct-2021 10:40              806641
<a href="21-10-27-13-42.mp4">21-10-27-13-42.mp4</a>                                 27-Oct-2021 10:43              689665
<a href="21-10-27-13-44.mp4">21-10-27-13-44.mp4</a>                                 27-Oct-2021 10:45              745707
<a href="21-10-27-13-45.mp4">21-10-27-13-45.mp4</a>                                 27-Oct-2021 10:46              751812
<a href="21-10-27-13-47.mp4">21-10-27-13-47.mp4</a>                                 27-Oct-2021 10:48              624906
<a href="21-10-27-13-50.mp4">21-10-27-13-50.mp4</a>                                 27-Oct-2021 10:51              773567
<a href="21-10-27-13-53.mp4">21-10-27-13-53.mp4</a>                                 27-Oct-2021 10:54              824234
<a href="21-10-27-13-54.mp4">21-10-27-13-54.mp4</a>                                 27-Oct-2021 10:55              830270
<a href="21-10-27-13-56.mp4">21-10-27-13-56.mp4</a>                                 27-Oct-2021 10:57              824970
<a href="21-10-27-13-57.mp4">21-10-27-13-57.mp4</a>                                 27-Oct-2021 10:58              830408
<a href="21-10-27-14-00.mp4">21-10-27-14-00.mp4</a>                                 27-Oct-2021 11:01              833959
<a href="21-10-27-14-01.mp4">21-10-27-14-01.mp4</a>                                 27-Oct-2021 11:02              823081
<a href="21-10-27-14-04.mp4">21-10-27-14-04.mp4</a>                                 27-Oct-2021 11:05              823855
<a href="21-10-27-14-05.mp4">21-10-27-14-05.mp4</a>                                 27-Oct-2021 11:06              827716
<a href="21-10-27-14-06.mp4">21-10-27-14-06.mp4</a>                                 27-Oct-2021 11:07              830899
<a href="21-10-27-14-07.mp4">21-10-27-14-07.mp4</a>                                 27-Oct-2021 11:08              831162
<a href="21-11-02-14-45.mp4">21-11-02-14-45.mp4</a>                                 02-Nov-2021 12:46              765390
<a href="21-11-02-14-46.mp4">21-11-02-14-46.mp4</a>                                 02-Nov-2021 12:47              709565
<a href="21-11-02-14-50.mp4">21-11-02-14-50.mp4</a>                                 02-Nov-2021 12:51              728354
<a href="21-11-02-14-51.mp4">21-11-02-14-51.mp4</a>                                 02-Nov-2021 12:52              730780
<a href="21-11-02-14-52.mp4">21-11-02-14-52.mp4</a>                                 02-Nov-2021 13:00              117131
<a href="21-11-02-14-54.mp4">21-11-02-14-54.mp4</a>                                 02-Nov-2021 12:55              657311
<a href="21-11-02-14-55.mp4">21-11-02-14-55.mp4</a>                                 02-Nov-2021 12:56              592647
<a href="21-11-02-15-02.mp4">21-11-02-15-02.mp4</a>                                 02-Nov-2021 13:03              601726
<a href="21-11-02-15-08.mp4">21-11-02-15-08.mp4</a>                                 02-Nov-2021 15:14              166307
<a href="21-11-02-15-13.mp4">21-11-02-15-13.mp4</a>                                 02-Nov-2021 13:14              802128
<a href="21-11-02-17-16.mp4">21-11-02-17-16.mp4</a>                                 02-Nov-2021 15:17              735023
<a href="21-11-02-17-17.mp4">21-11-02-17-17.mp4</a>                                 02-Nov-2021 15:18              716030
<a href="21-11-02-17-18.mp4">21-11-02-17-18.mp4</a>                                 02-Nov-2021 15:19              794337
<a href="21-11-02-17-22.mp4">21-11-02-17-22.mp4</a>                                 02-Nov-2021 15:23              757134
<a href="21-11-02-17-23.mp4">21-11-02-17-23.mp4</a>                                 02-Nov-2021 15:24              831960
<a href="21-11-02-17-28.mp4">21-11-02-17-28.mp4</a>                                 02-Nov-2021 15:29              715001
<a href="21-11-02-17-30.mp4">21-11-02-17-30.mp4</a>                                 02-Nov-2021 15:31              743813
<a href="21-11-02-17-32.mp4">21-11-02-17-32.mp4</a>                                 02-Nov-2021 15:33              754264
<a href="21-11-02-17-34.mp4">21-11-02-17-34.mp4</a>                                 02-Nov-2021 15:35              791702
<a href="21-11-02-17-36.mp4">21-11-02-17-36.mp4</a>                                 02-Nov-2021 15:37              793964
<a href="21-11-02-17-37.mp4">21-11-02-17-37.mp4</a>                                 02-Nov-2021 15:38              766486
<a href="21-11-02-17-41.mp4">21-11-02-17-41.mp4</a>                                 02-Nov-2021 15:42              825196
<a href="21-11-02-17-44.mp4">21-11-02-17-44.mp4</a>                                 02-Nov-2021 15:45              794551
<a href="21-11-02-17-46.mp4">21-11-02-17-46.mp4</a>                                 02-Nov-2021 15:47              818823
<a href="21-11-02-17-47.mp4">21-11-02-17-47.mp4</a>                                 02-Nov-2021 15:48              803400
<a href="21-11-02-17-48.mp4">21-11-02-17-48.mp4</a>                                 02-Nov-2021 15:49              833760
<a href="21-11-04-12-03.mp4">21-11-04-12-03.mp4</a>                                 04-Nov-2021 10:04              565540
<a href="21-11-04-12-09.mp4">21-11-04-12-09.mp4</a>                                 04-Nov-2021 10:10              764848
<a href="21-11-04-12-10.mp4">21-11-04-12-10.mp4</a>                                 04-Nov-2021 10:11              818846
<a href="21-11-04-12-11.mp4">21-11-04-12-11.mp4</a>                                 04-Nov-2021 10:12              688041
<a href="21-11-04-12-13.mp4">21-11-04-12-13.mp4</a>                                 04-Nov-2021 10:14              752951
<a href="21-11-04-12-19.mp4">21-11-04-12-19.mp4</a>                                 04-Nov-2021 10:20              786564
</pre><hr></body>
</html>

I want to get all data in href like 21-11-04-12-13.mp4.

After searching for a while, my idea is to convert the HTML text to a JSON object. Is that doable? I need a general approach because this page is not fixed and the data inside it will change.

How can I solve this problem?

CodePudding user response:

You can do it with a regex, just do it like :

// Collect every href value: the lookbehind anchors on `href="` and the lazy
// `.*?` stops at the next `"`, so only the attribute value itself is matched.
// String.prototype.match returns null when nothing matches, hence the `|| []`.
let tab = str.match(/(?<=href=").*?(?=")/gm) || [];
// Remove the first element because it is the parent-directory link "../".
tab = tab.slice(1);

The result will be an array with all the href you want

CodePudding user response:

You can use a RegExp to extract out text values matching certain patterns and then return those values inside a JS object.

The regular-expression pattern >(\d\d\-\d\d\-\d\d\-\d\d\-\d\d\.mp4)< will match the #text node contents of the <a></a> elements - though you may prefer to match the href="" attribute instead, like so:

href="(\d\d\-\d\d\-\d\d\-\d\d\-\d\d\.mp4)">


/**
 * Extracts file names of the form `dd-dd-dd-dd-dd.mp4` that appear as the
 * text between a `>` and a `<` in the given HTML string.
 *
 * @param html HTML text to scan.
 * @returns The matched file names in order of appearance, without the
 *          surrounding `>` and `<`; an empty array when nothing matches.
 */
function extractFileNames( html: string ): string[] {

    // The capture group isolates the file name; match[0] would also contain
    // the delimiting `>` and `<` characters.
    const reg = />(\d\d\-\d\d\-\d\d\-\d\d\-\d\d\.mp4)</ig;

    const matches: string[] = [];

    // With the `g` flag, each exec() call resumes after the previous match
    // and returns null once the input is exhausted.
    let match: RegExpExecArray | null = null;
    while( ( match = reg.exec( html ) ) !== null ) {

        matches.push( match[1] );
    }

    return matches;
}

Here's a JS demo (TypeScript type annotations commented-out):

// Sample input for the demo: a template literal holding eight anchor lines of
// the form <a href="NAME.mp4">NAME.mp4</a>; all but the last are followed by a
// date, a time and a number.
const sampleInput = `

<a href="21-10-27-14-00.mp4">21-10-27-14-00.mp4</a>                                 27-Oct-2021 11:01              833959
<a href="21-10-27-14-01.mp4">21-10-27-14-01.mp4</a>                                 27-Oct-2021 11:02              823081
<a href="21-10-27-14-04.mp4">21-10-27-14-04.mp4</a>                                 27-Oct-2021 11:05              823855
<a href="21-10-27-14-05.mp4">21-10-27-14-05.mp4</a>                                 27-Oct-2021 11:06              827716
<a href="21-10-27-14-06.mp4">21-10-27-14-06.mp4</a>                                 27-Oct-2021 11:07              830899
<a href="21-10-27-14-07.mp4">21-10-27-14-07.mp4</a>                                 27-Oct-2021 11:08              831162
<a href="21-11-02-14-45.mp4">21-11-02-14-45.mp4</a>                                 02-Nov-2021 12:46              765390
<a href="21-11-02-14-46.mp4">21-11-02-14-46.mp4</a>`
;


/**
 * Collects every `dd-dd-dd-dd-dd.mp4` file name that appears as the text
 * between a `>` and a `<` in the given HTML string.
 *
 * @param {string} html - HTML text to scan.
 * @returns {string[]} The captured file names, in order of appearance.
 */
function extractFileNames( html ) {
    const fileNamePattern = />(\d\d\-\d\d\-\d\d\-\d\d\-\d\d\.mp4)</ig;
    const fileNames = [];

    // A global regex resumes from its lastIndex on each exec() call and
    // yields null once no further match exists.
    for ( let hit = fileNamePattern.exec( html ); hit !== null; hit = fileNamePattern.exec( html ) ) {
        // Index 0 is the whole match including `>` and `<`; index 1 is the name only.
        const [ , fileName ] = hit;
        fileNames.push( fileName );
    }

    return fileNames;
}

/**
 * Runs the demo: extracts the file names from `sampleInput` and appends one
 * <li> per name to the element whose id is "output".
 */
function doTheThing() {
    const fileNames = extractFileNames( sampleInput );
    const list = document.getElementById( 'output' );

    fileNames.forEach( ( fileName ) => {
        const item = document.createElement( 'li' );
        // textContent inserts the name as plain text, not as markup.
        item.textContent = fileName;
        list.appendChild( item );
    } );
}
<button type="button" onclick="doTheThing()">Click me</button>

<ul id="output"></ul>
<iframe name="sif1" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>

CodePudding user response:

First, convert your response text into a Document. Then you can use the DOM APIs to retrieve the href values:

// Parse the response text as HTML. "text/html" is used instead of "text/xml"
// because an HTML page that is not well-formed XML (for example one with
// unclosed <hr> tags) makes the XML parser return an error document.
let doc = new DOMParser().parseFromString(result, "text/html");
const anchors = doc.getElementsByTagName('a');

// Collect the href attribute of every <a> element, in document order.
// This includes a parent-directory link such as "../" if the page has one.
let urlList = [];
for (let anchor of anchors) {
  urlList.push(anchor.getAttribute('href'));
}
  • Related