Here I am trying to retrieve all the internship offers ("stage" in French) from LinkedIn.
If I run the same code against a simpler website and change my search parameters, it works. I cannot see what I am doing wrong.
// Third-party dependencies: web server, HTTP client, and HTML parser.
const express = require('express');
const axios = require('axios');
const cheerio = require('cheerio');

// Port handed to app.listen at the end of the script.
const PORT = 8000;

// Express application on which the routes below are registered.
const app = express();

// Module-level array for scraped { title, url } entries.
const articles = [];
// Root route: answers every GET / with the JSON string "Scraping".
app.get('/', (_req, res) => {
  res.json('Scraping');
});
/**
 * GET /news — downloads the LinkedIn jobs page, collects every <a> element
 * matched by the `:contains("stage")` selector, and responds with a JSON
 * array of { title, url } objects.
 *
 * If the download or the parsing fails, the error is logged and the client
 * receives a 500 response instead of being left waiting.
 */
app.get('/news', (req, res) => {
  axios
    .get('https://www.linkedin.com/jobs/')
    .then((response) => {
      const html = response.data;
      const $ = cheerio.load(html);
      // Collected per request so repeated calls do not keep appending to a
      // list shared between requests.
      const links = [];

      $('a:contains("stage")', html).each(function () {
        // A `function` (not an arrow) so that `this` is the matched element.
        const title = $(this).text();
        const url = $(this).attr('href');
        links.push({ title, url });
      });

      res.json(links);
    })
    .catch((err) => {
      console.error(err);
      // Always answer the request; otherwise the client hangs until it times out.
      if (!res.headersSent) {
        res.status(500).json({ error: 'Failed to retrieve job listings' });
      }
    });
});
// Start the HTTP server. A template literal (backticks) is required for the
// port to be interpolated into the log message.
app.listen(PORT, () => console.log(`server running on PORT ${PORT}`));
CodePudding user response:
I was able to scrape listings for data engineers with this: I removed `, html` from the selector call and changed it to `$('a:contains("Data")').each`. I also added a `console.log` to the `/news` handler; requesting http://localhost:8000/news then printed some URLs.
// Third-party dependencies: web server, HTTP client, and HTML parser.
const express = require('express');
const axios = require('axios');
const cheerio = require('cheerio');

// Port handed to app.listen at the end of the script.
const PORT = 8000;

// Express application on which the routes below are registered.
const app = express();

// Module-level array for scraped { title, url } entries.
const articles = [];
// Root route: answers every GET / with the JSON string "Scraping".
app.get('/', (_req, res) => {
  res.json('Scraping');
});
/**
 * GET /news — downloads the LinkedIn jobs page, collects every <a> element
 * whose text contains "Data", logs the result, and responds with a JSON
 * array of { title, url } objects.
 *
 * If the download or the parsing fails, the error is logged and the client
 * receives a 500 response instead of being left waiting.
 */
app.get('/news', (req, res) => {
  axios
    .get('https://www.linkedin.com/jobs/')
    .then((response) => {
      const html = response.data;
      const $ = cheerio.load(html);
      // Collected per request so repeated calls do not keep appending to a
      // list shared between requests.
      const links = [];

      // Find <a> elements whose text contains the substring "Data".
      // NOTE(review): `:contains` is presumably case-sensitive, so "data"
      // would not match — confirm against the cheerio selector docs.
      $('a:contains("Data")').each(function () {
        // A `function` (not an arrow) so that `this` is the matched element.
        const title = $(this).text();
        const url = $(this).attr('href');
        links.push({ title, url });
      });

      console.log(links);
      res.json(links);
    })
    .catch((err) => {
      console.error(err);
      // Always answer the request; otherwise the client hangs until it times out.
      if (!res.headersSent) {
        res.status(500).json({ error: 'Failed to retrieve job listings' });
      }
    });
});
// Start the HTTP server. A template literal (backticks) is required for the
// port to be interpolated into the log message.
app.listen(PORT, () => console.log(`server running on PORT ${PORT}`));