I'm trying to scrape a few pieces of text from this URL https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate
So far I have one piece working, but I can't figure out how to get the other two.
I'm trying to get three values: the original text (eng), the Chinese characters (character), and the Pinyin (cn).
Here is what I have so far. The eng value works, but I can't get cn or character to work.
import fetch from "node-fetch"
import cheerio from "cheerio"
/**
 * Requests the given address and resolves with the response body as text.
 *
 * @param {string} URL - Address of the page to download.
 * @returns {Promise<string>} The raw body of the response.
 */
const getRawData = async (URL) => {
  const response = await fetch(URL);
  const body = await response.text();
  return body;
};
// Google Translate page requesting "Hello" from English (sl=en) to Traditional Chinese (tl=zh-TW).
const URL = "https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate";

/**
 * Downloads the translate page, parses the HTML with Cheerio and logs the
 * source text, the Pinyin and the Chinese characters.
 *
 * NOTE(review): only the HTML returned by the HTTP request is parsed here.
 * If the translation elements are inserted by client-side JavaScript, the
 * selectors below match nothing and empty strings are logged — confirm
 * against the actual response.
 *
 * @returns {Promise<void>} Resolves once the three values have been logged.
 */
const getData = async () => {
  const data = await getRawData(URL);
  const $ = cheerio.load(data);

  // `.text()` is the Cheerio accessor for an element's text. Reading `.data`
  // on an element node yields undefined, and `.data` on a selection is the
  // method itself rather than the text. `.first().text()` also returns ""
  // instead of throwing when nothing matched.
  const eng = $("div.D5aOJc").first().text();
  const cn = $("div.kO6q6e").first().text();
  const character = $("span.Q4iAWc").first().text();

  console.log(eng, cn, character);
};

// Report network/parsing failures instead of leaving the promise unhandled.
getData().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
CodePudding user response:
As Sandun Isuru Niraj said, you need Puppeteer, which loads the page in a real browser. Here is a solution to your problem:
const puppeteer = require("puppeteer");
// Text to translate, before URL encoding.
const queryup = "Hello";
// The value is placed inside a single query-string parameter, so it needs
// encodeURIComponent: encodeURI leaves characters such as "&", "=", "#" and
// "+" untouched, which would break or truncate the `text` parameter.
const query = encodeURIComponent(queryup);
/**
 * Opens Google Translate (English to Traditional Chinese) in a Puppeteer
 * browser, waits for the translation element to appear and logs the source
 * text, the Pinyin and the Chinese characters.
 *
 * @param {string} searchQuery - Text to translate. It is interpolated directly
 *   into the URL's `text` parameter, so it must already be URL-encoded.
 * @returns {Promise<void>} Resolves after the values are logged and the
 *   browser has been closed.
 */
async function GTranslate(searchQuery) {
  const browser = await puppeteer.launch({
    headless: false,
  });
  try {
    const page = await browser.newPage();
    await page.goto(`https://translate.google.ca/?sl=en&tl=zh-TW&text=${searchQuery}&op=translate`);
    // Wait for the translated-text element before reading anything.
    await page.waitForSelector(".Q4iAWc");
    const eng = await page.$eval(".D5aOJc.Hapztf", (el) => el.textContent);
    const cn = await page.$eval(".dePhmb .kO6q6e", (el) => el.textContent);
    const character = await page.$eval(".Q4iAWc", (el) => el.textContent);
    console.log(eng, cn, character);
  } finally {
    // Always close the browser, even when navigation, the selector wait or an
    // $eval call throws; otherwise the browser process is left running.
    await browser.close();
  }
}

// Report failures instead of leaving the promise unhandled.
GTranslate(query).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Output:
Hello Nǐ hǎo 你好