// Edit me ///////////////////////////////////////////////
let search = "Google * and SEO";
// Edit me ///////////////////////////////////////////////

// Query-string form of the phrase: surrounding whitespace is dropped, reserved
// characters are percent-encoded, and EVERY space becomes '+' (a plain
// String.prototype.replace with a string pattern would only touch the first
// one). encodeURIComponent leaves '*' untouched, so a '*' typed in the phrase
// reaches the URL unchanged.
let searchQ = encodeURIComponent(search.trim()).replace(/%20/g, '+');

// %22 is an encoded double quote, so the phrase is sent wrapped in quotes.
// NOTE(review): num=1000 is presumably the requested result count - confirm
// the search endpoint still honors it.
let searchUrl = `https://www.google.com/search?q=%22${searchQ}%22&num=1000`;

// Plain-text stop-word list; it is consumed by splitting on newlines.
let stopwordsUrl = "https://gist.githubusercontent.com/sebleier/554280/raw/7e0e4a1ce04c2bb7bd41089c9821dbcf6d0c786c/NLTK's%20list%20of%20english%20stopwords";

// Shared stop-word set, declared here so the fetch pipeline below can fill and
// read it.
let stopWords = new Set();
// Fetch the stop-word list, then the search results page, and print every
// distinct result title (<h3>) with stop words and the most frequent words
// removed.

/**
 * Normalizes text the way title tokens are compared: trimmed, lower-cased,
 * and stripped of every character that is not an ASCII letter, a digit or
 * whitespace (underscores are removed as well).
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  return text.trim().toLowerCase().replace(/[^\w\s]|_/g, "");
}

/**
 * Turns the raw stop-word file into the entries to register.
 * Blank lines are skipped. Each word is returned as written and, when it
 * differs, also in normalized form, so an entry such as "don't" can match a
 * title token from which the apostrophe has already been stripped.
 * @param {string} stopwordsData - Newline-separated stop words.
 * @returns {string[]} Entries to add to the stop-word set.
 */
function parseStopWords(stopwordsData) {
  const entries = [];
  for (const line of stopwordsData.split(/\n/)) {
    const word = line.trim();
    if (word === "") continue;
    entries.push(word);
    const normalized = normalizeText(word);
    if (normalized !== "" && normalized !== word) entries.push(normalized);
  }
  return entries;
}

/**
 * Splits a title into normalized words, dropping stop words.
 * Empty tokens are dropped explicitly: stripping punctuation can leave
 * leading whitespace (or an empty string), and split() then yields "".
 * @param {string} title - Raw title text.
 * @param {Set<string>} stopWordSet - Words to discard.
 * @returns {string[]} Remaining words in their original order.
 */
function tokenizeTitle(title, stopWordSet) {
  return normalizeText(title)
    .split(/\s+/)
    .filter(word => word !== "" && !stopWordSet.has(word));
}

/**
 * Counts how often each word occurs across all titles.
 * A Map is used instead of a plain object so that words which are also
 * Object.prototype property names (e.g. "constructor") are counted as numbers.
 * @param {string[][]} tokenLists - One word list per title.
 * @returns {Map<string, number>} Occurrences per word.
 */
function countWords(tokenLists) {
  const counts = new Map();
  for (const word of tokenLists.flat()) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }
  return counts;
}

/**
 * Selects the words whose count is strictly greater than the count found at
 * the given percentile position of the ascending list of counts.
 * With no words at all the threshold is undefined and nothing is selected.
 * @param {Map<string, number>} counts - Occurrences per word.
 * @param {number} percentile - Fraction of the sorted counts, e.g. 0.98.
 * @returns {Set<string>} Words considered too frequent.
 */
function findFrequentWords(counts, percentile) {
  const sortedCounts = [...counts.values()].sort((a, b) => a - b);
  const thresholdValue = sortedCounts[Math.floor(sortedCounts.length * percentile)];
  return new Set([...counts.keys()].filter(word => counts.get(word) > thresholdValue));
}

/**
 * Rebuilds each title without the frequent words and keeps only the distinct
 * results that still contain more than one word.
 * @param {string[][]} tokenLists - One word list per title.
 * @param {Set<string>} frequentWords - Words to remove.
 * @returns {Set<string>} Distinct reconstructed titles, in first-seen order.
 */
function reconstructTitles(tokenLists, frequentWords) {
  return new Set(
    tokenLists
      .map(words => words.filter(word => !frequentWords.has(word)).join(' '))
      .filter(text => text.split(' ').length > 1)
  );
}

/**
 * Fetches a URL and resolves with its body as text.
 * @param {string} url - Address to request.
 * @returns {Promise<string>} Response body.
 * @throws {Error} When the response status is not in the ok range.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (!response.ok) throw new Error('Network response was not ok');
  return response.text();
}

/**
 * Runs the whole pipeline: fills the shared stopWords set, downloads the
 * search page, and logs each reconstructed title to the console.
 * @returns {Promise<void>} Settles when all titles have been logged.
 */
async function runTitleAnalysis() {
  const bottomPercentile = 0.98;

  // Stop words must be loaded before the titles are tokenized.
  const stopwordsData = await fetchText(stopwordsUrl);
  parseStopWords(stopwordsData).forEach(word => stopWords.add(word));

  const data = await fetchText(searchUrl);
  const htmlDoc = new DOMParser().parseFromString(data, "text/html");

  // Process and filter h3 text content
  const processedTexts = Array.from(htmlDoc.querySelectorAll('h3'))
    .map(h3 => tokenizeTitle(h3.textContent, stopWords));

  // Words above the frequency threshold are removed from every title.
  const frequentWords = findFrequentWords(countWords(processedTexts), bottomPercentile);

  reconstructTitles(processedTexts, frequentWords).forEach(text => console.log(text));
}

runTitleAnalysis().catch(error => console.error('Fetch error:', error));