Extract Title Terms from Google

12/13/2023 - 12:08 AM

Extract Title Terms from Google

// Edit me ///////////////////////////////////////////////
let search = "Google * and SEO";
// Edit me ///////////////////////////////////////////////

// Encode the phrase for use inside a query string. The phrase is trimmed
// first, percent-encoded so characters such as '&', '#' or '"' cannot break
// the URL, and then every encoded space (%20) is written as '+'.
// encodeURIComponent leaves '*' untouched, so the wildcard survives as-is.
let searchQ = encodeURIComponent(search.trim()).replace(/%20/g, '+');
// %22 is an encoded double quote: the phrase is sent wrapped in quotes.
let searchUrl = `https://www.google.com/search?q=%22${searchQ}%22&num=1000`;
// Plain-text stop-word list, one word per line.
let stopwordsUrl = "https://gist.githubusercontent.com/sebleier/554280/raw/7e0e4a1ce04c2bb7bd41089c9821dbcf6d0c786c/NLTK's%20list%20of%20english%20stopwords";
let stopWords = new Set(); // Define stopWords in an accessible scope
 
// Fetch the stop words, then the search results, and log the title phrases
// that remain once stop words and the most frequent words are removed.

/**
 * Removes every character that is not a letter, digit or whitespace
 * (underscores are removed too).
 * @param {string} text
 * @returns {string}
 */
function stripPunctuation(text) {
  return text.replace(/[^\w\s]|_/g, "");
}

/**
 * Parses a newline-separated stop-word list.
 * Each word is stored as written and also with punctuation stripped
 * ("don't" -> "dont"), because titles are stripped of punctuation before
 * they are compared against this set. Blank lines are ignored.
 * @param {string} text - raw list, one word per line
 * @returns {Set<string>}
 */
function parseStopWords(text) {
  const words = new Set();
  for (const line of text.split(/\n/)) {
    const word = line.trim();
    if (word === "") {
      continue;
    }
    words.add(word);
    const stripped = stripPunctuation(word);
    if (stripped !== "") {
      words.add(stripped);
    }
  }
  return words;
}

/**
 * Turns one title into lower-case word tokens without punctuation.
 * Empty tokens (left behind when a title starts with punctuation) and stop
 * words are dropped.
 * @param {string} title
 * @param {Set<string>} stopWordSet
 * @returns {string[]}
 */
function tokenizeTitle(title, stopWordSet) {
  return stripPunctuation(title.trim().toLowerCase())
    .split(/\s+/)
    .filter(word => word !== "" && !stopWordSet.has(word));
}

/**
 * Counts how often each word occurs across all token lists.
 * A Map is used instead of a plain object so that words such as
 * "constructor" do not collide with inherited Object.prototype properties.
 * @param {string[][]} tokenLists
 * @returns {Map<string, number>}
 */
function countWords(tokenLists) {
  const counts = new Map();
  for (const word of tokenLists.flat()) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }
  return counts;
}

/**
 * Returns the words whose count is strictly greater than the count found at
 * the given percentile position of the ascending list of counts.
 * @param {Map<string, number>} counts
 * @param {number} percentile - fraction in [0, 1]
 * @returns {Set<string>}
 */
function findFrequentWords(counts, percentile) {
  const sortedCounts = [...counts.values()].sort((a, b) => a - b);
  // With no words (or percentile 1) the index is out of range and the
  // threshold is undefined; every comparison is then false, so nothing is
  // treated as frequent.
  const thresholdValue = sortedCounts[Math.floor(sortedCounts.length * percentile)];
  const frequentWords = new Set();
  for (const [word, count] of counts) {
    if (count > thresholdValue) {
      frequentWords.add(word);
    }
  }
  return frequentWords;
}

/**
 * Reduces titles to unique phrases made of their less common words.
 * Phrases with fewer than two remaining words are discarded.
 * @param {string[]} titles - raw title texts
 * @param {Set<string>} stopWordSet
 * @param {number} [percentile=0.98] - see findFrequentWords
 * @returns {Set<string>} phrases in first-seen order
 */
function extractTitlePhrases(titles, stopWordSet, percentile = 0.98) {
  const tokenLists = titles.map(title => tokenizeTitle(title, stopWordSet));
  const frequentWords = findFrequentWords(countWords(tokenLists), percentile);
  const phrases = new Set();
  for (const tokens of tokenLists) {
    const kept = tokens.filter(word => !frequentWords.has(word));
    if (kept.length > 1) {
      phrases.add(kept.join(' '));
    }
  }
  return phrases;
}

/**
 * Fetches `url` and resolves with the body text.
 * @param {string} url
 * @returns {Promise<string>}
 * @throws {Error} when the response status is not ok
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error('Network response was not ok');
  }
  return response.text();
}

/**
 * Entry point: loads the stop words into the shared `stopWords` set, fetches
 * the search page, reads the text of every <h3> element and logs each
 * resulting phrase on its own line.
 * @returns {Promise<void>}
 */
async function logTitleTerms() {
  // Stop words are loaded before the search request, as in the original flow.
  for (const word of parseStopWords(await fetchText(stopwordsUrl))) {
    stopWords.add(word);
  }

  const html = await fetchText(searchUrl);
  const htmlDoc = new DOMParser().parseFromString(html, "text/html");
  const titles = Array.from(htmlDoc.querySelectorAll('h3'), h3 => h3.textContent);

  extractTitlePhrases(titles, stopWords).forEach(text => console.log(text));
}

logTitleTerms().catch(error => console.error('Fetch error:', error));

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

Extract Title Terms from Google

Extract Title Terms from Google