高级前端dom【Q411】如何找到当前页面出现次数最多的HTML标签

如何找到当前页面出现次数最多的HTML标签

Issue 欢迎在 GitHub Issue 中回答此问题: Issue 418

Author 回答者: shfshanyue

这是一道同时考察前端基础与编程功底的面试题:

  • 如果你前端基础强,会了解 document.querySelectorAll('*') 能够列出页面内所有标签
  • 如果你编程能力强能够用递归/正则快速实现同等的效果

有三种 API 可以列出页面所有标签:

  1. document.querySelectorAll('*'),标准规范实现
  2. $$('*'),devtools 实现
  3. document.all,非标准规范实现
> document.querySelectorAll('*')
< NodeList(593) [html, head, meta, meta, meta, meta, meta, meta, meta, title, link#favicon, link, link#MainCss, link#mobile-style, link, link, link, script, script, script, script, script, script, script, link, script, link, link, script, input#_w_brink, body, a, div#home, div#header, div#blogTitle, a#lnkBlogLogo, img#blogLogo, h1, a#Header1_HeaderTitle.headermaintitle.HeaderMainTitle, h2, div#navigator, ul#navList, li, a#blog_nav_sitehome.menu, li, a#blog_nav_myhome.menu, li, a#blog_nav_newpost.menu, li, a#blog_nav_contact.menu, li, a#blog_nav_rss.menu, li, a#blog_nav_admin.menu, div.blogStats, span#stats_post_count, span#stats_article_count, span#stats-comment_count, div#main, div#mainContent, div.forFlow, div#post_detail, div#topics, div.post, h1.postTitle, a#cb_post_title_url.postTitle2.vertical-middle, span, div.clear, div.postBody, div#cnblogs_post_body.blogpost-body, p, p, strong, p, p, p, strong, div.cnblogs_code, pre, span, span, span, span, span, p, span, strong, pre, strong, span, strong, br, br, br, div.cnblogs_code, pre, span, span, p, p, …]
[0 … 99]
[100 … 199]
[200 … 299]
[300 … 399]
[400 … 499]
[500 … 592]
__proto__: NodeList

使用 document.querySelectorAll 实现如下

/**
 * Returns the element of `list` whose key, as computed by `keyBy`, is the
 * largest. Used below to pick the most frequent HTML tag out of
 * `[tagName, count]` entries.
 *
 * When several elements share the largest key, the one that appears last wins.
 *
 * @param {Array} list - Elements to compare.
 * @param {Function} keyBy - Maps an element to the comparable key used for ranking.
 * @returns {*} The element with the largest key, or `undefined` for an empty list.
 */
const maxBy = (list, keyBy) => {
  // reduce() without an initial value throws a TypeError on an empty array,
  // so that case is handled explicitly.
  if (list.length === 0) {
    return undefined;
  }
  return list.reduce((best, item) => (keyBy(best) > keyBy(item) ? best : item));
};
 
/**
 * Finds the HTML tag that occurs most often in the current document.
 *
 * @returns {[string, number]} The `[tagName, count]` entry selected by `maxBy`
 *   as having the highest count.
 */
function getFrequentTag() {
  // A Map keeps the tally safe from keys inherited from Object.prototype:
  // with a plain `{}`, a case-preserved tag name such as "constructor"
  // (possible for SVG/XML elements) is already truthy before it is counted,
  // which turns its "count" into a string.
  const counts = new Map();
  for (const { tagName } of document.querySelectorAll("*")) {
    counts.set(tagName, (counts.get(tagName) ?? 0) + 1);
  }
  return maxBy([...counts], ([, count]) => count);
}

使用 element.children 递归迭代如下 (注意:下面的 flatMap 版本返回的是元素节点而非标签名,且结果开头会多一个 document)

/**
 * Collects the tag names of every descendant of `el`, depth-first and in
 * document order. The tag name of `el` itself is not included.
 *
 * @param {Document|Element} [el=document] - Root whose descendants are listed.
 * @returns {string[]} Tag names of all descendants of `el`.
 */
function getAllTags(el = document) {
  // One shared accumulator avoids re-copying the partial result for every
  // child, which the spread-inside-reduce form did.
  const tagNames = [];
  const collect = (parent) => {
    for (const child of Array.from(parent.children)) {
      tagNames.push(child.tagName);
      collect(child);
    }
  };
  collect(el);
  return tagNames;
}
 
// Alternative: gather each child's subtree, then flatten them one level.
/**
 * Lists `el` followed by all of its descendants, depth-first.
 * The result holds the nodes themselves (not tag names) and starts with `el`.
 *
 * @param {Document|Element} [el=document] - Root of the traversal.
 * @returns {Array} `el` and every node reachable through `children`.
 */
function getAllTags(el = document) {
  const subtrees = Array.from(el.children, (child) => getAllTags(child));
  return [el].concat(subtrees.flat());
}

如果你已经快速答了上来,那么还有两道拓展的面试题在等着你

  1. 如何找到当前页面出现次数前三多的 HTML 标签
  2. 如果多个标签出现次数同样多,则取多个标签

Author 回答者: Harry3014

使用document.querySelectorAll实现如下(包括可能次数一样多的标签)

/**
 * Finds the tag name(s) that occur most often in the current document,
 * using `document.querySelectorAll("*")` to enumerate the elements.
 *
 * @returns {string[]} Every tag name that shares the highest count, in order
 *   of first appearance; an empty array when there are no elements.
 */
function getMostFrequentTag() {
  // A Map avoids collisions with keys inherited from Object.prototype
  // (e.g. a case-preserved SVG/XML tag named "constructor"), which a plain
  // `{}` counter would treat as already present.
  const counter = new Map();
  for (const { tagName } of document.querySelectorAll("*")) {
    counter.set(tagName, (counter.get(tagName) ?? 0) + 1);
  }

  // Only the maximum is needed, so scan for it instead of sorting all entries.
  const highest = Math.max(0, ...counter.values());
  return [...counter]
    .filter(([, count]) => count === highest)
    .map(([tagName]) => tagName);
}

Author 回答者: Harry3014

使用Element.children递归实现如下

/**
 * Finds the tag name(s) that occur most often in the current document by
 * walking the tree recursively through `children`, starting at `document`.
 *
 * @returns {string[]} Every tag name that shares the highest count, in order
 *   of first appearance; an empty array when there are no elements.
 */
function getMostFrequentTag() {
  // A Map avoids collisions with keys inherited from Object.prototype
  // (e.g. a case-preserved SVG/XML tag named "constructor"), which a plain
  // `{}` counter would treat as already present.
  const counter = new Map();

  const traversalElement = (parent) => {
    // Nodes without a tagName (the starting `document`) are walked but not counted.
    if (parent.tagName !== undefined) {
      counter.set(parent.tagName, (counter.get(parent.tagName) ?? 0) + 1);
    }
    for (const child of Array.from(parent.children)) {
      traversalElement(child);
    }
  };

  traversalElement(document);

  // Only the maximum is needed, so scan for it instead of sorting all entries.
  const highest = Math.max(0, ...counter.values());
  return [...counter]
    .filter(([, count]) => count === highest)
    .map(([tagName]) => tagName);
}

Author 回答者: hwb2017

codepen demo

// Step 1: tally how many times each tag name appears on the page.
const allElements = document.querySelectorAll("*");
const elementFrequency = {};
for (const { tagName } of Array.from(allElements)) {
  if (elementFrequency[tagName]) {
    elementFrequency[tagName] += 1;
  } else {
    elementFrequency[tagName] = 1;
  }
}
console.log(elementFrequency);

// Step 2: order the [tagName, count] pairs from most to least frequent.
const sortedElementFrequency = Object.entries(elementFrequency).sort(
  ([, leftCount], [, rightCount]) => rightCount - leftCount,
);
console.log(sortedElementFrequency);

// Step 3: work on a deep copy so merging does not alter the sorted pairs.
const serializedFrequency = JSON.stringify(sortedElementFrequency);
const copiedElementFrequency = JSON.parse(serializedFrequency);

// Step 4: fold neighbouring pairs that share a count into a single pair
// whose first slot holds the array of all tag names with that count.
const mergedElementFrequency = [];
for (const entry of copiedElementFrequency) {
  const previous = mergedElementFrequency[mergedElementFrequency.length - 1];
  if (previous === undefined || previous[1] !== entry[1]) {
    mergedElementFrequency.push(entry);
    continue;
  }
  previous[0] = Array.isArray(previous[0])
    ? previous[0].concat([entry[0]])
    : [previous[0], entry[0]];
}
console.log(mergedElementFrequency);

Author 回答者: ethanlamm

// Collect every HTML element on the current page.
const allelements = document.querySelectorAll("*");
 
/**
 * Tallies the `tagName` of every item in `arr` and reports the most frequent one(s).
 *
 * @param {ArrayLike<{tagName: string}>} arr - Elements to tally (e.g. a NodeList).
 * @returns {{eleArry: string[], maxNum: number}} `eleArry` lists every tag name
 *   that reaches the highest count (ties included, in order of first
 *   appearance); `maxNum` is that count, or 0 when `arr` is empty.
 */
function findMost(arr) {
  // A Map avoids collisions with keys inherited from Object.prototype: with a
  // plain `{}`, a tag named "constructor" is never `undefined`, so its count
  // would become NaN instead of starting at 1.
  const counts = new Map();
  let maxNum = 0;
  for (const { tagName } of Array.from(arr)) {
    const count = (counts.get(tagName) ?? 0) + 1;
    counts.set(tagName, count);
    if (count > maxNum) {
      maxNum = count;
    }
  }

  // Several tags may share the highest count, so collect all of them.
  const eleArry = [];
  for (const [tagName, count] of counts) {
    if (count === maxNum) {
      eleArry.push(tagName);
    }
  }
  return { eleArry, maxNum };
}
// Tally the collected elements and print the { eleArry, maxNum } summary.
let result = findMost(allelements);
console.log(result);

Author 回答者: hatedMe

// Tally lower-cased tag names in a hash map (tag name -> occurrence count).
const map = new Map();
for (const item of document.querySelectorAll("*")) {
  const tagName = item.tagName.toLowerCase();
  const seenSoFar = map.get(tagName) ?? 0;
  map.set(tagName, seenSoFar + 1);
}

Author 回答者: Ghaining

/**
 * Counts how often each tag name occurs in the current document.
 *
 * @returns {Array<[string, number]>} `[tagName, count]` pairs ordered from the
 *   most frequent tag to the least frequent one.
 */
function findMostEle() {
  const counts = new Map();
  for (const { tagName } of Array.from(document.querySelectorAll("*"))) {
    counts.set(tagName, (counts.get(tagName) ?? 0) + 1);
  }
  return Array.from(counts).sort(([, left], [, right]) => right - left);
}

Author 回答者: shfshanyue

@Ghaining markdown 没有标记 javascript 语言呀

Author 回答者: 601odd

文中有个代码有错 使用 element.children 递归迭代如下 (最终结果多一个 document) 修改如下:

/**
 * Lists the tag names of all descendants of `el`, depth-first.
 * The tag name of `el` itself is not part of the result.
 *
 * @param {Document|Element} [el=document] - Root whose descendants are listed.
 * @returns {string[]} Tag names of every descendant of `el`.
 */
function getAllTags(el = document) {
  return Array.from(el.children).flatMap((child) => [
    child.tagName,
    ...getAllTags(child),
  ]);
}

Author 回答者: shfshanyue

@601odd 已修复

Author 回答者: justorez

/**
 * Finds the tags holding the top `top` occurrence counts in the current page.
 * Tags that share a count are all returned together, so the result may hold
 * more than `top` tag names.
 *
 * @param {number} [top=1] - How many distinct frequency ranks to include.
 * @returns {string[]} Tag names ordered from the most frequent rank downwards;
 *   within one rank, in order of first appearance.
 */
function getMaxFreguentTag(top = 1) {
  // A Map avoids collisions with keys inherited from Object.prototype
  // (e.g. a case-preserved SVG/XML tag named "constructor").
  const counts = new Map();
  for (const { tagName } of document.querySelectorAll("*")) {
    counts.set(tagName, (counts.get(tagName) ?? 0) + 1);
  }

  // Group tag names by how often they occur.
  const tagsByCount = new Map();
  for (const [tagName, count] of counts) {
    if (!tagsByCount.has(count)) {
      tagsByCount.set(count, []);
    }
    tagsByCount.get(count).push(tagName);
  }

  // Take at most `top` ranks. Slicing the list of distinct frequencies stops
  // cleanly when the page has fewer ranks than requested; popping from a
  // count-indexed sparse array would spin forever in that case.
  const frequencies = [...tagsByCount.keys()].sort((a, b) => b - a);
  return frequencies
    .slice(0, Math.max(0, top))
    .flatMap((count) => tagsByCount.get(count));
}

Author 回答者: Si3ver

// DevTools console one-liner: `$$` is the console's element-query helper.
// Tallies lower-cased tag names, then keeps the [tagName, count] pair with the
// highest count (the later pair wins a tie).
Object.entries(
  $$("*")
    .map((el) => el.tagName.toLowerCase())
    .reduce((counts, tag) => {
      counts[tag] = counts[tag] ? counts[tag] + 1 : 1;
      return counts;
    }, {}),
).reduce((best, entry) => (best[1] > entry[1] ? best : entry));