如何找到当前页面出现次数最多的HTML标签
Issue 欢迎在 Gtihub Issue 中回答此问题: Issue 418
Author 回答者: shfshanyue
这是一道前端基础与编程功底具备的面试题:
- 如果你前端基础强会了解
document.querySelectorAll(*)
能够列出页面内所有标签 - 如果你编程能力强能够用递归/正则快速实现同等的效果
有三种 API 可以列出页面所有标签:
document.querySelectorAll('*')
,标准规范实现$$('*')
,devtools 实现document.all
,非标准规范实现
> document.querySelectorAll('*')
< NodeList(593) [html, head, meta, meta, meta, meta, meta, meta, meta, title, link#favicon, link, link#MainCss, link#mobile-style, link, link, link, script, script, script, script, script, script, script, link, script, link, link, script, input#_w_brink, body, a, div#home, div#header, div#blogTitle, a#lnkBlogLogo, img#blogLogo, h1, a#Header1_HeaderTitle.headermaintitle.HeaderMainTitle, h2, div#navigator, ul#navList, li, a#blog_nav_sitehome.menu, li, a#blog_nav_myhome.menu, li, a#blog_nav_newpost.menu, li, a#blog_nav_contact.menu, li, a#blog_nav_rss.menu, li, a#blog_nav_admin.menu, div.blogStats, span#stats_post_count, span#stats_article_count, span#stats-comment_count, div#main, div#mainContent, div.forFlow, div#post_detail, div#topics, div.post, h1.postTitle, a#cb_post_title_url.postTitle2.vertical-middle, span, div.clear, div.postBody, div#cnblogs_post_body.blogpost-body, p, p, strong, p, p, p, strong, div.cnblogs_code, pre, span, span, span, span, span, p, span, strong, pre, strong, span, strong, br, br, br, div.cnblogs_code, pre, span, span, p, p, …]
[0 … 99]
[100 … 199]
[200 … 299]
[300 … 399]
[400 … 499]
[500 … 592]
__proto__: NodeList
使用 document.querySelectorAll
实现如下
// 实现一个 maxBy 方便找出出现次数最多的 HTML 标签
const maxBy = (list, keyBy) =>
list.reduce((x, y) => (keyBy(x) > keyBy(y) ? x : y));
function getFrequentTag() {
const tags = [...document.querySelectorAll("*")]
.map((x) => x.tagName)
.reduce((o, tag) => {
o[tag] = o[tag] ? o[tag] + 1 : 1;
return o;
}, {});
return maxBy(Object.entries(tags), (tag) => tag[1]);
}
使用 element.children
递归迭代如下 (最终结果多一个 document)
function getAllTags(el = document) {
const children = Array.from(el.children).reduce(
(x, y) => [...x, y.tagName, ...getAllTags(y)],
[],
);
return children;
}
// 或者通过 flatMap 实现
function getAllTags(el = document) {
const children = Array.prototype.flatMap.call(el.children, (x) =>
getAllTags(x),
);
return [el, ...children];
}
如果你已经快速答了上来,那么还有两道拓展的面试题在等着你
- 如何找到当前页面出现次数前三多的 HTML 标签
- 如过多个标签出现次数同样多,则取多个标签
Author 回答者: Harry3014
使用document.querySelectorAll
实现如下(包括可能次数一样多的标签)
function getMostFrequentTag() {
const counter = {};
document.querySelectorAll("*").forEach((element) => {
counter[element.tagName] = counter[element.tagName]
? counter[element.tagName] + 1
: 1;
});
const orderedTags = Object.entries(counter).sort((tag1, tag2) => {
if (tag1[1] < tag2[1]) {
return 1;
}
if (tag1[1] > tag2[1]) {
return -1;
}
return 0;
});
const result = [];
for (const tag of orderedTags) {
if (tag[1] < orderedTags[0][1]) {
break;
}
result.push(tag[0]);
}
return result;
}
Author 回答者: Harry3014
使用Element.children
递归实现如下
function getMostFrequentTag() {
const counter = {};
const traversalElement = (parent) => {
if (parent.tagName !== undefined) {
counter[parent.tagName] = counter[parent.tagName]
? counter[parent.tagName] + 1
: 1;
}
const children = parent.children;
for (let i = 0, length = children.length; i < length; i++) {
traversalElement(children[i]);
}
};
traversalElement(document);
const orderedTags = Object.entries(counter).sort((tag1, tag2) => {
if (tag1[1] < tag2[1]) {
return 1;
}
if (tag1[1] > tag2[1]) {
return -1;
}
return 0;
});
const result = [];
for (const tag of orderedTags) {
if (tag[1] < orderedTags[0][1]) {
break;
}
result.push(tag[0]);
}
return result;
}
Author 回答者: hwb2017
const allElements = document.querySelectorAll("*");
const elementFrequency = Array.from(allElements).reduce((a, b) => {
a[b.tagName] = a[b.tagName] ? a[b.tagName] + 1 : 1;
return a;
}, {});
console.log(elementFrequency);
const sortedElementFrequency = Object.entries(elementFrequency).sort(
(a, b) => b[1] - a[1],
);
console.log(sortedElementFrequency);
const copiedElementFrequency = JSON.parse(
JSON.stringify(sortedElementFrequency),
);
const mergedElementFrequency = copiedElementFrequency.reduce((a, b) => {
if (a.length === 0) {
a.push(b);
return a;
}
let lastItem = a[a.length - 1];
if (lastItem[1] === b[1]) {
// if (Array.isArray(lastItem[0])) {
// lastItem[0].push(b[0])
// } else {
// lastItem[0] = [lastItem[0], b[0]]
// }
lastItem[0] = Array.isArray(lastItem[0])
? lastItem[0].concat([b[0]])
: [lastItem[0], b[0]];
} else {
a.push(b);
}
return a;
}, []);
console.log(mergedElementFrequency);
Author 回答者: ethanlamm
// 获取当前页面所有HTML标签
const allelements = document.querySelectorAll("*");
function findMost(arr) {
let temp = {};
let maxNum = 0;
let maxEle = null;
for (let i = 0; i < arr.length; i++) {
let ele = arr[i].tagName; // 标签名
temp[ele] === undefined ? (temp[ele] = 1) : temp[ele]++;
if (temp[ele] > maxNum) {
maxNum = temp[ele];
maxEle = ele;
}
}
// 应考虑次数相同的情况
let eleArry = [];
for (let key in temp) {
if (temp[key] === maxNum) {
eleArry.push(key);
}
}
return { eleArry, maxNum };
}
let result = findMost(allelements);
console.log(result);
Author 回答者: hatedMe
// 利用hash
const map = new Map();
[...document.querySelectorAll("*")].forEach((item) => {
const tagName = item.tagName.toLowerCase();
map.set(tagName, map.has(tagName) ? map.get(tagName) + 1 : 1);
});
Author 回答者: Ghaining
function findMostEle() {
const els = document.querySelectorAll("*");
const map = new Map();
for (let i = 0; i < els.length; i++) {
const el = els[i];
const tag = el.tagName;
if (map.has(tag)) {
map.set(tag, map.get(tag) + 1);
} else {
map.set(tag, 1);
}
}
return [...map].sort((a, b) => b[1] - a[1]);
}
Author 回答者: shfshanyue
@Ghaining markdown 没有标记 javascript 语言呀
Author 回答者: 601odd
文中有个代码有错 使用 element.children 递归迭代如下 (最终结果多一个 document)
修改如下:
function getAllTags(el = document) {
const children = Array.from(el.children).reduce(
(x, y) => [...x, y.tagName, ...getAllTags(y)],
[],
);
return children;
}
Author 回答者: shfshanyue
@601odd 已修复
Author 回答者: justorez
// 找到当前页面出现次数前几位的 HTML 标签
// 如果多个标签出现次数同样多,则取多个标签
function getMaxFreguentTag(top = 1) {
const tags = [...document.querySelectorAll("*")]
.map((el) => el.tagName)
.reduce((res, tag) => {
res[tag] = res[tag] ? res[tag] + 1 : 1;
return res;
}, {});
// 利用数组把标签排序
const sortedTags = [];
for (const [k, v] of Object.entries(tags)) {
sortedTags[v] ||= [];
sortedTags[v].push(k);
}
// 数组末尾 top 个非空元素,即所要的结果
const res = [];
const len = Math.min(top, sortedTags.length);
for (let i = 0; i < len; ) {
const tag = sortedTags.pop();
if (tag) {
res.push(...tag); // 包含同频次标签
i++;
}
}
return res;
}
Author 回答者: Si3ver
Object.entries($$('*').map(it => it.tagName.toLowerCase()).reduce((cntArr, tag) => { cntArr[tag] = cntArr[tag] ? cntArr[tag] + 1 : 1; return cntArr }, {})).reduce((x, y) => x[1] > y[1] ? x : y)