import HTMLParser from 'uni-helpers/html-parser'
/**
 * Strips the leading XML prolog and the document type declaration from an
 * HTML string so that only element markup is handed to the parser.
 *
 * @param {string} html - Raw markup.
 * @returns {string} The markup without `<?xml ...?>` / `<!DOCTYPE ...>`.
 */
function removeDOCTYPE (html) {
  return html
    // XML prolog, together with the newline that terminates it.
    .replace(/<\?xml.*\?>\n/, '')
    // Doctype in any letter case; the trailing newline is optional. Only the
    // declaration is removed - newlines inside the content are left intact.
    .replace(/<!doctype[^>]*>\n?/i, '')
}
/**
 * Folds a list of `{ name, value }` attribute records into a plain object.
 *
 * - A value containing a space is split on single spaces into an array,
 *   except for `style` and `src`, which are kept as one string.
 * - When a name occurs more than once, the values are collected in an array
 *   (a later value is pushed onto whatever array is already stored).
 * - An earlier empty-string value is simply overwritten by a later one.
 *
 * @param {Array<{name: string, value: string}>} attrs - Attribute records.
 * @returns {Object} Map of attribute name to string or array value.
 */
function parseAttrs (attrs) {
  const hasOwn = Object.prototype.hasOwnProperty

  // Writes an own data property. Unlike plain assignment this never runs the
  // inherited `__proto__` setter, so an attribute with that name cannot
  // replace the prototype of the result object.
  function setOwn (target, key, value) {
    Object.defineProperty(target, key, {
      value: value,
      writable: true,
      enumerable: true,
      configurable: true
    })
  }

  return attrs.reduce(function (pre, attr) {
    const name = attr.name
    let value = attr.value
    if (value.match(/ /) && ['style', 'src'].indexOf(name) === -1) {
      value = value.split(' ')
    }
    // Only look at own properties: names such as `constructor` or `toString`
    // must not be mistaken for an attribute that was already seen.
    const existing = hasOwn.call(pre, name) ? pre[name] : undefined
    if (existing) {
      if (Array.isArray(existing)) {
        existing.push(value)
      } else {
        setOwn(pre, name, [existing, value])
      }
    } else {
      setOwn(pre, name, value)
    }
    return pre
  }, {})
}
/**
 * Parses an HTML string into a tree of plain node objects.
 *
 * Node shapes produced by the handlers below:
 * - element: `{ name, attrs?, children? }` (`attrs` only when the tag has
 *   attributes, `children` only once a child has been attached)
 * - text:    `{ type: 'text', text }`
 * - comment: `{ node: 'comment', text }`
 *
 * @param {string} html - Markup to parse; XML prolog / doctype are stripped first.
 * @returns {Array<Object>} The top-level nodes of the document.
 */
export default function parseHtml (html) {
  html = removeDOCTYPE(html)
  // Open (not yet closed) elements, innermost first.
  const stacks = []
  const results = {
    node: 'root',
    children: []
  }

  // Attaches `node` to the innermost open element, or to the root when no
  // element is currently open.
  function appendToCurrent (node) {
    const parent = stacks[0] || results
    if (!parent.children) {
      parent.children = []
    }
    parent.children.push(node)
  }

  HTMLParser(html, {
    start (tag, attrs, unary) {
      const node = {
        name: tag
      }
      if (attrs.length !== 0) {
        node.attrs = parseAttrs(attrs)
      }
      if (unary) {
        // Self-closing tags never receive an `end` call, so attach right away.
        appendToCurrent(node)
      } else {
        stacks.unshift(node)
      }
    },
    end (tag) {
      const node = stacks.shift()
      if (!node) {
        // End tag with no open element: report it instead of throwing.
        console.error('invalid state: mismatch end tag')
        return
      }
      if (node.name !== tag) console.error('invalid state: mismatch end tag')
      // The closed element was already popped, so stacks[0] is now its parent.
      appendToCurrent(node)
    },
    chars (text) {
      appendToCurrent({
        type: 'text',
        text: text
      })
    },
    comment (text) {
      // Comments may appear outside any element (e.g. at the very top of the
      // document); those are attached to the root.
      appendToCurrent({
        node: 'comment',
        text: text
      })
    }
  })
  return results.children
}