Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 3 additions & 60 deletions app/serializers/article.js
Original file line number Diff line number Diff line change
@@ -1,64 +1,7 @@
import DS from 'ember-data';
import HNDocument from '../utils/hn-document';

// Readability Parser returns some really strange HTML from time to time (non-
// matching open/close tags, body tags in random spots, etc.), so we need to
// "clean" it through an iframe.

// Hidden, blank iframe that cleanMarkup() attaches to the page and uses as a
// scratch document while cleaning markup.
var iframe = document.createElement('iframe');

[
  ['src', 'about:blank'],
  ['style', 'display:none']
].forEach(function (attribute) {
  iframe.setAttribute(attribute[0], attribute[1]);
});

/**
 * Detach an element from its parent node.
 *
 * @param {Element} el the element to detach; it must currently have a parent
 */
function removeElement(el) {
  var parent = el.parentNode;

  parent.removeChild(el);
}

/**
 * Recursively clean an element in place.
 *
 * `<a>` elements whose `href` contains a `javascript:` URL are removed from
 * their parent; every other link is made to open in a new window. For all
 * remaining elements the `id`, `name`, `class` and `style` attributes are
 * stripped and the children are cleaned the same way.
 *
 * @param {Element} el the element to clean
 */
function cleanElement(el) {
  if (el.tagName === 'A') {
    if (el.hasAttribute('href')) {
      // Ignore case, spaces and control characters so variants such as
      // " JavaScript:" or "java\tscript:" are caught as well.
      var href = el.getAttribute('href')
        .replace(/[\u0000-\u0020]/g, '')
        .toLowerCase();

      /* jshint scripturl: true */
      if (href.indexOf('javascript:') >= 0) {
        /* jshint scripturl: false */
        removeElement(el);
        return;
      }
    }

    el.setAttribute('target', '_blank');
  }

  el.removeAttribute('id');
  el.removeAttribute('name');
  el.removeAttribute('class');
  el.removeAttribute('style');

  // Snapshot the children first: cleaning a child may remove it from `el`.
  var children = [].slice.call( el.children );

  for (let i=0; i < children.length; i++) {
    cleanElement(children[i]);
  }
}

/**
 * Clean raw article HTML by round-tripping it through the hidden iframe.
 *
 * The iframe is attached to the page only for the duration of the call. It is
 * emptied and detached again even when writing or cleaning throws, so a
 * failure does not leave a stray iframe in the document.
 *
 * @param {String} html the raw HTML
 * @return {String} the cleaned HTML; an empty string when `html` is `null`
 *   or `undefined`
 */
function cleanMarkup(html) {
  // Without this guard a missing value is interpolated as the literal text
  // "undefined"/"null" and returned as article content.
  if (html === null || html === undefined) {
    return '';
  }

  document.body.appendChild(iframe);

  var doc = iframe.contentDocument;

  try {
    doc.open();
    doc.write(`<html><head></head><body>${ html }</body></html>`);
    doc.close();

    cleanElement(doc.body);

    return doc.body.innerHTML;
  } finally {
    // Empty the scratch document, then detach the iframe.
    doc.open();
    doc.close();

    document.body.removeChild(iframe);
  }
}
// Single module-level HNDocument; the serializer below calls its cleanMarkup()
// on the article content.
const hnDocumentInstance = new HNDocument();

export default DS.RESTSerializer.extend({

Expand All @@ -69,7 +12,7 @@ export default DS.RESTSerializer.extend({
// Successful
title: payload.title || null,
author: payload.author || null,
body: cleanMarkup(payload.content) || null,
body: hnDocumentInstance.cleanMarkup(payload.content) || null,

// Error
error: payload.error || false,
Expand Down
214 changes: 214 additions & 0 deletions app/utils/hn-document.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/**
* @module Utils
*/

/**
* Wraps a DOMElement and knows how to _clean_ it. This includes removal if
* unsafe and recursive cleaning of child nodes.
*
* This is a base interface and suitable for most elements. More specific
* elements can have their own version. To facilitate this use the `factory()`
* method for construction which will return the correct class type based on
* the element `tagName`.
*
* @class HNElement
* @constructor
* @private
*/
class HNElement {
  constructor(el) {
    this.el = el;
  }

  /**
   * The DOM element this class wraps.
   * @property {DOMElement} el
   */

  /**
   * Pick the HNElement implementation registered for the element's
   * `tagName`, falling back to the default one, and wrap the element in it.
   * @static
   * @method factory
   * @param {DOMElement} el the DOM element to wrap
   * @return {HNElement} wrapper instance for the element
   */
  static factory(el) {
    let Wrapper = ELEMENT_CLASSES[el.tagName];
    if (!Wrapper) {
      Wrapper = ELEMENT_CLASSES._default;
    }
    return new Wrapper(el);
  }

  /**
   * Whether the element is unsafe for consumption. The base implementation
   * always answers `false`.
   * @property {Boolean} isUnsafe
   * @default false
   */
  get isUnsafe() {
    return false;
  }

  /**
   * Snapshot of the element's child elements as a real Array.
   * @property {Array} children
   */
  get children() {
    return Array.prototype.slice.call(this.el.children);
  }

  /**
   * Strip the `id`, `name`, `class` and `style` attributes from the element,
   * then recursively clean each child through `factory()`.
   * @method clean
   * @chainable
   */
  clean() {
    for (const attribute of ['id', 'name', 'class', 'style']) {
      this.el.removeAttribute(attribute);
    }
    for (const child of this.children) {
      HNElement.factory(child).clean();
    }
    return this;
  }

  /**
   * Detach the wrapped element from its parent node.
   * @method remove
   * @chainable
   */
  remove() {
    const node = this.el;
    node.parentNode.removeChild(node);
    return this;
  }
}

/**
* HNElement for `<a>` tags
* @class HNLinkElement
* @extends HNElement
* @constructor
* @private
*/
class HNLinkElement extends HNElement {
  /**
   * Remove the link when it is unsafe; otherwise make it open in a new
   * window and run the base attribute/children cleaning.
   * @method clean
   * @chainable
   */
  clean() {
    if (this.isUnsafe) {
      return this.remove();
    }
    this.el.setAttribute('target', '_blank');
    // A bare `super()` call is only valid inside a constructor; the parent
    // method has to be invoked explicitly.
    return super.clean();
  }

  /**
   * A link is unsafe when its `href` contains a `javascript:` URL. Case,
   * spaces and control characters are ignored so variants such as
   * " JavaScript:" or "java\tscript:" are caught as well.
   * @property {Boolean} isUnsafe
   */
  get isUnsafe() {
    if (!this.el.hasAttribute('href')) {
      return false;
    }
    const href = this.el.getAttribute('href')
      .replace(/[\u0000-\u0020]/g, '')
      .toLowerCase();
    /* jshint scripturl: true */
    return href.indexOf('javascript:') >= 0;
    /* jshint scripturl: false */
  }
}

// Lookup table used by HNElement.factory(): keys are DOM `tagName` values and
// values are the wrapper class to instantiate. `_default` is the fallback for
// any tag that has no entry of its own.
const ELEMENT_CLASSES = {
A: HNLinkElement,
_default: HNElement
};

/**
* Cleans up raw HTML from HN.
*
* Readability Parser returns some really strange HTML from time to time (non-
* matching open/close tags, body tags in random spots, etc.), so we need to
* "clean" it through an iframe.
*
* @class HNDocument
* @constructor
*/
export default class HNDocument {
  constructor() {
    this.iframe = document.createElement('iframe');
    this.iframe.setAttribute('src', 'about:blank');
    this.iframe.setAttribute('style', 'display:none');
  }

  /**
   * @property {DOMElement} iframe
   * @private
   */

  /**
   * @property {DOMDocument} doc
   * @private
   */

  /**
   * Attach the iframe to the document and assign a contentDocument.
   * @method initDocument
   * @chainable
   * @private
   */
  initDocument() {
    document.body.appendChild(this.iframe);
    this.doc = this.iframe.contentDocument;
    return this;
  }

  /**
   * Write raw HTML to iframe document.
   * @method writeDocument
   * @param {String} html the raw HTML
   * @chainable
   * @private
   */
  writeDocument(html) {
    this.doc.open();
    this.doc.write(`<html><head></head><body>${ html }</body></html>`);
    this.doc.close();
    return this;
  }

  /**
   * Empty the iframe document (helps garbage collection).
   * @method emptyDocument
   * @chainable
   * @private
   */
  emptyDocument() {
    this.doc.open();
    this.doc.close();
    return this;
  }

  /**
   * Remove iframe from main document.
   * @method destroyDocument
   * @chainable
   * @private
   */
  destroyDocument() {
    this.doc = null;
    document.body.removeChild(this.iframe);
    return this;
  }

  /**
   * Recursively clean all the elements in the iframe document.
   * @method cleanElements
   * @chainable
   * @private
   */
  cleanElements() {
    HNElement.factory(this.doc.body).clean();
    return this;
  }

  /**
   * Clean and sanitize raw html.
   *
   * The iframe is attached only for the duration of the call. It is emptied
   * and detached again even when writing or cleaning throws, so a failure
   * does not leave a stray iframe in the page or a stale `doc` reference.
   *
   * @method cleanMarkup
   * @param {String} html the raw HTML
   * @return {String} sanitized HTML; an empty string when `html` is `null`
   *   or `undefined`
   */
  cleanMarkup(html) {
    // Without this guard a missing value is interpolated as the literal text
    // "undefined"/"null" and returned as article content.
    if (html === null || html === undefined) {
      return '';
    }

    this.initDocument();
    try {
      this.writeDocument(html)
        .cleanElements();
      return this.doc.body.innerHTML;
    } finally {
      this.emptyDocument()
        .destroyDocument();
    }
  }
}