` is the last part of the URL of the Browserscope anchor in your JSPerf test. Something like `"agt1YS1wcm9maWxlcnINCxIEVGVzdBjlm_EQDA"` in the URL `http://www.browserscope.org/user/tests/table/agt1YS1wcm9maWxlcnINCxIEVGVzdBjlm_EQDA`
-
-
+ * Drawings:
+ * not supported - there seems to be no API function to export a drawing as a rasterized or vector image.
+ * Equations:
+ * Equations are converted to LaTeX equations and surrounded by ``$`` signs
+ * Table of contents:
+ * Is replaced by `[[TOC]]`
+ * Horizontal line:
+ * Inserts a `---`
+ * Header/Footer:
+ * Extracts the text with all of its formatting and inserts it at the top and bottom of the Markdown document, separated by `---`.
+ * Tables:
+ * Converted to Markdown tables following GitHub Markdown syntax. Formatting within cells gets transferred.
+ * Source code:
+ * A fenced code block starts with three back-ticks followed by a string and ends with three back-ticks. Inside a detected fenced code block, single line breaks are used.
## CONTRIBUTORS
* Renato Mangini - [G+](//google.com/+renatomangini) - [Github](//github.com/mangini)
* Ed Bacher - [G+](//plus.google.com/106923847899206957842) - [Github](//github.com/evbacher)
+* Andreas Wolke - [G+](//plus.google.com/+AndreasWolke) - [Github](//github.com/jacksonicson)
## LICENSE
diff --git a/converttomarkdown.gapps b/converttomarkdown.gapps
index b6344de..941a1d5 100644
--- a/converttomarkdown.gapps
+++ b/converttomarkdown.gapps
@@ -1,289 +1,644 @@
-/*
-Usage:
- Adding this script to your doc:
- - Tools > Script Manager > New
- - Select "Blank Project", then paste this code in and save.
- Running the script:
- - Tools > Script Manager
- - Select "ConvertToMarkdown" function.
- - Click Run button.
- - Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]...".
-*/
+// Open handler to add Menu
+function onOpen(e) {
+ var ui = DocumentApp.getUi();
+
+ if (e && e.authMode == ScriptApp.AuthMode.NONE) {
+ ui.createMenu('Markdown')
+ .addItem('Latex Equation', 'ConvertEquation')
+ .addToUi();
+ } else {
+ ui.createMenu('Markdown')
+ .addItem('Export File', 'ConvertToMarkdownFile')
+ .addItem('Export Email', 'ConvertToMarkdownEmail')
+ .addItem('Latex Equation', 'ConvertEquation')
+ .addToUi();
+ }
+}
+
+function onInstall(e) { // presumably the Apps Script install trigger; e is forwarded unchanged
+ onOpen(e); // reuse the open handler so the Markdown menu is built straight away
+}
-function ConvertToMarkdown() {
- var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren();
- var text = "";
- var inSrc = false;
- var inClass = false;
- var globalImageCounter = 0;
- var globalListCounters = {};
- // edbacher: added a variable for indent in src block. Let style sheet do margin.
- var srcIndent = "";
+function ConvertEquation() {
+ var element = DocumentApp.getActiveDocument().getCursor().getElement();
- var attachments = [];
+ // Scan upwards for an equation
+ while(element.getType() != DocumentApp.ElementType.EQUATION) {
+ if(element.getParent() == null)
+ break;
+
+ element = element.getParent();
+ }
- // Walk through all the child elements of the doc.
- for (var i = 0; i < numChildren; i++) {
- var child = DocumentApp.getActiveDocument().getActiveSection().getChild(i);
- var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters);
- globalImageCounter += (result && result.images) ? result.images.length : 0;
- if (result!==null) {
- if (result.sourcePretty==="start" && !inSrc) {
- inSrc=true;
- text+="\n";
- } else if (result.sourcePretty==="end" && inSrc) {
- inSrc=false;
- text+="\n\n";
- } else if (result.source==="start" && !inSrc) {
- inSrc=true;
- text+="\n";
- } else if (result.source==="end" && inSrc) {
- inSrc=false;
- text+="\n\n";
- } else if (result.inClass==="start" && !inClass) {
- inClass=true;
- text+="\n";
- } else if (result.inClass==="end" && inClass) {
- inClass=false;
- text+="
\n\n";
- } else if (inClass) {
- text+=result.text+"\n\n";
- } else if (inSrc) {
- text+=(srcIndent+escapeHTML(result.text)+"\n");
- } else if (result.text && result.text.length>0) {
- text+=result.text+"\n\n";
- }
-
- if (result.images && result.images.length>0) {
- for (var j=0; j/g, '>');
+// Convert current document to markdown and email it
+function ConvertToMarkdownEmail() {
+ // Convert to markdown
+ var convertedDoc = markdown();
+
+ // Add markdown document to attachments
+ convertedDoc.attachments.push({"fileName":DocumentApp.getActiveDocument().getName()+".md",
+ "mimeType": "text/plain", "content": convertedDoc.text});
+
+ // In some cases user email is not accessible
+ var mail = Session.getActiveUser().getEmail();
+ if(mail === '') {
+ DocumentApp.getUi().alert("Could not read your email address");
+ return;
+ }
+
+ // Send email with markdown document
+ MailApp.sendEmail(mail,
+ "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(),
+ "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+
+ "\n\nDon't know how to use the format options? See http://github.com/mangini/gdocs2md\n",
+ { "attachments": convertedDoc.attachments });
}
-// Process each child element (not just paragraphs).
-function processParagraph(index, element, inSrc, imageCounter, listCounters) {
- // First, check for things that require no processing.
- if (element.getNumChildren()==0) {
- return null;
- }
- // Punt on TOC.
- if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) {
- return {"text": "[[TOC]]"};
+
+// Convert current document to markdown and write it, together with the files collected
+// during conversion, into a folder named target inside the folder that holds the document
+function ConvertToMarkdownFile() {
+ // Convert to markdown
+ var convertedDoc = markdown();
+
+ // Look up the folder(s) that contain the document; the export folder is created there
+ var id = DocumentApp.getActiveDocument().getId();
+ var file = DocsList.getFileById(id); // NOTE(review): DocsList has been retired by Google in favour of DriveApp - confirm and migrate
+ var parents = file.getParents();
+
+ if(parents.length > 1) {
+ Logger.log("File has multiple parent directory. Script does not work in this case");
+ DocumentApp.getUi().alert("Document must not be in multiple directories");
+ return;
 }
- // Set up for real results.
- var result = {};
- var pOut = "";
- var textElements = [];
- var imagePrefix = "image_";
-
- // Handle Table elements. Pretty simple-minded now, but works for simple tables.
- // Note that Markdown does not process within block-level HTML, so it probably
- // doesn't make sense to add markup within tables.
- if (element.getType() === DocumentApp.ElementType.TABLE) {
- textElements.push("\n");
- var nCols = element.getChild(0).getNumCells();
- for (var i = 0; i < element.getNumChildren(); i++) {
- textElements.push(" \n");
- // process this row
- for (var j = 0; j < nCols; j++) {
- textElements.push(" | " + element.getChild(i).getChild(j).getText() + " | \n");
- }
- textElements.push("
\n");
- }
- textElements.push("
\n");
+ if(parents.length == 0) {
+ DocumentApp.getUi().alert("Document has to be in a directory for the export");
+ return;
 }
- // Process various types (ElementType).
- for (var i = 0; i < element.getNumChildren(); i++) {
- var t=element.getChild(i).getType();
+ // Use first parent (exactly one is left after the two checks above)
+ var parent = parents[0];
+
+ // Check if a target folder already exists and ask before trashing it
+ for(var folder in parent.getFolders()) { // for-in yields keys; the folder itself is fetched on the next line
+ folder = parent.getFolders()[folder];
- if (t === DocumentApp.ElementType.TABLE_ROW) {
- // do nothing: already handled TABLE_ROW
- } else if (t === DocumentApp.ElementType.TEXT) {
- var txt=element.getChild(i);
- pOut += txt.getText();
- textElements.push(txt);
- } else if (t === DocumentApp.ElementType.INLINE_IMAGE) {
- result.images = result.images || [];
- var contentType = element.getChild(i).getBlob().getContentType();
- var extension = "";
- if (/\/png$/.test(contentType)) {
- extension = ".png";
- } else if (/\/gif$/.test(contentType)) {
- extension = ".gif";
- } else if (/\/jpe?g$/.test(contentType)) {
- extension = ".jpg";
+ if(folder.getName() == 'target') {
+ var ui = DocumentApp.getUi();
+ var result = ui.alert(
+ 'Existing target folder found!',
+ 'Delete all contents of target folder?',
+ ui.ButtonSet.YES_NO);
+ if(result == ui.Button.YES) {
+ Logger.log("Trashing target folder...");
+ folder.setTrashed(true);
+ break; // only the first matching folder is trashed
 } else {
- throw "Unsupported image type: "+contentType;
+ Logger.log("Do not delete target folder, stopping!");
+ return; // user declined: abort the export without writing anything
 }
- var name = imagePrefix + imageCounter + extension;
- imageCounter++;
- textElements.push('');
- result.images.push( {
- "bytes": element.getChild(i).getBlob().getBytes(),
- "type": contentType,
- "name": name});
- } else if (t === DocumentApp.ElementType.PAGE_BREAK) {
- // ignore
- } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) {
- textElements.push('* * *\n');
- } else if (t === DocumentApp.ElementType.FOOTNOTE) {
- textElements.push(' (NOTE: '+element.getChild(i).getFootnoteContents().getText()+')');
- } else {
- throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: "
- +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index;
 }
 }
-
- if (textElements.length==0) {
- // Isn't result empty now?
- return result;
+
+ // Create new target folder
+ Logger.log("Creating output folder...");
+ var found = parent.createFolder("target");
+
+ // Write all files to target folder
+ for(var file in convertedDoc.files) { // reuses the function-scoped file variable declared above
+ file = convertedDoc.files[file];
+ var blob = file.blob.copyBlob(); // the name is set on a copy, not on the blob held in convertedDoc
+ var name = file.name;
+ blob.setName(name);
+ found.createFile(blob);
 }
- // evb: Add source pretty too. (And abbreviations: src and srcp.)
- // process source code block:
- if (/^\s*---\s+srcp\s*$/.test(pOut) || /^\s*---\s+source pretty\s*$/.test(pOut)) {
- result.sourcePretty = "start";
- } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) {
- result.source = "start";
- } else if (/^\s*---\s+class\s+([^ ]+)\s*$/.test(pOut)) {
- result.inClass = "start";
- result.className = RegExp.$1;
- } else if (/^\s*---\s*$/.test(pOut)) {
- result.source = "end";
- result.sourcePretty = "end";
- result.inClass = "end";
- } else if (/^\s*---\s+jsperf\s*([^ ]+)\s*$/.test(pOut)) {
- result.text = '';
- } else {
+ // Write markdown file to target folder
+ found.createFile(DocumentApp.getActiveDocument().getName() + ".md", convertedDoc.text, "text/plain");
+}
- prefix = findPrefix(inSrc, element, listCounters);
+function processSection(section) {
+ var state = {
+ 'inSource' : false, // Document read pointer is within a fenced code block
+ 'images' : [], // Image data found in document
+ 'imageCounter' : 0, // Image counter
+ 'prevDoc' : [], // Pointer to the previous element on aparsing tree level
+ 'nextDoc' : [], // Pointer to the next element on a parsing tree level
+ 'size' : [], // Number of elements on a parsing tree level
+ 'listCounters' : [], // List counter
+ };
- var pOut = "";
- for (var i=0; i 0) {
+ textElements = textElements.concat(teHead.textElements);
+ textElements.push('\n\n');
+ textElements.push('---');
+ textElements.push('\n\n');
+ }
}
- return result;
+ // Process body
+ var doc = DocumentApp.getActiveDocument().getBody();
+ doc = processSection(doc);
+ textElements = textElements.concat(doc.textElements);
+
+ // Process footer
+ var foot = DocumentApp.getActiveDocument().getFooter();
+ Logger.log("foot: " + foot);
+ if(foot != null) {
+ var teFoot = processSection(foot);
+ // Do not include empty footer sections
+ if(teFoot.textElements.length > 0) {
+ textElements.push('\n\n');
+ textElements.push('---');
+ textElements.push('\n\n');
+ textElements = textElements.concat(teFoot.textElements);
+ }
+ }
+
+ // Build final output string
+ var text = textElements.join('');
+
+ // Replace critical chars
+ text = text.replace('\u201d', '"').replace('\u201c', '"');
+
+ // Debug logging
+ Logger.log("Result: " + text);
+ Logger.log("Images: " + doc.state.imageCounter);
+
+ // Build attachment and file lists
+ var attachments = [];
+ var files = [];
+ for(var i in doc.state.images) {
+ var image = doc.state.images[i];
+ attachments.push( {
+ "fileName": image.name,
+ "mimeType": image.type,
+ "content": image.bytes
+ } );
+
+ files.push( {
+ "name" : image.name,
+ "blob" : image.blob
+ });
+ }
+
+ // Results
+ return {
+ 'files' : files,
+ 'attachments' : attachments,
+ 'text' : text,
+ };
}
-// Add correct prefix to list items.
-function findPrefix(inSrc, element, listCounters) {
- var prefix="";
- if (!inSrc) {
- if (element.getType()===DocumentApp.ElementType.PARAGRAPH) {
- var paragraphObj = element;
- switch (paragraphObj.getHeading()) {
- // Add a # for each heading level. No break, so we accumulate the right number.
- case DocumentApp.ParagraphHeading.HEADING6: prefix+="#";
- case DocumentApp.ParagraphHeading.HEADING5: prefix+="#";
- case DocumentApp.ParagraphHeading.HEADING4: prefix+="#";
- case DocumentApp.ParagraphHeading.HEADING3: prefix+="#";
- case DocumentApp.ParagraphHeading.HEADING2: prefix+="#";
- case DocumentApp.ParagraphHeading.HEADING1: prefix+="# ";
- default:
- }
- } else if (element.getType()===DocumentApp.ElementType.LIST_ITEM) {
- var listItem = element;
- var nesting = listItem.getNestingLevel()
- for (var i=0; i/g, '>');
+}
+
+// Add a repeat function to all strings: returns the string concatenated num times
+String.prototype.repeat = function( num ) { // assigned unconditionally, so it replaces any repeat the runtime already provides
+ return new Array( num + 1 ).join( this ); // joining num + 1 empty slots with this string as separator yields num copies
+}
+
+function handleTable(element, state, depth) {
+ var textElements = [];
+
+ textElements.push("\n");
+
+ function buildTable(size) {
+ var stack = []
+ var maxSize = 0;
+
+ for(var ir=0; ir):
- if (gt === DocumentApp.GlyphType.BULLET
- || gt === DocumentApp.GlyphType.HOLLOW_BULLET
- || gt === DocumentApp.GlyphType.SQUARE_BULLET) {
- prefix += "* ";
- } else {
- // Ordered list ():
- var key = listItem.getListId() + '.' + listItem.getNestingLevel();
- var counter = listCounters[key] || 0;
- counter++;
- listCounters[key] = counter;
- prefix += counter+". ";
+
+ // Add table data
+ for(var ic=0; ic text.length) {
+ text += " ".repeat(size - text.length)
+ }
+
+ stack.push("| " + text);
}
+
+ stack.push(" |\n");
}
+
+ stack.push("\n");
+ return {
+ maxSize : maxSize,
+ stack : stack,
+ };
}
- return prefix;
+
+ var table = buildTable(100);
+ table = buildTable(Math.max(10, table.maxSize + 1));
+ textElements = textElements.concat(table.stack);
+
+ textElements.push('\n');
+ return textElements;
}
-function processTextElement(inSrc, txt) {
- if (typeof(txt) === 'string') {
- return txt;
+function formatMd(text, indexLeft, formatLeft, indexRight, formatRight) { // wraps text from indexLeft up to indexRight in the two markers and returns the new string
+ var leftPad = '' + formatLeft; // default: the bare opening marker, coerced to a string
+ if(indexLeft > 0) { // nothing precedes position 0, so no padding is considered there
+ if(text[indexLeft - 1] != ' ')
+ leftPad = ' ' + formatLeft; // previous character is not a space: put one in front of the marker
 }
- var pOut = txt.getText();
- if (! txt.getTextAttributeIndices) {
- return pOut;
+ var rightPad = formatRight + ''; // default: the bare closing marker, coerced to a string
+ if(indexRight < text.length) { // at the end of the text there is no following character to check
+ if(text[indexRight] != ' ') {
+ rightPad = formatRight + ' '; // next character is not a space: put one after the marker
+ }
 }
- var attrs=txt.getTextAttributeIndices();
- var lastOff=pOut.length;
+ var formatted = text.substring(0, indexLeft) + leftPad + text.substring(indexLeft, indexRight) + rightPad + text.substring(indexRight);
+ return formatted; // text before + opening marker + wrapped span + closing marker + text after
+}
+
- for (var i=attrs.length-1; i>=0; i--) {
- var off=attrs[i];
- var url=txt.getLinkUrl(off);
- var font=txt.getFontFamily(off);
- if (url) { // start of link
- if (i>=1 && attrs[i-1]==off-1 && txt.getLinkUrl(attrs[i-1])===url) {
- // detect links that are in multiple pieces because of errors on formatting:
- i-=1;
- off=attrs[i];
- url=txt.getLinkUrl(off);
+function handleText(doc, state) {
+ var formatted = doc.getText();
+ var lastIndex = formatted.length;
+ var attrs = doc.getTextAttributeIndices();
+
+ // Iterate backwards through all attributes
+ for(var i=attrs.length-1; i >= 0; i--) {
+ // Current position in text
+ var index = attrs[i];
+
+ // Handle links
+ if(doc.getLinkUrl(index)) {
+ var url = doc.getLinkUrl(index);
+ if (i > 0 && attrs[i-1] == index - 1 && doc.getLinkUrl(attrs[i-1]) === url) {
+ i -= 1;
+ index = attrs[i];
+ url = txt.getLinkUrl(off);
}
- pOut=pOut.substring(0, off)+'['+pOut.substring(off, lastOff)+']('+url+')'+pOut.substring(lastOff);
- } else if (font) {
- if (!inSrc && font===font.COURIER_NEW) {
- while (i>=1 && txt.getFontFamily(attrs[i-1]) && txt.getFontFamily(attrs[i-1])===font.COURIER_NEW) {
- // detect fonts that are in multiple pieces because of errors on formatting:
- i-=1;
- off=attrs[i];
+ formatted = formatted.substring(0, index) + '[' + formatted.substring(index, lastIndex) + '](' + url + ')' + formatted.substring(lastIndex);
+
+ // Do not handle additional formattings for links
+ continue;
+ }
+
+ // Handle font family
+ if(doc.getFontFamily(index)) {
+ var font = doc.getFontFamily(index);
+ var sourceFont = font.COURIER_NEW;
+
+ if (!state.inSource && font === sourceFont) {
+ // Scan left until text without source font is found
+ while (i > 0 && doc.getFontFamily(attrs[i-1]) && doc.getFontFamily(attrs[i-1]) === sourceFont) {
+ i -= 1;
+ off = attrs[i];
}
- pOut=pOut.substring(0, off)+'`'+pOut.substring(off, lastOff)+'`'+pOut.substring(lastOff);
+
+ formatted = formatMd(formatted, index, '`', lastIndex, '`');
+
+ // Do not handle additional formattings for code
+ continue;
}
}
- if (txt.isBold(off)) {
- var d1 = d2 = "**";
- if (txt.isItalic(off)) {
+
+ // Handle bold and bold italic
+ if(doc.isBold(index)) {
+ var dleft, right;
+ dleft = dright = "**";
+ if (doc.isItalic(index))
+ {
// edbacher: changed this to handle bold italic properly.
- d1 = "**_"; d2 = "_**";
+ dleft = "**_";
+ dright = "_**";
}
- pOut=pOut.substring(0, off)+d1+pOut.substring(off, lastOff)+d2+pOut.substring(lastOff);
- } else if (txt.isItalic(off)) {
- pOut=pOut.substring(0, off)+'*'+pOut.substring(off, lastOff)+'*'+pOut.substring(lastOff);
+
+ formatted = formatMd(formatted, index, dleft, lastIndex, dright);
+ }
+ // Handle italic
+ else if(doc.isItalic(index)) {
+ formatted = formatMd(formatted, index, '*', lastIndex, '*');
}
- lastOff=off;
+
+ // Keep track of last position in text
+ lastIndex = index;
}
- return pOut;
+
+ var textElements = [formatted];
+ return textElements;
+}
+
+
+
+function handleListItem(item, state, depth) {
+ var textElements = [];
+
+ // Prefix
+ var prefix = '';
+
+ // Add nesting level
+ for (var i=0; i= 0)?doc.getChild(i-1) : child;
+ state.prevDoc[depth] = prevDoc;
+
+ textElements = textElements.concat(processElement(child, state, depth+1));
+ }
+ return textElements;
+}
+
+
+function processElement(element, state, depth) {
+ // Result
+ var textElements = [];
+
+ switch(element.getType()) {
+ case DocumentApp.ElementType.DOCUMENT:
+ Logger.log("this is a document");
+ break;
+
+ case DocumentApp.ElementType.BODY_SECTION:
+ textElements = textElements.concat(processChilds(element, state, depth));
+ break;
+
+ case DocumentApp.ElementType.PARAGRAPH:
+ // Determine header prefix
+ var prefix = '';
+ switch (element.getHeading()) {
+ // Add a # for each heading level. No break, so we accumulate the right number.
+ case DocumentApp.ParagraphHeading.HEADING6: prefix += '#';
+ case DocumentApp.ParagraphHeading.HEADING5: prefix += '#';
+ case DocumentApp.ParagraphHeading.HEADING4: prefix += '#';
+ case DocumentApp.ParagraphHeading.HEADING3: prefix += '#';
+ case DocumentApp.ParagraphHeading.HEADING2: prefix += '#';
+ case DocumentApp.ParagraphHeading.HEADING1: prefix += '#';
+ }
+
+ // Add space
+ if(prefix.length > 0)
+ prefix += ' ';
+
+ // Push prefix
+ textElements.push(prefix);
+
+ // Process childs
+ textElements = textElements.concat(processChilds(element, state, depth));
+
+ // Add paragraph break only if its not the last element on this layer
+ if(state.nextDoc[depth-1] == element)
+ break;
+
+ if(state.inSource)
+ textElements.push('\n');
+ else
+ textElements.push('\n\n');
+
+ break;
+
+ case DocumentApp.ElementType.LIST_ITEM:
+ textElements = textElements.concat(handleListItem(element, state, depth));
+ textElements.push('\n');
+
+ if(state.nextDoc[depth-1].getType() != element.getType()) {
+ textElements.push('\n');
+ }
+
+ break;
+
+ case DocumentApp.ElementType.HEADER_SECTION:
+ textElements = textElements.concat(processChilds(element, state, depth));
+ break;
+
+ case DocumentApp.ElementType.FOOTER_SECTION:
+ textElements = textElements.concat(processChilds(element, state, depth));
+ break;
+
+ case DocumentApp.ElementType.FOOTNOTE:
+ textElements.push(' (NOTE: ');
+ textElements = textElements.concat(processChilds(element.getFootnoteContents(), state, depth));
+ textElements.push(')');
+ break;
+
+ case DocumentApp.ElementType.HORIZONTAL_RULE:
+ textElements.push('---\n');
+ break;
+
+ case DocumentApp.ElementType.INLINE_DRAWING:
+ // Cannot handle this type - there is no export function for rasterized or SVG images...
+ break;
+
+ case DocumentApp.ElementType.TABLE:
+ textElements = textElements.concat(handleTable(element, state, depth));
+ break;
+
+ case DocumentApp.ElementType.TABLE_OF_CONTENTS:
+ textElements.push('[[TOC]]');
+ break;
+
+ case DocumentApp.ElementType.TEXT:
+ var text = handleText(element, state);
+
+ // Check for source code delimiter
+ if(/^```.+$/.test(text.join(''))) {
+ state.inSource = true;
+ }
+
+ if(text.join('') === '```') {
+ state.inSource = false;
+ }
+
+ textElements = textElements.concat(text);
+ break;
+
+ case DocumentApp.ElementType.INLINE_IMAGE:
+ textElements = textElements.concat(handleImage(element, state));
+ break;
+
+ case DocumentApp.ElementType.PAGE_BREAK:
+ // Ignore page breaks
+ break;
+
+ case DocumentApp.ElementType.EQUATION:
+ var latexEquation = handleEquationFunction(element, state);
+
+ // If equation is the only one in a paragraph - center it
+ var wrap = '$'
+ if(state.size[depth-1] == 1) {
+ wrap = '$$'
+ }
+
+ latexEquation = wrap + latexEquation.trim() + wrap;
+ textElements.push(latexEquation);
+ break;
+ default:
+ throw("Unknown element type: " + element.getType());
+ }
+
+ return textElements;
}
diff --git a/markdown.png b/markdown.png
new file mode 100644
index 0000000..1f7ed22
Binary files /dev/null and b/markdown.png differ