As some of you may know, I write articles at codeproject, which I enjoy doing. One of the things I do not enjoy doing, however, is creating blasted tables of contents; it is a ball ache. One day I thought, sod it, time to write some code to parse the Html and create a Table Of Contents on the fly for me. My work colleague Ryan Worsley said, hey, that would be dead easy in jQuery (and it is, I will show you that too). But to use the jQuery route you have to be able to reference a script, which I will show you in a second. The problem is that when submitting to online portals like codeproject, they may not allow you to add references to your own JavaScript (and rightly so), due to possible security violations.
So what can a fella do?
Well just resort to reading the source HTML file and rewrite it with changes is what I came to.
Typically I have a structure something like this (codeproject kinda likes this format)
H1
H2
H2
H3
H3
H1
H2
Note that these elements are siblings not children, so we need to convert from flat structure to a new Table Of Contents tree / nested list structure. Totally feasible. So how can we do this.
TOC Element
Both solutions Winforms/JQuery plugin will expect there to be an empty div with the id “TOC”. Here is what that looks like
<div id="TOC"></div>
Winform Approach
The approach I needed to take for codeproject was to read in the file in memory and write out a modified one, as they do not allow me to add arbitrary links to JavaScript files.
Here are the steps taken
- Ask user for file
- Read in line by line, if its a H1,H2,H3,H4
- Keep a record of line number
- Create new object to hold this heading, which provides
- String representation of ALL its children
- Link text
- Fully changed text
- If we see a new H{x} tag, look for its previous H{x-1}, and append to parents children
- Write out line by line, if its changed use updated wrapped H{x} line representation
- Look for “TOC” div, if found replace it, with string representation of entire new tree structure of H1-H4 tags
- Write the file
Here are all the source files for this approach:
Helper class
/// <summary>
/// Heading levels handled by the TOC builder. The member names are formatted
/// directly into markup by <see cref="HeadingLineStructure.FinalText"/>, so they
/// must stay identical to the HTML tag names.
/// </summary>
public enum HeadingType { h1, h2, h3, h4 }

/// <summary>
/// One heading line of the source document, together with the headings nested
/// beneath it. Provides both the rewritten heading line (with an anchor) and the
/// nested list markup used for the table of contents.
/// </summary>
public class HeadingLineStructure
{
    /// <summary>Creates a heading with an empty list of children.</summary>
    /// <param name="text">Text shown for the heading and for its TOC link.</param>
    /// <param name="linkText">Anchor name the TOC link points at.</param>
    /// <param name="headingType">Heading level (h1-h4).</param>
    /// <param name="lineNumber">Line number of the heading in the source file.</param>
    public HeadingLineStructure(string text, string linkText, HeadingType headingType, int lineNumber)
    {
        this.Text = text;
        this.LinkText = linkText;
        this.HeadingType = headingType;
        this.LineNumber = lineNumber;
        this.Children = new List<HeadingLineStructure>();
    }

    public string Text { get; set; }

    public string LinkText { get; set; }

    public HeadingType HeadingType { get; set; }

    public int LineNumber { get; set; }

    /// <summary>True when this heading is an h1.</summary>
    public bool IsTopLevel
    {
        get { return HeadingType == HeadingType.h1; }
    }

    public List<HeadingLineStructure> Children { get; set; }

    /// <summary>
    /// The replacement heading line: the original heading wrapped around a named
    /// anchor so TOC links can jump to it.
    /// </summary>
    public string FinalText
    {
        get
        {
            return String.Format(@"<{0}><a name=""{1}"" id=""{1}"">{2}</a></{0}>", HeadingType, LinkText, Text);
        }
    }

    /// <summary>
    /// Renders this heading as an <c>&lt;li&gt;</c> TOC entry, followed by a nested
    /// <c>&lt;ul&gt;</c> of its children when it has any.
    /// </summary>
    /// <returns>The list item markup, terminated by a line break.</returns>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();

        // Children has a public setter, so tolerate it having been set to null.
        if (Children != null && Children.Any())
        {
            sb.AppendLine(string.Format(@"<li><a href=""#{0}"">{1}</a>", LinkText, Text));
            sb.AppendLine("<ul>");
            foreach (HeadingLineStructure child in Children)
            {
                // child.ToString() already ends with a line break, so Append (not
                // AppendLine) avoids emitting an empty line after every child.
                sb.Append(child.ToString());
            }
            sb.AppendLine("</ul>");
            // Close the list item opened above; previously it was left unclosed.
            sb.AppendLine("</li>");
        }
        else
        {
            sb.AppendLine(string.Format(@"<li><a href=""#{0}"">{1}</a></li>", LinkText, Text));
        }

        return sb.ToString();
    }
}
File modifying code
/// <summary>
/// Main form of the TOC builder. Lets the user pick an HTML file, wraps every
/// h1-h4 heading line in a named anchor, replaces the TOC placeholder div with a
/// nested list of links to those anchors, and writes the result next to the
/// source file as "{name}_WithTOC{extension}".
/// </summary>
public partial class Form1 : Form
{
    // Placeholder the source document must contain; the line holding it is
    // replaced by the generated table of contents.
    private const string TocPlaceholder = @"<div id=""TOC""></div>";

    // Heading levels in order of precedence; the index is the nesting depth.
    private static readonly HeadingType[] HeadingLevels =
        { HeadingType.h1, HeadingType.h2, HeadingType.h3, HeadingType.h4 };

    // Most recent heading seen at each depth. Slots deeper than the latest
    // heading are cleared, so a new heading can never attach to a stale parent
    // that belongs to an earlier section.
    private readonly HeadingLineStructure[] currentByLevel = new HeadingLineStructure[4];

    // Headings without a parent; these form the top level of the TOC.
    private readonly List<HeadingLineStructure> rootHeadings = new List<HeadingLineStructure>();

    private Dictionary<int, HeadingLineStructure> lineReplacements = new Dictionary<int, HeadingLineStructure>();
    private Dictionary<int, string> oldLines = new Dictionary<int, string>();
    private List<string> newLines = new List<string>();

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Asks the user for an .htm/.html file and generates its TOC version.</summary>
    private void btnFile_Click(object sender, EventArgs e)
    {
        string fileName;
        // OpenFileDialog is IDisposable; release it as soon as the name is known.
        using (OpenFileDialog dialog = new OpenFileDialog())
        {
            dialog.Filter = "All files (*.*)|*.*";
            dialog.InitialDirectory = @"C:\";
            dialog.Title = "Please select an HTML file to generate a TOC for.";
            if (dialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }
            fileName = dialog.FileName;
        }

        FileInfo file = new FileInfo(fileName);
        // Compare case-insensitively so ".HTML" / ".Htm" are accepted as well.
        bool isHtml = string.Equals(file.Extension, ".htm", StringComparison.OrdinalIgnoreCase)
            || string.Equals(file.Extension, ".html", StringComparison.OrdinalIgnoreCase);
        if (!isHtml)
        {
            MessageBox.Show("Source document MUST be Html or Htm file", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        lblFileName.Text = file.Name;
        try
        {
            Parse(file);
        }
        catch (IOException ex)
        {
            MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        catch (UnauthorizedAccessException ex)
        {
            MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }

    /// <summary>
    /// Reads the file, collects its headings, and writes the rewritten copy.
    /// Shows an error and writes nothing when the TOC placeholder is missing.
    /// </summary>
    /// <param name="file">The HTML source document.</param>
    private void Parse(FileInfo file)
    {
        // The form can be used for several files in a row; start from a clean
        // slate so line numbers from a previous run do not collide.
        ResetState();

        int lineNumber = 1;
        foreach (string line in File.ReadAllLines(file.FullName))
        {
            CreateHeadingItem(line, lineNumber);
            oldLines.Add(lineNumber, line);
            lineNumber++;
        }

        if (!oldLines.Values.Any(x => x.Contains(TocPlaceholder)))
        {
            MessageBox.Show(@"<div id=""TOC""></div> element not found, check original document", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        // Walk by line number rather than enumerating the dictionary, whose
        // enumeration order is not guaranteed.
        for (int number = 1; number <= oldLines.Count; number++)
        {
            string original = oldLines[number];
            HeadingLineStructure heading;
            if (lineReplacements.TryGetValue(number, out heading))
            {
                newLines.Add(heading.FinalText);
            }
            else if (original.Contains(TocPlaceholder))
            {
                newLines.Add(BuildTOC());
            }
            else
            {
                newLines.Add(original);
            }
        }

        // GetFileNameWithoutExtension keeps dots inside the name ("my.article.html"
        // becomes "my.article_WithTOC.html" instead of "my_WithTOC.html").
        string newFile = Path.Combine(file.Directory.FullName,
            string.Format("{0}_WithTOC{1}", Path.GetFileNameWithoutExtension(file.Name), file.Extension));
        File.WriteAllLines(newFile, newLines);
        MessageBox.Show("DONE");
    }

    /// <summary>Clears everything collected by a previous <see cref="Parse"/> run.</summary>
    private void ResetState()
    {
        Array.Clear(currentByLevel, 0, currentByLevel.Length);
        rootHeadings.Clear();
        lineReplacements.Clear();
        oldLines.Clear();
        newLines.Clear();
    }

    /// <summary>Builds the TOC markup: a list containing every parentless heading.</summary>
    private string BuildTOC()
    {
        StringBuilder sb = new StringBuilder();
        sb.AppendLine("<ul>");
        foreach (HeadingLineStructure heading in rootHeadings)
        {
            sb.AppendLine(heading.ToString());
        }
        sb.AppendLine("</ul>");
        return sb.ToString();
    }

    /// <summary>
    /// If the line consists of a single h1-h4 element, records it as a heading,
    /// links it to the nearest preceding heading of a higher level, and registers
    /// it as the replacement for that line. Other lines are ignored.
    /// </summary>
    /// <param name="line">Raw line from the source document.</param>
    /// <param name="lineNumber">1-based number of that line.</param>
    private void CreateHeadingItem(string line, int lineNumber)
    {
        string trimmedLine = line.Trim();
        for (int level = 0; level < HeadingLevels.Length; level++)
        {
            HeadingType headingType = HeadingLevels[level];
            string openTag = "<" + headingType + ">";
            string closeTag = "</" + headingType + ">";

            // Ordinal, case-insensitive: accepts <h1>, <H1> and mixed pairs, and is
            // not affected by the current culture.
            if (trimmedLine.Length < openTag.Length + closeTag.Length ||
                !trimmedLine.StartsWith(openTag, StringComparison.OrdinalIgnoreCase) ||
                !trimmedLine.EndsWith(closeTag, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string text = GetReplacementLinkText(trimmedLine, openTag, closeTag);
            HeadingLineStructure heading = new HeadingLineStructure(text, text.Replace(" ", "-"), headingType, lineNumber);

            // Nearest enclosing heading: the deepest still-open level above this one.
            HeadingLineStructure parent = null;
            for (int ancestor = level - 1; ancestor >= 0 && parent == null; ancestor--)
            {
                parent = currentByLevel[ancestor];
            }

            if (parent != null)
            {
                parent.Children.Add(heading);
            }
            else
            {
                // No enclosing heading (e.g. an h2 before any h1): keep it in the
                // TOC as a top-level entry rather than losing it.
                rootHeadings.Add(heading);
            }

            // This heading opens a new section, which closes all deeper ones.
            currentByLevel[level] = heading;
            for (int deeper = level + 1; deeper < currentByLevel.Length; deeper++)
            {
                currentByLevel[deeper] = null;
            }

            lineReplacements.Add(lineNumber, heading);
            return;
        }
    }

    /// <summary>Returns the content between the opening and closing heading tag.</summary>
    private static string GetReplacementLinkText(string trimmedLine, string openTag, string closeTag)
    {
        return trimmedLine.Substring(openTag.Length, trimmedLine.Length - openTag.Length - closeTag.Length);
    }
}
Here is a link to a small demo project https://dl.dropbox.com/u/2600965/Blogposts/2012/11/HtmlTOCBuilder.zip
jQuery plugin code
And here is a jsFiddle that Ryan created to demo his jQuery plugin version
(function($) {
    /**
     * Groups a flat jQuery set of sibling elements into a forest.
     *
     * Called with `this` bound to the jQuery set to search.
     *
     * @param {string[]} structure Selectors ordered from highest to lowest
     *                             precedence, e.g. ['h1', 'h2', 'h3'].
     * @param {number}   [level=0] Index into `structure` of the level being
     *                             collected on this call.
     * @returns {{node: Element, children: Array}[]} One entry per element that
     *          matches `structure[level]`, each with its nested children.
     */
    var project = function(structure, level) {
        level = level || 0;
        if (!structure || level >= structure.length) {
            return [];
        }

        // Index into the array instead of shift()-ing it, so the caller's
        // array is left untouched.
        var current = structure[level];

        // A node's subtree ends at the next element of the same OR any higher
        // level. Stopping only at the same level would let an h2 swallow the
        // h3s that belong to a later h1.
        var boundary = structure.slice(0, level + 1).join(', ');

        var nodes = [];
        var currentSet = this.filter(current);
        for (var i = 0; i < currentSet.length; ++i) {
            var context = currentSet[i];
            var children = [];
            if (level + 1 < structure.length) {
                children = project.call($(context).nextUntil(boundary), structure, level + 1);
            }
            nodes.push({ node: context, children: children });
        }
        return nodes;
    };

    /**
     * Projects the matched (sibling) elements into a forest and passes it to
     * `handler`.
     *
     * @param {string[]} structure Selectors from highest to lowest precedence.
     * @param {function(Array)} handler Receives the array of root nodes.
     * @returns {jQuery} The original set, so calls can be chained.
     */
    $.fn.projectTree = function(structure, handler) {
        handler(project.call(this, structure, 0));
        return this;
    };
})(jQuery);
// Builds the table of contents once the DOM is ready: projects the body's
// direct children into an h1 > h2 > h3 forest and renders it as nested
// ordered lists inside the TOC element.
$(document).ready(function() {
    $('body').children().projectTree(['h1', 'h2', 'h3'], function(trees) {
        var nodeCount = 0;

        if (trees.length > 0) {
            // One shared list for all top-level headings, so the numbering runs
            // 1, 2, 3... instead of restarting at 1 for every h1.
            // The id matches the <div id="TOC"></div> placeholder described in
            // the article; ids are case-sensitive.
            var rootList = $('<ol></ol>').appendTo('#TOC');
            for (var i = 0; i < trees.length; ++i) {
                drawTree(trees[i], rootList);
            }
        }

        /**
         * Renders one tree node as a list item (recursing into its children)
         * and wraps the heading element in a matching named anchor.
         *
         * @param {{node: Element, children: Array}} root Node to render.
         * @param {jQuery} parent The <ol> the item is appended to.
         */
        function drawTree(root, parent) {
            var nodeName = $(root.node).text();
            // The running counter keeps anchors unique even when two headings
            // share the same text; the regex replaces every space, not just
            // the first one.
            var linkName = ++nodeCount + '-' + nodeName.replace(/ /g, '-');
            var link = $('<a></a>').attr('href', '#' + linkName).text(nodeName);
            var list = $('<li></li>');

            $(root.node).wrap(function () {
                return $('<a></a>').attr('name', linkName);
            });

            list.append(link).appendTo(parent);

            // Highlight the target heading while its TOC link is hovered.
            link.hover(function () {
                $(root.node).css('color', 'red');
            }, function () {
                $(root.node).css('color', '#907F7F');
            });

            if (root.children.length > 0) {
                // Nest the sub-list inside this item; an <ol> may not be a
                // direct child of another <ol>.
                var childList = $('<ol></ol>').appendTo(list);
                for (var c = 0; c < root.children.length; ++c) {
                    drawTree(root.children[c], childList);
                }
            }
        }
    });
});
The projectTree plugin takes two arguments:
- The hierarchy of the nodes (most to least precedence) as an array of jQuery selectors
- A function which will receive the forest of trees as an array.
Each tree in that array is an object with two properties:
- node – this node
- children – an array of these same objects that have been found to be children according to the precedence you set above
Ryans jsFiddle is available here : http://jsfiddle.net/8ekAu/1/
Nice one Sasha! It could be handy to all of us who write on CP. Will you share the source?
Cheers
WOW that was quick, yeah sure code is there now. Enjoy
Fastest mouse in the west!
Thanks
Have you considered an HtmlAgilityPack solution?
Don’t know about that solution, have you got a link?
Ryan’s code needs some explanation
And as someone else mentioned, HtmlAgilityPack
I will add some explanation of Ryans code, and could you provide a link to the HtmlAgilityPack solution?
on CodePlex @
http://htmlagilitypack.codeplex.com/
[...] Html Table Of Contents Generator (Sacha Barber) [...]