Option Explicit
Dim strTableborder, strKeywords, strHTMLTitle, strThisfile, strShortfile, strHtmlfile, _
strThispath, strtempfile As String
Dim Headno As Integer
Dim FSys As Variant
Sub htmltag()
'
' Htmltag Macro: main procedure
' Macro created 1/11/02 by Jos Kingston. Last update 18/10/04
' www.joskingston.org email jos@joskingston.org
'
' Jos Kingston asserts her moral rights of authorship of htmltag
' as laid down in the 1988 Copyright, Design and Patents Act.
' These rights include the right to be identified as the author of htmltag
' and the right not to have this work "subjected to derogatory treatment"
' - for example "addition, deletion or alteration prejudicial to the
' honour or reputation of the author."
'
' Table conversion only works on regular tables - merged or split cells will fail.
' No picture-handling capabilities - but links to pics will work.
' This macro has NOT been developed as a general purpose html converter.
' It is for customised use on Word files using a limited set of styles.
' There is an accompanying Word file htmltaguser.doc
' Which can be used for demonstration and further info about Htmltag.
' This macro copies the document to a temporary working file, then runs
' search and replace routines to insert html tags corresponding to
' the styles applied.
' When the routine is complete, the file is saved as unformatted text
' with an html extension. It is displayed in a browser window.
' The original document is then opened in Word for editing, following
' which the macro can be run again as required after final corrections.
' --------------------------------------------------------------------------
' Prepare for conversion - initialise variables, save to temp working file,
' Strip section and page breaks
Getready
' Add a paragraph of normal text after headings to avoid tagging problems
Consecheads
' Replace special html characters with html code
Tagspecials
' Convert hyperlinks. Must be done before TOC generated
' At present won't convert Word internal document anchors - sortable
Taghyperlinks
' Convert bold and italic to tags
Tagbolditalic
' Tag Code, Red and Blue character styles
Tagcharstyles
' Heading 3s are tagged with unique anchor names
' and linked to a contents list at the top of the file.
' NEED TO EDIT THIS ROUTINE IF YOU WANT TO USE A DIFFERENT LEVEL FOR HYPERLINKS.
Tagcontents
' Internal links - convert Doclink character style to internal links.
' This only works to link to Heading 3, and if the Doclink text itself
' is exactly the same as the heading title.
' This is pretty tacky and bookmarks will be sorted sometime.
Tagdoclinks
' Convert paras and heading styles to tags
' This must be done after character formatting and anchor tagging
' Otherwise nesting won't be correct
TagParasandHeadings
' Tag bullets, numbered and indented lists.
Taglists
'Once all tagging is done, whole document is reformatted in normal style.
'This prevents unwanted asterisks creeping into lists
Setnormal
' Convert tables - only works properly where no split or merged cells
Tagtables
' Add update and head info + end tags
Tagheadinfo
' Save this file as a temporary text file
Tempsave
' Clean out unwanted bits and pieces which creep in during replace routines etc.
Cleantext
' Text file saved with html extension, process completed, user notified
Finalhtml
End Sub
Sub Getready()
' Called from htmltag
'Initialise filename and path variables
strThispath = ActiveDocument.Path
strThisfile = ActiveDocument.FullName
strShortfile = Left(ActiveDocument.Name, Len(ActiveDocument.Name) - 4)
strHtmlfile = strShortfile & ".html"
' Save document
ActiveDocument.Save
' Save doc to temporary file for working with
ChangeFileOpenDirectory strThispath
ActiveDocument.SaveAs FileName:="Tempdoc.doc", FileFormat:=wdFormatDocument, _
LockComments:=False, Password:="", AddToRecentFiles:=True, WritePassword _
:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
False
' Normal-style line added at end of file for process reasons
Selection.EndKey Unit:=wdStory
Selection.TypeParagraph
Selection.Style = ActiveDocument.Styles("Normal")
Selection.HomeKey Unit:=wdStory
' Strip all section breaks out of the file
' Full settings specified as this is first search in macro
' - don't need repeating every time.
Selection.HomeKey Unit:=wdStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^p^b"
.Replacement.Text = "^p"
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
' Strip all page breaks out of the file
Selection.HomeKey Unit:=wdStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^p^m"
.Replacement.Text = "^p"
End With
Selection.Find.Execute Replace:=wdReplaceAll
' If the document has a TOC, delete it
If ActiveDocument.TablesOfContents.Count > 0 Then
ActiveDocument.TablesOfContents(1).Delete
End If
End Sub
Sub Consecheads()
' this routine is to prevent tagging problems
' where two consecutive lines are set to a style
' a line of normal text between headings is required to avoid tagging problems
' the gobbledegook text is there to provide something which can be replaced with normal
Dim Headno As Integer
Dim Itemfound As Boolean
Headno = 1
Itemfound = True
Do While Headno <= 5
Selection.HomeKey Unit:=wdStory
With Selection.Find
.Text = ""
.Replacement.Text = ""
End With
Do While Itemfound
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
Selection.Find.Style = ActiveDocument.Styles("Heading " & Headno)
Selection.Find.Execute
If Selection.Find.Found = False Then Itemfound = False
If Itemfound = True Then
Selection.EndOf
Selection.InsertParagraphAfter
Selection.TypeText Text:="fiddledefooderops"
End If
Loop
Itemfound = True
Headno = Headno + 1
Loop
Selection.HomeKey Unit:=wdStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.Style = ActiveDocument.Styles("Normal")
With Selection.Find
.Text = "fiddledefooderops^p"
.Replacement.Text = "^p"
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = ""
.Replacement.Text = ""
End With
End Sub
Sub Tagspecials()
' Called from htmltag
' Must do & first, or other special characters get in a twist.
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "&"
.Replacement.Text = "&"
End With
Selection.Find.Execute Replace:=wdReplaceAll
With Selection.Find
.Text = "<"
.Replacement.Text = "<"
End With
Selection.Find.Execute Replace:=wdReplaceAll
With Selection.Find
.Text = ">"
.Replacement.Text = ">"
End With
Selection.Find.Execute Replace:=wdReplaceAll
' Replace line breaks with br tag
With Selection.Find
.Text = "^l"
.Replacement.Text = " "
Selection.TypeParagraph
.TablesOfContents.Add Range:=Selection.Range, UseHeadingStyles:=True, _
UpperHeadingLevel:=3, LowerHeadingLevel:=3, IncludePageNumbers:=False
Selection.TypeText Text:=" and
"
End With
Selection.Find.Execute Replace:=wdReplaceAll
End Sub
Sub Taghyperlinks()
' Called from htmltag
Dim rngTemp As Range
Dim fieldLoop As Field
Dim fieldcontent As String
Dim Fieldno As Integer
Fieldno = 1
Selection.HomeKey Unit:=wdStory
For Each fieldLoop In ActiveDocument.Fields
If fieldLoop.Type = wdFieldHyperlink Then
If ActiveDocument.Fields.Count >= Fieldno Then
fieldcontent = ""
Set rngTemp = ActiveDocument.Fields(Fieldno).Result
If ActiveDocument.Fields.Count > Fieldno Then
Fieldno = Fieldno + 1
End If
' If not condition prevents incorrect tagging of Word bookmark hyperlinks
If Not InStr(LCase(fieldLoop.Code.Text), "\l") <> 0 Then
rngTemp.Text = fieldcontent + fieldLoop.Result.Text + ""
ActiveDocument.Fields(Fieldno).Update
End If
End If
End If
Next fieldLoop
' Get rid of the word HYPERLINK which is included in Word field content
' Two search routines to handle space or no space before
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^&"
End With
Selection.Find.Execute Replace:=wdReplaceAll
' Blue character style
Selection.Find.ClearFormatting
Selection.Find.Style = ActiveDocument.Styles("Blue")
Selection.Find.Replacement.ClearFormatting
Selection.Find.Replacement.Style = ActiveDocument.Styles( _
"Default Paragraph Font")
With Selection.Find
.Text = "^p"
.Replacement.Text = "^p"
End With
Selection.Find.Execute Replace:=wdReplaceAll
With Selection.Find
.Text = ""
.Replacement.Text = "^&"
End With
Selection.Find.Execute Replace:=wdReplaceAll
End Sub
Sub Tagcontents()
' Called from htmltag
' This is tidied up into final html code with two search and replace routines.
' I did it this way because I just couldn't get it to create active links if
' the whole thing was in the InsertAfter line.
' Note the complexities when including inverted commas in tags.
' To differentiate from VBA reserved use of ", must use ""
Dim Itemno As Integer
Itemno = 1
Selection.HomeKey Unit:=wdStory
With ActiveDocument.Content.Find
.ClearFormatting
.Style = wdStyleHeading3
Do While .Execute(FindText:="", Forward:=True, _
Format:=True) = True
With .Parent
.StartOf Unit:=wdParagraph, Extend:=wdMove
.InsertAfter "
" End With Selection.Find.Execute Replace:=wdReplaceAll ' Headings won't tag correctly if they're immediately followed by a table ' Code here fixes this Do While Tableno <= ActiveDocument.Tables.Count ActiveDocument.Tables(Tableno).Select Selection.splittable ' Selection.MoveUp Unit:=wdLine, Count:=1 ' Selection.InsertParagraphAfter Tableno = Tableno + 1 Loop 'Heading tagging done as two routines so Word para end doesn't precede closing tag 'Then corrects places where tagging has put
into heading styles
Do While Headno <= 5
Selection.HomeKey Unit:=wdStory
Selection.Find.ClearFormatting
Selection.Find.Style = ActiveDocument.Styles("Heading " & Headno)
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = ""
.Replacement.Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll With Selection.Find .Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll Headno = Headno + 1 Loop End Sub Sub Taglists() ' Called from htmltag ' Style s&r routine puts tags at beginning and end of list. ' Each list item then has to be handled separately. ' Style then changed to normal to avoid unwanted asterisks in html ' The above routines put a redundant line at the end of each list. ' This is stripped out. ' Note that line spacing for indents is included as style in head info ' Plain indent - must be done before numlist 2 gets blockquote tag added Selection.Find.ClearFormatting Selection.Find.Style = ActiveDocument.Styles("indent") Selection.Find.Replacement.ClearFormatting With Selection.Find .Text = "" .Replacement.Text = "^&" End With Selection.Find.Execute Replace:=wdReplaceAll Selection.Find.ClearFormatting With Selection.Find .Text = "^p" .Replacement.Text = "^p" End With Selection.Find.Execute Replace:=wdReplaceAll ' Tag bullets Selection.HomeKey Unit:=wdStory Selection.Find.ClearFormatting Selection.Find.Style = ActiveDocument.Styles("bullet") Selection.Find.Replacement.ClearFormatting With Selection.Find .Text = "" .Replacement.Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll With Selection.Find .Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll Selection.Find.ClearFormatting With Selection.Find .Text = "" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll With Selection.Find .Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll Selection.Find.ClearFormatting With Selection.Find .Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll With Selection.Find .Text = "^p" .Replacement.Text = "^p
- ^&
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll With Selection.Find .Text = "
" .Replacement.Text = "" End With Selection.Find.Execute Replace:=wdReplaceAll Selection.Find.ClearFormatting With Selection.Find .Text = "^p" ' Add table opening tags Selection.GoTo What:=wdGoToTable, Which:=wdGoToFirst Selection.TypeText Text:="
"
' Before table is converted to text,
' and tags which got into it from previous formatting routines are stripped out. ' Then, paragraph endings within cells are handled. ActiveDocument.Tables(1).Select Selection.Find.ClearFormatting Selection.Find.Replacement.ClearFormatting With Selection.Find .Text = "" .Replacement.Text = "" .Forward = True .Wrap = wdFindStop End With Selection.Find.Execute Replace:=wdReplaceAll ActiveDocument.Tables(1).Select Selection.Find.ClearFormatting Selection.Find.Replacement.ClearFormatting With Selection.Find .Text = " " .Replacement.Text = "" .Wrap = wdFindStop End With Selection.Find.Execute Replace:=wdReplaceAll ActiveDocument.Tables(1).Select Selection.Find.ClearFormatting Selection.Find.Replacement.ClearFormatting With Selection.Find .Text = "^p" .Replacement.Text = "^l" .Wrap = wdFindStop End With Selection.Find.Execute Replace:=wdReplaceAll ' Put a back in front of |