Converting
document files is never easier, I recently had requirement to convert some RTF text into
Simple HTML using SQL Server.
SQL Server RTF
to HTML Conversion Code Limitations:
- Color tags are not handled
- Only few List item tags handled
- Some of the tags are marked as not necessary and replaced with nothing
Script is for RTF to HTML converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
USE SQLCache | |
GO | |
------------------------------------------------------------------------------------------------------ | |
---- Step 1 ---------- Create Table RTFFiles and import Text into Column RTFFileText ------------------ | |
CREATE TABLE RTFFiles | |
( | |
FileID INT IDENTITY(1, 1), | |
RTFFileText NVARCHAR(MAX), | |
HTMLFileText NVARCHAR(MAX), | |
) | |
GO | |
------------------------------------------------------------------------------------------------------- | |
---- Step 2 ---------- Create Conversion Stored Procedure --------------------------------------------- | |
/* | |
Script By: Amna Asif for http://sqlcache.blogspot.com/ | |
Purpose: To Convert RTF Text into Simple HTML through SQL Server | |
*/ | |
SET ANSI_NULLS ON | |
GO | |
SET QUOTED_IDENTIFIER ON | |
GO | |
CREATE PROCEDURE [dbo].[ConvertRTFtoHTML] ( @RTFFileID INT ) | |
AS | |
BEGIN | |
DECLARE @RTFFileText NVARCHAR(MAX) | |
DECLARE @fontSize VARCHAR(20) | |
DECLARE @fontType VARCHAR(20) | |
DECLARE @startfont VARCHAR(MAX) | |
--SELECT FileText from RTFFiles where fileid=@RTFFileID | |
---- Convert Bold Tags | |
SELECT @RTFFileText = REPLACE(REPLACE(REPLACE(RTFFileText, '\ \', '\'),'\b0', '</b>'), '\b', '<b>') | |
FROM RTFFiles | |
WHERE FileID = @RTFFileID | |
--- Addition for Specific Client | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\<b> - : </b>', ' - : ') | |
--- Convert Italic | |
SELECT @RTFFileText = REPLACE(REPLACE(@RTFFileText, '\i0', '</i>'), '\i', '<i>') | |
--- Convert Underline | |
SELECT @RTFFileText = REPLACE(REPLACE(REPLACE(@RTFFileText, '\ulnone','</u>'), '\ul0', '</u>'),'\ul', '<u>') | |
--- Convert Alignments | |
SELECT @RTFFileText = REPLACE(REPLACE(REPLACE(@RTFFileText, '\pard\\qc', '<div align=center>'), '\pard\\qr', '<div align=right>'), '\pard','<div align=left>') | |
--- Remove \pard from the tags so it doesn't get confused with \par. | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\pard', '') | |
--- Convert line breaks | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\par', '<br>') | |
--- Convert Tabs | |
SELECT @RTFFileText = REPLACE(REPLACE(REPLACE(@RTFFileText, '\par', ' '), '\(tab)', ''),'\tab', ' ') | |
--Convert \\ to \ | |
SELECT @RTFFileText = REPLACE(REPLACE(@RTFFileText, '\\', '\'), '\ \', '\') | |
--- Convert \{ to to { | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\{', '{') | |
--Convert \) to } | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\} ', '}') | |
----Replace | |
--select @RTFFileText= replace(@RTFFileText,' ',' ') | |
--- Remove color tags because they are not handled | |
SELECT @RTFFileText = CASE WHEN @RTFFileText LIKE '%\colortbl%' | |
THEN REPLACE(@RTFFileText, | |
SUBSTRING(@RTFFileText, CHARINDEX('\colortbl', @RTFFileText), | |
CHARINDEX('}', @RTFFileText, CHARINDEX('\colortbl', @RTFFileText))- CHARINDEX('\colortbl', @RTFFileText)), | |
'') | |
ELSE @RTFFileText | |
END | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\nowidctlpar\tx288', '') | |
--- Set Font Size | |
SELECT @fontSize = CASE WHEN @RTFFileText LIKE '%{\f9\fs%' | |
THEN SUBSTRING(@RTFFileText, | |
CHARINDEX('{\f9\fs',@RTFFileText) + 7, 2) | |
ELSE 12 | |
END | |
--- Set Font Type | |
SELECT @fonttype = CASE WHEN @RTFFileText LIKE '%\fprq%' | |
THEN SUBSTRING(@RTFFileText, | |
CHARINDEX('\fprq',@RTFFileText) + 7, | |
CHARINDEX(';', @RTFFileText,CHARINDEX('\fprq', @RTFFileText)+ 7)- ( CHARINDEX('\fprq', @RTFFileText)+ 7 )) | |
ELSE 'Arial' | |
END | |
--- Convert Font style | |
SELECT @RTFFileText = CASE WHEN @RTFFileText LIKE '%}}%' | |
THEN REPLACE(@RTFFileText, SUBSTRING(@RTFFileText, 1, CHARINDEX('}}', @RTFFileText)+ 1) | |
, ' <span style="font-family:'+ ISNULL(@fonttype, 'Arial') + ';font-size:' + ISNULL(@fontSize, '12')+ '"> ') | |
ELSE @RTFFileText | |
END | |
--- Close Span | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '}}', '}</span>') | |
--- Remove Extra {} | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '{}', '') | |
--------------------------------------- Start SubString Looping | |
--------------------------------------- Convert file text in sub Strings to deal with tags | |
DECLARE @totalSize INT, | |
@StartIndex INT, | |
@EndIndex INT, | |
@newString VARCHAR(MAX)= '' | |
SELECT @StartIndex = 1, | |
@EndIndex = 1, | |
@startfont = '', | |
@fontSize = 12 | |
WHILE ( @StartIndex > 0 | |
AND @EndIndex > 0 | |
) | |
BEGIN | |
SELECT @StartIndex = ( SELECT CHARINDEX('{', @RTFFileText, @StartIndex) ) | |
SELECT @EndIndex = ( SELECT CHARINDEX('}', @RTFFileText, @EndIndex) ) | |
SELECT @startfont = REPLACE(ISNULL(SUBSTRING(@RTFFileText, @StartIndex, CASE WHEN @EndIndex > @StartIndex THEN @EndIndex - @StartIndex + 1 | |
ELSE 0 | |
END), | |
''), '{}', '') | |
SELECT @newstring = REPLACE(REPLACE(REPLACE(REPLACE(@startfont, '{\f9\fs18', '<span style="font-size:' + @fontSize + '"> '), | |
'}', '}</span>'), | |
'\fs18', | |
'<span style="font-size:' | |
+ @fontSize + '"> '), '}', | |
'}</span>') | |
SELECT @newstring = CASE WHEN @newstring LIKE '%\li0 \li288%' | |
THEN REPLACE(REPLACE(@newstring, '\li0 \li288', '<dd>'), | |
'}', '</dd>') | |
ELSE @newstring | |
END | |
SELECT @newstring = CASE WHEN @newstring LIKE '%\li0 %' | |
THEN REPLACE(REPLACE(@newstring, '\li0', '<dt>'), | |
'}', '</dt>') | |
ELSE @newstring | |
END | |
SELECT @newstring = CASE WHEN @newstring LIKE '%<b>%' | |
AND @newstring NOT LIKE '%</b>%' | |
THEN @newstring + '</b>' | |
ELSE @newstring | |
END | |
SELECT @StartIndex = CASE WHEN @StartIndex = 0 THEN -999 | |
ELSE @EndIndex + 1 | |
END | |
SELECT @EndIndex = CASE WHEN @EndIndex = 0 THEN -999 | |
ELSE @EndIndex + 1 | |
END | |
SELECT @RTFFileText = REPLACE(@RTFFileText, @startfont, | |
REPLACE(REPLACE(REPLACE(@newstring, '{', ''), '}', ''),'\rtf1\ansi', '')) | |
END | |
--------------------------------------- End SubString Looping | |
--------------------------------------- Start ListItem Looping | |
--------------------------------------- Add closing Tags of Lists | |
IF ( @RTFFileText LIKE '%<dl>%' ) | |
BEGIN | |
WHILE ( @StartIndex < LEN(@RTFFileText) ) | |
BEGIN | |
SELECT @StartIndex = REPLACE(( SELECT CHARINDEX('>td<', REVERSE(@RTFFileText)) - LEN(@RTFFileText) | |
), '-', '') | |
SELECT @EndIndex = REPLACE(( SELECT CHARINDEX('>dd/<', REVERSE(@RTFFileText)) - LEN(@RTFFileText) | |
), '-', '') | |
SELECT @RTFFileText = STUFF(@RTFFileText, @StartIndex - 3, 5,' <dl> <dt> ') | |
SELECT @RTFFileText = STUFF(@RTFFileText, @EndIndex + 3, 5, ' </dd> </dl> ') | |
SELECT @StartIndex = @EndIndex + @StartIndex | |
END | |
END | |
--------------------------------------- END ListItem Looping | |
--- Convert list item Tags | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\li0', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\li250', '<li>') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\li500', '<li>') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\li288', '<li>') | |
------------------------ Start --- Extra Tags ------------------------------ Remove extra tags that are not necessary to handle | |
SELECT @RTFFileText = REPLACE(REPLACE(REPLACE(@RTFFileText, '\fs18 ',''), '{', ''), '}', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\viewkind4\uc1', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\f0', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\fs9', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\f9', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\fs16', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\fs20', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\cf1', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\cf1\f1130/90 \cf0 \cf1\f1', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\cf0', '') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '\f1', '') | |
------------------------ End --- Extra Tags ------------------------------ | |
--- Remove Extra Span Tags | |
SELECT @RTFFileText = REPLACE(@RTFFileText, | |
'<span style="font-size:12"> <br><span style="font-size:12"> <br><span style="font-size:12">', | |
'<span style="font-size:12"> <br>') | |
SELECT @RTFFileText = REPLACE(@RTFFileText, '<BR> <BR>', '<BR>') | |
--- Update HTMLFileText column of RTFFiles Table | |
UPDATE RTFFiles | |
SET HTMLFileText = @RTFFileText | |
WHERE FileID = @RTFFileID | |
END | |
--------------------------------------------------------------------------------------------------------- | |
---- Step 3 ---------- Execute SP against FileID whose text is to be converted -------------------------- | |
EXECUTE [dbo].[ConvertRTFtoHTML] @RTFFileID = 1 | |