Documents storing inside database might be a requirement
of SQL Server driven application. SQL Server uses nvarchar for unicode texts whereas
varbinary data type for storing document files (like
images, word documents, pdfs) in database.
Suppose you have
a folder containing various document files (like images, word documents, pdfs,
html) and it is required to perform certain actions to upload data from file
system in to the database.
1.
List all files with their path in a folder and
sub-folders to a Table
---------------------------------------------------------------------
CREATE PROCEDURE GetAllDirectoryFilesandFolders -- GetAllDirectoryFilesandFolders
@RootDir='E:\Post\SqlCache'
(
@RootDir VARCHAR(1000)='E:\SQLCache'
)
AS
BEGIN
DECLARE @RowIndex
INT
, @SearchDir VARCHAR(1000)
IF EXISTS ( SELECT 1 FROM SQLCacheDocuments.SYS.TABLES WHERE name ='DocumentsRepository'
)
DROP TABLE SQLCacheDocuments.dbo.DocumentsRepository
CREATE TABLE SQLCacheDocuments.dbo.DocumentsRepository
(
FileID INT IDENTITY(1,1),
DocumentFilePath VARCHAR(max),
isfile INT,
DocumentFileName VARCHAR(max),
FileDepth INT
)
--- Get Root
Direcotry Files\Folders
INSERT INTO DocumentsRepository(DocumentFileName, FileDepth,isFile)
EXEC MASTER.SYS.xp_dirtree @RootDir,1,1
--- If No
file\Folder exits then exit
IF NOT exists( SELECT * FROM DocumentsRepository )
RETURN
--- Update
File\Folder Path
UPDATE
DocumentsRepository
SET
DocumentFilePath = @RootDir + '\' + DocumentFileName
--- Next root
directory
SELECT @RowIndex=min(FileID) FROM DocumentsRepository
--- Get Sub
Direcotry Files\Folders
WHILE 1=1
BEGIN
SELECT
@SearchDir = DocumentFilePath
FROM
DocumentsRepository
WHERE FileID=@RowIndex
INSERT INTO DocumentsRepository(DocumentFileName, FileDepth,isfile)
EXEC MASTER.SYS.xp_dirtree @SearchDir,1,1
UPDATE
DocumentsRepository
SET
DocumentFilePath = @SearchDir + '\' + DocumentFileName
WHERE
DocumentFilePath is null
SET
@RowIndex = @RowIndex +
1
-- Exit if current
Rowindex is Max FileID
IF ( @RowIndex > (SELECT max(FileID) FROM DocumentsRepository)
or @RowIndex is
null )
BREAK
END
SELECT SUM(CASE ISFILE WHEN 0 THEN 1 ELSE 0 END ) TolalFolders
, SUM(CASE ISFILE WHEN 1 THEN 1 ELSE 0 END ) TolalFiles
FROM SQLCacheDocuments.dbo.DocumentsRepository
Delete
FROM SQLCacheDocuments.dbo.DocumentsRepository
WHERE isfile=0
SELECT FileID, DocumentFilePath,
DocumentFileName
FROM
SQLCacheDocuments.dbo.DocumentsRepository
ORDER BY FileID
END
---------------------------------------------------------------------
2.
Divide
files into two Tables based on operations
i.
Files
(like Images, PDFs, Word documents ) which are inserted as binary
ii.
Files (text, HTML) whose content is inserted
into the database
---------------------------------------------------------------------
SELECT * INTO
SQLCacheDocuments.dbo.TextDocuments
FROM
SQLCacheDocuments.dbo.DocumentsRepository
WHERE
DocumentFileName like '%.html'
OR DocumentFileName like
'%.TXT'
Alter table SQLCacheDocuments.dbo.TextDocuments ADD
FileText NVARCHAR(max)
---------------------------------------
SELECT * INTO
SQLCacheDocuments.dbo.HandDocuments
FROM
SQLCacheDocuments.dbo.DocumentsRepository
WHERE
DocumentFileName not like
'%.html'
AND DocumentFileName not
like '%.TXT'
ALTER TABLE SQLCacheDocuments.dbo.HandDocuments ADD
FileBinary VARBINARY(max)
---------------------------------------------------------------------
3. Import files (like Images, PDFs, Word documents ) as binary into the database table
---------------------------------------------------------------------
;WITH DocumentFiles
AS
(
SELECT FileID, DocumentFilePath
FROM SQLCacheDocuments.dbo.HandDocuments
WHERE FileBinary IS NULL
)
SELECT ' UPDATE SQLCacheDocuments.dbo.HandDocuments
SET FileBinary= tempImg.BULKCOLUMN
FROM OPENROWSET(BULK N''' + DocumentFilePath + ''', SINGLE_BLOB) as tempImg,
SQLCacheDocuments.dbo.HandDocuments FileBinary
WHERE CONVERT(int,FileBinary.FileID)='+ CONVERT(VARCHAR(200),FileID)+' GO '
FROM DocumentFiles
ORDER BY FileID
---------------------------------------------------------------------
4. Import content of files as text into the database table
---------------------------------------------------------------------
;WITH DocumentFiles
AS
(
SELECT FileID, DocumentFilePath
FROM SQLCacheDocuments.dbo.TextDocuments
WHERE FileText IS NULL
)
SELECT ' UPDATE SQLCacheDocuments.dbo.TextDocuments
SET FileText= tempImg.BULKCOLUMN
FROM OPENROWSET(BULK N''' + DocumentFilePath + ''', SINGLE_CLOB) as tempImg,
SQLCacheDocuments.dbo.TextDocuments TextNote
WHERE CONVERT(int,TextNote.FileID)='+ CONVERT(VARCHAR(200),FileID)+' GO '
FROM DocumentFiles
ORDER BY FileID
---------------------------------------------------------------------
5. Verify files binary by converting back to OS file
---------------------------------------------------------------------
;WITH DocumentFiles
AS
(
SELECT FileID, DocumentFileName
FROM SQLCacheDocuments.dbo.HandDocuments
WHERE FILEID=1
)
SELECT ' EXEC
master..xp_cmdshell ''BCP "SELECT FileBinary FROM SQLCacheDocuments.dbo. HandDocuments
WHERE FileID ='
+Cast(FILEID as varchar(50))+'"
queryout "E:\Post\'+DocumentFileName
+'"
-T -N -SsqlserverR2'''
FROM DocumentFiles
ORDER BY FileID
---------------------------------------------------------------------
6. Validate content of files by comparing OS file text and column value of database table.
7. Check files size and
length of documents
---------------------------------------------------------------------
SELECT (CAST(DATALENGTH(FileBinary)AS BIGINT) /1024.00) FileSize,* FROM SQLCacheDocuments.dbo.HandDocuments
ORDER BY CAST(DATALENGTH(FileBinary) AS BIGINT) DESC
SELECT Len(FileText),* FROM SQLCacheDocuments.dbo.TextDocuments
ORDER BY CAST(Len(FileText) AS BIGINT) DESC
---------------------------------------------------------------------