本文实例讲述了使用Python批量将Word文档转换为HTML,并将生成的HTML内容发布至网站的方法,分享给大家供大家参考。具体实现方法如下:

#coding=utf-8
__author__ = \'zhm\'
from win32com import client as wc
import os
import time
import random
import MySQLdb
import re
def wordsToHtml(dir):
    """Convert every Word document under ``dir`` to an HTML file.

    Walks ``dir`` recursively and, for each ``.doc``/``.docx`` file (the
    extension is matched case-insensitively), opens it through the WPS COM
    automation object and saves it next to the original as ``<name>.html``.
    Files with any other extension are not opened at all.

    Written for Python 2: the file names yielded by ``os.walk`` are treated
    as byte strings and decoded as GBK.

    Args:
        dir: Root directory to scan.
    """
    # Kingsoft WPS automation: preview builds use the "KWPS" prog ID,
    # release builds use "WPS".
    word = wc.Dispatch('KWPS.Application')
    try:
        for path, _subdirs, files in os.walk(dir):
            for wordFile in files:
                wordFile2 = unicode(wordFile, "gbk")
                dotIndex = wordFile2.rfind(".")
                if dotIndex == -1:
                    print('********************ERROR: 未取得后缀名!')
                    continue

                # Check the extension BEFORE opening anything: otherwise every
                # file in the tree (including HTML produced by an earlier run)
                # is loaded into the word processor and never closed.
                fileSuffix = wordFile2[dotIndex + 1:].lower()
                if fileSuffix not in ("doc", "docx"):
                    continue

                htmlName = wordFile2[:dotIndex] + ".html"
                htmlFullName = os.path.join(unicode(path, "gbk"), htmlName)
                wordFullName = os.path.join(path, wordFile)

                doc = word.Documents.Open(wordFullName)
                try:
                    print(u'生成了html文件:' + htmlFullName)
                    # 8 is wdFormatHTML in Word's WdSaveFormat enumeration;
                    # NOTE(review): WPS presumably mirrors that value - confirm.
                    doc.SaveAs(htmlFullName, 8)
                finally:
                    # Always release the document, even when saving fails.
                    doc.Close()
    finally:
        # Do not leave the automation process running if anything above raised.
        word.Quit()
    print("")
    print("Finished!")
def _timestamped_name(suffix):
    """Return a file name made of the current time in ms, a random number and ``suffix``."""
    return str(int(time.time() * 1000)) + str(random.randint(100, 10000)) + suffix


def _copy_file(srcName, dstName):
    """Copy the bytes of ``srcName`` into ``dstName``, closing both files."""
    with open(srcName, 'rb') as src:
        with open(dstName, 'wb') as dst:
            dst.write(src.read())


def html_add_to_db(dir):
    """Publish the converted HTML files under ``dir`` and record them in MySQL.

    For every ``.htm``/``.html`` file found while walking ``dir``:

    1. the images it references are copied to ``D:/files/images/whzx/`` under
       generated names and the references in the HTML are rewritten to the
       drive-less form of the new path;
    2. the rewritten HTML is written to ``D:/files/htmls/`` under a generated
       name;
    3. a row is inserted into ``common_article`` whose title is the file name
       without extension and whose content is an iframe pointing at the
       published file.

    All inserts are committed once, after the whole tree has been processed.
    Written for Python 2: file names and HTML content are byte strings and
    are decoded as GBK where unicode is needed.

    Args:
        dir: Root directory to scan.
    """
    # D:/files is the static directory configured on the web server.
    targetDir = 'D:/files/htmls/'
    imgTarget = 'D:/files/images/whzx/'

    # NOTE(review): the pattern in the original source was an empty string
    # (the markup was apparently lost when the code was published).  It is
    # reconstructed here from how the matches are used below - each match is
    # treated as an image path relative to the HTML file - so it captures the
    # src value of <img> tags.  Confirm against the original script.
    img = re.compile(r"""<img\b[^>]*?\ssrc\s*=\s*["']?([^\s"'>]+)""", re.I)
    sql = "insert into common_article(title,content) values(%s,%s)"

    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='root',
        db='test',
        charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(dir):
                pathU = unicode(path, "gbk")
                for htmlFile in files:
                    htmlFile2 = unicode(htmlFile, "gbk")
                    dotIndex = htmlFile2.rfind(".")
                    if dotIndex == -1:
                        print('********************ERROR: 未取得后缀名!')
                        continue
                    if htmlFile2[dotIndex + 1:].lower() not in ("htm", "html"):
                        continue

                    title = os.path.splitext(htmlFile2)[0]
                    if not os.path.exists(targetDir):
                        os.makedirs(targetDir)

                    # Join the directory with the bare file name.  Joining it
                    # with the already-joined path only worked by accident
                    # when ``dir`` was absolute.
                    htmlFullName = os.path.join(pathU, htmlFile2)
                    with open(htmlFullName, 'rb') as htFile:
                        htmStrCotent = htFile.read()

                    # Upload the referenced images and rewrite their URLs.
                    for tagContent in img.findall(htmStrCotent):
                        imgSrcFullName = os.path.join(pathU, unicode(tagContent, "gbk"))
                        if not os.path.isfile(imgSrcFullName):
                            # Not a local file next to the HTML (for example
                            # a remote URL): leave the reference untouched.
                            continue
                        if not os.path.exists(imgTarget):
                            os.makedirs(imgTarget)
                        # Each image gets its own timestamp; the original
                        # reused the HTML file's timestamp by mistake.
                        targetImgFile = os.path.join(imgTarget, _timestamped_name('.png'))
                        if (not os.path.exists(targetImgFile)
                                or os.path.getsize(targetImgFile) != os.path.getsize(imgSrcFullName)):
                            _copy_file(imgSrcFullName, targetImgFile)
                            # Drop the drive letter so the path becomes a
                            # server-relative URL.
                            htmStrCotent = htmStrCotent.replace(
                                tagContent, targetImgFile.split(":")[1])

                    targetFile = os.path.join(targetDir, _timestamped_name('.html'))
                    if (os.path.exists(targetFile)
                            and os.path.getsize(targetFile) == os.path.getsize(htmlFullName)):
                        continue

                    # Wrap the converted HTML file in an iframe.
                    # NOTE(review): the original literal contained only
                    # whitespace (markup apparently lost when published); this
                    # minimal iframe follows the same drive-less URL scheme
                    # used for images above.  Confirm the intended markup.
                    iframeHtml = '<iframe src="%s"></iframe>' % targetFile.split(":")[1]
                    with open(targetFile, "wb") as tmpTargetFile:
                        tmpTargetFile.write(htmStrCotent)

                    try:
                        cur.execute(sql, (title, iframeHtml))
                    except MySQLdb.Error as exc:
                        # Best effort: report the failure and carry on with
                        # the remaining files.
                        print("Error: unable to insert data")
                        print(exc)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection even if processing failed part-way.
        conn.close()
if __name__ == '__main__':
    # Convert the Word documents first, then publish the generated HTML
    # from the same directory.
    wordsToHtml('d:/word')
    html_add_to_db('d:/word')

希望本文所述对大家的Python程序设计有所帮助。