Skip to content

Commit

Permalink
添加图片压缩,html 也对应压缩
Browse files Browse the repository at this point in the history
  • Loading branch information
shuzhi-bj committed Aug 10, 2019
1 parent b2322f0 commit cbfa176
Show file tree
Hide file tree
Showing 8 changed files with 290 additions and 39 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ RUN sed -i 's/security.ubuntu/mirrors.aliyun/g' /etc/apt/sources.list && \
apt-get -qq -y install git python3-pip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip3 install flask requests
pip3 install flask requests pillow bs4 lxml -i https://pypi.tuna.tsinghua.edu.cn/simple

RUN git clone https://github.com/ider-zh/api-pdf2htmlex.git

# COPY $PWD/* api-pdf2htmlex/
EXPOSE 5000
CMD python3 api-pdf2htmlex/run.py
6 changes: 5 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
[[source]]
url = "https://pypi.org/simple"
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
verify_ssl = true
name = "pypi"

[packages]
flask = "*"
bs4 = "*"
pillow = "*"
lxml = "*"
requests = "*"

[dev-packages]
requests = "*"
Expand Down
171 changes: 140 additions & 31 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
version: '2.2'
services:
# pdf2htmlex_large:
# build: .
# #image: ider/api-pdf2htmlex
# environment:
# - PDF2PDF=http://pdfconv:5000/pdf2pdf
# - POOL_SIZE=9
# ports:
# - 5001:5000
# restart: always
# mem_limit: 30G
# memswap_limit: 800G
# container_name: 'pdf2htmlEX_L'
# links:
# - pdfconv

# pdf2htmlex_small:
# build: .
# #image: ider/api-pdf2htmlex
# environment:
# - POOL_SIZE=10
# expose:
# - 5000
# restart: always
# mem_limit: 5G
# memswap_limit: 5G
# scale: 5

# pdf2htmlex-lb:
# image: 'dockercloud/haproxy:latest'
# volumes:
# - /var/run/docker.sock:/var/run/docker.sock
# links:
# - pdf2htmlex_small
# restart: always
# ports:
# - 5000:80

# pdfconv:
# image: ider/libreoffice
# restart: always

pdf2htmlex_small:
build: .
#image: ider/api-pdf2htmlex
environment:
- POOL_SIZE=10
ports:
- 5000:5000
restart: always
mem_limit: 5G
memswap_limit: 5G
scale: 1
61 changes: 61 additions & 0 deletions file_compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
'''
@Version: 0.0.1
@Author: ider
@Date: 2019-08-10 16:55:38
@LastEditors: ider
@LastEditTime: 2019-08-10 18:09:52
@Description: 压缩 图片和 html
'''
import base64
import codecs
import io
import os

from bs4 import BeautifulSoup
from PIL import Image

def compress_base64_img(data):
    """Re-encode a base64-encoded image as lossy WebP, returned as base64 text.

    Args:
        data: Base64 text of the source image, i.e. the payload of a data
            URI without its ``data:...;base64,`` prefix.

    Returns:
        The WebP-encoded image as a single-line base64 ``str`` (no embedded
        line breaks), ready to be appended to ``data:image/webp;base64,``.

    Raises:
        Errors from base64 decoding or from Pillow opening/saving the image
        are not caught here and propagate to the caller.
    """
    img_data = base64.b64decode(data.encode('utf8'))
    img = Image.open(io.BytesIO(img_data))
    newio = io.BytesIO()
    img.save(newio, format='webp', quality=10, lossless=False)
    # b64encode yields one unbroken line; the codecs 'base64' encoder wraps
    # the output every 76 characters and appends a newline, which would be
    # embedded verbatim in the data URI.
    return base64.b64encode(newio.getvalue()).decode('ascii')


def compress(file_path):
    """Compress the file at *file_path* according to its extension.

    ``.png`` files are handed to ``compress_png`` and ``.html`` files to
    ``compress_html``; the result of that call is returned. Any other path
    is returned unchanged without touching the file.
    """
    is_png = file_path.endswith('.png')
    is_html = file_path.endswith('.html')

    # Nothing to do for unsupported extensions.
    if not (is_png or is_html):
        return file_path

    if is_png:
        return compress_png(file_path)
    return compress_html(file_path)

def compress_png(file_path):
    """Write a lossy WebP copy of the image at *file_path* next to it.

    Args:
        file_path: Path of the source image (normally a ``.png`` file).

    Returns:
        The path of the written file: *file_path* with its extension
        replaced by ``.webp``. The source file is left in place.
    """
    # splitext swaps the real extension. str.rstrip('png') strips any
    # trailing run of the characters 'p', 'n', 'g', so e.g. 'pic.jpg'
    # would have become 'pic.jwebp'.
    new_file_path = os.path.splitext(file_path)[0] + '.webp'
    with open(file_path, 'rb') as src:
        im = Image.open(src)
        # Save while the source handle is still open: the pixel data is
        # read from it during save.
        with open(new_file_path, 'wb') as dst:
            im.save(dst, format='webp', quality=10, lossless=False)
    return new_file_path

def compress_html(file_path):
    """Recompress inline base64 images of an HTML file to WebP, in place.

    Every ``<img>`` whose ``src`` is a base64 ``data:image`` URI (other than
    one already declared as ``image/webp``) is re-encoded through
    ``compress_base64_img`` and its ``src`` replaced by a
    ``data:image/webp;base64,`` URI. The file is rewritten only when at
    least one image was replaced.

    Args:
        file_path: Path of the HTML file to process.

    Returns:
        *file_path*, unchanged.

    Raises:
        Errors raised by ``compress_base64_img`` for an individual image are
        not caught and abort the whole operation before the file is written.
    """
    with open(file_path, 'rb') as f:
        html = f.read()
    soup = BeautifulSoup(html, 'lxml')

    changed = False
    for img in soup.select('img'):
        src = img.attrs.get('src')
        if not src or not src.startswith('data:image'):
            continue
        if src.startswith('data:image/webp;base64'):
            continue
        # Split the URI header from its payload at the first comma only.
        header, sep, payload = src.partition(',')
        # Skip URIs without a payload or not base64-encoded (for example
        # URL-encoded inline SVG); they cannot be base64-decoded.
        if not sep or not header.lower().endswith(';base64'):
            continue
        img.attrs['src'] = 'data:image/webp;base64,' + compress_base64_img(payload)
        changed = True

    if changed:
        # Write with an explicit encoding so the result does not depend on
        # the process locale (the default under a C/POSIX locale can fail
        # on non-ASCII text).
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(str(soup))
    return file_path

if __name__ == '__main__':
    # Manual smoke run: compress two sample files and print the paths
    # returned by compress().
    for sample_path in ('/tmp/cccc/img-3-3.png', '/tmp/cccc/temp.html'):
        print(compress(sample_path))
2 changes: 2 additions & 0 deletions pdf2htmlEX
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def remote_convert(src, dst, argv):
with zipfile.ZipFile(fp, 'r') as myzip:
myzip.extractall(dst)
return
else:
print(req.text)
else:
print('remote api is not available!')
# raise Exception('remote api is not available! %s'%req.text)
Expand Down
Loading

0 comments on commit cbfa176

Please sign in to comment.