-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtmlFct.py
More file actions
123 lines (114 loc) · 4.61 KB
/
htmlFct.py
File metadata and controls
123 lines (114 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# attention, l'ordre des sous-fonctions est important
from os import sep
from urllib import request as urlRequest
from PIL import Image
from io import BytesIO
import base64
from fileLocal import pathRoot
from textFct import *
import loggerFct as log
# Names of HTML container elements. Not referenced in the visible part of this
# file; presumably consumed by other modules - TODO confirm.
listTagsContainer =( 'ul', 'ol', 'dl', 'table', 'nav', 'div', 'fieldset', 'form', 'figure', 'math', 'section', 'article', 'body', 'header', 'footer', 'main' )
# Tags that cleanHtmlForWritting surrounds with spaces (a space before the
# opening tag, a space after the closing tag).
listTagsIntern =( 'i', 'b', 'em', 'span', 'strong', 'thead', 'tbody' )
# Pairs of (HTML fragment, plain-text marker). Not referenced in the visible
# part of this file; presumably a conversion table between the HTML form and a
# plain-text notation - TODO confirm direction and callers.
tagHtml =(
('\n<h1>', '\n== '), ('\n<h2>', '\n** '), ('\n<h3>', '\n-- '), ('\n<h4>', '\n__ '), ('\n<h5>', '\n## '), ('\n<h6>', '\n++ '),
("\n<hr class='h1'/>\n", '\n\n==\n\n'), ("\n<hr class='h2'/>\n", '\n\n**\n\n'), ("\n<hr class='h3'/>\n", '\n\n--\n\n'),
("\n<hr>\n", '\n\n**\n\n'), ("\n<hr/>\n", '\n\n**\n\n'),
('\n<figure>', '\nFig\n'), ('\n<xmp>!', '\nCode\n'), ('\n<xmp>: ', '\nCode: '), ('\n<li>', '\n\t')
)
def findTitleFromUrl(link):
    """Build a readable title from the last segment of a URL or file path.

    One trailing slash or backslash is dropped, the text after the last
    separator is kept, a short trailing extension is removed, 'www.' is
    deleted and URL punctuation is turned into single spaces.

    Args:
        link: URL or file path. When it contains a forward slash, that is
            the separator; otherwise the backslash is used.

    Returns:
        The title text, or an empty string when link is empty.
    """
    # an empty link has no last character to inspect
    if not link:
        return ""
    if link[-1] in ('/', '\\'):
        link = link[:-1]
    # rfind gives -1 when the separator is absent, so the slice starts at 0
    if '/' in link:
        title = link[link.rfind('/') + 1:]
    else:
        title = link[link.rfind('\\') + 1:]
    if '.' in title:
        pos = title.rfind('.')
        # only cut when the suffix, dot included, is at most 10 characters
        if len(title) - pos < 11:
            title = title[:pos]
    title = title.replace('www.', "")
    for word in ('-', '_', '.', '?', '#', '%20'):
        title = title.replace(word, " ")
    # collapse runs of spaces; the test must look for TWO spaces, otherwise
    # the loop never ends as soon as the title holds a single space
    while "  " in title:
        title = title.replace("  ", " ")
    return title
def imageFromBase64One(imgStr):
    """Decode a base64 payload and open the resulting bytes as an image.

    Args:
        imgStr: base64 data, handed to base64.b64decode as-is (presumably
            without any 'data:image/...;base64,' prefix - TODO confirm).

    Returns:
        The object returned by Image.open on an in-memory buffer.
    """
    rawBytes = base64.b64decode(imgStr)
    memoryFile = BytesIO(rawBytes)
    return Image.open(memoryFile)
def imgToB64Local(imgPath):
    """Encode a local image file as a base64 data URI.

    The image is converted to RGB and re-saved in memory in the format named
    by the last three characters of the path ('jpg' is saved as 'jpeg').

    Args:
        imgPath: path of the image file; forward slashes are accepted.

    Returns:
        'data:image/<ext>;base64,<payload>', or an empty string when pathRoot
        is not part of the (normalised) path.
    """
    if sep == '\\':
        # Windows: use backslashes and restore the casing of the drive letter
        # and of one specific user folder, so the pathRoot test below can match
        imgPath = imgPath.replace('/', sep)
        imgPath = imgPath.replace('users\\lenovo', 'Users\\LENOVO')
        imgPath = imgPath.replace('c:\\', 'C:\\')
    if pathRoot not in imgPath:
        return ""
    extension = imgPath[-3:]
    # the context manager releases the file handle, which Pillow may otherwise
    # keep open (e.g. for multi-frame formats); convert() returns an
    # independent image that stays usable afterwards
    with Image.open(imgPath) as imageOriginal:
        imageRgb = imageOriginal.convert('RGB')
    buff = BytesIO()
    imageRgb.save(buff, format='jpeg' if extension == 'jpg' else extension)
    payload = base64.b64encode(buff.getvalue()).decode('ascii')
    return 'data:image/' + extension + ';base64,' + payload
def imgToB64Web(imgPath):
    """Download an image and return it as a base64 data URI.

    The bytes are embedded as received; the image type written in the URI is
    simply the last three characters of the URL.

    Args:
        imgPath: URL of the image.

    Returns:
        'data:image/<ext>;base64,<payload>'.

    Raises:
        Whatever urllib.request.urlopen raises when the URL cannot be opened.
    """
    # some servers refuse the default urllib user agent, hence the header
    myRequest = urlRequest.Request(imgPath, headers={'User-Agent': 'Mozilla/5.0'})
    # the context manager closes the response even when read() fails
    with urlRequest.urlopen(myRequest) as response:
        tmpByte = response.read()
    payload = base64.b64encode(tmpByte).decode('ascii')
    return 'data:image/' + imgPath[-3:] + ';base64,' + payload
def imgToB64(text):
    """Replace the image links of every quoted src attribute by data URIs.

    Links starting with 'http' are fetched with imgToB64Web, the others are
    read with imgToB64Local. Only links ending in .bmp, .png, .gif or .jpg
    are handled; everything else is left untouched.

    Args:
        text: HTML text.

    Returns:
        The text with the convertible image links embedded as base64.
    """
    if 'src=' not in text:
        return text
    imageExtensions = ('.bmp', '.png', '.gif', '.jpg')
    fragments = text.split('src=')
    # fragments[0] precedes the first attribute, so it never holds a link
    for index in range(1, len(fragments)):
        fragment = fragments[index]
        # nothing after 'src=' or an unquoted value: there is no closing
        # quote to look for, leave the fragment alone
        if not fragment or fragment[0] not in ('"', "'"):
            continue
        quote = fragment[0]
        end = fragment.find(quote, 1)
        if end == -1:
            continue
        link = fragment[1:end]
        # exact suffix test; a substring test against the joined extension
        # list would also accept an empty or partial suffix
        if not link.endswith(imageExtensions):
            continue
        if link.startswith('http'):
            encoded = imgToB64Web(link)
        else:
            encoded = imgToB64Local(link)
        # imgToB64Local returns "" for files outside pathRoot: keep the
        # original link instead of blanking the attribute (as imgToB64_va does)
        if not encoded:
            continue
        fragments[index] = quote + encoded + fragment[end:]
    return 'src='.join(fragments)
def imgToB64One(imageName):
    """Encode one image file as a base64 data URI.

    The image is converted to RGB and re-saved in memory in the format named
    by the last three characters of the file name ('jpg' is saved as 'jpeg').

    Args:
        imageName: path of the image file.

    Returns:
        'data:image/<ext>;base64,<payload>'.
    """
    extension = imageName[-3:]
    # the context manager releases the file handle, which Pillow may otherwise
    # keep open (e.g. for multi-frame formats); convert() returns an
    # independent image that stays usable afterwards
    with Image.open(imageName) as imageOriginal:
        imageRgb = imageOriginal.convert('RGB')
    buff = BytesIO()
    imageRgb.save(buff, format='jpeg' if extension == 'jpg' else extension)
    payload = base64.b64encode(buff.getvalue()).decode('ascii')
    return 'data:image/' + extension + ';base64,' + payload
def imgToB64_va(text):
    """Embed the local images of an HTML text as base64 data URIs.

    Only quoted src attributes whose link ends in .bmp, .png, .gif or .jpg
    and contains pathRoot are converted, through imgToB64One. Links starting
    with 'http' and every other link are left untouched.

    Args:
        text: HTML text.

    Returns:
        The text with the local image links replaced by data URIs.
    """
    if 'src=' not in text:
        return text
    imageExtensions = ('.bmp', '.png', '.gif', '.jpg')
    fragments = text.split('src=')
    # fragments[0] precedes the first attribute, so it never holds a link
    for index in range(1, len(fragments)):
        fragment = fragments[index]
        # nothing after 'src=' or an unquoted value: leave the fragment alone
        if not fragment or fragment[0] not in ('"', "'"):
            continue
        quote = fragment[0]
        end = fragment.find(quote, 1)
        if end == -1:
            continue
        link = fragment[1:end]
        # web images are skipped here rather than by temporarily renaming
        # their attribute to 'scr=', which also rewrote any genuine 'scr='
        # found in the text
        if link.startswith('http'):
            continue
        # exact suffix test; a substring test against the joined extension
        # list would also accept an empty or partial suffix
        if not link.endswith(imageExtensions):
            continue
        imageName = link.replace('/', sep)
        if pathRoot not in imageName:
            continue
        fragments[index] = quote + imgToB64One(imageName) + fragment[end:]
    return 'src='.join(fragments)
def cleanHtmlForWritting(text):
    """Clean an HTML text and normalise the spacing around its inline tags.

    After the base cleaning, inline tags and links get a space on their
    outer side, runs of spaces are collapsed, spaces between two tags and
    before '.', ',' or ')' and after '(' are removed, and one space is put
    back between a closing inline tag and the next opening inline tag.

    Args:
        text: HTML text.

    Returns:
        The cleaned text.
    """
    # texts holding an xmp block go through cleanBasic instead of cleanHtml
    # (both presumably come from the textFct star import - TODO confirm)
    if '</xmp>' in text:
        text = cleanBasic(text)
    else:
        text = cleanHtml(text)
    for tag in listTagsIntern:
        text = text.replace('<' + tag + '>', ' <' + tag + '>')
        text = text.replace('</' + tag + '>', '</' + tag + '> ')
    text = text.replace('</a>', '</a> ')
    text = text.replace("<a ", " <a ")
    # collapse runs of spaces; the test must look for TWO spaces, otherwise
    # the loop never ends, since the replacements above always add spaces
    while "  " in text:
        text = text.replace("  ", " ")
    text = text.replace('> <', '><')
    for p in '.,)':
        text = text.replace(" " + p, p)
    text = text.replace("( ", '(')
    # the '> <' removal above also glued adjacent inline tags: separate them
    innerTags = ('i', 'b', 'em', 'span', 'strong', 'a')
    for tag in innerTags:
        for tig in innerTags:
            text = text.replace('</' + tag + '><' + tig + '>', '</' + tag + '> <' + tig + '>')
    return text