@@ -36,11 +36,11 @@ def parse_to_record(self) -> DemonstrationRecord:
3636 """
3737 boundary = self .__find_boundary ()
3838 self .parts_dict = self .__split_file_by_boundary (boundary )
39- self .comments = self .__get_comments (
40- self .parts_dict ['main.htm' ]['Content' ])
41- self .steps = self .__get_steps (self .parts_dict ['main.htm' ]['Content' ])
39+ self .comments = self .__get_comments (self .parts_dict ["main.htm" ]["Content" ])
40+ self .steps = self .__get_steps (self .parts_dict ["main.htm" ]["Content" ])
4241 record = DemonstrationRecord (
43- list (set (self .applications )), len (self .steps ), ** self .steps )
42+ list (set (self .applications )), len (self .steps ), ** self .steps
43+ )
4444
4545 return record
4646
@@ -54,14 +54,14 @@ def __find_boundary(self) -> str:
5454 if boundary_start != - 1 :
5555 boundary_start += len ("boundary=" )
5656 boundary_end = self .content .find ("\n " , boundary_start )
57- boundary = self .content [boundary_start :boundary_end ].strip ('\ " ' )
57+ boundary = self .content [boundary_start :boundary_end ].strip ('"' )
5858 return boundary
5959 else :
6060 raise ValueError ("Boundary not found in the .mht file." )
6161
6262 def __split_file_by_boundary (self , boundary : str ) -> dict :
6363 """
64- Split the file by the boundary into parts,
64+ Split the file by the boundary into parts,
6565 Store the parts in a dictionary, including the content type,
6666 content location and content transfer encoding.
6767 boundary: The boundary of the file.
@@ -72,27 +72,36 @@ def __split_file_by_boundary(self, boundary: str) -> dict:
7272 for part in parts :
7373 content_type_start = part .find ("Content-Type:" )
7474 content_location_start = part .find ("Content-Location:" )
75- content_transfer_encoding_start = part .find (
76- "Content-Transfer-Encoding:" )
75+ content_transfer_encoding_start = part .find ("Content-Transfer-Encoding:" )
7776 part_info = {}
7877 if content_location_start != - 1 :
7978 content_location_end = part .find ("\n " , content_location_start )
80- content_location = part [content_location_start :content_location_end ].split (":" )[
81- 1 ].strip ()
79+ content_location = (
80+ part [content_location_start :content_location_end ]
81+ .split (":" )[1 ]
82+ .strip ()
83+ )
8284
8385 # add the content location
8486 if content_type_start != - 1 :
8587 content_type_end = part .find ("\n " , content_type_start )
86- content_type = part [content_type_start :content_type_end ].split (":" )[
87- 1 ].strip ()
88+ content_type = (
89+ part [content_type_start :content_type_end ].split (":" )[1 ].strip ()
90+ )
8891 part_info ["Content-Type" ] = content_type
8992
9093 # add the content transfer encoding
9194 if content_transfer_encoding_start != - 1 :
9295 content_transfer_encoding_end = part .find (
93- "\n " , content_transfer_encoding_start )
94- content_transfer_encoding = part [content_transfer_encoding_start :content_transfer_encoding_end ].split (":" )[
95- 1 ].strip ()
96+ "\n " , content_transfer_encoding_start
97+ )
98+ content_transfer_encoding = (
99+ part [
100+ content_transfer_encoding_start :content_transfer_encoding_end
101+ ]
102+ .split (":" )[1 ]
103+ .strip ()
104+ )
96105 part_info ["Content-Transfer-Encoding" ] = content_transfer_encoding
97106
98107 content = part [content_location_end :].strip ()
@@ -112,25 +121,30 @@ def __get_steps(self, content: str) -> dict:
112121 """
113122
114123 user_action_data = re .search (
115- r'<UserActionData>(.*?)</UserActionData>' , content , re .DOTALL )
124+ r"<UserActionData>(.*?)</UserActionData>" , content , re .DOTALL
125+ )
116126 if user_action_data :
117127
118128 root = ET .fromstring (user_action_data .group (1 ))
119129 steps = {}
120130
121- for each_action in root .findall (' EachAction' ):
131+ for each_action in root .findall (" EachAction" ):
122132
123- action_number = each_action .get ('ActionNumber' )
124- application = each_action .get ('FileName' )
125- description = each_action .find ('Description' ).text
126- action = each_action .find ('Action' ).text
127- screenshot_file_name = each_action .find (
128- 'ScreenshotFileName' ).text
133+ action_number = each_action .get ("ActionNumber" )
134+ application = each_action .get ("FileName" )
135+ description = each_action .find ("Description" ).text
136+ action = each_action .find ("Action" ).text
137+ screenshot_file_name = each_action .find ("ScreenshotFileName" ).text
129138 screenshot = self .__get_screenshot (screenshot_file_name )
130139 step_key = f"step_{ int (action_number ) - 1 } "
131140
132141 step = DemonstrationStep (
133- application , description , action , screenshot , self .comments .get (step_key ))
142+ application ,
143+ description ,
144+ action ,
145+ screenshot ,
146+ self .comments .get (step_key ),
147+ )
134148 steps [step_key ] = step
135149 self .applications .append (application )
136150 return steps
@@ -143,16 +157,21 @@ def __get_comments(self, content: str) -> dict:
143157 content: The content of the main.htm file.
144158 return: A dictionary of comments for each step.
145159 """
146- soup = BeautifulSoup (content , ' html.parser' )
160+ soup = BeautifulSoup (content , " html.parser" )
147161 body = soup .body
148- steps_html = body .find ('div' , id = 'Steps' )
149- steps = steps_html .find_all (lambda tag : tag .name == 'div' and tag .has_attr (
150- 'id' ) and re .match (r'^Step\d+$' , tag ['id' ]))
162+ steps_html = body .find ("div" , id = "Steps" )
163+ steps = steps_html .find_all (
164+ lambda tag : tag .name == "div"
165+ and tag .has_attr ("id" )
166+ and re .match (r"^Step\d+$" , tag ["id" ])
167+ )
151168
152169 comments = {}
153170 for index , step in enumerate (steps ):
154- comment_tag = step .find ('b' , text = 'Comment: ' )
155- comments [f'step_{ index } ' ] = comment_tag .next_sibling if comment_tag else None
171+ comment_tag = step .find ("b" , text = "Comment: " )
172+ comments [f"step_{ index } " ] = (
173+ comment_tag .next_sibling if comment_tag else None
174+ )
156175 return comments
157176
158177 def __get_screenshot (self , screenshot_file_name : str ) -> str :
@@ -163,11 +182,12 @@ def __get_screenshot(self, screenshot_file_name: str) -> str:
163182 return: The screenshot in base64 string.
164183 """
165184 screenshot_part = self .parts_dict [screenshot_file_name ]
166- content = screenshot_part [' Content' ]
167- content_type = screenshot_part [' Content-Type' ]
168- content_transfer_encoding = screenshot_part [' Content-Transfer-Encoding' ]
185+ content = screenshot_part [" Content" ]
186+ content_type = screenshot_part [" Content-Type" ]
187+ content_transfer_encoding = screenshot_part [" Content-Transfer-Encoding" ]
169188
170- screenshot = 'data:{type};{encoding}, {content}' .format (
171- type = content_type , encoding = content_transfer_encoding , content = content )
189+ screenshot = "data:{type};{encoding}, {content}" .format (
190+ type = content_type , encoding = content_transfer_encoding , content = content
191+ )
172192
173193 return screenshot
0 commit comments