1+ #!/usr/bin/env python3
2+ """
3+ Script for automatic README generation based on the /docs structure
4+ Analyzes markdown files and creates a README following the existing concept
5+ """
6+
7+ import os
8+ import re
9+ from pathlib import Path
10+ from typing import Dict , List , Tuple
11+
def is_file_complete(file_path: str) -> bool:
    """
    Checks if a markdown file is completely filled out or just a template

    Args:
        file_path: Path of the markdown file to inspect.

    Returns:
        True when the file contains an HTTP endpoint line, filled-in
        parameter/response examples, or looks like a populated index page.
        False for empty files, short template-only files, and files that
        cannot be read or decoded as UTF-8.
    """
    # Only the read itself is guarded, so bugs in the checks below are not
    # silently reported as "incomplete". UnicodeDecodeError is a ValueError.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        return False

    stripped = content.strip()
    if not stripped:
        return False

    # Fragments that appear in the untouched documentation template
    template_indicators = (
        '## \n**Description:**\n\n**Latest status:**\n\n```\n\n```',
        '**Description:**\n\n**Latest status:**\n\n```\n\n```',
        'Example |',
        '| Name | Type | Description | Example | Required |',
    )

    # A short file that still carries template structure is not complete
    if len(stripped) < 500 and any(
        indicator in content for indicator in template_indicators
    ):
        return False

    # Check for actual API endpoints
    if any(f'{method} /' in content for method in ('GET', 'POST', 'DELETE', 'PUT')):
        return True

    # Check for filled query parameters or response examples
    if (
        ('```json\n{' in content and content.count('```json') > 1)
        or 'startAt | number' in content
        or ('"x":' in content and '"y":' in content)
    ):
        return True

    # Index/overview pages: complete if they have a title, at least two
    # subsections, and either a filled description or several internal links
    has_title = re.search(r'^## \w+', content, re.MULTILINE) is not None
    description_count = content.count('Description:')
    empty_description_count = content.count('**Description:**\n\n**Latest status:**')
    has_description = (
        description_count > 0 and description_count != empty_description_count
    )
    has_internal_links = content.count('](/docs/') >= 3
    has_structure = content.count('###') >= 2

    return has_title and (has_description or has_internal_links) and has_structure
61+
def extract_description_from_file(file_path: str) -> str:
    """
    Extracts the description from between **Description:** and **Latest status:** markers

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        A single-line description: the first sentence when the text is longer
        than 50 characters, truncated to 150 characters with a trailing
        '...'. An empty string when the section is missing or empty, or the
        file cannot be read or decoded as UTF-8.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # Unreadable or non-UTF-8 file: no description available
        return ""

    # Look for content specifically between **Description:** and **Latest status:**
    desc_match = re.search(
        r'\*\*Description:\*\*\s*\n\n(.*?)\n\n\*\*Latest status:\*\*',
        content,
        re.DOTALL,
    )
    if not desc_match:
        return ""

    # Collapse all whitespace so a multi-line description cannot break the
    # single-row markdown table cell it is rendered into
    description = ' '.join(desc_match.group(1).split())
    if not description:
        return ""

    if len(description) > 50:
        # Keep only the first sentence. A sentence ends at a period followed
        # by whitespace or end of text, so dots inside URLs or version
        # numbers ("umami.is", "v1.2") do not cut the text short.
        first_sentence = re.match(r'(.+?\.)(?=\s|$)', description)
        if first_sentence:
            description = first_sentence.group(1)

    if len(description) > 150:
        description = description[:147] + '...'

    return description
88+
def extract_title_from_file(file_path: str) -> str:
    """
    Extracts the title from a markdown file

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        The text of the first non-empty '## ' heading. When the file has no
        such heading, or cannot be read or decoded as UTF-8, a title derived
        from the filename (hyphens replaced by spaces, title-cased).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # Unreadable file: fall through to the filename-based title
        content = ""

    # Search for ## Title
    match = re.search(r'^## (.+)', content, re.MULTILINE)
    if match:
        title = match.group(1).strip()
        if title:
            return title

    # Fallback: Use filename
    return Path(file_path).stem.replace('-', ' ').title()
109+
def scan_directory(base_path: str) -> Dict:
    """
    Scans a directory recursively and creates a structure of markdown files

    Args:
        base_path: Directory that contains the 'docs' folder.

    Returns:
        A nested dict mirroring the folder layout below 'docs'. Each folder
        that directly contains markdown files has a '_files' list whose items
        are dicts with the keys 'filename', 'title', 'description', 'path'
        (relative to 'docs', always '/'-separated) and 'complete'.
        An empty dict when the 'docs' folder does not exist.
    """
    structure: Dict = {}
    docs_path = Path(base_path) / 'docs'

    if not docs_path.exists():
        print(f"Error: {docs_path} does not exist!")
        return structure

    for root, _dirs, files in os.walk(docs_path):
        for file in files:
            # Skip template.md and anything that is not markdown
            if file == 'template.md' or not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)
            relative_path = Path(os.path.relpath(file_path, docs_path))

            # Walk/create the nested dicts for every folder above the file
            current = structure
            for part in relative_path.parts[:-1]:
                current = current.setdefault(part, {})

            current.setdefault('_files', []).append({
                'filename': relative_path.name,
                'title': extract_title_from_file(file_path),
                'description': extract_description_from_file(file_path),
                # The path ends up in a markdown link, so it must use forward
                # slashes even when the script runs on Windows
                'path': relative_path.as_posix(),
                'complete': is_file_complete(file_path),
            })

    return structure
158+
def _escape_table_cell(text: str) -> str:
    """Escapes unescaped '|' characters so the text stays inside one table cell."""
    return re.sub(r'(?<!\\)\|', r'\\|', text)


def generate_table_entries(files: List[Dict]) -> List[str]:
    """
    Generates table entries for a list of files with links to the documents and descriptions

    Args:
        files: File dicts with the keys 'filename', 'title', 'path',
            'complete' and optionally 'description'.

    Returns:
        One markdown table row per file, sorted by filename, in the form
        '| <state> | [<title>](/docs/<path>) | <description> |'. The
        description cell is left empty when no description is available.
    """
    entries = []
    for file_info in sorted(files, key=lambda x: x['filename']):
        state = "✅" if file_info['complete'] else "❌"
        # A raw '|' in the title or description would start a new table cell
        title = _escape_table_cell(file_info['title'])
        description = _escape_table_cell(file_info.get('description') or '')
        # Link to the markdown file inside the repository
        link = f"/docs/{file_info['path']}"

        entries.append(f"| {state} | [{title}]({link}) | {description} |")

    return entries
179+
def _toc_entries(section: Dict, depth: int, max_depth: int) -> List[str]:
    """Builds the TOC bullet lines for the sub-sections of *section*, down to *max_depth*."""
    lines: List[str] = []
    if not isinstance(section, dict):
        return lines

    for key in sorted(section):
        if key == '_files':
            continue
        # Two spaces per level: markdown needs the indent to nest list items
        indent = '  ' * depth
        # Anchor for the generated heading: lower-case, spaces become hyphens
        anchor = key.lower().replace(' ', '-')
        lines.append(f"{indent}- [{key.title()}](#{anchor})")
        if depth < max_depth:
            lines.extend(_toc_entries(section[key], depth + 1, max_depth))
    return lines


def generate_table_of_contents(structure: Dict) -> str:
    """
    Dynamically generates table of contents based on actual structure

    Args:
        structure: Nested section dict; '_files' keys are ignored.

    Returns:
        The markdown table of contents followed by a blank line. 'websites'
        is listed first with two levels of sub-sections; every other root
        section follows in sorted order with one level of sub-sections.
    """
    toc = ["## Table of Contents", ""]

    # Websites section
    if 'websites' in structure:
        toc.append("- [Websites](#websites)")
        toc.extend(_toc_entries(structure['websites'], depth=1, max_depth=2))

    # Root level sections
    other_sections = {
        key: value for key, value in structure.items() if key != 'websites'
    }
    toc.extend(_toc_entries(other_sections, depth=0, max_depth=1))

    return '\n'.join(toc) + '\n\n'
219+
def generate_section_content(section_name: str, section_data: Dict, level: int = 3) -> str:
    """
    Recursively generates content for any section

    Args:
        section_name: Section key; rendered title-cased as the heading text.
        section_data: Section dict with an optional '_files' list and nested
            sub-section dicts.
        level: Markdown heading level for this section. Sub-sections use
            level + 1. The rendered level is clamped to 1..6.

    Returns:
        The markdown for the section: heading, file table (or an empty
        placeholder table when there are neither files nor sub-sections),
        a blank line, then all sub-sections in sorted order.
    """
    table_header = "| State | Name | Description |\n| :---: | :--- | :--- |\n"

    # Markdown only knows heading levels 1-6; more '#' would render as text
    heading = "#" * max(1, min(level, 6))
    parts = [f"{heading} {section_name.title()}\n"]

    files = section_data.get('_files')
    if files:
        # Add files in this section
        parts.append(table_header)
        parts.append('\n'.join(generate_table_entries(files)) + '\n')
    else:
        has_subsections = any(
            key != '_files' and isinstance(value, dict)
            for key, value in section_data.items()
        )
        if not has_subsections:
            # No files and no subsections, add empty placeholder
            parts.append(table_header)
            parts.append("| | | |\n")

    parts.append("\n")

    # Process subsections
    for subsection_key in sorted(section_data):
        subsection = section_data[subsection_key]
        if subsection_key != '_files' and isinstance(subsection, dict):
            parts.append(generate_section_content(subsection_key, subsection, level + 1))

    return ''.join(parts)
254+
def generate_readme_content(structure: Dict) -> str:
    """
    Assembles the complete README text from the scanned docs structure

    The README consists of the title, the status legend, the table of
    contents, the Websites section (when present) and then every other
    root-level section in sorted order. Root-level '_files' are not rendered.
    """
    pieces = [
        "# umami API docs\n\n",
        # Status legend
        "### 📊 Documentation Status Legend\n",
        "| Symbol | Meaning |\n",
        "| :---: | :--- |\n",
        "| ✅ | Documentation complete – file contains full API specification |\n",
        "| ❌ | Documentation incomplete – file is template or missing content |\n\n",
        # Dynamic table of contents
        generate_table_of_contents(structure),
        "---\n---\n\n",
    ]

    # Websites always comes first
    if 'websites' in structure:
        pieces.append("## Websites\n\n")
        rendered = generate_section_content("websites", structure['websites'], level=2)
        # The rendered section starts with its own "## Websites" heading line;
        # keep only what follows it, since the heading was added above
        pieces.append(rendered.partition('\n')[2])
        # NOTE(review): separator assumed to belong to the Websites branch - confirm
        pieces.append("--- \n\n")

    # Every remaining root-level section, each followed by a separator
    for name in sorted(structure):
        if name == 'websites' or name == '_files':
            continue
        pieces.append(generate_section_content(name, structure[name], level=2))
        pieces.append("---\n\n")

    return ''.join(pieces).rstrip() + "\n"
288+
def _count_files(struct) -> Tuple[int, int]:
    """Returns (total, complete) file counts for a structure and all its sub-sections."""
    if not isinstance(struct, dict):
        return 0, 0

    listed = struct.get('_files', [])
    total = len(listed)
    complete = sum(1 for entry in listed if entry['complete'])

    for key, value in struct.items():
        if key == '_files':
            continue
        sub_total, sub_complete = _count_files(value)
        total += sub_total
        complete += sub_complete

    return total, complete


def main():
    """
    Entry point: scans the docs, writes README.md and prints statistics

    The base directory is two levels above this script file; the README is
    written to README.md inside that directory.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    base_path = os.path.dirname(script_dir)

    print("🔍 Scanning /docs directory...")
    structure = scan_directory(base_path)

    print("📝 Generating README content...")
    readme_content = generate_readme_content(structure)

    readme_path = os.path.join(base_path, 'README.md')
    print(f"💾 Writing README to {readme_path}...")
    with open(readme_path, 'w', encoding='utf-8') as readme_file:
        readme_file.write(readme_content)

    print("✅ README successfully generated!")

    # Summarise how many documents are already filled in
    total_files, complete_files = _count_files(structure)

    if total_files > 0:
        progress_line = f" Progress: {(complete_files / total_files * 100):.1f}%"
    else:
        progress_line = " Progress: 0%"

    print("\n📊 Statistics:")
    print(f" Total files found: {total_files}")
    print(f" Completely filled: {complete_files}")
    print(f" Still to be processed: {total_files - complete_files}")
    print(progress_line)
334+
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments