1+ #!/usr/bin/env python3
2+ """
3+ Script for automatic README generation based on the /docs structure
4+ Analyzes markdown files and creates a README following the existing concept
5+ """
6+
7+ import os
8+ import re
9+ from pathlib import Path
10+ from typing import Dict , List , Tuple
11+
def is_file_complete(file_path: str) -> bool:
    """Report whether a markdown doc holds real content or is still a template.

    The decision is a purely textual heuristic, applied in this order:

    1. An empty or whitespace-only file is incomplete.
    2. A short file (fewer than 500 characters after stripping) that still
       contains one of the known template fragments is incomplete.
    3. A file mentioning an HTTP endpoint (``GET /``, ``POST /``,
       ``DELETE /`` or ``PUT /``) is complete.
    4. A file with more than one JSON code fence (one of them opening an
       object), a ``startAt | number`` parameter row, or both ``"x":`` and
       ``"y":`` keys is complete.
    5. Anything else is incomplete.

    Args:
        file_path: Path of the markdown file to inspect.

    Returns:
        ``True`` if the file looks filled out; ``False`` if it looks like a
        template, matches none of the heuristics, or cannot be read/decoded.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError):
        # Best effort: an unreadable or undecodable file counts as incomplete.
        return False

    stripped = content.strip()
    if not stripped:
        return False

    # Fragments that only appear in the untouched documentation template.
    template_indicators = (
        '## \n**Description:**\n\n**Latest status:**\n\n```\n\n```',
        '**Description:**\n\n**Latest status:**\n\n```\n\n```',
        'Example |',
        '| Name | Type | Description | Example | Required |',
    )

    # A template fragment only disqualifies short files; longer files are
    # judged by the content checks below.
    if len(stripped) < 500 and any(
        indicator in content for indicator in template_indicators
    ):
        return False

    # An actual API endpoint is the strongest sign of a filled-in document.
    http_methods = ('GET', 'POST', 'DELETE', 'PUT')
    if any(f'{method} /' in content for method in http_methods):
        return True

    # Otherwise look for filled query parameters or response examples.
    has_json_examples = '```json\n{' in content and content.count('```json') > 1
    has_parameter_row = 'startAt | number' in content
    has_xy_payload = '"x":' in content and '"y":' in content
    return has_json_examples or has_parameter_row or has_xy_payload
51+
def extract_title_from_file(file_path: str) -> str:
    """Return a display title for a markdown file.

    The title is the text of the first level-2 heading (a line starting with
    ``## ``). If the file has no such heading, the heading is blank, or the
    file cannot be read, the title is derived from the file name instead:
    hyphens become spaces and each word is capitalised.

    Args:
        file_path: Path of the markdown file.

    Returns:
        The heading text, or the prettified file stem as a fallback.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError):
        # Unreadable file: fall through to the filename-based title.
        content = ''

    # MULTILINE lets ``^`` match at every line start, not only at offset 0.
    match = re.search(r'^## (.+)', content, re.MULTILINE)
    if match:
        title = match.group(1).strip()
        if title:
            return title

    # Fallback: use the filename, e.g. "get-stats.md" -> "Get Stats".
    return Path(file_path).stem.replace('-', ' ').title()
72+
def scan_directory(base_path: str) -> Dict:
    """Walk ``<base_path>/docs`` and build a nested description of its markdown files.

    Every directory that (directly or indirectly) contains a ``.md`` file
    becomes a nested dict keyed by directory name. The markdown files of a
    directory are collected under the reserved key ``'_files'`` as dicts with
    the keys ``filename``, ``title``, ``path`` and ``complete``. Files named
    ``template.md`` and non-markdown files are ignored.

    Args:
        base_path: Directory that contains the ``docs`` folder.

    Returns:
        The nested structure; an empty dict if ``docs`` does not exist (an
        error line is printed in that case) or holds no markdown files.
    """
    structure: Dict = {}
    docs_path = Path(base_path) / 'docs'

    if not docs_path.exists():
        print(f"Error: {docs_path} does not exist!")
        return structure

    for root, dirs, files in os.walk(docs_path):
        # Sorting in place makes os.walk visit directories in a stable order.
        dirs.sort()

        for file in sorted(files):
            # Skip the documentation template and anything that is not markdown.
            if file == 'template.md' or not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)
            parts = os.path.relpath(file_path, docs_path).split(os.sep)

            # Descend to (creating as needed) the dict of the file's directory.
            current = structure
            for part in parts[:-1]:
                current = current.setdefault(part, {})

            current.setdefault('_files', []).append({
                'filename': parts[-1],
                'title': extract_title_from_file(file_path),
                # Always '/'-separated so the value is usable in a Markdown
                # link no matter which OS generated it.
                'path': '/'.join(parts),
                'complete': is_file_complete(file_path),
            })

    return structure
119+
def generate_table_entries(files: List[Dict]) -> List[str]:
    """Render one Markdown table row per file, linking to the document.

    Rows are ordered by ``filename``. Each row has the form
    ``| <state> | [<title>](/docs/<path>) |`` where the state is ✅ for a
    complete file and ❌ otherwise.

    Args:
        files: File descriptions with the keys ``filename``, ``title``,
            ``path`` (relative to ``/docs``) and ``complete``.

    Returns:
        The table rows, without header and without trailing newlines.
    """
    from urllib.parse import quote

    entries = []
    for file_info in sorted(files, key=lambda info: info['filename']):
        state = "✅" if file_info['complete'] else "❌"
        # An unescaped pipe in the title would start a new table cell.
        title = file_info['title'].replace('|', '\\|')
        # Normalise OS separators, then percent-encode so that spaces or
        # parentheses in a path cannot terminate the link target early.
        url_path = quote(file_info['path'].replace(os.sep, '/'), safe='/')
        entries.append(f"| {state} | [{title}](/docs/{url_path}) |")

    return entries
134+
def _toc_anchor(name: str) -> str:
    """Approximate the fragment GitHub assigns to a heading with this text.

    Lower-cases the text, drops characters other than word characters,
    hyphens and spaces, then turns spaces into hyphens. Duplicate headings
    (which GitHub suffixes with ``-1``, ``-2``, ...) are not disambiguated.
    """
    return re.sub(r'[^\w\- ]', '', name.lower()).replace(' ', '-')


def _toc_entries(key: str, node, depth: int) -> List[str]:
    """Return the TOC bullet for *key* plus one bullet per direct child of *node*."""
    lines = [f"{'  ' * depth}- [{key.title()}](#{_toc_anchor(key)})"]
    if isinstance(node, dict):
        child_indent = '  ' * (depth + 1)
        for child in sorted(node):
            if child != '_files':
                lines.append(f"{child_indent}- [{child.title()}](#{_toc_anchor(child)})")
    return lines


def generate_table_of_contents(structure: Dict) -> str:
    """Build the Markdown table of contents for the scanned docs structure.

    ``websites`` (if present) is listed first, followed by its sections and
    their direct subsections. All other root-level sections follow in sorted
    order, each with its direct subsections. The reserved ``'_files'`` key is
    never listed. Nested bullets are indented by two spaces per level so that
    Markdown renders them as sub-lists.

    Args:
        structure: Nested dict of sections, as produced by the directory scan.

    Returns:
        The TOC block, starting with the ``## Table of Contents`` heading and
        ending with a blank line.
    """
    toc = ["## Table of Contents", ""]

    # Websites section: one level deeper than the other root sections.
    if 'websites' in structure:
        toc.append("- [Websites](#websites)")
        websites = structure['websites']
        for section_key in sorted(websites.keys()):
            if section_key != '_files':
                toc.extend(_toc_entries(section_key, websites[section_key], 1))

    # Remaining root-level sections.
    for section_key in sorted(structure.keys()):
        if section_key not in ('websites', '_files'):
            toc.extend(_toc_entries(section_key, structure[section_key], 0))

    return '\n'.join(toc) + '\n\n'
174+
def generate_section_content(section_name: str, section_data: Dict, level: int = 3) -> str:
    """Recursively render a section heading, its file table and its subsections.

    A section with files gets a ``State | Name`` table filled by
    ``generate_table_entries``. A section with neither files nor subsections
    gets the same table with a single empty placeholder row. A section that
    only has subsections gets just its heading. Subsections (dict values
    other than ``'_files'``) are rendered afterwards in sorted order, one
    heading level deeper.

    Args:
        section_name: Directory name of the section; shown title-cased.
        section_data: Nested dict for this section (``'_files'`` plus
            subsection dicts).
        level: Markdown heading level. Clamped to 1-6 because Markdown has
            no deeper headings; a longer run of ``#`` would render as text.

    Returns:
        The Markdown for the section and everything below it.
    """
    heading = "#" * max(1, min(level, 6))
    lines = [f"{heading} {section_name.title()}"]

    subsection_keys = [
        key for key in sorted(section_data)
        if key != '_files' and isinstance(section_data[key], dict)
    ]
    files = section_data.get('_files')

    if files:
        lines.append("| State | Name |")
        lines.append("| :---: | :--- |")
        lines.extend(generate_table_entries(files))
    elif not subsection_keys:
        # No files and no subsections: keep an empty placeholder table.
        lines.append("| State | Name |")
        lines.append("| :---: | :--- |")
        lines.append("| | |")

    # Each section block ends with one blank line.
    parts = ['\n'.join(lines) + '\n\n']
    for key in subsection_keys:
        parts.append(generate_section_content(key, section_data[key], level + 1))

    return ''.join(parts)
209+
def generate_readme_content(structure: Dict) -> str:
    """Assemble the complete README text from the scanned docs structure.

    Layout: title, status legend, table of contents, a double rule, the
    ``Websites`` section (when present), then every other root-level section
    in sorted order, each followed by a horizontal rule.

    Args:
        structure: Nested dict of sections, as produced by the directory scan.

    Returns:
        The README content, with trailing whitespace trimmed to a single
        final newline.
    """
    parts = [
        "# umami API docs\n\n",
        # Status legend
        "### 📊 Documentation Status Legend\n",
        "| Symbol | Meaning |\n",
        "| :---: | :--- |\n",
        "| ✅ | Documentation complete – file contains full API specification |\n",
        "| ❌ | Documentation incomplete – file is template or missing content |\n\n",
        # Dynamic table of contents
        generate_table_of_contents(structure),
        "---\n---\n\n",
    ]

    # NOTE(review): markdown files located directly in /docs (root-level
    # '_files') are not listed anywhere in the README - confirm whether that
    # is intentional.

    if 'websites' in structure:
        parts.append("## Websites\n\n")
        websites_content = generate_section_content(
            "websites", structure['websites'], level=2
        )
        # The section renderer emits its own "## Websites" line; drop it,
        # since the heading (with a blank line after it) was added above.
        parts.append(websites_content.split('\n', 1)[1])
        # NOTE(review): separator assumed to belong to the websites branch;
        # the original indentation was not recoverable - confirm.
        parts.append("---\n\n")

    # All other root-level sections, in sorted order.
    for section_key in sorted(structure):
        if section_key not in ('websites', '_files'):
            parts.append(
                generate_section_content(section_key, structure[section_key], level=2)
            )
            parts.append("---\n\n")

    return ''.join(parts).rstrip() + "\n"
243+
def main():
    """Scan ``docs/``, regenerate ``README.md`` and print progress statistics.

    The repository root is taken to be the parent of the directory that
    contains this script. If the root has no ``docs`` directory, the existing
    README is left untouched instead of being overwritten with an empty
    skeleton.
    """
    # Parent of this script's directory, i.e. the repository root.
    base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    print("🔍 Scanning /docs directory...")
    structure = scan_directory(base_path)

    if not os.path.isdir(os.path.join(base_path, 'docs')):
        # Nothing was scanned; writing now would wipe the current README.
        print("⚠️ No docs directory found - README left unchanged.")
        return

    print("📝 Generating README content...")
    readme_content = generate_readme_content(structure)

    # Write new README
    readme_path = os.path.join(base_path, 'README.md')

    print(f"💾 Writing README to {readme_path}...")
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)

    print("✅ README successfully generated!")

    def count_files(node):
        """Return ``(total, complete)`` file counts for *node* and everything below it."""
        total = complete = 0
        if isinstance(node, dict):
            for key, value in node.items():
                if key == '_files':
                    total += len(value)
                    complete += sum(1 for file_info in value if file_info['complete'])
                else:
                    sub_total, sub_complete = count_files(value)
                    total += sub_total
                    complete += sub_complete
        return total, complete

    # Output statistics
    total_files, complete_files = count_files(structure)

    print("\n📊 Statistics:")
    print(f" Total files found: {total_files}")
    print(f" Completely filled: {complete_files}")
    print(f" Still to be processed: {total_files - complete_files}")
    if total_files > 0:
        print(f" Progress: {(complete_files / total_files * 100):.1f}%")
    else:
        # Avoid dividing by zero when no files were found.
        print(" Progress: 0%")


if __name__ == "__main__":
    main()
0 commit comments