diff --git a/sbapp/md2bbcode/__init__.py b/sbapp/md2bbcode/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sbapp/md2bbcode/html2bbcode.py b/sbapp/md2bbcode/html2bbcode.py
new file mode 100644
--- /dev/null
+++ b/sbapp/md2bbcode/html2bbcode.py
@@ -0,0 +1,154 @@
+# converts some HTML tags to BBCode
+# pass --debug to save the output to readme.finalpass
+# may be better off replacing this with html to markdown (and then to bbcode). Lepture recommends a JS html to markdown converter: sundown
+from bs4 import BeautifulSoup, NavigableString
+import argparse
+
+
+def _wrap_in_bbcode(content, attributes):
+    """Wrap content in nested BBCode tags; the first attribute ends up outermost."""
+    for attr in reversed(attributes):
+        # "COLOR=red" closes as [/COLOR]; simple tags like "B" close as [/B]
+        content = f"[{attr}]{content}[/{attr.split('=', 1)[0]}]"
+    return content
+
+
+def _parse_css(style):
+    """Parse an inline style string into a {property: value} dict."""
+    css_properties = {}
+    for declaration in style.split(';'):
+        # partition() keeps colons that belong to the value, e.g. url(http://...)
+        name, separator, value = declaration.partition(':')
+        if separator:
+            css_properties[name.strip().lower()] = value.strip()
+    return css_properties
+
+
+def _convert_children(element, replacements):
+    """Convert every child of element and concatenate the results."""
+    return ''.join(recursive_html_to_bbcode(child, replacements) for child in element.contents)
+
+
+def handle_font_tag(tag, replacements):
+    """Handles the conversion of <font> tag with attributes like color and size."""
+    attributes = []
+    # Nest all attributes, outermost first. Example: [COLOR=red][SIZE=5]content[/SIZE][/COLOR]
+    for html_attr, bb_tag in (('color', 'COLOR'), ('size', 'SIZE'), ('face', 'FONT')):
+        if html_attr in tag.attrs:
+            attributes.append(f"{bb_tag}={tag.attrs[html_attr]}")
+    return _wrap_in_bbcode(_convert_children(tag, replacements), attributes)
+
+
+def handle_style_tag(tag, replacements):
+    """Handles the conversion of tags with style attributes like color, size, and font."""
+    css_properties = _parse_css(tag.attrs.get('style', ''))
+    attributes = []
+
+    # Mapping CSS properties to BBCode
+    for css_name, bb_tag in (('color', 'COLOR'), ('font-size', 'SIZE'), ('font-family', 'FONT')):
+        if css_name in css_properties:
+            attributes.append(f"{bb_tag}={css_properties[css_name]}")
+    decoration = css_properties.get('text-decoration', '')
+    if 'line-through' in decoration:
+        attributes.append("S")  # Assume strike-through
+    if 'underline' in decoration:
+        attributes.append("U")
+    weight = css_properties.get('font-weight', '').lower()
+    if weight == 'bold' or (weight.isdigit() and int(weight) >= 700):
+        attributes.append("B")  # Assume bold
+
+    return _wrap_in_bbcode(_convert_children(tag, replacements), attributes)
+
+
+def recursive_html_to_bbcode(element, replacements=None):
+    """Recursively convert HTML elements to BBCode.
+
+    Text nodes are returned unchanged, <details> becomes [SPOILER] (titled with
+    its <summary>, if any) and every other tag is dropped, keeping only its
+    converted children.
+
+    replacements is optional so that handle_font_tag/handle_style_tag can
+    forward their mapping; it is only passed down to the children.
+    """
+    if isinstance(element, NavigableString):
+        return str(element)
+    if element.name == 'summary':
+        # Skip summary tag as it's handled in details
+        return ''
+    if element.name == 'details':
+        # Handle <details> tag
+        summary = element.find('summary')
+        spoiler_title = ''
+        if summary is not None:
+            # Get the summary content and remove the summary element
+            spoiler_title = '=' + _convert_children(summary, replacements)
+            summary.decompose()
+
+        # Process remaining content
+        content = _convert_children(element, replacements)
+        return f'[SPOILER{spoiler_title}]{content}[/SPOILER]'
+    # Handle other tags or pass through
+    return _convert_children(element, replacements)
+
+
+def html_to_bbcode(html):
+    """Parse html with html.parser and return its BBCode conversion."""
+    replacements = {
+        'b': 'B',
+        'strong': 'B',
+        'i': 'I',
+        'em': 'I',
+        'u': 'U',
+        's': 'S',
+        'sub': 'SUB',
+        'sup': 'SUP',
+        'p': '',  # Handled by default
+        'ul': 'LIST',
+        'ol': 'LIST=1',
+        'li': '*',  # Special handling in recursive function
+        'font': '',  # To be handled for attributes
+        'blockquote': 'QUOTE',
+        'pre': 'CODE',
+        'code': 'ICODE',
+        'a': 'URL',  # Special handling for attributes
+        'img': 'IMG'  # Special handling for attributes
+    }
+
+    soup = BeautifulSoup(html, 'html.parser')
+    return recursive_html_to_bbcode(soup, replacements)
+
+
+def process_html(input_html, debug=False, output_file=None):
+    """Convert input_html to BBCode and return the result.
+
+    When debug is set and output_file is given, the result is also written
+    to that file as UTF-8.
+    """
+    converted_bbcode = html_to_bbcode(input_html)
+
+    if debug and output_file:
+        with open(output_file, 'w', encoding='utf-8') as file:
+            file.write(converted_bbcode)
+    # Return the result in debug mode too, so callers never receive None
+    return converted_bbcode
+
+
+def main():
+    """Command-line entry point: convert an HTML file to BBCode."""
+    parser = argparse.ArgumentParser(description="Convert HTML to BBCode with optional debugging output.")
+    parser.add_argument('input_file', type=str, help='Input HTML file path')
+    parser.add_argument('--debug', action='store_true', help='Save output to readme.finalpass for debugging')
+    args = parser.parse_args()
+
+    with open(args.input_file, 'r', encoding='utf-8') as file:
+        html_content = file.read()
+
+    output_file = 'readme.finalpass' if args.debug else None
+    converted_bbcode = process_html(html_content, debug=args.debug, output_file=output_file)
+    if not args.debug:
+        # Nothing is saved without --debug, so print the result instead of dropping it
+        print(converted_bbcode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sbapp/md2bbcode/main.py b/sbapp/md2bbcode/main.py
new file mode 100644
--- /dev/null
+++ b/sbapp/md2bbcode/main.py
@@ -0,0 +1,82 @@
+# uses a custom mistune renderer to convert Markdown to BBCode. The custom renderer is defined in the bbcode.py file.
+# pass --debug to save the output to readme.1stpass (main.py) and readme.finalpass (html2bbcode)
+# for further debugging, you can convert the markdown file to AST using md2ast.py. Remember to load the plugin(s) you want to test.
+
+# standard library
+import argparse
+import sys
+
+# mistune
+import mistune
+from mistune.plugins.formatting import strikethrough, mark, superscript, subscript, insert
+from mistune.plugins.table import table, table_in_list
+from mistune.plugins.footnotes import footnotes
+from mistune.plugins.task_lists import task_lists
+from mistune.plugins.def_list import def_list
+from mistune.plugins.abbr import abbr
+from mistune.plugins.spoiler import spoiler
+
+# local
+from md2bbcode.plugins.merge_lists import merge_ordered_lists
+from md2bbcode.renderers.bbcode import BBCodeRenderer
+from md2bbcode.html2bbcode import process_html
+
+# Plugins enabled on the Markdown parser, in load order
+PLUGINS = [
+    strikethrough, mark, superscript, subscript, insert, table, footnotes,
+    task_lists, def_list, abbr, spoiler, table_in_list, merge_ordered_lists,
+]
+
+
+def convert_markdown_to_bbcode(markdown_text, domain):
+    """Render markdown_text with BBCodeRenderer; domain is passed to the renderer."""
+    # Create a Markdown parser instance using the custom BBCode renderer
+    markdown_parser = mistune.create_markdown(renderer=BBCodeRenderer(domain=domain), plugins=PLUGINS)
+
+    # Convert Markdown text to BBCode
+    return markdown_parser(markdown_text)
+
+
+def process_readme(markdown_text, domain=None, debug=False):
+    """Convert Markdown to BBCode, then convert leftover HTML in the result.
+
+    With debug set, the intermediate BBCode is saved to readme.1stpass and
+    process_html is asked to save the final result to readme.finalpass.
+    """
+    # Convert Markdown to BBCode
+    bbcode_text = convert_markdown_to_bbcode(markdown_text, domain)
+
+    # If debug mode, save intermediate BBCode
+    if debug:
+        with open('readme.1stpass', 'w', encoding='utf-8') as file:
+            file.write(bbcode_text)
+
+    # Convert BBCode formatted as HTML to final BBCode
+    return process_html(bbcode_text, debug, 'readme.finalpass')
+
+
+def main():
+    """Command-line entry point; an input path of '-' reads Markdown from stdin."""
+    parser = argparse.ArgumentParser(description='Convert Markdown file to BBCode with HTML processing.')
+    parser.add_argument('input', help='Input Markdown file path')
+    parser.add_argument('--domain', help='Domain to prepend to relative URLs')
+    parser.add_argument('--debug', action='store_true', help='Output intermediate results to files for debugging')
+    args = parser.parse_args()
+
+    if args.input == '-':
+        # Read Markdown content from stdin
+        markdown_text = sys.stdin.read()
+    else:
+        with open(args.input, 'r', encoding='utf-8') as md_file:
+            markdown_text = md_file.read()
+
+    # Process the readme and get the final BBCode
+    final_bbcode = process_readme(markdown_text, args.domain, args.debug)
+
+    # Optionally, print final BBCode to console
+    if not args.debug:
+        print(final_bbcode)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sbapp/md2bbcode/md2ast.py b/sbapp/md2bbcode/md2ast.py
new file mode 100644
--- /dev/null
+++ b/sbapp/md2bbcode/md2ast.py
@@ -0,0 +1,52 @@
+# this is for debugging the custom mistune renderer bbcode.py
+import argparse
+import mistune
+import json  # Import the json module for serialization
+from mistune.plugins.formatting import strikethrough, mark, superscript, subscript, insert
+from mistune.plugins.table import table, table_in_list
+from mistune.plugins.footnotes import footnotes
+from mistune.plugins.task_lists import task_lists
+from mistune.plugins.def_list import def_list
+from mistune.plugins.abbr import abbr
+from mistune.plugins.spoiler import spoiler
+
+# local
+from md2bbcode.plugins.merge_lists import merge_ordered_lists
+
+
+def convert_markdown_to_ast(input_filepath, output_filepath):
+    """Parse the Markdown file at input_filepath and write its AST as JSON to output_filepath."""
+    # Initialize Markdown parser with no renderer to produce an AST
+    markdown_parser = mistune.create_markdown(renderer=None, plugins=[strikethrough, mark, superscript, subscript, insert, table, footnotes, task_lists, def_list, abbr, spoiler, table_in_list, merge_ordered_lists])
+
+    # Read the input Markdown file
+    with open(input_filepath, 'r', encoding='utf-8') as md_file:
+        markdown_text = md_file.read()
+
+    # Convert Markdown text to AST
+    ast_tokens = markdown_parser(markdown_text)
+
+    # Serialize the AST to a JSON string
+    ast_json = json.dumps(ast_tokens, indent=4)
+
+    # Write the output AST to a new file in JSON format
+    with open(output_filepath, 'w', encoding='utf-8') as ast_file:
+        ast_file.write(ast_json)
+
+
+def main():
+    """Command-line entry point: read the input and output paths and run the conversion."""
+    # Create argument parser
+    parser = argparse.ArgumentParser(description='Convert Markdown file to AST file (JSON format).')
+    # Add arguments
+    parser.add_argument('input', help='Input Markdown file path')
+    parser.add_argument('output', help='Output AST file path (JSON format)')
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Convert the Markdown to AST using the provided paths
+    convert_markdown_to_ast(args.input, args.output)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sbapp/md2bbcode/plugins/merge_lists.py b/sbapp/md2bbcode/plugins/merge_lists.py
new file mode 100644
--- /dev/null
+++ b/sbapp/md2bbcode/plugins/merge_lists.py
@@ -0,0 +1,70 @@
+from typing import Dict, Any, List
+
+
+def _is_ordered_list(token: Dict[str, Any], depth: int) -> bool:
+    """Return True when token is an ordered list token at the given depth."""
+    if token["type"] != "list":
+        return False
+    attrs = token.get("attrs", {})
+    return bool(attrs.get("ordered", False)) and attrs.get("depth", 0) == depth
+
+
+def merge_ordered_lists(md):
+    """
+    A plugin to merge consecutive "top-level" ordered lists into one,
+    and also attach any intervening code blocks or blank lines to the
+    last list item so that the final BBCode appears as a single list
+    with multiple steps.
+
+    This relies on a few assumptions:
+    1) The only tokens between two ordered lists that should be merged
+       are code blocks or blank lines (not normal paragraphs).
+    2) We want any code block(s) right after a list item to appear in
+       that same bullet item.
+    """
+
+    def rewrite_tokens(md, state):
+        tokens = state.tokens
+        merged: List[Dict[str, Any]] = []
+        i = 0
+
+        while i < len(tokens):
+            token = tokens[i]
+            i += 1
+
+            if not _is_ordered_list(token, 0):
+                # If not a top-level ordered list, just keep it as-is
+                merged.append(token)
+                continue
+
+            # Start new merged list
+            list_items = list(token["children"])  # bullet items in the first list
+
+            # Continue until we run into something that's not:
+            #   another top-level ordered list,
+            #   or code blocks / blank lines (which we'll attach to the last bullet).
+            while i < len(tokens):
+                nxt = tokens[i]
+                if _is_ordered_list(nxt, 0):
+                    list_items.extend(nxt["children"])
+                elif nxt["type"] in ("block_code", "blank_line"):
+                    if list_items:  # attach to last bullet item, if any
+                        list_items[-1]["children"].append(nxt)
+                else:
+                    # Not a same-depth list or code block - stop merging
+                    break
+                i += 1
+
+            # Copy the first list token so that its other keys (and any
+            # extra attrs) survive the merge instead of being dropped
+            merged_list = dict(token)
+            merged_list["children"] = list_items
+            merged_list["attrs"] = {**token.get("attrs", {}), "ordered": True, "depth": 0}
+            merged.append(merged_list)
+
+        # Replace the old tokens with the merged version
+        state.tokens = merged
+
+    # Attach to before_render_hooks so we can manipulate tokens before rendering
+    md.before_render_hooks.append(rewrite_tokens)
+    return md
diff --git a/sbapp/md2bbcode/renderers/__init__.py b/sbapp/md2bbcode/renderers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sbapp/md2bbcode/renderers/bbcode.py b/sbapp/md2bbcode/renderers/bbcode.py
new file mode 100644
--- /dev/null
+++ b/sbapp/md2bbcode/renderers/bbcode.py
@@ -0,0 +1,220 @@
+from mistune.core import BaseRenderer
+from mistune.util import escape as escape_text, striptags, safe_entity
+from urllib.parse import urljoin, urlparse
+
+
+class BBCodeRenderer(BaseRenderer):
+    """A renderer for converting Markdown to BBCode."""
+    _escape: bool
+    NAME = 'bbcode'
+    # URL schemes that safe_url() replaces with '#harmful-link'
+    HARMFUL_PROTOCOLS = ('javascript:', 'vbscript:', 'data:')
+
+    def __init__(self, escape=False, domain=None):
+        """escape: HTML-escape text and raw HTML; domain: base for relative URLs."""
+        super(BBCodeRenderer, self).__init__()
+        self._escape = escape
+        self.domain = domain
+
+    def render_token(self, token, state):
+        """Render token with the method named after its type.
+
+        The method receives the token's raw text or its rendered children
+        (if it has either), plus the token attrs as keyword arguments.
+        """
+        func = self._get_method(token['type'])
+        attrs = token.get('attrs') or {}
+
+        if 'raw' in token:
+            text = token['raw']
+        elif 'children' in token:
+            text = self.render_tokens(token['children'], state)
+        else:
+            return func(**attrs)
+        return func(text, **attrs)
+
+    def safe_url(self, url: str) -> str:
+        """Return url resolved against self.domain, or '#harmful-link'.
+
+        The scheme check ignores case and surrounding whitespace, so variants
+        such as ' JavaScript:' are rejected too.
+        """
+        # Simple URL sanitization
+        if url.strip().lower().startswith(self.HARMFUL_PROTOCOLS):
+            return '#harmful-link'
+        # Check if the URL is absolute by looking for a netloc part in the URL
+        if self.domain and not urlparse(url).netloc:
+            url = urljoin(self.domain, url)
+        return url
+
+    def text(self, text: str) -> str:
+        if self._escape:
+            return escape_text(text)
+        return text
+
+    def emphasis(self, text: str) -> str:
+        return '[i]' + text + '[/i]'
+
+    def strong(self, text: str) -> str:
+        return '[b]' + text + '[/b]'
+
+    def link(self, text: str, url: str, title=None) -> str:
+        return '[url=' + self.safe_url(url) + ']' + text + '[/url]'
+
+    def image(self, text: str, url: str, title=None) -> str:
+        alt_text = f' alt="{text}"' if text else ''
+        img_tag = f'[img{alt_text}]' + self.safe_url(url) + '[/img]'
+        # Check if alt text starts with 'pixel' and treat it as pixel art
+        if text and text.lower().startswith('pixel'):
+            return f'[pixelate]{img_tag}[/pixelate]'
+        return img_tag
+
+    def codespan(self, text: str) -> str:
+        return '[icode]' + text + '[/icode]'
+
+    def linebreak(self) -> str:
+        return '\n'
+
+    def softbreak(self) -> str:
+        return ''
+
+    def inline_html(self, html: str) -> str:
+        if self._escape:
+            return escape_text(html)
+        return html
+
+    def paragraph(self, text: str) -> str:
+        return text + '\n\n'
+
+    def heading(self, text: str, level: int, **attrs) -> str:
+        # Only HEADING=1..3 are emitted; any other level falls back to 3
+        bbcode_level = level if 1 <= level <= 3 else 3
+        return f"[HEADING={bbcode_level}]{text}[/HEADING]\n"
+
+    def blank_line(self) -> str:
+        return ''
+
+    def thematic_break(self) -> str:
+        return '[hr][/hr]\n'
+
+    def block_text(self, text: str) -> str:
+        return text
+
+    def block_code(self, code: str, **attrs) -> str:
+        # Renders blocks of code using the language specified in Markdown
+        special_cases = {
+            'plaintext': None  # Default [CODE]
+        }
+
+        # The first word of the info string is the language; info may be missing or blank
+        info_words = safe_entity((attrs.get('info') or '').strip()).split(None, 1)
+        lang = info_words[0].lower() if info_words else None
+        # Use the special case if it exists, otherwise use lang as is
+        bbcode_lang = special_cases.get(lang, lang)
+        if bbcode_lang:
+            return f"[CODE={bbcode_lang}]{escape_text(code)}[/CODE]\n"
+        # No language specified, render with a generic [CODE] tag
+        return f"[CODE]{escape_text(code)}[/CODE]\n"
+
+    def block_quote(self, text: str) -> str:
+        return '[QUOTE]\n' + text + '[/QUOTE]\n'
+
+    def block_html(self, html: str) -> str:
+        if self._escape:
+            return '<p>' + escape_text(html.strip()) + '</p>\n'
+        return html + '\n'
+
+    def block_error(self, text: str) -> str:
+        return '[color=red][icode]' + text + '[/icode][/color]\n'
+
+    def list(self, text: str, ordered: bool, **attrs) -> str:
+        # For ordered lists, always use [list=1] to get automatic sequential numbering
+        # For unordered lists, use [list]
+        tag = 'list=1' if ordered else 'list'
+        return '[{}]'.format(tag) + text + '[/list]\n'
+
+    def list_item(self, text: str) -> str:
+        return '[*]' + text + '\n'
+
+    def strikethrough(self, text: str) -> str:
+        return '[s]' + text + '[/s]'
+
+    def mark(self, text: str) -> str:
+        # Rendered with the [mark] tag
+        return '[mark]' + text + '[/mark]'
+
+    def insert(self, text: str) -> str:
+        # Use underline to represent insertion
+        return '[u]' + text + '[/u]'
+
+    def superscript(self, text: str) -> str:
+        return '[sup]' + text + '[/sup]'
+
+    def subscript(self, text: str) -> str:
+        return '[sub]' + text + '[/sub]'
+
+    def inline_spoiler(self, text: str) -> str:
+        return '[ISPOILER]' + text + '[/ISPOILER]'
+
+    def block_spoiler(self, text: str) -> str:
+        return '[SPOILER]\n' + text + '\n[/SPOILER]'
+
+    def footnote_ref(self, key: str, index: int):
+        # Use superscript for the footnote reference
+        return f'[sup][u][JUMPTO=fn-{index}]{index}[/JUMPTO][/u][/sup]'
+
+    def footnotes(self, text: str):
+        # Optionally wrap all footnotes in a specific section if needed
+        return '[b]Footnotes:[/b]\n' + text
+
+    def footnote_item(self, text: str, key: str, index: int):
+        # Define the footnote with an anchor at the end of the document
+        return f'[ANAME=fn-{index}]{index}[/ANAME]. {text}'
+
+    def table(self, children, **attrs):
+        # Starting with a full-width table by default if not specified
+        # width = attrs.get('width', '100%') # comment out until XF 2.3
+        # return f'[TABLE width="{width}"]\n' + children + '[/TABLE]\n' # comment out until XF 2.3
+        return '[TABLE]\n' + children + '[/TABLE]\n'
+
+    def table_head(self, children, **attrs):
+        return '[TR]\n' + children + '[/TR]\n'
+
+    def table_body(self, children, **attrs):
+        return children
+
+    def table_row(self, children, **attrs):
+        return '[TR]\n' + children + '[/TR]\n'
+
+    def table_cell(self, text, align=None, head=False, **attrs):
+        # BBCode does not support direct cell alignment,
+        # use [LEFT], [CENTER], or [RIGHT] tags
+
+        # Use th for header cells and td for normal cells
+        tag = 'TH' if head else 'TD'
+
+        if align in ('left', 'center', 'right'):
+            alignment = align.upper()
+            text = f'[{alignment}]{text}[/{alignment}]'
+
+        return f'[{tag}]{text}[/{tag}]\n'
+
+    def task_list_item(self, text: str, checked: bool = False) -> str:
+        # Using emojis to represent the checkbox
+        checkbox_emoji = '🗹' if checked else '☐'
+        return checkbox_emoji + ' ' + text + '\n'
+
+    def def_list(self, text: str) -> str:
+        # No specific BBCode tag for <dl>, so we just use the plain text grouping
+        return '\n' + text + '\n'
+
+    def def_list_head(self, text: str) -> str:
+        return '[b]' + text + '[/b]' + ' ' + ':' + '\n'
+
+    def def_list_item(self, text: str) -> str:
+        return '[INDENT]' + text + '[/INDENT]\n'
+
+    def abbr(self, text: str, title: str) -> str:
+        if title:
+            return f'[abbr={title}]{text}[/abbr]'
+        return text