Source code for in2lambda.main

"""The main input for in2lambda, defining both the CLT and main library function."""

# Uncommenting the block below prepends the parent of this file's directory to sys.path, so that `import in2lambda` resolves to the local checkout rather than the pip-installed package. (Written by Kevin.)
#
# import sys
# import os
# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import importlib
import pkgutil
import subprocess
from typing import Optional

import panflute as pf
import rich_click as click

import in2lambda.filters
from in2lambda.api.module import Module


def docx_to_md(docx_file: str) -> str:
    """Render a .docx document as markdown text by invoking the pandoc CLI.

    Args:
        docx_file: A file path with the file extension included.

    Returns:
        The markdown pandoc wrote to standard output, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
    """
    command = ["pandoc", docx_file, "-t", "markdown"]
    # check=True makes a failing pandoc run raise instead of handing back
    # whatever partial output it produced.
    completed = subprocess.run(command, stdout=subprocess.PIPE, check=True)
    return completed.stdout.decode("utf-8")
def file_type(file: str) -> str:
    """Map a file path to the pandoc input format implied by its extension.

    The extension is whatever follows the last ``.`` in the path, compared
    case-insensitively. A path without any ``.`` is treated as if the whole
    path were the extension.

    See https://github.com/jgm/pandoc/blob/bad922a69236e22b20d51c4ec0b90c5a6c038433/src/Text/Pandoc/Format.hs#L171
    (or any newer commit) for pandoc's supported file extensions.

    Args:
        file: A file path with the file extension included.

    Returns:
        An option in `pandoc --list-input-formats` that matches the given file type

    Raises:
        RuntimeError: If the extension is not one of the recognised ones.

    Examples:
        >>> from in2lambda.main import file_type
        >>> file_type("example.tex")
        'latex'
        >>> file_type("/some/random/path/demo.md")
        'markdown'
        >>> file_type("no_extension")
        Traceback (most recent call last):
        RuntimeError: Unsupported file extension: .no_extension
        >>> file_type("demo.unknown_extension")
        Traceback (most recent call last):
        RuntimeError: Unsupported file extension: .unknown_extension
    """
    latex_extensions = ("tex", "latex", "ltx")
    markdown_extensions = (
        "md",
        "rmd",
        "markdown",
        "mdown",
        "mdwn",
        "mkd",
        "mkdn",
        "text",
        "txt",
    )

    # Everything after the final dot, lower-cased so "DOCX" and "docx" agree.
    extension = file.rsplit(".", 1)[-1].lower()

    if extension in latex_extensions:
        return "latex"
    if extension in markdown_extensions:
        return "markdown"
    if extension == "docx":
        # Pandoc doesn't seem to support .doc, and panflute doesn't like .docx.
        return "docx"
    raise RuntimeError(f"Unsupported file extension: .{extension}")
def _read_pandoc_input(path: str) -> tuple[str, str]:
    """Load a source file as text, paired with the pandoc input format to parse it as."""
    input_format = file_type(path)
    if input_format == "docx":
        # Convert .docx to md using Pandoc and proceed
        return docx_to_md(path), "markdown"
    with open(path, "r", encoding="utf-8") as file:
        return file.read(), input_format


def runner(
    question_file: str,
    chosen_filter: str,
    output_dir: Optional[str] = None,
    answer_file: Optional[str] = None,
) -> Module:
    r"""Takes in a question file for a given filter and outputs how it's broken down within Lambda Feedback.

    The question file (and the answer file, when given) may have any extension
    accepted by `file_type`; .docx input is first converted to markdown.

    Args:
        question_file: The absolute path to a question file.
        chosen_filter: The filter chosen to parse the file. It names the
            subpackage of `in2lambda.filters` whose `filter` module is imported.
        output_dir: An optional argument for where to output the Lambda Feedback compatible json/zip files.
        answer_file: The absolute path to an answer file. It is run through the
            same filter, after the question file, with `parsing_answers=True`.

    Returns:
        The `Module` handed to the filter runs, describing the questions and how
        they would be broken down into different Lambda Feedback sections in a
        Python-readable format. If `output_dir` is specified, the corresponding
        json/zip files are produced.

    Raises:
        RuntimeError: If a given file has an unsupported extension.
        ModuleNotFoundError: If `chosen_filter` does not name an importable filter.

    Examples:
        >>> import os
        >>> from in2lambda.main import runner
        >>> # Retrieve an example TeX file and run the given filter.
        >>> runner(f"{os.path.dirname(in2lambda.__file__)}/filters/PartsSepSol/example.tex", "PartsSepSol") # doctest: +ELLIPSIS
        Module(questions=[Question(title='', parts=[Part(text=..., worked_solution=''), ...], images=[], main_text='This is a sample question\n\n'), ...])
        >>> runner(f"{os.path.dirname(in2lambda.__file__)}/filters/PartsOneSol/example.tex", "PartsOneSol") # doctest: +ELLIPSIS
        Module(questions=[Question(title='', parts=[Part(text='This is part (a)\n\n', worked_solution=''), ...], images=[], main_text='Here is some preliminary question information that might be useful.'), ...)
    """
    # The list of questions for Lambda Feedback as a Python API.
    module = Module()

    # Dynamically import the correct pandoc filter depending on the chosen filter name.
    filter_module = importlib.import_module(f"in2lambda.filters.{chosen_filter}.filter")

    # Questions are always parsed first; a separate answer file, if provided,
    # goes through the same filter afterwards with parsing_answers=True.
    sources = [(question_file, False)]
    if answer_file:
        sources.append((answer_file, True))

    for source_file, parsing_answers in sources:
        text, input_format = _read_pandoc_input(source_file)
        # Parse the Pandoc AST using the relevant panflute filter.
        pf.run_filter(
            filter_module.pandoc_filter,
            doc=pf.convert_text(text, input_format=input_format, standalone=True),
            module=module,
            tex_file=source_file,
            parsing_answers=parsing_answers,
        )

    # Read the Python API format and convert to JSON.
    if output_dir is not None:
        module.to_json(output_dir)

    return module
@click.command(
    no_args_is_help=True,
    epilog="See the docs at https://lambda-feedback.github.io/in2lambda/ for more details.",
)
@click.argument(
    # Use resolve_path to get absolute path
    "question_file",
    type=click.Path(exists=True, readable=True, resolve_path=True),
)
# The allowed filter names are the modules found in the in2lambda.filters
# package, with the one named "markdown" left out.
@click.argument(
    "chosen_filter",
    type=click.Choice(
        [
            i.name
            for i in pkgutil.iter_modules(in2lambda.filters.__path__)
            if i.name != "markdown"
        ],
        case_sensitive=False,
    ),
)
@click.option(
    "--out",
    "-o",
    "output_dir",
    default="./out",
    show_default=True,
    help="Directory to output json/zip files to.",
    type=click.Path(resolve_path=True),
)
@click.option(
    "--answers",
    "-a",
    "answer_file",
    default=None,
    help="File containing solutions for QUESTION_FILE.",
    type=click.Path(resolve_path=True, exists=True, dir_okay=False),
)
def cli(
    question_file: str, chosen_filter: str, output_dir: str, answer_file: Optional[str]
) -> None:
    """Takes in a QUESTION_FILE and a CHOSEN_FILTER and produces Lambda Feedback compatible json/zip files."""
    # NOTE: the docstring above doubles as the command's --help text, so it
    # names the arguments exactly as they appear in the usage line.
    #
    # runner() is kept separate from this click command so that it can be
    # easily imported as part of a library.
    runner(question_file, chosen_filter, output_dir, answer_file)


if __name__ == "__main__":
    cli()