Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:11:34

0001 #!/usr/bin/env python3
0002 from datetime import datetime
0003 from pathlib import Path
0004 import shutil
0005 import base64
0006 import subprocess
0007 from subprocess import run
0008 from typing import IO, List
0009 import re
0010 import textwrap
0011 import tempfile
0012 import functools
0013 import asyncio
0014 
0015 import typer
0016 import toml
0017 import pydantic
0018 import rich.console
0019 import rich.panel
0020 import rich.pretty
0021 from jinja2 import Template
0022 import aiohttp
0023 from gidgethub.aiohttp import GitHubAPI
0024 from fsspec.implementations.zip import ZipFileSystem
0025 
0026 
# Typer application object; the CLI commands in this file register on it
# through the @app.command() decorator.
app = typer.Typer()
0028 
0029 
class MetaData(pydantic.BaseModel):
    """Bibliographic information describing a single white paper."""

    # Author names, one list entry per author.
    authors: list[str]
    # Title of the paper.
    title: str
    # Free-form description text; stays empty unless a caller assigns one.
    description: str = ""
0034 
0035 
class WhitePaper(pydantic.BaseModel):
    """One white paper entry of the configuration file."""

    # Full repository URL; `repo` expects it to start with https://github.com/.
    repository: str
    # Short identifier from which output file names are derived.
    slug: str
    # Download URL of the PDF, if one is known.
    pdf_url: str | None = None
    # Author/title/description information, if it has been collected.
    metadata: MetaData | None = None

    @property
    def repo(self) -> str:
        """Return the part of ``repository`` after ``https://github.com/``.

        For a plain repository URL this is ``<owner>/<name>``.

        Raises:
            ValueError: If ``repository`` does not look like
                ``https://github.com/<owner>/<name>``.
        """
        # The dot is escaped so that only the literal host name matches.
        m = re.match(r"^https://github\.com/(.*/.*)$", self.repository)
        if m is None:
            # An explicit error instead of ``assert``: assertions are removed
            # under ``python -O`` and would leave an opaque AttributeError.
            raise ValueError(
                f"Not a GitHub repository URL: {self.repository!r} "
                "(expected https://github.com/<owner>/<name>)"
            )
        return m.group(1)
0047 
0048 
class Config(pydantic.BaseModel):
    """Top-level layout of the white paper configuration file."""

    # All white papers handled by this script.
    white_papers: list[WhitePaper]
0051 
0052 
def _tex_argument(content: str, command: str) -> str:
    r"""Return the text between the braces of the first ``\<command>{...}``.

    The closing brace is found by tracking nesting depth, so nested groups,
    arguments spanning several lines and further ``}`` characters later on
    the same line are all handled.

    Raises:
        ValueError: If the command is absent or its braces never close.
    """
    opener = "\\" + command + "{"
    start = content.find(opener)
    if start < 0:
        raise ValueError(f"No {opener}...}} found in metadata")

    begin = start + len(opener)
    depth = 1
    pos = begin
    while pos < len(content):
        char = content[pos]
        if char == "\\":
            # Skip the escaped character so that \{ and \} are not counted.
            pos += 2
            continue
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return content[begin:pos]
        pos += 1

    raise ValueError(f"Unbalanced braces in {opener}...}}")


def parse_metadata(content: str) -> MetaData:
    r"""Extract authors and title from LaTeX metadata source.

    Args:
        content: LaTeX text containing an ``\author{...}`` and a
            ``\title{...}`` command. Authors are separated by ``\and``.

    Returns:
        A ``MetaData`` with ``authors`` and ``title`` filled in; the
        description keeps its default value.

    Raises:
        ValueError: If either command is missing or has unbalanced braces.
    """
    author_text = _tex_argument(content, "author")
    # Whitespace runs (including line breaks inside a wrapped argument) are
    # collapsed to single spaces; this also trims both ends.
    authors = [" ".join(a.split()) for a in author_text.split(r"\and")]

    title = " ".join(_tex_argument(content, "title").split())

    return MetaData(authors=authors, title=title)
0064 
0065 
0066 def which(cmd: str) -> Path | None:
0067     try:
0068         exe = (
0069             run(["which", cmd], check=True, capture_output=True).stdout.decode().strip()
0070         )
0071         return Path(exe)
0072     except subprocess.CalledProcessError:
0073         return None
0074 
0075 
def coro(fn):
    """Decorator that lets a coroutine function be called synchronously.

    The returned callable forwards its arguments to ``fn``, runs the
    resulting coroutine to completion with ``asyncio.run`` and returns its
    result. Metadata of ``fn`` is copied onto the returned callable.
    """

    def run_sync(*args, **kwargs):
        pending = fn(*args, **kwargs)
        return asyncio.run(pending)

    return functools.wraps(fn)(run_sync)
0082 
0083 
def make_titlepage_image(convert: Path, pdf: Path, output: Path):
    """Run the ``convert`` executable to turn ``pdf`` into an image.

    Args:
        convert: Path of the converter executable (looked up as ImageMagick's
            ``convert`` by the caller in this file).
        pdf: Input PDF file.
        output: Destination image file.

    Raises:
        subprocess.CalledProcessError: If the converter exits with a
            non-zero status.
    """
    # The "[0]" suffix presumably selects the first page only -- confirm
    # against the ImageMagick input-file syntax.
    source_page = f"{pdf}[0]"
    flatten_options = ["-background", "white", "-alpha", "remove", "-alpha", "off"]
    command = [
        convert,
        "-density",
        "300",
        source_page,
        *flatten_options,
        "-depth",
        "2",
        str(output),
    ]
    # stdout and stderr are merged and captured so that the converter does
    # not write to the terminal.
    subprocess.run(
        command,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
0105 
0106 
async def get_file(gh: GitHubAPI, repo: str, file: str, ref: str | None = None) -> str:
    """Fetch a text file from a repository through the contents API.

    Args:
        gh: API client used for the request.
        repo: Repository in ``<owner>/<name>`` form.
        file: Path of the file inside the repository.
        ref: Optional git reference (tag, branch or commit) to read from;
            when ``None`` no ``ref`` parameter is sent.

    Returns:
        The base64-decoded ``content`` field of the response as text.
    """
    from urllib.parse import quote

    url = f"/repos/{repo}/contents/{file}"
    if ref is not None:
        # Percent-encode the reference: characters such as "+", "#" or "&"
        # would otherwise change the meaning of the query string.
        url += f"?ref={quote(ref, safe='')}"
    r = await gh.getitem(url)
    return base64.b64decode(r["content"]).decode()
0113 
0114 
class Release(pydantic.BaseModel):
    """Fields of a repository release entry that this script reads."""

    class Asset(pydantic.BaseModel):
        """A file attached to a release."""

        # File name of the asset.
        name: str
        # URL from which the asset is downloaded.
        browser_download_url: str

    id: int
    created_at: datetime
    # None marks a release without a publication date.
    published_at: datetime | None
    tag_name: str | None
    assets: list[Asset]
0125 
0126 
class Artifact(pydantic.BaseModel):
    """Fields of a workflow artifact entry that this script reads."""

    class WorkflowRun(pydantic.BaseModel):
        """Identifies the workflow run that produced an artifact."""

        # Branch the run was started for.
        head_branch: str
        # Commit hash the run was started for.
        head_sha: str

    # Timestamp used to decide which artifact is the most recent one.
    updated_at: datetime
    workflow_run: WorkflowRun
    # URL from which the artifact archive is downloaded.
    archive_download_url: str
0135 
0136 
async def get_latest_release(gh: GitHubAPI, repo: str) -> Release | None:
    """Return the release of ``repo`` with the most recent publication date.

    Releases whose ``published_at`` is ``None`` are ignored. When several
    releases share the newest date, the first one listed wins.

    Returns:
        The newest published release, or ``None`` if there is none.
    """
    newest = None
    newest_published = None

    async for payload in gh.getiter(f"/repos/{repo}/releases"):
        candidate = Release(**payload)
        published = candidate.published_at
        if published is None:
            continue
        # Strict comparison keeps the earlier-listed release on ties.
        if newest_published is None or published > newest_published:
            newest = candidate
            newest_published = published

    return newest
0153 
0154 
async def get_latest_artifact(
    gh: GitHubAPI, repo: str, branch: str = "main"
) -> Artifact | None:
    """Return the most recently updated workflow artifact of a branch.

    Args:
        gh: API client used for the listing.
        repo: Repository in ``<owner>/<name>`` form.
        branch: Only artifacts whose workflow run has this head branch are
            considered.

    Returns:
        The artifact with the greatest ``updated_at`` (the first one listed
        on ties), or ``None`` when no artifact belongs to ``branch``.
    """
    newest = None
    listing = gh.getiter(
        f"/repos/{repo}/actions/artifacts", iterable_key="artifacts"
    )
    async for payload in listing:
        candidate = Artifact(**payload)
        if candidate.workflow_run.head_branch != branch:
            continue
        # Strict comparison keeps the earlier-listed artifact on ties.
        if newest is None or candidate.updated_at > newest.updated_at:
            newest = candidate

    return newest
0173 
0174 
async def download_file(
    session: aiohttp.ClientSession, url: str, target: IO[bytes], *args, **kwargs
):
    """Stream the body of a GET request into ``target``.

    Args:
        session: HTTP session used for the request.
        url: Address to download.
        target: Writable, seekable binary file object; it is rewound to
            position 0 once the download has finished.
        *args: Passed through to ``session.get``.
        **kwargs: Passed through to ``session.get`` (for example headers).

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status.
    """
    # The context manager releases the connection even if writing fails.
    async with session.get(url, *args, **kwargs) as r:
        # Fail here rather than store an error page as if it were the file.
        r.raise_for_status()

        async for data, _ in r.content.iter_chunks():
            target.write(data)
    target.seek(0)
0183 
0184 
def extract_pdf_from_artifact(fh: IO[bytes], target: IO[bytes]):
    """Copy the single PDF contained in a zip archive into ``target``.

    Args:
        fh: Readable binary file object holding the zip archive.
        target: Writable binary file object that receives the PDF bytes.

    Raises:
        typer.Exit: With code 1 if the archive root does not contain exactly
            one entry or that entry does not end in ``.pdf``.
    """
    zipfs = ZipFileSystem(fh)
    files = zipfs.ls("/", detail=False)
    if len(files) != 1:
        print("Unexpected number of files in artifact", files)
        raise typer.Exit(1)
    if not files[0].endswith(".pdf"):
        print("Unexpected file in artifact", files)
        raise typer.Exit(1)

    # Close the archive member once it has been copied.
    with zipfs.open(files[0]) as source:
        shutil.copyfileobj(source, target)
    # The caller hands the file to an external program by name, so buffered
    # bytes must reach the file before this function returns.
    target.flush()
0196 
0197 
@app.command()
@coro
async def pull(
    config_file: Path = Path(__file__).parent / "white_papers.toml",
    github_token: str = typer.Option(..., envvar="GITHUB_TOKEN"),
):
    """Update title page images and metadata of all configured white papers.

    For each entry the PDF comes from the newest published release that has
    a PDF asset, otherwise from the newest workflow artifact. The PDF is
    converted to a PNG title page, author, title and abstract are read from
    the repository, and the config file is rewritten with the results.
    """
    console = rich.console.Console()
    convert = which("convert")
    if convert is None:
        print("convert (imagemagick) not found, please install")
        raise typer.Exit(1)

    # The same token authenticates both the API client and the raw downloads.
    auth_headers = {"Authorization": f"Token {github_token}"}

    async with aiohttp.ClientSession() as session:
        gh = GitHubAPI(session, "requester", oauth_token=github_token)

        with console.status("[bold green]Loading config...") as status:
            config = Config(**toml.load(config_file))

            for whp in config.white_papers:
                status.update(f"Loading {whp.repository}...")

                repo = whp.repo
                slug = whp.slug

                image_path = Path(__file__).parent / "white_papers" / "figures"
                assert image_path.is_dir(), image_path

                title_page_file = image_path / f"{slug}.png"

                latest_release = await get_latest_release(gh, repo)

                # Git reference from which the metadata files are read; None
                # means no ref is sent with the request.
                metadata_ref = None

                if latest_release is not None:
                    # The first PDF asset of the newest release wins.
                    for asset in latest_release.assets:
                        if asset.name.endswith(".pdf"):
                            whp.pdf_url = asset.browser_download_url
                            metadata_ref = latest_release.tag_name
                            break

                # NOTE(review): pdf_url may also be a value already present
                # in the config file; in that case metadata_ref stays None.
                # Confirm that this combination is intended.
                if whp.pdf_url is not None:
                    with tempfile.NamedTemporaryFile(suffix=".pdf") as pdf_fh:
                        # "\\[" yields a backslash followed by "[", which
                        # keeps rich from parsing "[repo]" as a markup tag.
                        status.update(f"\\[{repo}] Downloading PDF...")
                        await download_file(
                            session,
                            whp.pdf_url,
                            pdf_fh,
                            headers=auth_headers,
                        )
                        status.update(f"\\[{repo}] Converting PDF to PNG...")
                        make_titlepage_image(
                            convert, Path(pdf_fh.name), title_page_file
                        )

                else:
                    # No PDF URL known: fall back to the newest artifact.
                    latest_artifact = await get_latest_artifact(gh, repo)

                    if latest_artifact is None:
                        print("No artifacts found for", whp.repository)
                        raise typer.Exit(1)

                    metadata_ref = latest_artifact.workflow_run.head_sha

                    status.update(f"\\[{repo}] Downloading artifact...")

                    with tempfile.TemporaryFile() as fh, tempfile.NamedTemporaryFile(
                        suffix=".pdf"
                    ) as pdf_fh:
                        await download_file(
                            session,
                            latest_artifact.archive_download_url,
                            fh,
                            headers=auth_headers,
                        )

                        extract_pdf_from_artifact(fh, pdf_fh)

                        status.update(f"\\[{repo}] Converting PDF to PNG...")
                        make_titlepage_image(
                            convert, Path(pdf_fh.name), title_page_file
                        )

                status.update(f"\\[{repo}] Getting metadata for ref {metadata_ref}...")

                metadata = await get_file(gh, repo, "metadata.tex", ref=metadata_ref)
                abstract = await get_file(gh, repo, "abstract.tex", ref=metadata_ref)

                # Drop lines that start with "%" (LaTeX comment lines).
                abstract = textwrap.dedent(abstract).strip()
                abstract = "\n".join(
                    [
                        line
                        for line in abstract.split("\n")
                        if not line.strip().startswith("%")
                    ]
                ).strip()
                metadata = parse_metadata(metadata)
                metadata.description = abstract

                console.print(
                    rich.panel.Panel(rich.pretty.Pretty(metadata), title=whp.repository)
                )

                whp.metadata = metadata

            status.update("Updating config...")

            # Persist the collected PDF URLs and metadata.
            with open(config_file, "w") as f:
                toml.dump(config.model_dump(), f)
0306 
0307 
# CLI command: render the Markdown index page and one Markdown page per
# white paper from the Jinja2 templates stored next to this script.
# Every white paper in the config must already carry metadata.
@app.command()
def render(config_file: Path = Path(__file__).parent / "white_papers.toml"):
    config = Config(**toml.load(config_file))

    script_dir = Path(__file__).parent
    docs_path = script_dir / "white_papers"
    assert docs_path.is_dir(), docs_path

    # Load both templates before anything is written.
    page_template = Template(
        (script_dir / "white_paper_template.md.j2").read_text()
    )
    index_template = Template(
        (script_dir / "white_paper_index_template.md.j2").read_text()
    )

    # The index page comes first, followed by the individual pages.
    index_target_file = docs_path / "index.md"
    index_target_file.write_text(index_template.render(config=config))
    print("Index written to", index_target_file)

    for paper in config.white_papers:
        assert paper.metadata is not None, "White paper meta data is missing"
        page_file = docs_path / f"{paper.slug}.md"
        page_file.write_text(page_template.render(whp=paper, image_path="figures"))
        print("-", paper.metadata.title, "->", page_file)
0329 
0330 
# Start the command line interface only when run as a script.
if __name__ == "__main__":
    app()