{"name":"Loadbay","description":"Catalog of harnesses for AI agents, across domains.","repo":"https://github.com/alessandrorodi/loadbay","count":50,"domains":{"trading":"Trading","coding":"Coding & DevOps","browser":"Browser & computer","productivity":"Productivity","data":"Data & search","social":"Social & comms","health":"Health","science":"Research & science","gaming":"Gaming","media":"Voice & media","robotics":"Robotics"},"traits":{"mcp":"Exposes an MCP server","keys":"Needs API keys / auth","sandbox":"Runs sandboxed","guard":"Guardrails / limits"},"harnesses":[{"slug":"minimax-mcp","name":"MiniMax-MCP","author":"MiniMax-AI","domain":"media","integrations":["MiniMax"],"language":"Python","license":"MIT","stars":1500,"traits":["mcp","oss","keys"],"summary":"MiniMax's official MCP server — give an agent text-to-speech, voice cloning, and image and video generation.","repo":"https://github.com/MiniMax-AI/MiniMax-MCP","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/minimax-mcp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"bolna","name":"Bolna","author":"bolna-ai","domain":"media","integrations":["Voice"],"language":"Python","license":"MIT","stars":675,"traits":["oss","keys"],"summary":"An open framework for building conversational voice AI agents, including telephony.","repo":"https://github.com/bolna-ai/bolna","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/bolna","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"generative-media-skills","name":"Generative-Media-Skills","author":"SamurAIGPT","domain":"media","integrations":["Media"],"language":"Shell","license":"MIT","stars":3600,"traits":["oss","keys"],"summary":"Multimodal generative-media skills (image, video, audio) you mount into a coding or agent 
runtime.","repo":"https://github.com/SamurAIGPT/Generative-Media-Skills","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/generative-media-skills","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"pipecat","name":"pipecat","author":"pipecat-ai","domain":"media","integrations":["ElevenLabs","Deepgram"],"language":"Python","license":"Other","stars":12870,"traits":["oss","keys"],"summary":"Open-source framework for building realtime voice and multimodal conversational AI agents with pluggable STT, LLM, and TTS.","repo":"https://github.com/pipecat-ai/pipecat","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/pipecat","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"livekit-agents","name":"agents","author":"livekit","domain":"media","integrations":["LiveKit","ElevenLabs"],"language":"Python","license":"Apache-2.0","stars":11019,"traits":["oss","keys"],"summary":"Framework for building realtime voice and video AI agents that join LiveKit rooms with streaming STT, LLM, and TTS pipelines.","repo":"https://github.com/livekit/agents","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/livekit-agents","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"audiocraft","name":"audiocraft","author":"facebookresearch","domain":"media","integrations":["MusicGen"],"language":"Python","license":"MIT","stars":23380,"traits":["oss"],"summary":"Meta's library for audio generation featuring the MusicGen music model and the EnCodec audio tokenizer.","repo":"https://github.com/facebookresearch/audiocraft","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/audiocraft","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until 
paid."}},{"slug":"diffusers","name":"diffusers","author":"huggingface","domain":"media","integrations":["Stable Diffusion","Flux"],"language":"Python","license":"Apache-2.0","stars":33879,"traits":["oss"],"summary":"Hugging Face library of state-of-the-art diffusion models and pipelines for image, video, and audio generation in PyTorch.","repo":"https://github.com/huggingface/diffusers","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/diffusers","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"elevenlabs-python","name":"elevenlabs-python","author":"elevenlabs","domain":"media","integrations":["ElevenLabs"],"language":"Python","license":"MIT","stars":3006,"traits":["oss","keys"],"summary":"Official Python SDK for the ElevenLabs API, giving agents text-to-speech, voice cloning, and audio generation.","repo":"https://github.com/elevenlabs/elevenlabs-python","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/elevenlabs-python","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"whisper-streaming","name":"whisper_streaming","author":"ufal","domain":"media","integrations":["Whisper"],"language":"Python","license":"MIT","stars":3640,"traits":["oss"],"summary":"Realtime streaming wrapper around Whisper for long-form, low-latency speech-to-text transcription and translation.","repo":"https://github.com/ufal/whisper_streaming","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/whisper-streaming","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"spotify-mcp","name":"spotify-mcp","author":"varunneal","domain":"media","integrations":["Spotify"],"language":"Python","license":"MIT","stars":605,"traits":["mcp","oss","keys"],"summary":"MCP server that connects an LLM to Spotify to control 
playback, search the catalog, and manage queues and playlists.","repo":"https://github.com/varunneal/spotify-mcp","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/spotify-mcp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"youtube-transcript-mcp","name":"mcp-server-youtube-transcript","author":"kimtaeyoon83","domain":"media","integrations":["YouTube"],"language":"TypeScript","license":"MIT","stars":563,"traits":["mcp","oss"],"summary":"MCP server that fetches YouTube video transcripts so an AI assistant can read and summarize video content.","repo":"https://github.com/kimtaeyoon83/mcp-server-youtube-transcript","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/youtube-transcript-mcp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"blender-mcp","name":"blender-mcp","author":"ahujasid","domain":"media","integrations":["Blender"],"language":"Python","license":"MIT","stars":22876,"traits":["mcp","oss"],"summary":"MCP server that connects Claude to Blender for prompt-driven 3D modeling, scene creation, and rendering.","repo":"https://github.com/ahujasid/blender-mcp","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/blender-mcp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"comfyui","name":"ComfyUI","author":"comfyanonymous","domain":"media","integrations":["Stable Diffusion","Flux"],"language":"Python","license":"GPL-3.0","stars":117384,"traits":["oss"],"summary":"Modular node-graph diffusion GUI, API, and backend for image and video generation that agents can drive via workflows.","repo":"https://github.com/comfyanonymous/ComfyUI","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/comfyui","note":"GET with an x402 client to tip the 
author; returns 402 with payment requirements until paid."}},{"slug":"fooocus","name":"Fooocus","author":"lllyasviel","domain":"media","integrations":["Stable Diffusion"],"language":"Python","license":"GPL-3.0","stars":50314,"traits":["oss"],"summary":"Streamlined Stable Diffusion image generator focused on prompting with minimal configuration.","repo":"https://github.com/lllyasviel/Fooocus","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/fooocus","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"invokeai","name":"InvokeAI","author":"invoke-ai","domain":"media","integrations":["Stable Diffusion"],"language":"TypeScript","license":"Apache-2.0","stars":27456,"traits":["oss"],"summary":"Creative engine and WebUI for Stable Diffusion with a node workflow system and REST API for generating visual media.","repo":"https://github.com/invoke-ai/InvokeAI","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/invokeai","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"stable-diffusion-webui","name":"stable-diffusion-webui","author":"AUTOMATIC1111","domain":"media","integrations":["Stable Diffusion"],"language":"Python","license":"AGPL-3.0","stars":163770,"traits":["oss"],"summary":"The most widely used Stable Diffusion web UI with extensions and an API endpoint agents can call for text-to-image.","repo":"https://github.com/AUTOMATIC1111/stable-diffusion-webui","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/stable-diffusion-webui","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"animatediff","name":"AnimateDiff","author":"guoyww","domain":"media","integrations":["Stable Diffusion"],"language":"Python","license":"Apache-2.0","stars":12144,"traits":["oss"],"summary":"Official 
implementation that animates personalized text-to-image diffusion models into short videos.","repo":"https://github.com/guoyww/AnimateDiff","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/animatediff","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"hunyuanvideo","name":"HunyuanVideo","author":"Tencent-Hunyuan","domain":"media","integrations":["HunyuanVideo"],"language":"Python","license":"Other","stars":12217,"traits":["oss"],"summary":"Tencent framework and open weights for large-scale text-to-video generation.","repo":"https://github.com/Tencent-Hunyuan/HunyuanVideo","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/hunyuanvideo","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"wan2-1","name":"Wan2.1","author":"Wan-Video","domain":"media","integrations":["Wan"],"language":"Python","license":"Apache-2.0","stars":16272,"traits":["oss"],"summary":"Open large-scale video generative models from Alibaba supporting text-to-video and image-to-video synthesis.","repo":"https://github.com/Wan-Video/Wan2.1","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/wan2-1","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"cogvideo","name":"CogVideo","author":"zai-org","domain":"media","integrations":["CogVideoX"],"language":"Python","license":"Apache-2.0","stars":12794,"traits":["oss"],"summary":"Text-to-video and image-to-video generation models including CogVideoX with open weights and inference code.","repo":"https://github.com/zai-org/CogVideo","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/cogvideo","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until 
paid."}},{"slug":"mochi","name":"mochi","author":"genmoai","domain":"media","integrations":["Mochi"],"language":"Python","license":"Apache-2.0","stars":3670,"traits":["oss"],"summary":"Open text-to-video generation model from Genmo with code and weights for high-fidelity motion synthesis.","repo":"https://github.com/genmoai/mochi","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/mochi","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"riffusion-hobby","name":"riffusion-hobby","author":"riffusion","domain":"media","integrations":["Riffusion"],"language":"Python","license":"MIT","stars":3898,"traits":["oss"],"summary":"Real-time music generation using Stable Diffusion applied to audio spectrogram images.","repo":"https://github.com/riffusion/riffusion-hobby","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/riffusion-hobby","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"coqui-tts","name":"TTS","author":"coqui-ai","domain":"media","integrations":["XTTS"],"language":"Python","license":"Other","stars":45573,"traits":["oss"],"summary":"Deep-learning text-to-speech and voice-cloning toolkit with many pretrained multilingual models.","repo":"https://github.com/coqui-ai/TTS","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/coqui-tts","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"f5-tts","name":"F5-TTS","author":"SWivid","domain":"media","integrations":["F5-TTS"],"language":"Python","license":"MIT","stars":14771,"traits":["oss"],"summary":"Flow-matching text-to-speech model for fast, fluent zero-shot voice cloning from a short reference 
clip.","repo":"https://github.com/SWivid/F5-TTS","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/f5-tts","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"openvoice","name":"OpenVoice","author":"myshell-ai","domain":"media","integrations":["OpenVoice"],"language":"Python","license":"MIT","stars":36726,"traits":["oss"],"summary":"Instant voice-cloning audio model that copies tone color and controls style across languages from one reference clip.","repo":"https://github.com/myshell-ai/OpenVoice","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/openvoice","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"bark","name":"bark","author":"suno-ai","domain":"media","integrations":["Bark"],"language":"Python","license":"MIT","stars":39159,"traits":["oss"],"summary":"Text-prompted generative audio model that produces speech, music, sound effects, and nonverbal sounds from text.","repo":"https://github.com/suno-ai/bark","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/bark","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"chattts","name":"ChatTTS","author":"2noise","domain":"media","integrations":["ChatTTS"],"language":"Python","license":"AGPL-3.0","stars":39469,"traits":["oss"],"summary":"Generative speech model optimized for natural conversational dialogue in English and Chinese.","repo":"https://github.com/2noise/ChatTTS","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/chattts","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until 
paid."}},{"slug":"kokoro","name":"kokoro","author":"hexgrad","domain":"media","integrations":["Kokoro"],"language":"Python","license":"Apache-2.0","stars":7520,"traits":["oss"],"summary":"Lightweight 82M-parameter text-to-speech model delivering high-quality multilingual voices with low compute.","repo":"https://github.com/hexgrad/kokoro","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/kokoro","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"sadtalker","name":"SadTalker","author":"OpenTalker","domain":"media","integrations":["SadTalker"],"language":"Python","license":"Other","stars":13902,"traits":["oss"],"summary":"Generates stylized talking-head videos from a single portrait image and an audio clip.","repo":"https://github.com/OpenTalker/SadTalker","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/sadtalker","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"wav2lip","name":"Wav2Lip","author":"Rudrabha","domain":"media","integrations":["Wav2Lip"],"language":"Python","license":"Unlicensed","stars":13046,"traits":["oss"],"summary":"Lip-sync model that accurately matches a face video to any target speech audio in the wild.","repo":"https://github.com/Rudrabha/Wav2Lip","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/wav2lip","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"screenshot-to-code","name":"screenshot-to-code","author":"abi","domain":"media","integrations":["Claude","GPT"],"language":"Python","license":"MIT","stars":72941,"traits":["oss","keys"],"summary":"Drop in a screenshot and convert it to clean HTML, Tailwind, React, or Vue code using vision 
models.","repo":"https://github.com/abi/screenshot-to-code","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/screenshot-to-code","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"draw-a-ui","name":"draw-a-ui","author":"SawyerHood","domain":"media","integrations":["GPT"],"language":"TypeScript","license":"MIT","stars":13601,"traits":["oss","keys"],"summary":"Draw a low-fidelity mockup on a canvas and generate working HTML from it with a vision model.","repo":"https://github.com/SawyerHood/draw-a-ui","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/draw-a-ui","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"hunyuan3d-2","name":"Hunyuan3D-2","author":"Tencent-Hunyuan","domain":"media","integrations":["Hunyuan3D"],"language":"Python","license":"Other","stars":13988,"traits":["oss"],"summary":"High-resolution image-to-3D and text-to-3D asset generation using large-scale Hunyuan3D diffusion models.","repo":"https://github.com/Tencent-Hunyuan/Hunyuan3D-2","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/hunyuan3d-2","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"whisper","name":"Whisper","author":"openai","domain":"media","integrations":["PyTorch","ffmpeg","HuggingFace"],"language":"Python","license":"MIT","stars":102900,"traits":["oss"],"summary":"OpenAI's robust multilingual speech-to-text model and the de-facto open standard for transcription and translation.","repo":"https://github.com/openai/whisper","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/whisper","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until 
paid."}},{"slug":"whisper-cpp","name":"whisper.cpp","author":"ggml-org","domain":"media","integrations":["ggml","CUDA","Core ML"],"language":"C++","license":"MIT","stars":50800,"traits":["oss","sandbox"],"summary":"High-performance C/C++ port of Whisper for fast local and on-device speech-to-text with no Python runtime.","repo":"https://github.com/ggml-org/whisper.cpp","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/whisper-cpp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"whisperx","name":"WhisperX","author":"m-bain","domain":"media","integrations":["faster-whisper","pyannote","PyTorch"],"language":"Python","license":"BSD-2-Clause","stars":22500,"traits":["oss"],"summary":"Whisper with word-level timestamps, voice activity detection, and speaker diarization for aligned, speaker-attributed transcripts.","repo":"https://github.com/m-bain/whisperX","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/whisperx","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"flux","name":"FLUX.1","author":"black-forest-labs","domain":"media","integrations":["PyTorch","ComfyUI","diffusers"],"language":"Python","license":"Apache-2.0","stars":25600,"traits":["oss","keys"],"summary":"Official inference for the FLUX.1 family, the leading open-weight text-to-image diffusion models.","repo":"https://github.com/black-forest-labs/flux","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/flux","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"stability-generative-models","name":"Stability generative-models","author":"Stability-AI","domain":"media","integrations":["PyTorch","diffusers","ComfyUI"],"language":"Python","license":"MIT","stars":27200,"traits":["oss"],"summary":"Stability AI official repo 
for SDXL, SD 2.x, and Stable Video Diffusion, the canonical Stable Diffusion model family.","repo":"https://github.com/Stability-AI/generative-models","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/stability-generative-models","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"fish-speech","name":"Fish Speech","author":"fishaudio","domain":"media","integrations":["PyTorch","HuggingFace","Gradio"],"language":"Python","license":"Apache-2.0","stars":30800,"traits":["oss"],"summary":"State-of-the-art open multilingual text-to-speech with voice cloning and low-latency synthesis.","repo":"https://github.com/fishaudio/fish-speech","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/fish-speech","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"chatterbox","name":"Chatterbox","author":"resemble-ai","domain":"media","integrations":["PyTorch","HuggingFace"],"language":"Python","license":"MIT","stars":25100,"traits":["oss"],"summary":"Resemble AI's open SOTA text-to-speech with expressive zero-shot voice cloning and emotion control.","repo":"https://github.com/resemble-ai/chatterbox","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/chatterbox","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"csm","name":"CSM","author":"SesameAILabs","domain":"media","integrations":["PyTorch","HuggingFace"],"language":"Python","license":"Apache-2.0","stars":14600,"traits":["oss"],"summary":"Sesame's open conversational speech generation model for natural, context-aware spoken dialogue and voice agents.","repo":"https://github.com/SesameAILabs/csm","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/csm","note":"GET with an x402 client to tip the author; returns 402 with 
payment requirements until paid."}},{"slug":"liveportrait","name":"LivePortrait","author":"KlingAIResearch","domain":"media","integrations":["PyTorch","ComfyUI","ONNX"],"language":"Python","license":"MIT","stars":18600,"traits":["oss"],"summary":"Efficient portrait animation that drives a single source image with video, audio, or image-derived motion.","repo":"https://github.com/KlingAIResearch/LivePortrait","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/liveportrait","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"open-sora","name":"Open-Sora","author":"hpcaitech","domain":"media","integrations":["PyTorch","HuggingFace","Colossal-AI"],"language":"Python","license":"Apache-2.0","stars":29100,"traits":["oss"],"summary":"Open-source text-to-video generation framework aiming to democratize Sora-style video with full training and inference code.","repo":"https://github.com/hpcaitech/Open-Sora","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/open-sora","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"real-esrgan","name":"Real-ESRGAN","author":"xinntao","domain":"media","integrations":["PyTorch","ComfyUI","NCNN"],"language":"Python","license":"BSD-3-Clause","stars":35800,"traits":["oss"],"summary":"Practical algorithms for general image and video super-resolution and restoration; the standard open upscaler.","repo":"https://github.com/xinntao/Real-ESRGAN","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/real-esrgan","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"facefusion","name":"FaceFusion","author":"facefusion","domain":"media","integrations":["ONNX 
Runtime","ffmpeg","Gradio"],"language":"Python","license":"Other","stars":28800,"traits":["oss","sandbox"],"summary":"Open face-swapping and face-manipulation platform with CLI and headless modes for automated media pipelines.","repo":"https://github.com/facefusion/facefusion","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/facefusion","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"elevenlabs-mcp","name":"ElevenLabs MCP","author":"elevenlabs","domain":"media","integrations":["MCP","Claude Desktop","Cursor"],"language":"Python","license":"MIT","stars":1100,"traits":["oss","mcp","keys"],"summary":"Official ElevenLabs MCP server exposing text-to-speech, voice cloning, speech-to-text, and audio tools to MCP clients.","repo":"https://github.com/elevenlabs/elevenlabs-mcp","community":false,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/elevenlabs-mcp","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"openmontage","name":"OpenMontage","author":"calesthio","domain":"media","integrations":["Claude Code","ElevenLabs","Stable Diffusion","FFmpeg","Remotion"],"language":null,"license":"AGPL-3.0","stars":23115,"traits":["oss","keys"],"summary":"An agentic video production system with 12 pipelines, 52 tools, and 500+ agent skills — an agent handles research, scripting, asset generation, clip retrieval, timeline editing, and final rendering from a plain-language prompt.","repo":"https://github.com/calesthio/OpenMontage","community":true,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/openmontage","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"video-use","name":"video-use","author":"browser-use","domain":"media","integrations":["Claude 
Code","Codex","Hermes","ElevenLabs","ffmpeg","HyperFrames","Remotion","Manim"],"language":null,"license":"MIT","stars":10438,"traits":["oss","keys"],"summary":"An open-source video editing harness for coding agents: drop raw footage in a folder, chat with Claude Code or any shell-capable agent, and get a finished video back. Agents drive ffmpeg pipelines to cut filler words, auto color grade, burn subtitles, and spawn parallel sub-agents for animation overlays.","repo":"https://github.com/browser-use/video-use","community":true,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/video-use","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"claude-video","name":"claude-video","author":"bradautomates","domain":"media","integrations":["Claude Code","YouTube","ffmpeg","Whisper"],"language":null,"license":"MIT","stars":8000,"traits":["oss","keys"],"summary":"Claude Code slash command (/watch) that downloads any video, extracts frames, transcribes audio, and feeds the full context to Claude so the agent can reason about video content.","repo":"https://github.com/bradautomates/claude-video","community":true,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/claude-video","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}},{"slug":"pollinations","name":"Pollinations","author":"pollinations","domain":"media","integrations":["Claude","MCP clients","Stable Diffusion","Flux"],"language":null,"license":"MIT","stars":4855,"traits":["oss","mcp"],"summary":"An open-source generative AI platform with a built-in MCP server that unifies text, image, video, audio, 3D, and embedding generation under a single API endpoint, powering 500+ community projects without requiring API 
keys.","repo":"https://github.com/pollinations/pollinations","community":true,"tip":{"method":"x402","network":"eip155:8453","endpoint":"/api/tip/pollinations","note":"GET with an x402 client to tip the author; returns 402 with payment requirements until paid."}}]}