import os

import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from transformers import pipeline

# Model cache: pipelines are loaded once, on first use.
_MODELS = {}


def load_models():
    """Load and cache the generation pipelines (lazy, one-time)."""
    # Image generation model (also reused for logos, with a styled prompt)
    if "image_pipe" not in _MODELS:
        _MODELS["image_pipe"] = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-1",
            torch_dtype=torch.float16,
        ).to("cuda")

    # Video generation model
    if "video_pipe" not in _MODELS:
        _MODELS["video_pipe"] = StableVideoDiffusionPipeline.from_pretrained(
            "stabilityai/stable-video-diffusion-img2vid-xt",
            torch_dtype=torch.float16,
            variant="fp16",
        ).to("cuda")

    # Face swapping model
    if "faceswap_pipe" not in _MODELS:
        _MODELS["faceswap_pipe"] = pipeline(
            "image-to-image", model="ocean-sunset/swapper"
        )

    return _MODELS


# ===== Core Functions =====

def generate_image(prompt, negative_prompt=""):
    """Generate a single image from a text prompt."""
    models = load_models()
    image = models["image_pipe"](
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=25,
    ).images[0]
    return image


def generate_logo(prompt, style="minimal"):
    """Generate a logo by steering the image model with a style-rich prompt."""
    models = load_models()
    logo = models["image_pipe"](
        f"{prompt}, {style} style logo, vector art, high quality",
        num_inference_steps=30,
    ).images[0]
    return logo


def create_video(prompt, duration=2.0, fps=10):
    """Generate a key frame from the prompt, then animate it with SVD."""
    models = load_models()
    image = generate_image(prompt)
    # SVD conditions on a 1024x576 image.
    image = image.resize((1024, 576))
    frames = models["video_pipe"](
        image,
        decode_chunk_size=5,
        motion_bucket_id=180,
        noise_aug_strength=0.1,
        num_frames=int(duration * fps),
    ).frames[0]
    # Encode the frames to an .mp4 file that gr.Video can display.
    video_path = export_to_video(frames, "generated_video.mp4", fps=int(fps))
    return video_path, image


def face_swap(source_img, target_video):
    """Swap the source face into every frame of the target video.

    Frame extraction and reassembly are left as stubs; see the OpenCV
    sketch at the end of this file for one way to fill them in.
    """
    models = load_models()

    # Extract frames from the target video (FFmpeg processing would go here)
    frames = []  # TODO: decode target_video into a list of PIL frames

    # Process each frame
    swapped_frames = []
    for frame in frames:
        result = models["faceswap_pipe"](source_img, frame)
        swapped_frames.append(result)

    # Convert the swapped frames back to a video (FFmpeg processing)
    output_video = None  # TODO: encode swapped_frames to a video file
    return output_video


def mix_audios(audio1, audio2, mix_ratio=0.5):
    """Mix two tracks with a weighted sum; no model required.

    Gradio's Audio component delivers (sample_rate, numpy_array) tuples.
    """
    sr1, data1 = audio1
    sr2, data2 = audio2
    if sr1 != sr2:
        raise gr.Error("Both tracks must share the same sample rate.")
    # Trim to the shorter track, then blend with the requested ratio.
    n = min(len(data1), len(data2))
    mixed = (
        mix_ratio * data1[:n].astype(np.float32)
        + (1 - mix_ratio) * data2[:n].astype(np.float32)
    )
    return sr1, mixed.astype(data1.dtype)


# ===== Gradio Interface =====
with gr.Blocks(title="AI Media Studio", theme=gr.themes.Glass()) as demo:
    gr.Markdown("# 🎨 AI Media Creation Studio")
    gr.Markdown("Generate videos, images, logos, face swaps, and mixed audio")

    with gr.Tab("🎨 Image Generation"):
        img_prompt = gr.Textbox(label="Describe your image")
        img_neg_prompt = gr.Textbox(label="Exclude from image")
        img_btn = gr.Button("Generate")
        img_output = gr.Image(label="Generated Image")
        img_btn.click(generate_image, [img_prompt, img_neg_prompt], img_output)

    with gr.Tab("🖼️ Logo Design"):
        logo_prompt = gr.Textbox(label="Describe your logo")
        logo_style = gr.Radio(
            ["minimal", "vintage", "modern", "handdrawn"], value="minimal"
        )
        logo_btn = gr.Button("Design Logo")
        logo_output = gr.Image(label="Generated Logo")
        logo_btn.click(generate_logo, [logo_prompt, logo_style], logo_output)

    with gr.Tab("🎥 Video Creation"):
        vid_prompt = gr.Textbox(label="Describe your video")
        vid_duration = gr.Slider(1, 10, value=3, label="Duration (sec)")
        vid_fps = gr.Slider(5, 30, value=10, label="FPS")
        vid_btn = gr.Button("Generate Video")
        vid_output = gr.Video(label="Generated Video")
        vid_preview = gr.Image(label="Key Frame", visible=True)
        vid_btn.click(
            create_video,
            [vid_prompt, vid_duration, vid_fps],
            [vid_output, vid_preview],
        )

    with gr.Tab("👤 Face Swap"):
        with gr.Row():
            face_source = gr.Image(label="Source Face", type="pil")
            face_target = gr.Video(label="Target Video")
        face_btn = gr.Button("Swap Faces")
        face_output = gr.Video(label="Result Video")
        face_btn.click(face_swap, [face_source, face_target], face_output)

    with gr.Tab("🔊 Audio Mixing"):
        audio1 = gr.Audio(label="Audio Track 1")
        audio2 = gr.Audio(label="Audio Track 2")
        mix_ratio = gr.Slider(0, 1, value=0.5, label="Track 1 Volume")
        audio_btn = gr.Button("Mix Audio")
        audio_output = gr.Audio(label="Mixed Audio")
        audio_btn.click(mix_audios, [audio1, audio2, mix_ratio], audio_output)

# ===== Deployment Settings =====
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", 7860)),
        share=True,
    )
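# ===== Optional Helper Sketch (not wired into the app above) =====
# A minimal sketch of the frame extraction/reassembly that face_swap leaves
# stubbed, using OpenCV (cv2) in place of raw FFmpeg. The helper names
# extract_frames / frames_to_video are illustrative, not library APIs, and
# opencv-python is an assumed extra dependency.

def extract_frames(video_path):
    """Decode a video file into a list of RGB PIL images."""
    import cv2
    from PIL import Image

    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # OpenCV decodes to BGR; convert to RGB for PIL.
        frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    cap.release()
    return frames


def frames_to_video(frames, out_path, fps=24):
    """Encode a list of RGB PIL images into an .mp4 file."""
    import cv2

    width, height = frames[0].size
    writer = cv2.VideoWriter(
        out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )
    for frame in frames:
        writer.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
    writer.release()
    return out_path

# Example wiring inside face_swap (illustrative):
#   frames = extract_frames(target_video)
#   ...
#   output_video = frames_to_video(swapped_frames, "swapped.mp4")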