---
# metadata
# Search/discovery tags attached to the model.
tags:
  - image-generation
  - generative-model
  - multimodal
  - SOTA
# Name of the model and the category it is filed under.
model_name: CustomImageGenerator
model_type: image-generation
# Free-text summary of the model. Folded style (>) joins the wrapped lines
# below into a single paragraph.
description: >
  CustomImageGenerator is a state-of-the-art multimodal generative
  model based on the GPT-2 architecture, capable of generating
  high-quality images from textual prompts. The model combines advanced
  techniques from natural language processing (NLP) and computer vision
  to produce visually coherent and contextually relevant images.
# Declared base architecture of the model.
architecture: GPT-2
# Tasks the model is listed under.
tasks:
  - image-generation
# Background reading; each entry is a title/url pair.
references:
  # NOTE(review): the linked PDF's filename suggests the paper title is
  # "Language Models are Unsupervised Multitask Learners" - confirm the title.
  - title: Generative Pre-trained Transformer 2.0
    # Strip chomping (>-) keeps the URL free of a trailing newline while still
    # allowing the long value to sit on its own line.
    url: >-
      https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf
  # NOTE(review): verify this title against the arXiv abstract page for
  # 1511.02793; it may not be the paper's actual title.
  - title: Learning to Generate Images from Text
    url: https://arxiv.org/abs/1511.02793
  # NOTE(review): verify this title against the arXiv abstract page for
  # 2105.05233; it may not be the paper's actual title.
  - title: Stable Diffusion Models for Image Generation
    url: https://arxiv.org/abs/2105.05233
# Other models listed as related; each entry has a name, a short description,
# and a repository URL.
related_models:
  - name: BigGAN
    description: >-
      State-of-the-art generative adversarial network (GAN) for image
      generation.
    url: https://github.com/ajbrock/BigGAN-PyTorch
  - name: CLIP
    # Uses >- like the entry above so neither description ends in a newline.
    description: >-
      Contrastive Language-Image Pre-training model for understanding images and
      text.
    url: https://github.com/openai/CLIP
# Language code(s) listed for the model.
language:
  - en
# License identifier for the model.
license: apache-2.0