
Claude

```mermaid
graph TD
    A[Input Text] --> B[Tokenization]
    B --> C[Embedding]
    C --> D[Transformer Encoder]
    D --> E[Transformer Decoder]
    E --> F[Output Text]

    subgraph Transformer
        D[Transformer Encoder]
        E[Transformer Decoder]
    end

    subgraph AttentionMechanism
        G[Multi-Head Attention]
        H[Feed Forward]
        I[Add & Norm]
    end

    D --> G
    G --> H
    H --> I
    I --> D
    I --> E

    classDef input fill:#f9f,stroke:#333,stroke-width:4px
    classDef output fill:#9ff,stroke:#333,stroke-width:4px
    classDef transformer fill:#ff9,stroke:#333,stroke-width:2px
    classDef attention fill:#f90,stroke:#333,stroke-width:2px

    class A input
    class F output
    class Transformer transformer
    class AttentionMechanism attention
```
Zero-shot
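
Claude's diagram describes an encoder-decoder transformer: text is tokenized, embedded, passed through encoder and decoder stacks (built from multi-head attention, feed-forward, and add & norm sublayers), then projected back to text. As a rough companion to the picture, here is a minimal PyTorch sketch of that flow; the class name, dimensions, and vocabulary size are made up for illustration and are not part of the original notes.

```python
import torch
import torch.nn as nn

class TinyEncoderDecoder(nn.Module):
    """Toy encoder-decoder transformer mirroring the diagram:
    tokens -> embedding -> encoder -> decoder -> output logits."""

    def __init__(self, vocab_size=32000, d_model=512, nhead=8, num_layers=6):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)   # Embedding step
        self.transformer = nn.Transformer(               # Encoder + decoder stacks
            d_model=d_model, nhead=nhead,
            num_encoder_layers=num_layers, num_decoder_layers=num_layers,
            batch_first=True,
        )
        self.lm_head = nn.Linear(d_model, vocab_size)    # Project back toward tokens

    def forward(self, src_ids, tgt_ids):
        src = self.embed(src_ids)            # tokenization itself is assumed done upstream
        tgt = self.embed(tgt_ids)
        hidden = self.transformer(src, tgt)
        return self.lm_head(hidden)          # logits; decoding yields the "Output Text"

# Dummy usage with made-up token ids
ids = torch.randint(0, 32000, (1, 16))
logits = TinyEncoderDecoder()(ids, ids)      # shape: (1, 16, 32000)
```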


ChatGPT

Graph 1

```mermaid
graph TD;
    A[Input Layer: Text Tokens] --> B[Embedding Layer];
    B --> C[Transformer Block 1];
    C --> D[Transformer Block 2];
    D --> E[...];
    E --> F[Transformer Block N];
    F --> G[Output Layer: Predicted Next Token];
```
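
Graph 1 is the stacked-blocks view: token embeddings pass through N identical transformer blocks, and a final projection yields next-token logits. Below is a hedged PyTorch sketch of that stacking, using nn.TransformerEncoderLayer as a stand-in block; positional encoding and causal masking are omitted, and all names and sizes are illustrative rather than taken from the source.

```python
import torch.nn as nn

class TinyLM(nn.Module):
    """Stack matching Graph 1: embed -> Transformer Block 1..N -> next-token logits."""

    def __init__(self, vocab_size=32000, d_model=512, nhead=8, n_blocks=12):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        self.blocks = nn.ModuleList(
            nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
            for _ in range(n_blocks)
        )
        self.lm_head = nn.Linear(d_model, vocab_size)

    def forward(self, token_ids):
        x = self.embed(token_ids)       # Embedding Layer
        for block in self.blocks:       # Transformer Block 1 ... N
            x = block(x)
        return self.lm_head(x)          # logits for the predicted next token
```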

Graph 2

```mermaid
graph TD;
    A[Input from Previous Layer] --> B[Self-Attention Mechanism];
    B --> C[Add & Normalize];
    C --> D[Feed-Forward Neural Network];
    D --> E[Add & Normalize];
    E --> F[Output to Next Layer];
    A -->|Skip Connection| C;
    C -->|Skip Connection| E;
```

Zero-shot
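
Graph 2 zooms into a single block: self-attention and a feed-forward network, each followed by an add & normalize step, with skip connections carrying the sublayer input around each sublayer (post-norm residuals). Here is a hedged PyTorch sketch of exactly that wiring; the dimensions and the GELU activation are illustrative choices, not something specified in the source.

```python
import torch.nn as nn

class TransformerBlock(nn.Module):
    """One block as in Graph 2: attention + add&norm, feed-forward + add&norm."""

    def __init__(self, d_model=512, nhead=8, d_ff=2048):
        super().__init__()
        self.attn = nn.MultiheadAttention(d_model, nhead, batch_first=True)
        self.norm1 = nn.LayerNorm(d_model)
        self.ff = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.GELU(), nn.Linear(d_ff, d_model),
        )
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        attn_out, _ = self.attn(x, x, x)   # Self-Attention Mechanism
        x = self.norm1(x + attn_out)       # Add & Normalize (skip connection from input)
        ff_out = self.ff(x)                # Feed-Forward Neural Network
        x = self.norm2(x + ff_out)         # Add & Normalize (second skip connection)
        return x                           # Output to Next Layer
```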