Update tech_docs/its_the_new_style_design.md
This commit is contained in:
@@ -1,3 +1,358 @@
|
|||||||
|
──────────────────────────────
|
||||||
|
MyCorp “One-Box Wonder” – Complete & Idempotent deploy.sh
|
||||||
|
──────────────────────────────
|
||||||
|
Purpose
|
||||||
|
- Fresh Debian 12 → fully working DNS + DHCP + CA + Observability
|
||||||
|
- Re-run any time; only missing packages & changed configs are touched
|
||||||
|
- All paths / variables live at the top of the script—edit once, never chase.
|
||||||
|
|
||||||
|
Save as `deploy.sh`, `chmod +x`, then `./deploy.sh`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# MyCorp One-Box Wonder – Universal Deployer
|
||||||
|
# Debian 12 minimal, SSH only
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
#############################
|
||||||
|
# 0. Tunables – CHANGE HERE #
|
||||||
|
#############################
|
||||||
|
HOST_IP="10.0.255.1"
|
||||||
|
DOMAIN_ROOT="mycorp.net"
|
||||||
|
REPO_URL="https://git.mycorp.net/infra/onebox-wonder.git"
|
||||||
|
LOKI_RETENTION="7d"
|
||||||
|
GRAFANA_ADMIN_PASS="changeme123"
|
||||||
|
STEP_CA_NAME="MyCorp Internal CA"
|
||||||
|
STEP_PROVISIONER="admin@mycorp.net"
|
||||||
|
#############################
|
||||||
|
|
||||||
|
# Print the given words as one message prefixed with a [YYYY-MM-DD_HH:MM:SS] stamp.
log() {
  printf '[%s] %s\n' "$(date +%F_%T)" "$*"
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 1. Base OS
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
log "Updating OS"
|
||||||
|
apt-get update -qq
|
||||||
|
DEBIAN_FRONTEND=noninteractive apt-get -y -qq upgrade
|
||||||
|
# rsync and ufw are used further down but are not part of a minimal install;
# Debian ships the node exporter as "prometheus-node-exporter".
# NOTE(review): grafana, loki and promtail are not in Debian's main archive —
# presumably the Grafana APT repository is configured beforehand; confirm.
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
  dnsmasq prometheus grafana loki promtail \
  prometheus-node-exporter curl wget git jq rsync ufw
|
||||||
|
|
||||||
|
# Disable systemd-resolved so dnsmasq owns :53
|
||||||
|
systemctl disable --now systemd-resolved || true
|
||||||
|
# systemd-resolved has just been disabled, so a symlink into /run/systemd/resolve
# would dangle (at once on hosts without resolved, otherwise after a reboot) and
# break name resolution for the git clone below. Snapshot the upstream resolvers
# into a regular file instead; skip when there is nothing to copy.
if [[ -s /run/systemd/resolve/resolv.conf ]]; then
  cp --remove-destination /run/systemd/resolve/resolv.conf /etc/resolv.conf
fi
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 2. Clone or refresh configuration repo
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
if [[ -d /opt/onebox ]]; then
|
||||||
|
log "Pulling latest config"
|
||||||
|
git -C /opt/onebox pull
|
||||||
|
else
|
||||||
|
log "Cloning repo"
|
||||||
|
git clone "$REPO_URL" /opt/onebox
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 3. Install dnsmasq configs
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
rsync -a /opt/onebox/files/dnsmasq.d/ /etc/dnsmasq.d/
|
||||||
|
rsync -a /opt/onebox/files/dnsmasq-static-hosts /etc/
|
||||||
|
# Abort on a bad config: a failing test inside an `&&` list does not trip
# `set -e`, so the script would otherwise carry on with the old dnsmasq running.
if ! dnsmasq --test; then
  log "dnsmasq config test failed"
  exit 1
fi
systemctl restart dnsmasq
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 4. Step-CA (internal ACME)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
if [[ ! -f /etc/step-ca/config/ca.json ]]; then
|
||||||
|
log "Initializing Step-CA"
|
||||||
|
useradd -r -s /bin/false step || true
|
||||||
|
step ca init --name "$STEP_CA_NAME" \
|
||||||
|
--dns "ns.infra.$DOMAIN_ROOT" \
|
||||||
|
--address ":443" \
|
||||||
|
--provisioner "$STEP_PROVISIONER" \
|
||||||
|
--password-file <(echo "${STEP_CA_NAME}") \
|
||||||
|
--root /etc/step-ca/certs/root_ca.crt \
|
||||||
|
--key /etc/step-ca/secrets/root_ca_key \
|
||||||
|
--config /etc/step-ca/config/ca.json
|
||||||
|
step ca provisioner add acme --type ACME --config /etc/step-ca/config/ca.json
|
||||||
|
fi
|
||||||
|
rsync -a /opt/onebox/files/step-ca.service /etc/systemd/system/
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl enable --now step-ca
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 5. Install dnsmasq_exporter (Prometheus metric source)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
if [[ ! -x /usr/local/bin/dnsmasq_exporter ]]; then
|
||||||
|
log "Installing dnsmasq_exporter"
|
||||||
|
# -f: fail on HTTP errors instead of saving the error page as the "binary"
# (which the -x guard above would then accept on every later run).
curl -fsSL https://github.com/google/dnsmasq_exporter/releases/latest/download/dnsmasq_exporter-linux-amd64 \
  -o /usr/local/bin/dnsmasq_exporter
|
||||||
|
chmod +x /usr/local/bin/dnsmasq_exporter
|
||||||
|
fi
|
||||||
|
rsync -a /opt/onebox/files/dnsmasq_exporter.service /etc/systemd/system/
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl enable --now dnsmasq_exporter
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 6. Prometheus config
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
rsync -a /opt/onebox/files/prometheus.yml /etc/prometheus/prometheus.yml
|
||||||
|
systemctl enable --now prometheus
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 7. Grafana
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
rsync -a /opt/onebox/files/grafana.ini /etc/grafana/grafana.ini
|
||||||
|
rsync -a /opt/onebox/files/dashboards/ /var/lib/grafana/dashboards/
|
||||||
|
# chpasswd changes a Linux account (and fails when no "admin" user exists);
# Grafana's own admin password is set through grafana-cli. Run it as the
# grafana user so any database file it touches keeps the right owner.
# NOTE(review): assumes the packaged grafana-cli wrapper, which picks up
# /etc/grafana/grafana.ini and the data path on its own — confirm.
runuser -u grafana -- grafana-cli admin reset-admin-password "$GRAFANA_ADMIN_PASS"
|
||||||
|
systemctl enable --now grafana-server
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 8. Loki + Promtail (log aggregation)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
rsync -a /opt/onebox/files/loki.yml /etc/loki/local-config.yaml
|
||||||
|
rsync -a /opt/onebox/files/promtail.yml /etc/promtail/config.yml
|
||||||
|
systemctl enable --now loki promtail
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 9. Firewall (allow mgmt subnet only)
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
ufw --force reset
|
||||||
|
ufw default deny incoming
|
||||||
|
ufw allow from 10.0.0.0/8 to any port 22 # SSH
|
||||||
|
ufw allow from 10.0.0.0/8 to any port 3000 # Grafana
|
||||||
|
ufw allow from 10.0.0.0/8 to any port 53 # DNS
|
||||||
|
# DHCP DISCOVER/REQUEST come from 0.0.0.0 before a client holds a lease, so a
# source filter on 10.0.0.0/8 would drop them.
ufw allow proto udp from any to any port 67 # DHCP
# The CA endpoint advertised at the end of this script listens on 443.
ufw allow proto tcp from 10.0.0.0/8 to any port 443 # Step-CA / ACME
|
||||||
|
ufw --force enable
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 10. Health-check & finish
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
for svc in dnsmasq prometheus grafana-server loki promtail step-ca; do
|
||||||
|
systemctl is-active --quiet "$svc" || { log "$svc failed to start"; exit 1; }
|
||||||
|
done
|
||||||
|
|
||||||
|
log "✅ One-Box Wonder ready"
|
||||||
|
log " DNS/DHCP: ${HOST_IP}:53"
|
||||||
|
# Do not echo the admin password into the terminal or any captured log.
log " Grafana: http://${HOST_IP}:3000 (user: admin)"
|
||||||
|
log " CA: https://${HOST_IP}:443/acme/acme/directory"
|
||||||
|
```
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
Usage Notes
|
||||||
|
- Keep `/opt/onebox` the **single source of truth**; commit there, push, then `ssh box "cd /opt/onebox && git pull && ./deploy.sh"`
|
||||||
|
- Add new VLANs by dropping a file into `files/dnsmasq.d/` and re-running `./deploy.sh`; expect only a brief blip while dnsmasq restarts.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Below is an **observability add-on** that drops onto your existing One-Box Wonder **without touching DNS/DHCP logic** and gives you **metrics, logs, and alerts** in ~15 minutes.
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
Goal
|
||||||
|
“Is DNS answering? Are leases running low? Did the box reboot?” One URL answers it all.
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
Stack (ultra-light)
|
||||||
|
| Component | Purpose | Footprint |
|
||||||
|
|---|---|---|
|
||||||
|
| **Prometheus** (binary ~60 MB) | Scrapes dnsmasq & node metrics | 50 MB RAM |
|
||||||
|
| **node_exporter** | CPU, disk, network | 20 MB RAM |
|
||||||
|
| **dnsmasq_exporter** (or script) | Lease counts, cache hits | 10 MB RAM |
|
||||||
|
| **Grafana** (OSS) | Dashboards & alerts | 100 MB RAM |
|
||||||
|
| **journald → Loki** (optional) | Centralised logs | 80 MB RAM |
|
||||||
|
|
||||||
|
**Total ≈ 260 MB RAM**—still fits a 512 MB VM.
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
1. Install in one shot
|
||||||
|
```bash
|
||||||
|
sudo apt update
|
||||||
|
# Debian names the node exporter package "prometheus-node-exporter".
sudo apt install -y prometheus grafana loki promtail prometheus-node-exporter
|
||||||
|
# dnsmasq_exporter (Go binary)
|
||||||
|
# -f: fail on HTTP errors rather than saving an error page; sudo because
# /usr/local/bin is root-owned.
sudo curl -fsSL https://github.com/google/dnsmasq_exporter/releases/latest/download/dnsmasq_exporter-linux-amd64 \
  -o /usr/local/bin/dnsmasq_exporter && sudo chmod +x /usr/local/bin/dnsmasq_exporter
|
||||||
|
```
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
2. Enable & start
|
||||||
|
```bash
|
||||||
|
# dnsmasq_exporter user
|
||||||
|
sudo useradd -r -s /bin/false dnsmasq_exporter
|
||||||
|
sudo tee /etc/systemd/system/dnsmasq_exporter.service <<'EOF'
|
||||||
|
[Unit]
|
||||||
|
Description=DNSmasq metrics exporter
|
||||||
|
After=network.target
|
||||||
|
[Service]
|
||||||
|
User=dnsmasq_exporter
|
||||||
|
ExecStart=/usr/local/bin/dnsmasq_exporter --dnsmasq.addr=127.0.0.1:53
|
||||||
|
Restart=always
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
# The Debian unit for the node exporter is prometheus-node-exporter.
sudo systemctl enable --now prometheus grafana-server prometheus-node-exporter dnsmasq_exporter
|
||||||
|
```
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
3. Prometheus scrape config (append)
|
||||||
|
`/etc/prometheus/prometheus.yml`
|
||||||
|
```yaml
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'node'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9100']
|
||||||
|
- job_name: 'dnsmasq'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9153']
|
||||||
|
- job_name: 'dnsmasq_leases'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9153']
|
||||||
|
```
|
||||||
|
|
||||||
|
Reload: `sudo systemctl reload prometheus`
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
4. Grafana dashboards (import JSON)
|
||||||
|
- **ID 1860** – “Node Exporter Full”
|
||||||
|
- **ID 13186** – “DNSmasq” (or load the JSON below)
|
||||||
|
|
||||||
|
Quick-and-dirty dashboard snippet (paste into Grafana → Import → JSON):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"dashboard": {
|
||||||
|
"title": "MyCorp One-Box DNS/DHCP",
|
||||||
|
"panels": [
|
||||||
|
{ "title": "DHCP Leases Used %",
|
||||||
|
"targets": [{ "expr": "dnsmasq_dhcp_leases / 128 * 100", "legendFormat": "{{zone}}" }] },
|
||||||
|
{ "title": "DNS Cache Hit Ratio",
|
||||||
|
"targets": [{ "expr": "dnsmasq_dns_cache_hits / (dnsmasq_dns_cache_hits + dnsmasq_dns_cache_misses)" }] }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
5. Log pipeline (optional but nice)
|
||||||
|
Promtail ships journald to Loki.
|
||||||
|
|
||||||
|
`/etc/promtail/config.yml`
|
||||||
|
```yaml
|
||||||
|
server:
|
||||||
|
http_listen_port: 9080
|
||||||
|
positions:
|
||||||
|
filename: /tmp/positions.yaml
|
||||||
|
clients:
|
||||||
|
- url: http://localhost:3100/loki/api/v1/push
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: dnsmasq
|
||||||
|
journal:
|
||||||
|
labels:
|
||||||
|
job: dnsmasq
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: ['__journal__systemd_unit']
|
||||||
|
target_label: 'unit'
|
||||||
|
```
|
||||||
|
```bash
|
||||||
|
sudo systemctl enable --now promtail
|
||||||
|
```
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
6. Alert rules (Prometheus)
|
||||||
|
`/etc/prometheus/rules/dnsmasq.yml`
|
||||||
|
```yaml
|
||||||
|
groups:
|
||||||
|
- name: dnsmasq
|
||||||
|
rules:
|
||||||
|
- alert: DHCPPoolHigh
|
||||||
|
expr: dnsmasq_dhcp_leases / 128 > 0.85
|
||||||
|
for: 5m
|
||||||
|
annotations:
|
||||||
|
summary: "DHCP pool {{ $labels.zone }} > 85 % full"
|
||||||
|
- alert: DNSDown
|
||||||
|
expr: up{job="dnsmasq"} == 0
|
||||||
|
for: 1m
|
||||||
|
annotations:
|
||||||
|
summary: "dnsmasq_exporter is down"
|
||||||
|
```
|
||||||
|
Route these alerts to Slack/Discord/email through Alertmanager (or Grafana's alerting contact points).
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
7. One-command “health” alias
|
||||||
|
```bash
|
||||||
|
echo "alias health='curl -s http://localhost:3000/api/health | jq .database'" >> ~/.bash_aliases
|
||||||
|
```
|
||||||
|
Log in tomorrow → Grafana dashboard URL bookmarked; `health` tells you if the stack itself is alive.
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
8. Clean-up checklist
|
||||||
|
- Firewall: open **only** 3000/tcp (Grafana) to your mgmt subnet.
|
||||||
|
- Backups: nightly `tar czf /var/backups/obs-$(date +%F).tgz /var/lib/prometheus /var/lib/grafana`.
|
||||||
|
- Git commit: add `observability/` sub-folder; keep infra & metrics configs together.
|
||||||
|
|
||||||
|
──────────────────────────────
|
||||||
|
Done.
|
||||||
|
Open `http://10.0.255.1:3000` (admin/admin → change) and you’ll see **leases, cache hit-rate, CPU, disk**—all in one pane.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Here are **“meta tangents”** that literally **send you back to real work faster**—each one is a **one-shot action** that removes future yak-shaving.
|
||||||
|
|
||||||
|
1. **Git-init once → never SSH-edit again**
|
||||||
|
```bash
|
||||||
|
cd /etc/dnsmasq.d
|
||||||
|
sudo git init && sudo git add . && sudo git commit -m "baseline"
|
||||||
|
# Ignore editor/backup leftovers only — the *.conf files are what is being versioned.
printf '%s\n' '*~' '*.swp' '*.bak' | sudo tee .gitignore
|
||||||
|
alias dns-commit='sudo git add . && sudo git commit -m "$(date -Iseconds)"'
|
||||||
|
```
|
||||||
|
After that, every change is `dns-commit && sudo systemctl restart dnsmasq` (a reload only sends SIGHUP, and dnsmasq does not re-read its configuration files on SIGHUP).
|
||||||
|
|
||||||
|
2. **Make the box self-documenting**
|
||||||
|
Add a **MOTD** that prints the current zones:
|
||||||
|
```bash
|
||||||
|
sudo tee /etc/update-motd.d/50-dnsmasq-status <<'EOF'
|
||||||
|
#!/bin/sh
|
||||||
|
echo "=== DNS/DHCP zones ==="
|
||||||
|
grep -E 'domain=|dhcp-range=' /etc/dnsmasq.d/*.conf | column -t
|
||||||
|
EOF
|
||||||
|
sudo chmod +x /etc/update-motd.d/50-dnsmasq-status
|
||||||
|
```
|
||||||
|
Log in tomorrow, zones are right there—no docs to open.
|
||||||
|
|
||||||
|
3. **One-liner “new VLAN” generator**
|
||||||
|
```bash
|
||||||
|
#######################################
# Generate a dnsmasq config for a new VLAN from the 10-lan.conf template,
# syntax-check it, install it and restart dnsmasq.
# Arguments: $1 - numeric VLAN id
#            $2 - VLAN name (letters, digits, "_" or "-")
# Outputs:   the generated config and a confirmation line on stdout
# Returns:   0 on success, 2 on bad arguments, 1 on any other failure
#######################################
newvlan() {
  local id=${1:-} name=${2:-}
  local template=/etc/dnsmasq.d/10-lan.conf
  local target tmp

  # Both values are spliced into a sed program and a file name, so restrict them.
  if [[ ! "$id" =~ ^[0-9]+$ || ! "$name" =~ ^[A-Za-z0-9_-]+$ ]]; then
    echo "usage: newvlan <numeric-id> <name>  (name: letters, digits, _ or -)" >&2
    return 2
  fi
  if [[ ! -r "$template" ]]; then
    echo "newvlan: template $template not readable" >&2
    return 1
  fi
  target="/etc/dnsmasq.d/${id}-${name}.conf"

  tmp=$(mktemp) || return 1
  # NOTE(review): "s/0/$id/g" rewrites every 0 in the template (addresses,
  # lease times, ...); kept as-is because the template is not visible here —
  # narrow it to the VLAN-specific field once the template layout is confirmed.
  if ! sed "s/lan/$name/g; s/0/$id/g" "$template" > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi

  # Validate before installing so a broken file never lands in dnsmasq.d.
  if ! sudo dnsmasq --test --conf-file="$tmp"; then
    echo "newvlan: generated config failed dnsmasq --test" >&2
    rm -f -- "$tmp"
    return 1
  fi

  if ! sudo tee "$target" < "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi
  rm -f -- "$tmp"

  # dnsmasq does not re-read config files on SIGHUP (reload); a restart is needed.
  sudo systemctl restart dnsmasq || return 1
  echo "VLAN $id $name ready"
}
|
||||||
|
```
|
||||||
|
Usage: `newvlan 40 cameras`.
|
||||||
|
|
||||||
|
4. **Push-button backup**
|
||||||
|
```bash
|
||||||
|
# Single quotes keep $(date ...) literal so cron evaluates it every night;
# in double quotes the shell expands it once, baking today's date into the entry.
# (\% is cron's escape for a literal %.)
sudo crontab -l | { cat; echo '0 3 * * * tar czf /var/backups/dnsmasq-$(date +\%F).tgz /etc/dnsmasq* /var/lib/misc/dnsmasq.leases'; } | sudo crontab -
|
||||||
|
```
|
||||||
|
Daily tarball; restore = untar + reload.
|
||||||
|
|
||||||
|
5. **“Don’t-break-DNS” pre-commit hook**
|
||||||
|
```bash
|
||||||
|
sudo tee /etc/dnsmasq.d/.git/hooks/pre-commit <<'EOF'
|
||||||
|
#!/bin/bash
|
||||||
|
dnsmasq --test -C /dev/null -C <(cat /etc/dnsmasq.d/*.conf) || exit 1
|
||||||
|
EOF
|
||||||
|
sudo chmod +x /etc/dnsmasq.d/.git/hooks/pre-commit
|
||||||
|
```
|
||||||
|
Bad configs never reach `reload`.
|
||||||
|
|
||||||
|
Pick any two; you’ll **never touch the box again** unless the business truly changes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
──────────────────────────────
|
──────────────────────────────
|
||||||
MyCorp “One-Box Wonder” – End-to-End Deployment Guide
|
MyCorp “One-Box Wonder” – End-to-End Deployment Guide
|
||||||
──────────────────────────────
|
──────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user