PVEOnHygon

Host configuration

Hardware Info:

# cat /proc/cpuinfo | grep -i "model name"
model name	: Hygon C86 3350  8-core Processor

OS Info:

root@pve:~# cat /etc/debian_version 
12.4
root@pve:~# uname -a
Linux pve 6.5.11-8-pve #1 SMP PREEMPT_DYNAMIC PMX 6.5.11-8 (2024-01-30T12:27Z) x86_64 GNU/Linux

Modifications:

# cat /etc/default/grub | grep amd_iommu
GRUB_CMDLINE_LINUX_DEFAULT="quiet iommu=pt amd_iommu=on initcall_blacklist=sysfb_init pcie_acs_override=downstream,multifunction"
# cat /etc/modules
vfio
vfio_iommu_type1
vfio_pci
vfio_virqfd
# cat /etc/modprobe.d/vfio.conf
options vfio-pci ids=1002:6611,1002:aab0
options vfio-pci disable_idle_d3=1
# update-initramfs -u -k all && update-grub && reboot
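
After the reboot, it is worth confirming that the IOMMU is enabled and that both functions of the card are bound to vfio-pci. A minimal check (the IDs are the ones from vfio.conf above):

# IOMMU groups should be populated if amd_iommu=on took effect
find /sys/kernel/iommu_groups/ -type l | head
# both the GPU and its HDMI audio function should report "Kernel driver in use: vfio-pci"
lspci -nnk -d 1002:6611
lspci -nnk -d 1002:aab0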

Virtualization

Win10 configuration:

/images/20240819_082807_x.jpg

Win10 works properly right away, but Win7 could only boot into the system; after the driver installation the screen stayed black and it would not start up.

AMD Radeon 550

Added more vfio items:

# cat /etc/modprobe.d/vfio.conf 
options vfio-pci ids=1002:6611,1002:aab0,1002:699f,1002:aae0
options vfio-pci disable_idle_d3=1
# update-initramfs -u -k all && update-grub && reboot

The same result as with the Radeon 520.

Changed back to an older driver, and it worked:

-rw-rw-r-- 1 dash dash  478517432  Aug 19 09:27 non-whql-win7-64bit-radeon-software-crimson-relive-17.5.1-may4.exe

Re-installation

/images/20240819_095520_x.jpg

The running kvm command line:

root        1308       1 80 15:40 ?        00:01:22 /usr/bin/kvm -id 107 -name 107107,debug-threads=on -no-shutdown -chardev socket,id=qmp,path=/var/run/qemu-server/107.qmp,server=on,wait=off -mon chardev=qmp,mode=control -chardev socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5 -mon chardev=qmp-event,mode=control -pidfile /var/run/qemu-server/107.pid -daemonize -smbios type=1,uuid=67b73609-9afe-4dd0-93b8-df42b3e114b5 -smp 2,sockets=1,cores=2,maxcpus=2 -nodefaults -boot menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg -vga none -nographic -cpu qemu64,+aes,enforce,hv_ipi,hv_relaxed,hv_reset,hv_runtime,hv_spinlocks=0x1fff,hv_stimer,hv_synic,hv_time,hv_vapic,hv_vendor_id=proxmox,hv_vpindex,kvm=off,+kvm_pv_eoi,+kvm_pv_unhalt,+pni,+popcnt,+sse4.1,+sse4.2,+ssse3 -m 3072 -readconfig /usr/share/qemu-server/pve-q35-4.0.cfg -device vmgenid,guid=cb144ce1-60a1-4af3-8842-6b47fa91f4df -device usb-tablet,id=tablet,bus=ehci.0,port=1 -device vfio-pci,host=0000:06:00.0,id=hostpci0,bus=pcie.0,addr=0x10,x-vga=on -device usb-host,vendorid=0x30fa,productid=0x0300,id=usb0 -device usb-host,hostbus=1,hostport=1.1,id=usb1 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on -iscsi initiator-name=iqn.1993-08.org.debian:01:ba1fd0778bd -drive file=/var/lib/vz/template/iso/Win7_hygon.iso,if=none,id=drive-ide2,media=cdrom,aio=io_uring -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=101 -device ahci,id=ahci0,multifunction=on,bus=pci.0,addr=0x7 -drive file=/dev/pve/vm-107-disk-0,if=none,id=drive-sata0,format=raw,cache=none,aio=io_uring,detect-zeroes=on -device ide-hd,bus=ahci0.0,drive=drive-sata0,id=sata0,bootindex=100 -netdev type=tap,id=net0,ifname=tap107i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown -device e1000,mac=BC:24:11:9A:C1:70,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=102 -rtc driftfix=slew,base=localtime -machine hpet=off,smm=off,type=pc-q35-8.1+pve0 -global kvm-pit.lost_tick_policy=discard
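
For reference, a hedged reconstruction of what the corresponding /etc/pve/qemu-server/107.conf might contain, read back from the flags above; the storage names, bridge, and ostype are assumptions, not a copy of the real file:

# assumed reconstruction of 107.conf, not the actual file
cores: 2
memory: 3072
machine: q35
ostype: win7
vga: none
hostpci0: 0000:06:00.0,x-vga=1
ide2: local:iso/Win7_hygon.iso,media=cdrom
sata0: local-lvm:vm-107-disk-0,cache=none
net0: e1000=BC:24:11:9A:C1:70,bridge=vmbr0
usb0: host=30fa:0300
usb1: host=1-1.1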

WorkingTipsOnIngressSpice

Load and push images:

 nerdctl load < nginxslim.tar
 nerdctl tag gcr.io/google_containers/nginx-slim:0.8 localhost:35000/nginx-slim:0.8
 nerdctl push localhost:35000/nginx-slim:0.8

Create the deployment:

# cat nginx01.yaml 
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: nginx01
  name: nginx01
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx01
  template:
    metadata:
      labels:
        app: nginx01
    spec:
      containers:
      - image: 192.168.1.11:35000/nginx-slim:0.8
        name: nginx01
# kubectl create -f nginx01.yaml
# kubectl expose deployment nginx01 --name=nginx01-svr --type=ClusterIP --port=80
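
Before wiring up the ingress, a quick sanity check that the deployment is running and the service has endpoints:

# confirm the pod is up and the service resolves to it
kubectl get deployment nginx01
kubectl get svc nginx01-svr
kubectl get endpoints nginx01-svr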

Create the ingress:

# cat ingress_nginx.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-ingress
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  ingressClassName: nginx
  rules:
    - host: securebrowser.example
      http:
        paths:
          - path: /nginx
            pathType: Prefix
            backend:
              service:
                name: nginx01-svr
                port:
                  number: 80
# kubectl create -f ingress_nginx.yaml
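
Optionally verify the ingress was admitted and find the address the controller listens on; this assumes the NGINX ingress controller was deployed in the usual ingress-nginx namespace:

kubectl get ingress nginx-ingress
# the namespace is an assumption; the stock ingress-nginx manifests use "ingress-nginx"
kubectl -n ingress-nginx get svc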

Test:

$ cat /etc/hosts  | grep secure
192.168.1.11	securebrowser.example
$ curl securebrowser.example/nginx

JumperConfiguration

Install the Ubuntu 22.04 desktop version, then:

sudo apt update -y
sudo apt upgrade -y
sudo apt install -y openssh-server tigervnc-standalone-server tigervnc-xorg-extension lxqt vim net-tools curl
sudo systemctl set-default multi-user.target
sudo reboot

Set the VNC password:

$ vncpasswd

Configure the VNC server:

test@jumper:~$ cat ~/.vnc/config 
session=lxqt
geometry=1920x1080
localhost=no
alwaysshared
test@jumper:~$ cat /etc/tigervnc/vncserver.users 
# TigerVNC User assignment
#
# This file assigns users to specific VNC display numbers.
# The syntax is <display>=<username>. E.g.:
#
# :2=andrew
# :3=lisa
:1=test
test@jumper:~$ sudo systemctl enable tigervncserver@:1
Created symlink /etc/systemd/system/multi-user.target.wants/tigervncserver@:1.service → /lib/systemd/system/tigervncserver@.service.
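
Start the unit and confirm display :1 is listening (display :1 maps to TCP port 5901):

sudo systemctl start tigervncserver@:1
sudo ss -tlnp | grep 5901
# then connect with any VNC viewer, e.g. to 192.168.1.33:5901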

Download the Citrix Workspace packages from the website, and install them via:

sudo dpkg -i icaclient_24.5.0.76_amd64.deb 
sudo dpkg -i ctxusb_24.5.0.76_amd64.deb 
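
If dpkg reports missing dependencies for the Citrix packages, let apt pull them in afterwards:

# resolves any dependencies that dpkg -i left unsatisfied
sudo apt-get -f install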

Configure the network in NetworkManager:

ens160: static IP 192.168.1.33
ens192: DHCP from the company network.
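
The same setup can be scripted with nmcli instead of the GUI; the connection names below are assumptions, so check nmcli con show first:

# static address on the internal interface (connection name "ens160" is assumed)
sudo nmcli con mod ens160 ipv4.method manual ipv4.addresses 192.168.1.33/24
# DHCP on the company-facing interface (connection name "ens192" is assumed)
sudo nmcli con mod ens192 ipv4.method auto
sudo nmcli con up ens160 && sudo nmcli con up ens192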

Add a crontab entry for connection sharing:

root@jumper:/home/test# crontab -l
@reboot sleep 10 && /usr/bin/startsharing.sh
root@jumper:/home/test# cat /usr/bin/startsharing.sh
#!/bin/sh -e
iptables -t nat -A POSTROUTING -o ens192 -j MASQUERADE
iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
iptables -A FORWARD -i ens160 -o ens192 -j ACCEPT
echo "1" > /proc/sys/net/ipv4/ip_forward
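
With this in place, hosts on the 192.168.1.0/24 side reach the company network by using the jumper as their gateway; a rough client-side sketch (the destination range is a placeholder):

# on a client in 192.168.1.0/24, send traffic through the jumper
sudo ip route add default via 192.168.1.33
# or route only a specific company range (placeholder) instead of the default:
# sudo ip route add 10.0.0.0/8 via 192.168.1.33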

nvidiat4OnRyzenVfioTips

Host Preparation

Hardware information:

root@hope:/etc/libvirt# sudo lspci -nn| grep -i nvidia
08:00.0 3D controller [0302]: NVIDIA Corporation TU104GL [Tesla T4] [10de:1eb8] (rev a1)
root@hope:/etc/libvirt# lscpu | grep -i model
Model:                              96
Model name:                         AMD Ryzen 5 4500 6-Core Processor

Edit the grub configuration:

$ sudo vim /etc/default/grub
GRUB_CMDLINE_LINUX_DEFAULT="amd_iommu=on iommu=pt kvm.ignore_msrs=1 video=efifb:off vfio-pci.ids=10de:1eb8"
$ sudo update-grub2
$ sudo vim /etc/initramfs-tools/modules
vfio
vfio_iommu_type1
vfio_pci
vfio_virqfd

Specify the vfio driver for the NVIDIA T4:

$ sudo vim /etc/modprobe.d/vfio.conf 
options vfio-pci ids=10de:1eb8
$ sudo update-initramfs -u -k all

Download the vBIOS for the NVIDIA T4 from https://www.techpowerup.com/vgabios/259926/259926.

After reboot, check the driver status:

dash@hope:~$ lspci -vvnn -s 08:00.0
08:00.0 3D controller [0302]: NVIDIA Corporation TU104GL [Tesla T4] [10de:1eb8] (rev a1)
	Subsystem: NVIDIA Corporation TU104GL [Tesla T4] [10de:12a2]
	Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Interrupt: pin A routed to IRQ 255
	Region 0: Memory at fb000000 (32-bit, non-prefetchable) [disabled] [size=16M]
	Region 1: Memory at ffc0000000 (64-bit, prefetchable) [disabled] [size=256M]
	Region 3: Memory at fff0000000 (64-bit, prefetchable) [disabled] [size=32M]
	Capabilities: <access denied>
	Kernel driver in use: vfio-pci
	Kernel modules: nvidiafb, nouveau

VM setup

UEFI setting:

/images/20240718_180958_x.jpg

Continue the installation until it finishes, then:

sudo apt install -y libevent-dev build-essential vim
sudo apt-get upgrade
sudo shutdown -h now

Shut down and add the Tesla T4:

/images/20240718_185046_x.jpg

Change the video device to none:

/images/20240718_185215_x.jpg

Start the VM; from now on, you can only SSH into the machine.

NVIDIA driver installation

The steps are as follows:

distro=ubuntu2204
arch=x86_64
wget https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb 
sudo apt-get install cuda-toolkit
sudo apt-get install nvidia-gds
sudo ubuntu-drivers autoinstall
sudo apt-get install --install-recommends linux-generic-hwe-22.04

Only with the HWE kernel does nvidia-smi run properly.
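
A quick way to confirm the HWE kernel is actually the one running and that the driver module was built for it:

# the running kernel should be the 22.04 HWE one, not the 5.15 GA kernel
uname -r
# the nvidia DKMS module should be installed for that kernel, and loaded
dkms status
lsmod | grep nvidia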

$ vim ~/.bashrc
# cuda related
export PATH=/usr/local/cuda-12.5/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-12.5/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

Examine the nvcc version:

$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0

Examine the card info:

$ sudo nvidia-smi 
Thu Jul 18 12:09:39 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.06              Driver Version: 555.42.06      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  Tesla T4                       Off |   00000000:07:00.0 Off |                    0 |
| N/A   35C    P8              9W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                                                         
+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|  No running processes found                                                             |
+-----------------------------------------------------------------------------------------+

ComfyUI Setup

Install git-lfs:

$ sudo apt install -y git git-lfs
$ git lfs install

Install miniconda:

mkdir -p ~/miniconda3
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
rm -rf ~/miniconda3/miniconda.sh
~/miniconda3/bin/conda init bash

Log out of the terminal and log back in so conda is initialized.

Install ComfyUI:

$ cd Code
$ git clone https://github.com/comfyanonymous/ComfyUI.git
$ conda create -n comfyui python=3.10
$ conda activate comfyui
$ pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
$ pip3 install torch torchvision torchaudio
$ cd ComfyUI
$ pip install -r requirements.txt
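
Before adding custom nodes, a quick sanity check inside the comfyui env that this torch build actually sees the GPU:

$ python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
# expect True for cuda.is_available() on the passed-through T4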

Install ComfyUI-Diffusers (custom nodes live under ComfyUI/custom_nodes):

cd custom_nodes
git clone https://github.com/Limitex/ComfyUI-Diffusers.git
cd ComfyUI-Diffusers
pip install -r requirements.txt
git clone https://github.com/cumulo-autumn/StreamDiffusion.git
python -m streamdiffusion.tools.install-tensorrt

Configure:

$ sudo apt-get install -y nfs-common
$ sudo mkdir -p /media/nfs
$ sudo mount model_on_nfs /media/nfs
$ cd ~/Code/ComfyUI
$ cp extra_model_paths.yaml.example extra_model_paths.yaml
$ vim extra_model_paths.yaml
a111: 
    base_path: /media/nfs/stable-diffusion-webui/
Go to the ComfyUI/custom_nodes directory in a terminal:
$ git clone https://github.com/ltdrdata/ComfyUI-Manager.git
Restart ComfyUI:
$ python main.py --port 8188 --listen 192.168.1.60
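
A quick check from another machine that the UI is reachable:

$ curl -s -o /dev/null -w '%{http_code}\n' http://192.168.1.60:8188
# 200 here means the ComfyUI web server is up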

WorkingTipsOnComfyUIUbuntu2204

Installation

Ubuntu 22.04 with an A6000; install steps:

git clone https://github.com/comfyanonymous/ComfyUI.git
cd ComfyUI
conda create -n comfyui python=3.10
conda activate comfyui
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install torch torchvision torchaudio
pip install -r requirements.txt 
cp extra_model_paths.yaml.example extra_model_paths.yaml
vim extra_model_paths.yaml
python main.py --port 8188 --listen 192.168.1.7

Install cuDNN:

sudo apt install nvidia-cudnn

Install ComfyUI manager:

Go to the ComfyUI/custom_nodes directory in a terminal:
git clone https://github.com/ltdrdata/ComfyUI-Manager.git
Then restart ComfyUI.