MickJ committed on
Commit
2806d4a
·
verified ·
1 Parent(s): 0d9bff2

Sync clean ModelOpt weights from BBuf/HunyuanVideo-ModelOpt-FP8-SGLang

Browse files
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: sglang
5
+ base_model: hunyuanvideo-community/HunyuanVideo
6
+ tags:
7
+ - sglang
8
+ - diffusion
9
+ - modelopt
10
+ - fp8
11
+ - transformer
12
+ ---
13
+
14
+ # hunyuanvideo-modelopt-fp8-sglang-transformer
15
+
16
+ This repository contains the SGLang-native ModelOpt FP8 transformer override for `hunyuanvideo-community/HunyuanVideo`.
17
+
18
+ It is intended to be used with SGLang Diffusion through `--transformer-path` while keeping the base model separate:
19
+
20
+ ```bash
21
+ sglang generate \
22
+ --model-path hunyuanvideo-community/HunyuanVideo \
23
+ --transformer-path lmsys/hunyuanvideo-modelopt-fp8-sglang-transformer \
24
+ --prompt "A cinematic scene with detailed lighting" \
25
+ --save-output
26
+ ```
27
+
28
+ ## Contents
29
+
30
+ The repository is intentionally minimal and contains only:
31
+
32
+ - `config.json`
33
+ - `*.safetensors` weight shard files
34
+ - `*.safetensors.index.json` when the checkpoint is sharded
35
+
36
+ Validation images, benchmark outputs, profiler traces, and conversion scratch artifacts are not stored in this model repository.
37
+
38
+ ## Notes
39
+
40
+ - Quantization config is stored in `config.json` with `quant_method=modelopt` and `quant_algo=FP8`.
41
+ - Use this checkpoint with an SGLang version that includes diffusion ModelOpt support for the corresponding model family.
42
+ - The original base model license and usage terms still apply.
config.json ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "HunyuanVideoTransformer3DModel",
3
+ "_diffusers_version": "0.37.0",
4
+ "_name_or_path": "/root/.cache/huggingface/hub/models--hunyuanvideo-community--HunyuanVideo/snapshots/e8c2aaa66fe3742a32c11a6766aecbf07c56e773/transformer",
5
+ "attention_head_dim": 128,
6
+ "guidance_embeds": true,
7
+ "image_condition_type": null,
8
+ "in_channels": 16,
9
+ "mlp_ratio": 4.0,
10
+ "num_attention_heads": 24,
11
+ "num_layers": 20,
12
+ "num_refiner_layers": 2,
13
+ "num_single_layers": 40,
14
+ "out_channels": 16,
15
+ "patch_size": 2,
16
+ "patch_size_t": 1,
17
+ "pooled_projection_dim": 768,
18
+ "qk_norm": "rms_norm",
19
+ "quantization_config": {
20
+ "ignore": [
21
+ "double_blocks.0.img_attn_k_norm",
22
+ "double_blocks.0.img_attn_q_norm",
23
+ "double_blocks.0.img_mod.linear",
24
+ "double_blocks.0.txt_attn_k_norm",
25
+ "double_blocks.0.txt_attn_q_norm",
26
+ "double_blocks.0.txt_mod.linear",
27
+ "double_blocks.1.img_attn_k_norm",
28
+ "double_blocks.1.img_attn_q_norm",
29
+ "double_blocks.1.img_mod.linear",
30
+ "double_blocks.1.txt_attn_k_norm",
31
+ "double_blocks.1.txt_attn_q_norm",
32
+ "double_blocks.1.txt_mod.linear",
33
+ "double_blocks.10.img_attn_k_norm",
34
+ "double_blocks.10.img_attn_q_norm",
35
+ "double_blocks.10.img_mod.linear",
36
+ "double_blocks.10.txt_attn_k_norm",
37
+ "double_blocks.10.txt_attn_q_norm",
38
+ "double_blocks.10.txt_mod.linear",
39
+ "double_blocks.11.img_attn_k_norm",
40
+ "double_blocks.11.img_attn_q_norm",
41
+ "double_blocks.11.img_mod.linear",
42
+ "double_blocks.11.txt_attn_k_norm",
43
+ "double_blocks.11.txt_attn_q_norm",
44
+ "double_blocks.11.txt_mod.linear",
45
+ "double_blocks.12.img_attn_k_norm",
46
+ "double_blocks.12.img_attn_q_norm",
47
+ "double_blocks.12.img_mod.linear",
48
+ "double_blocks.12.txt_attn_k_norm",
49
+ "double_blocks.12.txt_attn_q_norm",
50
+ "double_blocks.12.txt_mod.linear",
51
+ "double_blocks.13.img_attn_k_norm",
52
+ "double_blocks.13.img_attn_q_norm",
53
+ "double_blocks.13.img_mod.linear",
54
+ "double_blocks.13.txt_attn_k_norm",
55
+ "double_blocks.13.txt_attn_q_norm",
56
+ "double_blocks.13.txt_mod.linear",
57
+ "double_blocks.14.img_attn_k_norm",
58
+ "double_blocks.14.img_attn_q_norm",
59
+ "double_blocks.14.img_mod.linear",
60
+ "double_blocks.14.txt_attn_k_norm",
61
+ "double_blocks.14.txt_attn_q_norm",
62
+ "double_blocks.14.txt_mod.linear",
63
+ "double_blocks.15.img_attn_k_norm",
64
+ "double_blocks.15.img_attn_q_norm",
65
+ "double_blocks.15.img_mod.linear",
66
+ "double_blocks.15.txt_attn_k_norm",
67
+ "double_blocks.15.txt_attn_q_norm",
68
+ "double_blocks.15.txt_mod.linear",
69
+ "double_blocks.16.img_attn_k_norm",
70
+ "double_blocks.16.img_attn_q_norm",
71
+ "double_blocks.16.img_mod.linear",
72
+ "double_blocks.16.txt_attn_k_norm",
73
+ "double_blocks.16.txt_attn_q_norm",
74
+ "double_blocks.16.txt_mod.linear",
75
+ "double_blocks.17.img_attn_k_norm",
76
+ "double_blocks.17.img_attn_q_norm",
77
+ "double_blocks.17.img_mod.linear",
78
+ "double_blocks.17.txt_attn_k_norm",
79
+ "double_blocks.17.txt_attn_q_norm",
80
+ "double_blocks.17.txt_mod.linear",
81
+ "double_blocks.18.img_attn_k_norm",
82
+ "double_blocks.18.img_attn_q_norm",
83
+ "double_blocks.18.img_mod.linear",
84
+ "double_blocks.18.txt_attn_k_norm",
85
+ "double_blocks.18.txt_attn_q_norm",
86
+ "double_blocks.18.txt_mod.linear",
87
+ "double_blocks.19.img_attn_k_norm",
88
+ "double_blocks.19.img_attn_q_norm",
89
+ "double_blocks.19.img_mod.linear",
90
+ "double_blocks.19.txt_attn_k_norm",
91
+ "double_blocks.19.txt_attn_q_norm",
92
+ "double_blocks.19.txt_mod.linear",
93
+ "double_blocks.2.img_attn_k_norm",
94
+ "double_blocks.2.img_attn_q_norm",
95
+ "double_blocks.2.img_mod.linear",
96
+ "double_blocks.2.txt_attn_k_norm",
97
+ "double_blocks.2.txt_attn_q_norm",
98
+ "double_blocks.2.txt_mod.linear",
99
+ "double_blocks.3.img_attn_k_norm",
100
+ "double_blocks.3.img_attn_q_norm",
101
+ "double_blocks.3.img_mod.linear",
102
+ "double_blocks.3.txt_attn_k_norm",
103
+ "double_blocks.3.txt_attn_q_norm",
104
+ "double_blocks.3.txt_mod.linear",
105
+ "double_blocks.4.img_attn_k_norm",
106
+ "double_blocks.4.img_attn_q_norm",
107
+ "double_blocks.4.img_mod.linear",
108
+ "double_blocks.4.txt_attn_k_norm",
109
+ "double_blocks.4.txt_attn_q_norm",
110
+ "double_blocks.4.txt_mod.linear",
111
+ "double_blocks.5.img_attn_k_norm",
112
+ "double_blocks.5.img_attn_q_norm",
113
+ "double_blocks.5.img_mod.linear",
114
+ "double_blocks.5.txt_attn_k_norm",
115
+ "double_blocks.5.txt_attn_q_norm",
116
+ "double_blocks.5.txt_mod.linear",
117
+ "double_blocks.6.img_attn_k_norm",
118
+ "double_blocks.6.img_attn_q_norm",
119
+ "double_blocks.6.img_mod.linear",
120
+ "double_blocks.6.txt_attn_k_norm",
121
+ "double_blocks.6.txt_attn_q_norm",
122
+ "double_blocks.6.txt_mod.linear",
123
+ "double_blocks.7.img_attn_k_norm",
124
+ "double_blocks.7.img_attn_q_norm",
125
+ "double_blocks.7.img_mod.linear",
126
+ "double_blocks.7.txt_attn_k_norm",
127
+ "double_blocks.7.txt_attn_q_norm",
128
+ "double_blocks.7.txt_mod.linear",
129
+ "double_blocks.8.img_attn_k_norm",
130
+ "double_blocks.8.img_attn_q_norm",
131
+ "double_blocks.8.img_mod.linear",
132
+ "double_blocks.8.txt_attn_k_norm",
133
+ "double_blocks.8.txt_attn_q_norm",
134
+ "double_blocks.8.txt_mod.linear",
135
+ "double_blocks.9.img_attn_k_norm",
136
+ "double_blocks.9.img_attn_q_norm",
137
+ "double_blocks.9.img_mod.linear",
138
+ "double_blocks.9.txt_attn_k_norm",
139
+ "double_blocks.9.txt_attn_q_norm",
140
+ "double_blocks.9.txt_mod.linear",
141
+ "final_layer.adaLN_modulation.linear",
142
+ "final_layer.linear",
143
+ "guidance_in.mlp.fc_in",
144
+ "guidance_in.mlp.fc_out",
145
+ "img_in.proj",
146
+ "single_blocks.0.k_norm",
147
+ "single_blocks.0.modulation.linear",
148
+ "single_blocks.0.q_norm",
149
+ "single_blocks.1.k_norm",
150
+ "single_blocks.1.modulation.linear",
151
+ "single_blocks.1.q_norm",
152
+ "single_blocks.10.k_norm",
153
+ "single_blocks.10.modulation.linear",
154
+ "single_blocks.10.q_norm",
155
+ "single_blocks.11.k_norm",
156
+ "single_blocks.11.modulation.linear",
157
+ "single_blocks.11.q_norm",
158
+ "single_blocks.12.k_norm",
159
+ "single_blocks.12.modulation.linear",
160
+ "single_blocks.12.q_norm",
161
+ "single_blocks.13.k_norm",
162
+ "single_blocks.13.modulation.linear",
163
+ "single_blocks.13.q_norm",
164
+ "single_blocks.14.k_norm",
165
+ "single_blocks.14.modulation.linear",
166
+ "single_blocks.14.q_norm",
167
+ "single_blocks.15.k_norm",
168
+ "single_blocks.15.modulation.linear",
169
+ "single_blocks.15.q_norm",
170
+ "single_blocks.16.k_norm",
171
+ "single_blocks.16.modulation.linear",
172
+ "single_blocks.16.q_norm",
173
+ "single_blocks.17.k_norm",
174
+ "single_blocks.17.modulation.linear",
175
+ "single_blocks.17.q_norm",
176
+ "single_blocks.18.k_norm",
177
+ "single_blocks.18.modulation.linear",
178
+ "single_blocks.18.q_norm",
179
+ "single_blocks.19.k_norm",
180
+ "single_blocks.19.modulation.linear",
181
+ "single_blocks.19.q_norm",
182
+ "single_blocks.2.k_norm",
183
+ "single_blocks.2.modulation.linear",
184
+ "single_blocks.2.q_norm",
185
+ "single_blocks.20.k_norm",
186
+ "single_blocks.20.modulation.linear",
187
+ "single_blocks.20.q_norm",
188
+ "single_blocks.21.k_norm",
189
+ "single_blocks.21.modulation.linear",
190
+ "single_blocks.21.q_norm",
191
+ "single_blocks.22.k_norm",
192
+ "single_blocks.22.modulation.linear",
193
+ "single_blocks.22.q_norm",
194
+ "single_blocks.23.k_norm",
195
+ "single_blocks.23.modulation.linear",
196
+ "single_blocks.23.q_norm",
197
+ "single_blocks.24.k_norm",
198
+ "single_blocks.24.modulation.linear",
199
+ "single_blocks.24.q_norm",
200
+ "single_blocks.25.k_norm",
201
+ "single_blocks.25.modulation.linear",
202
+ "single_blocks.25.q_norm",
203
+ "single_blocks.26.k_norm",
204
+ "single_blocks.26.modulation.linear",
205
+ "single_blocks.26.q_norm",
206
+ "single_blocks.27.k_norm",
207
+ "single_blocks.27.modulation.linear",
208
+ "single_blocks.27.q_norm",
209
+ "single_blocks.28.k_norm",
210
+ "single_blocks.28.modulation.linear",
211
+ "single_blocks.28.q_norm",
212
+ "single_blocks.29.k_norm",
213
+ "single_blocks.29.modulation.linear",
214
+ "single_blocks.29.q_norm",
215
+ "single_blocks.3.k_norm",
216
+ "single_blocks.3.modulation.linear",
217
+ "single_blocks.3.q_norm",
218
+ "single_blocks.30.k_norm",
219
+ "single_blocks.30.modulation.linear",
220
+ "single_blocks.30.q_norm",
221
+ "single_blocks.31.k_norm",
222
+ "single_blocks.31.modulation.linear",
223
+ "single_blocks.31.q_norm",
224
+ "single_blocks.32.k_norm",
225
+ "single_blocks.32.modulation.linear",
226
+ "single_blocks.32.q_norm",
227
+ "single_blocks.33.k_norm",
228
+ "single_blocks.33.modulation.linear",
229
+ "single_blocks.33.q_norm",
230
+ "single_blocks.34.k_norm",
231
+ "single_blocks.34.modulation.linear",
232
+ "single_blocks.34.q_norm",
233
+ "single_blocks.35.k_norm",
234
+ "single_blocks.35.modulation.linear",
235
+ "single_blocks.35.q_norm",
236
+ "single_blocks.36.k_norm",
237
+ "single_blocks.36.modulation.linear",
238
+ "single_blocks.36.q_norm",
239
+ "single_blocks.37.k_norm",
240
+ "single_blocks.37.modulation.linear",
241
+ "single_blocks.37.q_norm",
242
+ "single_blocks.38.k_norm",
243
+ "single_blocks.38.modulation.linear",
244
+ "single_blocks.38.q_norm",
245
+ "single_blocks.39.k_norm",
246
+ "single_blocks.39.modulation.linear",
247
+ "single_blocks.39.q_norm",
248
+ "single_blocks.4.k_norm",
249
+ "single_blocks.4.modulation.linear",
250
+ "single_blocks.4.q_norm",
251
+ "single_blocks.5.k_norm",
252
+ "single_blocks.5.modulation.linear",
253
+ "single_blocks.5.q_norm",
254
+ "single_blocks.6.k_norm",
255
+ "single_blocks.6.modulation.linear",
256
+ "single_blocks.6.q_norm",
257
+ "single_blocks.7.k_norm",
258
+ "single_blocks.7.modulation.linear",
259
+ "single_blocks.7.q_norm",
260
+ "single_blocks.8.k_norm",
261
+ "single_blocks.8.modulation.linear",
262
+ "single_blocks.8.q_norm",
263
+ "single_blocks.9.k_norm",
264
+ "single_blocks.9.modulation.linear",
265
+ "single_blocks.9.q_norm",
266
+ "time_in.mlp.fc_in",
267
+ "time_in.mlp.fc_out",
268
+ "txt_in.c_embedder.fc_in",
269
+ "txt_in.c_embedder.fc_out",
270
+ "txt_in.input_embedder",
271
+ "txt_in.refiner_blocks.0.adaLN_modulation.linear",
272
+ "txt_in.refiner_blocks.0.mlp.fc_in",
273
+ "txt_in.refiner_blocks.0.mlp.fc_out",
274
+ "txt_in.refiner_blocks.0.norm1",
275
+ "txt_in.refiner_blocks.0.norm2",
276
+ "txt_in.refiner_blocks.0.self_attn_proj",
277
+ "txt_in.refiner_blocks.0.self_attn_qkv",
278
+ "txt_in.refiner_blocks.1.adaLN_modulation.linear",
279
+ "txt_in.refiner_blocks.1.mlp.fc_in",
280
+ "txt_in.refiner_blocks.1.mlp.fc_out",
281
+ "txt_in.refiner_blocks.1.norm1",
282
+ "txt_in.refiner_blocks.1.norm2",
283
+ "txt_in.refiner_blocks.1.self_attn_proj",
284
+ "txt_in.refiner_blocks.1.self_attn_qkv",
285
+ "txt_in.t_embedder.mlp.fc_in",
286
+ "txt_in.t_embedder.mlp.fc_out",
287
+ "vector_in.fc_in",
288
+ "vector_in.fc_out"
289
+ ],
290
+ "producer": {
291
+ "name": "modelopt",
292
+ "version": "0.43.0rc2.dev66+gf7557221e.d20260407"
293
+ },
294
+ "quant_algo": "FP8",
295
+ "quant_method": "modelopt",
296
+ "quant_type": "FP8"
297
+ },
298
+ "rope_axes_dim": [
299
+ 16,
300
+ 56,
301
+ 56
302
+ ],
303
+ "rope_theta": 256.0,
304
+ "text_embed_dim": 4096
305
+ }
diffusion_pytorch_model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe07c0639acf57bec983c4a0e071f8c9ed4581cdd9560300b9dbb5b470f7107
3
+ size 6914498560
diffusion_pytorch_model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b52b3e49113c2a62837846767ba28955cd8f817e349fa194a1bc6a95cfd96b8c
3
+ size 6240621592
diffusion_pytorch_model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778ecdc728b4961b2426e404bed7e4638c4217f010ac20b20ee7cb9a4f6f9300
3
+ size 3427543416
diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff