| |
| |
|
|
from transformers import AutoModel, AutoConfig
from huggingface_hub import hf_hub_download
import torch
import numpy as np
import importlib.util
|
|
def load_model_and_collator():
    """Download the seamless-basic model code and data collator from the Hub and load them.

    Both the modeling file and the collator file live as plain ``.py`` files in the
    ``videoloc/seamless-basic`` repo, so each is downloaded and imported dynamically
    via ``importlib`` rather than through a registered ``transformers`` auto-class.

    Returns:
        tuple: ``(model, data_collator)`` where ``model`` is the pretrained
        ``HFSeamlessBasic`` instance and ``data_collator`` is a
        ``DataCollatorSimpleSeamless`` configured for 8-second audio clips and
        256-token text.
    """
    def _import_hub_module(filename, module_name):
        # Download a single .py file from the repo and import it as a module.
        # NOTE(review): executes remote code — acceptable here because the repo
        # is the trusted source of the model definition itself.
        path = hf_hub_download(repo_id="videoloc/seamless-basic", filename=filename)
        spec = importlib.util.spec_from_file_location(module_name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    # Model class is defined in the repo's modeling file; from_pretrained pulls
    # the config and weights itself, so no separate config load is needed.
    modeling_module = _import_hub_module(
        "modeling_seamless_basic.py", "modeling_seamless_basic"
    )
    model = modeling_module.HFSeamlessBasic.from_pretrained("videoloc/seamless-basic")

    collator_module = _import_hub_module("data_collator.py", "data_collator")
    data_collator = collator_module.DataCollatorSimpleSeamless(
        processor="facebook/hf-seamless-m4t-medium",
        max_audio_length_sec=8.0,
        max_text_length=256,
    )

    return model, data_collator
|
|
def example_inference():
    """Run one demo prediction on synthetic input and print the result.

    Builds a single example with 3 seconds of random 16 kHz audio plus a short
    subtitle string, collates it into a model batch, and returns the predicted
    Time To Edit (TTE) in seconds.
    """
    model, collator = load_model_and_collator()

    sample = {
        'raw_audio': np.random.randn(16000 * 3),  # 3 s of noise at 16 kHz
        'raw_text': "Hello, welcome to our presentation today.",
    }
    batch = collator([sample])

    # Inference only: disable dropout etc. and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        outputs = model(**batch)

    tte_prediction = outputs.logits.item()
    print(f"Predicted Time To Edit (TTE): {tte_prediction:.2f} seconds")
    return tte_prediction
|
|
| if __name__ == "__main__": |
| example_inference() |
|
|