| | from typing import Dict, Any, List, Union |
| |
|
| | from transformers import ( |
| | AutoTokenizer, |
| | AutoModelForSequenceClassification, |
| | TextClassificationPipeline, |
| | ) |
| |
|
| |
|
class EndpointHandler:
    """Custom handler for Hugging Face Inference Endpoints.

    Loads a fine-tuned text-classification model and exposes a callable
    that the endpoint runtime will invoke. The runtime will instantiate
    this class once at startup, passing the model directory path.
    """

    def __init__(self, path: str = "", **kwargs):
        """Load tokenizer and model, then build the classification pipeline.

        Args:
            path: Directory containing the fine-tuned model artifacts.
                Falls back to the current working directory when empty.
            **kwargs: Ignored; accepted for forward-compatibility with the
                endpoint runtime's constructor contract.
        """
        model_dir = path or "."

        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)

        # Multi-label heads need independent per-label probabilities
        # (sigmoid); single-label heads use a softmax over classes.
        is_multi_label = (
            getattr(self.model.config, "problem_type", None)
            == "multi_label_classification"
        )
        # NOTE(review): `return_all_scores` is deprecated in recent
        # transformers releases in favor of `top_k`. Kept as-is here because
        # switching to `top_k=1` can change the per-input output shape;
        # migrate deliberately and adjust the response contract together.
        self.pipeline = TextClassificationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            device=-1,  # CPU; GPU placement is managed by the endpoint runtime
            return_all_scores=False,
            function_to_apply="sigmoid" if is_multi_label else "softmax",
        )

    def __call__(self, data: Dict[str, Any]) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """Run inference on the incoming request.

        Expected input format from the Inference Endpoint runtime:

            {
                "inputs": "some text" | ["text 1", "text 2", ...],
                "parameters": { ... }   # optional pipeline kwargs (e.g., top_k)
            }

        Returns:
            A single prediction dict when the caller sent one bare string,
            otherwise a list of prediction dicts, one per input text. A
            one-element input list yields a one-element output list (the
            previous implementation unwrapped any length-1 result, silently
            turning `["one text"]` into a bare dict).
        """
        raw_inputs = data.get("inputs", data)

        # Remember whether the caller sent a bare string so the response
        # mirrors the request shape: string in -> dict out, list in -> list out.
        single = isinstance(raw_inputs, str)
        texts = [raw_inputs] if single else raw_inputs

        # Optional pipeline kwargs; `or {}` also tolerates an explicit
        # `"parameters": null`, which would otherwise crash on `**None`.
        parameters = data.get("parameters") or {}

        outputs = self.pipeline(texts, **parameters)

        return outputs[0] if single else outputs