Spaces:
Running
Running
| import evaluate | |
| import datasets | |
| from text2sql_eval.metrics.bleu import bleu_score | |
# Short description surfaced by `evaluate` in metric cards / `metric.description`.
_DESCRIPTION = "SQL token BLEU (0–1). Returns mean score in [0, 1]."
| def _to_str(x): | |
| if isinstance(x, (list, tuple)): | |
| return x[0] if x else "" | |
| return "" if x is None else str(x) | |
class SQLBLEU(evaluate.Metric):
    """Hugging Face ``evaluate`` metric wrapping text2sql-eval's token BLEU.

    Each prediction/reference pair is scored with ``bleu_score`` (values in
    [0, 1]); ``_compute`` returns the mean over all pairs.
    """

    def _info(self):
        """Declare metric metadata and the expected string/string features."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation="Uses sacrebleu via text2sql-eval implementation.",
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
        )

    def _compute(self, predictions, references):
        """Return ``{"sql_bleu": mean}`` with the mean BLEU in [0, 1].

        Raises:
            ValueError: if ``predictions`` and ``references`` differ in
                length — previously ``zip`` silently truncated to the
                shorter list, yielding a wrong mean with no warning.
        """
        if len(predictions) != len(references):
            raise ValueError(
                f"predictions ({len(predictions)}) and references "
                f"({len(references)}) must have the same length"
            )
        scores = [
            float(bleu_score(_to_str(p), _to_str(r)))
            for p, r in zip(predictions, references)
        ]
        # Empty input yields 0.0 rather than a ZeroDivisionError.
        return {"sql_bleu": sum(scores) / len(scores) if scores else 0.0}