textmodel_gepa_classify

textmodel_gepa_classify(
    x=None,
    y=None,
    module=None,
    which='predict',
    reflection_lm=None,
    metric='accuracy',
    auto='light',
    train_size=0.75,
    test_size=0.1,
    seed=4321,
    track_stats=True,
    num_threads=1,
    trace=False,
    **gepa_kwargs,
)

Fit a GEPA-optimized text classification model using DSPy.

Parameters

Name Type Description Default
x Union[pd.DataFrame, Iterable[str]] If x is a pd.DataFrame, it should be a TIF-compatible DataFrame (with doc_id, text, target columns). If x is a list of str, you also need to supply y. None
y Optional[Iterable[str]] A list of str containing the labels to predict; required when x is raw text. None
module dspy.Signature A dspy signature to tune with dspy.Predict or dspy.ChainOfThought; the signature should have an input field ‘text’ and an output field ‘target’. None
which (predict, chainofthought) Either Predict or ChainOfThought 'predict'
reflection_lm dspy.LM or None Language model used by GEPA for reflection. Defaults to the configured dspy lm. None
metric str or Callable Either the str ‘accuracy’ or a callable which returns how good the prediction was. Defaults to ‘accuracy’. 'accuracy'
auto (light, medium, heavy, None) GEPA auto budget. Default 'light'. 'light'
train_size float Size of the training set. Defaults to 0.75, indicating 75% of the data in x that remains after setting aside test_size. 0.75
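test_size int or float Size of the holdout test set. An int gives the number of holdout test samples; a float such as the default 0.1 is presumably treated as a fraction of the data (to be confirmed against the implementation). 0.1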
seed int Seed for shuffling the input data. 4321
track_stats bool Track stats on evaluation dataset True
num_threads int Number of threads to use in GEPA. Defaults to 1. Only change this if the default lm you are using is a remote lm served through an API. 1
trace bool Boolean which allows disabling dspy logging. Defaults to False. False
**gepa_kwargs Forwarded to dspy.GEPA. {}

Returns

Name Type Description
TextModelGEPA A GEPA prompt-finetuned classifier

Examples

>>> ######################################################################################
>>> ## Define the models to use
>>> ##
>>> import localllm
>>> from localllm import localllm_download_model, textmodel_gepa_classify    
>>> ######################################################################################
>>> ## GEPA requires a reflection model and a model for which the module is tuned.
>>> ## Make sure the reflection model is defined first, as the module
>>> ## will use the default lm, which is the one set last, to tune the instructions
>>> ## Example below: reflection model running in LMStudio    
>>> opts = dict(api_base = "http://localhost:1234/v1", api_key = "none", model_type = "chat", provider = "openai", cache = True, response_format = dict(type = "text"))
>>> reflection_lm = localllm.connect("openai/gemma-4-E4b-it-GGUF", opts)        
>>> ##
>>> ## Example connecting to a local LLM running on LMStudio or an API running e.g. on your computer
>>> ##
>>> opts = dict(api_base = "http://localhost:1234/v1", api_key = "none", model_type = "chat", provider = "openai", cache = True, response_format = dict(type = "text"))
>>> lm = localllm.connect("openai/gemma-4-E2B-it-GGUF", opts)
>>> ##
>>> ## Example to connect to a local llm directly in Python
>>> ##
>>> lm   = localllm.connect("localllm/Qwen3-4B-Instruct-Q4_K_M")
>>> opts = dict(n_ctx = 512, n_gpu_layers = 0, n_threads = 1, flash_attn = False, swa_full = False, verbose = False)    
>>> lm   = localllm.connect("localllm/gemma-3-270m-it-Q8_0", opts)
...
>>> 
>>> ######################################################################################
>>> ## Get data, define target to predict
>>> ##
>>> import pandas as pd    
>>> from localllm.data import data_be_parliament
>>> from localllm.utilities.converters import tif
>>> be = data_be_parliament()
>>> be = pd.DataFrame.from_records(be)
>>> be = be[be["question_theme_main"].isin(["VERVOERBELEID", "OPENBARE VEILIGHEID"])]
>>> be = tif(be, docid_field = "doc_id", text_field = "question", target_field = "question_theme_main")
>>> list(be.columns)
['doc_id', 'text', 'target']
>>> dataset = be.sample(100)
>>> 
>>> ######################################################################################
>>> ## Auto-tune the prompt using GEPA
>>> ##
>>> from s3generics import summary, predict
>>> model = textmodel_gepa_classify(x = dataset["text"], y = dataset["target"], auto = None, max_metric_calls = 1, reflection_minibatch_size = 3, test_size = 0, trace = True)
...
>>> score = predict(model, ["We gaan met de trein op reis naar Blankenberge", "De politie is met man en macht op straat"])
>>> ## A more realistic example
>>> model = textmodel_gepa_classify(x = dataset["text"], y = dataset["target"], auto = "light", test_size = 10)
>>> score = predict(model, ["We gaan met de trein op reis naar Blankenberge", "De politie is met man en macht op straat"])
>>> score
['VERVOERBELEID', 'OPENBARE VEILIGHEID']
>>> summary(model)
Method       : Classification (DSPy GEPA): predict
GEPA auto    : light
Classes      : ['OPENBARE VEILIGHEID', 'VERVOERBELEID']    
...