textmodel_gepa_classify

textmodel_gepa_classify(
    x=None,
    y=None,
    module=None,
    which='predict',
    reflection_lm=None,
    metric='accuracy',
    auto='light',
    train_size=0.75,
    test_size=0.1,
    seed=4321,
    track_stats=True,
    num_threads=1,
    trace=False,
    **gepa_kwargs,
)

Fit a GEPA-optimized text classification model using DSPy.

Parameters

Name	Type	Description	Default
x	Union[pd.DataFrame, Iterable[str]]	If x is a pd.DataFrame, it should be a TIF-compatible DataFrame (with doc_id, text, target columns). If x is a list of str, you need to supply y.	`None`
y	Optional[Iterable[str]] or None	A list of str containing the labels to predict, required when x is raw text.	`None`
module	dspy.Signature	A dspy signature to tune with dspy.Predict or dspy.ChainOfThought; the signature must have an input field ‘text’ and an output field ‘target’.	`None`
which	(predict, chainofthought)	Which dspy module to use: either 'predict' (dspy.Predict) or 'chainofthought' (dspy.ChainOfThought).	`'predict'`
reflection_lm	dspy.LM or None	Language model used by GEPA for reflection. Defaults to the configured dspy lm.	`None`
metric	accuracy	Either a str with values ‘accuracy’ or a callable which returns how good the prediction was. Defaults to accuracy.	`'accuracy'`
auto	(light, medium, heavy, None)	GEPA `auto` budget. Default `'light'`.	`'light'`
train_size	float	Size of the training set. Defaults to 0.75, indicating 75% of (the size of the data x - test_size).	`0.75`
test_size	int	Number of holdout test samples.	`0.1`
seed	int	Seed for shuffling the input data.	`4321`
track_stats	bool	Track stats on evaluation dataset	`True`
num_threads	int	Number of threads to use in GEPA. Defaults to 1. Only change this if the default lm you are using is a remote lm served through an API.	`1`
trace	bool	Boolean allowing dspy logging to be disabled. Defaults to False.	`False`
**gepa_kwargs		Forwarded to `dspy.GEPA`.	`{}`

Returns

Name	Type	Description
	TextModelGEPA	A GEPA prompt-finetuned classifier

Examples

>>> ######################################################################################
>>> ## Define the models to use
>>> ##
>>> import localllm
>>> from localllm import localllm_download_model, textmodel_gepa_classify    
>>> ######################################################################################
>>> ## GEPA requires a reflection model and a model for which the module is tuned.
>>> ## Make sure the reflection model is defined first, as the module
>>> ## will use the default lm, which is the one set last, to tune the instructions
>>> ## Example below: reflection model running in LMStudio    
>>> opts = dict(api_base = "http://localhost:1234/v1", api_key = "none", model_type = "chat", provider = "openai", cache = True, response_format = dict(type = "text"))
>>> reflection_lm = localllm.connect("openai/gemma-4-E4b-it-GGUF", opts)        
>>> ##
>>> ## Example connecting to a local LLM running on LMStudio or an API running e.g. on your computer
>>> ##
>>> opts = dict(api_base = "http://localhost:1234/v1", api_key = "none", model_type = "chat", provider = "openai", cache = True, response_format = dict(type = "text"))
>>> lm = localllm.connect("openai/gemma-4-E2B-it-GGUF", opts)
>>> ##
>>> ## Example to connect to a local llm directly in Python
>>> ##
>>> lm   = localllm.connect("localllm/Qwen3-4B-Instruct-Q4_K_M")
>>> opts = dict(n_ctx = 512, n_gpu_layers = 0, n_threads = 1, flash_attn = False, swa_full = False, verbose = False)    
>>> lm   = localllm.connect("localllm/gemma-3-270m-it-Q8_0", opts)
...
>>> 
>>> ######################################################################################
>>> ## Get data, define target to predict
>>> ##
>>> import pandas as pd    
>>> from localllm.data import data_be_parliament
>>> from localllm.utilities.converters import tif
>>> be = data_be_parliament()
>>> be = pd.DataFrame.from_records(be)
>>> be = be[be["question_theme_main"].isin(["VERVOERBELEID", "OPENBARE VEILIGHEID"])]
>>> be = tif(be, docid_field = "doc_id", text_field = "question", target_field = "question_theme_main")
>>> list(be.columns)
['doc_id', 'text', 'target']
>>> dataset = be.sample(100)
>>> 
>>> ######################################################################################
>>> ## Auto-tune the prompt using GEPA
>>> ##
>>> from s3generics import summary, predict
>>> model = textmodel_gepa_classify(x = dataset["text"], y = dataset["target"], auto = None, max_metric_calls = 1, reflection_minibatch_size = 3, test_size = 0, trace = True)
...
>>> score = predict(model, ["We gaan met de trein op reis naar Blankenberge", "De politie is met man en macht op straat"])
>>> ## A more realistic example
>>> model = textmodel_gepa_classify(x = dataset["text"], y = dataset["target"], auto = "light", test_size = 10)
>>> score = predict(model, ["We gaan met de trein op reis naar Blankenberge", "De politie is met man en macht op straat"])
>>> score
['VERVOERBELEID', 'OPENBARE VEILIGHEID']
>>> summary(model)
Method       : Classification (DSPy GEPA): predict
GEPA auto    : light
Classes      : ['OPENBARE VEILIGHEID', 'VERVOERBELEID']    
...