-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSelector.html
executable file
·410 lines (367 loc) · 29.9 KB
/
Selector.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
<!DOCTYPE html>
<html class="writer-html5" lang="en">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Selector Module — easyjailbreak 0.1.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js?v=2389946f"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=4825356b"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="prev" title="Seed Module" href="Seed.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
easyjailbreak
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="attacker.html">Attacker Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="constraint.html">Constraint Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="datasets.html">Datasets Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="metrics.html">Metric Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="evaluator.html">Evaluator Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="Seed.html">Seed Module</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Selector Module</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#selector">selector</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#selectpolicy-class">SelectPolicy class</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#ucbselectpolicy">UCBSelectPolicy</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#ucbselectpolicy-class">UCBSelectPolicy class</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#selectbasedonscores">SelectBasedOnScores</a></li>
<li class="toctree-l2"><a class="reference internal" href="#roundrobinselectpolicy">RoundRobinSelectPolicy</a></li>
<li class="toctree-l2"><a class="reference internal" href="#randomselector">RandomSelector</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#randomselectpolicy-class">RandomSelectPolicy class</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#mctsexploreselectpolicy">MCTSExploreSelectPolicy</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#mctsexploreselectpolicy-class">MCTSExploreSelectPolicy class</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#exp3selectpolicy">EXP3SelectPolicy</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#exp3selectpolicy-class">EXP3SelectPolicy class</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#referencelossselector">ReferenceLossSelector</a></li>
</ul>
</li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">easyjailbreak</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Selector Module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/Selector.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="selector-module">
<h1>Selector Module<a class="headerlink" href="#selector-module" title="Permalink to this heading"></a></h1>
<p>This section of the documentation introduces the various submodules in easyjailbreak.selector. The Selector module is used to select the most suitable samples from the dataset for mutation.
In some circumstances, a single seed can give rise to an exponentially growing number of jailbreak instances. Therefore, it is important to select the jailbreak instances that have the greatest potential for later processing, especially when compute resources are limited. EasyJailbreak offers several kinds of selectors for users to choose from.</p>
<section id="selector">
<h2>selector<a class="headerlink" href="#selector" title="Permalink to this heading"></a></h2>
<section id="selectpolicy-class">
<h3>SelectPolicy class<a class="headerlink" href="#selectpolicy-class" title="Permalink to this heading"></a></h3>
<p>This file contains the implementation of policies for selecting instances from datasets,
specifically tailored for use in easy jailbreak scenarios. It defines abstract base classes
and concrete implementations for selecting instances based on various criteria.</p>
</section>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.selector.</span></span><span class="sig-name descname"><span class="pre">SelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Datasets</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Abstract base class representing a policy for selecting instances from a JailbreakDataset.
It provides a framework for implementing various selection strategies.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">initial</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span></dt>
<dd><p>Initializes or resets any internal state of the selection policy, if necessary.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<em class="property"><span class="pre">abstract</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Instance</span></span></span></dt>
<dd><p>Abstract method that must be implemented by subclasses to define the selection strategy.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>Instance</em> – The selected instance from the dataset.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">jailbreak_dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Updates the internal state of the selection policy, if necessary.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>jailbreak_dataset</strong> (<em>JailbreakDataset</em>) – The dataset to update the policy with.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="ucbselectpolicy">
<h2>UCBSelectPolicy<a class="headerlink" href="#ucbselectpolicy" title="Permalink to this heading"></a></h2>
<section id="ucbselectpolicy-class">
<h3>UCBSelectPolicy class<a class="headerlink" href="#ucbselectpolicy-class" title="Permalink to this heading"></a></h3>
</section>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.UCBSelectPolicy.</span></span><span class="sig-name descname"><span class="pre">UCBSelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">explore_coeff</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>A selection policy based on the Upper Confidence Bound (UCB) algorithm. This policy is designed
to balance exploration and exploitation when selecting instances from a JailbreakDataset.
It uses the UCB formula to select instances that either have high rewards or have not been explored much.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">JailbreakDataset</span></span></span></dt>
<dd><p>Selects an instance from the dataset based on the UCB algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>JailbreakDataset</em> – The selected JailbreakDataset from the dataset.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Updates the rewards for the last selected instance based on the success of the prompts.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>Dataset</strong> (<em>JailbreakDataset</em>) – The dataset containing prompts used for updating rewards.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="selectbasedonscores">
<h2>SelectBasedOnScores<a class="headerlink" href="#selectbasedonscores" title="Permalink to this heading"></a></h2>
<p>‘SelectBasedOnScores’ selects the instances whose scores are high (a score measures the extent to which an instance achieves a jailbreak).
Detailed information can be found in the following paper.</p>
<p>Paper title: Tree of Attacks: Jailbreaking Black-Box LLMs Automatically<br />
arXiv link: <a class="reference external" href="https://arxiv.org/abs/2312.02119">https://arxiv.org/abs/2312.02119</a><br />
Source repository: <a class="reference external" href="https://github.com/RICommunity/TAP">https://github.com/RICommunity/TAP</a></p>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.SelectBasedOnScores.</span></span><span class="sig-name descname"><span class="pre">SelectBasedOnScores</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tree_width</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>This class implements a selection policy based on the scores of instances in a JailbreakDataset.
It selects a subset of instances with high scores, relevant for jailbreaking tasks.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">Instance</span><span class="p"><span class="pre">]</span></span></span></span></dt>
<dd><p>Selects a subset of instances from the dataset based on their scores.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>dataset</strong> (<em>JailbreakDataset</em>) – The dataset from which instances are to be selected.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p><em>List[Instance]</em> – A list of selected instances with high evaluation scores.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="roundrobinselectpolicy">
<h2>RoundRobinSelectPolicy<a class="headerlink" href="#roundrobinselectpolicy" title="Permalink to this heading"></a></h2>
<p>RoundRobinSelectPolicy class</p>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.RoundRobinSelectPolicy.</span></span><span class="sig-name descname"><span class="pre">RoundRobinSelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>A selection policy that selects instances from a JailbreakDataset in a round-robin manner.
This policy iterates over the dataset, selecting each instance in turn, and then repeats the process.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">JailbreakDataset</span></span></span></dt>
<dd><p>Selects the next instance in the dataset based on a round-robin approach and increments its visited count.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>JailbreakDataset</em> – The selected instance from the dataset.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prompt_nodes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Updates the selection index based on the length of the dataset.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>prompt_nodes</strong> (<em>JailbreakDataset</em>) – Not used in this implementation.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="randomselector">
<h2>RandomSelector<a class="headerlink" href="#randomselector" title="Permalink to this heading"></a></h2>
<section id="randomselectpolicy-class">
<h3>RandomSelectPolicy class<a class="headerlink" href="#randomselectpolicy-class" title="Permalink to this heading"></a></h3>
</section>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.RandomSelector.</span></span><span class="sig-name descname"><span class="pre">RandomSelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Datasets</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>A selection policy that randomly selects an instance from a JailbreakDataset.
It extends the SelectPolicy abstract base class, providing a concrete implementation
for the random selection strategy.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">JailbreakDataset</span></span></span></dt>
<dd><p>Selects an instance randomly from the dataset and increments its visited count.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>JailbreakDataset</em> – The randomly selected instance from the dataset.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="mctsexploreselectpolicy">
<h2>MCTSExploreSelectPolicy<a class="headerlink" href="#mctsexploreselectpolicy" title="Permalink to this heading"></a></h2>
<section id="mctsexploreselectpolicy-class">
<h3>MCTSExploreSelectPolicy class<a class="headerlink" href="#mctsexploreselectpolicy-class" title="Permalink to this heading"></a></h3>
</section>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.MCTSExploreSelectPolicy.</span></span><span class="sig-name descname"><span class="pre">MCTSExploreSelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inital_prompt_pool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Questions</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ratio</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.2</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>This class implements a selection policy based on the Monte Carlo Tree Search (MCTS) algorithm.
It is designed to explore and exploit a dataset of instances for effective jailbreaking of LLMs.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Instance</span></span></span></dt>
<dd><p>Selects an instance from the dataset using MCTS algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>JailbreakDataset</em> – The selected instance from the dataset.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prompt_nodes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Updates the weights of nodes in the MCTS tree based on their performance.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>prompt_nodes</strong> (<em>JailbreakDataset</em>) – Dataset of prompt nodes to update.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="exp3selectpolicy">
<h2>EXP3SelectPolicy<a class="headerlink" href="#exp3selectpolicy" title="Permalink to this heading"></a></h2>
<section id="exp3selectpolicy-class">
<h3>EXP3SelectPolicy class<a class="headerlink" href="#exp3selectpolicy-class" title="Permalink to this heading"></a></h3>
</section>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.EXP3SelectPolicy.</span></span><span class="sig-name descname"><span class="pre">EXP3SelectPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">Dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">energy</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">gamma</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.05</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">25</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>A selection policy based on the Exponential-weight algorithm for Exploration and Exploitation (EXP3).
This policy is designed for environments with adversarial contexts, balancing between exploring new instances
and exploiting known rewards in a JailbreakDataset.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">initial</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span></dt>
<dd><p>Initializes or resets the weights and probabilities for each instance in the dataset.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Instance</span></span></span></dt>
<dd><p>Selects an instance from the dataset based on the EXP3 algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><em>JailbreakDataset</em> – The selected instance from the dataset.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prompt_nodes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">JailbreakDataset</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>Updates the weights of the last chosen instance based on the success of the prompts.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>prompt_nodes</strong> (<em>JailbreakDataset</em>) – The dataset containing prompts used for updating weights.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
<section id="referencelossselector">
<h2>ReferenceLossSelector<a class="headerlink" href="#referencelossselector" title="Permalink to this heading"></a></h2>
<dl class="py class">
<dt class="sig sig-object py">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">easyjailbreak.selector.ReferenceLossSelector.</span></span><span class="sig-name descname"><span class="pre">ReferenceLossSelector</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">WhiteBoxModelBase</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">is_universal</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span></dt>
<dd><p>This class implements a selection policy based on the reference loss. It selects instances from a set of parents
based on the minimum loss calculated on their reference target, discarding others.</p>
<dl class="py method">
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">JailbreakDataset</span></span></span></dt>
<dd><p>Selects instances from the dataset based on the calculated reference loss.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>dataset</strong> (<em>JailbreakDataset</em>) – The dataset from which instances are to be selected.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p><em>JailbreakDataset</em> – A new dataset containing selected instances with minimum reference loss.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="Seed.html" class="btn btn-neutral float-left" title="Seed Module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>© Copyright 2024, zwk.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>