@@ -26,6 +26,28 @@ impl ContextSize {
26
26
}
27
27
}
28
28
29
/// Represents the type of a line in a unified diff.
#[doc(alias = "git2")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DiffLineType {
    /// A line that exists in both the old and the new version is called a context line.
    Context,
    /// A line that was added in the new version.
    Add,
    /// A line that was removed from the old version.
    Remove,
}

impl DiffLineType {
    /// Return the one-character marker that starts a line of this type in unified diff output:
    /// `' '` for context lines, `'+'` for added lines and `'-'` for removed lines.
    ///
    /// This is public so that custom `ConsumeHunk` implementations, which receive their lines
    /// tagged with a `DiffLineType`, can render them without duplicating this mapping.
    pub const fn to_prefix(self) -> char {
        match self {
            Self::Context => ' ',
            Self::Add => '+',
            Self::Remove => '-',
        }
    }
}
50
+
29
51
/// Specify where to put a newline.
30
52
#[ derive( Debug , Copy , Clone ) ]
31
53
pub enum NewlineSeparator < ' a > {
@@ -39,31 +61,45 @@ pub enum NewlineSeparator<'a> {
39
61
AfterHeaderAndWhenNeeded ( & ' a str ) ,
40
62
}
41
63
64
/// Holds information about a unified diff hunk, specifically with respect to line numbers.
///
/// Its [`Display`](std::fmt::Display) implementation renders the usual `@@ -a,b +c,d @@` hunk header
/// without a trailing newline.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HunkHeader {
    /// The 1-based start position in the 'before' lines.
    pub before_hunk_start: u32,
    /// The size of the 'before' hunk in lines.
    pub before_hunk_len: u32,
    /// The 1-based start position in the 'after' lines.
    pub after_hunk_start: u32,
    /// The size of the 'after' hunk in lines.
    pub after_hunk_len: u32,
}

impl std::fmt::Display for HunkHeader {
    /// Write `@@ -<before start>,<before len> +<after start>,<after len> @@` to `f`.
    /// No newline is appended; that is left to whoever emits the header.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            before_hunk_start,
            before_hunk_len,
            after_hunk_start,
            after_hunk_len,
        } = self;
        write!(
            f,
            "@@ -{},{} +{},{} @@",
            before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len
        )
    }
}
85
+
42
86
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
43
87
pub trait ConsumeHunk {
44
88
/// The item this instance produces after consuming all hunks.
45
89
type Out ;
46
90
47
- /// Consume a single ` hunk` in unified diff format, that would be prefixed with `header`.
48
- /// Note that all newlines are added .
91
+ /// Consume a single hunk, described by `header` and made up of `lines`.
92
+ /// It is the implementation's responsibility to add newlines where requested by `newline`.
49
93
///
50
94
/// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
51
95
/// After this method returned its first error, it will not be called anymore.
+ ///
+ /// * `header` holds the 1-based start position and the length of this hunk in the old and in the new version.
+ /// * `lines` are the lines of the hunk, each tagged with its [`DiffLineType`]. The content is passed as-is,
+ ///   without the leading ` `, `+` or `-` marker.
+ /// * `newline` carries the line separator and tells whether it goes after every line or only where needed.
52
- ///
53
- /// The following is hunk-related information and the same that is used in the `header`.
54
- /// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
55
- /// * `before_hunk_len` the amount of lines of this hunk in the old file.
56
- /// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
57
- /// * `after_hunk_len` the amount of lines of this hunk in the new file.
58
96
fn consume_hunk (
59
97
& mut self ,
60
- before_hunk_start : u32 ,
61
- before_hunk_len : u32 ,
62
- after_hunk_start : u32 ,
63
- after_hunk_len : u32 ,
64
- header : & str ,
65
- hunk : & [ u8 ] ,
98
+ header : HunkHeader ,
99
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
100
+ newline : NewlineSeparator < ' _ > ,
66
101
) -> std:: io:: Result < ( ) > ;
102
+
67
103
/// Called after the last hunk is consumed to produce an output.
68
104
fn finish ( self ) -> Self :: Out ;
69
105
}
@@ -75,14 +111,10 @@ pub(super) mod _impl {
75
111
use imara_diff:: { intern, Sink } ;
76
112
use intern:: { InternedInput , Interner , Token } ;
77
113
78
- use super :: { ConsumeHunk , ContextSize , NewlineSeparator } ;
79
-
80
- const CONTEXT : char = ' ' ;
81
- const ADDITION : char = '+' ;
82
- const REMOVAL : char = '-' ;
114
+ use super :: { ConsumeHunk , ContextSize , DiffLineType , HunkHeader , NewlineSeparator } ;
83
115
84
- /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu- diff` if the `-u` option is used,
85
- /// and passes it in full to a consumer .
116
+ /// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the
117
+ /// format typically output by `git` or `gnu-diff` if the `-u` option is used .
86
118
pub struct UnifiedDiff < ' a , T , D >
87
119
where
88
120
T : Hash + Eq + AsRef < [ u8 ] > ,
@@ -108,8 +140,8 @@ pub(super) mod _impl {
108
140
ctx_size : u32 ,
109
141
newline : NewlineSeparator < ' a > ,
110
142
111
- buffer : Vec < u8 > ,
112
- header_buf : String ,
143
+ buffer : Vec < ( DiffLineType , Vec < u8 > ) > ,
144
+
113
145
delegate : D ,
114
146
115
147
err : Option < std:: io:: Error > ,
@@ -120,12 +152,13 @@ pub(super) mod _impl {
120
152
T : Hash + Eq + AsRef < [ u8 ] > ,
121
153
D : ConsumeHunk ,
122
154
{
123
- /// Create a new instance to create unified diff using the lines in `input`,
155
+ /// Create a new instance to create a unified diff using the lines in `input`,
124
156
/// which also must be used when running the diff algorithm.
125
157
/// `context_size` is the amount of lines around each hunk which will be passed
126
- ///to `consume_hunk`.
158
+ /// to `consume_hunk`.
127
159
///
128
- /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
160
+ /// `consume_hunk` is called for each hunk with all the information required to create a
161
+ /// unified diff.
129
162
pub fn new (
130
163
input : & ' a InternedInput < T > ,
131
164
consume_hunk : D ,
@@ -147,28 +180,16 @@ pub(super) mod _impl {
147
180
newline : newline_separator,
148
181
149
182
buffer : Vec :: with_capacity ( 8 ) ,
150
- header_buf : String :: new ( ) ,
151
183
delegate : consume_hunk,
152
184
153
185
err : None ,
154
186
}
155
187
}
156
188
157
- fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
189
+ fn print_tokens ( & mut self , tokens : & [ Token ] , line_type : DiffLineType ) {
158
190
for & token in tokens {
159
- self . buffer . push_char ( prefix) ;
160
- let line = & self . interner [ token] ;
161
- self . buffer . push_str ( line) ;
162
- match self . newline {
163
- NewlineSeparator :: AfterHeaderAndLine ( nl) => {
164
- self . buffer . push_str ( nl) ;
165
- }
166
- NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
167
- if !line. as_ref ( ) . ends_with_str ( nl) {
168
- self . buffer . push_str ( nl) ;
169
- }
170
- }
171
- }
191
+ let content = self . interner [ token] . as_ref ( ) . to_vec ( ) ;
192
+ self . buffer . push ( ( line_type, content) ) ;
172
193
}
173
194
}
174
195
@@ -183,38 +204,35 @@ pub(super) mod _impl {
183
204
184
205
let hunk_start = self . before_hunk_start + 1 ;
185
206
let hunk_end = self . after_hunk_start + 1 ;
186
- self . header_buf . clear ( ) ;
187
- std:: fmt:: Write :: write_fmt (
188
- & mut self . header_buf ,
189
- format_args ! (
190
- "@@ -{},{} +{},{} @@{nl}" ,
191
- hunk_start,
192
- self . before_hunk_len,
193
- hunk_end,
194
- self . after_hunk_len,
195
- nl = match self . newline {
196
- NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
197
- nl
198
- }
199
- }
200
- ) ,
201
- )
202
- . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
203
- self . delegate . consume_hunk (
204
- hunk_start,
205
- self . before_hunk_len ,
206
- hunk_end,
207
- self . after_hunk_len ,
208
- & self . header_buf ,
209
- & self . buffer ,
210
- ) ?;
207
+
208
+ let header = HunkHeader {
209
+ before_hunk_start : hunk_start,
210
+ before_hunk_len : self . before_hunk_len ,
211
+ after_hunk_start : hunk_end,
212
+ after_hunk_len : self . after_hunk_len ,
213
+ } ;
214
+
215
+ // TODO:
216
+ // If I remember correctly, we wanted this buffer to be reused between calls, but I ran
217
+ // into lifetime issues when I tried to make it a field on `UnifiedDiff`.
218
+ let buffer2: Vec < ( DiffLineType , & [ u8 ] ) > = self
219
+ . buffer
220
+ . iter ( )
221
+ . map ( |( line_type, content) | ( * line_type, content. as_slice ( ) ) )
222
+ . collect ( ) ;
223
+
224
+ self . delegate . consume_hunk ( header, & buffer2, self . newline ) ?;
211
225
212
226
self . reset_hunks ( ) ;
213
227
Ok ( ( ) )
214
228
}
215
229
216
230
fn print_context_and_update_pos ( & mut self , print : Range < u32 > , move_to : u32 ) {
217
- self . print_tokens ( & self . before [ print. start as usize ..print. end as usize ] , CONTEXT ) ;
231
+ self . print_tokens (
232
+ & self . before [ print. start as usize ..print. end as usize ] ,
233
+ DiffLineType :: Context ,
234
+ ) ;
235
+
218
236
let len = print. end - print. start ;
219
237
self . ctx_pos = Some ( move_to) ;
220
238
self . before_hunk_len += len;
@@ -270,8 +288,11 @@ pub(super) mod _impl {
270
288
self . before_hunk_len += before. end - before. start ;
271
289
self . after_hunk_len += after. end - after. start ;
272
290
273
- self . print_tokens ( & self . before [ before. start as usize ..before. end as usize ] , REMOVAL ) ;
274
- self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , ADDITION ) ;
291
+ self . print_tokens (
292
+ & self . before [ before. start as usize ..before. end as usize ] ,
293
+ DiffLineType :: Remove ,
294
+ ) ;
295
+ self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , DiffLineType :: Add ) ;
275
296
}
276
297
277
298
fn finish ( mut self ) -> Self :: Out {
@@ -289,12 +310,32 @@ pub(super) mod _impl {
289
310
impl ConsumeHunk for String {
290
311
type Out = Self ;
291
312
292
- fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
293
- self . push_str ( header) ;
294
- self . push_str (
295
- hunk. to_str ( )
296
- . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
297
- ) ;
313
+ fn consume_hunk (
314
+ & mut self ,
315
+ header : HunkHeader ,
316
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
317
+ newline : NewlineSeparator < ' _ > ,
318
+ ) -> std:: io:: Result < ( ) > {
319
+ self . push_str ( & header. to_string ( ) ) ;
320
+ self . push_str ( match newline {
321
+ NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => nl,
322
+ } ) ;
323
+
324
+ for & ( line_type, content) in lines {
325
+ self . push ( line_type. to_prefix ( ) ) ;
326
+ self . push_str ( std:: str:: from_utf8 ( content) . map_err ( |e| std:: io:: Error :: new ( ErrorKind :: Other , e) ) ?) ;
327
+
328
+ match newline {
329
+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
330
+ self . push_str ( nl) ;
331
+ }
332
+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
333
+ if !content. ends_with_str ( nl) {
334
+ self . push_str ( nl) ;
335
+ }
336
+ }
337
+ }
338
+ }
298
339
Ok ( ( ) )
299
340
}
300
341
@@ -307,9 +348,32 @@ pub(super) mod _impl {
307
348
impl ConsumeHunk for Vec < u8 > {
308
349
type Out = Self ;
309
350
310
- fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
311
- self . push_str ( header) ;
312
- self . push_str ( hunk) ;
351
+ fn consume_hunk (
352
+ & mut self ,
353
+ header : HunkHeader ,
354
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
355
+ newline : NewlineSeparator < ' _ > ,
356
+ ) -> std:: io:: Result < ( ) > {
357
+ self . push_str ( header. to_string ( ) ) ;
358
+ self . push_str ( match newline {
359
+ NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => nl,
360
+ } ) ;
361
+
362
+ for & ( line_type, content) in lines {
363
+ self . push ( line_type. to_prefix ( ) as u8 ) ;
364
+ self . extend_from_slice ( content) ;
365
+
366
+ match newline {
367
+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
368
+ self . push_str ( nl) ;
369
+ }
370
+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
371
+ if !content. ends_with_str ( nl) {
372
+ self . push_str ( nl) ;
373
+ }
374
+ }
375
+ }
376
+ }
313
377
Ok ( ( ) )
314
378
}
315
379
0 commit comments