Skip to content

Commit e24d8f4

Browse files
committed
feat!: add DiffLineType and HunkHeader
This commit modifies the public API of `ConsumeHunk::consume_hunk` to use the new types `DiffLineType` and `HunkHeader`. It also shifts responsibility for adding newlines to the API's consumer.
1 parent c149116 commit e24d8f4

File tree

2 files changed

+208
-88
lines changed

2 files changed

+208
-88
lines changed

gix-diff/src/blob/unified_diff.rs

Lines changed: 142 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,28 @@ impl ContextSize {
2626
}
2727
}
2828

29+
/// Classifies a single line of a unified diff.
#[doc(alias = "git2")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiffLineType {
    /// A context line, i.e. one that is present in both the old and the new version.
    Context,
    /// A line that is only present in the new version: it was added.
    Add,
    /// A line that is only present in the old version: it was removed.
    Remove,
}

impl DiffLineType {
    /// Return the marker character for a line of this type:
    /// a space for context lines, `+` for added lines and `-` for removed lines.
    const fn to_prefix(self) -> char {
        match self {
            Self::Context => ' ',
            Self::Add => '+',
            Self::Remove => '-',
        }
    }
}
50+
2951
/// Specify where to put a newline.
3052
#[derive(Debug, Copy, Clone)]
3153
pub enum NewlineSeparator<'a> {
@@ -39,31 +61,45 @@ pub enum NewlineSeparator<'a> {
3961
AfterHeaderAndWhenNeeded(&'a str),
4062
}
4163

64+
/// Holds information about a unified diff hunk, specifically with respect to line numbers.
///
/// Its [`Display`](std::fmt::Display) implementation renders the hunk header line
/// in the form `@@ -<before_start>,<before_len> +<after_start>,<after_len> @@`,
/// without a trailing newline.
// Plain data made of four `u32`s: derive the common traits so consumers can debug-print,
// copy and compare headers, consistent with the other public types in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HunkHeader {
    /// The 1-based start position in the 'before' lines.
    pub before_hunk_start: u32,
    /// The size of the 'before' hunk in lines.
    pub before_hunk_len: u32,
    /// The 1-based start position in the 'after' lines.
    pub after_hunk_start: u32,
    /// The size of the 'after' hunk in lines.
    pub after_hunk_len: u32,
}

impl std::fmt::Display for HunkHeader {
    /// Write the header as `@@ -{start},{len} +{start},{len} @@`.
    ///
    /// No newline is appended; adding one is left to the caller.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "@@ -{},{} +{},{} @@",
            self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len
        )
    }
}
85+
4286
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
4387
pub trait ConsumeHunk {
4488
/// The item this instance produces after consuming all hunks.
4589
type Out;
4690

47-
/// Consume a single `hunk` in unified diff format, that would be prefixed with `header`.
48-
/// Note that all newlines are added.
91+
/// Consume a single hunk. Note that it is the implementation's responsibility to add newlines
92+
/// where requested by `newline`.
4993
///
5094
/// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
5195
/// After this method returned its first error, it will not be called anymore.
52-
///
53-
/// The following is hunk-related information and the same that is used in the `header`.
54-
/// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
55-
/// * `before_hunk_len` the amount of lines of this hunk in the old file.
56-
/// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
57-
/// * `after_hunk_len` the amount of lines of this hunk in the new file.
5896
fn consume_hunk(
5997
&mut self,
60-
before_hunk_start: u32,
61-
before_hunk_len: u32,
62-
after_hunk_start: u32,
63-
after_hunk_len: u32,
64-
header: &str,
65-
hunk: &[u8],
98+
header: HunkHeader,
99+
lines: &[(DiffLineType, &[u8])],
100+
newline: NewlineSeparator<'_>,
66101
) -> std::io::Result<()>;
102+
67103
/// Called after the last hunk is consumed to produce an output.
68104
fn finish(self) -> Self::Out;
69105
}
@@ -75,14 +111,10 @@ pub(super) mod _impl {
75111
use imara_diff::{intern, Sink};
76112
use intern::{InternedInput, Interner, Token};
77113

78-
use super::{ConsumeHunk, ContextSize, NewlineSeparator};
79-
80-
const CONTEXT: char = ' ';
81-
const ADDITION: char = '+';
82-
const REMOVAL: char = '-';
114+
use super::{ConsumeHunk, ContextSize, DiffLineType, HunkHeader, NewlineSeparator};
83115

84-
/// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
85-
/// and passes it in full to a consumer.
116+
/// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the
117+
/// format typically output by `git` or `gnu-diff` if the `-u` option is used.
86118
pub struct UnifiedDiff<'a, T, D>
87119
where
88120
T: Hash + Eq + AsRef<[u8]>,
@@ -108,8 +140,8 @@ pub(super) mod _impl {
108140
ctx_size: u32,
109141
newline: NewlineSeparator<'a>,
110142

111-
buffer: Vec<u8>,
112-
header_buf: String,
143+
buffer: Vec<(DiffLineType, Vec<u8>)>,
144+
113145
delegate: D,
114146

115147
err: Option<std::io::Error>,
@@ -120,12 +152,13 @@ pub(super) mod _impl {
120152
T: Hash + Eq + AsRef<[u8]>,
121153
D: ConsumeHunk,
122154
{
123-
/// Create a new instance to create unified diff using the lines in `input`,
155+
/// Create a new instance to create a unified diff using the lines in `input`,
124156
/// which also must be used when running the diff algorithm.
125157
/// `context_size` is the amount of lines around each hunk which will be passed
126-
///to `consume_hunk`.
158+
/// to `consume_hunk`.
127159
///
128-
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
160+
/// `consume_hunk` is called for each hunk with all the information required to create a
161+
/// unified diff.
129162
pub fn new(
130163
input: &'a InternedInput<T>,
131164
consume_hunk: D,
@@ -147,28 +180,16 @@ pub(super) mod _impl {
147180
newline: newline_separator,
148181

149182
buffer: Vec::with_capacity(8),
150-
header_buf: String::new(),
151183
delegate: consume_hunk,
152184

153185
err: None,
154186
}
155187
}
156188

157-
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
189+
fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineType) {
158190
for &token in tokens {
159-
self.buffer.push_char(prefix);
160-
let line = &self.interner[token];
161-
self.buffer.push_str(line);
162-
match self.newline {
163-
NewlineSeparator::AfterHeaderAndLine(nl) => {
164-
self.buffer.push_str(nl);
165-
}
166-
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
167-
if !line.as_ref().ends_with_str(nl) {
168-
self.buffer.push_str(nl);
169-
}
170-
}
171-
}
191+
let content = self.interner[token].as_ref().to_vec();
192+
self.buffer.push((line_type, content));
172193
}
173194
}
174195

@@ -183,38 +204,35 @@ pub(super) mod _impl {
183204

184205
let hunk_start = self.before_hunk_start + 1;
185206
let hunk_end = self.after_hunk_start + 1;
186-
self.header_buf.clear();
187-
std::fmt::Write::write_fmt(
188-
&mut self.header_buf,
189-
format_args!(
190-
"@@ -{},{} +{},{} @@{nl}",
191-
hunk_start,
192-
self.before_hunk_len,
193-
hunk_end,
194-
self.after_hunk_len,
195-
nl = match self.newline {
196-
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
197-
nl
198-
}
199-
}
200-
),
201-
)
202-
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
203-
self.delegate.consume_hunk(
204-
hunk_start,
205-
self.before_hunk_len,
206-
hunk_end,
207-
self.after_hunk_len,
208-
&self.header_buf,
209-
&self.buffer,
210-
)?;
207+
208+
let header = HunkHeader {
209+
before_hunk_start: hunk_start,
210+
before_hunk_len: self.before_hunk_len,
211+
after_hunk_start: hunk_end,
212+
after_hunk_len: self.after_hunk_len,
213+
};
214+
215+
// TODO:
216+
// If I remember correctly, we wanted this buffer to be reused between calls, but I ran
217+
// into lifetime issues when I tried to make it a field on `UnifiedDiff`.
218+
let buffer2: Vec<(DiffLineType, &[u8])> = self
219+
.buffer
220+
.iter()
221+
.map(|(line_type, content)| (*line_type, content.as_slice()))
222+
.collect();
223+
224+
self.delegate.consume_hunk(header, &buffer2, self.newline)?;
211225

212226
self.reset_hunks();
213227
Ok(())
214228
}
215229

216230
fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) {
217-
self.print_tokens(&self.before[print.start as usize..print.end as usize], CONTEXT);
231+
self.print_tokens(
232+
&self.before[print.start as usize..print.end as usize],
233+
DiffLineType::Context,
234+
);
235+
218236
let len = print.end - print.start;
219237
self.ctx_pos = Some(move_to);
220238
self.before_hunk_len += len;
@@ -270,8 +288,11 @@ pub(super) mod _impl {
270288
self.before_hunk_len += before.end - before.start;
271289
self.after_hunk_len += after.end - after.start;
272290

273-
self.print_tokens(&self.before[before.start as usize..before.end as usize], REMOVAL);
274-
self.print_tokens(&self.after[after.start as usize..after.end as usize], ADDITION);
291+
self.print_tokens(
292+
&self.before[before.start as usize..before.end as usize],
293+
DiffLineType::Remove,
294+
);
295+
self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineType::Add);
275296
}
276297

277298
fn finish(mut self) -> Self::Out {
@@ -289,12 +310,32 @@ pub(super) mod _impl {
289310
impl ConsumeHunk for String {
290311
type Out = Self;
291312

292-
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
293-
self.push_str(header);
294-
self.push_str(
295-
hunk.to_str()
296-
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?,
297-
);
313+
fn consume_hunk(
314+
&mut self,
315+
header: HunkHeader,
316+
lines: &[(DiffLineType, &[u8])],
317+
newline: NewlineSeparator<'_>,
318+
) -> std::io::Result<()> {
319+
self.push_str(&header.to_string());
320+
self.push_str(match newline {
321+
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl,
322+
});
323+
324+
for &(line_type, content) in lines {
325+
self.push(line_type.to_prefix());
326+
self.push_str(std::str::from_utf8(content).map_err(|e| std::io::Error::new(ErrorKind::Other, e))?);
327+
328+
match newline {
329+
NewlineSeparator::AfterHeaderAndLine(nl) => {
330+
self.push_str(nl);
331+
}
332+
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
333+
if !content.ends_with_str(nl) {
334+
self.push_str(nl);
335+
}
336+
}
337+
}
338+
}
298339
Ok(())
299340
}
300341

@@ -307,9 +348,32 @@ pub(super) mod _impl {
307348
impl ConsumeHunk for Vec<u8> {
308349
type Out = Self;
309350

310-
fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
311-
self.push_str(header);
312-
self.push_str(hunk);
351+
fn consume_hunk(
352+
&mut self,
353+
header: HunkHeader,
354+
lines: &[(DiffLineType, &[u8])],
355+
newline: NewlineSeparator<'_>,
356+
) -> std::io::Result<()> {
357+
self.push_str(header.to_string());
358+
self.push_str(match newline {
359+
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl,
360+
});
361+
362+
for &(line_type, content) in lines {
363+
self.push(line_type.to_prefix() as u8);
364+
self.extend_from_slice(content);
365+
366+
match newline {
367+
NewlineSeparator::AfterHeaderAndLine(nl) => {
368+
self.push_str(nl);
369+
}
370+
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
371+
if !content.ends_with_str(nl) {
372+
self.push_str(nl);
373+
}
374+
}
375+
}
376+
}
313377
Ok(())
314378
}
315379

0 commit comments

Comments
 (0)