xdiff: implement empty line chunk heuristic

In order to produce the smallest possible diff and combine several diff
hunks together, we implement a heuristic from GNU Diff which moves diff
hunks forward as far as possible when we find common context above and
below a diff hunk. This sometimes produces less readable diffs when
writing C, Shell, or other programming languages, e.g.:

...
 /*
+ *
+ *
+ */
+
+/*
...

instead of the more readable equivalent of

...
+/*
+ *
+ *
+ */
+
 /*
...

Implement the following heuristic to (optionally) produce the desired
output.

  If there are diff chunks which can be shifted around, shift each hunk
  such that the last common empty line is below the chunk with the rest
  of the context above.

This heuristic appears to resolve the above example and several other
common issues without producing noticeably odd results. However, as
with any heuristic, it is not known whether it will always produce a
better result. Thus, it can be disabled via diff.compactionHeuristic.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 files changed
tree: f5c443c8ae8a4def49ffd7f147ccc82ab1420805
  1. block-sha1/
  2. builtin/
  3. compat/
  4. contrib/
  5. Documentation/
  6. ewah/
  7. git-gui/
  8. gitk-git/
  9. gitweb/
  10. mergetools/
  11. perl/
  12. po/
  13. ppc/
  14. t/
  15. templates/
  16. vcs-svn/
  17. xdiff/
  18. .gitattributes
  19. .gitignore
  20. .mailmap
  21. abspath.c
  22. aclocal.m4
  23. advice.c
  24. advice.h
  25. alias.c
  26. alloc.c
  27. archive-tar.c
  28. archive-zip.c
  29. archive.c
  30. archive.h
  31. argv-array.c
  32. argv-array.h
  33. attr.c
  34. attr.h
  35. base85.c
  36. bisect.c
  37. bisect.h
  38. blob.c
  39. blob.h
  40. branch.c
  41. branch.h
  42. builtin.h
  43. bulk-checkin.c
  44. bulk-checkin.h
  45. bundle.c
  46. bundle.h
  47. cache-tree.c
  48. cache-tree.h
  49. cache.h
  50. check-builtins.sh
  51. check-racy.c
  52. check_bindir
  53. color.c
  54. color.h
  55. column.c
  56. column.h
  57. combine-diff.c
  58. command-list.txt
  59. commit-slab.h
  60. commit.c
  61. commit.h
  62. config.c
  63. config.mak.in
  64. config.mak.uname
  65. configure.ac
  66. connect.c
  67. connect.h
  68. connected.c
  69. connected.h
  70. convert.c
  71. convert.h
  72. copy.c
  73. COPYING
  74. credential-cache--daemon.c
  75. credential-cache.c
  76. credential-store.c
  77. credential.c
  78. credential.h
  79. csum-file.c
  80. csum-file.h
  81. ctype.c
  82. daemon.c
  83. date.c
  84. decorate.c
  85. decorate.h
  86. delta.h
  87. diff-delta.c
  88. diff-lib.c
  89. diff-no-index.c
  90. diff.c
  91. diff.h
  92. diffcore-break.c
  93. diffcore-delta.c
  94. diffcore-order.c
  95. diffcore-pickaxe.c
  96. diffcore-rename.c
  97. diffcore.h
  98. dir.c
  99. dir.h
  100. editor.c
  101. entry.c
  102. environment.c
  103. exec_cmd.c
  104. exec_cmd.h
  105. fast-import.c
  106. fetch-pack.c
  107. fetch-pack.h
  108. fmt-merge-msg.h
  109. fsck.c
  110. fsck.h
  111. generate-cmdlist.sh
  112. gettext.c
  113. gettext.h
  114. git-add--interactive.perl
  115. git-am.sh
  116. git-archimport.perl
  117. git-bisect.sh
  118. git-compat-util.h
  119. git-cvsexportcommit.perl
  120. git-cvsimport.perl
  121. git-cvsserver.perl
  122. git-difftool--helper.sh
  123. git-difftool.perl
  124. git-filter-branch.sh
  125. git-instaweb.sh
  126. git-merge-octopus.sh
  127. git-merge-one-file.sh
  128. git-merge-resolve.sh
  129. git-mergetool--lib.sh
  130. git-mergetool.sh
  131. git-p4.py
  132. git-parse-remote.sh
  133. git-pull.sh
  134. git-quiltimport.sh
  135. git-rebase--am.sh
  136. git-rebase--interactive.sh
  137. git-rebase--merge.sh
  138. git-rebase.sh
  139. git-relink.perl
  140. git-remote-testgit.sh
  141. git-request-pull.sh
  142. git-send-email.perl
  143. git-sh-i18n.sh
  144. git-sh-setup.sh
  145. git-stash.sh
  146. git-submodule.sh
  147. git-svn.perl
  148. GIT-VERSION-GEN
  149. git-web--browse.sh
  150. git.c
  151. git.rc
  152. git.spec.in
  153. gpg-interface.c
  154. gpg-interface.h
  155. graph.c
  156. graph.h
  157. grep.c
  158. grep.h
  159. hashmap.c
  160. hashmap.h
  161. help.c
  162. help.h
  163. hex.c
  164. http-backend.c
  165. http-fetch.c
  166. http-push.c
  167. http-walker.c
  168. http.c
  169. http.h
  170. ident.c
  171. imap-send.c
  172. INSTALL
  173. khash.h
  174. kwset.c
  175. kwset.h
  176. levenshtein.c
  177. levenshtein.h
  178. LGPL-2.1
  179. line-log.c
  180. line-log.h
  181. line-range.c
  182. line-range.h
  183. list-objects.c
  184. list-objects.h
  185. ll-merge.c
  186. ll-merge.h
  187. lockfile.c
  188. lockfile.h
  189. log-tree.c
  190. log-tree.h
  191. mailmap.c
  192. mailmap.h
  193. Makefile
  194. match-trees.c
  195. merge-blobs.c
  196. merge-blobs.h
  197. merge-recursive.c
  198. merge-recursive.h
  199. merge.c
  200. mergesort.c
  201. mergesort.h
  202. name-hash.c
  203. notes-cache.c
  204. notes-cache.h
  205. notes-merge.c
  206. notes-merge.h
  207. notes-utils.c
  208. notes-utils.h
  209. notes.c
  210. notes.h
  211. object.c
  212. object.h
  213. pack-bitmap-write.c
  214. pack-bitmap.c
  215. pack-bitmap.h
  216. pack-check.c
  217. pack-objects.c
  218. pack-objects.h
  219. pack-revindex.c
  220. pack-revindex.h
  221. pack-write.c
  222. pack.h
  223. pager.c
  224. parse-options-cb.c
  225. parse-options.c
  226. parse-options.h
  227. patch-delta.c
  228. patch-ids.c
  229. patch-ids.h
  230. path.c
  231. pathspec.c
  232. pathspec.h
  233. pkt-line.c
  234. pkt-line.h
  235. preload-index.c
  236. pretty.c
  237. prio-queue.c
  238. prio-queue.h
  239. progress.c
  240. progress.h
  241. prompt.c
  242. prompt.h
  243. quote.c
  244. quote.h
  245. reachable.c
  246. reachable.h
  247. read-cache.c
  248. README
  249. reflog-walk.c
  250. reflog-walk.h
  251. refs.c
  252. refs.h
  253. remote-curl.c
  254. remote-testsvn.c
  255. remote.c
  256. remote.h
  257. replace_object.c
  258. rerere.c
  259. rerere.h
  260. resolve-undo.c
  261. resolve-undo.h
  262. revision.c
  263. revision.h
  264. run-command.c
  265. run-command.h
  266. send-pack.c
  267. send-pack.h
  268. sequencer.c
  269. sequencer.h
  270. server-info.c
  271. setup.c
  272. sh-i18n--envsubst.c
  273. sha1-array.c
  274. sha1-array.h
  275. sha1-lookup.c
  276. sha1-lookup.h
  277. sha1_file.c
  278. sha1_name.c
  279. shallow.c
  280. shell.c
  281. shortlog.h
  282. show-index.c
  283. sideband.c
  284. sideband.h
  285. sigchain.c
  286. sigchain.h
  287. split-index.c
  288. split-index.h
  289. strbuf.c
  290. strbuf.h
  291. streaming.c
  292. streaming.h
  293. string-list.c
  294. string-list.h
  295. submodule.c
  296. submodule.h
  297. symlinks.c
  298. tag.c
  299. tag.h
  300. tar.h
  301. test-chmtime.c
  302. test-config.c
  303. test-ctype.c
  304. test-date.c
  305. test-delta.c
  306. test-dump-cache-tree.c
  307. test-dump-split-index.c
  308. test-genrandom.c
  309. test-hashmap.c
  310. test-index-version.c
  311. test-line-buffer.c
  312. test-match-trees.c
  313. test-mergesort.c
  314. test-mktemp.c
  315. test-parse-options.c
  316. test-path-utils.c
  317. test-prio-queue.c
  318. test-read-cache.c
  319. test-regex.c
  320. test-revision-walking.c
  321. test-run-command.c
  322. test-scrap-cache-tree.c
  323. test-sha1-array.c
  324. test-sha1.c
  325. test-sha1.sh
  326. test-sigchain.c
  327. test-string-list.c
  328. test-subprocess.c
  329. test-svn-fe.c
  330. test-urlmatch-normalization.c
  331. test-wildmatch.c
  332. thread-utils.c
  333. thread-utils.h
  334. trace.c
  335. trace.h
  336. trailer.c
  337. trailer.h
  338. transport-helper.c
  339. transport.c
  340. transport.h
  341. tree-diff.c
  342. tree-walk.c
  343. tree-walk.h
  344. tree.c
  345. tree.h
  346. unicode_width.h
  347. unimplemented.sh
  348. unix-socket.c
  349. unix-socket.h
  350. unpack-trees.c
  351. unpack-trees.h
  352. update_unicode.sh
  353. upload-pack.c
  354. url.c
  355. url.h
  356. urlmatch.c
  357. urlmatch.h
  358. usage.c
  359. userdiff.c
  360. userdiff.h
  361. utf8.c
  362. utf8.h
  363. varint.c
  364. varint.h
  365. version.c
  366. version.h
  367. versioncmp.c
  368. walker.c
  369. walker.h
  370. wildmatch.c
  371. wildmatch.h
  372. wrap-for-bin.sh
  373. wrapper.c
  374. write_or_die.c
  375. ws.c
  376. wt-status.c
  377. wt-status.h
  378. xdiff-interface.c
  379. xdiff-interface.h
  380. zlib.c