|
static struct dlist_anchor * | dlist_anchor_create (void) |
|
static void | dlist_list_clean (struct dlist_entry *d) |
|
static void | dlist_anchor_destroy (struct dlist_anchor *anchor) |
|
static unsigned | dlist_entries_count (const struct dlist_entry *d) |
|
static struct dlist_entry * | dlist_edit_append (struct dlist_anchor *anchor, enum dlist_entry_type type, size_t offset, size_t cnt) |
|
static struct dlist_entry * | dlist_center_find (struct dlist_entry *start, size_t cnt) |
|
static size_t | dentry_encoded_overflow (unsigned sat, size_t value) |
|
static size_t | dentry_rel_offset_calc (const struct dlist_entry *e) |
|
static size_t | dentry_encoded_size_get (const struct dlist_entry *e) |
|
static enum dentry_merge_status | dentry_merge_check (const struct dlist_entry *left, const struct dlist_entry *right) |
|
static enum dentry_merge_status | dentry_right_walk_merge_check (const struct dlist_entry *e) |
|
static enum dentry_merge_status | dentry_left_walk_merge_check (const struct dlist_entry *e) |
|
static struct dlist_entry * | dentry_merge_to_left (struct dlist_entry *left, struct dlist_entry *right) |
|
static size_t | dlist_identity (struct dlist_anchor *anchor, size_t length, const uint8_t line[length], const uint8_t ref[length], size_t begin) |
|
static size_t | dlist_repeat (struct dlist_anchor *anchor, size_t length, const uint8_t line[length], size_t begin) |
|
static struct dlist_anchor * | dlist_create (size_t length, const uint8_t line[length], const uint8_t ref[length]) |
|
static void | output_dlist (const struct dlist_entry *d) |
|
static size_t | dlist_line_size_calc (const struct dlist_entry *start) |
|
static struct dlist_anchor * | dlist_overview (size_t length, const uint8_t line[length], const uint8_t ref[length]) |
|
static struct dlist_anchor * | dlist_preview (size_t length, const uint8_t line[length], const uint8_t ref[length]) |
|
static size_t | repeating_pattern_check_ahead (size_t cnt, const uint8_t line[cnt]) |
|
static void | dlist_entry_replace (struct dlist_anchor *anchor, struct dlist_entry *repl) |
|
static void | dlist_substitute_feaze (struct dlist_entry *diff, const uint8_t *line) |
|
static void | dlist_list_substitute_feaze (struct dlist_entry *edit, const uint8_t *pattern) |
|
static void | dlist_merge_useless_entries (struct dlist_entry *edit, int under_pressure) |
|
The encoding method is very limited in the way some kind of data reduction can be made. So, it seems, its hard to improve the algorithm. Refer test_edit*.c files for further test details.
Use this file and its content for reference. It isn't used yet.
Check if a merge of the given entries is helpful
- Parameters
-
[in] | left | The left 'edit' |
[in] | right | The right 'edit' |
- Return values
-
DENTRY_MERGE_REC | Merge possible, new size is smaller or at least unchanged |
DENTRY_MERGE_INA | Merge inappropriate (makes no sense, enlarges the encoding, only under 'edit' pressure) |
DENTRY_MERGE_IMP | Merge impossible (these two types cannot be merged) |
Valid combinations:
repeat plain *******####### must be kept if the merge wouldn't be smaller
######## can be merged under 'edit' pressure
plain repeat
#******* must be kept if the merge wouldn't be smaller
######## can be merged under 'edit' pressure
equal plain *******####### must be kept if the merge wouldn't be smaller
######## can be merged under 'edit' pressure
plain equal
#******* must be kept if the merge wouldn't be smaller
######## can be merged under 'edit' pressure
repeat equal *******####### can never be merged -> if the equal part continues the repeat pattern! equal repeat
#******* can never be merged -> if the equal part continues the repeat pattern!
This routine really calculates the encoded size for a merged 'edit'. You cannot simply add the byte counts they would cover if merged, since 'offset' and 'count' overflows can add more or less bytes as well. Sometimes the encoding of the following 'edit' changes as well and could be taken into account if merging of the current 'edits' makes sense or not. But I think it's not worth the effort.
static struct dlist_anchor * dlist_overview |
( |
size_t |
length, |
|
|
const uint8_t |
line[length], |
|
|
const uint8_t |
ref[length] |
|
) |
| |
|
static |
- Parameters
-
[in] | length | Length in bytes of the active content |
- Todo:
- DLIST_EQ and DLIST_REP edits can overlap
Eine andere Möglichkeit wäre, zunächst nur auf Gleichheit der Zeilen zu testen und dafür 'edit'-Einträge zu erzeugen. Dann im nachgeschalteten Test, die Räume zwischen diesen Einträgen verarbeiten und ggf. auf Ersetzen oder Wiederholen umändern. Das würde die Komplexität pro Test-Schritt rausnehmen.
00 00 00 00 00 11 22 33 44 55 55 55 88 99 00 00 00 00 00 | equal | | different | | equal | step one | diff | | eq | |dif| step two und diese drei ersetzen dann den ganzen "different"
Mit den verketteten Listen sollte das auch gehen, einen Eintrag durch mehrere zu ersetzen bzw. anpassen. Beispielsweise einen "diff"-Eintrag nehmen mit seinem Start und Ende und den wiederum wie eine Zeile in diff und rep aufteilen und dann anschl. die neu entstandenen edit-Einträge statt dem originalen einzuhängen. Das ginge dann mehr oder weniger rekursiv.
static void dlist_merge_useless_entries |
( |
struct dlist_entry * |
edit, |
|
|
int |
under_pressure |
|
) |
| |
|
static |
Merge useless 'edits' into its neighbours
- Parameters
-
[in,out] | edit | The start of the chain (can be NULL) |
[in] | under_pressure | Not '0' if we are under 'edit' count pressure |
Sometimes useless 'edits' are created, like a DLIST_EQ with less than three byte lengths. These are not worth to keep, since the encoding of the neighbour 'edits' needs more bytes than it saves by keeping the DLIST_EQ 'edit'.
This routine walks along the whole chain and tests at each 'edit' if it can be merged with its right neighbour. If yes, the right neighbour is merged into it and removed from the chain.
Covered case:
- [EQU][DIFF] -> [DIFF]
- [DIFF][EQU] -> [DIFF]
- [DIFF][DIFF] -> [DIFF]
If it has done a merge, it continues to check the new right neighbour of the merged 'edit'.
- Note
- A merge - if possible but inappropriate - will however be done if under_pressure signals (e.g. not '0') we need to reduce the 'edit' count.