|
| 1 | +schema: https://htr-united.github.io/schema/2023-06-27/schema.json |
| 2 | +title: >- |
| 3 | + Ground Truth Set for Handwritten Text Recognition (HTR/OCR): Dresdner |
| 4 | + Hofdiarium 1665 (Mscr.Dresd.K.80) - 17th century Kurrent manuscript |
| 5 | +url: https://doi.org/10.5281/zenodo.14356190 |
| 6 | +authors: |
| 7 | + - name: Stefan |
| 8 | + surname: Beckert |
| 9 | + orcid: 0009-0005-2394-0075 |
| 10 | + roles: |
| 11 | + - transcriber |
| 12 | + - aligner |
| 13 | + - project-manager |
| 14 | + - quality-control |
| 15 | +institutions: [] |
| 16 | +description: >- |
| 17 | + This dataset contains ten pages of Ground Truth from the Dresden Court Diaries |
| 18 | + of Elector Johann Georg II as PAGE XML, ALTO XML and JPEG files. |
| 19 | +language: |
| 20 | + - deu |
| 21 | +production-software: eScriptorium + Kraken |
| 22 | +automatically-aligned: false |
| 23 | +script: |
| 24 | + - iso: Latn |
| 25 | + qualify: Kurrent |
| 26 | +script-type: only-manuscript |
| 27 | +time: |
| 28 | + notBefore: '1665' |
| 29 | + notAfter: '1665' |
| 30 | +hands: |
| 31 | + count: '1' |
| 32 | + precision: exact |
| 33 | +license: |
| 34 | + name: CC-BY-NC-SA 4.0 |
| 35 | + url: https://creativecommons.org/licenses/by-nc-sa/4.0/ |
| 36 | +format: Alto-XML |
| 37 | +sources: |
| 38 | + - reference: >- |
| 39 | + Beckert, S. (2024). Ground Truth Set for Handwritten Text Recognition |
| 40 | + (HTR/OCR): Dresdner Hofdiarium 1665 (Mscr.Dresd.K.80) - 17th century |
| 41 | + Kurrent manuscript [Data set]. Zenodo. |
| 42 | + https://doi.org/10.5281/zenodo.14356190 |
| 43 | + link: https://doi.org/10.5281/zenodo.14356190 |
| 44 | +volume: |
| 45 | + - metric: pages |
| 46 | + count: 10 |
| 47 | +transcription-guidelines: >- |
| 48 | + Transcription guidelines are based on the DTABF-M schema |
| 49 | + (https://www.deutschestextarchiv.de/doku/basisformat/manuskript.html), but |
| 50 | + have been adapted as follows: |
| 51 | +
|
| 52 | +
|
| 53 | + - I and J majuscules are not distinguished |
| 54 | +
|
| 55 | + - u and v are reproduced true to the original (e.g. vnd) |
| 56 | +
|
| 57 | + - Long-s (ſ) and round-s (s) are distinguished |
| 58 | +
|
| 59 | + - sz ligature is rendered as ß in Kurrent scripts and as sz (e.g. "Libusza") |
| 60 | + in Antiqua scripts |
| 61 | +
|
| 62 | + - ij ligature is rendered as y |
| 63 | +
|
| 64 | + - other ligatures, if they occur at all, are dissolved |
| 65 | +
|
| 66 | + - r graphemes are rendered as r in their modern-day form |
| 67 | +
|
| 68 | + - an m with a nasal stroke is rendered as a simple m |
| 69 | +
|
| 70 | + - Where possible, abbreviation signs (Abbrechungszeichen) for the contemporary |
| 71 | + identification of abbreviations have been included as single letters and not |
| 72 | + marked separately. The subsequent punctuation mark (“.” or “:”) for further |
| 73 | + identification of the abbreviation has also been included (cf. also Cappelli, |
| 74 | + 1928, Lexicon abbreviaturarum I, p.X) |
| 75 | +
|
| 76 | + - Diacritics on u are not marked |
| 77 | +
|
| 78 | + - In the case of uncertain capitalization, an approximation is sought via the |
| 79 | + letter size |
| 80 | +
|
0 commit comments