Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate meaningful bibliography Ids from DOI import #520

Merged
merged 5 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
"remove-markdown": "^0.3.0",
"rgb-hex": "^3.0.0",
"romans": "^2.0.4",
"stopwords": "^0.0.9",
"stylelint-config-standard": "^24.0.0",
"typescript": "^4.0.2",
"unified": "^9.2.1",
Expand Down
64 changes: 31 additions & 33 deletions src/bibliography/domain/BibliographyEntry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function getName(author: unknown): string {
return particle ? `${particle} ${family}` : family
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type CslData = { readonly [key: string]: any }
export default class BibliographyEntry {
readonly [immerable] = true
Expand All @@ -28,88 +29,85 @@ export default class BibliographyEntry {
constructor(cslData?: CslData | null | undefined) {
this.cslData = cslData
? produce(cslData, (draft) => {
_.keys(draft)
Object.keys(draft)
.filter((key) => key.startsWith('_'))
.forEach(_.partial(_.unset, draft))
.forEach((key) => delete draft[key])
if (draft.author) {
draft.author = draft.author.map(
_.partialRight(_.pick, authorProperties)
draft.author = draft.author.map((author) =>
_.pick(author, authorProperties)
)
}
})
: {}
}

get id(): string {
return _.get(this.cslData, 'id', '')
return this.cslData.id || ''
}

get primaryAuthor(): string {
return _.head(this.authors) || ''
return this.authors[0] || ''
}

get authors(): string[] {
return _.get(this.cslData, 'author', []).map(getName)
return (this.cslData.author || []).map(getName)
}

get year(): string {
const start = _.get(this.cslData, 'issued.date-parts.0.0', '')
const end = _.get(this.cslData, 'issued.date-parts.1.0', '')
const dates = _.get(this.cslData, 'issued.date-parts', [])
const start = dates[0]?.[0] || ''
const end = dates[1]?.[0] || ''
return end ? `${start}–${end}` : String(start)
}

get title(): string {
return _.get(this.cslData, 'title', '')
return this.cslData.title || ''
}

get shortContainerTitle(): string {
return _.get(this.cslData, 'container-title-short', '')
return this.cslData['container-title-short'] || ''
}

get shortTitle(): string {
return _.get(this.cslData, 'title-short', '')
return this.cslData['title-short'] || ''
}

get collectionNumber(): string {
return _.get(this.cslData, 'collection-number', '')
return this.cslData['collection-number'] || ''
}

get volume(): string {
return _.get(this.cslData, 'volume', '')
return this.cslData.volume || ''
}

get link(): string {
const url = _.get(this.cslData, 'URL', '')
const doi = _.get(this.cslData, 'DOI', '')
return url || (doi ? `https://doi.org/${doi}` : '')
return (
this.cslData.URL ||
(this.cslData.DOI ? `https://doi.org/${this.cslData.DOI}` : '')
)
}

get authorYearTitle(): string {
return `${this.primaryAuthor} ${this.year} ${this.title}`
}

get abberviationContainer(): string | undefined {
const containerTitleShort = this.shortContainerTitle
const collectionNumber = this.collectionNumber
? ` ${this.collectionNumber}`
: ''
return containerTitleShort
? `${containerTitleShort}${collectionNumber}`
: undefined
get abbreviationContainer(): string | undefined {
const container = this.shortContainerTitle
const number = this.collectionNumber ? ` ${this.collectionNumber}` : ''
return container ? `${container}${number}` : undefined
}

get abbreviationTitle(): string | undefined {
const { shortTitle } = this
const volume = this.volume ? ` ${this.volume}` : ''
return shortTitle ? `${shortTitle}${volume}` : undefined
const title = this.shortTitle
const vol = this.volume ? ` ${this.volume}` : ''
return title ? `${title}${vol}` : undefined
}

get abbreviations(): string | undefined {
const { abberviationContainer, abbreviationTitle } = this
if (abberviationContainer && abbreviationTitle) {
return `${abberviationContainer} = ${abbreviationTitle}`
}
return abberviationContainer ?? abbreviationTitle ?? undefined
const container = this.abbreviationContainer
const title = this.abbreviationTitle
if (container && title) return `${container} = ${title}`
return container || title
}

get label(): string {
Expand Down
66 changes: 66 additions & 0 deletions src/bibliography/domain/GenerateIds.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { generateIds } from './GenerateIds'
import { CslData } from 'bibliography/domain/BibliographyEntry'

// Baseline CSL entry used by the generateIds tests in this file. Individual
// test cases spread it and override single fields (author, issued, title,
// language) to exercise one behaviour at a time.
const testEntry: CslData = {
author: [{ family: 'Doe', given: 'John' }],
title: 'The Quick Brown Fox Jumps Over the Lazy Dog',
issued: { 'date-parts': [[2023]] },
language: 'en',
}

describe('generateIds', () => {
  // Each row: [test name, fields overriding `testEntry`, expected ID].
  const cases: Array<[string, Partial<CslData>, string]> = [
    ['basic ID generation', {}, 'doe2023quick'],
    [
      'ID generation with missing author',
      { author: undefined },
      'unknownauthor2023quick',
    ],
    ['ID generation with missing year', { issued: undefined }, 'doe9999quick'],
    [
      'ID generation with missing title',
      { title: undefined },
      'doe2023unknowntitle',
    ],
    [
      'ID generation with all significant words as stop words',
      { title: 'The Of And But Or Nor For' },
      'doe2023unknowntitle',
    ],
    [
      'ID generation with different language (German)',
      {
        language: 'de',
        title: 'Der Schnelle Braune Fuchs Springt Über den Faulen Hund',
      },
      'doe2023schnelle',
    ],
    [
      'ID generation with language not supported',
      {
        language: 'ru',
        title: 'Экс-граф? Плюш изъят. Бьём чуждый цен хвощ!',
      },
      'doe2023экс-граф?',
    ],
  ]

  // One test per row; the row's first element is used verbatim as the title.
  test.each(cases)('%s', (_name, overrides, expectedId) => {
    const entry = { ...testEntry, ...overrides }
    expect(generateIds(entry)).toBe(expectedId)
  })
})
27 changes: 27 additions & 0 deletions src/bibliography/domain/GenerateIds.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { CslData } from 'bibliography/domain/BibliographyEntry'
import stopwords from 'stopwords'

// Stopword lookup sets keyed by two-letter language code, built once at module
// load from the word lists exposed by the `stopwords` package. Only the five
// languages listed here are covered; generateIds falls back to the `en` set
// for any other language value.
const STOPWORDS = {
en: new Set(stopwords.english),
de: new Set(stopwords.german),
it: new Set(stopwords.italian),
es: new Set(stopwords.spanish),
fr: new Set(stopwords.french),
}

/**
 * Builds a citation-key style ID of the form `<author><year><word>` for a
 * CSL entry.
 *
 * - author: family name of the first author, or `unknownauthor`.
 * - year: first element of the first `issued['date-parts']` tuple, or `9999`.
 * - word: first title word that is not a stopword for the entry's language,
 *   or `unknowntitle` when the title is missing or consists only of stopwords.
 *
 * The result is lower-cased and has all whitespace removed. Punctuation
 * attached to the chosen title word is kept as-is.
 *
 * @param entry - CSL-JSON data of the bibliography entry.
 * @returns The generated ID.
 */
export function generateIds(entry: CslData): string {
  const stopwordsByLanguage = STOPWORDS as Record<string, ReadonlySet<string>>

  // `language` may be a locale tag such as "en-US" or "DE"; the stopword
  // tables are keyed by the lower-case primary subtag only.
  const languageCode =
    typeof entry.language === 'string'
      ? entry.language.trim().toLowerCase().split(/[-_]/)[0] ?? ''
      : ''

  // Own-property check: a value like "constructor" must not resolve to an
  // inherited Object.prototype member (which has no `.has` and would throw).
  const hasOwnTable = Object.prototype.hasOwnProperty.call(
    stopwordsByLanguage,
    languageCode
  )
  const stopwordSet: ReadonlySet<string> =
    (hasOwnTable ? stopwordsByLanguage[languageCode] : undefined) ??
    STOPWORDS.en

  const author = entry.author?.[0]?.family || 'unknownauthor'
  const year = entry.issued?.['date-parts']?.[0]?.[0] || '9999'

  // Split on any whitespace run and drop empty tokens. With a plain
  // `split(' ')`, a leading or doubled space yields '' as the first
  // "significant" word, which is falsy and wrongly triggers the fallback.
  const titleWords: string[] =
    typeof entry.title === 'string'
      ? entry.title.split(/\s+/).filter((word: string) => word.length > 0)
      : []
  const firstSignificantWord =
    titleWords.find((word: string) => !stopwordSet.has(word.toLowerCase())) ||
    'unknowntitle'

  // Author names can contain spaces (e.g. particles), so strip whitespace
  // from the assembled ID as a whole.
  return `${author}${year}${firstSignificantWord}`
    .replace(/\s+/g, '')
    .toLowerCase()
}
10 changes: 8 additions & 2 deletions src/bibliography/ui/BibliographyEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,15 @@ export default withData<
BibliographyEntry
>(
BibliographyEditor,
(props) => props.bibliographyService.find(props.match.params['id']),
(props) => {
const decodedId = decodeURIComponent(props.match.params['id'])
return props.bibliographyService.find(decodedId)
},
{
watch: (props) => [props.create, props.match.params['id']],
watch: (props) => [
props.create,
decodeURIComponent(props.match.params['id']),
],
filter: (props) => !props.create,
defaultData: () => template,
}
Expand Down
91 changes: 72 additions & 19 deletions src/bibliography/ui/BibliographyEntryForm.test.tsx
Original file line number Diff line number Diff line change
@@ -1,34 +1,87 @@
import React from 'react'
import { render, screen } from '@testing-library/react'
import _ from 'lodash'

import { render, screen, waitFor } from '@testing-library/react'
import { changeValueByLabel, clickNth } from 'test-support/utils'
import BibliographyEntryForm from './BibliographyEntryForm'
import { bibliographyEntryFactory } from 'test-support/bibliography-fixtures'
import BibliographyEntry from 'bibliography/domain/BibliographyEntry'

let json: string
let entry: BibliographyEntry
let onSubmit: () => void
let mockJson: string
let mockEntry: BibliographyEntry
let onSubmitMock: jest.Mock

beforeEach(() => {
entry = bibliographyEntryFactory.build()
json = JSON.stringify(entry.toCslData(), null, 2)
onSubmit = jest.fn()
mockEntry = bibliographyEntryFactory.build()
mockJson = JSON.stringify(mockEntry.toCslData(), null, 2)
onSubmitMock = jest.fn()
})

test(`Changing document calls onChange with updated value.`, async () => {
render(<BibliographyEntryForm onSubmit={onSubmit} />)
changeValueByLabel(screen, 'Data', json)
await screen.findByText(new RegExp(_.escapeRegExp(`(${entry.year})`)))
/**
 * Resolves once the button whose accessible name matches /Save/i is enabled;
 * rejects if that does not happen within the 1000 ms timeout.
 */
const waitForSaveButtonToBeEnabled = async (): Promise<void> => {
  const assertSaveButtonEnabled = () => {
    const saveButton = screen.getByRole('button', { name: /Save/i })
    return expect(saveButton).toBeEnabled()
  }
  await waitFor(assertSaveButtonEnabled, { timeout: 1000 })
}

test('Form updates and submits entry with correct data', async () => {
render(<BibliographyEntryForm onSubmit={onSubmitMock} />)
changeValueByLabel(screen, 'Data', mockJson)
await screen.findByText(new RegExp(`\\(${mockEntry.year}\\)`))
clickNth(screen, 'Save', 0)

expect(onSubmit).toHaveBeenCalledWith(entry)
expect(onSubmitMock).toHaveBeenCalledWith(mockEntry)
})

test(`Shows value as CSL-JSON.`, async () => {
render(<BibliographyEntryForm value={entry} onSubmit={onSubmit} />)
await screen.findByDisplayValue(
new RegExp(_.escapeRegExp(json).replace(/\s+/g, '\\s*'))
)
test('Displays CSL-JSON input correctly', async () => {
render(<BibliographyEntryForm value={mockEntry} onSubmit={onSubmitMock} />)
const textarea = screen.getByLabelText('Data') as HTMLTextAreaElement
await waitFor(() => {
expect(textarea.value.replace(/\s/g, '')).toContain(
JSON.stringify(mockEntry.toCslData()).replace(/\s/g, '')
)
})
})

// Enters CSL-JSON whose `id` is undefined into the "Data" field and checks
// that the entry handed to onSubmit nevertheless carries an ID.
test('Applies custom ID when no ID exists', async () => {
// Fixture whose toCslData() yields mockEntry's data with `id: undefined`
// (assumes the factory honours a `toCslData` override — TODO confirm).
const entryWithoutId = bibliographyEntryFactory.build({
toCslData: () => ({ ...mockEntry.toCslData(), id: undefined }),
})
// JSON.stringify omits properties whose value is undefined, so the text
// entered below has no `id` key at all.
const jsonWithoutId = JSON.stringify(entryWithoutId.toCslData(), null, 2)

render(<BibliographyEntryForm onSubmit={onSubmitMock} />)
changeValueByLabel(screen, 'Data', jsonWithoutId)

// Wait for the Save button to become enabled before clicking it.
await waitForSaveButtonToBeEnabled()

clickNth(screen, 'Save', 0)

await waitFor(() => {
expect(onSubmitMock).toHaveBeenCalled()

// First argument of the first onSubmit call.
const submittedEntry = onSubmitMock.mock.calls[0][0]

expect(submittedEntry.id).not.toBeUndefined()
// NOTE(review): `temp_id` is presumably a placeholder prefix used while the
// entry has no real ID — confirm against the form implementation.
expect(submittedEntry.id).not.toMatch(/^temp_id/)
})
})

// Enters CSL-JSON that already has an `id` into the "Data" field and checks
// that the entry handed to onSubmit keeps exactly that ID.
test('Preserves existing ID', async () => {
// Fixture whose toCslData() yields mockEntry's data with a fixed ID
// (assumes the factory honours a `toCslData` override — TODO confirm).
const entryWithValidId = bibliographyEntryFactory.build({
toCslData: () => ({ ...mockEntry.toCslData(), id: 'validId123' }),
})
const jsonWithValidId = JSON.stringify(entryWithValidId.toCslData(), null, 2)

render(<BibliographyEntryForm onSubmit={onSubmitMock} />)
changeValueByLabel(screen, 'Data', jsonWithValidId)

// Wait for the Save button to become enabled before clicking it.
await waitForSaveButtonToBeEnabled()

clickNth(screen, 'Save', 0)

await waitFor(() => {
expect(onSubmitMock).toHaveBeenCalled()

// First argument of the first onSubmit call.
const submittedEntry = onSubmitMock.mock.calls[0][0]

expect(submittedEntry.id).toEqual('validId123')
})
})
Loading
Loading