Skip to content

Commit

Permalink
✨ 채널 추가 기능 고도화 #23
Browse files Browse the repository at this point in the history
  • Loading branch information
kenu committed Jun 4, 2024
1 parent 2bcb038 commit 8e85305
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 19 deletions.
1 change: 1 addition & 0 deletions services/channel.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ async function findChannelInfo(forHandle) {
return data
} catch (error) {
console.error('Error:', error)
throw error
}
}

Expand Down
5 changes: 3 additions & 2 deletions web/routes/admin.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ router.post('/api/channel', async function (req, res, next) {
...req.body,
...channel,
}

const result = await dao.create(channel)
await addVideos(channel.channelId)
res.json(result.dataValues)
/**
 * Looks up the latest videos for a channel and calls `vapi.addVideos` with the
 * `channelId` carried by each returned video.
 *
 * All `vapi.addVideos` calls are started together and awaited as a group, so
 * the returned promise settles only after every call has finished and rejects
 * if any of them fails.
 *
 * @param channelId - Identifier handed to `vapi.getLatestVideos`.
 * @returns {Promise<void>}
 */
async function addVideos(channelId) {
  const videos = await vapi.getLatestVideos(channelId)
  // `forEach` discards the promise returned by an async callback, so the
  // calls are collected with `map` and awaited to surface failures to the caller.
  await Promise.all(videos.map((video) => vapi.addVideos(video.channelId)))
}

Expand Down
52 changes: 35 additions & 17 deletions web/utils/transcript.js
Original file line number Diff line number Diff line change
@@ -1,33 +1,51 @@
import { parse } from 'node-html-parser'

/**
 * Builds the plain-text transcript of a YouTube video.
 *
 * Steps: download the watch page, locate the player-response script, read the
 * caption URL from it, download the caption XML and concatenate its text.
 *
 * @param videoId - Value placed in the `v` query parameter of the watch URL.
 * @returns {Promise<string>} Whatever `extractTranscriptFromXml` produces for
 *   the downloaded caption document.
 */
async function fetchTranscript(videoId) {
  // The watch page is requested once, through fetchHtml; no second download.
  const videoPageHtml = await fetchHtml(
    `https://www.youtube.com/watch?v=${videoId}`
  )
  const playerScript = findPlayerScript(videoPageHtml)
  const captionsUrl = extractCaptionsUrl(playerScript)
  // A `lang=de-DE` parameter is rewritten to `lang=ko-KR`; URLs that do not
  // contain it are requested unchanged.
  const captionsXml = await fetchXml(
    captionsUrl.replace('lang=de-DE', 'lang=ko-KR')
  )
  return extractTranscriptFromXml(captionsXml)
}

/**
 * Downloads a document and returns it parsed by `parse`.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The value `parse` returns for the response body.
 * @throws {Error} When the response status is outside the 2xx range.
 */
async function fetchHtml(url) {
  const response = await fetch(url)
  // fetch() resolves for HTTP error statuses too, so check explicitly instead
  // of parsing an error page as if it were the requested document.
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return parse(await response.text())
}

const scripts = PAGE.getElementsByTagName('script')
const playerScript = scripts.find((script) =>
/**
 * Finds the script element that declares `ytInitialPlayerResponse`.
 *
 * @param html - Parsed document exposing `getElementsByTagName`.
 * @returns The first `script` element whose text contains
 *   `var ytInitialPlayerResponse = {`, or `undefined` when none does.
 */
function findPlayerScript(html) {
  const declaresPlayerResponse = ({ textContent }) =>
    textContent.includes('var ytInitialPlayerResponse = {')
  const scriptElements = html.getElementsByTagName('script')
  return scriptElements.find(declaresPlayerResponse)
}

/**
 * Reads the URL of the first caption track from the player-response script.
 *
 * @param playerScript - Script element whose `textContent` contains the
 *   `var ytInitialPlayerResponse = ...` assignment.
 * @returns {string} `baseUrl` of the first entry in
 *   `captions.playerCaptionsTracklistRenderer.captionTracks`.
 * @throws {Error} When the script or the assignment is missing, or when the
 *   player response lists no caption track.
 * @throws {SyntaxError} When the text after the assignment is not valid JSON.
 */
function extractCaptionsUrl(playerScript) {
  const assignedText = playerScript?.textContent?.split(
    'var ytInitialPlayerResponse = '
  )?.[1]
  if (assignedText === undefined) {
    throw new Error('Player response script not found in the video page')
  }

  // The final character is dropped before parsing — presumably the `;` that
  // terminates the assignment (confirm against a real watch page).
  const data = JSON.parse(assignedText.slice(0, -1).trim())

  const baseUrl =
    data?.captions?.playerCaptionsTracklistRenderer?.captionTracks?.[0]?.baseUrl
  if (baseUrl == null) {
    throw new Error('No caption track is available for this video')
  }
  return baseUrl
}

const resXML = await fetch(captionsDefault)
.then((res) => res.text())
.then((xml) => parse(xml))
/**
 * Downloads an XML document and returns it parsed by `parse`.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The value `parse` returns for the response body.
 * @throws {Error} When the response status is outside the 2xx range.
 */
async function fetchXml(url) {
  const response = await fetch(url)
  // fetch() resolves for HTTP error statuses too, so check explicitly instead
  // of parsing an error body as if it were the requested XML.
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return parse(await response.text())
}

let transcript = ''
const chunks = resXML.getElementsByTagName('text')
for (const chunk of chunks) {
transcript += chunk.textContent
}
return transcript
/**
 * Joins the text of every `text` element in a parsed caption document.
 *
 * @param xml - Parsed document exposing `getElementsByTagName`.
 * @returns {string} The `textContent` of each `text` element, concatenated in
 *   document order with no separator; an empty string when there are none.
 */
function extractTranscriptFromXml(xml) {
  let joined = ''
  for (const textNode of Array.from(xml.getElementsByTagName('text'))) {
    joined += textNode.textContent
  }
  return joined
}

export default fetchTranscript

0 comments on commit 8e85305

Please sign in to comment.