support repeated line, multiline

This commit is contained in:
Kendall Garner
2023-12-13 20:22:32 -08:00
parent 40d29f2cb1
commit 235010ce73
2 changed files with 139 additions and 25 deletions

View File

@@ -24,10 +24,12 @@ type Lyric struct {
}
// support the standard [mm:ss.mm], as well as [hh:*] and [*.mmm]
const timeRegexString = `(\[(([0-9]{1,2}):)?([0-9]{1,2}):([0-9]{1,2})(\.([0-9]{1,3}))?\])`
const timeRegexString = `\[([0-9]{1,2}:)?([0-9]{1,2}):([0-9]{1,2})(.[0-9]{1,3})?\]`
var (
lineRegex = regexp.MustCompile(timeRegexString + "([^\n]+)?")
// Should either be at the beginning of file, or beginning of line
syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString)
timeRegex = regexp.MustCompile(timeRegexString)
lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset):([^\]]+)\]`)
)
@@ -35,16 +37,23 @@ func ToLyrics(language, text string) (*Lyric, error) {
text = utils.SanitizeText(text)
lines := strings.Split(text, "\n")
synced := true
artist := ""
title := ""
var offset *int64 = nil
structuredLines := []Line{}
synced := syncRegex.MatchString(text)
priorLine := ""
validLine := false
timestamps := []int64{}
for _, line := range lines {
line := strings.TrimSpace(line)
if line == "" {
if validLine {
priorLine += "\n"
}
continue
}
var text string
@@ -72,54 +81,105 @@ func ToLyrics(language, text string) (*Lyric, error) {
continue
}
syncedMatch := lineRegex.FindStringSubmatch(line)
if syncedMatch == nil {
synced = false
text = utils.SanitizeText(line)
} else {
times := timeRegex.FindAllStringSubmatchIndex(line, -1)
// The second condition is for when there is a timestamp in the middle of
// a line (after any text)
if times == nil || times[0][0] != 0 {
if validLine {
priorLine += "\n" + line
}
continue
}
if validLine {
for idx := range timestamps {
structuredLines = append(structuredLines, Line{
Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine),
})
}
timestamps = []int64{}
}
end := 0
// [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd]
for _, match := range times {
var hours, millis int64
var err error
if syncedMatch[3] != "" {
hours, err = strconv.ParseInt(syncedMatch[3], 10, 64)
// for multiple matches, we need to check that later matches are not
// in the middle of the string
if end != 0 {
middle := strings.TrimSpace(line[end:match[0]])
if middle != "" {
break
}
}
end = match[1]
hourStart := match[2]
if hourStart != -1 {
// subtract 1 because group has : at the end
hourEnd := match[3] - 1
hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64)
if err != nil {
return nil, err
}
}
min, err := strconv.ParseInt(syncedMatch[4], 10, 64)
min, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64)
if err != nil {
return nil, err
}
sec, err := strconv.ParseInt(syncedMatch[5], 10, 64)
sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64)
if err != nil {
return nil, err
}
if syncedMatch[7] != "" {
millis, err = strconv.ParseInt(syncedMatch[7], 10, 64)
secStart := match[8]
if secStart != -1 {
secEnd := match[9]
// +1 offset since this capture group contains .
millis, err = strconv.ParseInt(line[secStart+1:secEnd], 10, 64)
if err != nil {
return nil, err
}
if len(syncedMatch[7]) == 2 {
if secEnd-secStart == 3 {
millis *= 10
}
}
timeInMillis := (((((hours * 60) + min) * 60) + sec) * 1000) + millis
time = &timeInMillis
text = utils.SanitizeText(syncedMatch[8])
timestamps = append(timestamps, timeInMillis)
}
if end >= len(line) {
priorLine = ""
} else {
priorLine = strings.TrimSpace(line[end:])
}
validLine = true
} else {
text = line
structuredLines = append(structuredLines, Line{
Start: time,
Value: text,
})
}
}
structuredLines = append(structuredLines, Line{
Start: time,
Value: text,
})
if validLine {
for idx := range timestamps {
structuredLines = append(structuredLines, Line{
Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine),
})
}
}
lyric := Lyric{

View File

@@ -7,11 +7,11 @@ import (
)
var _ = Describe("ToLyrics", func() {
num := int64(1551)
It("should parse tags with spaces", func() {
num := int64(1551)
lyrics, err := ToLyrics("xxx", "[offset: 1551 ]\n[ti: A title ]\n[ar: An artist ]\n[00:00.00]Hi there")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.DisplayArtist).To(Equal("An artist"))
Expect(lyrics.DisplayTitle).To(Equal("A title"))
Expect(lyrics.Offset).To(Equal(&num))
@@ -24,8 +24,8 @@ var _ = Describe("ToLyrics", func() {
})
It("should accept lines with no text and weird times", func() {
var a, b, c, d = int64(0), int64(10040), int64(40000), int64(1000 * 60 * 60)
lyrics, err := ToLyrics("xxx", "[00:00.00]Hi there\n\n\n[00:10.040] \n[00:40]Test\n[01:00:00]late")
a, b, c, d := int64(0), int64(10040), int64(40000), int64(1000*60*60)
lyrics, err := ToLyrics("xxx", "[00:00.00]Hi there\n\n\n[00:10.040]\n[00:40]Test\n[01:00:00]late")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
@@ -35,4 +35,58 @@ var _ = Describe("ToLyrics", func() {
{Start: &d, Value: "late"},
}))
})
It("Should support multiple timestamps per line", func() {
a, b, c, d := int64(0), int64(10000), int64(13*60*1000), int64(1000*60*60*51)
lyrics, err := ToLyrics("xxx", "[00:00.00] [00:10.00]Repeated\n[13:00][51:00:00.00]")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Repeated"},
{Start: &b, Value: "Repeated"},
{Start: &c, Value: ""},
{Start: &d, Value: ""},
}))
})
It("Should support parsing multiline string", func() {
a, b := int64(0), int64(10*60*1000+1)
lyrics, err := ToLyrics("xxx", "[00:00.00]This is\na multiline \n\n [:0] string\n[10:00.001]This is\nalso one")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "This is\na multiline\n\n[:0] string"},
{Start: &b, Value: "This is\nalso one"},
}))
})
It("Does not match timestamp in middle of line", func() {
lyrics, err := ToLyrics("xxx", "This could [00:00:00] be a synced file")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeFalse())
Expect(lyrics.Line).To(Equal([]Line{
{Value: "This could [00:00:00] be a synced file"},
}))
})
It("Allows timestamp in middle of line if also at beginning", func() {
a, b := int64(0), int64(1000)
lyrics, err := ToLyrics("xxx", " [00:00] This is [00:00:00] be a synced file\n [00:01]Line 2")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "This is [00:00:00] be a synced file"},
{Start: &b, Value: "Line 2"},
}))
})
It("Ignores lines in synchronized lyric prior to first timestamp", func() {
a := int64(0)
lyrics, err := ToLyrics("xxx", "This is some prelude\nThat doesn't\nmatter\n[00:00]Text")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Text"},
}))
})
})