From 235010ce73430dd5f63579d4c41b6e4018ea826b Mon Sep 17 00:00:00 2001 From: Kendall Garner <17521368+kgarner7@users.noreply.github.com> Date: Wed, 13 Dec 2023 20:22:32 -0800 Subject: [PATCH] support repeated line, multiline --- model/lyrics.go | 102 ++++++++++++++++++++++++++++++++++--------- model/lyrics_test.go | 62 ++++++++++++++++++++++++-- 2 files changed, 139 insertions(+), 25 deletions(-) diff --git a/model/lyrics.go b/model/lyrics.go index c95892682..213a808a5 100644 --- a/model/lyrics.go +++ b/model/lyrics.go @@ -24,10 +24,12 @@ type Lyric struct { } // support the standard [mm:ss.mm], as well as [hh:*] and [*.mmm] -const timeRegexString = `(\[(([0-9]{1,2}):)?([0-9]{1,2}):([0-9]{1,2})(\.([0-9]{1,3}))?\])` +const timeRegexString = `\[([0-9]{1,2}:)?([0-9]{1,2}):([0-9]{1,2})(.[0-9]{1,3})?\]` var ( - lineRegex = regexp.MustCompile(timeRegexString + "([^\n]+)?") + // Should either be at the beginning of file, or beginning of line + syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString) + timeRegex = regexp.MustCompile(timeRegexString) lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset):([^\]]+)\]`) ) @@ -35,16 +37,23 @@ func ToLyrics(language, text string) (*Lyric, error) { text = utils.SanitizeText(text) lines := strings.Split(text, "\n") - synced := true artist := "" title := "" var offset *int64 = nil structuredLines := []Line{} + synced := syncRegex.MatchString(text) + priorLine := "" + validLine := false + timestamps := []int64{} + for _, line := range lines { line := strings.TrimSpace(line) if line == "" { + if validLine { + priorLine += "\n" + } continue } var text string @@ -72,54 +81,105 @@ func ToLyrics(language, text string) (*Lyric, error) { continue } - syncedMatch := lineRegex.FindStringSubmatch(line) - if syncedMatch == nil { - synced = false - text = utils.SanitizeText(line) - } else { + times := timeRegex.FindAllStringSubmatchIndex(line, -1) + // The second condition is for when there is a timestamp in the middle of + // a line (after any 
text) + if times == nil || times[0][0] != 0 { + if validLine { + priorLine += "\n" + line + } + continue + } + + if validLine { + for idx := range timestamps { + structuredLines = append(structuredLines, Line{ + Start: &timestamps[idx], + Value: strings.TrimSpace(priorLine), + }) + } + timestamps = []int64{} + } + + end := 0 + + // [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd] + for _, match := range times { var hours, millis int64 var err error - if syncedMatch[3] != "" { - hours, err = strconv.ParseInt(syncedMatch[3], 10, 64) + // for multiple matches, we need to check that later matches are not + // in the middle of the string + if end != 0 { + middle := strings.TrimSpace(line[end:match[0]]) + if middle != "" { + break + } + } + + end = match[1] + + hourStart := match[2] + if hourStart != -1 { + // subtract 1 because group has : at the end + hourEnd := match[3] - 1 + hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64) if err != nil { return nil, err } } - min, err := strconv.ParseInt(syncedMatch[4], 10, 64) + min, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64) if err != nil { return nil, err } - sec, err := strconv.ParseInt(syncedMatch[5], 10, 64) + sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64) if err != nil { return nil, err } - if syncedMatch[7] != "" { - millis, err = strconv.ParseInt(syncedMatch[7], 10, 64) + secStart := match[8] + if secStart != -1 { + secEnd := match[9] + // +1 offset since this capture group contains . 
+ millis, err = strconv.ParseInt(line[secStart+1:secEnd], 10, 64) if err != nil { return nil, err } - if len(syncedMatch[7]) == 2 { + if secEnd-secStart == 3 { millis *= 10 } } timeInMillis := (((((hours * 60) + min) * 60) + sec) * 1000) + millis - time = &timeInMillis - text = utils.SanitizeText(syncedMatch[8]) + timestamps = append(timestamps, timeInMillis) } + + if end >= len(line) { + priorLine = "" + } else { + priorLine = strings.TrimSpace(line[end:]) + } + + validLine = true } else { text = line + structuredLines = append(structuredLines, Line{ + Start: time, + Value: text, + }) } + } - structuredLines = append(structuredLines, Line{ - Start: time, - Value: text, - }) + if validLine { + for idx := range timestamps { + structuredLines = append(structuredLines, Line{ + Start: &timestamps[idx], + Value: strings.TrimSpace(priorLine), + }) + } } lyric := Lyric{ diff --git a/model/lyrics_test.go b/model/lyrics_test.go index 93adb1a13..0dd879feb 100644 --- a/model/lyrics_test.go +++ b/model/lyrics_test.go @@ -7,11 +7,11 @@ import ( ) var _ = Describe("ToLyrics", func() { - num := int64(1551) - It("should parse tags with spaces", func() { + num := int64(1551) lyrics, err := ToLyrics("xxx", "[offset: 1551 ]\n[ti: A title ]\n[ar: An artist ]\n[00:00.00]Hi there") Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) Expect(lyrics.DisplayArtist).To(Equal("An artist")) Expect(lyrics.DisplayTitle).To(Equal("A title")) Expect(lyrics.Offset).To(Equal(&num)) @@ -24,8 +24,8 @@ var _ = Describe("ToLyrics", func() { }) It("should accept lines with no text and weird times", func() { - var a, b, c, d = int64(0), int64(10040), int64(40000), int64(1000 * 60 * 60) - lyrics, err := ToLyrics("xxx", "[00:00.00]Hi there\n\n\n[00:10.040] \n[00:40]Test\n[01:00:00]late") + a, b, c, d := int64(0), int64(10040), int64(40000), int64(1000*60*60) + lyrics, err := ToLyrics("xxx", "[00:00.00]Hi there\n\n\n[00:10.040]\n[00:40]Test\n[01:00:00]late") Expect(err).ToNot(HaveOccurred()) 
Expect(lyrics.Synced).To(BeTrue()) Expect(lyrics.Line).To(Equal([]Line{ @@ -35,4 +35,58 @@ var _ = Describe("ToLyrics", func() { {Start: &d, Value: "late"}, })) }) + + It("Should support multiple timestamps per line", func() { + a, b, c, d := int64(0), int64(10000), int64(13*60*1000), int64(1000*60*60*51) + lyrics, err := ToLyrics("xxx", "[00:00.00] [00:10.00]Repeated\n[13:00][51:00:00.00]") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "Repeated"}, + {Start: &b, Value: "Repeated"}, + {Start: &c, Value: ""}, + {Start: &d, Value: ""}, + })) + }) + + It("Should support parsing multiline string", func() { + a, b := int64(0), int64(10*60*1000+1) + lyrics, err := ToLyrics("xxx", "[00:00.00]This is\na multiline \n\n [:0] string\n[10:00.001]This is\nalso one") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "This is\na multiline\n\n[:0] string"}, + {Start: &b, Value: "This is\nalso one"}, + })) + }) + + It("Does not match timestamp in middle of line", func() { + lyrics, err := ToLyrics("xxx", "This could [00:00:00] be a synced file") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeFalse()) + Expect(lyrics.Line).To(Equal([]Line{ + {Value: "This could [00:00:00] be a synced file"}, + })) + }) + + It("Allows timestamp in middle of line if also at beginning", func() { + a, b := int64(0), int64(1000) + lyrics, err := ToLyrics("xxx", " [00:00] This is [00:00:00] be a synced file\n [00:01]Line 2") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "This is [00:00:00] be a synced file"}, + {Start: &b, Value: "Line 2"}, + })) + }) + + It("Ignores lines in synchronized lyric prior to first timestamp", func() { + a := int64(0) + lyrics, err := ToLyrics("xxx", "This is some prelude\nThat doesn't\nmatter\n[00:00]Text") 
+ Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "Text"}, + })) + }) })