Skip to content
Snippets Groups Projects
Commit d67ed268 authored by SolitudeSF's avatar SolitudeSF Committed by Zed
Browse files

Improve tweet url and hashtag parsing

Dry

Fixes
parent 9d860905
No related branches found
No related tags found
No related merge requests found
import strutils, times, macros, htmlgen, unicode, options
import strutils, times, macros, htmlgen, unicode, options, algorithm
import regex, packedjson
import types, utils, formatters
......@@ -6,9 +6,18 @@ const
unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
unReplace = "$1<a href=\"/$2\">@$2</a>"
htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
type
ReplaceSliceKind = enum
rkRemove, rkUrl, rkHashtag, rkMention
ReplaceSlice = object
slice: Slice[int]
kind: ReplaceSliceKind
url, display: string
template isNull*(js: JsonNode): bool = js.kind == JNull
template notNull*(js: JsonNode): bool = js.kind != JNull
......@@ -124,65 +133,92 @@ proc getTombstone*(js: JsonNode): string =
result = js{"tombstoneInfo", "richText", "text"}.getStr
result.removeSuffix(" Learn more")
template getSlice(text: string; slice: seq[int]): string =
text.runeSubStr(slice[0], slice[1] - slice[0])
proc getSlice(text: string; js: JsonNode): string =
if js.kind != JArray or js.len < 2 or js[0].kind != JInt: return text
let slice = @[js{0}.getInt, js{1}.getInt]
text.getSlice(slice)
proc expandUrl(text: var string; js: JsonNode; tLen: int; hideTwitter=false) =
let u = js{"url"}.getStr
if u.len == 0 or u notin text:
return
proc extractSlice(js: JsonNode): Slice[int] =
result = js["indices"][0].getInt ..< js["indices"][1].getInt
proc extractUrls(result: var seq[ReplaceSlice]; js: JsonNode;
textLen: int; hideTwitter = false) =
let
url = js{"expanded_url"}.getStr
slice = js{"indices"}[1].getInt
url = js["expanded_url"].getStr
slice = js.extractSlice
if hideTwitter and slice >= tLen and url.isTwitterUrl:
text = text.replace(u, "")
text.removeSuffix(' ')
text.removeSuffix('\n')
if hideTwitter and slice.b >= textLen and url.isTwitterUrl:
if slice.a < textLen:
result.add ReplaceSlice(kind: rkRemove, slice: slice)
else:
text = text.replace(u, a(shortLink(url), href=url))
proc expandMention(text: var string; orig: string; js: JsonNode) =
let
name = js{"name"}.getStr
href = '/' & js{"screen_name"}.getStr
uname = orig.getSlice(js{"indices"})
text = text.replace(uname, a(uname, href=href, title=name))
result.add ReplaceSlice(kind: rkUrl, url: url,
display: url.shortLink, slice: slice)
proc extractHashtags(result: var seq[ReplaceSlice]; js: JsonNode) =
result.add ReplaceSlice(kind: rkHashtag, slice: js.extractSlice)
proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice];
textSlice: Slice[int]): string =
template extractLowerBound(i: int; idx): int =
if i > 0: repls[idx].slice.b.succ else: textSlice.a
result = newStringOfCap(runes.len)
for i, rep in repls:
result.add $runes[extractLowerBound(i, i - 1) ..< rep.slice.a]
case rep.kind
of rkHashtag:
let
name = $runes[rep.slice.a.succ .. rep.slice.b]
symbol = $runes[rep.slice.a]
result.add a(symbol & name, href = "/search?q=%23" & name)
of rkMention:
result.add a($runes[rep.slice], href = rep.url, title = rep.display)
of rkUrl:
result.add a(rep.display, href = rep.url)
of rkRemove:
discard
result.add $runes[extractLowerBound(repls.len, ^1) ..< textSlice.b]
proc deduplicate(s: var seq[ReplaceSlice]) =
var
len = s.len
i = 0
while i < len:
var j = i + 1
while j < len:
if s[i].slice.a == s[j].slice.a:
s.del j
dec len
else:
inc j
inc i
proc cmp(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.b)
proc expandProfileEntities*(profile: var Profile; js: JsonNode) =
let
orig = profile.bio
orig = profile.bio.toRunes
ent = ? js{"entities"}
with urls, ent{"url", "urls"}:
profile.website = urls[0]{"expanded_url"}.getStr
var replacements = newSeq[ReplaceSlice]()
with urls, ent{"description", "urls"}:
for u in urls: profile.bio.expandUrl(u, orig.high)
for u in urls:
replacements.extractUrls(u, orig.high)
replacements.deduplicate
replacements.sort(cmp)
profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
profile.bio = profile.bio.replace(unRegex, unReplace)
.replace(htRegex, htReplace)
for mention in ? ent{"user_mentions"}:
profile.bio.expandMention(orig, mention)
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
let
orig = tweet.text
orig = tweet.text.toRunes
textRange = js{"display_text_range"}
slice = @[textRange{0}.getInt, textRange{1}.getInt]
textSlice = textRange{0}.getInt .. textRange{1}.getInt
hasQuote = js{"is_quote_status"}.getBool
hasCard = tweet.card.isSome
tweet.text = tweet.text.getSlice(slice)
var replyTo = ""
if tweet.replyId != 0:
with reply, js{"in_reply_to_screen_name"}:
......@@ -191,26 +227,45 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
let ent = ? js{"entities"}
var replacements = newSeq[ReplaceSlice]()
with urls, ent{"urls"}:
for u in urls:
tweet.text.expandUrl(u, slice[1], hasQuote)
let urlStr = u["url"].getStr
if urlStr.len == 0 or urlStr notin tweet.text:
continue
replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
if hasCard and u{"url"}.getStr == get(tweet.card).url:
get(tweet.card).url = u{"expanded_url"}.getStr
with media, ent{"media"}:
for m in media: tweet.text.expandUrl(m, slice[1], hideTwitter=true)
if "hashtags" in ent or "symbols" in ent:
tweet.text = tweet.text.replace(htRegex, htReplace)
for mention in ? ent{"user_mentions"}:
let
name = mention{"screen_name"}.getStr
idx = tweet.reply.find(name)
if mention{"indices"}[0].getInt >= slice[0]:
tweet.text.expandMention(orig, mention)
if idx > -1 and name != replyTo:
tweet.reply.delete idx
elif idx == -1 and tweet.replyId != 0:
tweet.reply.add name
for m in media:
replacements.extractUrls(m, textSlice.b, hideTwitter = true)
if "hashtags" in ent:
for hashtag in ent["hashtags"]:
replacements.extractHashtags(hashtag)
if "symbols" in ent:
for symbol in ent["symbols"]:
replacements.extractHashtags(symbol)
if "user_mentions" in ent:
for mention in ent["user_mentions"]:
let
name = mention{"screen_name"}.getStr
slice = mention.extractSlice
idx = tweet.reply.find(name)
if slice.a >= textSlice.a:
replacements.add ReplaceSlice(kind: rkMention, slice: slice,
url: "/" & name, display: mention["name"].getStr)
if idx > -1 and name != replyTo:
tweet.reply.delete idx
elif idx == -1 and tweet.replyId != 0:
tweet.reply.add name
replacements.deduplicate
replacements.sort(cmp)
tweet.text = orig.replacedWith(replacements, textSlice)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment