1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import std/[os, strutils, json, httpclient, times]
import libs/tiny_sqlite
type
  User = object
    ## Account information attached to a `Post`.
    id*: int64
    username*, displayName*, url*: string

  Image = object
    ## A single image attachment with its pixel dimensions.
    id*: int64
    url*: string
    width*, height*: int

  Post = object
    ## One fetched post together with its author and attached images.
    id*: int64
    url*: string
    user*: User
    createdAt*: DateTime
    content*: string
    attachedImages*: seq[Image]
proc parseImage(node: JsonNode): Image =
  ## Builds an `Image` from one entry of a post's `media_attachments` array.
  ##
  ## Width and height are read with the nil-safe `{}` accessor, so an
  ## attachment lacking `meta.original` yields 0 instead of raising `KeyError`.
  Image(
    # Ids arrive as decimal strings; parse them as 64-bit on every platform.
    id: node["id"].getStr.parseBiggestInt,
    url: node["url"].getStr,
    width: node{"meta", "original", "width"}.getInt,
    height: node{"meta", "original", "height"}.getInt
  )

proc parsePost(node: JsonNode): Post =
  ## Builds a `Post` (including its author and images) from one JSON status.
  ##
  ## Raises `KeyError` when a mandatory key is missing and `ValueError` /
  ## `TimeParseError` when an id or the timestamp is malformed.
  let attachments = node["media_attachments"]
  var images = newSeqOfCap[Image](attachments.len)
  for attachment in attachments:
    images.add parseImage(attachment)

  let account = node["account"]
  Post(
    id: node["id"].getStr.parseBiggestInt,
    url: node["url"].getStr,
    user: User(
      id: account["id"].getStr.parseBiggestInt,
      username: account["username"].getStr,
      displayName: account["display_name"].getStr,
      url: account["url"].getStr
    ),
    # The pattern requires a literal 'Z' suffix, so interpret the wall-clock
    # value as UTC; `parse` would otherwise default to the local time zone.
    createdAt: parse(node["created_at"].getStr,
                     "yyyy-MM-dd'T'HH:mm:ss'.'fff'Z'", utc()),
    content: node["content"].getStr,
    attachedImages: images
  )

proc fetchPosts(client: HttpClient, instance: string, userId: int64, maxId: int64 = 0): seq[Post] =
  ## Downloads one page of statuses of account `userId` from `instance`.
  ##
  ## * `client`   - HTTP client used for the request.
  ## * `instance` - host name, inserted into `https://<instance>/api/v1/...`.
  ## * `userId`   - numeric account id.
  ## * `maxId`    - when non-zero, sent as the `max_id` query parameter.
  ##
  ## Returns the posts in the order they appear in the response.
  ##
  ## A 429 response makes the proc sleep 30 seconds and retry indefinitely.
  ## Any other `HttpRequestError` is re-raised to the caller instead of being
  ## swallowed (which previously surfaced as a JSON parse error on an empty
  ## body).
  const rateLimitPauseMs = 30_000

  var url = "https://" & instance & "/api/v1/accounts/" & $userId & "/statuses"
  if maxId != 0:
    url.add("?max_id=" & $maxId)

  var response: string
  while true:
    try:
      response = client.getContent(url)
      break
    except HttpRequestError as e:
      # Match on the status code only; the reason phrase may vary by server.
      if not e.msg.startsWith("429"):
        raise
      echo "Hit a rate-limit! I'll take a 30 sec break!"
      sleep(rateLimitPauseMs)

  for node in response.parseJson:
    result.add parsePost(node)
proc postExists(db: DbConn, id: int64): bool =
  ## Reports whether the `posts` table already holds a row whose id is `id`.
  ##
  ## Yields `false` when the query produces no row at all.
  let row = db.one("SELECT EXISTS(SELECT 1 FROM posts WHERE id=?);", id)
  # `and` short-circuits, so the row is only unpacked when one is present.
  result = row.isSome and fromDbValue(row.get[0], bool)
# Script entry point.
#
# Configuration is read from environment variables:
#   DB_PATH      (required) path handed to `openDatabase`
#   INSTANCE     host to query, defaults to "chaos.social"
#   USER_ID      numeric account id, defaults to 69675
#   OLDEST_POST  id of the post at which the crawl stops,
#                defaults to 101063382828117630

proc envOr(key, fallback: string): string =
  ## Returns environment variable `key`, or `fallback` when the variable is
  ## unset or empty.
  let value = getEnv(key)
  if value.len > 0: value else: fallback

if getEnv("DB_PATH") == "": quit "No DB_PATH given!"

let db = openDatabase(getEnv("DB_PATH"))
db.execScript("""
  CREATE TABLE IF NOT EXISTS posts (
    id INTEGER PRIMARY KEY,
    url TEXT NOT NULL,
    userId INTEGER NOT NULL,
    userName TEXT NOT NULL,
    userDisplay TEXT NOT NULL,
    userUrl TEXT NOT NULL,
    createdAt VARCHAR(20) NOT NULL,
    content TEXT
  );
  CREATE TABLE IF NOT EXISTS images (
    id INTEGER PRIMARY KEY,
    post_id INTEGER NOT NULL,
    url TEXT NOT NULL,
    width INTEGER NOT NULL,
    height INTEGER NOT NULL,
    containsHorn BOOL NOT NULL
  );""")

let
  client: HttpClient = newHttpClient()
  instance: string = envOr("INSTANCE", "chaos.social")
  # Both ids are parsed as 64-bit values regardless of the platform's `int`.
  userId: int64 = envOr("USER_ID", "69675").parseBiggestInt
  oldestPostId: int64 =
    envOr("OLDEST_POST", "101063382828117630").parseBiggestInt
  # True when a previous run already stored the oldest post; the crawl can
  # then stop at the first post that is already in the database.
  allPostsFetched: bool = db.postExists(oldestPostId)

var
  posts: seq[Post]
  lastPostId: int64 = 0

try:
  posts = client.fetchPosts(instance, userId)
  # The label is deliberately not named like the `fetchPosts` proc that is
  # called inside the block.
  block crawl:
    while true:
      if posts.len == 0:
        # An empty page leaves `lastPostId` unchanged; without this exit the
        # same request would be repeated forever.
        echo "No more posts returned by the server, stopping."
        break crawl

      for post in posts:
        lastPostId = post.id

        if db.postExists(post.id):
          if allPostsFetched:
            echo "All new posts fetched!"
            break crawl
          echo "Post exists!"
          continue

        # Store the post and its images atomically so a failure in between
        # cannot leave a post without its images (it would never be retried
        # because `postExists` would already report it).
        db.transaction:
          db.exec("""
            INSERT INTO posts(id, url, userId, userName, userDisplay, userUrl, createdAt, content)
            VALUES(?, ?, ?, ?, ?, ?, ?, ?);
            """,
            post.id, post.url, post.user.id, post.user.username,
            post.user.displayName, post.user.url,
            post.createdAt.format("yyyy-MM-dd'T'HH:mm:ss"), post.content)
          for image in post.attachedImages:
            db.exec("""
              INSERT INTO images(id, post_id, url, width, height, containsHorn)
              VALUES(?, ?, ?, ?, ?, ?);
              """,
              image.id, post.id, image.url, image.width, image.height, true)
        echo "Inserted new post with id: " & $post.id

        if post.id == oldestPostId:
          echo "All posts successfully fetched!"
          break crawl

      # Request the next page, passing the id of the last post seen.
      posts = client.fetchPosts(instance, userId, lastPostId)
finally:
  # Release the HTTP client and the database even when the crawl raises.
  client.close()
  db.close()