Skip to content

Commit 859eee5

Browse files
🏗️✨ add sync:proposal-data task
1 parent 52591a1 commit 859eee5

File tree

8 files changed

+2788
-1766
lines changed

8 files changed

+2788
-1766
lines changed

_tasks/sync-proposal-data.mjs

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
/**
2+
* @file Synchronizes local proposal data with upstream GitHub proposal data.
3+
* @author Derek Lewis <[email protected]>
4+
*/
5+
6+
// -----------------------------------------------------------------------------
7+
// Requirements
8+
// -----------------------------------------------------------------------------
9+
10+
import { createRequire } from 'module';
11+
import { curlyQuote, loggerOpts } from './util/common.mjs';
12+
import { GhFileImporter } from './util/gh-file-importer.mjs';
13+
import { mdTbl2json } from './util/table.mjs';
14+
import { mkdir, readFile, rmdir, writeFile } from 'fs/promises';
15+
import { ownProperty } from './util/object.mjs';
16+
import { ProposalRecord } from './types.mjs';
17+
import { resolve as pathResolve } from 'path';
18+
import he from 'he';
19+
import htmlStringify from 'rehype-stringify';
20+
import markdown from 'remark-parse';
21+
import raw from 'rehype-raw';
22+
import remark2rehype from 'remark-rehype';
23+
import sanitizeHtml from 'sanitize-html';
24+
import unified from 'unified';
25+
import visit from 'unist-util-visit';
26+
import yaml from 'js-yaml';
27+
28+
// Directory holding the site's YAML data files (e.g. `stage3.yml`).
const DIR_DATA = '_data';
// Scratch directory that downloaded upstream files are written to.
const DIR_TEMP = 'tmp';
// Matches a fenced code block's info string only when it is exactly one of the
// JavaScript language tags. The pattern is anchored so that look-alikes which
// merely contain `js` (most notably `json`) are not mistaken for JavaScript.
const REGEX_JS_INFO_STRS = /^(?:javascript|js|mjs)$/i;
// Raw URL of the upstream proposals README that lists proposals by stage.
const URL_RAW_PROPOSALS_README =
  'https://raw.githubusercontent.com/tc39/proposals/HEAD/README.md';
33+
34+
// `enquirer` and `signale` are loaded through a CommonJS `require` created for
// this ES module.
const require = createRequire(import.meta.url);

const { prompt } = require('enquirer');
const { Signale } = require('signale');
const ghFileImporter = new GhFileImporter(DIR_TEMP);

loggerOpts.scope = 'Sync Proposal Data Task';
const log = new Signale(loggerOpts);

// Start from an empty temp directory. A missing directory (e.g. on the very
// first run) is not an error: newer Node.js releases reject with ENOENT here,
// so that one code is tolerated and everything else is rethrown.
try {
  await rmdir(DIR_TEMP, { recursive: true });
} catch (err) {
  if (err.code !== 'ENOENT') {
    throw err;
  }
}
await mkdir(DIR_TEMP, { recursive: true });
// Download the upstream proposals README through the importer configured with
// DIR_TEMP above.
await ghFileImporter.importFileFromUrl(URL_RAW_PROPOSALS_README);
46+
47+
// -----------------------------------------------------------------------------
48+
// Events
49+
// -----------------------------------------------------------------------------
50+
51+
// Route process-level failures to the task logger: uncaught exceptions are
// reported as errors, unhandled promise rejections as warnings.
for (const [eventName, logLevel] of [
  ['uncaughtException', 'error'],
  ['unhandledRejection', 'warn'],
]) {
  process.on(eventName, (payload) => {
    log[logLevel](payload);
  });
}
58+
59+
// -----------------------------------------------------------------------------
60+
// Helpers
61+
// -----------------------------------------------------------------------------
62+
// > A 'full reference link' consists of a 'link text' immediately followed by a
63+
// > 'link label' that matches a link reference definition elsewhere in the
64+
// > document.
65+
// @see https://github.github.com/gfm/#full-reference-link
66+
//
67+
//   [foo][bar]
//     ^    ^------- link label
//     |------------ link text
70+
//
71+
// @example ```markdown
72+
// [foo][bar]
73+
//
74+
75+
// [bar]: http://foo.bar
76+
// ```
77+
78+
/**
 * Reports whether a table cell's contents look like a 'full reference link'.
 *
 * The check is deliberately shallow: it only tests for a leading `[` and does
 * not verify that a matching link label follows.
 * @param {string} cellContents The text of a single table cell.
 * @returns {boolean} True if the cell starts with `[`.
 */
function isFullRefLink(cellContents) {
  return cellContents.startsWith('[');
}
85+
86+
/**
 * Extracts one half of a 'full reference link' such as `[foo][bar]`.
 * @param {string} fullRefLink The link source, e.g. `[foo][bar]`. Leading and
 *     trailing whitespace is ignored.
 * @param {number} index The 'full reference link' partition index (0 for the
 *     'link text', 1 for the 'link label').
 * @returns {string} A full reference link partition's contents, without its
 *     enclosing brackets.
 * @throws {TypeError} If the input has no partition at `index` (for example,
 *     asking for the label of text that contains no `][`).
 */
function getFullRefLinkContent(fullRefLink, index) {
  // Trim first: the bracket stripping below removes exactly one character from
  // the outer edge, so stray whitespace would otherwise be removed in place of
  // the bracket.
  const partitions = fullRefLink.trim().split('][');
  const partition = partitions[index];
  if (typeof partition !== 'string') {
    throw new TypeError(
      `Expected a full reference link with a partition at index ${index}, ` +
        `but got: ${fullRefLink}`
    );
  }
  // The link text still carries its opening `[`; the label its closing `]`.
  return index === 0 ? partition.slice(1) : partition.slice(0, -1);
}
96+
97+
/**
 * Compiles a Markdown snippet to HTML and strips every tag except `<code>`,
 * so inline code spans survive while all other markup is removed.
 * @param {string} markdownSrc Markdown source text.
 * @returns {Promise<string>} The sanitized HTML string.
 */
async function mkCodeTags(markdownSrc) {
  const compiledFile = await unified()
    .use(markdown)
    .use(remark2rehype, { allowDangerousHtml: true })
    .use(raw)
    .use(htmlStringify)
    .process(markdownSrc);

  // `process()` resolves to a file object rather than a string, so convert it
  // explicitly instead of relying on the sanitizer to coerce its input.
  return sanitizeHtml(String(compiledFile), { allowedTags: ['code'] });
}
107+
108+
/**
 * Gets the URL from the partial 'link text' of a 'full reference link' in the
 * proposal repo's README file.
 *
 * Searches the README text for the first `[<label>…]: <url>` occurrence whose
 * label starts with `linkTextLike` and returns the rest of that line.
 * @param {string} linkTextLike The beginning (or all) of a link label.
 * @returns {string} The text following `]: ` on the matching line.
 * @throws {Error} If no matching link reference definition exists.
 */
function getUrlFromDoc(linkTextLike) {
  // The label is matched literally, so escape regex metacharacters; otherwise
  // labels such as `c++` or `a.b` would build a broken or over-broad pattern.
  const escapedLabel = linkTextLike.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const definitionRegex = new RegExp(`\\[${escapedLabel}.*\\]: (.*)`);
  // The README may be held as a Buffer; String() gives the regex plain text.
  const match = definitionRegex.exec(String(prpslsReadmeMarkdown));
  if (match === null) {
    throw new Error(
      `No link reference definition found for label: ${linkTextLike}`
    );
  }
  return match[1];
}
119+
120+
/**
 * Downloads the proposal's README file using the GitHub API, parses the
 * Markdown file, and returns the contents of the JavaScript code block.
 *
 * If there are multiple matching JavaScript code blocks, the one that comes
 * last in the document is used. If there is no matching JavaScript code block,
 * resolves to undefined.
 *
 * @param {string} prpslId The proposal's repository name under the `tc39`
 *     organization.
 * @returns {Promise<string | undefined>} The HTML-entity-encoded code sample.
 */
async function getCodeSample(prpslId) {
  // Some proposal repos use a different branch (e.g., `gh-pages`) for their
  // README file. Because of this, we need repo metadata to find the raw URL.
  const { data } = await ghFileImporter.fetchPathMetadata(
    'tc39',
    prpslId,
    'README.md'
  );
  const docSourceText = await ghFileImporter.importFileFromUrl(
    data.download_url
  );

  // Only the Markdown syntax tree is needed to locate code blocks, so the
  // document is parsed but never compiled to HTML.
  const tree = unified()
    .use(markdown)
    .parse(docSourceText);

  const codeBlocks = [];
  visit(tree, 'code', (node) => {
    // Fenced blocks without an info string have no `lang`; skip them rather
    // than calling a string method on a non-string.
    if (typeof node.lang === 'string' && REGEX_JS_INFO_STRS.test(node.lang)) {
      codeBlocks.push(he.encode(node.value));
    }
  });

  return codeBlocks.length > 0 ? codeBlocks[codeBlocks.length - 1] : undefined;
}
166+
167+
/**
 * Derives a proposal ID from the URL its README link label points to: the
 * URL's last path segment, lowercased.
 * @param {string} linkTextLike The beginning (or all) of the proposal's link
 *     label in the proposals README.
 * @returns {string} The proposal ID.
 */
function getPrpslId(linkTextLike) {
  return getUrlFromDoc(linkTextLike)
    .trim()
    // Drop every trailing slash; otherwise the last path segment would be an
    // empty string.
    .replace(/\/+$/, '')
    .split('/')
    .pop()
    .toLowerCase();
}
181+
182+
/**
 * Determines whether a proposal repository ships a specification by looking
 * for a `spec.html` entry among the paths the GitHub API lists for the repo.
 * @param {string} prpslId The proposal's repository name under `tc39`.
 * @returns {Promise<boolean>} True if a `spec.html` entry is listed.
 */
async function hasSpec(prpslId) {
  const listing = await ghFileImporter.fetchPathMetadata('tc39', prpslId);
  const specEntries = listing.data.filter(
    (entry) => entry.path === 'spec.html'
  );
  return specEntries.length > 0;
}
198+
199+
/**
 * Converts a buffer of JSON text into a buffer of YAML text.
 * @param {Uint8Array} buffer UTF-8 encoded JSON source.
 * @param {Object} [options={}] Options passed through to the YAML dumper. When
 *     `options.safe` is truthy, `yaml.safeDump` is used instead of `yaml.dump`.
 * @returns {Uint8Array} The YAML document as a buffer.
 */
function json2yaml(buffer, options = {}) {
  // Wrap in a Buffer before decoding: calling toString() on a plain Uint8Array
  // produces a comma-separated list of byte values, not the text.
  const src = JSON.parse(Buffer.from(buffer).toString());
  const ymlDocument = options.safe
    ? yaml.safeDump(src, options)
    : yaml.dump(src, options);
  return Buffer.from(ymlDocument);
}
211+
212+
/**
 * Gets the one-line description for a proposal.
 *
 * An existing description is looked up by proposal ID in the local
 * `stage3.yml` data file. If none is found (or the data file does not exist
 * yet), the user is prompted to enter one.
 * @param {string} prpslId The proposal ID.
 * @returns {Promise<string>} The proposal's description.
 */
async function getDescription(prpslId) {
  let data;
  try {
    data = await readFile(pathResolve(DIR_DATA, 'stage3.yml'), 'utf8');
  } catch (err) {
    // A missing data file just means there is nothing to reuse yet; any other
    // read failure is unexpected and is rethrown.
    if (err.code !== 'ENOENT') {
      throw err;
    }
  }

  let description;
  if (data !== undefined) {
    yaml.safeLoadAll(data, (doc) => {
      // Skip documents that are not a list of records (e.g. an empty file).
      if (!Array.isArray(doc)) {
        return;
      }
      doc.forEach((value) => {
        if (ownProperty(value, 'id') && value.id === prpslId) {
          // Proposal description has already been filled out.
          log.info(
            `Using pre-existing description for ${curlyQuote(prpslId)}: ${
              value.description
            }`
          );
          description = value.description;
        }
      });
    });
  }

  if (!description) {
    const response = await prompt([
      {
        type: 'input',
        name: 'description',
        message: 'What is a short description of the proposal?',
        initial: 'A description of the proposal',
        validate: isNotEmpty,
      },
    ]);
    log.info(`Description for ${curlyQuote(prpslId)}: ${response.description}`);
    description = response.description;
  }
  return description;
}
249+
250+
/**
 * Populates a PresenceObj from a value object (one row of the stage table).
 *
 * When the row's `last_presented` cell links to meeting notes, the notes URL
 * is used; otherwise the URL falls back to the proposal's own URL. In both
 * cases the link label is resolved to a URL through the README's link
 * reference definitions.
 * @param {Object} valObj A table row with `last_presented` and `proposal`
 *     cells.
 * @returns {PresenceObj}
 */
function presenceObjFrom(valObj) {
  // The cell can either be:
  // - a full reference link: <sub>[December&#xA0;2019][nonblocking-notes]</sub>
  // - just a date: <sub>September&#xA0;2020</sub>
  // Stripping all tags leaves only the link source or the bare date.
  const lastPresented = sanitizeHtml(valObj.last_presented, {
    allowedTags: [],
  });

  if (isFullRefLink(lastPresented)) {
    return {
      date: getFullRefLinkContent(lastPresented, 0),
      url: getUrlFromDoc(getFullRefLinkContent(lastPresented, 1)),
    };
  }

  return {
    date: lastPresented,
    // Resolve the proposal's link label to its URL; the label by itself is
    // not a URL.
    url: getUrlFromDoc(getFullRefLinkContent(valObj.proposal, 1)),
  };
}
271+
272+
/**
 * Predicate used to reject empty input.
 * @param {string} value The string value.
 * @returns {boolean} True when the value has at least one character.
 */
const isNotEmpty = ({ length }) => 0 < length;
278+
279+
// -----------------------------------------------------------------------------
280+
// Main
281+
// -----------------------------------------------------------------------------
282+
283+
// Read the downloaded README as text so every later regex search operates on
// a string.
const prpslsReadmeMarkdown = await readFile(
  pathResolve(DIR_TEMP, 'README.md'),
  'utf8'
);
// Capture everything between the "Stage 3" and "Stage 2" headings. The middle
// group is lazy so the match stops at the first "Stage 2" heading.
const tblRegex = /(### Stage 3\n\n)([\s\S]+?)(\n\n### Stage 2)/;
const results = tblRegex.exec(prpslsReadmeMarkdown);
if (results === null) {
  throw new Error(
    'Could not find the Stage 3 table in the proposals README file.'
  );
}
const markdownTbl = results[2]; // Capture group 2: col-oriented Markdown table.
const jsonTbl = mdTbl2json(markdownTbl);
288+
289+
// Now, with our stage 3 table in JSON form, we must take what we need from each
290+
// row and use the cell contents to construct our ProposalRecord data structure
291+
// prior to making the JSON -> YAML conversion.
292+
// -----------------------------------------------------------------------------
293+
294+
// Every row's callback below starts immediately, so without this queue several
// interactive description prompts could be opened at the same time. Chaining
// the lookups makes them run strictly one after another.
let descriptionQueue = Promise.resolve();

/**
 * Runs `getDescription` for a proposal after all previously queued lookups
 * have settled.
 * @param {string} prpslId The proposal ID.
 * @returns {Promise<string>} The proposal's description.
 */
function getDescriptionInTurn(prpslId) {
  const pending = descriptionQueue.then(() => getDescription(prpslId));
  // Keep the queue alive after a failure; the caller still sees the rejection
  // through `pending`.
  descriptionQueue = pending.catch(() => {});
  return pending;
}

/** @type {Array<Promise<ProposalRecord>>} */
const prpslRcrdPromiseArr = jsonTbl.map(async (value) => {
  const prpslRcrd = new ProposalRecord();

  // The proposal cell is a full reference link: its label leads to the repo
  // URL (and from there the ID), its text is the human-readable title.
  prpslRcrd.id = getPrpslId(getFullRefLinkContent(value.proposal, 1));
  prpslRcrd.authors = value.author.split('<br />');
  prpslRcrd.champions = value.champion.split('<br />');
  prpslRcrd.description = await getDescriptionInTurn(prpslRcrd.id);
  prpslRcrd.example = await getCodeSample(prpslRcrd.id);
  prpslRcrd.has_specification = await hasSpec(prpslRcrd.id);
  prpslRcrd.presented = [presenceObjFrom(value)];
  prpslRcrd.title = await mkCodeTags(getFullRefLinkContent(value.proposal, 0));

  // Tests are optional: only rows whose tests cell is a reference link get one.
  if (ownProperty(value, 'tests') && isFullRefLink(value.tests)) {
    prpslRcrd.tests = [getUrlFromDoc(getFullRefLinkContent(value.tests, 1))];
  }

  return prpslRcrd;
});
313+
314+
// Wait for every record to settle, then write the successfully built ones to
// the stage 3 data file. The braces keep the helper names local to this step.
{
  const settledRecords = await Promise.allSettled(prpslRcrdPromiseArr);
  const data = [];
  settledRecords.forEach((result) => {
    if (result.status === 'fulfilled') {
      data.push(result.value);
    } else {
      // A rejected record has no value; report why it failed and leave it out
      // rather than writing an empty entry to the data file.
      log.error(result.reason);
    }
  });
  // Records are round-tripped through JSON before the YAML conversion.
  const dataBuffer = Buffer.from(JSON.stringify(data));
  const /** @type {Uint8Array} */ resultBuffer = json2yaml(dataBuffer, {
      safe: true,
    });
  await writeFile(pathResolve(DIR_DATA, 'stage3.yml'), resultBuffer);
}

_tasks/types.mjs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/**
 * Describes one presentation of a proposal: a `date` string and a related
 * `url` string. The exported value itself is just an empty object that carries
 * this type annotation.
 * @typedef {{
 *   date: string,
 *   url: string,
 * }} */
export const PresenceObj = {};
7+
8+
/**
 * Shape of a single proposal entry.
 *
 * The constructor only declares the fields for documentation and type
 * purposes: each `this.<field>;` statement reads the property without
 * assigning it, so a new instance starts with no own properties set.
 * @record
 */
export class ProposalRecord {
  constructor() {
    /** Proposal ID.
     * @type {string} */
    this.id;
    /** Proposal title.
     * @type {string} */
    this.title;
    /** Proposal code sample. Optional.
     * @type {!(string | undefined)} */
    this.example;
    /** Presentations of the proposal (date and related URL).
     * @type {Array<PresenceObj>} */
    this.presented;
    /** True if the repository has a specification.
     * @type {boolean} */
    this.has_specification;
    /** One-line description of the proposal.
     * @type {string} */
    this.description;
    /** Array of proposal authors.
     * @type {Array<string>} */
    this.authors;
    /** Array of proposal champions.
     * @type {Array<string>} */
    this.champions;
    /** Array of proposal tests. Optional.
     * @type {!(Array<string> | undefined)} */
    this.tests;
  }
}

0 commit comments

Comments
 (0)