/**
 * Comparator for `Array.prototype.sort` that orders numbers ascending.
 *
 * @param {number} a
 * @param {number} b
 * @returns {number} negative when a < b, positive when a > b, 0 when equal
 */
const numberValueSorter = (a, b) => a - b;

/**
 * Computes the median of a list of numbers.
 *
 * The input is copied before sorting, so the caller's list is never mutated.
 *
 * @param {number[]} list - values in any order
 * @returns {number|undefined} the median, or undefined for an empty list
 */
const calculateMedian = list => {
  // an empty list has no median
  if (list.length === 0) return undefined;

  // sort a copy of the values
  const sorted = Array.from(list).sort(numberValueSorter);

  if (sorted.length % 2 === 0) {
    // we're dealing with an even-sized set, so take the midpoint
    // of the middle two values
    const a = sorted.length / 2 - 1;
    const b = a + 1;
    // read from the sorted copy, not the (possibly unsorted) input
    return (sorted[a] + sorted[b]) / 2;
  }

  // odd-sized set: pick the middle value of the sorted copy
  const i = Math.floor(sorted.length / 2);
  return sorted[i];
};
连接步骤
现在我们已经有了步骤,让我们以经典的命令式风格写出代码,以便我们更好地理解这个过程是什么样的。
const fetch = require('node-fetch');
const _wordCount = require('@iarna/word-count');

// Replaces an optional trailing slash at the end of the URL with '.json'.
const getRedditJSONUrl = url => url.replace(/\/?$/, '.json');

// Fetches the URL and resolves with the response body parsed as JSON.
const fetchData = url => fetch(url).then(response => response.json());

// Returns the `data.children` array of a fetched page object.
const extractPosts = redditPage => redditPage.data.children;

// Joins a post's title and self text with a newline.
const extractPostTextAndTitle = post => post.data.title + '\n' + post.data.selftext;

// Thin wrapper around the word-count library.
const countWords = text => _wordCount(text);

// Ascending numeric comparator for Array.prototype.sort.
const numberValueSorter = (a, b) => a - b;

/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param {number[]} list - values in any order
 * @returns {number|undefined} the median, or undefined for an empty list
 */
const calculateMedian = list => {
  if (list.length === 0) return undefined;

  const sorted = Array.from(list).sort(numberValueSorter);

  if (sorted.length % 2 === 0) {
    const a = sorted.length / 2 - 1;
    const b = a + 1;
    // index the sorted copy; the input list may be in any order
    return (sorted[a] + sorted[b]) / 2;
  }

  const i = Math.floor(sorted.length / 2);
  return sorted[i];
};

const URL = 'https://www.reddit.com/r/dataisbeautiful/';

// because some of the steps require resolving Promises, we'll
// use an async function so we can await the result
(async () => {
  // step 1
  const jsonURL = getRedditJSONUrl(URL);

  // step 2 – needs awaiting
  const pageData = await fetchData(jsonURL);

  // step 3
  const posts = extractPosts(pageData);

  // step 4 – we need to map over the elements of the array
  const texts = posts.map(extractPostTextAndTitle);

  // step 5 - same here
  const wordCounts = texts.map(countWords);

  // step 6
  const median = calculateMedian(wordCounts);

  console.log('Median word count for ' + URL, median);
})().catch(error => console.error(error)); // report failures instead of leaving the promise unhandled
// The whole report expressed as one pipeline; the steps are listed in the
// order they are meant to run.
const getMedianWordCountReport = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  map(extractPostTextAndTitle),
  map(countWords),
  calculateMedian
);

const URL = 'https://www.reddit.com/r/dataisbeautiful/';

// it's an async function, so we need to wait for it to resolve
getMedianWordCountReport(URL)
  .then(median => console.log('Median word count for ' + URL, median))
  .catch(error => console.error(error));
/**
 * Skeleton of the pipeline helper: it only fixes the shape of the API.
 * The steps are accepted but not applied yet, so the returned async
 * function resolves with its input unchanged.
 *
 * @param {...Function} steps - the steps of the process (unused in this skeleton)
 * @returns {function(*): Promise<*>} async function resolving with its input
 */
const pipeline = (...steps) => { // take a list of steps,
  return async input => {        // return an async function that takes an input,
    return input;                // and eventually returns a result
  };
};
/**
 * Composes a list of steps into a single async function.
 *
 * Each step is called with the result of the previous one and is awaited,
 * so steps may be synchronous or return a Promise. With no steps the
 * returned function resolves with its input.
 *
 * @param {...Function} steps - unary functions, run in the order given
 * @returns {function(*): Promise<*>} async function running the input through every step
 */
const pipeline = (...steps) => {   // take a list of steps defining the process
  return async input => {          // and return an async function that takes input;
    let result = input;            // the first intermediate result is the input;
    for (const step of steps) {    // iterate over each step;
      result = await step(result); // run the step on the result and update it;
    }
    return result;                 // return the last result!
  };
};
你可能会想,“不,不可能是这样。真的是这样吗?”
是的。你自己试试吧:
const fetch = require('node-fetch');
const _wordCount = require('@iarna/word-count');

// Replaces an optional trailing slash at the end of the URL with '.json'.
const getRedditJSONUrl = url => url.replace(/\/?$/, '.json');

// Fetches the URL and resolves with the response body parsed as JSON.
const fetchData = url => fetch(url).then(response => response.json());

// Returns the `data.children` array of a fetched page object.
const extractPosts = redditPage => redditPage.data.children;

// Joins a post's title and self text with a newline.
const extractPostTextAndTitle = post => post.data.title + '\n' + post.data.selftext;

// Thin wrapper around the word-count library.
const countWords = text => _wordCount(text);

// Ascending numeric comparator for Array.prototype.sort.
const numberValueSorter = (a, b) => a - b;

/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param {number[]} list - values in any order
 * @returns {number|undefined} the median, or undefined for an empty list
 */
const calculateMedian = list => {
  if (list.length === 0) return undefined;

  const sorted = Array.from(list).sort(numberValueSorter);

  if (sorted.length % 2 === 0) {
    const a = sorted.length / 2 - 1;
    const b = a + 1;
    // index the sorted copy; the input list may be in any order
    return (sorted[a] + sorted[b]) / 2;
  }

  const i = Math.floor(sorted.length / 2);
  return sorted[i];
};

// Lifts a single-item mapper into a function over arrays.
const map = mapper => array => array.map(mapper);

/**
 * Composes steps into one async function; each step is awaited and
 * receives the previous step's result.
 */
const pipeline = (...steps) => {
  return async input => {
    let result = input;
    for (const step of steps) {
      result = await step(result);
    }
    return result;
  };
};

const getMedianWordCount = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  map(extractPostTextAndTitle),
  map(countWords),
  calculateMedian
);

const URL = 'https://www.reddit.com/r/dataisbeautiful/';

getMedianWordCount(URL)
  .then(median => console.log('Median word count', median))
  .catch(error => console.error(error)); // report failures instead of leaving the rejection unhandled
/**
 * Lifts a chain of single-item mappers into a function over arrays.
 *
 * Every item is passed through the mappers left to right. With no mappers
 * the items are returned unchanged (in a new array).
 *
 * @param {...Function} mappers - synchronous unary functions applied in order
 * @returns {function(Array): Array} function mapping an array through the chain
 */
const map = (...mappers) =>                  // take an array of mappers,
  array =>                                   // and return a function that takes an array;
    array.map(                               // map each item of the array
      item => mappers.reduce(                // through a function that passes each item
        (result, mapper) => mapper(result),  // and runs them through the chain of mappers
        item                                 // starting from the item itself
      )
    );
/**
 * Lifts a chain of single-item mappers into an async function over arrays.
 *
 * Each mapper is awaited, so mappers may be synchronous or return a
 * Promise. Items are processed one after another, in array order.
 *
 * @param {...Function} mappers - unary functions applied in order to each item
 * @returns {function(Iterable): Promise<Array>} async function resolving with the mapped values
 */
const map = (...mappers) =>
  async array => {                      // we now have to return an async function
    const results = [];
    for (const value of array) {        // for each value of the array,
      let result = value;               // set the first intermediate result to the first value;
      for (const mapper of mappers) {   // take each mapper;
        result = await mapper(result);  // and pass the intermediate result to the next;
      }
      results.push(result);             // and push the result onto the results array;
    }
    return results;                     // return the final array
  };
现在我们已经解决了这个边缘情况,我们可以通过将两个单项函数分组为一个步骤来重新制定我们的流程函数:
const fetch = require('node-fetch');
const _wordCount = require('@iarna/word-count');

// Replaces an optional trailing slash at the end of the URL with '.json'.
const getRedditJSONUrl = url => url.replace(/\/?$/, '.json');

// Fetches the URL and resolves with the response body parsed as JSON.
const fetchData = url => fetch(url).then(response => response.json());

// Returns the `data.children` array of a fetched page object.
const extractPosts = redditPage => redditPage.data.children;

// Joins a post's title and self text with a newline.
const extractPostTextAndTitle = post => post.data.title + '\n' + post.data.selftext;

// Thin wrapper around the word-count library.
const countWords = text => _wordCount(text);

// Ascending numeric comparator for Array.prototype.sort.
const numberValueSorter = (a, b) => a - b;

/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param {number[]} list - values in any order
 * @returns {number|undefined} the median, or undefined for an empty list
 */
const calculateMedian = list => {
  if (list.length === 0) return undefined;

  const sorted = Array.from(list).sort(numberValueSorter);

  if (sorted.length % 2 === 0) {
    const a = sorted.length / 2 - 1;
    const b = a + 1;
    // index the sorted copy; the input list may be in any order
    return (sorted[a] + sorted[b]) / 2;
  }

  const i = Math.floor(sorted.length / 2);
  return sorted[i];
};

/**
 * Composes steps into one async function; each step is awaited and
 * receives the previous step's result.
 */
const pipeline = (...steps) => {
  return async input => {
    let result = input;
    for (const step of steps) {
      result = await step(result);
    }
    return result;
  };
};

/**
 * Lifts a chain of single-item mappers into an async function over arrays;
 * every item is passed through all mappers, each of which is awaited.
 */
const map = (...mappers) => async array => {
  const results = [];
  for (const value of array) {
    let result = value;
    for (const mapper of mappers) {
      result = await mapper(result);
    }
    results.push(result);
  }
  return results;
};

const getMedianWordCount = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  map(
    extractPostTextAndTitle,
    countWords
  ),
  calculateMedian
);

const URL = 'https://www.reddit.com/r/dataisbeautiful/';

getMedianWordCount(URL)
  .then(median => console.log('Median word count', median))
  .catch(error => console.error(error)); // report failures instead of leaving the rejection unhandled
// One sub-pipeline per metric; each one is handed to fork() below.
const getMedianWordCount = pipeline(
  map(
    extractPostTextAndTitle,
    countWords
  ),
  calculateMedian
);

const getMedianCommentCount = pipeline(
  map(countComments),
  calculateMedian
);

const getImagePresentRatio = pipeline(
  map(hasImageAttached),
  calculateRatio
);

// this is a convenience function that associates names to the results returned
// (it destructures a three-element array, in the same order as the pipelines
// listed in fork() below)
const joinResults = ([
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
]) => ({
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
});

// the process function, now with forking!
const getSubredditMetrics = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  fork(
    getMedianWordCount,
    getMedianCommentCount,
    getImagePresentRatio
  ),
  joinResults
);
根据上述要求,fork 函数接收一系列流水线作为参数。
此时,考虑到上述限制,我建议您先尝试自己编写 fork 的实现。您的实现可能与扩展后的 map 非常相似。
以下是我对 fork 函数的实现:
/**
 * Feeds one value to several pipelines and collects their results.
 *
 * All pipelines are started with the same value and combined with
 * `Promise.all`, so the result array follows the order of `pipelines`
 * and the returned promise rejects if any pipeline rejects.
 *
 * @param {...Function} pipelines - unary functions, sync or async
 * @returns {function(*): Promise<Array>} async function resolving with one result per pipeline
 */
const fork = (...pipelines) =>         // a function that takes a list of pipelines,
  async value =>                       // returns an async function that takes a value;
    await Promise.all(                 // it returns the results of promises...
      pipelines.map(                   // ...mapped over pipelines...
        pipeline => pipeline(value)    // ...that are passed the value.
      )
    );
/**
 * Runs one pipeline over every value of a list and collects the results.
 *
 * The pipeline is called with each value alone; the index and array that
 * `Array.prototype.map` also supplies are deliberately not forwarded.
 *
 * @param {Function} pipeline - unary function, sync or async
 * @returns {function(Array): Promise<Array>} function resolving with one result per value, in order
 */
const distribute = pipeline =>             // distribute takes a pipeline,
  values =>                                // and returns a function that takes a list of values;
    Promise.all(                           // it returns a promise of all the values...
      values.map(                          // ...passed through each pipeline
        value => pipeline(value)           // (one argument only)
      )
    );
是的,我想这样就行了!我们来试试传递一个 URL 数组,看看效果如何:
const fetch = require('node-fetch');
const _wordCount = require('@iarna/word-count');

// Replaces an optional trailing slash at the end of the URL with '.json'.
const getRedditJSONUrl = url => url.replace(/\/?$/, '.json');

// Fetches the URL and resolves with the response body parsed as JSON.
const fetchData = url => fetch(url).then(response => response.json());

// Returns the `data.children` array of a fetched page object.
const extractPosts = redditPage => redditPage.data.children;

// Joins a post's title and self text with a newline.
const extractPostTextAndTitle = post => post.data.title + '\n' + post.data.selftext;

// Thin wrapper around the word-count library.
const countWords = text => _wordCount(text);

// Ascending numeric comparator for Array.prototype.sort.
const numberValueSorter = (a, b) => a - b;

/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param {number[]} list - values in any order
 * @returns {number|undefined} the median, or undefined for an empty list
 */
const calculateMedian = list => {
  if (list.length === 0) return undefined;

  const sorted = Array.from(list).sort(numberValueSorter);

  if (sorted.length % 2 === 0) {
    const a = sorted.length / 2 - 1;
    const b = a + 1;
    // index the sorted copy; the input list may be in any order
    return (sorted[a] + sorted[b]) / 2;
  }

  const i = Math.floor(sorted.length / 2);
  return sorted[i];
};

/**
 * Composes steps into one async function; each step is awaited and
 * receives the previous step's result.
 */
const pipeline = (...steps) => {
  return async input => {
    let result = input;
    for (const step of steps) {
      result = await step(result);
    }
    return result;
  };
};

/**
 * Lifts a chain of single-item mappers into an async function over arrays;
 * every item is passed through all mappers, each of which is awaited.
 */
const map = (...mappers) => async array => {
  const results = [];
  for (const value of array) {
    let result = value;
    for (const mapper of mappers) {
      result = await mapper(result);
    }
    results.push(result);
  }
  return results;
};

// Reads the comment count stored on a post.
const countComments = post => post.data.num_comments;

// True when the post's `post_hint` is 'image'.
const hasImageAttached = post => post.data.post_hint === 'image';

// Share of truthy values in the array, or undefined for an empty array.
const calculateRatio = array => {
  if (array.length === 0) return undefined;
  return array.filter(value => !!value).length / array.length;
};

// Feeds one value to every pipeline and resolves with all results, in order.
const fork = (...pipelines) => async value =>
  await Promise.all(pipelines.map(pipeline => pipeline(value)));

const getMedianWordCount = pipeline(
  map(
    extractPostTextAndTitle,
    countWords
  ),
  calculateMedian
);

const getMedianCommentCount = pipeline(
  map(countComments),
  calculateMedian
);

const getImagePresentRatio = pipeline(
  map(hasImageAttached),
  calculateRatio
);

// this is a convenience function that associates names to the results returned
const joinResults = ([
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
]) => ({
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
});

const getSubredditMetrics = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  fork(
    getMedianWordCount,
    getMedianCommentCount,
    getImagePresentRatio
  ),
  joinResults
);

// Runs one pipeline over every value of a list and resolves with all results.
const distribute = pipeline => values =>
  Promise.all(values.map(value => pipeline(value)));

const URLs = [
  'https://www.reddit.com/r/dataisbeautiful/',
  'https://www.reddit.com/r/proceduralgeneration/'
];

const getAllReports = distribute(getSubredditMetrics);

getAllReports(URLs)
  .then(results => {
    // Promise.all keeps input order, so results[idx] belongs to URLs[idx]
    const reports = results.map((report, idx) => ({
      url: URLs[idx],
      report
    }));

    console.log(reports);
  })
  .catch(error => console.error(error)); // report failures instead of leaving the rejection unhandled