ori-data-pipeline/data.ori

75 lines
3.7 KiB
Plaintext

{
//Data pipeline: allocates the collective electricity expenses (fixed and variable) and the
//bank costs to the individual accounts, then exposes the result as transaction records (txns).
//The helper scripts referenced by name (*.js) are defined outside this file.
//
//load input data: individual usage, banktotal and collective usage.
indivusage = ./inputs/kwh-usage.csv/
//banktotal is the `amount` of the entry at key 0 after filtering bank-costs.csv down to account 'Bank'.
//NOTE(review): this relies on Tree.filter exposing the match under key 0 - confirm it re-indexes
//rather than keeping the original row keys.
banktotal = Tree.filter(./process/bank-costs.csv/, entry => entry.account === 'Bank')[0].amount
collective_usage = ./inputs/collective-usage-expenses.csv/
//index collective usage by period: the `key` option of Tree.map re-keys each row under its
//`period` value (it does not group rows), so that later steps can look up
//collective_usage_by_period[record.period].
//NOTE(review): presumably there is exactly one collective row per period - confirm.
collective_usage_by_period = Tree.map(collective_usage, {key: (value, key) => value.period})
//group individual usage lines by period (one group of lines per period)
indiv_by_period = Tree.groupBy(indivusage, line => line.period)
//group individual usage lines by account (one group of lines per account)
indiv_by_account = Tree.groupBy(indivusage, line => line.account)
//sum the `months` field over all individual usage lines, for all periods (one total sum)
//NOTE(review): totalmonths is not referenced by any other property in this file,
//and parseInt is called without a radix.
totalmonths = Tree.mapReduce(indivusage, null, (lines) => lines.reduce((a,b) => a + parseInt(b.months),0))
//calculate percent for months and usage.
//roundUsage.js calculates each account's percentage of the total usage,
//roundMonths.js does the same for how many months each was active in the entire period
//since some people came and left partway through.
//both scripts are mapped over indiv_by_period, i.e. they are applied once per period group.
percent_months = Tree.map(indiv_by_period, roundMonths.js)
percent_usage = Tree.map(indiv_by_period, roundUsage.js)
//add percents to each entry and flatten (reverse the Tree.groupBy)
//the output of the roundX scripts above is a bare array of percents.
//they need to be mapped back to the individual's entries using the array index,
//which is what withPercents.js does.
//withPercents.js is called with each period's lines, the period key, and both percent trees.
with_percents = Tree.map(
indiv_by_period,
(values, key) => withPercents.js(values, key, percent_usage, percent_months)
) →
Tree.deepValues → //object of arrays → array of arrays
(values) => values.flat() //array of arrays → flat array
//now calculate usage for fixed and variable electricity expenses using the percents
//For each record, add two new properties 'amount_fixed' and 'amount_var'
//bankersRound.js multiplies the percents by the total from the collective usage table,
//using the banker's rounding rule.
//two passes over the flat records: the first passes 'percent_months', 'amount_fixed' and the
//period's exp_fixed; the second passes 'percent_usage', 'amount_var' and the period's exp_var.
with_usage = with_percents/ → (withPercents) =>
Tree.map(withPercents, (record) => bankersRound.js(record, 'percent_months', 'amount_fixed', collective_usage_by_period[record.period].exp_fixed)) →
(withFixed) => Tree.map(withFixed, (record) => bankersRound.js(record, 'percent_usage', 'amount_var', collective_usage_by_period[record.period].exp_var))
//need to calculate a single banking costs amount for the whole period.
//It's not quite accurate, because banking costs have gone up over time, but it will do.
//this pipeline is akin to the with_percents one above, but this is not subdivided per period.
//hence the use of a different script, withBankPercents.js, with a slightly different structure.
//withMonths.js is applied to each account's group of lines.
user_months = Tree.map(
indiv_by_account,
withMonths.js
)
//the array is placed under a key 'bank' for compatibility:
//in the original dataset, there were multiple suppliers that had to be accounted for,
//and the scripts expected that structure.
user_months_flat = Tree.deepValues(user_months) → (vals) => {'bank': vals.flat()}
//roundMonths.js is reused here, applied to the single 'bank' group.
percent_bank = Tree.map(user_months_flat, roundMonths.js)
//attach the bank percents to the entries, then flatten back to one array of records.
with_bank_percents = Tree.map(
user_months_flat,
(values, key) => withBankPercents.js(values, key, percent_bank)
) → Tree.deepValues → (values) => values.flat()
//bankersRound.js is called per record with 'percent_bank', 'amount_bank' and banktotal.
with_bank_usage = Tree.map(
with_bank_percents, (record) => bankersRound.js(record, 'percent_bank', 'amount_bank', banktotal))
//the output records: convert with_usage and with_bank_usage to a csv representing transactions.
//txns deep-merges the electricity and bank transactions and takes their deep values;
//the trailing flatten step on the txns line is currently commented out.
txns_elec = Tree.map(with_usage, outputFormat.js).flat()
txns_bank = Tree.map(with_bank_usage, outputFormatBank.js)
txns = Tree.deepMerge(txns_elec, txns_bank) → Tree.deepValues //→ (values) => values.flat()
}