ori-data-pipeline/data.ori

{
  //Pipeline overview: reads individual and collective electricity usage plus bank costs,
  //apportions the fixed, variable and banking amounts over the individual records,
  //and exposes the result as transaction records under 'txns'.
  //The .js helpers referenced below live outside this file; what is said about them
  //here comes from the original author's notes, not from code visible in this file.

  //load input data: individual usage, banktotal and collective usage.
  indivusage = ./inputs/kwh-usage.csv/
  //banktotal is the 'amount' of the first bank-costs entry whose account is exactly 'Bank'.
  //NOTE(review): presumably such an entry always exists; confirm what happens when it does not.
  banktotal = Tree.filter(./process/bank-costs.csv/, entry => entry.account === 'Bank')[0].amount
  collective_usage = ./inputs/collective-usage-expenses.csv/

  //re-key collective usage by period: each entry's new key is its own 'period' value,
  //so that further down it can be looked up as collective_usage_by_period[record.period].
  //NOTE(review): presumably there is exactly one collective row per period; verify.
  collective_usage_by_period = Tree.map(collective_usage, {key: (value, key) => value.period})

  //group individual usage per account under period
  indiv_by_period = Tree.groupBy(indivusage, line => line.period)

  //group individual usage per period under account.
  indiv_by_account = Tree.groupBy(indivusage, line => line.account)

  //sum all users' months for all periods (one total sum)
  //each line's 'months' field is passed through parseInt before being added.
  //NOTE(review): parseInt is called without a radix.
  //totalmonths is not referenced by any other property visible in this file.
  totalmonths = Tree.mapReduce(indivusage, null, (lines) => lines.reduce((a,b) => a + parseInt(b.months),0))


  //calculate percent for months and usage.
  //roundUsage.js calculates each account's percentage of the total usage,
  //roundMonths.js does the same for how many months each was active in the entire period
  //since some people came and left partway through.
  //both are applied once per period group of indiv_by_period.
  percent_months = Tree.map(indiv_by_period, roundMonths.js)
  percent_usage = Tree.map(indiv_by_period, roundUsage.js)


  //add percents to each entry and flatten (reverse the Tree.groupBy)
  //the output of the roundX algorithm above is a bare array of percents.
  //they need to be mapped back to the individual's entries using the array index,
  //which is what withPercents.js does.
  //withPercents.js receives the period's entries, the period key and both percent trees.
  with_percents = Tree.map(
      indiv_by_period,
      (values, key) => withPercents.js(values, key, percent_usage, percent_months)
    ) →
    Tree.deepValues →           //object of arrays → array of arrays
    (values) => values.flat()   //array of arrays → flat array

  //now calculate usage for fixed and variable electricity expenses using the percents
  //For each record, add two new properties 'amount_fixed' and 'amount_var'
  //bankersRound.js multiplies the percents by the total from the collective usage table,
  //using the banker's rounding rule.
  //two passes over the records: the first uses 'percent_months' with the period's exp_fixed,
  //the second takes the output of the first and uses 'percent_usage' with the period's exp_var.
  with_usage = with_percents/ → (withPercents) =>
    Tree.map(withPercents, (record) => bankersRound.js(record, 'percent_months', 'amount_fixed', collective_usage_by_period[record.period].exp_fixed)) →
    (withFixed) => Tree.map(withFixed, (record) => bankersRound.js(record, 'percent_usage', 'amount_var', collective_usage_by_period[record.period].exp_var))

  //need to calculate a single banking costs amount for the whole period.
  //It's not quite accurate, because banking costs have gone up over time, but it will do.
  //this pipeline is akin to the with_percents one above, but this is not subdivided per period.
  //hence the use of a different script, withBankPercents.js, with a slightly different structure.
  //first step: apply withMonths.js to each account's group of entries.
  user_months = Tree.map(
      indiv_by_account,
      withMonths.js
    )

  //the array is placed under a key 'bank' for compatibility:
  //in the original dataset, there were multiple suppliers that had to be accounted for,
  //and the scripts expected that structure.
  user_months_flat = Tree.deepValues(user_months) → (vals) => {'bank': vals.flat()}
  //same roundMonths.js helper as for percent_months, applied here to the single 'bank' group.
  percent_bank = Tree.map(user_months_flat, roundMonths.js)

  //attach the bank percents to the entries of the 'bank' group, then flatten to one array,
  //mirroring the with_percents pipeline above.
  with_bank_percents = Tree.map(
    user_months_flat,
    (values, key) => withBankPercents.js(values, key, percent_bank)
  ) → Tree.deepValues → (values) => values.flat()

  //for each record, bankersRound.js is called with 'percent_bank', 'amount_bank' and banktotal,
  //the same helper used for 'amount_fixed' and 'amount_var' in with_usage.
  with_bank_usage = Tree.map(
    with_bank_percents, (record) => bankersRound.js(record, 'percent_bank', 'amount_bank', banktotal))

  //the output records: convert with_usage and with_bank_usage to a csv representing transactions.
  //NOTE(review): .flat() is called directly on the Tree.map result here, whereas the pipelines
  //above go through Tree.deepValues first; confirm Tree.map returns something with .flat().
  txns_elec = Tree.map(with_usage, outputFormat.js).flat()
  txns_bank = Tree.map(with_bank_usage, outputFormatBank.js)
  //NOTE(review): if txns_elec and txns_bank share keys (e.g. both index-keyed), confirm that
  //Tree.deepMerge keeps every record instead of letting one side replace the other.
  //NOTE(review): the trailing flatten step on the next line is commented out; confirm that is intended.
  txns = Tree.deepMerge(txns_elec, txns_bank) → Tree.deepValues //→ (values) => values.flat()
}