266 lines
9.0 KiB
YAML
266 lines
9.0 KiB
YAML
#using sake instead of just because need file outputs for dependencies ...
|
|
#! P=process
|
|
#! I=inputs
|
|
#! O=output
|
|
#! C=csvsql --no-inference --snifflimit 0 --query
|
|
#
|
|
|
|
|
|
#skip for demo: demo dataset already has normalized names.
|
|
# txns-names:
|
|
# help: normalize account names and fix missing names
|
|
# (ignore) doc: |-
|
|
# a case statement with a when for each variant present in the input data,
|
|
# becauase the payee field varies (e.g. 'Bob Fischer', 'B. Fischer').
|
|
# Then also add missing names (some bank transactions don't have a payee)
|
|
# (ignore) WARNING: |-
|
|
# might need to check the input data over time, as new variants occur... Is there some kind of warning I can build in to see if there are any UNKNOWNS left over?
|
|
# also, the second query, 'fix missing names', sets all remaining UNKNOWNs to Bank. Is it possible to do that so there's remaining UNKNOWNs to detect?
|
|
# dependencies:
|
|
# - "$I/bank-transactions.csv"
|
|
# - "fix_missing_names.sql"
|
|
# - "query_normalize_names.sql" #not present in this demo dataset
|
|
# formula: >-
|
|
# $C "normalize_names.sql"
|
|
# --query "fix_missing_names.sql"
|
|
# < "$I/bank-transactions.csv"
|
|
# > $P/txns-names.csv
|
|
# output:
|
|
# - $P/txns-names.csv
|
|
|
|
|
|
txns-fromto:
|
|
help: convert transactions to format 'from — amount — to'
|
|
dependencies:
|
|
# - $P/txns-names.csv
|
|
- $I/bank-transactions.csv
|
|
formula: >-
|
|
$C 'select id, date, name as "from",
|
|
amount,
|
|
"shared" as "to", description
|
|
from stdin where cast(amount as real) > 0
|
|
union select id, date, "shared" as "from",
|
|
-amount,
|
|
name as "to", description
|
|
from stdin where cast(amount as real) < 0
|
|
order by id, date;'
|
|
< $I/bank-transactions.csv
|
|
> $P/txns-fromto.csv
|
|
output:
|
|
- $P/txns-fromto.csv
|
|
|
|
|
|
|
|
#skip for demo. In the real dataset, kWh-meters:users is many:one so the meter usages have to be grouped and summed.
|
|
#months added to output file manually.
|
|
# kwh-usage:
|
|
# help: everyone's usage per period, including kWh used and months active in period.
|
|
# (ignore) doc: |
|
|
# in the real dataset, some households user multiple kWh meters. We need one kWh usage figure per account,
|
|
# so we have to sum the amount, grouped by account.
|
|
# dependencies:
|
|
# - $I/kwh-meters.csv
|
|
# - $I/months.csv
|
|
# formula: >-
|
|
# $C 'with vbwsum as
|
|
# (select "end" - "start" as usage, period, account from meters),
|
|
# vb as(select period, account, cast(sum(usage) as int) as usage
|
|
# from vbwsum
|
|
# group by period, account)
|
|
# select vb.*, months.months from vb
|
|
# inner join months on vb.period = months.period
|
|
# and vb.account = months.account;'
|
|
# --tables meters,months
|
|
# $I/kwh-meters.csv
|
|
# $I/months.csv
|
|
# > $P/kwh-usage.csv
|
|
# output:
|
|
# - $P/kwh-usage.csv
|
|
|
|
bank-costs:
|
|
help: extract totals for each supplier from initial transaction table.
|
|
(ignore) doc: |
|
|
This gets calculated so that it gets updated when bank transactions input file changes. Only Bank is actually used for now...
|
|
dependencies:
|
|
- $P/txns-fromto.csv
|
|
formula: >-
|
|
$C 'select "to" as account, cast(round(sum(amount), 2) as string) as amount from stdin
|
|
where "to" = "Bank"
|
|
group by "to";'
|
|
< $P/txns-fromto.csv
|
|
> $P/bank-costs.csv
|
|
output:
|
|
- $P/bank-costs.csv
|
|
|
|
|
|
total-val-txns:
|
|
help: reverse value transactions for electricity usage variable and fixed, and banking costs
|
|
(ignore) doc: |
|
|
Doing this calculation is the main premise of this whole pipeline.
|
|
To figure out how much all the users owe or are owed by the shared account,
|
|
we allocate the final costs of energy delivered (from the invoices) to each account
|
|
based on their proportion of kWh used (that gets done in the next step, indiv-val-txns).
|
|
And then in the step txns-with-generated, we add these as 'virtual' transactions in reverse, from the Energy company to the shared account,
|
|
and then from the shared account to the individual users.
|
|
If everyone's account is settled correctly, then in the final balance every account should be at zero.
|
|
dependencies:
|
|
- $I/collective-usage-expenses.csv
|
|
- $P/bank-costs.csv
|
|
formula: >-
|
|
$C '
|
|
with bank as (
|
|
select account || "total" as id, "2025-09-01" as date, account, amount as amount, "value of banking costs" as description
|
|
from sup where account = "Bank"
|
|
)
|
|
select period || account || "valvar" as id,
|
|
enddate as date,
|
|
account as "from",
|
|
exp_var as amount,
|
|
"shared" as "to",
|
|
"value of variable electricity expenses" as description
|
|
from elec
|
|
union all select period || account || "valfixed" as id,
|
|
enddate as date,
|
|
account as "from",
|
|
exp_fixed as amount,
|
|
"shared" as "to",
|
|
"value of fixed electricity expenses" as description
|
|
from elec
|
|
|
|
union all select id, date, account, amount, "shared", description from bank
|
|
|
|
order by id, date, account;'
|
|
--tables elec,sup
|
|
$I/collective-usage-expenses.csv
|
|
$P/bank-costs.csv
|
|
> $P/total-val-txns.csv
|
|
output:
|
|
- $P/total-val-txns.csv
|
|
|
|
#Javascript version: uses percent-divide.js
|
|
indiv-val-txns:
|
|
help: create txns for value consumed by each household
|
|
(ignore) doc: |
|
|
this step gets done in Javascript because sqlite can't do accurate floating point division.
|
|
We need to allocate a proportion of the total expenses to each user:
|
|
- collective energy usage * proportion of kWh per user
|
|
- banking costs * months the user was participating (Sarah joined in the last month of period 2).
|
|
based on
|
|
The math is still not 100% accurate, but it's good enough for the current purposes.
|
|
(the errors are to the order of 10e-10).
|
|
Possible improvement: use Big.js to do the arithmetic, though there might still be a final rounding error at the end.
|
|
dependencies:
|
|
- $I/collective-usage-expenses.csv
|
|
- $P/bank-costs.csv
|
|
- $I/kwh-usage.csv
|
|
- percent-divide.js
|
|
formula: >-
|
|
deno run -A percent-divide.js $I/collective-usage-expenses.csv $P/bank-costs.csv $I/kwh-usage.csv
|
|
> $P/indiv-val-txns.csv
|
|
output:
|
|
- $P/indiv-val-txns.csv
|
|
|
|
#Web Origami version: uses data.ori
|
|
#indiv-val-txns:
|
|
# help: create txns for value consumed by each household
|
|
# (ignore) doc: |
|
|
# this step gets done in Javascript because sqlite can't do accurate floating point division.
|
|
# We need to allocate a proportion of the total expenses to each user:
|
|
# - collective energy usage * proportion of kWh per user
|
|
# - banking costs * months the user was participating (Sarah joined in the last month of period 2).
|
|
# based on
|
|
# The math is still not 100% accurate, but it's good enough for the current purposes.
|
|
# (the errors are to the order of 10e-10).
|
|
# Possible improvement: use Big.js to do the arithmetic, though there might still be a final rounding error at the end.
|
|
# dependencies:
|
|
# - $I/collective-usage-expenses.csv
|
|
# - $P/bank-costs.csv
|
|
# - $I/kwh-usage.csv
|
|
# - data.ori
|
|
##there are more dependencies -- all the JS scripts used in data.ori -- but it doesn't make sense to include those all here.
|
|
# formula: >-
|
|
# ori 'Origami.csv data.ori/txns' > $P/indiv-val-txns.csv
|
|
# output:
|
|
# - $P/indiv-val-txns.csv
|
|
|
|
|
|
txns-with-other:
|
|
help: manually add txns which were not in input bank transaction csv
|
|
(ignore) doc: |
|
|
we add this in the 'fromto' step.
|
|
dependencies:
|
|
- $P/txns-fromto.csv
|
|
- $I/txns-other.csv
|
|
formula: >-
|
|
$C 'select * from "txns-fromto" union all
|
|
select * from "txns-other"
|
|
order by date, id;'
|
|
$P/txns-fromto.csv
|
|
$I/txns-other.csv
|
|
> $P/txns-with-other.csv
|
|
output:
|
|
- $P/txns-with-other.csv
|
|
|
|
txns-with-generated:
|
|
help: add the generated value transactions.
|
|
dependencies:
|
|
- $P/txns-with-other.csv
|
|
- $P/total-val-txns.csv
|
|
- $P/indiv-val-txns.csv
|
|
formula: >-
|
|
$C 'select * from "txns-with-other"
|
|
union all select * from "total-val-txns"
|
|
union all select * from "indiv-val-txns"
|
|
order by date, id;'
|
|
$P/txns-with-other.csv
|
|
$P/total-val-txns.csv
|
|
$P/indiv-val-txns.csv
|
|
> $P/txns-with-generated.csv
|
|
output:
|
|
- $P/txns-with-generated.csv
|
|
|
|
txns:
|
|
help: expand transactions to two rows per transaction.
|
|
(ignore) doc: By expanding the transactions to two entries, one for the 'from' account and one for the 'to' account, we can do double-entry accounting (lite)
|
|
dependencies:
|
|
- $P/txns-with-generated.csv
|
|
formula: >-
|
|
$C 'select id, date,
|
|
"from" as account,
|
|
-amount as amount,
|
|
description
|
|
from stdin union all
|
|
select id, date,
|
|
"to" as account,
|
|
amount,
|
|
description
|
|
from stdin
|
|
order by date, id;'
|
|
< $P/txns-with-generated.csv
|
|
> $P/txns.csv
|
|
output:
|
|
- $P/txns.csv
|
|
|
|
|
|
balance:
|
|
help: get total balance
|
|
dependencies:
|
|
- $P/txns.csv
|
|
formula: >-
|
|
$C 'with total as (
|
|
select "total" as account, sum(amount) as amount
|
|
from stdin
|
|
), pre as (
|
|
select account,
|
|
round(sum(amount),2) as amount
|
|
from stdin
|
|
where id is not null
|
|
group by account
|
|
)
|
|
select * from pre union all select * from total;'
|
|
< $P/txns.csv
|
|
>$O/balance.csv
|
|
output:
|
|
- $O/balance.csv
|
|
|