
Migrate data from Home Assistant SQLite to InfluxDB

I noticed some data was missing from my InfluxDB permanent storage that was still present in Home Assistant’s SQLite database, in two places: states and statistics. I want to port the data from one to the other.

Find data in SQLite

Connect to the database. Use -readonly so nothing gets destroyed by accident.

sudo sqlite3 -readonly /var/lib/homeassistant/home-assistant_v2.db
.mode column
.headers on
.width 30 10

Show tables and structures

.tables
PRAGMA table_info(states);
PRAGMA table_info(states_meta);
PRAGMA table_info(statistics);
PRAGMA table_info(statistics_meta);

Yields:

sqlite> PRAGMA table_info(statistics);
cid|name|type|notnull|dflt_value|pk
0|id|INTEGER|1||1
1|created|DATETIME|0||0
2|start|DATETIME|0||0
3|mean|FLOAT|0||0
4|min|FLOAT|0||0
5|max|FLOAT|0||0
6|last_reset|DATETIME|0||0
7|state|FLOAT|0||0
8|sum|FLOAT|0||0
9|metadata_id|INTEGER|0||0
10|created_ts|FLOAT|0||0
11|start_ts|FLOAT|0||0
12|last_reset_ts|FLOAT|0||0
sqlite> PRAGMA table_info(statistics_meta);
cid|name|type|notnull|dflt_value|pk
0|id|INTEGER|1||1
1|statistic_id|VARCHAR(255)|0||0
2|source|VARCHAR(32)|0||0
3|unit_of_measurement|VARCHAR(255)|0||0
4|has_mean|BOOLEAN|0||0
5|has_sum|BOOLEAN|0||0
6|name|VARCHAR(255)|0||0

Get data from states

It seems we need state and last_updated_ts (or last_changed_ts). entity_id is empty in the states table; it is stored in states_meta and linked via metadata_id. See also this post (home-assistant.io). The states table looks like this (a query to list the available entity_ids follows the dump):

cid|name|type|notnull|dflt_value|pk
0|state_id|INTEGER|1||1
1|domain|VARCHAR(64)|0||0
2|entity_id|VARCHAR(255)|0||0
3|state|VARCHAR(255)|0||0
4|attributes|TEXT|0||0
5|event_id|INTEGER|0||0
6|last_changed|DATETIME|0||0
7|last_updated|DATETIME|0||0
8|created|DATETIME|0||0
9|old_state_id|INTEGER|0||0
10|attributes_id|INTEGER|0||0
11|origin_idx|INTEGER|0||0
12|context_id|VARCHAR(36)|0||0
13|context_user_id|VARCHAR(36)|0||0
14|context_parent_id|VARCHAR(36)|0||0
15|last_updated_ts|FLOAT|0||0
16|last_changed_ts|FLOAT|0||0
17|context_id_bin|BLOB|0||0
18|context_user_id_bin|BLOB|0||0
19|context_parent_id_bin|BLOB|0||0
20|metadata_id|INTEGER|0||0
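
To see which entity_ids are available to export in the first place, states_meta can be listed directly (a sketch; the LIKE pattern is just an example):

SELECT
  metadata_id,
  entity_id
FROM states_meta
WHERE entity_id LIKE 'sensor.%'
ORDER BY entity_id;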

Explore what data is stored

SELECT
  *,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
LIMIT 1;

yields:

state_id|domain|entity_id|state|attributes|event_id|last_changed|last_updated|created|old_state_id|attributes_id|origin_idx|context_id|context_user_id|context_parent_id|last_updated_ts|last_changed_ts|context_id_bin|context_user_id_bin|context_parent_id_bin|metadata_id|metadata_id|entity_id|entity_id
101258448|||1231.496|||||||83007|0||||1708654322.53454||?ӻ?fۓ?<?r???z|||40|40|sensor.smeter_production2|sensor.smeter_production2

Find the state changes associated with specific entity_ids and get the timestamps in nanoseconds, to prepare for InfluxDB:

SELECT
  state,
  printf("%.0f", last_updated_ts*1000000000) as timestamp,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
WHERE states_meta.entity_id = 'sensor.compr_in_power_43141' OR states_meta.entity_id = 'sensor.compr_in_energy' OR states_meta.entity_id = 'sensor.int_el_add_energy' OR states_meta.entity_id = 'sensor.int_el_add_power_43084';

Get data from statistics

Explore what data is stored; see also the docs (home-assistant.io). One addition to the docs: created_ts seems to be the end of the statistics integration window, the moment at which the statistic_id sensor has the value state (start_ts being the beginning of the window).
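
A quick sanity check of that claim, run in the same sqlite3 session; for the hourly statistics table the difference should come out at roughly 3600 seconds plus a small processing delay:

SELECT
  datetime(start_ts, 'unixepoch') AS window_start,
  datetime(created_ts, 'unixepoch') AS row_created,
  printf('%.0f', created_ts - start_ts) AS seconds_apart
FROM statistics
ORDER BY created_ts DESC
LIMIT 3;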

SELECT
  count(*)
FROM statistics;

SELECT
  MAX(metadata_id)
FROM statistics;

.mode csv

SELECT
  *
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084'
LIMIT 10;

SELECT
  state,
  mean,
  sum,
  created_ts,
  start_ts,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084'
ORDER BY created_ts DESC
LIMIT 40;

Looks like:

statistic_id   sensor.int_el_add_energy  sensor.compr_in_energy  sensor.compr_in_power_43141  sensor.int_el_add_power_43084
state          52.66                     663.71
mean                                                             0.558742289913889            0.0
sum            52.66                     663.21
created_ts     1714122010.38259          1714122010.38236        1714122010.38231             1714122010.3821    == Fri 26 Apr 2024 11:00:10 AM CEST
start_ts       1714118400.0              1714118400.0            1714118400.0                 1714118400.0       == Fri 26 Apr 2024 10:00:00 AM CEST

Export data

Saving SQLite to CSV (stackoverflow.com) is supported out of the box:

.mode csv

.separator "," "\n"

.headers on

.once /home/tim/export-nibe2.csv

SELECT
  state,
  printf("%.0f", last_updated_ts) as timestamp,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
WHERE states_meta.entity_id = 'sensor.compr_in_power_43141' OR states_meta.entity_id = 'sensor.compr_in_energy' OR states_meta.entity_id = 'sensor.int_el_add_energy' OR states_meta.entity_id = 'sensor.int_el_add_power_43084';


.once /home/tim/export-nibe-stats-power2.csv

SELECT
  mean,
  printf("%.0f", created_ts) as timestamp,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084';

.once /home/tim/export-nibe-stats-energy2.csv

SELECT
  state,
  printf("%.0f", created_ts) as timestamp,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy';

Import to InfluxDB

For InfluxDB we need the line protocol (influxdata.com):

// Syntax
<measurement>[,<tag_key>=<tag_value>[,<tag_key>=<tag_value>]] <field_key>=<field_value>[,<field_key>=<field_value>] [<timestamp>]

// Example
myMeasurement,tag1=value1,tag2=value2 fieldKey="fieldValue" 1556813561098000000

In my case I want (one line per sensor, i.e. two energy sensors and two power sensors):

energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*3600*1000 <timestamp>
energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*3600*1000 <timestamp>

powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*1000 <timestamp>
powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*1000 <timestamp>
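
To make the scaling concrete: I assume the sensors report kWh and kW, so ×3600×1000 converts kWh to joules and ×1000 converts kW to watts. A hypothetical energy reading of 663.71 at timestamp 1714118400 would then end up as (uniqueid in its final form, i.e. after the sed renames further down):

energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=compr_in_energy_43141 value=2389356000 1714118400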

So with some awk-fu (stackoverflow.com) this gives (stackexchange.com):

N.B. Ensure you use GNU awk in case you need >32-bit integers. I ran into an issue when apparently I had mawk installed, which has 32-bit ints, and my results looked weird.
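
A quick way to check which awk you have and whether it copes with the large scaled values (the scaled energy readings already exceed the 32-bit range):

# GNU awk prints 2389356000 here; an awk limited to 32-bit integers prints something else
awk 'BEGIN{printf "%d\n", 663.71*3600*1000}'
# on Debian/Ubuntu this shows which awk the symlink actually points at
readlink -f "$(command -v awk)"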

grep _energy /home/tim/export-nibe2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe.txt
grep _power_ /home/tim/export-nibe2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe.txt

grep -v "^unavailable" /home/tim/export-nibe-stats-energy2.csv | awk -F"," '{$1=sprintf("%d", $1*3600*1000); $2=$2-10; print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe.txt
grep -v "^unavailable" /home/tim/export-nibe-stats-power2.csv | awk -F"," '{$1=sprintf("%d", $1*1000); $2=$2-10; print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe.txt

sed -i -e 's/sensor.compr_in_energy/compr_in_energy_43141/' export-nibe.txt
sed -i -e 's/sensor.int_el_add_energy/int_el_add_energy_43084/' export-nibe.txt
sed -i -e 's/sensor.compr_in_power_43141/compr_in_power_43141/' export-nibe.txt
sed -i -e 's/sensor.int_el_add_power_43084/int_el_add_power_43084/' export-nibe.txt

Now insert into InfluxDB (I use a temporary measurement for testing, to check that ingestion works as expected):

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @export-nibe.txt
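
For the temporary-measurement test, one option is to sed the measurement names to a _temp_ prefix (as done further down in the fixes), post, inspect, and then either re-post the renamed file or copy server-side with InfluxQL's SELECT INTO. A sketch of the server-side route with the InfluxDB 1.x CLI; GROUP BY * keeps the tags as tags instead of demoting them to fields:

# inspect the temporary measurement first
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'SELECT count(value) FROM "_temp_energyv3"'
# copy it into the real measurement; GROUP BY * preserves the tags
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'SELECT * INTO energyv3 FROM "_temp_energyv3" GROUP BY *'
# and clean up
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'DROP MEASUREMENT "_temp_energyv3"'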

Bonus: upload Nibe USB log to InfluxDB

Nibe can log data to a USB stick, with one file per day named YYMMDD01.LOG. The data is stored as TSV, and the first lines look like this (a sketch for converting and uploading the log follows the transposed view below):

Divisors		1	1	10	10	10	10	10	10	10	10	10	10	10	10	100	1	10	10	10	10	1	1	1	1
Date	Time	version(43001)	R-version(44331)	BT1(40004)	BT2(40008)	EB100-EP14-BT3(40012)	BT6(40014)	BT7(40013)	EB100-EP14-BT10(40015)	EB100-EP14-BT11(40016)	EB100-EP14-BT12(40017)	EB100-EP14-BT14(40018)	EB100-EP14-BT17(40022)	BT25(40071)	BT50(40033)	Tot.Int.Add(43084)	Alarm number(45001)	Calc. Supply(43009)	Degree Minutes(40940)	BT1-Average(40067)	compr. freq. act.(43136)	Relays PCA-Base(43514)	GP1-speed EP14(43437)	GP2-speed EP14(43439)	Prio(43086)
2024-01-15	22:59:00	9443	5	-7	236	212	545	572	-35	-59	231	492	3	205	203	0	0	200	-2715	0	380	7	100	69	30
2024-01-15	22:59:30	9443	5	-7	236	212	545	572	-34	-58	232	492	4	206	204	0	0	200	-2709	0	369	7	100	69	30
2024-01-15	23:00:00	9443	5	-7	231	213	545	571	-35	-56	232	492	-31	206	203	0	0	200	-2709	0	370	7	100	69	30

Transposing for human readers:

Date	2024-01-15	2024-01-15	2024-01-15	
Time	22:59:00	22:59:30	23:00:00
1	version(43001)	9443	9443	9443
1	R-version(44331)	5	5	5
10	BT1(40004)	-7	-7	-7
10	BT2(40008)	236	236	231
10	EB100-EP14-BT3(40012)	212	212	213
10	BT6(40014)	545	545	545
10	BT7(40013)	572	572	571
10	EB100-EP14-BT10(40015)	-35	-34	-35
10	EB100-EP14-BT11(40016)	-59	-58	-56
10	EB100-EP14-BT12(40017)	231	232	232
10	EB100-EP14-BT14(40018)	492	492	492
10	EB100-EP14-BT17(40022)	3	4	-31
10	BT25(40071)	205	206	206
10	BT50(40033)	203	204	203
100	Tot.Int.Add(43084)	0	0	0
1	Alarm number(45001)	0	0	0
10	Calc. Supply(43009)	200	200	200
10	Degree Minutes(40940)	-2715	-2709	-2709
10	BT1-Average(40067)	0	0	0
10	compr. freq. act.(43136)	380	369	370
1	Relays PCA-Base(43514)	7	7	7
1	GP1-speed EP14(43437)	100	100	100
1	GP2-speed EP14(43439)	69	69	69
1	Prio(43086)	30	30	30
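
To actually get such a log into InfluxDB: the Divisors row gives the scale factor for each column, and Date + Time give the timestamp. A rough conversion sketch, assuming GNU awk (for mktime), that the log timestamps are local time, and using a measurement name (nibelog) and tag key (register) of my own choosing; the file name matches the sample day above:

gawk -F'\t' '
{ sub(/\r$/, "") }                        # the logger may use CRLF line endings
NR == 1 {                                 # "Divisors" row: scale factor per data column
  for (i = 3; i <= NF; i++) div[i] = $i
  next
}
NR == 2 {                                 # header row: column names
  for (i = 3; i <= NF; i++) {
    name[i] = $i
    gsub(/ /, "_", name[i])               # line protocol tag values must not contain raw spaces
  }
  next
}
NF > 2 {
  ts = $1 " " $2
  gsub(/[-:]/, " ", ts)                   # "2024-01-15 22:59:30" -> "2024 01 15 22 59 30"
  epoch = mktime(ts)                      # gawk: interpreted as local time
  for (i = 3; i <= NF; i++)
    if (div[i] + 0 != 0)
      printf "nibelog,register=%s value=%s %d\n", name[i], $i / div[i], epoch
}' 24011501.LOG > nibelog.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @nibelog.txt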

Fix wrong insertion

From 2024-04-23T21:21:00Z == 1713907260

To 2024-04-24T13:49:00Z == 1713966540

sort -t, -k 2 export-nibe.csv > export-nibe-gap.csv
grep -n 1713966 export-nibe-gap.csv  -> 7906
grep -n 171390 export-nibe-gap.csv -> 7812
head -n 7906 export-nibe-gap.csv | tail -n +7812 > export-nibe-gap2.csv
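
An alternative to picking line numbers: filter the gap window by the timestamp value itself, no sort needed. A sketch, assuming the timestamp in column 2 is in seconds (scale the bounds by 1e9 if the export is in nanoseconds):

# keep only rows whose timestamp falls inside the gap window
awk -F',' '$2 + 0 >= 1713907260 && $2 + 0 <= 1713966540' export-nibe.csv > export-nibe-gap2.csv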

grep _energy /home/tim/export-nibe-gap2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe-gap2.txt
grep _power_ /home/tim/export-nibe-gap2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe-gap2.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=ns" --data-binary @export-nibe-gap2.txt

Then, in the influx CLI, delete the points that went into the temporary measurement with the wrong (un-renamed) uniqueid:

delete from _temp_energyv3 where uniqueid='sensor.compr_in_energy'

Fix wrong key value

> select * from energyv3 where source='nibe' and uniqueid='int_el_add_energy_43084' order by time asc limit 2
name: energyv3
time                 quantity    source type        unique_id uniqueid                value
----                 --------    ------ ----        --------- --------                -----
2024-04-24T13:50:00Z electricity nibe   consumption           int_el_add_energy_43084 189576000
2024-04-24T13:51:00Z electricity nibe   consumption           int_el_add_energy_43084 189576000
tim@proteus:~$ date +%s -d 2024-04-24T13:50:00Z
1713966600
1713966498438867000

sort -t, -k 2 export-nibe.csv > export-nibe-gap.csv
head -n 7906 export-nibe-gap.csv > export-nibe-head.csv

grep _energy /home/tim/export-nibe-head.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe-head.txt
grep _power_ /home/tim/export-nibe-head.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe-head.txt

cat export-nibe-head.txt > export-nibe-merged.txt
cat export-nibe.txt >> export-nibe-merged.txt

sed -i -e 's/sensor.compr_in_energy/compr_in_energy_43141/' export-nibe-merged.txt
sed -i -e 's/sensor.int_el_add_energy/int_el_add_energy_43084/' export-nibe-merged.txt

sort export-nibe-merged.txt | uniq > export-nibe-unique.txt

sed -i -e 's/powerv3,/_temp_powerv3,/' export-nibe-unique.txt
sed -i -e 's/energyv3,/_temp_energyv3,/' export-nibe-unique.txt

Drop the last nine characters of every line: with nanosecond timestamps this leaves second precision, matching precision=s in the final write below.

cat export-nibe-unique.txt | rev | cut -c10- | rev > export-nibe-unique-second.txt


sed -i -e 's/_temp_powerv3,/powerv3,/' export-nibe-unique-second.txt
sed -i -e 's/_temp_energyv3,/energyv3,/' export-nibe-unique-second.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @export-nibe-unique-second.txt
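
A quick way to check that the corrected points landed where expected (a sketch with the InfluxDB 1.x CLI):

# count imported points per uniqueid in both measurements
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute "SELECT count(value) FROM energyv3 WHERE source='nibe' GROUP BY uniqueid"
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute "SELECT count(value) FROM powerv3 WHERE source='nibe' GROUP BY uniqueid"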

#Home-Assistant #Influxdb #Linux #Sqlite