garethtdavies
9/21/2019 - 4:14 PM

Outputs aggregated metrics from the Zcash blockchain

Outputs aggregated metrics from the Zcash blockchain

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Output stats from Zcash blockchain using Blocksci
   Uses https://github.com/cryptolu/BlockSci for Zcash Blocksci adaptation
   Credit @str4d https://github.com/str4d for transaction type detection
"""

import blocksci
import os
import pandas as pd
import time

def _classify_transaction(tx):
    """Label one transaction with the pool and kind of transfer it represents.

    Args:
        tx: A transaction object from the Zcash BlockSci fork. The attributes
            read are ``input_count``, ``output_count``, ``sum_vpubold``,
            ``sum_vpubnew``, ``value_balance``, ``is_sproutshielded`` and
            ``is_saplingshielded``.

    Returns:
        A ``(pool, transaction_type)`` tuple of strings. ``pool`` is one of
        "Sprout", "Sapling" or "Transparent"; ``transaction_type`` is one of
        "Shielded", "Migration", "Shielding", "Deshielding" or "Transparent".

    The rules below are a sequence of independent ``if`` statements rather
    than an ``elif`` chain on purpose: when several rules match, the last one
    that matches decides the label.
    """
    has_transparent_out = tx.output_count > 0
    has_transparent = (tx.input_count > 0) or has_transparent_out

    # Net direction of value relative to each shielded pool
    sprout_in = tx.sum_vpubold > tx.sum_vpubnew
    sapling_in = tx.value_balance < 0

    # Simple metric for unshielding: it isn't shielding, and there is another
    # type of output to consume it (thus not unshielding solely for fees)
    sprout_out = (tx.sum_vpubold < tx.sum_vpubnew) and (
        sapling_in or has_transparent_out)
    sapling_out = (tx.value_balance > 0) and (
        sprout_in or has_transparent_out)

    pool = None
    transaction_type = None

    # Fully-shielded (solely Sprout or Sapling), or a Sprout -> Sapling move
    if not has_transparent:
        if tx.is_sproutshielded and (not tx.is_saplingshielded):
            transaction_type = "Shielded"
            pool = "Sprout"
        if (not tx.is_sproutshielded) and tx.is_saplingshielded:
            transaction_type = "Shielded"
            pool = "Sapling"
        if sprout_out and sapling_in:
            transaction_type = "Migration"
            pool = "Transparent"

    # Not fully shielded. These run unconditionally and may overwrite a label
    # chosen above; later rules take precedence over earlier ones.
    if sprout_in:
        transaction_type = "Shielding"
        pool = "Sprout"
    if sprout_out and (not sapling_in):
        transaction_type = "Deshielding"
        pool = "Sprout"
    if sapling_in and (not sprout_out):
        transaction_type = "Shielding"
        pool = "Sapling"
    if sapling_out:
        transaction_type = "Deshielding"
        pool = "Sapling"

    # Catch everything else as transparent
    if transaction_type is None:
        transaction_type = "Transparent"
        pool = "Transparent"

    return pool, transaction_type


# Pass our blocksci data directory
chain = blocksci.Blockchain(os.path.expanduser("~/blocksci/zcash-data"))
data = []

# Loop through all blocks
# Filter this e.g for blk in chain[419200:] for Sapling only data
for blk in chain:
    for tx in blk:
        pool, transaction_type = _classify_transaction(tx)

        # Write our data: one row per transaction
        data.append([tx.hash, tx.block_time, tx.block_height,
                     tx.is_coinbase, pool, transaction_type])

# Turn the collected rows into a dataframe with named columns
df = pd.DataFrame(data)
df.columns = ["Txid", "BlockTime", "Block", "Coinbase", "Pool", "Type"]

# Store both label columns as categoricals
for label_column in ("Pool", "Type"):
    df[label_column] = df[label_column].astype("category")

# Uncomment to preview the dataframe
# print(df.head(5))

# Reduce every block time to its "YYYY-MM" month string so rows group by month
df["BlockTime"] = df["BlockTime"].dt.strftime("%Y-%m")

# Count transactions per month for each (Pool, Type) pair; missing
# combinations become 0 and a "Total" margin row/column is added
output = df.pivot_table(
    values="Txid",
    index="BlockTime",
    columns=["Pool", "Type"],
    aggfunc="count",
    fill_value=0,
    margins=True,
    margins_name="Total",
)

# Persist the monthly summary next to the script
output.to_csv("zcash-data.csv")

# print(output)