KS-Test
[1]:
"""
Update Parameters Here
"""
# COLLECTION: collection name; it is interpolated into the minting CSV path that
# is read and into the grifters CSV path that is written.
COLLECTION = "Quaks"
# P_VAL: cutoff compared against each KS-test p-value in find_anomalies; a test
# counts as anomalous when its p-value is below this, so raising it flags more accounts.
P_VAL = 0.001
[2]:
"""
Created on Mon Sep 13 16:47:06 2021
KS test on table that has minting accounts and rarity data
@author: nbax1
"""
from scipy import stats
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from honestnft_utils import config
"""
Plot params
"""
# Use a white background both for figures shown inline and for saved figures.
plt.rcParams["figure.facecolor"] = "white"
plt.rcParams["savefig.facecolor"] = "white"
"""
Helper Functions
"""
def generateSyntheticDataset(size, maxRarity, mode="default"):
    """
    Generates a synthetic dataset of rarity ranks for sanity checks.
    MODIFY THIS FUNCTION IF YOU'RE ANALYZING A COLLECTION WITH NON-UNIFORM DISTRIBUTION

    Ranks are drawn without replacement from 1..maxRarity (inclusive), so a
    synthetic buyer never holds the same rank twice.

    inputs:
        size: number of NFTs purchased by synthetic buyer
        maxRarity: should be the number of NFTs in synthetic collection
        mode: "default" buys at random. If mode is a float x, then int(x * size)
              of the buys are drawn from the top 5% of ranks (1..maxRarity/20)
              and the remaining buys are drawn from all ranks not already picked
    returns:
        numpy array with `size` distinct ranks
    raises:
        ValueError (from random.sample) when more ranks are requested than the
        pool being sampled contains
    """
    # Ranks are 1-based and maxRarity itself is a valid rank, hence the +1.
    all_ranks = range(1, maxRarity + 1)

    if mode == "default":
        # modify this to select sequential token_ids or only from a subset of collection
        return np.array(random.sample(all_ranks, size))

    num_rigged_buys = int(mode * size)
    top_cutoff = int(maxRarity / 20)  # last rank that still belongs to the top 5%
    rigged_buys = random.sample(range(1, top_cutoff + 1), num_rigged_buys)

    # Draw the honest buys only from ranks the rigged buys did not take, so the
    # combined sample stays free of duplicates.
    already_taken = set(rigged_buys)
    remaining_ranks = [rank for rank in all_ranks if rank not in already_taken]
    random_buys = random.sample(remaining_ranks, size - len(rigged_buys))
    return np.array(rigged_buys + random_buys)
def getRarityArray(data, account):
    """
    Collects the rarity ranks of the rows that belong to one account.
    inputs:
        data: dataframe with columns 'to_account' and 'rank'
        account: value matched against the 'to_account' column
    returns: numpy array with the 'rank' value of every row whose
             'to_account' equals account (empty when there is no such row)
    """
    belongs_to_account = data["to_account"] == account
    account_ranks = data.loc[belongs_to_account, "rank"]
    return np.array(account_ranks)
def cal_average(num):
    """
    Computes the arithmetic mean of a collection of numbers.
    inputs:
        num: sized iterable of numbers (find_anomalies passes a list of p-values)
    returns: the items added up left to right, divided by len(num)
    raises: ZeroDivisionError when num is empty
    """
    running_total = 0
    # Plain left-to-right accumulation; kept explicit so the floating-point
    # rounding order stays exactly as it has always been.
    for value in num:
        running_total = running_total + value
    return running_total / len(num)
def find_anomalies(data, threshold=2, num_replicates=1):
    """
    Prints KS test results for every account in collection that was anomalously lucky
    and writes the flagged accounts to
    {config.GRIFTERS_DATA_FOLDER}/{COLLECTION}_grifters.csv

    inputs:
        data: dataframe with columns 'to_account' (account that minted the NFT),
              'rank' (rarity ranking), 'txid' (mint transaction) and 'TOKEN_ID'
        threshold: an account is analysed only when it minted strictly MORE than
              this many NFTs
        num_replicates: number of KS tests run per account; set to 1 if not
              generating synthetic datasets (used when rarity is non-uniformly
              distributed)
    returns: None (results are printed and written to the CSV file)

    An account is flagged when at least 80% of its replicates have a p-value
    below the module-level P_VAL.
    """
    mint_counts = data.to_account.value_counts()
    num_datapoints = len(data)
    # Reference sample: every rank 1..num_datapoints exactly once (uniform).
    # It is identical for every account and replicate, so it is built once.
    # NOTE(review): this assumes the ranks in `data` span 1..len(data) - confirm
    # for collections where the table holds only part of the supply.
    uniform_ranks = np.arange(1, num_datapoints + 1)
    grifters_data = []

    for account in mint_counts[mint_counts > threshold].index:
        rarity_array = getRarityArray(data, account)

        num_anomalies = 0
        p_values = []
        for _ in range(num_replicates):
            # To compare against a synthetic buyer instead, replace the next line with:
            #   synthetic = generateSyntheticDataset(len(rarity_array), num_datapoints)
            synthetic = uniform_ranks
            # `cdf` is an array here, so scipy performs the two-sample KS test.
            ks = stats.kstest(rvs=synthetic, cdf=rarity_array, alternative="less")
            if ks.pvalue < P_VAL:  # raise P_VAL and you will get more hits
                num_anomalies += 1
            p_values.append(ks.pvalue)

        # No replicates means nothing was tested (and nothing to average).
        if not p_values:
            continue
        if num_anomalies < num_replicates * 0.8:  # arbitrary threshold
            continue

        # Filter the account's rows once and reuse them for every figure below.
        account_rows = data[data["to_account"] == account]
        transactions = account_rows["txid"].unique()
        avg_p_value = cal_average(p_values)
        num_transactions = len(transactions)
        num_minted = len(account_rows)

        # Lowest (best) rank for each mint transaction, with the token that holds it.
        # The token is looked up inside the transaction's own rows: searching the
        # whole table by rank would return a token from a different transaction
        # whenever two tokens share a rank.
        lowest_list = []
        for transaction in transactions:
            tx_rows = data[data["txid"] == transaction]
            tx_ranks = tx_rows["rank"].tolist()
            tx_tokens = tx_rows["TOKEN_ID"].tolist()
            lowest_rank = min(tx_ranks)
            token_id = tx_tokens[tx_ranks.index(lowest_rank)]
            lowest_list.append([lowest_rank, token_id])

        print(f"{account},{avg_p_value}")
        print("num_transactions: " + str(num_transactions))
        print("num_minted:" + str(num_minted))
        print("{rank, token_id}")
        print(lowest_list)
        print("\n")

        # one record per flagged account for the CSV report
        grifters_data.append(
            {
                "address": account,
                "pvalue": avg_p_value,
                "num_transactions": num_transactions,
                "num_minted": num_minted,
                "token_list": lowest_list,
            }
        )

    pd.DataFrame.from_records(grifters_data).to_csv(
        f"{config.GRIFTERS_DATA_FOLDER}/{COLLECTION}_grifters.csv", index=False
    )
    return
[3]:
"""
Generate Report
"""
# Load the minting table of the configured collection.
PATH = f"{config.MINTING_FOLDER}/{COLLECTION}_minting.csv"
data_to_analyze = pd.read_csv(PATH)

# Header of the report: number of distinct minting accounts, then column names.
print(f"Number of buyers:{len(data_to_analyze['to_account'].unique())}")
print("Lucky Buyer,p")
print("\n")

# Scan every account and print / save the anomalously lucky ones.
find_anomalies(data_to_analyze)
Number of buyers:820
Lucky Buyer,p
0xd9d1c2623fbb4377d9bf29075e610a9b8b4805b4,3.3893849843803126e-23
num_transactions: 10
num_minted:181
{rank, token_id}
[[759, 4936], [56, 4979], [226, 5010], [19, 5037], [217, 5045], [150, 5102], [198, 5160], [194, 5478], [105, 5830], [51, 5859]]
0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,4.408372814708358e-17
num_transactions: 8
num_minted:160
{rank, token_id}
[[146, 3497], [452, 3642], [140, 4846], [57, 4850], [397, 4875], [37, 4901], [260, 5636], [161, 5682]]
0xb9b6856efd128294a912d584366448bc3d4ea979,4.554612995930961e-05
num_transactions: 5
num_minted:100
{rank, token_id}
[[958, 4004], [369, 4013], [18, 4033], [340, 4061], [99, 4074]]
0xcd55ac0917fc01fb05600524c7980567d3aff8d5,3.904632988295227e-08
num_transactions: 4
num_minted:70
{rank, token_id}
[[309, 5212], [210, 5342], [53, 5381], [839, 5551]]
0x955807f8cb79c3a0f4f1e20a8eb336c51d37e5ca,4.282430738485098e-12
num_transactions: 2
num_minted:40
{rank, token_id}
[[131, 5776], [100, 5929]]
0xa4e107af951156c7acc6b4dd714d14f444342a12,1.6550580090071487e-08
num_transactions: 2
num_minted:40
{rank, token_id}
[[79, 5449], [11, 5798]]
0x2e2370489903d7ed48f4cdc00322cd66a4757723,3.1588657609161057e-12
num_transactions: 2
num_minted:40
{rank, token_id}
[[117, 5664], [166, 5824]]
0xe63fed8d441ee8128eaa583549dcb60df4f4f109,3.2061049681663914e-07
num_transactions: 2
num_minted:40
{rank, token_id}
[[75, 4495], [3, 4510]]
0xce3730b98c60a594466a0e6cb057162c36954024,4.846336723216502e-05
num_transactions: 1
num_minted:20
{rank, token_id}
[[534, 5141]]
0x4a9a34a36faedeab457344dea77228dcd318adff,0.0008983556653458107
num_transactions: 1
num_minted:20
{rank, token_id}
[[22, 5892]]
0xa635a54b4305d786db7a2cb3a1c8bc90bb15123e,4.1275109080411315e-08
num_transactions: 1
num_minted:20
{rank, token_id}
[[70, 5709]]
0xafe7309fd01a5e6d5a258e911461ecb9558fbfdf,0.00020578574790537977
num_transactions: 1
num_minted:20
{rank, token_id}
[[31, 5403]]
0x3e8d3cbcc30a5ae702ac5d9537992b154151e93b,5.851773684747198e-06
num_transactions: 1
num_minted:20
{rank, token_id}
[[8, 5590]]
0xd3fa40b89a890d97ff76986096809620d4622417,8.366655012075178e-05
num_transactions: 2
num_minted:16
{rank, token_id}
[[629, 5075], [127, 5119]]
0xf83defe97102b583671377cc444c92c32bcbc59b,7.781871268385283e-06
num_transactions: 1
num_minted:13
{rank, token_id}
[[25, 5693]]
0xdf0c54cbe7b4f59f8dd5bab213db30bbe4de2988,3.5495867862859083e-05
num_transactions: 2
num_minted:10
{rank, token_id}
[[2, 5184], [144, 5565]]
0x9e0e57de9cac0e9c489c080a0c07ff6e42ae12d1,0.0002062096163179165
num_transactions: 1
num_minted:10
{rank, token_id}
[[180, 5991]]
0xa684cfc51bf2d794cf197c35f3377f117bf10b6f,0.00045846499933144703
num_transactions: 1
num_minted:10
{rank, token_id}
[[108, 5943]]
0x0f5a6308b00e00137d7213c2659a18c357c6ec61,5.972131444013321e-06
num_transactions: 1
num_minted:10
{rank, token_id}
[[268, 5734]]
0x8e05bd9fa3059ec69c15bc1a6f4d94f0ac26ce00,0.00022825779509970558
num_transactions: 1
num_minted:10
{rank, token_id}
[[491, 5607]]
0xcea110cf871ba2f88262aaba1026c371da930a51,0.0001021117968667435
num_transactions: 2
num_minted:10
{rank, token_id}
[[373, 5423], [290, 5510]]
0xb9ee84826d802beb4260e5fedbdbaf6c1125a148,0.0002264147423311908
num_transactions: 2
num_minted:6
{rank, token_id}
[[480, 4228], [517, 4325]]
0xb3e0f20fe73ffb0c011d9afa10ee0521d6678619,0.0007306253677841018
num_transactions: 1
num_minted:5
{rank, token_id}
[[12, 5972]]
0x0653502fce854ea6074424105567534719f25587,0.0006248556622637224
num_transactions: 1
num_minted:5
{rank, token_id}
[[208, 4928]]
0xfdc3e8edd74a90fe971ef7d56a0c66c870b10f5d,0.00017588546056038361
num_transactions: 1
num_minted:4
{rank, token_id}
[[61, 5317]]
0x0bab9a5ca29727b112cd5dce487a89fd6b7366bd,0.0007933496066012617
num_transactions: 1
num_minted:3
{rank, token_id}
[[27, 5731]]