# =====================================================================================================================
# Script to create unilabel seeded data for MuSSCO.
# Unilabel means each document/onion has exactly one exclusive label (NO multiple labels).
# Usage: 
#   ./create_unilabel_seeds.sh 3 data.sample parameters.sample/seeds_3_per_category.txt
#
# Each cluster/label will have three seeds in the above example chosen from data.sample directory.
# Following the above usage example, the output will be generated in the file parameters.sample/seeds_3_per_category.txt
# =====================================================================================================================

# ---------------------------------------------------------------------------
# create_unilabel_seeds N DIR OUTFILE
#
# For every sub-directory of DIR (one sub-directory per label), pick up to N
# of its non-hidden entries at random and append one record per pick to
# OUTFILE, formatted as:
#
#     <entry name>: <label directory name>
#
# Arguments:
#   N        non-negative integer, maximum number of seeds per label
#   DIR      directory whose immediate sub-directories are the labels
#   OUTFILE  file to append to; a relative path is resolved against the
#            caller's current directory (the function never changes directory)
# Returns:
#   0 on success, 2 on bad usage, 1 if DIR is not a directory.
#
# The body is a subshell "( ... )" so its working variables do not leak into
# the calling shell; this avoids relying on the non-POSIX `local` builtin.
# ---------------------------------------------------------------------------
create_unilabel_seeds() (
  if [ "$#" -ne 3 ]; then
    printf 'Usage: %s <seeds-per-label> <data-dir> <output-file>\n' "${0##*/}" >&2
    return 2
  fi

  n=$1
  dir=$2
  op=$3

  # Reject empty or non-numeric counts up front: `head -n ""` would otherwise
  # fail once per label with a confusing message.
  case "$n" in
    '' | *[!0-9]*)
      printf '%s: seeds-per-label must be a non-negative integer, got "%s"\n' \
        "${0##*/}" "$n" >&2
      return 2
      ;;
  esac

  if [ ! -d "$dir" ]; then
    printf '%s: data directory not found: %s\n' "${0##*/}" "$dir" >&2
    return 1
  fi

  # The trailing slash in the glob restricts matches to directories, so stray
  # files placed directly in DIR are not mistaken for labels.
  for label_dir in "$dir"/*/; do
    # With no sub-directories the pattern stays literal; skip it.
    [ -d "$label_dir" ] || continue

    label=${label_dir%/}
    label=${label##*/}

    # Enumerate entries with a glob instead of parsing `ls -l` columns, so
    # names containing spaces survive intact. One name per line is fed to
    # `sort -R` (random order) and `head` keeps at most N of them.
    for entry in "$label_dir"*; do
      # Skip the literal pattern left behind by an empty directory, but keep
      # dangling symlinks (which fail -e) since a listing would show them too.
      [ -e "$entry" ] || [ -L "$entry" ] || continue
      printf '%s\n' "${entry##*/}"
    done |
      sort -R |
      head -n "$n" |
      while IFS= read -r onionname; do
        printf '%s: %s\n' "$onionname" "$label"
      done
  done >>"$op"
)

# Invoked as the last command so that the script's exit status is the
# function's return status.
create_unilabel_seeds "$@"

