« home

Four Vs of Data

machine learning · materials informatics · info-graphics · cetz · tikz

The state of the 4 Vs of data as they apply to materials informatics. Inspired by fig. 1 in https://api.semanticscholar.org/CorpusID:137734316.


Four Vs of Data

  Download

PNG · PDF · SVG

  Code

  LaTeX

four-vs-of-data.tex (43 lines)

\documentclass[tikz]{standalone}

% Shared geometry for the four quadrants.
\newcommand{\sliceRadius}{3cm}                 % radius of the filled quarter discs
\newcommand{\arrowRadius}{1.1 * \sliceRadius}  % radius of the arrows drawn around the discs
\newcommand{\quadGap}{1mm}                     % per-axis offset that pushes each quadrant off the origin

\begin{document}
\begin{tikzpicture}[
    thick,
    every path/.style={rounded corners=0.1},
    % Arrow with a tip at its end, shortened by 2mm on both sides.
    gauge arrow/.style={->, shorten >=2mm, shorten <=2mm},
  ]

  % Every quadrant below follows the same recipe inside its own shifted scope:
  %   1. a light quarter disc spanning the full 90 degrees,
  %   2. a darker wedge starting at the quadrant's first angle,
  %   3. the quadrant title,
  %   4. an arrow along the outer circle with a label near each end.

  % Veracity: upper right, 0 to 90 degrees, darker wedge up to 60 degrees.
  \begin{scope}[shift={(\quadGap,\quadGap)}]
    \draw[fill=orange!20] (0:\sliceRadius) arc (0:90:\sliceRadius) |- cycle;
    \draw[fill=orange!40] (0:\sliceRadius) arc (0:60:\sliceRadius) -- (0,0) -- cycle;
    \node at (25:0.6*\sliceRadius) {Veracity};
    \draw[gauge arrow] (0:\arrowRadius) arc (0:90:\arrowRadius)
      node[pos=0.05, above right] {high variance}
      node[pos=0.95, above right] {reference data};
  \end{scope}

  % Volume: upper left, 90 to 180 degrees, darker wedge up to 100 degrees.
  \begin{scope}[shift={(-\quadGap,\quadGap)}]
    \draw[fill=blue!20] (90:\sliceRadius) arc (90:180:\sliceRadius) -| cycle;
    \draw[fill=blue!40] (90:\sliceRadius) arc (90:100:\sliceRadius) -- (0,0) -- cycle;
    \node at (150:0.6*\sliceRadius) {Volume};
    \draw[gauge arrow] (90:\arrowRadius) arc (90:180:\arrowRadius)
      node[pos=0.05, above left] {kilobytes}
      node[pos=0.95, above left] {terabytes};
  \end{scope}

  % Velocity: lower left, 180 to 270 degrees, darker wedge up to 190 degrees.
  \begin{scope}[shift={(-\quadGap,-\quadGap)}]
    \draw[fill=green!20] (180:\sliceRadius) arc (180:270:\sliceRadius) |- cycle;
    \draw[fill=green!40] (180:\sliceRadius) arc (180:190:\sliceRadius) -- (0,0) -- cycle;
    \node at (230:0.6*\sliceRadius) {Velocity};
    % 180:270 describes the same counter-clockwise sweep as -180:-90.
    \draw[gauge arrow] (180:\arrowRadius) arc (180:270:\arrowRadius)
      node[pos=0.05, below left] {static}
      node[pos=0.95, below left] {dynamic};
  \end{scope}

  % Variety: lower right, 270 to 360 degrees, darker wedge up to 290 degrees.
  \begin{scope}[shift={(\quadGap,-\quadGap)}]
    \draw[fill=yellow!20] (270:\sliceRadius) arc (270:360:\sliceRadius) -| cycle;
    \draw[fill=yellow!40] (270:\sliceRadius) arc (270:290:\sliceRadius) -- (0,0) -- cycle;
    \node at (330:0.6*\sliceRadius) {Variety};
    % 270:360 describes the same counter-clockwise sweep as -90:0.
    \draw[gauge arrow] (270:\arrowRadius) arc (270:360:\arrowRadius)
      node[pos=0.05, below right] {clustered}
      node[pos=0.95, below right] {heterogeneous};
  \end{scope}

\end{tikzpicture}
\end{document}

  Typst

four-vs-of-data.typ (100 lines)

#import "@preview/cetz:0.3.2": canvas, draw
#import "@preview/cetz-plot:0.1.1": chart

// Let the page shrink to the drawing, leaving an 8pt margin.
#set page(width: auto, height: auto, margin: 8pt)

#canvas({
  import draw: *

  let radius = 3
  // The arrows run on a circle slightly outside the pie.
  let arrow-radius = radius * 1.15

  // One row per quadrant: slice-key prefix, visible title, base colour.
  let quadrants = (
    ("veracity", "Veracity", rgb("#FFA500")), // orange
    ("volume", "Volume", rgb("#0000FF")), // blue
    ("velocity", "Velocity", rgb("#00FF00")), // green
    ("variety", "Variety", rgb("#FFFF00")), // yellow
  )

  // Expand each quadrant into two slices: a titled main slice of weight 75
  // and an untitled, darker sub-slice of weight 15. Each slice tuple is
  // (key, value, label, fill), and the eight tuples stay in quadrant order.
  let data = quadrants.map(quadrant => {
    let (key, title, base) = quadrant
    (
      (key + "-main", 75, title, base.lighten(80%)),
      (key + "-sub", 15, "", base.lighten(60%)),
    )
  }).join()

  // The fill of every slice is the fourth entry of its tuple.
  let slice-fills = data.map(slice => slice.at(3))

  // Pie chart: values come from tuple index 1, labels from index 2.
  chart.piechart(
    data,
    value-key: 1,
    label-key: 2,
    radius: radius,
    slice-style: slice-fills,
    stroke: black + .8pt,
    inner-label: (
      content: (value, label) => text(weight: "regular")[#label],
      radius: 120%,
    ),
    outer-label: (
      content: (),
    ),
    legend: (label: ()),
  )

  let arrow-style = (
    stroke: black + .8pt,
    mark: (end: "stealth", fill: black, offset: 5pt, scale: .75),
  )

  // Quarter-circle arrow: a 90 degree arc of radius `arrow-radius` that
  // begins at `origin` and sweeps from `start-angle`. It is registered as
  // `name` so labels can be attached to points along it.
  let quadrant-arrow(name, origin, start-angle) = arc(
    origin,
    start: start-angle,
    stop: start-angle + 90deg,
    radius: arrow-radius,
    ..arrow-style,
    name: name,
  )

  // Small label placed at `target`; any further named arguments (anchor,
  // padding) are forwarded to `content` unchanged.
  let arrow-note(target, body, ..placement) = content(
    target,
    text(size: .8em, body),
    ..placement,
  )

  // Veracity (0 to 90 degrees)
  quadrant-arrow("veracity", (arrow-radius, 0), 0deg)
  arrow-note("veracity.5%", [high variance], anchor: "south-west", padding: 3pt)
  arrow-note("veracity.95%", [reference data], anchor: "south-west", padding: 3pt)

  // Volume (90 to 180 degrees); the first label is placed without padding.
  quadrant-arrow("volume", (0, arrow-radius), 90deg)
  arrow-note("volume.5%", [kilobytes], anchor: "south-east")
  arrow-note("volume.95%", [terabytes], anchor: "south-east", padding: 3pt)

  // Velocity (180 to 270 degrees)
  quadrant-arrow("velocity", (-arrow-radius, 0), 180deg)
  arrow-note("velocity.5%", [static], anchor: "east", padding: 3pt)
  arrow-note("velocity.95%", [dynamic], anchor: "north-east", padding: 3pt)

  // Variety (270 to 360 degrees); the first label is attached to the arc's
  // start anchor rather than the 5% position.
  quadrant-arrow("variety", (0, -arrow-radius), 270deg)
  arrow-note("variety.start", [clustered], anchor: "north-west", padding: 3pt)
  arrow-note("variety.95%", [heterogeneous], anchor: "north-west", padding: 3pt)
})