Large Java Heap with the G1 Collector – Part 1
Experimental Method
Goals
- Demonstrate maximum feasible JVM size on current cloud hardware (specific to Amazon for now) using the G1 Garbage Collector (link).
- Vary the JVM heap size (-Xmx) exponentially to find performance profile and breaking points.
- Vary the ratio of new versus old generation objects exponentially.
- Use an in-memory workload to stress the JVM (avoids network or disk waits).
- Produce replicable results on commodity hardware, open source operating systems, and open source tools.
- Provide gap-free data for analysis, in spite of garbage collection pauses.
Not (Yet) in Scope
As a follow-up to this study, subsequent efforts may include:
- Vary the number of old objects up to maximum possible memory capacity.
- Vary the number of processing threads to starve the concurrent G1 algorithm of CPU cycles.
- Vary the object size.
- Vary the field size within the object.
- Vary the G1 JVM parameters.
Tools and Versions
- Amazon Linux AMI (link)
- One cr1.8xlarge instance (link): 244 GiB RAM, 2 x Intel Xeon E5-2670 (link)
- Oracle JDK 1.7.0_15
JVM Parameters
This experiment varied the JVM heap size but kept other parameters constant.
- -Xmx16g, -Xmx32g, -Xmx64g, -Xmx128g, -Xmx212g
- -XX:+UseG1GC
- -XX:InitiatingHeapOccupancyPercent=0
- -XX:MaxGCPauseMillis=200
- -XX:MaxTenuringThreshold=25
- -XX:ParallelGCThreads=32
- -XX:ConcGCThreads=32
- -XX:G1ReservePercent=10
- -XX:G1HeapRegionSize=32m
Program Parameters
These parameters were kept constant for this study.
- statInterval = 1000: the reporting interval for measures in milliseconds.
- numThreads = 24: number of processing threads.
- objectPerThread = 8,000,000: number of objects each thread produces in each phase.
- maxOld = 800,000: max number of old references to maintain, after which old references are overwritten with new values.
Experimental Phases
For each JVM heap size, the following phases are executed. In each phase, once maxOld object references have been stored, further writes overwrite existing references, and the objects they replace become garbage that must be collected.
- RANDOM_WRITE_0: All of the objects produced are instant garbage, references not kept.
- RANDOM_WRITE_1: All object references kept.
- RANDOM_WRITE_2: 1/2 of object references kept.
- RANDOM_WRITE_4: 1/4 of object references kept.
- RANDOM_WRITE_8: 1/8 of object references kept.
Experiment Results
The graph above shows the sum total of *all* garbage collection pauses across each experimental phase.
The graph above shows the *min* throughput for each second across each experimental phase.
The line chart above shows log scale throughput versus standard scale garbage collection pause across all seconds in each experimental phase.
The radar chart above shows log scale throughput versus standard scale garbage collection pause across all seconds in each experimental phase.
Addenda
Java garbage collection test class:
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicLong;
/**
 * In-memory allocation workload used to observe garbage collection pauses.
 *
 * <p>
 * A fixed number of {@link GarbageThread}s move in lock step through the
 * phases listed in {@link ExperimentPhase}, synchronising on a shared
 * {@link CyclicBarrier} before each one. A {@link StatThread} prints one
 * tab-separated row per reporting interval with the configuration name, the
 * current phase, a running interval number, the number of operations counted
 * and the time by which the interval overran (reported as the pause).
 *
 * <p>
 * Usage: {@code GCTest <configName> <statInterval> <numThreads>
 * <objectPerThread> <maxOld>}. With no arguments the field defaults are used.
 */
public class GCTest {

    /** Label printed in the first column of every stats row. */
    public static String configName = "Test";

    /** Reporting interval in milliseconds. */
    public static int statInterval = 1000;

    /** Number of worker threads (and barrier parties). */
    public static int numThreads = 4;

    /** Number of documents each worker creates in every random phase. */
    public static int objectPerThread = 500000;

    /** Number of slots available for retained ("old") references. */
    public static int maxOld = 1000000;

    /** Phases in execution order; the barrier action advances through them. */
    public enum ExperimentPhase {
        SEQUENTIAL_WRITE, SEQUENTIAL_CLEAR, RANDOM_WRITE_0, RANDOM_WRITE_8, RANDOM_WRITE_4, RANDOM_WRITE_2, RANDOM_WRITE_1, RANDOM_READ
    }

    /** Index into {@link ExperimentPhase#values()} of the next phase to start. */
    public static int phaseIdx;

    /**
     * Phase currently being executed. Volatile because it is written by the
     * barrier action on a worker thread and read by the stat thread.
     */
    public static volatile ExperimentPhase phase;

    /** Barrier all workers pass before each phase; created in {@link #main}. */
    public static CyclicBarrier barrier;

    /**
     * One entry per random-write phase, in the same order as the RANDOM_WRITE_*
     * constants: 0 retains nothing, N retains every N-th document.
     */
    public static int[] oldToYoungRatios = new int[] { 0, 8, 4, 2, 1 };

    /** Set once the workers have finished so the stat thread stops. */
    public static volatile boolean done = false;

    /** Operations counted since the stat thread last sampled. */
    public static AtomicLong totalOfInterval = new AtomicLong();

    /**
     * Store used by the sequential phases. Allocated in {@link #main} once
     * {@link #maxOld} is final, so that its length always matches the value the
     * workers iterate over.
     */
    public static Object[] oldArr;

    /** Store used by the random phases; allocated in {@link #main}. */
    public static ConcurrentHashMap<Integer, Map<String, Object>> oldMap;

    /**
     * Generates maximum amount of objects with nested references. Saves some
     * references so they go to the old generation.
     *
     * @author pouttum
     */
    public static class GarbageThread extends Thread {

        /** Zero-based worker number; selects this worker's stripe of oldArr. */
        protected int threadNo;

        public GarbageThread(int threadNo) {
            super();
            this.threadNo = threadNo;
        }

        @Override
        public void run() {
            await();
            // SEQUENTIAL_WRITE: fill this worker's stripe of the array.
            for (int x = 0; x < maxOld; x++) {
                if (x % numThreads == threadNo) {
                    oldArr[x] = getDoc(x);
                    totalOfInterval.incrementAndGet();
                }
            }
            await();
            // SEQUENTIAL_CLEAR: drop every reference written above.
            for (int x = 0; x < maxOld; x++) {
                if (x % numThreads == threadNo) {
                    oldArr[x] = null;
                    totalOfInterval.incrementAndGet();
                }
            }
            // RANDOM_WRITE_*: create documents, retaining a configured fraction.
            for (int r = 0; r < oldToYoungRatios.length; r++) {
                await();
                int ratio = oldToYoungRatios[r];
                for (int x = 0; x < objectPerThread; x++) {
                    Map<String, Object> doc = getDoc(x);
                    totalOfInterval.incrementAndGet();
                    // ratio 0 keeps nothing; ratio 1 keeps everything because
                    // x % 1 is always 0.
                    if (ratio > 0 && x % ratio == 0) {
                        oldMap.put(randomIndex(), doc);
                    }
                }
            }
            await();
            // RANDOM_READ: look up random slots without allocating documents.
            for (int x = 0; x < objectPerThread; x++) {
                totalOfInterval.incrementAndGet();
                oldMap.get(randomIndex());
            }
        }

        /**
         * Waits for all workers at the shared barrier.
         *
         * @throws IllegalStateException
         *             if the wait is interrupted or the barrier is broken;
         *             continuing would run this worker out of step with the
         *             phase label printed by the stat thread
         */
        protected void await() {
            try {
                barrier.await();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Worker " + threadNo
                        + " interrupted while waiting for phase start", e);
            } catch (BrokenBarrierException e) {
                throw new IllegalStateException("Phase barrier broken for worker "
                        + threadNo, e);
            }
        }

        /**
         * Builds a small document of nine string entries derived from x.
         *
         * @param x
         *            number appended to every value
         * @return a new map with keys value1..value9
         */
        protected HashMap<String, Object> getDoc(int x) {
            // Kept unrolled with literal keys so that the only per-document
            // allocations are the map, its entries and the value strings.
            HashMap<String, Object> doc = new HashMap<String, Object>();
            doc.put("value1", "value1" + String.valueOf(x));
            doc.put("value2", "value2" + String.valueOf(x));
            doc.put("value3", "value3" + String.valueOf(x));
            doc.put("value4", "value4" + String.valueOf(x));
            doc.put("value5", "value5" + String.valueOf(x));
            doc.put("value6", "value6" + String.valueOf(x));
            doc.put("value7", "value7" + String.valueOf(x));
            doc.put("value8", "value8" + String.valueOf(x));
            doc.put("value9", "value9" + String.valueOf(x));
            return doc;
        }

        /** @return a value in [0.0, 1.0) */
        protected double random() {
            return Math.random();
        }

        /**
         * Picks a slot for the random phases.
         *
         * @return an index in [0, maxOld), or 0 when maxOld is 0
         */
        protected int randomIndex() {
            // Truncation (not ceil) keeps the result inside 0..maxOld-1.
            return (int) (random() * maxOld);
        }
    }

    /**
     * Calculates ongoing stats and keeps history on the stat interval.
     *
     * @author pouttum
     */
    public static class StatThread extends Thread {
        @Override
        public void run() {
            final double intervalLength = statInterval / 1000d;
            // nanoTime is used for elapsed time so that wall-clock adjustments
            // are not reported as pauses.
            long previousNanos = System.nanoTime();
            long sleepMillis = statInterval;
            int intervalCount = 0;
            do {
                try {
                    Thread.sleep(sleepMillis);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    done = true;
                }
                sleepMillis = statInterval;
                long intervalTotal = totalOfInterval.getAndSet(0L);
                long nowNanos = System.nanoTime();
                double intervalSeconds = (nowNanos - previousNanos) / 1e9;
                StringBuilder stats = new StringBuilder(1024);
                double gcPause = intervalSeconds - intervalLength;
                // When more than two whole intervals went by, emit one
                // zero-count row per missed interval so the series has no gaps.
                for (double x = intervalLength * 2; x < intervalSeconds; x += intervalLength) {
                    stats.append(String.format("%s\t%s\t%d\t%d\t%.3f\n",
                            configName, phase, ++intervalCount, 0,
                            intervalLength));
                    gcPause -= intervalLength;
                }
                if (gcPause > 0.0d) {
                    // Credit the next interval with some of the count of this
                    // interval, and shorten the next sleep by the overrun.
                    sleepMillis = Math.max(0L,
                            statInterval - Math.round(gcPause * 1000d));
                    long carried = Math.min(intervalTotal,
                            Math.round((gcPause / intervalLength) * intervalTotal));
                    intervalTotal -= carried;
                    totalOfInterval.addAndGet(carried);
                }
                stats.append(String.format("%s\t%s\t%d\t%d\t%.3f\n",
                        configName, phase, ++intervalCount, intervalTotal,
                        Math.max(gcPause, 0.0d)));
                previousNanos = nowNanos;
                System.out.print(stats.toString());
            } while (!done);
        }
    }

    /**
     * Runs every phase once and prints the stats rows to standard output.
     *
     * @param args
     *            either empty (use defaults) or exactly: configName,
     *            statInterval, numThreads, objectPerThread, maxOld
     * @throws IllegalArgumentException
     *             if a numeric setting is out of range
     * @throws Exception
     *             if a numeric argument cannot be parsed or the main thread is
     *             interrupted while waiting for the run to finish
     */
    public static void main(String[] args) throws Exception {
        if (args.length == 5) {
            configName = args[0];
            statInterval = Integer.parseInt(args[1]);
            numThreads = Integer.parseInt(args[2]);
            objectPerThread = Integer.parseInt(args[3]);
            maxOld = Integer.parseInt(args[4]);
        } else if (args.length != 0) {
            // Reported on stderr so the tab-separated output stays clean.
            System.err.println("Usage: GCTest <configName> <statInterval> <numThreads>"
                    + " <objectPerThread> <maxOld>; got " + args.length
                    + " argument(s), using defaults");
        }
        if (statInterval < 1 || numThreads < 1 || objectPerThread < 0 || maxOld < 0) {
            throw new IllegalArgumentException("Invalid settings: statInterval="
                    + statInterval + ", numThreads=" + numThreads
                    + ", objectPerThread=" + objectPerThread + ", maxOld=" + maxOld);
        }

        // Reset run state so that main can be invoked more than once per JVM.
        phaseIdx = 0;
        phase = null;
        done = false;
        totalOfInterval.set(0L);

        // Size the stores only now that maxOld has its final value.
        oldArr = new Object[maxOld];
        oldMap = new ConcurrentHashMap<Integer, Map<String, Object>>(maxOld);

        barrier = new CyclicBarrier(numThreads, new Runnable() {
            @Override
            public void run() {
                phase = ExperimentPhase.values()[phaseIdx++];
            }
        });

        GarbageThread[] threads = new GarbageThread[numThreads];
        for (int x = 0; x < threads.length; x++) {
            threads[x] = new GarbageThread(x);
            threads[x].start();
        }

        StatThread statThread = new StatThread();
        statThread.setPriority(Thread.MAX_PRIORITY);
        statThread.start();

        try {
            for (int x = 0; x < threads.length; x++) {
                threads[x].join();
            }
        } finally {
            // Always release the stat thread, even if the join is interrupted.
            done = true;
        }
        statThread.join();
    }
}
Compile the code:
jdk1.7.0_15/bin/javac GCTest.java
Unix script run.sh:
#!/bin/sh
# Runs GCTest once per heap size (16g, 32g, 64g, 128g, 212g) with identical G1
# settings and program arguments. The first run truncates baseline.csv; the
# remaining runs append to it.

JAVA=jdk1.7.0_15/bin/java

# G1 settings shared by every run.
G1_OPTS="-XX:+UseG1GC -XX:InitiatingHeapOccupancyPercent=0 -XX:MaxGCPauseMillis=200 -XX:MaxTenuringThreshold=25 -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1ReservePercent=10 -XX:G1HeapRegionSize=32m"

# Program arguments after the config name:
# statInterval(ms) numThreads objectPerThread maxOld
ARGS="1000 24 8000000 800000"

$JAVA -server -Xms16g -Xmx16g $G1_OPTS GCTest mxg16 $ARGS > baseline.csv
$JAVA -server -Xms32g -Xmx32g $G1_OPTS GCTest mxg32 $ARGS >> baseline.csv
$JAVA -server -Xms64g -Xmx64g $G1_OPTS GCTest mxg64 $ARGS >> baseline.csv
$JAVA -server -Xms128g -Xmx128g $G1_OPTS GCTest mxg128 $ARGS >> baseline.csv
# NOTE(review): this run is labelled old_ratio_8 rather than mxg212 like the
# other heap sizes; label kept as published -- confirm it is intended.
$JAVA -server -Xms212g -Xmx212g $G1_OPTS GCTest old_ratio_8 $ARGS >> baseline.csv




